@@ -114,6 +114,8 @@ jobs:
114114
115115 build-and-test-llama3 :
116116 runs-on : ubicloud-gpu-standard-1-latest
117+ container :
118+ image : nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04
117119 env :
118120 HF_TOKEN : ${{ secrets.HF_TOKEN }}
119121 steps :
@@ -122,19 +124,19 @@ jobs:
122124 - run : echo "::add-mask::$HF_TOKEN"
123125
124126 - name : Install OpenMP
125- run : sudo apt-get update && sudo apt-get install -y libomp-dev
127+ run : apt-get update && apt-get install -y libomp-dev libopenmpi-dev python3-pip
126128
127129 - name : Install dependencies
128130 run : pip install -r requirements.txt
129131
130132 - name : Run preprocessing
131- run : python dev/data/tinyshakespeare.py --model_desc llama-3
133+ run : python3 dev/data/tinyshakespeare.py --model_desc llama-3
132134
133135 - name : Train model
134- run : python train_llama3.py --write_tensors 1 --dtype float32 --offload 1
136+ run : python3 train_llama3.py --write_tensors 1 --dtype float32 --offload 1
135137
136138 - name : Build FP32 precision
137- run : PRECISION=FP32 make test_llama3cu
139+ run : PRECISION=FP32 NO_MULTI_GPU=1 make test_llama3cu
138140
139141 - name : Run default
140142 run : ./test_llama3cu
@@ -146,7 +148,7 @@ jobs:
146148 run : ./test_llama3cu -r 2
147149
148150 - name : Build BF16 precision
149- run : PRECISION=BF16 make train_llama3cu test_llama3cu
151+ run : PRECISION=BF16 NO_MULTI_GPU=1 make train_llama3cu test_llama3cu
150152
151153 - name : Run default
152154 run : ./test_llama3cu
@@ -165,7 +167,7 @@ jobs:
165167 git clone https://github.com/NVIDIA/cudnn-frontend.git
166168
167169 - name : Build with cuDNN
168- run : USE_CUDNN=1 PRECISION=BF16 make train_llama3cu test_llama3cu
170+ run : USE_CUDNN=1 PRECISION=BF16 NO_MULTI_GPU=1 make train_llama3cu test_llama3cu
169171
170172 - name : Train model with cuDNN
171173 run : ./train_llama3cu
0 commit comments