@@ -115,29 +115,33 @@ jobs:
   build-and-test-llama3:
     name: Build and test LLama3.2 1B
     runs-on: ubicloud-gpu-standard-1-latest
+    container:
+      image: nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04
     env:
-      HF_TOKEN: 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
+      # Needs a repository secret named HF_TOKEN. GitHub redacts secret values
+      # from logs on its own, so the explicit add-mask step is dropped.
+      HF_TOKEN: ${{ secrets.HF_TOKEN }}
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
-      - run: echo "::add-mask::$HF_TOKEN "
 
       - name: Install OpenMP
-        run: sudo apt-get update && sudo apt-get install -y libomp-dev
+        # no sudo here: assumes commands in the container job run as root
+        run: apt-get update && apt-get install -y libomp-dev libopenmpi-dev python3-pip
 
       - name: Install dependencies
         run: pip install -r requirements.txt
 
       - name: Run preprocessing
-        run: python dev/data/tinyshakespeare.py --model_desc llama-3
+        run: python3 dev/data/tinyshakespeare.py --model_desc llama-3
 
       - name: Train model
         # use the first 10 layers, so that everything fits into the 20GB of
         # the A4000 Ada that we have in CI
-        run: python train_llama3.py --write_tensors 1 --dtype float32 --depth 10
+        run: python3 train_llama3.py --write_tensors 1 --dtype float32 --depth 10
 
       - name: Build FP32 precision
-        run: PRECISION=FP32 make test_llama3cu
+        run: PRECISION=FP32 NO_MULTI_GPU=1 make test_llama3cu
 
       - name: Run default
         run: ./test_llama3cu
@@ -149,7 +149,7 @@ jobs:
         run: ./test_llama3cu -r 2
 
       - name: Build BF16 precision
-        run: PRECISION=BF16 make train_llama3cu test_llama3cu
+        run: PRECISION=BF16 NO_MULTI_GPU=1 make train_llama3cu test_llama3cu
 
       - name: Run default (BF16)
         run: ./test_llama3cu
@@ -166,15 +166,16 @@ jobs:
   build-and-test-llama3-untied:
     name: Build and test LLama3.2 1B with untie weights
     runs-on: ubicloud-gpu-standard-1-latest
     env:
-      HF_TOKEN: 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
+      # Needs a repository secret named HF_TOKEN. The later steps of this job
+      # are not touched by this change; presumably they still read it - confirm.
+      HF_TOKEN: ${{ secrets.HF_TOKEN }}
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
-      - run: echo "::add-mask::$HF_TOKEN"
 
       - name: Install OpenMP
-        run: sudo apt-get update && sudo apt-get install -y libomp-dev
+        run: sudo apt-get update && sudo apt-get install -y libomp-dev git
 
       - name: Install dependencies
         run: pip install -r requirements.txt
@@ -202,7 +199,7 @@ jobs:
           git clone https://github.com/NVIDIA/cudnn-frontend.git
 
       - name: Build with cuDNN
-        run: USE_CUDNN=1 PRECISION=BF16 make train_llama3cu test_llama3cu
+        run: USE_CUDNN=1 PRECISION=BF16 NO_MULTI_GPU=1 make train_llama3cu test_llama3cu
 
       - name: Train model with cuDNN
         run: ./train_llama3cu
0 commit comments