Skip to content

Commit 9341d29

Browse files
authored
debug nv imgs (#124)
* debug nv imgs * update img build * update 124 -> 128 * debug python * update sync imgs
1 parent f5493d7 commit 9341d29

File tree

4 files changed

+23
-22
lines changed

4 files changed

+23
-22
lines changed

.github/workflows/build-docker-gpu.yml

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ concurrency:
1818
env:
1919
BUILDKIT_PROGRESS: "plain" # Full logs for CI build.
2020
REGISTRY_SRC: ${{ vars.REGISTRY_SRC || 'docker.io' }} # For BASE_NAMESPACE of images: where to pull base images from, docker.io or other source registry URL.
21-
REGISTRY_DST: ${{ vars.REGISTRY_DST || 'docker.io' }} # For tags of built images: where to push images to, docker.io or other destination registry URL.
21+
REGISTRY_DST: ${{ vars.REGISTRY_DST || 'quay.io' }} # For tags of built images: where to push images to, docker.io or other destination registry URL.
2222
# DOCKER_REGISTRY_USERNAME and DOCKER_REGISTRY_PASSWORD are required for docker image push; they should be set in CI secrets.
2323
DOCKER_REGISTRY_USERNAME: ${{ vars.DOCKER_REGISTRY_USERNAME }}
2424
DOCKER_REGISTRY_PASSWORD: ${{ secrets.DOCKER_REGISTRY_PASSWORD }}
@@ -33,31 +33,31 @@ jobs:
3333
# latest cuda supported by paddlepaddle: https://www.paddlepaddle.org.cn/
3434
# latest cuda supported by vllm: https://docs.vllm.ai/en/latest/getting_started/installation/gpu.html?device=cuda
3535
qpod_cuda_126:
36-
name: 'cuda_12.6,cuda'
36+
name: 'cuda_12.6,cuda,nvidia-cuda'
3737
runs-on: ubuntu-latest
3838
steps:
3939
- uses: actions/checkout@v4
4040
- run: |
4141
source ./tool.sh
42-
build_image tmp latest docker_atom/Dockerfile --build-arg "BASE_IMG=nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04" && clear_images nvidia/cuda
42+
build_image_no_tag nvidia-cuda 12.6.3-cudnn-devel-ubuntu24.04 docker_atom/Dockerfile --build-arg "BASE_IMG=nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04" && clear_images nvidia/cuda
4343
export IMG_PREFIX_SRC="${IMG_PREFIX_DST}"
44-
build_image tmp latest docker_base/Dockerfile --build-arg "BASE_IMG=tmp"
44+
build_image_no_tag tmp latest docker_base/Dockerfile --build-arg "BASE_IMG=nvidia-cuda:12.6.3-cudnn-devel-ubuntu24.04"
4545
build_image cuda_12.6 latest docker_cuda/nvidia-cuda.Dockerfile --build-arg "BASE_IMG=tmp"
4646
alias_image cuda_12.6 latest cuda latest
4747
push_image cuda
4848
4949
# reserved for vllm: https://github.com/vllm-project/vllm/blob/main/docker/Dockerfile
50-
qpod_cuda_124:
51-
name: 'cuda_12.4'
50+
qpod_cuda_128:
51+
name: 'cuda_12.8'
5252
runs-on: ubuntu-latest
5353
steps:
5454
- uses: actions/checkout@v4
5555
- run: |
5656
source ./tool.sh
57-
build_image tmp latest docker_atom/Dockerfile --build-arg "BASE_IMG=nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04" && clear_images nvidia/cuda
57+
build_image_no_tag nvidia-cuda 12.8.1-cudnn-devel-ubuntu24.04 docker_atom/Dockerfile --build-arg "BASE_IMG=nvidia/cuda:12.8.1-cudnn-devel-ubuntu24.04" && clear_images nvidia/cuda
5858
export IMG_PREFIX_SRC="${IMG_PREFIX_DST}"
59-
build_image tmp latest docker_base/Dockerfile --build-arg "BASE_IMG=tmp"
60-
build_image cuda_12.4 latest docker_cuda/nvidia-cuda.Dockerfile --build-arg "BASE_IMG=tmp"
59+
build_image_no_tag tmp latest docker_base/Dockerfile --build-arg "BASE_IMG=nvidia-cuda:12.8.1-cudnn-devel-ubuntu24.04"
60+
build_image cuda_12.8 latest docker_cuda/nvidia-cuda.Dockerfile --build-arg "BASE_IMG=tmp"
6161
push_image cuda
6262
6363
# reserved for paddlepaddle 2.6: https://www.paddlepaddle.org.cn
@@ -68,9 +68,9 @@ jobs:
6868
- uses: actions/checkout@v4
6969
- run: |
7070
source ./tool.sh
71-
build_image tmp latest docker_atom/Dockerfile --build-arg "BASE_IMG=nvidia/cuda:12.0.1-cudnn8-devel-ubuntu22.04" && clear_images nvidia/cuda
71+
build_image_no_tag nvidia-cuda 12.0.1-cudnn8-devel-ubuntu22.04 docker_atom/Dockerfile --build-arg "BASE_IMG=nvidia/cuda:12.0.1-cudnn8-devel-ubuntu22.04" && clear_images nvidia/cuda
7272
export IMG_PREFIX_SRC="${IMG_PREFIX_DST}"
73-
build_image tmp latest docker_base/Dockerfile --build-arg "BASE_IMG=tmp"
73+
build_image_no_tag tmp latest docker_base/Dockerfile --build-arg "BASE_IMG=nvidia-cuda:12.0.1-cudnn8-devel-ubuntu22.04"
7474
build_image cuda_12.0 latest docker_cuda/nvidia-cuda.Dockerfile --build-arg "BASE_IMG=tmp"
7575
push_image cuda
7676
@@ -82,9 +82,9 @@ jobs:
8282
- uses: actions/checkout@v4
8383
- run: |
8484
source ./tool.sh
85-
build_image tmp latest docker_atom/Dockerfile --build-arg "BASE_IMG=nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04" && clear_images nvidia/cuda
85+
build_image_no_tag nvidia-cuda 11.8.0-cudnn8-devel-ubuntu22.04 docker_atom/Dockerfile --build-arg "BASE_IMG=nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04" && clear_images nvidia/cuda
8686
export IMG_PREFIX_SRC="${IMG_PREFIX_DST}"
87-
build_image tmp latest docker_base/Dockerfile --build-arg "BASE_IMG=tmp"
87+
build_image_no_tag tmp latest docker_base/Dockerfile --build-arg "BASE_IMG=nvidia-cuda:11.8.0-cudnn8-devel-ubuntu22.04"
8888
build_image cuda_11.8 latest docker_cuda/nvidia-cuda.Dockerfile --build-arg "BASE_IMG=tmp"
8989
push_image cuda
9090
@@ -97,9 +97,9 @@ jobs:
9797
- uses: actions/checkout@v4
9898
- run: |
9999
source ./tool.sh
100-
build_image tmp latest docker_atom/Dockerfile --build-arg "BASE_IMG=nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04" && clear_images nvidia/cuda
100+
build_image_no_tag nvidia-cuda 11.2.2-cudnn8-devel-ubuntu20.04 docker_atom/Dockerfile --build-arg "BASE_IMG=nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04" && clear_images nvidia/cuda
101101
export IMG_PREFIX_SRC="${IMG_PREFIX_DST}"
102-
build_image tmp latest docker_base/Dockerfile --build-arg "BASE_IMG=tmp" --build-arg "PYTHON_VERSION=3.8"
102+
build_image_no_tag tmp latest docker_base/Dockerfile --build-arg "BASE_IMG=nvidia-cuda:11.2.2-cudnn8-devel-ubuntu20.04" --build-arg "PYTHON_VERSION=3.8"
103103
build_image cuda_11.2 latest docker_cuda/nvidia-cuda.Dockerfile --build-arg "BASE_IMG=tmp"
104104
push_image cuda
105105
@@ -165,15 +165,15 @@ jobs:
165165
alias_image py-nlp-cuda126 latest py-nlp latest
166166
push_image
167167
168-
qpod_py-nlp-cuda124:
169-
name: 'py-nlp-cuda124'
170-
needs: qpod_cuda_124
168+
qpod_py-nlp-cuda128:
169+
name: 'py-nlp-cuda128'
170+
needs: qpod_cuda_128
171171
runs-on: ubuntu-latest
172172
steps:
173173
- uses: actions/checkout@v4
174174
- run: |
175175
source ./tool.sh && export IMG_PREFIX_SRC="${IMG_PREFIX_DST}"
176-
build_image py-nlp-cuda124 latest docker_core/Dockerfile --build-arg "BASE_IMG=cuda_12.4" --build-arg "ARG_PROFILE_PYTHON=datascience,mkl,torch,nlp"
176+
build_image py-nlp-cuda128 latest docker_core/Dockerfile --build-arg "BASE_IMG=cuda_12.8" --build-arg "ARG_PROFILE_PYTHON=datascience,mkl,torch,nlp"
177177
push_image
178178
179179

.github/workflows/build-docker.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ concurrency:
1818
env:
1919
BUILDKIT_PROGRESS: "plain" # Full logs for CI build.
2020
REGISTRY_SRC: ${{ vars.REGISTRY_SRC || 'docker.io' }} # For BASE_NAMESPACE of images: where to pull base images from, docker.io or other source registry URL.
21-
REGISTRY_DST: ${{ vars.REGISTRY_DST || 'docker.io' }} # For tags of built images: where to push images to, docker.io or other destination registry URL.
21+
REGISTRY_DST: ${{ vars.REGISTRY_DST || 'quay.io' }} # For tags of built images: where to push images to, docker.io or other destination registry URL.
2222
# DOCKER_REGISTRY_USERNAME and DOCKER_REGISTRY_PASSWORD are required for docker image push; they should be set in CI secrets.
2323
DOCKER_REGISTRY_USERNAME: ${{ vars.DOCKER_REGISTRY_USERNAME }}
2424
DOCKER_REGISTRY_PASSWORD: ${{ secrets.DOCKER_REGISTRY_PASSWORD }}

docker_atom/work/script-setup.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ trusted-host=pypi.python.org pypi.org files.pythonhosted.org
3232
EOF
3333
fi
3434

35-
echo 'export PATH=${PATH}:${CONDA_PREFIX:-"/opt/conda"}/bin' >> /etc/profile.d/path-conda.sh
35+
echo 'export PATH=${CONDA_PREFIX:-"/opt/conda"}/bin:${PATH}' >> /etc/profile.d/path-conda.sh
3636
ln -sf "${CONDA_PREFIX}/bin/conda" /usr/bin/
3737

3838
conda config --system --prepend channels conda-forge \

docker_base/Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ ARG PYTHON_VERSION="3.12"
1212
ENV CONDA_PREFIX=/opt/conda
1313

1414
RUN set -eux && source /opt/utils/script-setup.sh && source /opt/utils/script-setup-sys.sh \
15-
&& export PATH=$PATH:${CONDA_PREFIX}/bin \
15+
&& export PATH=${CONDA_PREFIX}/bin:$PATH \
1616
&& install_apt /opt/utils/install_list_base.apt \
1717
&& echo "Install tini:" && setup_tini \
1818
&& export SYS_PY_REPLACE=${SYS_PY_REPLACE} \
@@ -32,6 +32,7 @@ RUN set -eux && source /opt/utils/script-setup.sh && source /opt/utils/script-se
3232
&& sed -i "s/${PYTHON_VERSION_DEFAULT}/${PYTHON_VERSION}/g" /usr/share/python3/debian_defaults \
3333
&& echo "/usr/share/pyshared/" >> "${PYTHON_PTH_FILE}" \
3434
&& echo "/usr/share/python3/" >> "${PYTHON_PTH_FILE}" \
35+
&& cp -rf /usr/lib/python3/dist-packages/* ./ \
3536
&& rm -rf $(/usr/bin/python3 -c 'import sys; print(" ".join(i for i in sys.path if "python" in i))') /usr/bin/python3* /usr/lib/python${PYTHON_VERSION} \
3637
&& rm -rf /usr/lib/python${PYTHON_VERSION} && ln -sf "${CONDA_PREFIX}"/lib/python${PYTHON_VERSION} /usr/lib/ ; \
3738
else \

0 commit comments

Comments
 (0)