@@ -150,8 +150,8 @@ ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0 12.0'
 ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
 # ################### BASE BUILD IMAGE ####################

-# ################### WHEEL BUILD IMAGE ####################
-FROM base AS build
+# ################### CSRC BUILD IMAGE ####################
+FROM base AS csrc-build
 ARG TARGETPLATFORM

 ARG PIP_INDEX_URL UV_INDEX_URL
@@ -172,10 +172,13 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
     --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.' )

-COPY . .
-ARG GIT_REPO_CHECK=0
-RUN --mount=type=bind,source=.git,target=.git \
-    if [ "$GIT_REPO_CHECK" != "0" ]; then bash tools/check_repo.sh ; fi
+WORKDIR /workspace
+
+COPY pyproject.toml setup.py CMakeLists.txt ./
+COPY cmake cmake/
+COPY csrc csrc/
+COPY vllm/envs.py vllm/envs.py
+COPY vllm/__init__.py vllm/__init__.py

 # max jobs used by Ninja to build extensions
 ARG max_jobs=2
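Note on the hunk above: the csrc-build stage copies only the inputs the C++/CUDA extension build needs, so edits to unrelated Python sources should not invalidate the cached compile layers. A minimal sketch of checking that, assuming the Dockerfile lives at docker/Dockerfile, BuildKit layer caching is enabled, and vllm/engine/arg_utils.py stands in for any Python-only file:

    # touch a Python-only file, then rebuild just the kernel-compilation stage;
    # the COPY and compile layers above are expected to come from cache
    touch vllm/engine/arg_utils.py
    docker build -f docker/Dockerfile --target csrc-build .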
@@ -195,9 +198,11 @@ ARG SCCACHE_S3_NO_CREDENTIALS=0
 ARG VLLM_USE_PRECOMPILED=""
 ARG VLLM_MAIN_CUDA_VERSION=""

+# Use dummy version for csrc-build wheel (only .so files are extracted, version doesn't matter)
+ENV SETUPTOOLS_SCM_PRETEND_VERSION="0.0.0+csrc.build"
+
 # if USE_SCCACHE is set, use sccache to speed up compilation
 RUN --mount=type=cache,target=/root/.cache/uv \
-    --mount=type=bind,source=.git,target=.git \
     if [ "$USE_SCCACHE" = "1" ]; then \
         echo "Installing sccache..." \
         && curl -L -o sccache.tar.gz ${SCCACHE_DOWNLOAD_URL} \
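The SETUPTOOLS_SCM_PRETEND_VERSION line added above pins the wheel version without needing git metadata in the build context (the .git bind mount is removed in the same hunk). A minimal sketch of the mechanism, assuming the version is derived via setuptools-scm; the command below is illustrative and not part of this change:

    # setuptools-scm returns the pretend version instead of inspecting .git,
    # so the csrc-build wheel gets a fixed, throwaway version string
    SETUPTOOLS_SCM_PRETEND_VERSION="0.0.0+csrc.build" \
        python3 -c 'from setuptools_scm import get_version; print(get_version())'
    # expected output: 0.0.0+csrc.build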
@@ -223,7 +228,6 @@ ENV VLLM_TARGET_DEVICE=${vllm_target_device}
 ENV CCACHE_DIR=/root/.cache/ccache
 RUN --mount=type=cache,target=/root/.cache/ccache \
     --mount=type=cache,target=/root/.cache/uv \
-    --mount=type=bind,source=.git,target=.git \
     if [ "$USE_SCCACHE" != "1" ]; then \
         # Clean any existing CMake artifacts
         rm -rf .deps && \
@@ -232,6 +236,52 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
     export VLLM_DOCKER_BUILD_CONTEXT=1 && \
     python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \
     fi
+# ################### CSRC BUILD IMAGE ####################
+
+# ################### WHEEL BUILD IMAGE ####################
+FROM base AS build
+ARG TARGETPLATFORM
+
+ARG PIP_INDEX_URL UV_INDEX_URL
+ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
+ARG PYTORCH_CUDA_INDEX_BASE_URL
+
+# install build dependencies
+COPY requirements/build.txt requirements/build.txt
+
+# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
+# Reference: https://github.com/astral-sh/uv/pull/1694
+ENV UV_HTTP_TIMEOUT=500
+ENV UV_INDEX_STRATEGY="unsafe-best-match"
+# Use copy mode to avoid hardlink failures with Docker cache mounts
+ENV UV_LINK_MODE=copy
+
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
+    --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.' )
+
+WORKDIR /workspace
+
+COPY --from=csrc-build /workspace/dist /precompiled-wheels
+
+COPY . .
+
+ARG GIT_REPO_CHECK=0
+RUN --mount=type=bind,source=.git,target=.git \
+    if [ "$GIT_REPO_CHECK" != "0" ]; then bash tools/check_repo.sh ; fi
+
+ARG vllm_target_device="cuda"
+ENV VLLM_TARGET_DEVICE=${vllm_target_device}
+
+# Skip adding +precompiled suffix to version (preserves git-derived version)
+ENV VLLM_SKIP_PRECOMPILED_VERSION_SUFFIX=1
+
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,source=.git,target=.git \
+    if [ "${vllm_target_device}" = "cuda" ]; then \
+        export VLLM_PRECOMPILED_WHEEL_LOCATION=$(ls /precompiled-wheels/*.whl); \
+    fi && \
+    python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38

 # Install DeepGEMM from source
 ARG DEEPGEMM_GIT_REF
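For reference, the new build stage reuses the kernels compiled in csrc-build by pointing setup.py at the wheel copied into /precompiled-wheels. A rough sketch of the same mechanism outside Docker, using only the environment variable names that appear in the diff (the /precompiled-wheels path is an assumption carried over from the COPY above):

    # reuse compiled .so files from an existing vLLM wheel instead of rebuilding them,
    # while keeping the git-derived version on the resulting wheel
    export VLLM_PRECOMPILED_WHEEL_LOCATION=$(ls /precompiled-wheels/*.whl)
    export VLLM_SKIP_PRECOMPILED_VERSION_SUFFIX=1
    python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38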