Skip to content

Commit 125cf63

Browse files
authored
build: refine the build process and make the normal workflow code work. (#90)
* Update build-check-share-runner.yml * ci: adjust code clone for build test. * thridparty: reduce mkl size, remove unused so. * xformers: replace link with source. * cutlass: try use local tar rather than git submodule. * ci: remove shared runner * Update README.md * mm: add qwen vl2.5 model support. (#86) - add qwen vl 2.5 model support. - Qwen VL2.5 only support 'transformers' as vit engine, (trt not support yet.) - upgrade package version to make sure VL2.5 code is added. test command: server: `dashinfer_vlm_serve --model qwen/Qwen2.5-VL-3B-Instruct --vision_engine transformers --port 8000 --host=127.0.0.1` client: ``` curl http://localhost:8000/v1/chat/completions \ -H "Content-Type: application/json" \ -d \ '{"model": "qwen/Qwen2.5-VL-3B-Instruct", "messages": [{"role": "user", "content": [{ "type": "text", "text": "Describe the image." }, {"type": "image_url", "image_url": {"url": "https://farm4.staticflickr.com/3075/3168662394_7d7103de7d_z_d.jpg"}}]}], "max_completion_tokens": 1024, "top_p": 0.5, "temperature": 0.1, "frequency_penalty": 1.05 }' ``` result: ``` {"id":"chatcmpl-rxqDiCQEJweEeeB7FADiER","object":"chat.completion", "created":1747992522,"model":"model","choices":[{"index":0,"message":{"role":"assistant","content":"The image features a small hummingbird perched on a branch. The bird is positioned in the center of the scene, with its vibrant colors and delicate features clearly visible. The hummingbird appears to be enjoying its time in nature, possibly searching for food or simply resting on the branch. \n\nThere are no other birds or animals present in the image, making it a solitary moment captured in this natural setting."},"finish_reason":"stop"}],"usage":{"prompt_tokens":382,"total_tokens":95,"completion_tokens":81}} ``` * Update build-check.yml * Update build-check.yml * ci: fix release script. (#88) * cmake: nccl find lib without version. * flash attn: less memory footprint for compile. * workflow: fix typo. * ci: fix git safe dir issue. 
* multimodel: minor fix for benchmark * build: support 90a by default. * [Build]: upgrade pybind11 for python 3.12, also remove cutlass folder. * docker: fine centos docker and ubi8 docker. * add build script for ubi8 support. * github: action use ubi8 image rather than centos image. * github: try use github provided machine. * Revert "github: try use github provided machine." This reverts commit 6705c60. * github: remove prefix of docker image. * github: change workflow image to ubi8 images.
1 parent ca3dfa5 commit 125cf63

25 files changed

+246
-66
lines changed

.github/workflows/build-check.yml

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,14 @@ jobs:
1212
strategy:
1313
matrix:
1414
arch: [X64]
15-
image: ["dev-centos7-cu124:v1"]
15+
image: ["docker.cnb.cool/thinksrc/dashinfer/dev-ubi8-cu124:latest"]
1616
enable_cuda: [0, 1]
1717
exclude:
1818
- arch: X64
1919
image: "dev-centos8-arm:v2"
2020
runs-on: [self-hosted, Linux, "${{ matrix.arch }}"]
2121
container:
22-
image: dashinfer/${{ matrix.image }}
22+
image: ${{ matrix.image }}
2323
env:
2424
# force use node16 instead of node20
2525
# otherwise it may cause GLIBCXX_2.27 not found
@@ -68,15 +68,12 @@ jobs:
6868
strategy:
6969
matrix:
7070
arch: [X64]
71-
image: ["dev-centos7-cu124:v1"]
71+
image: ["docker.cnb.cool/thinksrc/dashinfer/dev-ubi8-cu124:latest"]
7272
enable_cuda: [0, 1]
7373
runs-on: [self-hosted, Linux, "${{ matrix.arch }}"]
7474
container:
75-
image: dashinfer/${{ matrix.image }}
75+
image: ${{ matrix.image }}
7676
env:
77-
# force use node16 instead of node20
78-
# otherwise it may cause GLIBCXX_2.27 not found
79-
# ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
8077
ENABLE_CUDA: ${{ matrix.enable_cuda }}
8178
BUILD_VERSION: 3.10
8279
steps:

.github/workflows/release_packages_all.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
strategy:
1717
matrix:
1818
arch: [X64, ARM64]
19-
image: ["dev-centos7-cu124:v1", "dev-centos8-arm:v2"]
19+
image: ["docker.cnb.cool/thinksrc/dashinfer/dev-ubi8-cu124:latest", "dashinfer/dev-centos8-arm:v2"]
2020
enable_cuda: [0, 1]
2121
exclude:
2222
- arch: X64
@@ -27,7 +27,7 @@ jobs:
2727
enable_cuda: 1
2828
runs-on: [self-hosted, Linux, "${{ matrix.arch }}"]
2929
container:
30-
image: dashinfer/${{ matrix.image }}
30+
image: ${{ matrix.image }}
3131
env:
3232
# force use node16 instead of node20
3333
# otherwise it may cause GLIBCXX_2.27 not found
@@ -97,7 +97,7 @@ jobs:
9797
strategy:
9898
matrix:
9999
arch: [X64, ARM64]
100-
image: ["dev-centos7-cu124:v1", "dev-centos8-arm:v2"]
100+
image: ["docker.cnb.cool/thinksrc/dashinfer/dev-ubi8-cu124:latest", "dashinfer/dev-centos8-arm:v2"]
101101
enable_cuda: [0, 1]
102102
exclude:
103103
- arch: X64
@@ -108,7 +108,7 @@ jobs:
108108
enable_cuda: 1
109109
runs-on: [self-hosted, Linux, "${{ matrix.arch }}"]
110110
container:
111-
image: dashinfer/${{ matrix.image }}
111+
image: ${{ matrix.image }}
112112
env:
113113
# force use node16 instead of node20
114114
# otherwise it may cause GLIBCXX_2.27 not found

.github/workflows/release_packages_cuda_only.yml

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@ jobs:
1717
strategy:
1818
matrix:
1919
arch: [X64]
20-
image: ["dev-centos7-cu124:v1"]
20+
image: ["docker.cnb.cool/thinksrc/dashinfer/dev-ubi8-cu124:latest"]
2121
enable_cuda: [1]
2222

2323
runs-on: [self-hosted, Linux, "${{ matrix.arch }}"]
2424
container:
25-
image: dashinfer/${{ matrix.image }}
25+
image: ${{ matrix.image }}
2626
env:
2727
# force use node16 instead of node20
2828
# otherwise it may cause GLIBCXX_2.27 not found
@@ -36,7 +36,8 @@ jobs:
3636
uses: actions/checkout@v4
3737
with:
3838
lfs: true
39-
submdules: false
39+
submodules: false
40+
4041

4142
- name: Build tgz package
4243
shell: bash
@@ -47,6 +48,7 @@ jobs:
4748
fi
4849
source activate ds_py
4950
51+
git config --global --add safe.directory '*'
5052
git fetch --tags
5153
TAG_NAME=$(git describe --tags $(git rev-list --tags --max-count=1))
5254
VERSION_NUMBER=$(echo "$TAG_NAME" | sed 's/^v//' | sed 's/-.*$//')
@@ -83,22 +85,20 @@ jobs:
8385
strategy:
8486
matrix:
8587
arch: [X64]
86-
image: ["dev-centos7-cu124:v1"]
88+
image: ["docker.cnb.cool/thinksrc/dashinfer/dev-ubi8-cu124:latest"]
8789
enable_cuda: [1]
8890
runs-on: [self-hosted, Linux, "${{ matrix.arch }}"]
8991
container:
90-
image: dashinfer/${{ matrix.image }}
92+
image: ${{ matrix.image }}
9193
env:
92-
# force use node16 instead of node20
93-
# otherwise it may cause GLIBCXX_2.27 not found
94-
# ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
9594
ENABLE_CUDA: ${{ matrix.enable_cuda }}
9695
steps:
9796
- name: Check out code
9897
uses: actions/checkout@v4
9998
with:
10099
lfs: true
101-
submdules: false
100+
submodules: false
101+
102102

103103
- name: Build manylinux wheels
104104
shell: bash
@@ -109,6 +109,7 @@ jobs:
109109
source /miniconda/etc/profile.d/conda.sh
110110
fi
111111
112+
git config --global --add safe.directory '*'
112113
git fetch --tags
113114
TAG_NAME=$(git describe --tags $(git rev-list --tags --max-count=1))
114115
VERSION_NUMBER=$(echo "$TAG_NAME" | sed 's/^v//')

README.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515

1616

1717
## News
18-
1918
- [2024/12] 🔥 DashInfer: Announcing the release of v2.0, now with enhanced GPU (CUDA) support! This version includes features like prefix caching (with GPU & CPU swapping), guided decoding, optimized attention for GQA, a lockless reactor engine, and newly added support for the VLM model (Qwen-VL) and MoE Models. For more details, please refer to the [release notes](https://dashinfer.readthedocs.io/en/latest/index.html#v2-0-0).
2019

2120
- [2024/06] DashInfer: v1.0 release with x86 & ARMv9 CPU and CPU flash attention support.
@@ -208,8 +207,8 @@ If you find them useful, please feel free to cite these papers:
208207
- [x] Prefix Cache: Support GPU Prefix Cache and CPU Swap
209208
- [x] Quantization: Fp8 A8W8 Activation quantization support on CUDA.
210209
- [x] LORA: Continuous Batch LORA Optimization.
211-
- [ ] Parallel Context phase and Generation phase within engine.
212-
- [ ] More effective MoE Operator on GPU.
210+
- [x] Parallel Context phase and Generation phase within engine.
211+
- [x] More effective MoE Operator on GPU.
213212
- [ ] Porting to AMD(ROCm) Platform.
214213

215214
# License

build.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ clean="OFF"
66
with_platform="${AS_PLATFORM:-cuda}"
77
# CUDA-related versions; the default below targets CUDA 12.4
88
cuda_version="${AS_CUDA_VERSION:-12.4}"
9-
cuda_sm="${AS_CUDA_SM:-80;86;90a}"
9+
cuda_sm="${AS_CUDA_SM:-80;90a}"
1010
NCCL_VERSION="${AS_NCCL_VERSION:-2.23.4}"
1111
build_folder="${AS_BUILD_FOLDER:-build}"
1212
force_conan="${AS_FORCE_CONAN:-OFF}"

cmake/FindNCCL.cmake

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,26 @@ else()
1818
endif()
1919

2020
message("find nccl with ${NCCL_LIBNAME}")
21+
# Look for the version-suffixed nccl library first
2122
find_library(
22-
AS_NCCL_LIBRARY ${NCCL_LIBNAME}
23-
PATH_SUFFIXES lib lib64 nccl-${NCCL_VERSION}-cuda-${CUDA_VERSION}/lib64)
23+
AS_NCCL_LIBRARY_VERSIONED
24+
NAMES nccl-${NCCL_VERSION}
25+
PATH_SUFFIXES lib lib64 nccl-${NCCL_VERSION}-cuda-${CUDA_VERSION}/lib64
26+
)
27+
28+
# If it was not found, fall back to the nccl library without a version suffix
29+
if(NOT AS_NCCL_LIBRARY_VERSIONED)
30+
message("find nccl without version number, searching ${CUDAToolkit_LIBRARY_DIR}")
31+
find_library(
32+
AS_NCCL_LIBRARY
33+
NAMES nccl
34+
PATHS
35+
${CUDAToolkit_LIBRARY_DIR}
36+
)
37+
else()
38+
message("found nccl with version number")
39+
set(AS_NCCL_LIBRARY ${AS_NCCL_LIBRARY_VERSIONED})
40+
endif()
2441

2542
if(ENABLE_NV_STATIC_LIB)
2643
message("add nccl static lib")
@@ -42,7 +59,7 @@ install(FILES ${NCCL_LIBS}
4259
DESTINATION ${CMAKE_INSTALL_LIBDIR})
4360
endif()
4461

45-
62+
message("find nccl at ${NCCL_INCLUDE_DIR} lib: ${AS_NCCL_LIBRARY}")
4663
find_package_handle_standard_args(NCCL DEFAULT_MSG NCCL_INCLUDE_DIR
4764
AS_NCCL_LIBRARY)
4865

cmake/flash-attention.cmake

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ include(ExternalProject)
4444

4545
message(STATUS "Use flash-attention from external project")
4646
set(FLASH_ATTENTION_GIT_REPO https://github.com/Dao-AILab/flash-attention.git)
47+
# Mirror for users in China; uncomment the line below to use it instead.
48+
# set(FLASH_ATTENTION_GIT_REPO https://gitee.com/lanyuflying/flash-attention.git)
4749
set(FLASH_ATTENTION_GIT_TAG 7551202cb2dd245432bc878447e19015c0af3c22)
4850
set(FLASH_ATTENTION_GIT_PATCH ${PROJECT_SOURCE_DIR}/third_party/patch/flash-attn.patch)
4951

@@ -60,7 +62,7 @@ include(ExternalProject)
6062
SOURCE_SUBDIR csrc
6163
DEPENDS project_cutlass
6264
CMAKE_GENERATOR "Ninja"
63-
BUILD_COMMAND ${CMAKE_COMMAND} --build . -j32 -v
65+
BUILD_COMMAND ${CMAKE_COMMAND} --build . -j2 -v
6466
BUILD_BYPRODUCTS ${FLASHATTN_LIBRARY_PATH}/${FLASHATTN_LIBRARY_NAME}
6567
USES_TERMINAL true
6668
CMAKE_CACHE_ARGS

conan/conanfile.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
protobuf/3.18.3
33
gtest/1.11.0
44
glog/0.5.0
5-
pybind11/2.8.1
5+
pybind11/2.13.6
66
zlib/1.2.13
77
[generators]
88
cmake

conan/conanfile_arm.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
gtest/1.11.0
44
glog/0.5.0
55
libunwind/1.7.2
6-
pybind11/2.8.1
6+
pybind11/2.13.6
77
zlib/1.2.13
88
[generators]
99
cmake

conan/conanfile_openmpi.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
protobuf/3.18.3
33
gtest/1.11.0
44
glog/0.5.0
5-
pybind11/2.8.1
5+
pybind11/2.13.6
66
openmpi/4.1.0
77
hwloc/2.9.3 # 2.10 only supports shared libs; use an older version to build a static lib.
88
zlib/1.2.13

0 commit comments

Comments
 (0)