Skip to content

deepseek-distill-qwen-32b crush #162

@zhm-algo

Description

@zhm-algo

#!/bin/bash
# Launch an OpenAI-compatible vLLM API server for DeepSeek-R1-Distill-Qwen-32B
# with fp8 quantization and tensor parallelism across 4 devices
# (ZE_AFFINITY_MASK selects XPU devices 8-11).
set -euo pipefail

readonly model="/llm/models/DeepSeek-R1-Distill-Qwen-32B"
readonly served_model_name="DeepSeek-R1-Distill-Qwen-32B"

export TORCH_LLM_ALLREDUCE=1
export VLLM_USE_V1=1
export CCL_ZE_IPC_EXCHANGE=pidfd
export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1
export VLLM_WORKER_MULTIPROC_METHOD=spawn
export VLLM_OFFLOAD_WEIGHTS_BEFORE_QUANT=1
export ZE_AFFINITY_MASK=8,9,10,11

# BUG FIX: the original invocation had no trailing backslashes, so the server
# was started with NO arguments and each following "--flag" line was executed
# as its own (failing) shell command. Every flag line must end with "\".
python3 -m vllm.entrypoints.openai.api_server \
  --model "$model" \
  --served-model-name "$served_model_name" \
  --dtype=float16 \
  --enforce-eager \
  --port 8001 \
  --host 0.0.0.0 \
  --trust-remote-code \
  --disable-sliding-window \
  --gpu-memory-util=0.93 \
  --max-num-batched-tokens=33000 \
  --disable-log-requests \
  --max-model-len=33000 \
  --block-size 64 \
  --quantization fp8 \
  -tp=4

Error log (报错日志):
error.log

Metadata

Metadata

Assignees

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions