File tree (expand / collapse)
2 files changed: +5 -1 lines changed
model_executor/layers/fused_moe (expand / collapse file tree)
2 files changed: +5 -1 lines changed
Original file line number | Diff line number | Diff line change
144144 VLLM_DP_MASTER_IP : str = ""
145145 VLLM_DP_MASTER_PORT : int = 0
146146 VLLM_MOE_DP_CHUNK_SIZE : int = 256
147+ VLLM_ENABLE_MOE_DP_CHUNK : bool = True
147148 VLLM_RANDOMIZE_DP_DUMMY_INPUTS : bool = False
148149 VLLM_RAY_DP_PACK_STRATEGY : Literal ["strict" , "fill" , "span" ] = "strict"
149150 VLLM_MARLIN_USE_ATOMIC_ADD : bool = False
@@ -1101,6 +1102,9 @@ def get_vllm_port() -> int | None:
11011102 # rank. All DP ranks process the activations in VLLM_MOE_DP_CHUNK_SIZE
11021103 # units.
11031104 "VLLM_MOE_DP_CHUNK_SIZE" : lambda : int (os .getenv ("VLLM_MOE_DP_CHUNK_SIZE" , "256" )),
1105+ "VLLM_ENABLE_MOE_DP_CHUNK" : lambda : bool (
1106+ int (os .getenv ("VLLM_ENABLE_MOE_DP_CHUNK" , "1" ))
1107+ ),
11041108 # Randomize inputs during dummy runs when using Data Parallel
11051109 "VLLM_RANDOMIZE_DP_DUMMY_INPUTS" : lambda : os .environ .get (
11061110 "VLLM_RANDOMIZE_DP_DUMMY_INPUTS" , "0"
Original file line number | Diff line number | Diff line change
@@ -753,7 +753,7 @@ def use_dp_chunking(self) -> bool:
753753 self .moe_parallel_config .use_pplx_kernels
754754 or self .moe_parallel_config .use_deepep_ll_kernels
755755 or (self .dp_size > 1 and self .use_flashinfer_cutlass_kernels )
756- )
756+ ) and envs . VLLM_ENABLE_MOE_DP_CHUNK
757757
758758 @property
759759 def is_internal_router (self ) -> bool :
You can’t perform that action at this time.
0 commit comments