Skip to content

Commit 9d6235c

Browse files
authored
[moe] Allow disabling DP chunking (#29936)
Signed-off-by: Ming Yang <[email protected]>
1 parent f1599ca commit 9d6235c

File tree

2 files changed

+5
-1
lines changed

2 files changed

+5
-1
lines changed

vllm/envs.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@
144 144
VLLM_DP_MASTER_IP: str = ""
145 145
VLLM_DP_MASTER_PORT: int = 0
146 146
VLLM_MOE_DP_CHUNK_SIZE: int = 256
147 +
VLLM_ENABLE_MOE_DP_CHUNK: bool = True
147 148
VLLM_RANDOMIZE_DP_DUMMY_INPUTS: bool = False
148 149
VLLM_RAY_DP_PACK_STRATEGY: Literal["strict", "fill", "span"] = "strict"
149 150
VLLM_MARLIN_USE_ATOMIC_ADD: bool = False
@@ -1101,6 +1102,9 @@ def get_vllm_port() -> int | None:
1101 1102
# rank. All DP ranks process the activations in VLLM_MOE_DP_CHUNK_SIZE
1102 1103
# units.
1103 1104
"VLLM_MOE_DP_CHUNK_SIZE": lambda: int(os.getenv("VLLM_MOE_DP_CHUNK_SIZE", "256")),
1105 +
"VLLM_ENABLE_MOE_DP_CHUNK": lambda: bool(
1106 +
int(os.getenv("VLLM_ENABLE_MOE_DP_CHUNK", "1"))
1107 +
),
1104 1108
# Randomize inputs during dummy runs when using Data Parallel
1105 1109
"VLLM_RANDOMIZE_DP_DUMMY_INPUTS": lambda: os.environ.get(
1106 1110
"VLLM_RANDOMIZE_DP_DUMMY_INPUTS", "0"

vllm/model_executor/layers/fused_moe/layer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -753,7 +753,7 @@ def use_dp_chunking(self) -> bool:
753 753
self.moe_parallel_config.use_pplx_kernels
754 754
or self.moe_parallel_config.use_deepep_ll_kernels
755 755
or (self.dp_size > 1 and self.use_flashinfer_cutlass_kernels)
756 -
)
756 +
) and envs.VLLM_ENABLE_MOE_DP_CHUNK
757 757

758 758
@property
759 759
def is_internal_router(self) -> bool:

0 commit comments

Comments
 (0)