
Commit 67ea728

fix
1 parent 2f5b658 commit 67ea728

File tree

1 file changed: +132 -0 lines changed


collector/common_test_cases.py

Lines changed: 132 additions & 0 deletions
@@ -0,0 +1,132 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0


import dataclasses
import itertools
from typing import Optional


@dataclasses.dataclass
class MoeCommonTestCase:
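    """One MoE test case: the full num_tokens sweep for a fixed model shape and (tp, ep) layout.

    token_expert_distribution is "balanced" or "power_law"; power_law_alpha is the skew
    parameter used for the "power_law" distribution.
    """
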
    num_tokens_list: list[int]
    hidden_size: int
    inter_size: int
    topk: int
    num_experts: int
    tp: int
    ep: int
    model_name: str
    token_expert_distribution: str
    power_law_alpha: Optional[float]


def get_common_moe_test_cases():
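    """Enumerate common MoE test cases as the product of GPU counts, model configs,
    tp/ep splits, and token-to-expert distributions, keeping only combinations where
    tp * ep matches the GPU count, ep divides num_experts (and does not exceed it),
    and tp divides inter_size.
    """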
    num_tokens = [
        1,
        2,
        4,
        8,
        16,
        32,
        48,
        64,
        80,
        96,
        128,
        160,
        192,
        256,
        320,
        384,
        512,
        768,
        1024,
        1536,
        2048,
        3072,
        4096,
        6144,
        8192,
        12288,
        16384,
        20480,
        32768,
        65536,
    ]
    tp_list = [1, 2, 4, 8, 16, 32]
    ep_list = [1, 2, 4, 8, 16, 32, 64, 128, 256]
    num_gpu_list = [1, 2, 4, 8, 16, 32, 64, 128, 256]

    token_distributions = [
        ("balanced", 0.0),
        ("power_law", 1.01),
        ("power_law", 1.2),
    ]

    # alpha_list = [1.01, 1.2]
    # hidden_size,inter_s,topk,num_expert, gated act
    # [15360,30720,2,16],# GPT-MOE-1.8T
    # [15360,3840,16,128],# GPT-MOE-1.8T-FineGrained
    # [3584,2560,8,64],# Qwen2-57B
    # [2048,1408,4,60], #qwen1.5_moe
    # [2048,1408,6,64], #deepseekv1_moe
    # [5120,1536,6,160], #deepseekv2
    model_config_list = [
        [4096, 14336, 2, 8, "MOE_Mixtral8x7B"],  # mixtral_8x7b
        [6144, 16384, 2, 8, "MOE_Mixtral8x22B"],  # mixtral_8x22b
        [7168, 2048, 8, 256, "DEEPSEEK_V3"],  # deepseekv3, will have 1 shared expert
        [2048, 768, 8, 128, "QWEN3_30B_A3B"],  # qwen3-moe, 30b-a3b
        [4096, 1536, 8, 128, "QWEN3_235B"],  # qwen3-moe, 235b-a22b
        [6144, 2560, 8, 160, "QWEN3_480B"],  # qwen3-moe, 480b-a35b
        [7168, 2048, 8, 384, "KIMI_K2"],  # kimi k2
        [2880, 2880, 4, 128, "GPT_OSS_120B"],
        [2880, 2880, 4, 32, "GPT_OSS_20B"],
    ]

    test_cases: list[MoeCommonTestCase] = []

    for (
        num_gpu,  # starting from fewer gpus. workaround for potential buffer bug in moe impl.
        model_config,
        tp,
        ep,
        (token_distribution, power_law_alpha),
    ) in itertools.product(
        num_gpu_list,
        model_config_list,
        tp_list,
        ep_list,
        token_distributions,
    ):
        hs, inter_s, topk, num_experts, model_name = model_config

        # QWEN3_30B_A3B: exclude tp >= 8 as they are not used for actual deployments
        if model_name == "QWEN3_30B_A3B" and tp >= 8:
            continue

        if tp * ep != num_gpu:
            continue
        if ep > num_experts:
            continue
        if num_experts % ep != 0:
            continue
        # we need to ensure inter_s can be divided by tp.
        if inter_s % tp != 0:
            continue

        test_cases.append(
            MoeCommonTestCase(
                num_tokens_list=num_tokens,
                hidden_size=hs,
                inter_size=inter_s,
                topk=topk,
                num_experts=num_experts,
                tp=tp,
                ep=ep,
                model_name=model_name,
                token_expert_distribution=token_distribution,
                power_law_alpha=power_law_alpha,
            )
        )

    return test_cases
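
A minimal usage sketch for context (not part of this commit), assuming the collector directory is importable as a package; the DEEPSEEK_V3 filter is only an example of selecting cases by field:

    from collector.common_test_cases import get_common_moe_test_cases

    # Build every (model, tp, ep, distribution) case; each carries the full num_tokens sweep.
    cases = get_common_moe_test_cases()
    deepseek = [c for c in cases if c.model_name == "DEEPSEEK_V3"]
    print(f"{len(cases)} cases total, {len(deepseek)} for DEEPSEEK_V3")
    for case in deepseek[:3]:
        print(case.tp, case.ep, case.token_expert_distribution, case.power_law_alpha)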

0 commit comments
