Skip to content

Commit 13ea39b

Browse files
authored
[CPU]Parallelize over tokens in int4 moe (#29600)
Signed-off-by: Zhang Xiangze <[email protected]>
1 parent 4b61266 commit 13ea39b

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

csrc/moe/dynamic_4bit_int_moe_cpu.cpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -93,16 +93,16 @@ torch::Tensor dynamic_4bit_int_moe_cpu(
9393
}
9494
auto Y_all = at::empty({offsets[E], H}, x_c.options());
9595

96-
at::parallel_for(0, E, 1, [&](int64_t e_begin, int64_t e_end) {
96+
at::parallel_for(0, offsets[E], 0, [&](int64_t idx_begin, int64_t idx_end) {
9797
c10::InferenceMode guard;
98-
for (int64_t e = e_begin; e < e_end; ++e) {
99-
const int64_t te = counts[e];
100-
if (te == 0) {
98+
for (int64_t e = 0; e < E; ++e) {
99+
int64_t start = std::max(offsets[e], idx_begin);
100+
int64_t end = std::min(offsets[e + 1], idx_end);
101+
int64_t te = end - start;
102+
if (te <= 0) {
101103
continue;
102104
}
103105

104-
const int64_t start = offsets[e];
105-
106106
auto x_e = X_all.narrow(/*dim=*/0, /*start=*/start, /*length=*/te);
107107

108108
auto w13_e = w13_packed.select(/*dim=*/0, e);

0 commit comments

Comments
 (0)