Commit b7a869d

fix
Signed-off-by: Barbara Suslova <[email protected]>
1 parent f475f81 commit b7a869d

File tree: 2 files changed (+3, −2 lines)

csrc/moe/moe_fused_gate.cu

Lines changed: 2 additions & 0 deletions

@@ -1,3 +1,5 @@
+// copied from
+// https://github.com/sgl-project/sglang/blob/v0.5.5/sgl-kernel/csrc/moe/moe_fused_gate.cu
 #include <ATen/cuda/CUDAContext.h>
 #include <cuda_runtime.h>
 #include <cutlass/array.h>

vllm/model_executor/layers/fused_moe/layer.py

Lines changed: 1 addition & 2 deletions

@@ -5,7 +5,6 @@
 from collections.abc import Callable, Iterable
 from contextlib import nullcontext
 from enum import Enum
-from functools import partial
 from typing import Literal, cast, get_args, overload

 import torch

@@ -2053,7 +2052,7 @@ def combine_output(states: torch.Tensor) -> torch.Tensor:

             return states

-        if self.shared_experts is not None and self.num_fused_shared_experts == 0:
+        if self.shared_experts is not None:
             return (
                 final_hidden_states[0],
                 combine_output(final_hidden_states[1]),
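The behavioral effect of the layer.py change can be read directly off the diff: before the commit, the tuple return path additionally required `num_fused_shared_experts == 0`; after it, the path is taken whenever `shared_experts` is set. A minimal sketch (hypothetical standalone functions, not vLLM's actual `FusedMoE` class) illustrating only that condition change:

```python
# Hypothetical predicates modeling the pre- and post-commit condition.
# They are not vLLM code; they only mirror the boolean logic in the diff.

def returns_tuple_before(shared_experts, num_fused_shared_experts):
    # Pre-commit: tuple path also required zero fused shared experts.
    return shared_experts is not None and num_fused_shared_experts == 0

def returns_tuple_after(shared_experts, num_fused_shared_experts):
    # Post-commit: num_fused_shared_experts is no longer consulted.
    return shared_experts is not None

# With shared experts present but fused (count > 0), only the
# post-commit condition takes the tuple return path.
assert not returns_tuple_before(object(), 1)
assert returns_tuple_after(object(), 1)

# With no shared experts, both conditions skip the tuple path.
assert not returns_tuple_before(None, 0)
assert not returns_tuple_after(None, 0)
```

In other words, the fix makes the tuple `(final_hidden_states[0], combine_output(final_hidden_states[1]))` return unconditional on the fused-shared-experts count, so shared-expert outputs are returned even when shared experts have been fused into the routed experts.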
