Commit 6af85e40 authored by 王敏
Browse files

Merge remote-tracking branch 'origin/v0.15.1-dev' into v0.15.1-dev

# Conflicts:
#	vllm/model_executor/layers/fused_moe/modular_kernel.py
parents c80f5968 4a4fb3de
......@@ -2202,6 +2202,9 @@ class TritonExperts(mk.FusedMoEPermuteExpertsUnpermute):
hidden_states, w1, w2, topk_ids
)
if use_nn_moe:
N = w1.size(-1)
if use_nn_moe:
N = w1.size(-1)
......
......@@ -1920,13 +1920,13 @@ class FusedMoE(CustomOp):
if self.capture is not None:
self.capture(topk_ids)
final_hidden_states = self.quant_method.apply(
layer=self,
x=x, # The type signature of this is wrong due to the hack.
topk_weights=topk_weights,
topk_ids=topk_ids,
use_nn_moe=self.use_nn_moe,
# use_fused_gate=self.use_fused_gate,
)
if has_separate_shared_experts:
......
......@@ -1210,7 +1210,7 @@ class FusedMoEModularKernel(torch.nn.Module):
workspace2=workspace2,
expert_tokens_meta=c_expert_tokens_meta,
apply_router_weight_on_input=apply_router_weight_on_input,
use_nn_moe=use_nn_moe
use_nn_moe=use_nn_moe,
)
return fused_out
......
......@@ -316,7 +316,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
topk_weights: torch.Tensor,
topk_ids: torch.Tensor,
use_nn_moe: bool | None = False,
use_fused_gate: bool | None = False,
) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
assert self.kernel is not None
return self.kernel(
......
......@@ -1248,6 +1248,7 @@ class CompressedTensorsW8A8Int8MoEMethod(CompressedTensorsMoEMethod):
x: torch.Tensor,
topk_weights: torch.Tensor,
topk_ids: torch.Tensor,
use_nn_moe: bool | None = False,
) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
from vllm.model_executor.layers.fused_moe import fused_experts
......@@ -1263,6 +1264,7 @@ class CompressedTensorsW8A8Int8MoEMethod(CompressedTensorsMoEMethod):
global_num_experts=layer.global_num_experts,
expert_map=layer.expert_map,
quant_config=self.moe_quant_config,
use_nn_moe=use_nn_moe,
)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment