"examples/vscode:/vscode.git/clone" did not exist on "38bf2ffb21d54eb67a68d4ba82125922f4a4c7ad"
Unverified Commit 31d992d2 authored by bnellnm's avatar bnellnm Committed by GitHub
Browse files

[Bugfix] Fix some issues with MoERunner PR #32344 (#34371)


Signed-off-by: default avatarBill Nell <bnell@redhat.com>
parent 5aff2699
...@@ -700,7 +700,7 @@ class FusedMoE(CustomOp): ...@@ -700,7 +700,7 @@ class FusedMoE(CustomOp):
@property @property
def gate(self) -> torch.nn.Module | None: def gate(self) -> torch.nn.Module | None:
return self._gate return self._gate if self.use_overlapped else None
@property @property
def tp_size(self): def tp_size(self):
...@@ -725,7 +725,7 @@ class FusedMoE(CustomOp): ...@@ -725,7 +725,7 @@ class FusedMoE(CustomOp):
@property @property
def is_internal_router(self) -> bool: def is_internal_router(self) -> bool:
# By default, router/gate is called before FusedMoE forward pass # By default, router/gate is called before FusedMoE forward pass
return self._gate is not None return self.gate is not None
def _maybe_init_expert_routing_tables( def _maybe_init_expert_routing_tables(
self, self,
...@@ -1457,7 +1457,6 @@ class FusedMoE(CustomOp): ...@@ -1457,7 +1457,6 @@ class FusedMoE(CustomOp):
hidden_states: torch.Tensor, hidden_states: torch.Tensor,
router_logits: torch.Tensor, router_logits: torch.Tensor,
) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]: ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
self.ensure_moe_quant_config_init()
return self.runner.forward( return self.runner.forward(
hidden_states, hidden_states,
router_logits, router_logits,
......
...@@ -63,6 +63,8 @@ def _moe_forward( ...@@ -63,6 +63,8 @@ def _moe_forward(
layer_name: str, layer_name: str,
) -> torch.Tensor: ) -> torch.Tensor:
layer = get_layer_from_name(layer_name) layer = get_layer_from_name(layer_name)
# TODO(bnell): this can be removed after MK migration is complete.
layer.ensure_moe_quant_config_init()
return layer.runner.forward_impl( return layer.runner.forward_impl(
layer, hidden_states, router_logits, shared_experts_input layer, hidden_states, router_logits, shared_experts_input
) )
...@@ -84,6 +86,8 @@ def _moe_forward_shared( ...@@ -84,6 +86,8 @@ def _moe_forward_shared(
layer_name: str, layer_name: str,
) -> tuple[torch.Tensor, torch.Tensor]: ) -> tuple[torch.Tensor, torch.Tensor]:
layer = get_layer_from_name(layer_name) layer = get_layer_from_name(layer_name)
# TODO(bnell): this can be removed after MK migration is complete.
layer.ensure_moe_quant_config_init()
return layer.runner.forward_impl( return layer.runner.forward_impl(
layer, hidden_states, router_logits, shared_experts_input layer, hidden_states, router_logits, shared_experts_input
) )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment