Unverified commit b1235c3e, authored by Li, Jiang and committed by GitHub
Browse files

[Bugfix] Lazy import fused_experts in BitsAndBytesMoEMethod to avoid break...


[Bugfix] Lazy import fused_experts in BitsAndBytesMoEMethod to avoid breaking non-CUDA-alike devices (#20822)
Signed-off-by: jiang1.li <jiang1.li@intel.com>
parent 44d02f54
...@@ -5,7 +5,6 @@ from typing import Any, Callable, Optional, Union ...@@ -5,7 +5,6 @@ from typing import Any, Callable, Optional, Union
import torch import torch
from vllm.model_executor.layers.fused_moe import fused_experts
from vllm.model_executor.layers.fused_moe.layer import (FusedMoE, from vllm.model_executor.layers.fused_moe.layer import (FusedMoE,
FusedMoEMethodBase) FusedMoEMethodBase)
from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase, from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase,
...@@ -467,6 +466,7 @@ class BitsAndBytesMoEMethod(FusedMoEMethodBase): ...@@ -467,6 +466,7 @@ class BitsAndBytesMoEMethod(FusedMoEMethodBase):
logical_to_physical_map: Optional[torch.Tensor] = None, logical_to_physical_map: Optional[torch.Tensor] = None,
logical_replica_count: Optional[torch.Tensor] = None, logical_replica_count: Optional[torch.Tensor] = None,
) -> torch.Tensor: ) -> torch.Tensor:
from vllm.model_executor.layers.fused_moe import fused_experts
if enable_eplb: if enable_eplb:
raise NotImplementedError( raise NotImplementedError(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment