"src/vscode:/vscode.git/clone" did not exist on "cd91fc06fe9513864fca6a57953ca85a7ae7836e"
Unverified commit 9254a33a, authored by Xiaoyu Zhang, committed by GitHub
Browse files

Avoid fused_moe_triton `padding_size` circular import (#2624)

parent 8a2681e2
...@@ -28,7 +28,6 @@ from vllm.model_executor.layers.quantization.utils.w8a8_utils import ( ...@@ -28,7 +28,6 @@ from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
from vllm.model_executor.parameter import ModelWeightParameter, PerTensorScaleParameter from vllm.model_executor.parameter import ModelWeightParameter, PerTensorScaleParameter
from sglang.srt.layers.linear import LinearMethodBase, UnquantizedLinearMethod from sglang.srt.layers.linear import LinearMethodBase, UnquantizedLinearMethod
from sglang.srt.layers.moe.fused_moe_triton.fused_moe import padding_size
from sglang.srt.layers.quantization.base_config import ( from sglang.srt.layers.quantization.base_config import (
QuantizationConfig, QuantizationConfig,
QuantizeMethodBase, QuantizeMethodBase,
...@@ -548,6 +547,10 @@ class Fp8MoEMethod: ...@@ -548,6 +547,10 @@ class Fp8MoEMethod:
layer.w2_input_scale = None layer.w2_input_scale = None
def process_weights_after_loading(self, layer: Module) -> None: def process_weights_after_loading(self, layer: Module) -> None:
from sglang.srt.layers.moe.fused_moe_triton.fused_moe import (
padding_size, # Avoid circular import
)
# Block quant doesn't need to process weights after loading # Block quant doesn't need to process weights after loading
if self.block_quant: if self.block_quant:
return return
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment