Fix unknown attribute of topk_indices_dtype in CompressedTensorsW8A8Fp8MoECutlassMethod (#20507)

Co-authored-by: Lucia (Lu) Fang <fanglu@meta.com>

Fix unknown attribute of topk_indices_dtype in CompressedTensorsW8A8Fp8MoECutlassMethod (#20507)
Co-authored-by: Lucia (Lu) Fang <fanglu@meta.com>
8aeaa910 · Lucia Fang · GitHub · 906e05d8 · 8aeaa910
Unverified Commit 8aeaa910 authored Jul 05, 2025 by Lucia Fang Committed by GitHub Jul 05, 2025
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 2 additions and 0 deletions

vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py ...quantization/compressed_tensors/compressed_tensors_moe.py +2 -0

No files found.
--- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
@@ -368,6 +368,7 @@ class CompressedTensorsW8A8Fp8MoEMethod(CompressedTensorsMoEMethod):
            "weights")
        self.input_quant = self.quant_config.target_scheme_map["Linear"].get(
            "input_activations")
+        self.topk_indices_dtype = None

        per_tensor = (self.weight_quant.strategy == QuantizationStrategy.TENSOR
                      and self.input_quant.strategy
@@ -738,6 +739,7 @@ class CompressedTensorsW8A8Fp8MoECutlassMethod(CompressedTensorsMoEMethod):

        from vllm.model_executor.layers.fused_moe.cutlass_moe import (
            cutlass_moe_fp8)
+        self.topk_indices_dtype = None
        self.fused_experts = cutlass_moe_fp8  # type: ignore
        self.disable_expert_map = False