Fix GPTQMarlinMoE (#7697)

b3fa5dc3 · Kyungmin Lee · GitHub · 00aec6ad · b3fa5dc3
Unverified commit b3fa5dc3, authored Jul 02, 2025 by Kyungmin Lee; committed by GitHub Jul 01, 2025
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 5 additions and 1 deletion

python/sglang/srt/layers/quantization/gptq.py +5 -1

No files found.
--- a/python/sglang/srt/layers/quantization/gptq.py
+++ b/python/sglang/srt/layers/quantization/gptq.py
@@ -344,6 +344,10 @@ class GPTQMarlinConfig(QuantizationConfig):
        if (num_bits, sym) not in cls.TYPE_MAP:
            return False

+        assert (
+            VLLM_AVAILABLE
+        ), "vllm is not installed, to use gptq_marlin, please install vllm"
+
        return check_marlin_supported(
            quant_type=cls.TYPE_MAP[(num_bits, sym)], group_size=group_size
        )
@@ -726,6 +730,6 @@ class GPTQMarlinMoEMethod(FusedMoEMethodBase):
            g_idx2=layer.w2_g_idx,
            sort_indices1=layer.w13_g_idx_sort_indices,
            sort_indices2=layer.w2_g_idx_sort_indices,
-            num_bits=self.quant_config.quant_type.size_bits,
+            quant_type_id=self.quant_config.quant_type.id,
            is_k_full=self.is_k_full,
        ).to(orig_dtype)