Unverified commit b3fa5dc3, authored by Kyungmin Lee and committed by GitHub
Browse files

Fix GPTQMarlinMoE (#7697)

parent 00aec6ad
@@ -344,6 +344,10 @@ class GPTQMarlinConfig(QuantizationConfig):
if (num_bits, sym) not in cls.TYPE_MAP:
return False
+assert (
+VLLM_AVAILABLE
+), "vllm is not installed, to use gptq_marlin, please install vllm"
return check_marlin_supported(
quant_type=cls.TYPE_MAP[(num_bits, sym)], group_size=group_size
)
@@ -726,6 +730,6 @@ class GPTQMarlinMoEMethod(FusedMoEMethodBase):
g_idx2=layer.w2_g_idx,
sort_indices1=layer.w13_g_idx_sort_indices,
sort_indices2=layer.w2_g_idx_sort_indices,
-num_bits=self.quant_config.quant_type.size_bits,
+quant_type_id=self.quant_config.quant_type.id,
is_k_full=self.is_k_full,
).to(orig_dtype)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment