Unverified commit 6220f3c6, authored by Tristan Leclercq, committed by GitHub
Browse files

[Bugfix] Fix transformers model impl ignored for mixtral quant (#18602)


Signed-off-by: Tristan Leclercq <tristanleclercq@gmail.com>
parent 52fb23f4
...@@ -225,17 +225,16 @@ def get_model_architecture( ...@@ -225,17 +225,16 @@ def get_model_architecture(
"fp8", "compressed-tensors", "gptq_marlin", "awq_marlin", "quark" "fp8", "compressed-tensors", "gptq_marlin", "awq_marlin", "quark"
] ]
if (model_config.quantization is not None
and model_config.quantization not in mixtral_supported
and "MixtralForCausalLM" in architectures):
architectures = ["QuantMixtralForCausalLM"]
vllm_supported_archs = ModelRegistry.get_supported_archs() vllm_supported_archs = ModelRegistry.get_supported_archs()
vllm_not_supported = not any(arch in vllm_supported_archs vllm_not_supported = not any(arch in vllm_supported_archs
for arch in architectures) for arch in architectures)
if (model_config.model_impl == ModelImpl.TRANSFORMERS or if (model_config.model_impl == ModelImpl.TRANSFORMERS or
model_config.model_impl != ModelImpl.VLLM and vllm_not_supported): model_config.model_impl != ModelImpl.VLLM and vllm_not_supported):
architectures = resolve_transformers_arch(model_config, architectures) architectures = resolve_transformers_arch(model_config, architectures)
elif (model_config.quantization is not None
and model_config.quantization not in mixtral_supported
and "MixtralForCausalLM" in architectures):
architectures = ["QuantMixtralForCausalLM"]
model_cls, arch = ModelRegistry.resolve_model_cls(architectures) model_cls, arch = ModelRegistry.resolve_model_cls(architectures)
if model_config.task == "embed": if model_config.task == "embed":
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment