Unverified commit daec4d26, authored by Jee Jee Li, committed by GitHub
Browse files

[Model]Improve Qwen3VLMoeForConditionalGeneration packed_modules_mapping (#27096)


Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
parent 6c9fdbf7
......@@ -350,6 +350,14 @@ class Qwen3MoeLLMForCausalLM(Qwen3MoeForCausalLM):
dummy_inputs=Qwen3VLDummyInputsBuilder,
)
class Qwen3VLMoeForConditionalGeneration(Qwen3VLForConditionalGeneration):
# Maps the fused "qkv_proj" module name to the separate projection names it
# is declared to pack. Presumably read by weight-loading / LoRA code
# elsewhere -- confirm against callers.
packed_modules_mapping = {"qkv_proj": ["q_proj", "k_proj", "v_proj"]}
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super(Qwen3VLForConditionalGeneration, self).__init__()
config: Qwen3VLMoeConfig = vllm_config.model_config.hf_config
......@@ -376,6 +384,11 @@ class Qwen3VLMoeForConditionalGeneration(Qwen3VLForConditionalGeneration):
self.language_model = Qwen3MoeLLMForCausalLM(
vllm_config=vllm_config, prefix=maybe_prefix(prefix, "language_model")
)
# Whether to include the gate_up_proj mapping is determined by
# the language model.
self.packed_modules_mapping = (
self.packed_modules_mapping | self.language_model.packed_modules_mapping
)
self.make_empty_intermediate_tensors = (
self.language_model.make_empty_intermediate_tensors
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment