Unverified commit a1f53add, authored by Runkai Tao and committed by GitHub
Browse files

[BugFix] Align fused MoE-LoRA kernel config with actual weight shapes (#34396)


Signed-off-by: Runkai Tao <rt572@physics.rutgers.edu>
parent 05970c77
...@@ -83,7 +83,11 @@ class FusedMoEWithLoRA(BaseLayerWithLoRA): ...@@ -83,7 +83,11 @@ class FusedMoEWithLoRA(BaseLayerWithLoRA):
): ):
if envs.VLLM_TUNED_CONFIG_FOLDER: if envs.VLLM_TUNED_CONFIG_FOLDER:
hidden_size = layer.hidden_size hidden_size = layer.hidden_size
intermediate_size = layer.intermediate_size_per_partition intermediate_size = (
self.w2_lora_a_stacked[0].shape[-1]
if op_prefix == "w2"
else self.w13_lora_b_stacked[0].shape[-2]
)
shrink_config = get_lora_op_configs( shrink_config = get_lora_op_configs(
op_type=f"fused_moe_lora_{op_prefix}_shrink", op_type=f"fused_moe_lora_{op_prefix}_shrink",
max_loras=num_loras, max_loras=num_loras,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment