Unverified commit f451b455, authored by xuebwang-amd, committed by GitHub
Browse files

[Quantization][ROCm] Fix MoE weight loading to be robust (Qwen3_MoE/Qwen3_next...


[Quantization][ROCm] Fix MoE weight loading to be robust (Qwen3_MoE/Qwen3_next as example models) (#33173)
Signed-off-by: xuebwang-amd <xuebwang@amd.com>
parent 3f96fcf6
......@@ -996,7 +996,9 @@ class FusedMoE(CustomOp):
shard_size = expert_data.shape[shard_dim] // 2
else:
shard_size = expert_data.shape[shard_dim]
-if not load_full:
+# Only narrow if the loaded_weight is not a scalar (0-dim tensor)
+# and we're not loading the full weight
+if not load_full and loaded_weight.ndim > 0:
loaded_weight = loaded_weight.narrow(
shard_dim, shard_size * tp_rank, shard_size
)
......@@ -1022,7 +1024,9 @@ class FusedMoE(CustomOp):
# down_proj: "RowParallel" so tp sharding on input_dim
# Narrow parameter and load.
shard_size = expert_data.shape[shard_dim]
-if not load_full:
+# Only narrow if the loaded_weight is not a scalar (0-dim tensor)
+# and we're not loading the full weight
+if not load_full and loaded_weight.ndim > 0:
loaded_weight = loaded_weight.narrow(
shard_dim, shard_size * tp_rank, shard_size
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment