"docs/vscode:/vscode.git/clone" did not exist on "2c4f59afc3d50fda805c4ad94c9d9be168cded0b"
Unverified commit fdc135d7, authored by Tsukasa OI, committed by GitHub
Browse files

[Misc][Quantization] Clarify the intent of GGUF `FusedMoE` weight materialization (#30310)


Signed-off-by: Tsukasa OI <floss_llm@irq.a4lg.com>
parent 4fa7ce46
...@@ -1200,10 +1200,14 @@ class FusedMoE(CustomOp): ...@@ -1200,10 +1200,14 @@ class FusedMoE(CustomOp):
if full_load: if full_load:
shard_dim += 1 shard_dim += 1
# Materialize GGUF UninitializedParameter # Materialize GGUF UninitializedParameter accounting merged weights
if is_gguf_weight and isinstance(param, UninitializedParameter): if is_gguf_weight and isinstance(param, UninitializedParameter):
# To materialize a tensor, we must have full shape including
# number of experts, making this portion to require `full_load`.
assert full_load
final_shape = list(loaded_weight.shape) final_shape = list(loaded_weight.shape)
if shard_id in ["w1", "w3"]: # w1 and w3 are merged per expert.
if shard_id in {"w1", "w3"}:
final_shape[1] *= 2 final_shape[1] *= 2
final_shape[shard_dim] = final_shape[shard_dim] // self.tp_size final_shape[shard_dim] = final_shape[shard_dim] // self.tp_size
param.materialize(final_shape, dtype=loaded_weight.dtype) param.materialize(final_shape, dtype=loaded_weight.dtype)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment