Commit 09bdce60 authored by yangql
Browse files

修复awq-marlin的bug (Fix the awq-marlin bug)

parent ef8dd155
......@@ -1515,8 +1515,9 @@ def awq_marlin_moe_repack(
output = torch.empty((num_experts, size_k // 16, size_n * (num_bits // 2)),
device=b_q_weight.device,
dtype=b_q_weight.dtype)
output[e] = op.awq_marlin_repack(b_q_weight[e], size_k,
size_n, num_bits)
for e in range(num_experts):
output[e] = op.awq_marlin_repack(b_q_weight[e], size_k,
size_n, num_bits)
return output
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment