Unverified commit fcba05c4, authored by Wentao Ye, committed by GitHub
Browse files

[Bug] Fix Layer `weight_block_size` Assertion Issue (#24674)


Signed-off-by: yewentao256 <zhyanwentao@126.com>
parent 7a30fa87
@@ -450,10 +450,10 @@ class Fp8LinearMethod(LinearMethodBase):
 # Activations not quantized for marlin.
 del layer.input_scale
-# On B200, if E8M0 for DeepGemm is used, we need to
+# On Blackwell or Hopper, if E8M0 for DeepGemm is used, we need to
 # requantize the weight and input to the specific scale
 # at the same time.
-if is_deep_gemm_e8m0_used():
+if is_deep_gemm_e8m0_used() and self.block_quant:
     assert layer.weight_block_size is not None
     block_sz = tuple(layer.weight_block_size)
     requant_weight_ue8m0_inplace(
@@ -905,7 +905,7 @@ class Fp8MoEMethod(FusedMoEMethodBase):
 del layer.w13_input_scale
 del layer.w2_input_scale
-if is_deep_gemm_e8m0_used():
+if is_deep_gemm_e8m0_used() and self.block_quant:
     assert layer.weight_block_size is not None
     # Re-quantise the expert weights so their scales are UE8M0.
     block_sz = tuple(layer.weight_block_size)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment