@@ -611,7 +611,7 @@ class Fp8EPMoEMethod(Fp8MoEMethod):
             self.quant_config.weight_block_size[1],
         )
         # NOTE(HandH1998): To ensure proper alignment of the block-wise quantization scales, the output_size of the weights for both the gate and up layers must be divisible by block_n.
-        # Required by collum parallel or enabling merged weights
+        # Required by column parallel or enabling merged weights
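For context, the alignment the NOTE describes comes down to a divisibility check on the weight shapes before the block-wise scale tensors are allocated: because the gate and up projections are column-parallel (or merged into one weight), their output dimension must split evenly into `block_n`-sized rows of scales. Below is a minimal sketch of such a check, assuming hypothetical names (`check_block_alignment`, `intermediate_size`, `hidden_size`); the actual guard in the quantization method may be shaped differently:

```python
# Sketch of the alignment constraint the NOTE above refers to.
# Names here are illustrative, not the exact code from the PR.
def check_block_alignment(
    intermediate_size: int,
    hidden_size: int,
    weight_block_size: tuple[int, int],
) -> None:
    block_n, block_k = weight_block_size
    # Output dimension of the gate/up weights must split evenly
    # into block_n-sized groups, one scale per group.
    if intermediate_size % block_n != 0:
        raise ValueError(
            f"intermediate_size ({intermediate_size}) must be divisible "
            f"by weight quantization block_n ({block_n})."
        )
    # The input dimension must likewise split evenly into block_k groups.
    if hidden_size % block_k != 0:
        raise ValueError(
            f"hidden_size ({hidden_size}) must be divisible "
            f"by weight quantization block_k ({block_k})."
        )

# Example: a 128x128 block size aligns with these MoE shapes.
check_block_alignment(
    intermediate_size=1408, hidden_size=2048, weight_block_size=(128, 128)
)
```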