"apps/kg/vscode:/vscode.git/clone" did not exist on "df8a7be52d1d554c74dd01a08a90885d6a6dfa4c"
Unverified commit 405780bc, authored by kk, committed by GitHub
Browse files

[amd] Opt dsv3 moe (#7160)


Co-authored-by: wunhuang <wunhuang@amd.com>
parent 1dffee31
...@@ -82,6 +82,7 @@ _use_aiter = get_bool_env_var("SGLANG_USE_AITER") and _is_hip ...@@ -82,6 +82,7 @@ _use_aiter = get_bool_env_var("SGLANG_USE_AITER") and _is_hip
if _is_hip: if _is_hip:
from aiter import ActivationType, QuantType from aiter import ActivationType, QuantType
from aiter.fused_moe import fused_moe
from aiter.fused_moe_bf16_asm import asm_moe, ck_moe_2stages from aiter.fused_moe_bf16_asm import asm_moe, ck_moe_2stages
from aiter.ops.shuffle import shuffle_weight from aiter.ops.shuffle import shuffle_weight
...@@ -1062,19 +1063,20 @@ class Fp8MoEMethod: ...@@ -1062,19 +1063,20 @@ class Fp8MoEMethod:
if _use_aiter: if _use_aiter:
assert not no_combine, f"{no_combine=} is not supported." assert not no_combine, f"{no_combine=} is not supported."
if self.block_quant: if self.block_quant:
# TODO(_use_aiter): FP8 block_quant only supports 'silu' for the time-being. return fused_moe(
assert (
activation == "silu"
), f"_use_aiter: FP8 bloack_quant {activation=} will be supported later, unset _use_aiter"
return asm_moe(
x, x,
layer.w13_weight, layer.w13_weight,
layer.w2_weight, layer.w2_weight,
topk_weights, topk_weights,
topk_ids, topk_ids,
layer.w13_weight_scale_inv, w1_scale=layer.w13_weight_scale_inv,
layer.w2_weight_scale_inv, w2_scale=layer.w2_weight_scale_inv,
block_shape=tuple(self.quant_config.weight_block_size), quant_type=QuantType.per_128x128,
activation=(
ActivationType.Silu
if activation == "silu"
else ActivationType.Gelu
),
expert_mask=None, expert_mask=None,
) )
else: else:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment