Commit afb752bc (unverified), authored by Hubert Lu, committed by GitHub
Browse files

[AMD] Fix missing per_token_group_quant_fp8 for ROCm (#5140)

parent 9731eca7
@@ -765,6 +765,8 @@ def invoke_fused_moe_kernel(
 from sglang.srt.layers.quantization.fp8_kernel import (
 sglang_per_token_group_quant_fp8,
 )
+else:
+from sglang.srt.layers.quantization.fp8_kernel import per_token_group_quant_fp8
 assert topk_weights.stride(1) == 1
 assert sorted_token_ids.stride(0) == 1
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment