Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
02114356
Unverified
Commit
02114356
authored
Nov 10, 2025
by
Yong Hoon Shin
Committed by
GitHub
Nov 10, 2025
Browse files
[ROCm] Add missing gemm_a8w8_blockscale import (#28378)
Signed-off-by:
Yong Hoon Shin
<
yhshin@meta.com
>
parent
30700b1c
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
21 additions
and
20 deletions
+21
-20
vllm/model_executor/layers/quantization/utils/fp8_utils.py
vllm/model_executor/layers/quantization/utils/fp8_utils.py
+21
-20
No files found.
vllm/model_executor/layers/quantization/utils/fp8_utils.py
View file @
02114356
...
...
@@ -316,37 +316,38 @@ class W8A8BlockFp8LinearOp:
assert
self
.
act_quant_group_shape
==
GroupShape
(
1
,
128
)
n
,
k
=
weight
.
shape
if
input_scale
is
not
None
:
q_input
=
input_2d
# MI350 case uses triton kernel
if
(
use_triton
=
(
not
current_platform
.
is_fp8_fnuz
()
and
rocm_aiter_ops
.
is_triton_gemm_w8a8_tuned
(
n
,
k
)
):
)
if
use_triton
:
gemm_a8w8_blockscale_op
=
rocm_aiter_ops
.
triton_gemm_a8w8_blockscale
else
:
gemm_a8w8_blockscale_op
=
rocm_aiter_ops
.
gemm_w8a8_blockscale
if
input_scale
is
not
None
:
q_input
=
input_2d
# MI350 case uses triton kernel
elif
use_triton
:
q_input
,
input_scale
=
per_token_group_quant_fp8
(
input_2d
,
self
.
act_quant_group_shape
.
col
,
column_major_scales
=
False
,
use_ue8m0
=
False
,
)
return
rocm_aiter_ops
.
triton_gemm_a8w8_blockscale
(
q_input
,
weight
,
input_scale
,
weight_scale
,
input_2d
.
dtype
,
)
# MI300 uses tuned AITER ASM/C++ kernel
else
:
q_input
,
input_scale
=
rocm_aiter_ops
.
per_1x128_fp8_quant
(
input_2d
)
return
rocm_aiter_ops
.
gemm_w8a8_blockscale
(
return
gemm_a8w8_blockscale_op
(
q_input
,
weight
,
input_scale
,
weight_scale
,
input_2d
.
dtype
,
list
(
self
.
weight_group_shape
),
output_dtype
=
input_2d
.
dtype
,
)
def
_run_triton
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment