Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
0f1dfa1e
"examples/pytorch/pointcloud/pct/ShapeNet.py" did not exist on "44089c8b4d4db4ca71e816e0de50dca972dbabdb"
Unverified
Commit
0f1dfa1e
authored
Jun 14, 2025
by
fzyzcjy
Committed by
GitHub
Jun 13, 2025
Browse files
Tiny add sanity checks for DeepGEMM inputs (#7157)
parent
e3ec6bf4
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
15 additions
and
0 deletions
+15
-0
python/sglang/srt/layers/quantization/fp8_utils.py
python/sglang/srt/layers/quantization/fp8_utils.py
+15
-0
No files found.
python/sglang/srt/layers/quantization/fp8_utils.py
View file @
0f1dfa1e
...
@@ -239,6 +239,11 @@ def deepgemm_w8a8_block_fp8_linear_with_fallback(
...
@@ -239,6 +239,11 @@ def deepgemm_w8a8_block_fp8_linear_with_fallback(
column_major_scales
=
True
,
column_major_scales
=
True
,
scale_tma_aligned
=
True
,
scale_tma_aligned
=
True
,
)
)
if
get_bool_env_var
(
"SGLANG_W8A8_DEEPGEMM_SANITY_CHECK_UE8M0"
):
_check_ue8m0
(
"x_scale"
,
x_scale
)
_check_ue8m0
(
"weight_scale"
,
weight_scale
)
output
=
w8a8_block_fp8_matmul_deepgemm
(
output
=
w8a8_block_fp8_matmul_deepgemm
(
q_input
,
weight
,
x_scale
,
weight_scale
,
block_size
,
output_dtype
=
output_dtype
q_input
,
weight
,
x_scale
,
weight_scale
,
block_size
,
output_dtype
=
output_dtype
)
)
...
@@ -247,6 +252,11 @@ def deepgemm_w8a8_block_fp8_linear_with_fallback(
...
@@ -247,6 +252,11 @@ def deepgemm_w8a8_block_fp8_linear_with_fallback(
return
output
.
to
(
dtype
=
output_dtype
).
view
(
*
output_shape
)
return
output
.
to
(
dtype
=
output_dtype
).
view
(
*
output_shape
)
def _check_ue8m0(name, x):
    """Assert that every element of ``x`` is unchanged by ``ceil_to_ue8m0``.

    Args:
        name: Label for the tensor, echoed in the assertion message.
        x: Tensor of scale values to validate.

    Raises:
        AssertionError: If any element of ``x`` differs from its
            ``ceil_to_ue8m0`` image. The message shows the label, the
            tensor, and the rounded tensor.
    """
    # NOTE: the local must stay named ``x_ceil`` -- the ``{x_ceil=}``
    # specifier prints the variable name as part of the message.
    x_ceil = ceil_to_ue8m0(x)
    assert (x == x_ceil).all(), (
        f"{name=} {x=} {x_ceil=}"
    )
def
aiter_w8a8_block_fp8_linear
(
def
aiter_w8a8_block_fp8_linear
(
input
:
torch
.
Tensor
,
input
:
torch
.
Tensor
,
weight
:
torch
.
Tensor
,
weight
:
torch
.
Tensor
,
...
@@ -380,6 +390,11 @@ def block_quant_dequant(
...
@@ -380,6 +390,11 @@ def block_quant_dequant(
return
(
x_q_block
.
to
(
torch
.
float32
)
*
x_scale_repeat
).
to
(
dtype
)
return
(
x_q_block
.
to
(
torch
.
float32
)
*
x_scale_repeat
).
to
(
dtype
)
# COPIED FROM DeepGEMM
def ceil_to_ue8m0(x: torch.Tensor):
    """Round the magnitude of each element of ``x`` up to a power of two.

    Computes ``2 ** ceil(log2(|x|))`` element-wise, so the sign of the
    input is discarded.

    Args:
        x: Input tensor.

    Returns:
        A tensor holding, for each element, ``2.0`` raised to the ceiling
        of the base-2 logarithm of that element's absolute value.
    """
    magnitude = x.abs()
    # Smallest integer exponent e such that 2**e >= |x|.
    exponent = torch.ceil(torch.log2(magnitude))
    return torch.pow(2.0, exponent)
def
channel_quant_to_tensor_quant
(
def
channel_quant_to_tensor_quant
(
x_q_channel
:
torch
.
Tensor
,
x_q_channel
:
torch
.
Tensor
,
x_s
:
torch
.
Tensor
,
x_s
:
torch
.
Tensor
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment