Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
zhaoyu6
sglang
Commits
e3ec6bf4
"vscode:/vscode.git/clone" did not exist on "ad2a1c7acca3e1f70f1691da9fe139278b11c75d"
Unverified
Commit
e3ec6bf4
authored
Jun 14, 2025
by
fzyzcjy
Committed by
GitHub
Jun 13, 2025
Browse files
Minor speed up block_quant_dequant (#6814)
parent
b04df75a
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
7 additions
and
19 deletions
+7
-19
python/sglang/srt/layers/quantization/fp8_utils.py
python/sglang/srt/layers/quantization/fp8_utils.py
+7
-19
No files found.
python/sglang/srt/layers/quantization/fp8_utils.py
View file @
e3ec6bf4
...
@@ -369,27 +369,15 @@ def block_quant_dequant(
...
@@ -369,27 +369,15 @@ def block_quant_dequant(
The output is an unquantized tensor with dtype.
The output is an unquantized tensor with dtype.
"""
"""
block_n
,
block_k
=
block_size
[
0
],
block_size
[
1
]
block_n
,
block_k
=
block_size
[
0
],
block_size
[
1
]
n
,
k
=
x_q_block
.
shape
*
_
,
n
,
k
=
x_q_block
.
shape
n_tiles
=
(
n
+
block_n
-
1
)
//
block_n
k_tiles
=
(
k
+
block_k
-
1
)
//
block_k
assert
n_tiles
==
x_s
.
shape
[
0
]
assert
k_tiles
==
x_s
.
shape
[
1
]
x_dq_block
=
torch
.
empty_like
(
x_q_block
,
dtype
=
dtype
)
for
j
in
range
(
n_tiles
):
# ... n_scale k_scale -> ... (n_scale block_n) (k_scale block_k)
for
i
in
range
(
k_tiles
):
x_scale_repeat
=
x_s
.
repeat_interleave
(
block_n
,
dim
=-
2
).
repeat_interleave
(
x_q_block_tile
=
x_q_block
[
block_k
,
dim
=-
1
j
*
block_n
:
min
((
j
+
1
)
*
block_n
,
n
),
)
i
*
block_k
:
min
((
i
+
1
)
*
block_k
,
k
),
x_scale_repeat
=
x_scale_repeat
[...,
:
n
,
:
k
]
]
x_dq_block_tile
=
x_dq_block
[
j
*
block_n
:
min
((
j
+
1
)
*
block_n
,
n
),
i
*
block_k
:
min
((
i
+
1
)
*
block_k
,
k
),
]
x_dq_block_tile
[:,
:]
=
x_q_block_tile
.
to
(
torch
.
float32
)
*
x_s
[
j
][
i
]
return
x_
d
q_block
return
(
x_q_block
.
to
(
torch
.
float32
)
*
x_scale_repeat
).
to
(
dtype
)
def
channel_quant_to_tensor_quant
(
def
channel_quant_to_tensor_quant
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment