Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
3c9740d2
Unverified
Commit
3c9740d2
authored
Apr 12, 2025
by
Zhaoyi Li
Committed by
GitHub
Apr 11, 2025
Browse files
update variable naming and comments for rocm (#5299)
parent
2eb55770
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
14 additions
and
10 deletions
+14
-10
sgl-kernel/benchmark/bench_per_tensor_quant_fp8.py
sgl-kernel/benchmark/bench_per_tensor_quant_fp8.py
+2
-2
sgl-kernel/benchmark/bench_per_token_group_quant_8bit.py
sgl-kernel/benchmark/bench_per_token_group_quant_8bit.py
+2
-2
sgl-kernel/benchmark/bench_per_token_quant_fp8.py
sgl-kernel/benchmark/bench_per_token_quant_fp8.py
+2
-2
sgl-kernel/csrc/torch_extension_rocm.cc
sgl-kernel/csrc/torch_extension_rocm.cc
+4
-0
sgl-kernel/tests/test_per_tensor_quant_fp8.py
sgl-kernel/tests/test_per_tensor_quant_fp8.py
+2
-2
sgl-kernel/tests/test_per_token_quant_fp8.py
sgl-kernel/tests/test_per_token_quant_fp8.py
+2
-2
No files found.
sgl-kernel/benchmark/bench_per_tensor_quant_fp8.py
View file @
3c9740d2
...
@@ -11,8 +11,8 @@ from vllm import _custom_ops as ops
...
@@ -11,8 +11,8 @@ from vllm import _custom_ops as ops
from
sglang.srt.utils
import
is_hip
from
sglang.srt.utils
import
is_hip
is_hip
_
=
is_hip
()
_
is_hip
=
is_hip
()
fp8_type_
=
torch
.
float8_e4m3fnuz
if
is_hip
_
else
torch
.
float8_e4m3fn
fp8_type_
=
torch
.
float8_e4m3fnuz
if
_
is_hip
else
torch
.
float8_e4m3fn
def
vllm_scaled_fp8_quant
(
def
vllm_scaled_fp8_quant
(
...
...
sgl-kernel/benchmark/bench_per_token_group_quant_8bit.py
View file @
3c9740d2
...
@@ -8,8 +8,8 @@ from sgl_kernel import sgl_per_token_group_quant_fp8, sgl_per_token_group_quant_
...
@@ -8,8 +8,8 @@ from sgl_kernel import sgl_per_token_group_quant_fp8, sgl_per_token_group_quant_
from
sglang.srt.utils
import
is_hip
from
sglang.srt.utils
import
is_hip
is_hip
_
=
is_hip
()
_
is_hip
=
is_hip
()
fp8_type_
=
torch
.
float8_e4m3fnuz
if
is_hip
_
else
torch
.
float8_e4m3fn
fp8_type_
=
torch
.
float8_e4m3fnuz
if
_
is_hip
else
torch
.
float8_e4m3fn
@
triton
.
jit
@
triton
.
jit
...
...
sgl-kernel/benchmark/bench_per_token_quant_fp8.py
View file @
3c9740d2
...
@@ -9,8 +9,8 @@ from vllm import _custom_ops as ops
...
@@ -9,8 +9,8 @@ from vllm import _custom_ops as ops
from
sglang.srt.utils
import
is_hip
from
sglang.srt.utils
import
is_hip
is_hip
_
=
is_hip
()
_
is_hip
=
is_hip
()
fp8_type_
=
torch
.
float8_e4m3fnuz
if
is_hip
_
else
torch
.
float8_e4m3fn
fp8_type_
=
torch
.
float8_e4m3fnuz
if
_
is_hip
else
torch
.
float8_e4m3fn
def
vllm_per_token_quant_fp8
(
def
vllm_per_token_quant_fp8
(
...
...
sgl-kernel/csrc/torch_extension_rocm.cc
View file @
3c9740d2
...
@@ -61,11 +61,15 @@ TORCH_LIBRARY_EXPAND(sgl_kernel, m) {
...
@@ -61,11 +61,15 @@ TORCH_LIBRARY_EXPAND(sgl_kernel, m) {
"moe_align_block_size(Tensor topk_ids, int num_experts, int block_size, Tensor! sorted_token_ids, Tensor! "
"moe_align_block_size(Tensor topk_ids, int num_experts, int block_size, Tensor! sorted_token_ids, Tensor! "
"experts_ids, Tensor! num_tokens_post_pad, Tensor! token_cnts_buffer, Tensor! cumsum_buffer) -> ()"
);
"experts_ids, Tensor! num_tokens_post_pad, Tensor! token_cnts_buffer, Tensor! cumsum_buffer) -> ()"
);
m
.
impl
(
"moe_align_block_size"
,
torch
::
kCUDA
,
&
moe_align_block_size
);
m
.
impl
(
"moe_align_block_size"
,
torch
::
kCUDA
,
&
moe_align_block_size
);
m
.
def
(
m
.
def
(
"topk_softmax(Tensor! topk_weights, Tensor! topk_indices, Tensor! "
"topk_softmax(Tensor! topk_weights, Tensor! topk_indices, Tensor! "
"token_expert_indices, Tensor gating_output) -> ()"
);
"token_expert_indices, Tensor gating_output) -> ()"
);
m
.
impl
(
"topk_softmax"
,
torch
::
kCUDA
,
&
topk_softmax
);
m
.
impl
(
"topk_softmax"
,
torch
::
kCUDA
,
&
topk_softmax
);
/*
* From csrc/speculative
*/
m
.
def
(
m
.
def
(
"verify_tree_greedy(Tensor! predicts, Tensor! accept_index, Tensor! accept_token_num, "
"verify_tree_greedy(Tensor! predicts, Tensor! accept_index, Tensor! accept_token_num, "
"Tensor candidates, Tensor retrive_index, Tensor retrive_next_token, Tensor retrive_next_sibling, "
"Tensor candidates, Tensor retrive_index, Tensor retrive_next_token, Tensor retrive_next_sibling, "
...
...
sgl-kernel/tests/test_per_tensor_quant_fp8.py
View file @
3c9740d2
...
@@ -7,8 +7,8 @@ from sgl_kernel import sgl_per_tensor_quant_fp8
...
@@ -7,8 +7,8 @@ from sgl_kernel import sgl_per_tensor_quant_fp8
from
sglang.srt.utils
import
is_hip
from
sglang.srt.utils
import
is_hip
is_hip
_
=
is_hip
()
_
is_hip
=
is_hip
()
fp8_type_
=
torch
.
float8_e4m3fnuz
if
is_hip
_
else
torch
.
float8_e4m3fn
fp8_type_
=
torch
.
float8_e4m3fnuz
if
_
is_hip
else
torch
.
float8_e4m3fn
def
sglang_scaled_fp8_quant
(
def
sglang_scaled_fp8_quant
(
...
...
sgl-kernel/tests/test_per_token_quant_fp8.py
View file @
3c9740d2
...
@@ -7,8 +7,8 @@ from sgl_kernel import sgl_per_token_quant_fp8
...
@@ -7,8 +7,8 @@ from sgl_kernel import sgl_per_token_quant_fp8
from
sglang.srt.utils
import
is_hip
from
sglang.srt.utils
import
is_hip
is_hip
_
=
is_hip
()
_
is_hip
=
is_hip
()
fp8_type_
=
torch
.
float8_e4m3fnuz
if
is_hip
_
else
torch
.
float8_e4m3fn
fp8_type_
=
torch
.
float8_e4m3fnuz
if
_
is_hip
else
torch
.
float8_e4m3fn
def
torch_per_token_quant_fp8
(
tensor
,
inv_scale
):
def
torch_per_token_quant_fp8
(
tensor
,
inv_scale
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment