sglang / Commits / 3c9740d2
"...git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "2e5203be043f107eae5c1b6788584d199f403286"
Unverified commit 3c9740d2, authored Apr 12, 2025 by Zhaoyi Li, committed via GitHub Apr 11, 2025
Parent: 2eb55770

update variable naming and comments for rocm (#5299)
Showing 6 changed files with 14 additions and 10 deletions.
sgl-kernel/benchmark/bench_per_tensor_quant_fp8.py        +2 -2
sgl-kernel/benchmark/bench_per_token_group_quant_8bit.py  +2 -2
sgl-kernel/benchmark/bench_per_token_quant_fp8.py         +2 -2
sgl-kernel/csrc/torch_extension_rocm.cc                   +4 -0
sgl-kernel/tests/test_per_tensor_quant_fp8.py             +2 -2
sgl-kernel/tests/test_per_token_quant_fp8.py              +2 -2
sgl-kernel/benchmark/bench_per_tensor_quant_fp8.py

@@ -11,8 +11,8 @@ from vllm import _custom_ops as ops
 from sglang.srt.utils import is_hip

-is_hip_ = is_hip()
-fp8_type_ = torch.float8_e4m3fnuz if is_hip_ else torch.float8_e4m3fn
+_is_hip = is_hip()
+fp8_type_ = torch.float8_e4m3fnuz if _is_hip else torch.float8_e4m3fn


 def vllm_scaled_fp8_quant(
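Across all five Python files the change is the same rename: the module-level flag is_hip_ becomes _is_hip, and that flag selects the ROCm fp8 dtype (torch.float8_e4m3fnuz) versus the CUDA one (torch.float8_e4m3fn). A minimal sketch of that shared pattern, assuming only that sglang.srt.utils.is_hip() reports a ROCm build (the scaled_to_fp8 helper below is hypothetical, not part of this commit):

import torch

from sglang.srt.utils import is_hip

# ROCm (HIP) builds use the e4m3fnuz fp8 variant; CUDA builds use e4m3fn.
_is_hip = is_hip()
fp8_type_ = torch.float8_e4m3fnuz if _is_hip else torch.float8_e4m3fn


def scaled_to_fp8(x: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
    # Hypothetical helper: divide by the scale and saturate to the
    # representable range of the platform fp8 dtype before casting.
    finfo = torch.finfo(fp8_type_)
    return (x / scale).clamp(finfo.min, finfo.max).to(fp8_type_)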
sgl-kernel/benchmark/bench_per_token_group_quant_8bit.py

@@ -8,8 +8,8 @@ from sgl_kernel import sgl_per_token_group_quant_fp8, sgl_per_token_group_quant_
 from sglang.srt.utils import is_hip

-is_hip_ = is_hip()
-fp8_type_ = torch.float8_e4m3fnuz if is_hip_ else torch.float8_e4m3fn
+_is_hip = is_hip()
+fp8_type_ = torch.float8_e4m3fnuz if _is_hip else torch.float8_e4m3fn


 @triton.jit
sgl-kernel/benchmark/bench_per_token_quant_fp8.py

@@ -9,8 +9,8 @@ from vllm import _custom_ops as ops
 from sglang.srt.utils import is_hip

-is_hip_ = is_hip()
-fp8_type_ = torch.float8_e4m3fnuz if is_hip_ else torch.float8_e4m3fn
+_is_hip = is_hip()
+fp8_type_ = torch.float8_e4m3fnuz if _is_hip else torch.float8_e4m3fn


 def vllm_per_token_quant_fp8(
sgl-kernel/csrc/torch_extension_rocm.cc

@@ -61,11 +61,15 @@ TORCH_LIBRARY_EXPAND(sgl_kernel, m) {
       "moe_align_block_size(Tensor topk_ids, int num_experts, int block_size, Tensor! sorted_token_ids, Tensor! "
       "experts_ids, Tensor! num_tokens_post_pad, Tensor! token_cnts_buffer, Tensor! cumsum_buffer) -> ()");
   m.impl("moe_align_block_size", torch::kCUDA, &moe_align_block_size);

   m.def(
       "topk_softmax(Tensor! topk_weights, Tensor! topk_indices, Tensor! "
       "token_expert_indices, Tensor gating_output) -> ()");
   m.impl("topk_softmax", torch::kCUDA, &topk_softmax);

+  /*
+   * From csrc/speculative
+   */
+
   m.def(
       "verify_tree_greedy(Tensor! predicts, Tensor! accept_index, Tensor! accept_token_num, "
       "Tensor candidates, Tensor retrive_index, Tensor retrive_next_token, Tensor retrive_next_sibling, "
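The only non-rename change is here: a comment block is added above the speculative-decoding op registrations. Once the def/impl pairs inside TORCH_LIBRARY_EXPAND(sgl_kernel, m) have run, the ops are reachable through torch.ops. A quick, hedged way to confirm they are registered, assuming the compiled extension is importable as sgl_kernel and that the macro expands to the "sgl_kernel" namespace:

import torch
import sgl_kernel  # importing the package loads the compiled extension (assumed)

# Each m.def(...)/m.impl(...) pair above registers an op under torch.ops in the
# "sgl_kernel" namespace (assumed from TORCH_LIBRARY_EXPAND(sgl_kernel, m)).
for name in ("moe_align_block_size", "topk_softmax", "verify_tree_greedy"):
    print(name, "->", getattr(torch.ops.sgl_kernel, name))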
sgl-kernel/tests/test_per_tensor_quant_fp8.py

@@ -7,8 +7,8 @@ from sgl_kernel import sgl_per_tensor_quant_fp8
 from sglang.srt.utils import is_hip

-is_hip_ = is_hip()
-fp8_type_ = torch.float8_e4m3fnuz if is_hip_ else torch.float8_e4m3fn
+_is_hip = is_hip()
+fp8_type_ = torch.float8_e4m3fnuz if _is_hip else torch.float8_e4m3fn


 def sglang_scaled_fp8_quant(
sgl-kernel/tests/test_per_token_quant_fp8.py

@@ -7,8 +7,8 @@ from sgl_kernel import sgl_per_token_quant_fp8
 from sglang.srt.utils import is_hip

-is_hip_ = is_hip()
-fp8_type_ = torch.float8_e4m3fnuz if is_hip_ else torch.float8_e4m3fn
+_is_hip = is_hip()
+fp8_type_ = torch.float8_e4m3fnuz if _is_hip else torch.float8_e4m3fn


 def torch_per_token_quant_fp8(tensor, inv_scale):
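The tests keep the same dtype switch, so the reference path and the sgl_kernel path are compared against whichever fp8 format the platform supports. A generic per-token reference in the same spirit, sketched under the commit's dtype selection and not the repo's exact torch_per_token_quant_fp8 helper, might look like:

import torch

from sglang.srt.utils import is_hip

_is_hip = is_hip()
fp8_type_ = torch.float8_e4m3fnuz if _is_hip else torch.float8_e4m3fn


def per_token_quant_reference(x: torch.Tensor):
    # One scale per token (row), taken from the row-wise absolute maximum,
    # followed by a saturating cast to the platform fp8 dtype.
    fp8_max = torch.finfo(fp8_type_).max
    scale = x.abs().amax(dim=-1, keepdim=True).clamp(min=1e-12) / fp8_max
    q = (x / scale).clamp(-fp8_max, fp8_max).to(fp8_type_)
    return q, scale.squeeze(-1)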