Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
a1175a4e
Commit
a1175a4e
authored
Nov 22, 2025
by
maxiao1
Browse files
Merge remote-tracking branch 'origin/v0.5.4_dev' into sglang_v0.5.5
parents
0c006b88
31653dd9
Changes
62
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
220 additions
and
0 deletions
+220
-0
sgl-kernel/python/sgl_kernel/kvcacheio.py
sgl-kernel/python/sgl_kernel/kvcacheio.py
+125
-0
sgl-kernel/setup_hip.py
sgl-kernel/setup_hip.py
+95
-0
No files found.
sgl-kernel/python/sgl_kernel/kvcacheio.py
View file @
a1175a4e
...
@@ -9,6 +9,58 @@ def is_hip() -> bool:
...
@@ -9,6 +9,58 @@ def is_hip() -> bool:
_is_hip
=
is_hip
()
_is_hip
=
is_hip
()
def dcu_create_extend_after_decode_spec_info(
    verified_id: torch.Tensor,
    seq_lens: torch.Tensor,
    accept_lens: torch.Tensor,
    positions: torch.Tensor,
    new_verified_id: torch.Tensor,
    bs: int,
):
    """Invoke the ``dcu_create_extend_after_decode_spec_info`` custom op.

    Thin Python entry point: every argument is forwarded positionally, in
    declaration order, to the op of the same name under
    ``torch.ops.sgl_kernel``. The op's return value is discarded, so this
    function always returns ``None``.

    NOTE(review): presumably the kernel writes its results into ``positions``
    and ``new_verified_id`` in place -- confirm against the op's schema.
    """
    kernel = torch.ops.sgl_kernel.dcu_create_extend_after_decode_spec_info
    kernel(verified_id, seq_lens, accept_lens, positions, new_verified_id, bs)
def dcu_alloc_extend_kernel(
    pre_lens_ptr: torch.Tensor,
    seq_lens_ptr: torch.Tensor,
    last_loc_ptr: torch.Tensor,
    free_page_ptr: torch.Tensor,
    out_indices: torch.Tensor,
    bs: int,
    page_size: int,
):
    """Invoke the ``dcu_alloc_extend_kernel`` custom op.

    All seven arguments are handed to ``torch.ops.sgl_kernel`` positionally
    and in declaration order. Nothing is returned; whatever the op produces
    as a return value is dropped.

    NOTE(review): ``out_indices`` looks like the output buffer filled by the
    kernel -- confirm against the op's schema.
    """
    kernel = torch.ops.sgl_kernel.dcu_alloc_extend_kernel
    kernel(
        pre_lens_ptr,
        seq_lens_ptr,
        last_loc_ptr,
        free_page_ptr,
        out_indices,
        bs,
        page_size,
    )
def dcu_alloc_decode_kernel(
    seq_lens_ptr: torch.Tensor,
    last_loc_ptr: torch.Tensor,
    free_page_ptr: torch.Tensor,
    out_indices: torch.Tensor,
    bs: int,
    page_size: int,
):
    """Invoke the ``dcu_alloc_decode_kernel`` custom op.

    The arguments are passed through unchanged, positionally and in
    declaration order, to ``torch.ops.sgl_kernel``. The function itself
    returns ``None``.

    NOTE(review): ``out_indices`` looks like the output buffer filled by the
    kernel -- confirm against the op's schema.
    """
    kernel = torch.ops.sgl_kernel.dcu_alloc_decode_kernel
    kernel(seq_lens_ptr, last_loc_ptr, free_page_ptr, out_indices, bs, page_size)
def
transfer_kv_per_layer
(
def
transfer_kv_per_layer
(
src_k
:
torch
.
Tensor
,
src_k
:
torch
.
Tensor
,
...
@@ -305,3 +357,76 @@ def transfer_kv_all_layer_mla_lf_pf(
...
@@ -305,3 +357,76 @@ def transfer_kv_all_layer_mla_lf_pf(
block_quota
,
block_quota
,
num_warps_per_block
,
num_warps_per_block
,
)
)
def dcu_assign_req_to_token_pool(
    req_pool_indices: torch.Tensor,
    req_to_token: torch.Tensor,
    allocate_lens: torch.Tensor,
    new_allocate_lens: torch.Tensor,
    out_cache_loc: torch.Tensor,
    shape: int,
    bs: int,
):
    """Invoke the ``dcu_assign_req_to_token_pool`` custom op.

    Forwards every argument positionally, in declaration order, to the op of
    the same name under ``torch.ops.sgl_kernel`` and returns ``None``.

    NOTE(review): which tensor(s) the kernel mutates is not visible from this
    wrapper -- check the op's schema before relying on in-place behavior.
    """
    kernel = torch.ops.sgl_kernel.dcu_assign_req_to_token_pool
    kernel(
        req_pool_indices,
        req_to_token,
        allocate_lens,
        new_allocate_lens,
        out_cache_loc,
        shape,
        bs,
    )
def dcu_get_last_loc(
    req_to_token: torch.Tensor,
    req_pool_indices: torch.Tensor,
    prefix_lens: torch.Tensor,
):
    """Invoke the ``dcu_get_last_loc`` custom op and return its result.

    The three tensors are passed positionally, in declaration order, to
    ``torch.ops.sgl_kernel.dcu_get_last_loc``; whatever that op returns is
    handed back to the caller unchanged.
    """
    return torch.ops.sgl_kernel.dcu_get_last_loc(
        req_to_token, req_pool_indices, prefix_lens
    )
def dcu_assign_extend_cache_locs(
    req_pool_indices: torch.Tensor,
    req_to_token: torch.Tensor,
    start_offset: torch.Tensor,
    end_offset: torch.Tensor,
    out_cache_loc: torch.Tensor,
    pool_len: int,
    bs: int,
):
    """Invoke the ``dcu_assign_extend_cache_locs`` custom op.

    Every argument is forwarded positionally, in declaration order, to the
    op of the same name under ``torch.ops.sgl_kernel``. The wrapper returns
    ``None``.

    NOTE(review): ``out_cache_loc`` looks like the destination buffer written
    by the kernel -- confirm against the op's schema.
    """
    kernel = torch.ops.sgl_kernel.dcu_assign_extend_cache_locs
    kernel(
        req_pool_indices,
        req_to_token,
        start_offset,
        end_offset,
        out_cache_loc,
        pool_len,
        bs,
    )
def dcu_create_chunked_prefix_cache_kv_indices(
    req_to_token: torch.Tensor,
    req_pool_indices: torch.Tensor,
    chunk_starts: torch.Tensor,
    chunk_seq_lens: torch.Tensor,
    chunk_cu_seq_lens: torch.Tensor,
    chunk_kv_indices: torch.Tensor,
    col_num: int,
    bs: int,
):
    """Invoke the ``dcu_create_chunked_prefix_cache_kv_indices`` custom op.

    The eight arguments are passed positionally, in declaration order, to
    the op of the same name under ``torch.ops.sgl_kernel``. The wrapper
    returns ``None``.

    NOTE(review): ``chunk_kv_indices`` looks like the output buffer filled by
    the kernel -- confirm against the op's schema.
    """
    kernel = torch.ops.sgl_kernel.dcu_create_chunked_prefix_cache_kv_indices
    kernel(
        req_to_token,
        req_pool_indices,
        chunk_starts,
        chunk_seq_lens,
        chunk_cu_seq_lens,
        chunk_kv_indices,
        col_num,
        bs,
    )
sgl-kernel/setup_hip.py
0 → 100644
View file @
a1175a4e
# Copyright 2025 SGLang Team. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import
os
import
platform
import
sys
from
pathlib
import
Path
import
torch
from
setuptools
import
find_packages
,
setup
from
torch.utils.cpp_extension
import
BuildExtension
,
CUDAExtension
# Resolved path of the directory that contains this setup script; used below
# to locate pyproject.toml and the include/csrc directories.
root = Path(__file__).parent.resolve()
# Lower-cased machine type reported by platform.machine(); interpolated into
# the "-L/usr/lib/<arch>-linux-gnu" linker search path further down.
arch = platform.machine().lower()
def
_get_version
():
with
open
(
root
/
"pyproject.toml"
)
as
f
:
for
line
in
f
:
if
line
.
startswith
(
"version"
):
return
line
.
split
(
"="
)[
1
].
strip
().
strip
(
'"'
)
# Value substituted into the -DOPERATOR_NAMESPACE=<...> compile definition
# in hipcc_flags below.
operator_namespace = "sgl_kernel"

# Header search paths, all relative to the directory of this setup script.
include_dirs = [
    root / "include",
    root / "include" / "impl",
    root / "csrc",
]

# Source files compiled into the single extension module defined below.
sources = [
    "csrc/allreduce/custom_all_reduce.hip",
    "csrc/allreduce/quick_all_reduce.cu",
    "csrc/common_extension_rocm.cc",
    "csrc/elementwise/activation.cu",
    "csrc/grammar/apply_token_bitmask_inplace_cuda.cu",
    "csrc/moe/moe_align_kernel.cu",
    "csrc/moe/moe_topk_softmax_kernels.cu",
    "csrc/speculative/eagle_utils.cu",
    "csrc/kvcacheio/transfer.cu",
    "csrc/attention/merge_attn_states.cu",
]

# Flags passed under the "cxx" key of extra_compile_args.
cxx_flags = ["-O3"]

# Libraries the extension is linked against.
libraries = ["hiprtc", "amdhip64", "c10", "torch", "torch_python"]

# Linker arguments: an rpath relative to the built module's location
# ($ORIGIN) pointing at torch/lib, plus an architecture-specific library
# search directory built from `arch`.
extra_link_args = [
    "-Wl,-rpath,$ORIGIN/../../torch/lib",
    f"-L/usr/lib/{arch}-linux-gnu"
]

# Flags passed under the "nvcc" key of extra_compile_args.
# NOTE(review): the list is named for hipcc, so presumably the "nvcc" key is
# what the torch extension builder reads for HIP builds -- confirm.
hipcc_flags = [
    "-DNDEBUG",
    f"-DOPERATOR_NAMESPACE={operator_namespace}",
    "-O3",
    "-Xcompiler",
    "-fPIC",
    "-std=c++17",
    "-DENABLE_BF16",
    "-DENABLE_FP8",
]
# The one native extension built by this script, importable as
# sgl_kernel.common_ops. It is assembled from the source list, include
# paths, compiler flags, libraries and linker arguments defined above.
ext_modules = [
    CUDAExtension(
        name="sgl_kernel.common_ops",
        sources=sources,
        include_dirs=include_dirs,
        extra_compile_args={
            "nvcc": hipcc_flags,
            "cxx": cxx_flags,
        },
        libraries=libraries,
        extra_link_args=extra_link_args,
        # NOTE(review): the extension opts out of the limited API here, while
        # the setup() call in this file requests a "cp39" py_limited_api
        # wheel tag -- confirm that combination is intentional.
        py_limited_api=False,
    ),
]
# Package definition: Python packages are discovered under ./python, the
# version is read from pyproject.toml via _get_version(), and the native
# extension list is built with torch's BuildExtension with ninja enabled.
setup(
    name="sgl-kernel",
    version=_get_version(),
    packages=find_packages(where="python"),
    package_dir={"": "python"},
    ext_modules=ext_modules,
    cmdclass={"build_ext": BuildExtension.with_options(use_ninja=True)},
    # Requests a "cp39" py_limited_api tag from the bdist_wheel command.
    options={"bdist_wheel": {"py_limited_api": "cp39"}},
)
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment