Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
86497d99
Unverified
Commit
86497d99
authored
Aug 10, 2025
by
huangtingwei
Committed by
GitHub
Aug 09, 2025
Browse files
fix page first per layer pf2lf kernel (#8915)
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
parent
5c31b35d
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
15 additions
and
5 deletions
+15
-5
python/sglang/srt/mem_cache/memory_pool_host.py
python/sglang/srt/mem_cache/memory_pool_host.py
+2
-0
sgl-kernel/csrc/common_extension.cc
sgl-kernel/csrc/common_extension.cc
+3
-3
sgl-kernel/csrc/kvcacheio/transfer.cu
sgl-kernel/csrc/kvcacheio/transfer.cu
+4
-2
sgl-kernel/include/sgl_kernel_ops.h
sgl-kernel/include/sgl_kernel_ops.h
+2
-0
sgl-kernel/python/sgl_kernel/kvcacheio.py
sgl-kernel/python/sgl_kernel/kvcacheio.py
+4
-0
No files found.
python/sglang/srt/mem_cache/memory_pool_host.py
View file @
86497d99
...
...
@@ -358,6 +358,7 @@ class MHATokenToKVPoolHost(HostKVCache):
dst_v
=
device_pool
.
v_buffer
[
layer_id
],
src_indices
=
host_indices
,
dst_indices
=
device_indices
,
layer_id
=
layer_id
,
item_size
=
self
.
token_stride_size
,
src_layout_dim
=
self
.
layout_dim
,
)
...
...
@@ -585,6 +586,7 @@ class MLATokenToKVPoolHost(HostKVCache):
dst
=
device_pool
.
kv_buffer
[
layer_id
],
src_indices
=
host_indices
,
dst_indices
=
device_indices
,
layer_id
=
layer_id
,
item_size
=
self
.
token_stride_size
,
src_layout_dim
=
self
.
layout_dim
,
)
...
...
sgl-kernel/csrc/common_extension.cc
View file @
86497d99
...
...
@@ -250,7 +250,7 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
m
.
impl
(
"transfer_kv_per_layer"
,
torch
::
kCUDA
,
&
transfer_kv_per_layer
);
m
.
def
(
"transfer_kv_per_layer_pf_lf(Tensor src_k, Tensor dst_k, Tensor src_v, Tensor dst_v, Tensor src_indices, Tensor "
"dst_indices, int item_size, int src_layout_dim, int block_quota, int num_warps_per_block) -> ()"
);
"dst_indices, int
layer_id, int
item_size, int src_layout_dim, int block_quota, int num_warps_per_block) -> ()"
);
m
.
impl
(
"transfer_kv_per_layer_pf_lf"
,
torch
::
kCUDA
,
&
transfer_kv_per_layer_pf_lf
);
m
.
def
(
"transfer_kv_all_layer(Tensor src_k_layers, Tensor dst_k_layers, Tensor src_v_layers, Tensor dst_v_layers, "
...
...
@@ -267,8 +267,8 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
"block_quota, int num_warps_per_block) -> ()"
);
m
.
impl
(
"transfer_kv_per_layer_mla"
,
torch
::
kCUDA
,
&
transfer_kv_per_layer_mla
);
m
.
def
(
"transfer_kv_per_layer_mla_pf_lf(Tensor src, Tensor dst, Tensor src_indices, Tensor dst_indices, int
item_size
, "
"int src_layout_dim, int block_quota, int num_warps_per_block) -> ()"
);
"transfer_kv_per_layer_mla_pf_lf(Tensor src, Tensor dst, Tensor src_indices, Tensor dst_indices, int
layer_id
, "
"int
item_size, int
src_layout_dim, int block_quota, int num_warps_per_block) -> ()"
);
m
.
impl
(
"transfer_kv_per_layer_mla_pf_lf"
,
torch
::
kCUDA
,
&
transfer_kv_per_layer_mla_pf_lf
);
m
.
def
(
"transfer_kv_all_layer_mla(Tensor src_layers, Tensor dst_layers, Tensor src_indices, Tensor dst_indices, int "
...
...
sgl-kernel/csrc/kvcacheio/transfer.cu
View file @
86497d99
...
...
@@ -210,6 +210,7 @@ void transfer_kv_per_layer_pf_lf(
at
::
Tensor
dst_v
,
const
at
::
Tensor
src_indices
,
const
at
::
Tensor
dst_indices
,
int64_t
layer_id
,
int64_t
item_size
,
int64_t
src_layout_dim
,
int64_t
block_quota
,
...
...
@@ -222,7 +223,7 @@ void transfer_kv_per_layer_pf_lf(
dst_v
,
src_indices
,
dst_indices
,
0
,
layer_id
,
1
,
item_size
,
src_layout_dim
,
...
...
@@ -336,6 +337,7 @@ void transfer_kv_per_layer_mla_pf_lf(
at
::
Tensor
dst
,
const
at
::
Tensor
src_indices
,
const
at
::
Tensor
dst_indices
,
int64_t
layer_id
,
int64_t
item_size
,
int64_t
src_layout_dim
,
int64_t
block_quota
,
...
...
@@ -348,7 +350,7 @@ void transfer_kv_per_layer_mla_pf_lf(
empty
,
src_indices
,
dst_indices
,
0
,
layer_id
,
1
,
item_size
,
src_layout_dim
,
...
...
sgl-kernel/include/sgl_kernel_ops.h
View file @
86497d99
...
...
@@ -419,6 +419,7 @@ void transfer_kv_per_layer_pf_lf(
at
::
Tensor
dst_v
,
const
at
::
Tensor
src_indices
,
const
at
::
Tensor
dst_indices
,
int64_t
layer_id
,
int64_t
item_size
,
int64_t
src_layout_dim
,
int64_t
block_quota
,
...
...
@@ -463,6 +464,7 @@ void transfer_kv_per_layer_mla_pf_lf(
at
::
Tensor
dst
,
const
at
::
Tensor
src_indices
,
const
at
::
Tensor
dst_indices
,
int64_t
layer_id
,
int64_t
item_size
,
int64_t
src_layout_dim
,
int64_t
block_quota
,
...
...
sgl-kernel/python/sgl_kernel/kvcacheio.py
View file @
86497d99
...
...
@@ -34,6 +34,7 @@ def transfer_kv_per_layer_pf_lf(
dst_v
:
torch
.
Tensor
,
src_indices
:
torch
.
Tensor
,
dst_indices
:
torch
.
Tensor
,
layer_id
:
int
,
item_size
:
int
,
src_layout_dim
:
int
,
block_quota
:
int
=
2
,
...
...
@@ -46,6 +47,7 @@ def transfer_kv_per_layer_pf_lf(
dst_v
,
src_indices
,
dst_indices
,
layer_id
,
item_size
,
src_layout_dim
,
block_quota
,
...
...
@@ -144,6 +146,7 @@ def transfer_kv_per_layer_mla_pf_lf(
dst
:
torch
.
Tensor
,
src_indices
:
torch
.
Tensor
,
dst_indices
:
torch
.
Tensor
,
layer_id
:
int
,
item_size
:
int
,
src_layout_dim
:
int
,
block_quota
:
int
=
2
,
...
...
@@ -154,6 +157,7 @@ def transfer_kv_per_layer_mla_pf_lf(
dst
,
src_indices
,
dst_indices
,
layer_id
,
item_size
,
src_layout_dim
,
block_quota
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment