Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
11886dc8
Commit
11886dc8
authored
Nov 07, 2025
by
liucong
Browse files
限制dcu_alloc_extend_kernel的使用范围
parent
ec78c4c5
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
25 additions
and
13 deletions
+25
-13
python/sglang/srt/mem_cache/allocator.py
python/sglang/srt/mem_cache/allocator.py
+23
-11
sgl-kernel/csrc/kvcacheio/transfer.cu
sgl-kernel/csrc/kvcacheio/transfer.cu
+2
-2
No files found.
python/sglang/srt/mem_cache/allocator.py
View file @
11886dc8
...
@@ -487,17 +487,29 @@ class PagedTokenToKVPoolAllocator(BaseTokenToKVPoolAllocator):
...
@@ -487,17 +487,29 @@ class PagedTokenToKVPoolAllocator(BaseTokenToKVPoolAllocator):
(
extend_num_tokens
,),
dtype
=
torch
.
int64
,
device
=
self
.
device
(
extend_num_tokens
,),
dtype
=
torch
.
int64
,
device
=
self
.
device
)
)
if
self
.
sglang_kvalloc_kernel
:
if
self
.
sglang_kvalloc_kernel
:
dcu_alloc_extend_kernel
(
if
bs
<
3
:
pre_lens_ptr
=
prefix_lens
,
dcu_alloc_extend_kernel
(
seq_lens_ptr
=
seq_lens
,
pre_lens_ptr
=
prefix_lens
,
last_loc_ptr
=
last_loc
,
seq_lens_ptr
=
seq_lens
,
free_page_ptr
=
self
.
free_pages
,
last_loc_ptr
=
last_loc
,
out_indices
=
out_indices
,
free_page_ptr
=
self
.
free_pages
,
bs
=
bs
,
out_indices
=
out_indices
,
bs_upper
=
next_power_of_2
(
bs
),
bs
=
bs
,
page_size
=
self
.
page_size
,
bs_upper
=
next_power_of_2
(
bs
),
max_num_extend_tokens
=
self
.
seen_max_num_extend_tokens_next_power_of_2
,
page_size
=
self
.
page_size
,
)
max_num_extend_tokens
=
self
.
seen_max_num_extend_tokens_next_power_of_2
,
)
else
:
alloc_extend_kernel
[(
bs
,)](
prefix_lens
,
seq_lens
,
last_loc
,
self
.
free_pages
,
out_indices
,
next_power_of_2
(
bs
),
self
.
page_size
,
self
.
seen_max_num_extend_tokens_next_power_of_2
,
)
else
:
else
:
alloc_extend_kernel
[(
bs
,)](
alloc_extend_kernel
[(
bs
,)](
prefix_lens
,
prefix_lens
,
...
...
sgl-kernel/csrc/kvcacheio/transfer.cu
View file @
11886dc8
...
@@ -664,7 +664,7 @@ __global__ void launch_alloc_extend_kernel(
...
@@ -664,7 +664,7 @@ __global__ void launch_alloc_extend_kernel(
int64_t
last_loc
=
last_loc_ptr
[
pid
];
int64_t
last_loc
=
last_loc_ptr
[
pid
];
int64_t
num_part1
=
safe_min
(
seq_len
,
ceil_div
(
pre_len
,
page_size
)
*
page_size
)
-
pre_len
;
int64_t
num_part1
=
safe_min
(
seq_len
,
ceil_div
(
pre_len
,
page_size
)
*
page_size
)
-
pre_len
;
for
(
int64_t
offset
=
0
;
offset
<
num_part1
;
offset
++
)
{
for
(
int64_t
offset
=
0
;
offset
<
num_part1
&&
offset
<
page_size
;
offset
++
)
{
int64_t
output_idx
=
output_start_loc
+
offset
;
int64_t
output_idx
=
output_start_loc
+
offset
;
out_indices
[
output_idx
]
=
last_loc
+
1
+
offset
;
out_indices
[
output_idx
]
=
last_loc
+
1
+
offset
;
}
}
...
@@ -674,7 +674,7 @@ __global__ void launch_alloc_extend_kernel(
...
@@ -674,7 +674,7 @@ __global__ void launch_alloc_extend_kernel(
}
}
int64_t
num_part2
=
(
seq_len
/
page_size
)
*
page_size
-
ceil_div
(
pre_len
,
page_size
)
*
page_size
;
int64_t
num_part2
=
(
seq_len
/
page_size
)
*
page_size
-
ceil_div
(
pre_len
,
page_size
)
*
page_size
;
for
(
int64_t
offset
=
0
;
offset
<
num_part2
;
offset
++
)
{
for
(
int64_t
offset
=
0
;
offset
<
num_part2
&&
offset
<
max_num_extend_tokens
;
offset
++
)
{
int64_t
page_idx
=
new_page_start_loc
+
offset
/
page_size
;
int64_t
page_idx
=
new_page_start_loc
+
offset
/
page_size
;
int64_t
page_start
=
free_page_ptr
[
page_idx
];
int64_t
page_start
=
free_page_ptr
[
page_idx
];
int64_t
output_idx
=
output_start_loc
+
num_part1
+
offset
;
int64_t
output_idx
=
output_start_loc
+
num_part1
+
offset
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment