Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
fb7421db
Unverified
Commit
fb7421db
authored
Aug 11, 2024
by
Liangsheng Yin
Committed by
GitHub
Aug 12, 2024
Browse files
minor: some potential bugs (#1044)
parent
14b64930
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
7 additions
and
6 deletions
+7
-6
python/sglang/srt/mem_cache/chunk_cache.py
python/sglang/srt/mem_cache/chunk_cache.py
+4
-3
python/sglang/srt/model_executor/forward_batch_info.py
python/sglang/srt/model_executor/forward_batch_info.py
+3
-3
No files found.
python/sglang/srt/mem_cache/chunk_cache.py
View file @
fb7421db
...
@@ -2,7 +2,7 @@ from __future__ import annotations
...
@@ -2,7 +2,7 @@ from __future__ import annotations
"""Cache for chunked prefill, used when RadixCache is disabled."""
"""Cache for chunked prefill, used when RadixCache is disabled."""
from
typing
import
TYPE_CHECKING
,
Callable
from
typing
import
TYPE_CHECKING
,
Callable
,
List
,
Optional
from
sglang.srt.mem_cache.base_prefix_cache
import
BasePrefixCache
from
sglang.srt.mem_cache.base_prefix_cache
import
BasePrefixCache
from
sglang.srt.mem_cache.memory_pool
import
BaseTokenToKVPool
,
ReqToTokenPool
from
sglang.srt.mem_cache.memory_pool
import
BaseTokenToKVPool
,
ReqToTokenPool
...
@@ -30,12 +30,13 @@ class ChunkCache(BasePrefixCache):
...
@@ -30,12 +30,13 @@ class ChunkCache(BasePrefixCache):
def
reset
(
self
):
def
reset
(
self
):
self
.
entries
=
{}
self
.
entries
=
{}
def
match_prefix
(
self
,
rid
,
**
kwargs
):
def
match_prefix
(
self
,
rid
:
int
,
key
:
List
[
int
]
):
if
rid
not
in
self
.
entries
:
if
rid
not
in
self
.
entries
:
return
[],
None
return
[],
None
entry
=
self
.
entries
[
rid
]
entry
=
self
.
entries
[
rid
]
return
entry
.
value
,
entry
max_prefix_len
=
len
(
key
)
return
entry
.
value
[:
max_prefix_len
],
entry
def
cache_finished_req
(
self
,
req
:
Req
,
token_ids
:
Optional
[
List
[
int
]]
=
None
):
def
cache_finished_req
(
self
,
req
:
Req
,
token_ids
:
Optional
[
List
[
int
]]
=
None
):
if
token_ids
is
None
:
if
token_ids
is
None
:
...
...
python/sglang/srt/model_executor/forward_batch_info.py
View file @
fb7421db
...
@@ -140,13 +140,13 @@ class InputMetadata:
...
@@ -140,13 +140,13 @@ class InputMetadata:
if
self
.
forward_mode
==
ForwardMode
.
DECODE
:
if
self
.
forward_mode
==
ForwardMode
.
DECODE
:
self
.
extend_seq_lens
=
self
.
extend_start_loc
=
self
.
extend_no_prefix
=
None
self
.
extend_seq_lens
=
self
.
extend_start_loc
=
self
.
extend_no_prefix
=
None
else
:
else
:
prefix
_lens_cpu
=
[
extend
_lens_cpu
=
[
len
(
r
.
fill_ids
)
-
len
(
r
.
prefix_indices
)
for
r
in
batch
.
reqs
len
(
r
.
fill_ids
)
-
len
(
r
.
prefix_indices
)
for
r
in
batch
.
reqs
]
]
self
.
extend_seq_lens
=
torch
.
tensor
(
prefix
_lens_cpu
,
device
=
"cuda"
)
self
.
extend_seq_lens
=
torch
.
tensor
(
extend
_lens_cpu
,
device
=
"cuda"
)
self
.
extend_start_loc
=
torch
.
zeros_like
(
self
.
seq_lens
)
self
.
extend_start_loc
=
torch
.
zeros_like
(
self
.
seq_lens
)
self
.
extend_start_loc
[
1
:]
=
torch
.
cumsum
(
self
.
extend_seq_lens
[:
-
1
],
dim
=
0
)
self
.
extend_start_loc
[
1
:]
=
torch
.
cumsum
(
self
.
extend_seq_lens
[:
-
1
],
dim
=
0
)
self
.
extend_no_prefix
=
all
(
x
==
0
for
x
in
prefix_lens_cpu
)
self
.
extend_no_prefix
=
all
(
len
(
r
.
prefix_indices
)
==
0
for
r
in
batch
.
reqs
)
@
classmethod
@
classmethod
def
from_schedule_batch
(
def
from_schedule_batch
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment