Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
zhaoyu6
sglang
Commits
45d6592d
"vscode:/vscode.git/clone" did not exist on "460c72fb5f8fa79f0bcd9bf6bf9094aae761428e"
Unverified
Commit
45d6592d
authored
Feb 03, 2024
by
Ying Sheng
Committed by
GitHub
Feb 03, 2024
Browse files
Fix no-cache mode (#136)
parent
f6bfe3aa
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
5 deletions
+6
-5
python/sglang/srt/managers/router/infer_batch.py
python/sglang/srt/managers/router/infer_batch.py
+6
-5
No files found.
python/sglang/srt/managers/router/infer_batch.py
View file @
45d6592d
...
...
@@ -215,8 +215,9 @@ class Batch:
         extend_num_tokens = seq_lens.sum() - prefix_lens.sum()
         out_cache_loc = self.token_to_kv_pool.alloc(extend_num_tokens)
         if out_cache_loc is None:
-            self.tree_cache.evict(extend_num_tokens, self.token_to_kv_pool.free)
-            out_cache_loc = self.token_to_kv_pool.alloc(extend_num_tokens)
+            if not self.tree_cache.disable:
+                self.tree_cache.evict(extend_num_tokens, self.token_to_kv_pool.free)
+                out_cache_loc = self.token_to_kv_pool.alloc(extend_num_tokens)
             if out_cache_loc is None:
                 print("Prefill out of memory. This should nerver happen.")
...
...
@@ -277,11 +278,11 @@ class Batch:
     def check_decode_mem(self):
         bs = len(self.reqs)
-        avai_size = self.token_to_kv_pool.available_size()
-        if avai_size >= bs:
+        if self.token_to_kv_pool.available_size() >= bs:
             return True
-        self.tree_cache.evict(bs, self.token_to_kv_pool.free)
+        if not self.tree_cache.disable:
+            self.tree_cache.evict(bs, self.token_to_kv_pool.free)
         if self.token_to_kv_pool.available_size() >= bs:
             return True
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment