Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
7dd8a7e6
Unverified
Commit
7dd8a7e6
authored
Aug 03, 2024
by
min-xu-et
Committed by
GitHub
Aug 03, 2024
Browse files
fixed an error handling in bench_latency.py (#904)
parent
947402c8
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
11 additions
and
8 deletions
+11
-8
python/sglang/srt/managers/schedule_batch.py
python/sglang/srt/managers/schedule_batch.py
+11
-8
No files found.
python/sglang/srt/managers/schedule_batch.py
View file @
7dd8a7e6
...
@@ -380,13 +380,15 @@ class Batch:
...
@@ -380,13 +380,15 @@ class Batch:
extend_num_tokens
=
seq_lens
.
sum
()
-
prefix_lens
.
sum
()
extend_num_tokens
=
seq_lens
.
sum
()
-
prefix_lens
.
sum
()
out_cache_loc
=
self
.
token_to_kv_pool
.
alloc
(
extend_num_tokens
)
out_cache_loc
=
self
.
token_to_kv_pool
.
alloc
(
extend_num_tokens
)
if
out_cache_loc
is
None
:
if
out_cache_loc
is
None
:
self
.
tree_cache
.
evict
(
extend_num_tokens
,
self
.
token_to_kv_pool
.
free
)
if
self
.
tree_cache
is
not
None
:
out_cache_loc
=
self
.
token_to_kv_pool
.
alloc
(
extend_num_tokens
)
self
.
tree_cache
.
evict
(
extend_num_tokens
,
self
.
token_to_kv_pool
.
free
)
out_cache_loc
=
self
.
token_to_kv_pool
.
alloc
(
extend_num_tokens
)
if
out_cache_loc
is
None
:
if
out_cache_loc
is
None
:
logger
.
error
(
"Prefill out of memory. This should never happen."
)
logger
.
error
(
"Prefill out of memory. Try to lower your batch size."
)
self
.
tree_cache
.
pretty_print
()
if
self
.
tree_cache
is
not
None
:
exit
()
self
.
tree_cache
.
pretty_print
()
exit
(
1
)
pt
=
0
pt
=
0
for
i
in
range
(
bs
):
for
i
in
range
(
bs
):
...
@@ -637,9 +639,10 @@ class Batch:
...
@@ -637,9 +639,10 @@ class Batch:
self
.
out_cache_loc
=
self
.
token_to_kv_pool
.
alloc
(
bs
)
self
.
out_cache_loc
=
self
.
token_to_kv_pool
.
alloc
(
bs
)
if
self
.
out_cache_loc
is
None
:
if
self
.
out_cache_loc
is
None
:
logger
.
error
(
"Decode out of memory. This should never happen."
)
logger
.
error
(
"Decode out of memory. Try to lower your batch size."
)
self
.
tree_cache
.
pretty_print
()
if
self
.
tree_cache
is
not
None
:
exit
()
self
.
tree_cache
.
pretty_print
()
exit
(
1
)
self
.
req_to_token_pool
.
req_to_token
[
self
.
req_to_token_pool
.
req_to_token
[
self
.
req_pool_indices
,
self
.
seq_lens
-
1
self
.
req_pool_indices
,
self
.
seq_lens
-
1
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment