Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
d77caa2b
Unverified
Commit
d77caa2b
authored
Jan 20, 2025
by
Seungduk Kim
Committed by
GitHub
Jan 19, 2025
Browse files
[#2812] Make the decode status dict capacity adjustable by a CLI param (#2839)
parent
8b6a4486
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
20 additions
and
3 deletions
+20
-3
python/sglang/srt/managers/detokenizer_manager.py
python/sglang/srt/managers/detokenizer_manager.py
+20
-3
No files found.
python/sglang/srt/managers/detokenizer_manager.py
View file @
d77caa2b
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
import
dataclasses
import
dataclasses
import
logging
import
logging
import
os
import
signal
import
signal
from
collections
import
OrderedDict
from
collections
import
OrderedDict
from
typing
import
Dict
,
List
,
Union
from
typing
import
Dict
,
List
,
Union
...
@@ -35,6 +36,12 @@ from sglang.utils import find_printable_text, get_exception_traceback
...
@@ -35,6 +36,12 @@ from sglang.utils import find_printable_text, get_exception_traceback
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
# Maximum number of request states that the detokenizer can hold. When exceeded,
# the oldest request states will be evicted. Default: 65536 (1<<16).
# For more details, see: https://github.com/sgl-project/sglang/issues/2812
# Use a power-of-2 value for better memory allocation.
DETOKENIZER_MAX_STATES
=
int
(
os
.
environ
.
get
(
"SGLANG_DETOKENIZER_MAX_STATES"
,
1
<<
16
))
@
dataclasses
.
dataclass
@
dataclasses
.
dataclass
class
DecodeStatus
:
class
DecodeStatus
:
...
@@ -74,7 +81,7 @@ class DetokenizerManager:
...
@@ -74,7 +81,7 @@ class DetokenizerManager:
revision
=
server_args
.
revision
,
revision
=
server_args
.
revision
,
)
)
self
.
decode_status
=
LimitedCapacityDict
()
self
.
decode_status
=
LimitedCapacityDict
(
capacity
=
DETOKENIZER_MAX_STATES
)
def
trim_matched_stop
(
def
trim_matched_stop
(
self
,
output
:
Union
[
str
,
List
[
int
]],
finished_reason
:
Dict
,
no_stop_trim
:
bool
self
,
output
:
Union
[
str
,
List
[
int
]],
finished_reason
:
Dict
,
no_stop_trim
:
bool
...
@@ -156,7 +163,17 @@ class DetokenizerManager:
...
@@ -156,7 +163,17 @@ class DetokenizerManager:
# Incremental decoding
# Incremental decoding
output_strs
=
[]
output_strs
=
[]
for
i
in
range
(
bs
):
for
i
in
range
(
bs
):
try
:
s
=
self
.
decode_status
[
recv_obj
.
rids
[
i
]]
s
=
self
.
decode_status
[
recv_obj
.
rids
[
i
]]
except
KeyError
:
raise
RuntimeError
(
f
"Decode status not found for request
{
recv_obj
.
rids
[
i
]
}
. "
"It may be due to the request being evicted from the decode status due to memory pressure. "
"Please increase the maximum number of requests by setting "
"the SGLANG_DETOKENIZER_MAX_STATES environment variable to a bigger value than the default value. "
f
"The current value is
{
DETOKENIZER_MAX_STATES
}
. "
"For more details, see: https://github.com/sgl-project/sglang/issues/2812"
)
new_text
=
read_texts
[
i
][
len
(
surr_texts
[
i
])
:]
new_text
=
read_texts
[
i
][
len
(
surr_texts
[
i
])
:]
if
recv_obj
.
finished_reasons
[
i
]
is
None
:
if
recv_obj
.
finished_reasons
[
i
]
is
None
:
# Streaming chunk: update the decode status
# Streaming chunk: update the decode status
...
@@ -197,7 +214,7 @@ class DetokenizerManager:
...
@@ -197,7 +214,7 @@ class DetokenizerManager:
class
LimitedCapacityDict
(
OrderedDict
):
class
LimitedCapacityDict
(
OrderedDict
):
def
__init__
(
self
,
capacity
=
1
<<
15
,
*
args
,
**
kwargs
):
def
__init__
(
self
,
capacity
:
int
,
*
args
,
**
kwargs
):
super
().
__init__
(
*
args
,
**
kwargs
)
super
().
__init__
(
*
args
,
**
kwargs
)
self
.
capacity
=
capacity
self
.
capacity
=
capacity
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment