Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
3b141e15
Unverified
Commit
3b141e15
authored
Jan 13, 2025
by
Lianmin Zheng
Committed by
GitHub
Jan 13, 2025
Browse files
Dump requests (#2862)
parent
6249e4a1
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
36 additions
and
2 deletions
+36
-2
python/sglang/srt/managers/tokenizer_manager.py
python/sglang/srt/managers/tokenizer_manager.py
+28
-0
python/sglang/srt/server_args.py
python/sglang/srt/server_args.py
+8
-2
No files found.
python/sglang/srt/managers/tokenizer_manager.py
View file @
3b141e15
...
@@ -18,10 +18,12 @@ import copy
...
@@ -18,10 +18,12 @@ import copy
import
dataclasses
import
dataclasses
import
logging
import
logging
import
os
import
os
import
pickle
import
signal
import
signal
import
sys
import
sys
import
time
import
time
import
uuid
import
uuid
from
datetime
import
datetime
from
typing
import
Any
,
Awaitable
,
Dict
,
Generic
,
List
,
Optional
,
Tuple
,
TypeVar
,
Union
from
typing
import
Any
,
Awaitable
,
Dict
,
Generic
,
List
,
Optional
,
Tuple
,
TypeVar
,
Union
import
fastapi
import
fastapi
...
@@ -105,6 +107,7 @@ class TokenizerManager:
...
@@ -105,6 +107,7 @@ class TokenizerManager:
# Parse args
# Parse args
self
.
server_args
=
server_args
self
.
server_args
=
server_args
self
.
enable_metrics
=
server_args
.
enable_metrics
self
.
enable_metrics
=
server_args
.
enable_metrics
self
.
dump_requsts_folder
=
server_args
.
dump_requests_folder
# Init inter-process communication
# Init inter-process communication
context
=
zmq
.
asyncio
.
Context
(
2
)
context
=
zmq
.
asyncio
.
Context
(
2
)
...
@@ -163,6 +166,7 @@ class TokenizerManager:
...
@@ -163,6 +166,7 @@ class TokenizerManager:
# Store states
# Store states
self
.
to_create_loop
=
True
self
.
to_create_loop
=
True
self
.
rid_to_state
:
Dict
[
str
,
ReqState
]
=
{}
self
.
rid_to_state
:
Dict
[
str
,
ReqState
]
=
{}
self
.
dump_request_list
:
List
[
Tuple
]
=
[]
# The event to notify the weight sync is finished.
# The event to notify the weight sync is finished.
self
.
model_update_lock
=
RWLock
()
self
.
model_update_lock
=
RWLock
()
...
@@ -680,6 +684,9 @@ class TokenizerManager:
...
@@ -680,6 +684,9 @@ class TokenizerManager:
if
self
.
enable_metrics
:
if
self
.
enable_metrics
:
self
.
collect_metrics
(
state
,
recv_obj
,
i
)
self
.
collect_metrics
(
state
,
recv_obj
,
i
)
if
self
.
dump_requsts_folder
and
state
.
finished
:
self
.
dump_requests
(
state
,
out_dict
)
elif
isinstance
(
recv_obj
,
OpenSessionReqOutput
):
elif
isinstance
(
recv_obj
,
OpenSessionReqOutput
):
self
.
session_futures
[
recv_obj
.
session_id
].
set_result
(
self
.
session_futures
[
recv_obj
.
session_id
].
set_result
(
recv_obj
.
session_id
if
recv_obj
.
success
else
None
recv_obj
.
session_id
if
recv_obj
.
success
else
None
...
@@ -818,6 +825,27 @@ class TokenizerManager:
...
@@ -818,6 +825,27 @@ class TokenizerManager:
(
time
.
time
()
-
state
.
created_time
)
/
completion_tokens
(
time
.
time
()
-
state
.
created_time
)
/
completion_tokens
)
)
def dump_requests(self, state: "ReqState", out_dict: dict):
    """Buffer one request record and flush the buffer to disk when it is full.

    The caller invokes this for a request whose ``state.finished`` is true
    when a dump folder is configured. Each record is the tuple
    ``(state.obj, out_dict, state.created_time, time.time())``.

    Once the buffer holds more records than the threshold read from the
    ``SGLANG_DUMP_REQUESTS_THRESHOLD`` environment variable (default
    ``"1000"``), the buffered list is detached and pickled to
    ``<dump folder>/<YYYY-mm-dd_HH-MM-SS>.pkl`` in a worker thread, so the
    file write does not run on the event loop thread.

    Args:
        state: Request state; ``obj`` and ``created_time`` are read from it.
        out_dict: Output dictionary stored alongside the request.

    Raises:
        ValueError: If ``SGLANG_DUMP_REQUESTS_THRESHOLD`` is not an integer.
        RuntimeError: If a flush is triggered while no event loop is running
            (raised by ``asyncio.create_task``).
    """
    self.dump_request_list.append(
        (state.obj, out_dict, state.created_time, time.time())
    )

    threshold = int(os.environ.get("SGLANG_DUMP_REQUESTS_THRESHOLD", "1000"))
    if len(self.dump_request_list) <= threshold:
        return

    # Detach the full buffer so new records accumulate in a fresh list while
    # the old one is being written by the worker thread.
    to_dump = self.dump_request_list
    self.dump_request_list = []
    # The attribute name (including its spelling) matches the one assigned
    # in __init__; it must not be "corrected" here alone.
    folder = self.dump_requsts_folder

    def background_task():
        os.makedirs(folder, exist_ok=True)
        # NOTE(review): the file name has one-second resolution, so two
        # flushes within the same second would target the same file.
        filename = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".pkl"
        with open(os.path.join(folder, filename), "wb") as f:
            pickle.dump(to_dump, f)

    # Run the write in the background without awaiting it. asyncio only keeps
    # weak references to tasks, so hold a strong reference until it finishes.
    pending = getattr(self, "_dump_tasks", None)
    if pending is None:
        pending = self._dump_tasks = set()

    def on_done(finished):
        pending.discard(finished)
        if finished.cancelled():
            return
        error = finished.exception()
        if error is not None:
            # Report the failure explicitly instead of leaving it as an
            # unretrieved task exception.
            logging.getLogger(__name__).error(
                "Failed to dump requests to %s", folder, exc_info=error
            )

    task = asyncio.create_task(asyncio.to_thread(background_task))
    pending.add(task)
    task.add_done_callback(on_done)
class
SignalHandler
:
class
SignalHandler
:
def
__init__
(
self
,
tokenizer_manager
):
def
__init__
(
self
,
tokenizer_manager
):
...
...
python/sglang/srt/server_args.py
View file @
3b141e15
...
@@ -23,7 +23,6 @@ from typing import List, Optional
...
@@ -23,7 +23,6 @@ from typing import List, Optional
import
torch
import
torch
from
sglang.srt.hf_transformers_utils
import
check_gguf_file
from
sglang.srt.hf_transformers_utils
import
check_gguf_file
from
sglang.srt.speculative.spec_info
import
SpeculativeAlgorithm
from
sglang.srt.utils
import
(
from
sglang.srt.utils
import
(
get_amdgpu_memory_capacity
,
get_amdgpu_memory_capacity
,
get_hpu_memory_capacity
,
get_hpu_memory_capacity
,
...
@@ -89,6 +88,7 @@ class ServerArgs:
...
@@ -89,6 +88,7 @@ class ServerArgs:
show_time_cost
:
bool
=
False
show_time_cost
:
bool
=
False
enable_metrics
:
bool
=
False
enable_metrics
:
bool
=
False
decode_log_interval
:
int
=
40
decode_log_interval
:
int
=
40
dump_requests_folder
:
str
=
""
# API related
# API related
api_key
:
Optional
[
str
]
=
None
api_key
:
Optional
[
str
]
=
None
...
@@ -554,7 +554,13 @@ class ServerArgs:
...
@@ -554,7 +554,13 @@ class ServerArgs:
"--decode-log-interval"
,
"--decode-log-interval"
,
type
=
int
,
type
=
int
,
default
=
ServerArgs
.
decode_log_interval
,
default
=
ServerArgs
.
decode_log_interval
,
help
=
"The log interval of decode batch"
,
help
=
"The log interval of decode batch."
,
)
parser.add_argument(
    "--dump-requests-folder",
    type=str,
    # Use this option's own dataclass default (an empty string). The
    # tokenizer manager only dumps when the folder value is truthy, so
    # reusing the integer default of --decode-log-interval here would
    # switch dumping on whenever the flag is omitted.
    default=ServerArgs.dump_requests_folder,
    help="Dump raw requests to a folder for replay.",
)
)
# API related
# API related
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment