Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
322421fa
Unverified
Commit
322421fa
authored
Feb 05, 2024
by
Cody Yu
Committed by
GitHub
Feb 05, 2024
Browse files
Add warmup to SRT server (#146)
parent
8ff870bf
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
39 additions
and
17 deletions
+39
-17
python/sglang/srt/server.py
python/sglang/srt/server.py
+39
-17
No files found.
python/sglang/srt/server.py
View file @
322421fa
...
@@ -389,7 +389,7 @@ def launch_server(server_args, pipe_finish_writer):
...
@@ -389,7 +389,7 @@ def launch_server(server_args, pipe_finish_writer):
assert
proc_router
.
is_alive
()
and
proc_detoken
.
is_alive
()
assert
proc_router
.
is_alive
()
and
proc_detoken
.
is_alive
()
def
launch_server
():
def
_
launch_server
():
# Launch api server
# Launch api server
uvicorn
.
run
(
uvicorn
.
run
(
app
,
app
,
...
@@ -400,26 +400,48 @@ def launch_server(server_args, pipe_finish_writer):
...
@@ -400,26 +400,48 @@ def launch_server(server_args, pipe_finish_writer):
loop
=
"uvloop"
,
loop
=
"uvloop"
,
)
)
t
=
threading
.
Thread
(
target
=
launch_server
)
t
=
threading
.
Thread
(
target
=
_
launch_server
)
t
.
start
()
t
.
start
()
if
pipe_finish_writer
:
url
=
server_args
.
url
()
url
=
server_args
.
url
()
for
_
in
range
(
60
):
time
.
sleep
(
1
)
success
=
False
try
:
for
i
in
range
(
60
):
requests
.
get
(
url
+
"/get_model_info"
,
timeout
=
5
)
time
.
sleep
(
1
)
break
try
:
except
requests
.
exceptions
.
RequestException
as
e
:
res
=
requests
.
get
(
url
+
"/get_model_info"
,
timeout
=
5
)
pass
success
=
True
else
:
break
if
pipe_finish_writer
is
not
None
:
except
requests
.
exceptions
.
RequestException
as
e
:
pipe_finish_writer
.
send
(
str
(
e
))
pass
if
success
:
pipe_finish_writer
.
send
(
"init ok"
)
else
:
else
:
print
(
e
,
flush
=
True
)
return
# Warmup
try
:
print
(
"Warmup..."
,
flush
=
True
)
res
=
requests
.
post
(
url
+
"/generate"
,
json
=
{
"text"
:
"Say this is a warmup request."
,
"sampling_params"
:
{
"temperature"
:
0
,
"max_new_tokens"
:
16
,
},
},
timeout
=
60
,
)
print
(
f
"Warmup done. model response:
{
res
.
json
()[
'text'
]
}
"
)
except
requests
.
exceptions
.
RequestException
as
e
:
if
pipe_finish_writer
is
not
None
:
pipe_finish_writer
.
send
(
str
(
e
))
pipe_finish_writer
.
send
(
str
(
e
))
else
:
print
(
e
,
flush
=
True
)
return
if
pipe_finish_writer
is
not
None
:
pipe_finish_writer
.
send
(
"init ok"
)
class
Runtime
:
class
Runtime
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment