Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
06008bc2
Unverified
Commit
06008bc2
authored
Feb 12, 2024
by
Lianmin Zheng
Committed by
GitHub
Feb 12, 2024
Browse files
Fix server launch for jupyter notebook (#186)
parent
bb824da4
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
42 additions
and
38 deletions
+42
-38
python/sglang/srt/server.py
python/sglang/srt/server.py
+42
-38
No files found.
python/sglang/srt/server.py
View file @
06008bc2
...
@@ -464,7 +464,6 @@ def launch_server(server_args, pipe_finish_writer):
...
@@ -464,7 +464,6 @@ def launch_server(server_args, pipe_finish_writer):
assert
proc_router
.
is_alive
()
and
proc_detoken
.
is_alive
()
assert
proc_router
.
is_alive
()
and
proc_detoken
.
is_alive
()
def
_launch_server
():
def
_launch_server
():
# Launch api server
uvicorn
.
run
(
uvicorn
.
run
(
app
,
app
,
host
=
server_args
.
host
,
host
=
server_args
.
host
,
...
@@ -474,49 +473,54 @@ def launch_server(server_args, pipe_finish_writer):
...
@@ -474,49 +473,54 @@ def launch_server(server_args, pipe_finish_writer):
loop
=
"uvloop"
,
loop
=
"uvloop"
,
)
)
t
=
threading
.
Thread
(
target
=
_launch_server
)
def
_wait_and_warmup
():
t
.
start
()
url
=
server_args
.
url
()
for
_
in
range
(
60
):
time
.
sleep
(
1
)
try
:
requests
.
get
(
url
+
"/get_model_info"
,
timeout
=
5
)
break
except
requests
.
exceptions
.
RequestException
as
e
:
pass
else
:
if
pipe_finish_writer
is
not
None
:
pipe_finish_writer
.
send
(
str
(
e
))
else
:
print
(
e
,
flush
=
True
)
return
url
=
server_args
.
url
()
# Warmup
for
_
in
range
(
60
):
time
.
sleep
(
1
)
try
:
try
:
requests
.
get
(
url
+
"/get_model_info"
,
timeout
=
5
)
# print("Warmup...", flush=True)
break
res
=
requests
.
post
(
url
+
"/generate"
,
json
=
{
"text"
:
"Say this is a warmup request."
,
"sampling_params"
:
{
"temperature"
:
0
,
"max_new_tokens"
:
16
,
},
},
timeout
=
60
,
)
# print(f"Warmup done. model response: {res.json()['text']}")
# print("=" * 20, "Server is ready", "=" * 20, flush=True)
except
requests
.
exceptions
.
RequestException
as
e
:
except
requests
.
exceptions
.
RequestException
as
e
:
pass
if
pipe_finish_writer
is
not
None
:
else
:
pipe_finish_writer
.
send
(
str
(
e
))
if
pipe_finish_writer
is
not
None
:
else
:
pipe_finish_writer
.
send
(
str
(
e
))
print
(
e
,
flush
=
True
)
else
:
return
print
(
e
,
flush
=
True
)
return
# Warmup
try
:
# print("Warmup...", flush=True)
res
=
requests
.
post
(
url
+
"/generate"
,
json
=
{
"text"
:
"Say this is a warmup request."
,
"sampling_params"
:
{
"temperature"
:
0
,
"max_new_tokens"
:
16
,
},
},
timeout
=
60
,
)
# print(f"Warmup done. model response: {res.json()['text']}")
# print("=" * 20, "Server is ready", "=" * 20, flush=True)
except
requests
.
exceptions
.
RequestException
as
e
:
if
pipe_finish_writer
is
not
None
:
if
pipe_finish_writer
is
not
None
:
pipe_finish_writer
.
send
(
str
(
e
))
pipe_finish_writer
.
send
(
"init ok"
)
else
:
print
(
e
,
flush
=
True
)
return
if
pipe_finish_writer
is
not
None
:
t
=
threading
.
Thread
(
target
=
_wait_and_warmup
)
pipe_finish_writer
.
send
(
"init ok"
)
t
.
start
()
try
:
_launch_server
()
finally
:
t
.
join
()
class
Runtime
:
class
Runtime
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment