Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
b548801d
Unverified
Commit
b548801d
authored
Oct 30, 2024
by
Lianmin Zheng
Committed by
GitHub
Oct 30, 2024
Browse files
Update docs (#1839)
parent
539df95d
Changes
11
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
165 additions
and
198 deletions
+165
-198
docs/deploy.py
docs/deploy.py
+1
-1
docs/deploy_docs.sh
docs/deploy_docs.sh
+0
-0
docs/openai_api.ipynb
docs/openai_api.ipynb
+92
-75
docs/send_request.ipynb
docs/send_request.ipynb
+51
-80
python/sglang/srt/mem_cache/flush_cache.py
python/sglang/srt/mem_cache/flush_cache.py
+1
-1
python/sglang/srt/model_executor/model_runner.py
python/sglang/srt/model_executor/model_runner.py
+1
-1
python/sglang/srt/server.py
python/sglang/srt/server.py
+2
-2
python/sglang/utils.py
python/sglang/utils.py
+7
-37
scripts/ci_install_dependency.sh
scripts/ci_install_dependency.sh
+4
-0
scripts/killall_sglang.sh
scripts/killall_sglang.sh
+4
-0
scripts/version_branch_to_tag.sh
scripts/version_branch_to_tag.sh
+2
-1
No files found.
docs/deploy.py
View file @
b548801d
#
!/usr/bin/python3
#
Deploy the documents
import
os
from
datetime
import
datetime
...
...
docs/deploy_docs.sh
deleted
100644 → 0
View file @
539df95d
docs/openai_api.ipynb
View file @
b548801d
This diff is collapsed.
Click to expand it.
docs/send_request.ipynb
View file @
b548801d
This diff is collapsed.
Click to expand it.
python/sglang/srt/mem_cache/flush_cache.py
View file @
b548801d
...
...
@@ -29,5 +29,5 @@ if __name__ == "__main__":
parser
.
add_argument
(
"--url"
,
type
=
str
,
default
=
"http://localhost:30000"
)
args
=
parser
.
parse_args
()
response
=
requests
.
ge
t
(
args
.
url
+
"/flush_cache"
)
response
=
requests
.
pos
t
(
args
.
url
+
"/flush_cache"
)
assert
response
.
status_code
==
200
python/sglang/srt/model_executor/model_runner.py
View file @
b548801d
...
...
@@ -124,7 +124,7 @@ class ModelRunner:
"Automatically turn off --chunked-prefill-size and adjust --mem-fraction-static for multimodal models."
)
server_args
.
chunked_prefill_size
=
None
se
rver_args
.
mem_fraction_static
*=
0.95
se
lf
.
mem_fraction_static
*=
0.95
# TODO: qwen2-vl does not support radix cache now, set disable_radix_cache=True automatically
if
self
.
model_config
.
hf_config
.
architectures
==
[
"Qwen2VLForConditionalGeneration"
...
...
python/sglang/srt/server.py
View file @
b548801d
...
...
@@ -139,7 +139,7 @@ async def get_server_args():
return
dataclasses
.
asdict
(
tokenizer_manager
.
server_args
)
@
app
.
ge
t
(
"/flush_cache"
)
@
app
.
pos
t
(
"/flush_cache"
)
async
def
flush_cache
():
"""Flush the radix cache."""
tokenizer_manager
.
flush_cache
()
...
...
@@ -180,7 +180,7 @@ async def get_memory_pool_size():
return
ret
except
Exception
as
e
:
return
JSONResponse
(
return
OR
JSONResponse
(
{
"error"
:
{
"message"
:
str
(
e
)}},
status_code
=
HTTPStatus
.
BAD_REQUEST
)
...
...
python/sglang/utils.py
View file @
b548801d
...
...
@@ -19,7 +19,6 @@ from typing import Optional, Union
import
numpy
as
np
import
requests
import
torch
from
IPython.display
import
HTML
,
display
from
tqdm
import
tqdm
...
...
@@ -332,14 +331,13 @@ def wait_for_server(base_url: str, timeout: int = None) -> None:
headers
=
{
"Authorization"
:
"Bearer None"
},
)
if
response
.
status_code
==
200
:
time
.
sleep
(
5
)
print_highlight
(
"""
\n
NOTE: Typically, the server runs in a separate terminal.
In this notebook, we run the server and notebook code together, so their outputs are combined.
To improve clarity, the server logs are displayed in the original black color, while the notebook outputs are highlighted in blue.
"""
Server and notebook outputs are combined for clarity.
Typically, the server runs in a separate terminal.
Server output is gray; notebook output is highlighted.
"""
)
break
...
...
@@ -350,36 +348,8 @@ def wait_for_server(base_url: str, timeout: int = None) -> None:
def
terminate_process
(
process
):
"""Safely terminate a process and clean up GPU memory.
Args:
process: subprocess.Popen object to terminate
"""
try
:
process
.
terminate
()
try
:
process
.
wait
(
timeout
=
5
)
except
subprocess
.
TimeoutExpired
:
if
os
.
name
!=
"nt"
:
try
:
pgid
=
os
.
getpgid
(
process
.
pid
)
os
.
killpg
(
pgid
,
signal
.
SIGTERM
)
time
.
sleep
(
1
)
if
process
.
poll
()
is
None
:
os
.
killpg
(
pgid
,
signal
.
SIGKILL
)
except
ProcessLookupError
:
pass
else
:
process
.
kill
()
process
.
wait
()
except
Exception
as
e
:
print
(
f
"Warning:
{
e
}
"
)
finally
:
gc
.
collect
()
if
torch
.
cuda
.
is_available
():
torch
.
cuda
.
empty_cache
()
torch
.
cuda
.
ipc_collect
()
time
.
sleep
(
2
)
from
sglang.srt.utils
import
kill_child_process
kill_child_process
(
process
.
pid
,
include_self
=
True
)
def
print_highlight
(
html_content
:
str
):
...
...
scripts/ci_install_dependency.sh
View file @
b548801d
"""
Install the dependency in CI.
"""
pip
install
--upgrade
pip
pip
install
-e
"python[all]"
pip
install
transformers
==
4.45.2
...
...
scripts/killall_sglang.sh
View file @
b548801d
"""
Kill all SGLang processes and free the GPU memory.
"""
kill
-9
$(
ps aux |
grep
'multiprocessing.spawn'
|
grep
-v
'grep'
|
awk
'{print $2}'
)
kill
-9
$(
ps aux |
grep
'sglang.launch_server'
|
grep
-v
'grep'
|
awk
'{print $2}'
)
scripts/version_branch_to_tag.sh
View file @
b548801d
#!/bin/bash
# This script tags all remote branches starting with 'v' with the same name as the branch,
# This script is used for release.
# It tags all remote branches starting with 'v' with the same name as the branch,
# deletes the corresponding branches from the remote, and pushes the tags to the remote repository.
git fetch origin
--prune
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment