Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
c31f084c
Unverified
Commit
c31f084c
authored
Aug 07, 2024
by
Yineng Zhang
Committed by
GitHub
Aug 07, 2024
Browse files
chore: update vllm to 0.5.4 (#966)
parent
a01ddd96
Changes
14
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
15 additions
and
18 deletions
+15
-18
.github/workflows/e2e-test.yml
.github/workflows/e2e-test.yml
+1
-2
.github/workflows/unit-test.yml
.github/workflows/unit-test.yml
+1
-2
README.md
README.md
+2
-2
docker/Dockerfile
docker/Dockerfile
+1
-1
python/pyproject.toml
python/pyproject.toml
+1
-1
python/sglang/check_env.py
python/sglang/check_env.py
+1
-0
test/srt/models/test_causal_models.py
test/srt/models/test_causal_models.py
+1
-3
test/srt/run_suite.py
test/srt/run_suite.py
+1
-1
test/srt/test_chunked_prefill.py
test/srt/test_chunked_prefill.py
+1
-1
test/srt/test_eval_accuracy.py
test/srt/test_eval_accuracy.py
+1
-1
test/srt/test_openai_server.py
test/srt/test_openai_server.py
+1
-1
test/srt/test_srt_endpoint.py
test/srt/test_srt_endpoint.py
+1
-1
test/srt/test_torch_compile.py
test/srt/test_torch_compile.py
+1
-1
test/srt/test_vision_openai_server.py
test/srt/test_vision_openai_server.py
+1
-1
No files found.
.github/workflows/e2e-test.yml
View file @
c31f084c
...
...
@@ -34,8 +34,7 @@ jobs:
pip cache purge
pip install --upgrade pip
pip install -e "python[all]"
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/ --force-reinstall
pip install --upgrade transformers
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
-
name
:
Benchmark Serving Throughput
run
:
|
...
...
.github/workflows/unit-test.yml
View file @
c31f084c
...
...
@@ -34,8 +34,7 @@ jobs:
pip cache purge
pip install --upgrade pip
pip install -e "python[all]"
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/ --force-reinstall
pip install --upgrade transformers
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
pip install accelerate
-
name
:
Test Frontend Language
...
...
README.md
View file @
c31f084c
...
...
@@ -49,7 +49,7 @@ pip install --upgrade pip
pip install "sglang[all]"
# Install FlashInfer CUDA kernels
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.
3
/
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.
4
/
```
### Method 2: From source
...
...
@@ -62,7 +62,7 @@ pip install --upgrade pip
pip install -e "python[all]"
# Install FlashInfer CUDA kernels
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.
3
/
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.
4
/
```
### Method 3: Using docker
...
...
docker/Dockerfile
View file @
c31f084c
...
...
@@ -29,6 +29,6 @@ RUN pip3 --no-cache-dir install --upgrade pip \
&&
git clone
--depth
=
1 https://github.com/sgl-project/sglang.git
\
&&
cd
sglang
\
&&
pip
--no-cache-dir
install
-e
"python[all]"
\
&&
pip3
--no-cache-dir
install
flashinfer
-i
https://flashinfer.ai/whl/cu121/torch2.
3
/
&&
pip3
--no-cache-dir
install
flashinfer
-i
https://flashinfer.ai/whl/cu121/torch2.
4
/
ENV
DEBIAN_FRONTEND=interactive
python/pyproject.toml
View file @
c31f084c
...
...
@@ -23,7 +23,7 @@ dependencies = [
srt
=
[
"aiohttp"
,
"fastapi"
,
"hf_transfer"
,
"huggingface_hub"
,
"interegular"
,
"packaging"
,
"pillow"
,
"psutil"
,
"pydantic"
,
"python-multipart"
,
"torch"
,
"uvicorn"
,
"uvloop"
,
"zmq"
,
"vllm==0.5.
3.post1
"
,
"outlines>=0.0.44"
]
"vllm==0.5.
4
"
,
"outlines>=0.0.44"
]
openai
=
[
"openai>=1.0"
,
"tiktoken"
]
anthropic
=
["anthropic>=0.20.0"]
litellm
=
["litellm>=1.0.0"]
...
...
python/sglang/check_env.py
View file @
c31f084c
...
...
@@ -14,6 +14,7 @@ PACKAGE_LIST = [
"sglang"
,
"flashinfer"
,
"triton"
,
"transformers"
,
"requests"
,
"tqdm"
,
"numpy"
,
...
...
test/srt/models/test_causal_models.py
View file @
c31f084c
...
...
@@ -18,9 +18,7 @@ import torch
from
sglang.test.runners
import
DEFAULT_PROMPTS
,
HFRunner
,
SRTRunner
MODELS
=
[
# (model_name, tp_size)
(
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
1
),
# ("meta-llama/Meta-Llama-3.1-8B-Instruct", 2),
]
TORCH_DTYPES
=
[
torch
.
float16
]
...
...
@@ -51,7 +49,7 @@ class TestCausalModels(unittest.TestCase):
hf_logprobs
=
torch
.
Tensor
(
hf_outputs
.
top_input_logprobs
[
i
])
srt_logprobs
=
torch
.
Tensor
(
srt_outputs
.
top_input_logprobs
[
i
])
tolerance
=
2
e-2
tolerance
=
3
e-2
assert
torch
.
all
(
abs
(
hf_logprobs
-
srt_logprobs
)
<
tolerance
),
f
"prefill logprobs not all close"
...
...
test/srt/run_suite.py
View file @
c31f084c
...
...
@@ -20,7 +20,7 @@ if __name__ == "__main__":
arg_parser
.
add_argument
(
"--timeout-per-file"
,
type
=
int
,
default
=
1
000
,
default
=
2
000
,
help
=
"The time limit for running one file in seconds."
,
)
arg_parser
.
add_argument
(
...
...
test/srt/test_chunked_prefill.py
View file @
c31f084c
...
...
@@ -11,7 +11,7 @@ class TestAccuracy(unittest.TestCase):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
f
"http://
localhost
:8157"
cls
.
base_url
=
"http://
127.0.0.1
:8157"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
...
...
test/srt/test_eval_accuracy.py
View file @
c31f084c
...
...
@@ -11,7 +11,7 @@ class TestAccuracy(unittest.TestCase):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
f
"http://
localhost
:8157"
cls
.
base_url
=
"http://
127.0.0.1
:8157"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
)
@
classmethod
...
...
test/srt/test_openai_server.py
View file @
c31f084c
...
...
@@ -14,7 +14,7 @@ class TestOpenAIServer(unittest.TestCase):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
f
"http://
localhost
:8157"
cls
.
base_url
=
"http://
127.0.0.1
:8157"
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
,
api_key
=
cls
.
api_key
...
...
test/srt/test_srt_endpoint.py
View file @
c31f084c
...
...
@@ -13,7 +13,7 @@ class TestSRTEndpoint(unittest.TestCase):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
f
"http://
localhost:
{
8157
}
"
cls
.
base_url
=
"http://
127.0.0.1:
8157"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
)
@
classmethod
...
...
test/srt/test_torch_compile.py
View file @
c31f084c
...
...
@@ -11,7 +11,7 @@ class TestAccuracy(unittest.TestCase):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
f
"http://
localhost
:8157"
cls
.
base_url
=
"http://
127.0.0.1
:8157"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
,
other_args
=
[
"--enable-torch-compile"
]
)
...
...
test/srt/test_vision_openai_server.py
View file @
c31f084c
...
...
@@ -13,7 +13,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"liuhaotian/llava-v1.6-vicuna-7b"
cls
.
base_url
=
"http://
localhost
:8157"
cls
.
base_url
=
"http://
127.0.0.1
:8157"
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment