Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
b58ae7a2
Unverified
Commit
b58ae7a2
authored
Aug 10, 2025
by
Lianmin Zheng
Committed by
GitHub
Aug 10, 2025
Browse files
Simplify frontend language (#9029)
parent
6345069f
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
19 additions
and
17 deletions
+19
-17
benchmark/gsm8k/bench_sglang.py
benchmark/gsm8k/bench_sglang.py
+1
-1
benchmark/hellaswag/bench_sglang.py
benchmark/hellaswag/bench_sglang.py
+1
-1
python/pyproject.toml
python/pyproject.toml
+11
-10
python/sglang/README.md
python/sglang/README.md
+1
-1
python/sglang/__init__.py
python/sglang/__init__.py
+3
-2
python/sglang/lang/api.py
python/sglang/lang/api.py
+0
-0
python/sglang/lang/backend/__init__.py
python/sglang/lang/backend/__init__.py
+0
-0
python/sglang/test/few_shot_gsm8k.py
python/sglang/test/few_shot_gsm8k.py
+1
-1
python/sglang/test/few_shot_gsm8k_engine.py
python/sglang/test/few_shot_gsm8k_engine.py
+1
-1
No files found.
benchmark/gsm8k/bench_sglang.py
View file @
b58ae7a2
...
@@ -7,7 +7,7 @@ import time
...
@@ -7,7 +7,7 @@ import time
import
numpy
as
np
import
numpy
as
np
from
sglang.api
import
set_default_backend
from
sglang.
lang.
api
import
set_default_backend
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
add_common_sglang_args_and_parse
,
add_common_sglang_args_and_parse
,
dump_bench_raw_result
,
dump_bench_raw_result
,
...
...
benchmark/hellaswag/bench_sglang.py
View file @
b58ae7a2
...
@@ -5,7 +5,7 @@ import time
...
@@ -5,7 +5,7 @@ import time
import
numpy
as
np
import
numpy
as
np
from
sglang.api
import
set_default_backend
from
sglang.
lang.
api
import
set_default_backend
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
add_common_sglang_args_and_parse
,
add_common_sglang_args_and_parse
,
select_sglang_backend
,
select_sglang_backend
,
...
...
python/pyproject.toml
View file @
b58ae7a2
...
@@ -47,10 +47,10 @@ runtime_common = [
...
@@ -47,10 +47,10 @@ runtime_common = [
"sentencepiece"
,
"sentencepiece"
,
"soundfile==0.13.1"
,
"soundfile==0.13.1"
,
"scipy"
,
"scipy"
,
"torchao==0.9.0"
,
"transformers==4.55.0"
,
"timm==1.0.16"
,
"timm==1.0.16"
,
"tiktoken"
,
"tiktoken"
,
"torchao==0.9.0"
,
"transformers==4.55.0"
,
"uvicorn"
,
"uvicorn"
,
"uvloop"
,
"uvloop"
,
"xgrammar==0.1.22"
,
"xgrammar==0.1.22"
,
...
@@ -84,6 +84,9 @@ srt_hip = [
...
@@ -84,6 +84,9 @@ srt_hip = [
"petit_kernel==0.0.2"
,
"petit_kernel==0.0.2"
,
]
]
# CPU: torch wheel for CPU needs to be installed from https://download.pytorch.org/whl/cpu
srt_cpu
=
["sglang[runtime_common]
", "
einops
"]
# xpu is not enabled in public vllm and torch whl,
# xpu is not enabled in public vllm and torch whl,
# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.htmlinstall vllm
# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.htmlinstall vllm
srt_xpu
=
["sglang[runtime_common]"]
srt_xpu
=
["sglang[runtime_common]"]
...
@@ -92,8 +95,6 @@ srt_xpu = ["sglang[runtime_common]"]
...
@@ -92,8 +95,6 @@ srt_xpu = ["sglang[runtime_common]"]
# https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
# https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
srt_hpu
=
["sglang[runtime_common]"]
srt_hpu
=
["sglang[runtime_common]"]
# CPU: torch wheel for CPU needs to be installed from https://download.pytorch.org/whl/cpu
srt_cpu
=
["sglang[runtime_common]
", "
einops
"]
# https://vllm-ascend.readthedocs.io/en/latest/installation.html
# https://vllm-ascend.readthedocs.io/en/latest/installation.html
srt_npu
=
["sglang[runtime_common]"]
srt_npu
=
["sglang[runtime_common]"]
...
@@ -112,12 +113,12 @@ test = [
...
@@ -112,12 +113,12 @@ test = [
"sentence_transformers"
,
"sentence_transformers"
,
"pytest"
,
"pytest"
,
]
]
all
=
["sglang[srt]
", "
sglang
[openai]
", "
sglang
[anthropic]
",
"
sglang
[litellm]
",
"
sglang
[torch_memory_saver]
", "
sglang
[decord]"]
all
=
["sglang[srt]
", "
sglang
[openai]
", "
sglang
[anthropic]
", "
sglang
[torch_memory_saver]
", "
sglang
[decord]"]
all_hip
=
["sglang[srt_hip]
", "
sglang
[openai]
", "
sglang
[anthropic]
",
"
sglang
[litellm]
",
"
sglang
[decord]"]
all_hip
=
["sglang[srt_hip]
", "
sglang
[openai]
", "
sglang
[anthropic]
", "
sglang
[decord]"]
all_xpu
=
["sglang[srt_xpu]
", "
sglang
[openai]
", "
sglang
[anthropic]
",
"
sglang
[litellm]
",
"
sglang
[decord]"]
all_xpu
=
["sglang[srt_xpu]
", "
sglang
[openai]
", "
sglang
[anthropic]
", "
sglang
[decord]"]
all_hpu
=
["sglang[srt_hpu]
", "
sglang
[openai]
", "
sglang
[anthropic]
",
"
sglang
[litellm]
",
"
sglang
[decord]"]
all_hpu
=
["sglang[srt_hpu]
", "
sglang
[openai]
", "
sglang
[anthropic]
", "
sglang
[decord]"]
all_cpu
=
["sglang[srt_cpu]
", "
sglang
[openai]
", "
sglang
[anthropic]
",
"
sglang
[litellm]
",
"
sglang
[decord]"]
all_cpu
=
["sglang[srt_cpu]
", "
sglang
[openai]
", "
sglang
[anthropic]
", "
sglang
[decord]"]
all_npu
=
["sglang[srt_npu]
", "
sglang
[openai]
", "
sglang
[anthropic]
",
"
sglang
[litellm]
",
"
sglang
[decord]"]
all_npu
=
["sglang[srt_npu]
", "
sglang
[openai]
", "
sglang
[anthropic]
", "
sglang
[decord]"]
dev
=
["sglang[all]
", "
sglang
[test]"]
dev
=
["sglang[all]
", "
sglang
[test]"]
dev_hip
=
["sglang[all_hip]
", "
sglang
[test]"]
dev_hip
=
["sglang[all_hip]
", "
sglang
[test]"]
...
...
python/sglang/README.md
View file @
b58ae7a2
...
@@ -5,7 +5,7 @@
...
@@ -5,7 +5,7 @@
-
`srt`
: The backend engine for running local models. (SRT = SGLang Runtime).
-
`srt`
: The backend engine for running local models. (SRT = SGLang Runtime).
-
`test`
: The test utilities.
-
`test`
: The test utilities.
-
`api.py`
: The public APIs.
-
`api.py`
: The public APIs.
-
`bench_offline_throughput.py`
: Benchmark the
throughput
in the offline mode.
-
`bench_offline_throughput.py`
: Benchmark the
performance
in the offline mode.
-
`bench_one_batch.py`
: Benchmark the latency of running a single static batch without a server.
-
`bench_one_batch.py`
: Benchmark the latency of running a single static batch without a server.
-
`bench_one_batch_server.py`
: Benchmark the latency of running a single batch with a server.
-
`bench_one_batch_server.py`
: Benchmark the latency of running a single batch with a server.
-
`bench_serving.py`
: Benchmark online serving with dynamic requests.
-
`bench_serving.py`
: Benchmark online serving with dynamic requests.
...
...
python/sglang/__init__.py
View file @
b58ae7a2
# SGLang public APIs
# SGLang public APIs
# Frontend Language APIs
# Frontend Language APIs
from
sglang.api
import
(
from
sglang.global_config
import
global_config
from
sglang.lang.api
import
(
Engine
,
Engine
,
Runtime
,
Runtime
,
assistant
,
assistant
,
...
@@ -25,13 +26,13 @@ from sglang.api import (
...
@@ -25,13 +26,13 @@ from sglang.api import (
user_end
,
user_end
,
video
,
video
,
)
)
from
sglang.global_config
import
global_config
from
sglang.lang.backend.runtime_endpoint
import
RuntimeEndpoint
from
sglang.lang.backend.runtime_endpoint
import
RuntimeEndpoint
from
sglang.lang.choices
import
(
from
sglang.lang.choices
import
(
greedy_token_selection
,
greedy_token_selection
,
token_length_normalized
,
token_length_normalized
,
unconditional_likelihood_normalized
,
unconditional_likelihood_normalized
,
)
)
from
sglang.srt.entrypoints.engine
import
Engine
from
sglang.utils
import
LazyImport
from
sglang.utils
import
LazyImport
from
sglang.version
import
__version__
from
sglang.version
import
__version__
...
...
python/sglang/api.py
→
python/sglang/
lang/
api.py
View file @
b58ae7a2
File moved
python/sglang/lang/backend/__init__.py
deleted
100644 → 0
View file @
6345069f
python/sglang/test/few_shot_gsm8k.py
View file @
b58ae7a2
...
@@ -12,7 +12,7 @@ import time
...
@@ -12,7 +12,7 @@ import time
import
numpy
as
np
import
numpy
as
np
from
sglang.api
import
set_default_backend
from
sglang.
lang.
api
import
set_default_backend
from
sglang.lang.backend.runtime_endpoint
import
RuntimeEndpoint
from
sglang.lang.backend.runtime_endpoint
import
RuntimeEndpoint
from
sglang.utils
import
download_and_cache_file
,
dump_state_text
,
read_jsonl
from
sglang.utils
import
download_and_cache_file
,
dump_state_text
,
read_jsonl
...
...
python/sglang/test/few_shot_gsm8k_engine.py
View file @
b58ae7a2
...
@@ -8,7 +8,7 @@ import time
...
@@ -8,7 +8,7 @@ import time
import
numpy
as
np
import
numpy
as
np
import
sglang
as
sgl
import
sglang
as
sgl
from
sglang.api
import
set_default_backend
from
sglang.
lang.
api
import
set_default_backend
from
sglang.lang.backend.runtime_endpoint
import
RuntimeEndpoint
from
sglang.lang.backend.runtime_endpoint
import
RuntimeEndpoint
from
sglang.utils
import
download_and_cache_file
,
dump_state_text
,
read_jsonl
from
sglang.utils
import
download_and_cache_file
,
dump_state_text
,
read_jsonl
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment