Unverified commit b58ae7a2, authored by Lianmin Zheng and committed by GitHub

Simplify frontend language (#9029)

parent 6345069f
@@ -7,7 +7,7 @@ import time
 import numpy as np
-from sglang.api import set_default_backend
+from sglang.lang.api import set_default_backend
 from sglang.test.test_utils import (
     add_common_sglang_args_and_parse,
     dump_bench_raw_result,
...
@@ -5,7 +5,7 @@ import time
 import numpy as np
-from sglang.api import set_default_backend
+from sglang.lang.api import set_default_backend
 from sglang.test.test_utils import (
     add_common_sglang_args_and_parse,
     select_sglang_backend,
...
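The two hunks above make the same one-line change: set_default_backend is now imported from sglang.lang.api instead of sglang.api. A minimal sketch of how a caller uses the relocated helper; the server URL is illustrative and not part of the diff:

    # Frontend-language setup after this commit: the helper now lives in sglang.lang.api.
    from sglang.lang.api import set_default_backend
    from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint

    # Point the frontend language at a running SGLang server (URL is a placeholder).
    set_default_backend(RuntimeEndpoint("http://localhost:30000"))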
@@ -47,10 +47,10 @@ runtime_common = [
     "sentencepiece",
     "soundfile==0.13.1",
     "scipy",
-    "torchao==0.9.0",
-    "transformers==4.55.0",
     "timm==1.0.16",
     "tiktoken",
+    "torchao==0.9.0",
+    "transformers==4.55.0",
     "uvicorn",
     "uvloop",
     "xgrammar==0.1.22",
...
@@ -84,6 +84,9 @@ srt_hip = [
     "petit_kernel==0.0.2",
 ]

+# CPU: torch wheel for CPU needs to be installed from https://download.pytorch.org/whl/cpu
+srt_cpu = ["sglang[runtime_common]", "einops"]
+
 # xpu is not enabled in public vllm and torch whl,
 # need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html install vllm
 srt_xpu = ["sglang[runtime_common]"]
...
@@ -92,8 +95,6 @@ srt_xpu = ["sglang[runtime_common]"]
 # https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
 srt_hpu = ["sglang[runtime_common]"]
-# CPU: torch wheel for CPU needs to be installed from https://download.pytorch.org/whl/cpu
-srt_cpu = ["sglang[runtime_common]", "einops"]

 # https://vllm-ascend.readthedocs.io/en/latest/installation.html
 srt_npu = ["sglang[runtime_common]"]
...
@@ -112,12 +113,12 @@ test = [
     "sentence_transformers",
     "pytest",
 ]
-all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[torch_memory_saver]", "sglang[decord]"]
-all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
-all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
-all_hpu = ["sglang[srt_hpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
-all_cpu = ["sglang[srt_cpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
-all_npu = ["sglang[srt_npu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
+all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[torch_memory_saver]", "sglang[decord]"]
+all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
+all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
+all_hpu = ["sglang[srt_hpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
+all_cpu = ["sglang[srt_cpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
+all_npu = ["sglang[srt_npu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
 dev = ["sglang[all]", "sglang[test]"]
 dev_hip = ["sglang[all_hip]", "sglang[test]"]
...
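Every all* meta-extra above drops sglang[litellm], so a project that still needs the LiteLLM backend has to depend on litellm explicitly. A hypothetical runtime guard (not part of the commit) that surfaces the new requirement early:

    # Hypothetical check: sglang[all] no longer pulls in litellm after this change,
    # so verify it is installed before selecting a LiteLLM-backed configuration.
    import importlib.util

    if importlib.util.find_spec("litellm") is None:
        raise RuntimeError(
            "litellm is not installed; add it explicitly (e.g. `pip install litellm`), "
            "since the sglang[all] extra no longer includes it."
        )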
@@ -5,7 +5,7 @@
 - `srt`: The backend engine for running local models. (SRT = SGLang Runtime).
 - `test`: The test utilities.
 - `api.py`: The public APIs.
-- `bench_offline_throughput.py`: Benchmark the throughput in the offline mode.
+- `bench_offline_throughput.py`: Benchmark the performance in the offline mode.
 - `bench_one_batch.py`: Benchmark the latency of running a single static batch without a server.
 - `bench_one_batch_server.py`: Benchmark the latency of running a single batch with a server.
 - `bench_serving.py`: Benchmark online serving with dynamic requests.
...
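The directory listing above covers the offline benchmark scripts, and the hunk that follows re-exports Engine from sglang.srt.entrypoints.engine at the package top level. As a rough, illustrative sketch only (model path and sampling parameters are placeholders, not taken from the benchmark scripts), offline generation through the top-level Engine looks roughly like this:

    # Rough offline-generation sketch; model path and sampling params are placeholders.
    import sglang as sgl

    llm = sgl.Engine(model_path="meta-llama/Llama-3.1-8B-Instruct")
    outputs = llm.generate(
        ["The capital of France is"],
        {"temperature": 0, "max_new_tokens": 16},
    )
    print(outputs[0]["text"])
    llm.shutdown()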
 # SGLang public APIs

 # Frontend Language APIs
-from sglang.api import (
+from sglang.global_config import global_config
+from sglang.lang.api import (
     Engine,
     Runtime,
     assistant,
...
@@ -25,13 +26,13 @@ from sglang.api import (
     user_end,
     video,
 )
-from sglang.global_config import global_config
 from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
 from sglang.lang.choices import (
     greedy_token_selection,
     token_length_normalized,
     unconditional_likelihood_normalized,
 )
+from sglang.srt.entrypoints.engine import Engine
 from sglang.utils import LazyImport
 from sglang.version import __version__
...
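The hunk above regroups the top-level re-exports: the frontend-language primitives now come from sglang.lang.api, global_config moves up with them, and the server-side Engine is re-exported from sglang.srt.entrypoints.engine. A small illustrative program built from the re-exported frontend-language names (the prompt text and the commented run call are made up for the example):

    # Illustrative frontend-language program using the top-level re-exports.
    import sglang as sgl

    @sgl.function
    def greet(s, name):
        # Build a chat turn and generate a short reply.
        s += sgl.user(f"Say hello to {name}.")
        s += sgl.assistant(sgl.gen("reply", max_tokens=32))

    # Requires a backend first, e.g. set_default_backend(RuntimeEndpoint(...)):
    # state = greet.run(name="SGLang")
    # print(state["reply"])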
@@ -12,7 +12,7 @@ import time
 import numpy as np
-from sglang.api import set_default_backend
+from sglang.lang.api import set_default_backend
 from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
 from sglang.utils import download_and_cache_file, dump_state_text, read_jsonl
...
@@ -8,7 +8,7 @@ import time
 import numpy as np
 import sglang as sgl
-from sglang.api import set_default_backend
+from sglang.lang.api import set_default_backend
 from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
 from sglang.utils import download_and_cache_file, dump_state_text, read_jsonl
...