Unverified commit b58ae7a2, authored by Lianmin Zheng, committed by GitHub
Browse files

Simplify frontend language (#9029)

parent 6345069f
......@@ -7,7 +7,7 @@ import time
import numpy as np
from sglang.api import set_default_backend
from sglang.lang.api import set_default_backend
from sglang.test.test_utils import (
add_common_sglang_args_and_parse,
dump_bench_raw_result,
......
......@@ -5,7 +5,7 @@ import time
import numpy as np
from sglang.api import set_default_backend
from sglang.lang.api import set_default_backend
from sglang.test.test_utils import (
add_common_sglang_args_and_parse,
select_sglang_backend,
......
......@@ -47,10 +47,10 @@ runtime_common = [
"sentencepiece",
"soundfile==0.13.1",
"scipy",
"torchao==0.9.0",
"transformers==4.55.0",
"timm==1.0.16",
"tiktoken",
"torchao==0.9.0",
"transformers==4.55.0",
"uvicorn",
"uvloop",
"xgrammar==0.1.22",
......@@ -84,6 +84,9 @@ srt_hip = [
"petit_kernel==0.0.2",
]
# CPU: torch wheel for CPU needs to be installed from https://download.pytorch.org/whl/cpu
srt_cpu = ["sglang[runtime_common]", "einops"]
# xpu is not enabled in public vllm and torch whl,
# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html to install vllm
srt_xpu = ["sglang[runtime_common]"]
......@@ -92,8 +95,6 @@ srt_xpu = ["sglang[runtime_common]"]
# https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
srt_hpu = ["sglang[runtime_common]"]
# CPU: torch wheel for CPU needs to be installed from https://download.pytorch.org/whl/cpu
srt_cpu = ["sglang[runtime_common]", "einops"]
# https://vllm-ascend.readthedocs.io/en/latest/installation.html
srt_npu = ["sglang[runtime_common]"]
......@@ -112,12 +113,12 @@ test = [
"sentence_transformers",
"pytest",
]
all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[torch_memory_saver]", "sglang[decord]"]
all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
all_hpu = ["sglang[srt_hpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
all_cpu = ["sglang[srt_cpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
all_npu = ["sglang[srt_npu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[torch_memory_saver]", "sglang[decord]"]
all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
all_hpu = ["sglang[srt_hpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
all_cpu = ["sglang[srt_cpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
all_npu = ["sglang[srt_npu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
dev = ["sglang[all]", "sglang[test]"]
dev_hip = ["sglang[all_hip]", "sglang[test]"]
......
......@@ -5,7 +5,7 @@
- `srt`: The backend engine for running local models. (SRT = SGLang Runtime).
- `test`: The test utilities.
- `api.py`: The public APIs.
- `bench_offline_throughput.py`: Benchmark the throughput in the offline mode.
- `bench_offline_throughput.py`: Benchmark the performance in the offline mode.
- `bench_one_batch.py`: Benchmark the latency of running a single static batch without a server.
- `bench_one_batch_server.py`: Benchmark the latency of running a single batch with a server.
- `bench_serving.py`: Benchmark online serving with dynamic requests.
......
# SGLang public APIs
# Frontend Language APIs
from sglang.api import (
from sglang.global_config import global_config
from sglang.lang.api import (
Engine,
Runtime,
assistant,
......@@ -25,13 +26,13 @@ from sglang.api import (
user_end,
video,
)
from sglang.global_config import global_config
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
from sglang.lang.choices import (
greedy_token_selection,
token_length_normalized,
unconditional_likelihood_normalized,
)
from sglang.srt.entrypoints.engine import Engine
from sglang.utils import LazyImport
from sglang.version import __version__
......
......@@ -12,7 +12,7 @@ import time
import numpy as np
from sglang.api import set_default_backend
from sglang.lang.api import set_default_backend
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
from sglang.utils import download_and_cache_file, dump_state_text, read_jsonl
......
......@@ -8,7 +8,7 @@ import time
import numpy as np
import sglang as sgl
from sglang.api import set_default_backend
from sglang.lang.api import set_default_backend
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
from sglang.utils import download_and_cache_file, dump_state_text, read_jsonl
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment