Unverified Commit f20b6a3f authored by Lianmin Zheng's avatar Lianmin Zheng Committed by GitHub
Browse files

[minor] Sync style changes (#9376)

parent 3680d6f8
...@@ -59,7 +59,7 @@ jobs: ...@@ -59,7 +59,7 @@ jobs:
cd test/srt cd test/srt
python3 run_suite.py --suite per-commit-8-gpu-h20 python3 run_suite.py --suite per-commit-8-gpu-h20
pr-test-finish: pr-test-h20-finish:
needs: [ needs: [
check-changes, check-changes,
per-commit-8-gpu-h20, per-commit-8-gpu-h20,
......
...@@ -38,7 +38,7 @@ repos: ...@@ -38,7 +38,7 @@ repos:
hooks: hooks:
- id: codespell - id: codespell
additional_dependencies: ['tomli'] additional_dependencies: ['tomli']
args: ['--toml', 'python/pyproject.toml', '-L', 'cann,thi'] args: ['--toml', 'python/pyproject.toml', '-L', 'cann,thi,makro,wil,rouge']
exclude: | exclude: |
(?x)^( (?x)^(
test/srt/test_reasoning_parser\.py| test/srt/test_reasoning_parser\.py|
......
# SGLang on Ascend NPUs # Ascend NPUs
You can install SGLang using any of the methods below. Please go through the `System Settings` section to ensure the clusters are roaring at max performance. Feel free to leave an issue [here at sglang](https://github.com/sgl-project/sglang/issues) if you encounter any problems. You can install SGLang using any of the methods below. Please go through the `System Settings` section to ensure the clusters are roaring at max performance. Feel free to leave an issue [here at sglang](https://github.com/sgl-project/sglang/issues) if you encounter any problems.
......
...@@ -85,8 +85,11 @@ srt_hip = [ ...@@ -85,8 +85,11 @@ srt_hip = [
"wave-lang==1.0.1", "wave-lang==1.0.1",
] ]
# CPU: torch wheel for CPU needs to be installed from https://download.pytorch.org/whl/cpu # https://docs.sglang.ai/platforms/cpu_server.html
srt_cpu = ["sglang[runtime_common]", "einops"] srt_cpu = ["sglang[runtime_common]"]
# https://docs.sglang.ai/platforms/ascend_npu.html
srt_npu = ["sglang[runtime_common]"]
# xpu is not enabled in public vllm and torch whl, # xpu is not enabled in public vllm and torch whl,
# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html to install vllm # need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html to install vllm
...@@ -96,9 +99,6 @@ srt_xpu = ["sglang[runtime_common]"] ...@@ -96,9 +99,6 @@ srt_xpu = ["sglang[runtime_common]"]
# https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html # https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
srt_hpu = ["sglang[runtime_common]"] srt_hpu = ["sglang[runtime_common]"]
# https://vllm-ascend.readthedocs.io/en/latest/installation.html
srt_npu = ["sglang[runtime_common]"]
openai = ["openai==1.99.1", "tiktoken"] openai = ["openai==1.99.1", "tiktoken"]
anthropic = ["anthropic>=0.20.0"] anthropic = ["anthropic>=0.20.0"]
litellm = ["litellm>=1.0.0"] litellm = ["litellm>=1.0.0"]
......
...@@ -26,7 +26,7 @@ from sglang.bench_serving import get_tokenizer, sample_random_requests ...@@ -26,7 +26,7 @@ from sglang.bench_serving import get_tokenizer, sample_random_requests
from sglang.profiler import run_profile from sglang.profiler import run_profile
from sglang.srt.entrypoints.http_server import launch_server from sglang.srt.entrypoints.http_server import launch_server
from sglang.srt.server_args import ServerArgs from sglang.srt.server_args import ServerArgs
from sglang.srt.utils import kill_process_tree from sglang.srt.utils import is_blackwell, kill_process_tree
from sglang.test.test_utils import is_in_ci, write_github_step_summary from sglang.test.test_utils import is_in_ci, write_github_step_summary
...@@ -363,7 +363,12 @@ def run_benchmark(server_args: ServerArgs, bench_args: BenchArgs): ...@@ -363,7 +363,12 @@ def run_benchmark(server_args: ServerArgs, bench_args: BenchArgs):
acc_length, acc_length,
trace_link, trace_link,
) in result: ) in result:
hourly_cost = 2 * server_args.tp_size # $2/hour for one H100 if is_blackwell():
hourly_cost_per_gpu = 4 # $4/hour for one B200
else:
hourly_cost_per_gpu = 2 # $2/hour for one H100
hourly_cost = hourly_cost_per_gpu * server_args.tp_size
input_util = 0.7 input_util = 0.7
accept_length = round(acc_length, 2) if acc_length is not None else "n/a" accept_length = round(acc_length, 2) if acc_length is not None else "n/a"
line = ( line = (
......
...@@ -9,6 +9,7 @@ import argparse ...@@ -9,6 +9,7 @@ import argparse
import json import json
import os import os
import time import time
import urllib.parse
from argparse import ArgumentParser from argparse import ArgumentParser
from pathlib import Path from pathlib import Path
from typing import List, Optional from typing import List, Optional
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment