[minor] Sync style changes (#9376)

f20b6a3f · Lianmin Zheng · GitHub · 3680d6f8 · f20b6a3f · f20b6a3f
Unverified Commit f20b6a3f authored Aug 19, 2025 by Lianmin Zheng Committed by GitHub Aug 19, 2025
6 changed files
--- a/.github/workflows/pr-test-h20.yml
+++ b/.github/workflows/pr-test-h20.yml
@@ -59,7 +59,7 @@ jobs:
          cd test/srt
          python3 run_suite.py --suite per-commit-8-gpu-h20

-  pr-test-finish:
+  pr-test-h20-finish:
    needs: [
      check-changes,
      per-commit-8-gpu-h20,

--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -38,7 +38,7 @@ repos:
    hooks:
      - id: codespell
        additional_dependencies: ['tomli']
-        args: ['--toml', 'python/pyproject.toml', '-L', 'cann,thi']
+        args: ['--toml', 'python/pyproject.toml', '-L', 'cann,thi,makro,wil,rouge']
        exclude: |
          (?x)^(
            test/srt/test_reasoning_parser\.py|

--- a/docs/platforms/ascend_npu.md
+++ b/docs/platforms/ascend_npu.md
-# SGLang on Ascend NPUs
+# Ascend NPUs

 You can install SGLang using any of the methods below. Please go through the `System Settings` section to ensure the clusters are roaring at max performance. Feel free to open an issue [here at sglang](https://github.com/sgl-project/sglang/issues) if you encounter any problems.


--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -85,8 +85,11 @@ srt_hip = [
    "wave-lang==1.0.1",
 ]

-# CPU: torch wheel for CPU needs to be installed from https://download.pytorch.org/whl/cpu
-srt_cpu = ["sglang[runtime_common]", "einops"]
+# https://docs.sglang.ai/platforms/cpu_server.html
+srt_cpu = ["sglang[runtime_common]"]
+
+# https://docs.sglang.ai/platforms/ascend_npu.html
+srt_npu = ["sglang[runtime_common]"]

 # xpu is not enabled in public vllm and torch whl,
 # need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html to install vllm
@@ -96,9 +99,6 @@ srt_xpu = ["sglang[runtime_common]"]
 # https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
 srt_hpu = ["sglang[runtime_common]"]

-# https://vllm-ascend.readthedocs.io/en/latest/installation.html
-srt_npu = ["sglang[runtime_common]"]
-
 openai = ["openai==1.99.1", "tiktoken"]
 anthropic = ["anthropic>=0.20.0"]
 litellm = ["litellm>=1.0.0"]

--- a/python/sglang/bench_one_batch_server.py
+++ b/python/sglang/bench_one_batch_server.py
@@ -26,7 +26,7 @@ from sglang.bench_serving import get_tokenizer, sample_random_requests
 from sglang.profiler import run_profile
 from sglang.srt.entrypoints.http_server import launch_server
 from sglang.srt.server_args import ServerArgs
-from sglang.srt.utils import kill_process_tree
+from sglang.srt.utils import is_blackwell, kill_process_tree
 from sglang.test.test_utils import is_in_ci, write_github_step_summary


@@ -363,7 +363,12 @@ def run_benchmark(server_args: ServerArgs, bench_args: BenchArgs):
        acc_length,
        trace_link,
    ) in result:
-        hourly_cost = 2 * server_args.tp_size  # $2/hour for one H100
+        if is_blackwell():
+            hourly_cost_per_gpu = 4  # $4/hour for one B200
+        else:
+            hourly_cost_per_gpu = 2  # $2/hour for one H100
+
+        hourly_cost = hourly_cost_per_gpu * server_args.tp_size
        input_util = 0.7
        accept_length = round(acc_length, 2) if acc_length is not None else "n/a"
        line = (

--- a/python/sglang/profiler.py
+++ b/python/sglang/profiler.py
@@ -9,6 +9,7 @@ import argparse
 import json
 import os
 import time
+import urllib.parse
 from argparse import ArgumentParser
 from pathlib import Path
 from typing import List, Optional