Unverified Commit 9491d6e5 authored by Simo Lin, committed by GitHub

[router] include rust benchmarks (#9932)

parent f64b8e3e
@@ -9,6 +9,7 @@ on:
    branches: [ main ]
    paths:
      - "sgl-router/**"
types: [opened, synchronize, reopened, labeled]
  workflow_dispatch:
concurrency:
@@ -19,9 +20,67 @@ permissions:
  pull-requests: write
  issues: write
jobs:
  # Quick check job that always runs on PRs
benchmark-compile-check:
name: Benchmark Compilation Check
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install dependencies
run: |
bash scripts/ci/ci_install_rust.sh
- name: Setup sccache
uses: mozilla-actions/sccache-action@v0.0.3
continue-on-error: true
- name: Cache Rust dependencies
uses: actions/cache@v4
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
sgl-router/target/
key: ${{ runner.os }}-cargo-${{ hashFiles('sgl-router/Cargo.lock') }}
restore-keys: |
${{ runner.os }}-cargo-
- name: Check benchmarks compile
run: |
source "$HOME/.cargo/env"
cd sgl-router/
# Try to use sccache, but disable if it fails
if command -v sccache &> /dev/null; then
echo "Testing sccache availability..."
# Try to start sccache and check if it works
export RUSTC_WRAPPER=sccache
export SCCACHE_GHA_ENABLED="true"
if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
echo "sccache is working, using it for compilation"
else
echo "sccache failed to start, falling back to regular cargo"
unset RUSTC_WRAPPER
unset SCCACHE_GHA_ENABLED
fi
else
echo "sccache not available, using regular cargo"
fi
cargo check --benches
# Full benchmark jobs that only run with label or on main branch
benchmark-request-processing:
name: Request Processing Benchmark
if: |
github.repository == 'sgl-project/sglang' &&
(github.event_name == 'push' ||
github.event_name == 'workflow_dispatch' ||
contains(github.event.pull_request.labels.*.name, 'benchmark'))
runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
@@ -33,6 +92,10 @@ jobs:
        run: |
          bash scripts/ci/ci_install_rust.sh
- name: Setup sccache
uses: mozilla-actions/sccache-action@v0.0.3
continue-on-error: true
      - name: Cache Rust dependencies
        uses: actions/cache@v4
        with:
@@ -46,40 +109,128 @@ jobs:
          restore-keys: |
            ${{ runner.os }}-cargo-
      - name: Run request processing benchmark
timeout-minutes: 30
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          # Try to use sccache, but disable if it fails
if command -v sccache &> /dev/null; then
echo "Testing sccache availability..."
# Try to start sccache and check if it works
export RUSTC_WRAPPER=sccache
export SCCACHE_GHA_ENABLED="true"
if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
echo "sccache is working, using it for compilation"
else
echo "sccache failed to start, falling back to regular cargo"
unset RUSTC_WRAPPER
unset SCCACHE_GHA_ENABLED
fi
else
echo "sccache not available, using regular cargo"
fi
# Run only the summary benchmark for quick validation in PRs
cargo bench --bench request_processing -- benchmark_summary --exact
      - name: Upload benchmark results
        if: always()
uses: actions/upload-artifact@v4
with:
name: request-processing-results-${{ github.sha }}
path: |
sgl-router/target/criterion/benchmark_summary/
retention-days: 30
benchmark-tokenizer:
name: Tokenizer Benchmark
if: |
github.repository == 'sgl-project/sglang' &&
(github.event_name == 'push' ||
github.event_name == 'workflow_dispatch' ||
contains(github.event.pull_request.labels.*.name, 'benchmark'))
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 100
- name: Install dependencies
run: |
bash scripts/ci/ci_install_rust.sh
- name: Setup sccache
uses: mozilla-actions/sccache-action@v0.0.3
continue-on-error: true
- name: Cache Rust dependencies
uses: actions/cache@v4
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
sgl-router/target/
key: ${{ runner.os }}-cargo-${{ hashFiles('sgl-router/Cargo.lock') }}
restore-keys: |
${{ runner.os }}-cargo-
- name: Run tokenizer benchmark
timeout-minutes: 30
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          # Try to use sccache, but disable if it fails
          if command -v sccache &> /dev/null; then
echo "Testing sccache availability..."
# Try to start sccache and check if it works
export RUSTC_WRAPPER=sccache
export SCCACHE_GHA_ENABLED="true"
if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
echo "sccache is working, using it for compilation"
else
echo "sccache failed to start, falling back to regular cargo"
unset RUSTC_WRAPPER
unset SCCACHE_GHA_ENABLED
fi
else
echo "sccache not available, using regular cargo"
fi
cargo bench --bench tokenizer_benchmark
      - name: Upload benchmark results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: tokenizer-results-${{ github.sha }}
          path: |
            sgl-router/target/criterion/tokenizer*/
          retention-days: 30
  benchmark-tool-parser:
    name: Tool Parser Benchmark
if: |
github.repository == 'sgl-project/sglang' &&
(github.event_name == 'push' ||
github.event_name == 'workflow_dispatch' ||
contains(github.event.pull_request.labels.*.name, 'benchmark'))
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
with:
fetch-depth: 100
      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_rust.sh
- name: Setup sccache
uses: mozilla-actions/sccache-action@v0.0.3
continue-on-error: true
      - name: Cache Rust dependencies
        uses: actions/cache@v4
        with:
@@ -93,17 +244,79 @@ jobs:
          restore-keys: |
            ${{ runner.os }}-cargo-
      - name: Run tool parser benchmark
        timeout-minutes: 30
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          # Try to use sccache, but disable if it fails
          if command -v sccache &> /dev/null; then
echo "Testing sccache availability..."
# Try to start sccache and check if it works
export RUSTC_WRAPPER=sccache
export SCCACHE_GHA_ENABLED="true"
if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
echo "sccache is working, using it for compilation"
else
echo "sccache failed to start, falling back to regular cargo"
unset RUSTC_WRAPPER
unset SCCACHE_GHA_ENABLED
fi
else
echo "sccache not available, using regular cargo"
fi
cargo bench --bench tool_parser_benchmark
- name: Upload benchmark results
if: always()
uses: actions/upload-artifact@v4
with:
name: tool-parser-results-${{ github.sha }}
path: |
sgl-router/target/criterion/tool_parser*/
retention-days: 30
benchmark-summary:
name: Benchmark Summary
needs: [benchmark-request-processing, benchmark-tokenizer, benchmark-tool-parser]
if: always() && (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
runs-on: ubuntu-latest
steps:
- name: Download all benchmark results
uses: actions/download-artifact@v4
with:
pattern: '*-results-${{ github.sha }}'
path: benchmark-results
      - name: Generate summary
        run: |
          echo "## Benchmark Results Summary" > summary.md
          echo "" >> summary.md
          echo "### Request Processing" >> summary.md
          if [ -d "benchmark-results/request-processing-results-${{ github.sha }}" ]; then
echo "✅ Completed" >> summary.md
else
echo "❌ Failed or skipped" >> summary.md
fi
echo "" >> summary.md
echo "### Tokenizer" >> summary.md
if [ -d "benchmark-results/tokenizer-results-${{ github.sha }}" ]; then
echo "✅ Completed" >> summary.md
else
echo "❌ Failed or skipped" >> summary.md
fi
echo "" >> summary.md
echo "### Tool Parser" >> summary.md
if [ -d "benchmark-results/tool-parser-results-${{ github.sha }}" ]; then
echo "✅ Completed" >> summary.md
else
echo "❌ Failed or skipped" >> summary.md
fi
cat summary.md
- name: Upload summary
uses: actions/upload-artifact@v4
with:
name: benchmark-summary-${{ github.sha }}
path: summary.md
retention-days: 30
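A note on the `-- benchmark_summary --exact` filter used in the request-processing job: Criterion matches the trailing argument against benchmark IDs, and `--exact` restricts the run to the single exact match. The sketch below is a hypothetical, minimal bench target with that shape, not the actual benchmark in this PR; it assumes `criterion` and `serde_json` are dependencies and that `Cargo.toml` registers a `benches/request_processing.rs` bench.

```rust
// Hypothetical sketch: a Criterion bench target exposing a benchmark ID named
// `benchmark_summary`, selectable via:
//   cargo bench --bench request_processing -- benchmark_summary --exact
use criterion::{criterion_group, criterion_main, Criterion};
use std::hint::black_box;

fn benchmark_summary(c: &mut Criterion) {
    // A cheap serialization stands in for the real request-processing pipeline.
    c.bench_function("benchmark_summary", |b| {
        b.iter(|| serde_json::to_string(black_box(&vec![1u32, 2, 3])).unwrap())
    });
}

criterion_group!(benches, benchmark_summary);
criterion_main!(benches);
```

Criterion writes its reports under `target/criterion/<benchmark-id>/`, which is why the upload steps above collect paths like `sgl-router/target/criterion/benchmark_summary/`.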
#!/usr/bin/env python3
"""
GitHub PR Comment Poster for Benchmark Results
Posts benchmark results as comments on GitHub PRs with update capability.
Replaces JavaScript logic in GitHub Actions for better maintainability.
"""
import argparse
import os
import sys
from pathlib import Path
from typing import Dict, Optional
import requests
class GitHubCommentPoster:
"""Handles posting benchmark results as GitHub PR comments."""
def __init__(self, token: str, repo_owner: str, repo_name: str):
self.token = token
self.repo_owner = repo_owner
self.repo_name = repo_name
self.base_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}"
self.headers = {
"Authorization": f"token {token}",
"Accept": "application/vnd.github.v3+json",
}
def read_benchmark_results(self, results_file: str) -> Dict[str, str]:
"""Read benchmark results from file."""
results = {}
filepath = Path(results_file)
if not filepath.exists():
print(f"Results file not found: {filepath}")
return {"error": "Results file not found"}
try:
with open(filepath, "r") as f:
for line in f:
line = line.strip()
if "=" in line:
key, value = line.split("=", 1)
results[key] = value
except Exception as e:
print(f"Error reading results file: {e}")
return {"error": str(e)}
return results
def format_benchmark_comment(
self, results: Dict[str, str], pr_number: int, commit_sha: str
) -> str:
"""Format benchmark results into a GitHub comment."""
serialization_time = results.get("serialization_time", "N/A")
deserialization_time = results.get("deserialization_time", "N/A")
adaptation_time = results.get("adaptation_time", "N/A")
total_time = results.get("total_time", "N/A")
comment = f"""
### SGLang Router Benchmark Results
**Performance Summary for PR #{pr_number}**
The router benchmarks have completed successfully!
**Performance Thresholds:** All passed
- Serialization: < 2μs
- Deserialization: < 2μs
- PD Adaptation: < 5μs
- Total Pipeline: < 10μs
**Measured Results:**
- Serialization: `{serialization_time}`ns
- Deserialization: `{deserialization_time}`ns
- PD Adaptation: `{adaptation_time}`ns
- Total Pipeline: `{total_time}`ns
**Detailed Reports:**
- Download the `benchmark-results-{commit_sha}` artifact for HTML reports
- Run `make bench` locally for detailed analysis
**Commit:** {commit_sha}
""".strip()
return comment
def find_existing_comment(self, pr_number: int) -> Optional[int]:
"""Find existing benchmark comment in the PR."""
url = f"{self.base_url}/issues/{pr_number}/comments"
try:
response = requests.get(url, headers=self.headers)
response.raise_for_status()
comments = response.json()
for comment in comments:
if comment.get("user", {}).get(
"login"
) == "github-actions[bot]" and "SGLang Router Benchmark Results" in comment.get(
"body", ""
):
return comment["id"]
except requests.RequestException as e:
print(f"Error fetching comments: {e}")
return None
def post_comment(self, pr_number: int, comment_body: str) -> bool:
"""Post a new comment on the PR."""
url = f"{self.base_url}/issues/{pr_number}/comments"
data = {"body": comment_body}
try:
response = requests.post(url, headers=self.headers, json=data)
response.raise_for_status()
print(f"Posted new benchmark comment on PR #{pr_number}")
return True
except requests.RequestException as e:
print(f"Error posting comment: {e}")
return False
def update_comment(self, comment_id: int, comment_body: str) -> bool:
"""Update an existing comment."""
url = f"{self.base_url}/issues/comments/{comment_id}"
data = {"body": comment_body}
try:
response = requests.patch(url, headers=self.headers, json=data)
response.raise_for_status()
print(f"Updated existing benchmark comment (ID: {comment_id})")
return True
except requests.RequestException as e:
print(f"Error updating comment: {e}")
return False
def post_or_update_comment(
self, pr_number: int, results_file: str, commit_sha: str
) -> bool:
"""Post or update benchmark results comment on PR."""
# Read benchmark results
results = self.read_benchmark_results(results_file)
if "error" in results:
print(f"Failed to read benchmark results: {results['error']}")
return False
# Format comment
comment_body = self.format_benchmark_comment(results, pr_number, commit_sha)
# Check for existing comment
existing_comment_id = self.find_existing_comment(pr_number)
if existing_comment_id:
return self.update_comment(existing_comment_id, comment_body)
else:
return self.post_comment(pr_number, comment_body)
def main():
parser = argparse.ArgumentParser(description="Post benchmark results to GitHub PR")
parser.add_argument(
"--pr-number", type=int, required=True, help="Pull request number"
)
parser.add_argument("--commit-sha", type=str, required=True, help="Commit SHA")
parser.add_argument(
"--results-file",
type=str,
default="benchmark_results.env",
help="Path to benchmark results file",
)
parser.add_argument(
"--repo-owner", type=str, default="sgl-project", help="GitHub repository owner"
)
parser.add_argument(
"--repo-name", type=str, default="sglang", help="GitHub repository name"
)
args = parser.parse_args()
# Get GitHub token from environment
github_token = os.environ.get("GITHUB_TOKEN")
if not github_token:
print("Error: GITHUB_TOKEN environment variable is required")
sys.exit(1)
# Create poster and post comment
poster = GitHubCommentPoster(github_token, args.repo_owner, args.repo_name)
success = poster.post_or_update_comment(
args.pr_number, args.results_file, args.commit_sha
)
if not success:
print("Failed to post benchmark comment")
sys.exit(1)
print("Benchmark comment posted successfully!")
if __name__ == "__main__":
main()
// Integration test to ensure benchmarks compile and basic functionality works
// This prevents benchmarks from breaking in CI
//
// UPDATED: Removed deprecated ToPdRequest usage, now uses direct JSON serialization
use serde_json::{from_str, to_string, to_value};
use sglang_router_rs::core::{BasicWorker, WorkerType};
use sglang_router_rs::protocols::spec::{
ChatCompletionRequest, ChatMessage, CompletionRequest, GenerateParameters, GenerateRequest,
SamplingParams, StringOrArray, UserMessageContent,
};
/// Create a default GenerateRequest for benchmarks with minimal fields set
fn default_generate_request() -> GenerateRequest {
GenerateRequest {
text: None,
prompt: None,
input_ids: None,
stream: false,
parameters: None,
sampling_params: None,
return_logprob: false,
// SGLang Extensions
lora_path: None,
session_params: None,
return_hidden_states: false,
rid: None,
}
}
/// Create a default ChatCompletionRequest for benchmarks with minimal fields set
fn default_chat_completion_request() -> ChatCompletionRequest {
ChatCompletionRequest {
model: String::new(),
messages: vec![],
max_tokens: None,
max_completion_tokens: None,
temperature: None,
top_p: None,
n: None,
stream: false,
stream_options: None,
stop: None,
presence_penalty: None,
frequency_penalty: None,
logit_bias: None,
logprobs: false,
top_logprobs: None,
user: None,
response_format: None,
seed: None,
tools: None,
tool_choice: None,
parallel_tool_calls: None,
function_call: None,
functions: None,
// SGLang Extensions
top_k: None,
min_p: None,
min_tokens: None,
repetition_penalty: None,
regex: None,
ebnf: None,
stop_token_ids: None,
no_stop_trim: false,
ignore_eos: false,
continue_final_message: false,
skip_special_tokens: true,
// SGLang Extensions
lora_path: None,
session_params: None,
separate_reasoning: true,
stream_reasoning: true,
return_hidden_states: false,
}
}
/// Create a default CompletionRequest for benchmarks with minimal fields set
fn default_completion_request() -> CompletionRequest {
CompletionRequest {
model: String::new(),
prompt: StringOrArray::String(String::new()),
suffix: None,
max_tokens: None,
temperature: None,
top_p: None,
n: None,
stream: false,
stream_options: None,
logprobs: None,
echo: false,
stop: None,
presence_penalty: None,
frequency_penalty: None,
best_of: None,
logit_bias: None,
user: None,
seed: None,
// SGLang Extensions
top_k: None,
min_p: None,
min_tokens: None,
repetition_penalty: None,
regex: None,
ebnf: None,
json_schema: None,
stop_token_ids: None,
no_stop_trim: false,
ignore_eos: false,
skip_special_tokens: true,
// SGLang Extensions
lora_path: None,
session_params: None,
return_hidden_states: false,
other: serde_json::Map::new(),
}
}
#[allow(dead_code)]
fn create_test_worker() -> BasicWorker {
BasicWorker::new(
"http://test-server:8000".to_string(),
WorkerType::Prefill {
bootstrap_port: Some(5678),
},
)
}
#[test]
fn test_benchmark_request_creation() {
// Ensure all benchmark request types can be created without panicking
let generate_req = GenerateRequest {
text: Some("Test prompt".to_string()),
parameters: Some(GenerateParameters {
max_new_tokens: Some(100),
temperature: Some(0.8),
top_p: Some(0.9),
top_k: Some(50),
repetition_penalty: Some(1.0),
..Default::default()
}),
sampling_params: Some(SamplingParams {
temperature: Some(0.8),
top_p: Some(0.9),
top_k: Some(50),
frequency_penalty: Some(0.0),
presence_penalty: Some(0.0),
repetition_penalty: Some(1.0),
..Default::default()
}),
..default_generate_request()
};
let chat_req = ChatCompletionRequest {
model: "test-model".to_string(),
messages: vec![ChatMessage::User {
role: "user".to_string(),
content: UserMessageContent::Text("Test message".to_string()),
name: None,
}],
max_tokens: Some(150),
max_completion_tokens: Some(150),
temperature: Some(0.7),
top_p: Some(1.0),
n: Some(1),
presence_penalty: Some(0.0),
frequency_penalty: Some(0.0),
parallel_tool_calls: Some(true),
..default_chat_completion_request()
};
let completion_req = CompletionRequest {
model: "test-model".to_string(),
prompt: StringOrArray::String("Test prompt".to_string()),
max_tokens: Some(50),
temperature: Some(0.8),
top_p: Some(1.0),
n: Some(1),
presence_penalty: Some(0.0),
frequency_penalty: Some(0.0),
best_of: Some(1),
..default_completion_request()
};
// Test serialization works
assert!(to_string(&generate_req).is_ok());
assert!(to_string(&chat_req).is_ok());
assert!(to_string(&completion_req).is_ok());
}
#[test]
fn test_benchmark_serialization_roundtrip() {
// Test serialization/deserialization roundtrip for benchmark types
let generate_req = GenerateRequest {
text: Some("Test prompt".to_string()),
..default_generate_request()
};
// Serialize and deserialize
let json = to_string(&generate_req).expect("Serialization should work");
let deserialized: GenerateRequest = from_str(&json).expect("Deserialization should work");
// Verify basic field equality
assert_eq!(generate_req.text, deserialized.text);
assert_eq!(generate_req.stream, deserialized.stream);
assert_eq!(generate_req.return_logprob, deserialized.return_logprob);
}
#[test]
fn test_benchmark_direct_json_routing() {
// Test direct JSON routing functionality for benchmark types (replaces regular routing)
let generate_req = GenerateRequest {
text: Some("Test prompt".to_string()),
..default_generate_request()
};
// Test direct JSON conversion (replaces regular routing methods)
let json = to_value(&generate_req).unwrap();
let json_string = to_string(&json).unwrap();
let bytes = json_string.as_bytes();
// Verify conversions work
assert!(!json_string.is_empty());
assert!(!bytes.is_empty());
}
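The comment-poster script above reports against fixed thresholds (serialization < 2μs, deserialization < 2μs, PD adaptation < 5μs, total pipeline < 10μs), which the Criterion benches measure properly. For a rough local sanity check without Criterion's statistics, a plain `Instant` loop is enough. A minimal sketch, assuming only `serde` and `serde_json`, and using a stand-in struct rather than the router's request types so it compiles on its own:

```rust
// Hypothetical sketch: order-of-magnitude timing of serde_json serialization,
// in the spirit of the thresholds quoted by the PR comment script.
use serde::Serialize;
use std::time::Instant;

#[derive(Serialize)]
struct TinyRequest {
    text: String,
    stream: bool,
}

fn main() {
    let req = TinyRequest {
        text: "Test prompt".to_string(),
        stream: false,
    };
    let iters: u32 = 100_000;
    let start = Instant::now();
    for _ in 0..iters {
        // black_box keeps the optimizer from deleting the serialization work.
        std::hint::black_box(serde_json::to_string(&req).unwrap());
    }
    let per_iter_ns = start.elapsed().as_nanos() / iters as u128;
    println!("serialization: ~{per_iter_ns} ns/iter (CI comment threshold: < 2000 ns)");
}
```

Numbers from a loop like this are far noisier than Criterion's; treat them as an order-of-magnitude check, not a replacement for `cargo bench`.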