Unverified Commit 00ffde20 authored by Byron Hsu's avatar Byron Hsu Committed by GitHub
Browse files

setup router python binding ci (#1999)

parent ddeb9d42
# Reference: https://github.com/openai/tiktoken/blob/63527649963def8c759b0f91f2eb69a40934e468/.github/workflows/build_wheels.yml#L1
#
# Manually triggered release pipeline for the SGLang router Python package:
#   1. build        - builds manylinux x86_64 wheels with cibuildwheel
#   2. build-sdist  - builds the source distribution
#   3. upload       - collects both artifacts and uploads them to PyPI
name: Release SGLang Router to PyPI

on:
  # Only runs when started by hand; there is no push/tag trigger.
  workflow_dispatch:

jobs:
  build:
    name: Build on ${{ matrix.os }} (${{ matrix.target }})
    runs-on: ${{ matrix.os }}-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: ubuntu
            target: x86_64
    steps:
      - uses: actions/checkout@v4
        with:
          path: sglang-repo

      # The Python/Rust project lives under rust/ in the repository; hoist it
      # to the workspace root so the build tools see it as the project root.
      - name: Move rust folder to root and delete sglang-repo
        run: |
          mv sglang-repo/rust/* .
          rm -rf sglang-repo
          ls -alt

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install build dependencies
        run: |
          python -m pip install -U pip
          python -m pip install build twine auditwheel

      - name: Build package
        uses: pypa/cibuildwheel@v2.21.3
        env:
          CIBW_BUILD: "cp38-manylinux_x86_64 cp39-manylinux_x86_64 cp310-manylinux_x86_64 cp311-manylinux_x86_64 cp312-manylinux_x86_64"
          # Installs OpenSSL headers and the Rust toolchain before the wheels
          # are built. Every command is run non-interactively: `-y` is passed
          # to both yum invocations (and to rustup) so that no step can stall
          # on a confirmation prompt and break the `&&` chain.
          CIBW_BEFORE_ALL: |
            yum update -y && yum install -y openssl-devel && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
          # Make the rustup-installed cargo/rustc visible to the wheel build.
          CIBW_ENVIRONMENT: "PATH=$HOME/.cargo/bin:$PATH"

      - name: List built packages
        run: ls -lh wheelhouse/

      - name: Check packages
        run: twine check --strict wheelhouse/*

      - uses: actions/upload-artifact@v4
        with:
          name: packages-${{ matrix.os }}-${{ matrix.target }}
          path: wheelhouse/

  build-sdist:
    name: Build SDist
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          path: sglang-repo

      # Same layout fix-up as in the wheel job: build from the rust/ folder.
      - name: Move rust folder to root and delete sglang-repo
        run: |
          mv sglang-repo/rust/* .
          rm -rf sglang-repo
          ls -alt

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Build SDist
        run: |
          pip install build
          python -m build --sdist

      - uses: actions/upload-artifact@v4
        with:
          name: sdist
          path: dist/*.tar.gz

  upload:
    name: Upload to PyPI
    needs: [build, build-sdist]
    runs-on: ubuntu-latest
    steps:
      # Pull every artifact from the previous jobs into a single dist/ folder.
      - uses: actions/download-artifact@v4
        with:
          path: dist
          merge-multiple: true

      - name: Upload to PyPI
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
        run: |
          pip install twine
          twine upload dist/* --verbose
...@@ -2091,7 +2091,7 @@ dependencies = [ ...@@ -2091,7 +2091,7 @@ dependencies = [
] ]
[[package]] [[package]]
name = "sglang_router" name = "sglang_router_rs"
version = "0.0.0" version = "0.0.0"
dependencies = [ dependencies = [
"actix-web", "actix-web",
......
[package] [package]
name = "sglang_router" name = "sglang_router_rs"
version = "0.0.0" version = "0.0.0"
edition = "2021" edition = "2021"
[[bin]] [[bin]]
name = "sglang_router" name = "sglang_router_rs"
path = "src/main.rs" path = "src/main.rs"
[lib] [lib]
name = "sglang_router" name = "sglang_router_rs"
# Pure Rust library: Just omit crate-type (defaults to rlib) # Pure Rust library: Just omit crate-type (defaults to rlib)
# Python/C binding + Rust library: Use ["cdylib", "rlib"] # Python/C binding + Rust library: Use ["cdylib", "rlib"]
crate-type = ["cdylib", "rlib"] crate-type = ["cdylib", "rlib"]
......
# Must include:
include Cargo.toml # Rust project configuration
recursive-include src *.rs # Rust source files
# SGLang Router (Experimental)
SGLang router is a standalone module implemented in Rust to achieve data parallelism across SGLang instances.
## Prerequisites
- Rust and Cargo installed
```bash
# Install rustup (Rust installer and version manager)
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
# Follow the installation prompts, then reload your shell
source $HOME/.cargo/env
# Verify installation
rustc --version
cargo --version
```
- Python with pip installed
## Build Process
### 1. Build Rust Project
```bash
cargo build
```
### 2. Build Python Binding
#### Option A: Build and Install Wheel
1. Build the wheel package:
```bash
pip install setuptools-rust wheel build
python -m build
```
2. Install the generated wheel:
```bash
pip install <path-to-wheel>
```
#### Option B: Development Mode
For development purposes, you can install the package in editable mode:
```bash
pip install -e .
```
**Note:** When modifying Rust code, you must rebuild the wheel for changes to take effect.
## CI/CD Setup
The continuous integration pipeline consists of three main steps:
### 1. Build Wheels
- Uses `cibuildwheel` to create manylinux x86_64 packages
- Compatible with major Linux distributions (Ubuntu, CentOS, etc.)
- Additional configurations can be added to support other OS/architectures
- Reference: [cibuildwheel documentation](https://cibuildwheel.pypa.io/en/stable/)
### 2. Build Source Distribution
- Creates a source distribution containing the raw, unbuilt code
- Enables `pip` to build the package from source when prebuilt wheels are unavailable
### 3. Publish to PyPI
- Uploads both wheels and source distribution to PyPI
The CI configuration is based on the [tiktoken workflow](https://github.com/openai/tiktoken/blob/63527649963def8c759b0f91f2eb69a40934e468/.github/workflows/build_wheels.yml#L1).
# a lightweight wrapper around the router that adds argument types and comments
# no wrapper on policy type => direct export
from sglang_router_rs import PolicyType
from .router import Router
from typing import List, Optional
from sglang_router_rs import PolicyType
from sglang_router_rs import Router as _Router
class Router:
    """
    Python-facing wrapper around the Rust router binding (``_Router``).

    The wrapper only adds type annotations and documentation; every option is
    handed to the underlying binding unchanged.

    Args:
        worker_urls: URLs of the worker nodes that will handle requests.
        policy: Load balancing policy. One of:
            - PolicyType.Random: pick a worker at random
            - PolicyType.RoundRobin: cycle through workers in turn
            - PolicyType.ApproxTree: tree-based routing using tokenizer similarity
        host: Host address the router server binds to.
        port: Port number the router server binds to.
        tokenizer_path: Path to the tokenizer model file (required for the
            ApproxTree policy).
        cache_threshold: Caching threshold, a value between 0 and 1.
    """

    def __init__(
        self,
        worker_urls: List[str],
        policy: PolicyType = PolicyType.RoundRobin,
        host: str = "127.0.0.1",
        port: int = 3001,
        tokenizer_path: Optional[str] = None,
        cache_threshold: float = 0.50,
    ):
        # Gather the options once and forward them by keyword to the binding.
        router_options = {
            "worker_urls": worker_urls,
            "policy": policy,
            "host": host,
            "port": port,
            "tokenizer_path": tokenizer_path,
            "cache_threshold": cache_threshold,
        }
        self._router = _Router(**router_options)

    def start(self) -> None:
        """Run the router server.

        The call does not return until the server has been shut down.
        """
        self._router.start()
[build-system] [build-system]
requires = ["maturin>=1.5.1,<2.0"] requires = ["setuptools>=45", "wheel", "setuptools-rust>=1.5.2"]
build-backend = "maturin" build-backend = "setuptools.build_meta"
[project] [project]
name = "sglang_router" name = "sglang-router"
requires-python = ">=3.9" version = "0.0.2"
description = "SGLang router is a standalone module implemented in Rust to achieve data parallelism across SGLang instances."
authors = [{name = "Byron Hsu", email = "byronhsu1230@gmail.com"}]
requires-python = ">=3.8"
readme = "README.md"
classifiers = [ classifiers = [
"Programming Language :: Rust",
"Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Rust",
"Programming Language :: Python :: 3",
] ]
dynamic = ["version"]
[tool.maturin] # https://github.com/PyO3/setuptools-rust?tab=readme-ov-file
bindings = 'pyo3' [tool.setuptools.packages]
find = { where = ["py_src"] }
[[tool.setuptools-rust.ext-modules]]
target = "sglang_router_rs"
path = "Cargo.toml"
binding = "PyO3"
# SGLang Router (Experimental)
SGLang router is a standalone module implemented in Rust to achieve data parallelism across SGLang instances.
## Architecture
1. `src/`: Rust implementation of the router
2. `py_src/`: lightweight Python interface on top of the Rust Python binding. This will be published as the `sglang-router` PyPI package
## Installation
Work in progress. Once the package is published, installation should ideally be just:
```bash
pip install sglang-router
```
## Development
### Rust
1. Install Rust
```bash
# Install rustup (Rust installer and version manager)
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
# Follow the installation prompts, then reload your shell
source $HOME/.cargo/env
# Verify installation
rustc --version
cargo --version
```
2. Build the router
```bash
# Navigate to the rust directory
cd ./rust
# Build the project
cargo build
# Verify the binary works correctly
./target/debug/router --help
```
The help command will show available options:
```
Usage: router [OPTIONS]
Options:
--host <HOST> [default: 127.0.0.1]
--port <PORT> [default: 3001]
--worker-urls <WORKER_URLS>
--policy <POLICY> [default: round_robin] [possible values: round_robin, random]
-h, --help Print help
-V, --version Print version
```
### Python Binding
1. Create a virtual environment
```bash
$ python -m venv .venv
$ source .venv/bin/activate
```
2. Install python dependencies
```bash
$ pip install maturin
$ pip install patchelf
```
3. Install rust python binding
```bash
$ maturin develop
🔗 Found pyo3 bindings
🐍 Found CPython 3.10 at /home/jobuser/resources/sglang/rust/.venv/bin/python
📡 Using build options bindings from pyproject.toml
Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.11s
📦 Built wheel for CPython 3.10 to /tmp/.tmpJb65sc/sglang_router-0.0.0-cp310-cp310-linux_x86_64.whl
✏️ Setting installed package as editable
🛠 Installed sglang_router-0.0.0
```
4. Alternatively, if you don't want to create a venv, you can also build the binding as a wheel and install it
```bash
$ maturin build --interpreter python
...
Compiling pyo3 v0.22.6
Compiling pyo3-macros v0.22.6
Compiling sglang_router v0.0.0 (/home/jobuser/sglang/rust)
Finished `dev` profile [unoptimized + debuginfo] target(s) in 9.67s
🖨 Copied external shared libraries to package sglang_router.libs directory:
/usr/lib/libssl.so.1.1.1k
/usr/lib/libcrypto.so.1.1.1k
📦 Built wheel for CPython 3.10 to <wheel path>
$ pip install <wheel path>
```
## Usage
1. Launch worker instances
```bash
# Launch first worker on GPU 0
export CUDA_VISIBLE_DEVICES=0
python -m sglang.launch_server \
--model-path meta-llama/Meta-Llama-3.1-8B-Instruct \
--host 127.0.0.1 \
--port 30000
# Launch second worker on GPU 1
export CUDA_VISIBLE_DEVICES=1
python -m sglang.launch_server \
--model-path meta-llama/Meta-Llama-3.1-8B-Instruct \
--host 127.0.0.1 \
--port 30002
```
2. Launch router and connect to workers
```bash
./target/debug/router --worker-urls http://127.0.0.1:30000,http://127.0.0.1:30002
```
...@@ -90,7 +90,7 @@ impl Router { ...@@ -90,7 +90,7 @@ impl Router {
} }
#[pymodule] #[pymodule]
fn sglang_router(m: &Bound<'_, PyModule>) -> PyResult<()> { fn sglang_router_rs(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<PolicyType>()?; m.add_class::<PolicyType>()?;
m.add_class::<Router>()?; m.add_class::<Router>()?;
Ok(()) Ok(())
......
import sglang_router as router

# Settings for the demo Router instance:
# - host: the address to bind to
# - port: the port number
# - worker_urls: worker URLs to distribute requests to
# - policy: the load balancing policy
# NOTE(review): policy is given as a plain string here, while the Python
# wrapper annotates it as PolicyType - confirm which form the binding accepts.
router_options = {
    "host": "127.0.0.1",
    "port": 3001,
    "worker_urls": [
        "http://localhost:30000",
        "http://localhost:30002",
    ],
    "policy": "random",
}

# The module alias is rebound to the Router instance, as in the original script.
router = router.Router(**router_options)

# Start the router - this will block and run the server
router.start()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment