Commit 118f1fc7 authored by maxiao1's avatar maxiao1
Browse files

sglangv0.5.2 & support Qwen3-Next-80B-A3B-Instruct

parents
name: Release PyPI
on:
push:
branches:
- main
paths:
- "python/sglang/version.py"
workflow_dispatch:
jobs:
publish:
if: github.repository == 'sgl-project/sglang'
runs-on: ubuntu-latest
environment: "prod"
steps:
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Checkout repository
uses: actions/checkout@v4
- name: Upload to pypi
run: |
cd python
cp ../README.md ../LICENSE .
pip install build
python3 -m build
pip install twine
python3 -m twine upload dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }}
name: Release SGLang Kernel Wheel (cu118)
on:
workflow_dispatch:
inputs:
tag_name:
type: string
push:
branches:
- main
paths:
- sgl-kernel/python/sgl_kernel/version.py
jobs:
build-wheels:
if: github.repository == 'sgl-project/sglang'
runs-on: sgl-kernel-release-node
strategy:
matrix:
python-version: ["3.9"]
cuda-version: ["11.8"]
steps:
- uses: actions/checkout@v4
with:
submodules: "recursive"
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Build wheels for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
run: |
cd sgl-kernel
chmod +x ./build.sh
./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
path: sgl-kernel/dist/*
release:
needs: build-wheels
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Download artifacts
uses: actions/download-artifact@v4
with:
path: sgl-kernel/dist/
merge-multiple: true
pattern: wheel-*
- name: Set tag name
id: set_tag_name
run: |
if [ -z "${{ inputs.tag_name }}" ]; then
TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
else
echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
fi
- name: Release
uses: softprops/action-gh-release@v2
with:
tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
repository: sgl-project/whl
token: ${{ secrets.WHL_TOKEN }}
files: |
sgl-kernel/dist/*
- name: Clone wheel index
run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
env:
WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
- name: Update wheel index
run: python3 scripts/update_kernel_whl_index.py
- name: Push wheel index
run: |
cd sgl-whl
git config --local user.name "github-actions[bot]"
git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
git add -A
git commit -m "update whl index"
git push
name: Release SGLang Kernels
on:
push:
branches:
- main
paths:
- sgl-kernel/python/sgl_kernel/version.py
workflow_dispatch:
inputs:
tag_name:
type: string
required: false
concurrency:
group: release-sglang-kernels-${{ github.ref }}
cancel-in-progress: true
jobs:
build-cu129:
if: github.repository == 'sgl-project/sglang'
runs-on: sgl-kernel-release-node
strategy:
matrix:
python-version: ["3.10"]
cuda-version: ["12.9"]
steps:
- uses: actions/checkout@v4
with:
submodules: "recursive"
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Build wheels
run: |
cd sgl-kernel
chmod +x ./build.sh
./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
- name: Upload to PyPI
working-directory: sgl-kernel
run: |
pip install twine
python3 -m twine upload dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }}
build-cu124:
if: github.repository == 'sgl-project/sglang'
needs: build-cu129
runs-on: sgl-kernel-release-node
strategy:
matrix:
python-version: ["3.10"]
cuda-version: ["12.4"]
steps:
- uses: actions/checkout@v4
with:
submodules: "recursive"
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Build wheels
run: |
cd sgl-kernel
chmod +x ./build.sh
./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
path: sgl-kernel/dist/*
release-cu124:
needs: build-cu124
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Download artifacts
uses: actions/download-artifact@v4
with:
path: sgl-kernel/dist/
merge-multiple: true
pattern: wheel-*
- name: Set tag name
id: set_tag_name
run: |
if [ -z "${{ inputs.tag_name }}" ]; then
TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
else
echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
fi
- name: Release
uses: softprops/action-gh-release@v2
with:
tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
repository: sgl-project/whl
token: ${{ secrets.WHL_TOKEN }}
files: |
sgl-kernel/dist/*
- name: Clone wheel index
run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
env:
WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
- name: Update wheel index
run: python3 scripts/update_kernel_whl_index.py --cuda 124
- name: Push wheel index
run: |
cd sgl-whl
git config --local user.name "github-actions[bot]"
git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
git add -A
git commit -m "update whl index"
git push
build-cu128:
if: github.repository == 'sgl-project/sglang'
needs: build-cu129
runs-on: sgl-kernel-release-node
strategy:
matrix:
python-version: ["3.10"]
cuda-version: ["12.8"]
steps:
- uses: actions/checkout@v4
with:
submodules: "recursive"
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Build wheels
run: |
cd sgl-kernel
chmod +x ./build.sh
./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
path: sgl-kernel/dist/*
release-cu128:
needs: build-cu128
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Download artifacts
uses: actions/download-artifact@v4
with:
path: sgl-kernel/dist/
merge-multiple: true
pattern: wheel-*
- name: Set tag name
id: set_tag_name
run: |
if [ -z "${{ inputs.tag_name }}" ]; then
TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
else
echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
fi
- name: Release
uses: softprops/action-gh-release@v2
with:
tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
repository: sgl-project/whl
token: ${{ secrets.WHL_TOKEN }}
files: |
sgl-kernel/dist/*
- name: Clone wheel index
run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
env:
WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
- name: Update wheel index
run: python3 scripts/update_kernel_whl_index.py --cuda 128
- name: Push wheel index
run: |
cd sgl-whl
git config --local user.name "github-actions[bot]"
git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
git add -A
git commit -m "update whl index"
git push
build-cu129-aarch64:
if: github.repository == 'sgl-project/sglang'
runs-on: sgl-kernel-release-node-arm
strategy:
matrix:
python-version: ["3.10"]
cuda-version: ["12.9"]
steps:
- uses: actions/checkout@v4
with:
submodules: "recursive"
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Build wheels
run: |
cd sgl-kernel
chmod +x ./build.sh
./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" aarch64
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}-aarch64
path: sgl-kernel/dist/*
release-cu129-aarch64:
needs: build-cu129-aarch64
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Download artifacts
uses: actions/download-artifact@v4
with:
path: sgl-kernel/dist/
merge-multiple: true
pattern: wheel-*
- name: Set tag name
id: set_tag_name
run: |
if [ -z "${{ inputs.tag_name }}" ]; then
TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
else
echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
fi
- name: Release
uses: softprops/action-gh-release@v2
with:
tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
repository: sgl-project/whl
token: ${{ secrets.WHL_TOKEN }}
files: |
sgl-kernel/dist/*
- name: Clone wheel index
run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
env:
WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
- name: Update wheel index
run: python3 scripts/update_kernel_whl_index.py --cuda 129
- name: Push wheel index
run: |
cd sgl-whl
git config --local user.name "github-actions[bot]"
git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
git add -A
git commit -m "update whl index"
git push
name: VLLM Dependency Test
on:
push:
branches: [ main ]
paths:
- "python/**"
- "scripts/ci/**"
- "test/**"
pull_request:
branches: [ main ]
paths:
- "python/**"
- "scripts/ci/**"
- "test/**"
concurrency:
group: vllm-dependency-test-${{ github.ref }}
cancel-in-progress: true
jobs:
vllm-dependency-test:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
github.event.pull_request.draft == false
runs-on: 1-gpu-runner
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install dependencies
run: |
bash scripts/ci/ci_install_dependency.sh
pip install "bitsandbytes>=0.44.0"
pip install "sgl-kernel==0.3.7"
- name: Run vLLM dependency tests
timeout-minutes: 60
run: |
export SGLANG_SKIP_SGL_KERNEL_VERSION_CHECK=1
cd test/srt
python3 run_suite.py --suite vllm_dependency_test --timeout-per-file 3600
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
# Tokenizer cache for tests
.tokenizer_cache/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
# MacOS
.DS_Store
# Vim
*.swp
# Documentation
docs/_build
# SGL
benchmark/mmlu/data
benchmark/mmlu/data.tar
benchmark/llava_bench/images
benchmark/llava_bench/mme_pack
*.jsonl
tmp*.txt
# Plots
*.png
*.pdf
# personnal
work_dirs/
*.csv
!logo.png
# Prerequisites
*.d
# Compiled Object files
*.slo
*.lo
*.o
*.obj
# Precompiled Headers
*.gch
*.pch
# Compiled Dynamic libraries
*.so
*.dylib
*.dll
# Fortran module files
*.mod
*.smod
# Compiled Static libraries
*.lai
*.la
*.a
*.lib
# Executables
*.exe
*.out
*.app
compile_commands.json
*.iml
# VSCode
.vscode
1
# Autoenv
.env.leave
# Rust lib
Cargo.lock
lmms-eval
[settings]
profile=black
known_first_party=sglang
default_stages: [pre-commit, pre-push, manual]
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: check-symlinks
- id: destroyed-symlinks
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
args: [--allow-multiple-documents]
- id: check-toml
- id: check-ast
- id: check-added-large-files
- id: check-merge-conflict
- id: check-shebang-scripts-are-executable
- id: detect-private-key
- id: debug-statements
- id: no-commit-to-branch
- repo: https://github.com/PyCQA/isort
rev: 5.13.2
hooks:
- id: isort
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.11.7
hooks:
- id: ruff
args: [--select=F401, --fixable=F401]
files: ^(benchmark/|docs/|examples/)
exclude: \.ipynb$
- repo: https://github.com/psf/black
rev: 24.10.0
hooks:
- id: black-jupyter
- repo: https://github.com/codespell-project/codespell
rev: v2.4.1
hooks:
- id: codespell
additional_dependencies: ['tomli']
args: ['--toml', 'python/pyproject.toml', '-L', 'cann,thi,makro,wil,rouge']
exclude: |
(?x)^(
test/srt/test_reasoning_parser\.py|
docs/advanced_features/vlm_query\.ipynb
)$
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v18.1.8
hooks:
- id: clang-format
types_or: [c++, cuda]
args: [--style=file, --verbose]
- repo: https://github.com/kynan/nbstripout
rev: 0.8.1
hooks:
- id: nbstripout
args:
- '--keep-output'
- '--extra-keys=metadata.kernelspec metadata.language_info.version'
This diff is collapsed.
#!/bin/bash
# Start profiling via API
curl http://localhost:30000/start_profile -H "Content-Type: application/json"
# Benchmark serving using sglang with random dataset and tokenizer
# Define the log file with a timestamp
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
LOGFILE="sglang_client_log_$TIMESTAMP.json"
# Run the benchmark with specified parameters and save logs
python3 -m sglang.bench_serving \
--backend sglang \
--tokenizer Xenova/grok-1-tokenizer \
--dataset-name random \
--random-input 1024\
--random-output 1024 \
--num-prompts 240 \
--request-rate 8 \
--output-file online.jsonl 2>&1 | tee "$LOGFILE"
# Stop profiling via API
curl http://localhost:30000/stop_profile -H "Content-Type: application/json"
# Convert tracing file to csv & json
sqlite3 trace.rpd ".mode csv" ".header on" ".output trace.csv" "select * from top;" ".output stdout"
python3 /sgl-workspace/rocmProfileData/tools/rpd2tracing.py trace.rpd trace.json
# download and install RPD
apt update && apt install -y sqlite3 libsqlite3-dev libfmt-dev
# install rpd module
git clone https://github.com/ROCmSoftwarePlatform/rocmProfileData
cd rocmProfileData
git apply rpd.patch
make && make install
cd rocpd_python && python setup.py install && cd ..
cd rpd_tracer && make clean;make install && python setup.py install && cd ..
#!/bin/bash
################################################################################
# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
OUTPUT_FILE="trace.rpd"
if [ "$1" = "-o" ] ; then
OUTPUT_FILE=$2
shift
shift
fi
if [ -e ${OUTPUT_FILE} ] ; then
rm ${OUTPUT_FILE}
fi
python3 -m rocpd.schema --create ${OUTPUT_FILE}
if [ $? != 0 ] ; then
echo "Error: Could not create rpd file. Please run 'python setup.py install' from the rocpd_python dir"
exit
fi
export RPDT_FILENAME=${OUTPUT_FILE}
export RPDT_AUTOSTART=0
LD_PRELOAD=librocm-smi_64:librpd_tracer.so "$@"
diff --git a/rpd_tracer/Makefile b/rpd_tracer/Makefile
index e9d9feb..b2e9e1a 100644
--- a/rpd_tracer/Makefile
+++ b/rpd_tracer/Makefile
@@ -16,7 +16,7 @@ ifneq (,$(HIP_PATH))
$(info Building with roctracer)
RPD_LIBS += -L/opt/rocm/lib -lroctracer64 -lroctx64 -lamdhip64 -lrocm_smi64
RPD_INCLUDES += -I/opt/rocm/include -I/opt/rocm/include/roctracer -I/opt/rocm/include/hsa
- RPD_SRCS += RoctracerDataSource.cpp RocmSmiDataSource.cpp
+ RPD_SRCS += RoctracerDataSource.cpp
RPD_INCLUDES += -D__HIP_PLATFORM_AMD__
endif
diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py
index 62d1ff9..9021c01 100644
--- a/python/sglang/srt/managers/scheduler.py
+++ b/python/sglang/srt/managers/scheduler.py
@@ -71,6 +71,8 @@ from sglang.srt.utils import (
suppress_other_loggers,
)
from sglang.utils import get_exception_traceback
+from rpdTracerControl import rpdTracerControl
+rpdTracerControl.skipCreate()
logger = logging.getLogger(__name__)
@@ -245,6 +247,7 @@ class Scheduler:
],
with_stack=True,
)
+ self.rpd = rpdTracerControl()
@torch.inference_mode()
def event_loop(self):
@@ -1027,15 +1030,24 @@ class Scheduler:
def start_profile(self) -> None:
if self.profiler is None:
raise RuntimeError("Profiler is not enabled.")
- self.profiler.start()
+ #self.profiler.start() #block pytorch profiler for rpd profiler enabling
+ if self.tp_rank == 0 or self.tp_rank == 1:
+ self.rpd.start()
+ self.rpd.rangePush("", "rpd profile range", "")
+ logger.info("rpd is enabled")
def stop_profile(self) -> None:
if self.profiler is None:
raise RuntimeError("Profiler is not enabled.")
- self.profiler.stop()
- self.profiler.export_chrome_trace(
- self.torch_profiler_trace_dir + "/" + str(time.time()) + ".trace.json.gz"
- )
+ #self.profiler.stop()
+ #self.profiler.export_chrome_trace(
+ # self.torch_profiler_trace_dir + "/" + str(time.time()) + ".trace.json.gz"
+ #)
+ if self.tp_rank ==0 or self.tp_rank ==1:
+ self.rpd.rangePop()
+ self.rpd.stop()
+ self.rpd.flush()
+ logger.info("rpd is done")
logger.info("Profiler is done")
diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py
index 62d1ff9..2edb427 100644
--- a/python/sglang/srt/managers/scheduler.py
+++ b/python/sglang/srt/managers/scheduler.py
@@ -71,6 +71,8 @@ from sglang.srt.utils import (
suppress_other_loggers,
)
from sglang.utils import get_exception_traceback
+from rpdTracerControl import rpdTracerControl
+rpdTracerControl.skipCreate()
logger = logging.getLogger(__name__)
@@ -245,6 +247,7 @@ class Scheduler:
],
with_stack=True,
)
+ self.rpd = rpdTracerControl()
@torch.inference_mode()
def event_loop(self):
@@ -1027,15 +1030,26 @@ class Scheduler:
def start_profile(self) -> None:
if self.profiler is None:
raise RuntimeError("Profiler is not enabled.")
- self.profiler.start()
+ #self.profiler.start()
+ logger.info("torch profiler is disabled")
+ if self.tp_rank == 0 or self.tp_rank == 1:
+ self.rpd.setPythonTrace(True)
+ self.rpd.start()
+ self.rpd.rangePush("", "scheduler", "")
+ logger.info("rpd is enabled inside scheduler profiling")
def stop_profile(self) -> None:
if self.profiler is None:
raise RuntimeError("Profiler is not enabled.")
- self.profiler.stop()
- self.profiler.export_chrome_trace(
- self.torch_profiler_trace_dir + "/" + str(time.time()) + ".trace.json.gz"
- )
+ #self.profiler.stop()
+ #self.profiler.export_chrome_trace(
+ # self.torch_profiler_trace_dir + "/" + str(time.time()) + ".trace.json.gz"
+ #)
+ if self.tp_rank ==0 or self.tp_rank ==1:
+ self.rpd.rangePop()
+ self.rpd.stop()
+ self.rpd.flush()
+ logger.info("rpd is done inside scheduler")
logger.info("Profiler is done")
diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py
index 2621ccd..181df85 100644
--- a/python/sglang/srt/managers/tokenizer_manager.py
+++ b/python/sglang/srt/managers/tokenizer_manager.py
@@ -58,6 +58,10 @@ from sglang.srt.sampling.sampling_params import SamplingParams
from sglang.srt.server_args import PortArgs, ServerArgs
from sglang.srt.utils import is_generation_model, is_multimodal_model
+from rpdTracerControl import rpdTracerControl
+rpdTracerControl.skipCreate()
+
+
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
logger = logging.getLogger(__name__)
@@ -514,10 +518,20 @@ class TokenizerManager:
self.send_to_scheduler.send_pyobj(req)
def start_profile(self):
+ rpd = rpdTracerControl()
+ rpd.setPythonTrace(True)
+ rpd.start()
+ rpd.rangePush("", "tokenizer_manager", "")
+ logger.info("tokenizer_manager rpd profiling started!")
req = ProfileReq.START_PROFILE
self.send_to_scheduler.send_pyobj(req)
def stop_profile(self):
+ rpd = rpdTracerControl()
+ rpd.rangePop()
+ rpd.stop()
+ rpd.flush()
+ logger.info("rpd profiling is done inside tokenizer_manager!")
req = ProfileReq.STOP_PROFILE
self.send_to_scheduler.send_pyobj(req)
diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py
index 7111c93..2bd722c 100644
--- a/python/sglang/srt/server.py
+++ b/python/sglang/srt/server.py
@@ -30,6 +30,8 @@ import threading
import time
from http import HTTPStatus
from typing import Dict, List, Optional, Union
+from rpdTracerControl import rpdTracerControl
+rpdTracerControl.skipCreate()
# Fix a bug of Python threading
setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
@@ -152,6 +154,11 @@ async def flush_cache():
@app.post("/start_profile")
async def start_profile():
"""Start profiling."""
+ rpd = rpdTracerControl()
+ rpd.setPythonTrace(True)
+ rpd.start()
+ rpd.rangePush("", "server rpd profile range", "")
+ logger.info("rpd profiling started in server.py!")
tokenizer_manager.start_profile()
return Response(
content="Start profiling.\n",
@@ -164,6 +171,11 @@ async def start_profile():
async def stop_profile():
"""Stop profiling."""
tokenizer_manager.stop_profile()
+ rpd = rpdTracerControl()
+ rpd.rangePop()
+ rpd.stop()
+ rpd.flush()
+ logger.info("rpd profiling is done in server.py!")
return Response(
content="Stop profiling. This will take some time.\n",
status_code=200,
#!/bin/bash
# export SGLANG_TORCH_PROFILER_DIR=/data/sglang/
export SGLANG_TORCH_PROFILER_DIR=/sgl-workspace/sglang/profile/
# Get the current timestamp
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
# Define the log file with a timestamp
LOGFILE="sglang_server_log_$TIMESTAMP.json"
# Run the Python command and save the output to the log file
loadTracer.sh python3 -m sglang.launch_server \
--model-path /sgl-workspace/sglang/dummy_grok1 \
--tokenizer-path Xenova/grok-1-tokenizer \
--load-format dummy \
--quantization fp8 \
--tp 8 \
--port 30000 \
--disable-radix-cache 2>&1 | tee "$LOGFILE"
diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py
index 62d1ff9..6ecd78c 100644
--- a/python/sglang/srt/managers/scheduler.py
+++ b/python/sglang/srt/managers/scheduler.py
@@ -240,7 +240,6 @@ class Scheduler:
)
self.profiler = torch.profiler.profile(
activities=[
- torch.profiler.ProfilerActivity.CPU,
torch.profiler.ProfilerActivity.CUDA,
],
with_stack=True,
@@ -1033,9 +1032,11 @@ class Scheduler:
if self.profiler is None:
raise RuntimeError("Profiler is not enabled.")
self.profiler.stop()
- self.profiler.export_chrome_trace(
- self.torch_profiler_trace_dir + "/" + str(time.time()) + ".trace.json.gz"
- )
+ if self.tp_rank == 0:
+ with open(f"stats_repro_{int(time.time())}.txt", "w") as f:
+ print(self.profiler.key_averages(group_by_input_shape=True).table(sort_by="cuda_time_total", row_limit=-1), file=f)
+ print("Profiling stats done.")
+
logger.info("Profiler is done")
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
.PHONY: check-deps install-deps format update help
# Show help for each target
help:
@echo "Available targets:"
@grep -E '^[a-zA-Z0-9_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'
check-deps: ## Check and install required Python formatting dependencies
@command -v isort >/dev/null 2>&1 || (echo "Installing isort..." && pip install isort)
@command -v black >/dev/null 2>&1 || (echo "Installing black..." && pip install black)
install-deps: ## Install Python formatting tools (isort and black)
pip install isort black
format: check-deps ## Format modified Python files using isort and black
@echo "Formatting modified Python files..."
git diff --name-only --diff-filter=M | grep '\.py$$' | xargs -I {} sh -c 'isort {} && black {}'
FILES_TO_UPDATE = docker/Dockerfile.rocm \
python/pyproject.toml \
python/sglang/version.py \
docs/developer_guide/setup_github_runner.md \
docs/get_started/install.md \
docs/platforms/amd_gpu.md \
docs/platforms/ascend_npu.md \
benchmark/deepseek_v3/README.md
update: ## Update version numbers across project files. Usage: make update <new_version>
@if [ -z "$(filter-out $@,$(MAKECMDGOALS))" ]; then \
echo "Version required. Usage: make update <new_version>"; \
exit 1; \
fi
@OLD_VERSION=$$(grep "version" python/sglang/version.py | cut -d '"' -f2); \
NEW_VERSION=$(filter-out $@,$(MAKECMDGOALS)); \
echo "Updating version from $$OLD_VERSION to $$NEW_VERSION"; \
for file in $(FILES_TO_UPDATE); do \
if [ "$(shell uname)" = "Darwin" ]; then \
sed -i '' -e "s/$$OLD_VERSION/$$NEW_VERSION/g" $$file; \
else \
sed -i -e "s/$$OLD_VERSION/$$NEW_VERSION/g" $$file; \
fi \
done; \
echo "Version update complete"
%:
@:
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment