# sglang · Commit 566d61d9

**ROCm: bump 6.3.0 (#3259)**

Authored Feb 02, 2025 by HAI; committed via GitHub on Feb 03, 2025 (unverified). Parent: 55f5fc68.

Showing 7 changed files with 28 additions and 22 deletions:

| File | Changes |
| --- | --- |
| `.github/workflows/release-docker-amd.yml` | +3 / -3 |
| `docker/Dockerfile.rocm` | +2 / -2 |
| `docs/developer/setup_github_runner.md` | +2 / -2 |
| `docs/start/install.md` | +3 / -3 |
| `python/pyproject.toml` | +8 / -10 |
| `python/sglang/srt/constrained/outlines_backend.py` | +9 / -1 |
| `python/sglang/srt/custom_op.py` | +1 / -1 |
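Taken together, the changes move the AMD release pipeline, Docker image, and docs from ROCm 6.2.0 to 6.3.0 on a new `rocm/vllm-dev` base image, split the `outlines` dependency out of `runtime_common` so the HIP extra can pin a newer release, and make `CustomOp.forward_hip` fall back to the native PyTorch path instead of raising.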
## .github/workflows/release-docker-amd.yml

```diff
@@ -14,7 +14,7 @@ jobs:
     environment: 'prod'
     strategy:
       matrix:
-        rocm_version: ['6.2.0']
+        rocm_version: ['6.3.0']
         build_type: ['all', 'srt']
     steps:
       - name: Checkout repository
@@ -41,8 +41,8 @@ jobs:
       run: |
         version=$(cat python/sglang/version.py | cut -d'"' -f2)
-        if [ "${{ matrix.rocm_version }}" = "6.2.0" ]; then
-          rocm_tag="rocm620"
+        if [ "${{ matrix.rocm_version }}" = "6.3.0" ]; then
+          rocm_tag="rocm630"
         else
           echo "Unsupported ROCm version"
           exit 1
```
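The tag the workflow builds is just the ROCm version with the dots stripped. A minimal Python sketch of the same mapping, for illustration only (the `supported` set is an assumption; the workflow hard-codes a single version in its matrix):

```python
# Illustrative sketch, not part of the workflow: mirror the shell logic
# that turns a ROCm version into a Docker tag suffix, e.g. "6.3.0" -> "rocm630".
def rocm_tag(version: str) -> str:
    supported = {"6.3.0"}  # assumption: mirrors the single-entry matrix above
    if version not in supported:
        raise ValueError("Unsupported ROCm version")
    return "rocm" + version.replace(".", "")

assert rocm_tag("6.3.0") == "rocm630"
```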
## docker/Dockerfile.rocm

```diff
 # Usage (to build SGLang ROCm docker image):
-#   docker build --build-arg SGL_BRANCH=v0.4.2.post1 -t v0.4.2.post1-rocm620 -f Dockerfile.rocm .
+#   docker build --build-arg SGL_BRANCH=v0.4.2.post1 -t v0.4.2.post1-rocm630 -f Dockerfile.rocm .

 # default base image
-ARG BASE_IMAGE="rocmshared/vllm-rocm:20250114-tuned-elementwise-layernorm"
+ARG BASE_IMAGE="rocm/vllm-dev:20250114"

 FROM $BASE_IMAGE AS base
 USER root
```
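Because the base image is declared with `ARG`, it can also be swapped at build time without editing the file, e.g. `docker build --build-arg BASE_IMAGE=rocm/vllm-dev:<other-tag> ...` (standard Docker `--build-arg` behavior; the alternate tag is a placeholder, not one this commit references).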
## docs/developer/setup_github_runner.md

````diff
@@ -11,9 +11,9 @@ docker pull nvidia/cuda:12.1.1-devel-ubuntu22.04
 # Nvidia
 docker run --shm-size 128g -it -v /tmp/huggingface:/hf_home --gpus all nvidia/cuda:12.1.1-devel-ubuntu22.04 /bin/bash
 # AMD
-docker run --rm --device=/dev/kfd --device=/dev/dri --group-add video --shm-size 128g -it -v /tmp/huggingface:/hf_home lmsysorg/sglang:v0.4.2.post1-rocm620 /bin/bash
+docker run --rm --device=/dev/kfd --device=/dev/dri --group-add video --shm-size 128g -it -v /tmp/huggingface:/hf_home lmsysorg/sglang:v0.4.2.post1-rocm630 /bin/bash
 # AMD just the last 2 GPUs
-docker run --rm --device=/dev/kfd --device=/dev/dri/renderD176 --device=/dev/dri/renderD184 --group-add video --shm-size 128g -it -v /tmp/huggingface:/hf_home lmsysorg/sglang:v0.4.2.post1-rocm620 /bin/bash
+docker run --rm --device=/dev/kfd --device=/dev/dri/renderD176 --device=/dev/dri/renderD184 --group-add video --shm-size 128g -it -v /tmp/huggingface:/hf_home lmsysorg/sglang:v0.4.2.post1-rocm630 /bin/bash
 ```
 ### Step 2: Configure the runner by `config.sh`
````
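Once inside the container, a quick way to confirm the mapped GPUs are visible is a sketch like the following, assuming the image ships a ROCm build of PyTorch (ROCm wheels expose the `torch.cuda` API via HIP):

```python
# Sanity check inside the ROCm container; assumes a ROCm build of PyTorch.
import torch

print(torch.cuda.is_available())  # True when the mapped GPUs are usable
print(torch.cuda.device_count())  # should match the devices passed to docker run
print(torch.version.hip)          # HIP/ROCm version of the wheel (None on CUDA builds)
```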
## docs/start/install.md

````diff
@@ -54,7 +54,7 @@ docker run --gpus all \
 Note: To AMD ROCm system with Instinct/MI GPUs, it is recommended to use `docker/Dockerfile.rocm` to build images, example and usage as below:

 ```bash
-docker build --build-arg SGL_BRANCH=v0.4.2.post1 -t v0.4.2.post1-rocm620 -f Dockerfile.rocm .
+docker build --build-arg SGL_BRANCH=v0.4.2.post1 -t v0.4.2.post1-rocm630 -f Dockerfile.rocm .
 alias drun='docker run -it --rm --network=host --device=/dev/kfd --device=/dev/dri --ipc=host \
     --shm-size 16G --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
@@ -63,11 +63,11 @@ alias drun='docker run -it --rm --network=host --device=/dev/kfd --device=/dev/d
 drun -p 30000:30000 \
     -v ~/.cache/huggingface:/root/.cache/huggingface \
     --env "HF_TOKEN=<secret>" \
-    v0.4.2.post1-rocm620 \
+    v0.4.2.post1-rocm630 \
     python3 -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --host 0.0.0.0 --port 30000

 # Till flashinfer backend available, --attention-backend triton --sampling-backend pytorch are set by default
-drun v0.4.2.post1-rocm620 python3 -m sglang.bench_one_batch --batch-size 32 --input 1024 --output 128 --model amd/Meta-Llama-3.1-8B-Instruct-FP8-KV --tp 8 --quantization fp8
+drun v0.4.2.post1-rocm630 python3 -m sglang.bench_one_batch --batch-size 32 --input 1024 --output 128 --model amd/Meta-Llama-3.1-8B-Instruct-FP8-KV --tp 8 --quantization fp8
 ```

 ## Method 4: Using docker compose
````
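The same tag is also pulled as a prebuilt image in the runner-setup doc above, so `docker pull lmsysorg/sglang:v0.4.2.post1-rocm630` should be an alternative to building locally, assuming the image is published to Docker Hub as that doc implies.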
## python/pyproject.toml

```diff
@@ -19,31 +19,29 @@ dependencies = ["requests", "tqdm", "numpy", "IPython", "setproctitle"]
 runtime_common = ["aiohttp", "decord", "fastapi", "hf_transfer",
     "huggingface_hub", "interegular", "modelscope",
-    "orjson", "outlines>=0.0.44,<0.1.0", "packaging", "pillow",
+    "orjson", "packaging", "pillow",
     "prometheus-client>=0.20.0", "psutil", "pydantic", "python-multipart",
     "pyzmq>=25.1.2", "torchao>=0.7.0", "uvicorn", "uvloop", "xgrammar>=0.1.10"]
-srt = ["sglang[runtime_common]", "cuda-python", "sgl-kernel>=0.0.3.post1", "torch", "vllm==0.6.4.post1", "flashinfer==0.1.6"]
+srt = ["sglang[runtime_common]", "cuda-python", "sgl-kernel>=0.0.3.post1", "torch", "vllm==0.6.4.post1", "flashinfer==0.1.6", "outlines>=0.0.44,<0.1.0"]

 # HIP (Heterogeneous-computing Interface for Portability) for AMD
 # => base docker rocm/vllm-dev:20241022, not from public vllm whl
-srt_hip = ["sglang[runtime_common]", "torch", "vllm==0.6.3.post2.dev1"]
+srt_hip = ["sglang[runtime_common]", "torch", "vllm==0.6.7.dev2", "outlines==0.1.11"]

 # xpu is not enabled in public vllm and torch whl,
 # need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html install vllm
-srt_xpu = ["sglang[runtime_common]"]
+srt_xpu = ["sglang[runtime_common]", "outlines>=0.0.44,<0.1.0"]

 # For Intel Gaudi (device: hpu) follow the installation guide
 # https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
-srt_hpu = ["sglang[runtime_common]"]
+srt_hpu = ["sglang[runtime_common]", "outlines>=0.0.44,<0.1.0"]

 # CPU: currently, there are no pre-built vllm wheels for CPU.
 # To install vllm for CPU, please follow the instruction here:
 # https://docs.vllm.ai/en/latest/getting_started/installation/cpu/index.html
-srt_cpu = ["sglang[runtime_common]", "torch"]
+srt_cpu = ["sglang[runtime_common]", "torch", "outlines>=0.0.44,<0.1.0"]

 openai = ["openai>=1.0", "tiktoken"]
 anthropic = ["anthropic>=0.20.0"]
```
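With `outlines` now pinned per backend rather than in `runtime_common`, an environment built from one of these extras can be inspected at runtime. A hypothetical check, not part of the commit:

```python
# Hypothetical check: report which outlines release the environment resolved.
# HIP installs pin outlines==0.1.11, while the CUDA/XPU/HPU/CPU extras keep
# outlines>=0.0.44,<0.1.0.
import importlib.metadata

print(importlib.metadata.version("outlines"))
```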
## python/sglang/srt/constrained/outlines_backend.py

```diff
@@ -20,7 +20,6 @@ from typing import Dict, List, Optional, Tuple, Union
 import interegular
 import torch
 from outlines.fsm.guide import RegexGuide
-from outlines.fsm.json_schema import build_regex_from_schema
 from outlines.models.transformers import TransformerTokenizer
 from pydantic import BaseModel
@@ -29,6 +28,15 @@ from sglang.srt.constrained.base_grammar_backend import (
     BaseGrammarObject,
 )
 from sglang.srt.constrained.outlines_jump_forward import OutlinesJumpForwardMap
+from sglang.srt.utils import is_hip
+
+is_hip_ = is_hip()
+
+if is_hip_:
+    from outlines_core.fsm.json_schema import build_regex_from_schema
+else:
+    from outlines.fsm.json_schema import build_regex_from_schema

 logger = logging.getLogger(__name__)
```
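Both import paths expose the same entry point: a function that compiles a JSON schema, passed as a string, into an equivalent regular expression for the FSM-guided decoder. A minimal usage sketch (the schema here is made up for illustration):

```python
# Illustrative only: compile a JSON schema string into a regex that the
# Outlines FSM can enforce token-by-token during decoding.
import json

schema = json.dumps({
    "type": "object",
    "properties": {"name": {"type": "string"}},
    "required": ["name"],
})
regex_str = build_regex_from_schema(schema)  # returns a regex as a plain string
```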
## python/sglang/srt/custom_op.py

```diff
@@ -20,7 +20,7 @@ class CustomOp(nn.Module):
         raise NotImplementedError

     def forward_hip(self, *args, **kwargs):
-        raise NotImplementedError
+        return self.forward_native(*args, **kwargs)

     def forward_xpu(self, *args, **kwargs):
         return self.forward_native(*args, **kwargs)
```
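`CustomOp` routes its forward pass to a backend-specific method, and this change makes HIP behave like XPU: fall back to the portable PyTorch implementation rather than raise. A minimal sketch of the dispatch pattern, assuming a hypothetical `is_hip()` probe (the real class dispatches over more backends):

```python
import torch
import torch.nn as nn

def is_hip() -> bool:
    # Hypothetical platform probe for this sketch; sglang has its own helper.
    return torch.version.hip is not None

class CustomOp(nn.Module):
    """Sketch of the dispatch pattern: forward() picks a backend-specific
    implementation, and backends without a tuned kernel fall back to the
    portable PyTorch version."""

    def forward(self, *args, **kwargs):
        if is_hip():
            return self.forward_hip(*args, **kwargs)
        return self.forward_native(*args, **kwargs)

    def forward_native(self, *args, **kwargs):
        raise NotImplementedError  # subclasses implement the portable path

    def forward_hip(self, *args, **kwargs):
        # After this commit: fall back to the native implementation.
        return self.forward_native(*args, **kwargs)
```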