Unverified Commit d8e9d61f authored by HAI's avatar HAI Committed by GitHub
Browse files

[Build, ROCm] Dockerfile.rocm for Instinct GPUs, with package updates (#1861)

parent a2e0424a
# Usage (to build SGLang ROCm docker image):
#   docker build --build-arg SGL_BRANCH=v0.3.4.post2 -t sglang-rocm -f Dockerfile.rocm .
# (image names/tags must be lowercase)

# Default base image: ROCm vLLM dev image, which provides torch + vllm built for HIP.
ARG BASE_IMAGE="rocm/vllm-dev:20241022"

FROM $BASE_IMAGE AS base
USER root

WORKDIR /sgl-workspace

# Build-time knobs (ARG, not ENV, so they do not leak into the runtime environment):
#   SGL_REPO    - git repository to clone
#   SGL_DEFAULT - name of the default branch
#   SGL_BRANCH  - branch/tag to build; defaults to SGL_DEFAULT ("main")
#   BUILD_TYPE  - "srt" installs runtime-only extras; anything else installs all HIP extras
ARG SGL_REPO="https://github.com/sgl-project/sglang"
ARG SGL_DEFAULT="main"
ARG SGL_BRANCH=${SGL_DEFAULT}
ARG BUILD_TYPE=all

# Clone, optionally switch branch, install, and purge the pip cache in ONE layer
# so no transient files persist in intermediate layers.
RUN git clone ${SGL_REPO} \
 && cd sglang \
 && if [ "${SGL_BRANCH}" = "${SGL_DEFAULT}" ]; then \
      echo "Using ${SGL_DEFAULT}, default branch."; \
    else \
      echo "Using ${SGL_BRANCH} branch."; \
      git checkout ${SGL_BRANCH}; \
    fi \
 && if [ "$BUILD_TYPE" = "srt" ]; then \
      python -m pip --no-cache-dir install -e "python[srt_hip]"; \
    else \
      python -m pip --no-cache-dir install -e "python[all_hip]"; \
    fi \
 && python -m pip cache purge

# Keep a copy of the source tree at a stable path.
# NOTE(review): presumably downstream tooling expects /sglang — confirm before removing.
RUN cp -r /sgl-workspace/sglang /sglang

# Performance environment variables (these ARE intended for the running container).
ENV HIP_FORCE_DEV_KERNARG=1
ENV SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1
ENV NCCL_MIN_NCHANNELS=112
ENV MOE_PADDING=1
ENV VLLM_FP8_PADDING=1
ENV VLLM_FP8_ACT_PADDING=1
ENV VLLM_FP8_WEIGHT_PADDING=1
ENV VLLM_FP8_REDUCE_CONV=1

# NOTE(review): container stays root — the ROCm dev base may require device access;
# consider a non-root USER if the runtime permits.
CMD ["/bin/bash"]
...@@ -20,9 +20,12 @@ runtime_common = ["aiohttp", "decord", "fastapi", "hf_transfer", "huggingface_hu ...@@ -20,9 +20,12 @@ runtime_common = ["aiohttp", "decord", "fastapi", "hf_transfer", "huggingface_hu
"orjson", "packaging", "pillow", "psutil", "pydantic", "python-multipart", "orjson", "packaging", "pillow", "psutil", "pydantic", "python-multipart",
"torchao", "uvicorn", "uvloop", "zmq", "torchao", "uvicorn", "uvloop", "zmq",
"outlines>=0.0.44", "modelscope"] "outlines>=0.0.44", "modelscope"]
srt = ["sglang[runtime_common]", "torch", "vllm==0.6.3.post1"]
# HIP (Heterogeneous-computing Interface for Portability) for AMD
# => base docker rocm/vllm-dev:20241022, not from public vllm whl
srt_hip = ["sglang[runtime_common]", "torch", "vllm==0.6.3.dev13"]
# xpu is not enabled in public vllm and torch whl,
# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html to install vllm
srt = ["sglang[runtime_common]", "torch", "vllm==0.6.3.post1"]
srt_xpu = ["sglang[runtime_common]"] srt_xpu = ["sglang[runtime_common]"]
openai = ["openai>=1.0", "tiktoken"] openai = ["openai>=1.0", "tiktoken"]
...@@ -37,8 +40,10 @@ test = [ ...@@ -37,8 +40,10 @@ test = [
"peft", "peft",
] ]
all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"] all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"] all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
dev = ["sglang[all]", "sglang[test]"] dev = ["sglang[all]", "sglang[test]"]
dev_hip = ["sglang[all_hip]", "sglang[test]"]
dev_xpu = ["sglang[all_xpu]", "sglang[test]"] dev_xpu = ["sglang[all_xpu]", "sglang[test]"]
[project.urls] [project.urls]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment