# CUDA toolkit version that selects which prebuilt Python wheels get installed.
# It is declared before FROM, so it has to be re-declared inside the build
# stage before a RUN instruction can read it.
ARG CUDA_VERSION=12.5.1

FROM nvcr.io/nvidia/tritonserver:24.04-py3-min

# Selects the sglang extras to install: "srt" installs python[srt]; any other
# value installs python[all].
ARG BUILD_TYPE=all

# Keep apt/debconf from prompting while the image is being built.
ENV DEBIAN_FRONTEND=noninteractive

# System layer: preseed the tzdata answers, install Python 3.10 from the
# deadsnakes PPA and make it the default python3, add the RDMA/InfiniBand
# userspace packages plus basic tooling, then bootstrap pip with get-pip.py.
#
# apt-get is used instead of apt because apt warns that its CLI is not stable
# for scripted use. curl runs with -f so an HTTP error fails the build instead
# of saving an error page, and the downloaded installer is deleted in the same
# layer so it does not ship in the image.
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt-get update -y \
    && apt-get install -y software-properties-common \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update -y \
    && apt-get install -y \
        python3.10 \
        python3.10-dev \
        python3.10-distutils \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 \
    && update-alternatives --set python3 /usr/bin/python3.10 \
    && apt-get install -y \
        curl \
        git \
        ibverbs-providers \
        infiniband-diags \
        libibumad3 \
        libibverbs-dev \
        libibverbs1 \
        libnl-3-200 \
        libnl-route-3-200 \
        librdmacm1 \
        openssh-server \
        perftest \
        rdma-core \
        sudo \
    && curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py \
    && python3 /tmp/get-pip.py \
    && rm -f /tmp/get-pip.py \
    && python3 --version \
    && python3 -m pip --version \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# For openbmb/MiniCPM models
# Invoked as "python3 -m pip" like the other pip calls in this file, so the
# package lands in the interpreter selected by update-alternatives;
# --no-cache-dir keeps pip's download cache out of the layer.
RUN python3 -m pip install --no-cache-dir datamodel_code_generator

# Working directory for the remaining build steps and for the final image;
# the sglang repository is cloned beneath it.
WORKDIR /sgl-workspace

# Re-declare the pre-FROM build argument so it is visible inside this stage.
ARG CUDA_VERSION

# Install PyTorch, FlashInfer and an editable checkout of sglang.
#
#   CUDA_VERSION     wheel index used for torch / flashinfer
#   12.1.1           cu121
#   12.4.1, 12.5.1   cu124  (12.5.1 reuses the cu124 wheels)
#   11.8.0           cu118  (also installs sgl-kernel from the cu118 index)
#   anything else    build aborts with "Unsupported CUDA version"
#
# BUILD_TYPE=srt installs the "srt" extras; every other value installs "all".
#
# The CUDA version is validated first so an unsupported value fails before any
# download happens. Every step is joined with && so that a failed install can
# never be hidden by a later command that succeeds, and --no-cache-dir keeps
# pip's download cache (notably the torch wheel) out of the image layer.
RUN case "$CUDA_VERSION" in \
        12.1.1) CUINDEX=121 ;; \
        12.4.1|12.5.1) CUINDEX=124 ;; \
        11.8.0) CUINDEX=118 ;; \
        *) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
    esac \
    && if [ "$BUILD_TYPE" = "srt" ]; then EXTRAS=srt; else EXTRAS=all; fi \
    && python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5lib six \
    && git clone --depth=1 https://github.com/sgl-project/sglang.git \
    && python3 -m pip install --no-cache-dir torch --index-url "https://download.pytorch.org/whl/cu${CUINDEX}" \
    && if [ "$CUINDEX" = "118" ]; then \
         python3 -m pip install --no-cache-dir sgl-kernel -i https://docs.sglang.ai/whl/cu118; \
       fi \
    && cd sglang \
    && python3 -m pip --no-cache-dir install -e "python[${EXTRAS}]" --find-links "https://flashinfer.ai/whl/cu${CUINDEX}/torch2.5/flashinfer-python"

# Override the build-time "noninteractive" setting for the final image.
# NOTE(review): "interactive" does not appear among the frontend names listed
# in debconf(7) (dialog, readline, noninteractive, ...); confirm this value has
# the intended effect, or whether the documented default "dialog" was meant.
ENV DEBIAN_FRONTEND=interactive