Dockerfile_cu124 1.99 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
# Start from the PyTorch 2.5.1 development image (CUDA 12.4, cuDNN 9).
FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-devel AS base

# Relative paths used by the build steps resolve against /app.
WORKDIR /app

# Non-interactive apt front end and a UTF-8 C locale; ENV values apply to the
# remaining build steps and persist in the resulting image.
ENV DEBIAN_FRONTEND=noninteractive \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8

# Point both the archive and the security apt sources at the Tsinghua (TUNA)
# Ubuntu mirror.
RUN sed -i \
        -e 's|http://archive.ubuntu.com/ubuntu/|https://mirrors.tuna.tsinghua.edu.cn/ubuntu/|g' \
        -e 's|http://security.ubuntu.com/ubuntu/|https://mirrors.tuna.tsinghua.edu.cn/ubuntu/|g' \
        /etc/apt/sources.list

# Install OS-level packages: editors and terminal tools, archive and download
# utilities, the compiler toolchain (plus ccache), and native development and
# runtime libraries. The apt cache and package lists are removed in the same
# layer so they do not remain in the image.
RUN apt-get update && apt-get install -y \
        build-essential \
        ca-certificates \
        ccache \
        curl \
        git \
        iproute2 \
        kmod \
        libibverbs-dev \
        libnuma-dev \
        libsm6 \
        libxext6 \
        tmux \
        unzip \
        vim \
        wget \
        zip \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

# Make the Tsinghua (TUNA) PyPI mirror the default index for pip.
RUN pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

# Install (or upgrade to the latest release of) the build and developer tooling.
RUN pip install --no-cache-dir --upgrade \
        packaging \
        ninja \
        cmake \
        scikit-build-core \
        uv \
        ruff \
        pre-commit

# Build vLLM from a fresh clone and install it in editable mode (-e), so the
# checkout under ./vllm has to stay in the image.
#  - use_existing_torch.py is a script shipped in the vLLM repository.
#    NOTE(review): presumably it adjusts the requirement files so the torch
#    already in the base image is reused -- confirm against the vLLM repo.
#  - --no-build-isolation makes pip build in the current environment instead
#    of an isolated one.
#  - --no-cache-dir on both pip calls keeps pip's download cache out of the layer.
RUN git clone https://github.com/vllm-project/vllm.git \
    && cd vllm \
    && python use_existing_torch.py \
    && pip install --no-cache-dir -r requirements/build.txt \
    && pip install --no-cache-dir --no-build-isolation -v -e .

# Clone sglang and run the `build` target, then the `clean` target, of the
# Makefile in its sgl-kernel subdirectory.
RUN git clone https://github.com/sgl-project/sglang.git \
    && cd sglang/sgl-kernel \
    && make build \
    && make clean

# Python libraries installed from the configured pip index, without keeping
# pip's download cache in the layer.
RUN pip install --no-cache-dir \
        diffusers \
        transformers \
        tokenizers \
        accelerate \
        safetensors \
        opencv-python \
        numpy \
        imageio \
        imageio-ffmpeg \
        einops \
        loguru \
        qtorch \
        ftfy \
        easydict

LiangLiu's avatar
LiangLiu committed
31
32
# Install ffmpeg 8.0.0 from the conda-forge channel and symlink the binary to
# /usr/bin/ffmpeg. conda's package cache is cleaned in the same layer so the
# downloaded archives do not stay in the image.
RUN conda install -y conda-forge::ffmpeg=8.0.0 \
    && conda clean --all --yes \
    && ln -s /opt/conda/bin/ffmpeg /usr/bin/ffmpeg

33
34
35
36
37
38
# Fetch flash-attention together with its submodules.
RUN git clone --recursive https://github.com/Dao-AILab/flash-attention.git

# Install the package at the repository root, then delete its build directory
# within the same layer.
RUN cd flash-attention \
    && python setup.py install \
    && rm -rf build

# Do the same for the separate package under flash-attention/hopper.
RUN cd flash-attention/hopper \
    && python setup.py install \
    && rm -rf build

Yang Yong(雍洋)'s avatar
Yang Yong(雍洋) committed
39
# Fetch the ModelTC fork of SageAttention.
RUN git clone https://github.com/ModelTC/SageAttention.git

# Editable install (-e) of SageAttention; the checkout must stay in the image.
# The variables are set only for this pip invocation.
# NOTE(review): CUDA_ARCHITECTURES looks like a list of target GPU compute
# capabilities, and EXT_PARALLEL / NVCC_APPEND_FLAGS / MAX_JOBS look like
# build-parallelism knobs -- confirm against SageAttention's setup.py.
RUN cd SageAttention \
    && CUDA_ARCHITECTURES="8.0,8.6,8.9,9.0" \
       EXT_PARALLEL=4 \
       NVCC_APPEND_FLAGS="--threads 8" \
       MAX_JOBS=32 \
       pip install --no-cache-dir -v -e .
42

43
44
45
46
# Fetch q8_kernels (submodules are initialised in the next step).
RUN git clone https://github.com/KONAKONA666/q8_kernels.git

# Initialise and check out the submodules, install the package, and delete the
# build directory within the same layer.
RUN cd q8_kernels \
    && git submodule update --init \
    && python setup.py install \
    && rm -rf build

47
# Switch the working directory to /workspace: any later instructions, and
# containers started from the image unless overridden, run from there.
WORKDIR /workspace