Dockerfile 1.44 KB
Newer Older
dongjw's avatar
dongjw committed
1
FROM pytorch/pytorch:2.5.1-cuda12.1-cudnn9-devel as compile_server
2
3


liam's avatar
liam committed
4
ARG CPU_INSTRUCT=NATIVE
dongjw's avatar
dongjw committed
5
6

# 设置工作目录和 CUDA 路径
7
WORKDIR /workspace
dongjw's avatar
dongjw committed
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
ENV CUDA_HOME=/usr/local/cuda



# 安装依赖
RUN apt update -y
RUN apt install -y --no-install-recommends \
    libtbb-dev \
    libssl-dev \
    libcurl4-openssl-dev \
    libaio1 \
    libaio-dev \
    libfmt-dev \
    libgflags-dev \
    zlib1g-dev \
    patchelf \
24
25
26
27
28
    git \
    wget \
    vim \
    gcc \
    g++ \
dongjw's avatar
dongjw committed
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
    cmake
# 拷贝代码
RUN git clone https://github.com/kvcache-ai/ktransformers.git 
# 清理 apt 缓存
RUN rm -rf /var/lib/apt/lists/*

# 进入项目目录
WORKDIR /workspace/ktransformers
# 初始化子模块
RUN git submodule update --init --recursive

# 升级 pip
RUN pip install --upgrade pip

# 安装构建依赖
RUN pip install ninja pyproject numpy cpufeature aiohttp zmq openai

# 安装 flash-attn(提前装可以避免后续某些编译依赖出错)
RUN pip install flash-attn

# 安装 ktransformers 本体(含编译)
RUN CPU_INSTRUCT=${CPU_INSTRUCT} \
    USE_BALANCE_SERVE=1 \
    KTRANSFORMERS_FORCE_BUILD=TRUE \
    TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9;9.0+PTX" \
    pip install . --no-build-isolation --verbose

RUN pip install third_party/custom_flashinfer/
# 清理 pip 缓存
RUN pip cache purge

# 拷贝 C++ 运行时库
RUN cp /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /opt/conda/lib/

# 保持容器运行(调试用)
liam Yuan's avatar
liam Yuan committed
64
ENTRYPOINT ["tail", "-f", "/dev/null"]