# Base image: PyTorch 2.5.1 with CUDA 12.1 / cuDNN 9 (devel variant, per the
# image tag). The stage is named so it can be referenced by name.
FROM pytorch/pytorch:2.5.1-cuda12.1-cudnn9-devel AS compile_server

# Proxy used while building.
# Declared as ARG rather than ENV: the values are still exported to every RUN
# step of this stage, but they are not baked into the environment of
# containers started from the image. Override or disable at build time, e.g.
#   docker build --build-arg http_proxy= --build-arg https_proxy= --build-arg all_proxy= .
# NOTE(review): 127.0.0.1 refers to the build container itself unless the
# build uses host networking — confirm the intended build setup.
ARG http_proxy=http://127.0.0.1:20181
ARG https_proxy=http://127.0.0.1:20181
ARG all_proxy=http://127.0.0.1:20181

# Build argument forwarded to the ktransformers build as the CPU_INSTRUCT
# environment variable (see the `pip install .` step). Defaults to NATIVE;
# override with:
#   docker build --build-arg CPU_INSTRUCT=<value> .
# NOTE(review): presumably selects the CPU instruction-set target of the
# native build — confirm against the ktransformers build scripts.
ARG CPU_INSTRUCT=NATIVE

# Set the working directory and export the CUDA install location as CUDA_HOME.
WORKDIR /workspace
ENV CUDA_HOME=/usr/local/cuda



# Install system dependencies.
# `apt-get update`, the install and the package-list cleanup run in a single
# layer: a cached `update` layer can never be paired with a newer install, and
# the list files are removed before the layer is committed (removing them in a
# later RUN would not shrink the image). Packages are listed alphabetically.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        cmake \
        g++ \
        gcc \
        git \
        libaio-dev \
        libaio1 \
        libcurl4-openssl-dev \
        libfmt-dev \
        libgflags-dev \
        libssl-dev \
        libtbb-dev \
        patchelf \
        vim \
        wget \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Fetch the ktransformers sources (cloned into ./ktransformers under the
# current working directory).
RUN git clone https://github.com/kvcache-ai/ktransformers.git

# Move into the cloned repository, switch to the work-concurrent branch and
# then initialise every submodule, including nested ones.
WORKDIR /workspace/ktransformers
RUN git checkout work-concurrent \
    && git submodule update --init --recursive

# All pip steps use --no-cache-dir so that no download/wheel cache is written
# into an image layer in the first place; purging the cache in a later RUN
# would not reduce the size of the layers that already contain it.

# Upgrade pip.
RUN pip install --no-cache-dir --upgrade pip

# Install the Python packages needed before building.
RUN pip install --no-cache-dir \
        aiohttp \
        cpufeature \
        ninja \
        numpy \
        openai \
        pyproject \
        zmq

# Install flash-attn ahead of ktransformers (per the original note, installing
# it early avoids dependency errors in later compile steps).
# NOTE(review): the flash-attn README recommends `--no-build-isolation`;
# confirm whether it is needed with the upgraded pip.
RUN pip install --no-cache-dir flash-attn

# Build and install ktransformers itself from the checked-out sources.
# The variables are set only for this command; CPU_INSTRUCT comes from the
# build argument of the same name. --no-build-isolation builds against the
# packages already installed in the image instead of an isolated environment.
RUN CPU_INSTRUCT="${CPU_INSTRUCT}" \
    USE_BALANCE_SERVE=1 \
    KTRANSFORMERS_FORCE_BUILD=TRUE \
    TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9;9.0+PTX" \
    pip install --no-cache-dir . --no-build-isolation --verbose

# Install the flashinfer copy bundled under third_party/.
RUN pip install --no-cache-dir third_party/custom_flashinfer/

# Copy the system C++ runtime library into conda's lib directory.
# NOTE(review): presumably so that programs resolving libraries from
# /opt/conda/lib pick up the system libstdc++ the extensions were built
# against — confirm.
RUN cp /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /opt/conda/lib/libstdc++.so.6

# Keep the container running (for debugging): `tail -f /dev/null` never
# exits on its own, so the container stays up until it is stopped.
ENTRYPOINT ["tail", "-f", "/dev/null"]