Dockerfile 9.38 KB
Newer Older
Michael Yang's avatar
Michael Yang committed
1
2
3
# vim: filetype=dockerfile

# Global build arguments. They are declared before the first FROM, so they can
# be used in FROM lines; a stage that needs one of them inside its own
# instructions redeclares it with a bare `ARG NAME`.

# Stage that the `archive` stage is built FROM; defaults to the target architecture.
ARG FLAVOR=${TARGETARCH}
# Job count passed to `cmake --parallel` by the compile stages.
ARG PARALLEL=8

# Tag prefix of the rocm/dev-almalinux-8 image used for base-amd64.
ARG ROCMVERSION=6.3.3
# Tags of the nvcr.io/nvidia/l4t-jetpack images used by the jetpack-5 / jetpack-6 stages.
ARG JETPACK5VERSION=r35.4.1
ARG JETPACK6VERSION=r36.4.0
# CMake release downloaded from the Kitware GitHub releases page.
ARG CMAKEVERSION=3.31.2
# Vulkan SDK release downloaded from sdk.lunarg.com.
ARG VULKANVERSION=1.4.321.1
Michael Yang's avatar
Michael Yang committed
11

Daniel Hiltgen's avatar
Daniel Hiltgen committed
12
# amd64 toolchain base.
# gcc 10 is the minimum supported compiler, but v10.3 has regressions, so the
# toolchain is pinned to the 10.2.1 packages from the Rocky Linux 8.5 AppStream
# vault, the latest compatible version.
FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64
# Adds the Rocky 8.5 AppStream repo (and its signing key), installs the pinned
# gcc-toolset-10 packages plus ccache, then registers the NVIDIA CUDA repo so
# later stages can `dnf install` a cuda-toolkit package.
RUN yum install -y yum-utils \
    && yum-config-manager --add-repo https://dl.rockylinux.org/vault/rocky/8.5/AppStream/\$basearch/os/ \
    && rpm --import https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-8 \
    && dnf install -y yum-utils ccache gcc-toolset-10-gcc-10.2.1-8.2.el8 gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 gcc-toolset-10-binutils-2.35-11.el8 \
    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
# Put the gcc-toolset-10 binaries ahead of the system compiler.
ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH
ARG VULKANVERSION
# Downloads and unpacks the Vulkan SDK (referenced afterwards as /${VULKANVERSION}),
# then runs the SDK's `vulkansdk` script for the vulkan-headers and shaderc targets.
# ninja-build and a `python` symlink are installed first.
RUN wget https://sdk.lunarg.com/sdk/download/${VULKANVERSION}/linux/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz -O /tmp/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz \
    && tar xvf /tmp/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz \
    && dnf -y install ninja-build \
    && ln -s /usr/bin/python3 /usr/bin/python \
    && /${VULKANVERSION}/vulkansdk -j 8 vulkan-headers \
    && /${VULKANVERSION}/vulkansdk -j 8 shaderc
# Expose the SDK headers, libraries and tools through the default search paths.
RUN cp -r /${VULKANVERSION}/x86_64/include/* /usr/local/include/ \
    && cp -r /${VULKANVERSION}/x86_64/lib/* /usr/local/lib
ENV PATH=/${VULKANVERSION}/x86_64/bin:$PATH
31
32

# arm64 toolchain base: clang + ccache on AlmaLinux 8, with the NVIDIA CUDA
# (sbsa) repo registered for the later cuda-toolkit installs.
FROM --platform=linux/arm64 almalinux:8 AS base-arm64
# epel-release is installed because ccache comes from EPEL.
RUN yum install -y \
        yum-utils \
        epel-release \
    && dnf install -y \
        clang \
        ccache \
    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
# Use clang for both C and C++ in every stage derived from this one.
ENV CC=clang \
    CXX=clang++

# Common build base: selects base-amd64 or base-arm64 from the target
# architecture and unpacks the pinned CMake release into /usr/local.
FROM base-${TARGETARCH} AS base
ARG CMAKEVERSION
RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz \
        | tar xz -C /usr/local --strip-components 1
# Inherited by every stage built FROM base.
ENV LDFLAGS=-s

# Builds the 'CPU' CMake preset and installs its CPU component.
# The archive stage copies dist/lib/ollama out of this stage.
FROM base AS cpu
# The CPU build uses gcc-toolset-11, placed ahead of the compiler inherited from base.
RUN dnf install -y gcc-toolset-11-gcc gcc-toolset-11-gcc-c++
ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH
ARG PARALLEL
# With more than one source, the COPY destination must be a directory ending in "/".
COPY CMakeLists.txt CMakePresets.json ./
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
# /root/.ccache is a BuildKit cache mount, so its contents persist between builds.
RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'CPU' \
        && cmake --build --parallel ${PARALLEL} --preset 'CPU' \
        && cmake --install build --component CPU --strip --parallel ${PARALLEL}
Michael Yang's avatar
Michael Yang committed
54

55
56
57
58
# Builds the 'CUDA 11' CMake preset and installs its CUDA component.
FROM base AS cuda-11
ARG CUDA11VERSION=11.8
# ${CUDA11VERSION//./-} rewrites "11.8" as "11-8" to form the package name
# (a bash-style substitution, so /bin/sh must be bash-compatible).
RUN dnf install -y cuda-toolkit-${CUDA11VERSION//./-}
ENV PATH=/usr/local/cuda-11/bin:$PATH
ARG PARALLEL
# With more than one source, the COPY destination must be a directory ending in "/".
COPY CMakeLists.txt CMakePresets.json ./
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
# /root/.ccache is a BuildKit cache mount, so its contents persist between builds.
RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'CUDA 11' \
        && cmake --build --parallel ${PARALLEL} --preset 'CUDA 11' \
        && cmake --install build --component CUDA --strip --parallel ${PARALLEL}
66

Michael Yang's avatar
Michael Yang committed
67
# Builds the 'CUDA 12' CMake preset and installs its CUDA component.
FROM base AS cuda-12
ARG CUDA12VERSION=12.8
# ${CUDA12VERSION//./-} rewrites "12.8" as "12-8" to form the package name
# (a bash-style substitution, so /bin/sh must be bash-compatible).
RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-}
ENV PATH=/usr/local/cuda-12/bin:$PATH
ARG PARALLEL
# With more than one source, the COPY destination must be a directory ending in "/".
COPY CMakeLists.txt CMakePresets.json ./
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
# /root/.ccache is a BuildKit cache mount, so its contents persist between builds.
RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'CUDA 12' \
        && cmake --build --parallel ${PARALLEL} --preset 'CUDA 12' \
        && cmake --install build --component CUDA --strip --parallel ${PARALLEL}
78

79
80
81
82
83

# Builds the 'CUDA 13' CMake preset and installs its CUDA component.
FROM base AS cuda-13
ARG CUDA13VERSION=13.0
# ${CUDA13VERSION//./-} rewrites "13.0" as "13-0" to form the package name
# (a bash-style substitution, so /bin/sh must be bash-compatible).
RUN dnf install -y cuda-toolkit-${CUDA13VERSION//./-}
ENV PATH=/usr/local/cuda-13/bin:$PATH
ARG PARALLEL
# With more than one source, the COPY destination must be a directory ending in "/".
COPY CMakeLists.txt CMakePresets.json ./
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
# /root/.ccache is a BuildKit cache mount, so its contents persist between builds.
RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'CUDA 13' \
        && cmake --build --parallel ${PARALLEL} --preset 'CUDA 13' \
        && cmake --install build --component CUDA --strip --parallel ${PARALLEL}
91
92


Michael Yang's avatar
Michael Yang committed
93
# Builds the 'ROCm 6' CMake preset and installs its HIP component.
FROM base AS rocm-6
# ROCm tool directories; each directory is listed once (a repeated entry has
# no effect on lookup because the first match wins).
ENV PATH=/opt/rocm/hcc/bin:/opt/rocm/hip/bin:/opt/rocm/bin:$PATH
ARG PARALLEL
# With more than one source, the COPY destination must be a directory ending in "/".
COPY CMakeLists.txt CMakePresets.json ./
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
# /root/.ccache is a BuildKit cache mount, so its contents persist between builds.
RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'ROCm 6' \
        && cmake --build --parallel ${PARALLEL} --preset 'ROCm 6' \
        && cmake --install build --component HIP --strip --parallel ${PARALLEL}
# Drop the rocblas library files whose names contain gfx900 or gfx906.
RUN rm -f dist/lib/ollama/rocm/rocblas/library/*gfx90[06]*
Michael Yang's avatar
Michael Yang committed
103
104
105
106
107
108
109

# Builds the 'JetPack 5' CMake preset on the NVIDIA L4T JetPack image and
# installs its CUDA component.
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK5VERSION} AS jetpack-5
ARG CMAKEVERSION
# This stage does not derive from `base`, so it fetches curl, ccache and the
# pinned CMake release itself.
RUN apt-get update && apt-get install -y curl ccache \
    && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
# With more than one source, the COPY destination must be a directory ending in "/".
COPY CMakeLists.txt CMakePresets.json ./
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
ARG PARALLEL
# /root/.ccache is a BuildKit cache mount, so its contents persist between builds.
RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'JetPack 5' \
        && cmake --build --parallel ${PARALLEL} --preset 'JetPack 5' \
        && cmake --install build --component CUDA --strip --parallel ${PARALLEL}
Michael Yang's avatar
Michael Yang committed
115
116
117
118
119
120
121

# Builds the 'JetPack 6' CMake preset on the NVIDIA L4T JetPack image and
# installs its CUDA component.
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK6VERSION} AS jetpack-6
ARG CMAKEVERSION
# This stage does not derive from `base`, so it fetches curl, ccache and the
# pinned CMake release itself.
RUN apt-get update && apt-get install -y curl ccache \
    && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
# With more than one source, the COPY destination must be a directory ending in "/".
COPY CMakeLists.txt CMakePresets.json ./
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
ARG PARALLEL
# /root/.ccache is a BuildKit cache mount, so its contents persist between builds.
RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'JetPack 6' \
        && cmake --build --parallel ${PARALLEL} --preset 'JetPack 6' \
        && cmake --install build --component CUDA --strip --parallel ${PARALLEL}
Michael Yang's avatar
Michael Yang committed
127

128
# Builds the 'Vulkan' CMake preset and installs its Vulkan component.
FROM base AS vulkan
# Redeclared so the install step honours --build-arg PARALLEL like the other
# compile stages; it inherits the global default when not overridden.
ARG PARALLEL
# With more than one source, the COPY destination must be a directory ending in "/".
COPY CMakeLists.txt CMakePresets.json ./
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
# The build step passes --parallel without a job count, which leaves the job
# count to the native build tool's default; only the install step is bounded.
# /root/.ccache is a BuildKit cache mount, so its contents persist between builds.
RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'Vulkan' \
        && cmake --build --parallel --preset 'Vulkan' \
        && cmake --install build --component Vulkan --strip --parallel ${PARALLEL}

# Builds the 'MLX CUDA 13' CMake preset (MLX component) and the imagegen Go
# binary that links against it.
FROM base AS mlx
ARG CUDA13VERSION=13.0
# ${CUDA13VERSION//./-} rewrites "13.0" as "13-0" to form the package name.
RUN dnf install -y cuda-toolkit-${CUDA13VERSION//./-} \
    && dnf install -y openblas-devel lapack-devel \
    && dnf install -y libcudnn9-cuda-13 libcudnn9-devel-cuda-13 \
    && dnf install -y libnccl libnccl-devel
ENV PATH=/usr/local/cuda-13/bin:$PATH
ENV BLAS_INCLUDE_DIRS=/usr/include/openblas
ENV LAPACK_INCLUDE_DIRS=/usr/include/openblas
# NOTE(review): the stubs path is x86_64-specific; this stage is only consumed
# by the amd64 artifact stage in this file.
ENV CGO_LDFLAGS="-L/usr/local/cuda-13/lib64 -L/usr/local/cuda-13/targets/x86_64-linux/lib/stubs"
ARG PARALLEL
WORKDIR /go/src/github.com/ollama/ollama
# With more than one source, the COPY destination must be a directory ending in "/".
COPY CMakeLists.txt CMakePresets.json ./
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
COPY x/ml/backend/mlx x/ml/backend/mlx
COPY go.mod go.sum ./
# Install the Go toolchain version named by the `go` directive in go.mod.
RUN curl -fsSL https://golang.org/dl/go$(awk '/^go/ { print $2 }' go.mod).linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
ENV PATH=/usr/local/go/bin:$PATH
RUN go mod download
RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'MLX CUDA 13' -DBLAS_INCLUDE_DIRS=/usr/include/openblas -DLAPACK_INCLUDE_DIRS=/usr/include/openblas \
        && cmake --build --parallel ${PARALLEL} --preset 'MLX CUDA 13' \
        && cmake --install build --component MLX --strip --parallel ${PARALLEL}
COPY . .
# Build args are visible to RUN as environment variables, so `go build`
# sees GOFLAGS and the CGO_* flags when they are supplied.
ARG GOFLAGS="'-ldflags=-w -s'"
ENV CGO_ENABLED=1
ARG CGO_CFLAGS
ARG CGO_CXXFLAGS
# TODO wire up the actual MLX engine here instead of building the main binary...
# The Go build cache is mounted the same way as in the `build` stage, so
# rebuilds can reuse previously compiled packages.
RUN --mount=type=cache,target=/root/.cache/go-build \
    mkdir -p dist/bin \
        && go build -tags mlx -trimpath -buildmode=pie -o dist/bin/imagegen ./x/imagegen/cmd/engine
167
168


Michael Yang's avatar
Michael Yang committed
169
170
# Compiles the ollama Go binary to /bin/ollama.
FROM base AS build
WORKDIR /go/src/github.com/ollama/ollama
# Copy only the module manifests first, so the toolchain and module downloads
# stay cached until go.mod or go.sum changes.
# With more than one source, the COPY destination must be a directory ending in "/".
COPY go.mod go.sum ./
# Install the Go toolchain version named by the `go` directive in go.mod.
RUN curl -fsSL https://golang.org/dl/go$(awk '/^go/ { print $2 }' go.mod).linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
ENV PATH=/usr/local/go/bin:$PATH
RUN go mod download
COPY . .
# Build args are visible to RUN as environment variables, so `go build`
# sees GOFLAGS and the CGO_* flags when they are supplied.
ARG GOFLAGS="'-ldflags=-w -s'"
ENV CGO_ENABLED=1
ARG CGO_CFLAGS
ARG CGO_CXXFLAGS
# The Go build cache lives in a BuildKit cache mount and persists between builds.
RUN --mount=type=cache,target=/root/.cache/go-build \
    go build -trimpath -buildmode=pie -o /bin/ollama .

# amd64 artifact stage: gathers the GPU backend libraries and the mlx binaries
# into an otherwise empty image.
FROM --platform=linux/amd64 scratch AS amd64
# COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/
COPY --from=cuda-12 dist/lib/ollama /lib/ollama/
COPY --from=cuda-13 dist/lib/ollama /lib/ollama/
COPY --from=vulkan dist/lib/ollama /lib/ollama/
# The mlx stage sets a WORKDIR, so its outputs sit under that directory.
COPY --from=mlx /go/src/github.com/ollama/ollama/dist/lib/ollama /lib/ollama/
COPY --from=mlx /go/src/github.com/ollama/ollama/dist/bin/ /bin/
Michael Yang's avatar
Michael Yang committed
190
191

# arm64 artifact stage: gathers the CUDA and JetPack backend libraries into an
# otherwise empty image.
FROM --platform=linux/arm64 scratch AS arm64
# COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/
COPY --from=cuda-12 dist/lib/ollama /lib/ollama/
COPY --from=cuda-13 dist/lib/ollama/ /lib/ollama/
COPY --from=jetpack-5 dist/lib/ollama/ /lib/ollama/
COPY --from=jetpack-6 dist/lib/ollama/ /lib/ollama/
Michael Yang's avatar
Michael Yang committed
197

198
# ROCm artifact stage: only the libraries produced by the rocm-6 build stage.
# Selected as the archive base when FLAVOR=rocm.
FROM scratch AS rocm
COPY --from=rocm-6 dist/lib/ollama /lib/ollama
Michael Yang's avatar
Michael Yang committed
200
201

# Full artifact set: the FLAVOR-selected backend stage (amd64, arm64 or rocm)
# plus the CPU libraries and the ollama binary.
FROM ${FLAVOR} AS archive
# NOTE(review): VULKANVERSION is not referenced by any instruction visible in
# this stage - confirm whether the declaration is still needed.
ARG VULKANVERSION
COPY --from=cpu dist/lib/ollama /lib/ollama
COPY --from=build /bin/ollama /bin/ollama

206
# Runtime image: Ubuntu 24.04 with the binaries and libraries collected in the
# archive stage.
FROM ubuntu:24.04
# The apt package lists are removed in the same layer that downloads them.
RUN apt-get update \
    && apt-get install -y \
        ca-certificates \
        libvulkan1 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
COPY --from=archive /bin /usr/bin
ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
COPY --from=archive /lib/ollama /usr/lib/ollama
# NVIDIA library path and device settings, plus the address ollama listens on
# (all interfaces, port 11434).
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64 \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    NVIDIA_VISIBLE_DEVICES=all \
    OLLAMA_HOST=0.0.0.0:11434
EXPOSE 11434
# Exec form; `serve` is the default argument and can be overridden at `docker run`.
ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]