ARG GOLANG_VERSION=1.22.8
ARG CMAKE_VERSION=3.22.1
ARG CUDA_VERSION_11=11.3.1
ARG CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
ARG CUDA_VERSION_12=12.4.0
ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
ARG ROCM_VERSION=6.1.2

### To create a local image for building Linux binaries on macOS or Windows with efficient incremental builds
#
# docker build --platform linux/amd64 -t builder-amd64 -f Dockerfile --target unified-builder-amd64 .
# docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64
#
### Then incremental builds will be much faster in this container
#
# make -C llama -j 10 && go build -trimpath -o dist/linux-amd64/ollama .
#
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS unified-builder-amd64
ARG CMAKE_VERSION
ARG GOLANG_VERSION
ARG CUDA_VERSION_11
ARG CUDA_VERSION_12
COPY ./scripts/rh_linux_deps.sh /
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:/usr/local/cuda/bin:$PATH
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \
    dnf clean all && \
    dnf install -y \
    zsh \
    cuda-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
    cuda-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
# TODO intel oneapi goes here...
ENV GOARCH amd64
ENV CGO_ENABLED 1
WORKDIR /go/src/github.com/ollama/ollama/
ENTRYPOINT [ "zsh" ]

### To create a local image for building Linux binaries on macOS or linux/arm64 hosts with efficient incremental builds
# Note: this does not contain Jetson variants
#
# docker build --platform linux/arm64 -t builder-arm64 -f Dockerfile --target unified-builder-arm64 .
# docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64
#
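### Then incremental builds inside this container mirror the amd64 example above; a sketch
### (the -j 8 job count and the dist/linux-arm64 output path are assumptions taken from the arm64 stages below)
#
# make -C llama -j 8 && go build -trimpath -o dist/linux-arm64/ollama .
#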
FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64
ARG CMAKE_VERSION
ARG GOLANG_VERSION
ARG CUDA_VERSION_11
ARG CUDA_VERSION_12
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo && \
    dnf config-manager --set-enabled appstream && \
    dnf clean all && \
    dnf install -y \
    zsh \
    cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
    cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH:/usr/local/cuda/bin
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
ENV GOARCH arm64
ENV CGO_ENABLED 1
WORKDIR /go/src/github.com/ollama/ollama/
ENTRYPOINT [ "zsh" ]

FROM --platform=linux/amd64 unified-builder-amd64 AS runners-amd64
COPY . .
ARG OLLAMA_SKIP_CUDA_GENERATE
ARG OLLAMA_SKIP_CUDA_11_GENERATE
ARG OLLAMA_SKIP_CUDA_12_GENERATE
ARG OLLAMA_SKIP_ROCM_GENERATE
ARG CUDA_V11_ARCHITECTURES
ARG CUDA_V12_ARCHITECTURES
ARG OLLAMA_FAST_BUILD
RUN --mount=type=cache,target=/root/.ccache \
    if grep "^flags" /proc/cpuinfo|grep avx>/dev/null; then \
        make -C llama -j $(expr $(nproc) / 2 ) ; \
    else \
        make -C llama -j 5 ; \
    fi
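
# A sketch of building only this runners stage on its own; the OLLAMA_SKIP_*_GENERATE and
# OLLAMA_FAST_BUILD args are declared above, but whether the llama Makefile honors a given
# value is an assumption here:
#
# docker build --platform linux/amd64 --target runners-amd64 \
#     --build-arg OLLAMA_SKIP_ROCM_GENERATE=1 -f Dockerfile .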

FROM --platform=linux/arm64 unified-builder-arm64 AS runners-arm64
COPY . .
ARG OLLAMA_SKIP_CUDA_GENERATE
ARG OLLAMA_SKIP_CUDA_11_GENERATE
ARG OLLAMA_SKIP_CUDA_12_GENERATE
ARG CUDA_V11_ARCHITECTURES
ARG CUDA_V12_ARCHITECTURES
ARG OLLAMA_FAST_BUILD
RUN --mount=type=cache,target=/root/.ccache \
    make -C llama -j 8


# Intermediate stages used for ./scripts/build_linux.sh
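# A sketch of exporting the dist artifacts produced by these stages to the local filesystem;
# the exact flags live in ./scripts/build_linux.sh, and the platform list here is an assumption:
#
# docker buildx build --platform linux/amd64,linux/arm64 --target dist \
#     --output type=local,dest=./dist -f Dockerfile .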
FROM --platform=linux/amd64 centos:7 AS builder-amd64
ARG CMAKE_VERSION
ARG GOLANG_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
ENV CGO_ENABLED 1
ENV GOARCH amd64
WORKDIR /go/src/github.com/ollama/ollama

FROM --platform=linux/amd64 builder-amd64 AS build-amd64
COPY . .
COPY --from=runners-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=runners-amd64 /go/src/github.com/ollama/ollama/build/ build/
ARG GOFLAGS
ARG CGO_CFLAGS
ARG OLLAMA_SKIP_ROCM_GENERATE
RUN --mount=type=cache,target=/root/.ccache \
    go build -trimpath -o dist/linux-amd64/bin/ollama .
RUN cd dist/linux-$GOARCH && \
    tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
RUN if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" ] ; then \
    cd dist/linux-$GOARCH-rocm && \
    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-rocm.tgz ;\
    fi

FROM --platform=linux/arm64 rockylinux:8 AS builder-arm64
ARG CMAKE_VERSION
ARG GOLANG_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
ENV CGO_ENABLED 1
ENV GOARCH arm64
WORKDIR /go/src/github.com/ollama/ollama

FROM --platform=linux/arm64 builder-arm64 AS build-arm64
COPY . .
COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/build/ build/
ARG GOFLAGS
ARG CGO_CFLAGS
RUN --mount=type=cache,target=/root/.ccache \
    go build -trimpath -o dist/linux-arm64/bin/ollama .
RUN cd dist/linux-$GOARCH && \
    tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz

FROM --platform=linux/amd64 scratch AS dist-amd64
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
FROM --platform=linux/arm64 scratch AS dist-arm64
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
FROM dist-$TARGETARCH AS dist


# Optimized container images do not carry nested payloads
FROM --platform=linux/amd64 builder-amd64 AS container-build-amd64
WORKDIR /go/src/github.com/ollama/ollama
COPY . .
ARG GOFLAGS
ARG CGO_CFLAGS
RUN --mount=type=cache,target=/root/.ccache \
    go build -trimpath -o dist/linux-amd64/bin/ollama .

FROM --platform=linux/arm64 builder-arm64 AS container-build-arm64
WORKDIR /go/src/github.com/ollama/ollama
COPY . .
ARG GOFLAGS
ARG CGO_CFLAGS
RUN --mount=type=cache,target=/root/.ccache \
    go build -trimpath -o dist/linux-arm64/bin/ollama .

# For amd64 container images, filter out CUDA/ROCm libraries to minimize size
FROM runners-amd64 AS runners-cuda-amd64
RUN rm -rf \
    ./dist/linux-amd64/lib/ollama/libggml_hipblas.so \
    ./dist/linux-amd64/lib/ollama/runners/rocm*

FROM runners-amd64 AS runners-rocm-amd64
RUN rm -rf \
    ./dist/linux-amd64/lib/ollama/libggml_cuda*.so \
    ./dist/linux-amd64/lib/ollama/libcu*.so* \
    ./dist/linux-amd64/lib/ollama/runners/cuda*

FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64
RUN apt-get update && \
    apt-get install -y ca-certificates && \
    rm -rf /var/lib/apt/lists/*
COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
COPY --from=runners-cuda-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/

FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64
RUN apt-get update && \
    apt-get install -y ca-certificates && \
    rm -rf /var/lib/apt/lists/*
COPY --from=container-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/

# ROCm libraries are larger, so we keep this image distinct from the CPU/CUDA image
FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm
# Front-load the ROCm libraries, which are large and rarely change, to increase the chance of a
# common layer across releases
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/lib/ /lib/
RUN apt-get update && \
    apt-get install -y ca-certificates && \
    rm -rf /var/lib/apt/lists/*
COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
COPY --from=runners-rocm-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/

EXPOSE 11434
ENV OLLAMA_HOST 0.0.0.0

ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]

FROM runtime-$TARGETARCH
EXPOSE 11434
ENV OLLAMA_HOST 0.0.0.0
ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_VISIBLE_DEVICES=all

ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]