Dockerfile 8.28 KB
Newer Older
1
# Global build arguments. Each one can be overridden with --build-arg and must be
# re-declared (without a value) inside any stage that reads it after FROM.

# Go toolchain version installed into the builder stages.
ARG GOLANG_VERSION=1.22.8

# CUDA toolkit versions installed into the unified builder images.
ARG CUDA_VERSION_11=11.3.1
ARG CUDA_VERSION_12=12.4.0

# Tag prefix of the rocm/dev-centos-7 base image used by the amd64 builder.
ARG ROCM_VERSION=6.1.2

# Tags of the nvcr.io/nvidia/l4t-jetpack base images used by the Jetson runner stages.
ARG JETPACK_5=r35.4.1
ARG JETPACK_6=r36.2.0
Michael Yang's avatar
Michael Yang committed
7

8
9
10
11
12
13
14
### To create a local image for building linux binaries on mac or windows with efficient incremental builds
#
# docker build --platform linux/amd64 -t builder-amd64 -f Dockerfile --target unified-builder-amd64 .
# docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64
#
### Then incremental builds will be much faster in this container
#
# make -j 10 dist
#
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS unified-builder-amd64
# Re-declare the global ARGs so they are visible inside this stage.
ARG GOLANG_VERSION
ARG CUDA_VERSION_11
ARG CUDA_VERSION_12
COPY ./scripts/rh_linux_deps.sh /
# Put the devtoolset-10 compilers and the CUDA tools ahead of the base image's PATH.
ENV PATH=/opt/rh/devtoolset-10/root/usr/bin:/usr/local/cuda/bin:$PATH
# Append the CUDA libraries. The separator is only emitted when LD_LIBRARY_PATH is
# already set: a leading ':' would create an empty entry, which the dynamic loader
# interprets as the current working directory.
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/cuda/lib64
RUN GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
# Install zsh plus both CUDA toolkits. The package suffix is the major-minor version
# with '.' replaced by '-' (e.g. 11.3.1 -> cuda-toolkit-11-3). The trailing
# "dnf clean all" drops the package cache in the same layer that created it.
RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \
    dnf clean all && \
    dnf install -y \
    zsh \
    cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
    cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g") && \
    dnf clean all
# TODO intel oneapi goes here...
ENV GOARCH=amd64
ENV CGO_ENABLED=1
WORKDIR /go/src/github.com/ollama/ollama/
ENTRYPOINT [ "zsh" ]

### To create a local image for building linux binaries on mac or linux/arm64 with efficient incremental builds
# Note: this does not contain jetson variants
#
# docker build --platform linux/arm64 -t builder-arm64 -f Dockerfile --target unified-builder-arm64 .
# docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64
#
FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64
# Re-declare the global ARGs so they are visible inside this stage.
ARG GOLANG_VERSION
ARG CUDA_VERSION_11
ARG CUDA_VERSION_12
COPY ./scripts/rh_linux_deps.sh /
RUN GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
# Install zsh plus both CUDA toolkits from the NVIDIA sbsa repository. The package
# suffix is the major-minor version with '.' replaced by '-' (e.g. 12.4.0 ->
# cuda-toolkit-12-4). The trailing "dnf clean all" drops the package cache in the
# same layer that created it.
RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo && \
    dnf config-manager --set-enabled appstream && \
    dnf clean all && \
    dnf install -y \
    zsh \
    cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
    cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g") && \
    dnf clean all
# gcc-toolset-10 goes first on PATH; the CUDA tools are appended last.
ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH:/usr/local/cuda/bin
# Append the CUDA libraries. The separator is only emitted when LD_LIBRARY_PATH is
# already set: a leading ':' would create an empty entry, which the dynamic loader
# interprets as the current working directory.
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/cuda/lib64
ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
ENV GOARCH=arm64
ENV CGO_ENABLED=1
WORKDIR /go/src/github.com/ollama/ollama/
ENTRYPOINT [ "zsh" ]

64
# Builds the full amd64 distribution and packs it into .tgz archives under dist/.
FROM --platform=linux/amd64 unified-builder-amd64 AS build-amd64
COPY . .
ARG OLLAMA_SKIP_CUDA_GENERATE
ARG OLLAMA_SKIP_ROCM_GENERATE
ARG OLLAMA_FAST_BUILD
ARG VERSION
# When the CPU reports avx, build with half of the available processors, otherwise
# fall back to 5 jobs. The job count is clamped to at least 1: on a single-CPU
# builder nproc/2 is 0 and "make -j 0" is rejected by make.
RUN --mount=type=cache,target=/root/.ccache \
    if grep "^flags" /proc/cpuinfo | grep avx > /dev/null; then \
        jobs=$(( $(nproc) / 2 )) ; \
        if [ "${jobs}" -lt 1 ] ; then jobs=1 ; fi ; \
        make -j "${jobs}" dist ; \
    else \
        make -j 5 dist ; \
    fi
RUN cd dist/linux-$GOARCH && \
    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
# The ROCm archive is only produced when OLLAMA_SKIP_ROCM_GENERATE is empty or unset.
# The variable is quoted so that a value containing whitespace cannot break the test.
RUN if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" ] ; then \
    cd dist/linux-$GOARCH-rocm && \
    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-rocm.tgz ;\
    fi
82
83
84
85
86
87
88
89
90
91
92
93
94

# Jetsons need to be built in discrete stages
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5} AS runners-jetpack5-arm64
ARG GOLANG_VERSION
# Install build prerequisites and the Go toolchain. curl runs with -f so that an HTTP
# error (for example an unknown GOLANG_VERSION) is reported by curl itself instead of
# an error page being piped into tar.
RUN apt-get update && apt-get install -y git curl ccache && \
    curl -fsSL https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \
    ln -s /usr/local/go/bin/go /usr/local/bin/go && \
    ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \
    apt-get clean && rm -rf /var/lib/apt/lists/*
WORKDIR /go/src/github.com/ollama/ollama/
COPY . .
ARG CGO_CFLAGS
ENV GOARCH=arm64
ARG VERSION
# Build the CUDA v11 runner as the "_jetpack5" variant; output goes to
# dist/linux-arm64-jetpack5 so it stays separate from the generic arm64 build.
RUN --mount=type=cache,target=/root/.ccache \
    make -j 5 dist_cuda_v11 \
        CUDA_ARCHITECTURES="72;87" \
        GPU_RUNNER_VARIANT=_jetpack5 \
        DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama \
        DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama/cuda_jetpack5

# JetPack 6 runner stage: builds the CUDA v12 runner on the l4t-jetpack base image.
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6} AS runners-jetpack6-arm64
ARG GOLANG_VERSION
# Install build prerequisites and the Go toolchain. curl runs with -f so that an HTTP
# error (for example an unknown GOLANG_VERSION) is reported by curl itself instead of
# an error page being piped into tar.
RUN apt-get update && apt-get install -y git curl ccache && \
    curl -fsSL https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \
    ln -s /usr/local/go/bin/go /usr/local/bin/go && \
    ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \
    apt-get clean && rm -rf /var/lib/apt/lists/*
WORKDIR /go/src/github.com/ollama/ollama/
COPY . .
ARG CGO_CFLAGS
ENV GOARCH=arm64
ARG VERSION
# Build the CUDA v12 runner as the "_jetpack6" variant; output goes to
# dist/linux-arm64-jetpack6 so it stays separate from the generic arm64 build.
RUN --mount=type=cache,target=/root/.ccache \
    make -j 5 dist_cuda_v12 \
        CUDA_ARCHITECTURES="87" \
        GPU_RUNNER_VARIANT=_jetpack6 \
        DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama \
        DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama/cuda_jetpack6
121

122
# Builds the generic arm64 distribution, merges in the two Jetson runner builds and
# packs each dist/linux-arm64* tree into its own .tgz archive under dist/.
FROM --platform=linux/arm64 unified-builder-arm64 AS build-arm64
COPY . .
ARG OLLAMA_SKIP_CUDA_GENERATE
ARG OLLAMA_FAST_BUILD
ARG VERSION
RUN --mount=type=cache,target=/root/.ccache \
    make -j 5 dist

# Bring in the artifacts produced by the discrete Jetson stages.
COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/

# One archive per variant, written next to the directory it was created from.
RUN cd dist/linux-${GOARCH} && \
    tar -cf - . | pigz --best > ../ollama-linux-${GOARCH}.tgz
RUN cd dist/linux-${GOARCH}-jetpack5 && \
    tar -cf - . | pigz --best > ../ollama-linux-${GOARCH}-jetpack5.tgz
RUN cd dist/linux-${GOARCH}-jetpack6 && \
    tar -cf - . | pigz --best > ../ollama-linux-${GOARCH}-jetpack6.tgz
137

138
139
140
141
# Archive-only images: each contains nothing but the .tgz files of one architecture.
FROM --platform=linux/amd64 scratch AS dist-amd64
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /

FROM --platform=linux/arm64 scratch AS dist-arm64
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /

# Select the archive image that matches the architecture being built for.
FROM dist-${TARGETARCH} AS dist
143
144


145
# For amd64 container images, filter out cuda/rocm to minimize size
# This variant keeps the CUDA pieces and removes the ROCm library and runners.
FROM build-amd64 AS runners-cuda-amd64
RUN rm -rf \
    ./dist/linux-amd64/lib/ollama/runners/rocm* \
    ./dist/linux-amd64/lib/ollama/libggml_hipblas.so

151
# ROCm variant of the amd64 build tree: removes the CUDA libraries and runners.
FROM build-amd64 AS runners-rocm-amd64
RUN rm -rf \
    ./dist/linux-amd64/lib/ollama/runners/cuda* \
    ./dist/linux-amd64/lib/ollama/libcu*.so* \
    ./dist/linux-amd64/lib/ollama/libggml_cuda*.so

157
# amd64 runtime image: CA certificates, the ollama binaries and the CUDA-filtered libraries.
FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64
# --no-install-recommends keeps optional packages out of the runtime image; the apt
# lists are removed in the same layer so they never reach the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends ca-certificates && \
    apt-get clean && rm -rf /var/lib/apt/lists/*
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
COPY --from=runners-cuda-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
163

164
# arm64 runtime image: CA certificates, the ollama binaries, the generic arm64
# libraries and the libraries from both Jetson runner stages.
FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64
# --no-install-recommends keeps optional packages out of the runtime image; the apt
# lists are removed in the same layer so they never reach the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends ca-certificates && \
    apt-get clean && rm -rf /var/lib/apt/lists/*
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ /lib/
COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ /lib/
172

173

174
175
176
177
# ROCm libraries larger so we keep it distinct from the CPU/CUDA image
FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm
# Frontload the rocm libraries which are large, and rarely change to increase chance of a common layer
# across releases
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/lib/ /lib/
# --no-install-recommends keeps optional packages out of the runtime image; the apt
# lists are removed in the same layer so they never reach the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends ca-certificates && \
    apt-get clean && rm -rf /var/lib/apt/lists/*
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
COPY --from=runners-rocm-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/

# EXPOSE only documents the port; it still has to be published at run time.
EXPOSE 11434
# Listen on all interfaces so the server is reachable from outside the container.
ENV OLLAMA_HOST=0.0.0.0

ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]

191
# Default image: the runtime stage that matches the architecture being built for.
FROM runtime-$TARGETARCH
# EXPOSE only documents the port; it still has to be published at run time.
EXPOSE 11434
# Listen on all interfaces so the server is reachable from outside the container.
ENV OLLAMA_HOST=0.0.0.0
ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
# Library search path and NVIDIA_* settings for GPU access.
# NOTE(review): presumably consumed by the NVIDIA container runtime, which would
# mount the driver libraries under /usr/local/nvidia — confirm.
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_VISIBLE_DEVICES=all

ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]