# Global build arguments. They are declared before the first FROM so they can be
# referenced in FROM lines (ROCM_VERSION, JETPACK_5, JETPACK_6); stages that need
# a value inside their own instructions re-declare the ARG by name.
ARG GOLANG_VERSION=1.22.8
ARG CUDA_VERSION_11=11.3.1
ARG CUDA_VERSION_12=12.4.0
ARG ROCM_VERSION=6.1.2
ARG JETPACK_6=r36.2.0
ARG JETPACK_5=r35.4.1

### To create a local image for building linux binaries on mac or windows with efficient incremental builds
#
# docker build --platform linux/amd64 -t builder-amd64 -f Dockerfile --target unified-builder-amd64 .
# docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64
#
### Then incremental builds will be much faster in this container
#
# make -j 10 dist
#
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS unified-builder-amd64
# Re-declare the global ARGs so their defaults are visible inside this stage.
ARG GOLANG_VERSION
ARG CUDA_VERSION_11
ARG CUDA_VERSION_12
COPY ./scripts/rh_linux_deps.sh /
# Put the devtoolset-10 and CUDA binaries ahead of the base image's PATH.
ENV PATH=/opt/rh/devtoolset-10/root/usr/bin:/usr/local/cuda/bin:$PATH
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
# The dependency script receives the Go version through its environment.
RUN GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
# Install zsh plus both CUDA toolkits. The package suffix is the major-minor part
# of the version with the dot replaced by a dash (e.g. 11.3.1 -> cuda-toolkit-11-3).
RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \
    dnf clean all && \
    dnf install -y \
    zsh \
    cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
    cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
# TODO intel oneapi goes here...
ENV GOARCH=amd64
ENV CGO_ENABLED=1
WORKDIR /go/src/github.com/ollama/ollama/
# Interactive shell by default; stages built FROM this one run their own RUN steps.
ENTRYPOINT [ "zsh" ]

### To create a local image for building linux binaries on mac or linux/arm64 with efficient incremental builds
# Note: this does not contain jetson variants
#
# docker build --platform linux/arm64 -t builder-arm64 -f Dockerfile --target unified-builder-arm64 .
# docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64
#
FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64
# Re-declare the global ARGs so their defaults are visible inside this stage.
ARG GOLANG_VERSION
ARG CUDA_VERSION_11
ARG CUDA_VERSION_12
COPY ./scripts/rh_linux_deps.sh /
# The dependency script receives the Go version through its environment.
RUN GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
# Install zsh plus both CUDA toolkits from the rhel8/sbsa repository. The package
# suffix is the major-minor part of the version with the dot replaced by a dash.
RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo && \
    dnf config-manager --set-enabled appstream && \
    dnf clean all && \
    dnf install -y \
    zsh \
    cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
    cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
# gcc-toolset-10 goes first on PATH; the CUDA binaries are appended last.
ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH:/usr/local/cuda/bin
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
ENV GOARCH=arm64
ENV CGO_ENABLED=1
WORKDIR /go/src/github.com/ollama/ollama/
# Interactive shell by default; stages built FROM this one run their own RUN steps.
ENTRYPOINT [ "zsh" ]

# Full amd64 build: compiles the dist target inside the unified builder and
# packages the results as .tgz archives under dist/.
FROM --platform=linux/amd64 unified-builder-amd64 AS build-amd64
COPY . .
# Optional build arguments; each is exposed to the RUN steps below when set.
ARG OLLAMA_SKIP_CUDA_GENERATE
ARG OLLAMA_SKIP_ROCM_GENERATE
ARG OLLAMA_FAST_BUILD
ARG VERSION
ARG CUSTOM_CPU_FLAGS
# Use every core when the build host's /proc/cpuinfo flags include avx;
# otherwise fall back to a fixed parallelism of 5.
RUN --mount=type=cache,target=/root/.ccache \
    if grep "^flags" /proc/cpuinfo|grep avx>/dev/null; then \
        make -j $(nproc) dist ; \
    else \
        make -j 5 dist ; \
    fi
RUN cd dist/linux-$GOARCH && \
    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
# Package the ROCm payload only when ROCm generation was not skipped. The
# variable is quoted so a value containing whitespace cannot break the test.
RUN if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" ] ; then \
    cd dist/linux-$GOARCH-rocm && \
    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-rocm.tgz ;\
    fi

# Jetsons need to be built in discrete stages
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5} AS runners-jetpack5-arm64
ARG GOLANG_VERSION
# Install build prerequisites and unpack the Go toolchain into /usr/local,
# symlinking go and gofmt into /usr/local/bin; apt caches are removed in the
# same layer.
RUN apt-get update && apt-get install -y git curl ccache && \
    curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \
    ln -s /usr/local/go/bin/go /usr/local/bin/go && \
    ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \
    apt-get clean && rm -rf /var/lib/apt/lists/*
WORKDIR /go/src/github.com/ollama/ollama/
COPY . .
ARG CGO_CFLAGS
ENV GOARCH=arm64
ARG VERSION
# Build only the CUDA v11 dist target, writing its output into a
# jetpack5-specific dist directory.
RUN --mount=type=cache,target=/root/.ccache \
    make -j 5 dist_cuda_v11 \
        CUDA_ARCHITECTURES="72;87" \
        GPU_RUNNER_VARIANT=_jetpack5 \
        DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama \
        DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama/cuda_jetpack5

# JetPack 6 runner: same recipe as the JetPack 5 stage, but on the JETPACK_6
# base image and building the CUDA v12 target.
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6} AS runners-jetpack6-arm64
ARG GOLANG_VERSION
# Install build prerequisites and unpack the Go toolchain into /usr/local,
# symlinking go and gofmt into /usr/local/bin; apt caches are removed in the
# same layer.
RUN apt-get update && apt-get install -y git curl ccache && \
    curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \
    ln -s /usr/local/go/bin/go /usr/local/bin/go && \
    ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \
    apt-get clean && rm -rf /var/lib/apt/lists/*
WORKDIR /go/src/github.com/ollama/ollama/
COPY . .
ARG CGO_CFLAGS
ENV GOARCH=arm64
ARG VERSION
# Build only the CUDA v12 dist target, writing its output into a
# jetpack6-specific dist directory.
RUN --mount=type=cache,target=/root/.ccache \
    make -j 5 dist_cuda_v12 \
        CUDA_ARCHITECTURES="87" \
        GPU_RUNNER_VARIANT=_jetpack6 \
        DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama \
        DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama/cuda_jetpack6

# Full arm64 build: compiles the dist target, merges in the two Jetson runner
# outputs, then packages each dist directory as its own .tgz archive.
FROM --platform=linux/arm64 unified-builder-arm64 AS build-arm64
COPY . .
# Optional build arguments; each is exposed to the RUN steps below when set.
ARG OLLAMA_SKIP_CUDA_GENERATE
ARG OLLAMA_FAST_BUILD
ARG VERSION
RUN --mount=type=cache,target=/root/.ccache \
    make -j 5 dist
# Bring in the jetpack5/jetpack6 dist trees produced by the discrete Jetson stages.
COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
RUN cd dist/linux-$GOARCH && \
    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
RUN cd dist/linux-$GOARCH-jetpack5 && \
    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack5.tgz
RUN cd dist/linux-$GOARCH-jetpack6 && \
    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack6.tgz

# Artifact-only stages: empty (scratch) images holding just the packaged .tgz
# archives for each architecture.
FROM --platform=linux/amd64 scratch AS dist-amd64
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
FROM --platform=linux/arm64 scratch AS dist-arm64
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
# Select the per-architecture artifact stage matching the build's target architecture.
FROM dist-$TARGETARCH AS dist


# For amd64 container images, filter out cuda/rocm to minimize size
# This variant keeps the CUDA payload and deletes the ROCm (hipblas) library
# and rocm runners.
FROM build-amd64 AS runners-cuda-amd64
RUN rm -rf \
    ./dist/linux-amd64/lib/ollama/libggml_hipblas.so \
    ./dist/linux-amd64/lib/ollama/runners/rocm*

# ROCm variant of the amd64 runner filter: deletes the CUDA ggml library, the
# libcu* shared libraries and the cuda runners from the build output.
FROM build-amd64 AS runners-rocm-amd64
RUN rm -rf \
    ./dist/linux-amd64/lib/ollama/libggml_cuda*.so \
    ./dist/linux-amd64/lib/ollama/libcu*.so* \
    ./dist/linux-amd64/lib/ollama/runners/cuda*

# amd64 runtime image: Ubuntu base with CA certificates, the ollama binaries and
# the CUDA-filtered library tree.
FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64
# update, install and cache cleanup share one layer so the apt lists never persist.
RUN apt-get update && \
    apt-get install -y ca-certificates && \
    apt-get clean && rm -rf /var/lib/apt/lists/*
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
COPY --from=runners-cuda-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/

# arm64 runtime image: Ubuntu base with CA certificates, the ollama binaries and
# libraries, plus the jetpack5 and jetpack6 library trees.
FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64
# update, install and cache cleanup share one layer so the apt lists never persist.
RUN apt-get update && \
    apt-get install -y ca-certificates && \
    apt-get clean && rm -rf /var/lib/apt/lists/*
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ /lib/
COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ /lib/

# ROCm libraries larger so we keep it distinct from the CPU/CUDA image
FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm
# Frontload the rocm libraries which are large, and rarely change to increase chance of a common layer
# across releases
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/lib/ /lib/
# update, install and cache cleanup share one layer so the apt lists never persist.
RUN apt-get update && \
    apt-get install -y ca-certificates && \
    apt-get clean && rm -rf /var/lib/apt/lists/*
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
COPY --from=runners-rocm-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/

EXPOSE 11434
ENV OLLAMA_HOST=0.0.0.0

# The binary is the entrypoint; "serve" is the default argument and can be
# overridden at `docker run`.
ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]

# Default image: the runtime stage matching the build's target architecture.
FROM runtime-$TARGETARCH
EXPOSE 11434
ENV OLLAMA_HOST=0.0.0.0
ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_VISIBLE_DEVICES=all

# The binary is the entrypoint; "serve" is the default argument and can be
# overridden at `docker run`.
ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]