#!/bin/bash
# This script is intended to run inside the go generate
# working directory must be llm/generate/

# First we build one or more CPU based LLM libraries
#
# Then if we detect CUDA, we build a CUDA dynamic library, and carry the required
# library dependencies
#
# Then if we detect ROCm, we build a dynamically loaded ROCm lib.  The ROCM
# libraries are quite large, and also dynamically load data files at runtime
# which in turn are large, so we don't attempt to carry them as payload

# Abort on the first failing command (-e), trace every command (-x), and make
# a pipeline fail when any of its stages fails.
set -ex
set -o pipefail
# Starts out empty; nothing else in this script writes to it.
# NOTE(review): presumably consumed by compress / wait_for_compress in
# gen_common.sh - confirm.
compress_pids=""

# See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
# Print the semicolon-separated list of AMD GPU targets to build for.
# Globals:  AMDGPU_TARGETS (read) - when non-empty it is printed verbatim
#           and the built-in list is ignored.
# Outputs:  the target list on stdout. The built-in list is wrapped in
#           literal single quotes; an AMDGPU_TARGETS override is not.
amdGPUs() {
    if [ -n "${AMDGPU_TARGETS}" ]; then
        echo "${AMDGPU_TARGETS}"
        return
    fi
    # Both locals stay inside the function: the array does not leak into the
    # script's globals and the IFS change cannot affect the caller.
    local -a GPU_LIST=(
        "gfx900"
        "gfx906:xnack-"
        "gfx908:xnack-"
        "gfx90a:xnack+"
        "gfx90a:xnack-"
        "gfx940"
        "gfx941"
        "gfx942"
        "gfx1010"
        "gfx1012"
        "gfx1030"
        "gfx1100"
        "gfx1101"
        "gfx1102"
    )
    # "${GPU_LIST[*]}" joins the elements with the first character of IFS.
    local IFS=';'
    echo "'${GPU_LIST[*]}'"
}

echo "Starting linux generate script"
# Pick the CUDA compiler unless the caller already chose one via CUDACXX.
if [ -z "${CUDACXX}" ]; then
    if [ -x /usr/local/cuda/bin/nvcc ]; then
        export CUDACXX=/usr/local/cuda/bin/nvcc
    else
        # Try the default location in case it exists. A missing nvcc is not
        # an error here: CUDACXX is then exported empty, so the lookup
        # failure is ignored on purpose rather than masked by `export`.
        CUDACXX=$(command -v nvcc || true)
        export CUDACXX
    fi
fi
# Baseline cmake flags that the CUDA, oneAPI and ROCm runner builds in this
# script prepend to their own definitions.
COMMON_CMAKE_DEFS="-DCMAKE_SKIP_RPATH=on -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off"
# gen_common.sh is loaded from the directory this script lives in; the path is
# quoted so a checkout path containing spaces still works.
# NOTE(review): init_vars, git_module_setup and apply_patches are not defined
# in this file - presumably they come from gen_common.sh.
source "$(dirname "$0")/gen_common.sh"
init_vars
git_module_setup
apply_patches

# init_vars is run a second time after patching, as in the original sequence.
init_vars
# Static library build. [[ ... || ... ]] replaces the ambiguous `[ ... -o ... ]`.
if [[ -z "${OLLAMA_SKIP_STATIC_GENERATE}" || "${OLLAMA_CPU_TARGET}" == "static" ]]; then
    # Builds by default, allows skipping, forces build if OLLAMA_CPU_TARGET="static"
    # Enables optimized Dockerfile builds using a blanket skip and targeted overrides
    # Static build for linking into the Go binary
    init_vars
    CMAKE_TARGETS="--target llama --target ggml"
    CMAKE_DEFS="-DBUILD_SHARED_LIBS=off -DGGML_NATIVE=off -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off ${CMAKE_DEFS}"
    # Note: this directory is keyed on ARCH, the runner builds use GOARCH.
    BUILD_DIR="../build/linux/${ARCH}_static"
    echo "Building static library"
    build
fi

# CPU runner builds. Skipped entirely when OLLAMA_SKIP_CPU_GENERATE is set.
# OLLAMA_CUSTOM_CPU_DEFS selects a single custom build; otherwise
# OLLAMA_CPU_TARGET (cpu | cpu_avx | cpu_avx2) restricts the build to one
# variant, and an empty value builds every variant that applies to ARCH.
# Each variant follows the same sequence: init_vars, set CMAKE_DEFS / RUNNER /
# BUILD_DIR, then build, install, dist, compress.
init_vars
if [[ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]]; then
    # Users building from source can tune the exact flags we pass to cmake for configuring
    # llama.cpp, and we'll build only 1 CPU variant in that case as the default.
    if [[ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]]; then
        init_vars
        echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
        CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
        RUNNER="cpu"
        BUILD_DIR="../build/linux/${GOARCH}/${RUNNER}"
        echo "Building custom CPU"
        build
        install
        dist
        compress
    else
        # Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
        # -DGGML_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
        # -DGGML_F16C -- 2012 Intel Ivy Bridge & AMD 2011 Bulldozer (No significant improvement over just AVX)
        # -DGGML_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
        # -DGGML_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
        # Note: the following seem to yield slower results than AVX2 - ymmv
        # -DGGML_AVX512 -- 2017 Intel Skylake and High End DeskTop (HEDT)
        # -DGGML_AVX512_VBMI -- 2018 Intel Cannon Lake
        # -DGGML_AVX512_VNNI -- 2021 Intel Alder Lake

        COMMON_CPU_DEFS="-DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off"
        if [[ -z "${OLLAMA_CPU_TARGET}" || "${OLLAMA_CPU_TARGET}" == "cpu" ]]; then
            #
            # CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
            #
            init_vars
            CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
            RUNNER=cpu
            BUILD_DIR="../build/linux/${GOARCH}/${RUNNER}"
            echo "Building LCD CPU"
            build
            install
            dist
            compress
        fi

        if [[ "${ARCH}" == "x86_64" ]]; then
            #
            # ARM chips in M1/M2/M3-based MACs and NVidia Tegra devices do not currently support avx extensions.
            #
            if [[ -z "${OLLAMA_CPU_TARGET}" || "${OLLAMA_CPU_TARGET}" == "cpu_avx" ]]; then
                #
                # ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
                # Approximately 400% faster than LCD on same CPU
                #
                init_vars
                CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
                RUNNER=cpu_avx
                BUILD_DIR="../build/linux/${GOARCH}/${RUNNER}"
                echo "Building AVX CPU"
                build
                install
                dist
                compress
            fi

            if [[ -z "${OLLAMA_CPU_TARGET}" || "${OLLAMA_CPU_TARGET}" == "cpu_avx2" ]]; then
                #
                # ~2013 CPU Dynamic library
                # Approximately 10% faster than AVX on same CPU
                #
                init_vars
                CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on ${CMAKE_DEFS}"
                RUNNER=cpu_avx2
                BUILD_DIR="../build/linux/${GOARCH}/${RUNNER}"
                echo "Building AVX2 CPU"
                build
                install
                dist
                compress
            fi
        fi
    fi
else
    echo "Skipping CPU generation step as requested"
fi

# If needed, look for the default CUDA toolkit location
if [[ -z "${CUDA_LIB_DIR}" && -d /usr/local/cuda/lib64 ]]; then
    CUDA_LIB_DIR=/usr/local/cuda/lib64
fi

# If needed, look for CUDA on Arch Linux
if [[ -z "${CUDA_LIB_DIR}" && -d /opt/cuda/targets/x86_64-linux/lib ]]; then
    CUDA_LIB_DIR=/opt/cuda/targets/x86_64-linux/lib
fi

# Allow override in case libcudart is in the wrong place
# NOTE(review): CUDART_LIB_DIR is assigned here but not read anywhere else in
# this script - confirm whether gen_common.sh or the Go side consumes it.
if [[ -z "${CUDART_LIB_DIR}" ]]; then
    CUDART_LIB_DIR="${CUDA_LIB_DIR}"
fi

# Print the major version of the first libcudart.so.* found in directory $1,
# or nothing when the directory holds none.
# Only the file name is parsed, so a dot in the directory path (for example
# /usr/local/cuda-12.4/lib64) cannot shift the version field.
# Arguments: $1 - directory to search
# Outputs:   the major version (e.g. "12") on stdout
cuda_major_version() {
    local cudart version
    for cudart in "$1"/libcudart.so.*; do
        # An unmatched glob is left as the literal pattern; skip it.
        [[ -e "${cudart}" || -L "${cudart}" ]] || continue
        version="${cudart##*/libcudart.so.}"
        echo "${version%%.*}"
        return
    done
}

# CUDA runner build. Runs when OLLAMA_SKIP_CUDA_GENERATE is empty and
# CUDA_LIB_DIR names an existing directory.
if [[ -z "${OLLAMA_SKIP_CUDA_GENERATE}" && -d "${CUDA_LIB_DIR}" ]]; then
    echo "CUDA libraries detected - building dynamic CUDA library"
    init_vars
    CUDA_MAJOR=$(cuda_major_version "${CUDA_LIB_DIR}")
    # A caller-supplied CUDA_VARIANT wins over the detected major version.
    if [[ -n "${CUDA_MAJOR}" && -z "${CUDA_VARIANT}" ]]; then
        CUDA_VARIANT=_v${CUDA_MAJOR}
    fi
    if [[ "${ARCH}" == "arm64" ]]; then
        echo "ARM CPU detected - disabling unsupported AVX instructions"

        # ARM-based CPUs such as M1 and Tegra do not support AVX extensions.
        #
        # CUDA compute < 6.0 lacks proper FP16 support on ARM.
        # Disabling has minimal performance effect while maintaining compatibility.
        ARM64_DEFS="-DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_CUDA_F16=off"
    fi
    # Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
    if [[ -n "${OLLAMA_CUSTOM_CUDA_DEFS}" ]]; then
        echo "OLLAMA_CUSTOM_CUDA_DEFS=\"${OLLAMA_CUSTOM_CUDA_DEFS}\""
        CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES} ${OLLAMA_CUSTOM_CUDA_DEFS}"
        echo "Building custom CUDA GPU"
    else
        CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}"
    fi
    export CUDAFLAGS="-t8"
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS} -DGGML_STATIC=off"
    RUNNER=cuda${CUDA_VARIANT}
    BUILD_DIR="../build/linux/${GOARCH}/${RUNNER}"
    export LLAMA_SERVER_LDFLAGS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
    CUDA_DIST_DIR="${CUDA_DIST_DIR:-${DIST_BASE}/lib/ollama}"
    build
    install
    dist
    echo "Installing CUDA dependencies in ${CUDA_DIST_DIR}"
    mkdir -p "${CUDA_DIST_DIR}"
    # Only the directory part is quoted so the globs still expand while a
    # CUDA_LIB_DIR containing spaces stays one word. An unmatched glob is
    # handed to cp literally and fails the build, as before.
    for lib in "${CUDA_LIB_DIR}"/libcudart.so* "${CUDA_LIB_DIR}"/libcublas.so* "${CUDA_LIB_DIR}"/libcublasLt.so*; do
        cp -a "${lib}" "${CUDA_DIST_DIR}"
    done
    compress

fi

if [[ -z "${ONEAPI_ROOT}" ]]; then
    # Try the default location in case it exists
    ONEAPI_ROOT=/opt/intel/oneapi
fi

# oneAPI runner build. Runs when OLLAMA_SKIP_ONEAPI_GENERATE is empty and
# ONEAPI_ROOT names an existing directory.
if [[ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" && -d "${ONEAPI_ROOT}" ]]; then
    echo "OneAPI libraries detected - building dynamic OneAPI library"
    init_vars
    # Quoted so an ONEAPI_ROOT containing spaces is still sourced as one path.
    source "${ONEAPI_ROOT}/setvars.sh" --force # set up environment variables for oneAPI
    CC=icx
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL=ON -DGGML_SYCL_F16=OFF"
    RUNNER=oneapi
    BUILD_DIR="../build/linux/${GOARCH}/${RUNNER}"
    ONEAPI_DIST_DIR="${DIST_BASE}/lib/ollama"
    export LLAMA_SERVER_LDFLAGS="-fsycl -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb"
    DEBUG_FLAGS="" # icx compiles with -O0 if we pass -g, so we must remove it
    build

    # copy oneAPI dependencies
    mkdir -p "${ONEAPI_DIST_DIR}"
    # Take the resolved path ("name => /path (addr)") of every sycl/mkl/tbb
    # library the freshly built server links against.
    for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e sycl -e mkl -e tbb); do
        cp -a "${dep}" "${ONEAPI_DIST_DIR}"
    done
    # Compiler runtime libraries copied by name from the oneAPI install,
    # independent of the ldd output above.
    for lib in libOpenCL.so libimf.so libintlc.so.5 libirng.so libpi_level_zero.so libsvml.so libur_loader.so.0; do
        cp "${ONEAPI_ROOT}/compiler/latest/lib/${lib}" "${ONEAPI_DIST_DIR}"
    done
    install
    dist
    compress
fi

if [[ -z "${ROCM_PATH}" ]]; then
    # Try the default location in case it exists
    ROCM_PATH=/opt/rocm
fi

if [[ -z "${CLBlast_DIR}" ]]; then
    # Try the default location in case it exists
    if [[ -d /usr/lib/cmake/CLBlast ]]; then
        export CLBlast_DIR=/usr/lib/cmake/CLBlast
    fi
fi

# Print "_v<N>" where <N> is the last dot-separated component of the first
# librocblas.so.X.Y.NNNNN (five-character final component) under $1/lib, or
# nothing when no such regular file exists.
# Only the trailing component of the file name is used, so a dot in the ROCm
# path (for example /opt/rocm-6.1.2) cannot shift the field, and several
# matching files no longer break the test.
# Arguments: $1 - ROCm installation root
# Outputs:   the variant suffix (e.g. "_v60102") on stdout
rocm_variant_suffix() {
    local rocblas
    for rocblas in "$1"/lib/librocblas.so.*.*.?????; do
        # An unmatched glob is left as the literal pattern; skip it.
        [[ -f "${rocblas}" ]] || continue
        echo "_v${rocblas##*.}"
        return
    done
}

# ROCm runner build. Runs when OLLAMA_SKIP_ROCM_GENERATE is empty and
# ROCM_PATH names an existing directory.
if [[ -z "${OLLAMA_SKIP_ROCM_GENERATE}" && -d "${ROCM_PATH}" ]]; then
    echo "ROCm libraries detected - building dynamic ROCm library"
    # ROCM_VARIANT keeps any pre-existing value when no librocblas is found.
    rocm_detected_variant=$(rocm_variant_suffix "${ROCM_PATH}")
    if [[ -n "${rocm_detected_variant}" ]]; then
        ROCM_VARIANT=${rocm_detected_variant}
    fi
    init_vars
    # The target list is computed once and used for both cmake variables.
    rocm_gpu_targets=$(amdGPUs)
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DGGML_HIPBLAS=on -DGGML_CUDA_NO_PEER_COPY=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=${rocm_gpu_targets} -DGPU_TARGETS=${rocm_gpu_targets}"
    # Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
    if [[ -n "${OLLAMA_CUSTOM_ROCM_DEFS}" ]]; then
        echo "OLLAMA_CUSTOM_ROCM_DEFS=\"${OLLAMA_CUSTOM_ROCM_DEFS}\""
        CMAKE_DEFS="${CMAKE_DEFS} ${OLLAMA_CUSTOM_ROCM_DEFS}"
        echo "Building custom ROCM GPU"
    fi
    RUNNER=rocm${ROCM_VARIANT}
    BUILD_DIR="../build/linux/${GOARCH}/${RUNNER}"
    # ROCm dependencies are too large to fit into a unified bundle
    ROCM_DIST_DIR="${DIST_BASE}/../linux-${GOARCH}-rocm/lib/ollama"
    # TODO figure out how to disable runpath (rpath)
    # export CMAKE_HIP_FLAGS="-fno-rtlib-add-rpath" # doesn't work
    export LLAMA_SERVER_LDFLAGS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
    build

    # copy the ROCM dependencies
    mkdir -p "${ROCM_DIST_DIR}"
    # From the built server's ldd output, take the resolved paths that mention
    # rocm, amdgpu, libtinfo, libnuma or libelf, excluding the runner's own
    # build directory.
    for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -v "${GOARCH}/rocm${ROCM_VARIANT}" | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf); do
        # The trailing glob also picks up the versioned siblings of the name.
        cp -a "${dep}"* "${ROCM_DIST_DIR}"
        # When dep is a symlink, copy the file it finally resolves to as well.
        # A failed readlink is treated as "nothing further to copy".
        dep_target=$(readlink -f "${dep}") || dep_target="${dep}"
        if [[ "${dep_target}" != "${dep}" ]]; then
            cp "${dep_target}" "${ROCM_DIST_DIR}"
        fi
    done
    install
    dist
    compress
fi

# Final steps. cleanup and wait_for_compress are not defined in this file.
# NOTE(review): presumably both come from gen_common.sh - confirm.
cleanup
wait_for_compress
# List the entries of PAYLOAD_BASE. `&&` keeps a failed cd from listing the
# current directory instead, and the quotes keep a path with spaces intact.
echo "go generate completed.  LLM runners: $(cd "${PAYLOAD_BASE}" && echo *)"