#!/bin/bash
# This script is intended to run inside go generate;
# the working directory must be llm/generate/
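# (typically invoked via "go generate ./..." from the repository root, which
# runs this script with llm/generate/ as its working directory)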

# First we build one or more CPU based LLM libraries
#
# Then if we detect CUDA, we build a CUDA dynamic library, and carry the required
# library dependencies
#
# Then if we detect ROCm, we build a dynamically loaded ROCm lib.  The ROCm
# libraries are quite large, and also dynamically load data files at runtime
# which in turn are large, so we don't attempt to carry them as payload

set -ex
set -o pipefail

# See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
amdGPUs() {
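    # Respect an explicit override of the target list; otherwise fall back to
    # the default set of supported GPUs below.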
    if [ -n "${AMDGPU_TARGETS}" ]; then
        echo "${AMDGPU_TARGETS}"
        return
    fi
    GPU_LIST=(
        "gfx900"
        "gfx906:xnack-"
        "gfx908:xnack-"
        "gfx90a:xnack+"
        "gfx90a:xnack-"
        "gfx940"
        "gfx941"
        "gfx942"
        "gfx1010"
        "gfx1012"
        "gfx1030"
        "gfx1100"
        "gfx1101"
        "gfx1102"
    )
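    # Join the entries with ';', the list separator CMake expects for
    # AMDGPU_TARGETS/GPU_TARGETS; the subshell keeps the IFS change local.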
    (
        IFS=$';'
        echo "'${GPU_LIST[*]}'"
    )
}

echo "Starting linux generate script"
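# Prefer an explicitly set CUDACXX; otherwise fall back to the standard CUDA
# toolkit location, then to whatever nvcc is found on the PATH.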
if [ -z "${CUDACXX}" ]; then
    if [ -x /usr/local/cuda/bin/nvcc ]; then
        export CUDACXX=/usr/local/cuda/bin/nvcc
    else
        # Try the default location in case it exists
        export CUDACXX=$(command -v nvcc)
    fi
fi
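# CMake flags shared by the GPU (CUDA / oneAPI / ROCm) builds below; AVX is the
# baseline instruction set, with newer extensions disabled for compatibility.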
COMMON_CMAKE_DEFS="-DCMAKE_SKIP_RPATH=on -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off"
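# gen_common.sh provides the shared helpers used throughout this script
# (init_vars, git_module_setup, apply_patches, build, install, compress, cleanup).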
source $(dirname $0)/gen_common.sh
init_vars
git_module_setup
apply_patches

init_vars
if [ -z "${OLLAMA_SKIP_STATIC_GENERATE}" -o "${OLLAMA_CPU_TARGET}" = "static" ]; then
    # Builds by default, allows skipping, forces build if OLLAMA_CPU_TARGET="static"
    # Enables optimized Dockerfile builds using a blanket skip and targeted overrides
    # Static build for linking into the Go binary
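    # For example, a Dockerfile stage can set OLLAMA_CPU_TARGET=static together
    # with the OLLAMA_SKIP_*_GENERATE variables to produce only this archive.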
    init_vars
    CMAKE_TARGETS="--target llama --target ggml"
    CMAKE_DEFS="-DBUILD_SHARED_LIBS=off -DGGML_NATIVE=off -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off ${CMAKE_DEFS}"
    BUILD_DIR="../build/linux/${ARCH}_static"
    echo "Building static library"
    build
fi

init_vars
if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
    # Users building from source can tune the exact flags we pass to cmake for configuring
    # llama.cpp, and we'll build only 1 CPU variant in that case as the default.
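    # Illustrative example (the exact flags are the user's choice):
    #   OLLAMA_CUSTOM_CPU_DEFS="-DGGML_AVX=on -DGGML_AVX2=on -DGGML_FMA=on" go generate ./...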
    if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then
        init_vars
        echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
        CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
        BUILD_DIR="../build/linux/${ARCH}/cpu"
        echo "Building custom CPU"
        build
        install
        compress
    else
        # Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
        # -DGGML_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
        # -DGGML_F16C -- 2012 Intel Ivy Bridge & AMD 2011 Bulldozer (No significant improvement over just AVX)
        # -DGGML_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
        # -DGGML_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
        # Note: the following seem to yield slower results than AVX2 - ymmv
        # -DGGML_AVX512 -- 2017 Intel Skylake and High End DeskTop (HEDT)
        # -DGGML_AVX512_VBMI -- 2018 Intel Cannon Lake
        # -DGGML_AVX512_VNNI -- 2021 Intel Alder Lake

        COMMON_CPU_DEFS="-DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off"
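        # Flags shared by every CPU variant below; each variant layers its
        # instruction-set extensions on top of these.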
        if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu" ]; then
            #
            # CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
            #
            init_vars
            CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
            BUILD_DIR="../build/linux/${ARCH}/cpu"
            echo "Building LCD CPU"
            build
            install
            compress
        fi

        if [ "${ARCH}" == "x86_64" ]; then
            #
            # ARM chips in M1/M2/M3-based Macs and NVIDIA Tegra devices do not currently support AVX extensions.
            #
            if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu_avx" ]; then
                #
                # ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
                # Approximately 400% faster than LCD on same CPU
                #
                init_vars
                CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
                BUILD_DIR="../build/linux/${ARCH}/cpu_avx"
                echo "Building AVX CPU"
                build
                install
                compress
            fi

            if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu_avx2" ]; then
                #
                # ~2013 CPU Dynamic library
                # Approximately 10% faster than AVX on same CPU
                #
                init_vars
                CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on ${CMAKE_DEFS}"
                BUILD_DIR="../build/linux/${ARCH}/cpu_avx2"
                echo "Building AVX2 CPU"
                build
                install
                compress
            fi
        fi
    fi
else
    echo "Skipping CPU generation step as requested"
fi

# If needed, look for the default CUDA toolkit location
if [ -z "${CUDA_LIB_DIR}" ] && [ -d /usr/local/cuda/lib64 ]; then
    CUDA_LIB_DIR=/usr/local/cuda/lib64
fi

# If needed, look for CUDA on Arch Linux
if [ -z "${CUDA_LIB_DIR}" ] && [ -d /opt/cuda/targets/x86_64-linux/lib ]; then
    CUDA_LIB_DIR=/opt/cuda/targets/x86_64-linux/lib
fi

# Allow override in case libcudart is in the wrong place
if [ -z "${CUDART_LIB_DIR}" ]; then
    CUDART_LIB_DIR="${CUDA_LIB_DIR}"
fi

if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
    echo "CUDA libraries detected - building dynamic CUDA library"
    init_vars
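    # Derive the CUDA major version from the runtime library name,
    # e.g. libcudart.so.12 -> "12" (the third '.'-separated field).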
    CUDA_MAJOR=$(ls "${CUDA_LIB_DIR}"/libcudart.so.* | head -1 | cut -f3 -d. || true)
    if [ -n "${CUDA_MAJOR}" ]; then
        CUDA_VARIANT=_v${CUDA_MAJOR}
    fi
    if [ "${ARCH}" == "arm64" ]; then
        echo "ARM CPU detected - disabling unsupported AVX instructions"

        # ARM-based CPUs such as M1 and Tegra do not support AVX extensions.
        #
        # CUDA compute < 6.0 lacks proper FP16 support on ARM.
        # Disabling has minimal performance effect while maintaining compatibility.
        ARM64_DEFS="-DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_CUDA_F16=off"
    fi
    # Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
    if [ -n "${OLLAMA_CUSTOM_CUDA_DEFS}" ]; then
        echo "OLLAMA_CUSTOM_CUDA_DEFS=\"${OLLAMA_CUSTOM_CUDA_DEFS}\""
        CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES} ${OLLAMA_CUSTOM_CUDA_DEFS}"
        echo "Building custom CUDA GPU"
    else
        CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}"
    fi
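    # -t8: allow nvcc to run up to 8 compilation tasks in parallel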
    export CUDAFLAGS="-t8"
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS} -DGGML_STATIC=off"
    BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}"
    export LLAMA_SERVER_LDFLAGS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
    CUDA_DIST_DIR="${DIST_BASE}/ollama_libs"
    build
    install
    mkdir -p "${CUDA_DIST_DIR}"
    for lib in ${CUDA_LIB_DIR}/libcudart.so* ${CUDA_LIB_DIR}/libcublas.so* ${CUDA_LIB_DIR}/libcublasLt.so* ; do
        cp -a "${lib}" "${CUDA_DIST_DIR}"
    done
    compress

fi

if [ -z "${ONEAPI_ROOT}" ]; then
    # Try the default location in case it exists
    ONEAPI_ROOT=/opt/intel/oneapi
fi

if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then
    echo "OneAPI libraries detected - building dynamic OneAPI library"
    init_vars
    source ${ONEAPI_ROOT}/setvars.sh --force # set up environment variables for oneAPI
    CC=icx
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL=ON -DGGML_SYCL_F16=OFF"
    BUILD_DIR="../build/linux/${ARCH}/oneapi"
    ONEAPI_DIST_DIR="${DIST_BASE}/ollama_libs"
    export LLAMA_SERVER_LDFLAGS="-fsycl -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb"
    DEBUG_FLAGS="" # icx compiles with -O0 if we pass -g, so we must remove it
    build

    # copy oneAPI dependencies
    mkdir -p "${ONEAPI_DIST_DIR}"
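    # Pull in the SYCL/MKL/TBB shared libraries the freshly built server binary
    # actually resolves, as reported by ldd.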
    for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e sycl -e mkl -e tbb); do
        cp -a "${dep}" "${ONEAPI_DIST_DIR}"
    done
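    # Also bundle the oneAPI compiler runtime libraries that the sycl/mkl/tbb
    # filter above does not match.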
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libOpenCL.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libimf.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libintlc.so.5" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libirng.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libpi_level_zero.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libsvml.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libur_loader.so.0" "${ONEAPI_DIST_DIR}"
    install
    compress
fi

if [ -z "${ROCM_PATH}" ]; then
    # Try the default location in case it exists
    ROCM_PATH=/opt/rocm
fi

if [ -z "${CLBlast_DIR}" ]; then
    # Try the default location in case it exists
    if [ -d /usr/lib/cmake/CLBlast ]; then
        export CLBlast_DIR=/usr/lib/cmake/CLBlast
    fi
fi

if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
    echo "ROCm libraries detected - building dynamic ROCm library"
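    # Versioned rocBLAS files are named librocblas.so.<x>.<y>.<5-digit build>;
    # the build number becomes the runner variant suffix (rocm_v<build>).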
    if [ -f ${ROCM_PATH}/lib/librocblas.so.*.*.????? ]; then
        ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocblas.so.*.*.????? | cut -f5 -d. || true)
    fi
    init_vars
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DGGML_HIPBLAS=on -DLLAMA_CUDA_NO_PEER_COPY=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
    # Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
    if [ -n "${OLLAMA_CUSTOM_ROCM_DEFS}" ]; then
        echo "OLLAMA_CUSTOM_ROCM_DEFS=\"${OLLAMA_CUSTOM_ROCM_DEFS}\""
        CMAKE_DEFS="${CMAKE_DEFS} ${OLLAMA_CUSTOM_ROCM_DEFS}"
        echo "Building custom ROCM GPU"
    fi
    BUILD_DIR="../build/linux/${ARCH}/rocm${ROCM_VARIANT}"
    ROCM_DIST_DIR="${DIST_BASE}/ollama_libs"
    # TODO figure out how to disable runpath (rpath)
    # export CMAKE_HIP_FLAGS="-fno-rtlib-add-rpath" # doesn't work
    export LLAMA_SERVER_LDFLAGS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
    build

    # copy the ROCm dependencies
    mkdir -p "${ROCM_DIST_DIR}"
    for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -v "${ARCH}/rocm${ROCM_VARIANT}" | grep -e rocm -e amdgpu -e libtinfo ); do
        cp -a "${dep}"* "${ROCM_DIST_DIR}"
    done
    install
    compress
fi

cleanup
echo "go generate completed.  LLM runners: $(cd ${BUILD_DIR}/..; echo *)"