#!/bin/bash
# This script is intended to be run by `go generate`; the working directory
# must be llm/generate/.
#
# First we build one or more CPU-based LLM libraries.
#
# Then if we detect CUDA, we build a CUDA dynamic library, and carry the
# required library dependencies alongside it.
#
# Then if we detect ROCm, we build a dynamically loaded ROCm lib.  The ROCm
# libraries are quite large, and also dynamically load data files at runtime
# which are themselves large, so we don't attempt to carry them as payload.
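#
# A typical manual invocation (illustrative; this script is normally driven by
# go generate) might look like:
#   cd llm/generate && bash ./gen_linux.sh
# with behavior tuned through environment variables such as OLLAMA_CPU_TARGET,
# OLLAMA_SKIP_CUDA_GENERATE, OLLAMA_SKIP_ROCM_GENERATE, or AMDGPU_TARGETS.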

set -ex
set -o pipefail
compress_pids=""

# See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
amdGPUs() {
    if [ -n "${AMDGPU_TARGETS}" ]; then
        echo "${AMDGPU_TARGETS}"
        return
    fi
    GPU_LIST=(
        "gfx900"
        "gfx906:xnack-"
        "gfx908:xnack-"
        "gfx90a:xnack+"
        "gfx90a:xnack-"
        "gfx940"
        "gfx941"
        "gfx942"
        "gfx1010"
        "gfx1012"
        "gfx1030"
        "gfx1100"
        "gfx1101"
        "gfx1102"
    )
    (
        IFS=$';'
        echo "'${GPU_LIST[*]}'"
    )
}
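# (Illustrative) amdGPUs emits the list joined with ';', the separator CMake
# expects for AMDGPU_TARGETS / GPU_TARGETS, e.g. 'gfx900;gfx906:xnack-;...;gfx1102'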

echo "Starting linux generate script"
if [ -z "${CUDACXX}" ]; then
    if [ -x /usr/local/cuda/bin/nvcc ]; then
        export CUDACXX=/usr/local/cuda/bin/nvcc
    else
        # Otherwise fall back to whatever nvcc is found on the PATH, if any
        export CUDACXX=$(command -v nvcc)
    fi
fi
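
# (Illustrative) exporting CUDACXX=/usr/local/cuda-12.4/bin/nvcc before running
# this script pins a specific CUDA toolkit's compiler; the path is an example only.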
COMMON_CMAKE_DEFS="-DCMAKE_SKIP_RPATH=on -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off"
source $(dirname $0)/gen_common.sh
init_vars
git_module_setup
apply_patches
init_vars
if [ -z "${OLLAMA_SKIP_STATIC_GENERATE}" -o "${OLLAMA_CPU_TARGET}" = "static" ]; then
    # Builds by default, allows skipping, forces build if OLLAMA_CPU_TARGET="static"
    # Enables optimized Dockerfile builds using a blanket skip and targeted overrides
    # Static build for linking into the Go binary
    init_vars
    CMAKE_TARGETS="--target llama --target ggml"
    CMAKE_DEFS="-DBUILD_SHARED_LIBS=off -DGGML_NATIVE=off -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off ${CMAKE_DEFS}"
    BUILD_DIR="../build/linux/${ARCH}_static"
    echo "Building static library"
    build
fi
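# (Illustrative) setting OLLAMA_CPU_TARGET=static forces the static build above
# even when OLLAMA_SKIP_STATIC_GENERATE is set, per the comments in that block.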
init_vars
if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
    # Users building from source can tune the exact flags we pass to cmake for configuring
    # llama.cpp, and we'll build only 1 CPU variant in that case as the default.
    if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then
        init_vars
        echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
        CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
        BUILD_DIR="../build/linux/${ARCH}/cpu"
        echo "Building custom CPU"
        build
        install
        compress
    else
        # Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
        # -DGGML_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
        # -DGGML_F16C -- 2012 Intel Ivy Bridge & 2011 AMD Bulldozer (no significant improvement over just AVX)
        # -DGGML_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
        # -DGGML_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
        # Note: the following seem to yield slower results than AVX2 - ymmv
        # -DGGML_AVX512 -- 2017 Intel Skylake and High End DeskTop (HEDT)
        # -DGGML_AVX512_VBMI -- 2018 Intel Cannon Lake
        # -DGGML_AVX512_VNNI -- 2021 Intel Alder Lake
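        #
        # (Illustrative) to see which of these extensions the local CPU reports:
        #   grep -o -w -e avx -e avx2 -e avx512f -e f16c -e fma /proc/cpuinfo | sort -u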

        COMMON_CPU_DEFS="-DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off"
        if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu" ]; then
            #
            # CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
            #
            init_vars
            CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
            BUILD_DIR="../build/linux/${ARCH}/cpu"
            echo "Building LCD CPU"
            build
            install
            compress
        fi

        if [ "${ARCH}" == "x86_64" ]; then
            #
            # ARM chips in M1/M2/M3-based Macs and NVIDIA Tegra devices do not currently support AVX extensions.
            #
            if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu_avx" ]; then
                #
                # ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
                # Approximately 400% faster than LCD on same CPU
                #
                init_vars
                CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
                BUILD_DIR="../build/linux/${ARCH}/cpu_avx"
                echo "Building AVX CPU"
                build
                install
                compress
            fi

            if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu_avx2" ]; then
                #
                # ~2013 CPU Dynamic library
                # Approximately 10% faster than AVX on same CPU
                #
                init_vars
                CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on ${CMAKE_DEFS}"
                BUILD_DIR="../build/linux/${ARCH}/cpu_avx2"
                echo "Building AVX2 CPU"
                build
                install
                compress
            fi
        fi
    fi
else
    echo "Skipping CPU generation step as requested"
fi
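# (Illustrative) OLLAMA_CPU_TARGET=cpu_avx2 limits the block above to the AVX2
# variant only, while OLLAMA_SKIP_CPU_GENERATE=1 skips CPU generation entirely.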
# If needed, look for the default CUDA toolkit location
if [ -z "${CUDA_LIB_DIR}" ] && [ -d /usr/local/cuda/lib64 ]; then
    CUDA_LIB_DIR=/usr/local/cuda/lib64
fi

# If needed, look for CUDA on Arch Linux
if [ -z "${CUDA_LIB_DIR}" ] && [ -d /opt/cuda/targets/x86_64-linux/lib ]; then
    CUDA_LIB_DIR=/opt/cuda/targets/x86_64-linux/lib
fi

# Allow override in case libcudart is in the wrong place
if [ -z "${CUDART_LIB_DIR}" ]; then
    CUDART_LIB_DIR="${CUDA_LIB_DIR}"
fi

if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
    echo "CUDA libraries detected - building dynamic CUDA library"
    init_vars
    CUDA_MAJOR=$(ls "${CUDA_LIB_DIR}"/libcudart.so.* | head -1 | cut -f3 -d. || true)
    if [ -n "${CUDA_MAJOR}" -a -z "${CUDA_VARIANT}" ]; then
        CUDA_VARIANT=_v${CUDA_MAJOR}
    fi
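    # (Illustrative) libcudart.so.12.4.127 would yield CUDA_MAJOR=12 and
    # CUDA_VARIANT=_v12, so artifacts land under .../cuda_v12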
    if [ "${ARCH}" == "arm64" ]; then
        echo "ARM CPU detected - disabling unsupported AVX instructions"

        # ARM-based CPUs such as M1 and Tegra do not support AVX extensions.
        #
        # CUDA compute < 6.0 lacks proper FP16 support on ARM.
        # Disabling has minimal performance effect while maintaining compatibility.
        ARM64_DEFS="-DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_CUDA_F16=off"
    fi
    # Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
    if [ -n "${OLLAMA_CUSTOM_CUDA_DEFS}" ]; then
        echo "OLLAMA_CUSTOM_CUDA_DEFS=\"${OLLAMA_CUSTOM_CUDA_DEFS}\""
        CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES} ${OLLAMA_CUSTOM_CUDA_DEFS}"
        echo "Building custom CUDA GPU"
    else
        CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}"
    fi
    export CUDAFLAGS="-t8"
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS} -DGGML_STATIC=off"
    BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}"
    export LLAMA_SERVER_LDFLAGS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
    CUDA_DIST_DIR="${CUDA_DIST_DIR:-${DIST_BASE}/ollama_libs}"
    build
    install
    echo "Installing CUDA dependencies in ${CUDA_DIST_DIR}"
    mkdir -p "${CUDA_DIST_DIR}"
    for lib in ${CUDA_LIB_DIR}/libcudart.so* ${CUDA_LIB_DIR}/libcublas.so* ${CUDA_LIB_DIR}/libcublasLt.so* ; do
        cp -a "${lib}" "${CUDA_DIST_DIR}"
    done
    compress
fi

if [ -z "${ONEAPI_ROOT}" ]; then
    # Try the default location in case it exists
    ONEAPI_ROOT=/opt/intel/oneapi
fi

if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then
    echo "OneAPI libraries detected - building dynamic OneAPI library"
    init_vars
    source ${ONEAPI_ROOT}/setvars.sh --force # set up environment variables for oneAPI
    CC=icx
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL=ON -DGGML_SYCL_F16=OFF"
    BUILD_DIR="../build/linux/${ARCH}/oneapi"
    ONEAPI_DIST_DIR="${DIST_BASE}/ollama_libs"
    export LLAMA_SERVER_LDFLAGS="-fsycl -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb"
    DEBUG_FLAGS="" # icx compiles with -O0 if we pass -g, so we must remove it
    build

    # copy oneAPI dependencies
    mkdir -p "${ONEAPI_DIST_DIR}"
    for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e sycl -e mkl -e tbb); do
        cp -a "${dep}" "${ONEAPI_DIST_DIR}"
    done
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libOpenCL.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libimf.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libintlc.so.5" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libirng.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libpi_level_zero.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libsvml.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libur_loader.so.0" "${ONEAPI_DIST_DIR}"
    install
    compress
fi

if [ -z "${ROCM_PATH}" ]; then
    # Try the default location in case it exists
    ROCM_PATH=/opt/rocm
fi

if [ -z "${CLBlast_DIR}" ]; then
    # Try the default location in case it exists
    if [ -d /usr/lib/cmake/CLBlast ]; then
        export CLBlast_DIR=/usr/lib/cmake/CLBlast
    fi
fi

if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
    echo "ROCm libraries detected - building dynamic ROCm library"
    if [ -f ${ROCM_PATH}/lib/librocblas.so.*.*.????? ]; then
        ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocblas.so.*.*.????? | cut -f5 -d. || true)
    fi
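    # (Illustrative) librocblas.so.4.1.60102 would yield ROCM_VARIANT=_v60102,
    # so artifacts land under .../rocm_v60102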
    init_vars
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DGGML_HIPBLAS=on -DLLAMA_CUDA_NO_PEER_COPY=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
    # Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
    if [ -n "${OLLAMA_CUSTOM_ROCM_DEFS}" ]; then
        echo "OLLAMA_CUSTOM_ROCM_DEFS=\"${OLLAMA_CUSTOM_ROCM_DEFS}\""
        CMAKE_DEFS="${CMAKE_DEFS} ${OLLAMA_CUSTOM_ROCM_DEFS}"
        echo "Building custom ROCM GPU"
    fi
    BUILD_DIR="../build/linux/${ARCH}/rocm${ROCM_VARIANT}"
    ROCM_DIST_DIR="${DIST_BASE}/ollama_libs"
    # TODO figure out how to disable runpath (rpath)
    # export CMAKE_HIP_FLAGS="-fno-rtlib-add-rpath" # doesn't work
    export LLAMA_SERVER_LDFLAGS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
    build
    # copy the ROCm dependencies
    mkdir -p "${ROCM_DIST_DIR}"
    for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -v "${ARCH}/rocm${ROCM_VARIANT}" | grep -e rocm -e amdgpu -e libtinfo ); do
        cp -a "${dep}"* "${ROCM_DIST_DIR}"
    done
    install
    compress
fi
cleanup
wait_for_compress
echo "go generate completed.  LLM runners: $(cd ${BUILD_DIR}/..; echo *)"