#!/bin/bash
# This script is intended to run inside the go generate
# working directory must be llm/generate/

# First we build one or more CPU based LLM libraries
#
# Then if we detect CUDA, we build a CUDA dynamic library, and carry the required
# library dependencies
#
# Then if we detect ROCm, we build a dynamically loaded ROCm lib.  The ROCm
# libraries are quite large, and also dynamically load data files at runtime
# which in turn are large, so we don't attempt to carry them as payload
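#
# Behavior can be narrowed via the environment variables read further below, for
# example (a sketch; the exact invocation may differ between releases):
#   OLLAMA_CPU_TARGET=cpu_avx2 OLLAMA_SKIP_CUDA_GENERATE=1 OLLAMA_SKIP_ROCM_GENERATE=1 go generate ./...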

set -ex
set -o pipefail
compress_pids=""

# See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
amdGPUs() {
    if [ -n "${AMDGPU_TARGETS}" ]; then
        echo "${AMDGPU_TARGETS}"
        return
    fi
    GPU_LIST=(
        "gfx900"
        "gfx906:xnack-"
        "gfx908:xnack-"
        "gfx90a:xnack+"
        "gfx90a:xnack-"
        "gfx940"
        "gfx941"
        "gfx942"
        "gfx1010"
        "gfx1012"
        "gfx1030"
        "gfx1100"
        "gfx1101"
        "gfx1102"
    )
    (
        IFS=$';'
        echo "'${GPU_LIST[*]}'"
    )
}
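# With the default GPU_LIST above, amdGPUs prints one quoted, semicolon-separated
# string suitable for -DAMDGPU_TARGETS / -DGPU_TARGETS, along the lines of
# 'gfx900;gfx906:xnack-;gfx908:xnack-;...;gfx1102'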

echo "Starting linux generate script"
if [ -z "${CUDACXX}" ]; then
    if [ -x /usr/local/cuda/bin/nvcc ]; then
        export CUDACXX=/usr/local/cuda/bin/nvcc
    else
        # Try the default location in case it exists
        export CUDACXX=$(command -v nvcc)
    fi
fi
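# CUDACXX can also be exported before running this script to select a specific
# toolkit, e.g. (hypothetical path) CUDACXX=/usr/local/cuda-12.4/bin/nvcc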
COMMON_CMAKE_DEFS="-DCMAKE_SKIP_RPATH=on -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off"
source $(dirname $0)/gen_common.sh
init_vars
git_module_setup
apply_patches

init_vars
if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
    # Users building from source can tune the exact flags we pass to cmake for configuring
    # llama.cpp, and we'll build only 1 CPU variant in that case as the default.
    if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then
        init_vars
        echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
        CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
        RUNNER="cpu"
        BUILD_DIR="../build/linux/${GOARCH}/${RUNNER}"
        echo "Building custom CPU"
        build
        install
        dist
        compress
    else
        # Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
        # -DGGML_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
        # -DGGML_F16C -- 2012 Intel Ivy Bridge & AMD 2011 Bulldozer (No significant improvement over just AVX)
        # -DGGML_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
        # -DGGML_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
        # Note: the following seem to yield slower results than AVX2 - ymmv
        # -DGGML_AVX512 -- 2017 Intel Skylake and High End DeskTop (HEDT)
        # -DGGML_AVX512_VBMI -- 2018 Intel Cannon Lake
        # -DGGML_AVX512_VNNI -- 2021 Intel Alder Lake
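        #
        # For reference, the extensions the build host itself supports can be listed with:
        #   grep -o -w -e avx -e avx2 -e avx512f -e f16c -e fma /proc/cpuinfo | sort -u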

        COMMON_CPU_DEFS="-DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off"
        if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu" ]; then
            #
            # CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
            #
            init_vars
            CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
            RUNNER=cpu
            BUILD_DIR="../build/linux/${GOARCH}/${RUNNER}"
            echo "Building LCD CPU"
            build
            install
            dist
            compress
        fi

        if [ "${ARCH}" == "x86_64" ]; then
            #
            # ARM chips in M1/M2/M3-based Macs and NVIDIA Tegra devices do not currently support AVX extensions.
            #
            if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu_avx" ]; then
                #
                # ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
                # Approximately 400% faster than LCD on same CPU
                #
                init_vars
                CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
                RUNNER=cpu_avx
                BUILD_DIR="../build/linux/${GOARCH}/${RUNNER}"
                echo "Building AVX CPU"
                build
                install
                dist
                compress
            fi

            if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu_avx2" ]; then
                #
                # ~2013 CPU Dynamic library
                # Approximately 10% faster than AVX on same CPU
                #
                init_vars
                CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on ${CMAKE_DEFS}"
                RUNNER=cpu_avx2
                BUILD_DIR="../build/linux/${GOARCH}/${RUNNER}"
                echo "Building AVX2 CPU"
                build
                install
                dist
                compress
            fi
        fi
    fi
else
    echo "Skipping CPU generation step as requested"
fi

# If needed, look for the default CUDA toolkit location
if [ -z "${CUDA_LIB_DIR}" ] && [ -d /usr/local/cuda/lib64 ]; then
    CUDA_LIB_DIR=/usr/local/cuda/lib64
fi

# If needed, look for CUDA on Arch Linux
if [ -z "${CUDA_LIB_DIR}" ] && [ -d /opt/cuda/targets/x86_64-linux/lib ]; then
    CUDA_LIB_DIR=/opt/cuda/targets/x86_64-linux/lib
fi

# Allow override in case libcudart is in the wrong place
if [ -z "${CUDART_LIB_DIR}" ]; then
    CUDART_LIB_DIR="${CUDA_LIB_DIR}"
fi

if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
    echo "CUDA libraries detected - building dynamic CUDA library"
    init_vars
    CUDA_MAJOR=$(ls "${CUDA_LIB_DIR}"/libcudart.so.* | head -1 | cut -f3 -d. || true)
    if [ -n "${CUDA_MAJOR}" -a -z "${CUDA_VARIANT}" ]; then
        CUDA_VARIANT=_v${CUDA_MAJOR}
    fi
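    # Example: a toolkit shipping libcudart.so.12.4.127 (hypothetical version) yields
    # CUDA_MAJOR=12, so the runner below becomes cuda_v12.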
    if [ "${ARCH}" == "arm64" ]; then
        echo "ARM CPU detected - disabling unsupported AVX instructions"

        # ARM-based CPUs such as M1 and Tegra do not support AVX extensions.
        #
        # CUDA compute < 6.0 lacks proper FP16 support on ARM.
        # Disabling has minimal performance effect while maintaining compatibility.
        ARM64_DEFS="-DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_CUDA_F16=off"
    fi
    # Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
    if [ -n "${OLLAMA_CUSTOM_CUDA_DEFS}" ]; then
        echo "OLLAMA_CUSTOM_CUDA_DEFS=\"${OLLAMA_CUSTOM_CUDA_DEFS}\""
        CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES} ${OLLAMA_CUSTOM_CUDA_DEFS}"
        echo "Building custom CUDA GPU"
    else
        CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}"
    fi
    export CUDAFLAGS="-t8" # -t/--threads: allow nvcc to run up to 8 compile steps in parallel
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS} -DGGML_STATIC=off"
    RUNNER=cuda${CUDA_VARIANT}
    BUILD_DIR="../build/linux/${GOARCH}/${RUNNER}"
    export LLAMA_SERVER_LDFLAGS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
    CUDA_DIST_DIR="${CUDA_DIST_DIR:-${DIST_BASE}/lib/ollama}"
    build
    install
    dist
    echo "Installing CUDA dependencies in ${CUDA_DIST_DIR}"
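    # cp -a below preserves the version-suffixed symlinks (e.g. libcudart.so.12 pointing at
    # the full libcudart.so.12.x.y) so the bundled runtime resolves the same way it does in
    # the toolkit directory.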
    mkdir -p "${CUDA_DIST_DIR}"
    for lib in ${CUDA_LIB_DIR}/libcudart.so* ${CUDA_LIB_DIR}/libcublas.so* ${CUDA_LIB_DIR}/libcublasLt.so* ; do
        cp -a "${lib}" "${CUDA_DIST_DIR}"
    done
    compress

fi

if [ -z "${ONEAPI_ROOT}" ]; then
    # Try the default location in case it exists
    ONEAPI_ROOT=/opt/intel/oneapi
fi

if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then
    echo "OneAPI libraries detected - building dynamic OneAPI library"
    init_vars
    source ${ONEAPI_ROOT}/setvars.sh --force # set up environment variables for oneAPI
    CC=icx
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL=ON -DGGML_SYCL_F16=OFF"
    RUNNER=oneapi
    BUILD_DIR="../build/linux/${GOARCH}/${RUNNER}"
    ONEAPI_DIST_DIR="${DIST_BASE}/lib/ollama"
    export LLAMA_SERVER_LDFLAGS="-fsycl -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb"
    DEBUG_FLAGS="" # icx compiles with -O0 if we pass -g, so we must remove it
    build

    # copy oneAPI dependencies
    mkdir -p "${ONEAPI_DIST_DIR}"
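    # The ldd scan below collects the SYCL/MKL/TBB runtime libraries the freshly built
    # ollama_llama_server actually links against and stages them for packaging.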
    for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e sycl -e mkl -e tbb); do
        cp -a "${dep}" "${ONEAPI_DIST_DIR}"
    done
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libOpenCL.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libimf.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libintlc.so.5" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libirng.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libpi_level_zero.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libsvml.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libur_loader.so.0" "${ONEAPI_DIST_DIR}"
    install
    dist
    compress
fi

if [ -z "${ROCM_PATH}" ]; then
    # Try the default location in case it exists
    ROCM_PATH=/opt/rocm
fi

if [ -z "${CLBlast_DIR}" ]; then
    # Try the default location in case it exists
    if [ -d /usr/lib/cmake/CLBlast ]; then
        export CLBlast_DIR=/usr/lib/cmake/CLBlast
    fi
fi

if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
    echo "ROCm libraries detected - building dynamic ROCm library"
    if [ -f ${ROCM_PATH}/lib/librocblas.so.*.*.????? ]; then
        ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocblas.so.*.*.????? | cut -f5 -d. || true)
    fi
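    # Example: a hypothetical ${ROCM_PATH}/lib/librocblas.so.4.1.60102 yields
    # ROCM_VARIANT=_v60102, so the runner below becomes rocm_v60102.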
    init_vars
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DGGML_HIPBLAS=on -DGGML_CUDA_NO_PEER_COPY=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
    # Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
    if [ -n "${OLLAMA_CUSTOM_ROCM_DEFS}" ]; then
        echo "OLLAMA_CUSTOM_ROCM_DEFS=\"${OLLAMA_CUSTOM_ROCM_DEFS}\""
        CMAKE_DEFS="${CMAKE_DEFS} ${OLLAMA_CUSTOM_ROCM_DEFS}"
        echo "Building custom ROCM GPU"
    fi
    RUNNER=rocm${ROCM_VARIANT}
    BUILD_DIR="../build/linux/${GOARCH}/${RUNNER}"
    # ROCm dependencies are too large to fit into a unified bundle
    ROCM_DIST_DIR="${DIST_BASE}/../linux-${GOARCH}-rocm/lib/ollama"
    # TODO figure out how to disable runpath (rpath)
    # export CMAKE_HIP_FLAGS="-fno-rtlib-add-rpath" # doesn't work
    export LLAMA_SERVER_LDFLAGS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
    build

    # copy the ROCm dependencies
    mkdir -p "${ROCM_DIST_DIR}"
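    # The loop below walks ldd output for the built server binary, keeps only the
    # ROCm/amdgpu related libraries (plus libtinfo/libnuma/libelf), and copies both
    # the versioned symlink and the real file it resolves to.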
    for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -v "${GOARCH}/rocm${ROCM_VARIANT}" | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf ); do
        cp -a "${dep}"* "${ROCM_DIST_DIR}"
        if [ $(readlink -f "${dep}") != "${dep}" ] ; then
            cp $(readlink -f "${dep}") "${ROCM_DIST_DIR}"
        fi
    done
    install
    dist
    compress
fi

cleanup
wait_for_compress
echo "go generate completed.  LLM runners: $(cd ${PAYLOAD_BASE}; echo *)"