#!/bin/bash
# This script is intended to run inside the go generate
# working directory must be llm/generate/

# First we build one or more CPU based LLM libraries
#
# Then if we detect CUDA, we build a CUDA dynamic library, and carry the required
# library dependencies
#
# Then if we detect ROCm, we build a dynamically loaded ROCm lib.  The ROCM
# libraries are quite large, and also dynamically load data files at runtime
# which in turn are large, so we don't attempt to carry them as payload

# Abort on the first failing command, trace every command as it runs, and
# make a pipeline fail when any of its stages fails.
set -o errexit -o xtrace -o pipefail

# Print the AMD GPU targets to compile for.
# Globals:
#   AMDGPU_TARGETS (read) - when non-empty it is printed verbatim and the
#                           built-in list below is not used.
# Outputs:
#   The target list on stdout.  The built-in list is joined with ';' and
#   wrapped in literal single quotes; an explicit AMDGPU_TARGETS value is
#   printed exactly as given.
# See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
amdGPUs() {
    if [ -n "${AMDGPU_TARGETS}" ]; then
        echo "${AMDGPU_TARGETS}"
        return
    fi
    local -a GPU_LIST=(
        "gfx900"
        "gfx906:xnack-"
        "gfx908:xnack-"
        "gfx90a:xnack+"
        "gfx90a:xnack-"
        "gfx940"
        "gfx941"
        "gfx942"
        "gfx1010"
        "gfx1012"
        "gfx1030"
        "gfx1100"
        "gfx1101"
        "gfx1102"
    )
    # "${GPU_LIST[*]}" joins on the first character of IFS; keeping IFS local
    # confines the ';' separator to this function.
    local IFS=';'
    echo "'${GPU_LIST[*]}'"
}

echo "Starting linux generate script"

# Pick a CUDA compiler unless the caller already exported CUDACXX: prefer the
# default toolkit install, otherwise whatever nvcc is found on PATH.
if [ -z "${CUDACXX}" ]; then
    if [ -x /usr/local/cuda/bin/nvcc ]; then
        export CUDACXX=/usr/local/cuda/bin/nvcc
    else
        # Try the default location in case it exists
        # A missing nvcc must not be fatal under `set -e`; CUDACXX is then
        # exported empty.
        CUDACXX=$(command -v nvcc || true)
        export CUDACXX
    fi
fi

# cmake definitions shared by the CUDA, oneAPI and ROCm runner builds below
COMMON_CMAKE_DEFS="-DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off"

# init_vars, git_module_setup and apply_patches are not defined in this file;
# they are expected to come from gen_common.sh next to this script.
source "$(dirname "$0")/gen_common.sh"
init_vars
git_module_setup
apply_patches

# NOTE(review): init_vars is invoked a second time after the module/patch
# steps - confirm against gen_common.sh whether the repeat is required.
init_vars
# Static library build.  Runs when OLLAMA_SKIP_STATIC_GENERATE is empty, or
# unconditionally when OLLAMA_CPU_TARGET is "static".
if [ -z "${OLLAMA_SKIP_STATIC_GENERATE}" ] || [ "${OLLAMA_CPU_TARGET}" = "static" ]; then
    # Builds by default, allows skipping, forces build if OLLAMA_CPU_TARGET="static"
    # Enables optimized Dockerfile builds using a blanket skip and targeted overrides
    # Static build for linking into the Go binary
    init_vars
    CMAKE_TARGETS="--target llama --target ggml"
    CMAKE_DEFS="-DBUILD_SHARED_LIBS=off -DGGML_NATIVE=off -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off ${CMAKE_DEFS}"
    BUILD_DIR="../build/linux/${ARCH}_static"
    echo "Building static library"
    build
fi

# CPU runners.
#   OLLAMA_SKIP_CPU_GENERATE  non-empty: skip this whole section
#   OLLAMA_CUSTOM_CPU_DEFS    non-empty: build a single "cpu" runner with these flags
#   OLLAMA_CPU_TARGET         empty: build every variant; otherwise only the
#                             named one (cpu, cpu_avx, cpu_avx2)
# The AVX variants are only attempted when ARCH is x86_64.
init_vars
if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
    # Users building from source can tune the exact flags we pass to cmake for configuring
    # llama.cpp, and we'll build only 1 CPU variant in that case as the default.
    if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then
        init_vars
        echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
        CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
        BUILD_DIR="../build/linux/${ARCH}/cpu"
        echo "Building custom CPU"
        build
        compress
    else
        # Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
        # -DGGML_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
        # -DGGML_F16C -- 2012 Intel Ivy Bridge & AMD 2011 Bulldozer (No significant improvement over just AVX)
        # -DGGML_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
        # -DGGML_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
        # Note: the following seem to yield slower results than AVX2 - ymmv
        # -DGGML_AVX512 -- 2017 Intel Skylake and High End DeskTop (HEDT)
        # -DGGML_AVX512_VBMI -- 2018 Intel Cannon Lake
        # -DGGML_AVX512_VNNI -- 2021 Intel Alder Lake

        COMMON_CPU_DEFS="-DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off"
        if [ -z "${OLLAMA_CPU_TARGET}" ] || [ "${OLLAMA_CPU_TARGET}" = "cpu" ]; then
            #
            # CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
            #
            init_vars
            CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
            BUILD_DIR="../build/linux/${ARCH}/cpu"
            echo "Building LCD CPU"
            build
            compress
        fi

        if [ "${ARCH}" = "x86_64" ]; then
            #
            # ARM chips in M1/M2/M3-based Macs and NVidia Tegra devices do not currently support avx extensions.
            #
            if [ -z "${OLLAMA_CPU_TARGET}" ] || [ "${OLLAMA_CPU_TARGET}" = "cpu_avx" ]; then
                #
                # ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
                # Approximately 400% faster than LCD on same CPU
                #
                init_vars
                CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
                BUILD_DIR="../build/linux/${ARCH}/cpu_avx"
                echo "Building AVX CPU"
                build
                compress
            fi

            if [ -z "${OLLAMA_CPU_TARGET}" ] || [ "${OLLAMA_CPU_TARGET}" = "cpu_avx2" ]; then
                #
                # ~2013 CPU Dynamic library
                # Approximately 10% faster than AVX on same CPU
                #
                init_vars
                CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on ${CMAKE_DEFS}"
                BUILD_DIR="../build/linux/${ARCH}/cpu_avx2"
                echo "Building AVX2 CPU"
                build
                compress
            fi
        fi
    fi
else
    echo "Skipping CPU generation step as requested"
fi

# If CUDA_LIB_DIR was not supplied, take the first directory that exists:
# the default CUDA toolkit location, then the Arch Linux location.
if [ -z "${CUDA_LIB_DIR}" ]; then
    for cuda_candidate in /usr/local/cuda/lib64 /opt/cuda/targets/x86_64-linux/lib; do
        if [ -d "${cuda_candidate}" ]; then
            CUDA_LIB_DIR="${cuda_candidate}"
            break
        fi
    done
    unset cuda_candidate
fi

# Allow override in case libcudart is in the wrong place
: "${CUDART_LIB_DIR:=${CUDA_LIB_DIR}}"
# Copy one CUDA library into the runner's bin directory.
# Arguments:
#   $1 - unversioned library name, e.g. libcudart.so
#   $2 - `ldd` output of the runner binary
# Globals:
#   CUDA_LIB_DIR, CUDART_LIB_DIR, CUDA_MAJOR, BUILD_DIR (read)
# Lookup order: the exact name the runner links against, then
# <lib>.<CUDA_MAJOR>, then every <lib>* under CUDART_LIB_DIR, then every
# <lib>* under CUDA_LIB_DIR.
copy_cuda_lib() {
    local lib="$1" ldd_output="$2" dep
    # grep finding nothing is not an error here; dep is simply left empty
    dep=$(echo "${ldd_output}" | grep "${lib}" | cut -f1 -d' ' | xargs || true)
    if [ -n "${dep}" ] && [ -e "${CUDA_LIB_DIR}/${dep}" ]; then
        cp "${CUDA_LIB_DIR}/${dep}" "${BUILD_DIR}/bin/"
    elif [ -e "${CUDA_LIB_DIR}/${lib}.${CUDA_MAJOR}" ]; then
        cp "${CUDA_LIB_DIR}/${lib}.${CUDA_MAJOR}" "${BUILD_DIR}/bin/"
    elif [ -e "${CUDART_LIB_DIR}/${lib}" ]; then
        # the trailing * stays outside the quotes so the shell expands it
        cp -d "${CUDART_LIB_DIR}/${lib}"* "${BUILD_DIR}/bin/"
    else
        cp -d "${CUDA_LIB_DIR}/${lib}"* "${BUILD_DIR}/bin/"
    fi
}

# CUDA runner.  Built when OLLAMA_SKIP_CUDA_GENERATE is empty and CUDA_LIB_DIR
# is an existing directory.
if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" ] && [ -d "${CUDA_LIB_DIR}" ]; then
    echo "CUDA libraries detected - building dynamic CUDA library"
    init_vars
    # Major version = first component after "libcudart.so." in the file name
    # (not the full path, so dots in CUDA_LIB_DIR cannot shift the result).
    CUDA_MAJOR=""
    cudart_libs=( "${CUDA_LIB_DIR}"/libcudart.so.* )
    if [ -e "${cudart_libs[0]}" ] || [ -L "${cudart_libs[0]}" ]; then
        cudart_version="${cudart_libs[0]##*/}"
        cudart_version="${cudart_version#libcudart.so.}"
        CUDA_MAJOR="${cudart_version%%.*}"
    fi
    if [ -n "${CUDA_MAJOR}" ]; then
        CUDA_VARIANT=_v${CUDA_MAJOR}
    fi
    if [ "${ARCH}" = "arm64" ]; then
        echo "ARM CPU detected - disabling unsupported AVX instructions"

        # ARM-based CPUs such as M1 and Tegra do not support AVX extensions.
        #
        # CUDA compute < 6.0 lacks proper FP16 support on ARM.
        # Disabling has minimal performance effect while maintaining compatibility.
        ARM64_DEFS="-DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_CUDA_F16=off"
    fi
    # Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
    if [ -n "${OLLAMA_CUSTOM_CUDA_DEFS}" ]; then
        echo "OLLAMA_CUSTOM_CUDA_DEFS=\"${OLLAMA_CUSTOM_CUDA_DEFS}\""
        CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES} ${OLLAMA_CUSTOM_CUDA_DEFS}"
        echo "Building custom CUDA GPU"
    else
        CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_FLAGS=-t8 -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES} -DCMAKE_LIBRARY_PATH=/usr/local/cuda/compat"
    fi
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS}"
    BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}"
    EXTRA_LIBS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
    build

    # Carry the CUDA libs as payloads to help reduce dependency burden on users
    #
    # TODO - in the future we may shift to packaging these separately and conditionally
    #        downloading them in the install script.
    DEPS="$(ldd "${BUILD_DIR}/bin/ollama_llama_server")"
    for lib in libcudart.so libcublas.so libcublasLt.so; do
        copy_cuda_lib "${lib}" "${DEPS}"
    done
    compress
fi

# Try the default location in case it exists
# (only applied when ONEAPI_ROOT is unset or empty)
: "${ONEAPI_ROOT:=/opt/intel/oneapi}"
# oneAPI (SYCL) runner.  Built when OLLAMA_SKIP_ONEAPI_GENERATE is empty and
# ONEAPI_ROOT is an existing directory.
if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" ] && [ -d "${ONEAPI_ROOT}" ]; then
    echo "OneAPI libraries detected - building dynamic OneAPI library"
    init_vars
    source "${ONEAPI_ROOT}/setvars.sh" --force # set up environment variables for oneAPI
    CC=icx
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL=ON -DGGML_SYCL_F16=OFF"
    BUILD_DIR="../build/linux/${ARCH}/oneapi"
    EXTRA_LIBS="-fsycl -Wl,-rpath,${ONEAPI_ROOT}/compiler/latest/lib,-rpath,${ONEAPI_ROOT}/mkl/latest/lib,-rpath,${ONEAPI_ROOT}/tbb/latest/lib,-rpath,${ONEAPI_ROOT}/compiler/latest/opt/oclfpga/linux64/lib -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb"
    DEBUG_FLAGS="" # icx compiles with -O0 if we pass -g, so we must remove it
    build

    # copy oneAPI dependencies
    # First the sycl/mkl/tbb libraries the runner resolves through ldd ...
    for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e sycl -e mkl -e tbb); do
        cp "${dep}" "${BUILD_DIR}/bin/"
    done
    # ... then a fixed set of libraries taken from the compiler's lib directory
    for oneapi_lib in \
        libOpenCL.so \
        libimf.so \
        libintlc.so.5 \
        libirng.so \
        libpi_level_zero.so \
        libsvml.so \
        libur_loader.so.0; do
        cp "${ONEAPI_ROOT}/compiler/latest/lib/${oneapi_lib}" "${BUILD_DIR}/bin/"
    done
    compress
fi

# Try the default location in case it exists
# (only applied when ROCM_PATH is unset or empty)
: "${ROCM_PATH:=/opt/rocm}"

# Export the default CLBlast cmake directory when the caller did not set
# CLBlast_DIR and that directory is present.
if [ -z "${CLBlast_DIR}" ] && [ -d /usr/lib/cmake/CLBlast ]; then
    export CLBlast_DIR=/usr/lib/cmake/CLBlast
fi
# Print the last dot-separated field of the first
# <rocm>/lib/librocblas.so.X.Y.ZZZZZ file, or nothing when no such file exists.
# Arguments:
#   $1 - ROCm installation root
# The field is taken from the end of the name, so dots in the installation
# path (e.g. /opt/rocm-6.1.2) do not affect the result.
rocblas_patch_version() {
    local -a rocblas_libs=( "$1"/lib/librocblas.so.*.*.????? )
    # an unmatched glob stays literal and fails the -f test
    [ -f "${rocblas_libs[0]}" ] || return 0
    echo "${rocblas_libs[0]##*.}"
}

# ROCm runner.  Built when OLLAMA_SKIP_ROCM_GENERATE is empty and ROCM_PATH is
# an existing directory.
if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" ] && [ -d "${ROCM_PATH}" ]; then
    echo "ROCm libraries detected - building dynamic ROCm library"
    rocblas_version=$(rocblas_patch_version "${ROCM_PATH}")
    if [ -n "${rocblas_version}" ]; then
        ROCM_VARIANT=_v${rocblas_version}
    fi
    init_vars
    rocm_gpu_targets=$(amdGPUs)
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DGGML_HIPBLAS=on -DLLAMA_CUDA_NO_PEER_COPY=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=${rocm_gpu_targets} -DGPU_TARGETS=${rocm_gpu_targets}"
    # Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
    if [ -n "${OLLAMA_CUSTOM_ROCM_DEFS}" ]; then
        echo "OLLAMA_CUSTOM_ROCM_DEFS=\"${OLLAMA_CUSTOM_ROCM_DEFS}\""
        CMAKE_DEFS="${CMAKE_DEFS} ${OLLAMA_CUSTOM_ROCM_DEFS}"
        echo "Building custom ROCM GPU"
    fi
    BUILD_DIR="../build/linux/${ARCH}/rocm${ROCM_VARIANT}"
    EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,\$ORIGIN/../../rocm/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
    build

    # Record the ROCM dependencies
    rm -f "${BUILD_DIR}/bin/deps.txt"
    touch "${BUILD_DIR}/bin/deps.txt"
    for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo); do
        echo "${dep}" >> "${BUILD_DIR}/bin/deps.txt"
    done
    # bomb out if for some reason we didn't get a few deps
    if [ "$(wc -l < "${BUILD_DIR}/bin/deps.txt")" -lt 8 ]; then
        cat "${BUILD_DIR}/bin/deps.txt"
        echo "ERROR: deps file short"
        exit 1
    fi
    compress
fi

cleanup
# List the entries in the parent of the last BUILD_DIR; `&&` keeps a failed
# cd from listing whatever directory the script happens to be in.
echo "go generate completed.  LLM runners: $(cd "${BUILD_DIR}/.." && echo *)"