#!/bin/bash
# This script is intended to run inside the go generate
# working directory must be llm/generate/

# First we build our default built-in library which will be linked into the CGO
# binary as a normal dependency. This default build is CPU based.
#
# Then we build a CUDA dynamic library (although statically linked with the CUDA
# library dependencies for maximum portability)
#
# Then if we detect ROCm, we build a dynamically loaded ROCm lib.  ROCm is particularly
# important to be a dynamic lib even if it's the only GPU library detected because
# we can't redistribute the objectfiles but must rely on dynamic libraries at
# runtime, which could lead the server not to start if not present.

# Fail fast on any error (including mid-pipeline) and trace commands for CI logs.
set -ex
set -o pipefail
# See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
# Emits the supported AMD GPU target list as a single-quoted,
# semicolon-separated string suitable for a cmake -D value.
amdGPUs() {
    # Kept as a global, matching the rest of the script's variable style.
    GPU_LIST=(
        "gfx803"
        "gfx900"
        "gfx906:xnack-"
        "gfx908:xnack-"
        "gfx90a:xnack+"
        "gfx90a:xnack-"
        "gfx1010"
        "gfx1012"
        "gfx1030"
        "gfx1100"
        "gfx1101"
        "gfx1102"
    )
    local joined
    # "${GPU_LIST[*]}" joins on the first character of IFS; scope the IFS
    # override to the command substitution so the caller's IFS is untouched.
    joined=$(IFS=';'; printf '%s' "${GPU_LIST[*]}")
    printf "'%s'\n" "$joined"
}

echo "Starting linux generate script"
# Prefer the system CUDA toolkit's nvcc when the caller has not picked a compiler.
# (Two separate tests instead of the deprecated, ambiguous `[ ... -a ... ]`.)
if [ -z "${CUDACXX}" ] && [ -x /usr/local/cuda/bin/nvcc ]; then
    export CUDACXX=/usr/local/cuda/bin/nvcc
fi
# Baseline cmake flags shared by the GPU builds below: position-independent code
# for shared-library output, AVX-only CPU paths for broad portability.
COMMON_CMAKE_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off"
# Shared helpers (init_vars, git_module_setup, apply_patches, build, install,
# link_server_lib, cleanup).  Quoted so a path containing spaces still works.
source "$(dirname "$0")/gen_common.sh"
init_vars
git_module_setup
apply_patches
# Build the CPU variants unless the caller opts out (e.g. GPU-only images).
if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
    # Users building from source can tune the exact flags we pass to cmake for configuring
    # llama.cpp, and we'll build only 1 CPU variant in that case as the default.
    if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then
        echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
        CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
        BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu"
        echo "Building custom CPU"
        build
        install
        link_server_lib
    else
        # Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
        # -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
        # -DLLAMA_F16C -- 2012 Intel Ivy Bridge & AMD 2011 Bulldozer (No significant improvement over just AVX)
        # -DLLAMA_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
        # -DLLAMA_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
        # Note: the following seem to yield slower results than AVX2 - ymmv
        # -DLLAMA_AVX512 -- 2017 Intel Skylake and High End DeskTop (HEDT)
        # -DLLAMA_AVX512_VBMI -- 2018 Intel Cannon Lake
        # -DLLAMA_AVX512_VNNI -- 2021 Intel Alder Lake
        COMMON_CPU_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off"
        #
        # CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
        #
        CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
        BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu"
        echo "Building LCD CPU"
        build
        install
        link_server_lib

        #
        # ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
        # Approximately 400% faster than LCD on same CPU
        #
        init_vars
        CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
        BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu_avx"
        echo "Building AVX CPU"
        build
        install
        link_server_lib

        #
        # ~2013 CPU Dynamic library
        # Approximately 10% faster than AVX on same CPU
        #
        init_vars
        CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"
        BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu_avx2"
        echo "Building AVX2 CPU"
        build
        install
        link_server_lib
    fi
else
    echo "Skipping CPU generation step as requested"
fi
# Build a dynamic CUDA library when a toolkit is installed in a known location.
for cudalibpath in "/usr/local/cuda/lib64" "/opt/cuda/targets/x86_64-linux/lib"; do
    if [ -d "$cudalibpath" ]; then
        echo "CUDA libraries detected - building dynamic CUDA library"
        init_vars
        # Derive the CUDA major version from libcudart.so.<major>.<minor> to
        # tag the build directory (e.g. cuda_v12).  `|| true` keeps a missing
        # library from aborting the script under `set -e`.
        CUDA_MAJOR=$(find "$cudalibpath" -name 'libcudart.so.*' -print | head -1 | cut -f3 -d. || true)
        if [ -n "${CUDA_MAJOR}" ]; then
            CUDA_VARIANT="_v${CUDA_MAJOR}"
        fi
        CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
        BUILD_DIR="${LLAMACPP_DIR}/build/linux/cuda${CUDA_VARIANT}"
        CUDA_LIB_DIR="$cudalibpath"
        build
        install
        # Link one relocatable shared library, statically folding in the CUDA
        # runtime pieces for maximum portability of the shipped artifact.
        gcc -fPIC -g -shared -o "${BUILD_DIR}/lib/libext_server.so" \
            -Wl,--whole-archive \
            "${BUILD_DIR}/lib/libext_server.a" \
            "${BUILD_DIR}/lib/libcommon.a" \
            "${BUILD_DIR}/lib/libllama.a" \
            -Wl,--no-whole-archive \
            "${CUDA_LIB_DIR}/libcudart_static.a" \
            "${CUDA_LIB_DIR}/libcublas_static.a" \
            "${CUDA_LIB_DIR}/libcublasLt_static.a" \
            "${CUDA_LIB_DIR}/libcudadevrt.a" \
            "${CUDA_LIB_DIR}/libculibos.a" \
            -lrt -lpthread -ldl -lstdc++ -lm
    fi
done
# Allow the caller to point at a non-standard ROCm install via ROCM_PATH.
if [ -z "${ROCM_PATH}" ]; then
    # Try the default location in case it exists
    ROCM_PATH=/opt/rocm
fi
# CLBlast_DIR tells cmake where to find the CLBlast package config, if present.
if [ -z "${CLBlast_DIR}" ]; then
    # Try the default location in case it exists
    if [ -d /usr/lib/cmake/CLBlast ]; then
        export CLBlast_DIR=/usr/lib/cmake/CLBlast
    fi
fi
# Build a dynamically loaded ROCm library when a ROCm install is present.
if [ -d "${ROCM_PATH}" ]; then
    echo "ROCm libraries detected - building dynamic ROCm library"
    # Tag the build dir with the rocm_smi major version when available (e.g. _v5).
    # The `?` glob must stay unquoted so it matches the version suffix.
    if [ -f "${ROCM_PATH}"/lib/librocm_smi64.so.? ]; then
        ROCM_VARIANT=_v$(ls "${ROCM_PATH}"/lib/librocm_smi64.so.? | cut -f3 -d. || true)
    fi
    init_vars
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
    BUILD_DIR="${LLAMACPP_DIR}/build/linux/rocm${ROCM_VARIANT}"
    build
    install
    # Unlike CUDA, we cannot redistribute the ROCm runtime, so link against it
    # dynamically and rely on rpath to resolve the libraries at runtime.
    gcc -fPIC -g -shared -o "${BUILD_DIR}/lib/libext_server.so" \
        -Wl,--whole-archive \
        "${BUILD_DIR}/lib/libext_server.a" \
        "${BUILD_DIR}/lib/libcommon.a" \
        "${BUILD_DIR}/lib/libllama.a" \
        -Wl,--no-whole-archive \
        -lrt -lpthread -ldl -lstdc++ -lm \
        -L/opt/rocm/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ \
        -Wl,-rpath,/opt/rocm/lib,-rpath,/opt/amdgpu/lib/x86_64-linux-gnu/ \
        -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu
fi

cleanup