"test/srt/openai_server/basic/test_serving_chat.py" did not exist on "ffd1a26e09127e09f33c21392ca2b285b44e5aef"
gen_linux.sh 6.47 KB
Newer Older
1
#!/bin/bash
# This script is intended to be run by `go generate`.
# The working directory must be llm/generate/

# First we build our default built-in library which will be linked into the CGO
# binary as a normal dependency. This default build is CPU based.
#
# Then we build a CUDA dynamic library (although statically linked with the CUDA
# library dependencies for maximum portability)
#
# Then if we detect ROCm, we build a dynamically loaded ROCm lib.  ROCm is particularly
# important to be a dynamic lib even if it's the only GPU library detected because
# we can't redistribute the object files but must rely on dynamic libraries at
# runtime, which could lead the server not to start if not present.

# -e: abort on the first failing command; -x: trace every command as it runs.
set -ex
# A pipeline fails if any stage of it fails, not just the last one.
set -o pipefail

# See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
#
# Prints the AMD GPU targets to build for on stdout, as one line:
# the targets joined with ';' and wrapped in literal single quotes,
# e.g. 'gfx803;gfx900;...'.
# Arguments: none
# Globals:   none read or written (the list and IFS are function-local)
amdGPUs() {
    local -a gpu_list=(
        "gfx803"
        "gfx900"
        "gfx906:xnack-"
        "gfx908:xnack-"
        "gfx90a:xnack+"
        "gfx90a:xnack-"
        "gfx1010"
        "gfx1012"
        "gfx1030"
        "gfx1100"
        "gfx1101"
        "gfx1102"
    )
    # "${array[*]}" joins elements with the first character of IFS; keeping
    # IFS local restores the caller's value automatically on return.
    local IFS=';'
    printf "'%s'\n" "${gpu_list[*]}"
}

echo "Starting linux generate script"

# If the caller has not chosen a CUDA compiler, default to nvcc from the
# standard toolkit location when an executable is present there.
if [[ -z "${CUDACXX}" && -x /usr/local/cuda/bin/nvcc ]]; then
    export CUDACXX=/usr/local/cuda/bin/nvcc
fi

# cmake flags shared by the GPU (CUDA / ROCm) builds below.
COMMON_CMAKE_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off"

# Shared helpers live next to this script; quote the path so a checkout
# directory containing spaces still resolves.
source "$(dirname "$0")/gen_common.sh"
init_vars
git_module_setup
apply_patches

# CPU builds. Skipped entirely when OLLAMA_SKIP_CPU_GENERATE is set to a non-empty value.
if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
    # Users building from source can tune the exact flags we pass to cmake for configuring
    # llama.cpp, and we'll build only 1 CPU variant in that case as the default.
    if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then
        echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
        CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
        BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu"
        echo "Building custom CPU"
        build
        install
        link_server_lib
    else
        # Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
        # -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
        # -DLLAMA_F16C -- 2012 Intel Ivy Bridge & AMD 2011 Bulldozer (No significant improvement over just AVX)
        # -DLLAMA_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
        # -DLLAMA_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
        # Note: the following seem to yield slower results than AVX2 - ymmv
        # -DLLAMA_AVX512 -- 2017 Intel Skylake and High End DeskTop (HEDT)
        # -DLLAMA_AVX512_VBMI -- 2018 Intel Cannon Lake
        # -DLLAMA_AVX512_VNNI -- 2021 Intel Alder Lake

        COMMON_CPU_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off"
        #
        # CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
        #
        CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
        BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu"
        echo "Building LCD CPU"
        build
        install
        link_server_lib

        #
        # ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
        # Approximately 400% faster than LCD on same CPU
        #
        init_vars
        CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
        BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu_avx"
        echo "Building AVX CPU"
        build
        install
        link_server_lib

        #
        # ~2013 CPU Dynamic library
        # Approximately 10% faster than AVX on same CPU
        #
        init_vars
        CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"
        BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu_avx2"
        echo "Building AVX2 CPU"
        build
        install
        link_server_lib
    fi
else
    echo "Skipping CPU generation step as requested"
fi

# CUDA build: only attempted when the toolkit's lib64 directory is present.
if [ -d /usr/local/cuda/lib64/ ]; then
    echo "CUDA libraries detected - building dynamic CUDA library"
    init_vars
    CUDA_LIB_DIR=/usr/local/cuda/lib64

    # Take the major version from the first libcudart.so.<major>[.<minor>...]
    # in the lib dir. A glob is used instead of parsing `ls`; when nothing
    # matches, the pattern stays literal and the -e check rejects it.
    CUDA_MAJOR=""
    for cudart_lib in "${CUDA_LIB_DIR}"/libcudart.so.*; do
        if [ -e "${cudart_lib}" ]; then
            CUDA_MAJOR="${cudart_lib##*/libcudart.so.}"
            CUDA_MAJOR="${CUDA_MAJOR%%.*}"
        fi
        break
    done
    if [ -n "${CUDA_MAJOR}" ]; then
        CUDA_VARIANT=_v${CUDA_MAJOR}
    fi
    CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
    BUILD_DIR="${LLAMACPP_DIR}/build/linux/cuda${CUDA_VARIANT}"
    build
    install
    # Produce a shared libext_server.so that pulls in the whole of the three
    # static archives and links the CUDA runtime/cuBLAS pieces statically.
    gcc -fPIC -g -shared -o "${BUILD_DIR}/lib/libext_server.so" \
        -Wl,--whole-archive \
        "${BUILD_DIR}/lib/libext_server.a" \
        "${BUILD_DIR}/lib/libcommon.a" \
        "${BUILD_DIR}/lib/libllama.a" \
        -Wl,--no-whole-archive \
        "${CUDA_LIB_DIR}/libcudart_static.a" \
        "${CUDA_LIB_DIR}/libcublas_static.a" \
        "${CUDA_LIB_DIR}/libcublasLt_static.a" \
        "${CUDA_LIB_DIR}/libcudadevrt.a" \
        "${CUDA_LIB_DIR}/libculibos.a" \
        -lrt -lpthread -ldl -lstdc++ -lm
fi

# Default locations for optional GPU libraries when the caller has not set them.
if [ -z "${ROCM_PATH}" ]; then
    # Try the default location in case it exists
    ROCM_PATH=/opt/rocm
fi

if [ -z "${CLBlast_DIR}" ]; then
    # Try the default location in case it exists
    if [ -d /usr/lib/cmake/CLBlast ]; then
        export CLBlast_DIR=/usr/lib/cmake/CLBlast
    fi
fi

# ROCm build: only attempted when the ROCm install directory exists.
if [ -d "${ROCM_PATH}" ]; then
    echo "ROCm libraries detected - building dynamic ROCm library"

    # Derive the variant suffix from librocm_smi64.so.<N>. The glob is expanded
    # into an array so multiple matches cannot break the test, and the version
    # is taken from the file's last extension so a ROCM_PATH that itself
    # contains dots (e.g. /opt/rocm-5.7.1) does not skew the result.
    ROCM_SMI_LIBS=("${ROCM_PATH}"/lib/librocm_smi64.so.?)
    if [ -f "${ROCM_SMI_LIBS[0]}" ]; then
        ROCM_VARIANT="_v${ROCM_SMI_LIBS[0]##*.}"
    fi
    init_vars
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
    BUILD_DIR="${LLAMACPP_DIR}/build/linux/rocm${ROCM_VARIANT}"
    build
    install
    # Link the static archives into a shared libext_server.so. The ROCm
    # libraries are linked dynamically and located through ROCM_PATH (both for
    # -L and the embedded rpath) so a non-default install prefix is honoured.
    gcc -fPIC -g -shared -o "${BUILD_DIR}/lib/libext_server.so" \
        -Wl,--whole-archive \
        "${BUILD_DIR}/lib/libext_server.a" \
        "${BUILD_DIR}/lib/libcommon.a" \
        "${BUILD_DIR}/lib/libllama.a" \
        -Wl,--no-whole-archive \
        -lrt -lpthread -ldl -lstdc++ -lm \
        "-L${ROCM_PATH}/lib" -L/opt/amdgpu/lib/x86_64-linux-gnu/ \
        "-Wl,-rpath,${ROCM_PATH}/lib,-rpath,/opt/amdgpu/lib/x86_64-linux-gnu/" \
        -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu
fi

# Last step of the script. `cleanup` is not defined in this file; presumably it
# is provided by gen_common.sh (sourced earlier) — TODO confirm what it undoes.
cleanup