"llm/llama.cpp/git@developer.sourcefind.cn:OpenDAS/ollama.git" did not exist on "7013e5e2d779d68d2a9cdafa96f8fdb2645618a5"
Commit d470ebe7 authored by Daniel Hiltgen's avatar Daniel Hiltgen
Browse files

Add Jetson cuda variants for arm

This adds new variants for arm64 specific to Jetson platforms
parent c7bcb003
...@@ -3,6 +3,9 @@ ARG CMAKE_VERSION=3.22.1 ...@@ -3,6 +3,9 @@ ARG CMAKE_VERSION=3.22.1
# this CUDA_VERSION corresponds with the one specified in docs/gpu.md # this CUDA_VERSION corresponds with the one specified in docs/gpu.md
ARG CUDA_VERSION=11.3.1 ARG CUDA_VERSION=11.3.1
ARG ROCM_VERSION=6.1.2 ARG ROCM_VERSION=6.1.2
ARG JETPACK_6=r36.2.0
ARG JETPACK_5=r35.4.1
ARG JETPACK_4=r32.7.1
# Copy the minimal context we need to run the generate scripts # Copy the minimal context we need to run the generate scripts
FROM scratch AS llm-code FROM scratch AS llm-code
...@@ -22,7 +25,7 @@ ENV GOARCH amd64 ...@@ -22,7 +25,7 @@ ENV GOARCH amd64
RUN --mount=type=cache,target=/root/.ccache \ RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64 FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-server-arm64
ARG CMAKE_VERSION ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
...@@ -31,11 +34,40 @@ COPY --from=llm-code / /go/src/github.com/ollama/ollama/ ...@@ -31,11 +34,40 @@ COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS ARG CGO_CFLAGS
ENV GOARCH arm64 ENV GOARCH arm64
RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6} AS cuda-build-jetpack6-arm64
ARG CMAKE_VERSION
RUN apt-get update && apt-get install -y git curl && \
curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
ENV GOARCH arm64
ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 \
OLLAMA_SKIP_CPU_GENERATE=1 \
CUDA_VARIANT="_jetpack6" \
CUDA_DIST_DIR="/go/src/github.com/ollama/ollama/dist/linux-arm64/ollama_libs/cuda_jetpack6" \
CMAKE_CUDA_ARCHITECTURES="87" \
bash gen_linux.sh
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5} AS cuda-build-jetpack5-arm64
ARG CMAKE_VERSION
RUN apt-get update && apt-get install -y git curl && \
curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
ENV GOARCH arm64
ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs
RUN --mount=type=cache,target=/root/.ccache \ RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 \ OLLAMA_SKIP_STATIC_GENERATE=1 \
OLLAMA_SKIP_CPU_GENERATE=1 \ OLLAMA_SKIP_CPU_GENERATE=1 \
CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \ CUDA_VARIANT="_jetpack5" \
CUDA_VARIANT="_v11" \ CUDA_DIST_DIR="/go/src/github.com/ollama/ollama/dist/linux-arm64/ollama_libs/cuda_jetpack5" \
CMAKE_CUDA_ARCHITECTURES="72;87" \
bash gen_linux.sh bash gen_linux.sh
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64 FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
...@@ -123,8 +155,14 @@ ARG GOLANG_VERSION ...@@ -123,8 +155,14 @@ ARG GOLANG_VERSION
WORKDIR /go/src/github.com/ollama/ollama WORKDIR /go/src/github.com/ollama/ollama
COPY . . COPY . .
COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ COPY --from=cuda-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=cuda-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
## arm binary += 381M
COPY --from=cuda-build-jetpack6-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-build-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
## arm binary += 330M
COPY --from=cuda-build-jetpack5-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-build-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
ARG GOFLAGS ARG GOFLAGS
ARG CGO_CFLAGS ARG CGO_CFLAGS
RUN --mount=type=cache,target=/root/.ccache \ RUN --mount=type=cache,target=/root/.ccache \
......
...@@ -15,7 +15,9 @@ import ( ...@@ -15,7 +15,9 @@ import (
"log/slog" "log/slog"
"os" "os"
"path/filepath" "path/filepath"
"regexp"
"runtime" "runtime"
"strconv"
"strings" "strings"
"sync" "sync"
"unsafe" "unsafe"
...@@ -215,7 +217,7 @@ func GetGPUInfo() GpuInfoList { ...@@ -215,7 +217,7 @@ func GetGPUInfo() GpuInfoList {
GpuInfo: GpuInfo{ GpuInfo: GpuInfo{
memInfo: mem, memInfo: mem,
Library: "cpu", Library: "cpu",
Variant: cpuCapability, Variant: cpuCapability.String(),
ID: "0", ID: "0",
}, },
}, },
...@@ -231,6 +233,35 @@ func GetGPUInfo() GpuInfoList { ...@@ -231,6 +233,35 @@ func GetGPUInfo() GpuInfoList {
depPath := GetDepDir() depPath := GetDepDir()
var cudaVariant string
if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
if CudaTegra != "" {
ver := strings.Split(CudaTegra, ".")
if len(ver) > 0 {
cudaVariant = "jetpack" + ver[0]
}
} else if data, err := os.ReadFile("/etc/nv_tegra_release"); err == nil {
r := regexp.MustCompile(` R(\d+) `)
m := r.FindSubmatch(data)
if len(m) != 2 {
slog.Info("Unexpected format for /etc/nv_tegra_release. Set JETSON_JETPACK to select version")
} else {
if l4t, err := strconv.Atoi(string(m[1])); err == nil {
// Note: mapping from L4t -> JP is inconsistent (can't just subtract 30)
// https://developer.nvidia.com/embedded/jetpack-archive
switch l4t {
case 35:
cudaVariant = "jetpack5"
case 36:
cudaVariant = "jetpack6"
default:
slog.Info("unsupported L4T version", "nv_tegra_release", string(data))
}
}
}
}
}
// Load ALL libraries // Load ALL libraries
cHandles = initCudaHandles() cHandles = initCudaHandles()
...@@ -240,6 +271,7 @@ func GetGPUInfo() GpuInfoList { ...@@ -240,6 +271,7 @@ func GetGPUInfo() GpuInfoList {
gpuInfo := CudaGPUInfo{ gpuInfo := CudaGPUInfo{
GpuInfo: GpuInfo{ GpuInfo: GpuInfo{
Library: "cuda", Library: "cuda",
Variant: cudaVariant,
}, },
index: i, index: i,
} }
...@@ -266,7 +298,15 @@ func GetGPUInfo() GpuInfoList { ...@@ -266,7 +298,15 @@ func GetGPUInfo() GpuInfoList {
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0]) gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor) gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
gpuInfo.MinimumMemory = cudaMinimumMemory gpuInfo.MinimumMemory = cudaMinimumMemory
gpuInfo.DependencyPath = depPath if depPath != "" {
gpuInfo.DependencyPath = depPath
// Check for variant specific directory
if cudaVariant != "" {
if _, err := os.Stat(filepath.Join(depPath, "cuda_"+cudaVariant)); err == nil {
gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+cudaVariant)
}
}
}
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0]) gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
gpuInfo.DriverMajor = driverMajor gpuInfo.DriverMajor = driverMajor
gpuInfo.DriverMinor = driverMinor gpuInfo.DriverMinor = driverMinor
......
...@@ -25,7 +25,7 @@ func GetGPUInfo() GpuInfoList { ...@@ -25,7 +25,7 @@ func GetGPUInfo() GpuInfoList {
return []GpuInfo{ return []GpuInfo{
{ {
Library: "cpu", Library: "cpu",
Variant: GetCPUCapability(), Variant: GetCPUCapability().String(),
memInfo: mem, memInfo: mem,
}, },
} }
...@@ -48,7 +48,7 @@ func GetCPUInfo() GpuInfoList { ...@@ -48,7 +48,7 @@ func GetCPUInfo() GpuInfoList {
return []GpuInfo{ return []GpuInfo{
{ {
Library: "cpu", Library: "cpu",
Variant: GetCPUCapability(), Variant: GetCPUCapability().String(),
memInfo: mem, memInfo: mem,
}, },
} }
......
...@@ -19,7 +19,7 @@ type GpuInfo struct { ...@@ -19,7 +19,7 @@ type GpuInfo struct {
Library string `json:"library,omitempty"` Library string `json:"library,omitempty"`
// Optional variant to select (e.g. versions, cpu feature flags) // Optional variant to select (e.g. versions, cpu feature flags)
Variant CPUCapability `json:"variant"` Variant string `json:"variant"`
// MinimumMemory represents the minimum memory required to use the GPU // MinimumMemory represents the minimum memory required to use the GPU
MinimumMemory uint64 `json:"-"` MinimumMemory uint64 `json:"-"`
...@@ -81,8 +81,8 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList { ...@@ -81,8 +81,8 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList {
for _, info := range l { for _, info := range l {
found := false found := false
requested := info.Library requested := info.Library
if info.Variant != CPUCapabilityNone { if info.Variant != CPUCapabilityNone.String() {
requested += "_" + info.Variant.String() requested += "_" + info.Variant
} }
for i, lib := range libs { for i, lib := range libs {
if lib == requested { if lib == requested {
......
...@@ -165,7 +165,7 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then ...@@ -165,7 +165,7 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
echo "CUDA libraries detected - building dynamic CUDA library" echo "CUDA libraries detected - building dynamic CUDA library"
init_vars init_vars
CUDA_MAJOR=$(ls "${CUDA_LIB_DIR}"/libcudart.so.* | head -1 | cut -f3 -d. || true) CUDA_MAJOR=$(ls "${CUDA_LIB_DIR}"/libcudart.so.* | head -1 | cut -f3 -d. || true)
if [ -n "${CUDA_MAJOR}" ]; then if [ -n "${CUDA_MAJOR}" -a -z "${CUDA_VARIANT}" ]; then
CUDA_VARIANT=_v${CUDA_MAJOR} CUDA_VARIANT=_v${CUDA_MAJOR}
fi fi
if [ "${ARCH}" == "arm64" ]; then if [ "${ARCH}" == "arm64" ]; then
...@@ -189,9 +189,10 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then ...@@ -189,9 +189,10 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS} -DGGML_STATIC=off" CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS} -DGGML_STATIC=off"
BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}" BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}"
export LLAMA_SERVER_LDFLAGS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda" export LLAMA_SERVER_LDFLAGS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
CUDA_DIST_DIR="${DIST_BASE}/ollama_libs" CUDA_DIST_DIR="${CUDA_DIST_DIR:-${DIST_BASE}/ollama_libs}"
build build
install install
echo "Installing CUDA dependencies in ${CUDA_DIST_DIR}"
mkdir -p "${CUDA_DIST_DIR}" mkdir -p "${CUDA_DIST_DIR}"
for lib in ${CUDA_LIB_DIR}/libcudart.so* ${CUDA_LIB_DIR}/libcublas.so* ${CUDA_LIB_DIR}/libcublasLt.so* ; do for lib in ${CUDA_LIB_DIR}/libcudart.so* ${CUDA_LIB_DIR}/libcublas.so* ${CUDA_LIB_DIR}/libcublasLt.so* ; do
cp -a "${lib}" "${CUDA_DIST_DIR}" cp -a "${lib}" "${CUDA_DIST_DIR}"
......
...@@ -82,8 +82,8 @@ func serversForGpu(info gpu.GpuInfo) []string { ...@@ -82,8 +82,8 @@ func serversForGpu(info gpu.GpuInfo) []string {
// glob workDir for files that start with ollama_ // glob workDir for files that start with ollama_
availableServers := getAvailableServers() availableServers := getAvailableServers()
requested := info.Library requested := info.Library
if info.Variant != gpu.CPUCapabilityNone { if info.Variant != gpu.CPUCapabilityNone.String() {
requested += "_" + info.Variant.String() requested += "_" + info.Variant
} }
servers := []string{} servers := []string{}
......
...@@ -22,6 +22,7 @@ for TARGETARCH in ${BUILD_ARCH}; do ...@@ -22,6 +22,7 @@ for TARGETARCH in ${BUILD_ARCH}; do
-t builder:$TARGETARCH \ -t builder:$TARGETARCH \
. .
docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
rm -rf ./dist/linux-$TARGETARCH
docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/linux-$TARGETARCH ./dist docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/linux-$TARGETARCH ./dist
docker rm builder-$TARGETARCH docker rm builder-$TARGETARCH
echo "Compressing final linux bundle..." echo "Compressing final linux bundle..."
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment