Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
d470ebe7
Commit
d470ebe7
authored
May 30, 2024
by
Daniel Hiltgen
Browse files
Add Jetson cuda variants for arm
This adds new variants for arm64 specific to Jetson platforms
parent
c7bcb003
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
96 additions
and
16 deletions
+96
-16
Dockerfile
Dockerfile
+43
-5
gpu/gpu.go
gpu/gpu.go
+42
-2
gpu/gpu_darwin.go
gpu/gpu_darwin.go
+2
-2
gpu/types.go
gpu/types.go
+3
-3
llm/generate/gen_linux.sh
llm/generate/gen_linux.sh
+3
-2
llm/payload.go
llm/payload.go
+2
-2
scripts/build_linux.sh
scripts/build_linux.sh
+1
-0
No files found.
Dockerfile
View file @
d470ebe7
...
@@ -3,6 +3,9 @@ ARG CMAKE_VERSION=3.22.1
...
@@ -3,6 +3,9 @@ ARG CMAKE_VERSION=3.22.1
# this CUDA_VERSION corresponds with the one specified in docs/gpu.md
# this CUDA_VERSION corresponds with the one specified in docs/gpu.md
ARG
CUDA_VERSION=11.3.1
ARG
CUDA_VERSION=11.3.1
ARG
ROCM_VERSION=6.1.2
ARG
ROCM_VERSION=6.1.2
ARG
JETPACK_6=r36.2.0
ARG
JETPACK_5=r35.4.1
ARG
JETPACK_4=r32.7.1
# Copy the minimal context we need to run the generate scripts
# Copy the minimal context we need to run the generate scripts
FROM
scratch AS llm-code
FROM
scratch AS llm-code
...
@@ -22,7 +25,7 @@ ENV GOARCH amd64
...
@@ -22,7 +25,7 @@ ENV GOARCH amd64
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_SKIP_CPU_GENERATE
=
1 bash gen_linux.sh
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_SKIP_CPU_GENERATE
=
1 bash gen_linux.sh
FROM
--platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
FROM
--platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-
server-
arm64
ARG
CMAKE_VERSION
ARG
CMAKE_VERSION
COPY
./scripts/rh_linux_deps.sh /
COPY
./scripts/rh_linux_deps.sh /
RUN
CMAKE_VERSION
=
${
CMAKE_VERSION
}
sh /rh_linux_deps.sh
RUN
CMAKE_VERSION
=
${
CMAKE_VERSION
}
sh /rh_linux_deps.sh
...
@@ -31,11 +34,40 @@ COPY --from=llm-code / /go/src/github.com/ollama/ollama/
...
@@ -31,11 +34,40 @@ COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
ARG
CGO_CFLAGS
ARG
CGO_CFLAGS
ENV
GOARCH arm64
ENV
GOARCH arm64
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_SKIP_CPU_GENERATE
=
1 bash gen_linux.sh
FROM
--platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6} AS cuda-build-jetpack6-arm64
ARG
CMAKE_VERSION
RUN
apt-get update
&&
apt-get
install
-y
git curl
&&
\
curl
-s
-L
https://github.com/Kitware/CMake/releases/download/v
${
CMAKE_VERSION
}
/cmake-
${
CMAKE_VERSION
}
-linux-
$(
uname
-m
)
.tar.gz |
tar
-zx
-C
/usr
--strip-components
1
COPY
--from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
ARG
CGO_CFLAGS
ENV
GOARCH arm64
ENV
LIBRARY_PATH /usr/local/cuda/lib64/stubs
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
\
OLLAMA_SKIP_CPU_GENERATE
=
1
\
CUDA_VARIANT
=
"_jetpack6"
\
CUDA_DIST_DIR
=
"/go/src/github.com/ollama/ollama/dist/linux-arm64/ollama_libs/cuda_jetpack6"
\
CMAKE_CUDA_ARCHITECTURES
=
"87"
\
bash gen_linux.sh
FROM
--platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5} AS cuda-build-jetpack5-arm64
ARG
CMAKE_VERSION
RUN
apt-get update
&&
apt-get
install
-y
git curl
&&
\
curl
-s
-L
https://github.com/Kitware/CMake/releases/download/v
${
CMAKE_VERSION
}
/cmake-
${
CMAKE_VERSION
}
-linux-
$(
uname
-m
)
.tar.gz |
tar
-zx
-C
/usr
--strip-components
1
COPY
--from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
ARG
CGO_CFLAGS
ENV
GOARCH arm64
ENV
LIBRARY_PATH /usr/local/cuda/lib64/stubs
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
\
OLLAMA_SKIP_CPU_GENERATE
=
1
\
OLLAMA_SKIP_CPU_GENERATE
=
1
\
CMAKE_CUDA_ARCHITECTURES
=
"
${
CUDA_V11_ARCHITECTURES
}
"
\
CUDA_VARIANT
=
"_jetpack5"
\
CUDA_VARIANT
=
"_v11"
\
CUDA_DIST_DIR
=
"/go/src/github.com/ollama/ollama/dist/linux-arm64/ollama_libs/cuda_jetpack5"
\
CMAKE_CUDA_ARCHITECTURES
=
"72;87"
\
bash gen_linux.sh
bash gen_linux.sh
FROM
--platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
FROM
--platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
...
@@ -123,8 +155,14 @@ ARG GOLANG_VERSION
...
@@ -123,8 +155,14 @@ ARG GOLANG_VERSION
WORKDIR
/go/src/github.com/ollama/ollama
WORKDIR
/go/src/github.com/ollama/ollama
COPY
. .
COPY
. .
COPY
--from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cuda-build-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY
--from=cuda-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY
--from=cuda-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cuda-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
## arm binary += 381M
COPY
--from=cuda-build-jetpack6-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cuda-build-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
## arm binary += 330M
COPY
--from=cuda-build-jetpack5-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cuda-build-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
ARG
GOFLAGS
ARG
GOFLAGS
ARG
CGO_CFLAGS
ARG
CGO_CFLAGS
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
...
...
gpu/gpu.go
View file @
d470ebe7
...
@@ -15,7 +15,9 @@ import (
...
@@ -15,7 +15,9 @@ import (
"log/slog"
"log/slog"
"os"
"os"
"path/filepath"
"path/filepath"
"regexp"
"runtime"
"runtime"
"strconv"
"strings"
"strings"
"sync"
"sync"
"unsafe"
"unsafe"
...
@@ -215,7 +217,7 @@ func GetGPUInfo() GpuInfoList {
...
@@ -215,7 +217,7 @@ func GetGPUInfo() GpuInfoList {
GpuInfo
:
GpuInfo
{
GpuInfo
:
GpuInfo
{
memInfo
:
mem
,
memInfo
:
mem
,
Library
:
"cpu"
,
Library
:
"cpu"
,
Variant
:
cpuCapability
,
Variant
:
cpuCapability
.
String
()
,
ID
:
"0"
,
ID
:
"0"
,
},
},
},
},
...
@@ -231,6 +233,35 @@ func GetGPUInfo() GpuInfoList {
...
@@ -231,6 +233,35 @@ func GetGPUInfo() GpuInfoList {
depPath
:=
GetDepDir
()
depPath
:=
GetDepDir
()
var
cudaVariant
string
if
runtime
.
GOARCH
==
"arm64"
&&
runtime
.
GOOS
==
"linux"
{
if
CudaTegra
!=
""
{
ver
:=
strings
.
Split
(
CudaTegra
,
"."
)
if
len
(
ver
)
>
0
{
cudaVariant
=
"jetpack"
+
ver
[
0
]
}
}
else
if
data
,
err
:=
os
.
ReadFile
(
"/etc/nv_tegra_release"
);
err
==
nil
{
r
:=
regexp
.
MustCompile
(
` R(\d+) `
)
m
:=
r
.
FindSubmatch
(
data
)
if
len
(
m
)
!=
2
{
slog
.
Info
(
"Unexpected format for /etc/nv_tegra_release. Set JETSON_JETPACK to select version"
)
}
else
{
if
l4t
,
err
:=
strconv
.
Atoi
(
string
(
m
[
1
]));
err
==
nil
{
// Note: mapping from L4t -> JP is inconsistent (can't just subtract 30)
// https://developer.nvidia.com/embedded/jetpack-archive
switch
l4t
{
case
35
:
cudaVariant
=
"jetpack5"
case
36
:
cudaVariant
=
"jetpack6"
default
:
slog
.
Info
(
"unsupported L4T version"
,
"nv_tegra_release"
,
string
(
data
))
}
}
}
}
}
// Load ALL libraries
// Load ALL libraries
cHandles
=
initCudaHandles
()
cHandles
=
initCudaHandles
()
...
@@ -240,6 +271,7 @@ func GetGPUInfo() GpuInfoList {
...
@@ -240,6 +271,7 @@ func GetGPUInfo() GpuInfoList {
gpuInfo
:=
CudaGPUInfo
{
gpuInfo
:=
CudaGPUInfo
{
GpuInfo
:
GpuInfo
{
GpuInfo
:
GpuInfo
{
Library
:
"cuda"
,
Library
:
"cuda"
,
Variant
:
cudaVariant
,
},
},
index
:
i
,
index
:
i
,
}
}
...
@@ -266,7 +298,15 @@ func GetGPUInfo() GpuInfoList {
...
@@ -266,7 +298,15 @@ func GetGPUInfo() GpuInfoList {
gpuInfo
.
ID
=
C
.
GoString
(
&
memInfo
.
gpu_id
[
0
])
gpuInfo
.
ID
=
C
.
GoString
(
&
memInfo
.
gpu_id
[
0
])
gpuInfo
.
Compute
=
fmt
.
Sprintf
(
"%d.%d"
,
memInfo
.
major
,
memInfo
.
minor
)
gpuInfo
.
Compute
=
fmt
.
Sprintf
(
"%d.%d"
,
memInfo
.
major
,
memInfo
.
minor
)
gpuInfo
.
MinimumMemory
=
cudaMinimumMemory
gpuInfo
.
MinimumMemory
=
cudaMinimumMemory
gpuInfo
.
DependencyPath
=
depPath
if
depPath
!=
""
{
gpuInfo
.
DependencyPath
=
depPath
// Check for variant specific directory
if
cudaVariant
!=
""
{
if
_
,
err
:=
os
.
Stat
(
filepath
.
Join
(
depPath
,
"cuda_"
+
cudaVariant
));
err
==
nil
{
gpuInfo
.
DependencyPath
=
filepath
.
Join
(
depPath
,
"cuda_"
+
cudaVariant
)
}
}
}
gpuInfo
.
Name
=
C
.
GoString
(
&
memInfo
.
gpu_name
[
0
])
gpuInfo
.
Name
=
C
.
GoString
(
&
memInfo
.
gpu_name
[
0
])
gpuInfo
.
DriverMajor
=
driverMajor
gpuInfo
.
DriverMajor
=
driverMajor
gpuInfo
.
DriverMinor
=
driverMinor
gpuInfo
.
DriverMinor
=
driverMinor
...
...
gpu/gpu_darwin.go
View file @
d470ebe7
...
@@ -25,7 +25,7 @@ func GetGPUInfo() GpuInfoList {
...
@@ -25,7 +25,7 @@ func GetGPUInfo() GpuInfoList {
return
[]
GpuInfo
{
return
[]
GpuInfo
{
{
{
Library
:
"cpu"
,
Library
:
"cpu"
,
Variant
:
GetCPUCapability
(),
Variant
:
GetCPUCapability
()
.
String
()
,
memInfo
:
mem
,
memInfo
:
mem
,
},
},
}
}
...
@@ -48,7 +48,7 @@ func GetCPUInfo() GpuInfoList {
...
@@ -48,7 +48,7 @@ func GetCPUInfo() GpuInfoList {
return
[]
GpuInfo
{
return
[]
GpuInfo
{
{
{
Library
:
"cpu"
,
Library
:
"cpu"
,
Variant
:
GetCPUCapability
(),
Variant
:
GetCPUCapability
()
.
String
()
,
memInfo
:
mem
,
memInfo
:
mem
,
},
},
}
}
...
...
gpu/types.go
View file @
d470ebe7
...
@@ -19,7 +19,7 @@ type GpuInfo struct {
...
@@ -19,7 +19,7 @@ type GpuInfo struct {
Library
string
`json:"library,omitempty"`
Library
string
`json:"library,omitempty"`
// Optional variant to select (e.g. versions, cpu feature flags)
// Optional variant to select (e.g. versions, cpu feature flags)
Variant
CPUCapability
`json:"variant"`
Variant
string
`json:"variant"`
// MinimumMemory represents the minimum memory required to use the GPU
// MinimumMemory represents the minimum memory required to use the GPU
MinimumMemory
uint64
`json:"-"`
MinimumMemory
uint64
`json:"-"`
...
@@ -81,8 +81,8 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList {
...
@@ -81,8 +81,8 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList {
for
_
,
info
:=
range
l
{
for
_
,
info
:=
range
l
{
found
:=
false
found
:=
false
requested
:=
info
.
Library
requested
:=
info
.
Library
if
info
.
Variant
!=
CPUCapabilityNone
{
if
info
.
Variant
!=
CPUCapabilityNone
.
String
()
{
requested
+=
"_"
+
info
.
Variant
.
String
()
requested
+=
"_"
+
info
.
Variant
}
}
for
i
,
lib
:=
range
libs
{
for
i
,
lib
:=
range
libs
{
if
lib
==
requested
{
if
lib
==
requested
{
...
...
llm/generate/gen_linux.sh
View file @
d470ebe7
...
@@ -165,7 +165,7 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
...
@@ -165,7 +165,7 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
echo
"CUDA libraries detected - building dynamic CUDA library"
echo
"CUDA libraries detected - building dynamic CUDA library"
init_vars
init_vars
CUDA_MAJOR
=
$(
ls
"
${
CUDA_LIB_DIR
}
"
/libcudart.so.
*
|
head
-1
|
cut
-f3
-d
.
||
true
)
CUDA_MAJOR
=
$(
ls
"
${
CUDA_LIB_DIR
}
"
/libcudart.so.
*
|
head
-1
|
cut
-f3
-d
.
||
true
)
if
[
-n
"
${
CUDA_MAJOR
}
"
]
;
then
if
[
-n
"
${
CUDA_MAJOR
}
"
-a
-z
"
${
CUDA_VARIANT
}
"
]
;
then
CUDA_VARIANT
=
_v
${
CUDA_MAJOR
}
CUDA_VARIANT
=
_v
${
CUDA_MAJOR
}
fi
fi
if
[
"
${
ARCH
}
"
==
"arm64"
]
;
then
if
[
"
${
ARCH
}
"
==
"arm64"
]
;
then
...
@@ -189,9 +189,10 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
...
@@ -189,9 +189,10 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
CMAKE_DEFS
=
"
${
COMMON_CMAKE_DEFS
}
${
CMAKE_DEFS
}
${
ARM64_DEFS
}
${
CMAKE_CUDA_DEFS
}
-DGGML_STATIC=off"
CMAKE_DEFS
=
"
${
COMMON_CMAKE_DEFS
}
${
CMAKE_DEFS
}
${
ARM64_DEFS
}
${
CMAKE_CUDA_DEFS
}
-DGGML_STATIC=off"
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/cuda
${
CUDA_VARIANT
}
"
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/cuda
${
CUDA_VARIANT
}
"
export
LLAMA_SERVER_LDFLAGS
=
"-L
${
CUDA_LIB_DIR
}
-lcudart -lcublas -lcublasLt -lcuda"
export
LLAMA_SERVER_LDFLAGS
=
"-L
${
CUDA_LIB_DIR
}
-lcudart -lcublas -lcublasLt -lcuda"
CUDA_DIST_DIR
=
"
${
DIST_BASE
}
/ollama_libs"
CUDA_DIST_DIR
=
"
${
CUDA_DIST_DIR
:-${
DIST_BASE
}
/ollama_libs
}
"
build
build
install
install
echo
"Installing CUDA dependencies in
${
CUDA_DIST_DIR
}
"
mkdir
-p
"
${
CUDA_DIST_DIR
}
"
mkdir
-p
"
${
CUDA_DIST_DIR
}
"
for
lib
in
${
CUDA_LIB_DIR
}
/libcudart.so
*
${
CUDA_LIB_DIR
}
/libcublas.so
*
${
CUDA_LIB_DIR
}
/libcublasLt.so
*
;
do
for
lib
in
${
CUDA_LIB_DIR
}
/libcudart.so
*
${
CUDA_LIB_DIR
}
/libcublas.so
*
${
CUDA_LIB_DIR
}
/libcublasLt.so
*
;
do
cp
-a
"
${
lib
}
"
"
${
CUDA_DIST_DIR
}
"
cp
-a
"
${
lib
}
"
"
${
CUDA_DIST_DIR
}
"
...
...
llm/payload.go
View file @
d470ebe7
...
@@ -82,8 +82,8 @@ func serversForGpu(info gpu.GpuInfo) []string {
...
@@ -82,8 +82,8 @@ func serversForGpu(info gpu.GpuInfo) []string {
// glob workDir for files that start with ollama_
// glob workDir for files that start with ollama_
availableServers
:=
getAvailableServers
()
availableServers
:=
getAvailableServers
()
requested
:=
info
.
Library
requested
:=
info
.
Library
if
info
.
Variant
!=
gpu
.
CPUCapabilityNone
{
if
info
.
Variant
!=
gpu
.
CPUCapabilityNone
.
String
()
{
requested
+=
"_"
+
info
.
Variant
.
String
()
requested
+=
"_"
+
info
.
Variant
}
}
servers
:=
[]
string
{}
servers
:=
[]
string
{}
...
...
scripts/build_linux.sh
View file @
d470ebe7
...
@@ -22,6 +22,7 @@ for TARGETARCH in ${BUILD_ARCH}; do
...
@@ -22,6 +22,7 @@ for TARGETARCH in ${BUILD_ARCH}; do
-t
builder:
$TARGETARCH
\
-t
builder:
$TARGETARCH
\
.
.
docker create
--platform
linux/
$TARGETARCH
--name
builder-
$TARGETARCH
builder:
$TARGETARCH
docker create
--platform
linux/
$TARGETARCH
--name
builder-
$TARGETARCH
builder:
$TARGETARCH
rm
-rf
./dist/linux-
$TARGETARCH
docker
cp
builder-
$TARGETARCH
:/go/src/github.com/ollama/ollama/dist/linux-
$TARGETARCH
./dist
docker
cp
builder-
$TARGETARCH
:/go/src/github.com/ollama/ollama/dist/linux-
$TARGETARCH
./dist
docker
rm
builder-
$TARGETARCH
docker
rm
builder-
$TARGETARCH
echo
"Compressing final linux bundle..."
echo
"Compressing final linux bundle..."
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment