OpenDAS / ollama

Commit df011054 (unverified)
Authored Nov 12, 2024 by Daniel Hiltgen, committed by GitHub on Nov 12, 2024
Parent: ac07160c

Jetpack support for Go server (#7217)

This adds support for the Jetson JetPack variants into the Go runner.
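Background, not part of the diff: JetPack 5 ships on L4T release r35 and JetPack 6 on r36, which is what the JETPACK_5/JETPACK_6 build args added in the Dockerfile below encode. As a rough illustration of how a Go program could tell the two apart on a Jetson, the sketch below parses /etc/nv_tegra_release; the helper name and mapping are assumptions for illustration, not code taken from this commit.

// jetpackVariant is an illustrative helper: it maps the L4T release recorded in
// /etc/nv_tegra_release ("# R35 (release) ..." or "# R36 (release) ...") to a
// JetPack variant name. Not part of this commit.
package main

import (
	"fmt"
	"os"
	"regexp"
)

func jetpackVariant() string {
	data, err := os.ReadFile("/etc/nv_tegra_release")
	if err != nil {
		return "" // not an NVIDIA L4T (Jetson) system
	}
	m := regexp.MustCompile(`R(\d+)`).FindSubmatch(data)
	if m == nil {
		return ""
	}
	switch string(m[1]) {
	case "35":
		return "jetpack5"
	case "36":
		return "jetpack6"
	}
	return ""
}

func main() {
	fmt.Println("detected variant:", jetpackVariant())
}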
Showing 8 changed files with 78 additions and 20 deletions (+78, -20)
Dockerfile                +64  -8
discover/amd_linux.go      +1  -1
discover/amd_windows.go    +1  -1
discover/gpu.go            +4  -4
discover/types.go          +1  -1
llama/llama.go             +4  -2
llama/make/cuda.make       +1  -1
llm/server.go              +2  -2
Dockerfile
@@ -5,6 +5,8 @@ ARG CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
 ARG CUDA_VERSION_12=12.4.0
 ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
 ARG ROCM_VERSION=6.1.2
+ARG JETPACK_6=r36.2.0
+ARG JETPACK_5=r35.4.1
 
 ### To create a local image for building linux binaries on mac or windows with efficient incremental builds
 #
@@ -13,7 +15,7 @@ ARG ROCM_VERSION=6.1.2
 #
 ### Then incremental builds will be much faster in this container
 #
-# make -C llama -j 10 && go build -trimpath -o dist/linux-amd64/ollama .
+# make -j 10 && go build -trimpath -o dist/linux-amd64/ollama .
 #
 FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS unified-builder-amd64
 ARG CMAKE_VERSION
@@ -76,9 +78,9 @@ ARG CUDA_V12_ARCHITECTURES
 ARG OLLAMA_FAST_BUILD
 RUN --mount=type=cache,target=/root/.ccache \
     if grep "^flags" /proc/cpuinfo|grep avx>/dev/null ; then \
-        make -C llama -j $(expr $(nproc) / 2) ; \
+        make -j $(expr $(nproc) / 2) ; \
     else \
-        make -C llama -j 5 ; \
+        make -j 5 ; \
     fi
 
 FROM --platform=linux/arm64 unified-builder-arm64 AS runners-arm64
@@ -90,7 +92,46 @@ ARG CUDA_V11_ARCHITECTURES
 ARG CUDA_V12_ARCHITECTURES
 ARG OLLAMA_FAST_BUILD
 RUN --mount=type=cache,target=/root/.ccache \
-    make -C llama -j 8
+    make -j 5
+
+# Jetsons need to be built in discrete stages
+FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5} AS runners-jetpack5-arm64
+ARG GOLANG_VERSION
+RUN apt-get update && apt-get install -y git curl ccache && \
+    curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \
+    ln -s /usr/local/go/bin/go /usr/local/bin/go && \
+    ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+WORKDIR /go/src/github.com/ollama/ollama/
+COPY . .
+ARG CGO_CFLAGS
+ENV GOARCH arm64
+RUN --mount=type=cache,target=/root/.ccache \
+    make -j 5 cuda_v11 \
+        CUDA_ARCHITECTURES="72;87" \
+        GPU_RUNNER_VARIANT=_jetpack5 \
+        CGO_EXTRA_LDFLAGS_LINUX=-L/usr/local/cuda/lib64/stubs \
+        DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama \
+        DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama/cuda_jetpack5
+
+FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6} AS runners-jetpack6-arm64
+ARG GOLANG_VERSION
+RUN apt-get update && apt-get install -y git curl ccache && \
+    curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \
+    ln -s /usr/local/go/bin/go /usr/local/bin/go && \
+    ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+WORKDIR /go/src/github.com/ollama/ollama/
+COPY . .
+ARG CGO_CFLAGS
+ENV GOARCH arm64
+RUN --mount=type=cache,target=/root/.ccache \
+    make -j 5 cuda_v12 \
+        CUDA_ARCHITECTURES="87" \
+        GPU_RUNNER_VARIANT=_jetpack6 \
+        CGO_EXTRA_LDFLAGS_LINUX=-L/usr/local/cuda/lib64/stubs \
+        DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama \
+        DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama/cuda_jetpack6
+
 # Intermediate stages used for ./scripts/build_linux.sh
@@ -134,12 +175,20 @@ FROM --platform=linux/arm64 builder-arm64 AS build-arm64
 COPY . .
 COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
 COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/build/ build/
+COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
+COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/build/ build/
+COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
+COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/build/ build/
 ARG GOFLAGS
 ARG CGO_CFLAGS
 RUN --mount=type=cache,target=/root/.ccache \
     go build -trimpath -o dist/linux-arm64/bin/ollama .
 RUN cd dist/linux-$GOARCH && \
     tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
+RUN cd dist/linux-$GOARCH-jetpack5 && \
+    tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack5.tgz
+RUN cd dist/linux-$GOARCH-jetpack6 && \
+    tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack6.tgz
 
 FROM --platform=linux/amd64 scratch AS dist-amd64
 COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
@@ -180,16 +229,23 @@ RUN rm -rf \
 FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64
 RUN apt-get update && \
     apt-get install -y ca-certificates && \
-    rm -rf /var/lib/apt/lists/*
+    apt-get clean && rm -rf /var/lib/apt/lists/*
 COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
 COPY --from=runners-cuda-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
 
 FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64
+COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ /lib/
+COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ /lib/
 RUN apt-get update && \
     apt-get install -y ca-certificates && \
-    rm -rf /var/lib/apt/lists/*
+    apt-get clean && rm -rf /var/lib/apt/lists/*
 COPY --from=container-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
-COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
+COPY --from=cpu-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
+COPY --from=cuda-11-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
+COPY --from=cuda-12-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
+COPY --from=cuda-build-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
+COPY --from=cuda-build-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
 
 # ROCm libraries larger so we keep it distinct from the CPU/CUDA image
 FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm
@@ -198,7 +254,7 @@ FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm
 COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/lib/ /lib/
 RUN apt-get update && \
     apt-get install -y ca-certificates && \
-    rm -rf /var/lib/apt/lists/*
+    apt-get clean && rm -rf /var/lib/apt/lists/*
 COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
 COPY --from=runners-rocm-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
discover/amd_linux.go
@@ -350,7 +350,7 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
                return nil, err
            }
        }
-       gpuInfo.DependencyPath = libDir
+       gpuInfo.DependencyPath = []string{libDir}
 
        if gfxOverride == "" {
            // Only load supported list once
discover/amd_windows.go
@@ -111,7 +111,7 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
            UnreliableFreeMemory: true,
 
            ID:             strconv.Itoa(i), // TODO this is probably wrong if we specify visible devices
-           DependencyPath: libDir,
+           DependencyPath: []string{libDir},
            MinimumMemory:  rocmMinimumMemory,
            Name:           name,
            Compute:        gfx,
discover/gpu.go
@@ -240,7 +240,7 @@ func GetGPUInfo() GpuInfoList {
                Library: "cpu",
                Variant: cpuCapability.String(),
                ID:      "0",
-               DependencyPath: depPath,
+               DependencyPath: []string{depPath},
            },
            CPUs: details,
        },
@@ -293,11 +293,11 @@ func GetGPUInfo() GpuInfoList {
            gpuInfo.DriverMinor = driverMinor
            variant := cudaVariant(gpuInfo)
            if depPath != "" {
-               gpuInfo.DependencyPath = depPath
+               gpuInfo.DependencyPath = []string{depPath}
                // Check for variant specific directory
                if variant != "" {
                    if _, err := os.Stat(filepath.Join(depPath, "cuda_"+variant)); err == nil {
-                       gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+variant)
+                       gpuInfo.DependencyPath = []string{filepath.Join(depPath, "cuda_"+variant), depPath}
                    }
                }
            }
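Reviewer note, illustrative only: after the hunk above, a CUDA GPU whose dependency directory contains a variant-specific subdirectory (such as cuda_jetpack5 or cuda_jetpack6) carries both paths, most specific first, so the generic directory remains a fallback. A minimal standalone sketch of the resulting ordering, with a made-up depPath and variant:

// Illustrative sketch of the DependencyPath ordering produced above;
// the depPath value and variant are hypothetical.
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

func main() {
	depPath := "/usr/lib/ollama" // hypothetical base dependency directory
	variant := "jetpack6"        // hypothetical result of cudaVariant(gpuInfo)

	dependencyPath := []string{depPath}
	if _, err := os.Stat(filepath.Join(depPath, "cuda_"+variant)); err == nil {
		// variant-specific libraries are searched first; the generic dir stays as a fallback
		dependencyPath = []string{filepath.Join(depPath, "cuda_"+variant), depPath}
	}
	fmt.Println(dependencyPath)
}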
@@ -370,7 +370,7 @@ func GetGPUInfo() GpuInfoList {
                gpuInfo.FreeMemory = uint64(memInfo.free)
                gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
                gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
-               gpuInfo.DependencyPath = depPath
+               gpuInfo.DependencyPath = []string{depPath}
                oneapiGPUs = append(oneapiGPUs, gpuInfo)
            }
        }
discover/types.go
@@ -25,7 +25,7 @@ type GpuInfo struct { // TODO better name maybe "InferenceProcessor"?
    MinimumMemory uint64 `json:"-"`
 
    // Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly
-   DependencyPath string `json:"lib_path,omitempty"`
+   DependencyPath []string `json:"lib_path,omitempty"`
 
    // Extra environment variables specific to the GPU as list of [key,value]
    EnvWorkarounds [][2]string `json:"envs,omitempty"`
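Side effect worth noting (illustration, not from the diff): with DependencyPath now a []string, the lib_path field serializes as a JSON array and is still omitted when empty. A minimal example with hypothetical paths:

// Minimal illustration of how the changed field marshals; the values are made up.
package main

import (
	"encoding/json"
	"fmt"
)

type gpuInfo struct {
	DependencyPath []string `json:"lib_path,omitempty"`
}

func main() {
	b, _ := json.Marshal(gpuInfo{DependencyPath: []string{
		"/usr/lib/ollama/cuda_jetpack6", // hypothetical variant directory
		"/usr/lib/ollama",               // hypothetical generic directory
	}})
	fmt.Println(string(b)) // {"lib_path":["/usr/lib/ollama/cuda_jetpack6","/usr/lib/ollama"]}
}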
llama/llama.go
@@ -21,6 +21,8 @@ package llama
 #cgo cuda CFLAGS: -fPIE -DGGML_USE_CUDA -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1
 #cgo cuda CXXFLAGS: -DGGML_USE_CUDA -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1
 #cgo cuda CXXFLAGS: -DGGML_USE_CUDA -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1
+#cgo cuda_jetpack5 LDFLAGS: -lggml_cuda_jetpack5 -L/usr/local/cuda-11/lib64
+#cgo cuda_jetpack6 LDFLAGS: -lggml_cuda_jetpack6 -L/usr/local/cuda-12/lib64
 #cgo cuda_v11 LDFLAGS: -lggml_cuda_v11 -L/usr/local/cuda-11/lib64
 #cgo cuda_v12 LDFLAGS: -lggml_cuda_v12 -L/usr/local/cuda-12/lib64
 #cgo darwin,amd64 CFLAGS: -Wno-incompatible-pointer-types-discards-qualifiers
@@ -36,8 +38,8 @@ package llama
 #cgo linux CXXFLAGS: -D_GNU_SOURCE
 #cgo linux,amd64 LDFLAGS: -L${SRCDIR}/build/Linux/amd64
 #cgo linux,amd64 LDFLAGS: -L${SRCDIR}/build/Linux/amd64
-#cgo linux,arm64 CFLAGS: -D__aarch64__ -D__ARM_NEON -D__ARM_FEATURE_FMA -D__ARM_FEATURE_MATMUL_INT8
-#cgo linux,arm64 CXXFLAGS: -D__aarch64__ -D__ARM_NEON -D__ARM_FEATURE_FMA -D__ARM_FEATURE_MATMUL_INT8
+#cgo linux,arm64 CFLAGS: -D__aarch64__ -D__ARM_NEON -D__ARM_FEATURE_FMA
+#cgo linux,arm64 CXXFLAGS: -D__aarch64__ -D__ARM_NEON -D__ARM_FEATURE_FMA
 #cgo linux,arm64 LDFLAGS: -L${SRCDIR}/build/Linux/arm64
 #cgo linux,arm64,sve CFLAGS: -march=armv8.6-a+sve
 #cgo linux,arm64,sve CXXFLAGS: -march=armv8.6-a+sve
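Context, not part of the diff: a #cgo directive prefixed with a build constraint only applies when that constraint is satisfied, so the cuda_jetpack5/cuda_jetpack6 LDFLAGS lines added above take effect only for runners built with those tags, the same mechanism the existing cuda_v11/cuda_v12 lines use. A standalone illustration, with a made-up C function and library path:

// Illustration of tag-guarded #cgo directives; not from the ollama tree.
// Build with:  go build -tags cuda_jetpack6 .
// Built without the tag, the LDFLAGS line below is simply not applied.
package main

/*
#cgo cuda_jetpack6 LDFLAGS: -L/usr/local/cuda-12/lib64
int answer(void) { return 42; }
*/
import "C"

import "fmt"

func main() {
	fmt.Println(C.answer()) // prints 42 either way; only the link flags differ
}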
llama/make/cuda.make
@@ -20,7 +20,7 @@ GPU_COMPILER_CFLAGS_LINUX = $(CFLAGS) -Xcompiler -fPIC -D_GNU_SOURCE
 GPU_COMPILER_CXXFLAGS_WIN = $(CXXFLAGS) -D_WIN32_WINNT=0x602
 GPU_COMPILER_CXXFLAGS_LINUX = $(CXXFLAGS) -Xcompiler -fPIC -D_GNU_SOURCE
 GPU_LIBS = $(sort $(wildcard $(addsuffix *.$(SHARED_EXT)*,$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT)))))
-GPU_DIST_DEPS_LIBS = $(sort $(addprefix $(DIST_LIB_DIR)/,$(notdir $(GPU_LIBS))))
+GPU_DIST_DEPS_LIBS = $(sort $(addprefix $(DIST_GPU_RUNNER_DEPS_DIR)/,$(notdir $(GPU_LIBS))))
 ifeq ($(OS),linux)
 	CUDA_PATH?=/usr/local/cuda
llm/server.go
@@ -306,9 +306,9 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
    // Note: we always put the dependency path first
    // since this was the exact version we compiled/linked against
-   if gpus[0].DependencyPath != "" {
+   if gpus[0].DependencyPath != nil {
        // assume gpus from the same library have the same dependency path
-       libraryPaths = append([]string{gpus[0].DependencyPath}, libraryPaths...)
+       libraryPaths = append(gpus[0].DependencyPath, libraryPaths...)
    }
 
    server := filepath.Join(dir, "ollama_llama_server")
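Illustration, not from the diff: because DependencyPath is now a slice, all of its entries can be prepended to the runner's library search path in a single append, keeping the variant-specific directory ahead of the generic one. A rough sketch of how such a list could be folded into LD_LIBRARY_PATH, with hypothetical paths and variable names; the real wiring lives in NewLlamaServer.

// Rough, self-contained sketch only; paths are made up.
package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

func main() {
	dependencyPath := []string{
		"/usr/lib/ollama/cuda_jetpack6", // hypothetical variant-specific dir, searched first
		"/usr/lib/ollama",               // hypothetical generic dir, fallback
	}
	libraryPaths := append(dependencyPath, "/usr/local/cuda/lib64") // pre-existing entries follow

	sep := string(filepath.ListSeparator) // ":" on Linux
	fmt.Println("LD_LIBRARY_PATH=" + strings.Join(libraryPaths, sep))
}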