Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
3773fb64
Unverified
Commit
3773fb64
authored
Jan 11, 2024
by
Daniel Hiltgen
Committed by
GitHub
Jan 11, 2024
Browse files
Merge pull request #1935 from dhiltgen/cpu_fallback
Fix up the CPU fallback selection
parents
f84537e0
7427fa13
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
29 additions
and
16 deletions
+29
-16
gpu/gpu_darwin.go
gpu/gpu_darwin.go
+1
-1
llm/llm.go
llm/llm.go
+8
-6
llm/payload_common.go
llm/payload_common.go
+7
-0
llm/payload_test.go
llm/payload_test.go
+13
-9
No files found.
gpu/gpu_darwin.go
View file @
3773fb64
...
...
@@ -34,7 +34,7 @@ func GetGPUInfo() GpuInfo {
mem
,
_
:=
getCPUMem
()
if
runtime
.
GOARCH
==
"amd64"
{
return
GpuInfo
{
Library
:
"
default
"
,
Library
:
"
cpu
"
,
Variant
:
GetCPUVariant
(),
memInfo
:
mem
,
}
...
...
llm/llm.go
View file @
3773fb64
...
...
@@ -51,7 +51,6 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
graph
:=
int64
(
ggml
.
NumGQA
())
*
kv
/
6
info
:=
gpu
.
GetGPUInfo
()
library
:=
info
.
Library
switch
runtime
.
GOOS
{
case
"darwin"
:
if
opts
.
NumGPU
==
0
{
...
...
@@ -60,13 +59,15 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
if
size
+
kv
+
graph
>
vram
{
log
.
Println
(
"not enough vram available, falling back to CPU only"
)
info
.
Library
=
"cpu"
info
.
Variant
=
gpu
.
GetCPUVariant
()
opts
.
NumGPU
=
0
break
}
opts
.
NumGPU
=
1
default
:
if
l
ibrary
==
"cpu"
||
library
==
"default"
{
if
info
.
L
ibrary
==
"cpu"
{
log
.
Println
(
"GPU not available, falling back to CPU"
)
opts
.
NumGPU
=
0
break
...
...
@@ -74,7 +75,8 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
// don't use GPU at all if no layers are loaded
if
opts
.
NumGPU
==
0
{
library
=
"cpu"
info
.
Library
=
"cpu"
info
.
Variant
=
gpu
.
GetCPUVariant
()
break
}
...
...
@@ -101,7 +103,8 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
min
:=
graph
+
kv
*
layers
/
maxlayers
if
layers
<=
0
||
min
>
avg
{
log
.
Printf
(
"not enough vram available, falling back to CPU only"
)
library
=
"cpu"
info
.
Library
=
"cpu"
info
.
Variant
=
gpu
.
GetCPUVariant
()
opts
.
NumGPU
=
0
break
}
...
...
@@ -111,8 +114,7 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
opts
.
RopeFrequencyBase
=
0.0
opts
.
RopeFrequencyScale
=
0.0
gpuInfo
:=
gpu
.
GetGPUInfo
()
return
newLlmServer
(
gpuInfo
,
model
,
adapters
,
projectors
,
opts
)
return
newLlmServer
(
info
,
model
,
adapters
,
projectors
,
opts
)
}
// Give any native cgo implementations an opportunity to initialize
...
...
llm/payload_common.go
View file @
3773fb64
...
...
@@ -28,6 +28,13 @@ func getDynLibs(gpuInfo gpu.GpuInfo) []string {
if
gpuInfo
.
Library
==
"default"
{
return
[]
string
{
"default"
}
}
// TODO - temporary until we have multiple CPU variations for Darwin
// Short circuit on darwin with metal only
if
len
(
availableDynLibs
)
==
1
{
if
_
,
onlyMetal
:=
availableDynLibs
[
"metal"
];
onlyMetal
{
return
[]
string
{
availableDynLibs
[
"metal"
]}
}
}
exactMatch
:=
""
dynLibs
:=
[]
string
{}
...
...
llm/payload_test.go
View file @
3773fb64
...
...
@@ -16,39 +16,43 @@ func TestGetDynLibs(t *testing.T) {
assert
.
Len
(
t
,
res
,
1
)
assert
.
Equal
(
t
,
availableDynLibs
[
"cpu"
],
res
[
0
])
variant
:=
gpu
.
GetCPUVariant
()
if
variant
!=
""
{
variant
=
"_"
+
variant
}
availableDynLibs
=
map
[
string
]
string
{
"rocm_v5"
:
"X_rocm_v5"
,
"rocm_v6"
:
"X_rocm_v6"
,
"cpu"
:
"X_cpu"
,
"rocm_v5"
:
"X_rocm_v5"
,
"rocm_v6"
:
"X_rocm_v6"
,
"cpu"
+
variant
:
"X_cpu"
,
}
assert
.
Equal
(
t
,
true
,
rocmDynLibPresent
())
res
=
getDynLibs
(
gpu
.
GpuInfo
{
Library
:
"rocm"
})
assert
.
Len
(
t
,
res
,
3
)
assert
.
Equal
(
t
,
availableDynLibs
[
"rocm_v5"
],
res
[
0
])
assert
.
Equal
(
t
,
availableDynLibs
[
"rocm_v6"
],
res
[
1
])
assert
.
Equal
(
t
,
availableDynLibs
[
"cpu"
],
res
[
2
])
assert
.
Equal
(
t
,
availableDynLibs
[
"cpu"
+
variant
],
res
[
2
])
res
=
getDynLibs
(
gpu
.
GpuInfo
{
Library
:
"rocm"
,
Variant
:
"v6"
})
assert
.
Len
(
t
,
res
,
3
)
assert
.
Equal
(
t
,
availableDynLibs
[
"rocm_v6"
],
res
[
0
])
assert
.
Equal
(
t
,
availableDynLibs
[
"rocm_v5"
],
res
[
1
])
assert
.
Equal
(
t
,
availableDynLibs
[
"cpu"
],
res
[
2
])
assert
.
Equal
(
t
,
availableDynLibs
[
"cpu"
+
variant
],
res
[
2
])
res
=
getDynLibs
(
gpu
.
GpuInfo
{
Library
:
"cuda"
})
assert
.
Len
(
t
,
res
,
1
)
assert
.
Equal
(
t
,
availableDynLibs
[
"cpu"
],
res
[
0
])
assert
.
Equal
(
t
,
availableDynLibs
[
"cpu"
+
variant
],
res
[
0
])
res
=
getDynLibs
(
gpu
.
GpuInfo
{
Library
:
"default"
})
assert
.
Len
(
t
,
res
,
1
)
assert
.
Equal
(
t
,
"default"
,
res
[
0
])
availableDynLibs
=
map
[
string
]
string
{
"rocm"
:
"X_rocm_v5"
,
"cpu"
:
"X_cpu"
,
"rocm"
:
"X_rocm_v5"
,
"cpu"
+
variant
:
"X_cpu"
,
}
assert
.
Equal
(
t
,
true
,
rocmDynLibPresent
())
res
=
getDynLibs
(
gpu
.
GpuInfo
{
Library
:
"rocm"
,
Variant
:
"v6"
})
assert
.
Len
(
t
,
res
,
2
)
assert
.
Equal
(
t
,
availableDynLibs
[
"rocm"
],
res
[
0
])
assert
.
Equal
(
t
,
availableDynLibs
[
"cpu"
],
res
[
1
])
assert
.
Equal
(
t
,
availableDynLibs
[
"cpu"
+
variant
],
res
[
1
])
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment