Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
41a272de
Commit
41a272de
authored
Apr 16, 2024
by
Michael Yang
Browse files
darwin: no partial offloading if required memory greater than system
parent
f3357222
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
17 additions
and
9 deletions
+17
-9
gpu/gpu_darwin.go
gpu/gpu_darwin.go
+2
-1
gpu/gpu_info_darwin.h
gpu/gpu_info_darwin.h
+1
-0
gpu/gpu_info_darwin.m
gpu/gpu_info_darwin.m
+9
-7
llm/server.go
llm/server.go
+5
-1
No files found.
gpu/gpu_darwin.go
View file @
41a272de
...
@@ -32,6 +32,7 @@ func CheckVRAM() (uint64, error) {
...
@@ -32,6 +32,7 @@ func CheckVRAM() (uint64, error) {
// gpu not supported, this may not be metal
// gpu not supported, this may not be metal
return
0
,
nil
return
0
,
nil
}
}
return
uint64
(
C
.
getRecommendedMaxVRAM
()),
nil
return
uint64
(
C
.
getRecommendedMaxVRAM
()),
nil
}
}
...
@@ -52,7 +53,7 @@ func GetGPUInfo() GpuInfo {
...
@@ -52,7 +53,7 @@ func GetGPUInfo() GpuInfo {
func
getCPUMem
()
(
memInfo
,
error
)
{
func
getCPUMem
()
(
memInfo
,
error
)
{
return
memInfo
{
return
memInfo
{
TotalMemory
:
0
,
TotalMemory
:
uint64
(
C
.
getPhysicalMemory
())
,
FreeMemory
:
0
,
FreeMemory
:
0
,
DeviceCount
:
0
,
DeviceCount
:
0
,
},
nil
},
nil
...
...
gpu/gpu_info_darwin.h
View file @
41a272de
#import <Metal/Metal.h>
#import <Metal/Metal.h>
#include <stdint.h>
#include <stdint.h>
uint64_t
getRecommendedMaxVRAM
();
uint64_t
getRecommendedMaxVRAM
();
uint64_t
getPhysicalMemory
();
gpu/gpu_info_darwin.m
View file @
41a272de
//go:build darwin
//
go:build darwin
#include "gpu_info_darwin.h"
#include "gpu_info_darwin.h"
// Returns the default Metal device's recommended maximum working set size,
// as reported by MTLDevice.recommendedMaxWorkingSetSize.
//
// Returns 0 when the system has no default Metal device.
uint64_t getRecommendedMaxVRAM() {
  id<MTLDevice> device = MTLCreateSystemDefaultDevice();
  if (device == nil) {
    // MTLCreateSystemDefaultDevice() yields nil when Metal is unavailable;
    // CFRelease(NULL) would crash, so report "no VRAM" instead.
    return 0;
  }

  uint64_t result = device.recommendedMaxWorkingSetSize;
  // Balance the +1 reference handed back by the Create call.
  CFRelease(device);
  return result;
}
// Returns the value of NSProcessInfo's physicalMemory property for the
// current process's NSProcessInfo object, widened to uint64_t.
uint64_t getPhysicalMemory() {
  NSProcessInfo *processInfo = [NSProcessInfo processInfo];
  uint64_t totalMemory = processInfo.physicalMemory;
  return totalMemory;
}
llm/server.go
View file @
41a272de
...
@@ -108,7 +108,11 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
...
@@ -108,7 +108,11 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
memoryLayerOutput
:=
layers
[
"output"
]
.
size
()
memoryLayerOutput
:=
layers
[
"output"
]
.
size
()
memoryRequiredTotal
+=
memoryLayerOutput
memoryRequiredTotal
+=
memoryLayerOutput
if
memoryAvailable
>
memoryRequiredTotal
{
if
info
.
Library
==
"metal"
&&
memoryRequiredTotal
>
info
.
TotalMemory
{
// disable partial offloading when model is greater than total system memory
opts
.
NumGPU
=
0
}
else
if
memoryAvailable
>
memoryRequiredTotal
{
layerCount
=
int
(
ggml
.
KV
()
.
BlockCount
())
+
1
layerCount
=
int
(
ggml
.
KV
()
.
BlockCount
())
+
1
memoryRequiredPartial
=
memoryRequiredTotal
memoryRequiredPartial
=
memoryRequiredTotal
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment