Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
c336693f
Unverified
Commit
c336693f
authored
Jan 09, 2024
by
Jeffrey Morgan
Committed by
GitHub
Jan 09, 2024
Browse files
calculate overhead based number of gpu devices (#1875)
parent
e89dc1d5
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
13 additions
and
6 deletions
+13
-6
gpu/gpu.go
gpu/gpu.go
+3
-1
gpu/gpu_darwin.go
gpu/gpu_darwin.go
+1
-0
gpu/gpu_info.h
gpu/gpu_info.h
+1
-0
gpu/gpu_info_cpu.c
gpu/gpu_info_cpu.c
+2
-0
gpu/gpu_info_cuda.c
gpu/gpu_info_cuda.c
+2
-4
gpu/gpu_info_rocm.c
gpu/gpu_info_rocm.c
+2
-0
gpu/gpu_test.go
gpu/gpu_test.go
+1
-1
gpu/types.go
gpu/types.go
+1
-0
No files found.
gpu/gpu.go
View file @
c336693f
...
@@ -110,6 +110,8 @@ func GetGPUInfo() GpuInfo {
...
@@ -110,6 +110,8 @@ func GetGPUInfo() GpuInfo {
C
.
free
(
unsafe
.
Pointer
(
memInfo
.
err
))
C
.
free
(
unsafe
.
Pointer
(
memInfo
.
err
))
return
resp
return
resp
}
}
resp
.
DeviceCount
=
uint32
(
memInfo
.
count
)
resp
.
FreeMemory
=
uint64
(
memInfo
.
free
)
resp
.
FreeMemory
=
uint64
(
memInfo
.
free
)
resp
.
TotalMemory
=
uint64
(
memInfo
.
total
)
resp
.
TotalMemory
=
uint64
(
memInfo
.
total
)
return
resp
return
resp
...
@@ -132,7 +134,7 @@ func CheckVRAM() (int64, error) {
...
@@ -132,7 +134,7 @@ func CheckVRAM() (int64, error) {
gpuInfo
:=
GetGPUInfo
()
gpuInfo
:=
GetGPUInfo
()
if
gpuInfo
.
FreeMemory
>
0
&&
(
gpuInfo
.
Library
==
"cuda"
||
gpuInfo
.
Library
==
"rocm"
)
{
if
gpuInfo
.
FreeMemory
>
0
&&
(
gpuInfo
.
Library
==
"cuda"
||
gpuInfo
.
Library
==
"rocm"
)
{
// leave 10% or 384Mi of VRAM free for unaccounted for overhead
// leave 10% or 384Mi of VRAM free for unaccounted for overhead
overhead
:=
gpuInfo
.
FreeMemory
/
10
overhead
:=
gpuInfo
.
FreeMemory
*
uint64
(
gpuInfo
.
DeviceCount
)
/
10
if
overhead
<
384
*
1024
*
1024
{
if
overhead
<
384
*
1024
*
1024
{
overhead
=
384
*
1024
*
1024
overhead
=
384
*
1024
*
1024
}
}
...
...
gpu/gpu_darwin.go
View file @
c336693f
...
@@ -42,6 +42,7 @@ func getCPUMem() (memInfo, error) {
...
@@ -42,6 +42,7 @@ func getCPUMem() (memInfo, error) {
return
memInfo
{
return
memInfo
{
TotalMemory
:
0
,
TotalMemory
:
0
,
FreeMemory
:
0
,
FreeMemory
:
0
,
DeviceCount
:
0
,
},
nil
},
nil
}
}
...
...
gpu/gpu_info.h
View file @
c336693f
...
@@ -34,6 +34,7 @@ extern "C" {
...
@@ -34,6 +34,7 @@ extern "C" {
typedef
struct
mem_info
{
typedef
struct
mem_info
{
uint64_t
total
;
uint64_t
total
;
uint64_t
free
;
uint64_t
free
;
unsigned
int
count
;
char
*
err
;
// If non-NULL, caller responsible for freeing
char
*
err
;
// If non-NULL, caller responsible for freeing
}
mem_info_t
;
}
mem_info_t
;
...
...
gpu/gpu_info_cpu.c
View file @
c336693f
...
@@ -8,6 +8,7 @@ void cpu_check_ram(mem_info_t *resp) {
...
@@ -8,6 +8,7 @@ void cpu_check_ram(mem_info_t *resp) {
MEMORYSTATUSEX
info
;
MEMORYSTATUSEX
info
;
info
.
dwLength
=
sizeof
(
info
);
info
.
dwLength
=
sizeof
(
info
);
if
(
GlobalMemoryStatusEx
(
&
info
)
!=
0
)
{
if
(
GlobalMemoryStatusEx
(
&
info
)
!=
0
)
{
resp
->
count
=
1
;
resp
->
total
=
info
.
ullTotalPhys
;
resp
->
total
=
info
.
ullTotalPhys
;
resp
->
free
=
info
.
ullAvailPhys
;
resp
->
free
=
info
.
ullAvailPhys
;
}
else
{
}
else
{
...
@@ -26,6 +27,7 @@ void cpu_check_ram(mem_info_t *resp) {
...
@@ -26,6 +27,7 @@ void cpu_check_ram(mem_info_t *resp) {
if
(
sysinfo
(
&
info
)
!=
0
)
{
if
(
sysinfo
(
&
info
)
!=
0
)
{
resp
->
err
=
strdup
(
strerror
(
errno
));
resp
->
err
=
strdup
(
strerror
(
errno
));
}
else
{
}
else
{
resp
->
count
=
1
;
resp
->
total
=
info
.
totalram
*
info
.
mem_unit
;
resp
->
total
=
info
.
totalram
*
info
.
mem_unit
;
resp
->
free
=
info
.
freeram
*
info
.
mem_unit
;
resp
->
free
=
info
.
freeram
*
info
.
mem_unit
;
}
}
...
...
gpu/gpu_info_cuda.c
View file @
c336693f
...
@@ -94,8 +94,7 @@ void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
...
@@ -94,8 +94,7 @@ void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
return
;
return
;
}
}
unsigned
int
devices
;
ret
=
(
*
h
.
getCount
)(
&
resp
->
count
);
ret
=
(
*
h
.
getCount
)(
&
devices
);
if
(
ret
!=
NVML_SUCCESS
)
{
if
(
ret
!=
NVML_SUCCESS
)
{
snprintf
(
buf
,
buflen
,
"unable to get device count: %d"
,
ret
);
snprintf
(
buf
,
buflen
,
"unable to get device count: %d"
,
ret
);
resp
->
err
=
strdup
(
buf
);
resp
->
err
=
strdup
(
buf
);
...
@@ -104,8 +103,7 @@ void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
...
@@ -104,8 +103,7 @@ void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
resp
->
total
=
0
;
resp
->
total
=
0
;
resp
->
free
=
0
;
resp
->
free
=
0
;
for
(
i
=
0
;
i
<
resp
->
count
;
i
++
)
{
for
(
i
=
0
;
i
<
devices
;
i
++
)
{
ret
=
(
*
h
.
getHandle
)(
i
,
&
device
);
ret
=
(
*
h
.
getHandle
)(
i
,
&
device
);
if
(
ret
!=
NVML_SUCCESS
)
{
if
(
ret
!=
NVML_SUCCESS
)
{
snprintf
(
buf
,
buflen
,
"unable to get device handle %d: %d"
,
i
,
ret
);
snprintf
(
buf
,
buflen
,
"unable to get device handle %d: %d"
,
i
,
ret
);
...
...
gpu/gpu_info_rocm.c
View file @
c336693f
...
@@ -110,6 +110,8 @@ void rocm_check_vram(rocm_handle_t h, mem_info_t *resp) {
...
@@ -110,6 +110,8 @@ void rocm_check_vram(rocm_handle_t h, mem_info_t *resp) {
return
;
return
;
}
}
// TODO: set this to the actual number of devices
resp
->
count
=
1
;
resp
->
total
=
totalMem
;
resp
->
total
=
totalMem
;
resp
->
free
=
totalMem
-
usedMem
;
resp
->
free
=
totalMem
-
usedMem
;
return
;
return
;
...
...
gpu/gpu_test.go
View file @
c336693f
...
@@ -18,6 +18,7 @@ func TestBasicGetGPUInfo(t *testing.T) {
...
@@ -18,6 +18,7 @@ func TestBasicGetGPUInfo(t *testing.T) {
case
"linux"
,
"windows"
:
case
"linux"
,
"windows"
:
assert
.
Greater
(
t
,
info
.
TotalMemory
,
uint64
(
0
))
assert
.
Greater
(
t
,
info
.
TotalMemory
,
uint64
(
0
))
assert
.
Greater
(
t
,
info
.
FreeMemory
,
uint64
(
0
))
assert
.
Greater
(
t
,
info
.
FreeMemory
,
uint64
(
0
))
assert
.
Greater
(
t
,
info
.
DeviceCount
,
uint64
(
0
))
default
:
default
:
return
return
}
}
...
@@ -35,7 +36,6 @@ func TestCPUMemInfo(t *testing.T) {
...
@@ -35,7 +36,6 @@ func TestCPUMemInfo(t *testing.T) {
default
:
default
:
return
return
}
}
}
}
// TODO - add some logic to figure out card type through other means and actually verify we got back what we expected
// TODO - add some logic to figure out card type through other means and actually verify we got back what we expected
gpu/types.go
View file @
c336693f
...
@@ -3,6 +3,7 @@ package gpu
...
@@ -3,6 +3,7 @@ package gpu
type
memInfo
struct
{
type
memInfo
struct
{
TotalMemory
uint64
`json:"total_memory,omitempty"`
TotalMemory
uint64
`json:"total_memory,omitempty"`
FreeMemory
uint64
`json:"free_memory,omitempty"`
FreeMemory
uint64
`json:"free_memory,omitempty"`
DeviceCount
uint32
`json:"device_count,omitempty"`
}
}
// Beginning of an `ollama info` command
// Beginning of an `ollama info` command
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment