Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
059ae458
Unverified
Commit
059ae458
authored
Jan 07, 2024
by
Daniel Hiltgen
Committed by
GitHub
Jan 07, 2024
Browse files
Merge pull request #1834 from dhiltgen/old_cuda
Detect very old CUDA GPUs and fall back to CPU
parents
6347f501
d74ce6bd
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
74 additions
and
2 deletions
+74
-2
gpu/gpu.go
gpu/gpu.go
+15
-1
gpu/gpu_info_cuda.c
gpu/gpu_info_cuda.c
+51
-1
gpu/gpu_info_cuda.h
gpu/gpu_info_cuda.h
+8
-0
No files found.
gpu/gpu.go
View file @
059ae458
...
@@ -28,6 +28,9 @@ type handles struct {
...
@@ -28,6 +28,9 @@ type handles struct {
var
gpuMutex
sync
.
Mutex
var
gpuMutex
sync
.
Mutex
var
gpuHandles
*
handles
=
nil
var
gpuHandles
*
handles
=
nil
// CudaComputeMajorMin is the lowest CUDA compute capability major version
// accepted for the "cuda" library; GetGPUInfo compares the detected major
// version against it and logs a fall back to CPU mode when the GPU is older.
// TODO verify this is the correct min version
const CudaComputeMajorMin = 5
// Note: gpuMutex must already be held
// Note: gpuMutex must already be held
func
initGPUHandles
()
{
func
initGPUHandles
()
{
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
...
@@ -73,7 +76,18 @@ func GetGPUInfo() GpuInfo {
...
@@ -73,7 +76,18 @@ func GetGPUInfo() GpuInfo {
log
.
Printf
(
"error looking up CUDA GPU memory: %s"
,
C
.
GoString
(
memInfo
.
err
))
log
.
Printf
(
"error looking up CUDA GPU memory: %s"
,
C
.
GoString
(
memInfo
.
err
))
C
.
free
(
unsafe
.
Pointer
(
memInfo
.
err
))
C
.
free
(
unsafe
.
Pointer
(
memInfo
.
err
))
}
else
{
}
else
{
resp
.
Library
=
"cuda"
// Verify minimum compute capability
var
cc
C
.
cuda_compute_capability_t
C
.
cuda_compute_capability
(
*
gpuHandles
.
cuda
,
&
cc
)
if
cc
.
err
!=
nil
{
log
.
Printf
(
"error looking up CUDA GPU compute capability: %s"
,
C
.
GoString
(
cc
.
err
))
C
.
free
(
unsafe
.
Pointer
(
cc
.
err
))
}
else
if
cc
.
major
>=
CudaComputeMajorMin
{
log
.
Printf
(
"CUDA Compute Capability detected: %d.%d"
,
cc
.
major
,
cc
.
minor
)
resp
.
Library
=
"cuda"
}
else
{
log
.
Printf
(
"CUDA GPU is too old. Falling back to CPU mode. Compute Capability detected: %d.%d"
,
cc
.
major
,
cc
.
minor
)
}
}
}
}
else
if
gpuHandles
.
rocm
!=
nil
{
}
else
if
gpuHandles
.
rocm
!=
nil
{
C
.
rocm_check_vram
(
*
gpuHandles
.
rocm
,
&
memInfo
)
C
.
rocm_check_vram
(
*
gpuHandles
.
rocm
,
&
memInfo
)
...
...
gpu/gpu_info_cuda.c
View file @
059ae458
...
@@ -21,7 +21,7 @@ const char *cuda_lib_paths[] = {
...
@@ -21,7 +21,7 @@ const char *cuda_lib_paths[] = {
};
};
#endif
#endif
#define CUDA_LOOKUP_SIZE
5
#define CUDA_LOOKUP_SIZE
6
void
cuda_init
(
cuda_init_resp_t
*
resp
)
{
void
cuda_init
(
cuda_init_resp_t
*
resp
)
{
nvmlReturn_t
ret
;
nvmlReturn_t
ret
;
...
@@ -39,6 +39,7 @@ void cuda_init(cuda_init_resp_t *resp) {
...
@@ -39,6 +39,7 @@ void cuda_init(cuda_init_resp_t *resp) {
{
"nvmlDeviceGetHandleByIndex"
,
(
void
*
)
&
resp
->
ch
.
getHandle
},
{
"nvmlDeviceGetHandleByIndex"
,
(
void
*
)
&
resp
->
ch
.
getHandle
},
{
"nvmlDeviceGetMemoryInfo"
,
(
void
*
)
&
resp
->
ch
.
getMemInfo
},
{
"nvmlDeviceGetMemoryInfo"
,
(
void
*
)
&
resp
->
ch
.
getMemInfo
},
{
"nvmlDeviceGetCount_v2"
,
(
void
*
)
&
resp
->
ch
.
getCount
},
{
"nvmlDeviceGetCount_v2"
,
(
void
*
)
&
resp
->
ch
.
getCount
},
{
"nvmlDeviceGetCudaComputeCapability"
,
(
void
*
)
&
resp
->
ch
.
getComputeCapability
},
};
};
for
(
i
=
0
;
cuda_lib_paths
[
i
]
!=
NULL
&&
resp
->
ch
.
handle
==
NULL
;
i
++
)
{
for
(
i
=
0
;
cuda_lib_paths
[
i
]
!=
NULL
&&
resp
->
ch
.
handle
==
NULL
;
i
++
)
{
...
@@ -123,4 +124,53 @@ void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
...
@@ -123,4 +124,53 @@ void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
resp
->
free
+=
memInfo
.
free
;
resp
->
free
+=
memInfo
.
free
;
}
}
}
}
/*
 * Report the lowest CUDA compute capability (major.minor) among all devices
 * enumerated through the NVML function table in `h`.
 *
 * h    - NVML handle and function pointers; h.handle must be non-NULL.
 * resp - out parameter. On success resp->err is NULL and resp->major /
 *        resp->minor hold the lowest capability found. On failure resp->err
 *        is a strdup()'d message that the caller must free(), and
 *        resp->major / resp->minor should not be relied on.
 *
 * The lowest capability is reported because the oldest device limits
 * compatibility when several GPUs are present.
 */
void cuda_compute_capability(cuda_handle_t h, cuda_compute_capability_t *resp) {
  /* Compile-time constant so buf is a fixed-size array rather than a VLA. */
  enum { BUFLEN = 256 };
  char buf[BUFLEN + 1];
  nvmlDevice_t device;
  nvmlReturn_t ret;
  unsigned int devices = 0;
  unsigned int i; /* unsigned to match the device count and getHandle's index */
  int major = 0;
  int minor = 0;

  resp->err = NULL;
  resp->major = 0;
  resp->minor = 0;

  if (h.handle == NULL) {
    resp->err = strdup("nvml handle not initialized");
    return;
  }

  ret = (*h.getCount)(&devices);
  if (ret != NVML_SUCCESS) {
    snprintf(buf, BUFLEN, "unable to get device count: %d", ret);
    resp->err = strdup(buf);
    return;
  }

  /*
   * With no devices the loop below never runs and the result would stay at
   * 0.0, which reads as "GPU too old" to the caller. Report it explicitly.
   */
  if (devices == 0) {
    resp->err = strdup("no CUDA devices detected");
    return;
  }

  for (i = 0; i < devices; i++) {
    ret = (*h.getHandle)(i, &device);
    if (ret != NVML_SUCCESS) {
      snprintf(buf, BUFLEN, "unable to get device handle %u: %d", i, ret);
      resp->err = strdup(buf);
      return;
    }

    ret = (*h.getComputeCapability)(device, &major, &minor);
    if (ret != NVML_SUCCESS) {
      snprintf(buf, BUFLEN, "device compute capability lookup failure %u: %d", i, ret);
      resp->err = strdup(buf);
      return;
    }

    // Report the lowest major.minor we detect as that limits our compatibility
    // (resp->major == 0 means no device has been recorded yet).
    if (resp->major == 0 || resp->major > major) {
      resp->major = major;
      resp->minor = minor;
    } else if (resp->major == major && resp->minor > minor) {
      resp->minor = minor;
    }
  }
}
#endif // __APPLE__
#endif // __APPLE__
\ No newline at end of file
gpu/gpu_info_cuda.h
View file @
059ae458
...
@@ -22,6 +22,7 @@ typedef struct cuda_handle {
...
@@ -22,6 +22,7 @@ typedef struct cuda_handle {
nvmlReturn_t
(
*
getHandle
)(
unsigned
int
,
nvmlDevice_t
*
);
nvmlReturn_t
(
*
getHandle
)(
unsigned
int
,
nvmlDevice_t
*
);
nvmlReturn_t
(
*
getMemInfo
)(
nvmlDevice_t
,
nvmlMemory_t
*
);
nvmlReturn_t
(
*
getMemInfo
)(
nvmlDevice_t
,
nvmlMemory_t
*
);
nvmlReturn_t
(
*
getCount
)(
unsigned
int
*
);
nvmlReturn_t
(
*
getCount
)(
unsigned
int
*
);
nvmlReturn_t
(
*
getComputeCapability
)(
nvmlDevice_t
,
int
*
major
,
int
*
minor
);
}
cuda_handle_t
;
}
cuda_handle_t
;
typedef
struct
cuda_init_resp
{
typedef
struct
cuda_init_resp
{
...
@@ -29,8 +30,15 @@ typedef struct cuda_init_resp {
...
@@ -29,8 +30,15 @@ typedef struct cuda_init_resp {
cuda_handle_t
ch
;
cuda_handle_t
ch
;
}
cuda_init_resp_t
;
}
cuda_init_resp_t
;
typedef
struct
cuda_compute_capability
{
char
*
err
;
int
major
;
int
minor
;
}
cuda_compute_capability_t
;
void
cuda_init
(
cuda_init_resp_t
*
resp
);
void
cuda_init
(
cuda_init_resp_t
*
resp
);
void
cuda_check_vram
(
cuda_handle_t
ch
,
mem_info_t
*
resp
);
void
cuda_check_vram
(
cuda_handle_t
ch
,
mem_info_t
*
resp
);
void
cuda_compute_capability
(
cuda_handle_t
ch
,
cuda_compute_capability_t
*
cc
);
#endif // __GPU_INFO_CUDA_H__
#endif // __GPU_INFO_CUDA_H__
#endif // __APPLE__
#endif // __APPLE__
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment