Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
fd5971be
Commit
fd5971be
authored
May 24, 2024
by
Wang,Zhe
Browse files
support ollama run on Intel GPUs
parent
7ca71a6b
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
614 additions
and
31 deletions
+614
-31
gpu/gpu.go
gpu/gpu.go
+93
-31
gpu/gpu_info.h
gpu/gpu_info.h
+1
-0
gpu/gpu_info_oneapi.c
gpu/gpu_info_oneapi.c
+214
-0
gpu/gpu_info_oneapi.h
gpu/gpu_info_oneapi.h
+211
-0
gpu/gpu_oneapi.go
gpu/gpu_oneapi.go
+21
-0
llm/generate/gen_linux.sh
llm/generate/gen_linux.sh
+30
-0
llm/generate/gen_windows.ps1
llm/generate/gen_windows.ps1
+44
-0
No files found.
gpu/gpu.go
View file @
fd5971be
...
@@ -16,6 +16,7 @@ import (
...
@@ -16,6 +16,7 @@ import (
"os"
"os"
"path/filepath"
"path/filepath"
"runtime"
"runtime"
"strconv"
"strings"
"strings"
"sync"
"sync"
"unsafe"
"unsafe"
...
@@ -28,6 +29,7 @@ type handles struct {
...
@@ -28,6 +29,7 @@ type handles struct {
deviceCount
int
deviceCount
int
cudart
*
C
.
cudart_handle_t
cudart
*
C
.
cudart_handle_t
nvcuda
*
C
.
nvcuda_handle_t
nvcuda
*
C
.
nvcuda_handle_t
oneapi
*
C
.
oneapi_handle_t
}
}
const
(
const
(
...
@@ -80,6 +82,15 @@ var NvcudaWindowsGlobs = []string{
...
@@ -80,6 +82,15 @@ var NvcudaWindowsGlobs = []string{
"c:
\\
windows
\\
system*
\\
nvcuda.dll"
,
"c:
\\
windows
\\
system*
\\
nvcuda.dll"
,
}
}
// OneapiWindowsGlobs holds the glob patterns used on Windows to locate the
// Intel Level-Zero GPU driver library (ze_intel_gpu64.dll). initGPUHandles
// passes them to FindGPULibs as the oneAPI management-library search patterns.
var OneapiWindowsGlobs = []string{
	"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
}
// OneapiLinuxGlobs holds the glob patterns used on Linux to locate the Intel
// Level-Zero GPU driver library (libze_intel_gpu.so*). initGPUHandles passes
// them to FindGPULibs as the oneAPI management-library search patterns.
var OneapiLinuxGlobs = []string{
	"/usr/lib/x86_64-linux-gnu/libze_intel_gpu.so*",
	"/usr/lib*/libze_intel_gpu.so*",
}
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var
CudaTegra
string
=
os
.
Getenv
(
"JETSON_JETPACK"
)
var
CudaTegra
string
=
os
.
Getenv
(
"JETSON_JETPACK"
)
...
@@ -94,6 +105,8 @@ func initGPUHandles() *handles {
...
@@ -94,6 +105,8 @@ func initGPUHandles() *handles {
var
cudartMgmtPatterns
[]
string
var
cudartMgmtPatterns
[]
string
var
nvcudaMgmtName
string
var
nvcudaMgmtName
string
var
nvcudaMgmtPatterns
[]
string
var
nvcudaMgmtPatterns
[]
string
var
oneapiMgmtName
string
var
oneapiMgmtPatterns
[]
string
tmpDir
,
_
:=
PayloadsDir
()
tmpDir
,
_
:=
PayloadsDir
()
switch
runtime
.
GOOS
{
switch
runtime
.
GOOS
{
...
@@ -105,6 +118,8 @@ func initGPUHandles() *handles {
...
@@ -105,6 +118,8 @@ func initGPUHandles() *handles {
// Aligned with driver, we can't carry as payloads
// Aligned with driver, we can't carry as payloads
nvcudaMgmtName
=
"nvcuda.dll"
nvcudaMgmtName
=
"nvcuda.dll"
nvcudaMgmtPatterns
=
NvcudaWindowsGlobs
nvcudaMgmtPatterns
=
NvcudaWindowsGlobs
oneapiMgmtName
=
"ze_intel_gpu64.dll"
oneapiMgmtPatterns
=
OneapiWindowsGlobs
case
"linux"
:
case
"linux"
:
cudartMgmtName
=
"libcudart.so*"
cudartMgmtName
=
"libcudart.so*"
if
tmpDir
!=
""
{
if
tmpDir
!=
""
{
...
@@ -115,6 +130,8 @@ func initGPUHandles() *handles {
...
@@ -115,6 +130,8 @@ func initGPUHandles() *handles {
// Aligned with driver, we can't carry as payloads
// Aligned with driver, we can't carry as payloads
nvcudaMgmtName
=
"libcuda.so*"
nvcudaMgmtName
=
"libcuda.so*"
nvcudaMgmtPatterns
=
NvcudaLinuxGlobs
nvcudaMgmtPatterns
=
NvcudaLinuxGlobs
oneapiMgmtName
=
"libze_intel_gpu.so"
oneapiMgmtPatterns
=
OneapiLinuxGlobs
default
:
default
:
return
gpuHandles
return
gpuHandles
}
}
...
@@ -141,6 +158,18 @@ func initGPUHandles() *handles {
...
@@ -141,6 +158,18 @@ func initGPUHandles() *handles {
return
gpuHandles
return
gpuHandles
}
}
}
}
oneapiLibPaths
:=
FindGPULibs
(
oneapiMgmtName
,
oneapiMgmtPatterns
)
if
len
(
oneapiLibPaths
)
>
0
{
deviceCount
,
oneapi
,
libPath
:=
LoadOneapiMgmt
(
oneapiLibPaths
)
if
oneapi
!=
nil
{
slog
.
Debug
(
"detected Intel GPUs"
,
"library"
,
libPath
,
"count"
,
deviceCount
)
gpuHandles
.
oneapi
=
oneapi
gpuHandles
.
deviceCount
=
deviceCount
return
gpuHandles
}
}
return
gpuHandles
return
gpuHandles
}
}
...
@@ -181,39 +210,53 @@ func GetGPUInfo() GpuInfoList {
...
@@ -181,39 +210,53 @@ func GetGPUInfo() GpuInfoList {
if
cpuVariant
==
""
&&
runtime
.
GOARCH
==
"amd64"
{
if
cpuVariant
==
""
&&
runtime
.
GOARCH
==
"amd64"
{
continue
continue
}
}
gpuInfo
:=
GpuInfo
{
if
gpuHandles
.
cudart
!=
nil
||
gpuHandles
.
nvcuda
!=
nil
{
Library
:
"cuda"
,
gpuInfo
:=
GpuInfo
{
}
Library
:
"cuda"
,
var
driverMajor
int
}
var
driverMinor
int
var
driverMajor
int
if
gpuHandles
.
cudart
!=
nil
{
var
driverMinor
int
C
.
cudart_check_vram
(
*
gpuHandles
.
cudart
,
C
.
int
(
i
),
&
memInfo
)
if
gpuHandles
.
cudart
!=
nil
{
}
else
{
C
.
cudart_check_vram
(
*
gpuHandles
.
cudart
,
C
.
int
(
i
),
&
memInfo
)
C
.
nvcuda_check_vram
(
*
gpuHandles
.
nvcuda
,
C
.
int
(
i
),
&
memInfo
)
}
else
{
driverMajor
=
int
(
gpuHandles
.
nvcuda
.
driver_major
)
C
.
nvcuda_check_vram
(
*
gpuHandles
.
nvcuda
,
C
.
int
(
i
),
&
memInfo
)
driverMinor
=
int
(
gpuHandles
.
nvcuda
.
driver_minor
)
driverMajor
=
int
(
gpuHandles
.
nvcuda
.
driver_major
)
}
driverMinor
=
int
(
gpuHandles
.
nvcuda
.
driver_minor
)
if
memInfo
.
err
!=
nil
{
}
slog
.
Info
(
"error looking up nvidia GPU memory"
,
"error"
,
C
.
GoString
(
memInfo
.
err
))
if
memInfo
.
err
!=
nil
{
C
.
free
(
unsafe
.
Pointer
(
memInfo
.
err
))
slog
.
Info
(
"error looking up nvidia GPU memory"
,
"error"
,
C
.
GoString
(
memInfo
.
err
))
continue
C
.
free
(
unsafe
.
Pointer
(
memInfo
.
err
))
continue
}
if
memInfo
.
major
<
CudaComputeMin
[
0
]
||
(
memInfo
.
major
==
CudaComputeMin
[
0
]
&&
memInfo
.
minor
<
CudaComputeMin
[
1
])
{
slog
.
Info
(
fmt
.
Sprintf
(
"[%d] CUDA GPU is too old. Compute Capability detected: %d.%d"
,
i
,
memInfo
.
major
,
memInfo
.
minor
))
continue
}
gpuInfo
.
TotalMemory
=
uint64
(
memInfo
.
total
)
gpuInfo
.
FreeMemory
=
uint64
(
memInfo
.
free
)
gpuInfo
.
ID
=
C
.
GoString
(
&
memInfo
.
gpu_id
[
0
])
gpuInfo
.
Compute
=
fmt
.
Sprintf
(
"%d.%d"
,
memInfo
.
major
,
memInfo
.
minor
)
gpuInfo
.
MinimumMemory
=
cudaMinimumMemory
gpuInfo
.
DependencyPath
=
depPath
gpuInfo
.
Name
=
C
.
GoString
(
&
memInfo
.
gpu_name
[
0
])
gpuInfo
.
DriverMajor
=
int
(
driverMajor
)
gpuInfo
.
DriverMinor
=
int
(
driverMinor
)
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
resp
=
append
(
resp
,
gpuInfo
)
}
}
if
memInfo
.
major
<
CudaComputeMin
[
0
]
||
(
memInfo
.
major
==
CudaComputeMin
[
0
]
&&
memInfo
.
minor
<
CudaComputeMin
[
1
])
{
if
gpuHandles
.
oneapi
!=
nil
{
slog
.
Info
(
fmt
.
Sprintf
(
"[%d] CUDA GPU is too old. Compute Capability detected: %d.%d"
,
i
,
memInfo
.
major
,
memInfo
.
minor
))
gpuInfo
:=
GpuInfo
{
continue
Library
:
"oneapi"
,
}
C
.
oneapi_check_vram
(
*
gpuHandles
.
oneapi
,
&
memInfo
)
var
totalFreeMem
float64
=
float64
(
memInfo
.
free
)
*
0.95
// work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
memInfo
.
free
=
C
.
uint64_t
(
totalFreeMem
)
gpuInfo
.
TotalMemory
=
uint64
(
memInfo
.
total
)
gpuInfo
.
FreeMemory
=
uint64
(
memInfo
.
free
)
gpuInfo
.
ID
=
strconv
.
Itoa
(
i
)
resp
=
append
(
resp
,
gpuInfo
)
}
}
gpuInfo
.
TotalMemory
=
uint64
(
memInfo
.
total
)
gpuInfo
.
FreeMemory
=
uint64
(
memInfo
.
free
)
gpuInfo
.
ID
=
C
.
GoString
(
&
memInfo
.
gpu_id
[
0
])
gpuInfo
.
Compute
=
fmt
.
Sprintf
(
"%d.%d"
,
memInfo
.
major
,
memInfo
.
minor
)
gpuInfo
.
MinimumMemory
=
cudaMinimumMemory
gpuInfo
.
DependencyPath
=
depPath
gpuInfo
.
Name
=
C
.
GoString
(
&
memInfo
.
gpu_name
[
0
])
gpuInfo
.
DriverMajor
=
int
(
driverMajor
)
gpuInfo
.
DriverMinor
=
int
(
driverMinor
)
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
resp
=
append
(
resp
,
gpuInfo
)
}
}
// Then AMD
// Then AMD
...
@@ -348,6 +391,23 @@ func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
...
@@ -348,6 +391,23 @@ func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
return
0
,
nil
,
""
return
0
,
nil
,
""
}
}
func
LoadOneapiMgmt
(
oneapiLibPaths
[]
string
)
(
int
,
*
C
.
oneapi_handle_t
,
string
)
{
var
resp
C
.
oneapi_init_resp_t
resp
.
oh
.
verbose
=
getVerboseState
()
for
_
,
libPath
:=
range
oneapiLibPaths
{
lib
:=
C
.
CString
(
libPath
)
defer
C
.
free
(
unsafe
.
Pointer
(
lib
))
C
.
oneapi_init
(
lib
,
&
resp
)
if
resp
.
err
!=
nil
{
slog
.
Debug
(
"Unable to load oneAPI management library"
,
"library"
,
libPath
,
"error"
,
C
.
GoString
(
resp
.
err
))
C
.
free
(
unsafe
.
Pointer
(
resp
.
err
))
}
else
{
return
int
(
resp
.
num_devices
),
&
resp
.
oh
,
libPath
}
}
return
0
,
nil
,
""
}
func
getVerboseState
()
C
.
uint16_t
{
func
getVerboseState
()
C
.
uint16_t
{
if
envconfig
.
Debug
{
if
envconfig
.
Debug
{
return
C
.
uint16_t
(
1
)
return
C
.
uint16_t
(
1
)
...
@@ -368,6 +428,8 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
...
@@ -368,6 +428,8 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
return
cudaGetVisibleDevicesEnv
(
l
)
return
cudaGetVisibleDevicesEnv
(
l
)
case
"rocm"
:
case
"rocm"
:
return
rocmGetVisibleDevicesEnv
(
l
)
return
rocmGetVisibleDevicesEnv
(
l
)
case
"oneapi"
:
return
oneapiGetVisibleDevicesEnv
(
l
)
default
:
default
:
slog
.
Debug
(
"no filter required for library "
+
l
[
0
]
.
Library
)
slog
.
Debug
(
"no filter required for library "
+
l
[
0
]
.
Library
)
return
""
,
""
return
""
,
""
...
...
gpu/gpu_info.h
View file @
fd5971be
...
@@ -62,6 +62,7 @@ void cpu_check_ram(mem_info_t *resp);
...
@@ -62,6 +62,7 @@ void cpu_check_ram(mem_info_t *resp);
#include "gpu_info_cudart.h"
#include "gpu_info_cudart.h"
#include "gpu_info_nvcuda.h"
#include "gpu_info_nvcuda.h"
#include "gpu_info_oneapi.h"
#endif // __GPU_INFO_H__
#endif // __GPU_INFO_H__
#endif // __APPLE__
#endif // __APPLE__
\ No newline at end of file
gpu/gpu_info_oneapi.c
0 → 100644
View file @
fd5971be
#ifndef __APPLE__
#include "gpu_info_oneapi.h"
#include <string.h>
// oneapi_init loads the Level-Zero management library at oneapi_lib_path,
// resolves the zes* entry points into resp->oh, initializes Sysman via
// zesInit, and stores the count reported by zesDriverGet in
// resp->num_devices.
//
// On success resp->err is NULL and resp->oh.handle is the loaded library.
// On any failure resp->err is a heap-allocated message (the caller frees it),
// the library is unloaded, and resp->oh.handle is NULL.
void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp)
{
  ze_result_t ret;
  resp->err = NULL;
  const int buflen = 256;
  char buf[buflen + 1];
  int i;
  // Table of symbol names and the function-pointer slots they populate;
  // terminated by a {NULL, NULL} entry.
  struct lookup
  {
    char *s;
    void **p;
  } l[] = {
      {"zesInit", (void *)&resp->oh.zesInit},
      {"zesDriverGet", (void *)&resp->oh.zesDriverGet},
      {"zesDeviceGet", (void *)&resp->oh.zesDeviceGet},
      {"zesDeviceGetProperties", (void *)&resp->oh.zesDeviceGetProperties},
      {"zesDeviceEnumMemoryModules",
       (void *)&resp->oh.zesDeviceEnumMemoryModules},
      {"zesMemoryGetProperties", (void *)&resp->oh.zesMemoryGetProperties},
      {"zesMemoryGetState", (void *)&resp->oh.zesMemoryGetState},
      {NULL, NULL},
  };

  resp->oh.handle = LOAD_LIBRARY(oneapi_lib_path, RTLD_LAZY);
  if (!resp->oh.handle)
  {
    char *msg = LOAD_ERR();
    snprintf(buf, buflen,
             "Unable to load %s library to query for Intel GPUs: %s\n",
             oneapi_lib_path, msg);
    free(msg);
    resp->err = strdup(buf);
    return;
  }

  // TODO once we've squashed the remaining corner cases remove this log
  LOG(resp->oh.verbose,
      "wiring Level-Zero management library functions in %s\n",
      oneapi_lib_path);

  for (i = 0; l[i].s != NULL; i++)
  {
    // TODO once we've squashed the remaining corner cases remove this log
    LOG(resp->oh.verbose, "dlsym: %s\n", l[i].s);

    *l[i].p = LOAD_SYMBOL(resp->oh.handle, l[i].s);
    // Test the resolved symbol, not the slot address: l[i].p itself always
    // points at a field of resp->oh and can never be NULL.
    if (!*l[i].p)
    {
      char *msg = LOAD_ERR();
      LOG(resp->oh.verbose, "dlerr: %s\n", msg);
      // Unload before clearing the handle; clearing first would pass NULL to
      // UNLOAD_LIBRARY and leak the library.
      UNLOAD_LIBRARY(resp->oh.handle);
      resp->oh.handle = NULL;
      snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s, msg);
      free(msg);
      resp->err = strdup(buf);
      return;
    }
  }

  ret = (*resp->oh.zesInit)(0);
  if (ret != ZE_RESULT_SUCCESS)
  {
    LOG(resp->oh.verbose, "zesInit err: %d\n", ret);
    UNLOAD_LIBRARY(resp->oh.handle);
    resp->oh.handle = NULL;
    snprintf(buf, buflen, "oneapi vram init failure: %d", ret);
    resp->err = strdup(buf);
    // Do not fall through: the library has just been unloaded, so calling
    // zesDriverGet below would jump into unmapped code.
    return;
  }

  // zesDriverGet takes a uint32_t*, while num_devices is an int; go through a
  // correctly typed local instead of passing a mismatched pointer.
  uint32_t count = 0;
  ret = (*resp->oh.zesDriverGet)(&count, NULL);
  if (ret != ZE_RESULT_SUCCESS)
  {
    LOG(resp->oh.verbose, "zesDriverGet err: %d\n", ret);
    UNLOAD_LIBRARY(resp->oh.handle);
    resp->oh.handle = NULL;
    snprintf(buf, buflen, "unable to get driver count: %d", ret);
    resp->err = strdup(buf);
    return;
  }
  resp->num_devices = (int)count;

  return;
}
// oneapi_check_vram walks every Level-Zero driver, every device of each
// driver, and every memory module of each device, and accumulates the
// reported sizes into resp->total (sum of state.size) and resp->free (sum of
// state.free).
//
// On success resp->err is NULL. On failure resp->err is a heap-allocated
// message (the caller frees it) and the totals may be partial.
void oneapi_check_vram(oneapi_handle_t h, mem_info_t *resp)
{
  ze_result_t ret;
  resp->err = NULL;
  const int buflen = 256;
  char buf[buflen + 1];
  int i, d, m;

  if (h.handle == NULL)
  {
    resp->err = strdup("Level-Zero handle not initialized");
    return;
  }

  // First call with a NULL array only asks for the number of drivers.
  uint32_t driversCount = 0;
  ret = (*h.zesDriverGet)(&driversCount, NULL);
  if (ret != ZE_RESULT_SUCCESS)
  {
    snprintf(buf, buflen, "unable to get driver count: %d", ret);
    resp->err = strdup(buf);
    return;
  }
  LOG(h.verbose, "discovered %d Level-Zero drivers\n", driversCount);

  resp->total = 0;
  resp->free = 0;

  // Nothing to enumerate; also avoids a zero-sized malloc.
  if (driversCount == 0)
  {
    return;
  }

  zes_driver_handle_t *allDrivers =
      malloc(driversCount * sizeof(zes_driver_handle_t));
  if (allDrivers == NULL)
  {
    resp->err = strdup("unable to allocate memory for Level-Zero drivers");
    return;
  }
  ret = (*h.zesDriverGet)(&driversCount, allDrivers);
  if (ret != ZE_RESULT_SUCCESS)
  {
    snprintf(buf, buflen, "unable to get drivers: %d", ret);
    resp->err = strdup(buf);
    free(allDrivers);
    return;
  }

  for (d = 0; (uint32_t)d < driversCount; d++)
  {
    uint32_t deviceCount = 0;
    ret = (*h.zesDeviceGet)(allDrivers[d], &deviceCount, NULL);
    if (ret != ZE_RESULT_SUCCESS)
    {
      snprintf(buf, buflen, "unable to get device count: %d", ret);
      resp->err = strdup(buf);
      free(allDrivers);
      return;
    }

    LOG(h.verbose, "discovered %d Level-Zero devices\n", deviceCount);

    if (deviceCount == 0)
    {
      continue;
    }

    zes_device_handle_t *devices =
        malloc(deviceCount * sizeof(zes_device_handle_t));
    if (devices == NULL)
    {
      resp->err = strdup("unable to allocate memory for Level-Zero devices");
      free(allDrivers);
      return;
    }
    ret = (*h.zesDeviceGet)(allDrivers[d], &deviceCount, devices);
    if (ret != ZE_RESULT_SUCCESS)
    {
      snprintf(buf, buflen, "unable to get devices: %d", ret);
      resp->err = strdup(buf);
      free(allDrivers);
      free(devices);
      return;
    }

    for (i = 0; (uint32_t)i < deviceCount; i++)
    {
      // The extended properties struct is chained behind the base properties
      // through pNext so one call fills both.
      zes_device_ext_properties_t ext_props;
      ext_props.stype = ZES_STRUCTURE_TYPE_DEVICE_EXT_PROPERTIES;
      ext_props.pNext = NULL;

      zes_device_properties_t props;
      props.stype = ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES;
      props.pNext = &ext_props;

      ret = (*h.zesDeviceGetProperties)(devices[i], &props);
      if (ret != ZE_RESULT_SUCCESS)
      {
        snprintf(buf, buflen, "unable to get device properties: %d", ret);
        resp->err = strdup(buf);
        free(allDrivers);
        free(devices);
        return;
      }

      if (h.verbose)
      {
        // When in verbose mode, report more information about
        // the card we discover.
        LOG(h.verbose, "[%d] oneAPI device name: %s\n", i, props.modelName);
        LOG(h.verbose, "[%d] oneAPI brand: %s\n", i, props.brandName);
        LOG(h.verbose, "[%d] oneAPI vendor: %s\n", i, props.vendorName);
        LOG(h.verbose, "[%d] oneAPI S/N: %s\n", i, props.serialNumber);
        LOG(h.verbose, "[%d] oneAPI board number: %s\n", i, props.boardNumber);
      }

      uint32_t memCount = 0;
      ret = (*h.zesDeviceEnumMemoryModules)(devices[i], &memCount, NULL);
      if (ret != ZE_RESULT_SUCCESS)
      {
        snprintf(buf, buflen,
                 "unable to enumerate Level-Zero memory modules: %d", ret);
        resp->err = strdup(buf);
        free(allDrivers);
        free(devices);
        return;
      }

      LOG(h.verbose, "discovered %d Level-Zero memory modules\n", memCount);

      if (memCount == 0)
      {
        continue;
      }

      zes_mem_handle_t *mems = malloc(memCount * sizeof(zes_mem_handle_t));
      if (mems == NULL)
      {
        resp->err =
            strdup("unable to allocate memory for Level-Zero memory modules");
        free(allDrivers);
        free(devices);
        return;
      }
      ret = (*h.zesDeviceEnumMemoryModules)(devices[i], &memCount, mems);
      if (ret != ZE_RESULT_SUCCESS)
      {
        snprintf(buf, buflen,
                 "unable to enumerate Level-Zero memory modules: %d", ret);
        resp->err = strdup(buf);
        free(allDrivers);
        free(devices);
        free(mems);
        return;
      }

      for (m = 0; (uint32_t)m < memCount; m++)
      {
        zes_mem_state_t state;
        state.stype = ZES_STRUCTURE_TYPE_MEM_STATE;
        state.pNext = NULL;
        ret = (*h.zesMemoryGetState)(mems[m], &state);
        if (ret != ZE_RESULT_SUCCESS)
        {
          snprintf(buf, buflen, "unable to get memory state: %d", ret);
          resp->err = strdup(buf);
          free(allDrivers);
          free(devices);
          free(mems);
          return;
        }

        resp->total += state.size;
        resp->free += state.free;
      }

      free(mems);
    }

    free(devices);
  }

  free(allDrivers);
}
#endif // __APPLE__
gpu/gpu_info_oneapi.h
0 → 100644
View file @
fd5971be
#ifndef __APPLE__
#ifndef __GPU_INFO_ONEAPI_H__
#define __GPU_INFO_ONEAPI_H__
#include "gpu_info.h"
#define ZE_MAX_DEVICE_NAME 256
#define ZE_MAX_DEVICE_UUID_SIZE 16
#define ZES_STRING_PROPERTY_SIZE 64
// ZE_BIT(_i) yields a value with only bit _i set. The argument is
// parenthesised so an expression argument (e.g. ZE_BIT(a | b)) is shifted as
// a whole rather than being split by operator precedence.
#define ZE_BIT(_i) (1 << (_i))
// Just enough typedef's to dlopen/dlsym for memory information
// Result code returned by every entry point stored in oneapi_handle_t.
// Only the success value is declared here.
typedef enum ze_result_t {
  ZE_RESULT_SUCCESS = 0,
  // Other values omitted for now...
} ze_result_t;

// Boolean as a single byte.
typedef uint8_t ze_bool_t;

// Opaque handles: pointers to structs that are never defined in this header.
typedef struct _zes_driver_handle_t *zes_driver_handle_t;
typedef struct _zes_device_handle_t *zes_device_handle_t;
typedef struct _zes_mem_handle_t *zes_mem_handle_t;

// Type tag for core (ze_*) structures; no real members are declared here.
// NOTE(review): the *_FORCE_UINT32 members presumably exist to pin the enum
// width to 32 bits - confirm against the Level-Zero headers.
typedef enum _ze_structure_type_t {
  ZE_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff
} ze_structure_type_t;

// Type tags written into the stype field of the zes_* structures below
// before they are handed to the library.
typedef enum _zes_structure_type_t {
  ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES = 0x1,
  ZES_STRUCTURE_TYPE_MEM_PROPERTIES = 0xb,
  ZES_STRUCTURE_TYPE_MEM_STATE = 0x1e,
  ZES_STRUCTURE_TYPE_DEVICE_EXT_PROPERTIES = 0x2d,
  ZES_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff
} zes_structure_type_t;

// Memory module type; individual values are not declared here.
typedef enum _zes_mem_type_t {
  ZES_MEM_TYPE_FORCE_UINT32 = 0x7fffffff
} zes_mem_type_t;

// Location of a memory module: system or device.
typedef enum _zes_mem_loc_t {
  ZES_MEM_LOC_SYSTEM = 0,
  ZES_MEM_LOC_DEVICE = 1,
  ZES_MEM_LOC_FORCE_UINT32 = 0x7fffffff
} zes_mem_loc_t;

// Memory health; individual values are not declared here.
typedef enum _zes_mem_health_t {
  ZES_MEM_HEALTH_FORCE_UINT32 = 0x7fffffff
} zes_mem_health_t;

// Device UUID as ZE_MAX_DEVICE_UUID_SIZE raw bytes (core API flavour).
typedef struct _ze_device_uuid_t {
  uint8_t id[ZE_MAX_DEVICE_UUID_SIZE];
} ze_device_uuid_t;

// Device UUID as ZE_MAX_DEVICE_UUID_SIZE raw bytes (zes API flavour).
typedef struct _zes_uuid_t {
  uint8_t id[ZE_MAX_DEVICE_UUID_SIZE];
} zes_uuid_t;

// Device class (core API flavour).
typedef enum _ze_device_type_t {
  ZE_DEVICE_TYPE_GPU = 1,
  ZE_DEVICE_TYPE_CPU = 2,
  ZE_DEVICE_TYPE_FPGA = 3,
  ZE_DEVICE_TYPE_MCA = 4,
  ZE_DEVICE_TYPE_VPU = 5,
  ZE_DEVICE_TYPE_FORCE_UINT32 = 0x7fffffff
} ze_device_type_t;

// Device class (zes API flavour); same numeric values as ze_device_type_t.
typedef enum _zes_device_type_t {
  ZES_DEVICE_TYPE_GPU = 1,
  ZES_DEVICE_TYPE_CPU = 2,
  ZES_DEVICE_TYPE_FPGA = 3,
  ZES_DEVICE_TYPE_MCA = 4,
  ZES_DEVICE_TYPE_VPU = 5,
  ZES_DEVICE_TYPE_FORCE_UINT32 = 0x7fffffff
} zes_device_type_t;

// Bitmask of ze_device_property_flag_t values.
typedef uint32_t ze_device_property_flags_t;
typedef enum _ze_device_property_flag_t {
  ZE_DEVICE_PROPERTY_FLAG_INTEGRATED = ZE_BIT(0),
  ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE = ZE_BIT(1),
  ZE_DEVICE_PROPERTY_FLAG_ECC = ZE_BIT(2),
  ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING = ZE_BIT(3),
  ZE_DEVICE_PROPERTY_FLAG_FORCE_UINT32 = 0x7fffffff
} ze_device_property_flag_t;

// Bitmask of zes_device_property_flag_t values.
typedef uint32_t zes_device_property_flags_t;
typedef enum _zes_device_property_flag_t {
  ZES_DEVICE_PROPERTY_FLAG_INTEGRATED = ZE_BIT(0),
  ZES_DEVICE_PROPERTY_FLAG_SUBDEVICE = ZE_BIT(1),
  ZES_DEVICE_PROPERTY_FLAG_ECC = ZE_BIT(2),
  ZES_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING = ZE_BIT(3),
  ZES_DEVICE_PROPERTY_FLAG_FORCE_UINT32 = 0x7fffffff
} zes_device_property_flag_t;

// Core device properties; embedded as the `core` member of
// zes_device_properties_t. The field order is part of the binary layout the
// library writes into - do not reorder.
typedef struct _ze_device_properties_t {
  ze_structure_type_t stype;
  void *pNext;
  ze_device_type_t type;
  uint32_t vendorId;
  uint32_t deviceId;
  ze_device_property_flags_t flags;
  uint32_t subdeviceId;
  uint32_t coreClockRate;
  uint64_t maxMemAllocSize;
  uint32_t maxHardwareContexts;
  uint32_t maxCommandQueuePriority;
  uint32_t numThreadsPerEU;
  uint32_t physicalEUSimdWidth;
  uint32_t numEUsPerSubslice;
  uint32_t numSubslicesPerSlice;
  uint32_t numSlices;
  uint64_t timerResolution;
  uint32_t timestampValidBits;
  uint32_t kernelTimestampValidBits;
  ze_device_uuid_t uuid;
  char name[ZE_MAX_DEVICE_NAME];
} ze_device_properties_t;

// Output of zesDeviceGetProperties. The string members are fixed-size
// buffers of ZES_STRING_PROPERTY_SIZE bytes.
typedef struct _zes_device_properties_t {
  zes_structure_type_t stype;
  void *pNext;
  ze_device_properties_t core;
  uint32_t numSubdevices;
  char serialNumber[ZES_STRING_PROPERTY_SIZE];
  char boardNumber[ZES_STRING_PROPERTY_SIZE];
  char brandName[ZES_STRING_PROPERTY_SIZE];
  char modelName[ZES_STRING_PROPERTY_SIZE];
  char vendorName[ZES_STRING_PROPERTY_SIZE];
  char driverVersion[ZES_STRING_PROPERTY_SIZE];
} zes_device_properties_t;

// Extended device properties; chained to zes_device_properties_t through
// its pNext pointer.
typedef struct _zes_device_ext_properties_t {
  zes_structure_type_t stype;
  void *pNext;
  zes_uuid_t uuid;
  zes_device_type_t type;
  zes_device_property_flags_t flags;
} zes_device_ext_properties_t;

// Output of zesMemoryGetProperties.
typedef struct _zes_mem_properties_t {
  zes_structure_type_t stype;
  void *pNext;
  zes_mem_type_t type;
  ze_bool_t onSubdevice;
  uint32_t subdeviceId;
  zes_mem_loc_t location;
  uint64_t physicalSize;
  int32_t busWidth;
  int32_t numChannels;
} zes_mem_properties_t;

// Output of zesMemoryGetState; `free` and `size` are what
// oneapi_check_vram sums into its free/total results.
typedef struct _zes_mem_state_t {
  zes_structure_type_t stype;
  const void *pNext;
  zes_mem_health_t health;
  uint64_t free;
  uint64_t size;
} zes_mem_state_t;
// oneapi_handle_t bundles the dynamically loaded Level-Zero library handle
// with the function pointers resolved from it by oneapi_init.
typedef struct oneapi_handle {
  void *handle;     // library handle from LOAD_LIBRARY; NULL when not loaded
  uint16_t verbose; // non-zero enables LOG output in the oneapi_* functions
  // Entry points resolved by name from the loaded library.
  ze_result_t (*zesInit)(int);
  ze_result_t (*zesDriverGet)(uint32_t *pCount, zes_driver_handle_t *phDrivers);
  ze_result_t (*zesDeviceGet)(zes_driver_handle_t hDriver, uint32_t *pCount,
                              zes_device_handle_t *phDevices);
  ze_result_t (*zesDeviceGetProperties)(zes_device_handle_t hDevice,
                                        zes_device_properties_t *pProperties);
  ze_result_t (*zesDeviceEnumMemoryModules)(zes_device_handle_t hDevice,
                                            uint32_t *pCount,
                                            zes_mem_handle_t *phMemory);
  ze_result_t (*zesMemoryGetProperties)(zes_mem_handle_t hMemory,
                                        zes_mem_properties_t *pProperties);
  ze_result_t (*zesMemoryGetState)(zes_mem_handle_t hMemory,
                                   zes_mem_state_t *pState);
} oneapi_handle_t;
// oneapi_init_resp_t is the in/out result of oneapi_init: the caller sets
// oh.verbose beforehand, and oneapi_init fills in err, num_devices and oh.
typedef struct oneapi_init_resp {
  char *err; // If err is non-null handle is invalid
  // Count written by the zesDriverGet call at the end of oneapi_init.
  int num_devices;
  oneapi_handle_t oh;
} oneapi_init_resp_t;
// oneapi_version_resp_t pairs a status code with a string result. It is not
// used by the two functions declared in this header.
typedef struct oneapi_version_resp {
  ze_result_t status;
  char *str; // Contains version or error string if status != 0
} oneapi_version_resp_t;
// Loads the library at oneapi_lib_path and fills *resp; on failure resp->err
// is set to a heap-allocated message that the caller must free.
void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp);
// Sums memory size and free bytes across the memory modules reachable
// through rh into resp->total / resp->free; resp->err is set on failure.
void oneapi_check_vram(oneapi_handle_t rh, mem_info_t *resp);
#endif // __GPU_INFO_ONEAPI_H__
#endif // __APPLE__
gpu/gpu_oneapi.go
0 → 100644
View file @
fd5971be
//go:build linux || windows
package
gpu
import
(
"log/slog"
"strings"
)
func
oneapiGetVisibleDevicesEnv
(
gpuInfo
[]
GpuInfo
)
(
string
,
string
)
{
ids
:=
[]
string
{}
for
_
,
info
:=
range
gpuInfo
{
if
info
.
Library
!=
"oneapi"
{
// TODO shouldn't happen if things are wired correctly...
slog
.
Debug
(
"oneapiGetVisibleDevicesEnv skipping over non-sycl device"
,
"library"
,
info
.
Library
)
continue
}
ids
=
append
(
ids
,
info
.
ID
)
}
return
"ONEAPI_DEVICE_SELECTOR"
,
"level_zero:"
+
strings
.
Join
(
ids
,
","
)
}
llm/generate/gen_linux.sh
View file @
fd5971be
...
@@ -206,6 +206,36 @@ if [ -d "${CUDA_LIB_DIR}" ]; then
...
@@ -206,6 +206,36 @@ if [ -d "${CUDA_LIB_DIR}" ]; then
fi
fi
# Locate the oneAPI toolkit: honor a caller-provided ONEAPI_ROOT, otherwise
# fall back to the default install prefix.
if [ -z "${ONEAPI_ROOT}" ]; then
    # Try the default location in case it exists
    ONEAPI_ROOT=/opt/intel/oneapi
fi

# Build the oneAPI (SYCL) variant only when the toolkit directory exists.
if [ -d "${ONEAPI_ROOT}" ]; then
    echo "OneAPI libraries detected - building dynamic OneAPI library"
    init_vars
    source ${ONEAPI_ROOT}/setvars.sh --force # set up environment variables for oneAPI
    CC=icx
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL=ON -DLLAMA_SYCL_F16=OFF"
    BUILD_DIR="../build/linux/${ARCH}/oneapi"
    # Link against the SYCL/MKL/TBB runtimes and record their directories as rpaths.
    EXTRA_LIBS="-fsycl -Wl,-rpath,${ONEAPI_ROOT}/compiler/latest/lib,-rpath,${ONEAPI_ROOT}/mkl/latest/lib,-rpath,${ONEAPI_ROOT}/tbb/latest/lib,-rpath,${ONEAPI_ROOT}/compiler/latest/opt/oclfpga/linux64/lib -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb"
    DEBUG_FLAGS="" # icx compiles with -O0 if we pass -g, so we must remove it
    build

    # copy oneAPI dependencies
    # Every shared library the built server resolves (per ldd) whose path
    # mentions sycl, mkl or tbb is copied next to the binary.
    for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e sycl -e mkl -e tbb); do
        cp "${dep}" "${BUILD_DIR}/bin/"
    done
    # Additional compiler runtime libraries copied explicitly from the toolkit.
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libOpenCL.so" "${BUILD_DIR}/bin/"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libimf.so" "${BUILD_DIR}/bin/"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libintlc.so.5" "${BUILD_DIR}/bin/"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libirng.so" "${BUILD_DIR}/bin/"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libpi_level_zero.so" "${BUILD_DIR}/bin/"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libsvml.so" "${BUILD_DIR}/bin/"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libur_loader.so.0" "${BUILD_DIR}/bin/"
    compress
fi
if
[
-z
"
${
ROCM_PATH
}
"
]
;
then
if
[
-z
"
${
ROCM_PATH
}
"
]
;
then
# Try the default location in case it exists
# Try the default location in case it exists
ROCM_PATH
=
/opt/rocm
ROCM_PATH
=
/opt/rocm
...
...
llm/generate/gen_windows.ps1
View file @
fd5971be
...
@@ -289,6 +289,49 @@ function build_cuda() {
...
@@ -289,6 +289,49 @@ function build_cuda() {
}
}
}
}
# build_oneapi builds the oneAPI (SYCL) variant of the llama server for
# Windows, then copies the oneAPI runtime DLLs it needs into the dist
# directory.
#
# The step runs only when ONEAPI_ROOT is set. It is skipped when either
# OLLAMA_SKIP_ONEAPI_GENERATE or (for backward compatibility with the original
# switch) OLLAMA_SKIP_CUDA_GENERATE is set.
function build_oneapi() {
  if ((-not "${env:OLLAMA_SKIP_ONEAPI_GENERATE}") -and (-not "${env:OLLAMA_SKIP_CUDA_GENERATE}") -and ("${env:ONEAPI_ROOT}")) {
    # Get oneAPI version
    $script:ONEAPI_VARIANT = ""
    $script:ONEAPI_VERSION = icpx --version
    $script:ONEAPI_VERSION = [regex]::Match($script:ONEAPI_VERSION, '(?<=oneAPI DPC\+\+/C\+\+ Compiler )(?<version>\d+\.\d+\.\d+)').Value
    # Match(...).Value is an empty string (never $null) when nothing matched,
    # so test for a non-empty value; otherwise the variant would be a bare "_v".
    if ($script:ONEAPI_VERSION) {
      $script:ONEAPI_VARIANT = "_v" + $script:ONEAPI_VERSION
    }
    init_vars
    $script:buildDir = "../build/windows/${script:ARCH}/oneapi$script:ONEAPI_VARIANT"
    $script:distDir = "$script:DIST_BASE\oneapi$script:ONEAPI_VARIANT"
    $script:cmakeDefs += @(
      "-G", "MinGW Makefiles",
      "-DLLAMA_SYCL=ON",
      "-DCMAKE_C_COMPILER=icx",
      "-DCMAKE_CXX_COMPILER=icx",
      "-DCMAKE_BUILD_TYPE=Release"
    )

    Write-Host "Building oneAPI"
    build
    # Ninja doesn't prefix with config name
    if ($null -ne $script:DUMPBIN) {
      & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | Select-String ".dll"
    }
    sign
    install

    # Runtime DLLs shipped next to the server, grouped by toolkit component.
    $compilerDlls = @(
      "libirngmd.dll",
      "libmmd.dll",
      "pi_level_zero.dll",
      "pi_unified_runtime.dll",
      "pi_win_proxy_loader.dll",
      "svml_dispmd.dll",
      "sycl7.dll"
    )
    foreach ($dll in $compilerDlls) {
      cp "${env:ONEAPI_ROOT}\compiler\latest\bin\$dll" "${script:distDir}"
    }
    $mklDlls = @(
      "mkl_core.2.dll",
      "mkl_sycl_blas.4.dll",
      "mkl_tbb_thread.2.dll"
    )
    foreach ($dll in $mklDlls) {
      cp "${env:ONEAPI_ROOT}\mkl\latest\bin\$dll" "${script:distDir}"
    }
  } else {
    Write-Host "Skipping oneAPI generation step"
  }
}
function
build_rocm
()
{
function
build_rocm
()
{
if
((
-not
"
${env:OLLAMA_SKIP_ROCM_GENERATE}
"
)
-and
(
"
${env:HIP_PATH}
"
))
{
if
((
-not
"
${env:OLLAMA_SKIP_ROCM_GENERATE}
"
)
-and
(
"
${env:HIP_PATH}
"
))
{
$
script
:
ROCM_VERSION
=
(
get-item
$
env
:
HIP_PATH
)
.
Basename
$
script
:
ROCM_VERSION
=
(
get-item
$
env
:
HIP_PATH
)
.
Basename
...
@@ -356,6 +399,7 @@ if ($($args.count) -eq 0) {
...
@@ -356,6 +399,7 @@ if ($($args.count) -eq 0) {
build_cpu_avx
build_cpu_avx
build_cpu_avx2
build_cpu_avx2
build_cuda
build_cuda
build_oneapi
build_rocm
build_rocm
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment