Commit 920a4b07 authored by Daniel Hiltgen

Merge remote-tracking branch 'upstream/main' into pr3702

parents c496967e ee49844d
...@@ -11,6 +11,8 @@ import ( ...@@ -11,6 +11,8 @@ import (
"slices" "slices"
"strconv" "strconv"
"strings" "strings"
"github.com/ollama/ollama/format"
) )
// Discovery logic for AMD/ROCm GPUs // Discovery logic for AMD/ROCm GPUs
...@@ -23,26 +25,20 @@ const ( ...@@ -23,26 +25,20 @@ const (
// Prefix with the node dir // Prefix with the node dir
GPUTotalMemoryFileGlob = "mem_banks/*/properties" // size_in_bytes line GPUTotalMemoryFileGlob = "mem_banks/*/properties" // size_in_bytes line
GPUUsedMemoryFileGlob = "mem_banks/*/used_memory" GPUUsedMemoryFileGlob = "mem_banks/*/used_memory"
RocmStandardLocation = "/opt/rocm/lib"
// TODO find a better way to detect iGPU instead of minimum memory
IGPUMemLimit = 1024 * 1024 * 1024 // 512M is what they typically report, so anything less than 1G must be iGPU
) )
var ( var (
// Used to validate if the given ROCm lib is usable // Used to validate if the given ROCm lib is usable
ROCmLibGlobs = []string{"libhipblas.so.2*", "rocblas"} // TODO - probably include more coverage of files here... ROCmLibGlobs = []string{"libhipblas.so.2*", "rocblas"} // TODO - probably include more coverage of files here...
RocmStandardLocations = []string{"/opt/rocm/lib", "/usr/lib64"}
) )
// Gather GPU information from the amdgpu driver if any supported GPUs are detected // Gather GPU information from the amdgpu driver if any supported GPUs are detected
// HIP_VISIBLE_DEVICES will be set if we detect a mix of unsupported and supported devices func AMDGetGPUInfo() []GpuInfo {
// and the user hasn't already set this variable resp := []GpuInfo{}
func AMDGetGPUInfo(resp *GpuInfo) {
// TODO - DRY this out with windows
if !AMDDetected() { if !AMDDetected() {
return return resp
} }
skip := map[int]interface{}{}
// Opportunistic logging of driver version to aid in troubleshooting // Opportunistic logging of driver version to aid in troubleshooting
ver, err := AMDDriverVersion() ver, err := AMDDriverVersion()
...@@ -50,160 +46,117 @@ func AMDGetGPUInfo(resp *GpuInfo) { ...@@ -50,160 +46,117 @@ func AMDGetGPUInfo(resp *GpuInfo) {
slog.Info("AMD Driver: " + ver) slog.Info("AMD Driver: " + ver)
} else { } else {
// TODO - if we see users crash and burn with the upstreamed kernel this can be adjusted to hard-fail rocm support and fallback to CPU // TODO - if we see users crash and burn with the upstreamed kernel this can be adjusted to hard-fail rocm support and fallback to CPU
slog.Warn(fmt.Sprintf("ollama recommends running the https://www.amd.com/en/support/linux-drivers: %s", err)) slog.Warn("ollama recommends running the https://www.amd.com/en/support/linux-drivers", "error", err)
} }
// If the user has specified exactly which GPUs to use, look up their memory // Determine if the user has already pre-selected which GPUs to look at, then ignore the others
visibleDevices := os.Getenv("HIP_VISIBLE_DEVICES") var visibleDevices []string
if visibleDevices != "" { hipVD := os.Getenv("HIP_VISIBLE_DEVICES") // zero based index only
ids := []int{} rocrVD := os.Getenv("ROCR_VISIBLE_DEVICES") // zero based index or UUID, but consumer cards seem to not support UUID
for _, idStr := range strings.Split(visibleDevices, ",") { gpuDO := os.Getenv("GPU_DEVICE_ORDINAL") // zero based index
id, err := strconv.Atoi(idStr) switch {
if err != nil { // TODO is this priority order right?
slog.Warn(fmt.Sprintf("malformed HIP_VISIBLE_DEVICES=%s %s", visibleDevices, err)) case hipVD != "":
} else { visibleDevices = strings.Split(hipVD, ",")
ids = append(ids, id) case rocrVD != "":
} visibleDevices = strings.Split(rocrVD, ",")
} // TODO - since we don't yet support UUIDs, consider detecting and reporting here
amdProcMemLookup(resp, nil, ids) // all our test systems show GPU-XX indicating UUID is not supported
return case gpuDO != "":
visibleDevices = strings.Split(gpuDO, ",")
} }
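A compact sketch of the selection priority implemented by the switch above: HIP_VISIBLE_DEVICES wins over ROCR_VISIBLE_DEVICES, which wins over GPU_DEVICE_ORDINAL (the TODO above notes this ordering is still unconfirmed). The helper below is illustrative only, not part of this change:

package main

import (
	"fmt"
	"strings"
)

// pickVisibleDevices mirrors the switch above: the first non-empty variable wins.
func pickVisibleDevices(hipVD, rocrVD, gpuDO string) []string {
	switch {
	case hipVD != "":
		return strings.Split(hipVD, ",")
	case rocrVD != "":
		return strings.Split(rocrVD, ",")
	case gpuDO != "":
		return strings.Split(gpuDO, ",")
	}
	return nil
}

func main() {
	// HIP_VISIBLE_DEVICES takes precedence even when the others are set.
	fmt.Println(pickVisibleDevices("0,2", "1", "3")) // [0 2]
}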
// Gather GFX version information from all detected cards gfxOverride := os.Getenv("HSA_OVERRIDE_GFX_VERSION")
gfx := AMDGFXVersions() var supported []string
verStrings := []string{} libDir := ""
for i, v := range gfx {
verStrings = append(verStrings, v.ToGFXString()) // The amdgpu driver always exposes the host CPU(s) first, but we have to skip them and subtract
if v.Major == 0 { // from the other IDs to get alignment with the HIP libraries expectations (zero is the first GPU, not the CPU)
// Silently skip CPUs matches, _ := filepath.Glob(GPUPropertiesFileGlob)
skip[i] = struct{}{} cpuCount := 0
for _, match := range matches {
slog.Debug("evaluating amdgpu node " + match)
fp, err := os.Open(match)
if err != nil {
slog.Debug("failed to open sysfs node", "file", match, "error", err)
continue continue
} }
if v.Major < 9 { defer fp.Close()
// TODO consider this a build-time setting if we can support 8xx family GPUs nodeID, err := strconv.Atoi(filepath.Base(filepath.Dir(match)))
slog.Warn(fmt.Sprintf("amdgpu [%d] too old %s", i, v.ToGFXString())) if err != nil {
skip[i] = struct{}{} slog.Debug("failed to parse node ID", "error", err)
continue
} }
}
slog.Info(fmt.Sprintf("detected amdgpu versions %v", verStrings))
// Abort if all GPUs are skipped scanner := bufio.NewScanner(fp)
if len(skip) >= len(gfx) { isCPU := false
slog.Info("all detected amdgpus are skipped, falling back to CPU") var major, minor, patch uint64
return for scanner.Scan() {
} line := strings.TrimSpace(scanner.Text())
// Note: we could also use "cpu_cores_count X" where X is greater than zero to detect CPUs
if strings.HasPrefix(line, "gfx_target_version") {
ver := strings.Fields(line)
// If we got this far, then we have at least 1 GPU that's a ROCm candidate, so make sure we have a lib // Detect CPUs
libDir, err := AMDValidateLibDir() if len(ver) == 2 && ver[1] == "0" {
if err != nil { slog.Debug("detected CPU " + match)
slog.Warn(fmt.Sprintf("unable to verify rocm library, will use cpu: %s", err)) isCPU = true
return break
} }
updateLibPath(libDir) if len(ver) != 2 || len(ver[1]) < 5 {
slog.Warn("malformed "+match, "gfx_target_version", line)
// If this winds up being a CPU, our offsets may be wrong
continue
}
l := len(ver[1])
var err1, err2, err3 error
patch, err1 = strconv.ParseUint(ver[1][l-2:l], 10, 32)
minor, err2 = strconv.ParseUint(ver[1][l-4:l-2], 10, 32)
major, err3 = strconv.ParseUint(ver[1][:l-4], 10, 32)
if err1 != nil || err2 != nil || err3 != nil {
slog.Debug("malformed int " + line)
continue
}
}
gfxOverride := os.Getenv("HSA_OVERRIDE_GFX_VERSION") // TODO - any other properties we want to extract and record?
if gfxOverride == "" { // vendor_id + device_id -> pci lookup for "Name"
supported, err := GetSupportedGFX(libDir) // Other metrics that may help us understand relative performance between multiple GPUs
if err != nil {
slog.Warn(fmt.Sprintf("failed to lookup supported GFX types, falling back to CPU mode: %s", err))
return
} }
slog.Debug(fmt.Sprintf("rocm supported GPU types %v", supported))
for i, v := range gfx { if isCPU {
if !slices.Contains[[]string, string](supported, v.ToGFXString()) { cpuCount++
slog.Warn(fmt.Sprintf("amdgpu [%d] %s is not supported by %s %v", i, v.ToGFXString(), libDir, supported)) continue
// TODO - consider discrete markdown just for ROCM troubleshooting?
slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/gpu.md#overrides for HSA_OVERRIDE_GFX_VERSION usage")
skip[i] = struct{}{}
} else {
slog.Info(fmt.Sprintf("amdgpu [%d] %s is supported", i, v.ToGFXString()))
}
} }
} else {
slog.Debug("skipping rocm gfx compatibility check with HSA_OVERRIDE_GFX_VERSION=" + gfxOverride)
}
if len(skip) >= len(gfx) { // CPUs are always first in the list
slog.Info("all detected amdgpus are skipped, falling back to CPU") gpuID := nodeID - cpuCount
return
}
ids := make([]int, len(gfx)) // Shouldn't happen, but just in case...
i := 0 if gpuID < 0 {
for k := range gfx { slog.Error("unexpected amdgpu sysfs data resulted in negative GPU ID, please set OLLAMA_DEBUG=1 and report an issue")
ids[i] = k return []GpuInfo{}
i++
}
amdProcMemLookup(resp, skip, ids)
if resp.memInfo.DeviceCount == 0 {
return
}
if len(skip) > 0 {
amdSetVisibleDevices(ids, skip)
}
}
func updateLibPath(libDir string) {
ldPaths := []string{}
if val, ok := os.LookupEnv("LD_LIBRARY_PATH"); ok {
ldPaths = strings.Split(val, ":")
}
for _, d := range ldPaths {
if d == libDir {
return
}
}
val := strings.Join(append(ldPaths, libDir), ":")
slog.Debug("updated lib path", "LD_LIBRARY_PATH", val)
os.Setenv("LD_LIBRARY_PATH", val)
}
// Walk the sysfs nodes for the available GPUs and gather information from them
// skipping over any devices in the skip map
func amdProcMemLookup(resp *GpuInfo, skip map[int]interface{}, ids []int) {
resp.memInfo.DeviceCount = 0
resp.memInfo.TotalMemory = 0
resp.memInfo.FreeMemory = 0
slog.Debug("discovering VRAM for amdgpu devices")
if len(ids) == 0 {
entries, err := os.ReadDir(AMDNodesSysfsDir)
if err != nil {
slog.Warn(fmt.Sprintf("failed to read amdgpu sysfs %s - %s", AMDNodesSysfsDir, err))
return
}
for _, node := range entries {
if !node.IsDir() {
continue
}
id, err := strconv.Atoi(node.Name())
if err != nil {
slog.Warn("malformed amdgpu sysfs node id " + node.Name())
continue
}
ids = append(ids, id)
} }
}
slog.Debug(fmt.Sprintf("amdgpu devices %v", ids))
for _, id := range ids { if int(major) < RocmComputeMin {
if _, skipped := skip[id]; skipped { slog.Warn(fmt.Sprintf("amdgpu too old gfx%d%d%x", major, minor, patch), "gpu", gpuID)
continue continue
} }
// Look up the memory for the current node
totalMemory := uint64(0) totalMemory := uint64(0)
usedMemory := uint64(0) usedMemory := uint64(0)
// Adjust for sysfs vs HIP ids propGlob := filepath.Join(AMDNodesSysfsDir, strconv.Itoa(nodeID), GPUTotalMemoryFileGlob)
propGlob := filepath.Join(AMDNodesSysfsDir, strconv.Itoa(id+1), GPUTotalMemoryFileGlob)
propFiles, err := filepath.Glob(propGlob) propFiles, err := filepath.Glob(propGlob)
if err != nil { if err != nil {
slog.Warn(fmt.Sprintf("error looking up total GPU memory: %s %s", propGlob, err)) slog.Warn("error looking up total GPU memory", "glob", propGlob, "error", err)
} }
// 1 or more memory banks - sum the values of all of them // 1 or more memory banks - sum the values of all of them
for _, propFile := range propFiles { for _, propFile := range propFiles {
fp, err := os.Open(propFile) fp, err := os.Open(propFile)
if err != nil { if err != nil {
slog.Warn(fmt.Sprintf("failed to open sysfs node file %s: %s", propFile, err)) slog.Warn("failed to open sysfs node", "file", propFile, "erroir", err)
continue continue
} }
defer fp.Close() defer fp.Close()
...@@ -226,49 +179,113 @@ func amdProcMemLookup(resp *GpuInfo, skip map[int]interface{}, ids []int) { ...@@ -226,49 +179,113 @@ func amdProcMemLookup(resp *GpuInfo, skip map[int]interface{}, ids []int) {
} }
} }
if totalMemory == 0 { if totalMemory == 0 {
slog.Warn(fmt.Sprintf("amdgpu [%d] reports zero total memory, skipping", id)) slog.Warn("amdgpu reports zero total memory", "gpu", gpuID)
skip[id] = struct{}{}
continue continue
} }
if totalMemory < IGPUMemLimit { usedGlob := filepath.Join(AMDNodesSysfsDir, strconv.Itoa(nodeID), GPUUsedMemoryFileGlob)
slog.Info(fmt.Sprintf("amdgpu [%d] appears to be an iGPU with %dM reported total memory, skipping", id, totalMemory/1024/1024))
skip[id] = struct{}{}
continue
}
usedGlob := filepath.Join(AMDNodesSysfsDir, strconv.Itoa(id), GPUUsedMemoryFileGlob)
usedFiles, err := filepath.Glob(usedGlob) usedFiles, err := filepath.Glob(usedGlob)
if err != nil { if err != nil {
slog.Warn(fmt.Sprintf("error looking up used GPU memory: %s %s", usedGlob, err)) slog.Warn("error looking up used GPU memory", "glob", usedGlob, "error", err)
continue continue
} }
for _, usedFile := range usedFiles { for _, usedFile := range usedFiles {
fp, err := os.Open(usedFile) fp, err := os.Open(usedFile)
if err != nil { if err != nil {
slog.Warn(fmt.Sprintf("failed to open sysfs node file %s: %s", usedFile, err)) slog.Warn("failed to open sysfs node", "file", usedFile, "error", err)
continue continue
} }
defer fp.Close() defer fp.Close()
data, err := io.ReadAll(fp) data, err := io.ReadAll(fp)
if err != nil { if err != nil {
slog.Warn(fmt.Sprintf("failed to read sysfs node file %s: %s", usedFile, err)) slog.Warn("failed to read sysfs node", "file", usedFile, "error", err)
continue continue
} }
used, err := strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64) used, err := strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64)
if err != nil { if err != nil {
slog.Warn(fmt.Sprintf("malformed used memory %s: %s", string(data), err)) slog.Warn("malformed used memory", "data", string(data), "error", err)
continue continue
} }
usedMemory += used usedMemory += used
} }
slog.Info(fmt.Sprintf("[%d] amdgpu totalMemory %dM", id, totalMemory/1024/1024))
slog.Info(fmt.Sprintf("[%d] amdgpu freeMemory %dM", id, (totalMemory-usedMemory)/1024/1024)) // iGPU detection, remove this check once we can support an iGPU variant of the rocm library
resp.memInfo.DeviceCount++ if totalMemory < IGPUMemLimit {
resp.memInfo.TotalMemory += totalMemory slog.Info("amdgpu appears to be an iGPU, skipping", "gpu", gpuID, "total", format.HumanBytes2(totalMemory))
resp.memInfo.FreeMemory += (totalMemory - usedMemory) continue
}
slog.Info("amdgpu memory", "gpu", gpuID, "total", format.HumanBytes2(totalMemory))
slog.Info("amdgpu memory", "gpu", gpuID, "available", format.HumanBytes2(totalMemory-usedMemory))
gpuInfo := GpuInfo{
Library: "rocm",
memInfo: memInfo{
TotalMemory: totalMemory,
FreeMemory: (totalMemory - usedMemory),
},
ID: fmt.Sprintf("%d", gpuID),
// Name: not exposed in sysfs directly, would require pci device id lookup
Major: int(major),
Minor: int(minor),
Patch: int(patch),
MinimumMemory: rocmMinimumMemory,
}
// If the user wants to filter to a subset of devices, filter out if we aren't a match
if len(visibleDevices) > 0 {
include := false
for _, visible := range visibleDevices {
if visible == gpuInfo.ID {
include = true
break
}
}
if !include {
slog.Info("filtering out device per user request", "id", gpuInfo.ID, "visible_devices", visibleDevices)
continue
}
}
// Final validation is gfx compatibility - load the library if we haven't already loaded it
// even if the user overrides, we still need to validate the library
if libDir == "" {
libDir, err = AMDValidateLibDir()
if err != nil {
slog.Warn("unable to verify rocm library, will use cpu", "error", err)
return []GpuInfo{}
}
}
gpuInfo.DependencyPath = libDir
if gfxOverride == "" {
// Only load supported list once
if len(supported) == 0 {
supported, err = GetSupportedGFX(libDir)
if err != nil {
slog.Warn("failed to lookup supported GFX types, falling back to CPU mode", "error", err)
return []GpuInfo{}
}
slog.Debug("rocm supported GPUs", "types", supported)
}
gfx := fmt.Sprintf("gfx%d%d%x", gpuInfo.Major, gpuInfo.Minor, gpuInfo.Patch)
if !slices.Contains[[]string, string](supported, gfx) {
slog.Warn("amdgpu is not supported", "gpu", gpuInfo.ID, "gpu_type", gfx, "library", libDir, "supported_types", supported)
// TODO - consider discrete markdown just for ROCM troubleshooting?
slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/gpu.md#overrides for HSA_OVERRIDE_GFX_VERSION usage")
continue
} else {
slog.Info("amdgpu is supported", "gpu", gpuInfo.ID, "gpu_type", gfx)
}
} else {
slog.Debug("skipping rocm gfx compatibility check with HSA_OVERRIDE_GFX_VERSION=" + gfxOverride)
}
// The GPU has passed all the verification steps and is supported
resp = append(resp, gpuInfo)
} }
if resp.memInfo.DeviceCount > 0 { if len(resp) == 0 {
resp.Library = "rocm" slog.Info("no compatible amdgpu devices detected")
} }
return resp
} }
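For reference, a minimal sketch of how the sysfs gfx_target_version value and node ordering above turn into a HIP-style gfx string and GPU ID. The decodeGFXTarget helper and the sample values are illustrative, derived from the parsing rule above rather than from a specific card:

package main

import (
	"fmt"
	"strconv"
)

// decodeGFXTarget mirrors the parsing above: the last two digits are the
// patch level (printed in hex), the next two the minor version, and the
// remainder the major version.
func decodeGFXTarget(val string) (string, error) {
	if len(val) < 5 {
		return "", fmt.Errorf("malformed gfx_target_version %q", val)
	}
	l := len(val)
	patch, err1 := strconv.ParseUint(val[l-2:l], 10, 32)
	minor, err2 := strconv.ParseUint(val[l-4:l-2], 10, 32)
	major, err3 := strconv.ParseUint(val[:l-4], 10, 32)
	if err1 != nil || err2 != nil || err3 != nil {
		return "", fmt.Errorf("malformed gfx_target_version %q", val)
	}
	return fmt.Sprintf("gfx%d%d%x", major, minor, patch), nil
}

func main() {
	// "90010" decodes to gfx90a; "100300" decodes to gfx1030.
	for _, v := range []string{"90010", "100300"} {
		gfx, _ := decodeGFXTarget(v)
		fmt.Println(v, "->", gfx)
	}
	// Node IDs include the CPU(s), which the driver lists first; the
	// HIP-facing GPU ID is the node ID minus the CPU nodes seen so far.
	nodeID, cpuCount := 2, 1
	fmt.Println("gpu id:", nodeID-cpuCount) // gpu id: 1
}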
// Quick check for AMD driver so we can skip amdgpu discovery if not present // Quick check for AMD driver so we can skip amdgpu discovery if not present
...@@ -280,87 +297,24 @@ func AMDDetected() bool { ...@@ -280,87 +297,24 @@ func AMDDetected() bool {
slog.Debug("amdgpu driver not detected " + sysfsDir) slog.Debug("amdgpu driver not detected " + sysfsDir)
return false return false
} else if err != nil { } else if err != nil {
slog.Debug(fmt.Sprintf("error looking up amd driver %s %s", sysfsDir, err)) slog.Debug("error looking up amd driver", "path", sysfsDir, "error", err)
return false return false
} }
return true return true
} }
func setupLink(source, target string) error {
if err := os.RemoveAll(target); err != nil {
return fmt.Errorf("failed to remove old rocm directory %s %w", target, err)
}
if err := os.Symlink(source, target); err != nil {
return fmt.Errorf("failed to create link %s => %s %w", source, target, err)
}
slog.Debug(fmt.Sprintf("host rocm linked %s => %s", source, target))
return nil
}
// Ensure the AMD rocm lib dir is wired up
// Prefer to use host installed ROCm, as long as it meets our minimum requirements // Prefer to use host installed ROCm, as long as it meets our minimum requirements
// failing that, tell the user how to download it on their own // failing that, tell the user how to download it on their own
func AMDValidateLibDir() (string, error) { func AMDValidateLibDir() (string, error) {
// We rely on the rpath compiled into our library to find rocm libDir, err := commonAMDValidateLibDir()
// so we establish a symlink to wherever we find it on the system
// to <payloads>/rocm
payloadsDir, err := PayloadsDir()
if err != nil {
return "", err
}
// If we already have a rocm dependency wired, nothing more to do
rocmTargetDir := filepath.Clean(filepath.Join(payloadsDir, "..", "rocm"))
if rocmLibUsable(rocmTargetDir) {
return rocmTargetDir, nil
}
// next to the running binary
exe, err := os.Executable()
if err == nil { if err == nil {
peerDir := filepath.Dir(exe) return libDir, nil
if rocmLibUsable(peerDir) {
slog.Debug("detected ROCM next to ollama executable " + peerDir)
return rocmTargetDir, setupLink(peerDir, rocmTargetDir)
}
peerDir = filepath.Join(filepath.Dir(exe), "rocm")
if rocmLibUsable(peerDir) {
slog.Debug("detected ROCM next to ollama executable " + peerDir)
return rocmTargetDir, setupLink(peerDir, rocmTargetDir)
}
} }
// Well known ollama installer path // Well known ollama installer path
installedRocmDir := "/usr/share/ollama/lib/rocm" installedRocmDir := "/usr/share/ollama/lib/rocm"
if rocmLibUsable(installedRocmDir) { if rocmLibUsable(installedRocmDir) {
return rocmTargetDir, setupLink(installedRocmDir, rocmTargetDir) return installedRocmDir, nil
}
// Prefer explicit HIP env var
hipPath := os.Getenv("HIP_PATH")
if hipPath != "" {
hipLibDir := filepath.Join(hipPath, "lib")
if rocmLibUsable(hipLibDir) {
slog.Debug("detected ROCM via HIP_PATH=" + hipPath)
return rocmTargetDir, setupLink(hipLibDir, rocmTargetDir)
}
}
// Scan the library path for potential matches
ldPaths := strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
for _, ldPath := range ldPaths {
d, err := filepath.Abs(ldPath)
if err != nil {
continue
}
if rocmLibUsable(d) {
return rocmTargetDir, setupLink(d, rocmTargetDir)
}
}
// Well known location(s)
if rocmLibUsable("/opt/rocm/lib") {
return rocmTargetDir, setupLink("/opt/rocm/lib", rocmTargetDir)
} }
// If we still haven't found a usable rocm, the user will have to install it on their own // If we still haven't found a usable rocm, the user will have to install it on their own
...@@ -384,68 +338,3 @@ func AMDDriverVersion() (string, error) { ...@@ -384,68 +338,3 @@ func AMDDriverVersion() (string, error) {
} }
return strings.TrimSpace(string(verString)), nil return strings.TrimSpace(string(verString)), nil
} }
func AMDGFXVersions() map[int]Version {
// The amdgpu driver always exposes the host CPU as node 0, but we have to skip that and subtract one
// from the other IDs to get alignment with the HIP libraries expectations (zero is the first GPU, not the CPU)
res := map[int]Version{}
matches, _ := filepath.Glob(GPUPropertiesFileGlob)
for _, match := range matches {
fp, err := os.Open(match)
if err != nil {
slog.Debug(fmt.Sprintf("failed to open sysfs node file %s: %s", match, err))
continue
}
defer fp.Close()
i, err := strconv.Atoi(filepath.Base(filepath.Dir(match)))
if err != nil {
slog.Debug(fmt.Sprintf("failed to parse node ID %s", err))
continue
}
if i == 0 {
// Skipping the CPU
continue
}
// Align with HIP IDs (zero is first GPU, not CPU)
i -= 1
scanner := bufio.NewScanner(fp)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if strings.HasPrefix(line, "gfx_target_version") {
ver := strings.Fields(line)
if len(ver) != 2 || len(ver[1]) < 5 {
if ver[1] != "0" {
slog.Debug("malformed " + line)
}
res[i] = Version{
Major: 0,
Minor: 0,
Patch: 0,
}
continue
}
l := len(ver[1])
patch, err1 := strconv.ParseUint(ver[1][l-2:l], 10, 32)
minor, err2 := strconv.ParseUint(ver[1][l-4:l-2], 10, 32)
major, err3 := strconv.ParseUint(ver[1][:l-4], 10, 32)
if err1 != nil || err2 != nil || err3 != nil {
slog.Debug("malformed int " + line)
continue
}
res[i] = Version{
Major: uint(major),
Minor: uint(minor),
Patch: uint(patch),
}
}
}
}
return res
}
func (v Version) ToGFXString() string {
return fmt.Sprintf("gfx%d%d%d", v.Major, v.Minor, v.Patch)
}
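One behavioural detail of the replacement above: the removed ToGFXString formatted the patch level in decimal, while the new code paths format it in hex (gfx%d%d%x), which matters for parts whose patch level is 10 or higher. A small illustrative comparison with made-up version values:

package main

import "fmt"

func main() {
	major, minor, patch := 9, 0, 10
	// Decimal patch formatting (removed ToGFXString): gfx9010
	fmt.Printf("gfx%d%d%d\n", major, minor, patch)
	// Hex patch formatting (new code paths): gfx90a
	fmt.Printf("gfx%d%d%x\n", major, minor, patch)
}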
...@@ -7,11 +7,13 @@ import ( ...@@ -7,11 +7,13 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"slices" "slices"
"strconv"
"strings" "strings"
"github.com/ollama/ollama/format"
) )
const ( const (
RocmStandardLocation = "C:\\Program Files\\AMD\\ROCm\\5.7\\bin" // TODO glob?
// TODO We're looking for this exact name to detect iGPUs since hipGetDeviceProperties never reports integrated==true // TODO We're looking for this exact name to detect iGPUs since hipGetDeviceProperties never reports integrated==true
iGPUName = "AMD Radeon(TM) Graphics" iGPUName = "AMD Radeon(TM) Graphics"
...@@ -19,39 +21,36 @@ const ( ...@@ -19,39 +21,36 @@ const (
var ( var (
// Used to validate if the given ROCm lib is usable // Used to validate if the given ROCm lib is usable
ROCmLibGlobs = []string{"hipblas.dll", "rocblas"} // TODO - probably include more coverage of files here... ROCmLibGlobs = []string{"hipblas.dll", "rocblas"} // TODO - probably include more coverage of files here...
RocmStandardLocations = []string{"C:\\Program Files\\AMD\\ROCm\\5.7\\bin"} // TODO glob?
) )
func AMDGetGPUInfo(resp *GpuInfo) { func AMDGetGPUInfo() []GpuInfo {
resp := []GpuInfo{}
hl, err := NewHipLib() hl, err := NewHipLib()
if err != nil { if err != nil {
slog.Debug(err.Error()) slog.Debug(err.Error())
return return nil
} }
defer hl.Release() defer hl.Release()
skip := map[int]interface{}{}
ids := []int{}
resp.memInfo.DeviceCount = 0
resp.memInfo.TotalMemory = 0
resp.memInfo.FreeMemory = 0
ver, err := hl.AMDDriverVersion() ver, err := hl.AMDDriverVersion()
if err == nil { if err == nil {
slog.Info("AMD Driver: " + ver) slog.Info("AMD Driver: " + ver)
} else { } else {
// For now this is benign, but we may eventually need to fail compatibility checks // For now this is benign, but we may eventually need to fail compatibility checks
slog.Debug(fmt.Sprintf("error looking up amd driver version: %s", err)) slog.Debug("error looking up amd driver version", "error", err)
} }
// Note: the HIP library automatically handles HIP_VISIBLE_DEVICES // Note: the HIP library automatically handles subsetting to any HIP_VISIBLE_DEVICES the user specified
count := hl.HipGetDeviceCount() count := hl.HipGetDeviceCount()
if count == 0 { if count == 0 {
return return nil
} }
libDir, err := AMDValidateLibDir() libDir, err := AMDValidateLibDir()
if err != nil { if err != nil {
slog.Warn(fmt.Sprintf("unable to verify rocm library, will use cpu: %s", err)) slog.Warn("unable to verify rocm library, will use cpu", "error", err)
return return nil
} }
var supported []string var supported []string
...@@ -59,95 +58,120 @@ func AMDGetGPUInfo(resp *GpuInfo) { ...@@ -59,95 +58,120 @@ func AMDGetGPUInfo(resp *GpuInfo) {
if gfxOverride == "" { if gfxOverride == "" {
supported, err = GetSupportedGFX(libDir) supported, err = GetSupportedGFX(libDir)
if err != nil { if err != nil {
slog.Warn(fmt.Sprintf("failed to lookup supported GFX types, falling back to CPU mode: %s", err)) slog.Warn("failed to lookup supported GFX types, falling back to CPU mode", "error", err)
return return nil
} }
} else { } else {
slog.Debug("skipping rocm gfx compatibility check with HSA_OVERRIDE_GFX_VERSION=" + gfxOverride) slog.Debug("skipping rocm gfx compatibility check with HSA_OVERRIDE_GFX_VERSION=" + gfxOverride)
} }
slog.Info(fmt.Sprintf("detected %d hip devices", count)) slog.Info("detected hip devices", "count", count)
// TODO how to determine the underlying device ID when visible devices is causing this to subset?
for i := 0; i < count; i++ { for i := 0; i < count; i++ {
ids = append(ids, i)
err = hl.HipSetDevice(i) err = hl.HipSetDevice(i)
if err != nil { if err != nil {
slog.Warn(fmt.Sprintf("[%d] %s", i, err)) slog.Warn("set device", "id", i, "error", err)
skip[i] = struct{}{}
continue continue
} }
props, err := hl.HipGetDeviceProperties(i) props, err := hl.HipGetDeviceProperties(i)
if err != nil { if err != nil {
slog.Warn(fmt.Sprintf("[%d] %s", i, err)) slog.Warn("get properties", "id", i, "error", err)
skip[i] = struct{}{}
continue continue
} }
n := bytes.IndexByte(props.Name[:], 0) n := bytes.IndexByte(props.Name[:], 0)
name := string(props.Name[:n]) name := string(props.Name[:n])
slog.Info(fmt.Sprintf("[%d] Name: %s", i, name)) // TODO is UUID actually populated on windows?
// Can luid be used on windows for setting visible devices (and is it actually set?)
n = bytes.IndexByte(props.GcnArchName[:], 0) n = bytes.IndexByte(props.GcnArchName[:], 0)
gfx := string(props.GcnArchName[:n]) gfx := string(props.GcnArchName[:n])
slog.Info(fmt.Sprintf("[%d] GcnArchName: %s", i, gfx)) slog.Info("hip device", "id", i, "name", name, "gfx", gfx)
var major, minor, patch string
switch len(gfx) {
case 6:
major, minor, patch = gfx[3:4], gfx[4:5], gfx[5:]
case 7:
major, minor, patch = gfx[3:5], gfx[5:6], gfx[6:]
}
//slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0 //slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0
// TODO Why isn't props.iGPU accurate!? // TODO Why isn't props.iGPU accurate!?
if strings.EqualFold(name, iGPUName) { if strings.EqualFold(name, iGPUName) {
slog.Info(fmt.Sprintf("iGPU detected [%d] skipping", i)) slog.Info("iGPU detected skipping", "id", i)
skip[i] = struct{}{}
continue continue
} }
if gfxOverride == "" { if gfxOverride == "" {
if !slices.Contains[[]string, string](supported, gfx) { if !slices.Contains[[]string, string](supported, gfx) {
slog.Warn(fmt.Sprintf("amdgpu [%d] %s is not supported by %s %v", i, gfx, libDir, supported)) slog.Warn("amdgpu is not supported", "gpu", i, "gpu_type", gfx, "library", libDir, "supported_types", supported)
// TODO - consider discrete markdown just for ROCM troubleshooting? // TODO - consider discrete markdown just for ROCM troubleshooting?
slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for HSA_OVERRIDE_GFX_VERSION usage") slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for HSA_OVERRIDE_GFX_VERSION usage")
skip[i] = struct{}{}
continue continue
} else { } else {
slog.Info(fmt.Sprintf("amdgpu [%d] %s is supported", i, gfx)) slog.Info("amdgpu is supported", "gpu", i, "gpu_type", gfx)
} }
} }
totalMemory, freeMemory, err := hl.HipMemGetInfo() freeMemory, totalMemory, err := hl.HipMemGetInfo()
if err != nil { if err != nil {
slog.Warn(fmt.Sprintf("[%d] %s", i, err)) slog.Warn("get mem info", "id", i, "error", err)
continue continue
} }
// TODO according to docs, freeMem may lie on windows! // iGPU detection, remove this check once we can support an iGPU variant of the rocm library
slog.Info(fmt.Sprintf("[%d] Total Mem: %d", i, totalMemory)) if totalMemory < IGPUMemLimit {
slog.Info(fmt.Sprintf("[%d] Free Mem: %d", i, freeMemory)) slog.Info("amdgpu appears to be an iGPU, skipping", "gpu", i, "total", format.HumanBytes2(totalMemory))
resp.memInfo.DeviceCount++ continue
resp.memInfo.TotalMemory += totalMemory }
resp.memInfo.FreeMemory += freeMemory
} // TODO revisit this once ROCm v6 is available on windows.
if resp.memInfo.DeviceCount > 0 { // v5.7 only reports VRAM used by this process, so it's completely wrong and unusable
resp.Library = "rocm" slog.Info("amdgpu memory", "gpu", i, "total", format.HumanBytes2(totalMemory))
} slog.Info("amdgpu memory", "gpu", i, "available", format.HumanBytes2(freeMemory))
// Abort if all GPUs are skipped gpuInfo := GpuInfo{
if len(skip) >= count { Library: "rocm",
slog.Info("all detected amdgpus are skipped, falling back to CPU") memInfo: memInfo{
return TotalMemory: totalMemory,
} FreeMemory: freeMemory,
if len(skip) > 0 { },
amdSetVisibleDevices(ids, skip) ID: fmt.Sprintf("%d", i), // TODO this is probably wrong if we specify visible devices
DependencyPath: libDir,
MinimumMemory: rocmMinimumMemory,
}
if major != "" {
gpuInfo.Major, err = strconv.Atoi(major)
if err != nil {
slog.Info("failed to parse version", "version", gfx, "error", err)
}
}
if minor != "" {
gpuInfo.Minor, err = strconv.Atoi(minor)
if err != nil {
slog.Info("failed to parse version", "version", gfx, "error", err)
}
}
if patch != "" {
// Patch rev is hex; e.g. gfx90a
p, err := strconv.ParseInt(patch, 16, 0)
if err != nil {
slog.Info("failed to parse version", "version", gfx, "error", err)
} else {
gpuInfo.Patch = int(p)
}
}
if gpuInfo.Major < RocmComputeMin {
slog.Warn(fmt.Sprintf("amdgpu [%s] too old gfx%d%d%x", gpuInfo.ID, gpuInfo.Major, gpuInfo.Minor, gpuInfo.Patch))
continue
}
resp = append(resp, gpuInfo)
} }
UpdatePath(libDir)
return resp
} }
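A small sketch of how the GcnArchName slicing above splits a gfx string into version components, with the patch component parsed as hex as in the ParseInt call later in the loop. The sample names are illustrative:

package main

import (
	"fmt"
	"strconv"
)

// splitGFX mirrors the length-based slicing above: 6-character names carry a
// one-digit major version, 7-character names a two-digit major version.
func splitGFX(gfx string) (major, minor, patch string) {
	switch len(gfx) {
	case 6:
		return gfx[3:4], gfx[4:5], gfx[5:]
	case 7:
		return gfx[3:5], gfx[5:6], gfx[6:]
	}
	return "", "", ""
}

func main() {
	for _, gfx := range []string{"gfx90a", "gfx1030"} {
		major, minor, patch := splitGFX(gfx)
		// The patch component is hex, e.g. the "a" in gfx90a is 10.
		p, _ := strconv.ParseInt(patch, 16, 0)
		fmt.Println(gfx, "->", major, minor, p)
	}
}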
func AMDValidateLibDir() (string, error) { func AMDValidateLibDir() (string, error) {
// On windows non-admins typically can't create links libDir, err := commonAMDValidateLibDir()
// so instead of trying to rely on rpath and a link in
// $LibDir/rocm, we instead rely on setting PATH to point
// to the location of the ROCm library
// Installer payload location if we're running the installed binary
exe, err := os.Executable()
if err == nil { if err == nil {
rocmTargetDir := filepath.Join(filepath.Dir(exe), "rocm") return libDir, nil
if rocmLibUsable(rocmTargetDir) {
slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir)
return rocmTargetDir, nil
}
} }
// Installer payload (if we're running from some other location) // Installer payload (if we're running from some other location)
...@@ -159,21 +183,6 @@ func AMDValidateLibDir() (string, error) { ...@@ -159,21 +183,6 @@ func AMDValidateLibDir() (string, error) {
return rocmTargetDir, nil return rocmTargetDir, nil
} }
// Prefer explicit HIP env var
hipPath := os.Getenv("HIP_PATH")
if hipPath != "" {
hipLibDir := filepath.Join(hipPath, "bin")
if rocmLibUsable(hipLibDir) {
slog.Debug("detected ROCM via HIP_PATH=" + hipPath)
return hipLibDir, nil
}
}
// Well known location(s)
if rocmLibUsable(RocmStandardLocation) {
return RocmStandardLocation, nil
}
// Should not happen on windows since we include it in the installer, but stand-alone binary might hit this // Should not happen on windows since we include it in the installer, but stand-alone binary might hit this
slog.Warn("amdgpu detected, but no compatible rocm library found. Please install ROCm") slog.Warn("amdgpu detected, but no compatible rocm library found. Please install ROCm")
return "", fmt.Errorf("no suitable rocm found, falling back to CPU") return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
......
...@@ -12,6 +12,8 @@ import ( ...@@ -12,6 +12,8 @@ import (
"sync" "sync"
"syscall" "syscall"
"time" "time"
"github.com/ollama/ollama/server/envconfig"
) )
var ( var (
...@@ -24,8 +26,16 @@ func PayloadsDir() (string, error) { ...@@ -24,8 +26,16 @@ func PayloadsDir() (string, error) {
defer lock.Unlock() defer lock.Unlock()
var err error var err error
if payloadsDir == "" { if payloadsDir == "" {
runnersDir := envconfig.RunnersDir
if runnersDir != "" {
payloadsDir = runnersDir
return payloadsDir, nil
}
// The remainder only applies on non-windows where we still carry payloads in the main executable
cleanupTmpDirs() cleanupTmpDirs()
tmpDir := os.Getenv("OLLAMA_TMPDIR") tmpDir := envconfig.TmpDir
if tmpDir == "" { if tmpDir == "" {
tmpDir, err = os.MkdirTemp("", "ollama") tmpDir, err = os.MkdirTemp("", "ollama")
if err != nil { if err != nil {
...@@ -80,7 +90,7 @@ func cleanupTmpDirs() { ...@@ -80,7 +90,7 @@ func cleanupTmpDirs() {
} }
err = os.RemoveAll(d) err = os.RemoveAll(d)
if err != nil { if err != nil {
slog.Debug(fmt.Sprintf("unable to cleanup stale tmpdir %s: %s", d, err)) slog.Debug("unable to cleanup stale tmpdir", "path", d, "error", err)
} }
} }
} }
...@@ -88,7 +98,8 @@ func cleanupTmpDirs() { ...@@ -88,7 +98,8 @@ func cleanupTmpDirs() {
func Cleanup() { func Cleanup() {
lock.Lock() lock.Lock()
defer lock.Unlock() defer lock.Unlock()
if payloadsDir != "" { runnersDir := envconfig.RunnersDir
if payloadsDir != "" && runnersDir == "" && runtime.GOOS != "windows" {
// We want to fully clean up the tmpdir parent of the payloads dir // We want to fully clean up the tmpdir parent of the payloads dir
tmpDir := filepath.Clean(filepath.Join(payloadsDir, "..")) tmpDir := filepath.Clean(filepath.Join(payloadsDir, ".."))
slog.Debug("cleaning up", "dir", tmpDir) slog.Debug("cleaning up", "dir", tmpDir)
...@@ -120,7 +131,7 @@ func UpdatePath(dir string) { ...@@ -120,7 +131,7 @@ func UpdatePath(dir string) {
} }
} }
newPath := strings.Join(append([]string{dir}, pathComponents...), ";") newPath := strings.Join(append([]string{dir}, pathComponents...), ";")
slog.Info(fmt.Sprintf("Updating PATH to %s", newPath)) slog.Info("updating", "PATH", newPath)
os.Setenv("PATH", newPath) os.Setenv("PATH", newPath)
} }
// linux and darwin rely on rpath // linux and darwin rely on rpath
......
//go:build linux || windows
package gpu
import (
"log/slog"
"strings"
)
func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
ids := []string{}
for _, info := range gpuInfo {
if info.Library != "cuda" {
// TODO shouldn't happen if things are wired correctly...
slog.Debug("cudaGetVisibleDevicesEnv skipping over non-cuda device", "library", info.Library)
continue
}
ids = append(ids, info.ID)
}
return "CUDA_VISIBLE_DEVICES", strings.Join(ids, ",")
}
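A short usage sketch for the helper above, assuming it sits alongside this code in the gpu package; the GpuInfo values are made up for illustration:

// (inside package gpu, with "fmt" imported)
func exampleCudaVisibleDevices() {
	gpus := []GpuInfo{
		{Library: "cuda", ID: "0"},
		{Library: "rocm", ID: "0"}, // not a cuda device, skipped with a debug log
		{Library: "cuda", ID: "1"},
	}
	key, val := cudaGetVisibleDevicesEnv(gpus)
	fmt.Println(key, val) // CUDA_VISIBLE_DEVICES 0,1
}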
...@@ -16,22 +16,23 @@ import ( ...@@ -16,22 +16,23 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"runtime" "runtime"
"strconv"
"strings" "strings"
"sync" "sync"
"unsafe" "unsafe"
"github.com/ollama/ollama/format" "github.com/ollama/ollama/format"
"github.com/ollama/ollama/server/envconfig"
) )
type handles struct { type handles struct {
nvml *C.nvml_handle_t deviceCount int
cudart *C.cudart_handle_t cudart *C.cudart_handle_t
nvcuda *C.nvcuda_handle_t
} }
const ( const (
cudaMinimumMemory = 457 * format.MebiByte cudaMinimumMemory = 256 * format.MebiByte
rocmMinimumMemory = 457 * format.MebiByte rocmMinimumMemory = 256 * format.MebiByte
) )
var gpuMutex sync.Mutex var gpuMutex sync.Mutex
...@@ -39,26 +40,10 @@ var gpuMutex sync.Mutex ...@@ -39,26 +40,10 @@ var gpuMutex sync.Mutex
// With our current CUDA compile flags, older than 5.0 will not work properly // With our current CUDA compile flags, older than 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0} var CudaComputeMin = [2]C.int{5, 0}
// Possible locations for the nvidia-ml library var RocmComputeMin = 9
var NvmlLinuxGlobs = []string{
"/usr/local/cuda/lib64/libnvidia-ml.so*",
"/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-ml.so*",
"/usr/lib/x86_64-linux-gnu/libnvidia-ml.so*",
"/usr/lib/wsl/lib/libnvidia-ml.so*",
"/usr/lib/wsl/drivers/*/libnvidia-ml.so*",
"/opt/cuda/lib64/libnvidia-ml.so*",
"/usr/lib*/libnvidia-ml.so*",
"/usr/lib/aarch64-linux-gnu/nvidia/current/libnvidia-ml.so*",
"/usr/lib/aarch64-linux-gnu/libnvidia-ml.so*",
"/usr/local/lib*/libnvidia-ml.so*",
// TODO: are these stubs ever valid?
"/opt/cuda/targets/x86_64-linux/lib/stubs/libnvidia-ml.so*",
}
var NvmlWindowsGlobs = []string{ // TODO find a better way to detect iGPU instead of minimum memory
"c:\\Windows\\System32\\nvml.dll", const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU
}
var CudartLinuxGlobs = []string{ var CudartLinuxGlobs = []string{
"/usr/local/cuda/lib64/libcudart.so*", "/usr/local/cuda/lib64/libcudart.so*",
...@@ -79,6 +64,22 @@ var CudartWindowsGlobs = []string{ ...@@ -79,6 +64,22 @@ var CudartWindowsGlobs = []string{
"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll", "c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
} }
var NvcudaLinuxGlobs = []string{
"/usr/local/cuda*/targets/*/lib/libcuda.so*",
"/usr/lib/*-linux-gnu/nvidia/current/libcuda.so*",
"/usr/lib/*-linux-gnu/libcuda.so*",
"/usr/lib/wsl/lib/libcuda.so*",
"/usr/lib/wsl/drivers/*/libcuda.so*",
"/opt/cuda/lib*/libcuda.so*",
"/usr/local/cuda/lib*/libcuda.so*",
"/usr/lib*/libcuda.so*",
"/usr/local/lib*/libcuda.so*",
}
var NvcudaWindowsGlobs = []string{
"c:\\windows\\system*\\nvcuda.dll",
}
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed. // Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices. // Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra string = os.Getenv("JETSON_JETPACK") var CudaTegra string = os.Getenv("JETSON_JETPACK")
...@@ -88,61 +89,62 @@ func initGPUHandles() *handles { ...@@ -88,61 +89,62 @@ func initGPUHandles() *handles {
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing // TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
gpuHandles := &handles{nil, nil} gpuHandles := &handles{}
var nvmlMgmtName string
var nvmlMgmtPatterns []string
var cudartMgmtName string var cudartMgmtName string
var cudartMgmtPatterns []string var cudartMgmtPatterns []string
var nvcudaMgmtName string
var nvcudaMgmtPatterns []string
tmpDir, _ := PayloadsDir() tmpDir, _ := PayloadsDir()
switch runtime.GOOS { switch runtime.GOOS {
case "windows": case "windows":
nvmlMgmtName = "nvml.dll"
nvmlMgmtPatterns = make([]string, len(NvmlWindowsGlobs))
copy(nvmlMgmtPatterns, NvmlWindowsGlobs)
cudartMgmtName = "cudart64_*.dll" cudartMgmtName = "cudart64_*.dll"
localAppData := os.Getenv("LOCALAPPDATA") localAppData := os.Getenv("LOCALAPPDATA")
cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", cudartMgmtName)} cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", cudartMgmtName)}
cudartMgmtPatterns = append(cudartMgmtPatterns, CudartWindowsGlobs...) cudartMgmtPatterns = append(cudartMgmtPatterns, CudartWindowsGlobs...)
// Aligned with driver, we can't carry as payloads
nvcudaMgmtName = "nvcuda.dll"
nvcudaMgmtPatterns = NvcudaWindowsGlobs
case "linux": case "linux":
nvmlMgmtName = "libnvidia-ml.so"
nvmlMgmtPatterns = make([]string, len(NvmlLinuxGlobs))
copy(nvmlMgmtPatterns, NvmlLinuxGlobs)
cudartMgmtName = "libcudart.so*" cudartMgmtName = "libcudart.so*"
if tmpDir != "" { if tmpDir != "" {
// TODO - add "payloads" for subprocess // TODO - add "payloads" for subprocess
cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", cudartMgmtName)} cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", cudartMgmtName)}
} }
cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...) cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
// Aligned with driver, we can't carry as payloads
nvcudaMgmtName = "libcuda.so*"
nvcudaMgmtPatterns = NvcudaLinuxGlobs
default: default:
return gpuHandles return gpuHandles
} }
slog.Info("Detecting GPU type") slog.Info("Detecting GPUs")
cudartLibPaths := FindGPULibs(cudartMgmtName, cudartMgmtPatterns) nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
if len(cudartLibPaths) > 0 { if len(nvcudaLibPaths) > 0 {
cudart := LoadCUDARTMgmt(cudartLibPaths) deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
if cudart != nil { if nvcuda != nil {
slog.Info("Nvidia GPU detected via cudart") slog.Info("detected GPUs", "count", deviceCount, "library", libPath)
gpuHandles.cudart = cudart gpuHandles.nvcuda = nvcuda
gpuHandles.deviceCount = deviceCount
return gpuHandles return gpuHandles
} }
} }
// TODO once we build confidence, remove this and the gpu_info_nvml.[ch] files cudartLibPaths := FindGPULibs(cudartMgmtName, cudartMgmtPatterns)
nvmlLibPaths := FindGPULibs(nvmlMgmtName, nvmlMgmtPatterns) if len(cudartLibPaths) > 0 {
if len(nvmlLibPaths) > 0 { deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
nvml := LoadNVMLMgmt(nvmlLibPaths) if cudart != nil {
if nvml != nil { slog.Info("detected GPUs", "library", libPath, "count", deviceCount)
slog.Info("Nvidia GPU detected via nvidia-ml") gpuHandles.cudart = cudart
gpuHandles.nvml = nvml gpuHandles.deviceCount = deviceCount
return gpuHandles return gpuHandles
} }
} }
return gpuHandles return gpuHandles
} }
func GetGPUInfo() GpuInfo { func GetGPUInfo() GpuInfoList {
// TODO - consider exploring lspci (and equivalent on windows) to check for // TODO - consider exploring lspci (and equivalent on windows) to check for
// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries // GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
gpuMutex.Lock() gpuMutex.Lock()
...@@ -150,12 +152,12 @@ func GetGPUInfo() GpuInfo { ...@@ -150,12 +152,12 @@ func GetGPUInfo() GpuInfo {
gpuHandles := initGPUHandles() gpuHandles := initGPUHandles()
defer func() { defer func() {
if gpuHandles.nvml != nil {
C.nvml_release(*gpuHandles.nvml)
}
if gpuHandles.cudart != nil { if gpuHandles.cudart != nil {
C.cudart_release(*gpuHandles.cudart) C.cudart_release(*gpuHandles.cudart)
} }
if gpuHandles.nvcuda != nil {
C.nvcuda_release(*gpuHandles.nvcuda)
}
}() }()
// All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX // All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX
...@@ -164,73 +166,75 @@ func GetGPUInfo() GpuInfo { ...@@ -164,73 +166,75 @@ func GetGPUInfo() GpuInfo {
slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.") slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.")
} }
// On windows we bundle the nvidia library one level above the runner dir
depPath := ""
if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
depPath = filepath.Dir(envconfig.RunnersDir)
}
var memInfo C.mem_info_t var memInfo C.mem_info_t
resp := GpuInfo{} resp := []GpuInfo{}
if gpuHandles.nvml != nil && (cpuVariant != "" || runtime.GOARCH != "amd64") {
C.nvml_check_vram(*gpuHandles.nvml, &memInfo) // NVIDIA first
if memInfo.err != nil { for i := 0; i < gpuHandles.deviceCount; i++ {
slog.Info(fmt.Sprintf("[nvidia-ml] error looking up NVML GPU memory: %s", C.GoString(memInfo.err))) // TODO once we support CPU compilation variants of GPU libraries refine this...
C.free(unsafe.Pointer(memInfo.err)) if cpuVariant == "" && runtime.GOARCH == "amd64" {
} else if memInfo.count > 0 { continue
// Verify minimum compute capability }
var cc C.nvml_compute_capability_t gpuInfo := GpuInfo{
C.nvml_compute_capability(*gpuHandles.nvml, &cc) Library: "cuda",
if cc.err != nil { }
slog.Info(fmt.Sprintf("[nvidia-ml] error looking up NVML GPU compute capability: %s", C.GoString(cc.err))) if gpuHandles.cudart != nil {
C.free(unsafe.Pointer(cc.err)) C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo)
} else if cc.major > CudaComputeMin[0] || (cc.major == CudaComputeMin[0] && cc.minor >= CudaComputeMin[1]) { } else {
slog.Info(fmt.Sprintf("[nvidia-ml] NVML CUDA Compute Capability detected: %d.%d", cc.major, cc.minor)) C.nvcuda_check_vram(*gpuHandles.nvcuda, C.int(i), &memInfo)
resp.Library = "cuda"
resp.MinimumMemory = cudaMinimumMemory
} else {
slog.Info(fmt.Sprintf("[nvidia-ml] CUDA GPU is too old. Falling back to CPU mode. Compute Capability detected: %d.%d", cc.major, cc.minor))
}
} }
} else if gpuHandles.cudart != nil && (cpuVariant != "" || runtime.GOARCH != "amd64") {
C.cudart_check_vram(*gpuHandles.cudart, &memInfo)
if memInfo.err != nil { if memInfo.err != nil {
slog.Info(fmt.Sprintf("[cudart] error looking up CUDART GPU memory: %s", C.GoString(memInfo.err))) slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
C.free(unsafe.Pointer(memInfo.err)) C.free(unsafe.Pointer(memInfo.err))
} else if memInfo.count > 0 { continue
// Verify minimum compute capability
var cc C.cudart_compute_capability_t
C.cudart_compute_capability(*gpuHandles.cudart, &cc)
if cc.err != nil {
slog.Info(fmt.Sprintf("[cudart] error looking up CUDA compute capability: %s", C.GoString(cc.err)))
C.free(unsafe.Pointer(cc.err))
} else if cc.major > CudaComputeMin[0] || (cc.major == CudaComputeMin[0] && cc.minor >= CudaComputeMin[1]) {
slog.Info(fmt.Sprintf("[cudart] CUDART CUDA Compute Capability detected: %d.%d", cc.major, cc.minor))
resp.Library = "cuda"
resp.MinimumMemory = cudaMinimumMemory
} else {
slog.Info(fmt.Sprintf("[cudart] CUDA GPU is too old. Falling back to CPU mode. Compute Capability detected: %d.%d", cc.major, cc.minor))
}
} }
} else { if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
AMDGetGPUInfo(&resp) slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
if resp.Library != "" { continue
resp.MinimumMemory = rocmMinimumMemory
return resp
} }
gpuInfo.TotalMemory = uint64(memInfo.total)
gpuInfo.FreeMemory = uint64(memInfo.free)
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
gpuInfo.Major = int(memInfo.major)
gpuInfo.Minor = int(memInfo.minor)
gpuInfo.MinimumMemory = cudaMinimumMemory
gpuInfo.DependencyPath = depPath
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
resp = append(resp, gpuInfo)
} }
if resp.Library == "" {
// Then AMD
resp = append(resp, AMDGetGPUInfo()...)
if len(resp) == 0 {
C.cpu_check_ram(&memInfo) C.cpu_check_ram(&memInfo)
resp.Library = "cpu" if memInfo.err != nil {
resp.Variant = cpuVariant slog.Info("error looking up CPU memory", "error", C.GoString(memInfo.err))
} C.free(unsafe.Pointer(memInfo.err))
if memInfo.err != nil { return resp
slog.Info(fmt.Sprintf("error looking up CPU memory: %s", C.GoString(memInfo.err))) }
C.free(unsafe.Pointer(memInfo.err)) gpuInfo := GpuInfo{
return resp Library: "cpu",
Variant: cpuVariant,
}
gpuInfo.TotalMemory = uint64(memInfo.total)
gpuInfo.FreeMemory = uint64(memInfo.free)
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
resp = append(resp, gpuInfo)
} }
resp.DeviceCount = uint32(memInfo.count)
resp.FreeMemory = uint64(memInfo.free)
resp.TotalMemory = uint64(memInfo.total)
return resp return resp
} }
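GetGPUInfo now returns a list rather than a single aggregated struct, so callers iterate and aggregate per device. A hedged usage sketch, assuming it sits in the gpu package; the aggregation itself is illustrative:

// (inside package gpu, with "log/slog" imported)
func logFreeVRAMByLibrary() {
	byLib := map[string]uint64{}
	for _, g := range GetGPUInfo() {
		// Library is "cuda", "rocm", or "cpu"; FreeMemory comes from the per-device memInfo.
		byLib[g.Library] += g.FreeMemory
	}
	slog.Info("free memory by library", "free", byLib)
}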
func getCPUMem() (memInfo, error) { func GetCPUMem() (memInfo, error) {
var ret memInfo var ret memInfo
var info C.mem_info_t var info C.mem_info_t
C.cpu_check_ram(&info) C.cpu_check_ram(&info)
...@@ -243,29 +247,12 @@ func getCPUMem() (memInfo, error) { ...@@ -243,29 +247,12 @@ func getCPUMem() (memInfo, error) {
return ret, nil return ret, nil
} }
func CheckVRAM() (uint64, error) { func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
userLimit := os.Getenv("OLLAMA_MAX_VRAM")
if userLimit != "" {
avail, err := strconv.ParseInt(userLimit, 10, 64)
if err != nil {
return 0, fmt.Errorf("Invalid OLLAMA_MAX_VRAM setting %s: %s", userLimit, err)
}
slog.Info(fmt.Sprintf("user override OLLAMA_MAX_VRAM=%d", avail))
return uint64(avail), nil
}
gpuInfo := GetGPUInfo()
if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
return gpuInfo.FreeMemory, nil
}
return 0, fmt.Errorf("no GPU detected") // TODO - better handling of CPU based memory determiniation
}
func FindGPULibs(baseLibName string, patterns []string) []string {
// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them // Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
var ldPaths []string var ldPaths []string
var patterns []string
gpuLibPaths := []string{} gpuLibPaths := []string{}
slog.Info(fmt.Sprintf("Searching for GPU management library %s", baseLibName)) slog.Debug("Searching for GPU library", "name", baseLibName)
switch runtime.GOOS { switch runtime.GOOS {
case "windows": case "windows":
...@@ -283,8 +270,14 @@ func FindGPULibs(baseLibName string, patterns []string) []string { ...@@ -283,8 +270,14 @@ func FindGPULibs(baseLibName string, patterns []string) []string {
} }
patterns = append(patterns, filepath.Join(d, baseLibName+"*")) patterns = append(patterns, filepath.Join(d, baseLibName+"*"))
} }
slog.Debug(fmt.Sprintf("gpu management search paths: %v", patterns)) patterns = append(patterns, defaultPatterns...)
slog.Debug("gpu library search", "globs", patterns)
for _, pattern := range patterns { for _, pattern := range patterns {
// Nvidia PhysX known to return bogus results
if strings.Contains(pattern, "PhysX") {
slog.Debug("skipping PhysX cuda library path", "path", pattern)
}
// Ignore glob discovery errors // Ignore glob discovery errors
matches, _ := filepath.Glob(pattern) matches, _ := filepath.Glob(pattern)
for _, match := range matches { for _, match := range matches {
...@@ -311,47 +304,66 @@ func FindGPULibs(baseLibName string, patterns []string) []string { ...@@ -311,47 +304,66 @@ func FindGPULibs(baseLibName string, patterns []string) []string {
} }
} }
} }
slog.Info(fmt.Sprintf("Discovered GPU libraries: %v", gpuLibPaths)) slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
return gpuLibPaths return gpuLibPaths
} }
func LoadNVMLMgmt(nvmlLibPaths []string) *C.nvml_handle_t { func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
var resp C.nvml_init_resp_t var resp C.cudart_init_resp_t
resp.ch.verbose = getVerboseState() resp.ch.verbose = getVerboseState()
for _, libPath := range nvmlLibPaths { for _, libPath := range cudartLibPaths {
lib := C.CString(libPath) lib := C.CString(libPath)
defer C.free(unsafe.Pointer(lib)) defer C.free(unsafe.Pointer(lib))
C.nvml_init(lib, &resp) C.cudart_init(lib, &resp)
if resp.err != nil { if resp.err != nil {
slog.Info(fmt.Sprintf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err))) slog.Debug("Unable to load cudart", "library", libPath, "error", C.GoString(resp.err))
C.free(unsafe.Pointer(resp.err)) C.free(unsafe.Pointer(resp.err))
} else { } else {
return &resp.ch return int(resp.num_devices), &resp.ch, libPath
} }
} }
return nil return 0, nil, ""
} }
func LoadCUDARTMgmt(cudartLibPaths []string) *C.cudart_handle_t { func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
var resp C.cudart_init_resp_t var resp C.nvcuda_init_resp_t
resp.ch.verbose = getVerboseState() resp.ch.verbose = getVerboseState()
for _, libPath := range cudartLibPaths { for _, libPath := range nvcudaLibPaths {
lib := C.CString(libPath) lib := C.CString(libPath)
defer C.free(unsafe.Pointer(lib)) defer C.free(unsafe.Pointer(lib))
C.cudart_init(lib, &resp) C.nvcuda_init(lib, &resp)
if resp.err != nil { if resp.err != nil {
slog.Info(fmt.Sprintf("Unable to load cudart CUDA management library %s: %s", libPath, C.GoString(resp.err))) slog.Debug("Unable to load nvcuda", "library", libPath, "error", C.GoString(resp.err))
C.free(unsafe.Pointer(resp.err)) C.free(unsafe.Pointer(resp.err))
} else { } else {
return &resp.ch return int(resp.num_devices), &resp.ch, libPath
} }
} }
return nil return 0, nil, ""
} }
func getVerboseState() C.uint16_t { func getVerboseState() C.uint16_t {
if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" { if envconfig.Debug {
return C.uint16_t(1) return C.uint16_t(1)
} }
return C.uint16_t(0) return C.uint16_t(0)
} }
// Given the list of GPUs this instantiation is targeted for,
// figure out the visible devices environment variable
//
// If different libraries are detected, the first one is what we use
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
if len(l) == 0 {
return "", ""
}
switch l[0].Library {
case "cuda":
return cudaGetVisibleDevicesEnv(l)
case "rocm":
return rocmGetVisibleDevicesEnv(l)
default:
slog.Debug("no filter required for library " + l[0].Library)
return "", ""
}
}
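A brief usage sketch for the dispatcher above, assuming the gpu package; a real caller would more likely inject the variable into a subprocess environment rather than the current process:

// (inside package gpu, with "os" imported)
func applyVisibleDevices(gpus GpuInfoList) {
	if key, val := gpus.GetVisibleDevicesEnv(); key != "" {
		os.Setenv(key, val) // e.g. CUDA_VISIBLE_DEVICES=0,1
	}
}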
...@@ -9,52 +9,47 @@ package gpu ...@@ -9,52 +9,47 @@ package gpu
*/ */
import "C" import "C"
import ( import (
"fmt"
"log/slog"
"os"
"runtime" "runtime"
"strconv"
)
// CheckVRAM returns the free VRAM in bytes on Linux machines with NVIDIA GPUs
func CheckVRAM() (uint64, error) {
userLimit := os.Getenv("OLLAMA_MAX_VRAM")
if userLimit != "" {
avail, err := strconv.ParseInt(userLimit, 10, 64)
if err != nil {
return 0, fmt.Errorf("Invalid OLLAMA_MAX_VRAM setting %s: %s", userLimit, err)
}
slog.Info(fmt.Sprintf("user override OLLAMA_MAX_VRAM=%d", avail))
return uint64(avail), nil
}
if runtime.GOARCH == "amd64" { "github.com/ollama/ollama/format"
// gpu not supported, this may not be metal )
return 0, nil
}
return uint64(C.getRecommendedMaxVRAM()), nil const (
} metalMinimumMemory = 384 * format.MebiByte
)
func GetGPUInfo() GpuInfo { func GetGPUInfo() GpuInfoList {
mem, _ := getCPUMem() mem, _ := GetCPUMem()
if runtime.GOARCH == "amd64" { if runtime.GOARCH == "amd64" {
return GpuInfo{ return []GpuInfo{
Library: "cpu", {
Variant: GetCPUVariant(), Library: "cpu",
memInfo: mem, Variant: GetCPUVariant(),
memInfo: mem,
},
} }
} }
return GpuInfo{ info := GpuInfo{
Library: "metal", Library: "metal",
memInfo: mem, ID: "0",
} }
info.TotalMemory = uint64(C.getRecommendedMaxVRAM())
// TODO is there a way to gather actual allocated video memory? (currentAllocatedSize doesn't work)
info.FreeMemory = info.TotalMemory
info.MinimumMemory = metalMinimumMemory
return []GpuInfo{info}
} }
func getCPUMem() (memInfo, error) { func GetCPUMem() (memInfo, error) {
return memInfo{ return memInfo{
TotalMemory: uint64(C.getPhysicalMemory()), TotalMemory: uint64(C.getPhysicalMemory()),
FreeMemory: 0, FreeMemory: 0,
DeviceCount: 1,
}, nil }, nil
} }
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
// No-op on darwin
return "", ""
}
...@@ -38,12 +38,17 @@ ...@@ -38,12 +38,17 @@
extern "C" { extern "C" {
#endif #endif
#define GPU_ID_LEN 64
typedef struct mem_info { typedef struct mem_info {
char *err; // If non-null, caller responsible for freeing
char gpu_id[GPU_ID_LEN];
uint64_t total; uint64_t total;
uint64_t free; uint64_t free;
unsigned int count;
int igpu_index; // If >= 0, we detected an integrated GPU to ignore // Compute Capability
char *err; // If non-null, caller responsible for freeing int major;
int minor;
} mem_info_t; } mem_info_t;
void cpu_check_ram(mem_info_t *resp); void cpu_check_ram(mem_info_t *resp);
...@@ -52,8 +57,8 @@ void cpu_check_ram(mem_info_t *resp); ...@@ -52,8 +57,8 @@ void cpu_check_ram(mem_info_t *resp);
} }
#endif #endif
#include "gpu_info_nvml.h"
#include "gpu_info_cudart.h" #include "gpu_info_cudart.h"
#include "gpu_info_nvcuda.h"
#endif // __GPU_INFO_H__ #endif // __GPU_INFO_H__
#endif // __APPLE__ #endif // __APPLE__
\ No newline at end of file
...@@ -8,9 +8,11 @@ void cpu_check_ram(mem_info_t *resp) { ...@@ -8,9 +8,11 @@ void cpu_check_ram(mem_info_t *resp) {
MEMORYSTATUSEX info; MEMORYSTATUSEX info;
info.dwLength = sizeof(info); info.dwLength = sizeof(info);
if (GlobalMemoryStatusEx(&info) != 0) { if (GlobalMemoryStatusEx(&info) != 0) {
resp->count = 1;
resp->total = info.ullTotalPhys; resp->total = info.ullTotalPhys;
resp->free = info.ullAvailPhys; resp->free = info.ullAvailPhys;
resp->major = 0;
resp->minor = 0;
snprintf(&resp->gpu_id[0], GPU_ID_LEN, "0");
} else { } else {
resp->err = LOAD_ERR(); resp->err = LOAD_ERR();
} }
...@@ -27,9 +29,11 @@ void cpu_check_ram(mem_info_t *resp) { ...@@ -27,9 +29,11 @@ void cpu_check_ram(mem_info_t *resp) {
if (sysinfo(&info) != 0) { if (sysinfo(&info) != 0) {
resp->err = strdup(strerror(errno)); resp->err = strdup(strerror(errno));
} else { } else {
resp->count = 1;
resp->total = info.totalram * info.mem_unit; resp->total = info.totalram * info.mem_unit;
resp->free = info.freeram * info.mem_unit; resp->free = info.freeram * info.mem_unit;
resp->major = 0;
resp->minor = 0;
snprintf(&resp->gpu_id[0], GPU_ID_LEN, "0");
} }
return; return;
} }
......
@@ -6,6 +6,7 @@
 void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp) {
   cudartReturn_t ret;
   resp->err = NULL;
+  resp->num_devices = 0;
   const int buflen = 256;
   char buf[buflen + 1];
   int i;
@@ -21,6 +22,7 @@ void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp) {
       {"cudaGetDeviceCount", (void *)&resp->ch.cudaGetDeviceCount},
       {"cudaDeviceGetAttribute", (void *)&resp->ch.cudaDeviceGetAttribute},
       {"cudaDriverGetVersion", (void *)&resp->ch.cudaDriverGetVersion},
+      {"cudaGetDeviceProperties", (void *)&resp->ch.cudaGetDeviceProperties},
       {NULL, NULL},
   };
@@ -36,13 +38,7 @@ void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp) {
     return;
   }
 
-  // TODO once we've squashed the remaining corner cases remove this log
-  LOG(resp->ch.verbose, "wiring cudart library functions in %s\n", cudart_lib_path);
-
   for (i = 0; l[i].s != NULL; i++) {
-    // TODO once we've squashed the remaining corner cases remove this log
-    LOG(resp->ch.verbose, "dlsym: %s\n", l[i].s);
-
     *l[i].p = LOAD_SYMBOL(resp->ch.handle, l[i].s);
     if (!l[i].p) {
       char *msg = LOAD_ERR();
@@ -63,7 +59,7 @@ void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp) {
     UNLOAD_LIBRARY(resp->ch.handle);
     resp->ch.handle = NULL;
     if (ret == CUDA_ERROR_INSUFFICIENT_DRIVER) {
-      resp->err = strdup("your nvidia driver is too old or missing, please upgrade to run ollama");
+      resp->err = strdup("your nvidia driver is too old or missing. If you have a CUDA GPU please upgrade to run ollama");
       return;
     }
     snprintf(buf, buflen, "cudart init failure: %d", ret);
@@ -85,110 +81,95 @@ void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp) {
     driverVersion.minor = (version - (driverVersion.major * 1000)) / 10;
     LOG(resp->ch.verbose, "CUDA driver version: %d-%d\n", driverVersion.major, driverVersion.minor);
   }
+
+  ret = (*resp->ch.cudaGetDeviceCount)(&resp->num_devices);
+  if (ret != CUDART_SUCCESS) {
+    LOG(resp->ch.verbose, "cudaGetDeviceCount err: %d\n", ret);
+    UNLOAD_LIBRARY(resp->ch.handle);
+    resp->ch.handle = NULL;
+    snprintf(buf, buflen, "unable to get device count: %d", ret);
+    resp->err = strdup(buf);
+    return;
+  }
 }
 
-void cudart_check_vram(cudart_handle_t h, mem_info_t *resp) {
+void cudart_check_vram(cudart_handle_t h, int i, mem_info_t *resp) {
   resp->err = NULL;
   cudartMemory_t memInfo = {0,0,0};
   cudartReturn_t ret;
   const int buflen = 256;
   char buf[buflen + 1];
-  int i;
 
   if (h.handle == NULL) {
     resp->err = strdup("cudart handle isn't initialized");
     return;
   }
 
-  // cudaGetDeviceCount takes int type, resp-> count is uint
-  int deviceCount;
-  ret = (*h.cudaGetDeviceCount)(&deviceCount);
+  ret = (*h.cudaSetDevice)(i);
   if (ret != CUDART_SUCCESS) {
-    snprintf(buf, buflen, "unable to get device count: %d", ret);
+    snprintf(buf, buflen, "cudart device failed to initialize");
     resp->err = strdup(buf);
     return;
-  } else {
-    resp->count = (unsigned int)deviceCount;
   }
 
-  resp->total = 0;
-  resp->free = 0;
-  for (i = 0; i < resp-> count; i++) {
-    ret = (*h.cudaSetDevice)(i);
-    if (ret != CUDART_SUCCESS) {
-      snprintf(buf, buflen, "cudart device failed to initialize");
-      resp->err = strdup(buf);
-      return;
-    }
-    ret = (*h.cudaMemGetInfo)(&memInfo.free, &memInfo.total);
-    if (ret != CUDART_SUCCESS) {
-      snprintf(buf, buflen, "cudart device memory info lookup failure %d", ret);
-      resp->err = strdup(buf);
-      return;
-    }
-
-    LOG(h.verbose, "[%d] CUDA totalMem %lu\n", i, memInfo.total);
-    LOG(h.verbose, "[%d] CUDA freeMem %lu\n", i, memInfo.free);
-    resp->total += memInfo.total;
-    resp->free += memInfo.free;
-  }
-}
-
-void cudart_compute_capability(cudart_handle_t h, cudart_compute_capability_t *resp) {
-  resp->err = NULL;
-  resp->major = 0;
-  resp->minor = 0;
-  int major = 0;
-  int minor = 0;
-  cudartReturn_t ret;
-  const int buflen = 256;
-  char buf[buflen + 1];
-  int i;
-
-  if (h.handle == NULL) {
-    resp->err = strdup("cudart handle not initialized");
-    return;
-  }
-
-  int devices;
-  ret = (*h.cudaGetDeviceCount)(&devices);
-  if (ret != CUDART_SUCCESS) {
-    snprintf(buf, buflen, "unable to get cudart device count: %d", ret);
-    resp->err = strdup(buf);
-    return;
-  }
-
-  for (i = 0; i < devices; i++) {
-    ret = (*h.cudaSetDevice)(i);
-    if (ret != CUDART_SUCCESS) {
-      snprintf(buf, buflen, "cudart device failed to initialize");
-      resp->err = strdup(buf);
-      return;
-    }
-
-    ret = (*h.cudaDeviceGetAttribute)(&major, cudartDevAttrComputeCapabilityMajor, i);
-    if (ret != CUDART_SUCCESS) {
-      snprintf(buf, buflen, "device compute capability lookup failure %d: %d", i, ret);
-      resp->err = strdup(buf);
-      return;
-    }
-    ret = (*h.cudaDeviceGetAttribute)(&minor, cudartDevAttrComputeCapabilityMinor, i);
-    if (ret != CUDART_SUCCESS) {
-      snprintf(buf, buflen, "device compute capability lookup failure %d: %d", i, ret);
-      resp->err = strdup(buf);
-      return;
-    }
-
-    // Report the lowest major.minor we detect as that limits our compatibility
-    if (resp->major == 0 || resp->major > major ) {
-      resp->major = major;
-      resp->minor = minor;
-    } else if ( resp->major == major && resp->minor > minor ) {
-      resp->minor = minor;
-    }
-  }
+  cudaDeviceProp_t props;
+  ret = (*h.cudaGetDeviceProperties)(&props, i);
+  if (ret != CUDART_SUCCESS) {
+    LOG(h.verbose, "[%d] device properties lookup failure: %d\n", i, ret);
+    snprintf(&resp->gpu_id[0], GPU_ID_LEN, "%d", i);
+    resp->major = 0;
+    resp->minor = 0;
+  } else {
+    int allNull = 1;
+    for (int j = 0; j < 16; j++) {
+      if (props.uuid.bytes[j] != 0) {
+        allNull = 0;
+        break;
+      }
+    }
+    if (allNull != 0) {
+      snprintf(&resp->gpu_id[0], GPU_ID_LEN, "%d", i);
+    } else {
+      // GPU-d110a105-ac29-1d54-7b49-9c90440f215b
+      snprintf(&resp->gpu_id[0], GPU_ID_LEN,
+          "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+          props.uuid.bytes[0],
+          props.uuid.bytes[1],
+          props.uuid.bytes[2],
+          props.uuid.bytes[3],
+          props.uuid.bytes[4],
+          props.uuid.bytes[5],
+          props.uuid.bytes[6],
+          props.uuid.bytes[7],
+          props.uuid.bytes[8],
+          props.uuid.bytes[9],
+          props.uuid.bytes[10],
+          props.uuid.bytes[11],
+          props.uuid.bytes[12],
+          props.uuid.bytes[13],
+          props.uuid.bytes[14],
+          props.uuid.bytes[15]
+        );
+    }
+    resp->major = props.major;
+    resp->minor = props.minor;
+
+    // TODO add other useful properties from props
+  }
+
+  ret = (*h.cudaMemGetInfo)(&memInfo.free, &memInfo.total);
+  if (ret != CUDART_SUCCESS) {
+    snprintf(buf, buflen, "cudart device memory info lookup failure %d", ret);
+    resp->err = strdup(buf);
+    return;
+  }
+
+  resp->total = memInfo.total;
+  resp->free = memInfo.free;
+
+  LOG(h.verbose, "[%s] CUDA totalMem %lu\n", resp->gpu_id, resp->total);
+  LOG(h.verbose, "[%s] CUDA freeMem %lu\n", resp->gpu_id, resp->free);
+  LOG(h.verbose, "[%s] Compute Capability %d.%d\n", resp->gpu_id, resp->major, resp->minor);
 }
 
 void cudart_release(cudart_handle_t h) {
...
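`cudart_check_vram` now reports one device per call and derives a stable ID from the device UUID, falling back to the zero-based ordinal when the runtime returns an all-zero UUID. The same formatting logic, rendered in Go purely for illustration:

```go
// Sketch of the gpu_id logic above: format a 16-byte device UUID as
// "GPU-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", or fall back to the ordinal.
package main

import "fmt"

func gpuID(ordinal int, uuid [16]byte) string {
	allNull := true
	for _, b := range uuid {
		if b != 0 {
			allNull = false
			break
		}
	}
	if allNull {
		return fmt.Sprintf("%d", ordinal)
	}
	return fmt.Sprintf("GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
		uuid[0], uuid[1], uuid[2], uuid[3], uuid[4], uuid[5], uuid[6], uuid[7],
		uuid[8], uuid[9], uuid[10], uuid[11], uuid[12], uuid[13], uuid[14], uuid[15])
}

func main() {
	uuid := [16]byte{0xd1, 0x10, 0xa1, 0x05, 0xac, 0x29, 0x1d, 0x54, 0x7b, 0x49, 0x9c, 0x90, 0x44, 0x0f, 0x21, 0x5b}
	fmt.Println(gpuID(0, uuid))       // GPU-d110a105-ac29-1d54-7b49-9c90440f215b
	fmt.Println(gpuID(1, [16]byte{})) // 1
}
```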
#ifndef __APPLE__
#ifndef __GPU_INFO_NVCUDA_H__
#define __GPU_INFO_NVCUDA_H__
#include "gpu_info.h"
// Just enough typedef's to dlopen/dlsym for memory information
typedef enum cudaError_enum {
CUDA_SUCCESS = 0,
CUDA_ERROR_INVALID_VALUE = 1,
CUDA_ERROR_MEMORY_ALLOCATION = 2,
CUDA_ERROR_NOT_INITIALIZED = 3,
CUDA_ERROR_INSUFFICIENT_DRIVER = 35,
// Other values omitted for now...
} CUresult;
typedef enum CUdevice_attribute_enum {
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
// TODO - not yet wired up but may be useful for Jetson or other
// integrated GPU scenarios with shared memory
CU_DEVICE_ATTRIBUTE_INTEGRATED = 18
} CUdevice_attribute;
typedef void *nvcudaDevice_t; // Opaque is sufficient
typedef struct nvcudaMemory_st {
uint64_t total;
uint64_t free;
} nvcudaMemory_t;
typedef struct nvcudaDriverVersion {
int major;
int minor;
} nvcudaDriverVersion_t;
typedef struct CUuuid_st {
unsigned char bytes[16];
} CUuuid;
typedef int CUdevice;
typedef void* CUcontext;
typedef struct nvcuda_handle {
void *handle;
uint16_t verbose;
CUresult (*cuInit)(unsigned int Flags);
CUresult (*cuDriverGetVersion)(int *driverVersion);
CUresult (*cuDeviceGetCount)(int *);
CUresult (*cuDeviceGet)(CUdevice* device, int ordinal);
CUresult (*cuDeviceGetAttribute)(int* pi, CUdevice_attribute attrib, CUdevice dev);
CUresult (*cuDeviceGetUuid)(CUuuid* uuid, CUdevice dev); // signature compatible with cuDeviceGetUuid_v2
// Context specific aspects
CUresult (*cuCtxCreate_v3)(CUcontext* pctx, void *params, int len, unsigned int flags, CUdevice dev);
CUresult (*cuMemGetInfo_v2)(uint64_t* free, uint64_t* total);
CUresult (*cuCtxDestroy)(CUcontext ctx);
} nvcuda_handle_t;
typedef struct nvcuda_init_resp {
char *err; // If err is non-null handle is invalid
nvcuda_handle_t ch;
int num_devices;
} nvcuda_init_resp_t;
void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp);
void nvcuda_check_vram(nvcuda_handle_t ch, int device_id, mem_info_t *resp);
void nvcuda_release(nvcuda_handle_t ch);
#endif // __GPU_INFO_NVCUDA_H__
#endif // __APPLE__
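`cuDriverGetVersion`, like the cudart call in the hunk above, reports the version as a single integer (1000*major + 10*minor). A small sketch of the decoding, assuming the nvcuda code applies the same split when filling `nvcudaDriverVersion_t`:

```go
// Sketch: decode CUDA's packed driver version integer into major.minor.
package main

import "fmt"

func splitCudaVersion(version int) (major, minor int) {
	major = version / 1000
	minor = (version - major*1000) / 10
	return major, minor
}

func main() {
	major, minor := splitCudaVersion(12040)
	fmt.Printf("CUDA driver version: %d-%d\n", major, minor) // CUDA driver version: 12-4
}
```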
@@ -9,23 +9,16 @@ import (
 func TestBasicGetGPUInfo(t *testing.T) {
 	info := GetGPUInfo()
-	assert.Contains(t, "cuda rocm cpu metal", info.Library)
-	switch runtime.GOOS {
-	case "darwin":
-		// TODO - remove this once MacOS returns some size for CPU
-		return
-	case "linux", "windows":
-		assert.Greater(t, info.TotalMemory, uint64(0))
-		assert.Greater(t, info.FreeMemory, uint64(0))
-		assert.Greater(t, info.DeviceCount, uint32(0))
-	default:
-		return
+	assert.Greater(t, len(info), 0)
+	assert.Contains(t, "cuda rocm cpu metal", info[0].Library)
+	if info[0].Library != "cpu" {
+		assert.Greater(t, info[0].TotalMemory, uint64(0))
+		assert.Greater(t, info[0].FreeMemory, uint64(0))
 	}
 }
 
 func TestCPUMemInfo(t *testing.T) {
-	info, err := getCPUMem()
+	info, err := GetCPUMem()
 	assert.NoError(t, err)
 	switch runtime.GOOS {
 	case "darwin":
...
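With `GetGPUInfo` now returning a slice, callers can reason about each device individually rather than a single aggregate. A hypothetical example (not from this diff) of picking the device with the most free memory; `GpuInfo` is stubbed with only the fields the test above touches:

```go
// Sketch: choose the GPU with the most free memory from a []GpuInfo-style slice.
package main

import "fmt"

type GpuInfo struct {
	Library     string
	ID          string
	TotalMemory uint64
	FreeMemory  uint64
}

func mostFree(gpus []GpuInfo) (GpuInfo, bool) {
	if len(gpus) == 0 {
		return GpuInfo{}, false
	}
	best := gpus[0]
	for _, g := range gpus[1:] {
		if g.FreeMemory > best.FreeMemory {
			best = g
		}
	}
	return best, true
}

func main() {
	gpus := []GpuInfo{
		{Library: "cuda", ID: "GPU-d110a105-ac29-1d54-7b49-9c90440f215b", FreeMemory: 20 << 30},
		{Library: "cuda", ID: "1", FreeMemory: 6 << 30},
	}
	if g, ok := mostFree(gpus); ok {
		fmt.Printf("picked %s (%s) with %d bytes free\n", g.ID, g.Library, g.FreeMemory)
	}
}
```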
@@ -4,7 +4,6 @@ package integration
 import (
 	"context"
-	"net/http"
 	"testing"
 	"time"
@@ -25,5 +24,5 @@ func TestContextExhaustion(t *testing.T) {
 			"num_ctx": 128,
 		},
 	}
-	GenerateTestHelper(ctx, t, &http.Client{}, req, []string{"once", "upon", "lived"})
+	GenerateTestHelper(ctx, t, req, []string{"once", "upon", "lived"})
 }