Unverified commit 45cacbaf, authored by Daniel Hiltgen and committed by GitHub
Browse files

Merge pull request #4517 from dhiltgen/gpu_incremental

Enhanced GPU discovery and multi-gpu support with concurrency
parents 6b800aa7 17df6520
...@@ -53,6 +53,8 @@ var ( ...@@ -53,6 +53,8 @@ var (
NumParallel int NumParallel int
// Set via OLLAMA_RUNNERS_DIR in the environment // Set via OLLAMA_RUNNERS_DIR in the environment
RunnersDir string RunnersDir string
// Set via OLLAMA_SCHED_SPREAD in the environment
SchedSpread bool
// Set via OLLAMA_TMPDIR in the environment // Set via OLLAMA_TMPDIR in the environment
TmpDir string TmpDir string
) )
...@@ -79,6 +81,7 @@ func AsMap() map[string]EnvVar { ...@@ -79,6 +81,7 @@ func AsMap() map[string]EnvVar {
"OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel, "Maximum number of parallel requests (default 1)"}, "OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel, "Maximum number of parallel requests (default 1)"},
"OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", AllowOrigins, "A comma separated list of allowed origins"}, "OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", AllowOrigins, "A comma separated list of allowed origins"},
"OLLAMA_RUNNERS_DIR": {"OLLAMA_RUNNERS_DIR", RunnersDir, "Location for runners"}, "OLLAMA_RUNNERS_DIR": {"OLLAMA_RUNNERS_DIR", RunnersDir, "Location for runners"},
"OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread, "Always schedule model across all GPUs"},
"OLLAMA_TMPDIR": {"OLLAMA_TMPDIR", TmpDir, "Location for temporary files"}, "OLLAMA_TMPDIR": {"OLLAMA_TMPDIR", TmpDir, "Location for temporary files"},
} }
} }
...@@ -191,6 +194,15 @@ func LoadConfig() { ...@@ -191,6 +194,15 @@ func LoadConfig() {
NoHistory = true NoHistory = true
} }
if spread := clean("OLLAMA_SCHED_SPREAD"); spread != "" {
s, err := strconv.ParseBool(spread)
if err == nil {
SchedSpread = s
} else {
SchedSpread = true
}
}
if noprune := clean("OLLAMA_NOPRUNE"); noprune != "" { if noprune := clean("OLLAMA_NOPRUNE"); noprune != "" {
NoPrune = true NoPrune = true
} }
......
...@@ -25,7 +25,16 @@ const ( ...@@ -25,7 +25,16 @@ const (
// Prefix with the node dir // Prefix with the node dir
GPUTotalMemoryFileGlob = "mem_banks/*/properties" // size_in_bytes line GPUTotalMemoryFileGlob = "mem_banks/*/properties" // size_in_bytes line
GPUUsedMemoryFileGlob = "mem_banks/*/used_memory"
// Direct Rendering Manager sysfs location
DRMDeviceDirGlob = "/sys/class/drm/card*/device"
DRMTotalMemoryFile = "mem_info_vram_total"
DRMUsedMemoryFile = "mem_info_vram_used"
// In hex; properties file is in decimal
DRMUniqueIDFile = "unique_id"
DRMVendorFile = "vendor"
DRMDeviceFile = "device"
) )
var ( var (
...@@ -35,8 +44,8 @@ var ( ...@@ -35,8 +44,8 @@ var (
) )
// Gather GPU information from the amdgpu driver if any supported GPUs are detected // Gather GPU information from the amdgpu driver if any supported GPUs are detected
func AMDGetGPUInfo() []GpuInfo { func AMDGetGPUInfo() []RocmGPUInfo {
resp := []GpuInfo{} resp := []RocmGPUInfo{}
if !AMDDetected() { if !AMDDetected() {
return resp return resp
} }
...@@ -90,7 +99,7 @@ func AMDGetGPUInfo() []GpuInfo { ...@@ -90,7 +99,7 @@ func AMDGetGPUInfo() []GpuInfo {
scanner := bufio.NewScanner(fp) scanner := bufio.NewScanner(fp)
isCPU := false isCPU := false
var major, minor, patch uint64 var major, minor, patch uint64
var vendor, device uint64 var vendor, device, uniqueID uint64
for scanner.Scan() { for scanner.Scan() {
line := strings.TrimSpace(scanner.Text()) line := strings.TrimSpace(scanner.Text())
// Note: we could also use "cpu_cores_count X" where X is greater than zero to detect CPUs // Note: we could also use "cpu_cores_count X" where X is greater than zero to detect CPUs
...@@ -121,30 +130,43 @@ func AMDGetGPUInfo() []GpuInfo { ...@@ -121,30 +130,43 @@ func AMDGetGPUInfo() []GpuInfo {
} else if strings.HasPrefix(line, "vendor_id") { } else if strings.HasPrefix(line, "vendor_id") {
ver := strings.Fields(line) ver := strings.Fields(line)
if len(ver) != 2 { if len(ver) != 2 {
slog.Debug("malformed vendor_id", "vendor_id", line) slog.Debug("malformed", "vendor_id", line)
continue continue
} }
vendor, err = strconv.ParseUint(ver[1], 10, 32) vendor, err = strconv.ParseUint(ver[1], 10, 64)
if err != nil { if err != nil {
slog.Debug("malformed vendor_id" + line) slog.Debug("malformed", "vendor_id", line, "error", err)
} }
} else if strings.HasPrefix(line, "device_id") { } else if strings.HasPrefix(line, "device_id") {
ver := strings.Fields(line) ver := strings.Fields(line)
if len(ver) != 2 { if len(ver) != 2 {
slog.Debug("malformed device_id", "device_id", line) slog.Debug("malformed", "device_id", line)
continue
}
device, err = strconv.ParseUint(ver[1], 10, 64)
if err != nil {
slog.Debug("malformed", "device_id", line, "error", err)
}
} else if strings.HasPrefix(line, "unique_id") {
ver := strings.Fields(line)
if len(ver) != 2 {
slog.Debug("malformed", "unique_id", line)
continue continue
} }
device, err = strconv.ParseUint(ver[1], 10, 32) uniqueID, err = strconv.ParseUint(ver[1], 10, 64)
if err != nil { if err != nil {
slog.Debug("malformed device_id" + line) slog.Debug("malformed", "unique_id", line, "error", err)
} }
} }
// TODO - any other properties we want to extract and record? // TODO - any other properties we want to extract and record?
// vendor_id + device_id -> pci lookup for "Name" // vendor_id + device_id -> pci lookup for "Name"
// Other metrics that may help us understand relative performance between multiple GPUs // Other metrics that may help us understand relative performance between multiple GPUs
} }
// Note: while ./mem_banks/*/used_memory exists, it doesn't appear to take other VRAM consumers
// into consideration, so we instead map the device over to the DRM driver sysfs nodes which
// do reliably report VRAM usage.
if isCPU { if isCPU {
cpuCount++ cpuCount++
continue continue
...@@ -156,7 +178,7 @@ func AMDGetGPUInfo() []GpuInfo { ...@@ -156,7 +178,7 @@ func AMDGetGPUInfo() []GpuInfo {
// Shouldn't happen, but just in case... // Shouldn't happen, but just in case...
if gpuID < 0 { if gpuID < 0 {
slog.Error("unexpected amdgpu sysfs data resulted in negative GPU ID, please set OLLAMA_DEBUG=1 and report an issue") slog.Error("unexpected amdgpu sysfs data resulted in negative GPU ID, please set OLLAMA_DEBUG=1 and report an issue")
return []GpuInfo{} return nil
} }
if int(major) < RocmComputeMin { if int(major) < RocmComputeMin {
...@@ -167,65 +189,68 @@ func AMDGetGPUInfo() []GpuInfo { ...@@ -167,65 +189,68 @@ func AMDGetGPUInfo() []GpuInfo {
// Look up the memory for the current node // Look up the memory for the current node
totalMemory := uint64(0) totalMemory := uint64(0)
usedMemory := uint64(0) usedMemory := uint64(0)
propGlob := filepath.Join(AMDNodesSysfsDir, strconv.Itoa(nodeID), GPUTotalMemoryFileGlob) var usedFile string
propFiles, err := filepath.Glob(propGlob) mapping := []struct {
if err != nil { id uint64
slog.Warn("error looking up total GPU memory", "glob", propGlob, "error", err) filename string
}{
{vendor, DRMVendorFile},
{device, DRMDeviceFile},
{uniqueID, DRMUniqueIDFile}, // Not all devices will report this
} }
// 1 or more memory banks - sum the values of all of them slog.Debug("mapping amdgpu to drm sysfs nodes", "amdgpu", match, "vendor", vendor, "device", device, "unique_id", uniqueID)
for _, propFile := range propFiles { // Map over to DRM location to find the total/free memory
fp, err := os.Open(propFile) drmMatches, _ := filepath.Glob(DRMDeviceDirGlob)
if err != nil { for _, devDir := range drmMatches {
slog.Warn("failed to open sysfs node", "file", propFile, "erroir", err) matched := true
continue for _, m := range mapping {
} if m.id == 0 {
defer fp.Close() // Null ID means it didn't populate, so we can't use it to match
scanner := bufio.NewScanner(fp) continue
for scanner.Scan() { }
line := strings.TrimSpace(scanner.Text()) filename := filepath.Join(devDir, m.filename)
if strings.HasPrefix(line, "size_in_bytes") { buf, err := os.ReadFile(filename)
ver := strings.Fields(line) if err != nil {
if len(ver) != 2 { slog.Debug("failed to read sysfs node", "file", filename, "error", err)
slog.Warn("malformed " + line) matched = false
continue break
} }
bankSizeInBytes, err := strconv.ParseUint(ver[1], 10, 64) // values here are in hex, strip off the lead 0x and parse so we can compare the numeric (decimal) values in amdgpu
if err != nil { cmp, err := strconv.ParseUint(strings.TrimPrefix(strings.TrimSpace(string(buf)), "0x"), 16, 64)
slog.Warn("malformed int " + line) if err != nil {
continue slog.Debug("failed to parse sysfs node", "file", filename, "error", err)
} matched = false
totalMemory += bankSizeInBytes break
}
if cmp != m.id {
matched = false
break
} }
} }
} if !matched {
if totalMemory == 0 {
slog.Warn("amdgpu reports zero total memory", "gpu", gpuID)
continue
}
usedGlob := filepath.Join(AMDNodesSysfsDir, strconv.Itoa(nodeID), GPUUsedMemoryFileGlob)
usedFiles, err := filepath.Glob(usedGlob)
if err != nil {
slog.Warn("error looking up used GPU memory", "glob", usedGlob, "error", err)
continue
}
for _, usedFile := range usedFiles {
fp, err := os.Open(usedFile)
if err != nil {
slog.Warn("failed to open sysfs node", "file", usedFile, "error", err)
continue continue
} }
defer fp.Close()
data, err := io.ReadAll(fp) // Found the matching DRM directory
slog.Debug("matched", "amdgpu", match, "drm", devDir)
totalFile := filepath.Join(devDir, DRMTotalMemoryFile)
buf, err := os.ReadFile(totalFile)
if err != nil { if err != nil {
slog.Warn("failed to read sysfs node", "file", usedFile, "error", err) slog.Debug("failed to read sysfs node", "file", totalFile, "error", err)
continue break
} }
used, err := strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64) totalMemory, err = strconv.ParseUint(strings.TrimSpace(string(buf)), 10, 64)
if err != nil { if err != nil {
slog.Warn("malformed used memory", "data", string(data), "error", err) slog.Debug("failed to parse sysfs node", "file", totalFile, "error", err)
continue break
}
usedFile = filepath.Join(devDir, DRMUsedMemoryFile)
usedMemory, err = getFreeMemory(usedFile)
if err != nil {
slog.Debug("failed to update used memory", "error", err)
} }
usedMemory += used break
} }
// iGPU detection, remove this check once we can support an iGPU variant of the rocm library // iGPU detection, remove this check once we can support an iGPU variant of the rocm library
...@@ -241,18 +266,21 @@ func AMDGetGPUInfo() []GpuInfo { ...@@ -241,18 +266,21 @@ func AMDGetGPUInfo() []GpuInfo {
slog.Debug("amdgpu memory", "gpu", gpuID, "total", format.HumanBytes2(totalMemory)) slog.Debug("amdgpu memory", "gpu", gpuID, "total", format.HumanBytes2(totalMemory))
slog.Debug("amdgpu memory", "gpu", gpuID, "available", format.HumanBytes2(totalMemory-usedMemory)) slog.Debug("amdgpu memory", "gpu", gpuID, "available", format.HumanBytes2(totalMemory-usedMemory))
gpuInfo := GpuInfo{ gpuInfo := RocmGPUInfo{
Library: "rocm", GpuInfo: GpuInfo{
memInfo: memInfo{ Library: "rocm",
TotalMemory: totalMemory, memInfo: memInfo{
FreeMemory: (totalMemory - usedMemory), TotalMemory: totalMemory,
FreeMemory: (totalMemory - usedMemory),
},
ID: strconv.Itoa(gpuID),
Name: name,
Compute: fmt.Sprintf("gfx%d%x%x", major, minor, patch),
MinimumMemory: rocmMinimumMemory,
DriverMajor: driverMajor,
DriverMinor: driverMinor,
}, },
ID: fmt.Sprintf("%d", gpuID), usedFilepath: usedFile,
Name: name,
Compute: fmt.Sprintf("gfx%d%x%x", major, minor, patch),
MinimumMemory: rocmMinimumMemory,
DriverMajor: driverMajor,
DriverMinor: driverMinor,
} }
// If the user wants to filter to a subset of devices, filter out if we aren't a match // If the user wants to filter to a subset of devices, filter out if we aren't a match
...@@ -276,7 +304,7 @@ func AMDGetGPUInfo() []GpuInfo { ...@@ -276,7 +304,7 @@ func AMDGetGPUInfo() []GpuInfo {
libDir, err = AMDValidateLibDir() libDir, err = AMDValidateLibDir()
if err != nil { if err != nil {
slog.Warn("unable to verify rocm library, will use cpu", "error", err) slog.Warn("unable to verify rocm library, will use cpu", "error", err)
return []GpuInfo{} return nil
} }
} }
gpuInfo.DependencyPath = libDir gpuInfo.DependencyPath = libDir
...@@ -287,7 +315,7 @@ func AMDGetGPUInfo() []GpuInfo { ...@@ -287,7 +315,7 @@ func AMDGetGPUInfo() []GpuInfo {
supported, err = GetSupportedGFX(libDir) supported, err = GetSupportedGFX(libDir)
if err != nil { if err != nil {
slog.Warn("failed to lookup supported GFX types, falling back to CPU mode", "error", err) slog.Warn("failed to lookup supported GFX types, falling back to CPU mode", "error", err)
return []GpuInfo{} return nil
} }
slog.Debug("rocm supported GPUs", "types", supported) slog.Debug("rocm supported GPUs", "types", supported)
} }
...@@ -378,3 +406,31 @@ func AMDDriverVersion() (driverMajor, driverMinor int, err error) { ...@@ -378,3 +406,31 @@ func AMDDriverVersion() (driverMajor, driverMinor int, err error) {
} }
return driverMajor, driverMinor, nil return driverMajor, driverMinor, nil
} }
// RefreshFreeMemory re-reads the sysfs node recorded in each GPU's
// usedFilepath and updates that GPU's FreeMemory to TotalMemory minus the
// value read. The value returned by getFreeMemory is treated here as the
// amount of memory currently in use.
//
// The first read or parse failure is returned immediately; GPUs later in the
// list are left untouched in that case. An empty list is a no-op.
func (gpus RocmGPUInfoList) RefreshFreeMemory() error {
	for idx := range gpus {
		gpu := &gpus[idx]
		used, err := getFreeMemory(gpu.usedFilepath)
		if err != nil {
			return err
		}
		free := gpu.TotalMemory - used
		slog.Debug("updating rocm free memory", "gpu", gpu.ID, "name", gpu.Name, "before", format.HumanBytes2(gpu.FreeMemory), "now", format.HumanBytes2(free))
		gpu.FreeMemory = free
	}
	return nil
}
// getFreeMemory reads the sysfs node at usedFile, trims surrounding
// whitespace, and parses the contents as a base-10 unsigned 64-bit integer.
//
// NOTE: despite the name, callers in this file treat the returned value as
// the amount of memory in use and subtract it from the total to get the free
// amount.
//
// On failure it returns 0 and an error that wraps the underlying cause
// (inspectable with errors.Is / errors.As). The error is not logged here;
// reporting it is left to the caller so it is not emitted twice.
func getFreeMemory(usedFile string) (uint64, error) {
	buf, err := os.ReadFile(usedFile)
	if err != nil {
		return 0, fmt.Errorf("failed to read sysfs node %s: %w", usedFile, err)
	}
	usedMemory, err := strconv.ParseUint(strings.TrimSpace(string(buf)), 10, 64)
	if err != nil {
		return 0, fmt.Errorf("failed to parse sysfs node %s: %w", usedFile, err)
	}
	return usedMemory, nil
}
...@@ -7,6 +7,7 @@ import ( ...@@ -7,6 +7,7 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"slices" "slices"
"strconv"
"strings" "strings"
"github.com/ollama/ollama/format" "github.com/ollama/ollama/format"
...@@ -24,8 +25,8 @@ var ( ...@@ -24,8 +25,8 @@ var (
RocmStandardLocations = []string{"C:\\Program Files\\AMD\\ROCm\\5.7\\bin"} // TODO glob? RocmStandardLocations = []string{"C:\\Program Files\\AMD\\ROCm\\5.7\\bin"} // TODO glob?
) )
func AMDGetGPUInfo() []GpuInfo { func AMDGetGPUInfo() []RocmGPUInfo {
resp := []GpuInfo{} resp := []RocmGPUInfo{}
hl, err := NewHipLib() hl, err := NewHipLib()
if err != nil { if err != nil {
slog.Debug(err.Error()) slog.Debug(err.Error())
...@@ -117,21 +118,24 @@ func AMDGetGPUInfo() []GpuInfo { ...@@ -117,21 +118,24 @@ func AMDGetGPUInfo() []GpuInfo {
// v5.7 only reports VRAM used by this process, so it's completely wrong and unusable // v5.7 only reports VRAM used by this process, so it's completely wrong and unusable
slog.Debug("amdgpu memory", "gpu", i, "total", format.HumanBytes2(totalMemory)) slog.Debug("amdgpu memory", "gpu", i, "total", format.HumanBytes2(totalMemory))
slog.Debug("amdgpu memory", "gpu", i, "available", format.HumanBytes2(freeMemory)) slog.Debug("amdgpu memory", "gpu", i, "available", format.HumanBytes2(freeMemory))
gpuInfo := GpuInfo{ gpuInfo := RocmGPUInfo{
Library: "rocm", GpuInfo: GpuInfo{
memInfo: memInfo{ Library: "rocm",
TotalMemory: totalMemory, memInfo: memInfo{
FreeMemory: freeMemory, TotalMemory: totalMemory,
FreeMemory: freeMemory,
},
ID: strconv.Itoa(i), // TODO this is probably wrong if we specify visible devices
DependencyPath: libDir,
MinimumMemory: rocmMinimumMemory,
Name: name,
Compute: gfx,
// TODO - this information isn't accurate on windows, so don't report it until we find the right way to retrieve
// DriverMajor: driverMajor,
// DriverMinor: driverMinor,
}, },
ID: fmt.Sprintf("%d", i), // TODO this is probably wrong if we specify visible devices index: i,
DependencyPath: libDir,
MinimumMemory: rocmMinimumMemory,
Name: name,
Compute: gfx,
// TODO - this information isn't accurate on windows, so don't report it until we find the right way to retrieve
// DriverMajor: driverMajor,
// DriverMinor: driverMinor,
} }
resp = append(resp, gpuInfo) resp = append(resp, gpuInfo)
...@@ -159,3 +163,30 @@ func AMDValidateLibDir() (string, error) { ...@@ -159,3 +163,30 @@ func AMDValidateLibDir() (string, error) {
slog.Warn("amdgpu detected, but no compatible rocm library found. Please install ROCm") slog.Warn("amdgpu detected, but no compatible rocm library found. Please install ROCm")
return "", fmt.Errorf("no suitable rocm found, falling back to CPU") return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
} }
// RefreshFreeMemory queries the HIP library for the current free memory of
// each GPU in the list and stores it in that GPU's FreeMemory.
//
// An empty list is a no-op. If the HIP library cannot be loaded, the failure
// is logged at debug level and nil is returned without changing anything. A
// HipSetDevice failure aborts the refresh and is returned; a HipMemGetInfo
// failure is logged as a warning and only that GPU is skipped.
func (gpus RocmGPUInfoList) RefreshFreeMemory() error {
	if len(gpus) == 0 {
		return nil
	}

	hipLib, err := NewHipLib()
	if err != nil {
		slog.Debug(err.Error())
		return nil
	}
	defer hipLib.Release()

	for idx := range gpus {
		gpu := &gpus[idx]

		// Select the device before asking for its memory info
		if err := hipLib.HipSetDevice(gpu.index); err != nil {
			return err
		}

		free, _, err := hipLib.HipMemGetInfo()
		if err != nil {
			slog.Warn("get mem info", "id", idx, "error", err)
			continue
		}

		slog.Debug("updating rocm free memory", "gpu", gpu.ID, "name", gpu.Name, "before", format.HumanBytes2(gpu.FreeMemory), "now", format.HumanBytes2(free))
		gpu.FreeMemory = free
	}
	return nil
}
package gpu package gpu
import ( import (
"log/slog"
"golang.org/x/sys/cpu" "golang.org/x/sys/cpu"
) )
func GetCPUVariant() string { func GetCPUCapability() CPUCapability {
if cpu.X86.HasAVX2 { if cpu.X86.HasAVX2 {
slog.Debug("CPU has AVX2") return CPUCapabilityAVX2
return "avx2"
} }
if cpu.X86.HasAVX { if cpu.X86.HasAVX {
slog.Debug("CPU has AVX") return CPUCapabilityAVX
return "avx"
} }
slog.Debug("CPU does not have vector extensions")
// else LCD // else LCD
return "" return CPUCapabilityNone
} }
...@@ -24,19 +24,37 @@ import ( ...@@ -24,19 +24,37 @@ import (
"github.com/ollama/ollama/format" "github.com/ollama/ollama/format"
) )
type handles struct { type cudaHandles struct {
deviceCount int deviceCount int
cudart *C.cudart_handle_t cudart *C.cudart_handle_t
nvcuda *C.nvcuda_handle_t nvcuda *C.nvcuda_handle_t
nvml *C.nvml_handle_t
}
type oneapiHandles struct {
oneapi *C.oneapi_handle_t oneapi *C.oneapi_handle_t
deviceCount int
} }
const ( const (
cudaMinimumMemory = 457 * format.MebiByte cudaMinimumMemory = 457 * format.MebiByte
rocmMinimumMemory = 457 * format.MebiByte rocmMinimumMemory = 457 * format.MebiByte
// TODO OneAPI minimum memory
) )
var gpuMutex sync.Mutex var (
gpuMutex sync.Mutex
bootstrapped bool
cpuCapability CPUCapability
cpus []CPUInfo
cudaGPUs []CudaGPUInfo
nvcudaLibPath string
cudartLibPath string
oneapiLibPath string
nvmlLibPath string
rocmGPUs []RocmGPUInfo
oneapiGPUs []OneapiGPUInfo
)
// With our current CUDA compile flags, older than 5.0 will not work properly // With our current CUDA compile flags, older than 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0} var CudaComputeMin = [2]C.int{5, 0}
...@@ -46,113 +64,113 @@ var RocmComputeMin = 9 ...@@ -46,113 +64,113 @@ var RocmComputeMin = 9
// TODO find a better way to detect iGPU instead of minimum memory // TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU
var CudartLinuxGlobs = []string{
"/usr/local/cuda/lib64/libcudart.so*",
"/usr/lib/x86_64-linux-gnu/nvidia/current/libcudart.so*",
"/usr/lib/x86_64-linux-gnu/libcudart.so*",
"/usr/lib/wsl/lib/libcudart.so*",
"/usr/lib/wsl/drivers/*/libcudart.so*",
"/opt/cuda/lib64/libcudart.so*",
"/usr/local/cuda*/targets/aarch64-linux/lib/libcudart.so*",
"/usr/lib/aarch64-linux-gnu/nvidia/current/libcudart.so*",
"/usr/lib/aarch64-linux-gnu/libcudart.so*",
"/usr/local/cuda/lib*/libcudart.so*",
"/usr/lib*/libcudart.so*",
"/usr/local/lib*/libcudart.so*",
}
var CudartWindowsGlobs = []string{
"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
}
var NvcudaLinuxGlobs = []string{
"/usr/local/cuda*/targets/*/lib/libcuda.so*",
"/usr/lib/*-linux-gnu/nvidia/current/libcuda.so*",
"/usr/lib/*-linux-gnu/libcuda.so*",
"/usr/lib/wsl/lib/libcuda.so*",
"/usr/lib/wsl/drivers/*/libcuda.so*",
"/opt/cuda/lib*/libcuda.so*",
"/usr/local/cuda/lib*/libcuda.so*",
"/usr/lib*/libcuda.so*",
"/usr/local/lib*/libcuda.so*",
}
var NvcudaWindowsGlobs = []string{
"c:\\windows\\system*\\nvcuda.dll",
}
var OneapiWindowsGlobs = []string{
"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
}
var OneapiLinuxGlobs = []string{
"/usr/lib/x86_64-linux-gnu/libze_intel_gpu.so*",
"/usr/lib*/libze_intel_gpu.so*",
}
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed. // Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices. // Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra string = os.Getenv("JETSON_JETPACK") var CudaTegra string = os.Getenv("JETSON_JETPACK")
// Note: gpuMutex must already be held // Note: gpuMutex must already be held
func initGPUHandles() *handles { func initCudaHandles() *cudaHandles {
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing // TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
gpuHandles := &handles{} cHandles := &cudaHandles{}
var cudartMgmtName string // Short Circuit if we already know which library to use
if nvmlLibPath != "" {
cHandles.nvml, _ = LoadNVMLMgmt([]string{nvmlLibPath})
return cHandles
}
if nvcudaLibPath != "" {
cHandles.deviceCount, cHandles.nvcuda, _ = LoadNVCUDAMgmt([]string{nvcudaLibPath})
return cHandles
}
if cudartLibPath != "" {
cHandles.deviceCount, cHandles.cudart, _ = LoadCUDARTMgmt([]string{cudartLibPath})
return cHandles
}
slog.Debug("searching for GPU discovery libraries for NVIDIA")
var cudartMgmtPatterns []string var cudartMgmtPatterns []string
var nvcudaMgmtName string
var nvcudaMgmtPatterns []string
tmpDir, _ := PayloadsDir() // Aligned with driver, we can't carry as payloads
switch runtime.GOOS { nvcudaMgmtPatterns := NvcudaGlobs
case "windows":
cudartMgmtName = "cudart64_*.dll" if runtime.GOOS == "windows" {
localAppData := os.Getenv("LOCALAPPDATA") localAppData := os.Getenv("LOCALAPPDATA")
cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", cudartMgmtName)} cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", CudartMgmtName)}
cudartMgmtPatterns = append(cudartMgmtPatterns, CudartWindowsGlobs...) }
// Aligned with driver, we can't carry as payloads tmpDir, _ := PayloadsDir()
nvcudaMgmtName = "nvcuda.dll" if tmpDir != "" {
nvcudaMgmtPatterns = NvcudaWindowsGlobs // TODO - add "payloads" for subprocess
case "linux": cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", CudartMgmtName)}
cudartMgmtName = "libcudart.so*" }
if tmpDir != "" { cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)
// TODO - add "payloads" for subprocess
cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", cudartMgmtName)} if len(NvmlGlobs) > 0 {
nvmlLibPaths := FindGPULibs(NvmlMgmtName, NvmlGlobs)
if len(nvmlLibPaths) > 0 {
nvml, libPath := LoadNVMLMgmt(nvmlLibPaths)
if nvml != nil {
slog.Debug("nvidia-ml loaded", "library", libPath)
cHandles.nvml = nvml
nvmlLibPath = libPath
}
} }
cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
// Aligned with driver, we can't carry as payloads
nvcudaMgmtName = "libcuda.so*"
nvcudaMgmtPatterns = NvcudaLinuxGlobs
default:
return gpuHandles
} }
slog.Debug("Detecting GPUs") nvcudaLibPaths := FindGPULibs(NvcudaMgmtName, nvcudaMgmtPatterns)
nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
if len(nvcudaLibPaths) > 0 { if len(nvcudaLibPaths) > 0 {
deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths) deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
if nvcuda != nil { if nvcuda != nil {
slog.Debug("detected GPUs", "count", deviceCount, "library", libPath) slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
gpuHandles.nvcuda = nvcuda cHandles.nvcuda = nvcuda
gpuHandles.deviceCount = deviceCount cHandles.deviceCount = deviceCount
return gpuHandles nvcudaLibPath = libPath
return cHandles
} }
} }
cudartLibPaths := FindGPULibs(cudartMgmtName, cudartMgmtPatterns) cudartLibPaths := FindGPULibs(CudartMgmtName, cudartMgmtPatterns)
if len(cudartLibPaths) > 0 { if len(cudartLibPaths) > 0 {
deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths) deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
if cudart != nil { if cudart != nil {
slog.Debug("detected GPUs", "library", libPath, "count", deviceCount) slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
gpuHandles.cudart = cudart cHandles.cudart = cudart
gpuHandles.deviceCount = deviceCount cHandles.deviceCount = deviceCount
return gpuHandles cudartLibPath = libPath
return cHandles
} }
} }
return gpuHandles return cHandles
}
// initOneAPIHandles returns a oneapiHandles populated by LoadOneapiMgmt.
//
// When oneapiLibPath is already set, only that path is loaded and the cached
// path is left as is. Otherwise FindGPULibs is used to search for candidates;
// if any are found they are handed to LoadOneapiMgmt and the library path it
// reports is stored in oneapiLibPath for later calls. If nothing is found the
// returned handles are left at their zero values.
//
// Note: gpuMutex must already be held
func initOneAPIHandles() *oneapiHandles {
	h := new(oneapiHandles)

	if oneapiLibPath == "" {
		// No cached library yet - search for one and remember what loaded
		if candidates := FindGPULibs(OneapiMgmtName, OneapiGlobs); len(candidates) > 0 {
			h.deviceCount, h.oneapi, oneapiLibPath = LoadOneapiMgmt(candidates)
		}
		return h
	}

	// Short circuit: we already know which library to use
	h.deviceCount, h.oneapi, _ = LoadOneapiMgmt([]string{oneapiLibPath})
	return h
}
func GetCPUInfo() GpuInfoList {
gpuMutex.Lock()
if !bootstrapped {
gpuMutex.Unlock()
GetGPUInfo()
} else {
gpuMutex.Unlock()
}
return GpuInfoList{cpus[0].GpuInfo}
} }
func GetGPUInfo() GpuInfoList { func GetGPUInfo() GpuInfoList {
...@@ -160,110 +178,245 @@ func GetGPUInfo() GpuInfoList { ...@@ -160,110 +178,245 @@ func GetGPUInfo() GpuInfoList {
// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries // GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
gpuMutex.Lock() gpuMutex.Lock()
defer gpuMutex.Unlock() defer gpuMutex.Unlock()
needRefresh := true
gpuHandles := initGPUHandles() var cHandles *cudaHandles
var oHandles *oneapiHandles
defer func() { defer func() {
if gpuHandles.cudart != nil { if cHandles != nil {
C.cudart_release(*gpuHandles.cudart) if cHandles.cudart != nil {
C.cudart_release(*cHandles.cudart)
}
if cHandles.nvcuda != nil {
C.nvcuda_release(*cHandles.nvcuda)
}
if cHandles.nvml != nil {
C.nvml_release(*cHandles.nvml)
}
} }
if gpuHandles.nvcuda != nil { if oHandles != nil {
C.nvcuda_release(*gpuHandles.nvcuda) if oHandles.oneapi != nil {
// TODO - is this needed?
C.oneapi_release(*oHandles.oneapi)
}
} }
}() }()
// All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX if !bootstrapped {
cpuVariant := GetCPUVariant() slog.Debug("Detecting GPUs")
if cpuVariant == "" && runtime.GOARCH == "amd64" { needRefresh = false
slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.") cpuCapability = GetCPUCapability()
} var memInfo C.mem_info_t
// On windows we bundle the nvidia library one level above the runner dir mem, err := GetCPUMem()
depPath := "" if err != nil {
if runtime.GOOS == "windows" && envconfig.RunnersDir != "" { slog.Warn("error looking up system memory", "error", err)
depPath = filepath.Dir(envconfig.RunnersDir) }
} cpus = []CPUInfo{CPUInfo{
GpuInfo: GpuInfo{
memInfo: mem,
Library: "cpu",
Variant: cpuCapability,
ID: "0",
},
}}
// Fallback to CPU mode if we're lacking required vector extensions on x86
if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
slog.Warn("CPU does not have minimum vector extensions, GPU inference disabled", "required", GPURunnerCPUCapability, "detected", cpuCapability)
bootstrapped = true
// No need to do any GPU discovery, since we can't run on them
return GpuInfoList{cpus[0].GpuInfo}
}
var memInfo C.mem_info_t // On windows we bundle the nvidia library one level above the runner dir
resp := []GpuInfo{} depPath := ""
if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
depPath = filepath.Dir(envconfig.RunnersDir)
}
// NVIDIA first // Load ALL libraries
for i := range gpuHandles.deviceCount { cHandles = initCudaHandles()
// TODO once we support CPU compilation variants of GPU libraries refine this...
if cpuVariant == "" && runtime.GOARCH == "amd64" { // NVIDIA
continue for i := range cHandles.deviceCount {
if cHandles.cudart != nil || cHandles.nvcuda != nil {
gpuInfo := CudaGPUInfo{
GpuInfo: GpuInfo{
Library: "cuda",
},
index: i,
}
var driverMajor int
var driverMinor int
if cHandles.cudart != nil {
C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
} else {
C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
driverMajor = int(cHandles.nvcuda.driver_major)
driverMinor = int(cHandles.nvcuda.driver_minor)
}
if memInfo.err != nil {
slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
C.free(unsafe.Pointer(memInfo.err))
continue
}
if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
continue
}
gpuInfo.TotalMemory = uint64(memInfo.total)
gpuInfo.FreeMemory = uint64(memInfo.free)
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
gpuInfo.MinimumMemory = cudaMinimumMemory
gpuInfo.DependencyPath = depPath
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
gpuInfo.DriverMajor = driverMajor
gpuInfo.DriverMinor = driverMinor
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
cudaGPUs = append(cudaGPUs, gpuInfo)
}
} }
if gpuHandles.cudart != nil || gpuHandles.nvcuda != nil {
gpuInfo := GpuInfo{ // Intel
Library: "cuda", oHandles = initOneAPIHandles()
for d := 0; oHandles.oneapi != nil && d < int(oHandles.oneapi.num_drivers); d++ {
if oHandles.oneapi == nil {
// shouldn't happen
slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
continue
}
devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
for i := range devCount {
gpuInfo := OneapiGPUInfo{
GpuInfo: GpuInfo{
Library: "oneapi",
},
driverIndex: d,
gpuIndex: int(i),
}
// TODO - split bootstrapping from updating free memory
C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
// TODO - convert this to MinimumMemory based on testing...
var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
memInfo.free = C.uint64_t(totalFreeMem)
gpuInfo.TotalMemory = uint64(memInfo.total)
gpuInfo.FreeMemory = uint64(memInfo.free)
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
// TODO dependency path?
oneapiGPUs = append(oneapiGPUs, gpuInfo)
} }
var driverMajor int }
var driverMinor int
if gpuHandles.cudart != nil { rocmGPUs = AMDGetGPUInfo()
C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo) bootstrapped = true
}
// For detected GPUs, load library if not loaded
// Refresh free memory usage
if needRefresh {
mem, err := GetCPUMem()
if err != nil {
slog.Warn("error looking up system memory", "error", err)
} else {
slog.Debug("updating system memory data",
slog.Group(
"before",
"total", format.HumanBytes2(cpus[0].TotalMemory),
"free", format.HumanBytes2(cpus[0].FreeMemory),
),
slog.Group(
"now",
"total", format.HumanBytes2(mem.TotalMemory),
"free", format.HumanBytes2(mem.FreeMemory),
),
)
cpus[0].FreeMemory = mem.FreeMemory
}
var memInfo C.mem_info_t
if cHandles == nil && len(cudaGPUs) > 0 {
cHandles = initCudaHandles()
}
for i, gpu := range cudaGPUs {
if cHandles.nvml != nil {
C.nvml_get_free(*cHandles.nvml, C.int(gpu.index), &memInfo.free, &memInfo.total, &memInfo.used)
} else if cHandles.cudart != nil {
C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
} else if cHandles.nvcuda != nil {
C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
memInfo.used = memInfo.total - memInfo.free
} else { } else {
C.nvcuda_check_vram(*gpuHandles.nvcuda, C.int(i), &memInfo) // shouldn't happen
driverMajor = int(gpuHandles.nvcuda.driver_major) slog.Warn("no valid cuda library loaded to refresh vram usage")
driverMinor = int(gpuHandles.nvcuda.driver_minor) break
} }
if memInfo.err != nil { if memInfo.err != nil {
slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err)) slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
C.free(unsafe.Pointer(memInfo.err)) C.free(unsafe.Pointer(memInfo.err))
continue continue
} }
if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) { if memInfo.free == 0 {
slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor)) slog.Warn("error looking up nvidia GPU memory")
continue continue
} }
gpuInfo.TotalMemory = uint64(memInfo.total) slog.Debug("updating cuda memory data",
gpuInfo.FreeMemory = uint64(memInfo.free) "gpu", gpu.ID,
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0]) "name", gpu.Name,
gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor) slog.Group(
gpuInfo.MinimumMemory = cudaMinimumMemory "before",
gpuInfo.DependencyPath = depPath "total", format.HumanBytes2(gpu.TotalMemory),
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0]) "free", format.HumanBytes2(gpu.FreeMemory),
gpuInfo.DriverMajor = driverMajor ),
gpuInfo.DriverMinor = driverMinor slog.Group(
"now",
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does... "total", format.HumanBytes2(uint64(memInfo.total)),
resp = append(resp, gpuInfo) "free", format.HumanBytes2(uint64(memInfo.free)),
"used", format.HumanBytes2(uint64(memInfo.used)),
),
)
cudaGPUs[i].FreeMemory = uint64(memInfo.free)
} }
}
// Then AMD
resp = append(resp, AMDGetGPUInfo()...)
if len(resp) == 0 { if oHandles == nil && len(oneapiGPUs) > 0 {
C.cpu_check_ram(&memInfo) oHandles = initOneAPIHandles()
if memInfo.err != nil {
slog.Info("error looking up CPU memory", "error", C.GoString(memInfo.err))
C.free(unsafe.Pointer(memInfo.err))
return resp
} }
gpuInfo := GpuInfo{ for i, gpu := range oneapiGPUs {
Library: "cpu", if oHandles.oneapi == nil {
Variant: cpuVariant, // shouldn't happen
slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
continue
}
C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
// TODO - convert this to MinimumMemory based on testing...
var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
memInfo.free = C.uint64_t(totalFreeMem)
oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
} }
gpuInfo.TotalMemory = uint64(memInfo.total)
gpuInfo.FreeMemory = uint64(memInfo.free)
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
resp = append(resp, gpuInfo) err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
if err != nil {
slog.Debug("problem refreshing ROCm free memory", "error", err)
}
} }
return resp resp := []GpuInfo{}
} for _, gpu := range cudaGPUs {
resp = append(resp, gpu.GpuInfo)
func GetCPUMem() (memInfo, error) { }
var ret memInfo for _, gpu := range rocmGPUs {
var info C.mem_info_t resp = append(resp, gpu.GpuInfo)
C.cpu_check_ram(&info) }
if info.err != nil { for _, gpu := range oneapiGPUs {
defer C.free(unsafe.Pointer(info.err)) resp = append(resp, gpu.GpuInfo)
return ret, fmt.Errorf(C.GoString(info.err)) }
if len(resp) == 0 {
resp = append(resp, cpus[0].GpuInfo)
} }
ret.FreeMemory = uint64(info.free) return resp
ret.TotalMemory = uint64(info.total)
return ret, nil
} }
func FindGPULibs(baseLibName string, defaultPatterns []string) []string { func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
...@@ -362,8 +515,26 @@ func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) { ...@@ -362,8 +515,26 @@ func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
return 0, nil, "" return 0, nil, ""
} }
// LoadNVMLMgmt tries each candidate path in nvmlLibPaths, in order, and
// returns a handle to the first NVML management library that nvml_init loads
// successfully, together with the path it was loaded from. It returns
// (nil, "") when none of the candidates could be loaded.
func LoadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string) {
	var resp C.nvml_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvmlLibPaths {
		lib := C.CString(libPath)
		C.nvml_init(lib, &resp)
		// nvml_init only uses the path for the duration of the call, so the C
		// copy is released right away. A defer inside the loop would keep every
		// candidate's allocation alive until the function returns.
		C.free(unsafe.Pointer(lib))
		if resp.err != nil {
			slog.Info(fmt.Sprintf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err)))
			// The error string is allocated on the C side and owned by us.
			C.free(unsafe.Pointer(resp.err))
			continue
		}
		return &resp.ch, libPath
	}
	return nil, ""
}
func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) { func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
var resp C.oneapi_init_resp_t var resp C.oneapi_init_resp_t
num_devices := 0
resp.oh.verbose = getVerboseState() resp.oh.verbose = getVerboseState()
for _, libPath := range oneapiLibPaths { for _, libPath := range oneapiLibPaths {
lib := C.CString(libPath) lib := C.CString(libPath)
...@@ -373,7 +544,10 @@ func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) { ...@@ -373,7 +544,10 @@ func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err)) slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err))
C.free(unsafe.Pointer(resp.err)) C.free(unsafe.Pointer(resp.err))
} else { } else {
return int(resp.num_devices), &resp.oh, libPath for i := range resp.oh.num_drivers {
num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
}
return num_devices, &resp.oh, libPath
} }
} }
return 0, nil, "" return 0, nil, ""
......
...@@ -24,7 +24,7 @@ func GetGPUInfo() GpuInfoList { ...@@ -24,7 +24,7 @@ func GetGPUInfo() GpuInfoList {
return []GpuInfo{ return []GpuInfo{
{ {
Library: "cpu", Library: "cpu",
Variant: GetCPUVariant(), Variant: GetCPUCapability(),
memInfo: mem, memInfo: mem,
}, },
} }
...@@ -42,6 +42,17 @@ func GetGPUInfo() GpuInfoList { ...@@ -42,6 +42,17 @@ func GetGPUInfo() GpuInfoList {
return []GpuInfo{info} return []GpuInfo{info}
} }
// GetCPUInfo describes the host CPU as a single-entry list: library "cpu",
// the variant reported by GetCPUCapability, and the memory figures returned
// by GetCPUMem.
func GetCPUInfo() GpuInfoList {
	// The error result of GetCPUMem is discarded; mem holds whatever value
	// GetCPUMem returned alongside it.
	mem, _ := GetCPUMem()
	cpu := GpuInfo{
		Library: "cpu",
		Variant: GetCPUCapability(),
		memInfo: mem,
	}
	return []GpuInfo{cpu}
}
func GetCPUMem() (memInfo, error) { func GetCPUMem() (memInfo, error) {
return memInfo{ return memInfo{
TotalMemory: uint64(C.getPhysicalMemory()), TotalMemory: uint64(C.getPhysicalMemory()),
......
...@@ -47,6 +47,7 @@ typedef struct mem_info { ...@@ -47,6 +47,7 @@ typedef struct mem_info {
char gpu_name[GPU_NAME_LEN]; char gpu_name[GPU_NAME_LEN];
uint64_t total; uint64_t total;
uint64_t free; uint64_t free;
uint64_t used;
// Compute Capability // Compute Capability
int major; int major;
...@@ -62,6 +63,7 @@ void cpu_check_ram(mem_info_t *resp); ...@@ -62,6 +63,7 @@ void cpu_check_ram(mem_info_t *resp);
#include "gpu_info_cudart.h" #include "gpu_info_cudart.h"
#include "gpu_info_nvcuda.h" #include "gpu_info_nvcuda.h"
#include "gpu_info_nvml.h"
#include "gpu_info_oneapi.h" #include "gpu_info_oneapi.h"
#endif // __GPU_INFO_H__ #endif // __GPU_INFO_H__
......
#include "gpu_info.h"
// Fallbacks for CPU mode
#ifdef _WIN32
#include <sysinfoapi.h>
// cpu_check_ram (Windows) fills resp with the host's physical memory totals
// as reported by GlobalMemoryStatusEx and sets resp->gpu_id to "0".
// On failure resp->err is set from LOAD_ERR() and the other fields are left
// untouched; on success resp->err is NULL.
void cpu_check_ram(mem_info_t *resp) {
  MEMORYSTATUSEX status;
  // dwLength must be set before the call.
  status.dwLength = sizeof(status);

  resp->err = NULL;
  if (GlobalMemoryStatusEx(&status) == 0) {
    // LOAD_ERR is defined elsewhere in the project; presumably it captures
    // the last Win32 error as a heap string - confirm against its definition.
    resp->err = LOAD_ERR();
    return;
  }

  resp->total = status.ullTotalPhys;
  resp->free = status.ullAvailPhys;
  snprintf(&resp->gpu_id[0], GPU_ID_LEN, "0");
}
#elif __linux__
#include <errno.h>
#include <string.h>
#include <sys/sysinfo.h>
// cpu_check_ram (Linux) fills resp with the host's total and free RAM, in
// bytes, as reported by sysinfo(2), and sets resp->gpu_id to "0".
// On failure resp->err holds a strdup'd strerror(errno) message (the caller
// frees it) and the other fields are left untouched; on success resp->err is
// NULL.
void cpu_check_ram(mem_info_t *resp) {
  struct sysinfo info;

  resp->err = NULL;
  if (sysinfo(&info) != 0) {
    resp->err = strdup(strerror(errno));
    return;
  }

  // totalram/freeram are unsigned long counts of mem_unit-sized units. Widen
  // to 64 bits before multiplying so the product cannot wrap on targets where
  // unsigned long is 32 bits.
  resp->total = (uint64_t)info.totalram * info.mem_unit;
  resp->free = (uint64_t)info.freeram * info.mem_unit;
  snprintf(&resp->gpu_id[0], GPU_ID_LEN, "0");
}
#elif __APPLE__
// TODO consider an Apple implementation that does something useful
// mem_info_t cpu_check_ram() {
// mem_info_t resp = {0, 0, NULL};
// return resp;
// }
#else
#error "Unsupported platform"
#endif
...@@ -94,7 +94,7 @@ void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp) { ...@@ -94,7 +94,7 @@ void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp) {
} }
void cudart_check_vram(cudart_handle_t h, int i, mem_info_t *resp) { void cudart_bootstrap(cudart_handle_t h, int i, mem_info_t *resp) {
resp->err = NULL; resp->err = NULL;
cudartMemory_t memInfo = {0,0,0}; cudartMemory_t memInfo = {0,0,0};
cudartReturn_t ret; cudartReturn_t ret;
...@@ -166,9 +166,11 @@ void cudart_check_vram(cudart_handle_t h, int i, mem_info_t *resp) { ...@@ -166,9 +166,11 @@ void cudart_check_vram(cudart_handle_t h, int i, mem_info_t *resp) {
resp->total = memInfo.total; resp->total = memInfo.total;
resp->free = memInfo.free; resp->free = memInfo.free;
resp->used = memInfo.used;
LOG(h.verbose, "[%s] CUDA totalMem %lu\n", resp->gpu_id, resp->total); LOG(h.verbose, "[%s] CUDA totalMem %lu\n", resp->gpu_id, resp->total);
LOG(h.verbose, "[%s] CUDA freeMem %lu\n", resp->gpu_id, resp->free); LOG(h.verbose, "[%s] CUDA freeMem %lu\n", resp->gpu_id, resp->free);
LOG(h.verbose, "[%s] CUDA usedMem %lu\n", resp->gpu_id, resp->used);
LOG(h.verbose, "[%s] Compute Capability %d.%d\n", resp->gpu_id, resp->major, resp->minor); LOG(h.verbose, "[%s] Compute Capability %d.%d\n", resp->gpu_id, resp->major, resp->minor);
} }
......
...@@ -140,7 +140,8 @@ typedef struct cudart_init_resp { ...@@ -140,7 +140,8 @@ typedef struct cudart_init_resp {
} cudart_init_resp_t; } cudart_init_resp_t;
void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp); void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp);
void cudart_check_vram(cudart_handle_t ch, int device_id, mem_info_t *resp); void cudart_bootstrap(cudart_handle_t ch, int device_id, mem_info_t *resp);
// TODO - if we keep this library longer term, add cudart_get_free
void cudart_release(cudart_handle_t ch); void cudart_release(cudart_handle_t ch);
#endif // __GPU_INFO_CUDART_H__ #endif // __GPU_INFO_CUDART_H__
......
...@@ -96,7 +96,7 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) { ...@@ -96,7 +96,7 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
} }
const int buflen = 256; const int buflen = 256;
void nvcuda_check_vram(nvcuda_handle_t h, int i, mem_info_t *resp) { void nvcuda_bootstrap(nvcuda_handle_t h, int i, mem_info_t *resp) {
resp->err = NULL; resp->err = NULL;
nvcudaMemory_t memInfo = {0,0}; nvcudaMemory_t memInfo = {0,0};
CUresult ret; CUresult ret;
...@@ -168,7 +168,7 @@ void nvcuda_check_vram(nvcuda_handle_t h, int i, mem_info_t *resp) { ...@@ -168,7 +168,7 @@ void nvcuda_check_vram(nvcuda_handle_t h, int i, mem_info_t *resp) {
// To get memory we have to set (and release) a context // To get memory we have to set (and release) a context
ret = (*h.cuCtxCreate_v3)(&ctx, NULL, 0, 0, device); ret = (*h.cuCtxCreate_v3)(&ctx, NULL, 0, 0, device);
if (ret != CUDA_SUCCESS) { if (ret != CUDA_SUCCESS) {
snprintf(buf, buflen, "nvcuda failed to get primary device context %d", ret); snprintf(buf, buflen, "nvcuda failed to get device context %d", ret);
resp->err = strdup(buf); resp->err = strdup(buf);
return; return;
} }
...@@ -193,7 +193,42 @@ void nvcuda_check_vram(nvcuda_handle_t h, int i, mem_info_t *resp) { ...@@ -193,7 +193,42 @@ void nvcuda_check_vram(nvcuda_handle_t h, int i, mem_info_t *resp) {
ret = (*h.cuCtxDestroy)(ctx); ret = (*h.cuCtxDestroy)(ctx);
if (ret != CUDA_SUCCESS) { if (ret != CUDA_SUCCESS) {
LOG(1, "nvcuda failed to release primary device context %d", ret); LOG(1, "nvcuda failed to release device context %d", ret);
}
}
void nvcuda_get_free(nvcuda_handle_t h, int i, uint64_t *free, uint64_t *total) {
CUresult ret;
CUcontext ctx = NULL;
CUdevice device = -1;
*free = 0;
*total = 0;
ret = (*h.cuDeviceGet)(&device, i);
if (ret != CUDA_SUCCESS) {
LOG(1, "nvcuda device failed to initialize");
return;
}
// To get memory we have to set (and release) a context
ret = (*h.cuCtxCreate_v3)(&ctx, NULL, 0, 0, device);
if (ret != CUDA_SUCCESS) {
LOG(1, "nvcuda failed to get device context %d", ret);
return;
}
ret = (*h.cuMemGetInfo_v2)(free, total);
if (ret != CUDA_SUCCESS) {
LOG(1, "nvcuda device memory info lookup failure %d", ret);
// Best effort on failure...
(*h.cuCtxDestroy)(ctx);
return;
}
ret = (*h.cuCtxDestroy)(ctx);
if (ret != CUDA_SUCCESS) {
LOG(1, "nvcuda failed to release device context %d", ret);
} }
} }
......
...@@ -67,7 +67,8 @@ typedef struct nvcuda_init_resp { ...@@ -67,7 +67,8 @@ typedef struct nvcuda_init_resp {
} nvcuda_init_resp_t; } nvcuda_init_resp_t;
void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp); void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp);
void nvcuda_check_vram(nvcuda_handle_t ch, int device_id, mem_info_t *resp); void nvcuda_bootstrap(nvcuda_handle_t ch, int device_id, mem_info_t *resp);
void nvcuda_get_free(nvcuda_handle_t ch, int device_id, uint64_t *free, uint64_t *total);
void nvcuda_release(nvcuda_handle_t ch); void nvcuda_release(nvcuda_handle_t ch);
#endif // __GPU_INFO_NVCUDA_H__ #endif // __GPU_INFO_NVCUDA_H__
......
#ifndef __APPLE__ // TODO - maybe consider nvidia support on intel macs?
#include <string.h>
#include "gpu_info_nvml.h"
// nvml_init loads the NVML shared library at nvml_lib_path, resolves the
// entry points listed in the lookup table into resp->ch, and calls
// nvmlInit_v2.
//
// On success resp->err is NULL and resp->ch is ready for use.
// On any failure resp->err is a strdup'd message (the caller frees it),
// resp->ch.handle is NULL, and any library that was opened has been unloaded.
void nvml_init(char *nvml_lib_path, nvml_init_resp_t *resp) {
  nvmlReturn_t ret;
  resp->err = NULL;
  const int buflen = 256;
  char buf[buflen + 1];
  int i;

  // Symbol name -> slot in the handle struct that receives its address.
  struct lookup {
    char *s;
    void **p;
  } l[] = {
      {"nvmlInit_v2", (void *)&resp->ch.nvmlInit_v2},
      {"nvmlShutdown", (void *)&resp->ch.nvmlShutdown},
      {"nvmlDeviceGetHandleByIndex", (void *)&resp->ch.nvmlDeviceGetHandleByIndex},
      {"nvmlDeviceGetMemoryInfo", (void *)&resp->ch.nvmlDeviceGetMemoryInfo},
      {NULL, NULL},
  };

  resp->ch.handle = LOAD_LIBRARY(nvml_lib_path, RTLD_LAZY);
  if (!resp->ch.handle) {
    char *msg = LOAD_ERR();
    LOG(resp->ch.verbose, "library %s load err: %s\n", nvml_lib_path, msg);
    snprintf(buf, buflen,
             "Unable to load %s library to query for Nvidia GPUs: %s",
             nvml_lib_path, msg);
    free(msg);
    resp->err = strdup(buf);
    return;
  }

  // TODO once we've squashed the remaining corner cases remove this log
  // LOG(resp->ch.verbose, "wiring nvidia management library functions in %s\n", nvml_lib_path);

  for (i = 0; l[i].s != NULL; i++) {
    // TODO once we've squashed the remaining corner cases remove this log
    // LOG(resp->ch.verbose, "dlsym: %s\n", l[i].s);

    *l[i].p = LOAD_SYMBOL(resp->ch.handle, l[i].s);
    // Test the resolved symbol, not the address of the slot: l[i].p always
    // points at a struct member and is never NULL.
    if (!*l[i].p) {
      // Fetch the loader error before unloading, then unload while the
      // handle is still valid and only afterwards clear it.
      char *msg = LOAD_ERR();
      LOG(resp->ch.verbose, "dlerr: %s\n", msg);
      UNLOAD_LIBRARY(resp->ch.handle);
      resp->ch.handle = NULL;
      snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s,
               msg);
      free(msg);
      resp->err = strdup(buf);
      return;
    }
  }

  ret = (*resp->ch.nvmlInit_v2)();
  if (ret != NVML_SUCCESS) {
    LOG(resp->ch.verbose, "nvmlInit_v2 err: %d\n", ret);
    UNLOAD_LIBRARY(resp->ch.handle);
    resp->ch.handle = NULL;
    snprintf(buf, buflen, "nvml vram init failure: %d", ret);
    resp->err = strdup(buf);
    return;
  }
}
// nvml_get_free queries NVML for the memory counters of the device at index
// device_id and stores them in *free, *total and *used.
//
// All three outputs are zeroed first, so on any failure (which is logged)
// the caller sees zeros rather than values left over from an earlier call.
// A zero *free is the failure signal.
void nvml_get_free(nvml_handle_t h, int device_id, uint64_t *free, uint64_t *total, uint64_t *used) {
  nvmlDevice_t device;
  nvmlMemory_t memInfo = {0};
  nvmlReturn_t ret;

  *free = 0;
  *total = 0;
  *used = 0;

  ret = (*h.nvmlDeviceGetHandleByIndex)(device_id, &device);
  if (ret != NVML_SUCCESS) {
    LOG(1, "unable to get device handle %d: %d", device_id, ret);
    return;
  }

  ret = (*h.nvmlDeviceGetMemoryInfo)(device, &memInfo);
  if (ret != NVML_SUCCESS) {
    LOG(1, "device memory info lookup failure %d: %d", device_id, ret);
    return;
  }

  *free = memInfo.free;
  *total = memInfo.total;
  *used = memInfo.used;
}
// nvml_release shuts NVML down via nvmlShutdown (logging, but otherwise
// ignoring, a non-success result) and then unloads the shared library.
void nvml_release(nvml_handle_t h) {
  LOG(h.verbose, "releasing nvml library\n");

  nvmlReturn_t status = h.nvmlShutdown();
  if (status != NVML_SUCCESS) {
    LOG(1, "error during nvmlShutdown %d", status);
  }

  UNLOAD_LIBRARY(h.handle);
  // h is passed by value, so this only clears the local copy.
  h.handle = NULL;
}
#endif // __APPLE__
\ No newline at end of file
#ifndef __APPLE__
#ifndef __GPU_INFO_NVML_H__
#define __GPU_INFO_NVML_H__
#include "gpu_info.h"
// Just enough typedef's to dlopen/dlsym for memory information
// Local stand-in for NVML's return code type so the library can be used via
// dlopen/dlsym without the NVML headers. Only the success value is declared;
// callers treat any other value as failure and log it numerically.
typedef enum nvmlReturn_enum {
NVML_SUCCESS = 0,
// Other values omitted for now...
} nvmlReturn_t;
typedef void *nvmlDevice_t; // Opaque is sufficient
// Memory counters filled in by nvmlDeviceGetMemoryInfo and copied out by
// nvml_get_free. Units are presumably bytes, per the NVML API - confirm
// against the NVML reference.
typedef struct nvmlMemory_st {
unsigned long long total;
unsigned long long free;
unsigned long long used;
} nvmlMemory_t;
// Placeholder for NVML's brand enum; only the "unknown" value is declared.
// NOTE(review): not referenced by the NVML code visible alongside this
// header - confirm whether it is still needed.
typedef enum nvmlBrandType_enum
{
NVML_BRAND_UNKNOWN = 0,
} nvmlBrandType_t;
// State for a dynamically loaded NVML library: the loader handle, a
// verbosity flag, and the entry points resolved by name in nvml_init.
typedef struct nvml_handle {
// Handle returned by LOAD_LIBRARY; NULL when the library is not loaded.
void *handle;
// Passed as the first argument to LOG for this library's diagnostics.
uint16_t verbose;
// Function pointers filled in by nvml_init via LOAD_SYMBOL.
nvmlReturn_t (*nvmlInit_v2)(void);
nvmlReturn_t (*nvmlShutdown)(void);
nvmlReturn_t (*nvmlDeviceGetHandleByIndex)(unsigned int, nvmlDevice_t *);
nvmlReturn_t (*nvmlDeviceGetMemoryInfo)(nvmlDevice_t, nvmlMemory_t *);
} nvml_handle_t;
// Result of nvml_init. err is NULL on success; otherwise it is a
// heap-allocated message that the caller must free, and ch must not be used.
typedef struct nvml_init_resp {
char *err; // If err is non-null handle is invalid
nvml_handle_t ch;
} nvml_init_resp_t;
// Error string plus a major/minor compute capability pair.
// NOTE(review): no function visible alongside this header produces or
// consumes this type - confirm whether it is still needed.
typedef struct nvml_compute_capability {
char *err;
int major;
int minor;
} nvml_compute_capability_t;
// Load the NVML library at nvml_lib_path and initialize it; the outcome is
// reported through resp (resp->err is NULL on success).
void nvml_init(char *nvml_lib_path, nvml_init_resp_t *resp);
// Query the memory counters of the device at index device_id. A zero *free
// on return indicates the lookup failed.
void nvml_get_free(nvml_handle_t ch, int device_id, uint64_t *free, uint64_t *total, uint64_t *used);
// Shut NVML down and unload the library.
void nvml_release(nvml_handle_t ch);
#endif // __GPU_INFO_NVML_H__
#endif // __APPLE__
\ No newline at end of file
...@@ -4,15 +4,17 @@ ...@@ -4,15 +4,17 @@
#include <string.h> #include <string.h>
void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp) void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp) {
{
ze_result_t ret; ze_result_t ret;
resp->err = NULL; resp->err = NULL;
resp->oh.devices = NULL;
resp->oh.num_devices = NULL;
resp->oh.drivers = NULL;
resp->oh.num_drivers = 0;
const int buflen = 256; const int buflen = 256;
char buf[buflen + 1]; char buf[buflen + 1];
int i; int i, d, count;
struct lookup struct lookup {
{
char *s; char *s;
void **p; void **p;
} l[] = { } l[] = {
...@@ -28,8 +30,7 @@ void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp) ...@@ -28,8 +30,7 @@ void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp)
}; };
resp->oh.handle = LOAD_LIBRARY(oneapi_lib_path, RTLD_LAZY); resp->oh.handle = LOAD_LIBRARY(oneapi_lib_path, RTLD_LAZY);
if (!resp->oh.handle) if (!resp->oh.handle) {
{
char *msg = LOAD_ERR(); char *msg = LOAD_ERR();
snprintf(buf, buflen, snprintf(buf, buflen,
"Unable to load %s library to query for Intel GPUs: %s\n", "Unable to load %s library to query for Intel GPUs: %s\n",
...@@ -44,14 +45,12 @@ void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp) ...@@ -44,14 +45,12 @@ void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp)
"wiring Level-Zero management library functions in %s\n", "wiring Level-Zero management library functions in %s\n",
oneapi_lib_path); oneapi_lib_path);
for (i = 0; l[i].s != NULL; i++) for (i = 0; l[i].s != NULL; i++) {
{
// TODO once we've squashed the remaining corner cases remove this log // TODO once we've squashed the remaining corner cases remove this log
LOG(resp->oh.verbose, "dlsym: %s\n", l[i].s); LOG(resp->oh.verbose, "dlsym: %s\n", l[i].s);
*l[i].p = LOAD_SYMBOL(resp->oh.handle, l[i].s); *l[i].p = LOAD_SYMBOL(resp->oh.handle, l[i].s);
if (!l[i].p) if (!l[i].p) {
{
resp->oh.handle = NULL; resp->oh.handle = NULL;
char *msg = LOAD_ERR(); char *msg = LOAD_ERR();
LOG(resp->oh.verbose, "dlerr: %s\n", msg); LOG(resp->oh.verbose, "dlerr: %s\n", msg);
...@@ -64,22 +63,67 @@ void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp) ...@@ -64,22 +63,67 @@ void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp)
} }
ret = (*resp->oh.zesInit)(0); ret = (*resp->oh.zesInit)(0);
if (ret != ZE_RESULT_SUCCESS) if (ret != ZE_RESULT_SUCCESS) {
{ LOG(resp->oh.verbose, "zesInit err: %x\n", ret);
LOG(resp->oh.verbose, "zesInit err: %d\n", ret); snprintf(buf, buflen, "oneapi vram init failure: %x", ret);
UNLOAD_LIBRARY(resp->oh.handle);
resp->oh.handle = NULL;
snprintf(buf, buflen, "oneapi vram init failure: %d", ret);
resp->err = strdup(buf); resp->err = strdup(buf);
oneapi_release(resp->oh);
return;
} }
(*resp->oh.zesDriverGet)(&resp->num_devices, NULL); count = 0;
ret = (*resp->oh.zesDriverGet)(&resp->oh.num_drivers, NULL);
if (ret != ZE_RESULT_SUCCESS) {
LOG(resp->oh.verbose, "zesDriverGet err: %x\n", ret);
snprintf(buf, buflen, "unable to get driver count: %x", ret);
resp->err = strdup(buf);
oneapi_release(resp->oh);
return;
}
LOG(resp->oh.verbose, "oneapi driver count: %d\n", resp->oh.num_drivers);
resp->oh.drivers = malloc(resp->oh.num_drivers * sizeof(zes_driver_handle_t));
resp->oh.num_devices = malloc(resp->oh.num_drivers * sizeof(uint32_t));
memset(&resp->oh.num_devices[0], 0, resp->oh.num_drivers * sizeof(uint32_t));
resp->oh.devices =
malloc(resp->oh.num_drivers * sizeof(zes_device_handle_t *));
ret = (*resp->oh.zesDriverGet)(&resp->oh.num_drivers, &resp->oh.drivers[0]);
if (ret != ZE_RESULT_SUCCESS) {
LOG(resp->oh.verbose, "zesDriverGet err: %x\n", ret);
snprintf(buf, buflen, "unable to get driver count: %x", ret);
resp->err = strdup(buf);
oneapi_release(resp->oh);
return;
}
for (d = 0; d < resp->oh.num_drivers; d++) {
ret = (*resp->oh.zesDeviceGet)(resp->oh.drivers[d],
&resp->oh.num_devices[d], NULL);
if (ret != ZE_RESULT_SUCCESS) {
LOG(resp->oh.verbose, "zesDeviceGet err: %x\n", ret);
snprintf(buf, buflen, "unable to get device count: %x", ret);
resp->err = strdup(buf);
oneapi_release(resp->oh);
return;
}
resp->oh.devices[d] =
malloc(resp->oh.num_devices[d] * sizeof(zes_device_handle_t));
ret = (*resp->oh.zesDeviceGet)(
resp->oh.drivers[d], &resp->oh.num_devices[d], resp->oh.devices[d]);
if (ret != ZE_RESULT_SUCCESS) {
LOG(resp->oh.verbose, "zesDeviceGet err: %x\n", ret);
snprintf(buf, buflen, "unable to get device count: %x", ret);
resp->err = strdup(buf);
oneapi_release(resp->oh);
return;
}
count += resp->oh.num_devices[d];
}
return; return;
} }
void oneapi_check_vram(oneapi_handle_t h, mem_info_t *resp) void oneapi_check_vram(oneapi_handle_t h, int driver, int device,
{ mem_info_t *resp) {
ze_result_t ret; ze_result_t ret;
resp->err = NULL; resp->err = NULL;
uint64_t totalMem = 0; uint64_t totalMem = 0;
...@@ -88,127 +132,126 @@ void oneapi_check_vram(oneapi_handle_t h, mem_info_t *resp) ...@@ -88,127 +132,126 @@ void oneapi_check_vram(oneapi_handle_t h, mem_info_t *resp)
char buf[buflen + 1]; char buf[buflen + 1];
int i, d, m; int i, d, m;
if (h.handle == NULL) if (h.handle == NULL) {
{
resp->err = strdup("Level-Zero handle not initialized"); resp->err = strdup("Level-Zero handle not initialized");
return; return;
} }
uint32_t driversCount = 0; if (driver > h.num_drivers || device > h.num_devices[driver]) {
ret = (*h.zesDriverGet)(&driversCount, NULL); resp->err = strdup("driver of device index out of bounds");
if (ret != ZE_RESULT_SUCCESS)
{
snprintf(buf, buflen, "unable to get driver count: %d", ret);
resp->err = strdup(buf);
return; return;
} }
LOG(h.verbose, "discovered %d Level-Zero drivers\n", driversCount);
zes_driver_handle_t *allDrivers =
malloc(driversCount * sizeof(zes_driver_handle_t));
(*h.zesDriverGet)(&driversCount, allDrivers);
resp->total = 0; resp->total = 0;
resp->free = 0; resp->free = 0;
for (d = 0; d < driversCount; d++) zes_device_ext_properties_t ext_props;
{ ext_props.stype = ZES_STRUCTURE_TYPE_DEVICE_EXT_PROPERTIES;
uint32_t deviceCount = 0; ext_props.pNext = NULL;
ret = (*h.zesDeviceGet)(allDrivers[d], &deviceCount, NULL);
if (ret != ZE_RESULT_SUCCESS) zes_device_properties_t props;
{ props.stype = ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES;
snprintf(buf, buflen, "unable to get device count: %d", ret); props.pNext = &ext_props;
ret = (*h.zesDeviceGetProperties)(h.devices[driver][device], &props);
if (ret != ZE_RESULT_SUCCESS) {
snprintf(buf, buflen, "unable to get device properties: %d", ret);
resp->err = strdup(buf);
return;
}
snprintf(&resp->gpu_name[0], GPU_NAME_LEN, props.modelName);
// TODO this needs to map to ONEAPI_DEVICE_SELECTOR syntax
// (this is probably wrong...)
// TODO - the driver isn't included - what if there are multiple drivers?
snprintf(&resp->gpu_id[0], GPU_ID_LEN, "%d", device);
if (h.verbose) {
// When in verbose mode, report more information about
// the card we discover.
LOG(h.verbose, "[%d:%d] oneAPI device name: %s\n", driver, device,
props.modelName);
LOG(h.verbose, "[%d:%d] oneAPI brand: %s\n", driver, device,
props.brandName);
LOG(h.verbose, "[%d:%d] oneAPI vendor: %s\n", driver, device,
props.vendorName);
LOG(h.verbose, "[%d:%d] oneAPI S/N: %s\n", driver, device,
props.serialNumber);
LOG(h.verbose, "[%d:%d] oneAPI board number: %s\n", driver, device,
props.boardNumber);
}
// TODO
// Compute Capability equivalent in resp->major, resp->minor, resp->patch
uint32_t memCount = 0;
ret = (*h.zesDeviceEnumMemoryModules)(h.devices[driver][device], &memCount,
NULL);
if (ret != ZE_RESULT_SUCCESS) {
snprintf(buf, buflen, "unable to enumerate Level-Zero memory modules: %x",
ret);
resp->err = strdup(buf);
return;
}
LOG(h.verbose, "discovered %d Level-Zero memory modules\n", memCount);
zes_mem_handle_t *mems = malloc(memCount * sizeof(zes_mem_handle_t));
(*h.zesDeviceEnumMemoryModules)(h.devices[driver][device], &memCount, mems);
for (m = 0; m < memCount; m++) {
zes_mem_state_t state;
state.stype = ZES_STRUCTURE_TYPE_MEM_STATE;
state.pNext = NULL;
ret = (*h.zesMemoryGetState)(mems[m], &state);
if (ret != ZE_RESULT_SUCCESS) {
snprintf(buf, buflen, "unable to get memory state: %x", ret);
resp->err = strdup(buf); resp->err = strdup(buf);
free(allDrivers); free(mems);
return; return;
} }
LOG(h.verbose, "discovered %d Level-Zero devices\n", deviceCount); resp->total += state.size;
resp->free += state.free;
zes_device_handle_t *devices = }
malloc(deviceCount * sizeof(zes_device_handle_t));
(*h.zesDeviceGet)(allDrivers[d], &deviceCount, devices);
for (i = 0; i < deviceCount; i++)
{
zes_device_ext_properties_t ext_props;
ext_props.stype = ZES_STRUCTURE_TYPE_DEVICE_EXT_PROPERTIES;
ext_props.pNext = NULL;
zes_device_properties_t props;
props.stype = ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES;
props.pNext = &ext_props;
ret = (*h.zesDeviceGetProperties)(devices[i], &props);
if (ret != ZE_RESULT_SUCCESS)
{
snprintf(buf, buflen, "unable to get device properties: %d", ret);
resp->err = strdup(buf);
free(allDrivers);
free(devices);
return;
}
if (h.verbose)
{
// When in verbose mode, report more information about
// the card we discover.
LOG(h.verbose, "[%d] oneAPI device name: %s\n", i,
props.modelName);
LOG(h.verbose, "[%d] oneAPI brand: %s\n", i,
props.brandName);
LOG(h.verbose, "[%d] oneAPI vendor: %s\n", i,
props.vendorName);
LOG(h.verbose, "[%d] oneAPI S/N: %s\n", i,
props.serialNumber);
LOG(h.verbose, "[%d] oneAPI board number: %s\n", i,
props.boardNumber);
}
uint32_t memCount = 0;
ret = (*h.zesDeviceEnumMemoryModules)(devices[i], &memCount, NULL);
if (ret != ZE_RESULT_SUCCESS)
{
snprintf(buf, buflen,
"unable to enumerate Level-Zero memory modules: %d", ret);
resp->err = strdup(buf);
free(allDrivers);
free(devices);
return;
}
LOG(h.verbose, "discovered %d Level-Zero memory modules\n", memCount);
zes_mem_handle_t *mems = malloc(memCount * sizeof(zes_mem_handle_t));
(*h.zesDeviceEnumMemoryModules)(devices[i], &memCount, mems);
for (m = 0; m < memCount; m++)
{
zes_mem_state_t state;
state.stype = ZES_STRUCTURE_TYPE_MEM_STATE;
state.pNext = NULL;
ret = (*h.zesMemoryGetState)(mems[m], &state);
if (ret != ZE_RESULT_SUCCESS)
{
snprintf(buf, buflen, "unable to get memory state: %d", ret);
resp->err = strdup(buf);
free(allDrivers);
free(devices);
free(mems);
return;
}
resp->total += state.size;
resp->free += state.free;
}
free(mems); free(mems);
} }
free(devices); void oneapi_release(oneapi_handle_t h) {
int d;
LOG(h.verbose, "releasing oneapi library\n");
for (d = 0; d < h.num_drivers; d++) {
if (h.devices != NULL && h.devices[d] != NULL) {
free(h.devices[d]);
}
}
if (h.devices != NULL) {
free(h.devices);
h.devices = NULL;
} }
if (h.num_devices != NULL) {
free(h.num_devices);
h.num_devices = NULL;
}
if (h.drivers != NULL) {
free(h.drivers);
h.drivers = NULL;
}
h.num_drivers = 0;
UNLOAD_LIBRARY(h.handle);
h.handle = NULL;
}
free(allDrivers); int oneapi_get_device_count(oneapi_handle_t h, int driver) {
if (h.handle == NULL || h.num_devices == NULL) {
return 0;
}
if (driver > h.num_drivers) {
return 0;
}
return (int)h.num_devices[driver];
} }
#endif // __APPLE__ #endif // __APPLE__
...@@ -9,8 +9,7 @@ ...@@ -9,8 +9,7 @@
#define ZE_BIT(_i) (1 << _i) #define ZE_BIT(_i) (1 << _i)
// Just enough typedef's to dlopen/dlsym for memory information // Just enough typedef's to dlopen/dlsym for memory information
typedef enum ze_result_t typedef enum ze_result_t {
{
ZE_RESULT_SUCCESS = 0, ZE_RESULT_SUCCESS = 0,
// Other values omitted for now... // Other values omitted for now...
} ze_result_t; } ze_result_t;
...@@ -20,13 +19,11 @@ typedef struct _zes_driver_handle_t *zes_driver_handle_t; ...@@ -20,13 +19,11 @@ typedef struct _zes_driver_handle_t *zes_driver_handle_t;
typedef struct _zes_device_handle_t *zes_device_handle_t; typedef struct _zes_device_handle_t *zes_device_handle_t;
typedef struct _zes_mem_handle_t *zes_mem_handle_t; typedef struct _zes_mem_handle_t *zes_mem_handle_t;
typedef enum _ze_structure_type_t typedef enum _ze_structure_type_t {
{
ZE_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff ZE_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff
} ze_structure_type_t; } ze_structure_type_t;
typedef enum _zes_structure_type_t typedef enum _zes_structure_type_t {
{
ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES = 0x1, ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES = 0x1,
ZES_STRUCTURE_TYPE_MEM_PROPERTIES = 0xb, ZES_STRUCTURE_TYPE_MEM_PROPERTIES = 0xb,
ZES_STRUCTURE_TYPE_MEM_STATE = 0x1e, ZES_STRUCTURE_TYPE_MEM_STATE = 0x1e,
...@@ -34,35 +31,29 @@ typedef enum _zes_structure_type_t ...@@ -34,35 +31,29 @@ typedef enum _zes_structure_type_t
ZES_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff ZES_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff
} zes_structure_type_t; } zes_structure_type_t;
typedef enum _zes_mem_type_t typedef enum _zes_mem_type_t {
{
ZES_MEM_TYPE_FORCE_UINT32 = 0x7fffffff ZES_MEM_TYPE_FORCE_UINT32 = 0x7fffffff
} zes_mem_type_t; } zes_mem_type_t;
typedef enum _zes_mem_loc_t typedef enum _zes_mem_loc_t {
{
ZES_MEM_LOC_SYSTEM = 0, ZES_MEM_LOC_SYSTEM = 0,
ZES_MEM_LOC_DEVICE = 1, ZES_MEM_LOC_DEVICE = 1,
ZES_MEM_LOC_FORCE_UINT32 = 0x7fffffff ZES_MEM_LOC_FORCE_UINT32 = 0x7fffffff
} zes_mem_loc_t; } zes_mem_loc_t;
typedef enum _zes_mem_health_t typedef enum _zes_mem_health_t {
{
ZES_MEM_HEALTH_FORCE_UINT32 = 0x7fffffff ZES_MEM_HEALTH_FORCE_UINT32 = 0x7fffffff
} zes_mem_health_t; } zes_mem_health_t;
typedef struct _ze_device_uuid_t typedef struct _ze_device_uuid_t {
{
uint8_t id[ZE_MAX_DEVICE_UUID_SIZE]; uint8_t id[ZE_MAX_DEVICE_UUID_SIZE];
} ze_device_uuid_t; } ze_device_uuid_t;
typedef struct _zes_uuid_t typedef struct _zes_uuid_t {
{
uint8_t id[ZE_MAX_DEVICE_UUID_SIZE]; uint8_t id[ZE_MAX_DEVICE_UUID_SIZE];
} zes_uuid_t; } zes_uuid_t;
typedef enum _ze_device_type_t typedef enum _ze_device_type_t {
{
ZE_DEVICE_TYPE_GPU = 1, ZE_DEVICE_TYPE_GPU = 1,
ZE_DEVICE_TYPE_CPU = 2, ZE_DEVICE_TYPE_CPU = 2,
ZE_DEVICE_TYPE_FPGA = 3, ZE_DEVICE_TYPE_FPGA = 3,
...@@ -71,8 +62,7 @@ typedef enum _ze_device_type_t ...@@ -71,8 +62,7 @@ typedef enum _ze_device_type_t
ZE_DEVICE_TYPE_FORCE_UINT32 = 0x7fffffff ZE_DEVICE_TYPE_FORCE_UINT32 = 0x7fffffff
} ze_device_type_t; } ze_device_type_t;
typedef enum _zes_device_type_t typedef enum _zes_device_type_t {
{
ZES_DEVICE_TYPE_GPU = 1, ZES_DEVICE_TYPE_GPU = 1,
ZES_DEVICE_TYPE_CPU = 2, ZES_DEVICE_TYPE_CPU = 2,
ZES_DEVICE_TYPE_FPGA = 3, ZES_DEVICE_TYPE_FPGA = 3,
...@@ -82,8 +72,7 @@ typedef enum _zes_device_type_t ...@@ -82,8 +72,7 @@ typedef enum _zes_device_type_t
} zes_device_type_t; } zes_device_type_t;
typedef uint32_t ze_device_property_flags_t; typedef uint32_t ze_device_property_flags_t;
typedef enum _ze_device_property_flag_t typedef enum _ze_device_property_flag_t {
{
ZE_DEVICE_PROPERTY_FLAG_INTEGRATED = ZE_BIT(0), ZE_DEVICE_PROPERTY_FLAG_INTEGRATED = ZE_BIT(0),
ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE = ZE_BIT(1), ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE = ZE_BIT(1),
ZE_DEVICE_PROPERTY_FLAG_ECC = ZE_BIT(2), ZE_DEVICE_PROPERTY_FLAG_ECC = ZE_BIT(2),
...@@ -92,8 +81,7 @@ typedef enum _ze_device_property_flag_t ...@@ -92,8 +81,7 @@ typedef enum _ze_device_property_flag_t
} ze_device_property_flag_t; } ze_device_property_flag_t;
typedef uint32_t zes_device_property_flags_t; typedef uint32_t zes_device_property_flags_t;
typedef enum _zes_device_property_flag_t typedef enum _zes_device_property_flag_t {
{
ZES_DEVICE_PROPERTY_FLAG_INTEGRATED = ZE_BIT(0), ZES_DEVICE_PROPERTY_FLAG_INTEGRATED = ZE_BIT(0),
ZES_DEVICE_PROPERTY_FLAG_SUBDEVICE = ZE_BIT(1), ZES_DEVICE_PROPERTY_FLAG_SUBDEVICE = ZE_BIT(1),
ZES_DEVICE_PROPERTY_FLAG_ECC = ZE_BIT(2), ZES_DEVICE_PROPERTY_FLAG_ECC = ZE_BIT(2),
...@@ -101,8 +89,7 @@ typedef enum _zes_device_property_flag_t ...@@ -101,8 +89,7 @@ typedef enum _zes_device_property_flag_t
ZES_DEVICE_PROPERTY_FLAG_FORCE_UINT32 = 0x7fffffff ZES_DEVICE_PROPERTY_FLAG_FORCE_UINT32 = 0x7fffffff
} zes_device_property_flag_t; } zes_device_property_flag_t;
typedef struct _ze_device_properties_t typedef struct _ze_device_properties_t {
{
ze_structure_type_t stype; ze_structure_type_t stype;
void *pNext; void *pNext;
ze_device_type_t type; ze_device_type_t type;
...@@ -126,8 +113,7 @@ typedef struct _ze_device_properties_t ...@@ -126,8 +113,7 @@ typedef struct _ze_device_properties_t
char name[ZE_MAX_DEVICE_NAME]; char name[ZE_MAX_DEVICE_NAME];
} ze_device_properties_t; } ze_device_properties_t;
typedef struct _zes_device_properties_t typedef struct _zes_device_properties_t {
{
zes_structure_type_t stype; zes_structure_type_t stype;
void *pNext; void *pNext;
ze_device_properties_t core; ze_device_properties_t core;
...@@ -140,8 +126,7 @@ typedef struct _zes_device_properties_t ...@@ -140,8 +126,7 @@ typedef struct _zes_device_properties_t
char driverVersion[ZES_STRING_PROPERTY_SIZE]; char driverVersion[ZES_STRING_PROPERTY_SIZE];
} zes_device_properties_t; } zes_device_properties_t;
typedef struct _zes_device_ext_properties_t typedef struct _zes_device_ext_properties_t {
{
zes_structure_type_t stype; zes_structure_type_t stype;
void *pNext; void *pNext;
zes_uuid_t uuid; zes_uuid_t uuid;
...@@ -149,8 +134,7 @@ typedef struct _zes_device_ext_properties_t ...@@ -149,8 +134,7 @@ typedef struct _zes_device_ext_properties_t
zes_device_property_flags_t flags; zes_device_property_flags_t flags;
} zes_device_ext_properties_t; } zes_device_ext_properties_t;
typedef struct _zes_mem_properties_t typedef struct _zes_mem_properties_t {
{
zes_structure_type_t stype; zes_structure_type_t stype;
void *pNext; void *pNext;
zes_mem_type_t type; zes_mem_type_t type;
...@@ -162,8 +146,7 @@ typedef struct _zes_mem_properties_t ...@@ -162,8 +146,7 @@ typedef struct _zes_mem_properties_t
int32_t numChannels; int32_t numChannels;
} zes_mem_properties_t; } zes_mem_properties_t;
typedef struct _zes_mem_state_t typedef struct _zes_mem_state_t {
{
zes_structure_type_t stype; zes_structure_type_t stype;
const void *pNext; const void *pNext;
zes_mem_health_t health; zes_mem_health_t health;
...@@ -171,10 +154,19 @@ typedef struct _zes_mem_state_t ...@@ -171,10 +154,19 @@ typedef struct _zes_mem_state_t
uint64_t size; uint64_t size;
} zes_mem_state_t; } zes_mem_state_t;
typedef struct oneapi_handle typedef struct oneapi_handle {
{
void *handle; void *handle;
uint16_t verbose; uint16_t verbose;
uint32_t num_drivers;
zes_driver_handle_t *drivers;
uint32_t *num_devices;
zes_device_handle_t **devices;
// TODO Driver major, minor information
// int driver_major;
// int driver_minor;
ze_result_t (*zesInit)(int); ze_result_t (*zesInit)(int);
ze_result_t (*zesDriverGet)(uint32_t *pCount, zes_driver_handle_t *phDrivers); ze_result_t (*zesDriverGet)(uint32_t *pCount, zes_driver_handle_t *phDrivers);
ze_result_t (*zesDeviceGet)(zes_driver_handle_t hDriver, uint32_t *pCount, ze_result_t (*zesDeviceGet)(zes_driver_handle_t hDriver, uint32_t *pCount,
...@@ -191,21 +183,21 @@ typedef struct oneapi_handle ...@@ -191,21 +183,21 @@ typedef struct oneapi_handle
} oneapi_handle_t; } oneapi_handle_t;
typedef struct oneapi_init_resp typedef struct oneapi_init_resp {
{
char *err; // If err is non-null handle is invalid char *err; // If err is non-null handle is invalid
int num_devices;
oneapi_handle_t oh; oneapi_handle_t oh;
} oneapi_init_resp_t; } oneapi_init_resp_t;
typedef struct oneapi_version_resp typedef struct oneapi_version_resp {
{
ze_result_t status; ze_result_t status;
char *str; // Contains version or error string if status != 0 char *str; // Contains version or error string if status != 0
} oneapi_version_resp_t; } oneapi_version_resp_t;
void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp); void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp);
void oneapi_check_vram(oneapi_handle_t rh, mem_info_t *resp); void oneapi_check_vram(oneapi_handle_t h, int driver, int device,
mem_info_t *resp);
void oneapi_release(oneapi_handle_t h);
int oneapi_get_device_count(oneapi_handle_t h, int driver);
#endif // __GPU_INFO_INTEL_H__ #endif // __GPU_INFO_INTEL_H__
#endif // __APPLE__ #endif // __APPLE__
package gpu
import (
"bufio"
"fmt"
"os"
"strings"
"github.com/ollama/ollama/format"
)
// CudartGlobs lists filesystem glob patterns that match the CUDA runtime
// shared library (libcudart.so*) in a set of known install locations,
// covering x86_64 and aarch64 library directories as well as WSL paths.
// NOTE(review): presumably consumed by the GPU library discovery code in
// list order — confirm against the caller before reordering.
var CudartGlobs = []string{
"/usr/local/cuda/lib64/libcudart.so*",
"/usr/lib/x86_64-linux-gnu/nvidia/current/libcudart.so*",
"/usr/lib/x86_64-linux-gnu/libcudart.so*",
"/usr/lib/wsl/lib/libcudart.so*",
"/usr/lib/wsl/drivers/*/libcudart.so*",
"/opt/cuda/lib64/libcudart.so*",
"/usr/local/cuda*/targets/aarch64-linux/lib/libcudart.so*",
"/usr/lib/aarch64-linux-gnu/nvidia/current/libcudart.so*",
"/usr/lib/aarch64-linux-gnu/libcudart.so*",
"/usr/local/cuda/lib*/libcudart.so*",
"/usr/lib*/libcudart.so*",
"/usr/local/lib*/libcudart.so*",
}
// NvmlGlobs is intentionally empty in this file: no NVML library search
// patterns are defined here (NvmlMgmtName is likewise empty).
var NvmlGlobs = []string{}
// NvcudaGlobs lists filesystem glob patterns that match the NVIDIA CUDA
// driver shared library (libcuda.so*) in a set of known install locations,
// including CUDA toolkit target directories and WSL paths.
// NOTE(review): presumably consumed by the GPU library discovery code in
// list order — confirm against the caller before reordering.
var NvcudaGlobs = []string{
"/usr/local/cuda*/targets/*/lib/libcuda.so*",
"/usr/lib/*-linux-gnu/nvidia/current/libcuda.so*",
"/usr/lib/*-linux-gnu/libcuda.so*",
"/usr/lib/wsl/lib/libcuda.so*",
"/usr/lib/wsl/drivers/*/libcuda.so*",
"/opt/cuda/lib*/libcuda.so*",
"/usr/local/cuda/lib*/libcuda.so*",
"/usr/lib*/libcuda.so*",
"/usr/local/lib*/libcuda.so*",
}
// OneapiGlobs lists filesystem glob patterns that match the Intel
// libze_intel_gpu shared library under the system library directories.
var OneapiGlobs = []string{
"/usr/lib/x86_64-linux-gnu/libze_intel_gpu.so*",
"/usr/lib*/libze_intel_gpu.so*",
}
// Library file names (or glob patterns) for each GPU management library.
// NOTE(review): presumably these are the base names searched for in
// additional directories by the discovery code — confirm against the caller.
// NOTE(review): OneapiMgmtName has no trailing "*" while the CUDA names do;
// verify whether versioned libze_intel_gpu.so.N files are meant to match.
var CudartMgmtName = "libcudart.so*"
var NvcudaMgmtName = "libcuda.so*"
var NvmlMgmtName = "" // not currently wired on linux
var OneapiMgmtName = "libze_intel_gpu.so"
// GetCPUMem reports system memory figures parsed from /proc/meminfo.
//
// TotalMemory comes from the MemTotal field. FreeMemory comes from
// MemAvailable when that field is present and non-zero; otherwise it falls
// back to MemFree + Buffers + Cached. Parsed values are multiplied by
// format.KibiByte before being returned.
//
// An error is returned when the file cannot be opened, when one of the
// recognized fields fails to parse, or when reading the file fails part way
// through.
func GetCPUMem() (memInfo, error) {
	var mem memInfo
	var total, available, free, buffers, cached uint64
	f, err := os.Open("/proc/meminfo")
	if err != nil {
		return mem, err
	}
	defer f.Close()
	s := bufio.NewScanner(f)
	for s.Scan() {
		line := s.Text()
		switch {
		case strings.HasPrefix(line, "MemTotal:"):
			_, err = fmt.Sscanf(line, "MemTotal:%d", &total)
		case strings.HasPrefix(line, "MemAvailable:"):
			_, err = fmt.Sscanf(line, "MemAvailable:%d", &available)
		case strings.HasPrefix(line, "MemFree:"):
			_, err = fmt.Sscanf(line, "MemFree:%d", &free)
		case strings.HasPrefix(line, "Buffers:"):
			_, err = fmt.Sscanf(line, "Buffers:%d", &buffers)
		case strings.HasPrefix(line, "Cached:"):
			_, err = fmt.Sscanf(line, "Cached:%d", &cached)
		default:
			// Not a field we care about.
			continue
		}
		if err != nil {
			return mem, err
		}

		// MemTotal and MemAvailable are the preferred pair; once both are
		// known there is no need to read the rest of the file.
		if total > 0 && available > 0 {
			mem.TotalMemory = total * format.KibiByte
			mem.FreeMemory = available * format.KibiByte
			return mem, nil
		}
	}
	// Scan returns false for both EOF and read failures; surface the latter
	// instead of reporting partially parsed (possibly zero) values as success.
	if err := s.Err(); err != nil {
		return mem, err
	}

	// MemAvailable was not reported: approximate it from the older fields.
	mem.TotalMemory = total * format.KibiByte
	mem.FreeMemory = (free + buffers + cached) * format.KibiByte
	return mem, nil
}
package gpu
import (
"fmt"
"syscall"
"unsafe"
)
// MEMORYSTATUSEX is the buffer handed by pointer to the kernel32
// GlobalMemoryStatusEx procedure (see GetCPUMem). GetCPUMem sets length to
// the size of this struct before the call and reads TotalPhys and AvailPhys
// from the result.
// NOTE(review): field order and widths presumably mirror the Win32
// MEMORYSTATUSEX layout, so fields must not be reordered or resized —
// confirm against the Win32 documentation.
type MEMORYSTATUSEX struct {
length uint32
MemoryLoad uint32
TotalPhys uint64
AvailPhys uint64
TotalPageFile uint64
AvailPageFile uint64
TotalVirtual uint64
AvailVirtual uint64
AvailExtendedVirtual uint64
}
var (
// k32 is a lazily loaded handle to kernel32.dll.
k32 = syscall.NewLazyDLL("kernel32.dll")
// globalMemoryStatusExProc is the GlobalMemoryStatusEx procedure looked up in kernel32.dll.
globalMemoryStatusExProc = k32.NewProc("GlobalMemoryStatusEx")
// sizeofMemoryStatusEx is the size in bytes of MEMORYSTATUSEX; GetCPUMem stores it in the struct's length field.
sizeofMemoryStatusEx = uint32(unsafe.Sizeof(MEMORYSTATUSEX{}))
)
// CudartGlobs lists glob patterns matching the CUDA runtime DLL
// (cudart64_*.dll) in any versioned CUDA toolkit bin directory under
// Program Files.
var CudartGlobs = []string{
"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
}
// NvmlGlobs lists the path checked for nvml.dll: a single fixed location
// in System32 with no wildcards.
var NvmlGlobs = []string{
"c:\\Windows\\System32\\nvml.dll",
}
// NvcudaGlobs lists glob patterns matching nvcuda.dll in any directory
// under c:\windows whose name starts with "system".
var NvcudaGlobs = []string{
"c:\\windows\\system*\\nvcuda.dll",
}
// OneapiGlobs lists glob patterns matching ze_intel_gpu64.dll in any
// subdirectory of the DriverStore FileRepository.
var OneapiGlobs = []string{
"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
}
// DLL file names (or glob patterns) for each GPU management library.
// NOTE(review): presumably these are the base names searched for in
// additional directories by the discovery code — confirm against the caller.
var CudartMgmtName = "cudart64_*.dll"
var NvcudaMgmtName = "nvcuda.dll"
var NvmlMgmtName = "nvml.dll"
var OneapiMgmtName = "ze_intel_gpu64.dll"
// GetCPUMem reports total and available physical memory obtained from the
// kernel32 GlobalMemoryStatusEx procedure. A zero return value from the call
// is treated as failure and the accompanying error is wrapped and returned.
func GetCPUMem() (memInfo, error) {
	// The length field is populated with the struct size before the call.
	status := MEMORYSTATUSEX{length: sizeofMemoryStatusEx}
	statusPtr := uintptr(unsafe.Pointer(&status))

	ret, _, callErr := globalMemoryStatusExProc.Call(statusPtr)
	if ret == 0 {
		return memInfo{}, fmt.Errorf("GlobalMemoryStatusEx failed: %w", callErr)
	}

	result := memInfo{
		TotalMemory: status.TotalPhys,
		FreeMemory:  status.AvailPhys,
	}
	return result, nil
}
...@@ -18,7 +18,7 @@ type GpuInfo struct { ...@@ -18,7 +18,7 @@ type GpuInfo struct {
Library string `json:"library,omitempty"` Library string `json:"library,omitempty"`
// Optional variant to select (e.g. versions, cpu feature flags) // Optional variant to select (e.g. versions, cpu feature flags)
Variant string `json:"variant,omitempty"` Variant CPUCapability `json:"variant"`
// MinimumMemory represents the minimum memory required to use the GPU // MinimumMemory represents the minimum memory required to use the GPU
MinimumMemory uint64 `json:"-"` MinimumMemory uint64 `json:"-"`
...@@ -38,6 +38,30 @@ type GpuInfo struct { ...@@ -38,6 +38,30 @@ type GpuInfo struct {
// TODO other performance capability info to help in scheduling decisions // TODO other performance capability info to help in scheduling decisions
} }
// CPUInfo embeds GpuInfo without adding any fields of its own.
type CPUInfo struct {
GpuInfo
}
// CudaGPUInfo embeds GpuInfo and adds an unexported index.
// NOTE(review): index is presumably the CUDA device ordinal — confirm
// against the discovery code that populates it.
type CudaGPUInfo struct {
GpuInfo
index int //nolint:unused,nolintlint
}
// CudaGPUInfoList is a slice of CudaGPUInfo values.
type CudaGPUInfoList []CudaGPUInfo
// RocmGPUInfo embeds GpuInfo and adds an unexported file path and index.
// NOTE(review): usedFilepath presumably names the file from which used
// memory is read, and index the device ordinal — confirm against the code
// that populates them.
type RocmGPUInfo struct {
GpuInfo
usedFilepath string //nolint:unused,nolintlint
index int //nolint:unused,nolintlint
}
// RocmGPUInfoList is a slice of RocmGPUInfo values.
type RocmGPUInfoList []RocmGPUInfo
// OneapiGPUInfo embeds GpuInfo and adds unexported driver and GPU indices.
// NOTE(review): the pair presumably identifies a device as (driver, device
// within that driver) — confirm against the discovery code.
type OneapiGPUInfo struct {
GpuInfo
driverIndex int //nolint:unused,nolintlint
gpuIndex int //nolint:unused,nolintlint
}
// OneapiGPUInfoList is a slice of OneapiGPUInfo values.
type OneapiGPUInfoList []OneapiGPUInfo
type GpuInfoList []GpuInfo type GpuInfoList []GpuInfo
// Split up the set of gpu info's by Library and variant // Split up the set of gpu info's by Library and variant
...@@ -47,8 +71,8 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList { ...@@ -47,8 +71,8 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList {
for _, info := range l { for _, info := range l {
found := false found := false
requested := info.Library requested := info.Library
if info.Variant != "" { if info.Variant != CPUCapabilityNone {
requested += "_" + info.Variant requested += "_" + info.Variant.String()
} }
for i, lib := range libs { for i, lib := range libs {
if lib == requested { if lib == requested {
...@@ -86,3 +110,26 @@ type ByFreeMemory []GpuInfo ...@@ -86,3 +110,26 @@ type ByFreeMemory []GpuInfo
func (a ByFreeMemory) Len() int { return len(a) } func (a ByFreeMemory) Len() int { return len(a) }
func (a ByFreeMemory) Swap(i, j int) { a[i], a[j] = a[j], a[i] } func (a ByFreeMemory) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a ByFreeMemory) Less(i, j int) bool { return a[i].FreeMemory < a[j].FreeMemory } func (a ByFreeMemory) Less(i, j int) bool { return a[i].FreeMemory < a[j].FreeMemory }
// CPUCapability enumerates CPU vector-extension levels.
type CPUCapability uint32

// Known capability levels, numbered from zero in declaration order.
const (
	CPUCapabilityNone CPUCapability = iota
	CPUCapabilityAVX
	CPUCapabilityAVX2
	// TODO AVX512
)

// Override at build time when building base GPU runners
var GPURunnerCPUCapability = CPUCapabilityAVX

// String returns the short lowercase name of the capability: "avx" or
// "avx2". Every other value, including CPUCapabilityNone, yields
// "no vector extensions".
func (c CPUCapability) String() string {
	if c == CPUCapabilityAVX {
		return "avx"
	}
	if c == CPUCapabilityAVX2 {
		return "avx2"
	}
	return "no vector extensions"
}
...@@ -19,17 +19,19 @@ func TestMultiModelConcurrency(t *testing.T) { ...@@ -19,17 +19,19 @@ func TestMultiModelConcurrency(t *testing.T) {
var ( var (
req = [2]api.GenerateRequest{ req = [2]api.GenerateRequest{
{ {
Model: "orca-mini", Model: "orca-mini",
Prompt: "why is the ocean blue?", Prompt: "why is the ocean blue?",
Stream: &stream, Stream: &stream,
KeepAlive: &api.Duration{Duration: 10 * time.Second},
Options: map[string]interface{}{ Options: map[string]interface{}{
"seed": 42, "seed": 42,
"temperature": 0.0, "temperature": 0.0,
}, },
}, { }, {
Model: "tinydolphin", Model: "tinydolphin",
Prompt: "what is the origin of the us thanksgiving holiday?", Prompt: "what is the origin of the us thanksgiving holiday?",
Stream: &stream, Stream: &stream,
KeepAlive: &api.Duration{Duration: 10 * time.Second},
Options: map[string]interface{}{ Options: map[string]interface{}{
"seed": 42, "seed": 42,
"temperature": 0.0, "temperature": 0.0,
...@@ -38,42 +40,64 @@ func TestMultiModelConcurrency(t *testing.T) { ...@@ -38,42 +40,64 @@ func TestMultiModelConcurrency(t *testing.T) {
} }
resp = [2][]string{ resp = [2][]string{
[]string{"sunlight"}, []string{"sunlight"},
[]string{"england", "english", "massachusetts", "pilgrims"}, []string{"england", "english", "massachusetts", "pilgrims", "british"},
} }
) )
var wg sync.WaitGroup var wg sync.WaitGroup
wg.Add(len(req)) wg.Add(len(req))
ctx, cancel := context.WithTimeout(context.Background(), time.Second*120) ctx, cancel := context.WithTimeout(context.Background(), time.Second*240)
defer cancel() defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
for i := 0; i < len(req); i++ {
require.NoError(t, PullIfMissing(ctx, client, req[i].Model))
}
for i := 0; i < len(req); i++ { for i := 0; i < len(req); i++ {
go func(i int) { go func(i int) {
defer wg.Done() defer wg.Done()
GenerateTestHelper(ctx, t, req[i], resp[i]) DoGenerate(ctx, t, client, req[i], resp[i], 60*time.Second, 10*time.Second)
}(i) }(i)
} }
wg.Wait() wg.Wait()
} }
func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) { func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) // GTX 750 2G card takes ~9 minutes req, resp := GenerateRequests()
reqLimit := len(req)
iterLimit := 5
vram := os.Getenv("OLLAMA_MAX_VRAM")
if vram != "" {
max, err := strconv.ParseUint(vram, 10, 64)
require.NoError(t, err)
// Don't hammer on small VRAM cards...
if max < 4*1024*1024*1024 {
reqLimit = min(reqLimit, 2)
iterLimit = 2
}
}
ctx, cancel := context.WithTimeout(context.Background(), 9*time.Minute)
defer cancel() defer cancel()
client, _, cleanup := InitServerConnection(ctx, t) client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup() defer cleanup()
req, resp := GenerateRequests()
// Get the server running (if applicable) warm the model up with a single initial request // Get the server running (if applicable) warm the model up with a single initial request
DoGenerate(ctx, t, client, req[0], resp[0], 60*time.Second, 5*time.Second) DoGenerate(ctx, t, client, req[0], resp[0], 60*time.Second, 10*time.Second)
var wg sync.WaitGroup var wg sync.WaitGroup
wg.Add(len(req)) wg.Add(reqLimit)
for i := 0; i < len(req); i++ { for i := 0; i < reqLimit; i++ {
go func(i int) { go func(i int) {
defer wg.Done() defer wg.Done()
for j := 0; j < 5; j++ { for j := 0; j < iterLimit; j++ {
slog.Info("Starting", "req", i, "iter", j) slog.Info("Starting", "req", i, "iter", j)
// On slower GPUs it can take a while to process the 4 concurrent requests // On slower GPUs it can take a while to process the concurrent requests
// so we allow a much longer initial timeout // so we allow a much longer initial timeout
DoGenerate(ctx, t, client, req[i], resp[i], 90*time.Second, 5*time.Second) DoGenerate(ctx, t, client, req[i], resp[i], 120*time.Second, 20*time.Second)
} }
}(i) }(i)
} }
...@@ -221,5 +245,23 @@ func TestMultiModelStress(t *testing.T) { ...@@ -221,5 +245,23 @@ func TestMultiModelStress(t *testing.T) {
} }
}(i) }(i)
} }
go func() {
for {
time.Sleep(2 * time.Second)
select {
case <-ctx.Done():
return
default:
models, err := client.ListRunning(ctx)
if err != nil {
slog.Warn("failed to list running models", "error", err)
continue
}
for _, m := range models.Models {
slog.Info("loaded model snapshot", "model", m)
}
}
}
}()
wg.Wait() wg.Wait()
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment