Commit da3bf233 authored by Daniel Hiltgen
Browse files

Workaround gfx900 SDMA bugs

Implement support for GPU env var workarounds, and leverage
this for the Vega RX 56 which needs
HSA_ENABLE_SDMA=0 set to work properly
parent 45cacbaf
...@@ -332,6 +332,11 @@ func AMDGetGPUInfo() []RocmGPUInfo { ...@@ -332,6 +332,11 @@ func AMDGetGPUInfo() []RocmGPUInfo {
slog.Info("skipping rocm gfx compatibility check", "HSA_OVERRIDE_GFX_VERSION", gfxOverride) slog.Info("skipping rocm gfx compatibility check", "HSA_OVERRIDE_GFX_VERSION", gfxOverride)
} }
// Check for env var workarounds
if name == "1002:687f" { // Vega RX 56
gpuInfo.EnvWorkarounds = append(gpuInfo.EnvWorkarounds, [2]string{"HSA_ENABLE_SDMA", "0"})
}
// The GPU has passed all the verification steps and is supported // The GPU has passed all the verification steps and is supported
resp = append(resp, gpuInfo) resp = append(resp, gpuInfo)
} }
......
...@@ -26,6 +26,9 @@ type GpuInfo struct { ...@@ -26,6 +26,9 @@ type GpuInfo struct {
// Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly // Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly
DependencyPath string `json:"lib_path,omitempty"` DependencyPath string `json:"lib_path,omitempty"`
// Extra environment variables specific to the GPU as list of [key,value]
EnvWorkarounds [][2]string `json:"envs,omitempty"`
// GPU information // GPU information
ID string `json:"gpu_id"` // string to use for selection of this specific GPU ID string `json:"gpu_id"` // string to use for selection of this specific GPU
Name string `json:"name"` // user friendly name if available Name string `json:"name"` // user friendly name if available
......
...@@ -320,6 +320,10 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr ...@@ -320,6 +320,10 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
s.cmd.Stdout = os.Stdout s.cmd.Stdout = os.Stdout
s.cmd.Stderr = s.status s.cmd.Stderr = s.status
envWorkarounds := [][2]string{}
for _, gpu := range gpus {
envWorkarounds = append(envWorkarounds, gpu.EnvWorkarounds...)
}
visibleDevicesEnv, visibleDevicesEnvVal := gpus.GetVisibleDevicesEnv() visibleDevicesEnv, visibleDevicesEnvVal := gpus.GetVisibleDevicesEnv()
pathEnvVal := strings.Join(libraryPaths, string(filepath.ListSeparator)) pathEnvVal := strings.Join(libraryPaths, string(filepath.ListSeparator))
...@@ -334,6 +338,12 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr ...@@ -334,6 +338,12 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
} else if devicesNeeded && strings.EqualFold(cmp[0], visibleDevicesEnv) { } else if devicesNeeded && strings.EqualFold(cmp[0], visibleDevicesEnv) {
s.cmd.Env[i] = visibleDevicesEnv + "=" + visibleDevicesEnvVal s.cmd.Env[i] = visibleDevicesEnv + "=" + visibleDevicesEnvVal
devicesNeeded = false devicesNeeded = false
} else if len(envWorkarounds) != 0 {
for _, kv := range envWorkarounds {
if strings.EqualFold(cmp[0], kv[0]) {
s.cmd.Env[i] = kv[0] + "=" + kv[1]
}
}
} }
} }
if pathNeeded { if pathNeeded {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment