Unverified Commit 72ff5b9d authored by Daniel Hiltgen's avatar Daniel Hiltgen Committed by GitHub
Browse files

log: warn if user overrides detected (#13088)

Many failed GPU discovery issues recently can be traced to incorrect override settings.
This extra logging should help quickly spot these and guide users to try unsetting them first.
parent ce29f695
...@@ -65,6 +65,10 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml. ...@@ -65,6 +65,10 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
} }
slog.Info("discovering available GPUs...") slog.Info("discovering available GPUs...")
// Warn if any user-overrides are set which could lead to incorrect GPU discovery
overrideWarnings()
requested := envconfig.LLMLibrary() requested := envconfig.LLMLibrary()
jetpack := cudaJetpack() jetpack := cudaJetpack()
...@@ -449,3 +453,24 @@ func bootstrapDevices(ctx context.Context, ollamaLibDirs []string, extraEnvs map ...@@ -449,3 +453,24 @@ func bootstrapDevices(ctx context.Context, ollamaLibDirs []string, extraEnvs map
return devices return devices
} }
func overrideWarnings() {
anyFound := false
m := envconfig.AsMap()
for _, k := range []string{
"CUDA_VISIBLE_DEVICES",
"HIP_VISIBLE_DEVICES",
"ROCR_VISIBLE_DEVICES",
"GGML_VK_VISIBLE_DEVICES",
"GPU_DEVICE_ORDINAL",
"HSA_OVERRIDE_GFX_VERSION",
} {
if e, found := m[k]; found && e.Value != "" {
anyFound = true
slog.Warn("user override visible devices", k, e.Value)
}
}
if anyFound {
slog.Warn("if GPUs are not correctly discovered, unset and try again")
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment