Unverified Commit 14977a93 authored by Daniel Hiltgen, committed by GitHub

Fix vulkan PCI ID and ID handling (#12775)

* Fix vulkan PCI ID and ID handling

Intel GPUs may not report PCI IDs, which was leading to incorrect overlap
detection. Switch to using the existing PCI IDs; AMD GPUs claim not to report
PCI IDs but actually do, so try anyway, as this is required for ADLX to find
the GPUs on Windows. Numeric IDs lead to scheduling problems, so this also
switches Vulkan to UUID-based IDs. The GPU discovery patches have been
squashed into a single patch to simplify future rebases. A minimal sketch of
the UUID-based ID format is shown after the commit metadata below.

* review comments
parent 29f63f37
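Before the diff, here is a minimal, self-contained sketch (not part of this commit) of how a Vulkan device UUID can be turned into a stable "GPU-..." identifier, mirroring the ggml_vk_get_device_id() change in the patch below. It assumes Vulkan-Hpp, Vulkan 1.1 (for getProperties2), and nothing else; the function and program names (uuid_based_id, uuid-id-demo) are illustrative only.

// Sketch: format a Vulkan physical device UUID as "GPU-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx".
#include <vulkan/vulkan.hpp>
#include <cstdio>
#include <string>

static std::string uuid_based_id(const vk::PhysicalDevice & dev) {
    vk::PhysicalDeviceIDProperties id_props;
    vk::PhysicalDeviceProperties2 props2;
    props2.pNext = &id_props;      // chain the ID-properties query onto the base query
    dev.getProperties2(&props2);   // requires Vulkan 1.1

    const auto & uuid = id_props.deviceUUID;   // 16 raw bytes
    char id[64];
    snprintf(id, sizeof(id),
             "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
             uuid[0], uuid[1], uuid[2], uuid[3],
             uuid[4], uuid[5], uuid[6], uuid[7],
             uuid[8], uuid[9], uuid[10], uuid[11],
             uuid[12], uuid[13], uuid[14], uuid[15]);
    return std::string(id);
}

// Usage: enumerate physical devices and print their UUID-based IDs.
int main() {
    vk::ApplicationInfo app_info("uuid-id-demo", 1, nullptr, 0, VK_API_VERSION_1_1);
    vk::InstanceCreateInfo create_info({}, &app_info);
    vk::Instance instance = vk::createInstance(create_info);
    for (const vk::PhysicalDevice & dev : instance.enumeratePhysicalDevices()) {
        printf("%s -> %s\n", dev.getProperties().deviceName.data(), uuid_based_id(dev).c_str());
    }
    instance.destroy();
    return 0;
}

Unlike the numeric Vulkan index, this string stays stable across enumeration order and device filtering, which is why the discovery code below switches to it for scheduling decisions.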
......@@ -117,7 +117,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
// In the second pass, we more deeply initialize the GPUs to weed out devices that
// aren't supported by a given library. We run this phase in parallel to speed up discovery.
slog.Debug("filtering out unsupported or overlapping GPU library combinations", "count", len(devices))
slog.Debug("evluating which if any devices to filter out", "initial_count", len(devices))
ctx2ndPass, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
var wg sync.WaitGroup
......@@ -129,7 +129,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
if devices[i].Library == "Metal" {
continue
}
slog.Debug("verifying GPU is supported", "library", libDir, "description", devices[i].Description, "compute", devices[i].Compute(), "pci_id", devices[i].PCIID)
slog.Debug("verifying GPU is supported", "library", libDir, "description", devices[i].Description, "compute", devices[i].Compute(), "id", devices[i].ID, "pci_id", devices[i].PCIID)
wg.Add(1)
go func(i int) {
defer wg.Done()
......@@ -155,6 +155,12 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
envVar: id, // Filter to just this one GPU
}
if len(bootstrapDevices(ctx2ndPass, devices[i].LibraryPath, extraEnvs)) == 0 {
slog.Debug("filtering device which didn't fully initialize",
"id", devices[i].ID,
"libdir", devices[i].LibraryPath[len(devices[i].LibraryPath)-1],
"pci_id", devices[i].PCIID,
"library", devices[i].Library,
)
needsDelete[i] = true
} else {
supportedMu.Lock()
......@@ -170,7 +176,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
}(i)
}
wg.Wait()
logutil.Trace("supported GPU library combinations", "supported", supported)
logutil.Trace("supported GPU library combinations before filtering", "supported", supported)
filterOutVulkanThatAreSupportedByOtherGPU(needsDelete)
......@@ -372,12 +378,13 @@ func filterOutVulkanThatAreSupportedByOtherGPU(needsDelete []bool) {
}
if devices[j].PCIID == devices[i].PCIID && devices[j].Library != "Vulkan" && !needsDelete[j] {
needsDelete[i] = true
slog.Debug("dropping Vulkan duplicate by PCI ID",
"vulkan_id", devices[i].ID,
"vulkan_libdir", devices[i].LibraryPath[len(devices[i].LibraryPath)-1],
slog.Debug("filtering device with duplicate PCI ID",
"id", devices[i].ID,
"library", devices[i].Library,
"libdir", devices[i].LibraryPath[len(devices[i].LibraryPath)-1],
"pci_id", devices[i].PCIID,
"kept_library", devices[j].Library,
"kept_id", devices[j].ID,
"kept_library", devices[j].Library,
)
break
}
......@@ -422,6 +429,12 @@ func filterOverlapByLibrary(supported map[string]map[string]map[string]int, need
}
for dev, i := range byLibDirs[libDir] {
if _, found := byLibDirs[newest][dev]; found {
slog.Debug("filtering device with overlapping libraries",
"id", dev,
"library", libDir,
"delete_index", i,
"kept_library", newest,
)
needsDelete[i] = true
}
}
......
......@@ -3,6 +3,7 @@ package discover
import (
"log/slog"
"path/filepath"
"sort"
"strings"
"github.com/ollama/ollama/format"
......@@ -26,6 +27,7 @@ type CPU struct {
}
func LogDetails(devices []ml.DeviceInfo) {
sort.Sort(sort.Reverse(ml.ByFreeMemory(devices))) // Report devices in order of scheduling preference
for _, dev := range devices {
var libs []string
for _, dir := range dev.LibraryPath {
......@@ -39,6 +41,7 @@ func LogDetails(devices []ml.DeviceInfo) {
}
slog.Info("inference compute",
"id", dev.ID,
"filtered_id", dev.FilteredID,
"library", dev.Library,
"compute", dev.Compute(),
"name", dev.Name,
......
......@@ -8,7 +8,7 @@ Subject: [PATCH] NVML fallback for unified memory GPUs
1 file changed, 68 insertions(+), 3 deletions(-)
diff --git a/ggml/src/mem_nvml.cpp b/ggml/src/mem_nvml.cpp
index c9073cef..f473a2a2 100644
index c9073cef0..f473a2a2c 100644
--- a/ggml/src/mem_nvml.cpp
+++ b/ggml/src/mem_nvml.cpp
@@ -13,6 +13,7 @@
......
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Xiaodong Ye <xiaodong.ye@mthreads.com>
Date: Mon, 18 Aug 2025 12:48:07 +0800
Subject: [PATCH] vulkan: get GPU ID (ollama v0.11.5)
Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
---
ggml/src/ggml-vulkan/ggml-vulkan.cpp | 37 ++++++++++++++++++++++++++++
1 file changed, 37 insertions(+)
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 061cd078..adea7783 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -11588,6 +11588,29 @@ static void ggml_vk_get_device_description(int device, char * description, size_
snprintf(description, description_size, "%s", props.deviceName.data());
}
+static std::string ggml_vk_get_device_id(int device) {
+ ggml_vk_instance_init();
+
+ std::vector<vk::PhysicalDevice> devices = vk_instance.instance.enumeratePhysicalDevices();
+
+ vk::PhysicalDeviceProperties2 props;
+ vk::PhysicalDeviceIDProperties deviceIDProps;
+ props.pNext = &deviceIDProps;
+ devices[device].getProperties2(&props);
+
+ const auto& uuid = deviceIDProps.deviceUUID;
+ char id[64];
+ snprintf(id, sizeof(id),
+ "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+ uuid[0], uuid[1], uuid[2], uuid[3],
+ uuid[4], uuid[5],
+ uuid[6], uuid[7],
+ uuid[8], uuid[9],
+ uuid[10], uuid[11], uuid[12], uuid[13], uuid[14], uuid[15]
+ );
+ return std::string(id);
+}
+
// backend interface
#define UNUSED GGML_UNUSED
@@ -12394,6 +12417,12 @@ void ggml_backend_vk_get_device_description(int device, char * description, size
ggml_vk_get_device_description(dev_idx, description, description_size);
}
+std::string ggml_backend_vk_get_device_id(int device) {
+ GGML_ASSERT(device < (int) vk_instance.device_indices.size());
+ int dev_idx = vk_instance.device_indices[device];
+ return ggml_vk_get_device_id(dev_idx);
+}
+
void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total) {
GGML_ASSERT(device < (int) vk_instance.device_indices.size());
GGML_ASSERT(device < (int) vk_instance.device_supports_membudget.size());
@@ -12481,6 +12510,7 @@ struct ggml_backend_vk_device_context {
std::string description;
bool is_integrated_gpu;
std::string pci_bus_id;
+ std::string id;
};
static const char * ggml_backend_vk_device_get_name(ggml_backend_dev_t dev) {
@@ -12493,6 +12523,11 @@ static const char * ggml_backend_vk_device_get_description(ggml_backend_dev_t de
return ctx->description.c_str();
}
+static const char * ggml_backend_vk_device_get_id(ggml_backend_dev_t dev) {
+ ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
+ return ctx->id.c_str();
+}
+
static void ggml_backend_vk_device_get_memory(ggml_backend_dev_t device, size_t * free, size_t * total) {
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)device->context;
ggml_backend_vk_get_device_memory(ctx->device, free, total);
@@ -12519,6 +12554,7 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
props->name = ggml_backend_vk_device_get_name(dev);
props->description = ggml_backend_vk_device_get_description(dev);
+ props->id = ggml_backend_vk_device_get_id(dev);
props->type = ggml_backend_vk_device_get_type(dev);
props->device_id = ctx->pci_bus_id.empty() ? nullptr : ctx->pci_bus_id.c_str();
ggml_backend_vk_device_get_memory(dev, &props->memory_free, &props->memory_total);
@@ -12965,6 +13001,7 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
ctx->description = desc;
ctx->is_integrated_gpu = ggml_backend_vk_get_device_type(i) == vk::PhysicalDeviceType::eIntegratedGpu;
ctx->pci_bus_id = ggml_backend_vk_get_device_pci_id(i);
+ ctx->id = ggml_backend_vk_get_device_id(i);
devices.push_back(new ggml_backend_device {
/* .iface = */ ggml_backend_vk_device_i,
/* .reg = */ reg,
--
2.51.0
\ No newline at end of file
......@@ -28,7 +28,7 @@ Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
1 file changed, 9 insertions(+)
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index 6a278b5e9..87941f872 100644
index b075a18be..d62f412d6 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -340,6 +340,15 @@ static ggml_cuda_device_info ggml_cuda_init() {
......
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen <daniel@ollama.com>
Date: Fri Sep 5 08:25:03 2025 -0700
Subject: [PATCH] Vulkan PCI and Memory
---
ggml/src/ggml-vulkan/ggml-vulkan.cpp | 176 ++++++++++++++++++++++-----
1 file changed, 145 insertions(+), 31 deletions(-)
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index adea7783..fb7204ce 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -12423,31 +12423,99 @@ std::string ggml_backend_vk_get_device_id(int device) {
return ggml_vk_get_device_id(dev_idx);
}
-void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total) {
- GGML_ASSERT(device < (int) vk_instance.device_indices.size());
- GGML_ASSERT(device < (int) vk_instance.device_supports_membudget.size());
+//////////////////////////
+
+struct ggml_backend_vk_device_context {
+ size_t device;
+ std::string name;
+ std::string description;
+ bool is_integrated_gpu;
+ // Combined string id in the form "dddd:bb:dd.f" (domain:bus:device.function)
+ std::string pci_id;
+ std::string id;
+ std::string uuid;
+ int major;
+ int minor;
+ int driver_major;
+ int driver_minor;
+ int pci_bus_id;
+ int pci_device_id;
+ int pci_domain_id;
+};
+
+void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size_t * free, size_t * total) {
+ GGML_ASSERT(ctx->device < (int) vk_instance.device_indices.size());
+ GGML_ASSERT(ctx->device < (int) vk_instance.device_supports_membudget.size());
+
+ vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[ctx->device]];
- vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[device]];
- vk::PhysicalDeviceMemoryBudgetPropertiesEXT budgetprops;
- vk::PhysicalDeviceMemoryProperties2 memprops = {};
- bool membudget_supported = vk_instance.device_supports_membudget[device];
+ vk::PhysicalDeviceMemoryProperties memprops = vkdev.getMemoryProperties();
+ vk::PhysicalDeviceProperties2 props2;
+ vkdev.getProperties2(&props2);
- if (membudget_supported) {
- memprops.pNext = &budgetprops;
+ if (!ctx->is_integrated_gpu)
+ {
+ // Use vendor specific management libraries for best VRAM reporting if available
+ switch (props2.properties.vendorID) {
+ case VK_VENDOR_ID_AMD:
+ if (ggml_hip_mgmt_init() == 0) {
+ int status = ggml_hip_get_device_memory(ctx->pci_bus_id, ctx->pci_device_id, free, total);
+ if (status == 0) {
+ GGML_LOG_DEBUG("%s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, *free, *total);
+ ggml_hip_mgmt_release();
+ return;
+ }
+ ggml_hip_mgmt_release();
+ }
+ break;
+ case VK_VENDOR_ID_NVIDIA:
+ if (ggml_nvml_init() == 0) {
+ int status = ggml_nvml_get_device_memory(ctx->uuid.c_str(), free, total);
+ if (status == 0) {
+ GGML_LOG_DEBUG("%s utilizing NVML memory reporting free: %zu total: %zu\n", __func__, *free, *total);
+ ggml_nvml_release();
+ return;
+ }
+ ggml_nvml_release();
+ }
+ break;
+ }
}
- vkdev.getMemoryProperties2(&memprops);
+ // else fallback to memory budget if supported
- for (uint32_t i = 0; i < memprops.memoryProperties.memoryHeapCount; ++i) {
- const vk::MemoryHeap & heap = memprops.memoryProperties.memoryHeaps[i];
+ *total = 0;
+ *free = 0;
+ vk::PhysicalDeviceMemoryBudgetPropertiesEXT mem_budget_props;
+ vk::PhysicalDeviceMemoryProperties2 memprops2;
+ memprops2.pNext = &mem_budget_props;
+ vkdev.getMemoryProperties2(&memprops2);
+ for (int i = 0; i < memprops2.memoryProperties.memoryHeapCount; i++) {
+ if (memprops2.memoryProperties.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
+ *total += memprops2.memoryProperties.memoryHeaps[i].size;
+ } else if (ctx->is_integrated_gpu) {
+ // Include shared memory on iGPUs
+ *total += memprops2.memoryProperties.memoryHeaps[i].size;
+ }
+ }
+ for (int i = 0; i < memprops2.memoryProperties.memoryHeapCount; i++) {
+ if (memprops2.memoryProperties.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
+ *free += mem_budget_props.heapBudget[i];
+ } else if (ctx->is_integrated_gpu) {
+ *free += mem_budget_props.heapBudget[i];
+ }
+ }
+ if (*total > 0 && *free > 0) {
+ return;
+ } else if (*total > 0) {
+ *free = *total;
+ return;
+ }
+ // else just report the physical memory
+ for (const vk::MemoryHeap& heap : memprops2.memoryProperties.memoryHeaps) {
if (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
*total = heap.size;
-
- if (membudget_supported && i < budgetprops.heapUsage.size()) {
- *free = budgetprops.heapBudget[i] - budgetprops.heapUsage[i];
- } else {
- *free = heap.size;
- }
+ *free = heap.size;
break;
}
}
@@ -12502,16 +12570,17 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
return std::string(pci_bus_id);
}
-//////////////////////////
-
-struct ggml_backend_vk_device_context {
- size_t device;
- std::string name;
- std::string description;
- bool is_integrated_gpu;
- std::string pci_bus_id;
- std::string id;
-};
+static bool ggml_backend_vk_parse_pci_bus_id(const std::string & id, int *domain, int *bus, int *device) {
+ if (id.empty()) return false;
+ unsigned int d = 0, b = 0, dev = 0, func = 0;
+ // Expected format: dddd:bb:dd.f (all hex)
+ int n = sscanf(id.c_str(), "%4x:%2x:%2x.%1x", &d, &b, &dev, &func);
+ if (n < 4) return false;
+ if (domain) *domain = (int) d;
+ if (bus) *bus = (int) b;
+ if (device) *device = (int) dev;
+ return true;
+}
static const char * ggml_backend_vk_device_get_name(ggml_backend_dev_t dev) {
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
@@ -12530,7 +12599,7 @@ static const char * ggml_backend_vk_device_get_id(ggml_backend_dev_t dev) {
static void ggml_backend_vk_device_get_memory(ggml_backend_dev_t device, size_t * free, size_t * total) {
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)device->context;
- ggml_backend_vk_get_device_memory(ctx->device, free, total);
+ ggml_backend_vk_get_device_memory(ctx, free, total);
}
static ggml_backend_buffer_type_t ggml_backend_vk_device_get_buffer_type(ggml_backend_dev_t dev) {
@@ -12556,7 +12625,7 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
props->description = ggml_backend_vk_device_get_description(dev);
props->id = ggml_backend_vk_device_get_id(dev);
props->type = ggml_backend_vk_device_get_type(dev);
- props->device_id = ctx->pci_bus_id.empty() ? nullptr : ctx->pci_bus_id.c_str();
+ props->device_id = ctx->pci_id.empty() ? nullptr : ctx->pci_id.c_str();
ggml_backend_vk_device_get_memory(dev, &props->memory_free, &props->memory_total);
props->caps = {
/* .async = */ false,
@@ -12564,6 +12633,17 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
/* .buffer_from_host_ptr = */ false,
/* .events = */ false,
};
+
+ props->compute_major = ctx->major;
+ props->compute_minor = ctx->minor;
+ props->driver_major = ctx->driver_major;
+ props->driver_minor = ctx->driver_minor;
+ props->integrated = ctx->is_integrated_gpu;
+ props->pci_bus_id = ctx->pci_bus_id;
+ props->pci_device_id = ctx->pci_device_id;
+ props->pci_domain_id = ctx->pci_domain_id;
+ props->library = GGML_VK_NAME;
+ props->numeric_id = ctx->id.empty() ? nullptr : ctx->id.c_str();
}
static ggml_backend_t ggml_backend_vk_device_init(ggml_backend_dev_t dev, const char * params) {
@@ -12992,6 +13071,8 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
static std::mutex mutex;
std::lock_guard<std::mutex> lock(mutex);
if (!initialized) {
+ std::vector<vk::PhysicalDevice> vk_devices = vk_instance.instance.enumeratePhysicalDevices();
+
for (int i = 0; i < ggml_backend_vk_get_device_count(); i++) {
ggml_backend_vk_device_context * ctx = new ggml_backend_vk_device_context;
char desc[256];
@@ -13000,13 +13081,46 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
ctx->name = GGML_VK_NAME + std::to_string(i);
ctx->description = desc;
ctx->is_integrated_gpu = ggml_backend_vk_get_device_type(i) == vk::PhysicalDeviceType::eIntegratedGpu;
- ctx->pci_bus_id = ggml_backend_vk_get_device_pci_id(i);
+ ctx->pci_id = ggml_backend_vk_get_device_pci_id(i);
ctx->id = ggml_backend_vk_get_device_id(i);
devices.push_back(new ggml_backend_device {
/* .iface = */ ggml_backend_vk_device_i,
/* .reg = */ reg,
/* .context = */ ctx,
});
+
+ // Gather additional information about the device
+ int dev_idx = vk_instance.device_indices[i];
+ vk::PhysicalDeviceProperties props1;
+ vk_devices[dev_idx].getProperties(&props1);
+ vk::PhysicalDeviceProperties2 props2;
+ vk::PhysicalDeviceIDProperties device_id_props;
+ vk::PhysicalDevicePCIBusInfoPropertiesEXT pci_bus_props;
+ vk::PhysicalDeviceDriverProperties driver_props;
+ props2.pNext = &device_id_props;
+ device_id_props.pNext = &pci_bus_props;
+ pci_bus_props.pNext = &driver_props;
+ vk_devices[dev_idx].getProperties2(&props2);
+ std::ostringstream oss;
+ oss << std::hex << std::setfill('0');
+ oss << "GPU-";
+ int byteIdx = 0;
+ for (int i = 0; i < 16; ++i, ++byteIdx) {
+ oss << std::setw(2) << static_cast<int>(device_id_props.deviceUUID[i]);
+ if (byteIdx == 3 || byteIdx == 5 || byteIdx == 7 || byteIdx == 9) {
+ oss << '-';
+ }
+ }
+ ctx->uuid = oss.str();
+ ctx->pci_bus_id = pci_bus_props.pciBus;
+ ctx->pci_device_id = pci_bus_props.pciDevice;
+ ctx->pci_domain_id = pci_bus_props.pciDomain;
+ ctx->id = std::to_string(i);
+ ctx->major = 0;
+ ctx->minor = 0;
+ // TODO regex parse driver_props.driverInfo for a X.Y or X.Y.Z version string
+ ctx->driver_major = 0;
+ ctx->driver_minor = 0;
}
initialized = true;
}
--
2.51.0
\ No newline at end of file
......@@ -725,7 +725,9 @@ func (b *Backend) BackendDevices() []ml.DeviceInfo {
if props.library != nil {
info.Library = C.GoString(props.library)
}
info.PCIID = fmt.Sprintf("%02x:%02x.%x", props.pci_bus_id, props.pci_device_id, props.pci_domain_id)
if props.device_id != nil {
info.PCIID = C.GoString(props.device_id)
}
info.LibraryPath = ggml.LibPaths()
if props.numeric_id != nil {
info.FilteredID = C.GoString(props.numeric_id)
......
......@@ -174,9 +174,6 @@ extern "C" {
int compute_major;
int compute_minor;
int integrated;
int pci_bus_id;
int pci_device_id;
int pci_domain_id;
const char *library;
// number with which the devices are accessed (Vulkan)
const char *numeric_id;
......
......@@ -3513,9 +3513,6 @@ struct ggml_backend_cuda_device_context {
int driver_major;
int driver_minor;
int integrated;
int pciBusID;
int pciDeviceID;
int pciDomainID;
};
static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
......@@ -3539,9 +3536,9 @@ static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t *
#if defined(GGML_USE_HIP)
if (ggml_hip_mgmt_init() == 0) {
int status = ggml_hip_get_device_memory(ctx->pciBusID, ctx->pciDeviceID, free, total);
int status = ggml_hip_get_device_memory(ctx->pci_bus_id.c_str(), free, total);
if (status == 0) {
GGML_LOG_DEBUG("%s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, *free, *total);
GGML_LOG_DEBUG("%s device %s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, ctx->pci_bus_id.c_str(), *free, *total);
ggml_hip_mgmt_release();
return;
}
......@@ -3551,7 +3548,7 @@ static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t *
if (ggml_nvml_init() == 0) {
int status = ggml_nvml_get_device_memory(ctx->id.c_str(), free, total);
if (status == 0) {
GGML_LOG_DEBUG("%s utilizing NVML memory reporting free: %zu total: %zu\n", __func__, *free, *total);
GGML_LOG_DEBUG("%s device %s utilizing NVML memory reporting free: %zu total: %zu\n", __func__, ctx->id.c_str(), *free, *total);
ggml_nvml_release();
return;
}
......@@ -3591,9 +3588,6 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
props->driver_major = ctx->driver_major;
props->driver_minor = ctx->driver_minor;
props->integrated = ctx->integrated;
props->pci_bus_id = ctx->pciBusID;
props->pci_device_id = ctx->pciDeviceID;
props->pci_domain_id = ctx->pciDomainID;
props->library = GGML_CUDA_NAME;
bool host_buffer = getenv("GGML_CUDA_NO_PINNED") == nullptr;
......@@ -4182,9 +4176,6 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
dev_ctx->driver_major = driverVersion / 1000;
dev_ctx->driver_minor = (driverVersion - (dev_ctx->driver_major * 1000)) / 10;
dev_ctx->integrated = prop.integrated;
dev_ctx->pciBusID = prop.pciBusID;
dev_ctx->pciDeviceID = prop.pciDeviceID;
dev_ctx->pciDomainID = prop.pciDomainID;
ggml_backend_dev_t dev = new ggml_backend_device {
/* .iface = */ ggml_backend_cuda_device_interface,
/* .reg = */ &reg,
......
......@@ -643,7 +643,7 @@ GGML_API int ggml_nvml_init();
GGML_API int ggml_nvml_get_device_memory(const char *uuid, size_t *free, size_t *total);
GGML_API void ggml_nvml_release();
GGML_API int ggml_hip_mgmt_init();
GGML_API int ggml_hip_get_device_memory(int pci_bus_id, int pci_device_id, size_t *free, size_t *total);
GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total);
GGML_API void ggml_hip_mgmt_release();
#ifdef __cplusplus
......
......@@ -231,6 +231,7 @@ class vk_memory_logger;
#endif
class vk_perf_logger;
static void ggml_vk_destroy_buffer(vk_buffer& buf);
static std::string ggml_vk_get_device_id(int device);
 
static constexpr uint32_t mul_mat_vec_max_cols = 8;
static constexpr uint32_t p021_max_gqa_ratio = 8;
......@@ -11598,7 +11599,7 @@ static std::string ggml_vk_get_device_id(int device) {
const auto& uuid = deviceIDProps.deviceUUID;
char id[64];
snprintf(id, sizeof(id),
"GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
"%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
uuid[0], uuid[1], uuid[2], uuid[3],
uuid[4], uuid[5],
uuid[6], uuid[7],
......@@ -12431,13 +12432,11 @@ struct ggml_backend_vk_device_context {
std::string pci_id;
std::string id;
std::string uuid;
std::string numeric_id;
int major;
int minor;
int driver_major;
int driver_minor;
int pci_bus_id;
int pci_device_id;
int pci_domain_id;
};
 
void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size_t * free, size_t * total) {
......@@ -12456,9 +12455,9 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size
switch (props2.properties.vendorID) {
case VK_VENDOR_ID_AMD:
if (ggml_hip_mgmt_init() == 0) {
int status = ggml_hip_get_device_memory(ctx->pci_bus_id, ctx->pci_device_id, free, total);
int status = ggml_hip_get_device_memory(ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), free, total);
if (status == 0) {
GGML_LOG_DEBUG("%s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, *free, *total);
GGML_LOG_DEBUG("%s device %s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), *free, *total);
ggml_hip_mgmt_release();
return;
}
......@@ -12469,7 +12468,7 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size
if (ggml_nvml_init() == 0) {
int status = ggml_nvml_get_device_memory(ctx->uuid.c_str(), free, total);
if (status == 0) {
GGML_LOG_DEBUG("%s utilizing NVML memory reporting free: %zu total: %zu\n", __func__, *free, *total);
GGML_LOG_DEBUG("%s device %s utilizing NVML memory reporting free: %zu total: %zu\n", __func__, ctx->uuid.c_str(), *free, *total);
ggml_nvml_release();
return;
}
......@@ -12545,8 +12544,13 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
}
}
 
vk::PhysicalDeviceProperties2 props2;
if (!ext_support) {
return "";
device.getProperties2(&props2);
if (props2.properties.vendorID != VK_VENDOR_ID_AMD) {
return "";
}
// AMD doesn't claim to support PCI ID, but actually does, so try anyway and check for non-zero
}
 
vk::PhysicalDeviceProperties2 props = {};
......@@ -12563,6 +12567,9 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
 
char pci_bus_id[16] = {};
snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.%x", pci_domain, pci_bus, pci_device, pci_function);
if (pci_domain == 0 && pci_bus == 0 && pci_device == 0 && pci_function == 0) {
return "";
}
 
return std::string(pci_bus_id);
}
......@@ -12636,11 +12643,8 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
props->driver_major = ctx->driver_major;
props->driver_minor = ctx->driver_minor;
props->integrated = ctx->is_integrated_gpu;
props->pci_bus_id = ctx->pci_bus_id;
props->pci_device_id = ctx->pci_device_id;
props->pci_domain_id = ctx->pci_domain_id;
props->library = GGML_VK_NAME;
props->numeric_id = ctx->id.empty() ? nullptr : ctx->id.c_str();
props->numeric_id = ctx->numeric_id.c_str();
}
 
static ggml_backend_t ggml_backend_vk_device_init(ggml_backend_dev_t dev, const char * params) {
......@@ -13101,7 +13105,6 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
vk_devices[dev_idx].getProperties2(&props2);
std::ostringstream oss;
oss << std::hex << std::setfill('0');
oss << "GPU-";
int byteIdx = 0;
for (int i = 0; i < 16; ++i, ++byteIdx) {
oss << std::setw(2) << static_cast<int>(device_id_props.deviceUUID[i]);
......@@ -13110,15 +13113,12 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
}
}
ctx->uuid = oss.str();
ctx->pci_bus_id = pci_bus_props.pciBus;
ctx->pci_device_id = pci_bus_props.pciDevice;
ctx->pci_domain_id = pci_bus_props.pciDomain;
ctx->id = std::to_string(i);
ctx->major = 0;
ctx->minor = 0;
// TODO regex parse driver_props.driverInfo for a X.Y or X.Y.Z version string
ctx->driver_major = 0;
ctx->driver_minor = 0;
ctx->numeric_id = std::to_string(i);
}
initialized = true;
}
......
......@@ -331,7 +331,7 @@ void ggml_hip_mgmt_release() {
if (gpus != NULL) gpus->pVtbl->Release(gpus); \
if (gpu != NULL) gpu->pVtbl->Release(gpu)
int ggml_hip_get_device_memory(int pci_bus_id, int pci_device_id, size_t *free, size_t *total) {
int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
std::lock_guard<std::mutex> lock(ggml_adlx_lock);
if (adlx.handle == NULL) {
GGML_LOG_INFO("%s ADLX was not initialized\n", __func__);
......@@ -343,9 +343,13 @@ int ggml_hip_get_device_memory(int pci_bus_id, int pci_device_id, size_t *free,
IADLXGPU* gpu = NULL;
IADLXGPUMetrics *gpuMetrics = NULL;
ADLX_RESULT status;
// The "UniqueID" exposed in ADLX is the PCI Bus and Device IDs
adlx_int target = (pci_bus_id << 8) | (pci_device_id & 0xff);
uint32_t pci_domain, pci_bus, pci_device, pci_function;
if (sscanf(id, "%04x:%02x:%02x.%x", &pci_domain, &pci_bus, &pci_device, &pci_function) != 4) {
// TODO - parse other formats?
GGML_LOG_DEBUG("%s device ID was not a PCI ID %s\n", __func__, id);
return ADLX_NOT_FOUND;
}
status = adlx.sys->pVtbl->GetPerformanceMonitoringServices(adlx.sys, &perfMonitoringServices);
if (ADLX_FAILED(status)) {
GGML_LOG_INFO("%s GetPerformanceMonitoringServices failed %d\n", __func__, status);
......@@ -368,16 +372,15 @@ int ggml_hip_get_device_memory(int pci_bus_id, int pci_device_id, size_t *free,
GGML_LOG_INFO("%s %d] At_GPUList failed %d\n", __func__, crt, status);
continue;
}
adlx_int id;
status = gpu->pVtbl->UniqueId(gpu, &id);
adlx_int uniqueID;
status = gpu->pVtbl->UniqueId(gpu, &uniqueID);
if (ADLX_FAILED(status)) {
GGML_LOG_INFO("%s %d] UniqueId lookup failed %d\n", __func__, crt, status);
gpu->pVtbl->Release(gpu);
gpu = NULL;
continue;
}
if (id != target) {
GGML_LOG_DEBUG("%s %d] GPU UniqueId: %x does not match target %02x %02x\n", __func__, crt, id, pci_bus_id, pci_device_id);
if ((((uniqueID >> 8) & 0xff) != pci_bus) || ((uniqueID & 0xff) != pci_device)) {
gpu->pVtbl->Release(gpu);
gpu = NULL;
continue;
......@@ -440,7 +443,7 @@ int ggml_hip_mgmt_init() {
return -1;
}
void ggml_hip_mgmt_release() {}
int ggml_hip_get_device_memory(int pci_bus_id, int pci_device_id, size_t *free, size_t *total) {
int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
return -1;
}
......
......@@ -391,6 +391,10 @@ func (a DeviceInfo) Compare(b DeviceInfo) DeviceComparison {
if a.PCIID != b.PCIID {
return UniqueDevice
}
// If PCIID is empty, we have to use ID + library for uniqueness
if a.PCIID == "" && a.DeviceID != b.DeviceID {
return UniqueDevice
}
if a.Library == b.Library {
return SameBackendDevice
}
......@@ -454,13 +458,13 @@ func (d DeviceInfo) updateVisibleDevicesEnv(env map[string]string) {
var envVar string
switch d.Library {
case "ROCm":
// ROCm must be filtered as it can crash the runner on unsupported devices
envVar = "ROCR_VISIBLE_DEVICES"
if runtime.GOOS != "linux" {
envVar = "HIP_VISIBLE_DEVICES"
}
case "Vulkan":
envVar = "GGML_VK_VISIBLE_DEVICES"
default:
// CUDA and Vulkan are not filtered via env var, but via scheduling decisions
return
}
v, existing := env[envVar]
......