0015-ggml-Export-GPU-UUIDs.patch 7.4 KB
Newer Older
1
2
3
4
5
6
7
8
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@ollama.com>
Date: Thu, 24 Apr 2025 14:48:51 -0700
Subject: [PATCH] ggml: Export GPU UUIDs

This enables matching up devices and information reported by the backend
with tools (e.g. nvidia-smi) and system management libraries (e.g. nvml).
---
Daniel Hiltgen's avatar
Daniel Hiltgen committed
9
10
11
 ggml/include/ggml-backend.h        |  1 +
 ggml/src/ggml-cuda/ggml-cuda.cu    | 67 +++++++++++++++++++++++++++---
 ggml/src/ggml-metal/ggml-metal.cpp |  1 +
12
 3 files changed, 63 insertions(+), 6 deletions(-)
13
14

diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
Daniel Hiltgen's avatar
Daniel Hiltgen committed
15
index c54ff98bf..229bf387b 100644
16
17
--- a/ggml/include/ggml-backend.h
+++ b/ggml/include/ggml-backend.h
Daniel Hiltgen's avatar
Daniel Hiltgen committed
18
@@ -158,6 +158,7 @@ extern "C" {
19
         const char * description;
Daniel Hiltgen's avatar
Daniel Hiltgen committed
20
         // device free memory in bytes
21
         size_t memory_free;
Daniel Hiltgen's avatar
Daniel Hiltgen committed
22
23
+        const char * id;
         // device total memory in bytes
24
         size_t memory_total;
Daniel Hiltgen's avatar
Daniel Hiltgen committed
25
         // device type
26
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
Daniel Hiltgen's avatar
Daniel Hiltgen committed
27
index aefc6935e..cc201afff 100644
28
29
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
Daniel Hiltgen's avatar
Daniel Hiltgen committed
30
@@ -183,6 +183,51 @@ static int ggml_cuda_parse_id(char devName[]) {
31
 }
32
 #endif // defined(GGML_USE_HIP)
33
34
35
36
 
+static std::string ggml_cuda_parse_uuid(cudaDeviceProp prop, int device_num) { // Builds a printable id string for a device from prop.uuid; some paths fall back to the decimal device_num.
+    char id[64]; // scratch buffer; every preprocessor branch below fills it with snprintf
+
37
+#if !defined(GGML_USE_HIP) // non-HIP builds: print the 16 uuid bytes as hex
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
+    snprintf(id, sizeof(id),
+        "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", // "GPU-" followed by hex digits grouped 8-4-4-4-12
+        (unsigned char)prop.uuid.bytes[0], // cast so each byte is passed to %02x as a value in 0..255
+        (unsigned char)prop.uuid.bytes[1],
+        (unsigned char)prop.uuid.bytes[2],
+        (unsigned char)prop.uuid.bytes[3],
+        (unsigned char)prop.uuid.bytes[4],
+        (unsigned char)prop.uuid.bytes[5],
+        (unsigned char)prop.uuid.bytes[6],
+        (unsigned char)prop.uuid.bytes[7],
+        (unsigned char)prop.uuid.bytes[8],
+        (unsigned char)prop.uuid.bytes[9],
+        (unsigned char)prop.uuid.bytes[10],
+        (unsigned char)prop.uuid.bytes[11],
+        (unsigned char)prop.uuid.bytes[12],
+        (unsigned char)prop.uuid.bytes[13],
+        (unsigned char)prop.uuid.bytes[14],
+        (unsigned char)prop.uuid.bytes[15]
+        );
57
58
+#else // GGML_USE_HIP builds
+#ifdef _WIN32
59
+        snprintf(id, sizeof(id), "%d", device_num); // HIP on Windows: the id is just the device index
60
+#else // HIP, not Windows
61
62
63
64
65
66
67
68
69
70
71
72
+    try {
+        std::string uuid = std::string(prop.uuid.bytes, 16); // treat the 16 uuid bytes as text (NOTE(review): presumably ASCII hex digits on this platform - confirm)
+
+        size_t pos = 0;
+        unsigned long long v = stoull(uuid, &pos, 16); // parse the text as base-16; pos receives the number of characters consumed
+        if (v == 0 || pos != uuid.size() || (!uuid.empty() && uuid[0] == '-')) // reject a zero value, partially parsed text, and a leading '-'
+            throw std::invalid_argument("invalid uuid"); // handled by the catch below
+
+        snprintf(id, sizeof(id), "GPU-%016llx", v); // "GPU-" followed by the value as 16 zero-padded hex digits
+    } catch (const std::exception &e) { // parse or validation failure: fall back to the device index
+        snprintf(id, sizeof(id), "%d", device_num);
+    }
73
74
+#endif // _WIN32
+#endif // !defined(GGML_USE_HIP)
75
76
77
78
79
+
+    return id; // the char buffer is converted to the std::string return value
+}
+
 static ggml_cuda_device_info ggml_cuda_init() {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
80
81
82
     ggml_cuda_device_info info = {};
 
@@ -249,22 +294,24 @@ static ggml_cuda_device_info ggml_cuda_init() {
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
                 info.devices[id].cc += prop.minor * 0x10;
             }
         }
-        GGML_LOG_INFO("  Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d\n",
+        GGML_LOG_INFO("  Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d, ID: %s\n",
                       id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff,
-                      device_vmm ? "yes" : "no", prop.warpSize);
+                      device_vmm ? "yes" : "no", prop.warpSize, ggml_cuda_parse_uuid(prop, id).c_str());
 #elif defined(GGML_USE_MUSA)
         // FIXME: Ensure compatibility with varying warp sizes across different MUSA archs.
         info.devices[id].warp_size = 32;
         info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
         info.devices[id].cc = GGML_CUDA_CC_OFFSET_MTHREADS + prop.major * 0x100;
         info.devices[id].cc += prop.minor * 0x10;
-        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s\n",
-                        id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
+        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
+                        id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
+                        ggml_cuda_parse_uuid(prop, id).c_str());
 #else
         info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
         info.devices[id].cc = 100*prop.major + 10*prop.minor;
-        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s\n",
-                        id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
+        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
+                        id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
+                        ggml_cuda_parse_uuid(prop, id).c_str());
Daniel Hiltgen's avatar
Daniel Hiltgen committed
110
111
112
         std::string device_name(prop.name);
         if (device_name == "NVIDIA GeForce MX450") {
             turing_devices_without_mma.push_back({ id, device_name });
Daniel Hiltgen's avatar
Daniel Hiltgen committed
113
@@ -3268,6 +3315,7 @@ struct ggml_backend_cuda_device_context {
114
115
     std::string name;
     std::string description;
Daniel Hiltgen's avatar
Daniel Hiltgen committed
116
     std::string pci_bus_id;
117
+    std::string id;
118
119
120
 };
 
 static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
121
@@ -3280,6 +3328,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
122
123
124
     return ctx->description.c_str();
 }
 
125
+static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) { // Returns the id string stored in the device's CUDA context.
126
+    ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context; // dev->context is cast to the CUDA device context type
127
+    return ctx->id.c_str(); // pointer into ctx->id's own storage, not a copy
128
129
130
131
132
+}
+
 static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
     ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
     ggml_cuda_set_device(ctx->device);
Daniel Hiltgen's avatar
Daniel Hiltgen committed
133
@@ -3296,6 +3349,7 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
Daniel Hiltgen's avatar
Daniel Hiltgen committed
134
 
135
136
     props->name        = ggml_backend_cuda_device_get_name(dev);
     props->description = ggml_backend_cuda_device_get_description(dev);
137
+    props->id          = ggml_backend_cuda_device_get_id(dev);
138
     props->type        = ggml_backend_cuda_device_get_type(dev);
Daniel Hiltgen's avatar
Daniel Hiltgen committed
139
     props->device_id   = ctx->pci_bus_id.empty() ? nullptr : ctx->pci_bus_id.c_str();
140
     ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);
Daniel Hiltgen's avatar
Daniel Hiltgen committed
141
@@ -3869,6 +3923,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
142
                 cudaDeviceProp prop;
143
144
                 CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
                 dev_ctx->description = prop.name;
145
+                dev_ctx->id = ggml_cuda_parse_uuid(prop, i);
146
 
Daniel Hiltgen's avatar
Daniel Hiltgen committed
147
148
149
                 char pci_bus_id[16] = {};
                 snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.0", prop.pciDomainID, prop.pciBusID, prop.pciDeviceID);
diff --git a/ggml/src/ggml-metal/ggml-metal.cpp b/ggml/src/ggml-metal/ggml-metal.cpp
Daniel Hiltgen's avatar
Daniel Hiltgen committed
150
index bf0962274..f2ff9f322 100644
Daniel Hiltgen's avatar
Daniel Hiltgen committed
151
152
153
154
--- a/ggml/src/ggml-metal/ggml-metal.cpp
+++ b/ggml/src/ggml-metal/ggml-metal.cpp
@@ -538,6 +538,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
 static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
155
156
     props->name        = ggml_backend_metal_device_get_name(dev);
     props->description = ggml_backend_metal_device_get_description(dev);
157
+    props->id          = "0";
158
     props->type        = ggml_backend_metal_device_get_type(dev);
Daniel Hiltgen's avatar
Daniel Hiltgen committed
159
 
160
     ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total);