OpenDAS / ollama · Commit 7ad03699 (unverified)
Authored by Daniel Hiltgen on Dec 23, 2025; committed by GitHub on Dec 23, 2025
amd: use GTT on iGPUs on linux (#13196)
On Linux, look at the GTT memory information for iGPUs.
Parent: 172b5924
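
For context on what the diffs below implement: on an integrated GPU the driver serves most allocations from GTT (system memory mapped for GPU use) rather than from the small dedicated VRAM carve-out, so the commit folds the GTT pool into the reported totals. A minimal standalone sketch of that arithmetic, assuming a fixed card0 sysfs path instead of the PCI-bus matching the real code performs:

// Minimal sketch of the memory accounting this commit adds (not part of the
// diff): read the VRAM counters from a DRM device's sysfs node and, for an
// iGPU, add the GTT counters on top. The card0 path is a stand-in; the real
// code locates the device by matching PCI_SLOT_NAME in its uevent file.
#include <cstdint>
#include <fstream>
#include <iostream>
#include <string>

static uint64_t read_u64(const std::string &path) {
    std::ifstream f(path);
    uint64_t v = 0;
    f >> v;  // each sysfs node holds a single decimal byte count
    return v;
}

int main() {
    const std::string dev = "/sys/class/drm/card0/device";  // hypothetical card
    uint64_t total = read_u64(dev + "/mem_info_vram_total");
    uint64_t used  = read_u64(dev + "/mem_info_vram_used");
    const bool is_integrated_gpu = true;  // assumed for the sketch
    if (is_integrated_gpu) {
        total += read_u64(dev + "/mem_info_gtt_total");
        used  += read_u64(dev + "/mem_info_gtt_used");
    }
    std::cout << "total: " << total << " free: " << (total - used) << "\n";
    return 0;
}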
Showing 9 changed files with 83 additions and 25 deletions (+83 / -25):

llama/patches/0024-GPU-discovery-enhancements.patch            +44  -15
llama/patches/0028-Add-memory-detection-using-DXGI-PDH.patch    +3   -3
llama/patches/0029-ggml-cuda-skip-large-batches.patch           +1   -1
llama/patches/0030-fix-bakllava-regression.patch                +0   -0
llama/patches/0031-win-exit-instead-of-abort.patch              +0   -0
ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu                 +1   -1
ml/backend/ggml/ggml/src/ggml-impl.h                            +1   -1
ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp            +1   -1
ml/backend/ggml/ggml/src/mem_hip.cpp                           +32   -3
llama/patches/0024-GPU-discovery-enhancements.patch
...
@@ -20,10 +20,10 @@ fix vulkan PCI ID and ID handling
  ggml/src/ggml-cuda/vendors/hip.h     |   3 +
  ggml/src/ggml-impl.h                 |   8 +
  ggml/src/ggml-metal/ggml-metal.cpp   |   2 +
  ggml/src/ggml-vulkan/ggml-vulkan.cpp | 169 +++++++-
- ggml/src/mem_hip.cpp                 | 529 +++++++++++++++++++++++++++
- ggml/src/mem_nvml.cpp                | 209 ++++++++++
- 9 files changed, 976 insertions(+), 17 deletions(-)
+ ggml/src/mem_hip.cpp                 | 558 +++++++++++++++++++++++++++
+ ggml/src/mem_nvml.cpp                | 209 ++++++++++
+ 9 files changed, 1005 insertions(+), 17 deletions(-)
  create mode 100644 ggml/src/mem_hip.cpp
  create mode 100644 ggml/src/mem_nvml.cpp
...
@@ -58,7 +58,7 @@ index d55aed348..99ae293cc 100644
  set_target_properties(ggml-base PROPERTIES
  diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
-index 6852d2e20..48cdb1dcf 100644
+index 6852d2e20..334a30135 100644
 --- a/ggml/src/ggml-cuda/ggml-cuda.cu
 +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
 @@ -267,6 +267,16 @@ static ggml_cuda_device_info ggml_cuda_init() {
...
@@ -109,7 +109,7 @@ index 6852d2e20..48cdb1dcf 100644
 +
 +#if defined(GGML_USE_HIP)
 +    if (ggml_hip_mgmt_init() == 0) {
-+        int status = ggml_hip_get_device_memory(ctx->pci_bus_id.c_str(), free, total);
++        int status = ggml_hip_get_device_memory(ctx->pci_bus_id.c_str(), free, total, ctx->integrated != 0);
 +        if (status == 0) {
 +            GGML_LOG_DEBUG("%s device %s utilizing AMD specific memory reporting free: %zu total: %zu\n", __func__, ctx->pci_bus_id.c_str(), *free, *total);
 +            ggml_hip_mgmt_release();
...
@@ -204,7 +204,7 @@ index 4e162258d..d89e35a8e 100644
  #define cudaErrorPeerAccessAlreadyEnabled hipErrorPeerAccessAlreadyEnabled
  #define cudaErrorPeerAccessNotEnabled hipErrorPeerAccessNotEnabled
  diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h
-index fe57d4c58..1c07e767a 100644
+index fe57d4c58..dba8f4695 100644
 --- a/ggml/src/ggml-impl.h
 +++ b/ggml/src/ggml-impl.h
 @@ -677,6 +677,14 @@ static inline bool ggml_can_fuse_subgraph(const struct ggml_cgraph * cgraph,
...
@@ -216,7 +216,7 @@ index fe57d4c58..1c07e767a 100644
 +GGML_API int ggml_nvml_get_device_memory(const char *uuid, size_t *free, size_t *total);
 +GGML_API void ggml_nvml_release();
 +GGML_API int ggml_hip_mgmt_init();
-+GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total);
++GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total, bool is_integrated_gpu);
 +GGML_API void ggml_hip_mgmt_release();
 +
  #ifdef __cplusplus
...
@@ -243,7 +243,7 @@ index ba95b4acc..f6f8f7a10 100644
      /* .async = */ true,
      /* .host_buffer = */ false,
  diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
-index 5349bce24..d43d46d1d 100644
+index 5349bce24..0103fd03a 100644
 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
 +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
 @@ -236,6 +236,7 @@ class vk_memory_logger;
...
@@ -334,7 +334,7 @@ index 5349bce24..d43d46d1d 100644
 +    switch (props2.properties.vendorID) {
 +    case VK_VENDOR_ID_AMD:
 +        if (ggml_hip_mgmt_init() == 0) {
-+            int status = ggml_hip_get_device_memory(ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), free, total);
++            int status = ggml_hip_get_device_memory(ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), free, total, ctx->is_integrated_gpu);
 +            if (status == 0) {
 +                GGML_LOG_DEBUG("%s device %s utilizing AMD specific memory reporting free: %zu total: %zu\n", __func__, ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), *free, *total);
 +                ggml_hip_mgmt_release();
...
@@ -505,10 +505,10 @@ index 5349bce24..d43d46d1d 100644
  }
  diff --git a/ggml/src/mem_hip.cpp b/ggml/src/mem_hip.cpp
  new file mode 100644
-index 000000000..c1949b899
+index 000000000..23c765806
 --- /dev/null
 +++ b/ggml/src/mem_hip.cpp
-@@ -0,0 +1,529 @@
+@@ -0,0 +1,558 @@
 +#include "ggml.h"
 +#include "ggml-impl.h"
 +
...
@@ -842,7 +842,7 @@ index 000000000..c1949b899
 +    if (gpus != NULL) gpus->pVtbl->Release(gpus); \
 +    if (gpu != NULL) gpu->pVtbl->Release(gpu)
 +
-+int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
++int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total, bool is_integrated_gpu) {
 +    std::lock_guard<std::mutex> lock(ggml_adlx_lock);
 +    if (adlx.handle == NULL) {
 +        GGML_LOG_INFO("%s ADLX was not initialized\n", __func__);
...
@@ -966,13 +966,16 @@ index 000000000..c1949b899
 +    return 0;
 +}
 +void ggml_hip_mgmt_release() {}
-+int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
++int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total, bool is_integrated_gpu) {
 +    GGML_LOG_INFO("%s searching for device %s\n", __func__, id);
 +    const std::string drmDeviceGlob = "/sys/class/drm/card*/device/uevent";
 +    const std::string drmTotalMemoryFile = "mem_info_vram_total";
 +    const std::string drmUsedMemoryFile = "mem_info_vram_used";
++    const std::string drmGTTTotalMemoryFile = "mem_info_gtt_total";
++    const std::string drmGTTUsedMemoryFile = "mem_info_gtt_used";
 +    const std::string drmUeventPCISlotLabel = "PCI_SLOT_NAME=";
++
 +
 +    glob_t glob_result;
 +    glob(drmDeviceGlob.c_str(), GLOB_NOSORT, NULL, &glob_result);
 +
...
@@ -1006,7 +1009,6 @@ index 000000000..c1949b899
 +
 +    uint64_t memory;
 +    totalFileStream >> memory;
-+    *total = memory;
 +
 +    std::string usedFile = dir + "/" + drmUsedMemoryFile;
 +    std::ifstream usedFileStream(usedFile.c_str());
...
@@ -1019,6 +1021,33 @@ index 000000000..c1949b899
 +
 +    uint64_t memoryUsed;
 +    usedFileStream >> memoryUsed;
++
++    if (is_integrated_gpu) {
++        std::string totalFile = dir + "/" + drmGTTTotalMemoryFile;
++        std::ifstream totalFileStream(totalFile.c_str());
++        if (!totalFileStream.is_open()) {
++            GGML_LOG_DEBUG("%s Failed to read sysfs node %s\n", __func__, totalFile.c_str());
++            file.close();
++            globfree(&glob_result);
++            return 1;
++        }
++        uint64_t gtt;
++        totalFileStream >> gtt;
++        std::string usedFile = dir + "/" + drmGTTUsedMemoryFile;
++        std::ifstream usedFileStream(usedFile.c_str());
++        if (!usedFileStream.is_open()) {
++            GGML_LOG_DEBUG("%s Failed to read sysfs node %s\n", __func__, usedFile.c_str());
++            file.close();
++            globfree(&glob_result);
++            return 1;
++        }
++        uint64_t gttUsed;
++        usedFileStream >> gttUsed;
++        memory += gtt;
++        memoryUsed += gttUsed;
++    }
++
++    *total = memory;
 +    *free = memory - memoryUsed;
 +
 +    file.close();
...
llama/patches/0028-Add-memory-detection-using-DXGI-PDH.patch
...
@@ -24,12 +24,12 @@ index 99ae293cc..9a134b7af 100644
  set_target_properties(ggml-base PROPERTIES
  diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h
-index 1c07e767a..0da3e065b 100644
+index dba8f4695..7e17032c7 100644
 --- a/ggml/src/ggml-impl.h
 +++ b/ggml/src/ggml-impl.h
 @@ -684,6 +684,9 @@
  GGML_API void ggml_nvml_release();
  GGML_API int ggml_hip_mgmt_init();
- GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total);
+ GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total, bool is_integrated_gpu);
  GGML_API void ggml_hip_mgmt_release();
 +GGML_API int ggml_dxgi_pdh_init();
 +GGML_API int ggml_dxgi_pdh_get_device_memory(const char* luid, size_t *free, size_t *total, bool is_integrated_gpu);
...
@@ -38,7 +38,7 @@ index 1c07e767a..0da3e065b 100644
  #ifdef __cplusplus
  }
  diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
-index d43d46d1d..df79f9f79 100644
+index 0103fd03a..9cc4ebdef 100644
 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
 +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
 @@ -74,6 +74,7 @@ DispatchLoaderDynamic & ggml_vk_default_dispatcher();
...
llama/patches/0029-ggml-cuda-skip-large-batches.patch
...
@@ -10,7 +10,7 @@ fallback to cpu
  1 file changed, 3 insertions(+)
  diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
-index 48cdb1dcf..3102d7ea7 100644
+index 334a30135..5c9dfd032 100644
 --- a/ggml/src/ggml-cuda/ggml-cuda.cu
 +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
 @@ -4633,6 +4633,9 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
...
llama/patches/0031-fix-bakllava-regression.patch → llama/patches/0030-fix-bakllava-regression.patch
File moved

llama/patches/0030-win-exit-instead-of-abort.patch → llama/patches/0031-win-exit-instead-of-abort.patch
File moved
ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
...
@@ -4436,7 +4436,7 @@ static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t *
 #if defined(GGML_USE_HIP)
     if (ggml_hip_mgmt_init() == 0) {
-        int status = ggml_hip_get_device_memory(ctx->pci_bus_id.c_str(), free, total);
+        int status = ggml_hip_get_device_memory(ctx->pci_bus_id.c_str(), free, total, ctx->integrated != 0);
         if (status == 0) {
             GGML_LOG_DEBUG("%s device %s utilizing AMD specific memory reporting free: %zu total: %zu\n", __func__, ctx->pci_bus_id.c_str(), *free, *total);
             ggml_hip_mgmt_release();
...
ml/backend/ggml/ggml/src/ggml-impl.h
...
@@ -682,7 +682,7 @@ GGML_API int ggml_nvml_init();
 GGML_API int ggml_nvml_get_device_memory(const char *uuid, size_t *free, size_t *total);
 GGML_API void ggml_nvml_release();
 GGML_API int ggml_hip_mgmt_init();
-GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total);
+GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total, bool is_integrated_gpu);
 GGML_API void ggml_hip_mgmt_release();
 GGML_API int ggml_dxgi_pdh_init();
 GGML_API int ggml_dxgi_pdh_get_device_memory(const char* luid, size_t *free, size_t *total, bool is_integrated_gpu);
...
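
The declarations above imply a three-step protocol for callers: initialize the vendor management layer, query by device id with the new is_integrated_gpu flag, then release. A hedged usage sketch; the PCI bus id string is illustrative only, and real call sites take it from their backend device context:

// Hypothetical call site following the init -> query -> release pattern the
// CUDA and Vulkan backends use in this commit. The PCI bus id is illustrative.
#include <cstdio>
#include "ggml-impl.h"  // declares the ggml_hip_* management API

static void report_amd_memory(const char *pci_bus_id, bool is_integrated_gpu) {
    size_t free_mem = 0, total_mem = 0;
    if (ggml_hip_mgmt_init() != 0) {
        return;  // management layer unavailable; callers fall back elsewhere
    }
    if (ggml_hip_get_device_memory(pci_bus_id, &free_mem, &total_mem,
                                   is_integrated_gpu) == 0) {
        printf("free: %zu total: %zu\n", free_mem, total_mem);
    }
    ggml_hip_mgmt_release();
}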
ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp
...
@@ -13710,7 +13710,7 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size
     switch (props2.properties.vendorID) {
     case VK_VENDOR_ID_AMD:
         if (ggml_hip_mgmt_init() == 0) {
-            int status = ggml_hip_get_device_memory(ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), free, total);
+            int status = ggml_hip_get_device_memory(ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), free, total, ctx->is_integrated_gpu);
             if (status == 0) {
                 GGML_LOG_DEBUG("%s device %s utilizing AMD specific memory reporting free: %zu total: %zu\n", __func__, ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), *free, *total);
                 ggml_hip_mgmt_release();
...
ml/backend/ggml/ggml/src/mem_hip.cpp
...
@@ -331,7 +331,7 @@ void ggml_hip_mgmt_release() {
     if (gpus != NULL) gpus->pVtbl->Release(gpus); \
     if (gpu != NULL) gpu->pVtbl->Release(gpu)
 
-int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
+int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total, bool is_integrated_gpu) {
     std::lock_guard<std::mutex> lock(ggml_adlx_lock);
     if (adlx.handle == NULL) {
         GGML_LOG_INFO("%s ADLX was not initialized\n", __func__);
...
@@ -455,13 +455,16 @@ int ggml_hip_mgmt_init() {
     return 0;
 }
 void ggml_hip_mgmt_release() {}
-int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
+int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total, bool is_integrated_gpu) {
     GGML_LOG_INFO("%s searching for device %s\n", __func__, id);
     const std::string drmDeviceGlob = "/sys/class/drm/card*/device/uevent";
     const std::string drmTotalMemoryFile = "mem_info_vram_total";
     const std::string drmUsedMemoryFile = "mem_info_vram_used";
+    const std::string drmGTTTotalMemoryFile = "mem_info_gtt_total";
+    const std::string drmGTTUsedMemoryFile = "mem_info_gtt_used";
     const std::string drmUeventPCISlotLabel = "PCI_SLOT_NAME=";
+
 
     glob_t glob_result;
     glob(drmDeviceGlob.c_str(), GLOB_NOSORT, NULL, &glob_result);
 
...
@@ -495,7 +498,6 @@ int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
     uint64_t memory;
     totalFileStream >> memory;
-    *total = memory;
 
     std::string usedFile = dir + "/" + drmUsedMemoryFile;
     std::ifstream usedFileStream(usedFile.c_str());
...
@@ -508,6 +510,33 @@ int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
     uint64_t memoryUsed;
     usedFileStream >> memoryUsed;
+
+    if (is_integrated_gpu) {
+        std::string totalFile = dir + "/" + drmGTTTotalMemoryFile;
+        std::ifstream totalFileStream(totalFile.c_str());
+        if (!totalFileStream.is_open()) {
+            GGML_LOG_DEBUG("%s Failed to read sysfs node %s\n", __func__, totalFile.c_str());
+            file.close();
+            globfree(&glob_result);
+            return 1;
+        }
+        uint64_t gtt;
+        totalFileStream >> gtt;
+        std::string usedFile = dir + "/" + drmGTTUsedMemoryFile;
+        std::ifstream usedFileStream(usedFile.c_str());
+        if (!usedFileStream.is_open()) {
+            GGML_LOG_DEBUG("%s Failed to read sysfs node %s\n", __func__, usedFile.c_str());
+            file.close();
+            globfree(&glob_result);
+            return 1;
+        }
+        uint64_t gttUsed;
+        usedFileStream >> gttUsed;
+        memory += gtt;
+        memoryUsed += gttUsed;
+    }
+
+    *total = memory;
     *free = memory - memoryUsed;
 
     file.close();
...
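
The hunks above elide the loop that matches the caller's id to a DRM card. Judging from the constants that are visible (drmDeviceGlob and drmUeventPCISlotLabel), the lookup plausibly globs each card's uevent file and compares its PCI_SLOT_NAME entry against the id; the sketch below is an assumption about that elided code, not a copy of it:

// Assumed shape of the elided matching loop: glob every card's uevent file,
// scan it for a "PCI_SLOT_NAME=" line, and compare the value with the id the
// backend passed in. Not copied from the commit.
#include <cstring>
#include <fstream>
#include <glob.h>
#include <string>

static std::string find_card_dir(const char *id) {
    glob_t g;
    glob("/sys/class/drm/card*/device/uevent", GLOB_NOSORT, NULL, &g);
    std::string match;
    for (size_t i = 0; i < g.gl_pathc && match.empty(); ++i) {
        std::ifstream f(g.gl_pathv[i]);
        std::string line;
        while (std::getline(f, line)) {
            if (line.rfind("PCI_SLOT_NAME=", 0) == 0 &&
                line.substr(strlen("PCI_SLOT_NAME=")) == id) {
                match = g.gl_pathv[i];
                match.erase(match.rfind("/uevent"));  // keep the device dir
                break;
            }
        }
    }
    globfree(&g);
    return match;  // empty when no card matched
}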