Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
ComfyUI
Commits
799c08a4
"vscode:/vscode.git/clone" did not exist on "dd09f15fa7c98467c1411b85fed0c1d2c66d1861"
Commit
799c08a4
authored
Jul 19, 2023
by
comfyanonymous
Browse files
Auto disable cuda malloc on some GPUs on windows.
parent
0b284f65
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
78 additions
and
24 deletions
+78
-24
cuda_malloc.py
cuda_malloc.py
+77
-0
main.py
main.py
+1
-24
No files found.
cuda_malloc.py
0 → 100644
View file @
799c08a4
import
os
import
importlib.util
from
comfy.cli_args
import
args
#Can't use pytorch to get the GPU names because the cuda malloc has to be set before the first import.
def get_gpu_names():
    """Return the set of display-adapter name strings reported by the OS.

    On Windows this walks the Win32 ``EnumDisplayDevicesA`` API and collects
    each adapter's ``DeviceString``. On every other platform no probe is
    attempted and an empty set is returned.
    """
    # Guard clause: nothing to enumerate outside Windows.
    if os.name != 'nt':
        return set()

    import ctypes

    # Mirror of the Win32 DISPLAY_DEVICEA structure (ANSI variant).
    class DISPLAY_DEVICEA(ctypes.Structure):
        _fields_ = [
            ('cb', ctypes.c_ulong),
            ('DeviceName', ctypes.c_char * 32),
            ('DeviceString', ctypes.c_char * 128),
            ('StateFlags', ctypes.c_ulong),
            ('DeviceID', ctypes.c_char * 128),
            ('DeviceKey', ctypes.c_char * 128)
        ]

    user32 = ctypes.windll.user32

    def enum_display_devices():
        info = DISPLAY_DEVICEA()
        # The API rejects the call unless cb carries the struct size.
        info.cb = ctypes.sizeof(info)
        names = set()
        index = 0
        while user32.EnumDisplayDevicesA(None, index, ctypes.byref(info), 0):
            index += 1
            # NOTE(review): DeviceString is an ANSI buffer; decoding as
            # utf-8 matches the original code but could raise on non-ASCII
            # adapter names — callers catch this, so behavior is preserved.
            names.add(info.DeviceString.decode('utf-8'))
        return names

    return enum_display_devices()
def cuda_malloc_supported():
    """Return True unless a known-problematic NVIDIA GPU is detected.

    cudaMallocAsync misbehaves on some older mobile GeForce parts, so any
    NVIDIA adapter whose name contains one of the blacklisted substrings
    disables it. GPU detection is best-effort: if the probe fails for any
    reason, the allocator is assumed to be supported.
    """
    blacklist = {"GeForce GTX 960M", "GeForce GTX 950M", "GeForce 945M",
                 "GeForce 940M", "GeForce 930M", "GeForce 920M",
                 "GeForce 910M"}
    try:
        names = get_gpu_names()
    except Exception:
        # Was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit
        # still propagate while the probe stays best-effort.
        names = set()
    for name in names:
        # Only NVIDIA adapters are relevant; the substring match mirrors
        # the original nested-loop blacklist check.
        if "NVIDIA" in name and any(bad in name for bad in blacklist):
            return False
    return True
# If --cuda-malloc wasn't requested explicitly, enable it by default on
# torch 2.x+ (when the GPU supports it). The version is read from
# torch/version.py directly because importing torch itself would fix the
# allocator before PYTORCH_CUDA_ALLOC_CONF can be set below.
if not args.cuda_malloc:
    try:
        version = ""
        torch_spec = importlib.util.find_spec("torch")
        for folder in torch_spec.submodule_search_locations:
            ver_file = os.path.join(folder, "version.py")
            if os.path.isfile(ver_file):
                spec = importlib.util.spec_from_file_location("torch_version_import", ver_file)
                module = importlib.util.module_from_spec(spec)
                spec.loader.exec_module(module)
                version = module.__version__
        # Compare the full major version number, not just the first
        # character: int(version[0]) would misread e.g. "10.0" as 1.
        if int(version.split('.')[0]) >= 2: #enable by default for torch version 2.0 and up
            args.cuda_malloc = cuda_malloc_supported()
    except Exception:
        # Best effort: if torch isn't installed or the version can't be
        # parsed, leave cuda_malloc at its current (disabled) setting.
        pass
# Opt into PyTorch's async CUDA allocator by creating (or extending)
# PYTORCH_CUDA_ALLOC_CONF — this must happen before torch is imported.
if args.cuda_malloc and not args.disable_cuda_malloc:
    existing = os.environ.get('PYTORCH_CUDA_ALLOC_CONF', None)
    # Preserve any user-provided allocator settings, appending ours.
    setting = "backend:cudaMallocAsync" if existing is None else existing + ",backend:cudaMallocAsync"
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = setting
main.py
View file @
799c08a4
...
@@ -61,30 +61,7 @@ if __name__ == "__main__":
...
@@ -61,30 +61,7 @@ if __name__ == "__main__":
os
.
environ
[
'CUDA_VISIBLE_DEVICES'
]
=
str
(
args
.
cuda_device
)
os
.
environ
[
'CUDA_VISIBLE_DEVICES'
]
=
str
(
args
.
cuda_device
)
print
(
"Set cuda device to:"
,
args
.
cuda_device
)
print
(
"Set cuda device to:"
,
args
.
cuda_device
)
if
not
args
.
cuda_malloc
:
import
cuda_malloc
try
:
#if there's a better way to check the torch version without importing it let me know
version
=
""
torch_spec
=
importlib
.
util
.
find_spec
(
"torch"
)
for
folder
in
torch_spec
.
submodule_search_locations
:
ver_file
=
os
.
path
.
join
(
folder
,
"version.py"
)
if
os
.
path
.
isfile
(
ver_file
):
spec
=
importlib
.
util
.
spec_from_file_location
(
"torch_version_import"
,
ver_file
)
module
=
importlib
.
util
.
module_from_spec
(
spec
)
spec
.
loader
.
exec_module
(
module
)
version
=
module
.
__version__
if
int
(
version
[
0
])
>=
2
:
#enable by default for torch version 2.0 and up
args
.
cuda_malloc
=
True
except
:
pass
if
args
.
cuda_malloc
and
not
args
.
disable_cuda_malloc
:
env_var
=
os
.
environ
.
get
(
'PYTORCH_CUDA_ALLOC_CONF'
,
None
)
if
env_var
is
None
:
env_var
=
"backend:cudaMallocAsync"
else
:
env_var
+=
",backend:cudaMallocAsync"
os
.
environ
[
'PYTORCH_CUDA_ALLOC_CONF'
]
=
env_var
import
comfy.utils
import
comfy.utils
import
yaml
import
yaml
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment