Fix some OOM issues with split and sub quad attention.

a373367b · comfyanonymous · 7fbb217d · a373367b · a373367b
Commit a373367b authored Oct 25, 2023 by comfyanonymous
Show whitespace changes
Inline Side-by-side

Showing 2 changed files with 9 additions and 3 deletions

comfy/ldm/modules/attention.py +7 -2

comfy/ldm/modules/sub_quadratic_attention.py +2 -1

No files found.
--- a/comfy/ldm/modules/attention.py
+++ b/comfy/ldm/modules/attention.py
@@ -222,9 +222,14 @@ def attention_split(q, k, v, heads, mask=None):
    mem_free_total = model_management.get_free_memory(q.device)
+    if _ATTN_PRECISION =="fp32":
+        element_size = 4
+    else:
+        element_size = q.element_size()
    gb = 1024 ** 3
-    tensor_size = q.shape[0] * q.shape[1] * k.shape[1] * q.element_size()
+    tensor_size = q.shape[0] * q.shape[1] * k.shape[1] * element_size
-    modifier = 3 if q.element_size() == 2 else 2.5
+    modifier = 3 if element_size == 2 else 2.5
    mem_required = tensor_size * modifier
    steps = 1

--- a/comfy/ldm/modules/sub_quadratic_attention.py
+++ b/comfy/ldm/modules/sub_quadratic_attention.py
@@ -83,7 +83,8 @@ def _summarize_chunk(
        )
    max_score, _ = torch.max(attn_weights, -1, keepdim=True)
    max_score = max_score.detach()
-    torch.exp(attn_weights - max_score, out=attn_weights)
+    attn_weights -= max_score
+    torch.exp(attn_weights, out=attn_weights)
    exp_weights = attn_weights.to(value.dtype)
    exp_values = torch.bmm(exp_weights, value)
    max_score = max_score.squeeze(-1)