Commit 3fd87cbd authored by comfyanonymous
Browse files

Slightly smarter batching behaviour.

Try to keep batch sizes more consistent, which seems to improve things on
AMD GPUs.
parent bbdcf0b7
......@@ -86,15 +86,21 @@ class CFGDenoiserComplex(torch.nn.Module):
while len(to_run) > 0:
first = to_run[0]
first_shape = first[0][0].shape
to_batch = []
to_batch_temp = []
for x in range(len(to_run)):
if to_run[x][0][0].shape == first_shape:
if to_run[x][0][2].shape == first[0][2].shape:
to_batch += [x]
if (len(to_batch) * first_shape[0] * first_shape[2] * first_shape[3] >= max_total_area):
break
to_batch_temp += [x]
to_batch_temp.reverse()
to_batch = to_batch_temp[:1]
for i in range(1, len(to_batch_temp) + 1):
batch_amount = to_batch_temp[:len(to_batch_temp)//i]
if (len(batch_amount) * first_shape[0] * first_shape[2] * first_shape[3] < max_total_area):
to_batch = batch_amount
break
to_batch.reverse()
input_x = []
mult = []
c = []
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment