samplers.py 17.1 KB
Newer Older
1
2
from .k_diffusion import sampling as k_diffusion_sampling
from .k_diffusion import external as k_diffusion_external
3
from .extra_samplers import uni_pc
comfyanonymous's avatar
comfyanonymous committed
4
5
import torch
import contextlib
6
import model_management
comfyanonymous's avatar
comfyanonymous committed
7
8
9
10
11
12
13

class CFGDenoiser(torch.nn.Module):
    """Wrap a denoising model and combine its cond/uncond predictions with CFG."""

    def __init__(self, model):
        super().__init__()
        self.inner_model = model

    def forward(self, x, sigma, uncond, cond, cond_scale):
        """Return ``uncond + (cond - uncond) * cond_scale`` for the wrapped model.

        Both conditionings are evaluated in a single batched call when their
        first entries have the same length and the latent is small; otherwise
        the wrapped model is called once per conditioning.
        """
        # The second operand is only evaluated when the lengths match.
        run_batched = len(uncond[0]) == len(cond[0]) and (
            x.shape[0] * x.shape[2] * x.shape[3] < (96 * 96)  #TODO check memory instead
        )

        if not run_batched:
            cond_out = self.inner_model(x, sigma, cond=cond)
            uncond_out = self.inner_model(x, sigma, cond=uncond)
        else:
            doubled_x = torch.cat([x, x])
            doubled_sigma = torch.cat([sigma, sigma])
            stacked_cond = torch.cat([uncond, cond])
            # The first half of the batch is the uncond result, the second half the cond result.
            uncond_out, cond_out = self.inner_model(doubled_x, doubled_sigma, cond=stacked_cond).chunk(2)

        return uncond_out + (cond_out - uncond_out) * cond_scale

comfyanonymous's avatar
comfyanonymous committed
24
25
26
27
28

#The main sampling function shared by all the samplers
#Returns predicted noise
def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, cond_concat=None):
    """Evaluate the model on every cond/uncond entry and combine them with CFG.

    Args:
        model_function: callable invoked as ``model_function(x, timestep, cond=dict)``.
        x: latent tensor; dims 2 and 3 are the ones cropped by an ``area``.
        timestep: tensor that is repeated (``torch.cat``) once per batched entry.
        uncond, cond: sequences of ``[cross_attn_tensor, options_dict]``. The
            options dict may hold ``'area'`` as ``(height, width, y, x)``,
            ``'strength'`` and ``'control'``.
        cond_scale: guidance scale.
        cond_concat: optional list of tensors that are cropped to each entry's
            area and concatenated on dim 1 into ``c_concat``.

    Returns:
        ``uncond + (cond - uncond) * cond_scale`` where each side is the
        weight-averaged model output over all of its entries.
    """
    def get_area_and_mult(cond, x_in, cond_concat_in):
        """Crop ``x_in`` to the entry's area and build its blending weights."""
        area = (x_in.shape[2], x_in.shape[3], 0, 0)
        strength = 1.0
        if 'area' in cond[1]:
            area = cond[1]['area']
        if 'strength' in cond[1]:
            strength = cond[1]['strength']

        input_x = x_in[:,:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]]
        mult = torch.ones_like(input_x) * strength

        # Feather the edges that do not touch the border of the full latent.
        # ``mult`` only covers the cropped region, so it has to be indexed in
        # region-local coordinates (row/col 0 is the region's own edge), not
        # with the offsets of the area inside the full latent.
        rr = 8
        height = mult.shape[2]
        width = mult.shape[3]
        # Clamp the ramp to the region size so negative indices never wrap around.
        fade_rows = min(rr, height)
        fade_cols = min(rr, width)
        if area[2] != 0:
            for t in range(fade_rows):
                mult[:,:,t:t + 1,:] *= ((1.0/rr) * (t + 1))
        if (area[0] + area[2]) < x_in.shape[2]:
            for t in range(fade_rows):
                mult[:,:,height - 1 - t:height - t,:] *= ((1.0/rr) * (t + 1))
        if area[3] != 0:
            for t in range(fade_cols):
                mult[:,:,:,t:t + 1] *= ((1.0/rr) * (t + 1))
        if (area[1] + area[3]) < x_in.shape[3]:
            for t in range(fade_cols):
                mult[:,:,:,width - 1 - t:width - t] *= ((1.0/rr) * (t + 1))

        conditionning = {}
        conditionning['c_crossattn'] = cond[0]
        if cond_concat_in is not None and len(cond_concat_in) > 0:
            cropped = []
            for extra in cond_concat_in:
                cropped.append(extra[:,:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]])
            conditionning['c_concat'] = torch.cat(cropped, dim=1)

        control = None
        if 'control' in cond[1]:
            control = cond[1]['control']
        return (input_x, mult, conditionning, area, control)

    def cond_equal_size(c1, c2):
        """True when two conditioning dicts have the same keys and tensor shapes."""
        if c1 is c2:
            return True
        if c1.keys() != c2.keys():
            return False
        if 'c_crossattn' in c1:
            if c1['c_crossattn'].shape != c2['c_crossattn'].shape:
                return False
        if 'c_concat' in c1:
            if c1['c_concat'].shape != c2['c_concat'].shape:
                return False
        return True

    def can_concat_cond(c1, c2):
        """True when two prepared entries can go through the model in one batch."""
        if c1[0].shape != c2[0].shape:
            return False
        # Entries are only batched together when they use the very same control object (or none).
        if (c1[4] is None) != (c2[4] is None):
            return False
        if c1[4] is not None:
            if c1[4] is not c2[4]:
                return False

        return cond_equal_size(c1[2], c2[2])

    def cond_cat(c_list):
        """Concatenate conditioning dicts along the batch dimension."""
        c_crossattn = []
        c_concat = []
        for entry in c_list:
            if 'c_crossattn' in entry:
                c_crossattn.append(entry['c_crossattn'])
            if 'c_concat' in entry:
                c_concat.append(entry['c_concat'])
        out = {}
        if len(c_crossattn) > 0:
            out['c_crossattn'] = [torch.cat(c_crossattn)]
        if len(c_concat) > 0:
            out['c_concat'] = [torch.cat(c_concat)]
        return out

    def calc_cond_uncond_batch(model_function, cond, uncond, x_in, timestep, max_total_area, cond_concat_in):
        """Run every entry (batching compatible ones) and average the outputs."""
        # The counts start slightly above zero so the final division is
        # defined even where no entry covers a position.
        out_cond = torch.zeros_like(x_in)
        out_count = torch.ones_like(x_in)/100000.0

        out_uncond = torch.zeros_like(x_in)
        out_uncond_count = torch.ones_like(x_in)/100000.0

        COND = 0
        UNCOND = 1

        to_run = []
        for entry in cond:
            to_run += [(get_area_and_mult(entry, x_in, cond_concat_in), COND)]
        for entry in uncond:
            to_run += [(get_area_and_mult(entry, x_in, cond_concat_in), UNCOND)]

        while len(to_run) > 0:
            first = to_run[0]
            first_shape = first[0][0].shape
            to_batch_temp = []
            for idx in range(len(to_run)):
                if can_concat_cond(to_run[idx][0], first[0]):
                    to_batch_temp += [idx]

            # Highest indices first so that popping them below does not shift
            # the positions of the indices still to be popped.
            to_batch_temp.reverse()
            to_batch = to_batch_temp[:1]

            # Take the largest fraction (all, half, a third, ...) of the
            # compatible entries whose combined area fits the budget.
            for i in range(1, len(to_batch_temp) + 1):
                batch_amount = to_batch_temp[:len(to_batch_temp)//i]
                if (len(batch_amount) * first_shape[0] * first_shape[2] * first_shape[3] < max_total_area):
                    to_batch = batch_amount
                    break

            input_x = []
            mult = []
            c = []
            cond_or_uncond = []
            area = []
            control = None
            for idx in to_batch:
                o = to_run.pop(idx)
                p = o[0]
                input_x += [p[0]]
                mult += [p[1]]
                c += [p[2]]
                area += [p[3]]
                cond_or_uncond += [o[1]]
                control = p[4]

            batch_chunks = len(cond_or_uncond)
            input_x = torch.cat(input_x)
            c = cond_cat(c)
            timestep_ = torch.cat([timestep] * batch_chunks)

            if control is not None:
                c['control'] = control.get_control(input_x, timestep_, c['c_crossattn'])

            output = model_function(input_x, timestep_, cond=c).chunk(batch_chunks)
            del input_x

            for o in range(batch_chunks):
                a = area[o]
                rows = slice(a[2], a[0] + a[2])
                cols = slice(a[3], a[1] + a[3])
                if cond_or_uncond[o] == COND:
                    out_cond[:,:,rows,cols] += output[o] * mult[o]
                    out_count[:,:,rows,cols] += mult[o]
                else:
                    out_uncond[:,:,rows,cols] += output[o] * mult[o]
                    out_uncond_count[:,:,rows,cols] += mult[o]
            del mult

        out_cond /= out_count
        del out_count
        out_uncond /= out_uncond_count
        del out_uncond_count

        return out_cond, out_uncond


    max_total_area = model_management.maximum_batch_area()
    cond, uncond = calc_cond_uncond_batch(model_function, cond, uncond, x, timestep, max_total_area, cond_concat)
    return uncond + (cond - uncond) * cond_scale
comfyanonymous's avatar
comfyanonymous committed
193

comfyanonymous's avatar
comfyanonymous committed
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213

class CompVisVDenoiser(k_diffusion_external.DiscreteVDDPMDenoiser):
    """``DiscreteVDDPMDenoiser`` whose v-prediction comes from ``apply_model``."""

    def __init__(self, model, quantize=False, device='cpu'):
        # ``device`` is accepted but not used here.
        alphas_cumprod = model.alphas_cumprod
        super().__init__(model, alphas_cumprod, quantize=quantize)

    def get_v(self, x, t, cond, **kwargs):
        """Forward the call to ``self.inner_model.apply_model``."""
        # NOTE(review): ``inner_model`` is presumably set by the base class
        # constructor - it is not assigned in this class.
        apply_model = self.inner_model.apply_model
        return apply_model(x, t, cond, **kwargs)


class CFGNoisePredictor(torch.nn.Module):
    """Expose ``sampling_function`` through an ``apply_model`` method."""

    def __init__(self, model):
        super().__init__()
        self.inner_model = model
        # Mirrored from the wrapped model so it can be read off this wrapper.
        self.alphas_cumprod = model.alphas_cumprod

    def apply_model(self, x, timestep, cond, uncond, cond_scale, cond_concat=None):
        """Run ``sampling_function`` with the wrapped model's ``apply_model``."""
        model_function = self.inner_model.apply_model
        return sampling_function(model_function, x, timestep, uncond, cond, cond_scale, cond_concat)


class KSamplerX0Inpaint(torch.nn.Module):
    """Wrap a denoiser so that masked-out regions stay pinned to ``latent_image``.

    ``latent_image`` and ``noise`` are plain attributes that must be assigned
    before ``forward`` is called with a ``denoise_mask``.
    """

    def __init__(self, model):
        super().__init__()
        self.inner_model = model
        # Declared up front so the attributes always exist; ``forward`` reads
        # them whenever a denoise mask is supplied.
        self.latent_image = None
        self.noise = None

    def forward(self, x, sigma, uncond, cond, cond_scale, denoise_mask, cond_concat=None):
        """Run the wrapped model, keeping the region where ``denoise_mask`` is 0.

        When ``denoise_mask`` is ``None`` this is a plain pass-through to the
        wrapped model.
        """
        latent_mask = None
        if denoise_mask is not None:
            latent_mask = 1. - denoise_mask
            # Outside the mask the input is replaced by the stored latent plus
            # the stored noise scaled by the current sigma.
            x = x * denoise_mask + (self.latent_image + self.noise * sigma) * latent_mask

        out = self.inner_model(x, sigma, cond=cond, uncond=uncond, cond_scale=cond_scale, cond_concat=cond_concat)

        if denoise_mask is not None:
            # In-place on purpose: keeps the dtype of the model output.
            out *= denoise_mask
            out += self.latent_image * latent_mask
        return out
228

comfyanonymous's avatar
comfyanonymous committed
229
230
231
232
233
234
235
236
def simple_scheduler(model, steps):
    """Pick ``steps`` evenly strided sigmas from the end of ``model.sigmas``.

    The sigmas are read starting at the last element and walking towards the
    front; a final ``0.0`` is appended. Returns a ``torch.FloatTensor`` with
    ``steps + 1`` entries.
    """
    stride = len(model.sigmas) / steps
    picked = [float(model.sigmas[-(1 + int(i * stride))]) for i in range(steps)]
    picked.append(0.0)
    return torch.FloatTensor(picked)

comfyanonymous's avatar
comfyanonymous committed
237
238
239
240
241
242
243
244
245
def blank_inpaint_image_like(latent_image):
    """Return a tensor shaped like ``latent_image`` filled with per-channel constants.

    Indices 0-3 along dim 1 are set to fixed values; any further indices stay at 1.
    """
    # these are the values for "zero" in pixel space translated to latent space
    channel_values = (0.8223, -0.6876, 0.6364, 0.1380)
    blank = torch.ones_like(latent_image)
    for channel, value in enumerate(channel_values):
        blank[:, channel] *= value
    return blank

comfyanonymous's avatar
comfyanonymous committed
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
def create_cond_with_same_area_if_none(conds, c):
    """Make sure ``conds`` holds an entry covering exactly the area of ``c``.

    Does nothing when ``c`` has no ``'area'``. Otherwise the best donor in
    ``conds`` is chosen - the smallest area entry that fully encloses the area
    of ``c``, falling back to an entry without any area - and, unless that
    donor already has the identical area, a new entry with the donor's
    conditioning and a copy of the options of ``c`` is appended to ``conds``.
    """
    if 'area' not in c[1]:
        return

    target = c[1]['area']
    donor = None
    for candidate in conds:
        if 'area' not in candidate[1]:
            # An area-less entry is only used while nothing else was found.
            if donor is None:
                donor = candidate
            continue

        a = candidate[1]['area']
        encloses = (
            target[2] >= a[2]
            and target[3] >= a[3]
            and a[0] + a[2] >= target[0] + target[2]
            and a[1] + a[3] >= target[1] + target[3]
        )
        if not encloses:
            continue

        if donor is None or 'area' not in donor[1]:
            donor = candidate
        elif donor[1]['area'][0] * donor[1]['area'][1] > a[0] * a[1]:
            donor = candidate

    if donor is None:
        return
    if 'area' in donor[1] and donor[1]['area'] == target:
        return

    conds.append([donor[0], c[1].copy()])
comfyanonymous's avatar
comfyanonymous committed
275

comfyanonymous's avatar
comfyanonymous committed
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311

def apply_control_net_to_equal_area(conds, uncond):
    """Give the area-less negative entries the ControlNets of the positive ones.

    Only entries without an ``'area'`` are considered. Nothing happens when a
    negative entry already carries a ControlNet, or when there is no area-less
    negative entry without one to receive it. Otherwise each positive
    ControlNet is attached, round-robin, to a copy of an eligible negative
    entry: the first pass replaces the entries in place, and any further
    ControlNets are appended as extra entries so earlier assignments are not
    overwritten.

    ``uncond`` is modified in place; the option dicts of existing entries are
    copied, never mutated.
    """
    def split_by_control(entries):
        """Return (controls, [(entry, index)]) for the entries without an area."""
        controls = []
        without_control = []
        for index, entry in enumerate(entries):
            if 'area' in entry[1]:
                continue
            if 'control' in entry[1] and entry[1]['control'] is not None:
                controls.append(entry[1]['control'])
            else:
                without_control.append((entry, index))
        return controls, without_control

    cond_cnets, _ = split_by_control(conds)
    uncond_cnets, uncond_other = split_by_control(uncond)

    if len(uncond_cnets) > 0:
        return
    # No eligible negative entry: avoid the modulo-by-zero below.
    if len(uncond_other) == 0:
        return

    for position, cnet in enumerate(cond_cnets):
        entry, slot = uncond_other[position % len(uncond_other)]
        options = entry[1].copy()
        options['control'] = cnet
        patched = [entry[0], options]
        if position < len(uncond_other):
            uncond[slot] = patched
        else:
            # This slot already received a ControlNet on an earlier pass.
            uncond.append(patched)

comfyanonymous's avatar
comfyanonymous committed
312
313
314
315
class KSampler:
    """Build a sigma schedule for a model and run one of the listed samplers.

    Unknown ``sampler`` / ``scheduler`` names fall back to the first entry of
    ``SAMPLERS`` / ``SCHEDULERS``.
    """
    SCHEDULERS = ["karras", "normal", "simple"]
    SAMPLERS = ["sample_euler", "sample_euler_ancestral", "sample_heun", "sample_dpm_2", "sample_dpm_2_ancestral",
                "sample_lms", "sample_dpm_fast", "sample_dpm_adaptive", "sample_dpmpp_2s_ancestral", "sample_dpmpp_sde",
                "sample_dpmpp_2m", "uni_pc"]

    def __init__(self, model, steps, device, sampler=None, scheduler=None, denoise=None):
        """Wrap ``model`` in the denoiser stack and compute the initial schedule."""
        self.model = model
        self.model_denoise = CFGNoisePredictor(self.model)
        if self.model.parameterization == "v":
            self.model_wrap = CompVisVDenoiser(self.model_denoise, quantize=True)
        else:
            self.model_wrap = k_diffusion_external.CompVisDenoiser(self.model_denoise, quantize=True)
        self.model_wrap.parameterization = self.model.parameterization
        self.model_k = KSamplerX0Inpaint(self.model_wrap)
        self.device = device
        if scheduler not in self.SCHEDULERS:
            scheduler = self.SCHEDULERS[0]
        if sampler not in self.SAMPLERS:
            sampler = self.SAMPLERS[0]
        self.scheduler = scheduler
        self.sampler = sampler
        self.sigma_min = float(self.model_wrap.sigma_min)
        self.sigma_max = float(self.model_wrap.sigma_max)
        self.set_steps(steps, denoise)

    def _calculate_sigmas(self, steps):
        """Return the sigma schedule for ``steps`` steps with the chosen scheduler."""
        sigmas = None

        # The dpm_2 samplers get one extra step whose penultimate sigma is
        # dropped again below, so the schedule keeps its length.
        discard_penultimate_sigma = False
        if self.sampler in ['sample_dpm_2', 'sample_dpm_2_ancestral']:
            steps += 1
            discard_penultimate_sigma = True

        if self.scheduler == "karras":
            sigmas = k_diffusion_sampling.get_sigmas_karras(n=steps, sigma_min=self.sigma_min, sigma_max=self.sigma_max, device=self.device)
        elif self.scheduler == "normal":
            sigmas = self.model_wrap.get_sigmas(steps).to(self.device)
        elif self.scheduler == "simple":
            sigmas = simple_scheduler(self.model_wrap, steps).to(self.device)
        else:
            print("error invalid scheduler", self.scheduler)

        if discard_penultimate_sigma:
            sigmas = torch.cat([sigmas[:-2], sigmas[-1:]])
        return sigmas

    def set_steps(self, steps, denoise=None):
        """Store ``steps`` and recompute ``self.sigmas``.

        With ``denoise`` given, a schedule of ``int(steps / denoise)`` steps is
        computed and only its last ``steps + 1`` sigmas are kept.
        """
        self.steps = steps
        if denoise is None:
            self.sigmas = self._calculate_sigmas(steps)
        else:
            new_steps = int(steps/denoise)
            sigmas = self._calculate_sigmas(new_steps)
            self.sigmas = sigmas[-(steps + 1):]

    def sample(self, noise, positive, negative, cfg, latent_image=None, start_step=None, last_step=None, force_full_denoise=False, denoise_mask=None):
        """Run the configured sampler and return the result as float32.

        ``start_step`` / ``last_step`` restrict sampling to a sub-range of the
        stored schedule. When ``start_step`` lies at or past the end of that
        range, ``latent_image`` (or zeros shaped like ``noise``) is returned
        without sampling. ``positive`` and ``negative`` are copied before
        being completed, so the caller's lists are not modified.
        """
        sigmas = self.sigmas
        sigma_min = self.sigma_min

        if last_step is not None and last_step < (len(sigmas) - 1):
            sigma_min = sigmas[last_step]
            sigmas = sigmas[:last_step + 1]
            if force_full_denoise:
                # The slice above is a view of self.sigmas: clone before
                # writing so the stored schedule (and sigma_min, which views
                # the same element) are not overwritten with 0.
                sigmas = sigmas.clone()
                sigmas[-1] = 0

        if start_step is not None:
            if start_step < (len(sigmas) - 1):
                sigmas = sigmas[start_step:]
            else:
                if latent_image is not None:
                    return latent_image
                else:
                    return torch.zeros_like(noise)

        positive = positive[:]
        negative = negative[:]
        #make sure each cond area has an opposite one with the same area
        for c in positive:
            create_cond_with_same_area_if_none(negative, c)
        for c in negative:
            create_cond_with_same_area_if_none(positive, c)

        apply_control_net_to_equal_area(positive, negative)

        if self.model.model.diffusion_model.dtype == torch.float16:
            precision_scope = torch.autocast
        else:
            precision_scope = contextlib.nullcontext

        extra_args = {"cond":positive, "uncond":negative, "cond_scale": cfg}

        if hasattr(self.model, 'concat_keys'):
            cond_concat = []
            for ck in self.model.concat_keys:
                if denoise_mask is not None:
                    if ck == "mask":
                        cond_concat.append(denoise_mask[:,:1])
                    elif ck == "masked_image":
                        cond_concat.append(latent_image) #NOTE: the latent_image should be masked by the mask in pixel space
                else:
                    if ck == "mask":
                        cond_concat.append(torch.ones_like(noise)[:,:1])
                    elif ck == "masked_image":
                        cond_concat.append(blank_inpaint_image_like(noise))
            extra_args["cond_concat"] = cond_concat

        with precision_scope(self.device):
            if self.sampler == "uni_pc":
                samples = uni_pc.sample_unipc(self.model_wrap, noise, latent_image, sigmas, sampling_function=sampling_function, extra_args=extra_args, noise_mask=denoise_mask)
            else:
                extra_args["denoise_mask"] = denoise_mask
                # KSamplerX0Inpaint reads these when a denoise mask is set.
                self.model_k.latent_image = latent_image
                self.model_k.noise = noise

                # New tensor: the in-place add below must not touch the
                # unscaled noise stored on model_k.
                noise = noise * sigmas[0]

                if latent_image is not None:
                    noise += latent_image
                if self.sampler == "sample_dpm_fast":
                    samples = k_diffusion_sampling.sample_dpm_fast(self.model_k, noise, sigma_min, sigmas[0], self.steps, extra_args=extra_args)
                elif self.sampler == "sample_dpm_adaptive":
                    samples = k_diffusion_sampling.sample_dpm_adaptive(self.model_k, noise, sigma_min, sigmas[0], extra_args=extra_args)
                else:
                    samples = getattr(k_diffusion_sampling, self.sampler)(self.model_k, noise, sigmas, extra_args=extra_args)

        return samples.to(torch.float32)