samplers.py 24.4 KB
Newer Older
1
2
from .k_diffusion import sampling as k_diffusion_sampling
from .k_diffusion import external as k_diffusion_external
3
from .extra_samplers import uni_pc
comfyanonymous's avatar
comfyanonymous committed
4
5
import torch
import contextlib
6
from comfy import model_management
comfyanonymous's avatar
comfyanonymous committed
7
8
from .ldm.models.diffusion.ddim import DDIMSampler
from .ldm.modules.diffusionmodules.util import make_ddim_timesteps
comfyanonymous's avatar
comfyanonymous committed
9

comfyanonymous's avatar
comfyanonymous committed
10
11
#The main sampling function shared by all the samplers
#Returns predicted noise
12
def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, cond_concat=None, model_options={}):
    """Evaluate the model on every cond/uncond entry and combine the results.

    Each entry of ``cond`` / ``uncond`` is indexed as ``entry[0]`` (passed to
    the model under ``'c_crossattn'``) and ``entry[1]``, a dict of options.
    The option keys read here are ``'area'``, ``'strength'``,
    ``'adm_encoded'``, ``'control'`` and ``'gligen'``.

    Args:
        model_function: callable invoked as
            ``model_function(input_x, timestep_, cond=c)``; its result is
            split with ``.chunk()`` into one piece per batched entry.
        x: input tensor; indexed as 4-D, dims 2 and 3 are the ones cropped
            by an entry's ``'area'``.
        timestep: tensor that is repeated once per batched entry.
        uncond: list of negative entries.
        cond: list of positive entries.
        cond_scale: guidance scale used in the final combination.
        cond_concat: optional list of tensors; each is cropped to the entry's
            area and they are concatenated on dim 1 as ``'c_concat'``.
        model_options: dict; the keys read are ``'transformer_options'`` and
            ``'sampler_cfg_function'``. It is never mutated here.

    Returns:
        ``model_options['sampler_cfg_function'](cond, uncond, cond_scale)``
        when that key is present, otherwise
        ``uncond + (cond - uncond) * cond_scale``.
    """

    def get_area_and_mult(cond, x_in, cond_concat_in, timestep_in):
        """Crop ``x_in`` to the entry's area and build its blending weights.

        Returns the tuple
        ``(input_x, mult, conditionning, area, control, patches)``.
        """
        # area = (size on dim 2, size on dim 3, offset on dim 2, offset on dim 3);
        # the default covers the whole input.
        area = (x_in.shape[2], x_in.shape[3], 0, 0)
        strength = 1.0
        if 'area' in cond[1]:
            area = cond[1]['area']
        if 'strength' in cond[1]:
            strength = cond[1]['strength']

        adm_cond = None
        if 'adm_encoded' in cond[1]:
            adm_cond = cond[1]['adm_encoded']

        input_x = x_in[:,:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]]
        mult = torch.ones_like(input_x) * strength

        # Ramp the weight over `rr` rows/columns on every edge of the area that
        # does not coincide with the border of x_in. The weights are later used
        # both to scale the output and to normalise the accumulated sum.
        rr = 8
        if area[2] != 0:
            for t in range(rr):
                mult[:,:,t:1+t,:] *= ((1.0/rr) * (t + 1))
        if (area[0] + area[2]) < x_in.shape[2]:
            for t in range(rr):
                mult[:,:,area[0] - 1 - t:area[0] - t,:] *= ((1.0/rr) * (t + 1))
        if area[3] != 0:
            for t in range(rr):
                mult[:,:,:,t:1+t] *= ((1.0/rr) * (t + 1))
        if (area[1] + area[3]) < x_in.shape[3]:
            for t in range(rr):
                mult[:,:,:,area[1] - 1 - t:area[1] - t] *= ((1.0/rr) * (t + 1))

        conditionning = {}
        conditionning['c_crossattn'] = cond[0]
        if cond_concat_in is not None and len(cond_concat_in) > 0:
            cropped = []
            for concat_item in cond_concat_in:
                cr = concat_item[:,:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]]
                cropped.append(cr)
            conditionning['c_concat'] = torch.cat(cropped, dim=1)

        if adm_cond is not None:
            conditionning['c_adm'] = adm_cond

        control = None
        if 'control' in cond[1]:
            control = cond[1]['control']

        patches = None
        if 'gligen' in cond[1]:
            # gligen is indexed as (type, model, extra argument for "position").
            gligen = cond[1]['gligen']
            patches = {}
            gligen_type = gligen[0]
            gligen_model = gligen[1]
            if gligen_type == "position":
                gligen_patch = gligen_model.set_position(input_x.shape, gligen[2], input_x.device)
            else:
                gligen_patch = gligen_model.set_empty(input_x.shape, input_x.device)

            patches['middle_patch'] = [gligen_patch]

        return (input_x, mult, conditionning, area, control, patches)

    def cond_equal_size(c1, c2):
        """Return True when two conditioning dicts have the same keys and tensor shapes."""
        if c1 is c2:
            return True
        if c1.keys() != c2.keys():
            return False
        if 'c_crossattn' in c1:
            if c1['c_crossattn'].shape != c2['c_crossattn'].shape:
                return False
        if 'c_concat' in c1:
            if c1['c_concat'].shape != c2['c_concat'].shape:
                return False
        if 'c_adm' in c1:
            if c1['c_adm'].shape != c2['c_adm'].shape:
                return False
        return True

    def can_concat_cond(c1, c2):
        """Return True when two ``get_area_and_mult`` tuples can share one model call."""
        if c1[0].shape != c2[0].shape:
            return False

        #control
        # Both entries must reference the very same control object (or none).
        if (c1[4] is None) != (c2[4] is None):
            return False
        if c1[4] is not None:
            if c1[4] is not c2[4]:
                return False

        #patches
        # Same identity rule as for control.
        if (c1[5] is None) != (c2[5] is None):
            return False
        if (c1[5] is not None):
            if c1[5] is not c2[5]:
                return False

        return cond_equal_size(c1[2], c2[2])

    def cond_cat(c_list):
        """Concatenate a list of conditioning dicts into one batched dict."""
        c_crossattn = []
        c_concat = []
        c_adm = []
        for entry in c_list:
            if 'c_crossattn' in entry:
                c_crossattn.append(entry['c_crossattn'])
            if 'c_concat' in entry:
                c_concat.append(entry['c_concat'])
            if 'c_adm' in entry:
                c_adm.append(entry['c_adm'])
        out = {}
        # c_crossattn / c_concat are wrapped in a one-element list, c_adm is not.
        if len(c_crossattn) > 0:
            out['c_crossattn'] = [torch.cat(c_crossattn)]
        if len(c_concat) > 0:
            out['c_concat'] = [torch.cat(c_concat)]
        if len(c_adm) > 0:
            out['c_adm'] = torch.cat(c_adm)
        return out

    def calc_cond_uncond_batch(model_function, cond, uncond, x_in, timestep, max_total_area, cond_concat_in, model_options):
        """Run the model over all entries, batching compatible ones together.

        Returns ``(out_cond, out_uncond)``: the weighted averages of the model
        outputs for the positive and the negative entries.
        """
        # The counters start at a tiny non-zero value so that the final
        # division is defined even where no entry contributed.
        out_cond = torch.zeros_like(x_in)
        out_count = torch.ones_like(x_in)/100000.0

        out_uncond = torch.zeros_like(x_in)
        out_uncond_count = torch.ones_like(x_in)/100000.0

        COND = 0
        UNCOND = 1

        to_run = []
        for entry in cond:
            p = get_area_and_mult(entry, x_in, cond_concat_in, timestep)
            if p is None:
                continue

            to_run += [(p, COND)]
        for entry in uncond:
            p = get_area_and_mult(entry, x_in, cond_concat_in, timestep)
            if p is None:
                continue

            to_run += [(p, UNCOND)]

        while len(to_run) > 0:
            first = to_run[0]
            first_shape = first[0][0].shape
            to_batch_temp = []
            for idx in range(len(to_run)):
                if can_concat_cond(to_run[idx][0], first[0]):
                    to_batch_temp += [idx]

            # Highest indices first, so popping them from to_run below does
            # not shift the indices that are still pending.
            to_batch_temp.reverse()
            to_batch = to_batch_temp[:1]

            # Take the largest fraction (all, half, a third, ...) of the
            # compatible entries whose total area stays below max_total_area.
            for i in range(1, len(to_batch_temp) + 1):
                batch_amount = to_batch_temp[:len(to_batch_temp)//i]
                if (len(batch_amount) * first_shape[0] * first_shape[2] * first_shape[3] < max_total_area):
                    to_batch = batch_amount
                    break

            input_x = []
            mult = []
            c = []
            cond_or_uncond = []
            area = []
            control = None
            patches = None
            for idx in to_batch:
                o = to_run.pop(idx)
                p = o[0]
                input_x += [p[0]]
                mult += [p[1]]
                c += [p[2]]
                area += [p[3]]
                cond_or_uncond += [o[1]]
                # can_concat_cond guarantees these are the same objects for
                # every entry of the batch.
                control = p[4]
                patches = p[5]

            batch_chunks = len(cond_or_uncond)
            input_x = torch.cat(input_x)
            c = cond_cat(c)
            timestep_ = torch.cat([timestep] * batch_chunks)

            if control is not None:
                c['control'] = control.get_control(input_x, timestep_, c['c_crossattn'], len(cond_or_uncond))

            transformer_options = {}
            if 'transformer_options' in model_options:
                # Shallow copy: the caller's dict must not be modified.
                transformer_options = model_options['transformer_options'].copy()

            if patches is not None:
                if "patches" in transformer_options:
                    cur_patches = transformer_options["patches"].copy()
                    for patch_name in patches:
                        if patch_name in cur_patches:
                            cur_patches[patch_name] = cur_patches[patch_name] + patches[patch_name]
                        else:
                            cur_patches[patch_name] = patches[patch_name]
                    # Store the merged dict; previously it was built and then
                    # dropped, so the entry's patches were silently ignored
                    # whenever the model options already carried patches.
                    transformer_options["patches"] = cur_patches
                else:
                    transformer_options["patches"] = patches

            c['transformer_options'] = transformer_options

            output = model_function(input_x, timestep_, cond=c).chunk(batch_chunks)
            del input_x

            model_management.throw_exception_if_processing_interrupted()

            # Accumulate each output into its area, weighted by mult, and keep
            # the sum of weights for the normalisation below.
            for o in range(batch_chunks):
                if cond_or_uncond[o] == COND:
                    out_cond[:,:,area[o][2]:area[o][0] + area[o][2],area[o][3]:area[o][1] + area[o][3]] += output[o] * mult[o]
                    out_count[:,:,area[o][2]:area[o][0] + area[o][2],area[o][3]:area[o][1] + area[o][3]] += mult[o]
                else:
                    out_uncond[:,:,area[o][2]:area[o][0] + area[o][2],area[o][3]:area[o][1] + area[o][3]] += output[o] * mult[o]
                    out_uncond_count[:,:,area[o][2]:area[o][0] + area[o][2],area[o][3]:area[o][1] + area[o][3]] += mult[o]
            del mult

        out_cond /= out_count
        del out_count
        out_uncond /= out_uncond_count
        del out_uncond_count

        return out_cond, out_uncond

    max_total_area = model_management.maximum_batch_area()
    cond, uncond = calc_cond_uncond_batch(model_function, cond, uncond, x, timestep, max_total_area, cond_concat, model_options)
    if "sampler_cfg_function" in model_options:
        return model_options["sampler_cfg_function"](cond, uncond, cond_scale)
    else:
        return uncond + (cond - uncond) * cond_scale
comfyanonymous's avatar
comfyanonymous committed
240

comfyanonymous's avatar
comfyanonymous committed
241
242
243
244
245
246
247
248
249
250
251
252
253
254

class CompVisVDenoiser(k_diffusion_external.DiscreteVDDPMDenoiser):
    """Denoiser wrapper whose ``get_v`` forwards to the inner model's ``apply_model``."""

    def __init__(self, model, quantize=False, device='cpu'):
        # `device` is accepted for interface compatibility; it is not used here.
        alphas_cumprod = model.alphas_cumprod
        super().__init__(model, alphas_cumprod, quantize=quantize)

    def get_v(self, x, t, cond, **kwargs):
        """Return ``self.inner_model.apply_model(x, t, cond, **kwargs)``."""
        prediction = self.inner_model.apply_model(x, t, cond, **kwargs)
        return prediction


class CFGNoisePredictor(torch.nn.Module):
    """Module whose ``apply_model`` routes through ``sampling_function``.

    The wrapped model's ``apply_model`` is used as the model function, and its
    ``alphas_cumprod`` is re-exposed on this object.
    """

    def __init__(self, model):
        super().__init__()
        self.inner_model = model
        self.alphas_cumprod = model.alphas_cumprod

    def apply_model(self, x, timestep, cond, uncond, cond_scale, cond_concat=None, model_options={}):
        """Return ``sampling_function`` evaluated with the wrapped model.

        Note the argument order: this method takes ``cond`` before ``uncond``
        while ``sampling_function`` takes ``uncond`` before ``cond``.
        ``model_options`` is only passed through.
        """
        out = sampling_function(self.inner_model.apply_model, x, timestep, uncond, cond, cond_scale, cond_concat, model_options=model_options)
        return out


class KSamplerX0Inpaint(torch.nn.Module):
    """Wraps a denoiser so that masked-out regions keep the original latent.

    ``latent_image`` and ``noise`` are not set by ``__init__``; they must be
    assigned on the instance before ``forward`` is called with a
    ``denoise_mask``.
    """

    def __init__(self, model):
        super().__init__()
        self.inner_model = model

    def forward(self, x, sigma, uncond, cond, cond_scale, denoise_mask, cond_concat=None, model_options={}):
        """Call the inner model, blending with the stored latent where masked.

        When ``denoise_mask`` is None this is a plain pass-through to
        ``self.inner_model``. Otherwise the input is replaced by
        ``latent_image + noise * sigma`` wherever ``1 - denoise_mask`` is
        non-zero, and the output is replaced by ``latent_image`` in those same
        places. ``model_options`` is only passed through.
        """
        if denoise_mask is not None:
            latent_mask = 1. - denoise_mask
            # Reshape sigma to (batch, 1, 1, ...) so it broadcasts over noise.
            sigma_b = sigma.reshape([sigma.shape[0]] + [1] * (len(self.noise.shape) - 1))
            x = x * denoise_mask + (self.latent_image + self.noise * sigma_b) * latent_mask
        out = self.inner_model(x, sigma, cond=cond, uncond=uncond, cond_scale=cond_scale, cond_concat=cond_concat, model_options=model_options)
        if denoise_mask is not None:
            out *= denoise_mask
            out += self.latent_image * latent_mask
        return out
275

comfyanonymous's avatar
comfyanonymous committed
276
277
278
279
280
281
282
283
def simple_scheduler(model, steps):
    """Pick ``steps`` evenly strided sigmas from the end of ``model.sigmas``.

    Walks ``model.sigmas`` backwards with a stride of
    ``len(model.sigmas) / steps`` and appends a final ``0.0``.
    Returns a ``torch.FloatTensor`` with ``steps + 1`` values.
    """
    stride = len(model.sigmas) / steps
    picked = [float(model.sigmas[-(1 + int(step * stride))]) for step in range(steps)]
    picked.append(0.0)
    return torch.FloatTensor(picked)

comfyanonymous's avatar
comfyanonymous committed
284
285
286
287
def ddim_scheduler(model, steps):
    """Build a sigma schedule from uniformly spaced DDIM timesteps.

    The timesteps come from ``make_ddim_timesteps`` with the ``"uniform"``
    method; they are walked from last to first, capped at 999, converted with
    ``model.t_to_sigma`` and followed by a final ``0.0``.

    Args:
        model: wrapper providing ``t_to_sigma`` and
            ``inner_model.inner_model.num_timesteps``.
        steps: number of DDIM timesteps requested.

    Returns:
        A ``torch.FloatTensor`` of sigmas ending in ``0.0``.
    """
    sigs = []
    ddim_timesteps = make_ddim_timesteps(ddim_discr_method="uniform", num_ddim_timesteps=steps, num_ddpm_timesteps=model.inner_model.inner_model.num_timesteps, verbose=False)
    for x in range(len(ddim_timesteps) - 1, -1, -1):
        ts = ddim_timesteps[x]
        # NOTE(review): 999 is hard-coded; presumably num_timesteps - 1 for a
        # 1000-step model -- confirm before using other step counts.
        if ts > 999:
            ts = 999
        sigs.append(model.t_to_sigma(torch.tensor(ts)))
    sigs += [0.0]
    return torch.FloatTensor(sigs)

comfyanonymous's avatar
comfyanonymous committed
295
296
297
298
299
300
301
302
303
def blank_inpaint_image_like(latent_image):
    """Return a tensor shaped like ``latent_image`` holding the "blank" latent.

    Channels 0-3 (dim 1) are filled with fixed constants; any further
    channels stay at 1.
    """
    # these are the values for "zero" in pixel space translated to latent space
    channel_values = (0.8223, -0.6876, 0.6364, 0.1380)
    blank = torch.ones_like(latent_image)
    for channel, value in enumerate(channel_values):
        blank[:, channel] *= value
    return blank

comfyanonymous's avatar
comfyanonymous committed
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
def create_cond_with_same_area_if_none(conds, c):
    """Ensure ``conds`` has an entry covering exactly the area of ``c``.

    Does nothing when ``c`` has no ``'area'``. Otherwise looks in ``conds``
    for the smallest entry whose area contains the area of ``c`` (falling
    back to the first area-less entry) and, unless that entry already has the
    very same area, appends ``[entry[0], copy of c[1]]`` to ``conds`` in place.
    """
    if 'area' not in c[1]:
        return

    wanted = c[1]['area']
    best = None
    for candidate in conds:
        options = candidate[1]
        if 'area' not in options:
            # An area-less entry is only a fallback for "nothing found yet".
            if best is None:
                best = candidate
            continue

        a = options['area']
        # Skip candidates whose area does not fully contain the wanted one.
        if not (wanted[2] >= a[2] and wanted[3] >= a[3]):
            continue
        if not (a[0] + a[2] >= wanted[0] + wanted[2]):
            continue
        if not (a[1] + a[3] >= wanted[1] + wanted[3]):
            continue

        if best is None or 'area' not in best[1]:
            best = candidate
        elif best[1]['area'][0] * best[1]['area'][1] > a[0] * a[1]:
            best = candidate

    if best is None:
        return
    if 'area' in best[1] and best[1]['area'] == wanted:
        return

    options_copy = c[1].copy()
    conds += [[best[0], options_copy]]
comfyanonymous's avatar
comfyanonymous committed
333

334
def apply_empty_x_to_equal_area(conds, uncond, name, uncond_fill_func):
    """Give area-less negative entries the ``name`` option of the positives.

    Collects the non-None ``name`` values of every area-less entry in
    ``conds``. If no area-less entry of ``uncond`` already has a non-None
    ``name`` value, each collected value is written (via ``uncond_fill_func``)
    into a copy of an area-less ``uncond`` entry, chosen round-robin.
    ``uncond`` is modified in place.

    Args:
        conds: list of positive entries, each indexed as ``[value, options]``.
        uncond: list of negative entries, same layout; mutated in place.
        name: option key to propagate (e.g. ``'control'``).
        uncond_fill_func: called as ``uncond_fill_func(cond_values, index)``
            to produce the value stored under ``name``.

    Returns:
        None.
    """
    cond_cnets = []
    uncond_cnets = []
    uncond_other = []
    for entry in conds:
        if 'area' not in entry[1]:
            if name in entry[1] and entry[1][name] is not None:
                cond_cnets.append(entry[1][name])
    for t, entry in enumerate(uncond):
        if 'area' not in entry[1]:
            if name in entry[1] and entry[1][name] is not None:
                uncond_cnets.append(entry[1][name])
            else:
                uncond_other.append((entry, t))

    if len(uncond_cnets) > 0:
        return

    # Nothing to attach the values to; without this guard the modulo below
    # raised ZeroDivisionError when every negative entry had an area (or the
    # negative list was empty).
    if not uncond_other:
        return

    for x in range(len(cond_cnets)):
        temp = uncond_other[x % len(uncond_other)]
        o = temp[0]
        # NOTE(review): `o` is the entry captured before any replacement, and
        # entries only reach uncond_other when `name` is missing or None, so
        # this first branch looks unreachable unless uncond_fill_func mutates
        # its inputs. As a result a second value mapped to the same slot
        # overwrites the first instead of being appended -- confirm intent.
        if name in o[1] and o[1][name] is not None:
            n = o[1].copy()
            n[name] = uncond_fill_func(cond_cnets, x)
            uncond += [[o[0], n]]
        else:
            n = o[1].copy()
            n[name] = uncond_fill_func(cond_cnets, x)
            uncond[temp[1]] = [o[0], n]

369

370
371
372
373
374
375
def encode_adm(noise_augmentor, conds, batch_size, device):
    """Attach an ``'adm_encoded'`` tensor to every entry of ``conds``.

    For an entry whose options contain a non-empty ``'adm'`` list, each item
    is indexed as ``(object with .image_embeds, weight, noise_augment)``; the
    embeds are passed through ``noise_augmentor`` and scaled by the weight.
    When there is more than one item the results are summed and augmented
    once more with a fixed 0.05 noise level. Entries without ``'adm'`` (or
    with an empty list) get zeros of width ``2 * time_embed.dim``.

    Args:
        noise_augmentor: callable as
            ``noise_augmentor(embeds, noise_level=tensor)`` returning a pair
            of tensors; must expose ``max_noise_level`` and ``time_embed.dim``.
        conds: list of entries indexed as ``[value, options dict]``. The list
            slots are replaced, but the entries and option dicts that were
            passed in are left untouched.
        batch_size: number of times the encoded tensor is repeated on dim 0.
        device: device for the tensors created here.

    Returns:
        ``conds`` (the same list object).
    """
    for t in range(len(conds)):
        x = conds[t]
        adm_in = x[1].get("adm")
        # An empty list is treated like a missing key; previously it left
        # adm_out undefined (or stale from the previous entry).
        if adm_in is not None and len(adm_in) > 0:
            adm_inputs = []
            noise_aug = []
            for adm_c in adm_in:
                adm_cond = adm_c[0].image_embeds
                weight = adm_c[1]
                noise_augment = adm_c[2]
                noise_level = round((noise_augmentor.max_noise_level - 1) * noise_augment)
                c_adm, noise_level_emb = noise_augmentor(adm_cond.to(device), noise_level=torch.tensor([noise_level], device=device))
                adm_out = torch.cat((c_adm, noise_level_emb), 1) * weight
                noise_aug.append(noise_augment)
                adm_inputs.append(adm_out)

            if len(noise_aug) > 1:
                adm_out = torch.stack(adm_inputs).sum(0)
                #TODO: add a way to control this
                noise_augment = 0.05
                noise_level = round((noise_augmentor.max_noise_level - 1) * noise_augment)
                # Only the first time_embed.dim columns are re-augmented; the
                # noise-level embedding half is regenerated.
                c_adm, noise_level_emb = noise_augmentor(adm_out[:, :noise_augmentor.time_embed.dim], noise_level=torch.tensor([noise_level], device=device))
                adm_out = torch.cat((c_adm, noise_level_emb), 1)
        else:
            adm_out = torch.zeros((1, noise_augmentor.time_embed.dim * 2), device=device)

        # Build a fresh entry instead of assigning into `x`, which may be
        # shared with the caller's original conditioning list.
        entry = list(x)
        entry[1] = x[1].copy()
        entry[1]["adm_encoded"] = torch.cat([adm_out] * batch_size)
        conds[t] = entry

    return conds

403

comfyanonymous's avatar
comfyanonymous committed
404
class KSampler:
    """Front end that builds a sigma schedule and dispatches to a sampler.

    Unknown ``sampler`` / ``scheduler`` names fall back to the first entry of
    ``SAMPLERS`` / ``SCHEDULERS``.
    """

    SCHEDULERS = ["karras", "normal", "simple", "ddim_uniform"]
    SAMPLERS = ["euler", "euler_ancestral", "heun", "dpm_2", "dpm_2_ancestral",
                "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde",
                "dpmpp_2m", "ddim", "uni_pc", "uni_pc_bh2"]

    def __init__(self, model, steps, device, sampler=None, scheduler=None, denoise=None, model_options={}):
        """Wrap ``model`` for sampling and precompute the sigma schedule.

        Args:
            model: model exposing ``parameterization`` and ``alphas_cumprod``.
            steps: number of sampling steps.
            device: device the sigma schedule is moved to.
            sampler: name from ``SAMPLERS``.
            scheduler: name from ``SCHEDULERS``.
            denoise: optional denoise strength, see ``set_steps``.
            model_options: dict stored and forwarded to the model unchanged.
        """
        self.model = model
        self.model_denoise = CFGNoisePredictor(self.model)
        if self.model.parameterization == "v":
            self.model_wrap = CompVisVDenoiser(self.model_denoise, quantize=True)
        else:
            self.model_wrap = k_diffusion_external.CompVisDenoiser(self.model_denoise, quantize=True)
        self.model_wrap.parameterization = self.model.parameterization
        self.model_k = KSamplerX0Inpaint(self.model_wrap)
        self.device = device
        if scheduler not in self.SCHEDULERS:
            scheduler = self.SCHEDULERS[0]
        if sampler not in self.SAMPLERS:
            sampler = self.SAMPLERS[0]
        self.scheduler = scheduler
        self.sampler = sampler
        self.sigma_min=float(self.model_wrap.sigma_min)
        self.sigma_max=float(self.model_wrap.sigma_max)
        self.set_steps(steps, denoise)
        self.denoise = denoise
        self.model_options = model_options

    def calculate_sigmas(self, steps):
        """Return the sigma schedule for ``steps`` steps of the current scheduler.

        For the ``dpm_2`` samplers one extra step is computed and the
        second-to-last sigma is then dropped. An unknown scheduler prints an
        error and yields ``None`` (or fails in the discard step).
        """
        sigmas = None

        discard_penultimate_sigma = False
        if self.sampler in ['dpm_2', 'dpm_2_ancestral']:
            steps += 1
            discard_penultimate_sigma = True

        if self.scheduler == "karras":
            sigmas = k_diffusion_sampling.get_sigmas_karras(n=steps, sigma_min=self.sigma_min, sigma_max=self.sigma_max)
        elif self.scheduler == "normal":
            sigmas = self.model_wrap.get_sigmas(steps)
        elif self.scheduler == "simple":
            sigmas = simple_scheduler(self.model_wrap, steps)
        elif self.scheduler == "ddim_uniform":
            sigmas = ddim_scheduler(self.model_wrap, steps)
        else:
            print("error invalid scheduler", self.scheduler)

        if discard_penultimate_sigma:
            sigmas = torch.cat([sigmas[:-2], sigmas[-1:]])
        return sigmas

    def set_steps(self, steps, denoise=None):
        """Store ``steps`` and (re)compute ``self.sigmas``.

        With ``denoise`` None or above 0.9999 the full schedule is used.
        Otherwise a schedule of ``int(steps / denoise)`` steps is computed and
        only its last ``steps + 1`` sigmas are kept.
        """
        self.steps = steps
        if denoise is None or denoise > 0.9999:
            self.sigmas = self.calculate_sigmas(steps).to(self.device)
        else:
            new_steps = int(steps/denoise)
            sigmas = self.calculate_sigmas(new_steps).to(self.device)
            self.sigmas = sigmas[-(steps + 1):]

    def sample(self, noise, positive, negative, cfg, latent_image=None, start_step=None, last_step=None, force_full_denoise=False, denoise_mask=None, sigmas=None, callback=None):
        """Run the configured sampler and return the result as float32.

        Args:
            noise: noise tensor; its dim 0 is used as the batch size.
            positive: list of positive conditioning entries.
            negative: list of negative conditioning entries.
            cfg: guidance scale.
            latent_image: optional starting latent.
            start_step: optional index of the first sigma to use; when it is
                past the end of the schedule ``latent_image`` (or zeros) is
                returned immediately.
            last_step: optional index of the last sigma to use.
            force_full_denoise: with ``last_step``, force the final sigma to 0.
            denoise_mask: optional mask, forwarded to the sampler.
            sigmas: optional schedule overriding ``self.sigmas``.
            callback: optional progress callback.

        Returns:
            The sampled tensor converted to ``torch.float32``, or the early
            return value described under ``start_step``.
        """
        if sigmas is None:
            sigmas = self.sigmas
        sigma_min = self.sigma_min

        if last_step is not None and last_step < (len(sigmas) - 1):
            sigmas = sigmas[:last_step + 1]
            if force_full_denoise:
                # The slice is a view; clone before writing so that neither
                # self.sigmas nor a caller-supplied schedule is modified.
                sigmas = sigmas.clone()
                sigmas[-1] = 0
            # Taken after the write so it reflects the forced 0, exactly as
            # the previous aliased view did.
            sigma_min = sigmas[last_step]

        if start_step is not None:
            if start_step < (len(sigmas) - 1):
                sigmas = sigmas[start_step:]
            else:
                if latent_image is not None:
                    return latent_image
                else:
                    return torch.zeros_like(noise)

        positive = positive[:]
        negative = negative[:]
        #make sure each cond area has an opposite one with the same area
        for c in positive:
            create_cond_with_same_area_if_none(negative, c)
        for c in negative:
            create_cond_with_same_area_if_none(positive, c)

        apply_empty_x_to_equal_area(positive, negative, 'control', lambda cond_cnets, x: cond_cnets[x])
        apply_empty_x_to_equal_area(positive, negative, 'gligen', lambda cond_cnets, x: cond_cnets[x])

        if self.model.model.diffusion_model.dtype == torch.float16:
            precision_scope = torch.autocast
        else:
            precision_scope = contextlib.nullcontext

        if hasattr(self.model, 'noise_augmentor'): #unclip
            positive = encode_adm(self.model.noise_augmentor, positive, noise.shape[0], self.device)
            negative = encode_adm(self.model.noise_augmentor, negative, noise.shape[0], self.device)

        extra_args = {"cond":positive, "uncond":negative, "cond_scale": cfg, "model_options": self.model_options}

        cond_concat = None
        if hasattr(self.model, 'concat_keys'): #inpaint
            cond_concat = []
            for ck in self.model.concat_keys:
                if denoise_mask is not None:
                    if ck == "mask":
                        cond_concat.append(denoise_mask[:,:1])
                    elif ck == "masked_image":
                        cond_concat.append(latent_image) #NOTE: the latent_image should be masked by the mask in pixel space
                else:
                    if ck == "mask":
                        cond_concat.append(torch.ones_like(noise)[:,:1])
                    elif ck == "masked_image":
                        cond_concat.append(blank_inpaint_image_like(noise))
            extra_args["cond_concat"] = cond_concat

        # max_denoise only when sampling starts at the top of the stored
        # schedule and no partial denoise strength was requested.
        if sigmas[0] != self.sigmas[0] or (self.denoise is not None and self.denoise < 1.0):
            max_denoise = False
        else:
            max_denoise = True

        with precision_scope(model_management.get_autocast_device(self.device)):
            if self.sampler == "uni_pc":
                samples = uni_pc.sample_unipc(self.model_wrap, noise, latent_image, sigmas, sampling_function=sampling_function, max_denoise=max_denoise, extra_args=extra_args, noise_mask=denoise_mask, callback=callback)
            elif self.sampler == "uni_pc_bh2":
                samples = uni_pc.sample_unipc(self.model_wrap, noise, latent_image, sigmas, sampling_function=sampling_function, max_denoise=max_denoise, extra_args=extra_args, noise_mask=denoise_mask, callback=callback, variant='bh2')
            elif self.sampler == "ddim":
                # DDIM works on timesteps; convert the sigmas and reverse them.
                timesteps = []
                for s in range(sigmas.shape[0]):
                    timesteps.insert(0, self.model_wrap.sigma_to_t(sigmas[s]))
                noise_mask = None
                if denoise_mask is not None:
                    noise_mask = 1.0 - denoise_mask

                ddim_callback = None
                if callback is not None:
                    ddim_callback = lambda pred_x0, i: callback(i, pred_x0, None)

                sampler = DDIMSampler(self.model, device=self.device)
                sampler.make_schedule_timesteps(ddim_timesteps=timesteps, verbose=False)
                z_enc = sampler.stochastic_encode(latent_image, torch.tensor([len(timesteps) - 1] * noise.shape[0]).to(self.device), noise=noise, max_denoise=max_denoise)
                samples, _ = sampler.sample_custom(ddim_timesteps=timesteps,
                                                     conditioning=positive,
                                                     batch_size=noise.shape[0],
                                                     shape=noise.shape[1:],
                                                     verbose=False,
                                                     unconditional_guidance_scale=cfg,
                                                     unconditional_conditioning=negative,
                                                     eta=0.0,
                                                     x_T=z_enc,
                                                     x0=latent_image,
                                                     img_callback=ddim_callback,
                                                     denoise_function=sampling_function,
                                                     extra_args=extra_args,
                                                     mask=noise_mask,
                                                     to_zero=sigmas[-1]==0,
                                                     end_step=sigmas.shape[0] - 1)

            else:
                extra_args["denoise_mask"] = denoise_mask
                # KSamplerX0Inpaint reads these two attributes in forward().
                self.model_k.latent_image = latent_image
                self.model_k.noise = noise

                noise = noise * sigmas[0]

                k_callback = None
                if callback is not None:
                    k_callback = lambda x: callback(x["i"], x["denoised"], x["x"])

                if latent_image is not None:
                    noise += latent_image
                if self.sampler == "dpm_fast":
                    samples = k_diffusion_sampling.sample_dpm_fast(self.model_k, noise, sigma_min, sigmas[0], self.steps, extra_args=extra_args, callback=k_callback)
                elif self.sampler == "dpm_adaptive":
                    samples = k_diffusion_sampling.sample_dpm_adaptive(self.model_k, noise, sigma_min, sigmas[0], extra_args=extra_args, callback=k_callback)
                else:
                    samples = getattr(k_diffusion_sampling, "sample_{}".format(self.sampler))(self.model_k, noise, sigmas, extra_args=extra_args, callback=k_callback)

        return samples.to(torch.float32)