samplers.py 22.9 KB
Newer Older
1
2
from .k_diffusion import sampling as k_diffusion_sampling
from .k_diffusion import external as k_diffusion_external
3
from .extra_samplers import uni_pc
comfyanonymous's avatar
comfyanonymous committed
4
5
import torch
import contextlib
6
from comfy import model_management
comfyanonymous's avatar
comfyanonymous committed
7
8
from .ldm.models.diffusion.ddim import DDIMSampler
from .ldm.modules.diffusionmodules.util import make_ddim_timesteps
comfyanonymous's avatar
comfyanonymous committed
9
10
11
12
13
14
15

class CFGDenoiser(torch.nn.Module):
    """Wrap a denoiser callable and apply classifier-free guidance to its output.

    The wrapped ``model`` is invoked as ``model(x, sigma, cond=...)``.
    """

    def __init__(self, model):
        super().__init__()
        self.inner_model = model

    def forward(self, x, sigma, uncond, cond, cond_scale):
        """Return ``uncond_out + (cond_out - uncond_out) * cond_scale``.

        When both conditionings have entries of equal length and
        ``x.shape[0] * x.shape[2] * x.shape[3]`` is below ``96 * 96``, the
        unconditional and conditional passes are run as one doubled batch;
        otherwise the model is called twice.
        """
        if len(uncond[0]) == len(cond[0]) and x.shape[0] * x.shape[2] * x.shape[3] < (96 * 96): #TODO check memory instead
            # Single forward pass: first half of the batch is uncond, second half is cond.
            x_in = torch.cat([x] * 2)
            sigma_in = torch.cat([sigma] * 2)
            cond_in = torch.cat([uncond, cond])
            uncond, cond = self.inner_model(x_in, sigma_in, cond=cond_in).chunk(2)
        else:
            # Two separate passes (conditional first, as in the original ordering).
            cond = self.inner_model(x, sigma, cond=cond)
            uncond = self.inner_model(x, sigma, cond=uncond)
        return uncond + (cond - uncond) * cond_scale

comfyanonymous's avatar
comfyanonymous committed
26
27
28

#The main sampling function shared by all the samplers
#Returns predicted noise
29
def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, cond_concat=None, model_options=None):
    """Run the model over every positive/negative conditioning and combine them.

    Args:
        model_function: callable invoked as ``model_function(input_x, timestep, cond=dict)``.
        x: input tensor; dims 2 and 3 are the ones cropped by conditioning areas.
        timestep: tensor repeated once per batched conditioning chunk.
        uncond: list of negative conditioning entries ``[tensor, options_dict]``.
        cond: list of positive conditioning entries ``[tensor, options_dict]``.
        cond_scale: guidance scale used to mix the two predictions.
        cond_concat: optional list of tensors cropped to each area and
            concatenated on dim 1 into ``c_concat``.
        model_options: optional dict; ``'transformer_options'`` is forwarded to
            the model and ``'sampler_cfg_function'`` replaces the default mix.

    Returns:
        ``uncond + (cond - uncond) * cond_scale`` or the result of
        ``model_options['sampler_cfg_function'](cond, uncond, cond_scale)``.
    """
    if model_options is None:
        model_options = {}

    def get_area_and_mult(cond, x_in, cond_concat_in, timestep_in):
        """Crop ``x_in`` to the entry's area and build its blend weights and model inputs.

        ``area`` is ``(size_dim2, size_dim3, offset_dim2, offset_dim3)``; it
        defaults to the full extent of ``x_in``.
        """
        area = (x_in.shape[2], x_in.shape[3], 0, 0)
        strength = 1.0
        if 'area' in cond[1]:
            area = cond[1]['area']
        if 'strength' in cond[1]:
            strength = cond[1]['strength']

        adm_cond = None
        if 'adm' in cond[1]:
            adm_cond = cond[1]['adm']

        input_x = x_in[:, :, area[2]:area[0] + area[2], area[3]:area[1] + area[3]]
        mult = torch.ones_like(input_x) * strength

        # Linearly ramp the weight over `rr` rows/columns on every edge of the
        # area that does not coincide with the border of x_in.
        rr = 8
        if area[2] != 0:
            for t in range(rr):
                mult[:, :, t:1 + t, :] *= ((1.0 / rr) * (t + 1))
        if (area[0] + area[2]) < x_in.shape[2]:
            for t in range(rr):
                mult[:, :, area[0] - 1 - t:area[0] - t, :] *= ((1.0 / rr) * (t + 1))
        if area[3] != 0:
            for t in range(rr):
                mult[:, :, :, t:1 + t] *= ((1.0 / rr) * (t + 1))
        if (area[1] + area[3]) < x_in.shape[3]:
            for t in range(rr):
                mult[:, :, :, area[1] - 1 - t:area[1] - t] *= ((1.0 / rr) * (t + 1))

        conditionning = {}
        conditionning['c_crossattn'] = cond[0]
        if cond_concat_in is not None and len(cond_concat_in) > 0:
            cropped = []
            for concat_item in cond_concat_in:
                cr = concat_item[:, :, area[2]:area[0] + area[2], area[3]:area[1] + area[3]]
                cropped.append(cr)
            conditionning['c_concat'] = torch.cat(cropped, dim=1)

        if adm_cond is not None:
            conditionning['c_adm'] = adm_cond

        control = None
        if 'control' in cond[1]:
            control = cond[1]['control']
        return (input_x, mult, conditionning, area, control)

    def cond_equal_size(c1, c2):
        """Return True when two conditioning dicts have the same keys and tensor shapes."""
        if c1 is c2:
            return True
        if c1.keys() != c2.keys():
            return False
        if 'c_crossattn' in c1:
            if c1['c_crossattn'].shape != c2['c_crossattn'].shape:
                return False
        if 'c_concat' in c1:
            if c1['c_concat'].shape != c2['c_concat'].shape:
                return False
        if 'c_adm' in c1:
            if c1['c_adm'].shape != c2['c_adm'].shape:
                return False
        return True

    def can_concat_cond(c1, c2):
        """Return True when two ``get_area_and_mult`` results can share one model call."""
        if c1[0].shape != c2[0].shape:
            return False
        # Both must use the very same control object, or neither may use one.
        if (c1[4] is None) != (c2[4] is None):
            return False
        if c1[4] is not None:
            if c1[4] is not c2[4]:
                return False

        return cond_equal_size(c1[2], c2[2])

    def cond_cat(c_list):
        """Concatenate a list of conditioning dicts along dim 0 into one dict."""
        c_crossattn = []
        c_concat = []
        c_adm = []
        for item in c_list:
            if 'c_crossattn' in item:
                c_crossattn.append(item['c_crossattn'])
            if 'c_concat' in item:
                c_concat.append(item['c_concat'])
            if 'c_adm' in item:
                c_adm.append(item['c_adm'])
        out = {}
        if len(c_crossattn) > 0:
            out['c_crossattn'] = [torch.cat(c_crossattn)]
        if len(c_concat) > 0:
            out['c_concat'] = [torch.cat(c_concat)]
        if len(c_adm) > 0:
            out['c_adm'] = torch.cat(c_adm)
        return out

    def calc_cond_uncond_batch(model_function, cond, uncond, x_in, timestep, max_total_area, cond_concat_in, model_options):
        """Evaluate all entries in batches and return weighted-average (cond, uncond) outputs."""
        # Counts start at a tiny non-zero value so the final division is
        # defined where no entry contributed.
        out_cond = torch.zeros_like(x_in)
        out_count = torch.ones_like(x_in) / 100000.0

        out_uncond = torch.zeros_like(x_in)
        out_uncond_count = torch.ones_like(x_in) / 100000.0

        COND = 0
        UNCOND = 1

        to_run = []
        for entry in cond:
            p = get_area_and_mult(entry, x_in, cond_concat_in, timestep)
            if p is None:
                continue

            to_run += [(p, COND)]
        for entry in uncond:
            p = get_area_and_mult(entry, x_in, cond_concat_in, timestep)
            if p is None:
                continue

            to_run += [(p, UNCOND)]

        while len(to_run) > 0:
            first = to_run[0]
            first_shape = first[0][0].shape
            to_batch_temp = []
            for idx in range(len(to_run)):
                if can_concat_cond(to_run[idx][0], first[0]):
                    to_batch_temp += [idx]

            # Highest indices first so popping from to_run keeps the
            # remaining indices valid.
            to_batch_temp.reverse()
            to_batch = to_batch_temp[:1]

            # Take the largest fraction of compatible entries that fits the area budget.
            for i in range(1, len(to_batch_temp) + 1):
                batch_amount = to_batch_temp[:len(to_batch_temp) // i]
                if (len(batch_amount) * first_shape[0] * first_shape[2] * first_shape[3] < max_total_area):
                    to_batch = batch_amount
                    break

            input_x = []
            mult = []
            c = []
            cond_or_uncond = []
            area = []
            control = None
            for idx in to_batch:
                o = to_run.pop(idx)
                p = o[0]
                input_x += [p[0]]
                mult += [p[1]]
                c += [p[2]]
                area += [p[3]]
                cond_or_uncond += [o[1]]
                control = p[4]

            batch_chunks = len(cond_or_uncond)
            input_x = torch.cat(input_x)
            c = cond_cat(c)
            timestep_ = torch.cat([timestep] * batch_chunks)

            if control is not None:
                c['control'] = control.get_control(input_x, timestep_, c['c_crossattn'], len(cond_or_uncond))

            if 'transformer_options' in model_options:
                c['transformer_options'] = model_options['transformer_options']

            output = model_function(input_x, timestep_, cond=c).chunk(batch_chunks)
            del input_x

            model_management.throw_exception_if_processing_interrupted()

            for o in range(batch_chunks):
                if cond_or_uncond[o] == COND:
                    out_cond[:, :, area[o][2]:area[o][0] + area[o][2], area[o][3]:area[o][1] + area[o][3]] += output[o] * mult[o]
                    out_count[:, :, area[o][2]:area[o][0] + area[o][2], area[o][3]:area[o][1] + area[o][3]] += mult[o]
                else:
                    out_uncond[:, :, area[o][2]:area[o][0] + area[o][2], area[o][3]:area[o][1] + area[o][3]] += output[o] * mult[o]
                    out_uncond_count[:, :, area[o][2]:area[o][0] + area[o][2], area[o][3]:area[o][1] + area[o][3]] += mult[o]
            del mult

        out_cond /= out_count
        del out_count
        out_uncond /= out_uncond_count
        del out_uncond_count

        return out_cond, out_uncond

    max_total_area = model_management.maximum_batch_area()
    cond, uncond = calc_cond_uncond_batch(model_function, cond, uncond, x, timestep, max_total_area, cond_concat, model_options)
    if "sampler_cfg_function" in model_options:
        return model_options["sampler_cfg_function"](cond, uncond, cond_scale)
    else:
        return uncond + (cond - uncond) * cond_scale
comfyanonymous's avatar
comfyanonymous committed
218

comfyanonymous's avatar
comfyanonymous committed
219
220
221
222
223
224
225
226
227
228
229
230
231
232

class CompVisVDenoiser(k_diffusion_external.DiscreteVDDPMDenoiser):
    """Denoiser whose `get_v` delegates to the wrapped model's `apply_model`."""
    def __init__(self, model, quantize=False, device='cpu'):
        # Passes the model's `alphas_cumprod` to the base class.
        # NOTE: `device` is accepted but not used in this constructor.
        super().__init__(model, model.alphas_cumprod, quantize=quantize)

    def get_v(self, x, t, cond, **kwargs):
        # Forward every argument unchanged; `inner_model` is presumably set by
        # the base-class constructor (not visible here).
        return self.inner_model.apply_model(x, t, cond, **kwargs)


class CFGNoisePredictor(torch.nn.Module):
    """Expose `sampling_function` through an `apply_model` method.

    Keeps a reference to the wrapped model and mirrors its `alphas_cumprod`.
    """

    def __init__(self, model):
        super().__init__()
        self.inner_model = model
        self.alphas_cumprod = model.alphas_cumprod

    def apply_model(self, x, timestep, cond, uncond, cond_scale, cond_concat=None, model_options=None):
        """Return the guided prediction computed by `sampling_function`.

        `model_options` defaults to a fresh empty dict per call instead of a
        shared mutable default.
        """
        if model_options is None:
            model_options = {}
        out = sampling_function(self.inner_model.apply_model, x, timestep, uncond, cond, cond_scale, cond_concat, model_options=model_options)
        return out


class KSamplerX0Inpaint(torch.nn.Module):
    """Wrap a denoiser and keep the unmasked region pinned to a latent image.

    `latent_image` and `noise` are read from the instance in `forward` when a
    mask is given; they are not set in `__init__` and must be assigned by the
    caller beforehand.
    """

    def __init__(self, model):
        super().__init__()
        self.inner_model = model

    def forward(self, x, sigma, uncond, cond, cond_scale, denoise_mask, cond_concat=None, model_options=None):
        """Call the wrapped model, applying `denoise_mask` before and after.

        With a mask, the input outside the mask is replaced by
        ``latent_image + noise * sigma`` and the output outside the mask is
        replaced by ``latent_image``. Without a mask the call is a plain
        pass-through.
        """
        if model_options is None:
            model_options = {}

        if denoise_mask is not None:
            latent_mask = 1. - denoise_mask
            # Reshape sigma to (batch, 1, 1, ...) so it broadcasts against noise.
            sigma_b = sigma.reshape([sigma.shape[0]] + [1] * (len(self.noise.shape) - 1))
            x = x * denoise_mask + (self.latent_image + self.noise * sigma_b) * latent_mask

        out = self.inner_model(x, sigma, cond=cond, uncond=uncond, cond_scale=cond_scale, cond_concat=cond_concat, model_options=model_options)

        if denoise_mask is not None:
            out *= denoise_mask
            out += self.latent_image * latent_mask
        return out
253

comfyanonymous's avatar
comfyanonymous committed
254
255
256
257
258
259
260
261
def simple_scheduler(model, steps):
    """Pick `steps` evenly strided sigmas from the end of `model.sigmas`, then append 0.0.

    Returns a `torch.FloatTensor` of length `steps + 1`.
    """
    stride = len(model.sigmas) / steps
    # Walk backwards from the last sigma, one stride at a time.
    picked = [float(model.sigmas[-(1 + int(i * stride))]) for i in range(steps)]
    picked.append(0.0)
    return torch.FloatTensor(picked)

comfyanonymous's avatar
comfyanonymous committed
262
263
264
265
def ddim_scheduler(model, steps):
    """Build a sigma schedule from uniformly spaced DDIM timesteps.

    Timesteps come from `make_ddim_timesteps` with the "uniform" method, are
    visited from last to first, capped at 999, and converted with
    `model.t_to_sigma`. A trailing 0.0 is appended.

    Returns a `torch.FloatTensor` of sigmas.
    """
    sigs = []
    ddim_timesteps = make_ddim_timesteps(ddim_discr_method="uniform", num_ddim_timesteps=steps, num_ddpm_timesteps=model.inner_model.inner_model.num_timesteps, verbose=False)
    for idx in range(len(ddim_timesteps) - 1, -1, -1):
        ts = ddim_timesteps[idx]
        # Cap at 999; presumably the highest valid timestep index — TODO confirm.
        if ts > 999:
            ts = 999
        sigs.append(model.t_to_sigma(torch.tensor(ts)))
    sigs += [0.0]
    return torch.FloatTensor(sigs)

comfyanonymous's avatar
comfyanonymous committed
273
274
275
276
277
278
279
280
281
def blank_inpaint_image_like(latent_image):
    """Return a tensor shaped like `latent_image` holding the latent "blank" image.

    Channels 0-3 (dim 1) are filled with the latent-space values that
    correspond to "zero" in pixel space.
    """
    # these are the values for "zero" in pixel space translated to latent space
    channel_values = (0.8223, -0.6876, 0.6364, 0.1380)
    blank = torch.ones_like(latent_image)
    for channel, value in enumerate(channel_values):
        blank[:, channel] *= value
    return blank

comfyanonymous's avatar
comfyanonymous committed
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
def create_cond_with_same_area_if_none(conds, c):
    """Ensure `conds` has an entry covering the same area as `c`.

    If `c` has an 'area', pick a donor from `conds`: the covering entry with
    the smallest `area[0] * area[1]`, or the first entry without an area when
    none covers it. Unless the donor already has exactly that area, append
    `[donor_tensor, copy_of_c_options]` to `conds` in place.
    """
    options = c[1]
    if 'area' not in options:
        return

    wanted = options['area']
    donor = None
    for candidate in conds:
        cand_opts = candidate[1]
        if 'area' not in cand_opts:
            # An area-less entry is only a fallback when nothing was found yet.
            if donor is None:
                donor = candidate
            continue

        a = cand_opts['area']
        covers = (
            wanted[2] >= a[2]
            and wanted[3] >= a[3]
            and a[0] + a[2] >= wanted[0] + wanted[2]
            and a[1] + a[3] >= wanted[1] + wanted[3]
        )
        if not covers:
            continue

        if donor is None or 'area' not in donor[1]:
            donor = candidate
        elif donor[1]['area'][0] * donor[1]['area'][1] > a[0] * a[1]:
            donor = candidate

    if donor is None:
        return
    if 'area' in donor[1] and donor[1]['area'] == wanted:
        return

    conds += [[donor[0], options.copy()]]
comfyanonymous's avatar
comfyanonymous committed
311

comfyanonymous's avatar
comfyanonymous committed
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347

def apply_control_net_to_equal_area(conds, uncond):
    """Give area-less negative entries the control nets used by area-less positives.

    Only entries without an 'area' key are considered. If any such negative
    entry already carries a non-None 'control', nothing is changed. Otherwise
    each positive control net is assigned round-robin to the area-less
    negative entries: a slot without a control is replaced in place by a copy
    with 'control' set, and once a slot already holds a control (wrap-around)
    a further copy is appended so earlier control nets are not overwritten.
    `uncond` is modified in place; the original option dicts are never mutated.

    Does nothing when there is no area-less negative entry to attach to
    (previously this raised ZeroDivisionError).
    """
    cond_cnets = [
        entry[1]['control']
        for entry in conds
        if 'area' not in entry[1] and entry[1].get('control') is not None
    ]

    free_slots = []
    for idx, entry in enumerate(uncond):
        if 'area' in entry[1]:
            continue
        if entry[1].get('control') is not None:
            # The negative side already has its own control net.
            return
        free_slots.append(idx)

    if not free_slots:
        return

    for n, cnet in enumerate(cond_cnets):
        slot = free_slots[n % len(free_slots)]
        # Look at the CURRENT occupant so a wrap-around is detected.
        current = uncond[slot]
        opts = current[1].copy()
        opts['control'] = cnet
        if current[1].get('control') is not None:
            uncond += [[current[0], opts]]
        else:
            uncond[slot] = [current[0], opts]

348
349
350
351
352
353
def encode_adm(noise_augmentor, conds, batch_size, device):
    """Replace each entry's raw 'adm' list with an encoded tensor.

    Args:
        noise_augmentor: callable ``(tensor, noise_level=tensor)`` returning
            ``(c_adm, noise_level_emb)``; also provides ``max_noise_level``
            and ``time_embed.dim``.
        conds: list of ``[tensor, options_dict]`` entries. Each 'adm' value is
            an iterable of ``(embeds_holder, weight, noise_augment)`` where
            ``embeds_holder.image_embeds`` is the tensor to encode.
        batch_size: number of times the encoded row is repeated along dim 0.
        device: device for the encoded tensors.

    Returns:
        `conds`, whose slots now hold NEW entries with ``options['adm']`` set
        to the encoded tensor. The original entries and their option dicts
        are left untouched, so the same raw conditioning can be encoded again.
        Entries with no (or an empty) 'adm' get an all-zero tensor.
    """
    for t in range(len(conds)):
        entry = conds[t]
        adm_out = None
        if 'adm' in entry[1]:
            adm_inputs = []
            for adm_c in entry[1]["adm"]:
                adm_cond = adm_c[0].image_embeds
                weight = adm_c[1]
                noise_augment = adm_c[2]
                noise_level = round((noise_augmentor.max_noise_level - 1) * noise_augment)
                c_adm, noise_level_emb = noise_augmentor(adm_cond.to(device), noise_level=torch.tensor([noise_level], device=device))
                adm_out = torch.cat((c_adm, noise_level_emb), 1) * weight
                adm_inputs.append(adm_out)

            if len(adm_inputs) > 1:
                # Several inputs: sum them, then re-augment the embedding part
                # of the sum at a fixed noise level.
                adm_out = torch.stack(adm_inputs).sum(0)
                #TODO: add a way to control this
                noise_augment = 0.05
                noise_level = round((noise_augmentor.max_noise_level - 1) * noise_augment)
                c_adm, noise_level_emb = noise_augmentor(adm_out[:, :noise_augmentor.time_embed.dim], noise_level=torch.tensor([noise_level], device=device))
                adm_out = torch.cat((c_adm, noise_level_emb), 1)

        if adm_out is None:
            adm_out = torch.zeros((1, noise_augmentor.time_embed.dim * 2), device=device)

        # Build a fresh entry instead of assigning into the (possibly shared)
        # original one.
        new_entry = list(entry)
        new_entry[1] = entry[1].copy()
        new_entry[1]["adm"] = torch.cat([adm_out] * batch_size)
        conds[t] = new_entry

    return conds

comfyanonymous's avatar
comfyanonymous committed
381
class KSampler:
    """Configure a model, sampler and sigma schedule, and run sampling.

    Unknown `sampler` / `scheduler` names fall back to the first entry of
    `SAMPLERS` / `SCHEDULERS`.
    """

    SCHEDULERS = ["karras", "normal", "simple", "ddim_uniform"]
    SAMPLERS = ["euler", "euler_ancestral", "heun", "dpm_2", "dpm_2_ancestral",
                "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde",
                "dpmpp_2m", "ddim", "uni_pc", "uni_pc_bh2"]

    def __init__(self, model, steps, device, sampler=None, scheduler=None, denoise=None, model_options=None):
        """Wrap `model` for sampling and compute the initial sigma schedule.

        `model_options` defaults to a fresh empty dict per instance instead of
        a dict shared between all instances.
        """
        self.model = model
        self.model_denoise = CFGNoisePredictor(self.model)
        if self.model.parameterization == "v":
            self.model_wrap = CompVisVDenoiser(self.model_denoise, quantize=True)
        else:
            self.model_wrap = k_diffusion_external.CompVisDenoiser(self.model_denoise, quantize=True)
        self.model_wrap.parameterization = self.model.parameterization
        self.model_k = KSamplerX0Inpaint(self.model_wrap)
        self.device = device
        if scheduler not in self.SCHEDULERS:
            scheduler = self.SCHEDULERS[0]
        if sampler not in self.SAMPLERS:
            sampler = self.SAMPLERS[0]
        self.scheduler = scheduler
        self.sampler = sampler
        self.sigma_min = float(self.model_wrap.sigma_min)
        self.sigma_max = float(self.model_wrap.sigma_max)
        self.set_steps(steps, denoise)
        self.denoise = denoise
        self.model_options = {} if model_options is None else model_options

    def _calculate_sigmas(self, steps):
        """Return the sigma schedule for `steps` steps under the configured scheduler."""
        sigmas = None

        # dpm_2 variants get one extra step whose penultimate sigma is dropped afterwards.
        discard_penultimate_sigma = False
        if self.sampler in ['dpm_2', 'dpm_2_ancestral']:
            steps += 1
            discard_penultimate_sigma = True

        if self.scheduler == "karras":
            sigmas = k_diffusion_sampling.get_sigmas_karras(n=steps, sigma_min=self.sigma_min, sigma_max=self.sigma_max, device=self.device)
        elif self.scheduler == "normal":
            sigmas = self.model_wrap.get_sigmas(steps).to(self.device)
        elif self.scheduler == "simple":
            sigmas = simple_scheduler(self.model_wrap, steps).to(self.device)
        elif self.scheduler == "ddim_uniform":
            sigmas = ddim_scheduler(self.model_wrap, steps).to(self.device)
        else:
            print("error invalid scheduler", self.scheduler)

        if discard_penultimate_sigma:
            sigmas = torch.cat([sigmas[:-2], sigmas[-1:]])
        return sigmas

    def set_steps(self, steps, denoise=None):
        """Store `steps` and recompute `self.sigmas`.

        With `denoise` below 0.9999 a longer schedule of `int(steps / denoise)`
        steps is computed and only its last `steps + 1` sigmas are kept.
        """
        self.steps = steps
        if denoise is None or denoise > 0.9999:
            self.sigmas = self._calculate_sigmas(steps)
        else:
            new_steps = int(steps/denoise)
            sigmas = self._calculate_sigmas(new_steps)
            self.sigmas = sigmas[-(steps + 1):]

    def sample(self, noise, positive, negative, cfg, latent_image=None, start_step=None, last_step=None, force_full_denoise=False, denoise_mask=None):
        """Run the configured sampler and return the samples as float32.

        Args:
            noise: noise tensor; its shape defines the sample shape.
            positive / negative: lists of conditioning entries ``[tensor, options_dict]``.
            cfg: guidance scale.
            latent_image: optional starting latent added to the scaled noise.
            start_step / last_step: optional bounds into the sigma schedule.
                If `start_step` leaves no step to run, `latent_image` (or
                zeros shaped like `noise`) is returned unchanged.
            force_full_denoise: when the schedule is cut at `last_step`, set
                its final sigma to 0.
            denoise_mask: optional mask for inpainting.
        """
        sigmas = self.sigmas
        sigma_min = self.sigma_min

        if last_step is not None and last_step < (len(sigmas) - 1):
            sigma_min = sigmas[last_step]
            sigmas = sigmas[:last_step + 1]
            if force_full_denoise:
                # The slice is a view of self.sigmas; clone before writing so
                # neither the stored schedule nor sigma_min is overwritten.
                sigmas = sigmas.clone()
                sigmas[-1] = 0

        if start_step is not None:
            if start_step < (len(sigmas) - 1):
                sigmas = sigmas[start_step:]
            else:
                if latent_image is not None:
                    return latent_image
                else:
                    return torch.zeros_like(noise)

        positive = positive[:]
        negative = negative[:]
        #make sure each cond area has an opposite one with the same area
        for c in positive:
            create_cond_with_same_area_if_none(negative, c)
        for c in negative:
            create_cond_with_same_area_if_none(positive, c)

        apply_control_net_to_equal_area(positive, negative)

        if self.model.model.diffusion_model.dtype == torch.float16:
            precision_scope = torch.autocast
        else:
            precision_scope = contextlib.nullcontext

        if hasattr(self.model, 'noise_augmentor'): #unclip
            positive = encode_adm(self.model.noise_augmentor, positive, noise.shape[0], self.device)
            negative = encode_adm(self.model.noise_augmentor, negative, noise.shape[0], self.device)

        extra_args = {"cond":positive, "uncond":negative, "cond_scale": cfg, "model_options": self.model_options}

        cond_concat = None
        if hasattr(self.model, 'concat_keys'): #inpaint
            cond_concat = []
            for ck in self.model.concat_keys:
                if denoise_mask is not None:
                    if ck == "mask":
                        cond_concat.append(denoise_mask[:,:1])
                    elif ck == "masked_image":
                        cond_concat.append(latent_image) #NOTE: the latent_image should be masked by the mask in pixel space
                else:
                    if ck == "mask":
                        cond_concat.append(torch.ones_like(noise)[:,:1])
                    elif ck == "masked_image":
                        cond_concat.append(blank_inpaint_image_like(noise))
            extra_args["cond_concat"] = cond_concat

        # Full-strength denoise only when starting at the first sigma of the
        # stored schedule and no partial denoise was requested.
        if sigmas[0] != self.sigmas[0] or (self.denoise is not None and self.denoise < 1.0):
            max_denoise = False
        else:
            max_denoise = True

        with precision_scope(model_management.get_autocast_device(self.device)):
            if self.sampler == "uni_pc":
                samples = uni_pc.sample_unipc(self.model_wrap, noise, latent_image, sigmas, sampling_function=sampling_function, max_denoise=max_denoise, extra_args=extra_args, noise_mask=denoise_mask)
            elif self.sampler == "uni_pc_bh2":
                samples = uni_pc.sample_unipc(self.model_wrap, noise, latent_image, sigmas, sampling_function=sampling_function, max_denoise=max_denoise, extra_args=extra_args, noise_mask=denoise_mask, variant='bh2')
            elif self.sampler == "ddim":
                # Timesteps are listed in the reverse order of the sigmas.
                timesteps = []
                for s in range(sigmas.shape[0]):
                    timesteps.insert(0, self.model_wrap.sigma_to_t(sigmas[s]))
                noise_mask = None
                if denoise_mask is not None:
                    noise_mask = 1.0 - denoise_mask
                sampler = DDIMSampler(self.model, device=self.device)
                sampler.make_schedule_timesteps(ddim_timesteps=timesteps, verbose=False)
                z_enc = sampler.stochastic_encode(latent_image, torch.tensor([len(timesteps) - 1] * noise.shape[0]).to(self.device), noise=noise, max_denoise=max_denoise)
                samples, _ = sampler.sample_custom(ddim_timesteps=timesteps,
                                                     conditioning=positive,
                                                     batch_size=noise.shape[0],
                                                     shape=noise.shape[1:],
                                                     verbose=False,
                                                     unconditional_guidance_scale=cfg,
                                                     unconditional_conditioning=negative,
                                                     eta=0.0,
                                                     x_T=z_enc,
                                                     x0=latent_image,
                                                     denoise_function=sampling_function,
                                                     extra_args=extra_args,
                                                     mask=noise_mask,
                                                     to_zero=sigmas[-1]==0,
                                                     end_step=sigmas.shape[0] - 1)

            else:
                extra_args["denoise_mask"] = denoise_mask
                self.model_k.latent_image = latent_image
                self.model_k.noise = noise

                noise = noise * sigmas[0]

                if latent_image is not None:
                    noise += latent_image
                if self.sampler == "dpm_fast":
                    samples = k_diffusion_sampling.sample_dpm_fast(self.model_k, noise, sigma_min, sigmas[0], self.steps, extra_args=extra_args)
                elif self.sampler == "dpm_adaptive":
                    samples = k_diffusion_sampling.sample_dpm_adaptive(self.model_k, noise, sigma_min, sigmas[0], extra_args=extra_args)
                else:
                    samples = getattr(k_diffusion_sampling, "sample_{}".format(self.sampler))(self.model_k, noise, sigmas, extra_args=extra_args)

        return samples.to(torch.float32)