from .k_diffusion import sampling as k_diffusion_sampling
from .k_diffusion import external as k_diffusion_external
from .extra_samplers import uni_pc
import torch
import contextlib
from comfy import model_management
from .ldm.models.diffusion.ddim import DDIMSampler
from .ldm.modules.diffusionmodules.util import make_ddim_timesteps
from torchvision.ops import masks_to_boxes

#The main sampling function shared by all the samplers
#Returns predicted noise
def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, cond_concat=None, model_options={}):
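        # Each entry in cond/uncond is a [cross_attn_tensor, options_dict] pair. The options dict
        # may carry 'area' (height, width, y, x in latent coordinates), 'strength', 'mask',
        # 'adm_encoded', 'control' and 'gligen'. get_area_and_mult crops the latent to that area
        # and builds the per-pixel weights used when compositing the model outputs back together.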
        def get_area_and_mult(cond, x_in, cond_concat_in, timestep_in):
            area = (x_in.shape[2], x_in.shape[3], 0, 0)
            strength = 1.0
            if 'area' in cond[1]:
                area = cond[1]['area']
            if 'strength' in cond[1]:
                strength = cond[1]['strength']

            adm_cond = None
            if 'adm_encoded' in cond[1]:
                adm_cond = cond[1]['adm_encoded']

            input_x = x_in[:,:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]]
            if 'mask' in cond[1]:
                # Scale the mask to the size of the input
                # The mask should have been resized as we began the sampling process
                mask = cond[1]['mask']
                assert(mask.shape[1] == x_in.shape[2])
                assert(mask.shape[2] == x_in.shape[3])
                mask = mask[:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]]
                if mask.shape[0] != input_x.shape[0]:
                    mask = mask.repeat(input_x.shape[0], 1, 1)
            else:
                mask = torch.ones_like(input_x)
            mult = mask * strength

            if 'mask' not in cond[1]:
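                # No explicit mask: feather an 8 pixel border on every edge of the area that does
                # not touch the latent border, so adjacent conditioned areas blend smoothly.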
                rr = 8
                if area[2] != 0:
                    for t in range(rr):
                        mult[:,:,t:1+t,:] *= ((1.0/rr) * (t + 1))
                if (area[0] + area[2]) < x_in.shape[2]:
                    for t in range(rr):
                        mult[:,:,area[0] - 1 - t:area[0] - t,:] *= ((1.0/rr) * (t + 1))
                if area[3] != 0:
                    for t in range(rr):
                        mult[:,:,:,t:1+t] *= ((1.0/rr) * (t + 1))
                if (area[1] + area[3]) < x_in.shape[3]:
                    for t in range(rr):
                        mult[:,:,:,area[1] - 1 - t:area[1] - t] *= ((1.0/rr) * (t + 1))

            conditionning = {}
            conditionning['c_crossattn'] = cond[0]
            if cond_concat_in is not None and len(cond_concat_in) > 0:
                cropped = []
                for x in cond_concat_in:
                    cr = x[:,:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]]
                    cropped.append(cr)
                conditionning['c_concat'] = torch.cat(cropped, dim=1)

            if adm_cond is not None:
                conditionning['c_adm'] = adm_cond

            control = None
            if 'control' in cond[1]:
                control = cond[1]['control']

            patches = None
            if 'gligen' in cond[1]:
                gligen = cond[1]['gligen']
                patches = {}
                gligen_type = gligen[0]
                gligen_model = gligen[1]
                if gligen_type == "position":
                    gligen_patch = gligen_model.set_position(input_x.shape, gligen[2], input_x.device)
                else:
                    gligen_patch = gligen_model.set_empty(input_x.shape, input_x.device)

                patches['middle_patch'] = [gligen_patch]

            return (input_x, mult, conditionning, area, control, patches)

        def cond_equal_size(c1, c2):
            if c1 is c2:
                return True
            if c1.keys() != c2.keys():
                return False
            if 'c_crossattn' in c1:
                if c1['c_crossattn'].shape != c2['c_crossattn'].shape:
                    return False
            if 'c_concat' in c1:
                if c1['c_concat'].shape != c2['c_concat'].shape:
                    return False
            if 'c_adm' in c1:
                if c1['c_adm'].shape != c2['c_adm'].shape:
                    return False
            return True

        def can_concat_cond(c1, c2):
            if c1[0].shape != c2[0].shape:
                return False

            #control
            if (c1[4] is None) != (c2[4] is None):
                return False
            if c1[4] is not None:
                if c1[4] is not c2[4]:
                    return False

            #patches
            if (c1[5] is None) != (c2[5] is None):
                return False
            if (c1[5] is not None):
                if c1[5] is not c2[5]:
                    return False

            return cond_equal_size(c1[2], c2[2])

        def cond_cat(c_list):
            c_crossattn = []
            c_concat = []
            c_adm = []
            for x in c_list:
                if 'c_crossattn' in x:
                    c_crossattn.append(x['c_crossattn'])
                if 'c_concat' in x:
                    c_concat.append(x['c_concat'])
                if 'c_adm' in x:
                    c_adm.append(x['c_adm'])
            out = {}
            if len(c_crossattn) > 0:
                out['c_crossattn'] = [torch.cat(c_crossattn)]
            if len(c_concat) > 0:
                out['c_concat'] = [torch.cat(c_concat)]
            if len(c_adm) > 0:
                out['c_adm'] = torch.cat(c_adm)
            return out

        def calc_cond_uncond_batch(model_function, cond, uncond, x_in, timestep, max_total_area, cond_concat_in, model_options):
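            # Evaluates the model for all cond and uncond entries, batching compatible entries
            # together (up to max_total_area latent pixels per call) and accumulating each result
            # into its area weighted by mult; the sums are normalised by the accumulated weights.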
            out_cond = torch.zeros_like(x_in)
            out_count = torch.ones_like(x_in)/100000.0

            out_uncond = torch.zeros_like(x_in)
            out_uncond_count = torch.ones_like(x_in)/100000.0

            COND = 0
            UNCOND = 1

            to_run = []
            for x in cond:
                p = get_area_and_mult(x, x_in, cond_concat_in, timestep)
                if p is None:
                    continue

                to_run += [(p, COND)]
            for x in uncond:
                p = get_area_and_mult(x, x_in, cond_concat_in, timestep)
                if p is None:
                    continue

                to_run += [(p, UNCOND)]

            while len(to_run) > 0:
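                # Greedily gather entries that can share one forward pass (same shape, control and
                # patches), then shrink the batch until it fits within max_total_area.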
                first = to_run[0]
                first_shape = first[0][0].shape
                to_batch_temp = []
                for x in range(len(to_run)):
                    if can_concat_cond(to_run[x][0], first[0]):
                        to_batch_temp += [x]

                to_batch_temp.reverse()
                to_batch = to_batch_temp[:1]

                for i in range(1, len(to_batch_temp) + 1):
                    batch_amount = to_batch_temp[:len(to_batch_temp)//i]
                    if (len(batch_amount) * first_shape[0] * first_shape[2] * first_shape[3] < max_total_area):
                        to_batch = batch_amount
                        break

                input_x = []
                mult = []
                c = []
                cond_or_uncond = []
                area = []
                control = None
                patches = None
                for x in to_batch:
                    o = to_run.pop(x)
                    p = o[0]
                    input_x += [p[0]]
                    mult += [p[1]]
                    c += [p[2]]
                    area += [p[3]]
                    cond_or_uncond += [o[1]]
                    control = p[4]
                    patches = p[5]

                batch_chunks = len(cond_or_uncond)
                input_x = torch.cat(input_x)
                c = cond_cat(c)
                timestep_ = torch.cat([timestep] * batch_chunks)

                if control is not None:
                    c['control'] = control.get_control(input_x, timestep_, c['c_crossattn'], len(cond_or_uncond))

                transformer_options = {}
                if 'transformer_options' in model_options:
                    transformer_options = model_options['transformer_options'].copy()

                if patches is not None:
                    if "patches" in transformer_options:
                        cur_patches = transformer_options["patches"].copy()
                        for p in patches:
                            if p in cur_patches:
                                cur_patches[p] = cur_patches[p] + patches[p]
                            else:
                                cur_patches[p] = patches[p]
                        transformer_options["patches"] = cur_patches
                    else:
                        transformer_options["patches"] = patches

                c['transformer_options'] = transformer_options

                output = model_function(input_x, timestep_, cond=c).chunk(batch_chunks)
                del input_x

                model_management.throw_exception_if_processing_interrupted()

                for o in range(batch_chunks):
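                    # Composite each chunk back into its area; out_count/out_uncond_count track the
                    # accumulated weights so overlapping areas average correctly.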
                    if cond_or_uncond[o] == COND:
                        out_cond[:,:,area[o][2]:area[o][0] + area[o][2],area[o][3]:area[o][1] + area[o][3]] += output[o] * mult[o]
                        out_count[:,:,area[o][2]:area[o][0] + area[o][2],area[o][3]:area[o][1] + area[o][3]] += mult[o]
                    else:
                        out_uncond[:,:,area[o][2]:area[o][0] + area[o][2],area[o][3]:area[o][1] + area[o][3]] += output[o] * mult[o]
                        out_uncond_count[:,:,area[o][2]:area[o][0] + area[o][2],area[o][3]:area[o][1] + area[o][3]] += mult[o]
                del mult

            out_cond /= out_count
            del out_count
            out_uncond /= out_uncond_count
            del out_uncond_count

            return out_cond, out_uncond


        max_total_area = model_management.maximum_batch_area()
        cond, uncond = calc_cond_uncond_batch(model_function, cond, uncond, x, timestep, max_total_area, cond_concat, model_options)
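        # Classifier-free guidance: model_options may supply a custom "sampler_cfg_function",
        # otherwise the standard uncond + (cond - uncond) * cond_scale combination is used.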
        if "sampler_cfg_function" in model_options:
            return model_options["sampler_cfg_function"](cond, uncond, cond_scale)
        else:
            return uncond + (cond - uncond) * cond_scale


class CompVisVDenoiser(k_diffusion_external.DiscreteVDDPMDenoiser):
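    # k-diffusion denoiser wrapper for v-prediction models; get_v simply forwards to the wrapped
    # model's apply_model.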
    def __init__(self, model, quantize=False, device='cpu'):
        super().__init__(model, model.alphas_cumprod, quantize=quantize)

    def get_v(self, x, t, cond, **kwargs):
        return self.inner_model.apply_model(x, t, cond, **kwargs)


class CFGNoisePredictor(torch.nn.Module):
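    # Thin adapter whose apply_model runs the full classifier-free guidance path in
    # sampling_function instead of a single bare model call.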
    def __init__(self, model):
        super().__init__()
        self.inner_model = model
        self.alphas_cumprod = model.alphas_cumprod
    def apply_model(self, x, timestep, cond, uncond, cond_scale, cond_concat=None, model_options={}):
        out = sampling_function(self.inner_model.apply_model, x, timestep, uncond, cond, cond_scale, cond_concat, model_options=model_options)
        return out


class KSamplerX0Inpaint(torch.nn.Module):
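    # Inpainting wrapper: outside the denoise mask the input is replaced by the original latent
    # re-noised to the current sigma, and the output is pinned back to the original latent.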
    def __init__(self, model):
        super().__init__()
        self.inner_model = model
    def forward(self, x, sigma, uncond, cond, cond_scale, denoise_mask, cond_concat=None, model_options={}):
        if denoise_mask is not None:
            latent_mask = 1. - denoise_mask
            x = x * denoise_mask + (self.latent_image + self.noise * sigma.reshape([sigma.shape[0]] + [1] * (len(self.noise.shape) - 1))) * latent_mask
        out = self.inner_model(x, sigma, cond=cond, uncond=uncond, cond_scale=cond_scale, cond_concat=cond_concat, model_options=model_options)
        if denoise_mask is not None:
            out *= denoise_mask
            out += self.latent_image * latent_mask
        return out

def simple_scheduler(model, steps):
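    # Evenly strides the model's full sigma table to pick `steps` sigmas and appends a final 0.0.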
    sigs = []
    ss = len(model.sigmas) / steps
    for x in range(steps):
        sigs += [float(model.sigmas[-(1 + int(x * ss))])]
    sigs += [0.0]
    return torch.FloatTensor(sigs)

def ddim_scheduler(model, steps):
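    # Builds sigmas from uniformly spaced DDIM timesteps (clamped to 999) converted through
    # t_to_sigma, plus a final 0.0.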
    sigs = []
    ddim_timesteps = make_ddim_timesteps(ddim_discr_method="uniform", num_ddim_timesteps=steps, num_ddpm_timesteps=model.inner_model.inner_model.num_timesteps, verbose=False)
    for x in range(len(ddim_timesteps) - 1, -1, -1):
        ts = ddim_timesteps[x]
        if ts > 999:
            ts = 999
        sigs.append(model.t_to_sigma(torch.tensor(ts)))
    sigs += [0.0]
    return torch.FloatTensor(sigs)

def blank_inpaint_image_like(latent_image):
    blank_image = torch.ones_like(latent_image)
    # these are the values for "zero" in pixel space translated to latent space
    blank_image[:,0] *= 0.8223
    blank_image[:,1] *= -0.6876
    blank_image[:,2] *= 0.6364
    blank_image[:,3] *= 0.1380
    return blank_image

def resolve_cond_masks(conditions, h, w, device):
    # We need to decide on an area outside the sampling loop in order to properly generate opposite areas of equal sizes.
    # While we're doing this, we can also resolve the mask device and scaling for performance reasons
    for i in range(len(conditions)):
        c = conditions[i]
        if 'mask' in c[1]:
            mask = c[1]['mask']
            mask = mask.to(device=device)
            modified = c[1].copy()
            if len(mask.shape) == 2:
                mask = mask.unsqueeze(0)
            if mask.shape[1] != h or mask.shape[2] != w:
                mask = torch.nn.functional.interpolate(mask.unsqueeze(1), size=(h, w), mode='bilinear', align_corners=False).squeeze(1)

            if 'area' not in modified:
                bounds = torch.max(torch.abs(mask),dim=0).values.unsqueeze(0)
                if torch.max(bounds) == 0:
                    # Handle the edge-case of an all black mask (where masks_to_boxes would error)
                    area = (0, 0, 0, 0)
                else:
                    box = masks_to_boxes(bounds)[0].type(torch.int)
                    H, W, Y, X = (box[3] - box[1] + 1, box[2] - box[0] + 1, box[1], box[0])
                    # Make sure the height and width are divisible by 8
                    if X % 8 != 0:
                        newx = X // 8 * 8
                        W = W + (X - newx)
                        X = newx
                    if Y % 8 != 0:
                        newy = Y // 8 * 8
                        H = H + (Y - newy)
                        Y = newy
                    if H % 8 != 0:
                        H = H + (8 - (H % 8))
                    if W % 8 != 0:
                        W = W + (8 - (W % 8))
                    area = (int(H), int(W), int(Y), int(X))
                modified['area'] = area

            modified['mask'] = mask
            conditions[i] = [c[0], modified]

def create_cond_with_same_area_if_none(conds, c):
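    # If c is restricted to an 'area', make sure conds has an entry covering that area: the
    # smallest enclosing (or an area-free) cond's embedding is duplicated with c's options.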
    if 'area' not in c[1]:
        return

    c_area = c[1]['area']
    smallest = None
    for x in conds:
        if 'area' in x[1]:
            a = x[1]['area']
            if c_area[2] >= a[2] and c_area[3] >= a[3]:
                if a[0] + a[2] >= c_area[0] + c_area[2]:
                    if a[1] + a[3] >= c_area[1] + c_area[3]:
                        if smallest is None:
                            smallest = x
                        elif 'area' not in smallest[1]:
                            smallest = x
                        else:
                            if smallest[1]['area'][0] * smallest[1]['area'][1] > a[0] * a[1]:
                                smallest = x
        else:
            if smallest is None:
                smallest = x
    if smallest is None:
        return
    if 'area' in smallest[1]:
        if smallest[1]['area'] == c_area:
            return
    n = c[1].copy()
    conds += [[smallest[0], n]]

def apply_empty_x_to_equal_area(conds, uncond, name, uncond_fill_func):
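    # When only the positive conds carry `name` (e.g. 'control' or 'gligen'), mirror those values
    # onto the area-free negative conds so both sides of the CFG pair see the same extra inputs.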
    cond_cnets = []
    cond_other = []
    uncond_cnets = []
    uncond_other = []
    for t in range(len(conds)):
        x = conds[t]
        if 'area' not in x[1]:
            if name in x[1] and x[1][name] is not None:
                cond_cnets.append(x[1][name])
            else:
                cond_other.append((x, t))
    for t in range(len(uncond)):
        x = uncond[t]
        if 'area' not in x[1]:
            if name in x[1] and x[1][name] is not None:
                uncond_cnets.append(x[1][name])
            else:
                uncond_other.append((x, t))

    if len(uncond_cnets) > 0:
        return

    for x in range(len(cond_cnets)):
        temp = uncond_other[x % len(uncond_other)]
        o = temp[0]
        if name in o[1] and o[1][name] is not None:
            n = o[1].copy()
            n[name] = uncond_fill_func(cond_cnets, x)
            uncond += [[o[0], n]]
        else:
            n = o[1].copy()
            n[name] = uncond_fill_func(cond_cnets, x)
            uncond[temp[1]] = [o[0], n]


def encode_adm(noise_augmentor, conds, batch_size, device):
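    # unCLIP models: turn each cond's 'adm' list (CLIP vision embeds with weight and noise
    # augmentation level) into a single noise-augmented 'adm_encoded' tensor; conds without
    # 'adm' get a zero embedding of the same width.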
    for t in range(len(conds)):
        x = conds[t]
        if 'adm' in x[1]:
            adm_inputs = []
            weights = []
            noise_aug = []
            adm_in = x[1]["adm"]
            for adm_c in adm_in:
                adm_cond = adm_c[0].image_embeds
                weight = adm_c[1]
                noise_augment = adm_c[2]
                noise_level = round((noise_augmentor.max_noise_level - 1) * noise_augment)
                c_adm, noise_level_emb = noise_augmentor(adm_cond.to(device), noise_level=torch.tensor([noise_level], device=device))
                adm_out = torch.cat((c_adm, noise_level_emb), 1) * weight
                weights.append(weight)
                noise_aug.append(noise_augment)
                adm_inputs.append(adm_out)

            if len(noise_aug) > 1:
                adm_out = torch.stack(adm_inputs).sum(0)
                #TODO: add a way to control this
                noise_augment = 0.05
                noise_level = round((noise_augmentor.max_noise_level - 1) * noise_augment)
                c_adm, noise_level_emb = noise_augmentor(adm_out[:, :noise_augmentor.time_embed.dim], noise_level=torch.tensor([noise_level], device=device))
                adm_out = torch.cat((c_adm, noise_level_emb), 1)
        else:
            adm_out = torch.zeros((1, noise_augmentor.time_embed.dim * 2), device=device)
        x[1] = x[1].copy()
        x[1]["adm_encoded"] = torch.cat([adm_out] * batch_size)

    return conds


class KSampler:
    SCHEDULERS = ["karras", "normal", "simple", "ddim_uniform"]
    SAMPLERS = ["euler", "euler_ancestral", "heun", "dpm_2", "dpm_2_ancestral",
                "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde",
                "dpmpp_2m", "ddim", "uni_pc", "uni_pc_bh2"]

    def __init__(self, model, steps, device, sampler=None, scheduler=None, denoise=None, model_options={}):
        self.model = model
        self.model_denoise = CFGNoisePredictor(self.model)
        if self.model.parameterization == "v":
            self.model_wrap = CompVisVDenoiser(self.model_denoise, quantize=True)
        else:
            self.model_wrap = k_diffusion_external.CompVisDenoiser(self.model_denoise, quantize=True)
        self.model_wrap.parameterization = self.model.parameterization
        self.model_k = KSamplerX0Inpaint(self.model_wrap)
        self.device = device
        if scheduler not in self.SCHEDULERS:
            scheduler = self.SCHEDULERS[0]
        if sampler not in self.SAMPLERS:
            sampler = self.SAMPLERS[0]
        self.scheduler = scheduler
        self.sampler = sampler
        self.sigma_min=float(self.model_wrap.sigma_min)
        self.sigma_max=float(self.model_wrap.sigma_max)
        self.set_steps(steps, denoise)
        self.denoise = denoise
        self.model_options = model_options

    def calculate_sigmas(self, steps):
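        # dpm_2 style samplers need one extra sigma; the penultimate value is dropped afterwards
        # so the effective step count stays the same.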
        sigmas = None

        discard_penultimate_sigma = False
        if self.sampler in ['dpm_2', 'dpm_2_ancestral']:
            steps += 1
            discard_penultimate_sigma = True

        if self.scheduler == "karras":
            sigmas = k_diffusion_sampling.get_sigmas_karras(n=steps, sigma_min=self.sigma_min, sigma_max=self.sigma_max)
        elif self.scheduler == "normal":
            sigmas = self.model_wrap.get_sigmas(steps)
        elif self.scheduler == "simple":
            sigmas = simple_scheduler(self.model_wrap, steps)
        elif self.scheduler == "ddim_uniform":
            sigmas = ddim_scheduler(self.model_wrap, steps)
        else:
            print("error invalid scheduler", self.scheduler)

        if discard_penultimate_sigma:
            sigmas = torch.cat([sigmas[:-2], sigmas[-1:]])
        return sigmas

    def set_steps(self, steps, denoise=None):
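        # For denoise < 1.0 a longer schedule is computed and only its last (steps + 1) sigmas
        # are kept, so sampling starts from a partially noised latent.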
        self.steps = steps
        if denoise is None or denoise > 0.9999:
            self.sigmas = self.calculate_sigmas(steps).to(self.device)
        else:
            new_steps = int(steps/denoise)
            sigmas = self.calculate_sigmas(new_steps).to(self.device)
            self.sigmas = sigmas[-(steps + 1):]

    def sample(self, noise, positive, negative, cfg, latent_image=None, start_step=None, last_step=None, force_full_denoise=False, denoise_mask=None, sigmas=None, callback=None):
        if sigmas is None:
            sigmas = self.sigmas
        sigma_min = self.sigma_min

        if last_step is not None and last_step < (len(sigmas) - 1):
            sigma_min = sigmas[last_step]
            sigmas = sigmas[:last_step + 1]
            if force_full_denoise:
                sigmas[-1] = 0

        if start_step is not None:
            if start_step < (len(sigmas) - 1):
                sigmas = sigmas[start_step:]
            else:
                if latent_image is not None:
                    return latent_image
                else:
                    return torch.zeros_like(noise)

        positive = positive[:]
        negative = negative[:]

        resolve_cond_masks(positive, noise.shape[2], noise.shape[3], self.device)
        resolve_cond_masks(negative, noise.shape[2], noise.shape[3], self.device)

        #make sure each cond area has an opposite one with the same area
        for c in positive:
            create_cond_with_same_area_if_none(negative, c)
        for c in negative:
            create_cond_with_same_area_if_none(positive, c)

        apply_empty_x_to_equal_area(positive, negative, 'control', lambda cond_cnets, x: cond_cnets[x])
        apply_empty_x_to_equal_area(positive, negative, 'gligen', lambda cond_cnets, x: cond_cnets[x])

        if self.model.model.diffusion_model.dtype == torch.float16:
            precision_scope = torch.autocast
        else:
            precision_scope = contextlib.nullcontext

        if hasattr(self.model, 'noise_augmentor'): #unclip
            positive = encode_adm(self.model.noise_augmentor, positive, noise.shape[0], self.device)
            negative = encode_adm(self.model.noise_augmentor, negative, noise.shape[0], self.device)

        extra_args = {"cond":positive, "uncond":negative, "cond_scale": cfg, "model_options": self.model_options}

        cond_concat = None
        if hasattr(self.model, 'concat_keys'): #inpaint
            cond_concat = []
            for ck in self.model.concat_keys:
                if denoise_mask is not None:
                    if ck == "mask":
                        cond_concat.append(denoise_mask[:,:1])
                    elif ck == "masked_image":
                        cond_concat.append(latent_image) #NOTE: the latent_image should be masked by the mask in pixel space
                else:
                    if ck == "mask":
                        cond_concat.append(torch.ones_like(noise)[:,:1])
                    elif ck == "masked_image":
                        cond_concat.append(blank_inpaint_image_like(noise))
            extra_args["cond_concat"] = cond_concat

        if sigmas[0] != self.sigmas[0] or (self.denoise is not None and self.denoise < 1.0):
            max_denoise = False
        else:
            max_denoise = True

        with precision_scope(model_management.get_autocast_device(self.device)):
            if self.sampler == "uni_pc":
                samples = uni_pc.sample_unipc(self.model_wrap, noise, latent_image, sigmas, sampling_function=sampling_function, max_denoise=max_denoise, extra_args=extra_args, noise_mask=denoise_mask, callback=callback)
            elif self.sampler == "uni_pc_bh2":
                samples = uni_pc.sample_unipc(self.model_wrap, noise, latent_image, sigmas, sampling_function=sampling_function, max_denoise=max_denoise, extra_args=extra_args, noise_mask=denoise_mask, callback=callback, variant='bh2')
            elif self.sampler == "ddim":
                timesteps = []
                for s in range(sigmas.shape[0]):
                    timesteps.insert(0, self.model_wrap.sigma_to_t(sigmas[s]))
                noise_mask = None
                if denoise_mask is not None:
                    noise_mask = 1.0 - denoise_mask

                ddim_callback = None
                if callback is not None:
                    ddim_callback = lambda pred_x0, i: callback(i, pred_x0, None)

                sampler = DDIMSampler(self.model, device=self.device)
                sampler.make_schedule_timesteps(ddim_timesteps=timesteps, verbose=False)
                z_enc = sampler.stochastic_encode(latent_image, torch.tensor([len(timesteps) - 1] * noise.shape[0]).to(self.device), noise=noise, max_denoise=max_denoise)
                samples, _ = sampler.sample_custom(ddim_timesteps=timesteps,
                                                     conditioning=positive,
                                                     batch_size=noise.shape[0],
                                                     shape=noise.shape[1:],
                                                     verbose=False,
                                                     unconditional_guidance_scale=cfg,
                                                     unconditional_conditioning=negative,
                                                     eta=0.0,
                                                     x_T=z_enc,
                                                     x0=latent_image,
                                                     img_callback=ddim_callback,
                                                     denoise_function=sampling_function,
                                                     extra_args=extra_args,
                                                     mask=noise_mask,
                                                     to_zero=sigmas[-1]==0,
                                                     end_step=sigmas.shape[0] - 1)

            else:
                extra_args["denoise_mask"] = denoise_mask
                self.model_k.latent_image = latent_image
                self.model_k.noise = noise

                noise = noise * sigmas[0]

                k_callback = None
                if callback is not None:
                    k_callback = lambda x: callback(x["i"], x["denoised"], x["x"])

                if latent_image is not None:
                    noise += latent_image
                if self.sampler == "dpm_fast":
                    samples = k_diffusion_sampling.sample_dpm_fast(self.model_k, noise, sigma_min, sigmas[0], self.steps, extra_args=extra_args, callback=k_callback)
                elif self.sampler == "dpm_adaptive":
                    samples = k_diffusion_sampling.sample_dpm_adaptive(self.model_k, noise, sigma_min, sigmas[0], extra_args=extra_args, callback=k_callback)
                else:
                    samples = getattr(k_diffusion_sampling, "sample_{}".format(self.sampler))(self.model_k, noise, sigmas, extra_args=extra_args, callback=k_callback)

        return samples.to(torch.float32)
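

# Rough usage sketch (an illustration, not part of this module): assuming `model` is a loaded
# CompVis-style diffusion model exposing apply_model/alphas_cumprod/parameterization, `latent`
# is a [B, 4, H//8, W//8] tensor and positive/negative are lists of [cross_attn, options] pairs:
#
#   sampler = KSampler(model, steps=20, device="cuda", sampler="euler", scheduler="normal", denoise=1.0)
#   noise = torch.randn_like(latent)
#   samples = sampler.sample(noise, positive, negative, cfg=8.0, latent_image=latent)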