dongchy920 / instruct_pix2pix / Commits

Commit 9cfc6603, authored Nov 26, 2024 by dongchy920
Commit message: instruct first commit
Showing 20 changed files with 2116 additions and 0 deletions (+2116 / -0).
Changed files:

stable_diffusion/models/first_stage_models/vq-f4/config.yaml        +45  -0
stable_diffusion/models/first_stage_models/vq-f8-n256/config.yaml   +48  -0
stable_diffusion/models/first_stage_models/vq-f8/config.yaml        +48  -0
stable_diffusion/models/ldm/bsr_sr/config.yaml                      +80  -0
stable_diffusion/models/ldm/celeba256/config.yaml                   +70  -0
stable_diffusion/models/ldm/cin256/config.yaml                      +80  -0
stable_diffusion/models/ldm/ffhq256/config.yaml                     +70  -0
stable_diffusion/models/ldm/inpainting_big/config.yaml              +67  -0
stable_diffusion/models/ldm/layout2img-openimages256/config.yaml    +81  -0
stable_diffusion/models/ldm/lsun_beds256/config.yaml                +70  -0
stable_diffusion/models/ldm/lsun_churches256/config.yaml            +92  -0
stable_diffusion/models/ldm/semantic_synthesis256/config.yaml       +59  -0
stable_diffusion/models/ldm/semantic_synthesis512/config.yaml       +78  -0
stable_diffusion/models/ldm/text2img256/config.yaml                 +77  -0
stable_diffusion/notebook_helpers.py                                +271 -0
stable_diffusion/scripts/download_first_stages.sh                   +42  -0
stable_diffusion/scripts/download_models.sh                         +49  -0
stable_diffusion/scripts/img2img.py                                 +293 -0
stable_diffusion/scripts/inpaint.py                                 +98  -0
stable_diffusion/scripts/knn2img.py                                 +398 -0
stable_diffusion/models/first_stage_models/vq-f4/config.yaml (new file, mode 100644)

model:
  base_learning_rate: 4.5e-06
  target: ldm.models.autoencoder.VQModel
  params:
    embed_dim: 3
    n_embed: 8192
    monitor: val/rec_loss
    ddconfig:
      double_z: false
      z_channels: 3
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult: [1, 2, 4]
      num_res_blocks: 2
      attn_resolutions: []
      dropout: 0.0
    lossconfig:
      target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
      params:
        disc_conditional: false
        disc_in_channels: 3
        disc_start: 0
        disc_weight: 0.75
        codebook_weight: 1.0
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 8
    num_workers: 16
    wrap: true
    train:
      target: ldm.data.openimages.FullOpenImagesTrain
      params:
        crop_size: 256
    validation:
      target: ldm.data.openimages.FullOpenImagesValidation
      params:
        crop_size: 256
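
For orientation, a minimal sketch of how a config like the one above is typically consumed, using OmegaConf and instantiate_from_config, the same utilities the scripts in this commit import. The checkpoint path in the comment is hypothetical and not part of this commit:

# Minimal sketch: build the vq-f4 autoencoder from its YAML config.
# Assumes the ldm package from this repository is importable.
import torch
from omegaconf import OmegaConf
from ldm.util import instantiate_from_config

config = OmegaConf.load("stable_diffusion/models/first_stage_models/vq-f4/config.yaml")
model = instantiate_from_config(config.model)  # instantiates ldm.models.autoencoder.VQModel

# Optionally load pretrained weights (example path only):
# sd = torch.load("models/first_stage_models/vq-f4/model.ckpt", map_location="cpu")["state_dict"]
# model.load_state_dict(sd, strict=False)
model.eval()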
stable_diffusion/models/first_stage_models/vq-f8-n256/config.yaml (new file, mode 100644)

model:
  base_learning_rate: 4.5e-06
  target: ldm.models.autoencoder.VQModel
  params:
    embed_dim: 4
    n_embed: 256
    monitor: val/rec_loss
    ddconfig:
      double_z: false
      z_channels: 4
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult: [1, 2, 2, 4]
      num_res_blocks: 2
      attn_resolutions: [32]
      dropout: 0.0
    lossconfig:
      target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
      params:
        disc_conditional: false
        disc_in_channels: 3
        disc_start: 250001
        disc_weight: 0.75
        codebook_weight: 1.0
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 10
    num_workers: 20
    wrap: true
    train:
      target: ldm.data.openimages.FullOpenImagesTrain
      params:
        size: 384
        crop_size: 256
    validation:
      target: ldm.data.openimages.FullOpenImagesValidation
      params:
        size: 384
        crop_size: 256
stable_diffusion/models/first_stage_models/vq-f8/config.yaml (new file, mode 100644)

model:
  base_learning_rate: 4.5e-06
  target: ldm.models.autoencoder.VQModel
  params:
    embed_dim: 4
    n_embed: 16384
    monitor: val/rec_loss
    ddconfig:
      double_z: false
      z_channels: 4
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult: [1, 2, 2, 4]
      num_res_blocks: 2
      attn_resolutions: [32]
      dropout: 0.0
    lossconfig:
      target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
      params:
        disc_conditional: false
        disc_in_channels: 3
        disc_num_layers: 2
        disc_start: 1
        disc_weight: 0.6
        codebook_weight: 1.0
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 10
    num_workers: 20
    wrap: true
    train:
      target: ldm.data.openimages.FullOpenImagesTrain
      params:
        size: 384
        crop_size: 256
    validation:
      target: ldm.data.openimages.FullOpenImagesValidation
      params:
        size: 384
        crop_size: 256
stable_diffusion/models/ldm/bsr_sr/config.yaml (new file, mode 100644)

model:
  base_learning_rate: 1.0e-06
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.0015
    linear_end: 0.0155
    log_every_t: 100
    timesteps: 1000
    loss_type: l2
    first_stage_key: image
    cond_stage_key: LR_image
    image_size: 64
    channels: 3
    concat_mode: true
    cond_stage_trainable: false
    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 64
        in_channels: 6
        out_channels: 3
        model_channels: 160
        attention_resolutions: [16, 8]
        num_res_blocks: 2
        channel_mult: [1, 2, 2, 4]
        num_head_channels: 32
    first_stage_config:
      target: ldm.models.autoencoder.VQModelInterface
      params:
        embed_dim: 3
        n_embed: 8192
        monitor: val/rec_loss
        ddconfig:
          double_z: false
          z_channels: 3
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4]
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity
    cond_stage_config:
      target: torch.nn.Identity
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 64
    wrap: false
    num_workers: 12
    train:
      target: ldm.data.openimages.SuperresOpenImagesAdvancedTrain
      params:
        size: 256
        degradation: bsrgan_light
        downscale_f: 4
        min_crop_f: 0.5
        max_crop_f: 1.0
        random_crop: true
    validation:
      target: ldm.data.openimages.SuperresOpenImagesAdvancedValidation
      params:
        size: 256
        degradation: bsrgan_light
        downscale_f: 4
        min_crop_f: 0.5
        max_crop_f: 1.0
        random_crop: true
stable_diffusion/models/ldm/celeba256/config.yaml (new file, mode 100644)

model:
  base_learning_rate: 2.0e-06
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.0015
    linear_end: 0.0195
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: image
    cond_stage_key: class_label
    image_size: 64
    channels: 3
    cond_stage_trainable: false
    concat_mode: false
    monitor: val/loss
    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 64
        in_channels: 3
        out_channels: 3
        model_channels: 224
        attention_resolutions: [8, 4, 2]
        num_res_blocks: 2
        channel_mult: [1, 2, 3, 4]
        num_head_channels: 32
    first_stage_config:
      target: ldm.models.autoencoder.VQModelInterface
      params:
        embed_dim: 3
        n_embed: 8192
        ddconfig:
          double_z: false
          z_channels: 3
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4]
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity
    cond_stage_config: __is_unconditional__
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 48
    num_workers: 5
    wrap: false
    train:
      target: ldm.data.faceshq.CelebAHQTrain
      params:
        size: 256
    validation:
      target: ldm.data.faceshq.CelebAHQValidation
      params:
        size: 256
stable_diffusion/models/ldm/cin256/config.yaml (new file, mode 100644)

model:
  base_learning_rate: 1.0e-06
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.0015
    linear_end: 0.0195
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: image
    cond_stage_key: class_label
    image_size: 32
    channels: 4
    cond_stage_trainable: true
    conditioning_key: crossattn
    monitor: val/loss_simple_ema
    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 32
        in_channels: 4
        out_channels: 4
        model_channels: 256
        attention_resolutions: [4, 2, 1]
        num_res_blocks: 2
        channel_mult: [1, 2, 4]
        num_head_channels: 32
        use_spatial_transformer: true
        transformer_depth: 1
        context_dim: 512
    first_stage_config:
      target: ldm.models.autoencoder.VQModelInterface
      params:
        embed_dim: 4
        n_embed: 16384
        ddconfig:
          double_z: false
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 2, 4]
          num_res_blocks: 2
          attn_resolutions: [32]
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity
    cond_stage_config:
      target: ldm.modules.encoders.modules.ClassEmbedder
      params:
        embed_dim: 512
        key: class_label
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 64
    num_workers: 12
    wrap: false
    train:
      target: ldm.data.imagenet.ImageNetTrain
      params:
        config:
          size: 256
    validation:
      target: ldm.data.imagenet.ImageNetValidation
      params:
        config:
          size: 256
stable_diffusion/models/ldm/ffhq256/config.yaml (new file, mode 100644)

model:
  base_learning_rate: 2.0e-06
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.0015
    linear_end: 0.0195
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: image
    cond_stage_key: class_label
    image_size: 64
    channels: 3
    cond_stage_trainable: false
    concat_mode: false
    monitor: val/loss
    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 64
        in_channels: 3
        out_channels: 3
        model_channels: 224
        attention_resolutions: [8, 4, 2]
        num_res_blocks: 2
        channel_mult: [1, 2, 3, 4]
        num_head_channels: 32
    first_stage_config:
      target: ldm.models.autoencoder.VQModelInterface
      params:
        embed_dim: 3
        n_embed: 8192
        ddconfig:
          double_z: false
          z_channels: 3
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4]
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity
    cond_stage_config: __is_unconditional__
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 42
    num_workers: 5
    wrap: false
    train:
      target: ldm.data.faceshq.FFHQTrain
      params:
        size: 256
    validation:
      target: ldm.data.faceshq.FFHQValidation
      params:
        size: 256
stable_diffusion/models/ldm/inpainting_big/config.yaml (new file, mode 100644)

model:
  base_learning_rate: 1.0e-06
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.0015
    linear_end: 0.0205
    log_every_t: 100
    timesteps: 1000
    loss_type: l1
    first_stage_key: image
    cond_stage_key: masked_image
    image_size: 64
    channels: 3
    concat_mode: true
    monitor: val/loss
    scheduler_config:
      target: ldm.lr_scheduler.LambdaWarmUpCosineScheduler
      params:
        verbosity_interval: 0
        warm_up_steps: 1000
        max_decay_steps: 50000
        lr_start: 0.001
        lr_max: 0.1
        lr_min: 0.0001
    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 64
        in_channels: 7
        out_channels: 3
        model_channels: 256
        attention_resolutions: [8, 4, 2]
        num_res_blocks: 2
        channel_mult: [1, 2, 3, 4]
        num_heads: 8
        resblock_updown: true
    first_stage_config:
      target: ldm.models.autoencoder.VQModelInterface
      params:
        embed_dim: 3
        n_embed: 8192
        monitor: val/rec_loss
        ddconfig:
          attn_type: none
          double_z: false
          z_channels: 3
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4]
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: ldm.modules.losses.contperceptual.DummyLoss
    cond_stage_config: __is_first_stage__
stable_diffusion/models/ldm/layout2img-openimages256/config.yaml (new file, mode 100644)

model:
  base_learning_rate: 2.0e-06
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.0015
    linear_end: 0.0205
    log_every_t: 100
    timesteps: 1000
    loss_type: l1
    first_stage_key: image
    cond_stage_key: coordinates_bbox
    image_size: 64
    channels: 3
    conditioning_key: crossattn
    cond_stage_trainable: true
    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 64
        in_channels: 3
        out_channels: 3
        model_channels: 128
        attention_resolutions: [8, 4, 2]
        num_res_blocks: 2
        channel_mult: [1, 2, 3, 4]
        num_head_channels: 32
        use_spatial_transformer: true
        transformer_depth: 3
        context_dim: 512
    first_stage_config:
      target: ldm.models.autoencoder.VQModelInterface
      params:
        embed_dim: 3
        n_embed: 8192
        monitor: val/rec_loss
        ddconfig:
          double_z: false
          z_channels: 3
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4]
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity
    cond_stage_config:
      target: ldm.modules.encoders.modules.BERTEmbedder
      params:
        n_embed: 512
        n_layer: 16
        vocab_size: 8192
        max_seq_len: 92
        use_tokenizer: false
    monitor: val/loss_simple_ema
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 24
    wrap: false
    num_workers: 10
    train:
      target: ldm.data.openimages.OpenImagesBBoxTrain
      params:
        size: 256
    validation:
      target: ldm.data.openimages.OpenImagesBBoxValidation
      params:
        size: 256
stable_diffusion/models/ldm/lsun_beds256/config.yaml (new file, mode 100644)

model:
  base_learning_rate: 2.0e-06
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.0015
    linear_end: 0.0195
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: image
    cond_stage_key: class_label
    image_size: 64
    channels: 3
    cond_stage_trainable: false
    concat_mode: false
    monitor: val/loss
    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 64
        in_channels: 3
        out_channels: 3
        model_channels: 224
        attention_resolutions: [8, 4, 2]
        num_res_blocks: 2
        channel_mult: [1, 2, 3, 4]
        num_head_channels: 32
    first_stage_config:
      target: ldm.models.autoencoder.VQModelInterface
      params:
        embed_dim: 3
        n_embed: 8192
        ddconfig:
          double_z: false
          z_channels: 3
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4]
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity
    cond_stage_config: __is_unconditional__
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 48
    num_workers: 5
    wrap: false
    train:
      target: ldm.data.lsun.LSUNBedroomsTrain
      params:
        size: 256
    validation:
      target: ldm.data.lsun.LSUNBedroomsValidation
      params:
        size: 256
stable_diffusion/models/ldm/lsun_churches256/config.yaml (new file, mode 100644)

model:
  base_learning_rate: 5.0e-05
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.0015
    linear_end: 0.0155
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    loss_type: l1
    first_stage_key: image
    cond_stage_key: image
    image_size: 32
    channels: 4
    cond_stage_trainable: false
    concat_mode: false
    scale_by_std: true
    monitor: val/loss_simple_ema
    scheduler_config:
      target: ldm.lr_scheduler.LambdaLinearScheduler
      params:
        warm_up_steps: [10000]
        cycle_lengths: [10000000000000]
        f_start: [1.0e-06]
        f_max: [1.0]
        f_min: [1.0]
    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 32
        in_channels: 4
        out_channels: 4
        model_channels: 192
        attention_resolutions: [1, 2, 4, 8]
        num_res_blocks: 2
        channel_mult: [1, 2, 2, 4, 4]
        num_heads: 8
        use_scale_shift_norm: true
        resblock_updown: true
    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4, 4]
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity
    cond_stage_config: '__is_unconditional__'
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 96
    num_workers: 5
    wrap: false
    train:
      target: ldm.data.lsun.LSUNChurchesTrain
      params:
        size: 256
    validation:
      target: ldm.data.lsun.LSUNChurchesValidation
      params:
        size: 256
stable_diffusion/models/ldm/semantic_synthesis256/config.yaml (new file, mode 100644)

model:
  base_learning_rate: 1.0e-06
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.0015
    linear_end: 0.0205
    log_every_t: 100
    timesteps: 1000
    loss_type: l1
    first_stage_key: image
    cond_stage_key: segmentation
    image_size: 64
    channels: 3
    concat_mode: true
    cond_stage_trainable: true
    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 64
        in_channels: 6
        out_channels: 3
        model_channels: 128
        attention_resolutions: [32, 16, 8]
        num_res_blocks: 2
        channel_mult: [1, 4, 8]
        num_heads: 8
    first_stage_config:
      target: ldm.models.autoencoder.VQModelInterface
      params:
        embed_dim: 3
        n_embed: 8192
        ddconfig:
          double_z: false
          z_channels: 3
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4]
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity
    cond_stage_config:
      target: ldm.modules.encoders.modules.SpatialRescaler
      params:
        n_stages: 2
        in_channels: 182
        out_channels: 3
stable_diffusion/models/ldm/semantic_synthesis512/config.yaml (new file, mode 100644)

model:
  base_learning_rate: 1.0e-06
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.0015
    linear_end: 0.0205
    log_every_t: 100
    timesteps: 1000
    loss_type: l1
    first_stage_key: image
    cond_stage_key: segmentation
    image_size: 128
    channels: 3
    concat_mode: true
    cond_stage_trainable: true
    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 128
        in_channels: 6
        out_channels: 3
        model_channels: 128
        attention_resolutions: [32, 16, 8]
        num_res_blocks: 2
        channel_mult: [1, 4, 8]
        num_heads: 8
    first_stage_config:
      target: ldm.models.autoencoder.VQModelInterface
      params:
        embed_dim: 3
        n_embed: 8192
        monitor: val/rec_loss
        ddconfig:
          double_z: false
          z_channels: 3
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4]
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity
    cond_stage_config:
      target: ldm.modules.encoders.modules.SpatialRescaler
      params:
        n_stages: 2
        in_channels: 182
        out_channels: 3
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 8
    wrap: false
    num_workers: 10
    train:
      target: ldm.data.landscapes.RFWTrain
      params:
        size: 768
        crop_size: 512
        segmentation_to_float32: true
    validation:
      target: ldm.data.landscapes.RFWValidation
      params:
        size: 768
        crop_size: 512
        segmentation_to_float32: true
stable_diffusion/models/ldm/text2img256/config.yaml (new file, mode 100644)

model:
  base_learning_rate: 2.0e-06
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.0015
    linear_end: 0.0195
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: image
    cond_stage_key: caption
    image_size: 64
    channels: 3
    cond_stage_trainable: true
    conditioning_key: crossattn
    monitor: val/loss_simple_ema
    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 64
        in_channels: 3
        out_channels: 3
        model_channels: 192
        attention_resolutions: [8, 4, 2]
        num_res_blocks: 2
        channel_mult: [1, 2, 3, 5]
        num_head_channels: 32
        use_spatial_transformer: true
        transformer_depth: 1
        context_dim: 640
    first_stage_config:
      target: ldm.models.autoencoder.VQModelInterface
      params:
        embed_dim: 3
        n_embed: 8192
        ddconfig:
          double_z: false
          z_channels: 3
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4]
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity
    cond_stage_config:
      target: ldm.modules.encoders.modules.BERTEmbedder
      params:
        n_embed: 640
        n_layer: 32
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 28
    num_workers: 10
    wrap: false
    train:
      target: ldm.data.previews.pytorch_dataset.PreviewsTrain
      params:
        size: 256
    validation:
      target: ldm.data.previews.pytorch_dataset.PreviewsValidation
      params:
        size: 256
stable_diffusion/notebook_helpers.py (new file, mode 100644)

from torchvision.datasets.utils import download_url
from ldm.util import instantiate_from_config
import torch
import os
# todo ?
from google.colab import files
from IPython.display import Image as ipyimg
import ipywidgets as widgets
from PIL import Image
from numpy import asarray
from einops import rearrange, repeat
import torch, torchvision
from ldm.models.diffusion.ddim import DDIMSampler
from ldm.util import ismap
import time
from omegaconf import OmegaConf


def download_models(mode):
    if mode == "superresolution":
        # this is the small bsr light model
        url_conf = 'https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1'
        url_ckpt = 'https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1'

        path_conf = 'logs/diffusion/superresolution_bsr/configs/project.yaml'
        path_ckpt = 'logs/diffusion/superresolution_bsr/checkpoints/last.ckpt'

        download_url(url_conf, path_conf)
        download_url(url_ckpt, path_ckpt)

        path_conf = path_conf + '/?dl=1'  # fix it
        path_ckpt = path_ckpt + '/?dl=1'  # fix it

        return path_conf, path_ckpt
    else:
        raise NotImplementedError


def load_model_from_config(config, ckpt):
    print(f"Loading model from {ckpt}")
    pl_sd = torch.load(ckpt, map_location="cpu")
    global_step = pl_sd["global_step"]
    sd = pl_sd["state_dict"]
    model = instantiate_from_config(config.model)
    m, u = model.load_state_dict(sd, strict=False)
    model.cuda()
    model.eval()
    return {"model": model}, global_step


def get_model(mode):
    path_conf, path_ckpt = download_models(mode)
    config = OmegaConf.load(path_conf)
    model, step = load_model_from_config(config, path_ckpt)
    return model


def get_custom_cond(mode):
    dest = "data/example_conditioning"

    if mode == "superresolution":
        uploaded_img = files.upload()
        filename = next(iter(uploaded_img))
        name, filetype = filename.split(".")  # todo assumes just one dot in name !
        os.rename(f"{filename}", f"{dest}/{mode}/custom_{name}.{filetype}")

    elif mode == "text_conditional":
        w = widgets.Text(value='A cake with cream!', disabled=True)
        display(w)

        with open(f"{dest}/{mode}/custom_{w.value[:20]}.txt", 'w') as f:
            f.write(w.value)

    elif mode == "class_conditional":
        w = widgets.IntSlider(min=0, max=1000)
        display(w)
        with open(f"{dest}/{mode}/custom.txt", 'w') as f:
            f.write(str(w.value))  # slider value is an int; cast before writing

    else:
        raise NotImplementedError(f"cond not implemented for mode {mode}")


def get_cond_options(mode):
    path = "data/example_conditioning"
    path = os.path.join(path, mode)
    onlyfiles = [f for f in sorted(os.listdir(path))]
    return path, onlyfiles


def select_cond_path(mode):
    path = "data/example_conditioning"  # todo
    path = os.path.join(path, mode)
    onlyfiles = [f for f in sorted(os.listdir(path))]

    selected = widgets.RadioButtons(
        options=onlyfiles,
        description='Select conditioning:',
        disabled=False
    )
    display(selected)
    selected_path = os.path.join(path, selected.value)
    return selected_path


def get_cond(mode, selected_path):
    example = dict()
    if mode == "superresolution":
        up_f = 4
        visualize_cond_img(selected_path)

        c = Image.open(selected_path)
        c = torch.unsqueeze(torchvision.transforms.ToTensor()(c), 0)
        c_up = torchvision.transforms.functional.resize(c, size=[up_f * c.shape[2], up_f * c.shape[3]],
                                                        antialias=True)
        c_up = rearrange(c_up, '1 c h w -> 1 h w c')
        c = rearrange(c, '1 c h w -> 1 h w c')
        c = 2. * c - 1.

        c = c.to(torch.device("cuda"))
        example["LR_image"] = c
        example["image"] = c_up

    return example


def visualize_cond_img(path):
    display(ipyimg(filename=path))


def run(model, selected_path, task, custom_steps, resize_enabled=False, classifier_ckpt=None, global_step=None):

    example = get_cond(task, selected_path)

    save_intermediate_vid = False
    n_runs = 1
    masked = False
    guider = None
    ckwargs = None
    mode = 'ddim'
    ddim_use_x0_pred = False
    temperature = 1.
    eta = 1.
    make_progrow = True
    custom_shape = None

    height, width = example["image"].shape[1:3]
    split_input = height >= 128 and width >= 128

    if split_input:
        ks = 128
        stride = 64
        vqf = 4  #
        model.split_input_params = {"ks": (ks, ks), "stride": (stride, stride),
                                    "vqf": vqf,
                                    "patch_distributed_vq": True,
                                    "tie_braker": False,
                                    "clip_max_weight": 0.5,
                                    "clip_min_weight": 0.01,
                                    "clip_max_tie_weight": 0.5,
                                    "clip_min_tie_weight": 0.01}
    else:
        if hasattr(model, "split_input_params"):
            delattr(model, "split_input_params")

    invert_mask = False

    x_T = None
    for n in range(n_runs):
        if custom_shape is not None:
            x_T = torch.randn(1, custom_shape[1], custom_shape[2], custom_shape[3]).to(model.device)
            x_T = repeat(x_T, '1 c h w -> b c h w', b=custom_shape[0])

        logs = make_convolutional_sample(
            example, model, mode=mode, custom_steps=custom_steps, eta=eta,
            swap_mode=False, masked=masked, invert_mask=invert_mask, quantize_x0=False,
            custom_schedule=None, decode_interval=10, resize_enabled=resize_enabled,
            custom_shape=custom_shape, temperature=temperature, noise_dropout=0.,
            corrector=guider, corrector_kwargs=ckwargs, x_T=x_T,
            save_intermediate_vid=save_intermediate_vid, make_progrow=make_progrow,
            ddim_use_x0_pred=ddim_use_x0_pred)
    return logs


@torch.no_grad()
def convsample_ddim(model, cond, steps, shape, eta=1.0, callback=None, normals_sequence=None,
                    mask=None, x0=None, quantize_x0=False, img_callback=None,
                    temperature=1., noise_dropout=0., score_corrector=None,
                    corrector_kwargs=None, x_T=None, log_every_t=None):

    ddim = DDIMSampler(model)
    bs = shape[0]  # dont know where this comes from but wayne
    shape = shape[1:]  # cut batch dim
    print(f"Sampling with eta = {eta}; steps: {steps}")
    samples, intermediates = ddim.sample(steps, batch_size=bs, shape=shape, conditioning=cond, callback=callback,
                                         normals_sequence=normals_sequence, quantize_x0=quantize_x0, eta=eta,
                                         mask=mask, x0=x0, temperature=temperature, verbose=False,
                                         score_corrector=score_corrector,
                                         corrector_kwargs=corrector_kwargs, x_T=x_T)

    return samples, intermediates


@torch.no_grad()
def make_convolutional_sample(batch, model, mode="vanilla", custom_steps=None, eta=1.0, swap_mode=False, masked=False,
                              invert_mask=True, quantize_x0=False, custom_schedule=None, decode_interval=1000,
                              resize_enabled=False, custom_shape=None, temperature=1., noise_dropout=0.,
                              corrector=None, corrector_kwargs=None, x_T=None, save_intermediate_vid=False,
                              make_progrow=True, ddim_use_x0_pred=False):
    log = dict()

    z, c, x, xrec, xc = model.get_input(batch, model.first_stage_key,
                                        return_first_stage_outputs=True,
                                        force_c_encode=not (hasattr(model, 'split_input_params')
                                                            and model.cond_stage_key == 'coordinates_bbox'),
                                        return_original_cond=True)

    log_every_t = 1 if save_intermediate_vid else None

    if custom_shape is not None:
        z = torch.randn(custom_shape)
        print(f"Generating {custom_shape[0]} samples of shape {custom_shape[1:]}")

    z0 = None

    log["input"] = x
    log["reconstruction"] = xrec

    if ismap(xc):
        log["original_conditioning"] = model.to_rgb(xc)
        if hasattr(model, 'cond_stage_key'):
            log[model.cond_stage_key] = model.to_rgb(xc)
    else:
        log["original_conditioning"] = xc if xc is not None else torch.zeros_like(x)
        if model.cond_stage_model:
            log[model.cond_stage_key] = xc if xc is not None else torch.zeros_like(x)
            if model.cond_stage_key == 'class_label':
                log[model.cond_stage_key] = xc[model.cond_stage_key]

    with model.ema_scope("Plotting"):
        t0 = time.time()
        img_cb = None

        sample, intermediates = convsample_ddim(model, c, steps=custom_steps, shape=z.shape, eta=eta,
                                                quantize_x0=quantize_x0, img_callback=img_cb, mask=None, x0=z0,
                                                temperature=temperature, noise_dropout=noise_dropout,
                                                score_corrector=corrector, corrector_kwargs=corrector_kwargs,
                                                x_T=x_T, log_every_t=log_every_t)
        t1 = time.time()

        if ddim_use_x0_pred:
            sample = intermediates['pred_x0'][-1]

    x_sample = model.decode_first_stage(sample)

    try:
        x_sample_noquant = model.decode_first_stage(sample, force_not_quantize=True)
        log["sample_noquant"] = x_sample_noquant
        log["sample_diff"] = torch.abs(x_sample_noquant - x_sample)
    except:
        pass

    log["sample"] = x_sample
    log["time"] = t1 - t0

    return log
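
As a hedged sketch of how these helpers are usually chained in the accompanying Colab workflow (the step count below is just an example value, and a CUDA device plus the download paths hard-coded in download_models() are assumed):

# Sketch only: wire the helpers above together for the superresolution demo.
from notebook_helpers import get_model, select_cond_path, run

model = get_model("superresolution")                  # downloads config + checkpoint, returns {"model": ...}
selected_path = select_cond_path("superresolution")   # interactively pick an uploaded low-res image
logs = run(model["model"], selected_path, task="superresolution", custom_steps=100)
sample = logs["sample"]                               # decoded sample tensor, ready for display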
stable_diffusion/scripts/download_first_stages.sh (new file, mode 100644)

#!/bin/bash
wget -O models/first_stage_models/kl-f4/model.zip https://ommer-lab.com/files/latent-diffusion/kl-f4.zip
wget -O models/first_stage_models/kl-f8/model.zip https://ommer-lab.com/files/latent-diffusion/kl-f8.zip
wget -O models/first_stage_models/kl-f16/model.zip https://ommer-lab.com/files/latent-diffusion/kl-f16.zip
wget -O models/first_stage_models/kl-f32/model.zip https://ommer-lab.com/files/latent-diffusion/kl-f32.zip
wget -O models/first_stage_models/vq-f4/model.zip https://ommer-lab.com/files/latent-diffusion/vq-f4.zip
wget -O models/first_stage_models/vq-f4-noattn/model.zip https://ommer-lab.com/files/latent-diffusion/vq-f4-noattn.zip
wget -O models/first_stage_models/vq-f8/model.zip https://ommer-lab.com/files/latent-diffusion/vq-f8.zip
wget -O models/first_stage_models/vq-f8-n256/model.zip https://ommer-lab.com/files/latent-diffusion/vq-f8-n256.zip
wget -O models/first_stage_models/vq-f16/model.zip https://ommer-lab.com/files/latent-diffusion/vq-f16.zip

cd models/first_stage_models/kl-f4
unzip -o model.zip

cd ../kl-f8
unzip -o model.zip

cd ../kl-f16
unzip -o model.zip

cd ../kl-f32
unzip -o model.zip

cd ../vq-f4
unzip -o model.zip

cd ../vq-f4-noattn
unzip -o model.zip

cd ../vq-f8
unzip -o model.zip

cd ../vq-f8-n256
unzip -o model.zip

cd ../vq-f16
unzip -o model.zip

cd ../..
stable_diffusion/scripts/download_models.sh (new file, mode 100644)

#!/bin/bash
wget -O models/ldm/celeba256/celeba-256.zip https://ommer-lab.com/files/latent-diffusion/celeba.zip
wget -O models/ldm/ffhq256/ffhq-256.zip https://ommer-lab.com/files/latent-diffusion/ffhq.zip
wget -O models/ldm/lsun_churches256/lsun_churches-256.zip https://ommer-lab.com/files/latent-diffusion/lsun_churches.zip
wget -O models/ldm/lsun_beds256/lsun_beds-256.zip https://ommer-lab.com/files/latent-diffusion/lsun_bedrooms.zip
wget -O models/ldm/text2img256/model.zip https://ommer-lab.com/files/latent-diffusion/text2img.zip
wget -O models/ldm/cin256/model.zip https://ommer-lab.com/files/latent-diffusion/cin.zip
wget -O models/ldm/semantic_synthesis512/model.zip https://ommer-lab.com/files/latent-diffusion/semantic_synthesis.zip
wget -O models/ldm/semantic_synthesis256/model.zip https://ommer-lab.com/files/latent-diffusion/semantic_synthesis256.zip
wget -O models/ldm/bsr_sr/model.zip https://ommer-lab.com/files/latent-diffusion/sr_bsr.zip
wget -O models/ldm/layout2img-openimages256/model.zip https://ommer-lab.com/files/latent-diffusion/layout2img_model.zip
wget -O models/ldm/inpainting_big/model.zip https://ommer-lab.com/files/latent-diffusion/inpainting_big.zip

cd models/ldm/celeba256
unzip -o celeba-256.zip

cd ../ffhq256
unzip -o ffhq-256.zip

cd ../lsun_churches256
unzip -o lsun_churches-256.zip

cd ../lsun_beds256
unzip -o lsun_beds-256.zip

cd ../text2img256
unzip -o model.zip

cd ../cin256
unzip -o model.zip

cd ../semantic_synthesis512
unzip -o model.zip

cd ../semantic_synthesis256
unzip -o model.zip

cd ../bsr_sr
unzip -o model.zip

cd ../layout2img-openimages256
unzip -o model.zip

cd ../inpainting_big
unzip -o model.zip

cd ../..
stable_diffusion/scripts/img2img.py (new file, mode 100644)

"""make variations of input image"""

import argparse, os, sys, glob
import PIL
import torch
import numpy as np
from omegaconf import OmegaConf
from PIL import Image
from tqdm import tqdm, trange
from itertools import islice
from einops import rearrange, repeat
from torchvision.utils import make_grid
from torch import autocast
from contextlib import nullcontext
import time
from pytorch_lightning import seed_everything

from ldm.util import instantiate_from_config
from ldm.models.diffusion.ddim import DDIMSampler
from ldm.models.diffusion.plms import PLMSSampler


def chunk(it, size):
    it = iter(it)
    return iter(lambda: tuple(islice(it, size)), ())


def load_model_from_config(config, ckpt, verbose=False):
    print(f"Loading model from {ckpt}")
    pl_sd = torch.load(ckpt, map_location="cpu")
    if "global_step" in pl_sd:
        print(f"Global Step: {pl_sd['global_step']}")
    sd = pl_sd["state_dict"]
    model = instantiate_from_config(config.model)
    m, u = model.load_state_dict(sd, strict=False)
    if len(m) > 0 and verbose:
        print("missing keys:")
        print(m)
    if len(u) > 0 and verbose:
        print("unexpected keys:")
        print(u)

    model.cuda()
    model.eval()
    return model


def load_img(path):
    image = Image.open(path).convert("RGB")
    w, h = image.size
    print(f"loaded input image of size ({w}, {h}) from {path}")
    w, h = map(lambda x: x - x % 32, (w, h))  # resize to integer multiple of 32
    image = image.resize((w, h), resample=PIL.Image.LANCZOS)
    image = np.array(image).astype(np.float32) / 255.0
    image = image[None].transpose(0, 3, 1, 2)
    image = torch.from_numpy(image)
    return 2. * image - 1.


def main():
    parser = argparse.ArgumentParser()

    parser.add_argument("--prompt", type=str, nargs="?",
                        default="a painting of a virus monster playing guitar", help="the prompt to render")
    parser.add_argument("--init-img", type=str, nargs="?", help="path to the input image")
    parser.add_argument("--outdir", type=str, nargs="?", help="dir to write results to",
                        default="outputs/img2img-samples")
    parser.add_argument("--skip_grid", action='store_true',
                        help="do not save a grid, only individual samples. Helpful when evaluating lots of samples")
    parser.add_argument("--skip_save", action='store_true',
                        help="do not save individual samples. For speed measurements.")
    parser.add_argument("--ddim_steps", type=int, default=50, help="number of ddim sampling steps")
    parser.add_argument("--plms", action='store_true', help="use plms sampling")
    parser.add_argument("--fixed_code", action='store_true',
                        help="if enabled, uses the same starting code across all samples")
    parser.add_argument("--ddim_eta", type=float, default=0.0,
                        help="ddim eta (eta=0.0 corresponds to deterministic sampling)")
    parser.add_argument("--n_iter", type=int, default=1, help="sample this often")
    parser.add_argument("--C", type=int, default=4, help="latent channels")
    parser.add_argument("--f", type=int, default=8, help="downsampling factor, most often 8 or 16")
    parser.add_argument("--n_samples", type=int, default=2,
                        help="how many samples to produce for each given prompt. A.k.a batch size")
    parser.add_argument("--n_rows", type=int, default=0, help="rows in the grid (default: n_samples)")
    parser.add_argument("--scale", type=float, default=5.0,
                        help="unconditional guidance scale: eps = eps(x, empty) + scale * (eps(x, cond) - eps(x, empty))")
    parser.add_argument("--strength", type=float, default=0.75,
                        help="strength for noising/unnoising. 1.0 corresponds to full destruction of information in init image")
    parser.add_argument("--from-file", type=str, help="if specified, load prompts from this file")
    parser.add_argument("--config", type=str, default="configs/stable-diffusion/v1-inference.yaml",
                        help="path to config which constructs model")
    parser.add_argument("--ckpt", type=str, default="models/ldm/stable-diffusion-v1/model.ckpt",
                        help="path to checkpoint of model")
    parser.add_argument("--seed", type=int, default=42, help="the seed (for reproducible sampling)")
    parser.add_argument("--precision", type=str, help="evaluate at this precision",
                        choices=["full", "autocast"], default="autocast")

    opt = parser.parse_args()
    seed_everything(opt.seed)

    config = OmegaConf.load(f"{opt.config}")
    model = load_model_from_config(config, f"{opt.ckpt}")

    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    model = model.to(device)

    if opt.plms:
        raise NotImplementedError("PLMS sampler not (yet) supported")
        sampler = PLMSSampler(model)
    else:
        sampler = DDIMSampler(model)

    os.makedirs(opt.outdir, exist_ok=True)
    outpath = opt.outdir

    batch_size = opt.n_samples
    n_rows = opt.n_rows if opt.n_rows > 0 else batch_size
    if not opt.from_file:
        prompt = opt.prompt
        assert prompt is not None
        data = [batch_size * [prompt]]
    else:
        print(f"reading prompts from {opt.from_file}")
        with open(opt.from_file, "r") as f:
            data = f.read().splitlines()
            data = list(chunk(data, batch_size))

    sample_path = os.path.join(outpath, "samples")
    os.makedirs(sample_path, exist_ok=True)
    base_count = len(os.listdir(sample_path))
    grid_count = len(os.listdir(outpath)) - 1

    assert os.path.isfile(opt.init_img)
    init_image = load_img(opt.init_img).to(device)
    init_image = repeat(init_image, '1 ... -> b ...', b=batch_size)
    init_latent = model.get_first_stage_encoding(model.encode_first_stage(init_image))  # move to latent space

    sampler.make_schedule(ddim_num_steps=opt.ddim_steps, ddim_eta=opt.ddim_eta, verbose=False)

    assert 0. <= opt.strength <= 1., 'can only work with strength in [0.0, 1.0]'
    t_enc = int(opt.strength * opt.ddim_steps)
    print(f"target t_enc is {t_enc} steps")

    precision_scope = autocast if opt.precision == "autocast" else nullcontext
    with torch.no_grad():
        with precision_scope("cuda"):
            with model.ema_scope():
                tic = time.time()
                all_samples = list()
                for n in trange(opt.n_iter, desc="Sampling"):
                    for prompts in tqdm(data, desc="data"):
                        uc = None
                        if opt.scale != 1.0:
                            uc = model.get_learned_conditioning(batch_size * [""])
                        if isinstance(prompts, tuple):
                            prompts = list(prompts)
                        c = model.get_learned_conditioning(prompts)

                        # encode (scaled latent)
                        z_enc = sampler.stochastic_encode(init_latent, torch.tensor([t_enc] * batch_size).to(device))
                        # decode it
                        samples = sampler.decode(z_enc, c, t_enc, unconditional_guidance_scale=opt.scale,
                                                 unconditional_conditioning=uc,)

                        x_samples = model.decode_first_stage(samples)
                        x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0)

                        if not opt.skip_save:
                            for x_sample in x_samples:
                                x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
                                Image.fromarray(x_sample.astype(np.uint8)).save(
                                    os.path.join(sample_path, f"{base_count:05}.png"))
                                base_count += 1
                        all_samples.append(x_samples)

                if not opt.skip_grid:
                    # additionally, save as grid
                    grid = torch.stack(all_samples, 0)
                    grid = rearrange(grid, 'n b c h w -> (n b) c h w')
                    grid = make_grid(grid, nrow=n_rows)

                    # to image
                    grid = 255. * rearrange(grid, 'c h w -> h w c').cpu().numpy()
                    Image.fromarray(grid.astype(np.uint8)).save(os.path.join(outpath, f'grid-{grid_count:04}.png'))
                    grid_count += 1

                toc = time.time()

    print(f"Your samples are ready and waiting for you here: \n{outpath} \n"
          f"\nEnjoy.")


if __name__ == "__main__":
    main()
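
One detail worth noting in the script above: the --strength flag decides how many of the DDIM steps are used to re-noise the init image before decoding, via t_enc = int(strength * ddim_steps). A tiny illustration of that arithmetic:

# Illustration of the strength -> t_enc mapping used by img2img.py above.
ddim_steps = 50
for strength in (0.25, 0.5, 0.75, 1.0):
    t_enc = int(strength * ddim_steps)
    print(f"strength={strength}: encode/decode over {t_enc} of {ddim_steps} DDIM steps")
# strength=0.75 (the default) gives t_enc = 37, so the init image is only partially noised:
# its coarse structure survives while finer detail is re-synthesised from the prompt.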
stable_diffusion/scripts/inpaint.py (new file, mode 100644)

import argparse, os, sys, glob
from omegaconf import OmegaConf
from PIL import Image
from tqdm import tqdm
import numpy as np
import torch
from main import instantiate_from_config
from ldm.models.diffusion.ddim import DDIMSampler


def make_batch(image, mask, device):
    image = np.array(Image.open(image).convert("RGB"))
    image = image.astype(np.float32) / 255.0
    image = image[None].transpose(0, 3, 1, 2)
    image = torch.from_numpy(image)

    mask = np.array(Image.open(mask).convert("L"))
    mask = mask.astype(np.float32) / 255.0
    mask = mask[None, None]
    mask[mask < 0.5] = 0
    mask[mask >= 0.5] = 1
    mask = torch.from_numpy(mask)

    masked_image = (1 - mask) * image

    batch = {"image": image, "mask": mask, "masked_image": masked_image}
    for k in batch:
        batch[k] = batch[k].to(device=device)
        batch[k] = batch[k] * 2.0 - 1.0
    return batch


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--indir", type=str, nargs="?",
                        help="dir containing image-mask pairs (`example.png` and `example_mask.png`)")
    parser.add_argument("--outdir", type=str, nargs="?", help="dir to write results to")
    parser.add_argument("--steps", type=int, default=50, help="number of ddim sampling steps")
    opt = parser.parse_args()

    masks = sorted(glob.glob(os.path.join(opt.indir, "*_mask.png")))
    images = [x.replace("_mask.png", ".png") for x in masks]
    print(f"Found {len(masks)} inputs.")

    config = OmegaConf.load("models/ldm/inpainting_big/config.yaml")
    model = instantiate_from_config(config.model)
    model.load_state_dict(torch.load("models/ldm/inpainting_big/last.ckpt")["state_dict"], strict=False)

    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    model = model.to(device)
    sampler = DDIMSampler(model)

    os.makedirs(opt.outdir, exist_ok=True)
    with torch.no_grad():
        with model.ema_scope():
            for image, mask in tqdm(zip(images, masks)):
                outpath = os.path.join(opt.outdir, os.path.split(image)[1])
                batch = make_batch(image, mask, device=device)

                # encode masked image and concat downsampled mask
                c = model.cond_stage_model.encode(batch["masked_image"])
                cc = torch.nn.functional.interpolate(batch["mask"], size=c.shape[-2:])
                c = torch.cat((c, cc), dim=1)

                shape = (c.shape[1] - 1,) + c.shape[2:]
                samples_ddim, _ = sampler.sample(S=opt.steps, conditioning=c, batch_size=c.shape[0],
                                                 shape=shape, verbose=False)
                x_samples_ddim = model.decode_first_stage(samples_ddim)

                image = torch.clamp((batch["image"] + 1.0) / 2.0, min=0.0, max=1.0)
                mask = torch.clamp((batch["mask"] + 1.0) / 2.0, min=0.0, max=1.0)
                predicted_image = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)

                inpainted = (1 - mask) * image + mask * predicted_image
                inpainted = inpainted.cpu().numpy().transpose(0, 2, 3, 1)[0] * 255
                Image.fromarray(inpainted.astype(np.uint8)).save(outpath)
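
The script expects --indir to hold image/mask pairs named like example.png and example_mask.png. A small sketch of how that pairing convention resolves files (the directory name here is hypothetical):

# Sketch of the input pairing convention expected by inpaint.py.
import glob, os

indir = "data/inpainting_examples"  # hypothetical example directory
masks = sorted(glob.glob(os.path.join(indir, "*_mask.png")))   # e.g. bench_mask.png
images = [m.replace("_mask.png", ".png") for m in masks]       # pairs with bench.png
for img, msk in zip(images, masks):
    print(f"{os.path.basename(img)} is inpainted using {os.path.basename(msk)}")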
stable_diffusion/scripts/knn2img.py (new file, mode 100644)

import argparse, os, sys, glob
import clip
import torch
import torch.nn as nn
import numpy as np
from omegaconf import OmegaConf
from PIL import Image
from tqdm import tqdm, trange
from itertools import islice
from einops import rearrange, repeat
from torchvision.utils import make_grid
import scann
import time
from multiprocessing import cpu_count

from ldm.util import instantiate_from_config, parallel_data_prefetch
from ldm.models.diffusion.ddim import DDIMSampler
from ldm.models.diffusion.plms import PLMSSampler
from ldm.modules.encoders.modules import FrozenClipImageEmbedder, FrozenCLIPTextEmbedder

DATABASES = [
    "openimages",
    "artbench-art_nouveau",
    "artbench-baroque",
    "artbench-expressionism",
    "artbench-impressionism",
    "artbench-post_impressionism",
    "artbench-realism",
    "artbench-romanticism",
    "artbench-renaissance",
    "artbench-surrealism",
    "artbench-ukiyo_e",
]


def chunk(it, size):
    it = iter(it)
    return iter(lambda: tuple(islice(it, size)), ())


def load_model_from_config(config, ckpt, verbose=False):
    print(f"Loading model from {ckpt}")
    pl_sd = torch.load(ckpt, map_location="cpu")
    if "global_step" in pl_sd:
        print(f"Global Step: {pl_sd['global_step']}")
    sd = pl_sd["state_dict"]
    model = instantiate_from_config(config.model)
    m, u = model.load_state_dict(sd, strict=False)
    if len(m) > 0 and verbose:
        print("missing keys:")
        print(m)
    if len(u) > 0 and verbose:
        print("unexpected keys:")
        print(u)

    model.cuda()
    model.eval()
    return model


class Searcher(object):
    def __init__(self, database, retriever_version='ViT-L/14'):
        assert database in DATABASES
        # self.database = self.load_database(database)
        self.database_name = database
        self.searcher_savedir = f'data/rdm/searchers/{self.database_name}'
        self.database_path = f'data/rdm/retrieval_databases/{self.database_name}'
        self.retriever = self.load_retriever(version=retriever_version)
        self.database = {'embedding': [], 'img_id': [], 'patch_coords': []}
        self.load_database()
        self.load_searcher()

    def train_searcher(self, k, metric='dot_product', searcher_savedir=None):
        print('Start training searcher')
        searcher = scann.scann_ops_pybind.builder(self.database['embedding'] /
                                                  np.linalg.norm(self.database['embedding'], axis=1)[:, np.newaxis],
                                                  k, metric)
        self.searcher = searcher.score_brute_force().build()
        print('Finish training searcher')

        if searcher_savedir is not None:
            print(f'Save trained searcher under "{searcher_savedir}"')
            os.makedirs(searcher_savedir, exist_ok=True)
            self.searcher.serialize(searcher_savedir)

    def load_single_file(self, saved_embeddings):
        compressed = np.load(saved_embeddings)
        self.database = {key: compressed[key] for key in compressed.files}
        print('Finished loading of clip embeddings.')

    def load_multi_files(self, data_archive):
        out_data = {key: [] for key in self.database}
        for d in tqdm(data_archive, desc=f'Loading datapool from {len(data_archive)} individual files.'):
            for key in d.files:
                out_data[key].append(d[key])
        return out_data

    def load_database(self):
        print(f'Load saved patch embedding from "{self.database_path}"')
        file_content = glob.glob(os.path.join(self.database_path, '*.npz'))

        if len(file_content) == 1:
            self.load_single_file(file_content[0])
        elif len(file_content) > 1:
            data = [np.load(f) for f in file_content]
            prefetched_data = parallel_data_prefetch(self.load_multi_files, data,
                                                     n_proc=min(len(data), cpu_count()), target_data_type='dict')
            self.database = {key: np.concatenate([od[key] for od in prefetched_data], axis=1)[0]
                             for key in self.database}
        else:
            raise ValueError(f'No npz-files in specified path "{self.database_path}" is this directory existing?')

        print(f'Finished loading of retrieval database of length {self.database["embedding"].shape[0]}.')

    def load_retriever(self, version='ViT-L/14', ):
        model = FrozenClipImageEmbedder(model=version)
        if torch.cuda.is_available():
            model.cuda()
        model.eval()
        return model

    def load_searcher(self):
        print(f'load searcher for database {self.database_name} from {self.searcher_savedir}')
        self.searcher = scann.scann_ops_pybind.load_searcher(self.searcher_savedir)
        print('Finished loading searcher.')

    def search(self, x, k):
        if self.searcher is None and self.database['embedding'].shape[0] < 2e4:
            self.train_searcher(k)  # quickly fit searcher on the fly for small databases
        assert self.searcher is not None, 'Cannot search with uninitialized searcher'
        if isinstance(x, torch.Tensor):
            x = x.detach().cpu().numpy()
        if len(x.shape) == 3:
            x = x[:, 0]
        query_embeddings = x / np.linalg.norm(x, axis=1)[:, np.newaxis]

        start = time.time()
        nns, distances = self.searcher.search_batched(query_embeddings, final_num_neighbors=k)
        end = time.time()

        out_embeddings = self.database['embedding'][nns]
        out_img_ids = self.database['img_id'][nns]
        out_pc = self.database['patch_coords'][nns]

        out = {'nn_embeddings': out_embeddings / np.linalg.norm(out_embeddings, axis=-1)[..., np.newaxis],
               'img_ids': out_img_ids,
               'patch_coords': out_pc,
               'queries': x,
               'exec_time': end - start,
               'nns': nns,
               'q_embeddings': query_embeddings}

        return out

    def __call__(self, x, n):
        return self.search(x, n)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # TODO: add n_neighbors and modes (text-only, text-image-retrieval, image-image retrieval etc)
    # TODO: add 'image variation' mode when knn=0 but a single image is given instead of a text prompt?
    parser.add_argument("--prompt", type=str, nargs="?",
                        default="a painting of a virus monster playing guitar", help="the prompt to render")
    parser.add_argument("--outdir", type=str, nargs="?", help="dir to write results to",
                        default="outputs/txt2img-samples")
    parser.add_argument("--skip_grid", action='store_true',
                        help="do not save a grid, only individual samples. Helpful when evaluating lots of samples")
    parser.add_argument("--ddim_steps", type=int, default=50, help="number of ddim sampling steps")
    parser.add_argument("--n_repeat", type=int, default=1, help="number of repeats in CLIP latent space")
    parser.add_argument("--plms", action='store_true', help="use plms sampling")
    parser.add_argument("--ddim_eta", type=float, default=0.0,
                        help="ddim eta (eta=0.0 corresponds to deterministic sampling)")
    parser.add_argument("--n_iter", type=int, default=1, help="sample this often")
    parser.add_argument("--H", type=int, default=768, help="image height, in pixel space")
    parser.add_argument("--W", type=int, default=768, help="image width, in pixel space")
    parser.add_argument("--n_samples", type=int, default=3,
                        help="how many samples to produce for each given prompt. A.k.a batch size")
    parser.add_argument("--n_rows", type=int, default=0, help="rows in the grid (default: n_samples)")
    parser.add_argument("--scale", type=float, default=5.0,
                        help="unconditional guidance scale: eps = eps(x, empty) + scale * (eps(x, cond) - eps(x, empty))")
    parser.add_argument("--from-file", type=str, help="if specified, load prompts from this file")
    parser.add_argument("--config", type=str, default="configs/retrieval-augmented-diffusion/768x768.yaml",
                        help="path to config which constructs model")
    parser.add_argument("--ckpt", type=str, default="models/rdm/rdm768x768/model.ckpt",
                        help="path to checkpoint of model")
    parser.add_argument("--clip_type", type=str, default="ViT-L/14",
                        help="which CLIP model to use for retrieval and NN encoding")
    parser.add_argument("--database", type=str, default='artbench-surrealism', choices=DATABASES,
                        help="The database used for the search, only applied when --use_neighbors=True")
    parser.add_argument("--use_neighbors", default=False, action='store_true',
                        help="Include neighbors in addition to text prompt for conditioning")
    parser.add_argument("--knn", default=10, type=int,
                        help="The number of included neighbors, only applied when --use_neighbors=True")
    opt = parser.parse_args()

    config = OmegaConf.load(f"{opt.config}")
    model = load_model_from_config(config, f"{opt.ckpt}")

    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    model = model.to(device)

    clip_text_encoder = FrozenCLIPTextEmbedder(opt.clip_type).to(device)

    if opt.plms:
        sampler = PLMSSampler(model)
    else:
        sampler = DDIMSampler(model)

    os.makedirs(opt.outdir, exist_ok=True)
    outpath = opt.outdir

    batch_size = opt.n_samples
    n_rows = opt.n_rows if opt.n_rows > 0 else batch_size
    if not opt.from_file:
        prompt = opt.prompt
        assert prompt is not None
        data = [batch_size * [prompt]]
    else:
        print(f"reading prompts from {opt.from_file}")
        with open(opt.from_file, "r") as f:
            data = f.read().splitlines()
            data = list(chunk(data, batch_size))

    sample_path = os.path.join(outpath, "samples")
    os.makedirs(sample_path, exist_ok=True)
    base_count = len(os.listdir(sample_path))
    grid_count = len(os.listdir(outpath)) - 1

    print(f"sampling scale for cfg is {opt.scale:.2f}")

    searcher = None
    if opt.use_neighbors:
        searcher = Searcher(opt.database)

    with torch.no_grad():
        with model.ema_scope():
            for n in trange(opt.n_iter, desc="Sampling"):
                all_samples = list()
                for prompts in tqdm(data, desc="data"):
                    print("sampling prompts:", prompts)
                    if isinstance(prompts, tuple):
                        prompts = list(prompts)
                    c = clip_text_encoder.encode(prompts)
                    uc = None
                    if searcher is not None:
                        nn_dict = searcher(c, opt.knn)
                        c = torch.cat([c, torch.from_numpy(nn_dict['nn_embeddings']).cuda()], dim=1)
                    if opt.scale != 1.0:
                        uc = torch.zeros_like(c)
                    if isinstance(prompts, tuple):
                        prompts = list(prompts)
                    shape = [16, opt.H // 16, opt.W // 16]  # note: currently hardcoded for f16 model
                    samples_ddim, _ = sampler.sample(S=opt.ddim_steps,
                                                     conditioning=c,
                                                     batch_size=c.shape[0],
                                                     shape=shape,
                                                     verbose=False,
                                                     unconditional_guidance_scale=opt.scale,
                                                     unconditional_conditioning=uc,
                                                     eta=opt.ddim_eta,
                                                     )

                    x_samples_ddim = model.decode_first_stage(samples_ddim)
                    x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)

                    for x_sample in x_samples_ddim:
                        x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
                        Image.fromarray(x_sample.astype(np.uint8)).save(
                            os.path.join(sample_path, f"{base_count:05}.png"))
                        base_count += 1
                    all_samples.append(x_samples_ddim)

                if not opt.skip_grid:
                    # additionally, save as grid
                    grid = torch.stack(all_samples, 0)
                    grid = rearrange(grid, 'n b c h w -> (n b) c h w')
                    grid = make_grid(grid, nrow=n_rows)

                    # to image
                    grid = 255. * rearrange(grid, 'c h w -> h w c').cpu().numpy()
                    Image.fromarray(grid.astype(np.uint8)).save(os.path.join(outpath, f'grid-{grid_count:04}.png'))
                    grid_count += 1

    print(f"Your samples are ready and waiting for you here: \n{outpath} \nEnjoy.")