Commit 01db7703 authored by mashun1's avatar mashun1
Browse files

taming-transformer

parents
Pipeline #801 canceled with stages
---
# VQGAN trained on a user-supplied dataset (image paths listed in text files).
model:
  base_learning_rate: 4.5e-6
  target: taming.models.vqgan.VQModel
  params:
    embed_dim: 256
    n_embed: 1024          # codebook size
    ddconfig:
      double_z: false
      z_channels: 256
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult: [1, 1, 2, 2, 4]  # num_down = len(ch_mult)-1
      num_res_blocks: 2
      attn_resolutions: [16]
      dropout: 0.0
    lossconfig:
      target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
      params:
        disc_conditional: false
        disc_in_channels: 3
        disc_start: 10000      # global step at which the discriminator kicks in
        disc_weight: 0.8
        codebook_weight: 1.0

data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 5
    num_workers: 8
    train:
      target: taming.data.custom.CustomTrain
      params:
        training_images_list_file: some/training.txt  # one image path per line
        size: 256
    validation:
      target: taming.data.custom.CustomTest
      params:
        test_images_list_file: some/test.txt
        size: 256
---
# Net2Net transformer conditioned on depth maps (ImageNet with depth).
# Both stages are pretrained VQGANs loaded from the checkpoint paths below.
model:
  base_learning_rate: 4.5e-06
  target: taming.models.cond_transformer.Net2NetTransformer
  params:
    cond_stage_key: depth
    transformer_config:
      target: taming.modules.transformer.mingpt.GPT
      params:
        vocab_size: 1024
        block_size: 512
        n_layer: 24
        n_head: 16
        n_embd: 1024
    first_stage_config:
      target: taming.models.vqgan.VQModel
      params:
        ckpt_path: logs/2024-02-27T07-30-55_imagenet_vqgan/checkpoints/last.ckpt
        embed_dim: 256
        n_embed: 1024
        ddconfig:
          double_z: false
          z_channels: 256
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
            - 1
            - 1
            - 2
            - 2
            - 4
          num_res_blocks: 2
          attn_resolutions:
            - 16
          dropout: 0.0
        lossconfig:
          target: taming.modules.losses.DummyLoss  # frozen stage: no loss needed
    cond_stage_config:
      target: taming.models.vqgan.VQModel
      params:
        ckpt_path: logs/2024-02-27T07-41-51_imagenetdepth_vqgan/checkpoints/last.ckpt
        embed_dim: 256
        n_embed: 1024
        ddconfig:
          double_z: false
          z_channels: 256
          resolution: 256
          in_channels: 1   # single-channel depth input
          out_ch: 1
          ch: 128
          ch_mult:
            - 1
            - 1
            - 2
            - 2
            - 4
          num_res_blocks: 2
          attn_resolutions:
            - 16
          dropout: 0.0
        lossconfig:
          target: taming.modules.losses.DummyLoss

data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 2
    num_workers: 8
    train:
      target: taming.data.imagenet.RINTrainWithDepth
      params:
        size: 256
    validation:
      target: taming.data.imagenet.RINValidationWithDepth
      params:
        size: 256
---
# Net2Net transformer on FacesHQ conditioned on spatial coordinates.
model:
  base_learning_rate: 4.5e-06
  target: taming.models.cond_transformer.Net2NetTransformer
  params:
    cond_stage_key: coord
    transformer_config:
      target: taming.modules.transformer.mingpt.GPT
      params:
        vocab_size: 1024
        block_size: 512
        n_layer: 24
        n_head: 16
        n_embd: 1024
    first_stage_config:
      target: taming.models.vqgan.VQModel
      params:
        # Placeholder — set to a trained FacesHQ VQGAN checkpoint, e.g.
        # logs/2024-02-05T01-12-56_faceshq_vqgan/checkpoints/last.ckpt
        ckpt_path: ~
        embed_dim: 256
        n_embed: 1024
        ddconfig:
          double_z: false
          z_channels: 256
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
            - 1
            - 1
            - 2
            - 2
            - 4
          num_res_blocks: 2
          attn_resolutions:
            - 16
          dropout: 0.0
        lossconfig:
          target: taming.modules.losses.DummyLoss
    cond_stage_config:
      target: taming.modules.misc.coord.CoordStage
      params:
        n_embed: 1024
        down_factor: 16

data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 2
    num_workers: 8
    train:
      target: taming.data.faceshq.FacesHQTrain
      params:
        size: 256
        crop_size: 256
        coord: true
    validation:
      target: taming.data.faceshq.FacesHQValidation
      params:
        size: 256
        crop_size: 256
        coord: true
---
# VQGAN trained on FacesHQ (FFHQ + CelebA-HQ).
model:
  base_learning_rate: 4.5e-6
  target: taming.models.vqgan.VQModel
  params:
    embed_dim: 256
    n_embed: 1024
    ddconfig:
      double_z: false
      z_channels: 256
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult: [1, 1, 2, 2, 4]  # num_down = len(ch_mult)-1
      num_res_blocks: 2
      attn_resolutions: [16]
      dropout: 0.0
    lossconfig:
      target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
      params:
        disc_conditional: false
        disc_in_channels: 3
        disc_start: 30001      # global step at which the discriminator kicks in
        disc_weight: 0.8
        codebook_weight: 1.0

data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 3
    num_workers: 8
    train:
      target: taming.data.faceshq.FacesHQTrain
      params:
        size: 256
        crop_size: 256
    validation:
      target: taming.data.faceshq.FacesHQValidation
      params:
        size: 256
        crop_size: 256
---
# VQGAN trained on ImageNet.
model:
  base_learning_rate: 4.5e-6
  target: taming.models.vqgan.VQModel
  params:
    embed_dim: 256
    n_embed: 1024
    ddconfig:
      double_z: false
      z_channels: 256
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult: [1, 1, 2, 2, 4]  # num_down = len(ch_mult)-1
      num_res_blocks: 2
      attn_resolutions: [16]
      dropout: 0.0
    lossconfig:
      target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
      params:
        disc_conditional: false
        disc_in_channels: 3
        disc_start: 250001     # global step at which the discriminator kicks in
        disc_weight: 0.8
        codebook_weight: 1.0

data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 12
    num_workers: 24
    train:
      target: taming.data.imagenet.ImageNetTrain
      params:
        config:            # ImageNet loaders take a nested config mapping
          size: 256
    validation:
      target: taming.data.imagenet.ImageNetValidation
      params:
        config:
          size: 256
---
# VQGAN trained on single-channel ImageNet depth maps (image_key: depth).
model:
  base_learning_rate: 4.5e-6
  target: taming.models.vqgan.VQModel
  params:
    embed_dim: 256
    n_embed: 1024
    image_key: depth
    ddconfig:
      double_z: false
      z_channels: 256
      resolution: 256
      in_channels: 1     # depth maps are single-channel
      out_ch: 1
      ch: 128
      ch_mult: [1, 1, 2, 2, 4]  # num_down = len(ch_mult)-1
      num_res_blocks: 2
      attn_resolutions: [16]
      dropout: 0.0
    lossconfig:
      target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
      params:
        disc_conditional: false
        disc_in_channels: 1
        disc_start: 50001      # global step at which the discriminator kicks in
        disc_weight: 0.75
        codebook_weight: 1.0

data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 3
    num_workers: 8
    train:
      target: taming.data.imagenet.ImageNetTrainWithDepth
      params:
        size: 256
    validation:
      target: taming.data.imagenet.ImageNetValidationWithDepth
      params:
        size: 256
---
# Net2Net transformer conditioned on object bounding boxes (Open Images).
model:
  base_learning_rate: 4.5e-06
  target: taming.models.cond_transformer.Net2NetTransformer
  params:
    cond_stage_key: objects_bbox
    transformer_config:
      target: taming.modules.transformer.mingpt.GPT
      params:
        vocab_size: 8192
        block_size: 348  # = 256 + 92 = dim(vqgan_latent_space,16x16) + dim(conditional_builder.embedding_dim)
        n_layer: 36
        n_head: 16
        n_embd: 1536
        embd_pdrop: 0.1
        resid_pdrop: 0.1
        attn_pdrop: 0.1
    first_stage_config:
      target: taming.models.vqgan.VQModel
      params:
        ckpt_path: /path/to/coco_oi_epoch12.ckpt  # https://heibox.uni-heidelberg.de/f/461d9a9f4fcf48ab84f4/
        embed_dim: 256
        n_embed: 8192
        ddconfig:
          double_z: false
          z_channels: 256
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
            - 1
            - 1
            - 2
            - 2
            - 4
          num_res_blocks: 2
          attn_resolutions:
            - 16
          dropout: 0.0
        lossconfig:
          target: taming.modules.losses.DummyLoss  # frozen stage: no loss needed
    cond_stage_config:
      target: taming.models.dummy_cond_stage.DummyCondStage
      params:
        conditional_key: objects_bbox

data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 6
    train:
      target: taming.data.annotated_objects_open_images.AnnotatedObjectsOpenImages
      params:
        data_path: data/open_images_annotations_100  # substitute with path to full dataset
        split: train
        keys: [image, objects_bbox, file_name, annotations]
        no_tokens: 8192
        target_image_size: 256
        category_allow_list_target: taming.data.open_images_helper.top_300_classes_plus_coco_compatibility
        category_mapping_target: taming.data.open_images_helper.open_images_unify_categories_for_coco
        min_object_area: 0.0001
        min_objects_per_image: 2
        max_objects_per_image: 30
        crop_method: random-2d
        random_flip: true
        use_group_parameter: true
        use_additional_parameters: true
        encode_crop: true
    validation:
      target: taming.data.annotated_objects_open_images.AnnotatedObjectsOpenImages
      params:
        data_path: data/open_images_annotations_100  # substitute with path to full dataset
        split: validation
        keys: [image, objects_bbox, file_name, annotations]
        no_tokens: 8192
        target_image_size: 256
        category_allow_list_target: taming.data.open_images_helper.top_300_classes_plus_coco_compatibility
        category_mapping_target: taming.data.open_images_helper.open_images_unify_categories_for_coco
        min_object_area: 0.0001
        min_objects_per_image: 2
        max_objects_per_image: 30
        crop_method: center
        random_flip: false
        use_group_parameter: true
        use_additional_parameters: true
        encode_crop: true
---
# VQGAN variant for semantic segmentation maps (182 one-hot label channels).
model:
  base_learning_rate: 4.5e-06
  target: taming.models.vqgan.VQSegmentationModel
  params:
    embed_dim: 256
    n_embed: 1024
    image_key: "segmentation"
    n_labels: 182
    ddconfig:
      double_z: false
      z_channels: 256
      resolution: 256
      in_channels: 182   # one channel per segmentation label
      out_ch: 182
      ch: 128
      ch_mult:
        - 1
        - 1
        - 2
        - 2
        - 4
      num_res_blocks: 2
      attn_resolutions:
        - 16
      dropout: 0.0
    lossconfig:
      target: taming.modules.losses.segmentation.BCELossWithQuant
      params:
        codebook_weight: 1.0

data:
  target: cutlit.DataModuleFromConfig
  params:
    batch_size: 12
    train:
      target: taming.data.sflckr.Examples  # adjust
      params:
        size: 256
    validation:
      target: taming.data.sflckr.Examples  # adjust
      params:
        size: 256
ADE_val_00000636.jpg
ADE_val_00000126.jpg
ADE_val_00001412.jpg
ADE_val_00001845.jpg
ADE_val_00001200.jpg
ADE_val_00001578.jpg
ADE_val_00000880.jpg
ADE_val_00000875.jpg
ADE_val_00000123.jpg
ADE_val_00001209.jpg
ADE_val_00000203.jpg
ADE_val_00001851.jpg
ADE_val_00001583.jpg
ADE_val_00000287.jpg
ADE_val_00001947.jpg
ADE_val_00000262.jpg
ADE_val_00000603.jpg
ADE_val_00000125.jpg
ADE_val_00001698.jpg
ADE_val_00001966.jpg
ADE_val_00000532.jpg
ADE_val_00001177.jpg
ADE_val_00000734.jpg
ADE_val_00001498.jpg
ADE_val_00001766.jpg
ADE_val_00000303.jpg
ADE_val_00000509.jpg
ADE_val_00000573.jpg
ADE_val_00000289.jpg
ADE_val_00001388.jpg
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment