# training.yaml (1.83 KB), committed by mashun1
# Top-level mode switch, output location, and base model location.
image_finetune: false

output_dir: "outputs"
pretrained_model_path: "models/StableDiffusion/stable-diffusion-v1-5"

# Extra UNet options.
# NOTE(review): presumably forwarded as keyword arguments when the UNet is
# built; confirm against the training script.
unet_additional_kwargs:
  use_motion_module: true
  motion_module_resolutions: [1, 2, 4, 8]
  unet_use_cross_frame_attention: false
  unet_use_temporal_attention: false

  # Motion module variant and its own nested options.
  motion_module_type: Vanilla
  motion_module_kwargs:
    num_attention_heads: 8
    num_transformer_block: 1
    attention_block_types: ["Temporal_Self", "Temporal_Self"]
    temporal_position_encoding: true
    temporal_position_encoding_max_len: 24
    temporal_attention_dim_div: 1
    zero_initialize: true

# Noise schedule settings.
# NOTE(review): presumably passed as keyword arguments to the noise scheduler
# constructor; confirm which scheduler class consumes them.
noise_scheduler_kwargs:
  num_train_timesteps: 1000
  beta_start: 0.00085
  beta_end: 0.012
  beta_schedule: "linear"
  steps_offset: 1
  clip_sample: false

# Training dataset settings: a CSV index, the folder holding the videos, and
# clip sampling parameters.
# NOTE(review): both paths look relative to the trainer's working directory,
# and the unit of sample_size is not visible here; confirm against the
# dataset loader.
train_data:
  csv_path: "train_data.csv"
  video_folder: "data/videos"
  sample_size: 128
  sample_stride: 4
  sample_n_frames: 16

# Validation settings: a list of text prompts plus two sampler parameters.
# NOTE(review): presumably each prompt is rendered at every validation
# checkpoint; confirm in the training script.
validation_data:
  prompts:
    - "Snow rocky mountains peaks canyon. Snow blanketed rocky mountains surround and shadow deep canyons."
    - "A drone view of celebration with Christmas tree and fireworks, starry sky - background."
    - "Robot dancing in times square."
    - "Pacific coast, carmel by the sea ocean and waves."
  num_inference_steps: 25
  guidance_scale: 8.0

# Name fragments selecting what gets trained.
# NOTE(review): presumably matched against model parameter names; confirm in
# the training script.
trainable_modules: ["motion_modules."]

# Empty string: no UNet checkpoint path is configured here.
unet_checkpoint_path: ""

# Keep the decimal point in the mantissa: some YAML 1.1 loaders read
# exponent-only forms such as 1e-4 as strings rather than floats.
learning_rate: 1.0e-4
train_batch_size: 4

# NOTE(review): -1 looks like a "disabled" sentinel for the epoch-based
# limits, leaving the step-based values in effect; confirm in the trainer.
max_train_epoch: -1
max_train_steps: 1000
checkpointing_epochs: -1
checkpointing_steps: 60

# Validation schedule.
# NOTE(review): validation_steps (5000) is larger than max_train_steps (1000),
# so a periodic trigger would never fire within this run; the explicit step
# list in validation_steps_tuple appears to be what drives validation here.
# Confirm how the trainer combines the two.
validation_steps: 5000
validation_steps_tuple: [2, 50, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]

# Run-level switches.
global_seed: 42
mixed_precision_training: true
enable_xformers_memory_efficient_attention: false

# Lowercase boolean, matching every other boolean in this file; capitalized
# forms are not recognized as booleans by every YAML schema.
is_debug: false