# Configuration for the `inference_sr600_entrance` task.
TASK_TYPE: inference_sr600_entrance
use_fp16: true
vldm_cfg: ''
round: 1
batch_size: 1

# Important inputs: the test list path and the test model path.
test_list_path: data/text_list_for_t2v_share.txt
test_model: models/sr_step_110000_ema.pth

# Embedder settings, including the prompt strings.
# NOTE(review): `positive_prompt` begins with ", " -- presumably it is appended
# to another prompt; keep the leading comma. Confirm against the consumer.
embedder:
  type: 'FrozenOpenCLIPTextVisualEmbedder'
  layer: 'penultimate'
  vit_resolution: [224, 224]
  pretrained: 'i2vgen-xl/models/open_clip_pytorch_model.bin'
  negative_prompt: 'worst quality, normal quality, low quality, low res, blurry, text, watermark, logo, banner, extra digits, cropped, jpeg artifacts, signature, username, error, sketch ,duplicate, ugly, monochrome, horror, geometry, mutation, disgusting'
  positive_prompt: ', cinematic, High Contrast, highly detailed, Unreal Engine 5, no blur, full length ultra-wide angle shot a cinematic scene, taken using a Canon EOS R camera, hyper detailed photo - realistic maximum detail, 32k, Color Grading, portrait Photography, ultra HD, extreme meticulous detailing, skin pore detailing, hyper sharpness, perfect without deformations, 4k render'

# Settings for the `UNet` entry (type `UNetSD_SR600`).
UNet:
  type: 'UNetSD_SR600'
  in_dim: 4
  dim: 320
  y_dim: 1024
  context_dim: 1024
  out_dim: 4
  dim_mult: [1, 2, 4, 4]
  num_heads: 8
  head_dim: 64
  num_res_blocks: 2
  attn_scales: [1, 0.5, 0.25]
  use_scale_shift_norm: true
  dropout: 0.1
  temporal_attn_times: 1
  temporal_attention: true
  use_checkpoint: true
  use_image_dataset: false
  use_sim_mask: false
  inpainting: true

# Settings for the `Diffusion` entry (type `DiffusionDDIMSR`), with separate
# schedules for the reverse and forward processes.
Diffusion:
  type: 'DiffusionDDIMSR'
  reverse_diffusion:
    schedule: 'cosine'
    mean_type: 'v'
    schedule_param:
      num_timesteps: 1000
      zero_terminal_snr: true
  forward_diffusion:
    schedule: 'logsnr_cosine_interp'
    mean_type: 'v'
    schedule_param:
      num_timesteps: 1000
      zero_terminal_snr: true
      scale_min: 2.0
      scale_max: 4.0

# Keys are quoted on purpose so they stay strings rather than integers.
batch_sizes:
  '1': 256
  '4': 96
  '8': 48
  '16': 32
  '24': 24
  '32': 10

visual_train:
  type: 'VisualTrainTextImageToVideo'
  partial_keys:
    # Disabled alternatives, kept for reference:
    # ['y', 'local_image', 'fps'],
    # ['image', 'local_image', 'fps'],
    - ['y', 'image', 'local_image', 'fps']
  use_offset_noise: true
  # Separate from the top-level `guide_scale` defined further down.
  guide_scale: 9.0

chunk_size: 4
decoder_bs: 4
lr: 0.00003

noise_strength: 0.1

# Classifier-free guidance
p_zero: 0.0
guide_scale: 3.0
num_steps: 1000000

use_zero_infer: true
viz_interval: 50  # original note beside this value: 200
save_ckp_interval: 50  # original note beside this value: 500

# Logging
log_dir: 'workspace/experiments'
log_interval: 1
seed: 6666

total_noise_levels: 700