#!/usr/bin/env bash
#
# run_train_image2video_lora.sh
#
# Launches LoRA fine-tuning of the HunyuanVideo image-to-video model via
# deepspeed. Defaults to a single-node, single-GPU launch; see the
# commented-out deepspeed invocations near the bottom for multi-GPU and
# multi-node variants.
# --- Experiment configuration --------------------------------------------

# Root path under which all experiment artifacts are written.
export SAVE_BASE="."
printf 'SAVE_BASE: %s\n' "${SAVE_BASE}"

# Name suffix identifying this experiment run.
EXP_NAME="i2v_lora"

# Directory of data jsons (output_base_dir/json_path in
# hyvideo/hyvae_extract/README.md) generated by hyvideo/hyvae_extract/start.sh
DATA_JSONS_DIR="./assets/demo/i2v_lora/train_dataset/processed_data/json_path"

# IP address of the master (chief) node.
CHIEF_IP="127.0.0.1"

# Timestamped run identifier, e.g. 20240101_120000_i2v_lora.
current_datetime="$(date +%Y%m%d_%H%M%S)"
task_flag="${current_datetime}_${EXP_NAME}"
output_dir="${SAVE_BASE}/log_EXP"
# Core optimizer / model / diffusion flags, accumulated into a single
# string that is intentionally word-split at the deepspeed call site.
params="--lr 1e-4"
params+=" --warmup-num-steps 500"
params+=" --global-seed 1024"
params+=" --tensorboard"
params+=" --zero-stage 2"
params+=" --vae 884-16c-hy"
params+=" --vae-precision fp16"
params+=" --vae-tiling"
params+=" --denoise-type flow"
params+=" --flow-reverse"
params+=" --flow-shift 7.0"
params+=" --i2v-mode"
params+=" --model HYVideo-T/2"
params+=" --video-micro-batch-size 1"
params+=" --gradient-checkpoint"
params+=" --ckpt-every 500"
params+=" --embedded-cfg-scale 6.0"
# Video dataset flags; DATA_JSONS_DIR is expected to be set by the
# configuration section above. Word-split deliberately when consumed.
video_data_params="--data-type video"
video_data_params+=" --data-jsons-path ${DATA_JSONS_DIR}"
video_data_params+=" --sample-n-frames 129"
video_data_params+=" --sample-stride 1"
video_data_params+=" --num-workers 8"
video_data_params+=" --uncond-p 0.1"
video_data_params+=" --sematic-cond-drop-p 0.1"
# Text-encoder flags: an llm-i2v primary encoder plus a CLIP-L secondary
# encoder. Word-split deliberately when consumed.
te_params="--text-encoder llm-i2v"
te_params+=" --text-encoder-precision fp16"
te_params+=" --text-states-dim 4096"
te_params+=" --text-len 256"
te_params+=" --tokenizer llm-i2v"
te_params+=" --prompt-template dit-llm-encode-i2v"
te_params+=" --prompt-template-video dit-llm-encode-video-i2v"
te_params+=" --hidden-state-skip-layer 2"
te_params+=" --text-encoder-2 clipL"
te_params+=" --text-encoder-precision-2 fp16"
te_params+=" --text-states-dim-2 768"
te_params+=" --tokenizer-2 clipL"
te_params+=" --text-len-2 77"
# LoRA fine-tuning flags. Word-split deliberately when consumed.
lora_params="--use-lora"
lora_params+=" --lora-rank 64"
# Avoid HuggingFace tokenizers fork warnings/deadlocks in dataloader
# worker processes.
export TOKENIZERS_PARALLELISM=false

# Optional validation flags may be supplied via the environment; default
# to empty so the reference below is well-defined (it was previously an
# undefined variable that only worked because `set -u` is not enabled).
: "${val_params:=}"

# Trace the launch command for easier debugging.
set -x

# Multi node (requires a hostfile):
#deepspeed --hostfile $hostfile --master_addr "${CHIEF_IP}" \
# single node, multi gpu
#deepspeed --include localhost:0,1,2,3,4,5,6,7 --master_addr "${CHIEF_IP}" \
# single node, single gpu
# NOTE: the *_params variables are intentionally unquoted so they
# word-split into individual CLI flags.
deepspeed --include localhost:0 --master_addr "${CHIEF_IP}" \
train_image2video_lora.py \
    ${params} \
    ${val_params} \
    ${video_data_params} \
    ${te_params} \
    ${lora_params} \
    --task-flag "${task_flag}" \
    --output-dir "${output_dir}" \
    "$@"