finetune_docowl_lora_dcu.sh
#!/bin/bash
# Change for multinode config

NNODES=1
NODE_RANK=0
GPUS_PER_NODE=4
MASTER_ADDR=127.0.0.1
MASTER_PORT=29513
export HIP_VISIBLE_DEVICES=1,3,4,7
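# HIP_VISIBLE_DEVICES restricts which DCU devices the job can see; the list above
# names 4 devices to match GPUS_PER_NODE=4 (one torchrun worker per device).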
# GPUS_PER_NODE=1
DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT"

# change --model_name_or_path below to your local path of DocOwl1.5-Omni
# batch size = per_device_train_batch_size x GPUS_PER_NODE x NNODES x gradient_accumulation_steps
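# e.g. with the settings below: 1 (per device) x 4 (GPUs) x 1 (node) x 8 (grad accum) = 32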

torchrun $DISTRIBUTED_ARGS ./mplug_docowl/train/train_docowl.py \
    --lora_enable True --lora_r 128 --lora_alpha 256 --vision2text_lr 2e-5 \
    --deepspeed './scripts/zero2.json' \
    --model_name_or_path './DocOwl1.5-Omni-base' \
    --version v1 \
    --data_path './DocLocal4K/mini_imges.jsonl' \
    --image_folder './DocLocal4K/' \
    --image_size 448 \
    --crop_anchors 'grid_9' \
    --add_global_img True \
    --add_textual_crop_indicator True \
    --fp16 True \
    --output_dir './train_multi_dcu' \
    --num_train_epochs 10 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 8 \
    --evaluation_strategy "no" \
    --save_strategy "steps" \
    --save_steps 500 \
    --save_total_limit 4 \
    --learning_rate 1e-4 \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 False \
    --model_max_length 3600 \
    --gradient_checkpointing True \
    --tune_vision2text True \
    --freeze_vision_model True \
    --freeze_backbone True \
    --dataloader_num_workers 4 \
    --lazy_preprocess True \
    --report_to tensorboard
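
# A minimal launch sketch (assumption: run from the repo root so the relative paths
# above resolve; the log filename is illustrative):
#   bash finetune_docowl_lora_dcu.sh 2>&1 | tee train_docowl_lora_dcu.log
# Checkpoints are written under --output_dir ('./train_multi_dcu') every
# --save_steps (500) steps, keeping at most --save_total_limit (4) of them.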