#!/bin/bash
# finetune_lora_multi_dcu.sh
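# Usage: the defaults below can be overridden via the environment, e.g.
#   GPUS=4 PER_DEVICE_BATCH_SIZE=4 bash finetune_lora_multi_dcu.sh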
set -x

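# Effective (global) batch size = GPUS * PER_DEVICE_BATCH_SIZE * GRADIENT_ACC;
# GRADIENT_ACC is derived so the global batch size stays at BATCH_SIZE.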
GPUS=${GPUS:-8}
BATCH_SIZE=${BATCH_SIZE:-16}
PER_DEVICE_BATCH_SIZE=${PER_DEVICE_BATCH_SIZE:-2}
GRADIENT_ACC=$((BATCH_SIZE / PER_DEVICE_BATCH_SIZE / GPUS))


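# Make the repository importable and configure the distributed launcher;
# TF_CPP_MIN_LOG_LEVEL=3 silences TensorFlow C++ log output.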
export PYTHONPATH="${PYTHONPATH}:$(pwd)"
export MASTER_PORT=34229
export TF_CPP_MIN_LOG_LEVEL=3
export LAUNCHER=pytorch

OUTPUT_DIR='/InternVL/saves/internvl2-40b/finetune_multi_dcu'

if [ ! -d "$OUTPUT_DIR" ]; then
  mkdir -p "$OUTPUT_DIR"
fi

# Defaults: 8 GPUs, per-GPU batch size 2, gradient accumulation 1,
# global batch size 16, 1 epoch. Single-node launch via torchrun.
torchrun \
  --nnodes=1 \
  --node_rank=0 \
  --master_addr=127.0.0.1 \
  --nproc_per_node=${GPUS} \
  --master_port=${MASTER_PORT} \
  internvl/train/internvl_chat_finetune.py \
  --model_name_or_path "/InternVL/InternVL2-40B" \
  --conv_style "internlm2-chat" \
  --output_dir ${OUTPUT_DIR} \
  --meta_path "/internvl_chat/shell/data/internvl_1_2_finetune_custom.json" \
  --overwrite_output_dir True \
  --force_image_size 448 \
  --max_dynamic_patch 12 \
  --down_sample_ratio 0.5 \
  --drop_path_rate 0.0 \
  --freeze_llm True \
  --freeze_mlp True \
  --freeze_backbone True \
  --use_llm_lora 16 \
  --vision_select_layer -1 \
  --dataloader_num_workers 8 \
  --fp16 True \
  --num_train_epochs 1 \
  --per_device_train_batch_size ${PER_DEVICE_BATCH_SIZE} \
  --gradient_accumulation_steps ${GRADIENT_ACC} \
  --evaluation_strategy "no" \
  --save_strategy "steps" \
  --save_steps 200 \
  --save_total_limit 1 \
  --learning_rate 2e-5 \
  --weight_decay 0.05 \
  --warmup_ratio 0.03 \
  --lr_scheduler_type "cosine" \
  --logging_steps 1 \
  --max_seq_length 4096 \
  --do_train True \
  --grad_checkpoint True \
  --group_by_length True \
  --dynamic_image_size True \
  --use_thumbnail True \
  --ps_version 'v2' \
  --deepspeed "./internvl_chat/zero_stage3_config_34b.json" \
  2>&1 | tee -a "${OUTPUT_DIR}/training_log.txt"