#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
DIR=$(pwd)

# Restrict training to devices 2 and 3 (exported so the torchrun workers inherit it).
export HIP_VISIBLE_DEVICES=2,3
GPUS_PER_NODE=2
NNODES=1
NODE_RANK=0
MASTER_ADDR=localhost
MASTER_PORT=29519

# ATTENTION: specify the path to your training data, which should be a JSON file
# consisting of a list of conversations.
# See the finetuning section of the README for more information.

DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE \
    --nnodes $NNODES \
    --node_rank $NODE_RANK \
    --master_addr $MASTER_ADDR \
    --master_port $MASTER_PORT"

torchrun $DISTRIBUTED_ARGS ./finetune_multitask_dialouge_doc.py \
    --model_name_or_path ./TextMonkey_base \
    --data_path ./data/data.json \
    --fp16 True \
    --fix_vit True \
    --fix_llm True \
    --output_dir ../TextMonkey/Train_multi_dcu \
    --num_train_epochs 2 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 8 \
    --evaluation_strategy "no" \
    --save_strategy "steps" \
    --save_steps 1000 \
    --save_total_limit 10 \
    --learning_rate 1e-5 \
    --weight_decay 0.1 \
    --adam_beta2 0.95 \
    --warmup_ratio 0.02 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --report_to "none" \
    --model_max_length 2048 \
    --gradient_checkpointing \
    --lazy_preprocess True \
    --deepspeed finetune/ds_config_zero2.json \
    --image_size 896 \
    --image_width 896 \
    --image_height 896 \
    --add_window true \
    --use_global true \
    --resampler true \
    --use_lora True \
    --remain 512
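
# The --data_path argument above expects a JSON list of conversations. The commented
# block below is an illustrative sketch only (it assumes a Qwen-VL-style schema with
# "id"/"conversations"/"from"/"value" fields and a placeholder image path; the
# authoritative format is described in the README's finetuning section). Uncomment it
# to generate a minimal ./data/data.json for a smoke test:
#
#   cat > ./data/data.json <<'EOF'
#   [
#     {
#       "id": "identity_0",
#       "conversations": [
#         {
#           "from": "user",
#           "value": "<img>./data/images/sample_doc.jpg</img>\nWhat text is shown in this image?"
#         },
#         {
#           "from": "assistant",
#           "value": "The document reads: ..."
#         }
#       ]
#     }
#   ]
#   EOF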