### model
model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
trust_remote_code: true

### method
stage: sft
do_train: true
finetuning_type: full
freeze_vision_tower: true  # choices: [true, false]
freeze_multi_modal_projector: true  # choices: [true, false]
train_mm_proj_only: false  # choices: [true, false]
deepspeed: examples/deepspeed/ds_z3_config.json  # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]

### dataset
dataset: mllm_demo,identity,alpaca_en_demo
template: qwen2_vl
cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/qwen2_vl-7b/full/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 2
learning_rate: 1.0e-5
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500