run_qwen.sh 624 Bytes
Newer Older
cmx's avatar
cmx committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
#!/bin/bash

torchrun --nnodes=1 --nproc-per-node=4 training.py \
    --model_name "Qwen/Qwen2-7B" \
    --bf16 \
    --num_train_epochs 1 \
    --per_device_train_batch_size 48 \
    --per_device_eval_batch_size 64 \
    --eval_strategy "no" \
    --save_strategy "no" \
    --learning_rate 6e-6 \
    --weight_decay 0.05 \
    --warmup_ratio 0.1 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --include_num_input_tokens_seen \
    --report_to none \
    --fsdp "full_shard auto_wrap" \
    --fsdp_config config/fsdp_config.json \
    --seed 42 \
    --use_liger True \
    --output_dir alpaca_finetuning