#!/usr/bin/env bash
# CI smoke tests: runs small SFT, reward-model and prompt-training jobs
# across several base models (bloom, gpt2, opt, deberta, roberta).
#
# Required environment variables:
#   SFT_DATASET      - path to the SFT dataset
#   PROMPT_PATH      - path to the prompts csv
#   PRETRAIN_DATASET - path to the alpaca data

set -xue

# Use ${VAR:-} so an unset variable reaches the friendly error message below
# instead of aborting with a bare "unbound variable" error from `set -u`.
if [ -z "${SFT_DATASET:-}" ]; then
    echo "Please set \$SFT_DATASET to the path to sft dataset."
    exit 1
fi

if [ -z "${PROMPT_PATH:-}" ]; then
    echo "Please set \$PROMPT_PATH to the path to prompts csv."
    exit 1
fi

if [ -z "${PRETRAIN_DATASET:-}" ]; then
    echo "Please set \$PRETRAIN_DATASET to the path to alpaca data."
    exit 1
fi

# Resolve the directory containing this script so all generated paths
# (checkpoints, outputs) are absolute and independent of the caller's cwd.
BASE=$(realpath "$(dirname "$0")")

export OMP_NUM_THREADS=8

# install requirements
pip install -r "${BASE}/requirements.txt"

# Keep wandb offline so CI runs need no network access or credentials.
wandb init -m offline
# train sft
# Each run trains one epoch on a 512-sample slice of $SFT_DATASET, then the
# throwaway output directory is removed before the next model is tried.
torchrun --standalone --nproc_per_node=4 "${BASE}/train_sft.py" --pretrain 'bigscience/bloom-560m' \
        --model 'bloom' --strategy colossalai_zero2 --lora_rank 4 \
        --dataset "$SFT_DATASET" --max_datasets_size 512 --max_epochs 1 \
        --save_path "${BASE}/output"
rm -rf "${BASE}/output"

torchrun --standalone --nproc_per_node=4 "${BASE}/train_sft.py" --pretrain 'gpt2' \
        --model 'gpt2' --strategy colossalai_zero2 \
        --dataset "$SFT_DATASET" --max_datasets_size 512 --max_epochs 1 \
        --save_path "${BASE}/output"
rm -rf "${BASE}/output"

torchrun --standalone --nproc_per_node=4 "${BASE}/train_sft.py" --pretrain 'facebook/opt-350m' \
        --model 'opt' --strategy colossalai_zero2 --lora_rank 4 \
        --dataset "$SFT_DATASET" --max_datasets_size 512 --max_epochs 1 \
        --save_path "${BASE}/output"
rm -rf "${BASE}/output"

torchrun --standalone --nproc_per_node=4 "${BASE}/train_sft.py" --pretrain 'gpt2' \
        --model 'gpt2' --strategy ddp --lora_rank 4 \
        --dataset "$SFT_DATASET" --max_datasets_size 512 --max_epochs 1 \
        --save_path "${BASE}/output"

# The naive-strategy run is intentionally disabled in CI.
#torchrun --standalone --nproc_per_node=4 ${BASE}/train_sft.py --pretrain 'facebook/opt-350m' \
#        --model 'opt' --strategy naive \
#        --dataset $SFT_DATASET --max_datasets_size 512 --max_epochs 1 \
#        --save_path ${BASE}/output

rm -rf "${BASE}/output"

# train rm
# The opt and gpt2 checkpoints (rm_ckpt_opt.pt / rm_ckpt_gpt.pt) are kept on
# disk deliberately: the prompt-training stage below consumes them. The other
# checkpoints are removed immediately after their run.
torchrun --standalone --nproc_per_node=2 "${BASE}/train_reward_model.py" \
                            --pretrain 'facebook/opt-350m' --model 'opt' \
                            --strategy colossalai_zero2 --loss_fn 'log_sig' \
                            --dataset 'Anthropic/hh-rlhf' --subset 'harmless-base' \
                            --test True --lora_rank 0 \
                            --save_path "${BASE}/rm_ckpt_opt.pt"

torchrun --standalone --nproc_per_node=2 "${BASE}/train_reward_model.py" \
                            --pretrain 'gpt2' --model 'gpt2' \
                            --strategy colossalai_zero2 --loss_fn 'log_exp' \
                            --dataset 'Dahoas/rm-static' \
                            --test True --lora_rank 0 \
                            --save_path "${BASE}/rm_ckpt_gpt.pt"

torchrun --standalone --nproc_per_node=2 "${BASE}/train_reward_model.py" \
                            --pretrain 'gpt2' --model 'gpt2' \
                            --strategy ddp --loss_fn 'log_exp' \
                            --dataset 'Dahoas/rm-static' \
                            --test True --lora_rank 4 \
                            --save_path "${BASE}/rm_ckpt.pt"
rm -rf "${BASE}/rm_ckpt.pt"

torchrun --standalone --nproc_per_node=2 "${BASE}/train_reward_model.py" \
                            --pretrain 'bigscience/bloom-560m' --model 'bloom' \
                            --strategy colossalai_zero2 --loss_fn 'log_sig' \
                            --dataset 'Anthropic/hh-rlhf' --subset 'harmless-base' \
                            --test True --lora_rank 4 \
                            --save_path "${BASE}/rm_ckpt.pt"
rm -rf "${BASE}/rm_ckpt.pt"

torchrun --standalone --nproc_per_node=2 "${BASE}/train_reward_model.py" \
                            --pretrain 'microsoft/deberta-v3-large' --model 'deberta' \
                            --strategy colossalai_zero2 --loss_fn 'log_sig' \
                            --dataset 'Anthropic/hh-rlhf' --subset 'harmless-base' \
                            --test True --lora_rank 4 \
                            --save_path "${BASE}/rm_ckpt.pt"
rm -rf "${BASE}/rm_ckpt.pt"

torchrun --standalone --nproc_per_node=2 "${BASE}/train_reward_model.py" \
                            --pretrain 'roberta-base' --model 'roberta' \
                            --strategy colossalai_zero2 --loss_fn 'log_exp' \
                            --dataset 'Anthropic/hh-rlhf' --subset 'harmless-base' \
                            --test True --lora_rank 4 \
                            --save_path "${BASE}/rm_ckpt.pt"

rm -rf "${BASE}/rm_ckpt.pt"

# train prompts (RL stage) — consumes the reward-model checkpoints saved
# by the rm stage above, then cleans them and the actor checkpoint up.
torchrun --standalone --nproc_per_node=2 "${BASE}/train_prompts.py" --prompt_dataset "$PROMPT_PATH" --pretrain_dataset "$PRETRAIN_DATASET" \
        --strategy colossalai_zero2 --num_episodes 1 --max_timesteps 2 \
        --update_timesteps 2 --max_epochs 1 --train_batch_size 2 \
        --pretrain 'facebook/opt-350m' --model opt \
        --rm_pretrain 'facebook/opt-350m' \
        --rm_path "${BASE}/rm_ckpt_opt.pt" \
        --save_path "${BASE}/actor_checkpoint_prompts.pt"
rm -rf "${BASE}/rm_ckpt_opt.pt"

torchrun --standalone --nproc_per_node=2 "${BASE}/train_prompts.py" --prompt_dataset "$PROMPT_PATH" --pretrain_dataset "$PRETRAIN_DATASET" \
         --strategy colossalai_zero2 --num_episodes 1 --max_timesteps 2 \
         --update_timesteps 2 --max_epochs 1 --train_batch_size 2 \
         --pretrain 'gpt2' --model gpt2 \
         --rm_pretrain 'gpt2' \
         --rm_path "${BASE}/rm_ckpt_gpt.pt" \
         --save_path "${BASE}/actor_checkpoint_prompts.pt"
rm -rf "${BASE}/rm_ckpt_gpt.pt"

rm -rf "${BASE}/actor_checkpoint_prompts.pt"

# 3080 doesn't support P2P, skip this test
# cd ${BASE}/ray && bash test_ci.sh && cd ${BASE}