Unverified Commit da4f7b85 authored by Wenhao Chen's avatar Wenhao Chen Committed by GitHub
Browse files

[chat] fix bugs and add unit tests (#4213)

* style: rename replay buffer

Experience replay is typically for off policy algorithms.
Use this name in PPO maybe misleading.

* fix: fix wrong zero2 default arg

* test: update experience tests

* style: rename zero_pad fn

* fix: defer init in CycledDataLoader

* test: add benchmark test

* style: rename internal fn of generation

* style: rename internal fn of lora

* fix: remove unused loss fn

* fix: remove unused utils fn

* refactor: remove generate_with_actor fn

* fix: fix type annotation

* test: add models tests

* fix: skip llama due to long execution time

* style: modify dataset

* style: apply formatter

* perf: update reward dataset

* fix: fix wrong IGNORE_INDEX in sft dataset

* fix: remove DataCollatorForSupervisedDataset

* test: add dataset tests

* style: apply formatter

* style: rename test_ci to test_train

* feat: add llama in inference

* test: add inference tests

* test: change test scripts directory

* fix: update ci

* fix: fix typo

* fix: skip llama due to oom

* fix: fix file mod

* style: apply formatter

* refactor: remove duplicated llama_gptq

* style: apply formatter

* to: update rm test

* feat: add tokenizer arg

* feat: add download model script

* test: update train tests

* fix: modify gemini load and save pretrained

* test: update checkpoint io test

* to: modify nproc_per_node

* fix: do not remove existing dir

* fix: modify save path

* test: add random choice

* fix: fix sft path

* fix: enlarge nproc_per_node to avoid oom

* fix: add num_retry

* fix: make lora config of rm and critic consistent

* fix: add warning about lora weights

* fix: skip some gpt2 tests

* fix: remove grad ckpt in rm and critic due to errors

* refactor: directly use Actor in train_sft

* test: add more arguments

* fix: disable grad ckpt when using lora

* fix: fix save_pretrained and related tests

* test: enable zero2 tests

* revert: remove useless fn

* style: polish code

* test: modify test args
parent 16bf4c02
import copy
from typing import Any, Callable, Dict, Tuple
import pytest
import torch
import torch.nn as nn
from coati.models.base import Actor, Critic, RewardModel, get_base_model
from coati.models.bloom import BLOOMRM, BLOOMActor, BLOOMCritic
from coati.models.generation import generate
from coati.models.gpt import GPTRM, GPTActor, GPTCritic
from coati.models.llama import LlamaActor, LlamaCritic, LlamaRM
from coati.models.lora import LoraLinear, convert_to_lora_module
from coati.models.loss import GPTLMLoss, LogExpLoss, LogSigLoss, PolicyLoss, ValueLoss
from coati.models.opt import OPTRM, OPTActor, OPTCritic
from coati.models.utils import calc_action_log_probs, compute_reward, masked_mean
@pytest.mark.gpu
@pytest.mark.parametrize("batch_size", [4])
@pytest.mark.parametrize("seq_len", [32])
@pytest.mark.parametrize("actor_maker", [
lambda: BLOOMActor(),
lambda: GPTActor(),
# HACK: skip llama due to long execution time
# lambda: LlamaActor(),
lambda: OPTActor()
])
@pytest.mark.parametrize("generate_kwargs", [{
"max_length": 64,
"use_cache": True,
"do_sample": True,
"temperature": 1.0,
"top_k": 50,
}])
def test_generation(actor_maker: Callable[[], Actor],
batch_size: int,
seq_len: int,
generate_kwargs: Dict[str, Any]
):
actor = actor_maker()
input_ids = torch.randint(0, 100, (batch_size, seq_len)).cuda()
sequences = generate(actor.cuda(), input_ids, **generate_kwargs)
assert sequences.shape == (batch_size, generate_kwargs["max_length"])
@pytest.mark.cpu
def test_utils():
fn_input = {
"tensor": torch.ones((10, )),
"mask": torch.randint(0, 2, (10, ))
}
fn_output = masked_mean(dim=0, **fn_input)
assert fn_output.dim() == 0
assert torch.allclose(fn_output, torch.tensor(1.0))
batch_size = 4
num_labels = 10
fn_input = {
"r": torch.ones((batch_size, )),
"kl_coef": 1.0,
"log_probs": torch.randn((batch_size, num_labels)),
"log_probs_base": torch.randn((batch_size, num_labels)),
"action_mask": torch.randint(0, 2, (batch_size, num_labels))
}
fn_output = compute_reward(**fn_input)
assert fn_output.shape == (batch_size, )
batch_size = 4
seq_len = 32
num_labels = 10
num_actions = 2
fn_input = {
"output": {
"logits": torch.randn((batch_size, seq_len, num_labels))
},
"sequences": torch.randint(0, num_labels, (batch_size, seq_len)),
"num_actions": num_actions,
}
fn_output = calc_action_log_probs(**fn_input)
assert fn_output.shape == (batch_size, num_actions)
@pytest.mark.cpu
@pytest.mark.parametrize("lora_rank", [4])
@pytest.mark.parametrize("num_dim", [32])
@pytest.mark.parametrize("num_layers", [4])
def test_lora(lora_rank: int,
num_dim: int,
num_layers: int):
model = nn.ModuleList(
[nn.Linear(num_dim, num_dim)
for _ in range(num_layers)]
)
lora_model = convert_to_lora_module(model, lora_rank)
assert isinstance(lora_model, nn.ModuleList)
for i in range(num_layers):
assert isinstance(lora_model[i], LoraLinear)
assert lora_model[i].lora_A.shape == (lora_rank, num_dim)
assert lora_model[i].lora_B.shape == (num_dim, lora_rank)
old_model = copy.deepcopy(lora_model)
for i in range(num_layers):
assert isinstance(lora_model[i], LoraLinear)
assert torch.allclose(old_model[i].weight, lora_model[i].weight)
assert torch.allclose(old_model[i].bias, lora_model[i].bias)
assert torch.allclose(old_model[i].lora_B @ old_model[i].lora_A,
lora_model[i].lora_B @ lora_model[i].lora_A)
optimizer = torch.optim.Adam(lora_model.parameters())
x = torch.randn(8, num_dim)
for i in range(num_layers):
x = lora_model[i](x)
loss = x.sum()
loss.backward()
optimizer.step()
for i in range(num_layers):
assert isinstance(lora_model[i], LoraLinear)
assert torch.allclose(old_model[i].weight, lora_model[i].weight)
assert torch.allclose(old_model[i].bias, lora_model[i].bias)
assert not torch.allclose(old_model[i].lora_B @ old_model[i].lora_A,
lora_model[i].lora_B @ lora_model[i].lora_A)
@pytest.mark.cpu
@pytest.mark.parametrize("batch_size", [8])
@pytest.mark.parametrize("seq_len", [128])
@pytest.mark.parametrize("models_maker", [
lambda: (BLOOMActor(), BLOOMCritic(), BLOOMRM()),
lambda: (GPTActor(), GPTCritic(), GPTRM()),
# HACK: skip llama due to long execution time
# lambda: (LlamaActor(), LlamaCritic(), LlamaRM()),
lambda: (OPTActor(), OPTCritic(), OPTRM()),
])
@torch.no_grad()
def test_models(models_maker: Callable[[], Tuple[Actor, Critic, RewardModel]],
batch_size: int,
seq_len: int):
actor_input = {
"input_ids": torch.randint(0, 100, (batch_size, seq_len)),
"attention_mask": torch.randint(0, 2, (batch_size, seq_len))
}
critic_input = {
"sequences": torch.randint(0, 100, (batch_size, seq_len)),
"action_mask": torch.randint(0, 2, (batch_size, seq_len)),
"attention_mask": torch.randint(0, 2, (batch_size, seq_len))
}
rm_input = {
"sequences": torch.randint(0, 100, (batch_size, seq_len)),
"attention_mask": torch.randint(0, 2, (batch_size, seq_len))
}
actor, critic, rm = models_maker()
assert isinstance(actor, Actor)
base_actor_model = get_base_model(actor)
assert isinstance(critic, Critic)
base_critic_model = get_base_model(critic)
assert isinstance(rm, RewardModel)
base_rm_model = get_base_model(rm)
actor_output = actor(**actor_input)
critic_output = critic(**critic_input)
rm_output = rm(**rm_input)
assert actor_output.logits.shape[:2] == (batch_size, seq_len)
assert critic_output.shape == (batch_size, )
assert rm_output.shape == (batch_size, )
@pytest.mark.cpu
@pytest.mark.parametrize("batch_size", [16])
@pytest.mark.parametrize("seq_len", [128])
@pytest.mark.parametrize("num_labels", [100])
def test_loss(batch_size: int,
seq_len: int,
num_labels: int):
loss = GPTLMLoss()
loss_input = {
"logits": torch.randn(batch_size, seq_len, num_labels),
"labels": torch.randint(0, num_labels, (batch_size, seq_len))
}
loss_output = loss(**loss_input)
loss = PolicyLoss()
loss_input = {
"log_probs": torch.randn(batch_size, ),
"old_log_probs": torch.randn(batch_size, ),
"advantages": torch.randn(batch_size, )
}
loss_output = loss(**loss_input)
loss = ValueLoss()
loss_input = {
"values": torch.randn(batch_size, ),
"old_values": torch.randn(batch_size, ),
"reward": torch.randn(batch_size, )
}
loss_output = loss(**loss_input)
loss = LogSigLoss()
loss_input = {
"chosen_reward": torch.randn(batch_size, ),
"reject_reward": torch.randn(batch_size, ),
}
loss_output = loss(**loss_input)
loss = LogExpLoss()
loss_input = {
"chosen_reward": torch.randn(batch_size, ),
"reject_reward": torch.randn(batch_size, ),
}
loss_output = loss(**loss_input)
if __name__ == "__main__":
generate_kwargs = dict(max_length=40,
use_cache=True,
do_sample=True,
temperature=1.0,
top_k=50)
test_generation(lambda: LlamaActor(),
batch_size=4,
seq_len=32,
generate_kwargs=generate_kwargs)
test_utils()
test_lora(lora_rank=2, num_dim=8, num_layers=2)
test_models(models_maker=lambda: (BLOOMActor(),
BLOOMCritic(),
BLOOMRM()),
batch_size=8,
seq_len=128)
test_loss(batch_size=8, seq_len=128, num_labels=100)
#!/usr/bin/env bash
set_n_least_used_CUDA_VISIBLE_DEVICES() {
local n=${1:-"9999"}
echo "GPU Memory Usage:"
local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv |
tail -n +2 |
nl -v 0 |
tee /dev/tty |
sort -g -k 2 |
awk '{print $1}' |
head -n $n)
export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g')
echo "Now CUDA_VISIBLE_DEVICES is set to:"
echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
}
set_n_least_used_CUDA_VISIBLE_DEVICES 4
set -xu
if [ -z "$SFT_DATASET" ]; then
echo "Please set \$SFT_DATASET to the path to sft dataset."
exit 1
fi
if [ -z "$PROMPT_PATH" ]; then
echo "Please set \$PROMPT_PATH to the path to prompts csv."
exit 1
fi
if [ -z "$PRETRAIN_DATASET" ]; then
echo "Please set \$PRETRAIN_DATASET to the path to alpaca data."
exit 1
fi
NUM_RETRY=3
BASE_DIR=$(dirname $(dirname $(realpath $BASH_SOURCE)))
EXAMPLES_DIR=$BASE_DIR/examples
MODELS_DIR=$BASE_DIR/examples/models_config
MODELS=('gpt2' 'bloom' 'opt' 'llama')
STRATEGIES=('ddp' 'colossalai_gemini' 'colossalai_zero2')
export OMP_NUM_THREADS=8
# install requirements
pip install -r $EXAMPLES_DIR/requirements.txt
python $EXAMPLES_DIR/download_model.py --model-dir $MODELS_DIR --config-only
get_pretrain() {
local model=$1
if [[ $model == "gpt2" ]]; then
echo "gpt2"
elif [[ $model == "bloom" ]]; then
echo "bigscience/bloom-560m"
elif [[ $model == "opt" ]]; then
echo "facebook/opt-350m"
else
echo "Unknown model $model"
exit 1
fi
}
random_choice() {
local arr=("$@")
local len=${#arr[@]}
local idx=$((RANDOM % len))
echo ${arr[$idx]}
}
echo "[Test]: testing sft ..."
# FIXME: This is a hack to skip tests that are not working
# - gpt2-ddp: RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation
# - llama-*: These tests can be passed locally, skipped for long execution time
SKIPPED_TESTS=(
"gpt2-ddp"
"llama-ddp"
"llama-colossalai_gemini"
"llama-colossalai_zero2"
)
GRAD_CKPTS=('' '--grad_checkpoint')
for lora_rank in '0' '4'; do
for model in ${MODELS[@]}; do
strategies=($(shuf -e "${STRATEGIES[@]}"))
for strategy in ${strategies[@]}; do
if [[ " ${SKIPPED_TESTS[*]} " =~ " $model-$strategy-$lora_rank " ]]; then
echo "[Test]: Skipped $model-$strategy-$lora_rank"
continue
elif [[ " ${SKIPPED_TESTS[*]} " =~ " $model-$strategy " ]]; then
echo "[Test]: Skipped $model-$strategy"
continue
fi
pretrain=$(get_pretrain $model)
pretrain_model=""
if [[ $lora_rank -gt 0 ]]; then
pretrain_model="--pretrain $pretrain"
fi
grad_ckpt=$(random_choice "${GRAD_CKPTS[@]}")
for i in $(seq $NUM_RETRY); do
echo "[Test]: $model-$strategy-$lora_rank, attempt $i"
torchrun --standalone --nproc_per_node=4 $EXAMPLES_DIR/train_sft.py \
$pretrain_model --tokenizer $MODELS_DIR/$model \
--model $model --strategy $strategy --lora_rank $lora_rank $grad_ckpt \
--dataset $SFT_DATASET --max_datasets_size 8 \
--max_epochs 1 --batch_size 1 --accumulation_steps 1 \
--save_path $EXAMPLES_DIR/rlhf_models/sft_ckpt_${model}_${lora_rank}
passed=$?
if [ $passed -eq 0 ]; then
break
fi
done
if [ $passed -ne 0 ]; then
echo "[Test]: Failed $model-$strategy-$lora_rank"
exit 1
fi
done
done
done
echo "[Test]: testing reward model ..."
# FIXME: This is a hack to skip tests that are not working
# - gpt2-ddp: RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation
# - llama-*: These tests can be passed locally, skipped for long execution time
SKIPPED_TESTS=(
"gpt2-ddp"
"llama-ddp"
"llama-colossalai_gemini"
"llama-colossalai_zero2"
)
LOSS_FNS=('log_sig' 'log_exp')
DATASETS=('Anthropic/hh-rlhf' 'Dahoas/rm-static')
for lora_rank in '0' '4'; do
for model in ${MODELS[@]}; do
strategies=($(shuf -e "${STRATEGIES[@]}"))
for strategy in ${strategies[@]}; do
if [[ " ${SKIPPED_TESTS[*]} " =~ " $model-$strategy-$lora_rank " ]]; then
echo "[Test]: Skipped $model-$strategy-$lora_rank"
continue
elif [[ " ${SKIPPED_TESTS[*]} " =~ " $model-$strategy " ]]; then
echo "[Test]: Skipped $model-$strategy"
continue
fi
pretrain=$(get_pretrain $model)
pretrain_model=""
if [[ $lora_rank -gt 0 ]]; then
pretrain_model="--pretrain $pretrain"
fi
loss_fn=$(random_choice "${LOSS_FNS[@]}")
dataset=$(random_choice "${DATASETS[@]}")
subset=$(if [[ $dataset == "Dahoas/rm-static" ]]; then echo "None"; else echo "harmless-base"; fi)
for i in $(seq $NUM_RETRY); do
echo "[Test]: $model-$strategy-$lora_rank, attempt $i"
torchrun --standalone --nproc_per_node=4 $EXAMPLES_DIR/train_reward_model.py \
$pretrain_model --tokenizer $MODELS_DIR/$model \
--model $model --strategy $strategy --lora_rank $lora_rank --loss_fn $loss_fn \
--dataset $dataset --subset $subset --test True --batch_size 1 \
--save_path $EXAMPLES_DIR/rlhf_models/rm_ckpt_${model}_${lora_rank}.pt
passed=$?
if [ $passed -eq 0 ]; then
break
fi
done
if [ $passed -ne 0 ]; then
echo "[Test]: Failed to train reward model $model-$strategy-$lora_rank"
exit 1
fi
done
done
done
echo "[Test]: testing RLHF ..."
# FIXME: This is a hack to skip tests that are not working
# - gpt2-ddp: RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation
# - llama-*: These tests can be passed locally, skipped for long execution time
SKIPPED_TESTS=(
"gpt2-ddp"
"llama-ddp"
"llama-colossalai_gemini"
"llama-colossalai_zero2"
)
for model in ${MODELS[@]}; do
for lora_rank in '0' '4'; do
strategies=($(shuf -e "${STRATEGIES[@]}"))
for strategy in ${strategies[@]}; do
if [[ " ${SKIPPED_TESTS[*]} " =~ " $model-$strategy-$lora_rank " ]]; then
echo "[Test]: Skipped $model-$strategy-$lora_rank"
continue
elif [[ " ${SKIPPED_TESTS[*]} " =~ " $model-$strategy " ]]; then
echo "[Test]: Skipped $model-$strategy"
continue
fi
rm_pretrain=$(get_pretrain $model)
rm_pretrain_model=""
if [[ $lora_rank -gt 0 ]]; then
rm_pretrain_model="--rm_pretrain $rm_pretrain"
fi
for i in $(seq $NUM_RETRY); do
echo "[Test]: $model-$strategy-$lora_rank, attempt $i"
torchrun --standalone --nproc_per_node=4 $EXAMPLES_DIR/train_prompts.py \
--prompt_dataset $PROMPT_PATH --pretrain_dataset $PRETRAIN_DATASET \
--strategy $strategy --model $model --tokenizer $MODELS_DIR/$model \
--num_episodes 1 --num_collect_steps 1 --num_update_steps 1 \
--experience_batch_size 2 --train_batch_size 1 --lora_rank $lora_rank \
--pretrain $EXAMPLES_DIR/rlhf_models/sft_ckpt_${model}_${lora_rank} \
$rm_pretrain_model --rm_path $EXAMPLES_DIR/rlhf_models/rm_ckpt_${model}_${lora_rank}.pt \
--save_path $EXAMPLES_DIR/rlhf_models/actor_checkpoint_prompts.pt
passed=$?
if [ $passed -eq 0 ]; then
break
fi
done
if [ $passed -ne 0 ]; then
echo "[Test]: Failed to train RLHF $model-$strategy-$lora_rank"
exit 1
fi
done
rm -rf $EXAMPLES_DIR/rlhf_models/sft_ckpt_${model}_${lora_rank}
rm $EXAMPLES_DIR/rlhf_models/rm_ckpt_${model}_${lora_rank}.pt
done
done
rm $EXAMPLES_DIR/rlhf_models/actor_checkpoint_prompts.pt
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment