ModelZoo / LLaMA-Factory-Llama3.2_pytorch / Commits
Commit 12d5cbac, authored Oct 21, 2024 by chenzk
v1.0
Pipeline #1780 canceled with stages
Showing 20 changed files with 2411 additions and 0 deletions (+2411 -0)
src/llamafactory/train/kto/workflow.py     +97   -0
src/llamafactory/train/ppo/__init__.py     +18   -0
src/llamafactory/train/ppo/ppo_utils.py    +88   -0
src/llamafactory/train/ppo/trainer.py      +513  -0
src/llamafactory/train/ppo/workflow.py     +79   -0
src/llamafactory/train/pt/__init__.py      +18   -0
src/llamafactory/train/pt/trainer.py       +70   -0
src/llamafactory/train/pt/workflow.py      +84   -0
src/llamafactory/train/rm/__init__.py      +18   -0
src/llamafactory/train/rm/metric.py        +53   -0
src/llamafactory/train/rm/trainer.py       +124  -0
src/llamafactory/train/rm/workflow.py      +91   -0
src/llamafactory/train/sft/__init__.py     +18   -0
src/llamafactory/train/sft/metric.py       +139  -0
src/llamafactory/train/sft/trainer.py      +155  -0
src/llamafactory/train/sft/workflow.py     +125  -0
src/llamafactory/train/test_utils.py       +119  -0
src/llamafactory/train/trainer_utils.py    +459  -0
src/llamafactory/train/tuner.py            +143  -0
src/llamafactory/webui/__init__.py         +0    -0

src/llamafactory/train/kto/workflow.py  0 → 100644 (new file)
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by HuggingFace's TRL library.
# https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/kto.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING, List, Optional

from ...data import KTODataCollatorWithPadding, get_dataset, get_template_and_fix_tokenizer
from ...extras.constants import IGNORE_INDEX
from ...extras.ploting import plot_loss
from ...hparams import ModelArguments
from ...model import load_model, load_tokenizer
from ..trainer_utils import create_modelcard_and_push, create_ref_model
from .trainer import CustomKTOTrainer


if TYPE_CHECKING:
    from transformers import Seq2SeqTrainingArguments, TrainerCallback

    from ...hparams import DataArguments, FinetuningArguments


def run_kto(
    model_args: "ModelArguments",
    data_args: "DataArguments",
    training_args: "Seq2SeqTrainingArguments",
    finetuning_args: "FinetuningArguments",
    callbacks: Optional[List["TrainerCallback"]] = None,
):
    tokenizer_module = load_tokenizer(model_args)
    tokenizer = tokenizer_module["tokenizer"]
    template = get_template_and_fix_tokenizer(tokenizer, data_args)
    dataset_module = get_dataset(template, model_args, data_args, training_args, stage="kto", **tokenizer_module)
    model = load_model(tokenizer, model_args, finetuning_args, training_args.do_train)

    data_collator = KTODataCollatorWithPadding(
        template=template,
        pad_to_multiple_of=8,
        label_pad_token_id=IGNORE_INDEX if data_args.ignore_pad_token_for_loss else tokenizer.pad_token_id,
        **tokenizer_module,
    )

    # Create reference model
    if finetuning_args.ref_model is None and (not training_args.do_train):  # use the model itself
        ref_model = model
    else:
        ref_model = create_ref_model(model_args, finetuning_args)

    # Update arguments
    training_args.remove_unused_columns = False  # important for multimodal and pairwise dataset

    # Initialize our Trainer
    trainer = CustomKTOTrainer(
        model=model,
        ref_model=ref_model,
        args=training_args,
        finetuning_args=finetuning_args,
        data_collator=data_collator,
        callbacks=callbacks,
        **dataset_module,
        **tokenizer_module,
    )

    # Training
    if training_args.do_train:
        train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
        trainer.save_model()
        trainer.log_metrics("train", train_result.metrics)
        trainer.save_metrics("train", train_result.metrics)
        trainer.save_state()
        if trainer.is_world_process_zero() and finetuning_args.plot_loss:
            plot_loss(training_args.output_dir, keys=["loss", "eval_loss", "train/rewards/chosen"])

    # Evaluation
    if training_args.do_eval:
        metrics = trainer.evaluate(metric_key_prefix="eval")
        if id(model) == id(ref_model):  # unable to compute rewards without a reference model
            remove_keys = [key for key in metrics.keys() if "rewards" in key]
            for key in remove_keys:
                metrics.pop(key)

        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Create model card
    create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args)

src/llamafactory/train/ppo/__init__.py  0 → 100644 (new file)
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .workflow import run_ppo


__all__ = ["run_ppo"]

src/llamafactory/train/ppo/ppo_utils.py  0 → 100644 (new file)
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from contextlib import nullcontext
from typing import TYPE_CHECKING, Dict, List, Literal, Optional

import torch
from transformers.integrations import is_deepspeed_zero3_enabled

from ...extras.packages import is_requests_available


if is_requests_available():
    import requests


if TYPE_CHECKING:
    from transformers import PreTrainedModel
    from trl import AutoModelForCausalLMWithValueHead


def get_rewards_from_server(server_url: str, messages: List[str]) -> List["torch.Tensor"]:
    r"""
    Gets reward scores from the API server.
    """
    headers = {"Content-Type": "application/json"}
    payload = {"model": "model", "messages": messages}
    response = requests.post(server_url, json=payload, headers=headers)
    rewards = json.loads(response.text)["scores"]
    return torch.Tensor(rewards)


def replace_model(model: "AutoModelForCausalLMWithValueHead", target: Literal["default", "reward"]) -> None:
    r"""
    Replaces the default/reward modules in the model. The model is already unwrapped.
    """
    v_head_layer = model.v_head.summary
    if is_deepspeed_zero3_enabled():
        import deepspeed  # type: ignore

        params = [v_head_layer.weight, v_head_layer.bias]
        context_maybe_zero3 = deepspeed.zero.GatheredParameters(params, modifier_rank=0)
    else:
        context_maybe_zero3 = nullcontext()

    model.pretrained_model.set_adapter(target)  # set the LoRA adapter to be active
    with context_maybe_zero3:
        if target == "reward":  # save default head temporarily
            setattr(model, "default_head_weight", v_head_layer.weight.data.detach().clone())
            setattr(model, "default_head_bias", v_head_layer.bias.data.detach().clone())

        device = v_head_layer.weight.device
        v_head_layer.weight.data = model.get_buffer("{}_head_weight".format(target)).detach().clone().to(device)
        v_head_layer.bias.data = model.get_buffer("{}_head_bias".format(target)).detach().clone().to(device)


def dump_layernorm(model: "PreTrainedModel") -> Dict[str, "torch.Tensor"]:
    r"""
    Dumps the layernorm parameters in the model. The model is already unwrapped (and gathered).
    """
    layer_norm_params = {}
    for name, param in model.named_parameters():
        if param.data.dtype == torch.float32:
            layer_norm_params[name] = param.data.detach().clone()
            param.data = param.data.to(model.config.torch_dtype)

    return layer_norm_params


def restore_layernorm(model: "PreTrainedModel", layernorm_params: Optional[Dict[str, "torch.Tensor"]] = None) -> None:
    r"""
    Restores the layernorm parameters in the model. The model is already unwrapped (and gathered).
    """
    for name, param in model.named_parameters():
        if name in layernorm_params:
            param.data = layernorm_params[name]
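
Not part of the commit: a minimal usage sketch for get_rewards_from_server, assuming the llamafactory package is importable and a reward-scoring HTTP endpoint (the URL below is hypothetical) that answers with a JSON body of the form {"scores": [...]}.

    from llamafactory.train.ppo.ppo_utils import get_rewards_from_server

    # Hypothetical endpoint; any server that returns {"scores": [...]} for the posted messages works.
    server_url = "http://localhost:8000/v1/score"
    messages = ["Query: hello\nResponse: hi there"]
    rewards = get_rewards_from_server(server_url, messages)  # a torch.Tensor with one score per message
    print(rewards)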

src/llamafactory/train/ppo/trainer.py  0 → 100644 (new file)
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by HuggingFace's TRL library.
# https://github.com/huggingface/trl/blob/v0.8.0/trl/trainer/ppo_trainer.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import os
import sys
import warnings
from types import MethodType
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple

import torch
from accelerate.utils import DistributedDataParallelKwargs
from tqdm import tqdm
from transformers import GenerationConfig, Trainer, TrainerControl, TrainerState
from transformers.optimization import get_scheduler
from transformers.trainer import DEFAULT_CALLBACKS
from transformers.trainer_callback import CallbackHandler
from transformers.trainer_pt_utils import remove_dummy_checkpoint
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
from transformers.utils import SAFE_WEIGHTS_NAME, WEIGHTS_NAME
from trl import PPOConfig, PPOTrainer
from trl.core import PPODecorators, logprobs_from_logits
from trl.models.utils import unwrap_model_for_generation
from typing_extensions import override

from ...extras.logging import get_logger
from ...extras.misc import AverageMeter, count_parameters, get_current_device, get_logits_processor
from ..callbacks import FixValueHeadModelCallback, SaveProcessorCallback
from ..trainer_utils import create_custom_optimizer, create_custom_scheduler
from .ppo_utils import dump_layernorm, get_rewards_from_server, replace_model, restore_layernorm


if TYPE_CHECKING:
    from datasets import Dataset
    from transformers import (
        DataCollatorWithPadding,
        PreTrainedTokenizer,
        ProcessorMixin,
        Seq2SeqTrainingArguments,
        TrainerCallback,
    )
    from trl import AutoModelForCausalLMWithValueHead

    from ...hparams import FinetuningArguments, GeneratingArguments, ModelArguments


logger = get_logger(__name__)


class CustomPPOTrainer(PPOTrainer, Trainer):
    r"""
    Inherits PPOTrainer.
    """

    def __init__(
        self,
        model_args: "ModelArguments",
        training_args: "Seq2SeqTrainingArguments",
        finetuning_args: "FinetuningArguments",
        generating_args: "GeneratingArguments",
        callbacks: Optional[List["TrainerCallback"]],
        model: "AutoModelForCausalLMWithValueHead",
        reward_model: Optional["AutoModelForCausalLMWithValueHead"],
        ref_model: Optional["AutoModelForCausalLMWithValueHead"],
        tokenizer: "PreTrainedTokenizer",
        processor: Optional["ProcessorMixin"],
        data_collator: "DataCollatorWithPadding",
        train_dataset: Optional["Dataset"] = None,
        eval_dataset: Optional["Dataset"] = None,
    ) -> None:
        if eval_dataset is not None:
            raise NotImplementedError("PPOTrainer does not support eval dataset yet.")

        backward_batch_size = training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps
        ppo_config = PPOConfig(
            model_name=model_args.model_name_or_path,
            learning_rate=training_args.learning_rate,
            mini_batch_size=training_args.per_device_train_batch_size,
            batch_size=backward_batch_size * finetuning_args.ppo_buffer_size,
            gradient_accumulation_steps=training_args.gradient_accumulation_steps,
            ppo_epochs=finetuning_args.ppo_epochs,
            max_grad_norm=training_args.max_grad_norm,
            seed=training_args.seed,
            optimize_device_cache=True,
            target=finetuning_args.ppo_target,
            use_score_scaling=finetuning_args.ppo_score_norm,
            use_score_norm=finetuning_args.ppo_score_norm,
            whiten_rewards=finetuning_args.ppo_whiten_rewards,
            accelerator_kwargs={"step_scheduler_with_optimizer": False},
            log_with=training_args.report_to[0] if training_args.report_to else None,
            project_kwargs={"logging_dir": training_args.logging_dir},
        )

        # Add deepspeed config
        if training_args.deepspeed_plugin is not None:
            ppo_config.accelerator_kwargs["kwargs_handlers"] = [
                DistributedDataParallelKwargs(find_unused_parameters=training_args.ddp_find_unused_parameters)
            ]
            ppo_config.accelerator_kwargs["deepspeed_plugin"] = training_args.deepspeed_plugin
            if ppo_config.log_with is not None:
                logger.warning("PPOTrainer cannot use external logger when DeepSpeed is enabled.")
                ppo_config.log_with = None

        # Create optimizer and scheduler
        if training_args.max_steps > 0:
            num_training_steps = training_args.max_steps
        else:
            total_train_batch_size = backward_batch_size * finetuning_args.ppo_buffer_size * training_args.world_size
            num_training_steps = training_args.num_train_epochs * math.ceil(
                len(train_dataset) / total_train_batch_size
            )

        optimizer = self.create_optimizer(model, training_args, finetuning_args)
        scheduler = self.create_scheduler(training_args, num_training_steps, optimizer)

        PPOTrainer.__init__(
            self,
            config=ppo_config,
            model=model,
            ref_model=ref_model,
            tokenizer=tokenizer,
            dataset=train_dataset,
            optimizer=optimizer,
            data_collator=data_collator,
            lr_scheduler=scheduler,
        )

        self.args = training_args
        self.model_args = model_args
        self.finetuning_args = finetuning_args
        self.reward_model = reward_model
        self.current_device = get_current_device()  # patch for deepspeed training

        self.generation_config = GenerationConfig(
            pad_token_id=self.tokenizer.pad_token_id,
            eos_token_id=[self.tokenizer.eos_token_id] + self.tokenizer.additional_special_tokens_ids,
            **generating_args.to_dict(),
        )

        self.state = TrainerState()
        self.control = TrainerControl()
        self.is_deepspeed_enabled = getattr(self.accelerator.state, "deepspeed_plugin", None) is not None
        self.is_fsdp_enabled = getattr(self.accelerator.state, "fsdp_plugin", None) is not None
        callbacks = DEFAULT_CALLBACKS if callbacks is None else DEFAULT_CALLBACKS + callbacks
        self.callback_handler = CallbackHandler(
            callbacks, self.accelerator.unwrap_model(self.model), self.tokenizer, self.optimizer, self.lr_scheduler
        )
        if self.args.max_steps > 0:
            logger.info("max_steps is given, it will override any value given in num_train_epochs")

        self.amp_context = torch.autocast(self.current_device.type)
        warnings.simplefilter("ignore")  # remove gc warnings on ref model

        if finetuning_args.reward_model_type == "full":
            if self.is_deepspeed_enabled:
                if not (
                    getattr(reward_model.pretrained_model, "is_loaded_in_8bit", False)
                    or getattr(reward_model.pretrained_model, "is_loaded_in_4bit", False)
                ):  # quantized models are already set on the correct device
                    self.reward_model = self._prepare_deepspeed(self.reward_model)
            else:
                self.reward_model = self.accelerator.prepare_model(self.reward_model, evaluation_mode=True)

        self.add_callback(FixValueHeadModelCallback)

        if processor is not None:
            self.add_callback(SaveProcessorCallback(processor))

        if finetuning_args.use_badam:
            from badam import BAdamCallback, clip_grad_norm_old_version

            self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator)
            self.add_callback(BAdamCallback)

    def ppo_train(self, resume_from_checkpoint: Optional[str] = None) -> None:
        r"""
        Implements training loop for the PPO stage, like _inner_training_loop() in Huggingface's Trainer.
        """
        if resume_from_checkpoint is not None:
            raise ValueError("`resume_from_checkpoint` will be supported in the future version.")

        total_train_batch_size = (
            self.args.per_device_train_batch_size
            * self.args.gradient_accumulation_steps
            * self.finetuning_args.ppo_buffer_size
            * self.args.world_size
        )
        if self.args.max_steps > 0:
            num_examples = total_train_batch_size * self.args.max_steps
            num_train_epochs = sys.maxsize
            max_steps = self.args.max_steps
            steps_in_epoch = self.args.max_steps
        else:
            len_dataloader = len(self.dataloader)
            num_examples = len(self.dataset)
            num_train_epochs = self.args.num_train_epochs
            max_steps = math.ceil(num_train_epochs * len_dataloader)
            steps_in_epoch = len_dataloader

        self.state.max_steps = max_steps
        self.state.num_train_epochs = num_train_epochs
        self.state.is_local_process_zero = self.is_local_process_zero()
        self.state.is_world_process_zero = self.is_world_process_zero()

        if self.is_world_process_zero():
            logger.info("***** Running training *****")
            logger.info("  Num examples = {:,}".format(num_examples))
            logger.info("  Num Epochs = {:,}".format(num_train_epochs))
            logger.info("  Instantaneous batch size per device = {:,}".format(self.args.per_device_train_batch_size))
            logger.info(
                "  Total train batch size (w. parallel, buffer, distributed & accumulation) = {:,}".format(
                    total_train_batch_size
                )
            )
            logger.info("  Gradient Accumulation steps = {:,}".format(self.args.gradient_accumulation_steps))
            logger.info("  Num optimization epochs per batch = {:,}".format(self.finetuning_args.ppo_epochs))
            logger.info("  Total training steps = {:,}".format(max_steps))
            logger.info("  Number of trainable parameters = {:,}".format(count_parameters(self.model)[0]))

        dataiter = iter(self.dataloader)
        loss_meter = AverageMeter()
        reward_meter = AverageMeter()
        self.callback_handler.on_train_begin(self.args, self.state, self.control)

        for step in tqdm(range(max_steps), disable=not self.is_local_process_zero()):
            try:
                batch = next(dataiter)
            except StopIteration:
                dataiter = iter(self.dataloader)
                batch = next(dataiter)

            # Get inputs
            self.model.eval()
            self.tokenizer.padding_side = "right"  # change padding side
            queries, responses, rewards = [], [], []
            for idx in range(0, self.config.batch_size, self.config.mini_batch_size):
                mini_batch_queries, mini_batch_responses = self.get_inputs(
                    batch[idx : idx + self.config.mini_batch_size]
                )
                mini_batch_rewards = self.get_rewards(mini_batch_queries, mini_batch_responses)
                queries.extend(mini_batch_queries)
                responses.extend(mini_batch_responses)
                rewards.extend(mini_batch_rewards)

            # Run PPO step
            self.model.train()
            stats = self.step(queries, responses, rewards)
            self.tokenizer.padding_side = "left"  # restore padding side
            loss_meter.update(float(stats["ppo/loss/total"]), n=len(rewards))
            reward_meter.update(torch.stack(rewards).mean().item(), n=len(rewards))

            if self.config.log_with is not None:
                try:
                    batch["query"] = self.tokenizer.batch_decode(queries, skip_special_tokens=True)
                    batch["response"] = self.tokenizer.batch_decode(responses, skip_special_tokens=True)
                    self.log_stats(stats, batch, rewards)
                except Exception:
                    logger.warning("Failed to save stats due to unknown errors.")

            self.state.global_step += 1
            self.callback_handler.on_step_end(self.args, self.state, self.control)

            if self.is_local_process_zero() and (step + 1) % self.args.logging_steps == 0:
                logs = dict(
                    loss=round(loss_meter.avg, 4),
                    reward=round(reward_meter.avg, 4),
                    learning_rate=stats["ppo/learning_rate"],
                    epoch=round(step / steps_in_epoch, 2),
                )
                tqdm.write(str(logs))
                logs["step"] = step
                self.state.log_history.append(logs)
                self.callback_handler.on_log(self.args, self.state, self.control, logs)
                loss_meter.reset()
                reward_meter.reset()

            if (step + 1) % self.args.save_steps == 0:  # save checkpoint
                self.save_model(
                    os.path.join(self.args.output_dir, "{}-{}".format(PREFIX_CHECKPOINT_DIR, self.state.global_step))
                )
                self.callback_handler.on_save(self.args, self.state, self.control)

            if self.control.should_epoch_stop or self.control.should_training_stop:
                break

        self.callback_handler.on_train_end(self.args, self.state, self.control)

    @override
    def create_optimizer(
        self,
        model: "AutoModelForCausalLMWithValueHead",
        training_args: "Seq2SeqTrainingArguments",
        finetuning_args: "FinetuningArguments",
    ) -> "torch.optim.Optimizer":
        optimizer = create_custom_optimizer(model, training_args, finetuning_args)
        if optimizer is None:
            decay_params, nodecay_params = [], []
            decay_param_names = self.get_decay_parameter_names(model)
            for name, param in model.named_parameters():
                if param.requires_grad:
                    if name in decay_param_names:
                        decay_params.append(param)
                    else:
                        nodecay_params.append(param)

            optim_class, optim_kwargs = Trainer.get_optimizer_cls_and_kwargs(training_args)
            param_groups = [
                dict(params=nodecay_params),
                dict(params=decay_params, weight_decay=training_args.weight_decay),
            ]
            optimizer = optim_class(param_groups, **optim_kwargs)

        return optimizer

    @override
    def create_scheduler(
        self, training_args: "Seq2SeqTrainingArguments", num_training_steps: int, optimizer: "torch.optim.Optimizer"
    ) -> "torch.optim.lr_scheduler.LRScheduler":
        create_custom_scheduler(training_args, num_training_steps, optimizer)
        lr_scheduler = get_scheduler(
            training_args.lr_scheduler_type,
            optimizer=optimizer,
            num_warmup_steps=training_args.get_warmup_steps(num_training_steps),
            num_training_steps=num_training_steps,
        )
        return lr_scheduler

    @torch.no_grad()
    def get_inputs(self, batch: Dict[str, "torch.Tensor"]) -> Tuple[List["torch.Tensor"], List["torch.Tensor"]]:
        r"""
        Generates model's responses given queries.
        """
        if batch["input_ids"].size(0) == 1:  # handle llama2 ppo with gradient accumulation > 1
            start_index = (batch["input_ids"][0] != self.tokenizer.pad_token_id).nonzero()[0].item()
            for k, v in batch.items():
                batch[k] = v[:, start_index:]

        with unwrap_model_for_generation(self.model, self.accelerator) as unwrapped_model:
            unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model)
            if self.model_args.upcast_layernorm:
                layernorm_params = dump_layernorm(unwrapped_model)

            generate_output: "torch.Tensor" = unwrapped_model.generate(
                generation_config=self.generation_config, logits_processor=get_logits_processor(), **batch
            )
            if self.model_args.upcast_layernorm:
                restore_layernorm(unwrapped_model, layernorm_params)

        query = batch["input_ids"].detach().cpu()
        response = generate_output[:, batch["input_ids"].size(-1) :].detach().cpu()
        queries, responses = [], []
        for i in range(len(query)):
            query_start_index = (query[i] != self.tokenizer.pad_token_id).nonzero()[0].item()
            response_indexes = (response[i] != self.tokenizer.pad_token_id).nonzero()

            if len(response_indexes) == 0:  # allow empty response
                response_length = 1
            elif self.tokenizer.eos_token_id == self.tokenizer.pad_token_id:  # include eos token
                response_length = response_indexes[-1].item() + 2
            else:
                response_length = response_indexes[-1].item() + 1

            queries.append(query[i, query_start_index:])  # remove padding from left
            responses.append(response[i, :response_length])  # remove padding from right

        return queries, responses

    @torch.no_grad()
    def get_rewards(
        self,
        queries: List["torch.Tensor"],
        responses: List["torch.Tensor"],
    ) -> List["torch.Tensor"]:
        r"""
        Computes scores using given reward model.

        Both inputs and outputs are put on CPU.
        """
        if self.finetuning_args.reward_model_type == "api":
            token_ids = [torch.cat((q, r), dim=-1).tolist() for q, r in zip(queries, responses)]
            messages = self.tokenizer.batch_decode(token_ids, skip_special_tokens=False)
            return get_rewards_from_server(self.reward_model, messages)

        batch: Dict[str, "torch.Tensor"] = self.prepare_model_inputs(queries, responses)
        unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model)

        if self.finetuning_args.reward_model_type == "lora":
            replace_model(unwrapped_model, target="reward")
            reward_model = self.model
        else:
            reward_model = self.reward_model

        with unwrap_model_for_generation(reward_model, self.accelerator), self.amp_context:  # support bf16
            values: "torch.Tensor" = reward_model(**batch, return_dict=True, use_cache=False)[-1]

        if self.finetuning_args.reward_model_type == "lora":
            replace_model(unwrapped_model, target="default")

        rewards = values.gather(dim=-1, index=(batch["attention_mask"].sum(dim=-1, keepdim=True) - 1))
        return rewards.float().detach()  # use fp32 type

    @override
    @PPODecorators.empty_device_cache()
    def batched_forward_pass(
        self,
        model: "AutoModelForCausalLMWithValueHead",
        queries: "torch.Tensor",
        responses: "torch.Tensor",
        model_inputs: Dict[str, Any],
        return_logits: bool = False,
        response_masks: Optional["torch.Tensor"] = None,
    ) -> Tuple["torch.Tensor", Optional["torch.Tensor"], "torch.Tensor", "torch.Tensor"]:
        r"""
        Calculates model outputs in multiple batches.

        Subclass and override to inject custom behavior.
        """
        bs = len(queries)
        fbs = self.config.mini_batch_size
        all_logprobs = []
        all_logits = []
        all_masks = []
        all_values = []

        for i in range(math.ceil(bs / fbs)):
            input_kwargs = {key: value[i * fbs : (i + 1) * fbs] for key, value in model_inputs.items()}
            query_batch = queries[i * fbs : (i + 1) * fbs]
            response_batch = responses[i * fbs : (i + 1) * fbs]
            if response_masks is not None:
                response_masks_batch = response_masks[i * fbs : (i + 1) * fbs]

            input_ids = input_kwargs["input_ids"]
            attention_mask = input_kwargs["attention_mask"]

            with self.amp_context:  # support bf16
                logits, _, values = model(**input_kwargs, return_dict=True, use_cache=False)

            logprobs = logprobs_from_logits(logits[:, :-1, :], input_ids[:, 1:])
            masks = torch.zeros_like(attention_mask)
            masks[:, :-1] = attention_mask[:, 1:]

            for j in range(len(query_batch)):
                start = len(query_batch[j]) - 1
                if attention_mask[j, 0] == 0:  # offset left padding
                    start += attention_mask[j, :].nonzero()[0].item()
                end = start + len(response_batch[j])

                if response_masks is not None:
                    response_masks_batch = torch.cat((torch.zeros_like(query_batch[j]), response_masks_batch[j]))[1:]

                masks[j, :start] = 0
                masks[j, end:] = 0
                if response_masks is not None:
                    masks[j, start:end] = masks[j, start:end] * response_masks_batch[j][start:end]

            if return_logits:
                all_logits.append(logits)
            else:
                del logits

            all_values.append(values)
            all_logprobs.append(logprobs)
            all_masks.append(masks)

        return (
            torch.cat(all_logprobs),
            torch.cat(all_logits)[:, :-1] if return_logits else None,
            torch.cat(all_values)[:, :-1],
            torch.cat(all_masks)[:, :-1],
        )

    @override
    def save_model(self, output_dir: Optional[str] = None) -> None:
        r"""
        Saves model checkpoint.

        Subclass and override to inject custom behavior.
        """
        if output_dir is None:
            output_dir = self.args.output_dir

        if self.is_fsdp_enabled or self.is_deepspeed_enabled:
            try:
                state_dict = self.accelerator.get_state_dict(self.model)  # must be called at all ranks
                if self.args.should_save:
                    self._save(output_dir, state_dict=state_dict)
            except ValueError:
                logger.warning(
                    " stage3_gather_16bit_weights_on_model_save=false. Saving the full checkpoint instead,"
                    " use zero_to_fp32.py to recover weights"
                )
                if self.args.should_save:
                    self._save(output_dir, state_dict={})
                # remove the dummy state_dict
                remove_dummy_checkpoint(self.args.should_save, output_dir, [WEIGHTS_NAME, SAFE_WEIGHTS_NAME])
                self.model.save_checkpoint(output_dir)

        elif self.args.should_save:
            unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model)
            self._save(output_dir, state_dict=unwrapped_model.state_dict())
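
Not part of the commit: a small sketch of the mask construction used in batched_forward_pass above, assuming a single left-padded example; only the positions that predict response tokens keep a mask of 1.

    import torch

    pad, query_len, response_len = 2, 3, 4  # two left-pad tokens, a 3-token query, a 4-token response
    attention_mask = torch.tensor([[0] * pad + [1] * (query_len + response_len)])

    # Shift the attention mask left by one position, as in batched_forward_pass.
    masks = torch.zeros_like(attention_mask)
    masks[:, :-1] = attention_mask[:, 1:]

    start = (query_len - 1) + attention_mask[0, :].nonzero()[0].item()  # offset the left padding
    end = start + response_len
    masks[0, :start] = 0
    masks[0, end:] = 0
    print(masks)  # tensor([[0, 0, 0, 0, 1, 1, 1, 1, 0]]) -- four positions, one per response token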

src/llamafactory/train/ppo/workflow.py  0 → 100644 (new file)
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by HuggingFace's TRL library.
# https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/ppo.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING, List, Optional

from ...data import MultiModalDataCollatorForSeq2Seq, get_dataset, get_template_and_fix_tokenizer
from ...extras.ploting import plot_loss
from ...model import load_model, load_tokenizer
from ..callbacks import fix_valuehead_checkpoint
from ..trainer_utils import create_ref_model, create_reward_model
from .trainer import CustomPPOTrainer


if TYPE_CHECKING:
    from transformers import Seq2SeqTrainingArguments, TrainerCallback

    from ...hparams import DataArguments, FinetuningArguments, GeneratingArguments, ModelArguments


def run_ppo(
    model_args: "ModelArguments",
    data_args: "DataArguments",
    training_args: "Seq2SeqTrainingArguments",
    finetuning_args: "FinetuningArguments",
    generating_args: "GeneratingArguments",
    callbacks: Optional[List["TrainerCallback"]] = None,
):
    tokenizer_module = load_tokenizer(model_args)
    tokenizer = tokenizer_module["tokenizer"]
    template = get_template_and_fix_tokenizer(tokenizer, data_args)
    dataset_module = get_dataset(template, model_args, data_args, training_args, stage="ppo", **tokenizer_module)
    model = load_model(tokenizer, model_args, finetuning_args, training_args.do_train, add_valuehead=True)

    tokenizer.padding_side = "left"  # use left-padding in generation while using right-padding in training
    data_collator = MultiModalDataCollatorForSeq2Seq(template=template, **tokenizer_module)

    # Create reference model and reward model
    ref_model = create_ref_model(model_args, finetuning_args, add_valuehead=True)
    reward_model = create_reward_model(model, model_args, finetuning_args)

    # Initialize our Trainer
    ppo_trainer: "CustomPPOTrainer" = CustomPPOTrainer(
        model_args=model_args,
        training_args=training_args,
        finetuning_args=finetuning_args,
        generating_args=generating_args,
        callbacks=callbacks,
        model=model,
        reward_model=reward_model,
        ref_model=ref_model,
        data_collator=data_collator,
        **dataset_module,
        **tokenizer_module,
    )

    # Training
    if training_args.do_train:
        ppo_trainer.ppo_train(resume_from_checkpoint=training_args.resume_from_checkpoint)
        ppo_trainer.save_model()
        if training_args.should_save:
            fix_valuehead_checkpoint(model, training_args.output_dir, training_args.save_safetensors)

        ppo_trainer.save_state()  # must be called after save_model to have a folder
        if ppo_trainer.is_world_process_zero() and finetuning_args.plot_loss:
            plot_loss(training_args.output_dir, keys=["loss", "reward"])

src/llamafactory/train/pt/__init__.py  0 → 100644 (new file)
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .workflow import run_pt


__all__ = ["run_pt"]

src/llamafactory/train/pt/trainer.py  0 → 100644 (new file)
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from types import MethodType
from typing import TYPE_CHECKING, Optional

from transformers import Trainer
from typing_extensions import override

from ...extras.logging import get_logger
from ..callbacks import PissaConvertCallback, SaveProcessorCallback
from ..trainer_utils import create_custom_optimizer, create_custom_scheduler


if TYPE_CHECKING:
    import torch
    from transformers import ProcessorMixin

    from ...hparams import FinetuningArguments


logger = get_logger(__name__)


class CustomTrainer(Trainer):
    r"""
    Inherits Trainer for custom optimizer.
    """

    def __init__(
        self, finetuning_args: "FinetuningArguments", processor: Optional["ProcessorMixin"], **kwargs
    ) -> None:
        super().__init__(**kwargs)
        self.finetuning_args = finetuning_args

        if processor is not None:
            self.add_callback(SaveProcessorCallback(processor))

        if finetuning_args.pissa_convert:
            self.add_callback(PissaConvertCallback)

        if finetuning_args.use_badam:
            from badam import BAdamCallback, clip_grad_norm_old_version

            self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator)
            self.add_callback(BAdamCallback)

    @override
    def create_optimizer(self) -> "torch.optim.Optimizer":
        if self.optimizer is None:
            self.optimizer = create_custom_optimizer(self.model, self.args, self.finetuning_args)
        return super().create_optimizer()

    @override
    def create_scheduler(
        self, num_training_steps: int, optimizer: Optional["torch.optim.Optimizer"] = None
    ) -> "torch.optim.lr_scheduler.LRScheduler":
        create_custom_scheduler(self.args, num_training_steps, optimizer)
        return super().create_scheduler(num_training_steps, optimizer)

src/llamafactory/train/pt/workflow.py  0 → 100644 (new file)
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by HuggingFace's transformers library.
# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
from typing import TYPE_CHECKING, List, Optional

from transformers import DataCollatorForLanguageModeling

from ...data import get_dataset, get_template_and_fix_tokenizer
from ...extras.ploting import plot_loss
from ...model import load_model, load_tokenizer
from ..trainer_utils import create_modelcard_and_push
from .trainer import CustomTrainer


if TYPE_CHECKING:
    from transformers import Seq2SeqTrainingArguments, TrainerCallback

    from ...hparams import DataArguments, FinetuningArguments, ModelArguments


def run_pt(
    model_args: "ModelArguments",
    data_args: "DataArguments",
    training_args: "Seq2SeqTrainingArguments",
    finetuning_args: "FinetuningArguments",
    callbacks: Optional[List["TrainerCallback"]] = None,
):
    tokenizer_module = load_tokenizer(model_args)
    tokenizer = tokenizer_module["tokenizer"]
    template = get_template_and_fix_tokenizer(tokenizer, data_args)
    dataset_module = get_dataset(template, model_args, data_args, training_args, stage="pt", **tokenizer_module)
    model = load_model(tokenizer, model_args, finetuning_args, training_args.do_train)
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    # Initialize our Trainer
    trainer = CustomTrainer(
        model=model,
        args=training_args,
        finetuning_args=finetuning_args,
        data_collator=data_collator,
        callbacks=callbacks,
        **dataset_module,
        **tokenizer_module,
    )

    # Training
    if training_args.do_train:
        train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
        trainer.save_model()
        trainer.log_metrics("train", train_result.metrics)
        trainer.save_metrics("train", train_result.metrics)
        trainer.save_state()
        if trainer.is_world_process_zero() and finetuning_args.plot_loss:
            plot_loss(training_args.output_dir, keys=["loss", "eval_loss"])

    # Evaluation
    if training_args.do_eval:
        metrics = trainer.evaluate(metric_key_prefix="eval")
        try:
            perplexity = math.exp(metrics["eval_loss"])
        except OverflowError:
            perplexity = float("inf")

        metrics["perplexity"] = perplexity
        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Create model card
    create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args)

src/llamafactory/train/rm/__init__.py  0 → 100644 (new file)
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .workflow import run_rm


__all__ = ["run_rm"]

src/llamafactory/train/rm/metric.py  0 → 100644 (new file)
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from dataclasses import dataclass
from typing import TYPE_CHECKING, Dict, Optional

import numpy as np

from ...extras.misc import numpify


if TYPE_CHECKING:
    from transformers import EvalPrediction


@dataclass
class ComputeAccuracy:
    r"""
    Computes reward accuracy and supports `batch_eval_metrics`.
    """

    def _dump(self) -> Optional[Dict[str, float]]:
        result = None
        if hasattr(self, "score_dict"):
            result = {k: float(np.mean(v)) for k, v in self.score_dict.items()}

        self.score_dict = {"accuracy": []}
        return result

    def __post_init__(self):
        self._dump()

    def __call__(self, eval_preds: "EvalPrediction", compute_result: bool = True) -> Optional[Dict[str, float]]:
        chosen_scores, rejected_scores = numpify(eval_preds.predictions[0]), numpify(eval_preds.predictions[1])
        if not chosen_scores.shape:
            self.score_dict["accuracy"].append(chosen_scores > rejected_scores)
        else:
            for i in range(len(chosen_scores)):
                self.score_dict["accuracy"].append(chosen_scores[i] > rejected_scores[i])

        if compute_result:
            return self._dump()
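
Not part of the commit: a quick check of how ComputeAccuracy scores toy chosen/rejected pairs, assuming the llamafactory package is importable; transformers.EvalPrediction is only used as a container here.

    import numpy as np
    from transformers import EvalPrediction
    from llamafactory.train.rm.metric import ComputeAccuracy

    metric = ComputeAccuracy()
    # predictions[0] holds the chosen scores, predictions[1] the rejected scores.
    preds = (np.array([1.5, 0.2, 2.0]), np.array([0.5, 0.9, 1.0]))
    eval_preds = EvalPrediction(predictions=preds, label_ids=np.zeros(3))  # label_ids are unused by this metric
    print(metric(eval_preds))  # {'accuracy': 0.666...}: two of the three chosen scores beat the rejected ones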

src/llamafactory/train/rm/trainer.py  0 → 100644 (new file)
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by HuggingFace's transformers library.
# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/trainer.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
from types import MethodType
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union

import torch
from transformers import Trainer
from typing_extensions import override

from ...extras.logging import get_logger
from ..callbacks import FixValueHeadModelCallback, PissaConvertCallback, SaveProcessorCallback
from ..trainer_utils import create_custom_optimizer, create_custom_scheduler


if TYPE_CHECKING:
    from transformers import PreTrainedModel, ProcessorMixin
    from transformers.trainer import PredictionOutput

    from ...hparams import FinetuningArguments


logger = get_logger(__name__)


class PairwiseTrainer(Trainer):
    r"""
    Inherits Trainer to compute pairwise loss.
    """

    def __init__(
        self, finetuning_args: "FinetuningArguments", processor: Optional["ProcessorMixin"], **kwargs
    ) -> None:
        super().__init__(**kwargs)
        self.finetuning_args = finetuning_args
        self.can_return_loss = True  # override property to return eval_loss
        self.add_callback(FixValueHeadModelCallback)

        if processor is not None:
            self.add_callback(SaveProcessorCallback(processor))

        if finetuning_args.pissa_convert:
            self.add_callback(PissaConvertCallback)

        if finetuning_args.use_badam:
            from badam import BAdamCallback, clip_grad_norm_old_version

            self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator)
            self.add_callback(BAdamCallback)

    @override
    def create_optimizer(self) -> "torch.optim.Optimizer":
        if self.optimizer is None:
            self.optimizer = create_custom_optimizer(self.model, self.args, self.finetuning_args)
        return super().create_optimizer()

    @override
    def create_scheduler(
        self, num_training_steps: int, optimizer: Optional["torch.optim.Optimizer"] = None
    ) -> "torch.optim.lr_scheduler.LRScheduler":
        create_custom_scheduler(self.args, num_training_steps, optimizer)
        return super().create_scheduler(num_training_steps, optimizer)

    @override
    def compute_loss(
        self, model: "PreTrainedModel", inputs: Dict[str, "torch.Tensor"], return_outputs: bool = False
    ) -> Union["torch.Tensor", Tuple["torch.Tensor", List["torch.Tensor"]]]:
        r"""
        Computes pairwise loss. The first n examples are chosen and the last n examples are rejected.

        Subclass and override to inject custom behavior.

        Note that the first element will be removed from the output tuple.
        See: https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/trainer.py#L3842
        """
        _, _, values = model(**inputs, output_hidden_states=True, return_dict=True, use_cache=False)
        batch_size = inputs["input_ids"].size(0) // 2
        chosen_masks, rejected_masks = torch.split(inputs["attention_mask"], batch_size, dim=0)
        chosen_rewards, rejected_rewards = torch.split(values, batch_size, dim=0)
        chosen_scores = chosen_rewards.gather(dim=-1, index=(chosen_masks.sum(dim=-1, keepdim=True) - 1))
        rejected_scores = rejected_rewards.gather(dim=-1, index=(rejected_masks.sum(dim=-1, keepdim=True) - 1))
        chosen_scores, rejected_scores = chosen_scores.squeeze(), rejected_scores.squeeze()

        loss = -torch.nn.functional.logsigmoid(chosen_scores.float() - rejected_scores.float()).mean()
        if return_outputs:
            return loss, (loss, chosen_scores, rejected_scores)
        else:
            return loss

    def save_predictions(self, predict_results: "PredictionOutput") -> None:
        r"""
        Saves model predictions to `output_dir`.

        A custom behavior that is not contained in Seq2SeqTrainer.
        """
        if not self.is_world_process_zero():
            return

        output_prediction_file = os.path.join(self.args.output_dir, "generated_predictions.jsonl")
        logger.info(f"Saving prediction results to {output_prediction_file}")
        chosen_scores, rejected_scores = predict_results.predictions
        with open(output_prediction_file, "w", encoding="utf-8") as writer:
            res: List[str] = []
            for c_score, r_score in zip(chosen_scores, rejected_scores):
                res.append(json.dumps({"chosen": round(float(c_score), 2), "rejected": round(float(r_score), 2)}))

            writer.write("\n".join(res))
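
Not part of the commit: the pairwise loss in compute_loss above is -logsigmoid(chosen - rejected) averaged over the batch; a standalone toy check.

    import torch

    chosen_scores = torch.tensor([1.5, 0.2])
    rejected_scores = torch.tensor([0.5, 0.9])
    loss = -torch.nn.functional.logsigmoid(chosen_scores - rejected_scores).mean()
    print(loss)  # ~0.708: the mis-ranked second pair contributes most of the loss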

src/llamafactory/train/rm/workflow.py  0 → 100644 (new file)
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by HuggingFace's transformers library.
# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/summarization/run_summarization.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING, List, Optional

from ...data import PairwiseDataCollatorWithPadding, get_dataset, get_template_and_fix_tokenizer
from ...extras.ploting import plot_loss
from ...model import load_model, load_tokenizer
from ..callbacks import fix_valuehead_checkpoint
from ..trainer_utils import create_modelcard_and_push
from .metric import ComputeAccuracy
from .trainer import PairwiseTrainer


if TYPE_CHECKING:
    from transformers import Seq2SeqTrainingArguments, TrainerCallback

    from ...hparams import DataArguments, FinetuningArguments, ModelArguments


def run_rm(
    model_args: "ModelArguments",
    data_args: "DataArguments",
    training_args: "Seq2SeqTrainingArguments",
    finetuning_args: "FinetuningArguments",
    callbacks: Optional[List["TrainerCallback"]] = None,
):
    tokenizer_module = load_tokenizer(model_args)
    tokenizer = tokenizer_module["tokenizer"]
    template = get_template_and_fix_tokenizer(tokenizer, data_args)
    dataset_module = get_dataset(template, model_args, data_args, training_args, stage="rm", **tokenizer_module)
    model = load_model(tokenizer, model_args, finetuning_args, training_args.do_train, add_valuehead=True)
    data_collator = PairwiseDataCollatorWithPadding(template=template, pad_to_multiple_of=8, **tokenizer_module)

    # Update arguments
    training_args.remove_unused_columns = False  # important for multimodal and pairwise dataset

    # Initialize our Trainer
    trainer = PairwiseTrainer(
        model=model,
        args=training_args,
        finetuning_args=finetuning_args,
        data_collator=data_collator,
        callbacks=callbacks,
        compute_metrics=ComputeAccuracy(),
        **dataset_module,
        **tokenizer_module,
    )

    # Training
    if training_args.do_train:
        train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
        trainer.save_model()
        if training_args.should_save:
            fix_valuehead_checkpoint(model, training_args.output_dir, training_args.save_safetensors)

        trainer.log_metrics("train", train_result.metrics)
        trainer.save_metrics("train", train_result.metrics)
        trainer.save_state()
        if trainer.is_world_process_zero() and finetuning_args.plot_loss:
            plot_loss(training_args.output_dir, keys=["loss", "eval_loss", "eval_accuracy"])

    # Evaluation
    if training_args.do_eval:
        metrics = trainer.evaluate(metric_key_prefix="eval")
        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Predict
    if training_args.do_predict:
        predict_results = trainer.predict(dataset_module["eval_dataset"], metric_key_prefix="predict")
        trainer.log_metrics("predict", predict_results.metrics)
        trainer.save_metrics("predict", predict_results.metrics)
        trainer.save_predictions(predict_results)

    # Create model card
    create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args)

src/llamafactory/train/sft/__init__.py  0 → 100644 (new file)
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .workflow import run_sft


__all__ = ["run_sft"]

src/llamafactory/train/sft/metric.py  0 → 100644 (new file)
# Copyright 2024 HuggingFace Inc., THUDM, and the LlamaFactory team.
#
# This code is inspired by HuggingFace's transformers library and THUDM's ChatGLM implementation.
# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/summarization/run_summarization.py
# https://github.com/THUDM/ChatGLM-6B/blob/main/ptuning/main.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
dataclasses
import
dataclass
from
typing
import
TYPE_CHECKING
,
Dict
,
Optional
import
numpy
as
np
import
torch
from
transformers.utils
import
is_jieba_available
,
is_nltk_available
from
...extras.constants
import
IGNORE_INDEX
from
...extras.misc
import
numpify
from
...extras.packages
import
is_rouge_available
if
TYPE_CHECKING
:
from
transformers
import
EvalPrediction
,
PreTrainedTokenizer
if
is_jieba_available
():
import
jieba
# type: ignore
if
is_nltk_available
():
from
nltk.translate.bleu_score
import
SmoothingFunction
,
sentence_bleu
if
is_rouge_available
():
from
rouge_chinese
import
Rouge
def
eval_logit_processor
(
logits
:
"torch.Tensor"
,
labels
:
"torch.Tensor"
)
->
"torch.Tensor"
:
r
"""
Computes the token with the largest likelihood to reduce memory footprint.
"""
if
isinstance
(
logits
,
(
list
,
tuple
)):
if
logits
[
0
].
dim
()
==
3
:
# (batch_size, seq_len, vocab_size)
logits
=
logits
[
0
]
else
:
# moe models have aux loss
logits
=
logits
[
1
]
if
logits
.
dim
()
!=
3
:
raise
ValueError
(
"Cannot process the logits."
)
return
torch
.
argmax
(
logits
,
dim
=-
1
)
@
dataclass
class
ComputeAccuracy
:
r
"""
Computes accuracy and supports `batch_eval_metrics`.
"""
def
_dump
(
self
)
->
Optional
[
Dict
[
str
,
float
]]:
result
=
None
if
hasattr
(
self
,
"score_dict"
):
result
=
{
k
:
float
(
np
.
mean
(
v
))
for
k
,
v
in
self
.
score_dict
.
items
()}
self
.
score_dict
=
{
"accuracy"
:
[]}
return
result
def
__post_init__
(
self
):
self
.
_dump
()
def
__call__
(
self
,
eval_preds
:
"EvalPrediction"
,
compute_result
:
bool
=
True
)
->
Optional
[
Dict
[
str
,
float
]]:
preds
,
labels
=
numpify
(
eval_preds
.
predictions
),
numpify
(
eval_preds
.
label_ids
)
for
i
in
range
(
len
(
preds
)):
pred
,
label
=
preds
[
i
,
:
-
1
],
labels
[
i
,
1
:]
label_mask
=
label
!=
IGNORE_INDEX
self
.
score_dict
[
"accuracy"
].
append
(
np
.
mean
(
pred
[
label_mask
]
==
label
[
label_mask
]))
if
compute_result
:
return
self
.
_dump
()
@dataclass
class ComputeSimilarity:
    r"""
    Computes text similarity scores and supports `batch_eval_metrics`.

    Wraps the tokenizer into metric functions, used in CustomSeq2SeqTrainer.
    """

    tokenizer: "PreTrainedTokenizer"

    def _dump(self) -> Optional[Dict[str, float]]:
        result = None
        if hasattr(self, "score_dict"):
            result = {k: float(np.mean(v)) for k, v in self.score_dict.items()}

        self.score_dict = {"rouge-1": [], "rouge-2": [], "rouge-l": [], "bleu-4": []}
        return result

    def __post_init__(self):
        self._dump()

    def __call__(self, eval_preds: "EvalPrediction", compute_result: bool = True) -> Optional[Dict[str, float]]:
        preds, labels = numpify(eval_preds.predictions), numpify(eval_preds.label_ids)

        preds = np.where(preds != IGNORE_INDEX, preds, self.tokenizer.pad_token_id)
        labels = np.where(labels != IGNORE_INDEX, labels, self.tokenizer.pad_token_id)

        decoded_preds = self.tokenizer.batch_decode(preds, skip_special_tokens=True)
        decoded_labels = self.tokenizer.batch_decode(labels, skip_special_tokens=True)

        for pred, label in zip(decoded_preds, decoded_labels):
            hypothesis = list(jieba.cut(pred))
            reference = list(jieba.cut(label))

            if len(" ".join(hypothesis).split()) == 0 or len(" ".join(reference).split()) == 0:
                result = {"rouge-1": {"f": 0.0}, "rouge-2": {"f": 0.0}, "rouge-l": {"f": 0.0}}
            else:
                rouge = Rouge()
                scores = rouge.get_scores(" ".join(hypothesis), " ".join(reference))
                result = scores[0]

            for k, v in result.items():
                self.score_dict[k].append(round(v["f"] * 100, 4))

            bleu_score = sentence_bleu([list(label)], list(pred), smoothing_function=SmoothingFunction().method3)
            self.score_dict["bleu-4"].append(round(bleu_score * 100, 4))

        if compute_result:
            return self._dump()
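As a rough illustration of the per-sample scoring in ComputeSimilarity.__call__, this standalone sketch reproduces the ROUGE and BLEU-4 computation for a single prediction/label pair; it assumes jieba, nltk and rouge_chinese are installed, exactly as the optional imports above require.

import jieba
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu
from rouge_chinese import Rouge

pred, label = "今天天气很好", "今天天气不错"
hypothesis, reference = list(jieba.cut(pred)), list(jieba.cut(label))
scores = Rouge().get_scores(" ".join(hypothesis), " ".join(reference))[0]
bleu_score = sentence_bleu([list(label)], list(pred), smoothing_function=SmoothingFunction().method3)
print({k: round(v["f"] * 100, 4) for k, v in scores.items()}, round(bleu_score * 100, 4))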
src/llamafactory/train/sft/trainer.py
0 → 100644
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by the HuggingFace's transformers library.
# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/trainer_seq2seq.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
from types import MethodType
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union

import numpy as np
import torch
from transformers import Seq2SeqTrainer
from typing_extensions import override

from ...extras.constants import IGNORE_INDEX
from ...extras.logging import get_logger
from ..callbacks import PissaConvertCallback, SaveProcessorCallback
from ..trainer_utils import create_custom_optimizer, create_custom_scheduler


if TYPE_CHECKING:
    from torch.utils.data import Dataset
    from transformers import ProcessorMixin
    from transformers.trainer import PredictionOutput

    from ...hparams import FinetuningArguments


logger = get_logger(__name__)


class CustomSeq2SeqTrainer(Seq2SeqTrainer):
    r"""
    Inherits Seq2SeqTrainer to compute generative metrics such as BLEU and ROUGE.
    """

    def __init__(
        self, finetuning_args: "FinetuningArguments", processor: Optional["ProcessorMixin"], **kwargs
    ) -> None:
        super().__init__(**kwargs)
        self.finetuning_args = finetuning_args

        if processor is not None:
            self.add_callback(SaveProcessorCallback(processor))

        if finetuning_args.pissa_convert:
            self.add_callback(PissaConvertCallback)

        if finetuning_args.use_badam:
            from badam import BAdamCallback, clip_grad_norm_old_version

            self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator)
            self.add_callback(BAdamCallback)

    @override
    def create_optimizer(self) -> "torch.optim.Optimizer":
        if self.optimizer is None:
            self.optimizer = create_custom_optimizer(self.model, self.args, self.finetuning_args)
        return super().create_optimizer()

    @override
    def create_scheduler(
        self, num_training_steps: int, optimizer: Optional["torch.optim.Optimizer"] = None
    ) -> "torch.optim.lr_scheduler.LRScheduler":
        create_custom_scheduler(self.args, num_training_steps, optimizer)
        return super().create_scheduler(num_training_steps, optimizer)

    @override
    def prediction_step(
        self,
        model: "torch.nn.Module",
        inputs: Dict[str, Union["torch.Tensor", Any]],
        prediction_loss_only: bool,
        ignore_keys: Optional[List[str]] = None,
    ) -> Tuple[Optional[float], Optional["torch.Tensor"], Optional["torch.Tensor"]]:
        r"""
        Removes the prompt part in the generated tokens.

        Subclass and override to inject custom behavior.
        """
        labels = inputs["labels"] if "labels" in inputs else None
        if self.args.predict_with_generate:
            assert self.tokenizer.padding_side == "left", "This method only accepts left-padded tensor."
            labels = labels.detach().clone() if labels is not None else None  # backup labels
            prompt_len, label_len = inputs["input_ids"].size(-1), inputs["labels"].size(-1)
            if prompt_len > label_len:
                inputs["labels"] = self._pad_tensors_to_target_len(inputs["labels"], inputs["input_ids"])
            if label_len > prompt_len:  # truncate the labels instead of padding the inputs (llama2 fp16 compatibility)
                inputs["labels"] = inputs["labels"][:, :prompt_len]

        loss, generated_tokens, _ = super().prediction_step(  # ignore the returned labels (may be truncated)
            model, inputs, prediction_loss_only=prediction_loss_only, ignore_keys=ignore_keys
        )
        if generated_tokens is not None and self.args.predict_with_generate:
            generated_tokens[:, :prompt_len] = self.tokenizer.pad_token_id
            generated_tokens = generated_tokens.contiguous()

        return loss, generated_tokens, labels

    def _pad_tensors_to_target_len(self, src_tensor: "torch.Tensor", tgt_tensor: "torch.Tensor") -> "torch.Tensor":
        r"""
        Pads the tensor to the same length as the target tensor.
        """
        assert self.tokenizer.pad_token_id is not None, "Pad token is required."
        padded_tensor = self.tokenizer.pad_token_id * torch.ones_like(tgt_tensor)
        padded_tensor[:, -src_tensor.shape[-1]:] = src_tensor  # adopt left-padding
        return padded_tensor.contiguous()  # in contiguous memory

    def save_predictions(self, dataset: "Dataset", predict_results: "PredictionOutput") -> None:
        r"""
        Saves model predictions to `output_dir`.

        A custom behavior that is not contained in Seq2SeqTrainer.
        """
        if not self.is_world_process_zero():
            return

        output_prediction_file = os.path.join(self.args.output_dir, "generated_predictions.jsonl")
        logger.info(f"Saving prediction results to {output_prediction_file}")

        labels = np.where(
            predict_results.label_ids != IGNORE_INDEX, predict_results.label_ids, self.tokenizer.pad_token_id
        )
        preds = np.where(
            predict_results.predictions != IGNORE_INDEX, predict_results.predictions, self.tokenizer.pad_token_id
        )

        for i in range(len(preds)):
            pad_len = np.nonzero(preds[i] != self.tokenizer.pad_token_id)[0]
            if len(pad_len):  # move pad token to last
                preds[i] = np.concatenate((preds[i][pad_len[0]:], preds[i][: pad_len[0]]), axis=-1)

        decoded_inputs = self.tokenizer.batch_decode(dataset["input_ids"], skip_special_tokens=True)
        decoded_labels = self.tokenizer.batch_decode(labels, skip_special_tokens=True)
        decoded_preds = self.tokenizer.batch_decode(preds, skip_special_tokens=True)

        with open(output_prediction_file, "w", encoding="utf-8") as writer:
            res: List[str] = []
            for text, label, pred in zip(decoded_inputs, decoded_labels, decoded_preds):
                res.append(json.dumps({"prompt": text, "label": label, "predict": pred}, ensure_ascii=False))

            writer.write("\n".join(res))
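The left-padding performed by _pad_tensors_to_target_len can be pictured with a small standalone sketch (plain PyTorch, illustrative values only): the source tensor is written into the right edge of a pad-filled tensor shaped like the target.

import torch

pad_token_id = 0
tgt_tensor = torch.ones(2, 6, dtype=torch.long)     # stands in for input_ids
src_tensor = torch.tensor([[5, 6, 7], [8, 9, 10]])  # stands in for labels
padded = pad_token_id * torch.ones_like(tgt_tensor)
padded[:, -src_tensor.shape[-1]:] = src_tensor      # adopt left-padding
print(padded)
# tensor([[ 0,  0,  0,  5,  6,  7],
#         [ 0,  0,  0,  8,  9, 10]])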
src/llamafactory/train/sft/workflow.py
0 → 100644
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by the HuggingFace's transformers library.
# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/summarization/run_summarization.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING, List, Optional

from ...data import SFTDataCollatorWith4DAttentionMask, get_dataset, get_template_and_fix_tokenizer
from ...extras.constants import IGNORE_INDEX
from ...extras.misc import get_logits_processor
from ...extras.ploting import plot_loss
from ...model import load_model, load_tokenizer
from ..trainer_utils import create_modelcard_and_push
from .metric import ComputeAccuracy, ComputeSimilarity, eval_logit_processor
from .trainer import CustomSeq2SeqTrainer


if TYPE_CHECKING:
    from transformers import Seq2SeqTrainingArguments, TrainerCallback

    from ...hparams import DataArguments, FinetuningArguments, GeneratingArguments, ModelArguments


def run_sft(
    model_args: "ModelArguments",
    data_args: "DataArguments",
    training_args: "Seq2SeqTrainingArguments",
    finetuning_args: "FinetuningArguments",
    generating_args: "GeneratingArguments",
    callbacks: Optional[List["TrainerCallback"]] = None,
):
    tokenizer_module = load_tokenizer(model_args)
    tokenizer = tokenizer_module["tokenizer"]
    template = get_template_and_fix_tokenizer(tokenizer, data_args)
    dataset_module = get_dataset(template, model_args, data_args, training_args, stage="sft", **tokenizer_module)
    model = load_model(tokenizer, model_args, finetuning_args, training_args.do_train)

    if getattr(model, "is_quantized", False) and not training_args.do_train:
        setattr(model, "_hf_peft_config_loaded", True)  # hack here: make model compatible with prediction

    data_collator = SFTDataCollatorWith4DAttentionMask(
        template=template,
        pad_to_multiple_of=8 if training_args.do_train else None,  # for shift short attention
        label_pad_token_id=IGNORE_INDEX if data_args.ignore_pad_token_for_loss else tokenizer.pad_token_id,
        block_diag_attn=model_args.block_diag_attn,
        attn_implementation=getattr(model.config, "_attn_implementation", None),
        compute_dtype=model_args.compute_dtype,
        **tokenizer_module,
    )

    # Override the decoding parameters of Seq2SeqTrainer
    training_args.generation_max_length = training_args.generation_max_length or data_args.cutoff_len
    training_args.generation_num_beams = data_args.eval_num_beams or training_args.generation_num_beams
    training_args.remove_unused_columns = False  # important for multimodal dataset

    # Metric utils
    metric_module = {}
    if training_args.predict_with_generate:
        metric_module["compute_metrics"] = ComputeSimilarity(tokenizer=tokenizer)
    elif finetuning_args.compute_accuracy:
        metric_module["compute_metrics"] = ComputeAccuracy()
        metric_module["preprocess_logits_for_metrics"] = eval_logit_processor

    # Initialize our Trainer
    trainer = CustomSeq2SeqTrainer(
        model=model,
        args=training_args,
        finetuning_args=finetuning_args,
        data_collator=data_collator,
        callbacks=callbacks,
        **dataset_module,
        **tokenizer_module,
        **metric_module,
    )

    # Keyword arguments for `model.generate`
    gen_kwargs = generating_args.to_dict()
    gen_kwargs["eos_token_id"] = [tokenizer.eos_token_id] + tokenizer.additional_special_tokens_ids
    gen_kwargs["pad_token_id"] = tokenizer.pad_token_id
    gen_kwargs["logits_processor"] = get_logits_processor()

    # Training
    if training_args.do_train:
        train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
        trainer.save_model()
        trainer.log_metrics("train", train_result.metrics)
        trainer.save_metrics("train", train_result.metrics)
        trainer.save_state()
        if trainer.is_world_process_zero() and finetuning_args.plot_loss:
            plot_loss(training_args.output_dir, keys=["loss", "eval_loss", "eval_accuracy"])

    if training_args.predict_with_generate:
        tokenizer.padding_side = "left"  # use left-padding in generation

    # Evaluation
    if training_args.do_eval:
        metrics = trainer.evaluate(metric_key_prefix="eval", **gen_kwargs)
        if training_args.predict_with_generate:  # eval_loss will be wrong if predict_with_generate is enabled
            metrics.pop("eval_loss", None)
        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Predict
    if training_args.do_predict:
        predict_results = trainer.predict(dataset_module["eval_dataset"], metric_key_prefix="predict", **gen_kwargs)
        if training_args.predict_with_generate:  # predict_loss will be wrong if predict_with_generate is enabled
            predict_results.metrics.pop("predict_loss", None)
        trainer.log_metrics("predict", predict_results.metrics)
        trainer.save_metrics("predict", predict_results.metrics)
        trainer.save_predictions(dataset_module["eval_dataset"], predict_results)

    # Create model card
    create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args)
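For orientation: run_sft is normally reached through run_exp in tuner.py further below. A minimal argument dict of roughly the following shape is routed here when stage is "sft"; the exact keys are defined by the hparams dataclasses, the dataset must be registered in dataset_info.json, and the model id and paths are placeholders.

from llamafactory.train.tuner import run_exp

run_exp({
    "stage": "sft",
    "do_train": True,
    "model_name_or_path": "meta-llama/Llama-3.2-1B-Instruct",  # placeholder base model
    "dataset": "alpaca_en_demo",
    "template": "llama3",
    "finetuning_type": "lora",
    "output_dir": "saves/llama3.2-1b/lora/sft",                # placeholder output dir
    "per_device_train_batch_size": 1,
    "num_train_epochs": 1.0,
})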
src/llamafactory/train/test_utils.py
0 → 100644
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING, Dict, Optional, Sequence, Set, Tuple, Union

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM
from trl import AutoModelForCausalLMWithValueHead

from ..data import get_dataset, get_template_and_fix_tokenizer
from ..extras.misc import get_current_device
from ..hparams import get_infer_args, get_train_args
from ..model import load_model, load_tokenizer


if TYPE_CHECKING:
    from datasets import Dataset
    from peft import LoraModel
    from transformers import PreTrainedModel


def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module", diff_keys: Sequence[str] = []) -> None:
    state_dict_a = model_a.state_dict()
    state_dict_b = model_b.state_dict()
    assert set(state_dict_a.keys()) == set(state_dict_b.keys())
    for name in state_dict_a.keys():
        if any(key in name for key in diff_keys):
            assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-3, atol=1e-4) is False
        else:
            assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-3, atol=1e-4) is True


def check_lora_model(model: "LoraModel") -> Tuple[Set[str], Set[str]]:
    linear_modules, extra_modules = set(), set()
    for name, param in model.named_parameters():
        if any(module in name for module in ["lora_A", "lora_B"]):
            linear_modules.add(name.split(".lora_", maxsplit=1)[0].split(".")[-1])
            assert param.requires_grad is True
            assert param.dtype == torch.float32
        elif "modules_to_save" in name:
            extra_modules.add(name.split(".modules_to_save", maxsplit=1)[0].split(".")[-1])
            assert param.requires_grad is True
            assert param.dtype == torch.float32
        else:
            assert param.requires_grad is False
            assert param.dtype == torch.float16

    return linear_modules, extra_modules


def load_train_model(add_valuehead: bool = False, **kwargs) -> "PreTrainedModel":
    model_args, _, _, finetuning_args, _ = get_train_args(kwargs)
    tokenizer = load_tokenizer(model_args)["tokenizer"]
    return load_model(tokenizer, model_args, finetuning_args, is_trainable=True, add_valuehead=add_valuehead)


def load_infer_model(add_valuehead: bool = False, **kwargs) -> "PreTrainedModel":
    model_args, _, finetuning_args, _ = get_infer_args(kwargs)
    tokenizer = load_tokenizer(model_args)["tokenizer"]
    return load_model(tokenizer, model_args, finetuning_args, is_trainable=False, add_valuehead=add_valuehead)


def load_reference_model(
    model_path: str,
    lora_path: Optional[str] = None,
    use_lora: bool = False,
    use_pissa: bool = False,
    is_trainable: bool = False,
    add_valuehead: bool = False,
) -> Union["PreTrainedModel", "LoraModel"]:
    if add_valuehead:
        model: "AutoModelForCausalLMWithValueHead" = AutoModelForCausalLMWithValueHead.from_pretrained(
            model_path, torch_dtype=torch.float16, device_map=get_current_device()
        )
        if not is_trainable:
            model.v_head = model.v_head.to(torch.float16)

        return model

    model = AutoModelForCausalLM.from_pretrained(
        model_path, torch_dtype=torch.float16, device_map=get_current_device()
    )
    if use_lora or use_pissa:
        model = PeftModel.from_pretrained(
            model, lora_path, subfolder="pissa_init" if use_pissa else None, is_trainable=is_trainable
        )
        for param in filter(lambda p: p.requires_grad, model.parameters()):
            param.data = param.data.to(torch.float32)

    return model


def load_train_dataset(**kwargs) -> "Dataset":
    model_args, data_args, training_args, _, _ = get_train_args(kwargs)
    tokenizer_module = load_tokenizer(model_args)
    template = get_template_and_fix_tokenizer(tokenizer_module["tokenizer"], data_args)
    dataset_module = get_dataset(template, model_args, data_args, training_args, kwargs["stage"], **tokenizer_module)
    return dataset_module["train_dataset"]


def patch_valuehead_model():
    def post_init(self: "AutoModelForCausalLMWithValueHead", state_dict: Dict[str, "torch.Tensor"]) -> None:
        state_dict = {k[7:]: state_dict[k] for k in state_dict.keys() if k.startswith("v_head.")}
        self.v_head.load_state_dict(state_dict, strict=False)
        del state_dict

    AutoModelForCausalLMWithValueHead.post_init = post_init
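A possible usage sketch for the helpers above, in a testing context: compare_model asserts that two models agree on every tensor except those whose names match diff_keys. The tiny checkpoint id below is an assumption used purely for illustration.

from transformers import AutoModelForCausalLM

tiny_model = "llamafactory/tiny-random-Llama-3"  # assumed test fixture
model_a = AutoModelForCausalLM.from_pretrained(tiny_model)
model_b = AutoModelForCausalLM.from_pretrained(tiny_model)
compare_model(model_a, model_b)  # same checkpoint, so every tensor must match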
src/llamafactory/train/trainer_utils.py
0 → 100644
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by the original GaLore's implementation: https://github.com/jiaweizzhao/GaLore
# and the original LoRA+'s implementation: https://github.com/nikhil-ghosh-berkeley/loraplus
# and the original BAdam's implementation: https://github.com/Ledzy/BAdam
# and the HuggingFace's TRL library: https://github.com/huggingface/trl
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union

import torch
from transformers import Trainer
from transformers.integrations import is_deepspeed_zero3_enabled
from transformers.modeling_utils import is_fsdp_enabled
from transformers.optimization import get_scheduler
from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS
from transformers.trainer_pt_utils import get_parameter_names
from typing_extensions import override

from ..extras.constants import IGNORE_INDEX
from ..extras.logging import get_logger
from ..extras.packages import is_galore_available
from ..hparams import FinetuningArguments, ModelArguments
from ..model import find_all_linear_modules, load_model, load_tokenizer, load_valuehead_params


if is_galore_available():
    from galore_torch import GaLoreAdafactor, GaLoreAdamW, GaLoreAdamW8bit


if TYPE_CHECKING:
    from transformers import PreTrainedModel, Seq2SeqTrainingArguments
    from trl import AutoModelForCausalLMWithValueHead

    from ..hparams import DataArguments


logger = get_logger(__name__)


class DummyOptimizer(torch.optim.Optimizer):
    r"""
    A dummy optimizer used for the GaLore algorithm.
    """

    def __init__(
        self, lr: float = 1e-3, optimizer_dict: Optional[Dict["torch.nn.Parameter", "torch.optim.Optimizer"]] = None
    ) -> None:
        dummy_tensor = torch.randn(1, 1)
        self.optimizer_dict = optimizer_dict
        super().__init__([dummy_tensor], {"lr": lr})

    @override
    def zero_grad(self, set_to_none: bool = True) -> None:
        pass

    @override
    def step(self, closure: Optional[Callable[[], float]] = None) -> Optional[float]:
        pass


def create_modelcard_and_push(
    trainer: "Trainer",
    model_args: "ModelArguments",
    data_args: "DataArguments",
    training_args: "Seq2SeqTrainingArguments",
    finetuning_args: "FinetuningArguments",
) -> None:
    kwargs = {
        "tasks": "text-generation",
        "finetuned_from": model_args.model_name_or_path,
        "tags": ["llama-factory", finetuning_args.finetuning_type],
    }
    if data_args.dataset is not None:
        kwargs["dataset"] = data_args.dataset

    if model_args.use_unsloth:
        kwargs["tags"] = kwargs["tags"] + ["unsloth"]

    if not training_args.do_train:
        pass
    elif training_args.push_to_hub:
        trainer.push_to_hub(**kwargs)
    else:
        trainer.create_model_card(license="other", **kwargs)  # prevent from connecting to hub


def create_ref_model(
    model_args: "ModelArguments", finetuning_args: "FinetuningArguments", add_valuehead: bool = False
) -> Optional[Union["PreTrainedModel", "AutoModelForCausalLMWithValueHead"]]:
    r"""
    Creates reference model for PPO/DPO training. Evaluation mode is not supported.

    The valuehead parameter is randomly initialized since it is useless for PPO training.
    """
    if finetuning_args.ref_model is not None:
        ref_model_args = ModelArguments.copyfrom(
            model_args,
            model_name_or_path=finetuning_args.ref_model,
            adapter_name_or_path=finetuning_args.ref_model_adapters,
            quantization_bit=finetuning_args.ref_model_quantization_bit,
        )
        ref_finetuning_args = FinetuningArguments()
        tokenizer = load_tokenizer(ref_model_args)["tokenizer"]
        ref_model = load_model(
            tokenizer, ref_model_args, ref_finetuning_args, is_trainable=False, add_valuehead=add_valuehead
        )
        logger.info("Created reference model from {}".format(finetuning_args.ref_model))
    else:
        if finetuning_args.finetuning_type == "lora":
            ref_model = None
        else:
            ref_model_args = ModelArguments.copyfrom(model_args)
            ref_finetuning_args = FinetuningArguments()
            tokenizer = load_tokenizer(ref_model_args)["tokenizer"]
            ref_model = load_model(
                tokenizer, ref_model_args, ref_finetuning_args, is_trainable=False, add_valuehead=add_valuehead
            )
            logger.info("Created reference model from the model itself.")

    return ref_model


def create_reward_model(
    model: "AutoModelForCausalLMWithValueHead", model_args: "ModelArguments", finetuning_args: "FinetuningArguments"
) -> Optional["AutoModelForCausalLMWithValueHead"]:
    r"""
    Creates reward model for PPO training.
    """
    if finetuning_args.reward_model_type == "api":
        assert finetuning_args.reward_model.startswith("http"), "Please provide full url."
        logger.info("Use reward server {}".format(finetuning_args.reward_model))
        return finetuning_args.reward_model
    elif finetuning_args.reward_model_type == "lora":
        model.pretrained_model.load_adapter(finetuning_args.reward_model, "reward")
        for name, param in model.named_parameters():  # https://github.com/huggingface/peft/issues/1090
            if "default" in name:
                param.data = param.data.to(torch.float32)  # trainable params should be in fp32
        vhead_params = load_valuehead_params(finetuning_args.reward_model, model_args)
        assert vhead_params is not None, "Reward model is not correctly loaded."
        model.register_buffer("reward_head_weight", vhead_params["v_head.summary.weight"], persistent=False)
        model.register_buffer("reward_head_bias", vhead_params["v_head.summary.bias"], persistent=False)
        model.register_buffer(
            "default_head_weight", torch.zeros_like(vhead_params["v_head.summary.weight"]), persistent=False
        )
        model.register_buffer(
            "default_head_bias", torch.zeros_like(vhead_params["v_head.summary.bias"]), persistent=False
        )
        logger.info("Loaded adapter weights of reward model from {}".format(finetuning_args.reward_model))
        return None
    else:
        reward_model_args = ModelArguments.copyfrom(
            model_args,
            model_name_or_path=finetuning_args.reward_model,
            adapter_name_or_path=finetuning_args.reward_model_adapters,
            quantization_bit=finetuning_args.reward_model_quantization_bit,
        )
        reward_finetuning_args = FinetuningArguments()
        tokenizer = load_tokenizer(reward_model_args)["tokenizer"]
        reward_model = load_model(
            tokenizer, reward_model_args, reward_finetuning_args, is_trainable=False, add_valuehead=True
        )
        logger.info("Loaded full weights of reward model from {}".format(finetuning_args.reward_model))
        logger.warning("Please ensure the ppo model and reward model share SAME tokenizer and vocabulary.")
        return reward_model


def _get_decay_parameter_names(model: "PreTrainedModel") -> List[str]:
    r"""
    Returns a list of names of parameters with weight decay. (weights in non-layernorm layers)
    """
    decay_parameters = get_parameter_names(model, ALL_LAYERNORM_LAYERS)
    decay_parameters = [name for name in decay_parameters if "bias" not in name]
    return decay_parameters


def _create_galore_optimizer(
    model: "PreTrainedModel",
    training_args: "Seq2SeqTrainingArguments",
    finetuning_args: "FinetuningArguments",
) -> "torch.optim.Optimizer":
    if len(finetuning_args.galore_target) == 1 and finetuning_args.galore_target[0] == "all":
        galore_targets = find_all_linear_modules(model, finetuning_args.freeze_vision_tower)
    else:
        galore_targets = finetuning_args.galore_target

    galore_params: List["torch.nn.Parameter"] = []
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Linear) and any(target in name for target in galore_targets):
            for param in module.parameters():
                if param.requires_grad and len(param.shape) > 1:
                    galore_params.append(param)

    galore_kwargs = {
        "rank": finetuning_args.galore_rank,
        "update_proj_gap": finetuning_args.galore_update_interval,
        "scale": finetuning_args.galore_scale,
        "proj_type": finetuning_args.galore_proj_type,
    }

    id_galore_params = {id(param) for param in galore_params}
    decay_params, nodecay_params = [], []  # they are non-galore parameters
    trainable_params: List["torch.nn.Parameter"] = []  # galore_params + decay_params + nodecay_params
    decay_param_names = _get_decay_parameter_names(model)
    for name, param in model.named_parameters():
        if param.requires_grad:
            trainable_params.append(param)
            if id(param) not in id_galore_params:
                if name in decay_param_names:
                    decay_params.append(param)
                else:
                    nodecay_params.append(param)

    _, optim_kwargs = Trainer.get_optimizer_cls_and_kwargs(training_args)

    if training_args.optim == "adamw_torch":
        optim_class = GaLoreAdamW
    elif training_args.optim in ["adamw_bnb_8bit", "adamw_8bit", "paged_adamw_8bit"]:
        optim_class = GaLoreAdamW8bit
    elif training_args.optim == "adafactor":
        optim_class = GaLoreAdafactor
    else:
        raise NotImplementedError("Unknown optim: {}".format(training_args.optim))

    if finetuning_args.galore_layerwise:
        if training_args.gradient_accumulation_steps != 1:
            raise ValueError("Per-layer GaLore does not support gradient accumulation.")

        optimizer_dict: Dict["torch.Tensor", "torch.optim.Optimizer"] = {}
        for param in nodecay_params:
            param_groups = [dict(params=[param], weight_decay=0.0)]
            optimizer_dict[param] = optim_class(param_groups, **optim_kwargs)
        for param in decay_params:
            param_groups = [dict(params=[param], weight_decay=training_args.weight_decay)]
            optimizer_dict[param] = optim_class(param_groups, **optim_kwargs)
        for param in galore_params:  # galore params have weight decay
            param_groups = [dict(params=[param], weight_decay=training_args.weight_decay, **galore_kwargs)]
            optimizer_dict[param] = optim_class(param_groups, **optim_kwargs)

        def optimizer_hook(param: "torch.nn.Parameter"):
            if param.grad is not None:
                optimizer_dict[param].step()
                optimizer_dict[param].zero_grad()

        for param in trainable_params:
            param.register_post_accumulate_grad_hook(optimizer_hook)

        optimizer = DummyOptimizer(lr=training_args.learning_rate, optimizer_dict=optimizer_dict)
    else:
        param_groups = [
            dict(params=nodecay_params, weight_decay=0.0),
            dict(params=decay_params, weight_decay=training_args.weight_decay),
            dict(params=galore_params, weight_decay=training_args.weight_decay, **galore_kwargs),
        ]
        optimizer = optim_class(param_groups, **optim_kwargs)

    logger.info("Using GaLore optimizer, may cause hanging at the start of training, wait patiently.")
    return optimizer
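The layer-wise branch above relies on a trick that is easy to miss: every parameter receives its own optimizer, stepped from a post-accumulate-grad hook, while the Trainer only ever sees the no-op DummyOptimizer. Below is a minimal standalone sketch of that mechanism in plain PyTorch (requires PyTorch 2.1+ for register_post_accumulate_grad_hook; GaLore itself is not needed for the sketch).

import torch

layer = torch.nn.Linear(4, 4)
optimizer_dict = {p: torch.optim.AdamW([p], lr=1e-3) for p in layer.parameters()}

def optimizer_hook(param: torch.nn.Parameter) -> None:
    if param.grad is not None:
        optimizer_dict[param].step()       # update this parameter immediately
        optimizer_dict[param].zero_grad()  # and drop its gradient right away

for param in layer.parameters():
    param.register_post_accumulate_grad_hook(optimizer_hook)

layer(torch.randn(2, 4)).sum().backward()  # hooks fire as gradients are accumulated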
def _create_loraplus_optimizer(
    model: "PreTrainedModel",
    training_args: "Seq2SeqTrainingArguments",
    finetuning_args: "FinetuningArguments",
) -> "torch.optim.Optimizer":
    default_lr = training_args.learning_rate
    loraplus_lr = training_args.learning_rate * finetuning_args.loraplus_lr_ratio
    embedding_lr = finetuning_args.loraplus_lr_embedding

    decay_param_names = _get_decay_parameter_names(model)
    param_dict: Dict[str, List["torch.nn.Parameter"]] = {
        "lora_a": [],
        "lora_b": [],
        "lora_b_nodecay": [],
        "embedding": [],
    }
    for name, param in model.named_parameters():
        if param.requires_grad:
            if "lora_embedding_B" in name:
                param_dict["embedding"].append(param)
            elif "lora_B" in name or param.ndim == 1:
                if name in decay_param_names:
                    param_dict["lora_b"].append(param)
                else:
                    param_dict["lora_b_nodecay"].append(param)
            else:
                param_dict["lora_a"].append(param)

    optim_class, optim_kwargs = Trainer.get_optimizer_cls_and_kwargs(training_args)
    param_groups = [
        dict(params=param_dict["lora_a"], lr=default_lr, weight_decay=training_args.weight_decay),
        dict(params=param_dict["lora_b"], lr=loraplus_lr, weight_decay=training_args.weight_decay),
        dict(params=param_dict["lora_b_nodecay"], lr=loraplus_lr, weight_decay=0.0),
        dict(params=param_dict["embedding"], lr=embedding_lr, weight_decay=training_args.weight_decay),
    ]
    optimizer = optim_class(param_groups, **optim_kwargs)
    logger.info("Using LoRA+ optimizer with loraplus lr ratio {:.2f}.".format(finetuning_args.loraplus_lr_ratio))
    return optimizer


def _create_badam_optimizer(
    model: "PreTrainedModel",
    training_args: "Seq2SeqTrainingArguments",
    finetuning_args: "FinetuningArguments",
) -> "torch.optim.Optimizer":
    decay_params, nodecay_params = [], []
    decay_param_names = _get_decay_parameter_names(model)
    for name, param in model.named_parameters():
        if param.requires_grad:
            if name in decay_param_names:
                decay_params.append(param)
            else:
                nodecay_params.append(param)

    optim_class, optim_kwargs = Trainer.get_optimizer_cls_and_kwargs(training_args)
    param_groups = [
        dict(params=nodecay_params, weight_decay=0.0),
        dict(params=decay_params, weight_decay=training_args.weight_decay),
    ]

    if finetuning_args.badam_mode == "layer":
        from badam import BlockOptimizer

        base_optimizer = optim_class(param_groups, **optim_kwargs)
        optimizer = BlockOptimizer(
            base_optimizer=base_optimizer,
            named_parameters_list=list(model.named_parameters()),
            block_prefix_list=None,
            switch_block_every=finetuning_args.badam_switch_interval,
            start_block=finetuning_args.badam_start_block,
            switch_mode=finetuning_args.badam_switch_mode,
            verbose=finetuning_args.badam_verbose,
            ds_zero3_enabled=is_deepspeed_zero3_enabled(),
        )
        logger.info(
            f"Using BAdam optimizer with layer-wise update, switch mode is {finetuning_args.badam_switch_mode}, "
            f"switch block every {finetuning_args.badam_switch_interval} steps, "
            f"default start block is {finetuning_args.badam_start_block}"
        )

    elif finetuning_args.badam_mode == "ratio":
        from badam import BlockOptimizerRatio

        assert finetuning_args.badam_update_ratio > 1e-6
        optimizer = BlockOptimizerRatio(
            param_groups=param_groups,
            named_parameters_list=list(model.named_parameters()),
            update_ratio=finetuning_args.badam_update_ratio,
            mask_mode=finetuning_args.badam_mask_mode,
            verbose=finetuning_args.badam_verbose,
            include_embedding=False,
            **optim_kwargs,
        )
        logger.info(
            f"Using BAdam optimizer with ratio-based update, update ratio is {finetuning_args.badam_update_ratio}, "
            f"mask mode is {finetuning_args.badam_mask_mode}"
        )

    return optimizer


def _create_adam_mini_optimizer(
    model: "PreTrainedModel",
    training_args: "Seq2SeqTrainingArguments",
) -> "torch.optim.Optimizer":
    from adam_mini import Adam_mini

    hidden_size = getattr(model.config, "hidden_size", None)
    num_q_head = getattr(model.config, "num_attention_heads", None)
    num_kv_head = getattr(model.config, "num_key_value_heads", None)

    optimizer = Adam_mini(
        named_parameters=model.named_parameters(),
        lr=training_args.learning_rate,
        betas=(training_args.adam_beta1, training_args.adam_beta2),
        eps=training_args.adam_epsilon,
        weight_decay=training_args.weight_decay,
        model_sharding=is_fsdp_enabled() or is_deepspeed_zero3_enabled(),
        dim=hidden_size,
        n_heads=num_q_head,
        n_kv_heads=num_kv_head,
    )
    logger.info("Using Adam-mini optimizer.")
    return optimizer


def create_custom_optimizer(
    model: "PreTrainedModel",
    training_args: "Seq2SeqTrainingArguments",
    finetuning_args: "FinetuningArguments",
) -> Optional["torch.optim.Optimizer"]:
    if finetuning_args.use_galore:
        return _create_galore_optimizer(model, training_args, finetuning_args)

    if finetuning_args.loraplus_lr_ratio is not None:
        return _create_loraplus_optimizer(model, training_args, finetuning_args)

    if finetuning_args.use_badam:
        return _create_badam_optimizer(model, training_args, finetuning_args)

    if finetuning_args.use_adam_mini:
        return _create_adam_mini_optimizer(model, training_args)


def create_custom_scheduler(
    training_args: "Seq2SeqTrainingArguments",
    num_training_steps: int,
    optimizer: Optional["torch.optim.Optimizer"] = None,
) -> None:
    if optimizer is not None and isinstance(optimizer, DummyOptimizer):
        optimizer_dict = optimizer.optimizer_dict
        scheduler_dict: Dict["torch.nn.Parameter", "torch.optim.lr_scheduler.LRScheduler"] = {}

        for param in optimizer_dict.keys():
            scheduler_dict[param] = get_scheduler(
                training_args.lr_scheduler_type,
                optimizer=optimizer_dict[param],
                num_warmup_steps=training_args.get_warmup_steps(num_training_steps),
                num_training_steps=num_training_steps,
                scheduler_specific_kwargs=training_args.lr_scheduler_kwargs,
            )

        def scheduler_hook(param: "torch.nn.Parameter"):
            scheduler_dict[param].step()

        for param in optimizer_dict.keys():
            param.register_post_accumulate_grad_hook(scheduler_hook)


def get_batch_logps(
    logits: "torch.Tensor", labels: "torch.Tensor", label_pad_token_id: int = IGNORE_INDEX
) -> Tuple["torch.Tensor", "torch.Tensor"]:
    r"""
    Computes the log probabilities of the given labels under the given logits.

    Returns:
        logps: A tensor of shape (batch_size,) containing the sum of log probabilities.
        valid_length: A tensor of shape (batch_size,) containing the number of non-masked tokens.
    """
    if logits.shape[:-1] != labels.shape:
        raise ValueError("Logits (batchsize x seqlen) and labels must have the same shape.")

    labels = labels[:, 1:].clone()
    logits = logits[:, :-1, :]
    loss_mask = labels != label_pad_token_id
    labels[labels == label_pad_token_id] = 0  # dummy token
    per_token_logps = torch.gather(logits.log_softmax(-1), dim=2, index=labels.unsqueeze(2)).squeeze(2)
    return (per_token_logps * loss_mask).sum(-1), loss_mask.sum(-1)
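A tiny worked example of the shift-and-mask inside get_batch_logps (standalone, illustrative values): logits at position t score the label at position t + 1, and padded label positions contribute neither to the sum nor to the valid length.

import torch

IGNORE_INDEX = -100
logits = torch.randn(1, 4, 10)  # (batch_size, seq_len, vocab_size)
labels = torch.tensor([[IGNORE_INDEX, 3, 5, IGNORE_INDEX]])
shift_labels, shift_logits = labels[:, 1:].clone(), logits[:, :-1, :]
loss_mask = shift_labels != IGNORE_INDEX
shift_labels[shift_labels == IGNORE_INDEX] = 0  # dummy index so gather stays in range
per_token_logps = torch.gather(shift_logits.log_softmax(-1), dim=2, index=shift_labels.unsqueeze(2)).squeeze(2)
print((per_token_logps * loss_mask).sum(-1), loss_mask.sum(-1))  # summed logps, valid_length == tensor([2])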
src/llamafactory/train/tuner.py
0 → 100644
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import shutil
from typing import TYPE_CHECKING, Any, Dict, List, Optional

import torch
from transformers import PreTrainedModel

from ..data import get_template_and_fix_tokenizer
from ..extras.constants import V_HEAD_SAFE_WEIGHTS_NAME, V_HEAD_WEIGHTS_NAME
from ..extras.logging import get_logger
from ..hparams import get_infer_args, get_train_args
from ..model import load_model, load_tokenizer
from .callbacks import LogCallback
from .dpo import run_dpo
from .kto import run_kto
from .ppo import run_ppo
from .pt import run_pt
from .rm import run_rm
from .sft import run_sft


if TYPE_CHECKING:
    from transformers import TrainerCallback


logger = get_logger(__name__)


def run_exp(args: Optional[Dict[str, Any]] = None, callbacks: List["TrainerCallback"] = []) -> None:
    callbacks.append(LogCallback())
    model_args, data_args, training_args, finetuning_args, generating_args = get_train_args(args)

    if finetuning_args.stage == "pt":
        run_pt(model_args, data_args, training_args, finetuning_args, callbacks)
    elif finetuning_args.stage == "sft":
        run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks)
    elif finetuning_args.stage == "rm":
        run_rm(model_args, data_args, training_args, finetuning_args, callbacks)
    elif finetuning_args.stage == "ppo":
        run_ppo(model_args, data_args, training_args, finetuning_args, generating_args, callbacks)
    elif finetuning_args.stage == "dpo":
        run_dpo(model_args, data_args, training_args, finetuning_args, callbacks)
    elif finetuning_args.stage == "kto":
        run_kto(model_args, data_args, training_args, finetuning_args, callbacks)
    else:
        raise ValueError("Unknown task: {}.".format(finetuning_args.stage))


def export_model(args: Optional[Dict[str, Any]] = None) -> None:
    model_args, data_args, finetuning_args, _ = get_infer_args(args)

    if model_args.export_dir is None:
        raise ValueError("Please specify `export_dir` to save model.")

    if model_args.adapter_name_or_path is not None and model_args.export_quantization_bit is not None:
        raise ValueError("Please merge adapters before quantizing the model.")

    tokenizer_module = load_tokenizer(model_args)
    tokenizer = tokenizer_module["tokenizer"]
    processor = tokenizer_module["processor"]
    get_template_and_fix_tokenizer(tokenizer, data_args)
    model = load_model(tokenizer, model_args, finetuning_args)  # must be after fixing tokenizer to resize vocab

    if getattr(model, "quantization_method", None) is not None and model_args.adapter_name_or_path is not None:
        raise ValueError("Cannot merge adapters to a quantized model.")

    if not isinstance(model, PreTrainedModel):
        raise ValueError("The model is not a `PreTrainedModel`, export aborted.")

    if getattr(model, "quantization_method", None) is not None:  # quantized model adopts float16 type
        setattr(model.config, "torch_dtype", torch.float16)
    else:
        if model_args.infer_dtype == "auto":
            output_dtype = getattr(model.config, "torch_dtype", torch.float16)
        else:
            output_dtype = getattr(torch, model_args.infer_dtype)

        setattr(model.config, "torch_dtype", output_dtype)
        model = model.to(output_dtype)
        logger.info("Convert model dtype to: {}.".format(output_dtype))

    model.save_pretrained(
        save_directory=model_args.export_dir,
        max_shard_size="{}GB".format(model_args.export_size),
        safe_serialization=(not model_args.export_legacy_format),
    )
    if model_args.export_hub_model_id is not None:
        model.push_to_hub(
            model_args.export_hub_model_id,
            token=model_args.hf_hub_token,
            max_shard_size="{}GB".format(model_args.export_size),
            safe_serialization=(not model_args.export_legacy_format),
        )

    if finetuning_args.stage == "rm":
        if model_args.adapter_name_or_path is not None:
            vhead_path = model_args.adapter_name_or_path[-1]
        else:
            vhead_path = model_args.model_name_or_path

        if os.path.exists(os.path.join(vhead_path, V_HEAD_SAFE_WEIGHTS_NAME)):
            shutil.copy(
                os.path.join(vhead_path, V_HEAD_SAFE_WEIGHTS_NAME),
                os.path.join(model_args.export_dir, V_HEAD_SAFE_WEIGHTS_NAME),
            )
            logger.info("Copied valuehead to {}.".format(model_args.export_dir))
        elif os.path.exists(os.path.join(vhead_path, V_HEAD_WEIGHTS_NAME)):
            shutil.copy(
                os.path.join(vhead_path, V_HEAD_WEIGHTS_NAME),
                os.path.join(model_args.export_dir, V_HEAD_WEIGHTS_NAME),
            )
            logger.info("Copied valuehead to {}.".format(model_args.export_dir))

    try:
        tokenizer.padding_side = "left"  # restore padding side
        tokenizer.init_kwargs["padding_side"] = "left"
        tokenizer.save_pretrained(model_args.export_dir)
        if model_args.export_hub_model_id is not None:
            tokenizer.push_to_hub(model_args.export_hub_model_id, token=model_args.hf_hub_token)

        if processor is not None:
            getattr(processor, "image_processor").save_pretrained(model_args.export_dir)
            if model_args.export_hub_model_id is not None:
                getattr(processor, "image_processor").push_to_hub(
                    model_args.export_hub_model_id, token=model_args.hf_hub_token
                )

    except Exception as e:
        logger.warning("Cannot save tokenizer, please copy the files manually: {}.".format(e))
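An illustrative call to export_model (model id and paths are placeholders): it loads the checkpoint, with any adapter given via adapter_name_or_path merged during loading, and writes the merged weights and tokenizer, plus the value head for rm checkpoints, to export_dir.

from llamafactory.train.tuner import export_model

export_model({
    "model_name_or_path": "meta-llama/Llama-3.2-1B-Instruct",  # placeholder base model
    "adapter_name_or_path": "saves/llama3.2-1b/lora/sft",      # placeholder adapter dir
    "template": "llama3",
    "finetuning_type": "lora",
    "export_dir": "models/llama3.2-1b-sft-merged",             # placeholder output dir
    "export_size": 2,                                          # shard size in GB
})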
src/llamafactory/webui/__init__.py
0 → 100644