ModelZoo / DISC-FinLLM_pytorch — Commits

Commit afe180a6, authored May 21, 2024 by wanglch: "Initial commit"
Pipeline #1006 canceled with stages · Changes: 258 · Pipelines: 1
Showing 20 changed files with 1946 additions and 0 deletions (+1946, -0)
Changed files:

LLaMA-Factory/src/llmtuner/hparams/parser.py          +271  -0
LLaMA-Factory/src/llmtuner/model/__init__.py            +5  -0
LLaMA-Factory/src/llmtuner/model/adapter.py           +149  -0
LLaMA-Factory/src/llmtuner/model/loader.py            +134  -0
LLaMA-Factory/src/llmtuner/model/patcher.py           +326  -0
LLaMA-Factory/src/llmtuner/model/utils.py             +113  -0
LLaMA-Factory/src/llmtuner/train/__init__.py            +4  -0
LLaMA-Factory/src/llmtuner/train/dpo/__init__.py        +4  -0
LLaMA-Factory/src/llmtuner/train/dpo/collator.py       +54  -0
LLaMA-Factory/src/llmtuner/train/dpo/trainer.py       +148  -0
LLaMA-Factory/src/llmtuner/train/dpo/workflow.py       +84  -0
LLaMA-Factory/src/llmtuner/train/ppo/__init__.py        +4  -0
LLaMA-Factory/src/llmtuner/train/ppo/trainer.py       +375  -0
LLaMA-Factory/src/llmtuner/train/ppo/utils.py          +59  -0
LLaMA-Factory/src/llmtuner/train/ppo/workflow.py      +107  -0
LLaMA-Factory/src/llmtuner/train/pt/__init__.py         +4  -0
LLaMA-Factory/src/llmtuner/train/pt/workflow.py        +64  -0
LLaMA-Factory/src/llmtuner/train/rm/__init__.py         +4  -0
LLaMA-Factory/src/llmtuner/train/rm/collator.py        +29  -0
LLaMA-Factory/src/llmtuner/train/rm/metric.py           +8  -0
LLaMA-Factory/src/llmtuner/hparams/parser.py (new file, 0 → 100644)

import logging
import os
import sys
from typing import Any, Dict, Optional, Tuple

import datasets
import torch
import transformers
from transformers import HfArgumentParser, Seq2SeqTrainingArguments
from transformers.trainer_utils import get_last_checkpoint
from transformers.utils.versions import require_version

from ..extras.logging import get_logger
from ..extras.packages import is_unsloth_available
from .data_args import DataArguments
from .evaluation_args import EvaluationArguments
from .finetuning_args import FinetuningArguments
from .generating_args import GeneratingArguments
from .model_args import ModelArguments


logger = get_logger(__name__)


_TRAIN_ARGS = [ModelArguments, DataArguments, Seq2SeqTrainingArguments, FinetuningArguments, GeneratingArguments]
_TRAIN_CLS = Tuple[ModelArguments, DataArguments, Seq2SeqTrainingArguments, FinetuningArguments, GeneratingArguments]
_INFER_ARGS = [ModelArguments, DataArguments, FinetuningArguments, GeneratingArguments]
_INFER_CLS = Tuple[ModelArguments, DataArguments, FinetuningArguments, GeneratingArguments]
_EVAL_ARGS = [ModelArguments, DataArguments, EvaluationArguments, FinetuningArguments]
_EVAL_CLS = Tuple[ModelArguments, DataArguments, EvaluationArguments, FinetuningArguments]


def _check_dependencies(disabled: bool) -> None:
    if disabled:
        logger.warning("Version checking has been disabled, may lead to unexpected behaviors.")
    else:
        require_version("transformers>=4.37.2", "To fix: pip install transformers>=4.37.2")
        require_version("datasets>=2.14.3", "To fix: pip install datasets>=2.14.3")
        require_version("accelerate>=0.21.0", "To fix: pip install accelerate>=0.21.0")
        require_version("peft>=0.8.2", "To fix: pip install peft>=0.8.2")
        require_version("trl>=0.7.6", "To fix: pip install trl>=0.7.6")


def _parse_args(parser: "HfArgumentParser", args: Optional[Dict[str, Any]] = None) -> Tuple[Any]:
    if args is not None:
        return parser.parse_dict(args)

    if len(sys.argv) == 2 and sys.argv[1].endswith(".yaml"):
        return parser.parse_yaml_file(os.path.abspath(sys.argv[1]))

    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        return parser.parse_json_file(os.path.abspath(sys.argv[1]))

    (*parsed_args, unknown_args) = parser.parse_args_into_dataclasses(return_remaining_strings=True)

    if unknown_args:
        print(parser.format_help())
        print("Got unknown args, potentially deprecated arguments: {}".format(unknown_args))
        raise ValueError("Some specified arguments are not used by the HfArgumentParser: {}".format(unknown_args))

    return (*parsed_args,)


def _set_transformers_logging(log_level: Optional[int] = logging.INFO) -> None:
    datasets.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.enable_default_handler()
    transformers.utils.logging.enable_explicit_format()


def _verify_model_args(model_args: "ModelArguments", finetuning_args: "FinetuningArguments") -> None:
    if model_args.quantization_bit is not None:
        if finetuning_args.finetuning_type != "lora":
            raise ValueError("Quantization is only compatible with the LoRA method.")

        if model_args.adapter_name_or_path is not None and finetuning_args.create_new_adapter:
            raise ValueError("Cannot create new adapter upon a quantized model.")

        if model_args.adapter_name_or_path is not None and len(model_args.adapter_name_or_path) != 1:
            raise ValueError("Quantized model only accepts a single adapter. Merge them first.")

    if model_args.adapter_name_or_path is not None and finetuning_args.finetuning_type != "lora":
        raise ValueError("Adapter is only valid for the LoRA method.")


def _parse_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
    parser = HfArgumentParser(_TRAIN_ARGS)
    return _parse_args(parser, args)


def _parse_infer_args(args: Optional[Dict[str, Any]] = None) -> _INFER_CLS:
    parser = HfArgumentParser(_INFER_ARGS)
    return _parse_args(parser, args)


def _parse_eval_args(args: Optional[Dict[str, Any]] = None) -> _EVAL_CLS:
    parser = HfArgumentParser(_EVAL_ARGS)
    return _parse_args(parser, args)


def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
    model_args, data_args, training_args, finetuning_args, generating_args = _parse_train_args(args)

    # Setup logging
    if training_args.should_log:
        _set_transformers_logging()

    # Check arguments
    if finetuning_args.stage != "pt" and data_args.template is None:
        raise ValueError("Please specify which `template` to use.")

    if finetuning_args.stage != "sft" and training_args.predict_with_generate:
        raise ValueError("`predict_with_generate` cannot be set as True except SFT.")

    if finetuning_args.stage == "sft" and training_args.do_predict and not training_args.predict_with_generate:
        raise ValueError("Please enable `predict_with_generate` to save model predictions.")

    if finetuning_args.stage in ["rm", "ppo"] and training_args.load_best_model_at_end:
        raise ValueError("RM and PPO stages do not support `load_best_model_at_end`.")

    if finetuning_args.stage == "ppo" and not training_args.do_train:
        raise ValueError("PPO training does not support evaluation, use the SFT stage to evaluate models.")

    if finetuning_args.stage == "ppo" and model_args.shift_attn:
        raise ValueError("PPO training is incompatible with S^2-Attn.")

    if finetuning_args.stage == "ppo" and finetuning_args.reward_model_type == "lora" and model_args.use_unsloth:
        raise ValueError("Unsloth does not support lora reward model.")

    if training_args.max_steps == -1 and data_args.streaming:
        raise ValueError("Please specify `max_steps` in streaming mode.")

    if training_args.do_train and training_args.predict_with_generate:
        raise ValueError("`predict_with_generate` cannot be set as True while training.")

    if (
        training_args.do_train
        and finetuning_args.finetuning_type == "freeze"
        and finetuning_args.name_module_trainable is None
    ):
        raise ValueError("Please specify `name_module_trainable` in Freeze training.")

    if training_args.do_train and finetuning_args.finetuning_type == "lora" and finetuning_args.lora_target is None:
        raise ValueError("Please specify `lora_target` in LoRA training.")

    if training_args.do_train and model_args.use_unsloth and not is_unsloth_available():
        raise ValueError("Install Unsloth: https://github.com/unslothai/unsloth")

    _verify_model_args(model_args, finetuning_args)
    _check_dependencies(disabled=finetuning_args.disable_version_checking)

    if (
        training_args.do_train
        and finetuning_args.finetuning_type == "lora"
        and model_args.resize_vocab
        and finetuning_args.additional_target is None
    ):
        logger.warning("Add token embeddings to `additional_target` to make the added tokens trainable.")

    if training_args.do_train and model_args.quantization_bit is not None and (not model_args.upcast_layernorm):
        logger.warning("We recommend enable `upcast_layernorm` in quantized training.")

    if training_args.do_train and (not training_args.fp16) and (not training_args.bf16):
        logger.warning("We recommend enable mixed precision training.")

    if (not training_args.do_train) and model_args.quantization_bit is not None:
        logger.warning("Evaluating model in 4/8-bit mode may cause lower scores.")

    if (not training_args.do_train) and finetuning_args.stage == "dpo" and finetuning_args.ref_model is None:
        logger.warning("Specify `ref_model` for computing rewards at evaluation.")

    # Post-process training arguments
    if (
        training_args.local_rank != -1
        and training_args.ddp_find_unused_parameters is None
        and finetuning_args.finetuning_type == "lora"
    ):
        logger.warning("`ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.")
        training_args_dict = training_args.to_dict()
        training_args_dict.update(dict(ddp_find_unused_parameters=False))
        training_args = Seq2SeqTrainingArguments(**training_args_dict)

    if finetuning_args.stage in ["rm", "ppo"] and finetuning_args.finetuning_type in ["full", "freeze"]:
        can_resume_from_checkpoint = False
        if training_args.resume_from_checkpoint is not None:
            logger.warning("Cannot resume from checkpoint in current stage.")
            training_args.resume_from_checkpoint = None
    else:
        can_resume_from_checkpoint = True

    if (
        training_args.resume_from_checkpoint is None
        and training_args.do_train
        and os.path.isdir(training_args.output_dir)
        and not training_args.overwrite_output_dir
        and can_resume_from_checkpoint
    ):
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
            raise ValueError("Output directory already exists and is not empty. Please set `overwrite_output_dir`.")

        if last_checkpoint is not None:
            training_args_dict = training_args.to_dict()
            training_args_dict.update(dict(resume_from_checkpoint=last_checkpoint))
            training_args = Seq2SeqTrainingArguments(**training_args_dict)
            logger.info(
                "Resuming training from {}. Change `output_dir` or use `overwrite_output_dir` to avoid.".format(
                    training_args.resume_from_checkpoint
                )
            )

    if (
        finetuning_args.stage in ["rm", "ppo"]
        and finetuning_args.finetuning_type == "lora"
        and training_args.resume_from_checkpoint is not None
    ):
        logger.warning(
            "Add {} to `adapter_name_or_path` to resume training from checkpoint.".format(
                training_args.resume_from_checkpoint
            )
        )

    # Post-process model arguments
    model_args.compute_dtype = (
        torch.bfloat16 if training_args.bf16 else (torch.float16 if training_args.fp16 else None)
    )
    model_args.model_max_length = data_args.cutoff_len

    # Log on each process the small summary:
    logger.info(
        "Process rank: {}, device: {}, n_gpu: {}\ndistributed training: {}, compute dtype: {}".format(
            training_args.local_rank,
            training_args.device,
            training_args.n_gpu,
            bool(training_args.local_rank != -1),
            str(model_args.compute_dtype),
        )
    )
    logger.info(f"Training/evaluation parameters {training_args}")

    transformers.set_seed(training_args.seed)

    return model_args, data_args, training_args, finetuning_args, generating_args


def get_infer_args(args: Optional[Dict[str, Any]] = None) -> _INFER_CLS:
    model_args, data_args, finetuning_args, generating_args = _parse_infer_args(args)

    _set_transformers_logging()
    _verify_model_args(model_args, finetuning_args)
    _check_dependencies(disabled=finetuning_args.disable_version_checking)

    if data_args.template is None:
        raise ValueError("Please specify which `template` to use.")

    return model_args, data_args, finetuning_args, generating_args


def get_eval_args(args: Optional[Dict[str, Any]] = None) -> _EVAL_CLS:
    model_args, data_args, eval_args, finetuning_args = _parse_eval_args(args)

    _set_transformers_logging()
    _verify_model_args(model_args, finetuning_args)
    _check_dependencies(disabled=finetuning_args.disable_version_checking)

    if data_args.template is None:
        raise ValueError("Please specify which `template` to use.")

    transformers.set_seed(eval_args.seed)

    return model_args, data_args, eval_args, finetuning_args
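For reference, these parsers can be driven either from the command line (argv, a .yaml or a .json file) or with an in-memory dict. The sketch below shows the dict path; every value is a hypothetical placeholder and not part of this commit:

# Hypothetical invocation of get_train_args(); all argument values are placeholders.
from llmtuner.hparams.parser import get_train_args

model_args, data_args, training_args, finetuning_args, generating_args = get_train_args(
    dict(
        stage="sft",                           # finetuning stage validated above
        model_name_or_path="path/to/base-model",
        dataset="example_dataset",
        template="default",                    # required for any stage other than "pt"
        finetuning_type="lora",
        lora_target="q_proj,v_proj",           # required when finetuning_type == "lora"
        output_dir="path/to/output",
        do_train=True,
        fp16=True,
    )
)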
LLaMA-Factory/src/llmtuner/model/__init__.py (new file, 0 → 100644)

from .loader import load_model_and_tokenizer
from .utils import dispatch_model, load_valuehead_params


__all__ = ["load_model_and_tokenizer", "dispatch_model", "load_valuehead_params"]
LLaMA-Factory/src/llmtuner/model/adapter.py (new file, 0 → 100644)

from typing import TYPE_CHECKING

import torch
from peft import LoraConfig, LoraModel, PeftModel, TaskType, get_peft_model
from transformers.integrations import is_deepspeed_zero3_enabled

from ..extras.logging import get_logger
from .utils import find_all_linear_modules


if TYPE_CHECKING:
    from transformers.modeling_utils import PreTrainedModel

    from ..hparams import FinetuningArguments, ModelArguments


logger = get_logger(__name__)


def init_adapter(
    model: "PreTrainedModel", model_args: "ModelArguments", finetuning_args: "FinetuningArguments", is_trainable: bool
) -> "PreTrainedModel":
    r"""
    Initializes the adapters.

    Support full-parameter, freeze and LoRA training.

    Note that the trainable parameters must be cast to float32.
    """
    if (not is_trainable) and model_args.adapter_name_or_path is None:
        logger.info("Adapter is not found at evaluation, load the base model.")
        return model

    if finetuning_args.finetuning_type == "full" and is_trainable:
        logger.info("Fine-tuning method: Full")
        model = model.float()

    if finetuning_args.finetuning_type == "freeze" and is_trainable:
        logger.info("Fine-tuning method: Freeze")
        num_layers = (
            getattr(model.config, "num_hidden_layers", None)
            or getattr(model.config, "num_layers", None)
            or getattr(model.config, "n_layer", None)
        )
        if not num_layers:
            raise ValueError("Current model does not support freeze tuning.")

        if finetuning_args.use_llama_pro:
            if num_layers % finetuning_args.num_layer_trainable != 0:
                raise ValueError(
                    "`num_layers` {} should be divisible by `num_layer_trainable` {}.".format(
                        num_layers, finetuning_args.num_layer_trainable
                    )
                )

            stride = num_layers // finetuning_args.num_layer_trainable
            trainable_layer_ids = range(stride - 1, num_layers + stride - 1, stride)
        elif finetuning_args.num_layer_trainable > 0:  # fine-tuning the last n layers if num_layer_trainable > 0
            trainable_layer_ids = range(num_layers - finetuning_args.num_layer_trainable, num_layers)
        else:  # fine-tuning the first n layers if num_layer_trainable < 0
            trainable_layer_ids = range(-finetuning_args.num_layer_trainable)

        freeze_modules = {"all"}
        for name, _ in model.named_modules():
            if ".0." in name:
                freeze_modules.add(name.split(".0.")[-1].split(".")[0])

        trainable_layers = []
        for module_name in finetuning_args.name_module_trainable:
            if module_name not in freeze_modules:
                raise ValueError(
                    "Module {} is not found, please choose from {}".format(module_name, ", ".join(freeze_modules))
                )

            for idx in trainable_layer_ids:
                trainable_layers.append(".{:d}.{}".format(idx, module_name if module_name != "all" else ""))

        for name, param in model.named_parameters():
            if any(trainable_layer in name for trainable_layer in trainable_layers):
                param.data = param.data.to(torch.float32)
            else:
                param.requires_grad_(False)

    if finetuning_args.finetuning_type == "lora":
        logger.info("Fine-tuning method: LoRA")
        adapter_to_resume = None

        if model_args.adapter_name_or_path is not None:
            is_mergeable = True
            if getattr(model, "quantization_method", None):  # merge lora in quantized model is unstable
                assert len(model_args.adapter_name_or_path) == 1, "Quantized model only accepts a single adapter."
                is_mergeable = False

            if is_deepspeed_zero3_enabled():
                assert len(model_args.adapter_name_or_path) == 1, "Cannot use multiple adapters in DeepSpeed ZeRO-3."
                is_mergeable = False

            if (is_trainable and not finetuning_args.create_new_adapter) or (not is_mergeable):
                adapter_to_merge = model_args.adapter_name_or_path[:-1]
                adapter_to_resume = model_args.adapter_name_or_path[-1]
            else:
                adapter_to_merge = model_args.adapter_name_or_path

            for adapter in adapter_to_merge:
                model: "LoraModel" = PeftModel.from_pretrained(model, adapter)
                model = model.merge_and_unload()

            if len(adapter_to_merge) > 0:
                logger.info("Merged {} adapter(s).".format(len(adapter_to_merge)))

            if adapter_to_resume is not None:  # resume lora training
                model = PeftModel.from_pretrained(model, adapter_to_resume, is_trainable=is_trainable)

        if is_trainable and adapter_to_resume is None:  # create new lora weights while training
            if len(finetuning_args.lora_target) == 1 and finetuning_args.lora_target[0] == "all":
                target_modules = find_all_linear_modules(model)
            else:
                target_modules = finetuning_args.lora_target

            peft_kwargs = {
                "r": finetuning_args.lora_rank,
                "target_modules": target_modules,
                "lora_alpha": finetuning_args.lora_alpha,
                "lora_dropout": finetuning_args.lora_dropout,
                "use_rslora": finetuning_args.use_rslora,
            }

            if model_args.use_unsloth:
                from unsloth import FastLanguageModel  # type: ignore

                unsloth_peft_kwargs = {"model": model, "max_seq_length": model_args.model_max_length}
                model = FastLanguageModel.get_peft_model(**peft_kwargs, **unsloth_peft_kwargs)
            else:
                lora_config = LoraConfig(
                    task_type=TaskType.CAUSAL_LM,
                    inference_mode=False,
                    modules_to_save=finetuning_args.additional_target,
                    **peft_kwargs,
                )
                model = get_peft_model(model, lora_config)

        for param in filter(lambda p: p.requires_grad, model.parameters()):
            param.data = param.data.to(torch.bfloat16 if finetuning_args.lora_bf16_mode else torch.float32)

        if model_args.adapter_name_or_path is not None:
            logger.info("Loaded adapter(s): {}".format(",".join(model_args.adapter_name_or_path)))

    return model
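As a point of reference, the LoRA branch above boils down to building a peft LoraConfig from the finetuning arguments and wrapping the model with get_peft_model. A standalone sketch of that step, with hypothetical hyperparameter values in place of the finetuning_args fields:

# Minimal sketch of the LoRA setup performed by init_adapter(); values are illustrative only.
from peft import LoraConfig, TaskType

lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=8,                                  # finetuning_args.lora_rank
    lora_alpha=16,                        # finetuning_args.lora_alpha
    lora_dropout=0.05,                    # finetuning_args.lora_dropout
    target_modules=["q_proj", "v_proj"],  # or find_all_linear_modules(model) when lora_target == "all"
)
# model = get_peft_model(model, lora_config)  # as in the branch above, then trainable params are cast to fp32
print(lora_config)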
LLaMA-Factory/src/llmtuner/model/loader.py (new file, 0 → 100644)

from typing import TYPE_CHECKING, Optional, Tuple

from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
from transformers.integrations import is_deepspeed_zero3_enabled
from trl import AutoModelForCausalLMWithValueHead

from ..extras.logging import get_logger
from ..extras.misc import count_parameters, get_current_device, try_download_model_from_ms
from .adapter import init_adapter
from .patcher import patch_config, patch_model, patch_tokenizer, patch_valuehead_model
from .utils import load_valuehead_params, register_autoclass


if TYPE_CHECKING:
    from transformers import PreTrainedModel, PreTrainedTokenizer

    from ..hparams import FinetuningArguments, ModelArguments


logger = get_logger(__name__)


def load_model_and_tokenizer(
    model_args: "ModelArguments",
    finetuning_args: "FinetuningArguments",
    is_trainable: Optional[bool] = False,
    add_valuehead: Optional[bool] = False,
) -> Tuple["PreTrainedModel", "PreTrainedTokenizer"]:
    r"""
    Loads pretrained model and tokenizer.

    Support both training and inference.
    """
    try_download_model_from_ms(model_args)

    config_kwargs = {
        "trust_remote_code": True,
        "cache_dir": model_args.cache_dir,
        "revision": model_args.model_revision,
        "token": model_args.hf_hub_token,
    }

    tokenizer = AutoTokenizer.from_pretrained(
        model_args.model_name_or_path,
        use_fast=model_args.use_fast_tokenizer,
        split_special_tokens=model_args.split_special_tokens,
        padding_side="right",
        **config_kwargs,
    )
    patch_tokenizer(tokenizer)

    config = AutoConfig.from_pretrained(model_args.model_name_or_path, **config_kwargs)
    patch_config(config, tokenizer, model_args, config_kwargs, is_trainable)

    model = None
    if is_trainable and model_args.use_unsloth:
        from unsloth import FastLanguageModel  # type: ignore

        unsloth_kwargs = {
            "model_name": model_args.model_name_or_path,
            "max_seq_length": model_args.model_max_length,
            "dtype": model_args.compute_dtype,
            "load_in_4bit": model_args.quantization_bit == 4,
            "token": model_args.hf_hub_token,
            "device_map": {"": get_current_device()},
            "rope_scaling": getattr(config, "rope_scaling", None),
        }
        try:
            model, _ = FastLanguageModel.from_pretrained(**unsloth_kwargs)
        except NotImplementedError:
            logger.warning("Unsloth does not support model type {}.".format(getattr(config, "model_type", None)))
            model_args.use_unsloth = False

        if model_args.adapter_name_or_path:
            model_args.adapter_name_or_path = None
            logger.warning("Unsloth does not support loading adapters.")

    if model is None:
        if getattr(config, "model_type", None) == "chatglm":
            config_kwargs["empty_init"] = False

        model = AutoModelForCausalLM.from_pretrained(
            model_args.model_name_or_path,
            config=config,
            torch_dtype=model_args.compute_dtype,
            low_cpu_mem_usage=(not is_deepspeed_zero3_enabled()),
            **config_kwargs,
        )

    patch_model(model, tokenizer, model_args, is_trainable)
    register_autoclass(config, model, tokenizer)

    model = init_adapter(model, model_args, finetuning_args, is_trainable)

    if add_valuehead:
        model: "AutoModelForCausalLMWithValueHead" = AutoModelForCausalLMWithValueHead.from_pretrained(model)
        patch_valuehead_model(model)

        if model_args.adapter_name_or_path is not None:
            vhead_path = model_args.adapter_name_or_path[-1]
        else:
            vhead_path = model_args.model_name_or_path

        vhead_params = load_valuehead_params(vhead_path, model_args)
        if vhead_params is not None:
            model.load_state_dict(vhead_params, strict=False)
            logger.info("Loaded valuehead from checkpoint: {}".format(vhead_path))

    if not is_trainable:
        model.requires_grad_(False)
        model = model.to(model_args.compute_dtype) if not getattr(model, "quantization_method", None) else model
        model.eval()
    else:
        model.train()

    trainable_params, all_param = count_parameters(model)
    logger.info(
        "trainable params: {:d} || all params: {:d} || trainable%: {:.4f}".format(
            trainable_params, all_param, 100 * trainable_params / all_param
        )
    )

    if not is_trainable:
        logger.info("This IS expected that the trainable params is 0 if you are using model for inference only.")

    if model_args.print_param_status:
        for name, param in model.named_parameters():
            print(
                "name: {}, dtype: {}, device: {}, trainable: {}".format(
                    name, param.dtype, param.device, param.requires_grad
                )
            )

    return model, tokenizer
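A hedged sketch of how this loader is usually called from a workflow. The model path is a placeholder, and the exact constructors of ModelArguments and FinetuningArguments live in the hparams package, which is not part of the files shown in this commit:

# Illustrative only: field names follow the attributes referenced by load_model_and_tokenizer() above.
from llmtuner.hparams import FinetuningArguments, ModelArguments
from llmtuner.model import load_model_and_tokenizer

model_args = ModelArguments(model_name_or_path="path/to/base-model")
finetuning_args = FinetuningArguments(finetuning_type="lora")

# is_trainable=False freezes the weights, casts them to compute_dtype and switches to eval mode.
model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, is_trainable=False)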
LLaMA-Factory/src/llmtuner/model/patcher.py (new file, 0 → 100644)

import math
import os
import random
from contextlib import nullcontext
from types import MethodType
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple

import torch
from datasets import load_dataset
from peft import PeftModel
from transformers import BitsAndBytesConfig, GPTQConfig, PreTrainedModel, PreTrainedTokenizerBase
from transformers.integrations import is_deepspeed_zero3_enabled
from transformers.utils.versions import require_version

from ..extras.constants import FILEEXT2TYPE, LAYERNORM_NAMES
from ..extras.logging import get_logger
from ..extras.misc import get_current_device, infer_optim_dtype
from ..extras.packages import is_flash_attn2_available
from ..extras.patches.llama_patch import apply_llama_patch
from ..extras.patches.mixtral_patch import patch_mixtral_replace_moe_impl


if TYPE_CHECKING:
    from transformers import PretrainedConfig, PreTrainedTokenizer
    from trl import AutoModelForCausalLMWithValueHead

    from ..hparams import ModelArguments


logger = get_logger(__name__)
SUPPORTED_CLASS_FOR_S2ATTN = ["llama"]


def _noisy_mean_initialization(embed_weight: torch.Tensor, num_new_tokens: int):
    embedding_dim = embed_weight.size(1)
    avg_weight = embed_weight[:-num_new_tokens].mean(dim=0, keepdim=True)
    noise_weight = torch.empty_like(embed_weight[-num_new_tokens:])
    noise_weight.normal_(mean=0, std=(1.0 / math.sqrt(embedding_dim)))
    embed_weight[-num_new_tokens:] = avg_weight + noise_weight


def _resize_embedding_layer(model: "PreTrainedModel", tokenizer: "PreTrainedTokenizer") -> None:
    r"""
    Resize token embeddings.
    """
    if is_deepspeed_zero3_enabled():
        import deepspeed  # type: ignore

        params = [model.get_input_embeddings().weight]
        if model.get_output_embeddings() is not None and not model.config.tie_word_embeddings:
            params.append(model.get_output_embeddings().weight)

        context_maybe_zero3 = deepspeed.zero.GatheredParameters(params, modifier_rank=0)
    else:
        context_maybe_zero3 = nullcontext()

    with context_maybe_zero3:
        current_embedding_size = model.get_input_embeddings().weight.size(0)

    if len(tokenizer) > current_embedding_size:
        if not isinstance(model.get_output_embeddings(), torch.nn.Linear):
            logger.warning("Current model does not support resizing token embeddings.")
            return

        model.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=64)
        with context_maybe_zero3:
            new_embedding_size = model.get_input_embeddings().weight.size(0)
            num_new_tokens = new_embedding_size - current_embedding_size
            _noisy_mean_initialization(model.get_input_embeddings().weight.data, num_new_tokens)
            _noisy_mean_initialization(model.get_output_embeddings().weight.data, num_new_tokens)

        logger.info("Resized token embeddings from {} to {}.".format(current_embedding_size, new_embedding_size))


def _get_quantization_dataset(tokenizer: "PreTrainedTokenizer", model_args: "ModelArguments") -> List[str]:
    r"""
    Inspired by: https://github.com/huggingface/optimum/blob/v1.16.0/optimum/gptq/data.py#L133
    TODO: remove tokenizer.decode() https://github.com/huggingface/optimum/pull/1600
    """
    if os.path.isfile(model_args.export_quantization_dataset):
        data_path = FILEEXT2TYPE.get(model_args.export_quantization_dataset.split(".")[-1], None)
        data_files = model_args.export_quantization_dataset
    else:
        data_path = model_args.export_quantization_dataset
        data_files = None

    dataset = load_dataset(path=data_path, data_files=data_files, split="train", cache_dir=model_args.cache_dir)
    maxlen = model_args.export_quantization_maxlen
    samples = []
    for _ in range(model_args.export_quantization_nsamples):
        while True:
            sample_idx = random.randint(0, len(dataset) - 1)
            sample: Dict[str, torch.Tensor] = tokenizer(dataset[sample_idx]["text"], return_tensors="pt")
            if sample["input_ids"].size(1) >= maxlen:
                break  # TODO: fix large maxlen

        word_idx = random.randint(0, sample["input_ids"].size(1) - maxlen - 1)
        input_ids = sample["input_ids"][:, word_idx : word_idx + maxlen]
        samples.append(tokenizer.decode(input_ids[0].tolist(), skip_special_tokens=True))

    return samples


def _configure_attn_implementation(model_args: "ModelArguments", config_kwargs: Dict[str, Any]) -> None:
    if model_args.flash_attn:
        if is_flash_attn2_available():
            config_kwargs["attn_implementation"] = "flash_attention_2"
            logger.info("Using FlashAttention-2 for faster training and inference.")
        else:
            logger.warning("FlashAttention2 is not installed.")
            config_kwargs["attn_implementation"] = None
    else:
        config_kwargs["attn_implementation"] = "eager"


def _configure_rope(config: "PretrainedConfig", model_args: "ModelArguments", is_trainable: bool) -> None:
    if not hasattr(config, "rope_scaling"):
        logger.warning("Current model does not support RoPE scaling.")
        return

    if is_trainable:
        if model_args.rope_scaling == "dynamic":
            logger.warning(
                "Dynamic NTK scaling may not work well with fine-tuning. "
                "See: https://github.com/huggingface/transformers/pull/24653"
            )

        current_max_length = getattr(config, "max_position_embeddings", None)
        if current_max_length and model_args.model_max_length > current_max_length:
            scaling_factor = float(math.ceil(model_args.model_max_length / current_max_length))
        else:
            logger.warning("Input length is smaller than max length. Consider increase input length.")
            scaling_factor = 1.0
    else:
        scaling_factor = 2.0

    setattr(config, "rope_scaling", {"type": model_args.rope_scaling, "factor": scaling_factor})
    logger.info(
        "Using {} scaling strategy and setting scaling factor to {}".format(model_args.rope_scaling, scaling_factor)
    )


def _configure_longlora(config: "PretrainedConfig") -> None:
    if getattr(config, "model_type", None) in SUPPORTED_CLASS_FOR_S2ATTN:
        setattr(config, "group_size_ratio", 0.25)
        apply_llama_patch()
        logger.info("Using shift short attention with group_size_ratio=1/4.")
    else:
        logger.warning("Current model does not support shift short attention.")


def _configure_quantization(
    config: "PretrainedConfig",
    tokenizer: "PreTrainedTokenizer",
    model_args: "ModelArguments",
    config_kwargs: Dict[str, Any],
) -> None:
    r"""
    Priority: GPTQ-quantized (training) > AutoGPTQ (export) > Bitsandbytes (training)
    """
    if getattr(config, "quantization_config", None):  # gptq
        if is_deepspeed_zero3_enabled():
            raise ValueError("DeepSpeed ZeRO-3 is incompatible with quantization.")

        config_kwargs["device_map"] = {"": get_current_device()}
        quantization_config: Dict[str, Any] = getattr(config, "quantization_config", None)
        if quantization_config.get("quant_method", None) == "gptq" and quantization_config.get("bits", -1) == 4:
            quantization_config["use_exllama"] = False  # disable exllama

        logger.info("Loading {}-bit GPTQ-quantized model.".format(quantization_config.get("bits", -1)))

    elif model_args.export_quantization_bit is not None:  # auto-gptq
        require_version("optimum>=1.16.0", "To fix: pip install optimum>=1.16.0")
        require_version("auto_gptq>=0.5.0", "To fix: pip install auto_gptq>=0.5.0")
        from accelerate.utils import get_max_memory

        if getattr(config, "model_type", None) == "chatglm":
            raise ValueError("ChatGLM model is not supported.")

        config_kwargs["quantization_config"] = GPTQConfig(
            bits=model_args.export_quantization_bit,
            tokenizer=tokenizer,
            dataset=_get_quantization_dataset(tokenizer, model_args),
        )
        config_kwargs["device_map"] = "auto"
        config_kwargs["max_memory"] = get_max_memory()
        logger.info("Quantizing model to {} bit.".format(model_args.export_quantization_bit))

    elif model_args.quantization_bit is not None:  # bnb
        if is_deepspeed_zero3_enabled():
            raise ValueError("DeepSpeed ZeRO-3 is incompatible with quantization.")

        if model_args.quantization_bit == 8:
            require_version("bitsandbytes>=0.37.0", "To fix: pip install bitsandbytes>=0.37.0")
            config_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True)

        elif model_args.quantization_bit == 4:
            require_version("bitsandbytes>=0.39.0", "To fix: pip install bitsandbytes>=0.39.0")
            config_kwargs["quantization_config"] = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=model_args.compute_dtype,
                bnb_4bit_use_double_quant=model_args.double_quantization,
                bnb_4bit_quant_type=model_args.quantization_type,
            )

        config_kwargs["device_map"] = {"": get_current_device()}
        logger.info("Quantizing model to {} bit.".format(model_args.quantization_bit))


def _prepare_model_for_training(
    model: "PreTrainedModel", model_args: "ModelArguments", output_layer_name: Optional[str] = "lm_head"
) -> None:
    r"""
    Includes:
        (1) cast the layernorm in fp32
        (2) make output embedding layer require grads
        (3) add the upcasting of the lm_head in fp32
    Inspired by: https://github.com/huggingface/peft/blob/v0.7.1/src/peft/utils/other.py#L72
    """
    if model_args.upcast_layernorm:
        for name, param in model.named_parameters():
            if param.ndim == 1 and any(ln_name in name for ln_name in LAYERNORM_NAMES):
                param.data = param.data.to(torch.float32)

        logger.info("Upcasting layernorm weights in float32.")

    if not model_args.disable_gradient_checkpointing:
        if not getattr(model, "supports_gradient_checkpointing", False):
            logger.warning("Current model does not support gradient checkpointing.")
        else:
            # use_reentrant=False might increase VRAM usage (have not been empirically verified yet)
            # According to: https://github.com/huggingface/transformers/issues/28339
            model.gradient_checkpointing_enable(gradient_checkpointing_kwargs={"use_reentrant": True})
            model.enable_input_require_grads()
            model.config.use_cache = False  # turn off when gradient checkpointing is enabled
            logger.info("Gradient checkpointing enabled.")

    if hasattr(model, output_layer_name) and model_args.upcast_lmhead_output:

        def fp32_forward_post_hook(module: torch.nn.Module, args: Tuple[torch.Tensor], output: torch.Tensor):
            return output.to(torch.float32)

        output_layer = getattr(model, output_layer_name)
        if isinstance(output_layer, torch.nn.Linear) and output_layer.weight.dtype != torch.float32:
            output_layer.register_forward_hook(fp32_forward_post_hook)


def patch_tokenizer(tokenizer: "PreTrainedTokenizer") -> None:
    if "PreTrainedTokenizerBase" not in str(tokenizer._pad.__func__):
        tokenizer._pad = MethodType(PreTrainedTokenizerBase._pad, tokenizer)


def patch_config(
    config: "PretrainedConfig",
    tokenizer: "PreTrainedTokenizer",
    model_args: "ModelArguments",
    config_kwargs: Dict[str, Any],
    is_trainable: bool,
) -> None:
    if model_args.compute_dtype is None:  # priority: bf16 > fp16 > fp32
        model_args.compute_dtype = infer_optim_dtype(model_dtype=getattr(config, "torch_dtype", None))

    if getattr(config, "model_type", None) == "qwen":
        for dtype_name, dtype in [("fp16", torch.float16), ("bf16", torch.bfloat16), ("fp32", torch.float32)]:
            setattr(config, dtype_name, model_args.compute_dtype == dtype)

    _configure_attn_implementation(model_args, config_kwargs)

    if model_args.rope_scaling is not None:
        _configure_rope(config, model_args, is_trainable)

    if is_trainable and model_args.shift_attn:
        _configure_longlora(config)

    _configure_quantization(config, tokenizer, model_args, config_kwargs)


def patch_model(
    model: "PreTrainedModel", tokenizer: "PreTrainedTokenizer", model_args: "ModelArguments", is_trainable: bool
) -> None:
    if "GenerationMixin" not in str(model.generate.__func__):
        model.generate = MethodType(PreTrainedModel.generate, model)

    if getattr(model.config, "model_type", None) == "chatglm":
        setattr(model, "lm_head", model.transformer.output_layer)
        setattr(model, "_keys_to_ignore_on_save", ["lm_head.weight"])

    if model_args.resize_vocab:
        _resize_embedding_layer(model, tokenizer)

    if is_trainable:
        _prepare_model_for_training(model, model_args)

    if getattr(model.config, "model_type", None) == "mixtral" and is_deepspeed_zero3_enabled():
        require_version("deepspeed>=0.13.0", "To fix: pip install deepspeed>=0.13.0")
        from deepspeed.utils import set_z3_leaf_modules  # type: ignore
        from transformers.models.mixtral.modeling_mixtral import MixtralSparseMoeBlock

        set_z3_leaf_modules(model, [MixtralSparseMoeBlock])

        if is_trainable:
            patch_mixtral_replace_moe_impl()

    try:
        model.add_model_tags(["llama-factory"])
    except Exception:
        logger.warning("Cannot properly tag the model.")


def patch_valuehead_model(model: "AutoModelForCausalLMWithValueHead") -> None:
    def tie_weights(self: "AutoModelForCausalLMWithValueHead") -> None:
        if isinstance(self.pretrained_model, PreTrainedModel):
            self.pretrained_model.tie_weights()

    def get_input_embeddings(self: "AutoModelForCausalLMWithValueHead") -> torch.nn.Module:
        if isinstance(self.pretrained_model, PreTrainedModel):
            return self.pretrained_model.get_input_embeddings()

    def create_or_update_model_card(self: "AutoModelForCausalLMWithValueHead", output_dir: str) -> None:
        if isinstance(self.pretrained_model, PeftModel):
            self.pretrained_model.create_or_update_model_card(output_dir)

    ignore_modules = [name for name, _ in model.named_parameters() if "pretrained_model" in name]
    setattr(model, "_keys_to_ignore_on_save", ignore_modules)
    setattr(model, "tie_weights", MethodType(tie_weights, model))
    setattr(model, "get_input_embeddings", MethodType(get_input_embeddings, model))
    setattr(model, "create_or_update_model_card", MethodType(create_or_update_model_card, model))
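The embedding-resizing helper above initializes the newly added token rows with the mean of the existing rows plus Gaussian noise scaled by 1/sqrt(hidden size). A self-contained illustration of that initialization on a random embedding matrix (the tensor sizes are arbitrary, not taken from any real model):

# Standalone illustration of the _noisy_mean_initialization() logic; tensor sizes are arbitrary.
import math

import torch

embed_weight = torch.randn(100, 64)  # pretend vocabulary of 100 tokens, hidden size 64
num_new_tokens = 4                   # rows appended by resize_token_embeddings()

avg_weight = embed_weight[:-num_new_tokens].mean(dim=0, keepdim=True)
noise = torch.empty_like(embed_weight[-num_new_tokens:])
noise.normal_(mean=0, std=1.0 / math.sqrt(embed_weight.size(1)))
embed_weight[-num_new_tokens:] = avg_weight + noise

print(embed_weight[-num_new_tokens:].mean(dim=1))  # new rows stay close to the mean of the old embeddings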
LLaMA-Factory/src/llmtuner/model/utils.py (new file, 0 → 100644)

import inspect
from typing import TYPE_CHECKING, Dict, List

import torch
from transformers import PreTrainedModel
from transformers.utils import cached_file

from ..extras.constants import V_HEAD_SAFE_WEIGHTS_NAME, V_HEAD_WEIGHTS_NAME
from ..extras.logging import get_logger
from ..extras.misc import get_current_device


if TYPE_CHECKING:
    from transformers import PretrainedConfig, PreTrainedTokenizer

    from ..hparams import ModelArguments


logger = get_logger(__name__)


def dispatch_model(model: "PreTrainedModel") -> "PreTrainedModel":
    r"""
    Dispatches a pre-trained model to GPUs with balanced memory when the GPU is available.
    Borrowed from: https://github.com/huggingface/transformers/blob/v4.36.2/src/transformers/modeling_utils.py#L3570
    """
    if getattr(model, "quantization_method", None):  # already set on current device
        return model

    if (
        torch.cuda.device_count() > 1
        and isinstance(model, PreTrainedModel)
        and model._no_split_modules is not None
        and model.config.model_type != "chatglm"
    ):
        from accelerate import dispatch_model
        from accelerate.utils import get_balanced_memory, infer_auto_device_map

        kwargs = {"dtype": model.dtype, "no_split_module_classes": model._get_no_split_modules("auto")}
        max_memory = get_balanced_memory(model, **kwargs)
        # Make sure tied weights are tied before creating the device map.
        model.tie_weights()
        device_map = infer_auto_device_map(model, max_memory=max_memory, **kwargs)
        device_map_kwargs = {"device_map": device_map, "offload_dir": "offload"}
        if "skip_keys" in inspect.signature(dispatch_model).parameters:
            device_map_kwargs["skip_keys"] = model._skip_keys_device_placement
        return dispatch_model(model, **device_map_kwargs)
    else:
        return model.to(device=get_current_device())


def find_all_linear_modules(model: "PreTrainedModel") -> List[str]:
    r"""
    Finds all available modules to apply lora.
    """
    quantization_method = getattr(model, "quantization_method", None)
    if quantization_method is None:
        linear_cls = torch.nn.Linear
    elif quantization_method == "bitsandbytes":
        import bitsandbytes as bnb

        linear_cls = bnb.nn.Linear4bit if getattr(model, "is_loaded_in_4bit", False) else bnb.nn.Linear8bitLt
    else:
        raise ValueError("Finding linear modules for {} models is not supported.".format(quantization_method))

    output_layer_names = ["lm_head"]
    if model.config.model_type == "chatglm":
        output_layer_names.append("output_layer")

    module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, linear_cls) and not any(output_layer in name for output_layer in output_layer_names):
            module_names.add(name.split(".")[-1])

    logger.info("Found linear modules: {}".format(",".join(module_names)))
    return list(module_names)


def load_valuehead_params(path_or_repo_id: str, model_args: "ModelArguments") -> Dict[str, torch.Tensor]:
    r"""
    Loads value head parameters from Hugging Face Hub or local disk.

    Returns: dict with keys `v_head.summary.weight` and `v_head.summary.bias`.
    """
    kwargs = {"path_or_repo_id": path_or_repo_id, "cache_dir": model_args.cache_dir, "token": model_args.hf_hub_token}

    try:
        from safetensors import safe_open

        vhead_file = cached_file(filename=V_HEAD_SAFE_WEIGHTS_NAME, **kwargs)
        with safe_open(vhead_file, framework="pt", device="cpu") as f:
            return {key: f.get_tensor(key) for key in f.keys()}
    except Exception as err:
        logger.info("Failed to load {}: {}".format(V_HEAD_SAFE_WEIGHTS_NAME, str(err)))

    try:
        vhead_file = cached_file(filename=V_HEAD_WEIGHTS_NAME, **kwargs)
        return torch.load(vhead_file, map_location="cpu")
    except Exception as err:
        logger.info("Failed to load {}: {}".format(V_HEAD_WEIGHTS_NAME, str(err)))

    logger.info("Provided path ({}) does not contain value head weights.".format(path_or_repo_id))
    logger.info("Ignore these messages if you are not resuming the training of a value head model.")
    return None


def register_autoclass(config: "PretrainedConfig", model: "PreTrainedModel", tokenizer: "PreTrainedTokenizer"):
    if "AutoConfig" in getattr(config, "auto_map", {}):
        config.__class__.register_for_auto_class()
    if "AutoModelForCausalLM" in getattr(config, "auto_map", {}):
        model.__class__.register_for_auto_class()
    if "AutoTokenizer" in tokenizer.init_kwargs.get("auto_map", {}):
        tokenizer.__class__.register_for_auto_class()
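find_all_linear_modules() reduces to collecting the leaf names of every Linear layer that is not an output head. A toy, dependency-light illustration of that scan (the module layout is invented and far smaller than a real language model):

# Toy model to illustrate the module scan in find_all_linear_modules(); the layout is invented.
import torch


class TinyBlock(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.q_proj = torch.nn.Linear(8, 8)
        self.v_proj = torch.nn.Linear(8, 8)


class TinyLM(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = torch.nn.ModuleList([TinyBlock() for _ in range(2)])
        self.lm_head = torch.nn.Linear(8, 16)


model = TinyLM()
module_names = set()
for name, module in model.named_modules():
    if isinstance(module, torch.nn.Linear) and "lm_head" not in name:
        module_names.add(name.split(".")[-1])

print(module_names)  # {'q_proj', 'v_proj'} -> candidate LoRA target modules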
LLaMA-Factory/src/llmtuner/train/__init__.py (new file, 0 → 100644)

from .tuner import export_model, run_exp


__all__ = ["export_model", "run_exp"]
LLaMA-Factory/src/llmtuner/train/dpo/__init__.py (new file, 0 → 100644)

from .workflow import run_dpo


__all__ = ["run_dpo"]
LLaMA-Factory/src/llmtuner/train/dpo/collator.py (new file, 0 → 100644)

from dataclasses import dataclass
from typing import Any, Dict, List, Sequence, Tuple

import torch
from transformers import DataCollatorForSeq2Seq


@dataclass
class DPODataCollatorWithPadding(DataCollatorForSeq2Seq):
    r"""
    Data collator for pairwise data.
    """

    def _pad_labels(self, batch: torch.Tensor, positions: List[Tuple[int, int]]) -> torch.Tensor:
        padded_labels = []
        for feature, (prompt_len, answer_len) in zip(batch, positions):
            if self.tokenizer.padding_side == "left":
                start, end = feature.size(0) - answer_len, feature.size(0)
            else:
                start, end = prompt_len, prompt_len + answer_len

            padded_tensor = self.label_pad_token_id * torch.ones_like(feature)
            padded_tensor[start:end] = feature[start:end]
            padded_labels.append(padded_tensor)

        return torch.stack(padded_labels, dim=0).contiguous()  # in contiguous memory

    def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, torch.Tensor]:
        r"""
        Pads batched data to the longest sequence in the batch.

        We generate 2 * n examples where the first n examples represent chosen examples and
        the last n examples represent rejected examples.
        """
        concatenated_features = []
        label_positions = []
        for key in ("chosen_ids", "rejected_ids"):
            for feature in features:
                prompt_len, answer_len = len(feature["prompt_ids"]), len(feature[key])
                concatenated_features.append(
                    {
                        "input_ids": feature["prompt_ids"] + feature[key],
                        "attention_mask": [1] * (prompt_len + answer_len),
                    }
                )
                label_positions.append((prompt_len, answer_len))

        batch = self.tokenizer.pad(
            concatenated_features,
            padding=self.padding,
            max_length=self.max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors=self.return_tensors,
        )
        batch["labels"] = self._pad_labels(batch["input_ids"], label_positions)
        return batch
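The collator turns n pairwise examples into 2n sequences: all chosen responses first, then all rejected ones, each with labels masked over the prompt. A short sketch of the input format it expects and the ordering it produces (the feature values below are made up):

# Made-up pairwise features in the format consumed by DPODataCollatorWithPadding.__call__().
features = [
    {"prompt_ids": [1, 2, 3], "chosen_ids": [4, 5], "rejected_ids": [6]},
    {"prompt_ids": [7, 8], "chosen_ids": [9], "rejected_ids": [10, 11]},
]

# Reproduce the concatenation order used above: chosen examples first, then rejected ones.
concatenated = []
for key in ("chosen_ids", "rejected_ids"):
    for feature in features:
        concatenated.append(feature["prompt_ids"] + feature[key])

print(concatenated)
# [[1, 2, 3, 4, 5], [7, 8, 9], [1, 2, 3, 6], [7, 8, 10, 11]]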
LLaMA-Factory/src/llmtuner/train/dpo/trainer.py (new file, 0 → 100644)

from collections import defaultdict
from contextlib import nullcontext
from typing import TYPE_CHECKING, Dict, Literal, Optional, Tuple, Union

import torch
from transformers import BatchEncoding, Trainer
from trl import DPOTrainer
from trl.trainer.utils import disable_dropout_in_model

from ...extras.constants import IGNORE_INDEX


if TYPE_CHECKING:
    from transformers import PreTrainedModel


class CustomDPOTrainer(DPOTrainer):
    def __init__(
        self,
        beta: float,
        loss_type: Literal["sigmoid", "hinge", "ipo", "kto"],
        ftx_gamma: float,
        model: Union["PreTrainedModel", torch.nn.Module],
        ref_model: Optional[Union["PreTrainedModel", torch.nn.Module]] = None,
        disable_dropout: Optional[bool] = True,
        **kwargs,
    ):
        if disable_dropout:
            disable_dropout_in_model(model)
            if ref_model is not None:
                disable_dropout_in_model(ref_model)

        self.use_dpo_data_collator = True  # hack to avoid warning
        self.generate_during_eval = False  # disable at evaluation
        self.label_pad_token_id = IGNORE_INDEX
        self.padding_value = 0
        self.is_encoder_decoder = model.config.is_encoder_decoder
        self.precompute_ref_log_probs = False
        self._precomputed_train_ref_log_probs = False
        self._precomputed_eval_ref_log_probs = False
        self._peft_has_been_casted_to_bf16 = False

        self.ref_model = ref_model
        self.beta = beta
        self.label_smoothing = 0
        self.loss_type = loss_type
        self.ftx_gamma = ftx_gamma
        self._stored_metrics = defaultdict(lambda: defaultdict(list))

        Trainer.__init__(self, model=model, **kwargs)
        if not hasattr(self, "accelerator"):
            raise AttributeError("Please update `transformers`.")

        if ref_model is not None:
            if self.is_deepspeed_enabled:
                if not (
                    getattr(ref_model, "is_loaded_in_8bit", False) or getattr(ref_model, "is_loaded_in_4bit", False)
                ):  # quantized models are already set on the correct device
                    self.ref_model = self._prepare_deepspeed(self.ref_model)
            else:
                self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True)

    def sft_loss(self, chosen_logits: torch.FloatTensor, chosen_labels: torch.LongTensor) -> torch.Tensor:
        r"""
        Computes supervised cross-entropy loss of given labels under the given logits.

        Returns:
            A tensor of shape (batch_size,) containing the cross-entropy loss of each samples.
        """
        all_logps = self.get_batch_logps(chosen_logits, chosen_labels, average_log_prob=True)
        return -all_logps

    def concatenated_forward(
        self, model: "PreTrainedModel", batch: Dict[str, torch.Tensor]
    ) -> Tuple[torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]:
        batch_copied = BatchEncoding({k: v.detach().clone() for k, v in batch.items()})  # avoid error

        all_logits = model(
            input_ids=batch_copied["input_ids"], attention_mask=batch_copied["attention_mask"], return_dict=True
        ).logits.to(torch.float32)

        all_logps = self.get_batch_logps(
            all_logits,
            batch["labels"],
            average_log_prob=False,
            label_pad_token_id=self.label_pad_token_id,
        )
        batch_size = batch["input_ids"].size(0) // 2
        chosen_logps, rejected_logps = all_logps.split(batch_size, dim=0)
        chosen_logits, rejected_logits = all_logits.split(batch_size, dim=0)
        return chosen_logps, rejected_logps, chosen_logits, rejected_logits

    def get_batch_loss_metrics(
        self,
        model: "PreTrainedModel",
        batch: Dict[str, torch.Tensor],
        train_eval: Optional[Literal["train", "eval"]] = "train",
    ) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
        r"""
        Computes the DPO loss and other metrics for the given batch of inputs for train or test.
        """
        metrics = {}
        (
            policy_chosen_logps,
            policy_rejected_logps,
            policy_chosen_logits,
            policy_rejected_logits,
        ) = self.concatenated_forward(model, batch)

        with torch.no_grad():
            if self.ref_model is None:
                ref_model = self.model
                ref_context = self.accelerator.unwrap_model(self.model).disable_adapter()
            else:
                ref_model = self.ref_model
                ref_context = nullcontext()

            with ref_context:
                (
                    reference_chosen_logps,
                    reference_rejected_logps,
                    _,
                    _,
                ) = self.concatenated_forward(ref_model, batch)

        losses, chosen_rewards, rejected_rewards = self.dpo_loss(
            policy_chosen_logps,
            policy_rejected_logps,
            reference_chosen_logps,
            reference_rejected_logps,
        )
        if self.ftx_gamma > 1e-6:
            batch_size = batch["input_ids"].size(0) // 2
            chosen_labels, _ = batch["labels"].split(batch_size, dim=0)
            losses += self.ftx_gamma * self.sft_loss(policy_chosen_logits, chosen_labels)

        reward_accuracies = (chosen_rewards > rejected_rewards).float()

        prefix = "eval_" if train_eval == "eval" else ""
        metrics[f"{prefix}rewards/chosen"] = chosen_rewards.cpu().mean()
        metrics[f"{prefix}rewards/rejected"] = rejected_rewards.cpu().mean()
        metrics[f"{prefix}rewards/accuracies"] = reward_accuracies.cpu().mean()
        metrics[f"{prefix}rewards/margins"] = (chosen_rewards - rejected_rewards).cpu().mean()
        metrics[f"{prefix}logps/rejected"] = policy_rejected_logps.detach().cpu().mean()
        metrics[f"{prefix}logps/chosen"] = policy_chosen_logps.detach().cpu().mean()
        metrics[f"{prefix}logits/rejected"] = policy_rejected_logits.detach().cpu().mean()
        metrics[f"{prefix}logits/chosen"] = policy_chosen_logits.detach().cpu().mean()

        return losses.mean(), metrics
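get_batch_loss_metrics() delegates the loss itself to DPOTrainer.dpo_loss from trl. Under the default "sigmoid" loss with zero label smoothing, that computation reduces to the expression sketched below. This is a standalone numerical sketch of the formula, not the trl implementation; beta and the log-probabilities are made-up numbers:

# Standalone sketch of the sigmoid DPO loss; beta and log-probabilities are made-up numbers.
import torch
import torch.nn.functional as F

beta = 0.1
policy_chosen_logps = torch.tensor([-10.0, -12.0])
policy_rejected_logps = torch.tensor([-13.0, -12.5])
reference_chosen_logps = torch.tensor([-11.0, -12.2])
reference_rejected_logps = torch.tensor([-12.0, -12.4])

pi_logratios = policy_chosen_logps - policy_rejected_logps
ref_logratios = reference_chosen_logps - reference_rejected_logps
logits = pi_logratios - ref_logratios

losses = -F.logsigmoid(beta * logits)
chosen_rewards = beta * (policy_chosen_logps - reference_chosen_logps)
rejected_rewards = beta * (policy_rejected_logps - reference_rejected_logps)

print(losses.mean(), (chosen_rewards > rejected_rewards).float().mean())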
LLaMA-Factory/src/llmtuner/train/dpo/workflow.py (new file, 0 → 100644)

# Inspired by: https://github.com/huggingface/trl/blob/main/examples/research_projects/stack_llama_2/scripts/dpo_llama2.py

from typing import TYPE_CHECKING, List, Optional

from transformers import Seq2SeqTrainingArguments

from ...data import get_dataset, split_dataset
from ...extras.constants import IGNORE_INDEX
from ...extras.ploting import plot_loss
from ...hparams import ModelArguments
from ...model import load_model_and_tokenizer
from ...train.dpo.collator import DPODataCollatorWithPadding
from ...train.dpo.trainer import CustomDPOTrainer
from ...train.utils import create_modelcard_and_push, create_ref_model


if TYPE_CHECKING:
    from transformers import TrainerCallback

    from ...hparams import DataArguments, FinetuningArguments


def run_dpo(
    model_args: "ModelArguments",
    data_args: "DataArguments",
    training_args: "Seq2SeqTrainingArguments",
    finetuning_args: "FinetuningArguments",
    callbacks: Optional[List["TrainerCallback"]] = None,
):
    model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train)
    dataset = get_dataset(tokenizer, model_args, data_args, training_args, stage="rm")
    data_collator = DPODataCollatorWithPadding(
        tokenizer=tokenizer,
        pad_to_multiple_of=8,
        label_pad_token_id=IGNORE_INDEX if data_args.ignore_pad_token_for_loss else tokenizer.pad_token_id,
    )

    # Create reference model
    if finetuning_args.ref_model is None and (not training_args.do_train):  # use the model itself
        ref_model = model
    else:
        ref_model = create_ref_model(model_args, finetuning_args)

    # Update arguments
    training_args_dict = training_args.to_dict()
    training_args_dict.update(dict(remove_unused_columns=False))  # important for pairwise dataset
    training_args = Seq2SeqTrainingArguments(**training_args_dict)

    # Initialize our Trainer
    trainer = CustomDPOTrainer(
        beta=finetuning_args.dpo_beta,
        loss_type=finetuning_args.dpo_loss,
        ftx_gamma=finetuning_args.dpo_ftx,
        model=model,
        ref_model=ref_model,
        args=training_args,
        tokenizer=tokenizer,
        data_collator=data_collator,
        callbacks=callbacks,
        **split_dataset(dataset, data_args, training_args),
    )

    # Training
    if training_args.do_train:
        train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
        trainer.save_model()
        trainer.log_metrics("train", train_result.metrics)
        trainer.save_metrics("train", train_result.metrics)
        trainer.save_state()
        if trainer.is_world_process_zero() and finetuning_args.plot_loss:
            plot_loss(training_args.output_dir, keys=["loss", "eval_loss"])

    # Evaluation
    if training_args.do_eval:
        metrics = trainer.evaluate(metric_key_prefix="eval")
        if id(model) == id(ref_model):  # unable to compute rewards without a reference model
            remove_keys = [key for key in metrics.keys() if "rewards" in key]
            for key in remove_keys:
                metrics.pop(key)
        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Create model card
    create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args)
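Putting the pieces together, run_dpo() is normally reached through the top-level training entry point with an argument dict such as the one sketched below. Every path and dataset name is a placeholder, and the launcher that usually wires this up (llmtuner.train.tuner) is outside the files shown in this commit:

# Hypothetical end-to-end DPO invocation; every value below is a placeholder.
from llmtuner.hparams.parser import get_train_args
from llmtuner.train.dpo import run_dpo

model_args, data_args, training_args, finetuning_args, _ = get_train_args(
    dict(
        stage="dpo",
        model_name_or_path="path/to/sft-model",
        dataset="example_pairwise_dataset",
        template="default",
        finetuning_type="lora",
        lora_target="q_proj,v_proj",
        output_dir="path/to/dpo-output",
        do_train=True,
        per_device_train_batch_size=1,
        bf16=True,
    )
)
run_dpo(model_args, data_args, training_args, finetuning_args)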
LLaMA-Factory/src/llmtuner/train/ppo/__init__.py (new file, 0 → 100644)

from .workflow import run_ppo


__all__ = ["run_ppo"]
LLaMA-Factory/src/llmtuner/train/ppo/trainer.py
0 → 100644
View file @ afe180a6

import math
import os
import sys
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple

import torch
from tqdm import tqdm
from transformers import GenerationConfig, Trainer, TrainerControl, TrainerState
from transformers.trainer_pt_utils import remove_dummy_checkpoint
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
from transformers.utils import SAFE_WEIGHTS_NAME, WEIGHTS_NAME
from trl import PPOTrainer
from trl.core import PPODecorators, logprobs_from_logits

from ...extras.callbacks import FixValueHeadModelCallback, LogCallback
from ...extras.logging import get_logger
from ...extras.misc import AverageMeter, count_parameters, get_logits_processor
from .utils import dump_layernorm, get_rewards_from_server, replace_model, restore_layernorm


if TYPE_CHECKING:
    from transformers import Seq2SeqTrainingArguments, TrainerCallback
    from trl import AutoModelForCausalLMWithValueHead

    from ...hparams import FinetuningArguments, GeneratingArguments, ModelArguments


logger = get_logger(__name__)


class CustomPPOTrainer(PPOTrainer, Trainer):
    r"""
    Inherits PPOTrainer.
    """

    def __init__(
        self,
        model_args: "ModelArguments",
        training_args: "Seq2SeqTrainingArguments",
        finetuning_args: "FinetuningArguments",
        generating_args: "GeneratingArguments",
        callbacks: List["TrainerCallback"],
        reward_model: "AutoModelForCausalLMWithValueHead",
        **kwargs,
    ):
        PPOTrainer.__init__(self, **kwargs)

        self.args = training_args
        self.model_args = model_args
        self.finetuning_args = finetuning_args
        self.reward_model = reward_model

        self.generation_config = GenerationConfig(
            pad_token_id=self.tokenizer.pad_token_id,
            eos_token_id=[self.tokenizer.eos_token_id] + self.tokenizer.additional_special_tokens_ids,
            **generating_args.to_dict(),
        )

        self.state = TrainerState()
        self.control = TrainerControl()
        self.is_deepspeed_enabled = self.accelerator.distributed_type == "DEEPSPEED" and hasattr(
            self.accelerator.state, "deepspeed_plugin"
        )
        self.log_callback, self.save_callback = callbacks[0], callbacks[1]
        assert isinstance(self.log_callback, LogCallback) and isinstance(self.save_callback, FixValueHeadModelCallback)

        if self.args.max_steps > 0:
            logger.info("max_steps is given, it will override any value given in num_train_epochs")

        if finetuning_args.reward_model_type == "full":
            if self.is_deepspeed_enabled:
                if not (
                    getattr(reward_model.pretrained_model, "is_loaded_in_8bit", False)
                    or getattr(reward_model.pretrained_model, "is_loaded_in_4bit", False)
                ):  # quantized models are already set on the correct device
                    self.reward_model = self._prepare_deepspeed(self.reward_model)
            else:
                self.reward_model = self.accelerator.prepare_model(self.reward_model, evaluation_mode=True)

    def ppo_train(self, resume_from_checkpoint: Optional[str] = None) -> None:
        r"""
        Implements training loop for the PPO stage, like _inner_training_loop() in Huggingface's Trainer.
        """
        if resume_from_checkpoint is not None:
            raise ValueError("`resume_from_checkpoint` will be supported in the future version.")

        total_train_batch_size = (
            self.args.per_device_train_batch_size
            * self.args.gradient_accumulation_steps
            * self.finetuning_args.ppo_buffer_size
            * self.args.world_size
        )
        if self.args.max_steps > 0:
            num_examples = total_train_batch_size * self.args.max_steps
            num_train_epochs = sys.maxsize
            max_steps = self.args.max_steps
            steps_in_epoch = self.args.max_steps
        else:
            len_dataloader = len(self.dataloader)
            num_examples = len(self.dataset)
            num_train_epochs = self.args.num_train_epochs
            max_steps = math.ceil(num_train_epochs * len_dataloader)
            steps_in_epoch = len_dataloader

        self.state.max_steps = max_steps
        self.state.num_train_epochs = num_train_epochs
        self.state.is_local_process_zero = self.is_local_process_zero()
        self.state.is_world_process_zero = self.is_world_process_zero()

        if self.is_world_process_zero():
            logger.info("***** Running training *****")
            logger.info("  Num examples = {}".format(num_examples))
            logger.info("  Num Epochs = {}".format(num_train_epochs))
            logger.info("  Instantaneous batch size per device = {}".format(self.args.per_device_train_batch_size))
            logger.info(
                "  Total train batch size (w. parallel, buffer, distributed & accumulation) = {}".format(
                    total_train_batch_size
                )
            )
            logger.info("  Gradient Accumulation steps = {}".format(self.args.gradient_accumulation_steps))
            logger.info("  Num optimization epochs per batch = {}".format(self.finetuning_args.ppo_epochs))
            logger.info("  Total training steps = {}".format(max_steps))
            logger.info("  Number of trainable parameters = {}".format(count_parameters(self.model)[0]))

        unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model)
        dataiter = iter(self.dataloader)
        loss_meter = AverageMeter()
        reward_meter = AverageMeter()
        self.log_callback.on_train_begin(self.args, self.state, self.control)

        for step in tqdm(range(max_steps), disable=not self.is_local_process_zero()):
            try:
                batch = next(dataiter)
            except StopIteration:
                dataiter = iter(self.dataloader)
                batch = next(dataiter)

            # Cast to inference mode
            unwrapped_model.gradient_checkpointing_disable()
            unwrapped_model.config.use_cache = True
            self.model.eval()

            # Get inputs
            self.tokenizer.padding_side = "right"  # change padding side
            queries, responses, rewards = [], [], []
            for idx in range(0, self.config.batch_size, self.config.mini_batch_size):
                mini_batch_queries, mini_batch_responses = self.get_inputs(
                    batch[idx : idx + self.config.mini_batch_size]
                )
                mini_batch_rewards = self.get_rewards(mini_batch_queries, mini_batch_responses, unwrapped_model)
                queries.extend(mini_batch_queries)
                responses.extend(mini_batch_responses)
                rewards.extend(mini_batch_rewards)

            # Cast to training mode
            unwrapped_model.gradient_checkpointing_enable()
            unwrapped_model.config.use_cache = False
            self.model.train()

            # Run PPO step
            stats = self.step(queries, responses, rewards)
            self.tokenizer.padding_side = "left"  # restore padding side
            loss_meter.update(float(stats["ppo/loss/total"]), n=len(rewards))
            reward_meter.update(torch.stack(rewards).mean().item(), n=len(rewards))

            if self.config.log_with is not None:
                try:
                    batch["query"] = self.tokenizer.batch_decode(queries, skip_special_tokens=True)
                    batch["response"] = self.tokenizer.batch_decode(responses, skip_special_tokens=True)
                    self.log_stats(stats, batch, rewards)
                except Exception:
                    logger.warning("Failed to save stats due to unknown errors.")

            self.state.global_step += 1
            self.log_callback.on_step_end(self.args, self.state, self.control)

            if self.is_local_process_zero() and (step + 1) % self.args.logging_steps == 0:
                logs = dict(
                    loss=round(loss_meter.avg, 4),
                    reward=round(reward_meter.avg, 4),
                    learning_rate=stats["ppo/learning_rate"],
                    epoch=round(step / steps_in_epoch, 2),
                )
                tqdm.write(str(logs))
                logs["step"] = step
                self.state.log_history.append(logs)
                self.log_callback.on_log(self.args, self.state, self.control)
                loss_meter.reset()
                reward_meter.reset()

            if (step + 1) % self.args.save_steps == 0:  # save checkpoint
                self.save_model(
                    os.path.join(self.args.output_dir, "{}-{}".format(PREFIX_CHECKPOINT_DIR, self.state.global_step))
                )
                self.save_callback.on_save(
                    self.args, self.state, self.control, model=self.accelerator.unwrap_model(self.model)
                )

            if self.control.should_epoch_stop or self.control.should_training_stop:
                break

        self.log_callback.on_train_end(self.args, self.state, self.control)
        self.save_callback.on_train_end(
            self.args, self.state, self.control, model=self.accelerator.unwrap_model(self.model)
        )

    @torch.no_grad()
    def get_inputs(self, batch: Dict[str, torch.Tensor]) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
        r"""
        Generates model's responses given queries.
        """
        if self.model_args.upcast_layernorm:
            layernorm_params = dump_layernorm(self.model)

        if batch["input_ids"].size(0) == 1:  # handle llama2 ppo with gradient accumulation > 1
            start_index = (batch["input_ids"][0] != self.tokenizer.pad_token_id).nonzero()[0].item()
            for k, v in batch.items():
                batch[k] = v[:, start_index:]

        unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model)
        generate_output: torch.Tensor = unwrapped_model.generate(
            generation_config=self.generation_config, logits_processor=get_logits_processor(), **batch
        )

        if self.model_args.upcast_layernorm:
            restore_layernorm(self.model, layernorm_params)

        query = batch["input_ids"].detach().cpu()
        response = generate_output[:, batch["input_ids"].size(-1) :].detach().cpu()
        queries, responses = [], []
        for i in range(len(query)):
            query_start_index = (query[i] != self.tokenizer.pad_token_id).nonzero()[0].item()
            response_index = (response[i] != self.tokenizer.pad_token_id).nonzero()

            if len(response_index) == 0:
                response_length = 1  # allow empty response
            else:
                response_length = response_index[-1].item() + 1

            queries.append(query[i, query_start_index:])  # remove padding from left
            responses.append(response[i, :response_length])  # remove padding from right

        return queries, responses

    @torch.no_grad()
    def get_rewards(
        self,
        queries: List[torch.Tensor],
        responses: List[torch.Tensor],
        unwrapped_model: "AutoModelForCausalLMWithValueHead",
    ) -> List[torch.Tensor]:
        r"""
        Computes scores using given reward model.

        Both inputs and outputs are put on CPU.
        """
        if self.finetuning_args.reward_model_type == "api":
            token_ids = [torch.cat((q, r), dim=-1).tolist() for q, r in zip(queries, responses)]
            messages = self.tokenizer.batch_decode(token_ids, skip_special_tokens=True)
            return get_rewards_from_server(self.reward_model, messages)

        if self.finetuning_args.reward_model_type == "lora":
            replace_model(unwrapped_model, target="reward")
            reward_model = self.model
        else:
            reward_model = self.reward_model

        batch = self.prepare_model_inputs(queries, responses)

        with torch.cuda.amp.autocast(dtype=self.model_args.compute_dtype):  # support bf16
            _, _, values = reward_model(**batch, output_hidden_states=True, return_dict=True)

        if getattr(unwrapped_model.config, "model_type", None) == "chatglm":  # assume same architecture
            values = torch.transpose(values, 0, 1)

        rewards = []
        for i in range(values.size(0)):
            end_indexes = (batch["input_ids"][i] != self.tokenizer.pad_token_id).nonzero()
            end_index = end_indexes[-1].item() if len(end_indexes) else 0
            rewards.append(values[i, end_index].float().detach().cpu())  # use fp32 type

        if self.finetuning_args.reward_model_type == "lora":
            replace_model(unwrapped_model, target="default")

        return rewards

    @PPODecorators.empty_device_cache()
    def batched_forward_pass(
        self,
        model: "AutoModelForCausalLMWithValueHead",
        queries: torch.Tensor,
        responses: torch.Tensor,
        model_inputs: dict,
        return_logits: Optional[bool] = False,
        response_masks: Optional[torch.Tensor] = None,
    ):
        r"""
        Calculates model outputs in multiple batches.

        Subclass and override to inject custom behavior.
        """
        bs = len(queries)
        fbs = self.config.mini_batch_size
        all_logprobs = []
        all_logits = []
        all_masks = []
        all_values = []

        for i in range(math.ceil(bs / fbs)):
            input_kwargs = {key: value[i * fbs : (i + 1) * fbs] for key, value in model_inputs.items()}
            query_batch = queries[i * fbs : (i + 1) * fbs]
            response_batch = responses[i * fbs : (i + 1) * fbs]
            if response_masks is not None:
                response_masks_batch = response_masks[i * fbs : (i + 1) * fbs]
            input_ids = input_kwargs["input_ids"]
            attention_mask = input_kwargs["attention_mask"]

            with torch.cuda.amp.autocast(dtype=self.model_args.compute_dtype):  # support bf16
                logits, _, values = model(**input_kwargs)

            unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model)
            if getattr(unwrapped_model.config, "model_type", None) == "chatglm":
                values = torch.transpose(values, 0, 1)

            logprobs = logprobs_from_logits(logits[:, :-1, :], input_ids[:, 1:])
            masks = torch.zeros_like(attention_mask)
            masks[:, :-1] = attention_mask[:, 1:]

            for j in range(len(query_batch)):
                start = len(query_batch[j]) - 1
                if attention_mask[j, 0] == 0:  # offset left padding
                    start += attention_mask[j, :].nonzero()[0].item()
                end = start + len(response_batch[j])

                if response_masks is not None:
                    response_masks_batch = torch.cat((torch.zeros_like(query_batch[j]), response_masks_batch[j]))[1:]

                masks[j, :start] = 0
                masks[j, end:] = 0
                if response_masks is not None:
                    masks[j, start:end] = masks[j, start:end] * response_masks_batch[j][start:end]

            if return_logits:
                all_logits.append(logits)
            else:
                del logits

            all_values.append(values)
            all_logprobs.append(logprobs)
            all_masks.append(masks)

        return (
            torch.cat(all_logprobs),
            torch.cat(all_logits)[:, :-1] if return_logits else None,
            torch.cat(all_values)[:, :-1],
            torch.cat(all_masks)[:, :-1],
        )

    def save_model(self, output_dir: Optional[str] = None) -> None:
        r"""
        Saves model checkpoint.

        Subclass and override to inject custom behavior.
        """
        if self.args.should_save:
            try:
                self._save(output_dir, state_dict=self.accelerator.get_state_dict(self.model))
            except ValueError:
                logger.warning(
                    " stage3_gather_16bit_weights_on_model_save=false. Saving the full checkpoint instead,"
                    " use zero_to_fp32.py to recover weights"
                )
                self._save(output_dir, state_dict={})
                remove_dummy_checkpoint(True, output_dir, [WEIGHTS_NAME, SAFE_WEIGHTS_NAME])
                self.model.save_checkpoint(output_dir)
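Editor's note: the reward extraction in get_rewards() above reads the value-head output at the last non-padding token of each right-padded sequence. A minimal, self-contained sketch of that indexing with toy tensors (the pad id and values are made up and are not part of the commit):

import torch

pad_token_id = 0                                            # assumed pad id for the toy batch
input_ids = torch.tensor([[5, 6, 7, 0, 0],
                          [3, 4, 0, 0, 0]])                 # right-padded toy batch
values = torch.arange(10, dtype=torch.float32).view(2, 5)   # stand-in for the value-head output

rewards = []
for i in range(values.size(0)):
    end_indexes = (input_ids[i] != pad_token_id).nonzero()
    end_index = end_indexes[-1].item() if len(end_indexes) else 0
    rewards.append(values[i, end_index].float().detach().cpu())

print(rewards)  # [tensor(2.), tensor(6.)] -- the values at the last non-pad positions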
LLaMA-Factory/src/llmtuner/train/ppo/utils.py
0 → 100644
View file @ afe180a6

import json
from contextlib import nullcontext
from typing import TYPE_CHECKING, Dict, List, Literal, Optional

import torch
from transformers.integrations import is_deepspeed_zero3_enabled

from ...extras.packages import is_requests_available


if TYPE_CHECKING:
    from transformers import PreTrainedModel
    from trl import AutoModelForCausalLMWithValueHead


if is_requests_available():
    import requests


def get_rewards_from_server(server_url: str, messages: List[str]) -> List[torch.Tensor]:
    headers = {"Content-Type": "application/json"}
    payload = {"model": "model", "messages": messages}
    response = requests.post(server_url, json=payload, headers=headers)
    rewards = json.loads(response.text)["scores"]
    return torch.Tensor(rewards)


def replace_model(model: "AutoModelForCausalLMWithValueHead", target: Literal["default", "reward"]) -> None:
    if is_deepspeed_zero3_enabled():
        import deepspeed  # type: ignore

        params = [model.v_head.summary.weight, model.v_head.summary.bias]
        context_maybe_zero3 = deepspeed.zero.GatheredParameters(params, modifier_rank=0)
    else:
        context_maybe_zero3 = nullcontext()

    with context_maybe_zero3:
        if target == "reward":  # save default head temporarily
            setattr(model, "default_head_weight", model.v_head.summary.weight.data.detach().clone())
            setattr(model, "default_head_bias", model.v_head.summary.bias.data.detach().clone())

        model.pretrained_model.set_adapter(target)  # set the LoRA adapter to be active
        model.v_head.summary.weight.data = model.get_buffer("{}_head_weight".format(target)).detach().clone()
        model.v_head.summary.bias.data = model.get_buffer("{}_head_bias".format(target)).detach().clone()


def dump_layernorm(model: "PreTrainedModel") -> Dict[str, torch.Tensor]:
    layer_norm_params = {}
    for name, param in model.named_parameters():
        if param.data.dtype == torch.float32:
            layer_norm_params[name] = param.data.detach().clone()
            param.data = param.data.to(model.config.torch_dtype)

    return layer_norm_params


def restore_layernorm(model: "PreTrainedModel", layernorm_params: Optional[Dict[str, torch.Tensor]] = None) -> None:
    for name, param in model.named_parameters():
        if name in layernorm_params:
            param.data = layernorm_params[name]
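Editor's note: when reward_model_type is "api", get_rewards_from_server() above posts the decoded query-plus-response strings as JSON and reads a "scores" list from the reply. A hypothetical usage sketch (the endpoint URL, the import path, and the presence of a running reward server are assumptions for illustration, not part of the commit):

import torch

from llmtuner.train.ppo.utils import get_rewards_from_server  # assumed import path given the layout above

messages = ["How is EBITDA defined? ... EBITDA is earnings before interest, taxes, ..."]
server_url = "http://localhost:8000/score"                     # hypothetical reward-server endpoint
rewards = get_rewards_from_server(server_url, messages)        # server must return {"scores": [...]}
assert isinstance(rewards, torch.Tensor) and rewards.numel() == len(messages)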
LLaMA-Factory/src/llmtuner/train/ppo/workflow.py
0 → 100644
View file @ afe180a6

# Inspired by: https://github.com/lvwerra/trl/blob/main/examples/research_projects/stack_llama/scripts/rl_training.py
import math
from typing import TYPE_CHECKING, List, Optional

from torch.optim import AdamW
from transformers import DataCollatorWithPadding
from transformers.optimization import get_scheduler
from trl import PPOConfig

from ...data import get_dataset
from ...extras.callbacks import FixValueHeadModelCallback
from ...extras.misc import fix_valuehead_checkpoint
from ...extras.ploting import plot_loss
from ...model import load_model_and_tokenizer
from ...train.ppo.trainer import CustomPPOTrainer
from ...train.utils import create_ref_model, create_reward_model


if TYPE_CHECKING:
    from transformers import Seq2SeqTrainingArguments, TrainerCallback

    from ...hparams import DataArguments, FinetuningArguments, GeneratingArguments, ModelArguments


def run_ppo(
    model_args: "ModelArguments",
    data_args: "DataArguments",
    training_args: "Seq2SeqTrainingArguments",
    finetuning_args: "FinetuningArguments",
    generating_args: "GeneratingArguments",
    callbacks: Optional[List["TrainerCallback"]] = None,
):
    model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train, add_valuehead=True)
    dataset = get_dataset(tokenizer, model_args, data_args, training_args, stage="ppo")
    tokenizer.padding_side = "left"  # use left-padding in generation while using right-padding in training
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    # Create reference model and reward model
    ref_model = create_ref_model(model_args, finetuning_args, add_valuehead=True)
    reward_model = create_reward_model(model, model_args, finetuning_args)

    # Create ppo config
    backward_batch_size = training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps
    ppo_config = PPOConfig(
        model_name=model_args.model_name_or_path,
        learning_rate=training_args.learning_rate,
        mini_batch_size=training_args.per_device_train_batch_size,
        batch_size=backward_batch_size * finetuning_args.ppo_buffer_size,
        gradient_accumulation_steps=training_args.gradient_accumulation_steps,
        ppo_epochs=finetuning_args.ppo_epochs,
        max_grad_norm=training_args.max_grad_norm,
        seed=training_args.seed,
        optimize_device_cache=True,
        target=finetuning_args.ppo_target,
        log_with=finetuning_args.ppo_logger,
        use_score_scaling=finetuning_args.ppo_score_norm,
        use_score_norm=finetuning_args.ppo_score_norm,
        whiten_rewards=finetuning_args.ppo_whiten_rewards,
        accelerator_kwargs={"step_scheduler_with_optimizer": False},
    )

    # Create optimizer and scheduler
    optimizer = AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=training_args.learning_rate)
    if training_args.max_steps > 0:
        num_training_steps = training_args.max_steps
    else:
        total_train_batch_size = backward_batch_size * finetuning_args.ppo_buffer_size * training_args.world_size
        num_training_steps = training_args.num_train_epochs * math.ceil(len(dataset) / total_train_batch_size)

    lr_scheduler = get_scheduler(
        training_args.lr_scheduler_type,
        optimizer=optimizer,
        num_warmup_steps=training_args.get_warmup_steps(num_training_steps),
        num_training_steps=num_training_steps,
    )

    # Initialize our Trainer
    ppo_trainer = CustomPPOTrainer(
        model_args=model_args,
        training_args=training_args,
        finetuning_args=finetuning_args,
        generating_args=generating_args,
        callbacks=callbacks + [FixValueHeadModelCallback()],
        reward_model=reward_model,
        config=ppo_config,
        model=model,
        ref_model=ref_model,
        tokenizer=tokenizer,
        dataset=dataset,
        data_collator=data_collator,
        optimizer=optimizer,
        lr_scheduler=lr_scheduler,
    )

    # Training
    if training_args.do_train:
        ppo_trainer.ppo_train(resume_from_checkpoint=training_args.resume_from_checkpoint)
        ppo_trainer.save_model()

        if training_args.should_save:
            fix_valuehead_checkpoint(model, training_args.output_dir, training_args.save_safetensors)

        ppo_trainer.save_state()  # must be called after save_model to have a folder
        if ppo_trainer.is_world_process_zero() and finetuning_args.plot_loss:
            plot_loss(training_args.output_dir, keys=["loss", "reward"])
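Editor's note: the PPOConfig above derives its rollout batch from the Hugging Face training arguments: backward_batch_size samples per optimizer update, multiplied by ppo_buffer_size for generation, and by world_size across processes. A worked example with illustrative numbers (not values taken from this repository's configs):

# Illustrative numbers only
per_device_train_batch_size = 4
gradient_accumulation_steps = 8
ppo_buffer_size = 2
world_size = 2

backward_batch_size = per_device_train_batch_size * gradient_accumulation_steps  # 32: samples per optimizer update
rollout_batch_size = backward_batch_size * ppo_buffer_size                       # 64: PPOConfig.batch_size (samples generated per PPO step)
total_train_batch_size = rollout_batch_size * world_size                         # 128: samples per step across all processes
print(backward_batch_size, rollout_batch_size, total_train_batch_size)           # 32 64 128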
LLaMA-Factory/src/llmtuner/train/pt/__init__.py
0 → 100644
View file @ afe180a6

from .workflow import run_pt


__all__ = ["run_pt"]
LLaMA-Factory/src/llmtuner/train/pt/workflow.py
0 → 100644
View file @ afe180a6

# Inspired by: https://github.com/huggingface/transformers/blob/v4.34.1/examples/pytorch/language-modeling/run_clm.py
import math
from typing import TYPE_CHECKING, List, Optional

from transformers import DataCollatorForLanguageModeling, Trainer

from ...data import get_dataset, split_dataset
from ...extras.ploting import plot_loss
from ...model import load_model_and_tokenizer
from ...train.utils import create_modelcard_and_push


if TYPE_CHECKING:
    from transformers import Seq2SeqTrainingArguments, TrainerCallback

    from ...hparams import DataArguments, FinetuningArguments, ModelArguments


def run_pt(
    model_args: "ModelArguments",
    data_args: "DataArguments",
    training_args: "Seq2SeqTrainingArguments",
    finetuning_args: "FinetuningArguments",
    callbacks: Optional[List["TrainerCallback"]] = None,
):
    model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train)
    dataset = get_dataset(tokenizer, model_args, data_args, training_args, stage="pt")
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    # Initialize our Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        tokenizer=tokenizer,
        data_collator=data_collator,
        callbacks=callbacks,
        **split_dataset(dataset, data_args, training_args),
    )

    # Training
    if training_args.do_train:
        train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
        trainer.save_model()
        trainer.log_metrics("train", train_result.metrics)
        trainer.save_metrics("train", train_result.metrics)
        trainer.save_state()
        if trainer.is_world_process_zero() and finetuning_args.plot_loss:
            plot_loss(training_args.output_dir, keys=["loss", "eval_loss"])

    # Evaluation
    if training_args.do_eval:
        metrics = trainer.evaluate(metric_key_prefix="eval")
        try:
            perplexity = math.exp(metrics["eval_loss"])
        except OverflowError:
            perplexity = float("inf")

        metrics["perplexity"] = perplexity
        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Create model card
    create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args)
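Editor's note: perplexity in the evaluation block above is simply exp(eval_loss), guarded against overflow. A quick sanity check with a toy loss value (not a measured result):

import math

eval_loss = 2.0                       # toy value
try:
    perplexity = math.exp(eval_loss)  # ~= 7.389
except OverflowError:
    perplexity = float("inf")         # only reached for extremely large losses
print(perplexity)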
LLaMA-Factory/src/llmtuner/train/rm/__init__.py
0 → 100644
View file @ afe180a6

from .workflow import run_rm


__all__ = ["run_rm"]
LLaMA-Factory/src/llmtuner/train/rm/collator.py
0 → 100644
View file @ afe180a6

from dataclasses import dataclass
from typing import Any, Dict, Sequence

import torch
from transformers import DataCollatorWithPadding


@dataclass
class PairwiseDataCollatorWithPadding(DataCollatorWithPadding):
    r"""
    Data collator for pairwise data.
    """

    def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, torch.Tensor]:
        r"""
        Pads batched data to the longest sequence in the batch.

        We generate 2 * n examples where the first n examples represent chosen examples and
        the last n examples represent rejected examples.
        """
        features = [
            {
                "input_ids": feature["prompt_ids"] + feature[key],
                "attention_mask": [1] * (len(feature["prompt_ids"]) + len(feature[key])),
            }
            for key in ("chosen_ids", "rejected_ids")
            for feature in features
        ]
        return super().__call__(features)
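Editor's note: a minimal sketch of how the pairwise collator above expands n features into 2 * n padded rows (chosen examples first, then rejected). The token ids are toy values and the tokenizer choice and import path are assumptions for illustration only:

from transformers import AutoTokenizer

from llmtuner.train.rm.collator import PairwiseDataCollatorWithPadding  # assumed import path

tokenizer = AutoTokenizer.from_pretrained("gpt2")   # any tokenizer with a pad token would do
tokenizer.pad_token = tokenizer.eos_token
collator = PairwiseDataCollatorWithPadding(tokenizer=tokenizer)

features = [{"prompt_ids": [1, 2], "chosen_ids": [3, 4, 5], "rejected_ids": [6]}]
batch = collator(features)
print(batch["input_ids"].shape)  # torch.Size([2, 5]): row 0 = prompt + chosen, row 1 = prompt + rejected (padded)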
LLaMA-Factory/src/llmtuner/train/rm/metric.py
0 → 100644
View file @ afe180a6

from typing import Dict, Sequence, Tuple, Union

import numpy as np


def compute_accuracy(eval_preds: Sequence[Union[np.ndarray, Tuple[np.ndarray]]]) -> Dict[str, float]:
    preds, _ = eval_preds
    return {"accuracy": (preds[0] > preds[1]).sum() / len(preds[0])}
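Editor's note: a toy invocation of compute_accuracy() above (the scores are made up): preds[0] holds the chosen-response scores and preds[1] the rejected-response scores, so accuracy is the fraction of pairs where the chosen score is higher.

import numpy as np

from llmtuner.train.rm.metric import compute_accuracy  # assumed import path

chosen_scores = np.array([0.9, 0.2, 0.7])
rejected_scores = np.array([0.1, 0.4, 0.3])
print(compute_accuracy(((chosen_scores, rejected_scores), None)))  # {'accuracy': 0.666...}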