ModelZoo / Qwen2_pytorch · Commits

Commit 032b90a1, authored Sep 12, 2024 by luopl

init commit
Pipeline #1684: canceled with stages
Changes: 233 · Pipelines: 1
Showing 20 changed files with 2364 additions and 0 deletions (+2364, -0)
LLaMA-Factory/src/llamafactory/model/patcher.py          +170  -0
LLaMA-Factory/src/llamafactory/train/__init__.py           +0  -0
LLaMA-Factory/src/llamafactory/train/callbacks.py        +349  -0
LLaMA-Factory/src/llamafactory/train/dpo/__init__.py      +18  -0
LLaMA-Factory/src/llamafactory/train/dpo/trainer.py      +255  -0
LLaMA-Factory/src/llamafactory/train/dpo/workflow.py      +98  -0
LLaMA-Factory/src/llamafactory/train/kto/__init__.py      +18  -0
LLaMA-Factory/src/llamafactory/train/kto/trainer.py      +223  -0
LLaMA-Factory/src/llamafactory/train/kto/workflow.py      +95  -0
LLaMA-Factory/src/llamafactory/train/ppo/__init__.py      +18  -0
LLaMA-Factory/src/llamafactory/train/ppo/ppo_utils.py     +88  -0
LLaMA-Factory/src/llamafactory/train/ppo/trainer.py      +507  -0
LLaMA-Factory/src/llamafactory/train/ppo/workflow.py      +80  -0
LLaMA-Factory/src/llamafactory/train/pt/__init__.py       +18  -0
LLaMA-Factory/src/llamafactory/train/pt/trainer.py        +67  -0
LLaMA-Factory/src/llamafactory/train/pt/workflow.py       +83  -0
LLaMA-Factory/src/llamafactory/train/rm/__init__.py       +18  -0
LLaMA-Factory/src/llamafactory/train/rm/metric.py         +49  -0
LLaMA-Factory/src/llamafactory/train/rm/trainer.py       +120  -0
LLaMA-Factory/src/llamafactory/train/rm/workflow.py       +90  -0
LLaMA-Factory/src/llamafactory/model/patcher.py (new file, mode 100644)
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from types import MethodType
from typing import TYPE_CHECKING, Any, Dict

import torch
from peft import PeftModel
from transformers import PreTrainedModel, PreTrainedTokenizerBase, is_torch_npu_available
from transformers.integrations import is_deepspeed_zero3_enabled
from transformers.modeling_utils import is_fsdp_enabled

from ..extras.logging import get_logger
from ..extras.misc import infer_optim_dtype
from .model_utils.attention import configure_attn_implementation, print_attn_implementation
from .model_utils.checkpointing import prepare_model_for_training
from .model_utils.embedding import resize_embedding_layer
from .model_utils.longlora import configure_longlora
from .model_utils.moe import add_z3_leaf_module, configure_moe
from .model_utils.packing import configure_packing
from .model_utils.quantization import configure_quantization
from .model_utils.rope import configure_rope
from .model_utils.valuehead import prepare_valuehead_model
from .model_utils.visual import autocast_projector_dtype, configure_visual_model


if TYPE_CHECKING:
    from transformers import PretrainedConfig, PreTrainedTokenizer
    from trl import AutoModelForCausalLMWithValueHead

    from ..hparams import ModelArguments


logger = get_logger(__name__)


def patch_tokenizer(tokenizer: "PreTrainedTokenizer") -> None:
    if "PreTrainedTokenizerBase" not in str(tokenizer._pad.__func__):
        tokenizer._pad = MethodType(PreTrainedTokenizerBase._pad, tokenizer)


def patch_config(
    config: "PretrainedConfig",
    tokenizer: "PreTrainedTokenizer",
    model_args: "ModelArguments",
    init_kwargs: Dict[str, Any],
    is_trainable: bool,
) -> None:
    if model_args.compute_dtype is None:  # priority: bf16 > fp16 > fp32
        if model_args.infer_dtype != "auto" and not is_trainable:
            model_args.compute_dtype = getattr(torch, model_args.infer_dtype)
        else:
            model_args.compute_dtype = infer_optim_dtype(model_dtype=getattr(config, "torch_dtype", None))

    if is_torch_npu_available():
        use_jit_compile = os.environ.get("JIT_COMPILE", "0").lower() in ["true", "1"]
        torch.npu.set_compile_mode(jit_compile=use_jit_compile)

    configure_attn_implementation(config, model_args, is_trainable)
    configure_rope(config, model_args, is_trainable)
    configure_longlora(config, model_args, is_trainable)
    configure_quantization(config, tokenizer, model_args, init_kwargs)
    configure_moe(config, model_args, is_trainable)
    configure_visual_model(config)
    configure_packing(config, model_args, is_trainable)

    if model_args.use_cache and not is_trainable:
        setattr(config, "use_cache", True)
        logger.info("Using KV cache for faster generation.")

    if getattr(config, "model_type", None) == "qwen":
        setattr(config, "use_flash_attn", model_args.flash_attn == "fa2")
        for dtype_name, dtype in [("fp16", torch.float16), ("bf16", torch.bfloat16), ("fp32", torch.float32)]:
            setattr(config, dtype_name, model_args.compute_dtype == dtype)

    if getattr(config, "model_type", None) == "qwen2" and is_trainable and model_args.flash_attn == "fa2":
        setattr(config, "use_cache", False)  # qwen2 does not support use_cache when using flash attn

    # deepspeed zero3 is not compatible with low_cpu_mem_usage
    init_kwargs["low_cpu_mem_usage"] = model_args.low_cpu_mem_usage and (not is_deepspeed_zero3_enabled())

    # cast data type of the model if:
    # 1. not deepspeed zero3 and not fsdp (keep zero3 or fsdp in float32)
    # 2. quantization_bit is not None (qlora)
    if (not is_deepspeed_zero3_enabled() and not is_fsdp_enabled()) or model_args.quantization_bit is not None:
        init_kwargs["torch_dtype"] = model_args.compute_dtype

        if init_kwargs["low_cpu_mem_usage"]:  # device map requires low_cpu_mem_usage=True
            if "device_map" not in init_kwargs and model_args.device_map:
                init_kwargs["device_map"] = model_args.device_map

            if init_kwargs.get("device_map", None) == "auto":
                init_kwargs["offload_folder"] = model_args.offload_folder


def patch_model(
    model: "PreTrainedModel",
    tokenizer: "PreTrainedTokenizer",
    model_args: "ModelArguments",
    is_trainable: bool,
    add_valuehead: bool,
) -> None:
    gen_config = model.generation_config  # check and fix generation config
    if not gen_config.do_sample and (
        (gen_config.temperature is not None and gen_config.temperature != 1.0)
        or (gen_config.top_p is not None and gen_config.top_p != 1.0)
        or (gen_config.typical_p is not None and gen_config.typical_p != 1.0)
    ):
        gen_config.do_sample = True

    if "GenerationMixin" not in str(model.generate.__func__):
        model.generate = MethodType(PreTrainedModel.generate, model)

    if add_valuehead:
        prepare_valuehead_model(model)

    if model_args.resize_vocab:
        resize_embedding_layer(model, tokenizer)

    if model_args.visual_inputs:
        autocast_projector_dtype(model, model_args)

    if is_trainable:
        prepare_model_for_training(model, model_args)
        add_z3_leaf_module(model)

    if not model_args.use_unsloth:
        print_attn_implementation(model.config)

    try:
        model.add_model_tags(["llama-factory"])
    except Exception:
        logger.warning("Cannot properly tag the model.")


def patch_valuehead_model(model: "AutoModelForCausalLMWithValueHead") -> None:
    def tie_weights(self: "AutoModelForCausalLMWithValueHead") -> None:
        if isinstance(self.pretrained_model, PreTrainedModel):
            self.pretrained_model.tie_weights()

    def get_input_embeddings(self: "AutoModelForCausalLMWithValueHead") -> torch.nn.Module:
        if isinstance(self.pretrained_model, PreTrainedModel):
            return self.pretrained_model.get_input_embeddings()

    def get_output_embeddings(self: "AutoModelForCausalLMWithValueHead") -> torch.nn.Module:
        if isinstance(self.pretrained_model, PreTrainedModel):
            return self.pretrained_model.get_output_embeddings()

    def create_or_update_model_card(self: "AutoModelForCausalLMWithValueHead", output_dir: str) -> None:
        if isinstance(self.pretrained_model, PeftModel):
            self.pretrained_model.create_or_update_model_card(output_dir)

    ignore_modules = [name for name, _ in model.named_parameters() if "pretrained_model" in name]
    setattr(model, "_keys_to_ignore_on_save", ignore_modules)
    setattr(model, "tie_weights", MethodType(tie_weights, model))
    setattr(model, "get_input_embeddings", MethodType(get_input_embeddings, model))
    setattr(model, "get_output_embeddings", MethodType(get_output_embeddings, model))
    setattr(model, "create_or_update_model_card", MethodType(create_or_update_model_card, model))
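Note: patch_tokenizer(), patch_model() and patch_valuehead_model() above all rely on the same trick of rebinding a method on a single instance with types.MethodType. A minimal self-contained sketch of that trick (illustration only, not part of the commit; the class names are hypothetical):

from types import MethodType


class Base:
    def greet(self):
        return "base"


class Custom(Base):
    def greet(self):
        return "custom"


obj = Custom()
print(obj.greet())                       # -> "custom"
obj.greet = MethodType(Base.greet, obj)  # rebind only this instance, the class is untouched
print(obj.greet())                       # -> "base"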
LLaMA-Factory/src/llamafactory/train/__init__.py (new file, mode 100644, empty)
LLaMA-Factory/src/llamafactory/train/callbacks.py (new file, mode 100644)
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import logging
import os
import signal
import sys
import time
from concurrent.futures import ThreadPoolExecutor
from datetime import timedelta
from typing import TYPE_CHECKING, Any, Dict, Optional

import torch
import transformers
from peft import PeftModel
from transformers import PreTrainedModel, ProcessorMixin, TrainerCallback
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR, has_length
from transformers.utils import (
    SAFE_WEIGHTS_NAME,
    WEIGHTS_NAME,
    is_safetensors_available,
)

from ..extras.constants import TRAINER_LOG, V_HEAD_SAFE_WEIGHTS_NAME, V_HEAD_WEIGHTS_NAME
from ..extras.logging import LoggerHandler, get_logger


if is_safetensors_available():
    from safetensors import safe_open
    from safetensors.torch import save_file


if TYPE_CHECKING:
    from transformers import TrainerControl, TrainerState, TrainingArguments
    from trl import AutoModelForCausalLMWithValueHead


logger = get_logger(__name__)


def fix_valuehead_checkpoint(
    model: "AutoModelForCausalLMWithValueHead", output_dir: str, safe_serialization: bool
) -> None:
    r"""
    The model is already unwrapped.

    There are three cases:
    1. full tuning without ds_zero3: state_dict = {"model.layers.*": ..., "v_head.summary.*": ...}
    2. lora tuning without ds_zero3: state_dict = {"v_head.summary.*": ...}
    3. under deepspeed zero3: state_dict = {"pretrained_model.model.layers.*": ..., "v_head.summary.*": ...}

    We assume `stage3_gather_16bit_weights_on_model_save=true`.
    """
    if not isinstance(model.pretrained_model, (PreTrainedModel, PeftModel)):
        return

    if safe_serialization:
        path_to_checkpoint = os.path.join(output_dir, SAFE_WEIGHTS_NAME)
        with safe_open(path_to_checkpoint, framework="pt", device="cpu") as f:
            state_dict: Dict[str, torch.Tensor] = {key: f.get_tensor(key) for key in f.keys()}
    else:
        path_to_checkpoint = os.path.join(output_dir, WEIGHTS_NAME)
        state_dict: Dict[str, torch.Tensor] = torch.load(path_to_checkpoint, map_location="cpu")

    decoder_state_dict = {}
    v_head_state_dict = {}
    for name, param in state_dict.items():
        if name.startswith("v_head."):
            v_head_state_dict[name] = param
        else:
            decoder_state_dict[name.replace("pretrained_model.", "", 1)] = param

    model.pretrained_model.save_pretrained(
        output_dir, state_dict=decoder_state_dict or None, safe_serialization=safe_serialization
    )

    if safe_serialization:
        save_file(v_head_state_dict, os.path.join(output_dir, V_HEAD_SAFE_WEIGHTS_NAME), metadata={"format": "pt"})
    else:
        torch.save(v_head_state_dict, os.path.join(output_dir, V_HEAD_WEIGHTS_NAME))

    os.remove(path_to_checkpoint)
    logger.info("Value head model saved at: {}".format(output_dir))


class FixValueHeadModelCallback(TrainerCallback):
    def on_save(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
        r"""
        Event called after a checkpoint save.
        """
        if args.should_save:
            fix_valuehead_checkpoint(
                model=kwargs.pop("model"),
                output_dir=os.path.join(args.output_dir, "{}-{}".format(PREFIX_CHECKPOINT_DIR, state.global_step)),
                safe_serialization=args.save_safetensors,
            )


class SaveProcessorCallback(TrainerCallback):
    def __init__(self, processor: "ProcessorMixin") -> None:
        r"""
        Initializes a callback for saving the processor.
        """
        self.processor = processor

    def on_train_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
        r"""
        Event called at the end of training.
        """
        if args.should_save:
            getattr(self.processor, "image_processor").save_pretrained(args.output_dir)


class PissaConvertCallback(TrainerCallback):
    r"""
    Initializes a callback for converting the PiSSA adapter to a normal one.
    """

    def on_train_begin(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
        r"""
        Event called at the beginning of training.
        """
        if args.should_save:
            model = kwargs.pop("model")
            pissa_init_dir = os.path.join(args.output_dir, "pissa_init")
            logger.info("Initial PiSSA adapter will be saved at: {}.".format(pissa_init_dir))
            if isinstance(model, PeftModel):
                init_lora_weights = getattr(model.peft_config["default"], "init_lora_weights")
                setattr(model.peft_config["default"], "init_lora_weights", True)
                model.save_pretrained(pissa_init_dir, safe_serialization=args.save_safetensors)
                setattr(model.peft_config["default"], "init_lora_weights", init_lora_weights)

    def on_train_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
        r"""
        Event called at the end of training.
        """
        if args.should_save:
            model = kwargs.pop("model")
            pissa_init_dir = os.path.join(args.output_dir, "pissa_init")
            pissa_backup_dir = os.path.join(args.output_dir, "pissa_backup")
            pissa_convert_dir = os.path.join(args.output_dir, "pissa_converted")
            logger.info("Converted PiSSA adapter will be saved at: {}.".format(pissa_convert_dir))
            # 1. save a pissa backup with init_lora_weights: True
            # 2. save a converted lora with init_lora_weights: pissa
            # 3. load the pissa backup with init_lora_weights: True
            # 4. delete the initial adapter and change init_lora_weights to pissa
            if isinstance(model, PeftModel):
                init_lora_weights = getattr(model.peft_config["default"], "init_lora_weights")
                setattr(model.peft_config["default"], "init_lora_weights", True)
                model.save_pretrained(pissa_backup_dir, safe_serialization=args.save_safetensors)
                setattr(model.peft_config["default"], "init_lora_weights", init_lora_weights)
                model.save_pretrained(
                    pissa_convert_dir, safe_serialization=args.save_safetensors, convert_pissa_to_lora=pissa_init_dir
                )
                model.load_adapter(pissa_backup_dir, "default", is_trainable=True)
                model.set_adapter("default")
                model.delete_adapter("pissa_init")
                setattr(model.peft_config["default"], "init_lora_weights", init_lora_weights)


class LogCallback(TrainerCallback):
    def __init__(self) -> None:
        r"""
        Initializes a callback for logging training and evaluation status.
        """
        """ Progress """
        self.start_time = 0
        self.cur_steps = 0
        self.max_steps = 0
        self.elapsed_time = ""
        self.remaining_time = ""
        self.thread_pool: Optional["ThreadPoolExecutor"] = None
        """ Status """
        self.aborted = False
        self.do_train = False
        """ Web UI """
        self.webui_mode = os.environ.get("LLAMABOARD_ENABLED", "0").lower() in ["true", "1"]
        if self.webui_mode:
            signal.signal(signal.SIGABRT, self._set_abort)
            self.logger_handler = LoggerHandler(os.environ.get("LLAMABOARD_WORKDIR"))
            logging.root.addHandler(self.logger_handler)
            transformers.logging.add_handler(self.logger_handler)

    def _set_abort(self, signum, frame) -> None:
        self.aborted = True

    def _reset(self, max_steps: int = 0) -> None:
        self.start_time = time.time()
        self.cur_steps = 0
        self.max_steps = max_steps
        self.elapsed_time = ""
        self.remaining_time = ""

    def _timing(self, cur_steps: int) -> None:
        cur_time = time.time()
        elapsed_time = cur_time - self.start_time
        avg_time_per_step = elapsed_time / cur_steps if cur_steps != 0 else 0
        remaining_time = (self.max_steps - cur_steps) * avg_time_per_step
        self.cur_steps = cur_steps
        self.elapsed_time = str(timedelta(seconds=int(elapsed_time)))
        self.remaining_time = str(timedelta(seconds=int(remaining_time)))

    def _write_log(self, output_dir: str, logs: Dict[str, Any]) -> None:
        with open(os.path.join(output_dir, TRAINER_LOG), "a", encoding="utf-8") as f:
            f.write(json.dumps(logs) + "\n")

    def _create_thread_pool(self, output_dir: str) -> None:
        os.makedirs(output_dir, exist_ok=True)
        self.thread_pool = ThreadPoolExecutor(max_workers=1)

    def _close_thread_pool(self) -> None:
        if self.thread_pool is not None:
            self.thread_pool.shutdown(wait=True)
            self.thread_pool = None

    def on_init_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
        r"""
        Event called at the end of the initialization of the `Trainer`.
        """
        if (
            args.should_save
            and os.path.exists(os.path.join(args.output_dir, TRAINER_LOG))
            and args.overwrite_output_dir
        ):
            logger.warning("Previous trainer log in this folder will be deleted.")
            os.remove(os.path.join(args.output_dir, TRAINER_LOG))

    def on_train_begin(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
        r"""
        Event called at the beginning of training.
        """
        if args.should_save:
            self.do_train = True
            self._reset(max_steps=state.max_steps)
            self._create_thread_pool(output_dir=args.output_dir)

    def on_train_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
        r"""
        Event called at the end of training.
        """
        self._close_thread_pool()

    def on_substep_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
        r"""
        Event called at the end of a substep during gradient accumulation.
        """
        if self.aborted:
            control.should_epoch_stop = True
            control.should_training_stop = True

    def on_step_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
        r"""
        Event called at the end of a training step.
        """
        if self.aborted:
            control.should_epoch_stop = True
            control.should_training_stop = True

    def on_evaluate(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
        r"""
        Event called after an evaluation phase.
        """
        if not self.do_train:
            self._close_thread_pool()

    def on_predict(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
        r"""
        Event called after a successful prediction.
        """
        if not self.do_train:
            self._close_thread_pool()

    def on_log(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
        r"""
        Event called after logging the last logs.
        """
        if not args.should_save:
            return

        self._timing(cur_steps=state.global_step)
        logs = dict(
            current_steps=self.cur_steps,
            total_steps=self.max_steps,
            loss=state.log_history[-1].get("loss", None),
            eval_loss=state.log_history[-1].get("eval_loss", None),
            predict_loss=state.log_history[-1].get("predict_loss", None),
            reward=state.log_history[-1].get("reward", None),
            accuracy=state.log_history[-1].get("rewards/accuracies", None),
            learning_rate=state.log_history[-1].get("learning_rate", None),
            epoch=state.log_history[-1].get("epoch", None),
            percentage=round(self.cur_steps / self.max_steps * 100, 2) if self.max_steps != 0 else 100,
            elapsed_time=self.elapsed_time,
            remaining_time=self.remaining_time,
            throughput="{:.2f}".format(state.num_input_tokens_seen / (time.time() - self.start_time)),
            total_tokens=state.num_input_tokens_seen,
        )
        logs = {k: v for k, v in logs.items() if v is not None}
        if self.webui_mode and all(key in logs for key in ["loss", "learning_rate", "epoch"]):
            logger.info(
                "{{'loss': {:.4f}, 'learning_rate': {:2.4e}, 'epoch': {:.2f}, 'throughput': {}}}".format(
                    logs["loss"], logs["learning_rate"], logs["epoch"], logs["throughput"]
                )
            )

        if self.thread_pool is not None:
            self.thread_pool.submit(self._write_log, args.output_dir, logs)

    def on_prediction_step(
        self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs
    ):
        r"""
        Event called after a prediction step.
        """
        if self.do_train:
            return

        if self.aborted:
            sys.exit(0)

        if not args.should_save:
            return

        eval_dataloader = kwargs.pop("eval_dataloader", None)
        if has_length(eval_dataloader):
            if self.max_steps == 0:
                self._reset(max_steps=len(eval_dataloader))
                self._create_thread_pool(output_dir=args.output_dir)

            self._timing(cur_steps=self.cur_steps + 1)
            if self.cur_steps % 5 == 0 and self.thread_pool is not None:
                logs = dict(
                    current_steps=self.cur_steps,
                    total_steps=self.max_steps,
                    percentage=round(self.cur_steps / self.max_steps * 100, 2) if self.max_steps != 0 else 100,
                    elapsed_time=self.elapsed_time,
                    remaining_time=self.remaining_time,
                )
                self.thread_pool.submit(self._write_log, args.output_dir, logs)
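Note: LogCallback._write_log() is always submitted to a one-worker thread pool, so log writes stay ordered while the training loop never blocks on file I/O. A minimal self-contained sketch of that pattern (illustration only, not part of the commit; the literal file name stands in for the TRAINER_LOG constant):

import json
import os
import tempfile
from concurrent.futures import ThreadPoolExecutor


def write_log(output_dir, logs):
    # append one JSON object per line, mirroring LogCallback._write_log()
    with open(os.path.join(output_dir, "trainer_log.jsonl"), "a", encoding="utf-8") as f:
        f.write(json.dumps(logs) + "\n")


output_dir = tempfile.mkdtemp()
pool = ThreadPoolExecutor(max_workers=1)  # a single worker keeps writes serialized
pool.submit(write_log, output_dir, {"current_steps": 1, "total_steps": 10, "loss": 0.52})
pool.submit(write_log, output_dir, {"current_steps": 2, "total_steps": 10, "loss": 0.48})
pool.shutdown(wait=True)

with open(os.path.join(output_dir, "trainer_log.jsonl"), encoding="utf-8") as f:
    print(f.read())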
LLaMA-Factory/src/llamafactory/train/dpo/__init__.py (new file, mode 100644)
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .workflow import run_dpo


__all__ = ["run_dpo"]
LLaMA-Factory/src/llamafactory/train/dpo/trainer.py (new file, mode 100644)
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by HuggingFace's TRL library.
# https://github.com/huggingface/trl/blob/v0.8.0/trl/trainer/dpo_trainer.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
from collections import defaultdict
from contextlib import nullcontext
from types import MethodType
from typing import TYPE_CHECKING, Dict, Literal, Optional, Tuple, Union

import torch
import torch.nn.functional as F
from transformers import Trainer
from trl import DPOTrainer
from trl.trainer import disable_dropout_in_model

from ...extras.constants import IGNORE_INDEX
from ..callbacks import PissaConvertCallback, SaveProcessorCallback
from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps


if TYPE_CHECKING:
    from transformers import PreTrainedModel, ProcessorMixin

    from ...hparams import FinetuningArguments


class CustomDPOTrainer(DPOTrainer):
    def __init__(
        self,
        model: Union["PreTrainedModel", torch.nn.Module],
        ref_model: Optional[Union["PreTrainedModel", torch.nn.Module]],
        finetuning_args: "FinetuningArguments",
        processor: Optional["ProcessorMixin"],
        disable_dropout: bool = True,
        **kwargs,
    ):
        if disable_dropout:
            disable_dropout_in_model(model)
            if ref_model is not None:
                disable_dropout_in_model(ref_model)

        self.finetuning_args = finetuning_args
        self.f_divergence_type = "reverse_kl"
        self.reference_free = False
        self.use_dpo_data_collator = True  # hack to avoid warning
        self.generate_during_eval = False  # disable at evaluation
        self.label_pad_token_id = IGNORE_INDEX
        self.padding_value = 0
        self.is_encoder_decoder = model.config.is_encoder_decoder
        self.precompute_ref_log_probs = False
        self._precomputed_train_ref_log_probs = False
        self._precomputed_eval_ref_log_probs = False
        self._peft_has_been_casted_to_bf16 = False

        self.ref_model = ref_model
        self._stored_metrics = defaultdict(lambda: defaultdict(list))

        # dpo hyperparams
        self.beta = finetuning_args.pref_beta
        self.loss_type = finetuning_args.pref_loss
        self.ftx_gamma = finetuning_args.pref_ftx
        self.label_smoothing = finetuning_args.dpo_label_smoothing
        self.simpo_gamma = finetuning_args.simpo_gamma

        Trainer.__init__(self, model=model, **kwargs)
        if not hasattr(self, "accelerator"):
            raise AttributeError("Please update `transformers`.")

        warnings.simplefilter("ignore")  # remove gc warnings on ref model

        if ref_model is not None:
            if self.is_deepspeed_enabled:
                if not (
                    getattr(ref_model, "is_loaded_in_8bit", False) or getattr(ref_model, "is_loaded_in_4bit", False)
                ):  # quantized models are already set on the correct device
                    self.ref_model = self._prepare_deepspeed(self.ref_model)
            else:
                self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True)
                self.ref_model.eval()

        if processor is not None:
            self.add_callback(SaveProcessorCallback(processor))

        if finetuning_args.pissa_convert:
            self.callback_handler.add_callback(PissaConvertCallback)

        if finetuning_args.use_badam:
            from badam import BAdamCallback, clip_grad_norm_old_version

            self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator)
            self.add_callback(BAdamCallback)

    def create_optimizer(self) -> "torch.optim.Optimizer":
        if self.optimizer is None:
            self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args)
        return super().create_optimizer()

    def create_scheduler(
        self, num_training_steps: int, optimizer: Optional["torch.optim.Optimizer"] = None
    ) -> "torch.optim.lr_scheduler.LRScheduler":
        create_custom_scheduler(self.args, num_training_steps, optimizer)
        return super().create_scheduler(num_training_steps, optimizer)

    def odds_ratio_loss(self, chosen_logps: "torch.Tensor", rejected_logps: "torch.Tensor") -> "torch.Tensor":
        r"""
        Computes ORPO's odds ratio (OR) loss for batched log probabilities of the policy model.
        """
        log_odds = (chosen_logps - rejected_logps) - (
            torch.log1p(-torch.exp(chosen_logps)) - torch.log1p(-torch.exp(rejected_logps))
        )
        sft_loss = -chosen_logps
        odds_ratio_loss = -F.logsigmoid(log_odds)
        orpo_loss = sft_loss + self.beta * odds_ratio_loss
        return orpo_loss

    def simpo_loss(self, chosen_logps: "torch.Tensor", rejected_logps: "torch.Tensor") -> "torch.Tensor":
        r"""
        Computes SimPO loss for batched log probabilities of the policy model.
        """
        pi_logratios = chosen_logps - rejected_logps
        gamma_logratios = self.simpo_gamma / self.beta
        logits = pi_logratios - gamma_logratios
        simpo_loss = -F.logsigmoid(self.beta * logits)
        return simpo_loss

    def compute_preference_loss(
        self,
        policy_chosen_logps: "torch.Tensor",
        policy_rejected_logps: "torch.Tensor",
        reference_chosen_logps: Optional["torch.Tensor"],
        reference_rejected_logps: Optional["torch.Tensor"],
    ) -> Tuple["torch.Tensor", "torch.Tensor", "torch.Tensor"]:
        r"""
        Computes loss for preference learning.
        """
        if not self.finetuning_args.use_ref_model:
            if self.loss_type == "orpo":
                losses = self.odds_ratio_loss(policy_chosen_logps, policy_rejected_logps)
            elif self.loss_type == "simpo":
                losses = self.simpo_loss(policy_chosen_logps, policy_rejected_logps)
            else:
                raise NotImplementedError("Unknown loss type: {}.".format(self.loss_type))

            chosen_rewards = self.beta * policy_chosen_logps.to(self.accelerator.device).detach()
            rejected_rewards = self.beta * policy_rejected_logps.to(self.accelerator.device).detach()
        else:
            losses, chosen_rewards, rejected_rewards = self.dpo_loss(
                policy_chosen_logps, policy_rejected_logps, reference_chosen_logps, reference_rejected_logps
            )

        return losses, chosen_rewards, rejected_rewards

    def concatenated_forward(
        self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"]
    ) -> Tuple["torch.Tensor", "torch.Tensor", "torch.Tensor", "torch.Tensor", "torch.Tensor"]:
        r"""
        Computes the sum log probabilities of the labels under given logits if loss_type is not IPO, ORPO or SimPO.

        Otherwise the average log probabilities.
        """
        if self.finetuning_args.use_ref_model:
            batch = {k: v.detach().clone() for k, v in batch.items()}  # avoid error

        all_logits: "torch.Tensor" = model(**batch, return_dict=True, use_cache=False).logits.to(torch.float32)
        all_logps, valid_length = get_batch_logps(logits=all_logits, labels=batch["labels"])
        if self.loss_type in ["ipo", "orpo", "simpo"]:
            all_logps = all_logps / valid_length

        batch_size = batch["input_ids"].size(0) // 2
        chosen_logps, rejected_logps = all_logps.split(batch_size, dim=0)
        chosen_logits, rejected_logits = all_logits.split(batch_size, dim=0)
        chosen_length, _ = valid_length.split(batch_size, dim=0)
        return chosen_logps, rejected_logps, chosen_logits, rejected_logits, chosen_logps / chosen_length

    def compute_reference_log_probs(
        self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"]
    ) -> Tuple[Optional["torch.Tensor"], Optional["torch.Tensor"]]:
        r"""
        Computes log probabilities of the reference model.
        """
        if not self.finetuning_args.use_ref_model:
            return None, None

        if self.ref_model is None:
            ref_model = model
            ref_context = self.accelerator.unwrap_model(model).disable_adapter()
        else:
            ref_model = self.ref_model
            ref_context = nullcontext()

        with torch.no_grad(), ref_context:
            reference_chosen_logps, reference_rejected_logps, *_ = self.concatenated_forward(ref_model, batch)

        return reference_chosen_logps, reference_rejected_logps

    def get_batch_loss_metrics(
        self,
        model: "PreTrainedModel",
        batch: Dict[str, "torch.Tensor"],
        train_eval: Literal["train", "eval"] = "train",
    ) -> Tuple["torch.Tensor", Dict[str, "torch.Tensor"]]:
        r"""
        Computes the DPO loss and other metrics for the given batch of inputs for train or test.
        """
        metrics = {}
        (
            policy_chosen_logps,
            policy_rejected_logps,
            policy_chosen_logits,
            policy_rejected_logits,
            policy_chosen_logps_avg,
        ) = self.concatenated_forward(model, batch)

        reference_chosen_logps, reference_rejected_logps = self.compute_reference_log_probs(model, batch)
        losses, chosen_rewards, rejected_rewards = self.compute_preference_loss(
            policy_chosen_logps,
            policy_rejected_logps,
            reference_chosen_logps,
            reference_rejected_logps,
        )
        sft_loss = -policy_chosen_logps_avg
        if self.ftx_gamma > 1e-6:
            losses += self.ftx_gamma * sft_loss

        reward_accuracies = (chosen_rewards > rejected_rewards).float()

        prefix = "eval_" if train_eval == "eval" else ""
        metrics["{}rewards/chosen".format(prefix)] = chosen_rewards.mean().cpu()
        metrics["{}rewards/rejected".format(prefix)] = rejected_rewards.mean().cpu()
        metrics["{}rewards/accuracies".format(prefix)] = reward_accuracies.mean().cpu()
        metrics["{}rewards/margins".format(prefix)] = (chosen_rewards - rejected_rewards).mean().cpu()
        metrics["{}logps/rejected".format(prefix)] = policy_rejected_logps.detach().mean().cpu()
        metrics["{}logps/chosen".format(prefix)] = policy_chosen_logps.detach().mean().cpu()
        metrics["{}logits/rejected".format(prefix)] = policy_rejected_logits.detach().mean().cpu()
        metrics["{}logits/chosen".format(prefix)] = policy_chosen_logits.detach().mean().cpu()
        if self.loss_type == "orpo":
            metrics["{}sft_loss".format(prefix)] = sft_loss.detach().mean().cpu()
            metrics["{}odds_ratio_loss".format(prefix)] = ((losses - sft_loss) / self.beta).detach().mean().cpu()

        return losses.mean(), metrics
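Note: the two reference-free preference losses defined in odds_ratio_loss() and simpo_loss() above can be written compactly as follows (a restatement of the code, not part of the commit; log pi_w and log pi_l are the length-averaged policy log-probabilities of the chosen and rejected responses, beta is pref_beta and gamma is simpo_gamma):

% ORPO, as implemented in odds_ratio_loss():
\mathcal{L}_{\text{ORPO}} = -\log\pi_w
    - \beta \log\sigma\Big( \big(\log\pi_w - \log\pi_l\big)
    - \big(\log(1 - e^{\log\pi_w}) - \log(1 - e^{\log\pi_l})\big) \Big)

% SimPO, as implemented in simpo_loss():
\mathcal{L}_{\text{SimPO}} = -\log\sigma\Big( \beta\,\big(\log\pi_w - \log\pi_l\big) - \gamma \Big)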
LLaMA-Factory/src/llamafactory/train/dpo/workflow.py (new file, mode 100644)
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by HuggingFace's TRL library.
# https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/dpo.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING, List, Optional

from ...data import PairwiseDataCollatorWithPadding, get_dataset
from ...extras.constants import IGNORE_INDEX
from ...extras.ploting import plot_loss
from ...hparams import ModelArguments
from ...model import load_model, load_tokenizer
from ..trainer_utils import create_modelcard_and_push, create_ref_model
from .trainer import CustomDPOTrainer


if TYPE_CHECKING:
    from transformers import Seq2SeqTrainingArguments, TrainerCallback

    from ...hparams import DataArguments, FinetuningArguments


def run_dpo(
    model_args: "ModelArguments",
    data_args: "DataArguments",
    training_args: "Seq2SeqTrainingArguments",
    finetuning_args: "FinetuningArguments",
    callbacks: Optional[List["TrainerCallback"]] = None,
):
    tokenizer_module = load_tokenizer(model_args)
    tokenizer = tokenizer_module["tokenizer"]
    dataset_module = get_dataset(model_args, data_args, training_args, stage="rm", **tokenizer_module)
    model = load_model(tokenizer, model_args, finetuning_args, training_args.do_train)

    data_collator = PairwiseDataCollatorWithPadding(
        tokenizer=tokenizer,
        pad_to_multiple_of=8,
        label_pad_token_id=IGNORE_INDEX if data_args.ignore_pad_token_for_loss else tokenizer.pad_token_id,
    )

    # Create reference model
    if finetuning_args.use_ref_model:
        if finetuning_args.ref_model is None and (not training_args.do_train):  # use the model itself
            ref_model = model
        else:
            ref_model = create_ref_model(model_args, finetuning_args)
    else:
        ref_model = None

    # Update arguments
    training_args.remove_unused_columns = False  # important for pairwise dataset

    # Initialize our Trainer
    trainer = CustomDPOTrainer(
        model=model,
        ref_model=ref_model,
        args=training_args,
        finetuning_args=finetuning_args,
        data_collator=data_collator,
        callbacks=callbacks,
        **dataset_module,
        **tokenizer_module,
    )

    # Training
    if training_args.do_train:
        train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
        trainer.save_model()
        trainer.log_metrics("train", train_result.metrics)
        trainer.save_metrics("train", train_result.metrics)
        trainer.save_state()
        if trainer.is_world_process_zero() and finetuning_args.plot_loss:
            plot_loss(training_args.output_dir, keys=["loss", "eval_loss", "rewards/accuracies"])

    # Evaluation
    if training_args.do_eval:
        metrics = trainer.evaluate(metric_key_prefix="eval")
        if id(model) == id(ref_model):  # unable to compute rewards if reference model is the model itself
            remove_keys = [key for key in metrics.keys() if "rewards" in key]
            for key in remove_keys:
                metrics.pop(key)

        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Create model card
    create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args)
LLaMA-Factory/src/llamafactory/train/kto/__init__.py (new file, mode 100644)
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .workflow import run_kto


__all__ = ["run_kto"]
LLaMA-Factory/src/llamafactory/train/kto/trainer.py (new file, mode 100644)
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by HuggingFace's TRL library.
# https://github.com/huggingface/trl/blob/v0.8.0/trl/trainer/kto_trainer.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
from collections import defaultdict
from contextlib import nullcontext
from types import MethodType
from typing import TYPE_CHECKING, Dict, Literal, Optional, Tuple, Union

import torch
from transformers import Trainer
from trl import KTOTrainer
from trl.trainer import disable_dropout_in_model

from ...extras.constants import IGNORE_INDEX
from ..callbacks import SaveProcessorCallback
from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps


if TYPE_CHECKING:
    import torch.utils.data
    from transformers import PreTrainedModel, ProcessorMixin

    from ...hparams import FinetuningArguments


class CustomKTOTrainer(KTOTrainer):
    def __init__(
        self,
        model: Union["PreTrainedModel", torch.nn.Module],
        ref_model: Optional[Union["PreTrainedModel", torch.nn.Module]],
        finetuning_args: "FinetuningArguments",
        processor: Optional["ProcessorMixin"],
        disable_dropout: bool = True,
        **kwargs,
    ):
        if disable_dropout:
            disable_dropout_in_model(model)
            if ref_model is not None:
                disable_dropout_in_model(ref_model)

        self.finetuning_args = finetuning_args
        self.reference_free = False
        self.use_dpo_data_collator = True  # hack to avoid warning
        self.generate_during_eval = False  # disable at evaluation
        self.label_pad_token_id = IGNORE_INDEX
        self.padding_value = 0
        self.is_encoder_decoder = model.config.is_encoder_decoder
        self.precompute_ref_log_probs = False
        self._precomputed_train_ref_log_probs = False
        self._precomputed_eval_ref_log_probs = False
        self._peft_has_been_casted_to_bf16 = False

        self.ref_model = ref_model
        self._stored_metrics = defaultdict(lambda: defaultdict(list))

        # kto hyperparams
        self.beta = finetuning_args.pref_beta
        self.desirable_weight = finetuning_args.kto_chosen_weight
        self.undesirable_weight = finetuning_args.kto_rejected_weight
        self.ftx_gamma = finetuning_args.pref_ftx

        Trainer.__init__(self, model=model, **kwargs)
        if not hasattr(self, "accelerator"):
            raise AttributeError("Please update `transformers`.")

        warnings.simplefilter("ignore")  # remove gc warnings on ref model

        if ref_model is not None:
            if self.is_deepspeed_enabled:
                if not (
                    getattr(ref_model, "is_loaded_in_8bit", False) or getattr(ref_model, "is_loaded_in_4bit", False)
                ):  # quantized models are already set on the correct device
                    self.ref_model = self._prepare_deepspeed(self.ref_model)
            else:
                self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True)
                self.ref_model.eval()

        if processor is not None:
            self.add_callback(SaveProcessorCallback(processor))

        if finetuning_args.use_badam:
            from badam import BAdamCallback, clip_grad_norm_old_version

            self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator)
            self.add_callback(BAdamCallback)

    def create_optimizer(self) -> "torch.optim.Optimizer":
        if self.optimizer is None:
            self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args)
        return super().create_optimizer()

    def create_scheduler(
        self, num_training_steps: int, optimizer: Optional["torch.optim.Optimizer"] = None
    ) -> "torch.optim.lr_scheduler.LRScheduler":
        create_custom_scheduler(self.args, num_training_steps, optimizer)
        return super().create_scheduler(num_training_steps, optimizer)

    def _get_train_sampler(self) -> Optional["torch.utils.data.Sampler"]:
        r"""
        Replaces the sequential sampler of KTO Trainer created by trl with the random sampler.
        """
        return Trainer._get_train_sampler(self)

    def forward(
        self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"], prefix: Literal["", "kl_"] = ""
    ) -> Tuple["torch.Tensor", "torch.Tensor"]:
        r"""
        Runs forward pass and computes the log probabilities.
        """
        batch = {k: v.detach().clone() for k, v in batch.items()}  # avoid error
        model_inputs = {
            "input_ids": batch["{}input_ids".format(prefix)],
            "attention_mask": batch["{}attention_mask".format(prefix)],
        }
        if "pixel_values" in batch:
            model_inputs["pixel_values"] = batch["pixel_values"]

        if "{}token_type_ids".format(prefix) in batch:
            model_inputs["token_type_ids"] = batch["{}token_type_ids".format(prefix)]

        logits = model(**model_inputs, return_dict=True, use_cache=False).logits.to(torch.float32)
        logps, valid_length = get_batch_logps(logits=logits, labels=batch["{}labels".format(prefix)])
        return logps, logps / valid_length

    def concatenated_forward(
        self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"]
    ) -> Tuple["torch.Tensor", "torch.Tensor", "torch.Tensor", "torch.Tensor"]:
        target_logps, target_logps_avg = self.forward(model, batch)
        with torch.no_grad():
            kl_logps, _ = self.forward(model, batch, prefix="kl_")

        if len(target_logps) != len(batch["kto_tags"]):
            raise ValueError("Mismatched shape of inputs and labels.")

        chosen_logps = target_logps[batch["kto_tags"]]
        rejected_logps = target_logps[~batch["kto_tags"]]
        chosen_logps_avg = target_logps_avg[batch["kto_tags"]]
        return chosen_logps, rejected_logps, kl_logps, chosen_logps_avg

    def compute_reference_log_probs(
        self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"]
    ) -> Tuple["torch.Tensor", "torch.Tensor", "torch.Tensor"]:
        r"""
        Computes log probabilities of the reference model.
        """
        if self.ref_model is None:
            ref_model = model
            ref_context = self.accelerator.unwrap_model(model).disable_adapter()
        else:
            ref_model = self.ref_model
            ref_context = nullcontext()

        with torch.no_grad(), ref_context:
            reference_chosen_logps, reference_rejected_logps, reference_kl_logps, _ = self.concatenated_forward(
                ref_model, batch
            )

        return reference_chosen_logps, reference_rejected_logps, reference_kl_logps

    def get_batch_loss_metrics(
        self,
        model: "PreTrainedModel",
        batch: Dict[str, "torch.Tensor"],
    ) -> Tuple["torch.Tensor", Dict[str, "torch.Tensor"]]:
        r"""
        Computes the DPO loss and other metrics for the given batch of inputs for train or test.
        """
        metrics = {}
        policy_chosen_logps, policy_rejected_logps, policy_kl_logps, policy_chosen_logps_avg = (
            self.concatenated_forward(model, batch)
        )
        reference_chosen_logps, reference_rejected_logps, reference_kl_logps = self.compute_reference_log_probs(
            model, batch
        )
        losses, chosen_rewards, rejected_rewards, kl = self.kto_loss(
            policy_chosen_logps,
            policy_rejected_logps,
            policy_kl_logps,
            reference_chosen_logps,
            reference_rejected_logps,
            reference_kl_logps,
        )
        losses = losses.nanmean()

        if self.ftx_gamma > 1e-6 and len(policy_chosen_logps) > 0:  # remember to rescale
            sft_loss = -policy_chosen_logps_avg
            losses += self.ftx_gamma * sft_loss.nanmean() / len(policy_chosen_logps) * len(batch["labels"])

        num_chosen = torch.Tensor([len(chosen_rewards)]).to(self.accelerator.device)
        num_rejected = torch.Tensor([len(rejected_rewards)]).to(self.accelerator.device)

        all_num_chosen = self.accelerator.gather(num_chosen).sum().item()
        all_num_rejected = self.accelerator.gather(num_rejected).sum().item()

        if all_num_chosen > 0:
            metrics["rewards/chosen_sum"] = self.accelerator.gather(chosen_rewards.nansum()).nansum().item()
            metrics["logps/chosen_sum"] = self.accelerator.gather(policy_chosen_logps.nansum()).nansum().item()
            metrics["count/chosen"] = all_num_chosen

        if all_num_rejected > 0:
            metrics["rewards/rejected_sum"] = self.accelerator.gather(rejected_rewards.nansum()).nansum().item()
            metrics["logps/rejected_sum"] = self.accelerator.gather(policy_rejected_logps.nansum()).nansum().item()
            metrics["count/rejected"] = all_num_rejected

        metrics["kl"] = kl.item()

        return losses, metrics
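Note: concatenated_forward() above separates desirable and undesirable examples with the boolean kto_tags mask rather than by batch position. A small self-contained illustration of that indexing (not part of the commit; the values are made up):

import torch

target_logps = torch.tensor([-1.0, -2.0, -3.0, -4.0])  # per-example log-probs
kto_tags = torch.tensor([True, False, True, False])     # True marks a desirable example
chosen_logps = target_logps[kto_tags]                   # tensor([-1., -3.])
rejected_logps = target_logps[~kto_tags]                # tensor([-2., -4.])
print(chosen_logps, rejected_logps)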
LLaMA-Factory/src/llamafactory/train/kto/workflow.py (new file, mode 100644)
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by HuggingFace's TRL library.
# https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/kto.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING, List, Optional

from ...data import KTODataCollatorWithPadding, get_dataset
from ...extras.constants import IGNORE_INDEX
from ...extras.ploting import plot_loss
from ...hparams import ModelArguments
from ...model import load_model, load_tokenizer
from ..trainer_utils import create_modelcard_and_push, create_ref_model
from .trainer import CustomKTOTrainer


if TYPE_CHECKING:
    from transformers import Seq2SeqTrainingArguments, TrainerCallback

    from ...hparams import DataArguments, FinetuningArguments


def run_kto(
    model_args: "ModelArguments",
    data_args: "DataArguments",
    training_args: "Seq2SeqTrainingArguments",
    finetuning_args: "FinetuningArguments",
    callbacks: Optional[List["TrainerCallback"]] = None,
):
    tokenizer_module = load_tokenizer(model_args)
    tokenizer = tokenizer_module["tokenizer"]
    dataset_module = get_dataset(model_args, data_args, training_args, stage="kto", **tokenizer_module)
    model = load_model(tokenizer, model_args, finetuning_args, training_args.do_train)

    data_collator = KTODataCollatorWithPadding(
        tokenizer=tokenizer,
        pad_to_multiple_of=8,
        label_pad_token_id=IGNORE_INDEX if data_args.ignore_pad_token_for_loss else tokenizer.pad_token_id,
    )

    # Create reference model
    if finetuning_args.ref_model is None and (not training_args.do_train):  # use the model itself
        ref_model = model
    else:
        ref_model = create_ref_model(model_args, finetuning_args)

    # Update arguments
    training_args.remove_unused_columns = False  # important for pairwise dataset

    # Initialize our Trainer
    trainer = CustomKTOTrainer(
        model=model,
        ref_model=ref_model,
        args=training_args,
        finetuning_args=finetuning_args,
        data_collator=data_collator,
        callbacks=callbacks,
        **dataset_module,
        **tokenizer_module,
    )

    # Training
    if training_args.do_train:
        train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
        trainer.save_model()
        trainer.log_metrics("train", train_result.metrics)
        trainer.save_metrics("train", train_result.metrics)
        trainer.save_state()
        if trainer.is_world_process_zero() and finetuning_args.plot_loss:
            plot_loss(training_args.output_dir, keys=["loss", "eval_loss", "train/rewards/chosen"])

    # Evaluation
    if training_args.do_eval:
        metrics = trainer.evaluate(metric_key_prefix="eval")
        if id(model) == id(ref_model):  # unable to compute rewards without a reference model
            remove_keys = [key for key in metrics.keys() if "rewards" in key]
            for key in remove_keys:
                metrics.pop(key)

        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Create model card
    create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args)
LLaMA-Factory/src/llamafactory/train/ppo/__init__.py (new file, mode 100644)
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .workflow import run_ppo


__all__ = ["run_ppo"]
LLaMA-Factory/src/llamafactory/train/ppo/ppo_utils.py (new file, mode 100644)
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from contextlib import nullcontext
from typing import TYPE_CHECKING, Dict, List, Literal, Optional

import torch
from transformers.integrations import is_deepspeed_zero3_enabled

from ...extras.packages import is_requests_available


if is_requests_available():
    import requests


if TYPE_CHECKING:
    from transformers import PreTrainedModel
    from trl import AutoModelForCausalLMWithValueHead


def get_rewards_from_server(server_url: str, messages: List[str]) -> List[torch.Tensor]:
    r"""
    Gets reward scores from the API server.
    """
    headers = {"Content-Type": "application/json"}
    payload = {"model": "model", "messages": messages}
    response = requests.post(server_url, json=payload, headers=headers)
    rewards = json.loads(response.text)["scores"]
    return torch.Tensor(rewards)


def replace_model(model: "AutoModelForCausalLMWithValueHead", target: Literal["default", "reward"]) -> None:
    r"""
    Replaces the default/reward modules in the model. The model is already unwrapped.
    """
    v_head_layer = model.v_head.summary
    if is_deepspeed_zero3_enabled():
        import deepspeed  # type: ignore

        params = [v_head_layer.weight, v_head_layer.bias]
        context_maybe_zero3 = deepspeed.zero.GatheredParameters(params, modifier_rank=0)
    else:
        context_maybe_zero3 = nullcontext()

    model.pretrained_model.set_adapter(target)  # set the LoRA adapter to be active
    with context_maybe_zero3:
        if target == "reward":  # save default head temporarily
            setattr(model, "default_head_weight", v_head_layer.weight.data.detach().clone())
            setattr(model, "default_head_bias", v_head_layer.bias.data.detach().clone())

        device = v_head_layer.weight.device
        v_head_layer.weight.data = model.get_buffer("{}_head_weight".format(target)).detach().clone().to(device)
        v_head_layer.bias.data = model.get_buffer("{}_head_bias".format(target)).detach().clone().to(device)


def dump_layernorm(model: "PreTrainedModel") -> Dict[str, torch.Tensor]:
    r"""
    Dumps the layernorm parameters in the model. The model is already unwrapped (and gathered).
    """
    layer_norm_params = {}
    for name, param in model.named_parameters():
        if param.data.dtype == torch.float32:
            layer_norm_params[name] = param.data.detach().clone()
            param.data = param.data.to(model.config.torch_dtype)

    return layer_norm_params


def restore_layernorm(model: "PreTrainedModel", layernorm_params: Optional[Dict[str, torch.Tensor]] = None) -> None:
    r"""
    Restores the layernorm parameters in the model. The model is already unwrapped (and gathered).
    """
    for name, param in model.named_parameters():
        if name in layernorm_params:
            param.data = layernorm_params[name]
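Note: dump_layernorm() and restore_layernorm() above implement a cache-cast-restore cycle: fp32 parameters are remembered, temporarily cast down around generation, then put back. A small self-contained illustration of the same pattern (not part of the commit; torch.float16 stands in for model.config.torch_dtype):

import torch

model = torch.nn.Sequential(torch.nn.Linear(4, 4), torch.nn.LayerNorm(4))

# dump: remember every fp32 parameter, then cast it to the lower-precision dtype
cached = {}
for name, param in model.named_parameters():
    if param.data.dtype == torch.float32:
        cached[name] = param.data.detach().clone()
        param.data = param.data.to(torch.float16)

# ... generation would run here with the low-precision weights ...

# restore: put the original fp32 tensors back
for name, param in model.named_parameters():
    if name in cached:
        param.data = cached[name]

print(next(model.parameters()).dtype)  # torch.float32 again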
LLaMA-Factory/src/llamafactory/train/ppo/trainer.py (new file, mode 100644)
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by HuggingFace's TRL library.
# https://github.com/huggingface/trl/blob/v0.8.0/trl/trainer/ppo_trainer.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import os
import sys
import warnings
from types import MethodType
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple

import torch
from accelerate.utils import DistributedDataParallelKwargs
from tqdm import tqdm
from transformers import GenerationConfig, Trainer, TrainerControl, TrainerState
from transformers.optimization import get_scheduler
from transformers.trainer import DEFAULT_CALLBACKS
from transformers.trainer_callback import CallbackHandler
from transformers.trainer_pt_utils import remove_dummy_checkpoint
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
from transformers.utils import SAFE_WEIGHTS_NAME, WEIGHTS_NAME
from trl import PPOConfig, PPOTrainer
from trl.core import PPODecorators, logprobs_from_logits
from trl.models.utils import unwrap_model_for_generation

from ...extras.logging import get_logger
from ...extras.misc import AverageMeter, count_parameters, get_current_device, get_logits_processor
from ..callbacks import FixValueHeadModelCallback, SaveProcessorCallback
from ..trainer_utils import create_custom_optimzer, create_custom_scheduler
from .ppo_utils import dump_layernorm, get_rewards_from_server, replace_model, restore_layernorm


if TYPE_CHECKING:
    from datasets import Dataset
    from transformers import (
        DataCollatorWithPadding,
        PreTrainedTokenizer,
        ProcessorMixin,
        Seq2SeqTrainingArguments,
        TrainerCallback,
    )
    from trl import AutoModelForCausalLMWithValueHead

    from ...hparams import FinetuningArguments, GeneratingArguments, ModelArguments


logger = get_logger(__name__)


class CustomPPOTrainer(PPOTrainer, Trainer):
    r"""
    Inherits PPOTrainer.
    """

    def __init__(
        self,
        model_args: "ModelArguments",
        training_args: "Seq2SeqTrainingArguments",
        finetuning_args: "FinetuningArguments",
        generating_args: "GeneratingArguments",
        callbacks: Optional[List["TrainerCallback"]],
        model: "AutoModelForCausalLMWithValueHead",
        reward_model: Optional["AutoModelForCausalLMWithValueHead"],
        ref_model: Optional["AutoModelForCausalLMWithValueHead"],
        tokenizer: "PreTrainedTokenizer",
        processor: Optional["ProcessorMixin"],
        data_collator: "DataCollatorWithPadding",
        train_dataset: Optional["Dataset"] = None,
        eval_dataset: Optional["Dataset"] = None,
    ) -> None:
        if eval_dataset is not None:
            raise NotImplementedError("PPOTrainer does not support eval dataset yet.")

        backward_batch_size = training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps
        ppo_config = PPOConfig(
            model_name=model_args.model_name_or_path,
            learning_rate=training_args.learning_rate,
            mini_batch_size=training_args.per_device_train_batch_size,
            batch_size=backward_batch_size * finetuning_args.ppo_buffer_size,
            gradient_accumulation_steps=training_args.gradient_accumulation_steps,
            ppo_epochs=finetuning_args.ppo_epochs,
            max_grad_norm=training_args.max_grad_norm,
            seed=training_args.seed,
            optimize_device_cache=True,
            target=finetuning_args.ppo_target,
            use_score_scaling=finetuning_args.ppo_score_norm,
            use_score_norm=finetuning_args.ppo_score_norm,
            whiten_rewards=finetuning_args.ppo_whiten_rewards,
            accelerator_kwargs={"step_scheduler_with_optimizer": False},
            log_with=training_args.report_to[0] if training_args.report_to else None,
            project_kwargs={"logging_dir": training_args.logging_dir},
        )

        # Add deepspeed config
        if training_args.deepspeed_plugin is not None:
            ppo_config.accelerator_kwargs["kwargs_handlers"] = [
                DistributedDataParallelKwargs(find_unused_parameters=training_args.ddp_find_unused_parameters)
            ]
            ppo_config.accelerator_kwargs["deepspeed_plugin"] = training_args.deepspeed_plugin
            if ppo_config.log_with is not None:
                logger.warning("PPOTrainer cannot use external logger when DeepSpeed is enabled.")
                ppo_config.log_with = None

        # Create optimizer and scheduler
        if training_args.max_steps > 0:
            num_training_steps = training_args.max_steps
        else:
            total_train_batch_size = backward_batch_size * finetuning_args.ppo_buffer_size * training_args.world_size
            num_training_steps = training_args.num_train_epochs * math.ceil(
                len(train_dataset) / total_train_batch_size
            )

        optimizer = self.create_optimizer(model, training_args, finetuning_args)
        scheduler = self.create_scheduler(training_args, num_training_steps, optimizer)

        PPOTrainer.__init__(
            self,
            config=ppo_config,
            model=model,
            ref_model=ref_model,
            tokenizer=tokenizer,
            dataset=train_dataset,
            data_collator=data_collator,
            lr_scheduler=scheduler,
        )

        self.args = training_args
        self.model_args = model_args
        self.finetuning_args = finetuning_args
        self.reward_model = reward_model
        self.current_device = get_current_device()  # patch for deepspeed training

        self.generation_config = GenerationConfig(
            pad_token_id=self.tokenizer.pad_token_id,
            eos_token_id=[self.tokenizer.eos_token_id] + self.tokenizer.additional_special_tokens_ids,
            **generating_args.to_dict(),
        )

        self.state = TrainerState()
        self.control = TrainerControl()
        self.is_deepspeed_enabled = getattr(self.accelerator.state, "deepspeed_plugin", None) is not None
        self.is_fsdp_enabled = getattr(self.accelerator.state, "fsdp_plugin", None) is not None
        callbacks = DEFAULT_CALLBACKS if callbacks is None else DEFAULT_CALLBACKS + callbacks
        self.callback_handler = CallbackHandler(
            callbacks, self.accelerator.unwrap_model(self.model), self.tokenizer, self.optimizer, self.lr_scheduler
        )
        if self.args.max_steps > 0:
            logger.info("max_steps is given, it will override any value given in num_train_epochs")

        self.amp_context = torch.autocast(self.current_device.type)
        warnings.simplefilter("ignore")  # remove gc warnings on ref model

        if finetuning_args.reward_model_type == "full":
            if self.is_deepspeed_enabled:
                if not (
                    getattr(reward_model.pretrained_model, "is_loaded_in_8bit", False)
                    or getattr(reward_model.pretrained_model, "is_loaded_in_4bit", False)
                ):  # quantized models are already set on the correct device
                    self.reward_model = self._prepare_deepspeed(self.reward_model)
            else:
                self.reward_model = self.accelerator.prepare_model(self.reward_model, evaluation_mode=True)

        self.add_callback(FixValueHeadModelCallback)

        if processor is not None:
            self.add_callback(SaveProcessorCallback(processor))

        if finetuning_args.use_badam:
            from badam import BAdamCallback, clip_grad_norm_old_version

            self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator)
            self.add_callback(BAdamCallback)

    def ppo_train(self, resume_from_checkpoint: Optional[str] = None) -> None:
        r"""
        Implements training loop for the PPO stage, like _inner_training_loop() in Huggingface's Trainer.
        """
        if resume_from_checkpoint is not None:
            raise ValueError("`resume_from_checkpoint` will be supported in the future version.")

        total_train_batch_size = (
            self.args.per_device_train_batch_size
            * self.args.gradient_accumulation_steps
            * self.finetuning_args.ppo_buffer_size
            * self.args.world_size
        )
        if self.args.max_steps > 0:
            num_examples = total_train_batch_size * self.args.max_steps
            num_train_epochs = sys.maxsize
            max_steps = self.args.max_steps
            steps_in_epoch = self.args.max_steps
        else:
            len_dataloader = len(self.dataloader)
            num_examples = len(self.dataset)
            num_train_epochs = self.args.num_train_epochs
            max_steps = math.ceil(num_train_epochs * len_dataloader)
            steps_in_epoch = len_dataloader

        self.state.max_steps = max_steps
        self.state.num_train_epochs = num_train_epochs
        self.state.is_local_process_zero = self.is_local_process_zero()
        self.state.is_world_process_zero = self.is_world_process_zero()

        if self.is_world_process_zero():
            logger.info("***** Running training *****")
            logger.info("  Num examples = {:,}".format(num_examples))
            logger.info("  Num Epochs = {:,}".format(num_train_epochs))
            logger.info("  Instantaneous batch size per device = {:,}".format(self.args.per_device_train_batch_size))
            logger.info(
                "  Total train batch size (w. parallel, buffer, distributed & accumulation) = {:,}".format(
                    total_train_batch_size
                )
            )
            logger.info("  Gradient Accumulation steps = {:,}".format(self.args.gradient_accumulation_steps))
            logger.info("  Num optimization epochs per batch = {:,}".format(self.finetuning_args.ppo_epochs))
            logger.info("  Total training steps = {:,}".format(max_steps))
            logger.info("  Number of trainable parameters = {:,}".format(count_parameters(self.model)[0]))

        dataiter = iter(self.dataloader)
        loss_meter = AverageMeter()
        reward_meter = AverageMeter()
        self.callback_handler.on_train_begin(self.args, self.state, self.control)

        for step in tqdm(range(max_steps), disable=not self.is_local_process_zero()):
            try:
                batch = next(dataiter)
            except StopIteration:
                dataiter = iter(self.dataloader)
                batch = next(dataiter)

            # Get inputs
            self.model.eval()
            self.tokenizer.padding_side = "right"  # change padding side
            queries, responses, rewards = [], [], []
            for idx in range(0, self.config.batch_size, self.config.mini_batch_size):
                mini_batch_queries, mini_batch_responses = self.get_inputs(
                    batch[idx : idx + self.config.mini_batch_size]
                )
                mini_batch_rewards = self.get_rewards(mini_batch_queries, mini_batch_responses)
                queries.extend(mini_batch_queries)
                responses.extend(mini_batch_responses)
                rewards.extend(mini_batch_rewards)

            # Run PPO step
            self.model.train()
            stats = self.step(queries, responses, rewards)
            self.tokenizer.padding_side = "left"  # restore padding side
            loss_meter.update(float(stats["ppo/loss/total"]), n=len(rewards))
            reward_meter.update(torch.stack(rewards).mean().item(), n=len(rewards))

            if self.config.log_with is not None:
                try:
                    batch["query"] = self.tokenizer.batch_decode(queries, skip_special_tokens=True)
                    batch["response"] = self.tokenizer.batch_decode(responses, skip_special_tokens=True)
                    self.log_stats(stats, batch, rewards)
                except Exception:
                    logger.warning("Failed to save stats due to unknown errors.")

            self.state.global_step += 1
            self.callback_handler.on_step_end(self.args, self.state, self.control)

            if self.is_local_process_zero() and (step + 1) % self.args.logging_steps == 0:
                logs = dict(
                    loss=round(loss_meter.avg, 4),
                    reward=round(reward_meter.avg, 4),
                    learning_rate=stats["ppo/learning_rate"],
                    epoch=round(step / steps_in_epoch, 2),
                )
                tqdm.write(str(logs))
                logs["step"] = step
                self.state.log_history.append(logs)
                self.callback_handler.on_log(self.args, self.state, self.control, logs)
                loss_meter.reset()
                reward_meter.reset()

            if (step + 1) % self.args.save_steps == 0:  # save checkpoint
                self.save_model(
                    os.path.join(self.args.output_dir, "{}-{}".format(PREFIX_CHECKPOINT_DIR, self.state.global_step))
                )
                self.callback_handler.on_save(self.args, self.state, self.control)

            if self.control.should_epoch_stop or self.control.should_training_stop:
                break

        self.callback_handler.on_train_end(self.args, self.state, self.control)

    def create_optimizer(
        self,
        model: "AutoModelForCausalLMWithValueHead",
        training_args: "Seq2SeqTrainingArguments",
        finetuning_args: "FinetuningArguments",
    ) -> "torch.optim.Optimizer":
        optimizer = create_custom_optimzer(model, training_args, finetuning_args)
        if optimizer is None:
            decay_params, nodecay_params = [], []
            decay_param_names = self.get_decay_parameter_names(model)
            for name, param in model.named_parameters():
                if param.requires_grad:
                    if name in decay_param_names:
                        decay_params.append(param)
                    else:
                        nodecay_params.append(param)

            optim_class, optim_kwargs = Trainer.get_optimizer_cls_and_kwargs(training_args)
            param_groups = [
                dict(params=nodecay_params),
                dict(params=decay_params, weight_decay=training_args.weight_decay),
            ]
            optimizer = optim_class(param_groups, **optim_kwargs)

        return optimizer

    def create_scheduler(
        self, training_args: "Seq2SeqTrainingArguments", num_training_steps: int, optimizer: "torch.optim.Optimizer"
    ) -> "torch.optim.lr_scheduler.LRScheduler":
        create_custom_scheduler(training_args, num_training_steps, optimizer)
        lr_scheduler = get_scheduler(
            training_args.lr_scheduler_type,
            optimizer=optimizer,
            num_warmup_steps=training_args.get_warmup_steps(num_training_steps),
            num_training_steps=num_training_steps,
        )
        return lr_scheduler

    @torch.no_grad()
    def get_inputs(self, batch: Dict[str, "torch.Tensor"]) -> Tuple[List["torch.Tensor"], List["torch.Tensor"]]:
        r"""
        Generates model's responses given queries.
        """
        if batch["input_ids"].size(0) == 1:  # handle llama2 ppo with gradient accumulation > 1
            start_index = (batch["input_ids"][0] != self.tokenizer.pad_token_id).nonzero()[0].item()
            for k, v in batch.items():
                batch[k] = v[:, start_index:]

        with unwrap_model_for_generation(self.model, self.accelerator) as unwrapped_model:
            unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model)
            if self.model_args.upcast_layernorm:
                layernorm_params = dump_layernorm(unwrapped_model)

            generate_output: "torch.Tensor" = unwrapped_model.generate(
                generation_config=self.generation_config, logits_processor=get_logits_processor(), **batch
            )
            if self.model_args.upcast_layernorm:
                restore_layernorm(unwrapped_model, layernorm_params)

        query = batch["input_ids"].detach().cpu()
        response = generate_output[:, batch["input_ids"].size(-1) :].detach().cpu()
        queries, responses = [], []
        for i in range(len(query)):
            query_start_index = (query[i] != self.tokenizer.pad_token_id).nonzero()[0].item()
            response_indexes = (response[i] != self.tokenizer.pad_token_id).nonzero()

            if len(response_indexes) == 0:  # allow empty response
                response_length = 1
            elif self.tokenizer.eos_token_id == self.tokenizer.pad_token_id:  # include eos token
                response_length = response_indexes[-1].item() + 2
            else:
                response_length = response_indexes[-1].item() + 1

            queries.append(query[i, query_start_index:])  # remove padding from left
            responses.append(response[i, :response_length])  # remove padding from right

        return queries, responses

    @torch.no_grad()
    def get_rewards(
        self,
        queries: List["torch.Tensor"],
        responses: List["torch.Tensor"],
    ) -> List["torch.Tensor"]:
        r"""
        Computes scores using given reward model.

        Both inputs and outputs are put on CPU.
        """
        if self.finetuning_args.reward_model_type == "api":
            token_ids = [torch.cat((q, r), dim=-1).tolist() for q, r in zip(queries, responses)]
            messages = self.tokenizer.batch_decode(token_ids, skip_special_tokens=True)
            return get_rewards_from_server(self.reward_model, messages)

        batch: Dict[str, "torch.Tensor"] = self.prepare_model_inputs(queries, responses)
        unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model)

        if self.finetuning_args.reward_model_type == "lora":
            replace_model(unwrapped_model, target="reward")
            reward_model = self.model
        else:
            reward_model = self.reward_model

        with unwrap_model_for_generation(reward_model, self.accelerator), self.amp_context:  # support bf16
            _, _, values = reward_model(**batch, return_dict=True, use_cache=False)

        if self.finetuning_args.reward_model_type == "lora":
            replace_model(unwrapped_model, target="default")

        rewards = values.gather(dim=-1, index=(batch["attention_mask"].sum(dim=-1, keepdim=True) - 1))
        return rewards.float().detach()  # use fp32 type

    @PPODecorators.empty_device_cache()
    def batched_forward_pass(
        self,
        model: "AutoModelForCausalLMWithValueHead",
        queries: "torch.Tensor",
        responses: "torch.Tensor",
        model_inputs: Dict[str, Any],
        return_logits: bool = False,
        response_masks: Optional["torch.Tensor"] = None,
    ) -> Tuple["torch.Tensor", Optional["torch.Tensor"], "torch.Tensor", "torch.Tensor"]:
        r"""
        Calculates model outputs in multiple batches.

        Subclass and override to inject custom behavior.
        """
        bs = len(queries)
        fbs = self.config.mini_batch_size
        all_logprobs = []
        all_logits = []
        all_masks = []
        all_values = []

        for i in range(math.ceil(bs / fbs)):
            input_kwargs = {key: value[i * fbs : (i + 1) * fbs] for key, value in model_inputs.items()}
            query_batch = queries[i * fbs : (i + 1) * fbs]
            response_batch = responses[i * fbs : (i + 1) * fbs]
            if response_masks is not None:
                response_masks_batch = response_masks[i * fbs : (i + 1) * fbs]

            input_ids = input_kwargs["input_ids"]
            attention_mask = input_kwargs["attention_mask"]

            with self.amp_context:  # support bf16
                logits, _, values = model(**input_kwargs, return_dict=True, use_cache=False)

            logprobs = logprobs_from_logits(logits[:, :-1, :], input_ids[:, 1:])
            masks = torch.zeros_like(attention_mask)
            masks[:, :-1] = attention_mask[:, 1:]

            for j in range(len(query_batch)):
                start = len(query_batch[j]) - 1
                if attention_mask[j, 0] == 0:  # offset left padding
                    start += attention_mask[j, :].nonzero()[0].item()
                end = start + len(response_batch[j])

                if response_masks is not None:
                    response_masks_batch = torch.cat((torch.zeros_like(query_batch[j]), response_masks_batch[j]))[1:]

                masks[j, :start] = 0
                masks[j, end:] = 0
                if response_masks is not None:
                    masks[j, start:end] = masks[j, start:end] * response_masks_batch[j][start:end]

            if return_logits:
                all_logits.append(logits)
            else:
                del logits

            all_values.append(values)
            all_logprobs.append(logprobs)
            all_masks.append(masks)

        return (
            torch.cat(all_logprobs),
            torch.cat(all_logits)[:, :-1] if return_logits else None,
            torch.cat(all_values)[:, :-1],
            torch.cat(all_masks)[:, :-1],
        )

    def save_model(self, output_dir: Optional[str] = None) -> None:
        r"""
        Saves model checkpoint.

        Subclass and override to inject custom behavior.
        """
        if output_dir is None:
            output_dir = self.args.output_dir

        if self.is_fsdp_enabled or self.is_deepspeed_enabled:
            try:
                state_dict = self.accelerator.get_state_dict(self.model)  # must be called at all ranks
                if self.args.should_save:
                    self._save(output_dir, state_dict=state_dict)
            except ValueError:
                logger.warning(
                    " stage3_gather_16bit_weights_on_model_save=false. Saving the full checkpoint instead,"
                    " use zero_to_fp32.py to recover weights"
                )
                if self.args.should_save:
                    self._save(output_dir, state_dict={})
                # remove the dummy state_dict
                remove_dummy_checkpoint(self.args.should_save, output_dir, [WEIGHTS_NAME, SAFE_WEIGHTS_NAME])
                self.model.save_checkpoint(output_dir)

        elif self.args.should_save:
            unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model)
            self._save(output_dir, state_dict=unwrapped_model.state_dict())
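Illustration (not part of the commit): how get_rewards() above reduces the value-head output to one scalar per sequence. With right padding, attention_mask.sum(-1) - 1 is the index of the last real token, so gather() reads the value predicted at that position.

import torch

values = torch.tensor([[0.1, 0.4, 0.9, 0.0],   # padded after 3 tokens
                       [0.2, 0.7, 0.5, 0.8]])  # full length 4
attention_mask = torch.tensor([[1, 1, 1, 0],
                               [1, 1, 1, 1]])

last_token_index = attention_mask.sum(dim=-1, keepdim=True) - 1  # [[2], [3]]
rewards = values.gather(dim=-1, index=last_token_index)
print(rewards.squeeze(-1))  # tensor([0.9000, 0.8000])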
LLaMA-Factory/src/llamafactory/train/ppo/workflow.py
0 → 100644
View file @ 032b90a1
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by the HuggingFace's TRL library.
# https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/ppo.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING, List, Optional

from transformers import DataCollatorWithPadding

from ...data import get_dataset
from ...extras.ploting import plot_loss
from ...model import load_model, load_tokenizer
from ..callbacks import fix_valuehead_checkpoint
from ..trainer_utils import create_ref_model, create_reward_model
from .trainer import CustomPPOTrainer


if TYPE_CHECKING:
    from transformers import Seq2SeqTrainingArguments, TrainerCallback

    from ...hparams import DataArguments, FinetuningArguments, GeneratingArguments, ModelArguments


def run_ppo(
    model_args: "ModelArguments",
    data_args: "DataArguments",
    training_args: "Seq2SeqTrainingArguments",
    finetuning_args: "FinetuningArguments",
    generating_args: "GeneratingArguments",
    callbacks: Optional[List["TrainerCallback"]] = None,
):
    tokenizer_module = load_tokenizer(model_args)
    tokenizer = tokenizer_module["tokenizer"]
    dataset_module = get_dataset(model_args, data_args, training_args, stage="ppo", **tokenizer_module)
    model = load_model(tokenizer, model_args, finetuning_args, training_args.do_train, add_valuehead=True)

    tokenizer.padding_side = "left"  # use left-padding in generation while using right-padding in training
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    # Create reference model and reward model
    ref_model = create_ref_model(model_args, finetuning_args, add_valuehead=True)
    reward_model = create_reward_model(model, model_args, finetuning_args)

    # Initialize our Trainer
    ppo_trainer: "CustomPPOTrainer" = CustomPPOTrainer(
        model_args=model_args,
        training_args=training_args,
        finetuning_args=finetuning_args,
        generating_args=generating_args,
        callbacks=callbacks,
        model=model,
        reward_model=reward_model,
        ref_model=ref_model,
        data_collator=data_collator,
        **dataset_module,
        **tokenizer_module,
    )

    # Training
    if training_args.do_train:
        ppo_trainer.ppo_train(resume_from_checkpoint=training_args.resume_from_checkpoint)
        ppo_trainer.save_model()

        if training_args.should_save:
            fix_valuehead_checkpoint(model, training_args.output_dir, training_args.save_safetensors)

        ppo_trainer.save_state()  # must be called after save_model to have a folder
        if ppo_trainer.is_world_process_zero() and finetuning_args.plot_loss:
            plot_loss(training_args.output_dir, keys=["loss", "reward"])
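Side note (not part of the commit): the workflow sets left padding for generation, and the trainer above flips back to right padding for the PPO forward pass. A small sketch of the difference, using the publicly available gpt2 tokenizer purely as an example:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # any tokenizer works; gpt2 needs a pad token assigned
tokenizer.pad_token = tokenizer.eos_token

tokenizer.padding_side = "left"
left = tokenizer(["hi", "hello there"], padding=True, return_tensors="pt")

tokenizer.padding_side = "right"
right = tokenizer(["hi", "hello there"], padding=True, return_tensors="pt")

print(left["attention_mask"])   # zeros at the front of the shorter sequence
print(right["attention_mask"])  # zeros at the back of the shorter sequence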
LLaMA-Factory/src/llamafactory/train/pt/__init__.py
0 → 100644
View file @ 032b90a1
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .workflow import run_pt


__all__ = ["run_pt"]
LLaMA-Factory/src/llamafactory/train/pt/trainer.py
0 → 100644
View file @ 032b90a1
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from types import MethodType
from typing import TYPE_CHECKING, Optional

from transformers import Trainer

from ...extras.logging import get_logger
from ..callbacks import PissaConvertCallback, SaveProcessorCallback
from ..trainer_utils import create_custom_optimzer, create_custom_scheduler


if TYPE_CHECKING:
    import torch
    from transformers import ProcessorMixin

    from ...hparams import FinetuningArguments


logger = get_logger(__name__)


class CustomTrainer(Trainer):
    r"""
    Inherits Trainer for custom optimizer.
    """

    def __init__(
        self, finetuning_args: "FinetuningArguments", processor: Optional["ProcessorMixin"], **kwargs
    ) -> None:
        super().__init__(**kwargs)
        self.finetuning_args = finetuning_args

        if processor is not None:
            self.add_callback(SaveProcessorCallback(processor))

        if finetuning_args.pissa_convert:
            self.add_callback(PissaConvertCallback)

        if finetuning_args.use_badam:
            from badam import BAdamCallback, clip_grad_norm_old_version

            self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator)
            self.add_callback(BAdamCallback)

    def create_optimizer(self) -> "torch.optim.Optimizer":
        if self.optimizer is None:
            self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args)
        return super().create_optimizer()

    def create_scheduler(
        self, num_training_steps: int, optimizer: Optional["torch.optim.Optimizer"] = None
    ) -> "torch.optim.lr_scheduler.LRScheduler":
        create_custom_scheduler(self.args, num_training_steps, optimizer)
        return super().create_scheduler(num_training_steps, optimizer)
LLaMA-Factory/src/llamafactory/train/pt/workflow.py
0 → 100644
View file @ 032b90a1
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by the HuggingFace's transformers library.
# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
from typing import TYPE_CHECKING, List, Optional

from transformers import DataCollatorForLanguageModeling

from ...data import get_dataset
from ...extras.ploting import plot_loss
from ...model import load_model, load_tokenizer
from ..trainer_utils import create_modelcard_and_push
from .trainer import CustomTrainer


if TYPE_CHECKING:
    from transformers import Seq2SeqTrainingArguments, TrainerCallback

    from ...hparams import DataArguments, FinetuningArguments, ModelArguments


def run_pt(
    model_args: "ModelArguments",
    data_args: "DataArguments",
    training_args: "Seq2SeqTrainingArguments",
    finetuning_args: "FinetuningArguments",
    callbacks: Optional[List["TrainerCallback"]] = None,
):
    tokenizer_module = load_tokenizer(model_args)
    tokenizer = tokenizer_module["tokenizer"]
    dataset_module = get_dataset(model_args, data_args, training_args, stage="pt", **tokenizer_module)
    model = load_model(tokenizer, model_args, finetuning_args, training_args.do_train)
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    # Initialize our Trainer
    trainer = CustomTrainer(
        model=model,
        args=training_args,
        finetuning_args=finetuning_args,
        data_collator=data_collator,
        callbacks=callbacks,
        **dataset_module,
        **tokenizer_module,
    )

    # Training
    if training_args.do_train:
        train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
        trainer.save_model()
        trainer.log_metrics("train", train_result.metrics)
        trainer.save_metrics("train", train_result.metrics)
        trainer.save_state()
        if trainer.is_world_process_zero() and finetuning_args.plot_loss:
            plot_loss(training_args.output_dir, keys=["loss", "eval_loss"])

    # Evaluation
    if training_args.do_eval:
        metrics = trainer.evaluate(metric_key_prefix="eval")
        try:
            perplexity = math.exp(metrics["eval_loss"])
        except OverflowError:
            perplexity = float("inf")

        metrics["perplexity"] = perplexity
        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Create model card
    create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args)
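Quick numeric check (not part of the commit) of the perplexity the evaluation branch reports: it is simply exp(eval_loss), with an overflow guard.

import math

eval_loss = 2.3026  # roughly ln(10)
try:
    perplexity = math.exp(eval_loss)
except OverflowError:
    perplexity = float("inf")
print(round(perplexity, 2))  # ~10.0, i.e. the model is about as uncertain as a 10-way guess per token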
LLaMA-Factory/src/llamafactory/train/rm/__init__.py
0 → 100644
View file @ 032b90a1
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .workflow import run_rm


__all__ = ["run_rm"]
LLaMA-Factory/src/llamafactory/train/rm/metric.py
0 → 100644
View file @ 032b90a1
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from dataclasses import dataclass
from typing import TYPE_CHECKING, Dict, Optional

import numpy as np

from ...extras.misc import numpify


if TYPE_CHECKING:
    from transformers import EvalPrediction


@dataclass
class ComputeAccuracy:
    def _dump(self) -> Optional[Dict[str, float]]:
        result = None
        if hasattr(self, "score_dict"):
            result = {k: float(np.mean(v)) for k, v in self.score_dict.items()}

        self.score_dict = {"accuracy": []}
        return result

    def __post_init__(self):
        self._dump()

    def __call__(self, eval_preds: "EvalPrediction", compute_result: bool = True) -> Optional[Dict[str, float]]:
        chosen_scores, rejected_scores = numpify(eval_preds.predictions[0]), numpify(eval_preds.predictions[1])
        if not chosen_scores.shape:
            self.score_dict["accuracy"].append(chosen_scores > rejected_scores)
        else:
            for i in range(len(chosen_scores)):
                self.score_dict["accuracy"].append(chosen_scores[i] > rejected_scores[i])

        if compute_result:
            return self._dump()
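Sketch (not part of the commit): driving ComputeAccuracy with a synthetic EvalPrediction, where predictions[0] holds the chosen scores and predictions[1] the rejected scores. The label_ids are dummies, and numpify is assumed to pass numpy arrays through unchanged; accuracy is the fraction of pairs where the chosen score wins.

import numpy as np
from transformers import EvalPrediction

from llamafactory.train.rm.metric import ComputeAccuracy  # assumes the package is installed

metric = ComputeAccuracy()
preds = EvalPrediction(
    predictions=(np.array([1.2, 0.3, 0.9]), np.array([0.4, 0.8, 0.1])),
    label_ids=np.zeros(3),
)
print(metric(preds, compute_result=True))  # {'accuracy': 0.666...}: 2 of 3 pairs ranked correctly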
LLaMA-Factory/src/llamafactory/train/rm/trainer.py
0 → 100644
View file @ 032b90a1
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by the HuggingFace's transformers library.
# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/trainer.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
from types import MethodType
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union

import torch
from transformers import Trainer

from ...extras.logging import get_logger
from ..callbacks import FixValueHeadModelCallback, PissaConvertCallback, SaveProcessorCallback
from ..trainer_utils import create_custom_optimzer, create_custom_scheduler


if TYPE_CHECKING:
    from transformers import PreTrainedModel, ProcessorMixin
    from transformers.trainer import PredictionOutput

    from ...hparams import FinetuningArguments


logger = get_logger(__name__)


class PairwiseTrainer(Trainer):
    r"""
    Inherits Trainer to compute pairwise loss.
    """

    def __init__(
        self, finetuning_args: "FinetuningArguments", processor: Optional["ProcessorMixin"], **kwargs
    ) -> None:
        super().__init__(**kwargs)
        self.finetuning_args = finetuning_args
        self.can_return_loss = True  # override property to return eval_loss
        self.add_callback(FixValueHeadModelCallback)

        if processor is not None:
            self.add_callback(SaveProcessorCallback(processor))

        if finetuning_args.pissa_convert:
            self.add_callback(PissaConvertCallback)

        if finetuning_args.use_badam:
            from badam import BAdamCallback, clip_grad_norm_old_version

            self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator)
            self.add_callback(BAdamCallback)

    def create_optimizer(self) -> "torch.optim.Optimizer":
        if self.optimizer is None:
            self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args)
        return super().create_optimizer()

    def create_scheduler(
        self, num_training_steps: int, optimizer: Optional["torch.optim.Optimizer"] = None
    ) -> "torch.optim.lr_scheduler.LRScheduler":
        create_custom_scheduler(self.args, num_training_steps, optimizer)
        return super().create_scheduler(num_training_steps, optimizer)

    def compute_loss(
        self, model: "PreTrainedModel", inputs: Dict[str, torch.Tensor], return_outputs: bool = False
    ) -> Union[torch.Tensor, Tuple[torch.Tensor, List[torch.Tensor]]]:
        r"""
        Computes pairwise loss. The first n examples are chosen and the last n examples are rejected.

        Subclass and override to inject custom behavior.

        Note that the first element will be removed from the output tuple.
        See: https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/trainer.py#L3842
        """
        _, _, values = model(**inputs, output_hidden_states=True, return_dict=True, use_cache=False)
        batch_size = inputs["input_ids"].size(0) // 2
        chosen_masks, rejected_masks = torch.split(inputs["attention_mask"], batch_size, dim=0)
        chosen_rewards, rejected_rewards = torch.split(values, batch_size, dim=0)
        chosen_scores = chosen_rewards.gather(dim=-1, index=(chosen_masks.sum(dim=-1, keepdim=True) - 1))
        rejected_scores = rejected_rewards.gather(dim=-1, index=(rejected_masks.sum(dim=-1, keepdim=True) - 1))
        chosen_scores, rejected_scores = chosen_scores.squeeze(), rejected_scores.squeeze()

        loss = -torch.nn.functional.logsigmoid(chosen_scores.float() - rejected_scores.float()).mean()
        if return_outputs:
            return loss, (loss, chosen_scores, rejected_scores)
        else:
            return loss

    def save_predictions(self, predict_results: "PredictionOutput") -> None:
        r"""
        Saves model predictions to `output_dir`.

        A custom behavior that is not contained in Seq2SeqTrainer.
        """
        if not self.is_world_process_zero():
            return

        output_prediction_file = os.path.join(self.args.output_dir, "generated_predictions.jsonl")
        logger.info(f"Saving prediction results to {output_prediction_file}")
        chosen_scores, rejected_scores = predict_results.predictions

        with open(output_prediction_file, "w", encoding="utf-8") as writer:
            res: List[str] = []
            for c_score, r_score in zip(chosen_scores, rejected_scores):
                res.append(json.dumps({"chosen": round(float(c_score), 2), "rejected": round(float(r_score), 2)}))

            writer.write("\n".join(res))
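Toy check (not part of the commit) of the pairwise objective used in compute_loss(): -log(sigmoid(chosen - rejected)), averaged over the batch. A correctly ranked pair with a large margin contributes almost nothing; an inverted pair dominates the loss.

import torch
import torch.nn.functional as F

chosen_scores = torch.tensor([2.0, 0.5])
rejected_scores = torch.tensor([0.0, 1.5])

loss = -F.logsigmoid(chosen_scores - rejected_scores).mean()
print(loss)  # ≈ 0.72: the first pair is ranked correctly, the second is not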
LLaMA-Factory/src/llamafactory/train/rm/workflow.py
0 → 100644
View file @ 032b90a1
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by the HuggingFace's transformers library.
# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/summarization/run_summarization.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING, List, Optional

from ...data import PairwiseDataCollatorWithPadding, get_dataset
from ...extras.ploting import plot_loss
from ...model import load_model, load_tokenizer
from ..callbacks import fix_valuehead_checkpoint
from ..trainer_utils import create_modelcard_and_push
from .metric import ComputeAccuracy
from .trainer import PairwiseTrainer


if TYPE_CHECKING:
    from transformers import Seq2SeqTrainingArguments, TrainerCallback

    from ...hparams import DataArguments, FinetuningArguments, ModelArguments


def run_rm(
    model_args: "ModelArguments",
    data_args: "DataArguments",
    training_args: "Seq2SeqTrainingArguments",
    finetuning_args: "FinetuningArguments",
    callbacks: Optional[List["TrainerCallback"]] = None,
):
    tokenizer_module = load_tokenizer(model_args)
    tokenizer = tokenizer_module["tokenizer"]
    dataset_module = get_dataset(model_args, data_args, training_args, stage="rm", **tokenizer_module)
    model = load_model(tokenizer, model_args, finetuning_args, training_args.do_train, add_valuehead=True)
    data_collator = PairwiseDataCollatorWithPadding(tokenizer, pad_to_multiple_of=8)

    # Update arguments
    training_args.remove_unused_columns = False  # important for pairwise dataset

    # Initialize our Trainer
    trainer = PairwiseTrainer(
        model=model,
        args=training_args,
        finetuning_args=finetuning_args,
        data_collator=data_collator,
        callbacks=callbacks,
        compute_metrics=ComputeAccuracy(),
        **dataset_module,
        **tokenizer_module,
    )

    # Training
    if training_args.do_train:
        train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
        trainer.save_model()
        if training_args.should_save:
            fix_valuehead_checkpoint(model, training_args.output_dir, training_args.save_safetensors)

        trainer.log_metrics("train", train_result.metrics)
        trainer.save_metrics("train", train_result.metrics)
        trainer.save_state()
        if trainer.is_world_process_zero() and finetuning_args.plot_loss:
            plot_loss(training_args.output_dir, keys=["loss", "eval_loss", "eval_accuracy"])

    # Evaluation
    if training_args.do_eval:
        metrics = trainer.evaluate(metric_key_prefix="eval")
        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Predict
    if training_args.do_predict:
        predict_results = trainer.predict(dataset_module["eval_dataset"], metric_key_prefix="predict")
        trainer.log_metrics("predict", predict_results.metrics)
        trainer.save_metrics("predict", predict_results.metrics)
        trainer.save_predictions(predict_results)

    # Create model card
    create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args)