chenzk / Baichuan_pytorch

Commit 0938ae70, authored Sep 12, 2023 by zhaoying1
Parent: 1b73554f

fix save method of adapter_model.bin
Showing 3 changed files with 44 additions and 54 deletions:

src/llmtuner/webui/locales.py   +0  -10
src/llmtuner/webui/runner.py    +26 -27
src/llmtuner/webui/utils.py     +18 -17
src/llmtuner/webui/locales.py

@@ -287,16 +287,6 @@ LOCALES = {
             "info": "是否启用 FP16 或 BF16 混合精度训练。"
         }
     },
-    "padding_side": {
-        "en": {
-            "label": "Padding side",
-            "info": "The side on which the model should have padding applied."
-        },
-        "zh": {
-            "label": "填充位置",
-            "info": "使用左填充或右填充。"
-        }
-    },
     "lora_tab": {
         "en": {
             "label": "LoRA configurations"
src/llmtuner/webui/runner.py
@@ -8,11 +8,11 @@ from transformers.trainer import TRAINING_ARGS_NAME
 from typing import Any, Dict, Generator, List, Tuple
 from llmtuner.extras.callbacks import LogCallback
-from llmtuner.extras.constants import DEFAULT_MODULE
+from llmtuner.extras.constants import DEFAULT_MODULE, TRAINING_STAGES
 from llmtuner.extras.logging import LoggerHandler
 from llmtuner.extras.misc import torch_gc
 from llmtuner.tuner import run_exp
-from llmtuner.webui.common import get_model_path, get_save_dir
+from llmtuner.webui.common import get_model_path, get_save_dir, load_config
 from llmtuner.webui.locales import ALERTS
 from llmtuner.webui.utils import gen_cmd, get_eval_results, update_process_bar
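Note: the two new imports above (TRAINING_STAGES and load_config) drive the changes further down in this file: the training stage is looked up from a mapping instead of an if/elif ladder on UI labels, and the cache directory is read from the saved web UI configuration. TRAINING_STAGES itself is not part of this commit; a minimal sketch of what it presumably contains, based on the stage labels removed below ("Reward Modeling", "PPO", "DPO", "Pre-Training") plus an assumed label for supervised fine-tuning, would be:

    # Hypothetical sketch of the mapping in llmtuner/extras/constants.py (not shown in this commit):
    # web UI stage labels mapped to the internal stage names used by run_exp.
    TRAINING_STAGES = {
        "Supervised Fine-Tuning": "sft",  # assumed label, not visible in the diff
        "Reward Modeling": "rm",
        "PPO": "ppo",
        "DPO": "dpo",
        "Pre-Training": "pt"
    }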
@@ -87,7 +87,6 @@ class Runner:
         save_steps: int,
         warmup_steps: int,
         compute_type: str,
-        padding_side: str,
         lora_rank: int,
         lora_dropout: float,
         lora_target: str,
@@ -98,21 +97,25 @@ class Runner:
     ) -> Tuple[str, str, List[str], str, Dict[str, Any]]:
         if checkpoints:
             checkpoint_dir = ",".join(
-                [os.path.join(get_save_dir(model_name), finetuning_type, ckpt) for ckpt in checkpoints]
+                [get_save_dir(model_name, finetuning_type, ckpt) for ckpt in checkpoints]
             )
         else:
             checkpoint_dir = None
-        output_dir = os.path.join(get_save_dir(model_name), finetuning_type, output_dir)
+        output_dir = get_save_dir(model_name, finetuning_type, output_dir)
+
+        user_config = load_config()
+        cache_dir = user_config.get("cache_dir", None)
 
         args = dict(
-            stage="sft",
+            stage=TRAINING_STAGES[training_stage],
             model_name_or_path=get_model_path(model_name),
             do_train=True,
-            overwrite_cache=True,
+            overwrite_cache=False,
+            cache_dir=cache_dir,
             checkpoint_dir=checkpoint_dir,
             finetuning_type=finetuning_type,
-            quantization_bit=int(quantization_bit) if quantization_bit and quantization_bit != "None" else None,
+            quantization_bit=int(quantization_bit) if quantization_bit in ["8", "4"] else None,
             template=template,
             system_prompt=system_prompt,
             dataset_dir=dataset_dir,
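Note: checkpoint_dir and output_dir are no longer built with os.path.join around get_save_dir; the extra path components are passed straight to get_save_dir, so the helper in llmtuner.webui.common has presumably been changed to accept variadic components itself. A sketch under that assumption (the "saves" base directory is a stand-in, not taken from this commit), with load_config assumed to return the persisted web UI settings as a dict:

    import os

    DEFAULT_SAVE_DIR = "saves"  # assumed base directory; not visible in this commit

    def get_save_dir(*args: str) -> str:
        # joins the base save directory with model name, finetuning type, checkpoint name, etc.
        return os.path.join(DEFAULT_SAVE_DIR, *args)

    # get_save_dir("Baichuan-7B", "lora", "checkpoint-100")
    # -> "saves/Baichuan-7B/lora/checkpoint-100"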
@@ -129,30 +132,22 @@ class Runner:
             logging_steps=logging_steps,
             save_steps=save_steps,
             warmup_steps=warmup_steps,
-            padding_side=padding_side,
             lora_rank=lora_rank,
             lora_dropout=lora_dropout,
             lora_target=lora_target or DEFAULT_MODULE.get(model_name.split("-")[0], "q_proj,v_proj"),
-            resume_lora_training=resume_lora_training,
+            resume_lora_training=(False if TRAINING_STAGES[training_stage] in ["rm", "ppo", "dpo"] else resume_lora_training),
             output_dir=output_dir
         )
         args[compute_type] = True
 
-        if training_stage == "Reward Modeling":
-            args["stage"] = "rm"
-            args["resume_lora_training"] = False
-        elif training_stage == "PPO":
-            args["stage"] = "ppo"
-            args["resume_lora_training"] = False
+        if args["stage"] == "ppo":
             args["reward_model"] = reward_model
+            args["padding_side"] = "left"
             val_size = 0
-        elif training_stage == "DPO":
-            args["stage"] = "dpo"
-            args["resume_lora_training"] = False
+
+        if args["stage"] == "dpo":
             args["dpo_beta"] = dpo_beta
-        elif training_stage == "Pre-Training":
-            args["stage"] = "pt"
 
         if val_size > 1e-6:
             args["val_size"] = val_size
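Note: the removed if/elif ladder set both the stage and resume_lora_training per branch; the new code takes the stage directly from TRAINING_STAGES and folds the resume_lora_training override into a single expression. A minimal sketch of the consolidated rule, using a hypothetical helper name for illustration only:

    def resolve_resume_lora_training(stage: str, requested: bool) -> bool:
        # RM, PPO and DPO always start a fresh LoRA adapter; other stages honor the checkbox.
        return False if stage in ["rm", "ppo", "dpo"] else requested

    assert resolve_resume_lora_training("ppo", True) is False
    assert resolve_resume_lora_training("sft", True) is True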
@@ -181,22 +176,26 @@ class Runner:
     ) -> Tuple[str, str, List[str], str, Dict[str, Any]]:
         if checkpoints:
             checkpoint_dir = ",".join(
-                [os.path.join(get_save_dir(model_name), finetuning_type, checkpoint) for checkpoint in checkpoints]
+                [get_save_dir(model_name, finetuning_type, ckpt) for ckpt in checkpoints]
             )
-            output_dir = os.path.join(get_save_dir(model_name), finetuning_type, "eval_" + "_".join(checkpoints))
+            output_dir = get_save_dir(model_name, finetuning_type, "eval_" + "_".join(checkpoints))
         else:
             checkpoint_dir = None
-            output_dir = os.path.join(get_save_dir(model_name), finetuning_type, "eval_base")
+            output_dir = get_save_dir(model_name, finetuning_type, "eval_base")
+
+        user_config = load_config()
+        cache_dir = user_config.get("cache_dir", None)
 
         args = dict(
             stage="sft",
             model_name_or_path=get_model_path(model_name),
             do_eval=True,
-            overwrite_cache=True,
+            overwrite_cache=False,
             predict_with_generate=True,
+            cache_dir=cache_dir,
             checkpoint_dir=checkpoint_dir,
             finetuning_type=finetuning_type,
-            quantization_bit=int(quantization_bit) if quantization_bit and quantization_bit != "None" else None,
+            quantization_bit=int(quantization_bit) if quantization_bit in ["8", "4"] else None,
             template=template,
             system_prompt=system_prompt,
             dataset_dir=dataset_dir,
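Note: the evaluation path mirrors the training path above: overwrite_cache flips to False, cache_dir comes from the user config, and the quantization_bit guard is tightened from "anything except the string None" to an explicit whitelist. A small standalone illustration of that behavioral difference (the function names are illustrative only, not part of the code):

    from typing import Optional

    def parse_quant_old(quantization_bit: str) -> Optional[int]:
        # old guard: any non-empty string other than "None" reaches int()
        return int(quantization_bit) if quantization_bit and quantization_bit != "None" else None

    def parse_quant_new(quantization_bit: str) -> Optional[int]:
        # new guard: only the two supported values are converted
        return int(quantization_bit) if quantization_bit in ["8", "4"] else None

    print(parse_quant_old("8"), parse_quant_new("8"))    # 8 8
    print(parse_quant_new("None"), parse_quant_new(""))  # None None (unexpected strings can no longer raise ValueError)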
src/llmtuner/webui/utils.py
@@ -3,10 +3,9 @@ import json
 import gradio as gr
 import matplotlib.figure
 import matplotlib.pyplot as plt
-from typing import TYPE_CHECKING, Any, Dict, Generator, List, Tuple
+from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Tuple
 from datetime import datetime
-from llmtuner.dsets.utils import EXT2TYPE
 from llmtuner.extras.ploting import smooth
 from llmtuner.tuner import export_model
 from llmtuner.webui.common import get_model_path, get_save_dir, DATA_CONFIG
@@ -37,6 +36,7 @@ def get_time() -> str:
 def can_preview(dataset_dir: str, dataset: list) -> Dict[str, Any]:
     with open(os.path.join(dataset_dir, DATA_CONFIG), "r", encoding="utf-8") as f:
         dataset_info = json.load(f)
     if (
         len(dataset) > 0
         and "file_name" in dataset_info[dataset[0]]
@@ -47,25 +47,26 @@ def can_preview(dataset_dir: str, dataset: list) -> Dict[str, Any]:
         return gr.update(interactive=False)
 
 
-def get_preview(dataset_dir: str, dataset: list) -> Tuple[int, list, Dict[str, Any]]:
+def get_preview(
+    dataset_dir: str, dataset: list, start: Optional[int] = 0, end: Optional[int] = 2
+) -> Tuple[int, list, Dict[str, Any]]:
     with open(os.path.join(dataset_dir, DATA_CONFIG), "r", encoding="utf-8") as f:
         dataset_info = json.load(f)
-    data_file = dataset_info[dataset[0]]["file_name"]
-    data = []
-    data_format = EXT2TYPE.get(data_file.split(".")[-1], None)
-    if data_format == "text":
-        with open(os.path.join(dataset_dir, data_file), "r", encoding="utf-8") as f:
-            for line in f:
-                data.append(line.strip())
-    elif data_format == "json":
-        with open(os.path.join(dataset_dir, data_file), "r", encoding="utf-8") as f:
+
+    data_file: str = dataset_info[dataset[0]]["file_name"]
+    with open(os.path.join(dataset_dir, data_file), "r", encoding="utf-8") as f:
+        if data_file.endswith(".json"):
             data = json.load(f)
-    return len(data), data[:2], gr.update(visible=True)
+        elif data_file.endswith(".jsonl"):
+            data = [json.loads(line) for line in f]
+        else:
+            data = [line for line in f]
+    return len(data), data[start:end], gr.update(visible=True)
 
 
 def can_quantize(finetuning_type: str) -> Dict[str, Any]:
     if finetuning_type != "lora":
-        return gr.update(value="", interactive=False)
+        return gr.update(value="None", interactive=False)
     else:
         return gr.update(interactive=True)
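Note: the rewritten get_preview dispatches on the file extension instead of EXT2TYPE, handles .jsonl line by line, and exposes start/end so the caller can page through the dataset instead of always taking the first two records. A self-contained sketch of the same reading logic, simplified to take a plain file path rather than the dataset_info lookup:

    import json
    from typing import List, Optional, Tuple

    def preview_file(path: str, start: Optional[int] = 0, end: Optional[int] = 2) -> Tuple[int, List]:
        # mirrors the new branching: .json is loaded whole, .jsonl per line, anything else as raw text
        with open(path, "r", encoding="utf-8") as f:
            if path.endswith(".json"):
                data = json.load(f)
            elif path.endswith(".jsonl"):
                data = [json.loads(line) for line in f]
            else:
                data = [line for line in f]
        return len(data), data[start:end]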
@@ -73,7 +74,7 @@ def can_quantize(finetuning_type: str) -> Dict[str, Any]:
 def gen_cmd(args: Dict[str, Any]) -> str:
     if args.get("do_train", None):
         args["plot_loss"] = True
-    cmd_lines = ["CUDA_VISIBLE_DEVICES=0 python "]
+    cmd_lines = ["CUDA_VISIBLE_DEVICES=0 python src/train_bash.py "]
     for k, v in args.items():
         if v is not None and v != "":
             cmd_lines.append("    --{} {} ".format(k, str(v)))
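Note: with the src/train_bash.py entry point now baked into the first line, gen_cmd produces a copy-pasteable command rather than a bare interpreter call. A rough sketch of the assembly with illustrative argument values; the final join with a trailing backslash is an assumption, since the return statement falls outside the visible hunk:

    from typing import Any, Dict

    def gen_cmd_sketch(args: Dict[str, Any]) -> str:
        # mirrors the patched gen_cmd loop; the "\\\n" join is assumed, not shown in the diff
        if args.get("do_train", None):
            args["plot_loss"] = True
        cmd_lines = ["CUDA_VISIBLE_DEVICES=0 python src/train_bash.py "]
        for k, v in args.items():
            if v is not None and v != "":
                cmd_lines.append("    --{} {} ".format(k, str(v)))
        return "\\\n".join(cmd_lines)

    print(gen_cmd_sketch({"stage": "sft", "do_train": True, "finetuning_type": "lora"}))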
@@ -89,7 +90,7 @@ def get_eval_results(path: os.PathLike) -> str:
 def gen_plot(base_model: str, finetuning_type: str, output_dir: str) -> matplotlib.figure.Figure:
-    log_file = os.path.join(get_save_dir(base_model), finetuning_type, output_dir, "trainer_log.jsonl")
+    log_file = get_save_dir(base_model, finetuning_type, output_dir, "trainer_log.jsonl")
     if not os.path.isfile(log_file):
         return None
@@ -138,7 +139,7 @@ def save_model(
         return
 
     checkpoint_dir = ",".join(
-        [os.path.join(get_save_dir(model_name), finetuning_type, checkpoint) for checkpoint in checkpoints]
+        [get_save_dir(model_name, finetuning_type, ckpt) for ckpt in checkpoints]
     )
 
     if not save_dir:
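Note: save_model builds the same comma-separated checkpoint_dir string as the runner, and this is the path list that feeds the adapter export the commit message ("fix save method of adapter_model.bin") refers to. A quick illustration of the string it assembles, using the directory layout assumed in the get_save_dir sketch above and made-up checkpoint names:

    import os

    # Illustrative only: the save root, model name, finetuning type and checkpoint names are made up.
    save_root = "saves"
    checkpoints = ["checkpoint-100", "checkpoint-200"]
    checkpoint_dir = ",".join(
        os.path.join(save_root, "Baichuan-7B", "lora", ckpt) for ckpt in checkpoints
    )
    print(checkpoint_dir)
    # saves/Baichuan-7B/lora/checkpoint-100,saves/Baichuan-7B/lora/checkpoint-200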