OpenDAS / LLaMA-Factory · Commits

Commit 2778a3d0
authored Jan 16, 2025 by luopl

updata to v0.9.1_stable

parent e92143e3
Changes: showing 20 of 172 changed files on this page, with 134 additions and 118 deletions (+134 −118).
src/llamafactory/train/sft/workflow.py      +11 −1
src/llamafactory/train/test_utils.py        +6 −7
src/llamafactory/train/trainer_utils.py     +14 −14
src/llamafactory/train/tuner.py             +11 −13
src/llamafactory/webui/chatter.py           +8 −1
src/llamafactory/webui/common.py            +16 −9
src/llamafactory/webui/components/data.py   +3 −3
src/llamafactory/webui/components/eval.py   +1 −1
src/llamafactory/webui/components/top.py    +6 −8
src/llamafactory/webui/components/train.py  +4 −4
src/llamafactory/webui/engine.py            +3 −3
src/llamafactory/webui/interface.py         +6 −4
src/llamafactory/webui/locales.py           +11 −25
src/llamafactory/webui/manager.py           +1 −1
src/llamafactory/webui/runner.py            +14 −6
src/llamafactory/webui/utils.py             +9 −9
src/webui.py                                +3 −2
tests/data/processors/test_feedback.py      +2 −2
tests/data/processors/test_pairwise.py      +2 −2
tests/data/processors/test_supervised.py    +3 −3
src/llamafactory/train/sft/workflow.py (+11 −1)

@@ -19,7 +19,7 @@ from typing import TYPE_CHECKING, List, Optional
 from ...data import SFTDataCollatorWith4DAttentionMask, get_dataset, get_template_and_fix_tokenizer
 from ...extras.constants import IGNORE_INDEX
-from ...extras.misc import get_logits_processor
+from ...extras.misc import cal_effective_tokens, get_logits_processor
 from ...extras.ploting import plot_loss
 from ...model import load_model, load_tokenizer
 from ..trainer_utils import create_modelcard_and_push
@@ -65,6 +65,11 @@ def run_sft(
     training_args.generation_num_beams = data_args.eval_num_beams or training_args.generation_num_beams
     training_args.remove_unused_columns = False  # important for multimodal dataset

+    effective_token_num = 0.0
+    if finetuning_args.include_effective_tokens_per_second:
+        for data in dataset_module["train_dataset"]:
+            effective_token_num += len(data["input_ids"])
+
     # Metric utils
     metric_module = {}
     if training_args.predict_with_generate:
@@ -94,6 +99,11 @@ def run_sft(
     # Training
     if training_args.do_train:
         train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
+        if finetuning_args.include_effective_tokens_per_second:
+            train_result.metrics["effective_tokens_per_sec"] = cal_effective_tokens(
+                effective_token_num, train_result.metrics["epoch"], train_result.metrics["train_runtime"]
+            )
+
         trainer.save_model()
         trainer.log_metrics("train", train_result.metrics)
         trainer.save_metrics("train", train_result.metrics)
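The new `include_effective_tokens_per_second` option counts every training token once up front, then normalizes by epochs and wall-clock runtime after training. `cal_effective_tokens` is imported from `...extras.misc` but its body is not part of this page; a minimal sketch of what it plausibly computes, under that assumption:

```python
# Hypothetical sketch of cal_effective_tokens; the real implementation lives
# in src/llamafactory/extras/misc.py and is not shown in this diff.
def cal_effective_tokens(effective_token_num: float, epoch: float, train_runtime: float) -> float:
    # The dataset is counted once, so scale by the number of epochs
    # before dividing by the total training time in seconds.
    return effective_token_num * epoch / train_runtime
```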
src/llamafactory/train/test_utils.py (+6 −7)

@@ -37,9 +37,9 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module", diff_k
     assert set(state_dict_a.keys()) == set(state_dict_b.keys())
     for name in state_dict_a.keys():
         if any(key in name for key in diff_keys):
-            assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-3, atol=1e-4) is False
+            assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-4, atol=1e-5) is False
         else:
-            assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-3, atol=1e-4) is True
+            assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-4, atol=1e-5) is True

 def check_lora_model(model: "LoraModel") -> Tuple[Set[str], Set[str]]:
@@ -80,18 +80,17 @@ def load_reference_model(
     is_trainable: bool = False,
     add_valuehead: bool = False,
 ) -> Union["PreTrainedModel", "LoraModel"]:
+    current_device = get_current_device()
     if add_valuehead:
         model: "AutoModelForCausalLMWithValueHead" = AutoModelForCausalLMWithValueHead.from_pretrained(
-            model_path, torch_dtype=torch.float16, device_map=get_current_device()
+            model_path, torch_dtype=torch.float16, device_map=current_device
         )
         if not is_trainable:
             model.v_head = model.v_head.to(torch.float16)

         return model

-    model = AutoModelForCausalLM.from_pretrained(
-        model_path, torch_dtype=torch.float16, device_map=get_current_device()
-    )
+    model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, device_map=current_device)
     if use_lora or use_pissa:
         model = PeftModel.from_pretrained(
             model, lora_path, subfolder="pissa_init" if use_pissa else None, is_trainable=is_trainable
@@ -110,7 +109,7 @@ def load_train_dataset(**kwargs) -> "Dataset":
     return dataset_module["train_dataset"]

-def patch_valuehead_model():
+def patch_valuehead_model() -> None:
     def post_init(self: "AutoModelForCausalLMWithValueHead", state_dict: Dict[str, "torch.Tensor"]) -> None:
         state_dict = {k[7:]: state_dict[k] for k in state_dict.keys() if k.startswith("v_head.")}
         self.v_head.load_state_dict(state_dict, strict=False)
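The tolerance change in `compare_model` is easy to misread: `torch.allclose(a, b, rtol, atol)` passes when `|a - b| <= atol + rtol * |b|` elementwise, so lowering both values makes the equality branch stricter. A runnable illustration:

```python
import torch

# |1.0000 - 1.0002| = 2e-4 sits between the old and new thresholds.
a = torch.tensor([1.0000])
b = torch.tensor([1.0002])
print(torch.allclose(a, b, rtol=1e-3, atol=1e-4))  # True under the old tolerances
print(torch.allclose(a, b, rtol=1e-4, atol=1e-5))  # False under the new ones
```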
src/llamafactory/train/trainer_utils.py (+14 −14)

@@ -28,8 +28,8 @@ from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS
 from transformers.trainer_pt_utils import get_parameter_names
 from typing_extensions import override

+from ..extras import logging
 from ..extras.constants import IGNORE_INDEX
-from ..extras.logging import get_logger
 from ..extras.packages import is_galore_available
 from ..hparams import FinetuningArguments, ModelArguments
 from ..model import find_all_linear_modules, load_model, load_tokenizer, load_valuehead_params
@@ -46,7 +46,7 @@ if TYPE_CHECKING:
     from ..hparams import DataArguments

-logger = get_logger(__name__)
+logger = logging.get_logger(__name__)

 class DummyOptimizer(torch.optim.Optimizer):
@@ -116,7 +116,7 @@ def create_ref_model(
         ref_model = load_model(
             tokenizer, ref_model_args, ref_finetuning_args, is_trainable=False, add_valuehead=add_valuehead
         )
-        logger.info("Created reference model from {}".format(finetuning_args.ref_model))
+        logger.info_rank0(f"Created reference model from {finetuning_args.ref_model}")
     else:
         if finetuning_args.finetuning_type == "lora":
             ref_model = None
@@ -127,7 +127,7 @@ def create_ref_model(
         ref_model = load_model(
             tokenizer, ref_model_args, ref_finetuning_args, is_trainable=False, add_valuehead=add_valuehead
         )
-        logger.info("Created reference model from the model itself.")
+        logger.info_rank0("Created reference model from the model itself.")

     return ref_model
@@ -140,7 +140,7 @@ def create_reward_model(
     """
     if finetuning_args.reward_model_type == "api":
         assert finetuning_args.reward_model.startswith("http"), "Please provide full url."
-        logger.info("Use reward server {}".format(finetuning_args.reward_model))
+        logger.info_rank0(f"Use reward server {finetuning_args.reward_model}")
         return finetuning_args.reward_model
     elif finetuning_args.reward_model_type == "lora":
         model.pretrained_model.load_adapter(finetuning_args.reward_model, "reward")
@@ -157,7 +157,7 @@ def create_reward_model(
         model.register_buffer(
             "default_head_bias", torch.zeros_like(vhead_params["v_head.summary.bias"]), persistent=False
         )
-        logger.info("Loaded adapter weights of reward model from {}".format(finetuning_args.reward_model))
+        logger.info_rank0(f"Loaded adapter weights of reward model from {finetuning_args.reward_model}")
         return None
     else:
         reward_model_args = ModelArguments.copyfrom(
@@ -171,8 +171,8 @@ def create_reward_model(
         reward_model = load_model(
             tokenizer, reward_model_args, reward_finetuning_args, is_trainable=False, add_valuehead=True
         )
-        logger.info("Loaded full weights of reward model from {}".format(finetuning_args.reward_model))
-        logger.warning("Please ensure the ppo model and reward model share SAME tokenizer and vocabulary.")
+        logger.info_rank0(f"Loaded full weights of reward model from {finetuning_args.reward_model}")
+        logger.warning_rank0("Please ensure the ppo model and reward model share SAME tokenizer and vocabulary.")
         return reward_model
@@ -231,7 +231,7 @@ def _create_galore_optimizer(
     elif training_args.optim == "adafactor":
         optim_class = GaLoreAdafactor
     else:
-        raise NotImplementedError("Unknow optim: {}".format(training_args.optim))
+        raise NotImplementedError(f"Unknow optim: {training_args.optim}")

     if finetuning_args.galore_layerwise:
         if training_args.gradient_accumulation_steps != 1:
@@ -265,7 +265,7 @@ def _create_galore_optimizer(
         ]

     optimizer = optim_class(param_groups, **optim_kwargs)
-    logger.info("Using GaLore optimizer, may cause hanging at the start of training, wait patiently.")
+    logger.info_rank0("Using GaLore optimizer, may cause hanging at the start of training, wait patiently.")
     return optimizer
@@ -305,7 +305,7 @@ def _create_loraplus_optimizer(
         dict(params=param_dict["embedding"], lr=embedding_lr, weight_decay=training_args.weight_decay),
     ]
     optimizer = optim_class(param_groups, **optim_kwargs)
-    logger.info("Using LoRA+ optimizer with loraplus lr ratio {:.2f}.".format(finetuning_args.loraplus_lr_ratio))
+    logger.info_rank0(f"Using LoRA+ optimizer with loraplus lr ratio {finetuning_args.loraplus_lr_ratio:.2f}.")
     return optimizer
@@ -343,7 +343,7 @@ def _create_badam_optimizer(
             verbose=finetuning_args.badam_verbose,
             ds_zero3_enabled=is_deepspeed_zero3_enabled(),
         )
-        logger.info(
+        logger.info_rank0(
             f"Using BAdam optimizer with layer-wise update, switch mode is {finetuning_args.badam_switch_mode}, "
             f"switch block every {finetuning_args.badam_switch_interval} steps, "
             f"default start block is {finetuning_args.badam_start_block}"
@@ -362,7 +362,7 @@ def _create_badam_optimizer(
             include_embedding=False,
             **optim_kwargs,
         )
-        logger.info(
+        logger.info_rank0(
             f"Using BAdam optimizer with ratio-based update, update ratio is {finetuning_args.badam_update_ratio}, "
             f"mask mode is {finetuning_args.badam_mask_mode}"
         )
@@ -391,7 +391,7 @@ def _create_adam_mini_optimizer(
         n_heads=num_q_head,
         n_kv_heads=num_kv_head,
     )
-    logger.info("Using Adam-mini optimizer.")
+    logger.info_rank0("Using Adam-mini optimizer.")
     return optimizer
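Every log call in this file migrates from `logger.info(...)` on a `get_logger` instance to `logger.info_rank0(...)` on the new `logging` facade. The `_rank0` suffix suggests the message is emitted only on the main process, so multi-GPU runs do not print one copy per worker; a minimal sketch of that idea (the real class in `llamafactory.extras.logging` is not shown in this diff, and the env-var check is an assumption):

```python
import logging
import os


class Rank0Logger(logging.Logger):
    """Hypothetical rank-aware logger; sketches the behavior of info_rank0/warning_rank0."""

    def _is_rank0(self) -> bool:
        # torchrun sets LOCAL_RANK per worker; treat an unset value as the main process.
        return int(os.environ.get("LOCAL_RANK", "0")) == 0

    def info_rank0(self, msg, *args) -> None:
        if self._is_rank0():
            self.info(msg, *args)

    def warning_rank0(self, msg, *args) -> None:
        if self._is_rank0():
            self.warning(msg, *args)
```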
src/llamafactory/train/tuner.py (+11 −13)

@@ -20,8 +20,8 @@ import torch
 from transformers import PreTrainedModel

 from ..data import get_template_and_fix_tokenizer
+from ..extras import logging
 from ..extras.constants import V_HEAD_SAFE_WEIGHTS_NAME, V_HEAD_WEIGHTS_NAME
-from ..extras.logging import get_logger
 from ..hparams import get_infer_args, get_train_args
 from ..model import load_model, load_tokenizer
 from .callbacks import LogCallback
@@ -37,7 +37,7 @@ if TYPE_CHECKING:
     from transformers import TrainerCallback

-logger = get_logger(__name__)
+logger = logging.get_logger(__name__)

 def run_exp(args: Optional[Dict[str, Any]] = None, callbacks: List["TrainerCallback"] = []) -> None:
@@ -57,7 +57,7 @@ def run_exp(args: Optional[Dict[str, Any]] = None, callbacks: List["TrainerCallb
     elif finetuning_args.stage == "kto":
         run_kto(model_args, data_args, training_args, finetuning_args, callbacks)
     else:
-        raise ValueError("Unknown task: {}.".format(finetuning_args.stage))
+        raise ValueError(f"Unknown task: {finetuning_args.stage}.")

 def export_model(args: Optional[Dict[str, Any]] = None) -> None:
@@ -91,18 +91,18 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None:
         setattr(model.config, "torch_dtype", output_dtype)
         model = model.to(output_dtype)
-        logger.info("Convert model dtype to: {}.".format(output_dtype))
+        logger.info_rank0(f"Convert model dtype to: {output_dtype}.")

     model.save_pretrained(
         save_directory=model_args.export_dir,
-        max_shard_size="{}GB".format(model_args.export_size),
+        max_shard_size=f"{model_args.export_size}GB",
         safe_serialization=(not model_args.export_legacy_format),
     )
     if model_args.export_hub_model_id is not None:
         model.push_to_hub(
             model_args.export_hub_model_id,
             token=model_args.hf_hub_token,
-            max_shard_size="{}GB".format(model_args.export_size),
+            max_shard_size=f"{model_args.export_size}GB",
             safe_serialization=(not model_args.export_legacy_format),
         )
@@ -117,13 +117,13 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None:
                 os.path.join(vhead_path, V_HEAD_SAFE_WEIGHTS_NAME),
                 os.path.join(model_args.export_dir, V_HEAD_SAFE_WEIGHTS_NAME),
             )
-            logger.info("Copied valuehead to {}.".format(model_args.export_dir))
+            logger.info_rank0(f"Copied valuehead to {model_args.export_dir}.")
         elif os.path.exists(os.path.join(vhead_path, V_HEAD_WEIGHTS_NAME)):
             shutil.copy(
                 os.path.join(vhead_path, V_HEAD_WEIGHTS_NAME),
                 os.path.join(model_args.export_dir, V_HEAD_WEIGHTS_NAME),
             )
-            logger.info("Copied valuehead to {}.".format(model_args.export_dir))
+            logger.info_rank0(f"Copied valuehead to {model_args.export_dir}.")

     try:
         tokenizer.padding_side = "left"  # restore padding side
@@ -133,11 +133,9 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None:
             tokenizer.push_to_hub(model_args.export_hub_model_id, token=model_args.hf_hub_token)

         if processor is not None:
-            getattr(processor, "image_processor").save_pretrained(model_args.export_dir)
+            processor.save_pretrained(model_args.export_dir)
             if model_args.export_hub_model_id is not None:
-                getattr(processor, "image_processor").push_to_hub(
-                    model_args.export_hub_model_id, token=model_args.hf_hub_token
-                )
+                processor.push_to_hub(model_args.export_hub_model_id, token=model_args.hf_hub_token)

     except Exception as e:
-        logger.warning("Cannot save tokenizer, please copy the files manually: {}.".format(e))
+        logger.warning_rank0(f"Cannot save tokenizer, please copy the files manually: {e}.")
src/llamafactory/webui/chatter.py (+8 −1)

@@ -141,7 +141,14 @@ class WebChatModel(ChatModel):
         chatbot[-1][1] = ""
         response = ""
         for new_text in self.stream_chat(
-            messages, system, tools, image, video, max_new_tokens=max_new_tokens, top_p=top_p, temperature=temperature
+            messages,
+            system,
+            tools,
+            images=[image] if image else None,
+            videos=[video] if video else None,
+            max_new_tokens=max_new_tokens,
+            top_p=top_p,
+            temperature=temperature,
         ):
             response += new_text
             if tools:
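The Web UI used to hand `stream_chat` a bare `image` and `video`; the engine now expects optional lists, so single attachments are wrapped and missing ones become `None`. The wrapping idiom in isolation:

```python
# The pattern used above: promote one optional attachment to the list-typed
# parameter the chat engine expects, or pass None when nothing was uploaded.
def wrap_media(item):
    return [item] if item else None

assert wrap_media(None) is None
assert wrap_media("cat.png") == ["cat.png"]
```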
src/llamafactory/webui/common.py (+16 −9)

@@ -19,6 +19,7 @@ from typing import Any, Dict, Optional, Tuple
 from yaml import safe_dump, safe_load

+from ..extras import logging
 from ..extras.constants import (
     CHECKPOINT_NAMES,
     DATA_CONFIG,
@@ -30,8 +31,7 @@ from ..extras.constants import (
     VISION_MODELS,
     DownloadSource,
 )
-from ..extras.logging import get_logger
-from ..extras.misc import use_modelscope
+from ..extras.misc import use_modelscope, use_openmind
 from ..extras.packages import is_gradio_available
@@ -39,7 +39,7 @@ if is_gradio_available():
     import gradio as gr

-logger = get_logger(__name__)
+logger = logging.get_logger(__name__)

 DEFAULT_CACHE_DIR = "cache"
@@ -56,7 +56,7 @@ def get_save_dir(*paths: str) -> os.PathLike:
     Gets the path to saved model checkpoints.
     """
     if os.path.sep in paths[-1]:
-        logger.warning("Found complex path, some features may be not available.")
+        logger.warning_rank0("Found complex path, some features may be not available.")
         return paths[-1]

     paths = (path.replace(" ", "").strip() for path in paths)
@@ -75,7 +75,7 @@ def load_config() -> Dict[str, Any]:
     Loads user config if exists.
     """
     try:
-        with open(get_config_path(), "r", encoding="utf-8") as f:
+        with open(get_config_path(), encoding="utf-8") as f:
             return safe_load(f)
     except Exception:
         return {"lang": None, "last_model": None, "path_dict": {}, "cache_dir": None}
@@ -109,9 +109,16 @@ def get_model_path(model_name: str) -> str:
         use_modelscope()
         and path_dict.get(DownloadSource.MODELSCOPE)
         and model_path == path_dict.get(DownloadSource.DEFAULT)
-    ):  # replace path
+    ):  # replace hf path with ms path
         model_path = path_dict.get(DownloadSource.MODELSCOPE)

+    if (
+        use_openmind()
+        and path_dict.get(DownloadSource.OPENMIND)
+        and model_path == path_dict.get(DownloadSource.DEFAULT)
+    ):  # replace hf path with om path
+        model_path = path_dict.get(DownloadSource.OPENMIND)
+
     return model_path
@@ -165,14 +172,14 @@ def load_dataset_info(dataset_dir: str) -> Dict[str, Dict[str, Any]]:
     Loads dataset_info.json.
     """
     if dataset_dir == "ONLINE" or dataset_dir.startswith("REMOTE:"):
-        logger.info("dataset_dir is {}, using online dataset.".format(dataset_dir))
+        logger.info_rank0(f"dataset_dir is {dataset_dir}, using online dataset.")
         return {}

     try:
-        with open(os.path.join(dataset_dir, DATA_CONFIG), "r", encoding="utf-8") as f:
+        with open(os.path.join(dataset_dir, DATA_CONFIG), encoding="utf-8") as f:
             return json.load(f)
     except Exception as err:
-        logger.warning("Cannot open {} due to {}.".format(os.path.join(dataset_dir, DATA_CONFIG), str(err)))
+        logger.warning_rank0(f"Cannot open {os.path.join(dataset_dir, DATA_CONFIG)} due to {str(err)}.")
         return {}
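`get_model_path` now falls back to an Openmind mirror exactly the way it already did for ModelScope. `use_openmind` comes from `..extras.misc` and is not defined in this diff; by analogy with `use_modelscope` it most likely reads an environment switch (the variable name below is an assumption):

```python
import os

# Hypothetical sketch of use_openmind from ..extras.misc, assumed to mirror
# use_modelscope; the env var name USE_OPENMIND_HUB is a guess.
def use_openmind() -> bool:
    return os.environ.get("USE_OPENMIND_HUB", "0").lower() in ["true", "1"]
```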
src/llamafactory/webui/components/data.py (+3 −3)

@@ -41,7 +41,7 @@ def next_page(page_index: int, total_num: int) -> int:
 def can_preview(dataset_dir: str, dataset: list) -> "gr.Button":
     try:
-        with open(os.path.join(dataset_dir, DATA_CONFIG), "r", encoding="utf-8") as f:
+        with open(os.path.join(dataset_dir, DATA_CONFIG), encoding="utf-8") as f:
             dataset_info = json.load(f)
     except Exception:
         return gr.Button(interactive=False)
@@ -57,7 +57,7 @@ def can_preview(dataset_dir: str, dataset: list) -> "gr.Button":
 def _load_data_file(file_path: str) -> List[Any]:
-    with open(file_path, "r", encoding="utf-8") as f:
+    with open(file_path, encoding="utf-8") as f:
         if file_path.endswith(".json"):
             return json.load(f)
         elif file_path.endswith(".jsonl"):
@@ -67,7 +67,7 @@ def _load_data_file(file_path: str) -> List[Any]:
 def get_preview(dataset_dir: str, dataset: list, page_index: int) -> Tuple[int, list, "gr.Column"]:
-    with open(os.path.join(dataset_dir, DATA_CONFIG), "r", encoding="utf-8") as f:
+    with open(os.path.join(dataset_dir, DATA_CONFIG), encoding="utf-8") as f:
         dataset_info = json.load(f)

     data_path = os.path.join(dataset_dir, dataset_info[dataset[0]]["file_name"])
src/llamafactory/webui/components/eval.py (+1 −1)

@@ -42,7 +42,7 @@ def create_eval_tab(engine: "Engine") -> Dict[str, "Component"]:
     elem_dict.update(dict(dataset_dir=dataset_dir, dataset=dataset, **preview_elems))

     with gr.Row():
-        cutoff_len = gr.Slider(minimum=4, maximum=65536, value=1024, step=1)
+        cutoff_len = gr.Slider(minimum=4, maximum=131072, value=1024, step=1)
         max_samples = gr.Textbox(value="100000")
         batch_size = gr.Slider(minimum=1, maximum=1024, value=2, step=1)
         predict = gr.Checkbox(value=True)
src/llamafactory/webui/components/top.py (+6 −8)

@@ -41,13 +41,12 @@ def create_top() -> Dict[str, "Component"]:
         finetuning_type = gr.Dropdown(choices=METHODS, value="lora", scale=1)
         checkpoint_path = gr.Dropdown(multiselect=True, allow_custom_value=True, scale=6)

-    with gr.Accordion(open=False) as advanced_tab:
-        with gr.Row():
-            quantization_bit = gr.Dropdown(choices=["none", "8", "4"], value="none", allow_custom_value=True, scale=2)
-            quantization_method = gr.Dropdown(choices=["bitsandbytes", "hqq", "eetq"], value="bitsandbytes", scale=2)
-            template = gr.Dropdown(choices=list(TEMPLATES.keys()), value="default", scale=2)
-            rope_scaling = gr.Radio(choices=["none", "linear", "dynamic"], value="none", scale=3)
-            booster = gr.Radio(choices=["auto", "flashattn2", "unsloth", "liger_kernel"], value="auto", scale=5)
+    with gr.Row():
+        quantization_bit = gr.Dropdown(choices=["none", "8", "4"], value="none", allow_custom_value=True, scale=2)
+        quantization_method = gr.Dropdown(choices=["bitsandbytes", "hqq", "eetq"], value="bitsandbytes", scale=2)
+        template = gr.Dropdown(choices=list(TEMPLATES.keys()), value="default", scale=2)
+        rope_scaling = gr.Radio(choices=["none", "linear", "dynamic"], value="none", scale=3)
+        booster = gr.Radio(choices=["auto", "flashattn2", "unsloth", "liger_kernel"], value="auto", scale=5)

     model_name.change(get_model_info, [model_name], [model_path, template], queue=False).then(
         list_checkpoints, [model_name, finetuning_type], [checkpoint_path], queue=False
@@ -66,7 +65,6 @@ def create_top() -> Dict[str, "Component"]:
         model_path=model_path,
         finetuning_type=finetuning_type,
         checkpoint_path=checkpoint_path,
-        advanced_tab=advanced_tab,
         quantization_bit=quantization_bit,
         quantization_method=quantization_method,
         template=template,
src/llamafactory/webui/components/train.py (+4 −4)

@@ -68,7 +68,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
     )
     with gr.Row():
-        cutoff_len = gr.Slider(minimum=4, maximum=65536, value=1024, step=1)
+        cutoff_len = gr.Slider(minimum=4, maximum=131072, value=2048, step=1)
         batch_size = gr.Slider(minimum=1, maximum=1024, value=2, step=1)
         gradient_accumulation_steps = gr.Slider(minimum=1, maximum=1024, value=8, step=1)
         val_size = gr.Slider(minimum=0, maximum=1, value=0, step=0.001)
@@ -91,7 +91,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
         save_steps = gr.Slider(minimum=10, maximum=5000, value=100, step=10)
         warmup_steps = gr.Slider(minimum=0, maximum=5000, value=0, step=1)
         neftune_alpha = gr.Slider(minimum=0, maximum=10, value=0, step=0.1)
-        optim = gr.Textbox(value="adamw_torch")
+        extra_args = gr.Textbox(value='{"optim": "adamw_torch"}')

     with gr.Row():
         with gr.Column():
@@ -116,7 +116,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
             save_steps,
             warmup_steps,
             neftune_alpha,
-            optim,
+            extra_args,
             packing,
             neat_packing,
             train_on_prompt,
@@ -134,7 +134,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
             save_steps=save_steps,
             warmup_steps=warmup_steps,
             neftune_alpha=neftune_alpha,
-            optim=optim,
+            extra_args=extra_args,
             packing=packing,
             neat_packing=neat_packing,
             train_on_prompt=train_on_prompt,
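The dedicated optimizer textbox is replaced by `extra_args`, a free-form JSON object whose keys are forwarded to the trainer; the old behavior survives as the default `{"optim": "adamw_torch"}`. An illustrative value (the specific keys below are examples, not defaults):

```python
import json

# Any JSON object is accepted; its keys are merged into the training arguments.
extra_args = '{"optim": "adamw_8bit", "logging_steps": 10}'
assert json.loads(extra_args) == {"optim": "adamw_8bit", "logging_steps": 10}
```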
src/llamafactory/webui/engine.py (+3 −3)

@@ -56,9 +56,9 @@ class Engine:
         if not self.pure_chat:
             current_time = get_time()
             init_dict["train.current_time"] = {"value": current_time}
-            init_dict["train.output_dir"] = {"value": "train_{}".format(current_time)}
-            init_dict["train.config_path"] = {"value": "{}.yaml".format(current_time)}
-            init_dict["eval.output_dir"] = {"value": "eval_{}".format(current_time)}
+            init_dict["train.output_dir"] = {"value": f"train_{current_time}"}
+            init_dict["train.config_path"] = {"value": f"{current_time}.yaml"}
+            init_dict["eval.output_dir"] = {"value": f"eval_{current_time}"}
             init_dict["infer.mm_box"] = {"visible": False}

         if user_config.get("last_model", None):
src/llamafactory/webui/interface.py (+6 −4)

@@ -85,12 +85,14 @@ def create_web_demo() -> "gr.Blocks":
 def run_web_ui() -> None:
-    gradio_share = os.environ.get("GRADIO_SHARE", "0").lower() in ["true", "1"]
-    server_name = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
+    gradio_ipv6 = os.getenv("GRADIO_IPV6", "0").lower() in ["true", "1"]
+    gradio_share = os.getenv("GRADIO_SHARE", "0").lower() in ["true", "1"]
+    server_name = os.getenv("GRADIO_SERVER_NAME", "[::]" if gradio_ipv6 else "0.0.0.0")
     create_ui().queue().launch(share=gradio_share, server_name=server_name, inbrowser=True)

 def run_web_demo() -> None:
-    gradio_share = os.environ.get("GRADIO_SHARE", "0").lower() in ["true", "1"]
-    server_name = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
+    gradio_ipv6 = os.getenv("GRADIO_IPV6", "0").lower() in ["true", "1"]
+    gradio_share = os.getenv("GRADIO_SHARE", "0").lower() in ["true", "1"]
+    server_name = os.getenv("GRADIO_SERVER_NAME", "[::]" if gradio_ipv6 else "0.0.0.0")
     create_web_demo().queue().launch(share=gradio_share, server_name=server_name, inbrowser=True)
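With the new `GRADIO_IPV6` switch the default bind address becomes `[::]` (all interfaces, IPv6) instead of `0.0.0.0`, while an explicit `GRADIO_SERVER_NAME` still takes precedence. A standalone recreation of the resolution order:

```python
import os

os.environ["GRADIO_IPV6"] = "1"             # opt in to IPv6
os.environ.pop("GRADIO_SERVER_NAME", None)  # no explicit override set

gradio_ipv6 = os.getenv("GRADIO_IPV6", "0").lower() in ["true", "1"]
server_name = os.getenv("GRADIO_SERVER_NAME", "[::]" if gradio_ipv6 else "0.0.0.0")
assert server_name == "[::]"
```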
src/llamafactory/webui/locales.py (+11 −25)

@@ -87,20 +87,6 @@ LOCALES = {
             "label": "체크포인트 경로",
         },
     },
-    "advanced_tab": {
-        "en": {
-            "label": "Advanced configurations",
-        },
-        "ru": {
-            "label": "Расширенные конфигурации",
-        },
-        "zh": {
-            "label": "高级设置",
-        },
-        "ko": {
-            "label": "고급 설정",
-        },
-    },
     "quantization_bit": {
         "en": {
             "label": "Quantization bit",
@@ -581,11 +567,11 @@ LOCALES = {
     },
     "neftune_alpha": {
         "en": {
-            "label": "NEFTune Alpha",
+            "label": "NEFTune alpha",
             "info": "Magnitude of noise adding to embedding vectors.",
         },
         "ru": {
-            "label": "NEFTune Alpha",
+            "label": "NEFTune alpha",
             "info": "Величина шума, добавляемого к векторам вложений.",
         },
         "zh": {
@@ -597,22 +583,22 @@ LOCALES = {
             "info": "임베딩 벡터에 추가되는 노이즈의 크기.",
         },
     },
-    "optim": {
+    "extra_args": {
         "en": {
-            "label": "Optimizer",
-            "info": "The optimizer to use: adamw_torch, adamw_8bit or adafactor.",
+            "label": "Extra arguments",
+            "info": "Extra arguments passed to the trainer in JSON format.",
         },
         "ru": {
-            "label": "Оптимизатор",
-            "info": "Оптимизатор для использования: adamw_torch, adamw_8bit или adafactor.",
+            "label": "Дополнительные аргументы",
+            "info": "Дополнительные аргументы, которые передаются тренеру в формате JSON.",
         },
         "zh": {
-            "label": "优化器",
-            "info": "使用的优化器:adamw_torch、adamw_8bit 或 adafactor。",
+            "label": "额外参数",
+            "info": "以 JSON 格式传递给训练器的额外参数。",
         },
         "ko": {
-            "label": "옵티마이저",
-            "info": "사용할 옵티마이저: adamw_torch, adamw_8bit 또는 adafactor 등.",
+            "label": "추가 인수",
+            "info": "JSON 형식으로 트레이너에게 전달할 추가 인수입니다.",
         },
     },
     "packing": {
src/llamafactory/webui/manager.py (+1 −1)

@@ -29,7 +29,7 @@ class Manager:
         Adds elements to manager.
         """
         for elem_name, elem in elem_dict.items():
-            elem_id = "{}.{}".format(tab_name, elem_name)
+            elem_id = f"{tab_name}.{elem_name}"
             self._id_to_elem[elem_id] = elem
             self._elem_to_id[elem] = elem_id
...
src/llamafactory/webui/runner.py
View file @
2778a3d0
...
@@ -12,6 +12,7 @@
...
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
import
json
import
os
import
os
from
copy
import
deepcopy
from
copy
import
deepcopy
from
subprocess
import
Popen
,
TimeoutExpired
from
subprocess
import
Popen
,
TimeoutExpired
...
@@ -21,7 +22,7 @@ from transformers.trainer import TRAINING_ARGS_NAME
...
@@ -21,7 +22,7 @@ from transformers.trainer import TRAINING_ARGS_NAME
from
..extras.constants
import
LLAMABOARD_CONFIG
,
PEFT_METHODS
,
TRAINING_STAGES
from
..extras.constants
import
LLAMABOARD_CONFIG
,
PEFT_METHODS
,
TRAINING_STAGES
from
..extras.misc
import
is_gpu_or_npu_available
,
torch_gc
from
..extras.misc
import
is_gpu_or_npu_available
,
torch_gc
from
..extras.packages
import
is_gradio_available
from
..extras.packages
import
is_gradio_available
,
is_transformers_version_equal_to_4_46
from
.common
import
DEFAULT_CACHE_DIR
,
DEFAULT_CONFIG_DIR
,
QUANTIZATION_BITS
,
get_save_dir
,
load_config
from
.common
import
DEFAULT_CACHE_DIR
,
DEFAULT_CONFIG_DIR
,
QUANTIZATION_BITS
,
get_save_dir
,
load_config
from
.locales
import
ALERTS
,
LOCALES
from
.locales
import
ALERTS
,
LOCALES
from
.utils
import
abort_process
,
gen_cmd
,
get_eval_results
,
get_trainer_info
,
load_args
,
save_args
,
save_cmd
from
.utils
import
abort_process
,
gen_cmd
,
get_eval_results
,
get_trainer_info
,
load_args
,
save_args
,
save_cmd
...
@@ -78,6 +79,11 @@ class Runner:
...
@@ -78,6 +79,11 @@ class Runner:
if
not
get
(
"train.output_dir"
):
if
not
get
(
"train.output_dir"
):
return
ALERTS
[
"err_no_output_dir"
][
lang
]
return
ALERTS
[
"err_no_output_dir"
][
lang
]
try
:
json
.
loads
(
get
(
"train.extra_args"
))
except
json
.
JSONDecodeError
:
return
ALERTS
[
"err_json_schema"
][
lang
]
stage
=
TRAINING_STAGES
[
get
(
"train.training_stage"
)]
stage
=
TRAINING_STAGES
[
get
(
"train.training_stage"
)]
if
stage
==
"ppo"
and
not
get
(
"train.reward_model"
):
if
stage
==
"ppo"
and
not
get
(
"train.reward_model"
):
return
ALERTS
[
"err_no_reward_model"
][
lang
]
return
ALERTS
[
"err_no_reward_model"
][
lang
]
...
@@ -92,6 +98,7 @@ class Runner:
...
@@ -92,6 +98,7 @@ class Runner:
def
_finalize
(
self
,
lang
:
str
,
finish_info
:
str
)
->
str
:
def
_finalize
(
self
,
lang
:
str
,
finish_info
:
str
)
->
str
:
finish_info
=
ALERTS
[
"info_aborted"
][
lang
]
if
self
.
aborted
else
finish_info
finish_info
=
ALERTS
[
"info_aborted"
][
lang
]
if
self
.
aborted
else
finish_info
gr
.
Info
(
finish_info
)
self
.
trainer
=
None
self
.
trainer
=
None
self
.
aborted
=
False
self
.
aborted
=
False
self
.
running
=
False
self
.
running
=
False
...
@@ -130,7 +137,6 @@ class Runner:
...
@@ -130,7 +137,6 @@ class Runner:
save_steps
=
get
(
"train.save_steps"
),
save_steps
=
get
(
"train.save_steps"
),
warmup_steps
=
get
(
"train.warmup_steps"
),
warmup_steps
=
get
(
"train.warmup_steps"
),
neftune_noise_alpha
=
get
(
"train.neftune_alpha"
)
or
None
,
neftune_noise_alpha
=
get
(
"train.neftune_alpha"
)
or
None
,
optim
=
get
(
"train.optim"
),
packing
=
get
(
"train.packing"
)
or
get
(
"train.neat_packing"
),
packing
=
get
(
"train.packing"
)
or
get
(
"train.neat_packing"
),
neat_packing
=
get
(
"train.neat_packing"
),
neat_packing
=
get
(
"train.neat_packing"
),
train_on_prompt
=
get
(
"train.train_on_prompt"
),
train_on_prompt
=
get
(
"train.train_on_prompt"
),
...
@@ -147,8 +153,9 @@ class Runner:
...
@@ -147,8 +153,9 @@ class Runner:
pure_bf16
=
(
get
(
"train.compute_type"
)
==
"pure_bf16"
),
pure_bf16
=
(
get
(
"train.compute_type"
)
==
"pure_bf16"
),
plot_loss
=
True
,
plot_loss
=
True
,
ddp_timeout
=
180000000
,
ddp_timeout
=
180000000
,
include_num_input_tokens_seen
=
True
,
include_num_input_tokens_seen
=
False
if
is_transformers_version_equal_to_4_46
()
else
True
,
# FIXME
)
)
args
.
update
(
json
.
loads
(
get
(
"train.extra_args"
)))
# checkpoints
# checkpoints
if
get
(
"top.checkpoint_path"
):
if
get
(
"top.checkpoint_path"
):
...
@@ -231,7 +238,7 @@ class Runner:
...
@@ -231,7 +238,7 @@ class Runner:
if
get
(
"train.ds_stage"
)
!=
"none"
:
if
get
(
"train.ds_stage"
)
!=
"none"
:
ds_stage
=
get
(
"train.ds_stage"
)
ds_stage
=
get
(
"train.ds_stage"
)
ds_offload
=
"offload_"
if
get
(
"train.ds_offload"
)
else
""
ds_offload
=
"offload_"
if
get
(
"train.ds_offload"
)
else
""
args
[
"deepspeed"
]
=
os
.
path
.
join
(
DEFAULT_CACHE_DIR
,
"ds_z{
}_{}config.json"
.
format
(
ds_stage
,
ds_offload
)
)
args
[
"deepspeed"
]
=
os
.
path
.
join
(
DEFAULT_CACHE_DIR
,
f
"ds_z
{
ds_stage
}
_
{
ds_offload
}
config.json"
)
return
args
return
args
...
@@ -313,7 +320,7 @@ class Runner:
...
@@ -313,7 +320,7 @@ class Runner:
if
args
.
get
(
"deepspeed"
,
None
)
is
not
None
:
if
args
.
get
(
"deepspeed"
,
None
)
is
not
None
:
env
[
"FORCE_TORCHRUN"
]
=
"1"
env
[
"FORCE_TORCHRUN"
]
=
"1"
self
.
trainer
=
Popen
(
"llamafactory-cli
train
{}"
.
format
(
save_cmd
(
args
)
)
,
env
=
env
,
shell
=
True
)
self
.
trainer
=
Popen
(
[
"llamafactory-cli
"
,
"
train
"
,
save_cmd
(
args
)
]
,
env
=
env
)
yield
from
self
.
monitor
()
yield
from
self
.
monitor
()
def
_form_config_dict
(
self
,
data
:
Dict
[
"Component"
,
Any
])
->
Dict
[
str
,
Any
]:
def
_form_config_dict
(
self
,
data
:
Dict
[
"Component"
,
Any
])
->
Dict
[
str
,
Any
]:
...
@@ -351,6 +358,7 @@ class Runner:
...
@@ -351,6 +358,7 @@ class Runner:
progress_bar
=
self
.
manager
.
get_elem_by_id
(
"{}.progress_bar"
.
format
(
"train"
if
self
.
do_train
else
"eval"
))
progress_bar
=
self
.
manager
.
get_elem_by_id
(
"{}.progress_bar"
.
format
(
"train"
if
self
.
do_train
else
"eval"
))
loss_viewer
=
self
.
manager
.
get_elem_by_id
(
"train.loss_viewer"
)
if
self
.
do_train
else
None
loss_viewer
=
self
.
manager
.
get_elem_by_id
(
"train.loss_viewer"
)
if
self
.
do_train
else
None
running_log
=
""
while
self
.
trainer
is
not
None
:
while
self
.
trainer
is
not
None
:
if
self
.
aborted
:
if
self
.
aborted
:
yield
{
yield
{
...
@@ -386,7 +394,7 @@ class Runner:
...
@@ -386,7 +394,7 @@ class Runner:
finish_info
=
ALERTS
[
"err_failed"
][
lang
]
finish_info
=
ALERTS
[
"err_failed"
][
lang
]
return_dict
=
{
return_dict
=
{
output_box
:
self
.
_finalize
(
lang
,
finish_info
),
output_box
:
self
.
_finalize
(
lang
,
finish_info
)
+
"
\n\n
"
+
running_log
,
progress_bar
:
gr
.
Slider
(
visible
=
False
),
progress_bar
:
gr
.
Slider
(
visible
=
False
),
}
}
yield
return_dict
yield
return_dict
...
...
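Two of the runner changes work as a pair: the pre-flight check rejects malformed `extra_args` JSON early via the new `err_json_schema` alert, and the parsed object is merged over the generated argument dict, so user-supplied keys win. A standalone sketch of that validate-then-merge flow (function and message names are illustrative):

```python
import json


def build_args(base_args: dict, extra_args_text: str) -> dict:
    try:
        overrides = json.loads(extra_args_text)
    except json.JSONDecodeError:
        raise ValueError("extra arguments must be valid JSON")  # surfaced as err_json_schema in the UI
    merged = dict(base_args)
    merged.update(overrides)  # user-supplied keys override generated ones
    return merged


print(build_args({"optim": "adamw_torch", "plot_loss": True}, '{"optim": "adamw_8bit"}'))
```

Separately, the training subprocess now launches with an argument list instead of a `shell=True` command string, which sidesteps shell quoting of the saved config path.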
src/llamafactory/webui/utils.py (+9 −9)

@@ -111,14 +111,14 @@ def gen_cmd(args: Dict[str, Any]) -> str:
     """
     cmd_lines = ["llamafactory-cli train "]
     for k, v in clean_cmd(args).items():
-        cmd_lines.append("    --{} {} ".format(k, str(v)))
+        cmd_lines.append(f"    --{k} {str(v)} ")

     if os.name == "nt":
         cmd_text = "`\n".join(cmd_lines)
     else:
         cmd_text = "\\\n".join(cmd_lines)

-    cmd_text = "```bash\n{}\n```".format(cmd_text)
+    cmd_text = f"```bash\n{cmd_text}\n```"
     return cmd_text
@@ -139,9 +139,9 @@ def get_eval_results(path: os.PathLike) -> str:
     r"""
     Gets scores after evaluation.
     """
-    with open(path, "r", encoding="utf-8") as f:
+    with open(path, encoding="utf-8") as f:
         result = json.dumps(json.load(f), indent=4)

-    return "```json\n{}\n```\n".format(result)
+    return f"```json\n{result}\n```\n"

 def get_time() -> str:
@@ -161,13 +161,13 @@ def get_trainer_info(output_path: os.PathLike, do_train: bool) -> Tuple[str, "gr
     running_log_path = os.path.join(output_path, RUNNING_LOG)
     if os.path.isfile(running_log_path):
-        with open(running_log_path, "r", encoding="utf-8") as f:
+        with open(running_log_path, encoding="utf-8") as f:
             running_log = f.read()

     trainer_log_path = os.path.join(output_path, TRAINER_LOG)
     if os.path.isfile(trainer_log_path):
         trainer_log: List[Dict[str, Any]] = []
-        with open(trainer_log_path, "r", encoding="utf-8") as f:
+        with open(trainer_log_path, encoding="utf-8") as f:
             for line in f:
                 trainer_log.append(json.loads(line))
@@ -193,7 +193,7 @@ def load_args(config_path: str) -> Optional[Dict[str, Any]]:
     Loads saved arguments.
     """
     try:
-        with open(config_path, "r", encoding="utf-8") as f:
+        with open(config_path, encoding="utf-8") as f:
             return safe_load(f)
     except Exception:
         return None
@@ -211,7 +211,7 @@ def list_config_paths(current_time: str) -> "gr.Dropdown":
     r"""
     Lists all the saved configuration files.
     """
-    config_files = ["{}.yaml".format(current_time)]
+    config_files = [f"{current_time}.yaml"]
     if os.path.isdir(DEFAULT_CONFIG_DIR):
         for file_name in os.listdir(DEFAULT_CONFIG_DIR):
             if file_name.endswith(".yaml") and file_name not in config_files:
@@ -224,7 +224,7 @@ def list_output_dirs(model_name: Optional[str], finetuning_type: str, current_ti
     r"""
     Lists all the directories that can resume from.
     """
-    output_dirs = ["train_{}".format(current_time)]
+    output_dirs = [f"train_{current_time}"]
     if model_name:
         save_dir = get_save_dir(model_name, finetuning_type)
         if save_dir and os.path.isdir(save_dir):
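`gen_cmd` now builds its fenced command preview with f-strings; a trimmed, runnable recreation (with `clean_cmd` replaced by a plain dict and the exact indentation of each argument line an assumption):

````python
import os


def gen_cmd(args: dict) -> str:
    # Mirrors the post-commit logic: one "--key value" per line, joined with
    # the platform's line-continuation character, wrapped in a bash fence.
    cmd_lines = ["llamafactory-cli train "]
    for k, v in args.items():
        cmd_lines.append(f"    --{k} {str(v)} ")
    joiner = "`\n" if os.name == "nt" else "\\\n"  # PowerShell vs. POSIX continuation
    return f"```bash\n{joiner.join(cmd_lines)}\n```"


print(gen_cmd({"stage": "sft", "model_name_or_path": "meta-llama/Meta-Llama-3-8B"}))
````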
src/webui.py (+3 −2)

@@ -18,8 +18,9 @@ from llamafactory.webui.interface import create_ui
 def main():
-    gradio_share = os.environ.get("GRADIO_SHARE", "0").lower() in ["true", "1"]
-    server_name = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
+    gradio_ipv6 = os.getenv("GRADIO_IPV6", "0").lower() in ["true", "1"]
+    gradio_share = os.getenv("GRADIO_SHARE", "0").lower() in ["true", "1"]
+    server_name = os.getenv("GRADIO_SERVER_NAME", "[::]" if gradio_ipv6 else "0.0.0.0")
     create_ui().queue().launch(share=gradio_share, server_name=server_name, inbrowser=True)
tests/data/processors/test_feedback.py (+2 −2)

@@ -23,9 +23,9 @@ from llamafactory.extras.constants import IGNORE_INDEX
 from llamafactory.train.test_utils import load_train_dataset

-DEMO_DATA = os.environ.get("DEMO_DATA", "llamafactory/demo_data")
-TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")
+TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")

 TRAIN_ARGS = {
     "model_name_or_path": TINY_LLAMA,
tests/data/processors/test_pairwise.py (+2 −2)

@@ -24,9 +24,9 @@ from llamafactory.extras.constants import IGNORE_INDEX
 from llamafactory.train.test_utils import load_train_dataset

-DEMO_DATA = os.environ.get("DEMO_DATA", "llamafactory/demo_data")
-TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")
+TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")

 TRAIN_ARGS = {
     "model_name_or_path": TINY_LLAMA,
tests/data/processors/test_supervised.py (+3 −3)

@@ -23,11 +23,11 @@ from llamafactory.extras.constants import IGNORE_INDEX
 from llamafactory.train.test_utils import load_train_dataset

-DEMO_DATA = os.environ.get("DEMO_DATA", "llamafactory/demo_data")
-TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
-TINY_DATA = os.environ.get("TINY_DATA", "llamafactory/tiny-supervised-dataset")
+DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")
+TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_DATA = os.getenv("TINY_DATA", "llamafactory/tiny-supervised-dataset")

 TRAIN_ARGS = {
     "model_name_or_path": TINY_LLAMA,
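The test changes swap `os.environ.get` for `os.getenv`; the two are equivalent (`os.getenv(key, default=None)` is a thin wrapper over `os.environ.get`), so this is purely cosmetic:

```python
import os

os.environ["DEMO_DATA"] = "local/demo"
assert os.getenv("DEMO_DATA", "llamafactory/demo_data") == os.environ.get("DEMO_DATA", "llamafactory/demo_data")
```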