OpenDAS / LLaMA-Factory
Commit 4a40151b
"research/object_detection/legacy/trainer.py" did not exist on "9fce9c641e7f51b5931ebb8dc78858baa357adf5"
Commit 4a40151b, authored Nov 05, 2024 by chenych. Message: "Update v0.8.3". Parent: 731cf9b8.
Changes: 56 files in total; this page shows 16 changed files with 61 additions and 575 deletions (+61 -575):
  src/llamafactory/model/model_utils/longlora.py   +22  -53
  src/llamafactory/model/model_utils/packing.py     +2  -10
  src/llamafactory/model/patcher.py                 +4   -0
  src/llamafactory/train/callbacks.py               +2   -4
  src/llamafactory/train/dpo/trainer.py             +2   -2
  src/llamafactory/train/kto/trainer.py             +2   -2
  src/llamafactory/train/ppo/trainer.py             +2   -3
  src/llamafactory/train/pt/trainer.py              +2   -2
  src/llamafactory/train/rm/trainer.py              +2   -2
  src/llamafactory/train/sft/trainer.py             +2   -2
  src/llamafactory/train/trainer_utils.py           +1  -30
  src/llamafactory/webui/components/export.py       +2   -2
  src/llamafactory/webui/components/top.py          +1   -1
  src/llamafactory/webui/interface.py               +1   -1
  src/llamafactory/webui/locales.py                 +1 -450
  src/llamafactory/webui/runner.py                 +13  -11
src/llamafactory/model/model_utils/longlora.py (+22 -53)

@@ -35,7 +35,6 @@ from transformers.utils.versions import require_version
 from ...extras.constants import SUPPORTED_CLASS_FOR_S2ATTN
 from ...extras.logging import get_logger
-from ...extras.packages import is_transformers_version_greater_than_4_43


 if TYPE_CHECKING:

@@ -51,15 +50,14 @@ transformers_logger = logging.get_logger(__name__)
 # https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/models/llama/modeling_llama.py
 def llama_attention_forward(
     self: "LlamaAttention",
-    hidden_states: "torch.Tensor",
-    attention_mask: Optional["torch.Tensor"] = None,
-    position_ids: Optional["torch.LongTensor"] = None,
+    hidden_states: torch.Tensor,
+    attention_mask: Optional[torch.Tensor] = None,
+    position_ids: Optional[torch.LongTensor] = None,
     past_key_value: Optional["Cache"] = None,
     output_attentions: bool = False,
-    cache_position: Optional["torch.LongTensor"] = None,
-    position_embeddings: Optional[Tuple["torch.Tensor", "torch.Tensor"]] = None,
+    cache_position: Optional[torch.LongTensor] = None,
     **kwargs,
-) -> Tuple["torch.Tensor", Optional["torch.Tensor"], Optional[Tuple["torch.Tensor"]]]:
+) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
     bsz, q_len, _ = hidden_states.size()

     query_states: "torch.Tensor" = self.q_proj(hidden_states)

@@ -70,11 +68,7 @@ def llama_attention_forward(
     key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
     value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)

-    if position_embeddings is None:
-        cos, sin = self.rotary_emb(value_states, position_ids)
-    else:
-        cos, sin = position_embeddings
-
+    cos, sin = self.rotary_emb(value_states, position_ids)
     query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)

     if past_key_value is not None:

@@ -136,15 +130,14 @@ def llama_attention_forward(
 # https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/models/llama/modeling_llama.py
 def llama_flash_attention_2_forward(
     self: "LlamaFlashAttention2",
-    hidden_states: "torch.Tensor",
-    attention_mask: Optional["torch.Tensor"] = None,
-    position_ids: Optional["torch.LongTensor"] = None,
+    hidden_states: torch.Tensor,
+    attention_mask: Optional[torch.Tensor] = None,
+    position_ids: Optional[torch.LongTensor] = None,
     past_key_value: Optional["Cache"] = None,
     output_attentions: bool = False,
-    cache_position: Optional["torch.LongTensor"] = None,
-    position_embeddings: Optional[Tuple["torch.Tensor", "torch.Tensor"]] = None,
+    cache_position: Optional[torch.LongTensor] = None,
     **kwargs,
-) -> Tuple["torch.Tensor", Optional["torch.Tensor"], Optional[Tuple["torch.Tensor"]]]:
+) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
     # LlamaFlashAttention2 attention does not support output_attentions
     output_attentions = False

@@ -158,11 +151,7 @@ def llama_flash_attention_2_forward(
     key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
     value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)

-    if position_embeddings is None:
-        cos, sin = self.rotary_emb(value_states, position_ids)
-    else:
-        cos, sin = position_embeddings
-
+    cos, sin = self.rotary_emb(value_states, position_ids)
     query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)

     if past_key_value is not None:

@@ -209,24 +198,9 @@ def llama_flash_attention_2_forward(
     if attention_mask is not None:
         attention_mask = attention_mask[:, :groupsz].repeat(num_groups, 1)

-    if is_transformers_version_greater_than_4_43():
-        from transformers.modeling_flash_attention_utils import _flash_attention_forward
-
-        attn_output: "torch.Tensor" = _flash_attention_forward(
-            query_states,
-            key_states,
-            value_states,
-            attention_mask,
-            query_states.size(1),
-            dropout=dropout_rate,
-            sliding_window=getattr(self, "sliding_window", None),
-            use_top_left_mask=self._flash_attn_uses_top_left_mask,
-            is_causal=self.is_causal,
-        )
-    else:
-        attn_output: "torch.Tensor" = self._flash_attention_forward(
-            query_states, key_states, value_states, attention_mask, query_states.size(1), dropout=dropout_rate
-        )
+    attn_output: "torch.Tensor" = self._flash_attention_forward(
+        query_states, key_states, value_states, attention_mask, query_states.size(1), dropout=dropout_rate
+    )

     if getattr(self.config, "group_size_ratio", None) and self.training:  # shift back
         attn_output.reshape(bsz, q_len, self.num_heads, self.head_dim)

@@ -251,15 +225,14 @@ def llama_flash_attention_2_forward(
 # https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/models/llama/modeling_llama.py
 def llama_sdpa_attention_forward(
     self: "LlamaSdpaAttention",
-    hidden_states: "torch.Tensor",
-    attention_mask: Optional["torch.Tensor"] = None,
-    position_ids: Optional["torch.LongTensor"] = None,
+    hidden_states: torch.Tensor,
+    attention_mask: Optional[torch.Tensor] = None,
+    position_ids: Optional[torch.LongTensor] = None,
     past_key_value: Optional["Cache"] = None,
     output_attentions: bool = False,
-    cache_position: Optional["torch.LongTensor"] = None,
-    position_embeddings: Optional[Tuple["torch.Tensor", "torch.Tensor"]] = None,
+    cache_position: Optional[torch.LongTensor] = None,
     **kwargs,
-) -> Tuple["torch.Tensor", Optional["torch.Tensor"], Optional[Tuple["torch.Tensor"]]]:
+) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
     if output_attentions:
         transformers_logger.warning_once(
             "SDPA does not support `output_attentions=True`. Falling back to the vanilla attention"

@@ -285,11 +258,7 @@ def llama_sdpa_attention_forward(
     key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
     value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)

-    if position_embeddings is None:
-        cos, sin = self.rotary_emb(value_states, position_ids)
-    else:
-        cos, sin = position_embeddings
-
+    cos, sin = self.rotary_emb(value_states, position_ids)
     query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)

     if past_key_value is not None:

@@ -353,7 +322,7 @@ def llama_sdpa_attention_forward(
 def _apply_llama_patch() -> None:
-    require_version("transformers>=4.41.2,<=4.43.4", "To fix: pip install transformers>=4.41.2,<=4.43.4")
+    require_version("transformers>=4.41.2,<=4.42.4", "To fix: pip install transformers>=4.41.2,<=4.42.4")
     LlamaAttention.forward = llama_attention_forward
     LlamaFlashAttention2.forward = llama_flash_attention_2_forward
     LlamaSdpaAttention.forward = llama_sdpa_attention_forward
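For orientation: the forwards patched above implement LongLoRA's shift short attention (S^2-Attn). During training the sequence is cut into groups and half of the attention heads are shifted by half a group, so information still flows across group boundaries while attention is only computed within groups. A minimal sketch of that grouping step, assuming a (bsz, q_len, num_heads, head_dim) layout and a groupsz that divides q_len; the helper name is hypothetical:

    import torch

    def shift_and_group(states: torch.Tensor, groupsz: int) -> torch.Tensor:
        bsz, q_len, num_heads, head_dim = states.size()
        num_groups = q_len // groupsz
        # Shift the second half of the heads by half a group so tokens near a
        # group boundary can still attend across it.
        states = torch.cat(
            (
                states[:, :, : num_heads // 2],
                states[:, :, num_heads // 2 :].roll(-groupsz // 2, dims=1),
            ),
            dim=2,
        )
        # Fold each group into the batch dimension: attention is then computed
        # per group, reducing the cost from O(q_len^2) to O(q_len * groupsz).
        return states.reshape(bsz * num_groups, groupsz, num_heads, head_dim)

The "shift back" comment in the flash-attention hunk above marks the inverse of this transformation, applied to the attention output.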
src/llamafactory/model/model_utils/packing.py (+2 -10)

@@ -41,11 +41,11 @@ from typing import TYPE_CHECKING, Tuple
 import torch
 import torch.nn.functional as F
+import transformers.models
 from transformers.utils.versions import require_version

 from ...extras.constants import SUPPORTED_CLASS_FOR_BLOCK_DIAG_ATTN
 from ...extras.logging import get_logger
-from ...extras.packages import is_transformers_version_greater_than_4_43

 if TYPE_CHECKING:

@@ -114,15 +114,7 @@ def get_unpad_data(attention_mask: "torch.Tensor") -> Tuple["torch.Tensor", "torch.Tensor", int]:
 def _patch_for_block_diag_attn(model_type: str) -> None:
-    require_version("transformers>=4.41.2,<=4.43.4", "To fix: pip install transformers>=4.41.2,<=4.43.4")
-    if is_transformers_version_greater_than_4_43():
-        import transformers.modeling_flash_attention_utils
-
-        transformers.modeling_flash_attention_utils._get_unpad_data = get_unpad_data
-        return
-
-    import transformers.models
-
+    require_version("transformers>=4.41.2,<=4.42.4", "To fix: pip install transformers>=4.41.2,<=4.42.4")
     if model_type == "cohere":
         transformers.models.cohere.modeling_cohere._get_unpad_data = get_unpad_data
     elif model_type == "falcon":
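For context: _patch_for_block_diag_attn swaps out the _get_unpad_data hook that FlashAttention-2 uses to strip padding before its varlen kernel. The stock transformers implementation (v4.42-era, shown here for reference) derives flat token indices, cumulative sequence lengths, and the longest sequence from a 0/1 attention mask; LLaMA-Factory's get_unpad_data returns the same triple but computes it from a packing mask that labels each token with its sequence index, which makes packed samples attend block-diagonally instead of across the whole packed row:

    import torch
    import torch.nn.functional as F

    # Reference implementation of the hook being replaced (transformers v4.42).
    def _get_unpad_data(attention_mask: torch.Tensor):
        seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
        indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
        max_seqlen_in_batch = seqlens_in_batch.max().item()
        cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
        return indices, cu_seqlens, max_seqlen_in_batch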
src/llamafactory/model/patcher.py (+4 -0)

@@ -21,6 +21,7 @@ from peft import PeftModel
 from transformers import PreTrainedModel, PreTrainedTokenizerBase, is_torch_npu_available
 from transformers.integrations import is_deepspeed_zero3_enabled
 from transformers.modeling_utils import is_fsdp_enabled
+from transformers.utils.versions import require_version

 from ..extras.logging import get_logger
 from ..extras.misc import infer_optim_dtype

@@ -88,6 +89,9 @@ def patch_config(
     if getattr(config, "model_type", None) == "qwen2" and is_trainable and model_args.flash_attn == "fa2":
         setattr(config, "use_cache", False)  # qwen2 does not support use_cache when using flash attn

+    if getattr(config, "model_type", None) == "chatglm":
+        require_version("transformers==4.41.2", "To fix: pip install transformers==4.41.2")
+
     # deepspeed zero3 is not compatible with low_cpu_mem_usage
     init_kwargs["low_cpu_mem_usage"] = model_args.low_cpu_mem_usage and (not is_deepspeed_zero3_enabled())
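require_version makes the incompatibility fail fast at patch time instead of deep inside a forward pass. Its behavior, for reference:

    from transformers.utils.versions import require_version

    # No-op when the installed package satisfies the specifier; otherwise it
    # raises ImportError with the hint string appended to the error message.
    require_version("transformers==4.41.2", "To fix: pip install transformers==4.41.2")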
src/llamafactory/train/callbacks.py (+2 -4)

@@ -162,12 +162,10 @@ class PissaConvertCallback(TrainerCallback):
             setattr(model.peft_config["default"], "init_lora_weights", init_lora_weights)
             model.save_pretrained(
                 pissa_convert_dir, safe_serialization=args.save_safetensors, convert_pissa_to_lora=pissa_init_dir
-            )  # TODO: use `path_initial_model_for_weight_conversion` (peft>=0.12.0)
+            )
             model.load_adapter(pissa_backup_dir, "default", is_trainable=True)
             model.set_adapter("default")
-            if "pissa_init" in model.peft_config.keys():  # backward compatibility (peft<0.12.0)
-                model.delete_adapter("pissa_init")
-
+            model.delete_adapter("pissa_init")
             setattr(model.peft_config["default"], "init_lora_weights", init_lora_weights)
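The deleted TODO names the peft API this guard existed for. Both spellings of the conversion call, side by side; the newer keyword is taken from that comment and applies to peft>=0.12.0 only:

    # peft<0.12.0, the form this commit keeps:
    model.save_pretrained(
        pissa_convert_dir, safe_serialization=args.save_safetensors, convert_pissa_to_lora=pissa_init_dir
    )
    # peft>=0.12.0, per the removed TODO:
    # model.save_pretrained(
    #     pissa_convert_dir,
    #     safe_serialization=args.save_safetensors,
    #     path_initial_model_for_weight_conversion=pissa_init_dir,
    # )

Dropping the "pissa_init" membership check means the restored code assumes the older peft behavior, where the conversion always leaves a pissa_init adapter behind to delete.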
src/llamafactory/train/dpo/trainer.py (+2 -2)

@@ -29,7 +29,7 @@ from trl.trainer import disable_dropout_in_model
 from ...extras.constants import IGNORE_INDEX
 from ..callbacks import PissaConvertCallback, SaveProcessorCallback
-from ..trainer_utils import create_custom_optimizer, create_custom_scheduler, get_batch_logps
+from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps

 if TYPE_CHECKING:

@@ -106,7 +106,7 @@ class CustomDPOTrainer(DPOTrainer):
     def create_optimizer(self) -> "torch.optim.Optimizer":
         if self.optimizer is None:
-            self.optimizer = create_custom_optimizer(self.model, self.args, self.finetuning_args)
+            self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args)
         return super().create_optimizer()

     def create_scheduler(
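The same two-line rename repeats in the KTO, PPO, PT, RM, and SFT trainers below, and they all share this create_optimizer override. A sketch of why the pattern works, assuming the mixin is combined with a transformers Trainer subclass (the class name is hypothetical):

    from typing import Optional
    import torch

    class CustomOptimizerMixin:
        """Mix into a transformers Trainer subclass, ahead of Trainer in the MRO."""

        optimizer: Optional["torch.optim.Optimizer"]

        def create_optimizer(self) -> "torch.optim.Optimizer":
            if self.optimizer is None:
                # The factory may return None (no custom optimizer configured).
                self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args)
            # HF's Trainer.create_optimizer() only builds its default AdamW when
            # self.optimizer is still None, so a custom one short-circuits it.
            return super().create_optimizer()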
src/llamafactory/train/kto/trainer.py (+2 -2)

@@ -28,7 +28,7 @@ from trl.trainer import disable_dropout_in_model
 from ...extras.constants import IGNORE_INDEX
 from ..callbacks import SaveProcessorCallback
-from ..trainer_utils import create_custom_optimizer, create_custom_scheduler, get_batch_logps
+from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps

 if TYPE_CHECKING:

@@ -101,7 +101,7 @@ class CustomKTOTrainer(KTOTrainer):
     def create_optimizer(self) -> "torch.optim.Optimizer":
         if self.optimizer is None:
-            self.optimizer = create_custom_optimizer(self.model, self.args, self.finetuning_args)
+            self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args)
         return super().create_optimizer()

     def create_scheduler(
src/llamafactory/train/ppo/trainer.py (+2 -3)

@@ -39,7 +39,7 @@ from trl.models.utils import unwrap_model_for_generation
 from ...extras.logging import get_logger
 from ...extras.misc import AverageMeter, count_parameters, get_current_device, get_logits_processor
 from ..callbacks import FixValueHeadModelCallback, SaveProcessorCallback
-from ..trainer_utils import create_custom_optimizer, create_custom_scheduler
+from ..trainer_utils import create_custom_optimzer, create_custom_scheduler
 from .ppo_utils import dump_layernorm, get_rewards_from_server, replace_model, restore_layernorm

@@ -133,7 +133,6 @@ class CustomPPOTrainer(PPOTrainer, Trainer):
             ref_model=ref_model,
             tokenizer=tokenizer,
             dataset=train_dataset,
-            optimizer=optimizer,
             data_collator=data_collator,
             lr_scheduler=scheduler,
         )

@@ -304,7 +303,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer):
         training_args: "Seq2SeqTrainingArguments",
         finetuning_args: "FinetuningArguments",
     ) -> "torch.optim.Optimizer":
-        optimizer = create_custom_optimizer(model, training_args, finetuning_args)
+        optimizer = create_custom_optimzer(model, training_args, finetuning_args)
         if optimizer is None:
             decay_params, nodecay_params = [], []
             decay_param_names = self.get_decay_parameter_names(model)
src/llamafactory/train/pt/trainer.py (+2 -2)

@@ -19,7 +19,7 @@ from transformers import Trainer
 from ...extras.logging import get_logger
 from ..callbacks import PissaConvertCallback, SaveProcessorCallback
-from ..trainer_utils import create_custom_optimizer, create_custom_scheduler
+from ..trainer_utils import create_custom_optimzer, create_custom_scheduler

 if TYPE_CHECKING:

@@ -57,7 +57,7 @@ class CustomTrainer(Trainer):
     def create_optimizer(self) -> "torch.optim.Optimizer":
         if self.optimizer is None:
-            self.optimizer = create_custom_optimizer(self.model, self.args, self.finetuning_args)
+            self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args)
         return super().create_optimizer()

     def create_scheduler(
src/llamafactory/train/rm/trainer.py (+2 -2)

@@ -25,7 +25,7 @@ from transformers import Trainer
 from ...extras.logging import get_logger
 from ..callbacks import FixValueHeadModelCallback, PissaConvertCallback, SaveProcessorCallback
-from ..trainer_utils import create_custom_optimizer, create_custom_scheduler
+from ..trainer_utils import create_custom_optimzer, create_custom_scheduler

 if TYPE_CHECKING:

@@ -65,7 +65,7 @@ class PairwiseTrainer(Trainer):
     def create_optimizer(self) -> "torch.optim.Optimizer":
         if self.optimizer is None:
-            self.optimizer = create_custom_optimizer(self.model, self.args, self.finetuning_args)
+            self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args)
         return super().create_optimizer()

     def create_scheduler(
src/llamafactory/train/sft/trainer.py (+2 -2)

@@ -27,7 +27,7 @@ from transformers import Seq2SeqTrainer
 from ...extras.constants import IGNORE_INDEX
 from ...extras.logging import get_logger
 from ..callbacks import PissaConvertCallback, SaveProcessorCallback
-from ..trainer_utils import create_custom_optimizer, create_custom_scheduler
+from ..trainer_utils import create_custom_optimzer, create_custom_scheduler

 if TYPE_CHECKING:

@@ -66,7 +66,7 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer):
     def create_optimizer(self) -> "torch.optim.Optimizer":
         if self.optimizer is None:
-            self.optimizer = create_custom_optimizer(self.model, self.args, self.finetuning_args)
+            self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args)
         return super().create_optimizer()

     def create_scheduler(
src/llamafactory/train/trainer_utils.py (+1 -30)

@@ -22,7 +22,6 @@ from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union
 import torch
 from transformers import Trainer
 from transformers.integrations import is_deepspeed_zero3_enabled
-from transformers.modeling_utils import is_fsdp_enabled
 from transformers.optimization import get_scheduler
 from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS
 from transformers.trainer_pt_utils import get_parameter_names

@@ -367,32 +366,7 @@ def _create_badam_optimizer(
     return optimizer


-def _create_adam_mini_optimizer(
-    model: "PreTrainedModel",
-    training_args: "Seq2SeqTrainingArguments",
-) -> "torch.optim.Optimizer":
-    from adam_mini import Adam_mini
-
-    hidden_size = getattr(model.config, "hidden_size", None)
-    num_q_head = getattr(model.config, "num_attention_heads", None)
-    num_kv_head = getattr(model.config, "num_key_value_heads", None)
-
-    optimizer = Adam_mini(
-        named_parameters=model.named_parameters(),
-        lr=training_args.learning_rate,
-        betas=(training_args.adam_beta1, training_args.adam_beta2),
-        eps=training_args.adam_epsilon,
-        weight_decay=training_args.weight_decay,
-        model_sharding=is_fsdp_enabled() or is_deepspeed_zero3_enabled(),
-        dim=hidden_size,
-        n_heads=num_q_head,
-        n_kv_heads=num_kv_head,
-    )
-    logger.info("Using Adam-mini optimizer.")
-    return optimizer
-
-
-def create_custom_optimizer(
+def create_custom_optimzer(
     model: "PreTrainedModel",
     training_args: "Seq2SeqTrainingArguments",
     finetuning_args: "FinetuningArguments",

@@ -406,9 +380,6 @@ def create_custom_optimizer(
     if finetuning_args.use_badam:
         return _create_badam_optimizer(model, training_args, finetuning_args)

-    if finetuning_args.use_adam_mini:
-        return _create_adam_mini_optimizer(model, training_args)
-

 def create_custom_scheduler(
     training_args: "Seq2SeqTrainingArguments",
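What remains after the deletion is a plain dispatcher: try each opt-in optimizer family in turn and fall through to None, which the trainers above interpret as "use the HF default". A condensed sketch of that shape; only the BAdam branch is visible in this diff, and the GaLore and LoRA+ branches are assumptions based on the helpers this file defines elsewhere:

    from typing import Optional

    def create_custom_optimzer(
        model: "PreTrainedModel",
        training_args: "Seq2SeqTrainingArguments",
        finetuning_args: "FinetuningArguments",
    ) -> Optional["torch.optim.Optimizer"]:
        if finetuning_args.use_galore:  # assumption
            return _create_galore_optimizer(model, training_args, finetuning_args)
        if finetuning_args.loraplus_lr_ratio is not None:  # assumption
            return _create_loraplus_optimizer(model, training_args, finetuning_args)
        if finetuning_args.use_badam:
            return _create_badam_optimizer(model, training_args, finetuning_args)
        # implicit None: callers fall back to the default HF optimizer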
src/llamafactory/webui/components/export.py (+2 -2)

@@ -66,7 +66,7 @@ def save_model(
         error = ALERTS["err_no_dataset"][lang]
     elif export_quantization_bit not in GPTQ_BITS and not checkpoint_path:
         error = ALERTS["err_no_adapter"][lang]
-    elif export_quantization_bit in GPTQ_BITS and checkpoint_path and isinstance(checkpoint_path, list):
+    elif export_quantization_bit in GPTQ_BITS and isinstance(checkpoint_path, list):
         error = ALERTS["err_gptq_lora"][lang]

     if error:

@@ -104,7 +104,7 @@ def save_model(
 def create_export_tab(engine: "Engine") -> Dict[str, "Component"]:
     with gr.Row():
-        export_size = gr.Slider(minimum=1, maximum=100, value=5, step=1)
+        export_size = gr.Slider(minimum=1, maximum=100, value=1, step=1)
         export_quantization_bit = gr.Dropdown(choices=["none"] + GPTQ_BITS, value="none")
         export_quantization_dataset = gr.Textbox(value="data/c4_demo.json")
         export_device = gr.Radio(choices=["cpu", "auto"], value="cpu")
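The restored elif drops the checkpoint_path truthiness guard, which matters when a GPTQ bit is selected but the adapter list is empty:

    checkpoint_path = []  # GPTQ export, no adapter chosen
    isinstance(checkpoint_path, list)                            # True: restored condition fires
    bool(checkpoint_path and isinstance(checkpoint_path, list))  # False: removed condition does not

So the v0.8.3 condition reports err_gptq_lora for an empty adapter list, while the removed one let it pass.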
src/llamafactory/webui/components/top.py (+1 -1)

@@ -33,7 +33,7 @@ def create_top() -> Dict[str, "Component"]:
     available_models = list(SUPPORTED_MODELS.keys()) + ["Custom"]

     with gr.Row():
-        lang = gr.Dropdown(choices=["en", "ru", "zh", "ko"], scale=1)
+        lang = gr.Dropdown(choices=["en", "ru", "zh"], scale=1)
         model_name = gr.Dropdown(choices=available_models, scale=3)
         model_path = gr.Textbox(scale=3)
src/llamafactory/webui/interface.py (+1 -1)

@@ -71,7 +71,7 @@ def create_web_demo() -> "gr.Blocks":
     engine = Engine(pure_chat=True)

     with gr.Blocks(title="Web Demo", css=CSS) as demo:
-        lang = gr.Dropdown(choices=["en", "ru", "zh", "ko"], scale=1)
+        lang = gr.Dropdown(choices=["en", "zh"])

         engine.manager.add_elems("top", dict(lang=lang))
         _, _, chat_elems = create_chat_box(engine, visible=True)
src/llamafactory/webui/locales.py (+1 -450)

@@ -18,14 +18,11 @@ LOCALES = {
             "label": "Lang",
         },
         "ru": {
-            "label": "язык",
+            "label": "Русский",
         },
         "zh": {
             "label": "语言",
         },
-        "ko": {
-            "label": "언어",
-        },
     },
     "model_name": {
         "en": {

Every remaining hunk in LOCALES follows the same pattern: the English, Russian, and Chinese entries stay, and the Korean ("ko") block is deleted. The removed entries, key by key:

-  model_name.ko: {"label": "모델 이름"}
-  model_path.ko: {"label": "모델 경로", "info": "사전 훈련된 모델의 경로 또는 Hugging Face의 모델 식별자."}
-  finetuning_type.ko: {"label": "파인튜닝 방법"}
-  checkpoint_path.ko: {"label": "체크포인트 경로"}
-  advanced_tab.ko: {"label": "고급 설정"}
-  quantization_bit.ko: {"label": "양자화 비트", "info": "양자화 활성화 (QLoRA)."}
-  quantization_method.ko: {"label": "양자화 방법", "info": "사용할 양자화 알고리즘."}
-  template.ko: {"label": "프롬프트 템플릿", "info": "프롬프트 구성에 사용될 템플릿."}
-  rope_scaling.ko: {"label": "RoPE 스케일링"}
-  booster.ko: {"label": "부스터"}
-  visual_inputs.ko: {"label": "시각적 입력"}
-  training_stage.ko: {"label": "학습 단계", "info": "수행할 학습 방법."}
-  dataset_dir.ko: {"label": "데이터 디렉토리", "info": "데이터 디렉토리의 경로."}
-  dataset.ko: {"label": "데이터셋"}
-  data_preview_btn.ko: {"value": "데이터셋 미리보기"}
-  preview_count.ko: {"label": "개수"}
-  page_index.ko: {"label": "페이지"}
-  prev_btn.ko: {"value": "이전"}
-  next_btn.ko: {"value": "다음"}
-  close_btn.ko: {"value": "닫기"}
-  preview_samples.ko: {"label": "샘플"}
-  learning_rate.ko: {"label": "학습률", "info": "AdamW의 초기 학습률."}
-  num_train_epochs.ko: {"label": "에포크", "info": "수행할 총 학습 에포크 수."}
-  max_grad_norm.ko: {"label": "최대 그레디언트 노름(norm)", "info": "그레디언트 클리핑을 위한 노름(norm)."}
-  max_samples.ko: {"label": "최대 샘플 수", "info": "데이터셋 당 최대 샘플 수."}
-  compute_type.ko: {"label": "연산 유형", "info": "혼합 정밀도 훈련을 사용할지 여부."}
-  cutoff_len.ko: {"label": "컷오프 길이", "info": "입력 시퀀스의 최대 토큰 수."}
-  batch_size.ko: {"label": "배치 크기", "info": "각 GPU에서 처리되는 샘플 수."}
-  gradient_accumulation_steps.ko: {"label": "그레디언트 누적", "info": "그레디언트 누적 단계 수."}
-  val_size.ko: {"label": "검증 데이터셋 크기", "info": "개발 데이터셋에서 검증 데이터의 비율."}
-  lr_scheduler_type.ko: {"label": "LR 스케줄러", "info": "학습률 스케줄러의 이름."}
-  extra_tab.ko: {"label": "추가 구성(configuration)"}
-  logging_steps.ko: {"label": "로깅 스텝", "info": "이전 로깅과 다음 로깅 간 스텝 수."}
-  save_steps.ko: {"label": "저장 스텝", "info": "이전 체크포인트와 다음 체크포인트 사이의 스텝 수."}
-  warmup_steps.ko: {"label": "Warmup 스텝", "info": "Warmup에 사용되는 스텝 수."}
-  neftune_alpha.ko: {"label": "NEFTune 알파", "info": "임베딩 벡터에 추가되는 노이즈의 크기."}
-  optim.ko: {"label": "옵티마이저", "info": "사용할 옵티마이저: adamw_torch, adamw_8bit 또는 adafactor 등."}
-  packing.ko: {"label": "시퀀스 패킹", "info": "고정된 길이의 샘플로 시퀀스를 패킹합니다."}
-  neat_packing.ko: {"label": "니트 패킹 사용", "info": "패킹된 시퀀스 간의 크로스 어텐션을 피합니다."}
-  train_on_prompt.ko: {"label": "프롬프트도 학습", "info": "프롬프트에서 라벨 마스킹을 비활성화합니다 (SFT에만 해당)."}
-  mask_history.ko: {"label": "히스토리 마스킹", "info": "대화 데이터의 마지막 턴만 학습합니다 (SFT에만 해당)."}
-  resize_vocab.ko: {"label": "토큰 임베딩의 사이즈 조정", "info": "토크나이저 어휘와 임베딩 레이어의 크기를 조정합니다."}
-  use_llama_pro.ko: {"label": "LLaMA Pro 사용", "info": "확장된 블록의 매개변수를 학습 가능하게 만듭니다."}
-  shift_attn.ko: {"label": "S^2 Attention 사용", "info": "LongLoRA에서 제안한 shift short attention을 사용합니다."}
-  report_to.ko: {"label": "외부 logger 활성화", "info": "TensorBoard 또는 wandb를 사용하여 실험을 기록합니다."}
-  freeze_tab.ko: {"label": "Freeze tuning 설정"}
-  freeze_trainable_layers.ko: {"label": "학습 가능한 레이어", "info": "학습 가능하게 설정할 마지막(+)/처음(-) 히든 레이어의 수."}
-  freeze_trainable_modules.ko: {"label": "학습 가능한 모듈", "info": "학습 가능한 모듈의 이름. 여러 모듈을 구분하려면 쉼표(,)를 사용하세요."}
-  freeze_extra_modules.ko: {"label": "추가 모듈 (선택 사항)", "info": "학습 가능한 모듈의 이름(히든 레이어 제외). 모듈 간에는 쉼표(,)로 구분하십시오."}
-  lora_tab.ko: {"label": "LoRA 구성"}
-  lora_rank.ko: {"label": "LoRA 랭크", "info": "LoRA 행렬의 랭크."}
-  lora_alpha.ko: {"label": "LoRA 알파", "info": "LoRA 스케일링 계수."}
-  lora_dropout.ko: {"label": "LoRA 드롭아웃", "info": "LoRA 가중치의 드롭아웃 비율."}
-  loraplus_lr_ratio.ko: {"label": "LoRA+ LR 비율", "info": "LoRA에서 B 행렬의 LR 비율."}
-  create_new_adapter.ko: {"label": "새 어댑터 생성", "info": "기존 어댑터 위에 무작위로 초기화된 가중치를 가진 새 어댑터를 생성합니다."}
-  use_rslora.ko: {"label": "rslora 사용", "info": "LoRA 레이어에 랭크 안정화 스케일링 계수를 사용합니다."}
-  use_dora.ko: {"label": "DoRA 사용", "info": "가중치-분해 LoRA를 사용합니다."}
-  use_pissa.ko: {"label": "PiSSA 사용", "info": "PiSSA 방법을 사용합니다."}
-  lora_target.ko: {"label": "LoRA 모듈 (선택 사항)", "info": "LoRA를 적용할 모듈의 이름. 모듈 간에는 쉼표(,)로 구분하십시오."}
-  additional_target.ko: {"label": "추가 모듈 (선택 사항)", "info": "LoRA 레이어 외에 학습 가능하게 설정할 모듈의 이름. 모듈 간에는 쉼표(,)로 구분하십시오."}
-  rlhf_tab.ko: {"label": "RLHF 구성"}
-  pref_beta.ko: {"label": "베타 값", "info": "손실 함수에서 베타 매개 변수의 값."}
-  pref_ftx.ko: {"label": "Ftx 감마", "info": "최종 로스 함수에서 SFT 로스의 가중치."}
-  pref_loss.ko: {"label": "로스 유형", "info": "로스 함수의 유형."}
-  reward_model.ko: {"label": "리워드 모델", "info": "PPO 학습에서 사용할 리워드 모델의 어댑터."}
-  ppo_score_norm.ko: {"label": "스코어 정규화", "info": "PPO 학습에서 스코어를 정규화합니다."}
-  ppo_whiten_rewards.ko: {"label": "보상 백화", "info": "PPO 훈련에서 보상을 백화(Whiten)합니다."}
-  galore_tab.ko: {"label": "GaLore 구성"}
-  use_galore.ko: {"label": "GaLore 사용", "info": "그레디언트 로우 랭크 프로젝션을 활성화합니다."}
-  galore_rank.ko: {"label": "GaLore 랭크", "info": "GaLore 그레디언트의 랭크."}
-  galore_update_interval.ko: {"label": "업데이트 간격", "info": "GaLore 프로젝션을 업데이트할 간격의 스텝 수."}
-  galore_scale.ko: {"label": "GaLore 스케일", "info": "GaLore 스케일링 계수."}
-  galore_target.ko: {"label": "GaLore 모듈", "info": "GaLore를 적용할 모듈의 이름. 모듈 간에는 쉼표(,)로 구분하십시오."}
-  badam_tab.ko: {"label": "BAdam 설정"}
-  use_badam.ko: {"label": "BAdam 사용", "info": "BAdam 옵티마이저를 사용합니다."}
-  badam_mode.ko: {"label": "BAdam 모드", "info": "레이어-BAdam 옵티마이저인지 비율-BAdam 옵티마이저인지."}
-  badam_switch_mode.ko: {"label": "스위치 모드", "info": "레이어-BAdam을 위한 블록 선택 전략."}
-  badam_switch_interval.ko: {"label": "전환 간격", "info": "레이어-BAdam을 위한 블록 업데이트 간 스텝 수."}
-  badam_update_ratio.ko: {"label": "업데이트 비율", "info": "비율-BAdam의 업데이트 비율."}
-  cmd_preview_btn.ko: {"value": "명령어 미리보기"}
-  arg_save_btn.ko: {"value": "Argument 저장"}
-  arg_load_btn.ko: {"value": "Argument 불러오기"}
-  start_btn.ko: {"value": "시작"}
-  stop_btn.ko: {"value": "중단"}
-  output_dir.ko: {"label": "출력 디렉토리", "info": "결과를 저장할 디렉토리."}
-  config_path.ko: {"label": "설정 경로", "info": "Arguments 저장 파일 경로."}
-  device_count.ko: {"label": "디바이스 수", "info": "사용 가능한 디바이스 수."}
-  ds_stage.ko: {"label": "DeepSpeed 단계", "info": "분산 학습을 위한 DeepSpeed 단계."}
-  ds_offload.ko: {"label": "오프로딩 활성화", "info": "DeepSpeed 오프로딩 활성화 (훈련 속도 느려짐)."}
-  output_box.ko: {"value": "준비 완료."}
-  loss_viewer.ko: {"label": "손실"}
-  predict.ko: {"label": "예측 결과 저장"}
-  infer_backend.ko: {"label": "추론 엔진"}
-  infer_dtype.ko: {"label": "추론 데이터 유형"}
-  load_btn.ko: {"value": "모델 불러오기"}
-  unload_btn.ko: {"value": "모델 언로드"}
-  info_box.ko: {"value": "모델이 언로드되었습니다. 모델을 먼저 불러오십시오."}
-  role.ko: {"label": "역할"}
-  system.ko: {"placeholder": "시스템 프롬프트 (선택 사항)"}
-  tools.ko: {"placeholder": "툴 (선택 사항)"}
-  image.ko: {"label": "이미지 (선택 사항)"}
-  query.ko: {"placeholder": "입력..."}
-  submit_btn.ko: {"value": "제출"}
-  max_length.ko: {"label": "최대 길이"}
-  max_new_tokens.ko: {"label": "응답의 최대 길이"}
-  top_p.ko: {"label": "Top-p"}
-  temperature.ko: {"label": "온도"}
-  clear_btn.ko: {"value": "기록 지우기"}
-  export_size.ko: {"label": "최대 샤드 크기 (GB)", "info": "모델 파일의 최대 크기."}
-  export_quantization_bit.ko: {"label": "양자화 비트 내보내기", "info": "내보낸 모델의 양자화."}
-  export_quantization_dataset.ko: {"label": "양자화 데이터셋 내보내기", "info": "양자화에 사용되는 교정 데이터셋."}
-  export_device.ko: {"label": "내보낼 장치", "info": "모델을 내보내는 데 사용할 장치."}
-  export_legacy_format.ko: {"label": "레거시 형식 내보내기", "info": "모델을 저장하는 데 safetensors를 사용하지 않습니다."}
-  export_dir.ko: {"label": "내보내기 디렉토리", "info": "내보낸 모델을 저장할 디렉토리."}
-  export_hub_model_id.ko: {"label": "HF 허브 ID (선택 사항)", "info": "모델을 Hugging Face 허브에 업로드하기 위한 레포 ID."}
-  export_btn.ko: {"value": "내보내기"}

@@ -1951,168 +1530,140 @@ ALERTS = {
The final hunk deletes the "ko" string from every ALERTS message; the en, ru, and zh strings are unchanged:

-  err_conflict.ko: "프로세스가 실행 중입니다. 먼저 중단하십시오."
-  err_exists.ko: "모델이 로드되었습니다. 먼저 언로드하십시오."
-  err_no_model.ko: "모델을 선택하십시오."
-  err_no_path.ko: "모델을 찾을 수 없습니다."
-  err_no_dataset.ko: "데이터 세트를 선택하십시오."
-  err_no_adapter.ko: "어댑터를 선택하십시오."
-  err_no_output_dir.ko: "출력 디렉토리를 제공하십시오."
-  err_no_reward_model.ko: "리워드 모델을 선택하십시오."
-  err_no_export_dir.ko: "Export 디렉토리를 제공하십시오."
-  err_gptq_lora.ko: "모델을 양자화하기 전에 어댑터를 병합하십시오."
-  err_failed.ko: "실패했습니다."
-  err_demo.ko: "데모 모드에서는 훈련을 사용할 수 없습니다. 먼저 프라이빗 레포지토리로 작업 공간을 복제하십시오."
-  err_tool_name.ko: "툴 이름을 찾을 수 없습니다."
-  err_json_schema.ko: "잘못된 JSON 스키마입니다."
-  err_config_not_found.ko: "Config 파일을 찾을 수 없습니다."
-  warn_no_cuda.ko: "CUDA 환경이 감지되지 않았습니다."
-  warn_output_dir_exists.ko: "출력 디렉토리가 이미 존재합니다. 위 출력 디렉토리에 저장된 학습을 재개합니다."
-  info_aborting.ko: "중단되었습니다. 종료를 기다리십시오..."
-  info_aborted.ko: "준비되었습니다."
-  info_finished.ko: "완료되었습니다."
-  info_config_saved.ko: "매개변수가 저장되었습니다: "
-  info_config_loaded.ko: "매개변수가 복원되었습니다."
-  info_loading.ko: "모델 로딩 중..."
-  info_unloading.ko: "모델 언로딩 중..."
-  info_loaded.ko: "모델이 로드되었습니다. 이제 모델과 채팅할 수 있습니다!"
-  info_unloaded.ko: "모델이 언로드되었습니다."
-  info_exporting.ko: "모델 내보내기 중..."
-  info_exported.ko: "모델이 내보내졌습니다."
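These deletions stay consistent only because "ko" also leaves the language dropdowns in top.py and interface.py above. A lookup sketch showing why, assuming the webui resolves labels per component key and language (the function name is hypothetical):

    def localize(key: str, lang: str) -> str:
        # With the "ko" blocks gone, lang="ko" would raise KeyError here, so
        # the dropdown choices must shrink in lockstep with LOCALES.
        return LOCALES[key][lang].get("label", "")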
src/llamafactory/webui/runner.py (+13 -11)

@@ -104,6 +104,11 @@ class Runner:
         model_name, finetuning_type = get("top.model_name"), get("top.finetuning_type")
         user_config = load_config()

+        if get("top.quantization_bit") in QUANTIZATION_BITS:
+            quantization_bit = int(get("top.quantization_bit"))
+        else:
+            quantization_bit = None
+
         args = dict(
             stage=TRAINING_STAGES[get("train.training_stage")],
             do_train=True,

@@ -111,6 +116,8 @@ class Runner:
             cache_dir=user_config.get("cache_dir", None),
             preprocessing_num_workers=16,
             finetuning_type=finetuning_type,
+            quantization_bit=quantization_bit,
+            quantization_method=get("top.quantization_method"),
             template=get("top.template"),
             rope_scaling=get("top.rope_scaling") if get("top.rope_scaling") in ["linear", "dynamic"] else None,
             flash_attn="fa2" if get("top.booster") == "flashattn2" else "auto",

@@ -159,11 +166,6 @@ class Runner:
         else:  # str
             args["model_name_or_path"] = get_save_dir(model_name, finetuning_type, get("top.checkpoint_path"))

-        # quantization
-        if get("top.quantization_bit") in QUANTIZATION_BITS:
-            args["quantization_bit"] = int(get("top.quantization_bit"))
-            args["quantization_method"] = get("top.quantization_method")
-
         # freeze config
         if args["finetuning_type"] == "freeze":
             args["freeze_trainable_layers"] = get("train.freeze_trainable_layers")

@@ -240,12 +242,18 @@ class Runner:
         model_name, finetuning_type = get("top.model_name"), get("top.finetuning_type")
         user_config = load_config()

+        if get("top.quantization_bit") in QUANTIZATION_BITS:
+            quantization_bit = int(get("top.quantization_bit"))
+        else:
+            quantization_bit = None
+
         args = dict(
             stage="sft",
             model_name_or_path=get("top.model_path"),
             cache_dir=user_config.get("cache_dir", None),
             preprocessing_num_workers=16,
             finetuning_type=finetuning_type,
+            quantization_bit=quantization_bit,
+            quantization_method=get("top.quantization_method"),
             template=get("top.template"),
             rope_scaling=get("top.rope_scaling") if get("top.rope_scaling") in ["linear", "dynamic"] else None,

@@ -269,7 +277,6 @@ class Runner:
         else:
             args["do_eval"] = True

-        # checkpoints
         if get("top.checkpoint_path"):
             if finetuning_type in PEFT_METHODS:  # list
                 args["adapter_name_or_path"] = ",".join(

@@ -278,11 +285,6 @@ class Runner:
             else:  # str
                 args["model_name_or_path"] = get_save_dir(model_name, finetuning_type, get("top.checkpoint_path"))

-        # quantization
-        if get("top.quantization_bit") in QUANTIZATION_BITS:
-            args["quantization_bit"] = int(get("top.quantization_bit"))
-            args["quantization_method"] = get("top.quantization_method")
-
         return args

     def _preview(self, data: Dict["Component", Any], do_train: bool) -> Generator[Dict["Component", str], None, None]:
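Both argument builders in Runner now normalize the quantization bit once, before args = dict(...) is assembled. The hoisted logic as a standalone helper; the helper name and the example bit values are assumptions (the webui dropdown delivers the choice as a string):

    from typing import Optional

    def normalize_quantization_bit(choice: str, valid_bits: list) -> Optional[int]:
        # "none" or any unrecognized choice disables quantization.
        return int(choice) if choice in valid_bits else None

    normalize_quantization_bit("4", ["8", "4"])     # -> 4
    normalize_quantization_bit("none", ["8", "4"])  # -> None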