OpenDAS / LLaMA-Factory · Commits

Commit 581d366d
Authored Apr 15, 2025 by chenych

Support GLM-4/GLM-4-0414/GLM-Z1

Parent: 428c5813
Changes: 107 · Showing 20 changed files with 122 additions and 68 deletions (+122 −68)
Changed files shown on this page:

  src/llamafactory/train/kto/workflow.py             +7  −1
  src/llamafactory/train/pt/workflow.py              +7  −1
  src/llamafactory/train/rm/workflow.py              +9  −1
  src/llamafactory/train/sft/workflow.py             +9  −1
  src/llamafactory/train/trainer_utils.py            +4  −0
  src/llamafactory/webui/components/export.py        +1  −1
  src/llamafactory/webui/components/top.py           +1  −1
  src/llamafactory/webui/runner.py                   +1  −0
  tests/data/processor/test_feedback.py              +3  −3
  tests/data/processor/test_pairwise.py              +3  −3
  tests/data/processor/test_supervised.py            +6  −6
  tests/data/processor/test_unsupervised.py          +3  −3
  tests/data/test_collator.py                        +2  −2
  tests/data/test_loader.py                          +2  −2
  tests/data/test_mm_plugin.py                       +25 −4
  tests/data/test_template.py                        +29 −30
  tests/e2e/test_chat.py                             +2  −2
  tests/e2e/test_train.py                            +4  −3
  tests/model/model_utils/test_attention.py          +2  −2
  tests/model/model_utils/test_checkpointing.py      +2  −2
src/llamafactory/train/kto/workflow.py

@@ -82,7 +82,13 @@ def run_kto(
         trainer.save_metrics("train", train_result.metrics)
         trainer.save_state()
         if trainer.is_world_process_zero() and finetuning_args.plot_loss:
-            plot_loss(training_args.output_dir, keys=["loss", "eval_loss", "rewards/chosen"])
+            keys = ["loss", "rewards/chosen"]
+            if isinstance(dataset_module.get("eval_dataset"), dict):
+                keys += [f"eval_{key}_loss" for key in dataset_module["eval_dataset"].keys()]
+            else:
+                keys += ["eval_loss"]
+
+            plot_loss(training_args.output_dir, keys=keys)
 
     # Evaluation
     if training_args.do_eval:
...
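Note: the key-selection logic added above (and repeated in the PT/RM/SFT workflows below) switches between a single eval curve and one curve per named eval dataset. A minimal standalone sketch of that branch, using made-up dataset names rather than anything from this commit:

    # Sketch of the new plot key expansion (hypothetical eval dataset names).
    dataset_module = {"eval_dataset": {"alpaca": object(), "sharegpt": object()}}

    keys = ["loss", "rewards/chosen"]
    if isinstance(dataset_module.get("eval_dataset"), dict):
        keys += [f"eval_{key}_loss" for key in dataset_module["eval_dataset"].keys()]
    else:
        keys += ["eval_loss"]

    print(keys)  # ['loss', 'rewards/chosen', 'eval_alpaca_loss', 'eval_sharegpt_loss']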
src/llamafactory/train/pt/workflow.py

@@ -66,7 +66,13 @@ def run_pt(
         trainer.save_metrics("train", train_result.metrics)
         trainer.save_state()
         if trainer.is_world_process_zero() and finetuning_args.plot_loss:
-            plot_loss(training_args.output_dir, keys=["loss", "eval_loss"])
+            keys = ["loss"]
+            if isinstance(dataset_module.get("eval_dataset"), dict):
+                keys += [f"eval_{key}_loss" for key in dataset_module["eval_dataset"].keys()]
+            else:
+                keys += ["eval_loss"]
+
+            plot_loss(training_args.output_dir, keys=keys)
 
     # Evaluation
     if training_args.do_eval:
...
src/llamafactory/train/rm/workflow.py

@@ -74,7 +74,15 @@ def run_rm(
         trainer.save_metrics("train", train_result.metrics)
         trainer.save_state()
         if trainer.is_world_process_zero() and finetuning_args.plot_loss:
-            plot_loss(training_args.output_dir, keys=["loss", "eval_loss", "eval_accuracy"])
+            keys = ["loss"]
+            if isinstance(dataset_module.get("eval_dataset"), dict):
+                keys += sum(
+                    [[f"eval_{key}_loss", f"eval_{key}_accuracy"] for key in dataset_module["eval_dataset"].keys()], []
+                )
+            else:
+                keys += ["eval_loss", "eval_accuracy"]
+
+            plot_loss(training_args.output_dir, keys=keys)
 
     # Evaluation
     if training_args.do_eval:
...
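In the RM (and SFT) workflow each named eval dataset contributes both a loss key and an accuracy key, and the nested list is flattened with `sum(..., [])`. A small illustration of that idiom with invented names:

    eval_names = ["helpful", "harmless"]  # hypothetical eval dataset names
    keys = ["loss"]
    keys += sum([[f"eval_{name}_loss", f"eval_{name}_accuracy"] for name in eval_names], [])
    print(keys)
    # ['loss', 'eval_helpful_loss', 'eval_helpful_accuracy', 'eval_harmless_loss', 'eval_harmless_accuracy']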
src/llamafactory/train/sft/workflow.py

@@ -110,7 +110,15 @@ def run_sft(
         trainer.save_metrics("train", train_result.metrics)
         trainer.save_state()
         if trainer.is_world_process_zero() and finetuning_args.plot_loss:
-            plot_loss(training_args.output_dir, keys=["loss", "eval_loss", "eval_accuracy"])
+            keys = ["loss"]
+            if isinstance(dataset_module.get("eval_dataset"), dict):
+                keys += sum(
+                    [[f"eval_{key}_loss", f"eval_{key}_accuracy"] for key in dataset_module["eval_dataset"].keys()], []
+                )
+            else:
+                keys += ["eval_loss", "eval_accuracy"]
+
+            plot_loss(training_args.output_dir, keys=keys)
 
     if training_args.predict_with_generate:
         tokenizer.padding_side = "left"  # use left-padding in generation
...
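The unchanged context above shows the generation path flipping the tokenizer to left padding. A short sketch of why that matters for batched generation (the pad-token fallback is an assumption, not part of this commit):

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("llamafactory/tiny-random-Llama-3")
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token  # assumed fallback so padding works
    tokenizer.padding_side = "left"  # pad on the left so generated tokens directly follow the prompt
    batch = tokenizer(["Hi", "A much longer prompt"], padding=True, return_tensors="pt")
    print(batch["input_ids"].shape)  # every row ends with real prompt tokens, not padding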
src/llamafactory/train/trainer_utils.py

@@ -48,6 +48,7 @@ if is_apollo_available():
 if is_ray_available():
+    import ray
     from ray.train import RunConfig, ScalingConfig
     from ray.train.torch import TorchTrainer

@@ -644,6 +645,9 @@ def get_ray_trainer(
     if not ray_args.use_ray:
         raise ValueError("Ray was not enabled. Please set `USE_RAY=1` to enable ray.")
 
+    if ray_args.ray_init_kwargs is not None:
+        ray.init(**ray_args.ray_init_kwargs)
+
     trainer = TorchTrainer(
         training_function,
         train_loop_config=train_loop_config,
...
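The new `ray.init(**ray_args.ray_init_kwargs)` call simply forwards a user-supplied dict to Ray before the `TorchTrainer` is built. A hedged sketch of the call pattern; the example kwargs are common `ray.init` options, not defaults taken from this commit:

    import ray

    ray_init_kwargs = {"num_cpus": 8, "ignore_reinit_error": True}  # example values only
    if ray_init_kwargs is not None:
        ray.init(**ray_init_kwargs)  # starts or connects to the Ray runtime with these options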
src/llamafactory/webui/components/export.py

@@ -111,7 +111,7 @@ def create_export_tab(engine: "Engine") -> dict[str, "Component"]:
     with gr.Row():
         export_size = gr.Slider(minimum=1, maximum=100, value=5, step=1)
         export_quantization_bit = gr.Dropdown(choices=["none"] + GPTQ_BITS, value="none")
-        export_quantization_dataset = gr.Textbox(value="data/c4_demo.json")
+        export_quantization_dataset = gr.Textbox(value="data/c4_demo.jsonl")
         export_device = gr.Radio(choices=["cpu", "auto"], value="cpu")
         export_legacy_format = gr.Checkbox()
...
src/llamafactory/webui/components/top.py

@@ -42,7 +42,7 @@ def create_top() -> dict[str, "Component"]:
     with gr.Row():
         quantization_bit = gr.Dropdown(choices=["none", "8", "4"], value="none", allow_custom_value=True)
-        quantization_method = gr.Dropdown(choices=["bitsandbytes", "hqq", "eetq"], value="bitsandbytes")
+        quantization_method = gr.Dropdown(choices=["bnb", "hqq", "eetq"], value="bnb")
         template = gr.Dropdown(choices=list(TEMPLATES.keys()), value="default")
         rope_scaling = gr.Dropdown(choices=["none", "linear", "dynamic", "yarn", "llama3"], value="none")
         booster = gr.Dropdown(choices=["auto", "flashattn2", "unsloth", "liger_kernel"], value="auto")
...
src/llamafactory/webui/runner.py

@@ -368,6 +368,7 @@ class Runner:
         if args.get("deepspeed", None) is not None:
             env["FORCE_TORCHRUN"] = "1"
 
+        # NOTE: DO NOT USE shell=True to avoid security risk
         self.trainer = Popen(["llamafactory-cli", "train", save_cmd(args)], env=env)
         yield from self.monitor()
...
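The added comment records why the trainer process is spawned from an argument list rather than a shell string. A generic illustration of the difference (the hostile value is made up; the calls are deliberately left commented out):

    from subprocess import Popen  # shown for context; both calls below stay commented out

    config_path = "cfg.yaml; rm -rf ~"  # hypothetical attacker-controlled value
    # Unsafe: shell=True hands the whole string to a shell, so the injected command would run.
    #   Popen(f"llamafactory-cli train {config_path}", shell=True)
    # Safe (the form used in runner.py): no shell parsing, the value is one literal argument.
    #   Popen(["llamafactory-cli", "train", config_path])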
tests/data/processor/test_feedback.py

@@ -25,10 +25,10 @@ from llamafactory.train.test_utils import load_dataset_module
 DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")
-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")
 
 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "stage": "kto",
     "do_train": True,
     "finetuning_type": "full",

@@ -45,7 +45,7 @@ TRAIN_ARGS = {
 @pytest.mark.parametrize("num_samples", [16])
 def test_feedback_data(num_samples: int):
     train_dataset = load_dataset_module(**TRAIN_ARGS)["train_dataset"]
-    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     original_data = load_dataset(DEMO_DATA, name="kto_en_demo", split="train")
     indexes = random.choices(range(len(original_data)), k=num_samples)
     for index in indexes:
...
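The test files below repeat the same mechanical change: the tiny reference checkpoint is now read from `TINY_LLAMA3` instead of `TINY_LLAMA`. A minimal sketch of the lookup and how it can be overridden (the local path is an invented example):

    import os

    # Unset: falls back to the Hub repo. To run offline, e.g.:
    #   export TINY_LLAMA3=/data/checkpoints/tiny-random-Llama-3   (hypothetical local path)
    TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")
    print(TINY_LLAMA3)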
tests/data/processor/test_pairwise.py

@@ -25,10 +25,10 @@ from llamafactory.train.test_utils import load_dataset_module
 DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")
-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")
 
 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "stage": "rm",
     "do_train": True,
     "finetuning_type": "full",

@@ -54,7 +54,7 @@ def _convert_sharegpt_to_openai(messages: list[dict[str, str]]) -> list[dict[str,
 @pytest.mark.parametrize("num_samples", [16])
 def test_pairwise_data(num_samples: int):
     train_dataset = load_dataset_module(**TRAIN_ARGS)["train_dataset"]
-    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     original_data = load_dataset(DEMO_DATA, name="dpo_en_demo", split="train")
     indexes = random.choices(range(len(original_data)), k=num_samples)
     for index in indexes:
...
tests/data/processor/test_supervised.py

@@ -25,12 +25,12 @@ from llamafactory.train.test_utils import load_dataset_module
 DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")
-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")
 TINY_DATA = os.getenv("TINY_DATA", "llamafactory/tiny-supervised-dataset")
 
 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "stage": "sft",
     "do_train": True,
     "finetuning_type": "full",

@@ -45,7 +45,7 @@ TRAIN_ARGS = {
 @pytest.mark.parametrize("num_samples", [16])
 def test_supervised_single_turn(num_samples: int):
     train_dataset = load_dataset_module(dataset_dir="ONLINE", dataset=TINY_DATA, **TRAIN_ARGS)["train_dataset"]
-    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     original_data = load_dataset(TINY_DATA, split="train")
     indexes = random.choices(range(len(original_data)), k=num_samples)
     for index in indexes:

@@ -66,7 +66,7 @@ def test_supervised_multi_turn(num_samples: int):
     train_dataset = load_dataset_module(dataset_dir="REMOTE:" + DEMO_DATA, dataset="system_chat", **TRAIN_ARGS)["train_dataset"]
-    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     original_data = load_dataset(DEMO_DATA, name="system_chat", split="train")
     indexes = random.choices(range(len(original_data)), k=num_samples)
     for index in indexes:

@@ -79,7 +79,7 @@ def test_supervised_train_on_prompt(num_samples: int):
     train_dataset = load_dataset_module(dataset_dir="REMOTE:" + DEMO_DATA, dataset="system_chat", train_on_prompt=True, **TRAIN_ARGS)["train_dataset"]
-    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     original_data = load_dataset(DEMO_DATA, name="system_chat", split="train")
     indexes = random.choices(range(len(original_data)), k=num_samples)
     for index in indexes:

@@ -93,7 +93,7 @@ def test_supervised_mask_history(num_samples: int):
     train_dataset = load_dataset_module(dataset_dir="REMOTE:" + DEMO_DATA, dataset="system_chat", mask_history=True, **TRAIN_ARGS)["train_dataset"]
-    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     original_data = load_dataset(DEMO_DATA, name="system_chat", split="train")
     indexes = random.choices(range(len(original_data)), k=num_samples)
     for index in indexes:
...
tests/data/processor/test_unsupervised.py

@@ -24,12 +24,12 @@ from llamafactory.train.test_utils import load_dataset_module
 DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")
-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")
 TINY_DATA = os.getenv("TINY_DATA", "llamafactory/tiny-supervised-dataset")
 
 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "stage": "ppo",
     "do_train": True,
     "finetuning_type": "full",

@@ -48,7 +48,7 @@ TRAIN_ARGS = {
 @pytest.mark.parametrize("num_samples", [16])
 def test_unsupervised_data(num_samples: int):
     train_dataset = load_dataset_module(**TRAIN_ARGS)["train_dataset"]
-    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     original_data = load_dataset(DEMO_DATA, name="system_chat", split="train")
     indexes = random.choices(range(len(original_data)), k=num_samples)
     for index in indexes:
...
tests/data/test_collator.py

@@ -24,11 +24,11 @@ from llamafactory.hparams import get_infer_args
 from llamafactory.model import load_tokenizer
 
-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")
 
 
 def test_base_collator():
-    model_args, data_args, *_ = get_infer_args({"model_name_or_path": TINY_LLAMA, "template": "default"})
+    model_args, data_args, *_ = get_infer_args({"model_name_or_path": TINY_LLAMA3, "template": "default"})
     tokenizer_module = load_tokenizer(model_args)
     template = get_template_and_fix_tokenizer(tokenizer_module["tokenizer"], data_args)
     data_collator = MultiModalDataCollatorForSeq2Seq(
...
tests/data/test_loader.py

@@ -19,12 +19,12 @@ from llamafactory.train.test_utils import load_dataset_module
 DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")
-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")
 TINY_DATA = os.getenv("TINY_DATA", "llamafactory/tiny-supervised-dataset")
 
 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "stage": "sft",
     "do_train": True,
     "finetuning_type": "full",
...
tests/data/test_mm_plugin.py

@@ -20,7 +20,6 @@ import torch
 from PIL import Image
 
 from llamafactory.data.mm_plugin import get_mm_plugin
-from llamafactory.extras.packages import is_transformers_version_greater_than
 from llamafactory.hparams import get_infer_args
 from llamafactory.model import load_tokenizer

@@ -35,7 +34,8 @@ if TYPE_CHECKING:
 HF_TOKEN = os.getenv("HF_TOKEN")
-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA4 = os.getenv("TINY_LLAMA4", "llamafactory/tiny-random-Llama-4")
 
 MM_MESSAGES = [
     {"role": "user", "content": "<image>What is in this image?"},

@@ -130,13 +130,13 @@ def _check_plugin(
 def test_base_plugin():
-    tokenizer_module = _load_tokenizer_module(model_name_or_path=TINY_LLAMA)
+    tokenizer_module = _load_tokenizer_module(model_name_or_path=TINY_LLAMA3)
     base_plugin = get_mm_plugin(name="base")
     check_inputs = {"plugin": base_plugin, **tokenizer_module}
     _check_plugin(**check_inputs)
 
 
-@pytest.mark.skipif(not HF_TOKEN or not is_transformers_version_greater_than("4.50.0"), reason="Gated model.")
+@pytest.mark.skipif(not HF_TOKEN, reason="Gated model.")
 def test_gemma3_plugin():
     image_seqlen = 256
     tokenizer_module = _load_tokenizer_module(model_name_or_path="google/gemma-3-4b-it")

@@ -157,6 +157,27 @@ def test_gemma3_plugin():
     _check_plugin(**check_inputs)
 
 
+@pytest.mark.xfail(reason="Unknown error.")
+def test_llama4_plugin():
+    tokenizer_module = _load_tokenizer_module(model_name_or_path=TINY_LLAMA4)
+    processor = tokenizer_module["processor"]
+    llama4_plugin = get_mm_plugin(name="llama4", image_token="<|image|>")
+    check_inputs = {"plugin": llama4_plugin, **tokenizer_module}
+    mm_inputs = _get_mm_inputs(tokenizer_module["processor"])
+    image_height, image_width = mm_inputs["pixel_values"][0].shape[-2:]
+    num_patches_per_chunk = int(
+        (image_height // processor.patch_size) * (image_width // processor.patch_size) // processor.downsample_ratio
+    )
+    aspect_ratios = mm_inputs.pop("aspect_ratios")
+    tokens_for_this_image = processor._prompt_split_image(aspect_ratios[0], num_patches_per_chunk)
+    check_inputs["expected_mm_messages"] = [
+        {key: value.replace("<image>", tokens_for_this_image) for key, value in message.items()}
+        for message in MM_MESSAGES
+    ]
+    check_inputs["expected_mm_inputs"] = mm_inputs
+    _check_plugin(**check_inputs)
+
+
 def test_llava_plugin():
     image_seqlen = 576
     tokenizer_module = _load_tokenizer_module(model_name_or_path="llava-hf/llava-1.5-7b-hf")
...
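The new `test_llama4_plugin` derives the per-image token count from the processor geometry. A worked example of that arithmetic with hypothetical values (the real numbers come from the tiny Llama 4 processor, not from here):

    # Assumed geometry, for illustration only.
    image_height, image_width = 448, 448
    patch_size = 14
    downsample_ratio = 4

    num_patches_per_chunk = int((image_height // patch_size) * (image_width // patch_size) // downsample_ratio)
    print(num_patches_per_chunk)  # (32 * 32) // 4 = 256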
tests/data/test_template.py

@@ -29,7 +29,8 @@ if TYPE_CHECKING:
 HF_TOKEN = os.getenv("HF_TOKEN")
-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA4 = os.getenv("TINY_LLAMA4", "llamafactory/tiny-random-Llama-4")
 
 MESSAGES = [
     {"role": "user", "content": "How are you"},

@@ -75,7 +76,7 @@ def _check_template(model_id: str, template_name: str, prompt_str: str, answer_s
 @pytest.mark.parametrize("use_fast", [True, False])
 def test_encode_oneturn(use_fast: bool):
-    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA, use_fast=use_fast)
+    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3, use_fast=use_fast)
     template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))
     prompt_ids, answer_ids = template.encode_oneturn(tokenizer, MESSAGES)
     prompt_str = (

@@ -90,7 +91,7 @@ def test_encode_oneturn(use_fast: bool):
 @pytest.mark.parametrize("use_fast", [True, False])
 def test_encode_multiturn(use_fast: bool):
-    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA, use_fast=use_fast)
+    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3, use_fast=use_fast)
     template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))
     encoded_pairs = template.encode_multiturn(tokenizer, MESSAGES)
     prompt_str_1 = (

@@ -111,8 +112,8 @@ def test_encode_multiturn(use_fast: bool):
 @pytest.mark.parametrize("use_fast", [True, False])
 def test_jinja_template(use_fast: bool):
-    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA, use_fast=use_fast)
-    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA, use_fast=use_fast)
+    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3, use_fast=use_fast)
+    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3, use_fast=use_fast)
     template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))
     tokenizer.chat_template = template._get_jinja_template(tokenizer)  # llama3 template no replace
     assert tokenizer.chat_template != ref_tokenizer.chat_template

@@ -120,7 +121,7 @@ def test_jinja_template(use_fast: bool):
 def test_ollama_modelfile():
-    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))
     assert template.get_ollama_modelfile(tokenizer) == (
         "# ollama modelfile auto-generated by llamafactory\n\n"

@@ -137,7 +138,7 @@ def test_ollama_modelfile():
 def test_get_stop_token_ids():
-    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))
     assert set(template.get_stop_token_ids(tokenizer)) == {128008, 128009}

@@ -152,7 +153,7 @@ def test_gemma_template(use_fast: bool):
         "<start_of_turn>model\n"
     )
     answer_str = "很高兴认识你!<end_of_turn>\n"
-    _check_template("google/gemma-2-9b-it", "gemma", prompt_str, answer_str, use_fast)
+    _check_template("google/gemma-3-4b-it", "gemma", prompt_str, answer_str, use_fast)
 
 
 @pytest.mark.skipif(not HF_TOKEN, reason="Gated model.")

@@ -168,7 +169,20 @@ def test_llama3_template(use_fast: bool):
     _check_template("meta-llama/Meta-Llama-3-8B-Instruct", "llama3", prompt_str, answer_str, use_fast)
 
 
+@pytest.mark.skipif(not HF_TOKEN, reason="Gated model.")
+@pytest.mark.parametrize(
+    "use_fast", [True, pytest.param(False, marks=pytest.mark.xfail(reason="Llama 4 has no slow tokenizer."))]
+)
+def test_llama4_template(use_fast: bool):
+    prompt_str = (
+        "<|begin_of_text|><|header_start|>user<|header_end|>\n\nHow are you<|eot|>"
+        "<|header_start|>assistant<|header_end|>\n\nI am fine!<|eot|>"
+        "<|header_start|>user<|header_end|>\n\n你好<|eot|>"
+        "<|header_start|>assistant<|header_end|>\n\n"
+    )
+    answer_str = "很高兴认识你!<|eot|>"
+    _check_template(TINY_LLAMA4, "llama4", prompt_str, answer_str, use_fast)
+
+
 @pytest.mark.parametrize(
     "use_fast", [True, pytest.param(False, marks=pytest.mark.xfail(reason="Phi-4 slow tokenizer is broken."))]
 )

@@ -183,35 +197,21 @@ def test_phi4_template(use_fast: bool):
     _check_template("microsoft/phi-4", "phi4", prompt_str, answer_str, use_fast)
 
 
-@pytest.mark.skipif(not HF_TOKEN, reason="Gated model.")  # TODO: why it is gated?
 @pytest.mark.parametrize("use_fast", [True, False])
 def test_qwen_template(use_fast: bool):
     prompt_str = (
-        "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
+        "<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n"
         "<|im_start|>user\nHow are you<|im_end|>\n"
         "<|im_start|>assistant\nI am fine!<|im_end|>\n"
         "<|im_start|>user\n你好<|im_end|>\n"
         "<|im_start|>assistant\n"
     )
     answer_str = "很高兴认识你!<|im_end|>\n"
-    _check_template("Qwen/Qwen2-7B-Instruct", "qwen", prompt_str, answer_str, use_fast)
-
-
-@pytest.mark.parametrize("use_fast", [True, False])
-@pytest.mark.xfail(reason="Yi tokenizer is broken.")
-def test_yi_template(use_fast: bool):
-    prompt_str = (
-        "<|im_start|>user\nHow are you<|im_end|>\n"
-        "<|im_start|>assistant\nI am fine!<|im_end|>\n"
-        "<|im_start|>user\n你好<|im_end|>\n"
-        "<|im_start|>assistant\n"
-    )
-    answer_str = "很高兴认识你!<|im_end|>\n"
-    _check_template("01-ai/Yi-1.5-6B-Chat", "yi", prompt_str, answer_str, use_fast)
+    _check_template("Qwen/Qwen2.5-7B-Instruct", "qwen", prompt_str, answer_str, use_fast)
 
 
-def test_parse_template():
-    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA, token=HF_TOKEN)
+def test_parse_llama3_template():
+    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3, token=HF_TOKEN)
     template = parse_template(tokenizer)
     assert template.format_user.slots == ["<|start_header_id|>user<|end_header_id|>\n\n{{content}}<|eot_id|>"

@@ -223,12 +223,11 @@ def test_parse_template():
     assert template.default_system == ""
 
 
-@pytest.mark.skipif(not HF_TOKEN, reason="Gated model.")
 def test_parse_qwen_template():
-    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-7B-Instruct", token=HF_TOKEN)
+    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct", token=HF_TOKEN)
     template = parse_template(tokenizer)
     assert template.format_user.slots == ["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]
     assert template.format_assistant.slots == ["{{content}}<|im_end|>\n"]
     assert template.format_system.slots == ["<|im_start|>system\n{{content}}<|im_end|>\n"]
     assert template.format_prefix.slots == []
-    assert template.default_system == "You are a helpful assistant."
+    assert template.default_system == "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."
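For reference, the `format_user.slots` assertions above treat a template as literal strings with a `{{content}}` placeholder. A toy rendering of the Qwen user slot; this uses plain string replacement and is not the library's renderer:

    user_slot = "<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"
    print(user_slot.replace("{{content}}", "How are you"))
    # <|im_start|>user
    # How are you<|im_end|>
    # <|im_start|>assistant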
tests/e2e/test_chat.py

@@ -17,10 +17,10 @@ import os
 from llamafactory.chat import ChatModel
 
-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")
 
 INFER_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "finetuning_type": "lora",
     "template": "llama3",
     "infer_dtype": "float16",
...
tests/e2e/test_train.py

@@ -21,12 +21,12 @@ from llamafactory.train.tuner import export_model, run_exp
 DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")
-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")
 TINY_LLAMA_ADAPTER = os.getenv("TINY_LLAMA_ADAPTER", "llamafactory/tiny-random-Llama-3-lora")
 
 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "do_train": True,
     "finetuning_type": "lora",
     "dataset_dir": "REMOTE:" + DEMO_DATA,

@@ -35,10 +35,11 @@ TRAIN_ARGS = {
     "overwrite_output_dir": True,
     "per_device_train_batch_size": 1,
     "max_steps": 1,
+    "report_to": "none",
 }
 
 INFER_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "adapter_name_or_path": TINY_LLAMA_ADAPTER,
     "finetuning_type": "lora",
     "template": "llama3",
...
tests/model/model_utils/test_attention.py

@@ -21,10 +21,10 @@ from llamafactory.extras.packages import is_transformers_version_greater_than
 from llamafactory.train.test_utils import load_infer_model
 
-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")
 
 INFER_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "template": "llama3",
 }
...
tests/model/model_utils/test_checkpointing.py

@@ -21,10 +21,10 @@ from llamafactory.extras.misc import get_current_device
 from llamafactory.train.test_utils import load_train_model
 
-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")
 
 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "stage": "sft",
     "do_train": True,
     "finetuning_type": "lora",
...