Commit 581d366d authored by chenych

Support GLM-4/GLM-4-0414/GLM-Z1

parent 428c5813
@@ -82,7 +82,13 @@ def run_kto(
         trainer.save_metrics("train", train_result.metrics)
         trainer.save_state()
         if trainer.is_world_process_zero() and finetuning_args.plot_loss:
-            plot_loss(training_args.output_dir, keys=["loss", "eval_loss", "rewards/chosen"])
+            keys = ["loss", "rewards/chosen"]
+            if isinstance(dataset_module.get("eval_dataset"), dict):
+                keys += [f"eval_{key}_loss" for key in dataset_module["eval_dataset"].keys()]
+            else:
+                keys += ["eval_loss"]
+
+            plot_loss(training_args.output_dir, keys=keys)

     # Evaluation
     if training_args.do_eval:
......
@@ -66,7 +66,13 @@ def run_pt(
         trainer.save_metrics("train", train_result.metrics)
         trainer.save_state()
         if trainer.is_world_process_zero() and finetuning_args.plot_loss:
-            plot_loss(training_args.output_dir, keys=["loss", "eval_loss"])
+            keys = ["loss"]
+            if isinstance(dataset_module.get("eval_dataset"), dict):
+                keys += [f"eval_{key}_loss" for key in dataset_module["eval_dataset"].keys()]
+            else:
+                keys += ["eval_loss"]
+
+            plot_loss(training_args.output_dir, keys=keys)

     # Evaluation
     if training_args.do_eval:
......
@@ -74,7 +74,15 @@ def run_rm(
         trainer.save_metrics("train", train_result.metrics)
         trainer.save_state()
         if trainer.is_world_process_zero() and finetuning_args.plot_loss:
-            plot_loss(training_args.output_dir, keys=["loss", "eval_loss", "eval_accuracy"])
+            keys = ["loss"]
+            if isinstance(dataset_module.get("eval_dataset"), dict):
+                keys += sum(
+                    [[f"eval_{key}_loss", f"eval_{key}_accuracy"] for key in dataset_module["eval_dataset"].keys()], []
+                )
+            else:
+                keys += ["eval_loss", "eval_accuracy"]
+
+            plot_loss(training_args.output_dir, keys=keys)

     # Evaluation
     if training_args.do_eval:
......
@@ -110,7 +110,15 @@ def run_sft(
         trainer.save_metrics("train", train_result.metrics)
         trainer.save_state()
         if trainer.is_world_process_zero() and finetuning_args.plot_loss:
-            plot_loss(training_args.output_dir, keys=["loss", "eval_loss", "eval_accuracy"])
+            keys = ["loss"]
+            if isinstance(dataset_module.get("eval_dataset"), dict):
+                keys += sum(
+                    [[f"eval_{key}_loss", f"eval_{key}_accuracy"] for key in dataset_module["eval_dataset"].keys()], []
+                )
+            else:
+                keys += ["eval_loss", "eval_accuracy"]
+
+            plot_loss(training_args.output_dir, keys=keys)

     if training_args.predict_with_generate:
         tokenizer.padding_side = "left"  # use left-padding in generation
......
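For reference, here is a minimal standalone sketch (not part of the commit) of the key-building logic introduced in the four workflow hunks above, using a hypothetical eval_dataset dict whose split names ("alpaca", "sharegpt") are made up; it shows why per-split key suffixes are needed once the Trainer logs one metric set per eval split.

    # Standalone sketch of the new plot_loss key construction; split names are hypothetical.
    dataset_module = {"eval_dataset": {"alpaca": None, "sharegpt": None}}  # stand-in datasets

    keys = ["loss"]
    if isinstance(dataset_module.get("eval_dataset"), dict):
        keys += sum(
            [[f"eval_{key}_loss", f"eval_{key}_accuracy"] for key in dataset_module["eval_dataset"].keys()], []
        )
    else:
        keys += ["eval_loss", "eval_accuracy"]

    print(keys)
    # ['loss', 'eval_alpaca_loss', 'eval_alpaca_accuracy', 'eval_sharegpt_loss', 'eval_sharegpt_accuracy']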
@@ -48,6 +48,7 @@ if is_apollo_available():
 if is_ray_available():
+    import ray
     from ray.train import RunConfig, ScalingConfig
     from ray.train.torch import TorchTrainer
@@ -644,6 +645,9 @@ def get_ray_trainer(
     if not ray_args.use_ray:
         raise ValueError("Ray was not enabled. Please set `USE_RAY=1` to enable ray.")

+    if ray_args.ray_init_kwargs is not None:
+        ray.init(**ray_args.ray_init_kwargs)
+
     trainer = TorchTrainer(
         training_function,
         train_loop_config=train_loop_config,
......
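For context, a minimal standalone sketch (not from this commit) of how the new ray_init_kwargs hook behaves; the keyword values below are made up, but they are ordinary ray.init() arguments.

    import ray

    # Hypothetical value as it might arrive via ray_args.ray_init_kwargs.
    ray_init_kwargs = {"num_cpus": 8, "ignore_reinit_error": True}

    if ray_init_kwargs is not None:
        ray.init(**ray_init_kwargs)  # mirrors the guard added in get_ray_trainer

    print(ray.cluster_resources())
    ray.shutdown()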
@@ -111,7 +111,7 @@ def create_export_tab(engine: "Engine") -> dict[str, "Component"]:
     with gr.Row():
         export_size = gr.Slider(minimum=1, maximum=100, value=5, step=1)
         export_quantization_bit = gr.Dropdown(choices=["none"] + GPTQ_BITS, value="none")
-        export_quantization_dataset = gr.Textbox(value="data/c4_demo.json")
+        export_quantization_dataset = gr.Textbox(value="data/c4_demo.jsonl")
         export_device = gr.Radio(choices=["cpu", "auto"], value="cpu")
         export_legacy_format = gr.Checkbox()
......
@@ -42,7 +42,7 @@ def create_top() -> dict[str, "Component"]:
     with gr.Row():
         quantization_bit = gr.Dropdown(choices=["none", "8", "4"], value="none", allow_custom_value=True)
-        quantization_method = gr.Dropdown(choices=["bitsandbytes", "hqq", "eetq"], value="bitsandbytes")
+        quantization_method = gr.Dropdown(choices=["bnb", "hqq", "eetq"], value="bnb")
         template = gr.Dropdown(choices=list(TEMPLATES.keys()), value="default")
         rope_scaling = gr.Dropdown(choices=["none", "linear", "dynamic", "yarn", "llama3"], value="none")
         booster = gr.Dropdown(choices=["auto", "flashattn2", "unsloth", "liger_kernel"], value="auto")
......
@@ -368,6 +368,7 @@ class Runner:
         if args.get("deepspeed", None) is not None:
             env["FORCE_TORCHRUN"] = "1"

+        # NOTE: DO NOT USE shell=True to avoid security risk
         self.trainer = Popen(["llamafactory-cli", "train", save_cmd(args)], env=env)
         yield from self.monitor()
......
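The NOTE added above can be illustrated with a short, self-contained sketch (not part of the commit) that contrasts the argument-list form of Popen the runner uses with the shell=True form it warns against; the "malicious" value is hypothetical.

    import subprocess

    # Hypothetical user-controlled value containing shell metacharacters.
    config_path = "config.yaml; echo INJECTED"

    # Argument-list form (what the runner uses): no shell is involved, so the whole
    # string reaches the program as a single literal argument.
    subprocess.run(["echo", config_path], check=True)

    # shell=True form (what the NOTE warns against): the string goes through /bin/sh,
    # so everything after ";" would run as a separate command.
    # subprocess.run(f"echo {config_path}", shell=True)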
@@ -25,10 +25,10 @@ from llamafactory.train.test_utils import load_dataset_module
 DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")
-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "stage": "kto",
     "do_train": True,
     "finetuning_type": "full",
@@ -45,7 +45,7 @@ TRAIN_ARGS = {
 @pytest.mark.parametrize("num_samples", [16])
 def test_feedback_data(num_samples: int):
     train_dataset = load_dataset_module(**TRAIN_ARGS)["train_dataset"]
-    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     original_data = load_dataset(DEMO_DATA, name="kto_en_demo", split="train")
     indexes = random.choices(range(len(original_data)), k=num_samples)
     for index in indexes:
......
@@ -25,10 +25,10 @@ from llamafactory.train.test_utils import load_dataset_module
 DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")
-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "stage": "rm",
     "do_train": True,
     "finetuning_type": "full",
@@ -54,7 +54,7 @@ def _convert_sharegpt_to_openai(messages: list[dict[str, str]]) -> list[dict[str
 @pytest.mark.parametrize("num_samples", [16])
 def test_pairwise_data(num_samples: int):
     train_dataset = load_dataset_module(**TRAIN_ARGS)["train_dataset"]
-    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     original_data = load_dataset(DEMO_DATA, name="dpo_en_demo", split="train")
     indexes = random.choices(range(len(original_data)), k=num_samples)
     for index in indexes:
......
@@ -25,12 +25,12 @@ from llamafactory.train.test_utils import load_dataset_module
 DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")
-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")
 TINY_DATA = os.getenv("TINY_DATA", "llamafactory/tiny-supervised-dataset")

 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "stage": "sft",
     "do_train": True,
     "finetuning_type": "full",
@@ -45,7 +45,7 @@ TRAIN_ARGS = {
 @pytest.mark.parametrize("num_samples", [16])
 def test_supervised_single_turn(num_samples: int):
     train_dataset = load_dataset_module(dataset_dir="ONLINE", dataset=TINY_DATA, **TRAIN_ARGS)["train_dataset"]
-    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     original_data = load_dataset(TINY_DATA, split="train")
     indexes = random.choices(range(len(original_data)), k=num_samples)
     for index in indexes:
@@ -66,7 +66,7 @@ def test_supervised_multi_turn(num_samples: int):
     train_dataset = load_dataset_module(dataset_dir="REMOTE:" + DEMO_DATA, dataset="system_chat", **TRAIN_ARGS)[
         "train_dataset"
     ]
-    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     original_data = load_dataset(DEMO_DATA, name="system_chat", split="train")
     indexes = random.choices(range(len(original_data)), k=num_samples)
     for index in indexes:
@@ -79,7 +79,7 @@ def test_supervised_train_on_prompt(num_samples: int):
     train_dataset = load_dataset_module(
         dataset_dir="REMOTE:" + DEMO_DATA, dataset="system_chat", train_on_prompt=True, **TRAIN_ARGS
     )["train_dataset"]
-    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     original_data = load_dataset(DEMO_DATA, name="system_chat", split="train")
     indexes = random.choices(range(len(original_data)), k=num_samples)
     for index in indexes:
@@ -93,7 +93,7 @@ def test_supervised_mask_history(num_samples: int):
     train_dataset = load_dataset_module(
         dataset_dir="REMOTE:" + DEMO_DATA, dataset="system_chat", mask_history=True, **TRAIN_ARGS
     )["train_dataset"]
-    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     original_data = load_dataset(DEMO_DATA, name="system_chat", split="train")
     indexes = random.choices(range(len(original_data)), k=num_samples)
     for index in indexes:
......
@@ -24,12 +24,12 @@ from llamafactory.train.test_utils import load_dataset_module
 DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")
-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")
 TINY_DATA = os.getenv("TINY_DATA", "llamafactory/tiny-supervised-dataset")

 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "stage": "ppo",
     "do_train": True,
     "finetuning_type": "full",
@@ -48,7 +48,7 @@ TRAIN_ARGS = {
 @pytest.mark.parametrize("num_samples", [16])
 def test_unsupervised_data(num_samples: int):
     train_dataset = load_dataset_module(**TRAIN_ARGS)["train_dataset"]
-    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     original_data = load_dataset(DEMO_DATA, name="system_chat", split="train")
     indexes = random.choices(range(len(original_data)), k=num_samples)
     for index in indexes:
......
@@ -24,11 +24,11 @@ from llamafactory.hparams import get_infer_args
 from llamafactory.model import load_tokenizer

-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")


 def test_base_collator():
-    model_args, data_args, *_ = get_infer_args({"model_name_or_path": TINY_LLAMA, "template": "default"})
+    model_args, data_args, *_ = get_infer_args({"model_name_or_path": TINY_LLAMA3, "template": "default"})
     tokenizer_module = load_tokenizer(model_args)
     template = get_template_and_fix_tokenizer(tokenizer_module["tokenizer"], data_args)
     data_collator = MultiModalDataCollatorForSeq2Seq(
......
@@ -19,12 +19,12 @@ from llamafactory.train.test_utils import load_dataset_module
 DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")
-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")
 TINY_DATA = os.getenv("TINY_DATA", "llamafactory/tiny-supervised-dataset")

 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "stage": "sft",
     "do_train": True,
     "finetuning_type": "full",
......
@@ -20,7 +20,6 @@ import torch
 from PIL import Image

 from llamafactory.data.mm_plugin import get_mm_plugin
-from llamafactory.extras.packages import is_transformers_version_greater_than
 from llamafactory.hparams import get_infer_args
 from llamafactory.model import load_tokenizer
@@ -35,7 +34,8 @@ if TYPE_CHECKING:
 HF_TOKEN = os.getenv("HF_TOKEN")

-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA4 = os.getenv("TINY_LLAMA4", "llamafactory/tiny-random-Llama-4")

 MM_MESSAGES = [
     {"role": "user", "content": "<image>What is in this image?"},
@@ -130,13 +130,13 @@ def _check_plugin(
 def test_base_plugin():
-    tokenizer_module = _load_tokenizer_module(model_name_or_path=TINY_LLAMA)
+    tokenizer_module = _load_tokenizer_module(model_name_or_path=TINY_LLAMA3)
     base_plugin = get_mm_plugin(name="base")
     check_inputs = {"plugin": base_plugin, **tokenizer_module}
     _check_plugin(**check_inputs)


-@pytest.mark.skipif(not HF_TOKEN or not is_transformers_version_greater_than("4.50.0"), reason="Gated model.")
+@pytest.mark.skipif(not HF_TOKEN, reason="Gated model.")
 def test_gemma3_plugin():
     image_seqlen = 256
     tokenizer_module = _load_tokenizer_module(model_name_or_path="google/gemma-3-4b-it")
@@ -157,6 +157,27 @@ def test_gemma3_plugin():
     _check_plugin(**check_inputs)


+@pytest.mark.xfail(reason="Unknown error.")
+def test_llama4_plugin():
+    tokenizer_module = _load_tokenizer_module(model_name_or_path=TINY_LLAMA4)
+    processor = tokenizer_module["processor"]
+    llama4_plugin = get_mm_plugin(name="llama4", image_token="<|image|>")
+    check_inputs = {"plugin": llama4_plugin, **tokenizer_module}
+    mm_inputs = _get_mm_inputs(tokenizer_module["processor"])
+    image_height, image_width = mm_inputs["pixel_values"][0].shape[-2:]
+    num_patches_per_chunk = int(
+        (image_height // processor.patch_size) * (image_width // processor.patch_size) // processor.downsample_ratio
+    )
+    aspect_ratios = mm_inputs.pop("aspect_ratios")
+    tokens_for_this_image = processor._prompt_split_image(aspect_ratios[0], num_patches_per_chunk)
+    check_inputs["expected_mm_messages"] = [
+        {key: value.replace("<image>", tokens_for_this_image) for key, value in message.items()}
+        for message in MM_MESSAGES
+    ]
+    check_inputs["expected_mm_inputs"] = mm_inputs
+    _check_plugin(**check_inputs)
+
+
 def test_llava_plugin():
     image_seqlen = 576
     tokenizer_module = _load_tokenizer_module(model_name_or_path="llava-hf/llava-1.5-7b-hf")
......
@@ -29,7 +29,8 @@ if TYPE_CHECKING:
 HF_TOKEN = os.getenv("HF_TOKEN")

-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA4 = os.getenv("TINY_LLAMA4", "llamafactory/tiny-random-Llama-4")

 MESSAGES = [
     {"role": "user", "content": "How are you"},
@@ -75,7 +76,7 @@ def _check_template(model_id: str, template_name: str, prompt_str: str, answer_s
 @pytest.mark.parametrize("use_fast", [True, False])
 def test_encode_oneturn(use_fast: bool):
-    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA, use_fast=use_fast)
+    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3, use_fast=use_fast)
     template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))
     prompt_ids, answer_ids = template.encode_oneturn(tokenizer, MESSAGES)
     prompt_str = (
@@ -90,7 +91,7 @@ def test_encode_oneturn(use_fast: bool):
 @pytest.mark.parametrize("use_fast", [True, False])
 def test_encode_multiturn(use_fast: bool):
-    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA, use_fast=use_fast)
+    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3, use_fast=use_fast)
     template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))
     encoded_pairs = template.encode_multiturn(tokenizer, MESSAGES)
     prompt_str_1 = (
@@ -111,8 +112,8 @@ def test_encode_multiturn(use_fast: bool):
 @pytest.mark.parametrize("use_fast", [True, False])
 def test_jinja_template(use_fast: bool):
-    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA, use_fast=use_fast)
-    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA, use_fast=use_fast)
+    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3, use_fast=use_fast)
+    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3, use_fast=use_fast)
     template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))
     tokenizer.chat_template = template._get_jinja_template(tokenizer)  # llama3 template no replace
     assert tokenizer.chat_template != ref_tokenizer.chat_template
@@ -120,7 +121,7 @@ def test_jinja_template(use_fast: bool):
 def test_ollama_modelfile():
-    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))
     assert template.get_ollama_modelfile(tokenizer) == (
         "# ollama modelfile auto-generated by llamafactory\n\n"
@@ -137,7 +138,7 @@ def test_ollama_modelfile():
 def test_get_stop_token_ids():
-    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))
     assert set(template.get_stop_token_ids(tokenizer)) == {128008, 128009}
@@ -152,7 +153,7 @@ def test_gemma_template(use_fast: bool):
         "<start_of_turn>model\n"
     )
     answer_str = "很高兴认识你!<end_of_turn>\n"
-    _check_template("google/gemma-2-9b-it", "gemma", prompt_str, answer_str, use_fast)
+    _check_template("google/gemma-3-4b-it", "gemma", prompt_str, answer_str, use_fast)


 @pytest.mark.skipif(not HF_TOKEN, reason="Gated model.")
@@ -168,7 +169,20 @@ def test_llama3_template(use_fast: bool):
     _check_template("meta-llama/Meta-Llama-3-8B-Instruct", "llama3", prompt_str, answer_str, use_fast)


-@pytest.mark.skipif(not HF_TOKEN, reason="Gated model.")
+@pytest.mark.parametrize(
+    "use_fast", [True, pytest.param(False, marks=pytest.mark.xfail(reason="Llama 4 has no slow tokenizer."))]
+)
+def test_llama4_template(use_fast: bool):
+    prompt_str = (
+        "<|begin_of_text|><|header_start|>user<|header_end|>\n\nHow are you<|eot|>"
+        "<|header_start|>assistant<|header_end|>\n\nI am fine!<|eot|>"
+        "<|header_start|>user<|header_end|>\n\n你好<|eot|>"
+        "<|header_start|>assistant<|header_end|>\n\n"
+    )
+    answer_str = "很高兴认识你!<|eot|>"
+    _check_template(TINY_LLAMA4, "llama4", prompt_str, answer_str, use_fast)
+
+
 @pytest.mark.parametrize(
     "use_fast", [True, pytest.param(False, marks=pytest.mark.xfail(reason="Phi-4 slow tokenizer is broken."))]
 )
@@ -183,35 +197,21 @@ def test_phi4_template(use_fast: bool):
     _check_template("microsoft/phi-4", "phi4", prompt_str, answer_str, use_fast)


-@pytest.mark.skipif(not HF_TOKEN, reason="Gated model.")  # TODO: why it is gated?
 @pytest.mark.parametrize("use_fast", [True, False])
 def test_qwen_template(use_fast: bool):
     prompt_str = (
-        "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
+        "<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n"
         "<|im_start|>user\nHow are you<|im_end|>\n"
         "<|im_start|>assistant\nI am fine!<|im_end|>\n"
         "<|im_start|>user\n你好<|im_end|>\n"
         "<|im_start|>assistant\n"
     )
     answer_str = "很高兴认识你!<|im_end|>\n"
-    _check_template("Qwen/Qwen2-7B-Instruct", "qwen", prompt_str, answer_str, use_fast)
-
-
-@pytest.mark.parametrize("use_fast", [True, False])
-@pytest.mark.xfail(reason="Yi tokenizer is broken.")
-def test_yi_template(use_fast: bool):
-    prompt_str = (
-        "<|im_start|>user\nHow are you<|im_end|>\n"
-        "<|im_start|>assistant\nI am fine!<|im_end|>\n"
-        "<|im_start|>user\n你好<|im_end|>\n"
-        "<|im_start|>assistant\n"
-    )
-    answer_str = "很高兴认识你!<|im_end|>\n"
-    _check_template("01-ai/Yi-1.5-6B-Chat", "yi", prompt_str, answer_str, use_fast)
+    _check_template("Qwen/Qwen2.5-7B-Instruct", "qwen", prompt_str, answer_str, use_fast)


-def test_parse_template():
-    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA, token=HF_TOKEN)
+def test_parse_llama3_template():
+    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3, token=HF_TOKEN)
     template = parse_template(tokenizer)
     assert template.format_user.slots == [
         "<|start_header_id|>user<|end_header_id|>\n\n{{content}}<|eot_id|>"
@@ -223,12 +223,11 @@ def test_parse_template():
     assert template.default_system == ""


-@pytest.mark.skipif(not HF_TOKEN, reason="Gated model.")
 def test_parse_qwen_template():
-    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-7B-Instruct", token=HF_TOKEN)
+    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct", token=HF_TOKEN)
     template = parse_template(tokenizer)
     assert template.format_user.slots == ["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]
     assert template.format_assistant.slots == ["{{content}}<|im_end|>\n"]
     assert template.format_system.slots == ["<|im_start|>system\n{{content}}<|im_end|>\n"]
     assert template.format_prefix.slots == []
-    assert template.default_system == "You are a helpful assistant."
+    assert template.default_system == "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."
@@ -17,10 +17,10 @@ import os
 from llamafactory.chat import ChatModel

-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

 INFER_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "finetuning_type": "lora",
     "template": "llama3",
     "infer_dtype": "float16",
......
@@ -21,12 +21,12 @@ from llamafactory.train.tuner import export_model, run_exp
 DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")
-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")
 TINY_LLAMA_ADAPTER = os.getenv("TINY_LLAMA_ADAPTER", "llamafactory/tiny-random-Llama-3-lora")

 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "do_train": True,
     "finetuning_type": "lora",
     "dataset_dir": "REMOTE:" + DEMO_DATA,
@@ -35,10 +35,11 @@ TRAIN_ARGS = {
     "overwrite_output_dir": True,
     "per_device_train_batch_size": 1,
     "max_steps": 1,
+    "report_to": "none",
 }

 INFER_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "adapter_name_or_path": TINY_LLAMA_ADAPTER,
     "finetuning_type": "lora",
     "template": "llama3",
......
@@ -21,10 +21,10 @@ from llamafactory.extras.packages import is_transformers_version_greater_than
 from llamafactory.train.test_utils import load_infer_model

-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

 INFER_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "template": "llama3",
 }
......
@@ -21,10 +21,10 @@ from llamafactory.extras.misc import get_current_device
 from llamafactory.train.test_utils import load_train_model

-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "stage": "sft",
     "do_train": True,
     "finetuning_type": "lora",
......