Commit 317a82e2 authored by chenych's avatar chenych
Browse files

Add QWQ-32B

parent 37b0ad9f
# Copyright 2024 the LlamaFactory team. # Copyright 2025 the LlamaFactory team.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
......
# Copyright 2024 the LlamaFactory team. # Copyright 2025 the LlamaFactory team.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
......
# Copyright 2024 Microsoft Corporation and the LlamaFactory team.
#
# This code is inspired by the Microsoft's DeepSpeed library.
# https://www.deepspeed.ai/tutorials/flops-profiler/
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import fire
import torch
from deepspeed.accelerator import get_accelerator # type: ignore
from deepspeed.profiling.flops_profiler import get_model_profile # type: ignore
from llamafactory.chat import ChatModel
def calculate_flops(
model_name_or_path: str,
batch_size: int = 1,
seq_length: int = 512,
flash_attn: str = "auto",
):
r"""
Calculates the flops of pre-trained models.
Usage: python cal_flops.py --model_name_or_path path_to_model --batch_size 1 --seq_length 512
"""
with get_accelerator().device(0):
chat_model = ChatModel(dict(model_name_or_path=model_name_or_path, template="empty", flash_attn=flash_attn))
fake_input = torch.ones((batch_size, seq_length), dtype=torch.long, device=chat_model.engine.model.device)
input_dict = {"input_ids": fake_input, "labels": fake_input.clone()}
flops, macs, params = get_model_profile(
chat_model.engine.model, kwargs=input_dict, print_profile=True, detailed=True
)
print("FLOPs:", flops)
print("MACs:", macs)
print("Params:", params)
if __name__ == "__main__":
fire.Fire(calculate_flops)
# Copyright 2024 imoneoi and the LlamaFactory team.
#
# This code is inspired by the imoneoi's OpenChat library.
# https://github.com/imoneoi/openchat/blob/3.6.0/ochat/training_deepspeed/train.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
from typing import Literal
import fire
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import DataCollatorForLanguageModeling, DataCollatorForSeq2Seq
from llamafactory.data import get_dataset, get_template_and_fix_tokenizer
from llamafactory.extras.constants import IGNORE_INDEX
from llamafactory.hparams import get_train_args
from llamafactory.model import load_tokenizer
BASE_LR = 3e-4 # 1.5e-4 for 30B-70B models
BASE_BS = 4_000_000 # from llama paper
def calculate_lr(
model_name_or_path: str,
batch_size: int, # total batch size, namely (batch size * gradient accumulation * world size)
stage: Literal["pt", "sft"] = "sft",
dataset: str = "alpaca_en_demo",
dataset_dir: str = "data",
template: str = "default",
cutoff_len: int = 1024, # i.e. maximum input length during training
is_mistral_or_gemma: bool = False, # mistral and gemma models opt for a smaller learning rate,
packing: bool = False,
):
r"""
Calculates the optimal learning rate for 7B/13B models using LLaMA's hyper-parameters.
Usage:
python cal_lr.py --model_name_or_path path_to_model --dataset alpaca_en_demo --cutoff_len 1024 --batch_size 16
"""
model_args, data_args, training_args, _, _ = get_train_args(
dict(
stage=stage,
model_name_or_path=model_name_or_path,
dataset=dataset,
dataset_dir=dataset_dir,
template=template,
cutoff_len=cutoff_len,
packing=packing,
output_dir="dummy_dir",
overwrite_cache=True,
do_train=True,
)
)
tokenizer_module = load_tokenizer(model_args)
tokenizer = tokenizer_module["tokenizer"]
template = get_template_and_fix_tokenizer(tokenizer, data_args)
trainset = get_dataset(template, model_args, data_args, training_args, stage, **tokenizer_module)["train_dataset"]
if stage == "pt":
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
elif stage == "sft":
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, label_pad_token_id=IGNORE_INDEX)
else:
raise NotImplementedError(f"Stage does not supported: {stage}.")
dataloader = DataLoader(trainset, batch_size, shuffle=False, collate_fn=data_collator, pin_memory=True)
valid_tokens, total_tokens = 0, 0
for batch in tqdm(dataloader):
valid_tokens += torch.sum(batch["labels"] != IGNORE_INDEX).item()
total_tokens += torch.numel(batch["labels"])
batch_max_len = cutoff_len * batch_size # max tokens in a batch
valid_ratio = valid_tokens / total_tokens
batch_valid_len = batch_max_len * valid_ratio
lr = BASE_LR * math.sqrt(batch_valid_len / BASE_BS) # lr ~ sqrt(batch_size)
lr = lr / 6.0 if is_mistral_or_gemma else lr
print(
"Optimal learning rate is {:.2e} for valid ratio% {:.2f} and effective batch size {:.2f}".format(
lr, valid_ratio * 100, batch_valid_len
)
)
if __name__ == "__main__":
fire.Fire(calculate_lr)
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
import fire
import torch
import torch.distributed as dist
from transformers import AutoConfig
from llamafactory.train.tuner import run_exp
BASE = 2 # gemm (add + mul)
def compute_model_flops(
model_name_or_path: str,
total_batch_size: int,
seq_length: int,
include_backward: bool = True,
include_recompute: bool = False,
include_flashattn: bool = False,
) -> int:
r"""
Calculates the FLOPs of model per forward/backward pass.
"""
config = AutoConfig.from_pretrained(model_name_or_path)
hidden_size = getattr(config, "hidden_size", None)
vocab_size = getattr(config, "vocab_size", None)
intermediate_size = getattr(config, "intermediate_size", None)
num_attention_heads = getattr(config, "num_attention_heads", None)
num_key_value_heads = getattr(config, "num_key_value_heads", None)
num_hidden_layers = getattr(config, "num_hidden_layers", None)
tie_word_embeddings = getattr(config, "tie_word_embeddings", False)
# mlp module
mlp_flops_per_token = 3 * BASE * hidden_size * intermediate_size # up, gate, down
mlp_flops = total_batch_size * seq_length * num_hidden_layers * mlp_flops_per_token
# attn projector module
q_flops_per_token = BASE * hidden_size * hidden_size
o_flops_per_token = BASE * hidden_size * hidden_size
k_flops_per_token = BASE * hidden_size * hidden_size * num_key_value_heads // num_attention_heads
v_flops_per_token = BASE * hidden_size * hidden_size * num_key_value_heads // num_attention_heads
attn_proj_flops_per_token = q_flops_per_token + o_flops_per_token + k_flops_per_token + v_flops_per_token
attn_proj_flops = total_batch_size * seq_length * num_hidden_layers * attn_proj_flops_per_token
# attn sdpa module
sdpa_flops_per_layer = 2 * BASE * hidden_size * seq_length * seq_length # (q * k^T) * v
sdpa_flops = total_batch_size * num_hidden_layers * sdpa_flops_per_layer
# embedding module
embedding_flops_per_token = hidden_size * vocab_size
embedding_flops = total_batch_size * seq_length * embedding_flops_per_token
if tie_word_embeddings is False:
embedding_flops *= 2
non_embedding_flops = mlp_flops + attn_proj_flops + sdpa_flops
non_embedding_coeff, embedding_coeff = 1, 1
if include_backward:
non_embedding_coeff += 2
embedding_coeff += 2
if include_recompute:
non_embedding_coeff += 1
total_flops = non_embedding_coeff * non_embedding_flops + embedding_coeff * embedding_flops
if include_flashattn:
total_flops += sdpa_flops
return total_flops
def compute_device_flops(world_size: int) -> float:
r"""
Calculates the FLOPs of the device capability per second.
"""
device_name = torch.cuda.get_device_name()
if "H100" in device_name or "H800" in device_name:
return 989 * 1e12 * world_size
elif "A100" in device_name or "A800" in device_name:
return 312 * 1e12 * world_size
elif "V100" in device_name:
return 125 * 1e12 * world_size
elif "4090" in device_name:
return 98 * 1e12 * world_size
else:
raise NotImplementedError(f"Device not supported: {device_name}.")
def calculate_mfu(
model_name_or_path: str,
batch_size: int = 1,
seq_length: int = 1024,
num_steps: int = 100,
finetuning_type: str = "lora",
flash_attn: str = "auto",
deepspeed_stage: int = 0,
disable_gc: bool = False,
liger_kernel: bool = False,
unsloth_gc: bool = False,
) -> float:
r"""
Calculates MFU for given model and hyper-params.
Usage: python cal_mfu.py --model_name_or_path path_to_model --batch_size 1 --seq_length 1024
"""
args = {
"model_name_or_path": model_name_or_path,
"flash_attn": flash_attn,
"disable_gradient_checkpointing": disable_gc,
"enable_liger_kernel": liger_kernel,
"use_unsloth_gc": unsloth_gc,
"stage": "pt",
"do_train": True,
"finetuning_type": finetuning_type,
"dataset": "c4_demo",
"cutoff_len": seq_length,
"output_dir": os.path.join("saves", "test_mfu"),
"logging_strategy": "no",
"save_strategy": "no",
"save_only_model": True,
"overwrite_output_dir": True,
"per_device_train_batch_size": batch_size,
"max_steps": num_steps,
"bf16": True,
}
if deepspeed_stage in [2, 3]:
args["deepspeed"] = f"examples/deepspeed/ds_z{deepspeed_stage}_config.json"
run_exp(args)
with open(os.path.join("saves", "test_mfu", "all_results.json"), encoding="utf-8") as f:
result = json.load(f)
if dist.is_initialized():
world_size = dist.get_world_size()
else:
world_size = 1
total_batch_size = batch_size * world_size
mfu_value = (
result["train_steps_per_second"]
* compute_model_flops(model_name_or_path, total_batch_size, seq_length)
/ compute_device_flops(world_size)
)
print(f"MFU: {mfu_value * 100:.2f}%")
if __name__ == "__main__":
fire.Fire(calculate_mfu)
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from dataclasses import dataclass
from typing import Any, Dict, Literal, Optional, Sequence
import fire
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import DataCollatorForLanguageModeling, DataCollatorForSeq2Seq
from llamafactory.data import get_dataset, get_template_and_fix_tokenizer
from llamafactory.extras.constants import IGNORE_INDEX
from llamafactory.hparams import get_train_args
from llamafactory.model import load_model, load_tokenizer
@dataclass
class PairwiseDataCollatorWithPadding(DataCollatorForSeq2Seq):
r"""
Data collator for pairwise data.
"""
train_on_prompt: bool = False
def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, torch.Tensor]:
r"""
Pads batched data to the longest sequence in the batch.
We generate 2 * n examples where the first n examples represent chosen examples and
the last n examples represent rejected examples.
"""
chosen_features = []
for feature in features:
prompt_len, answer_len = len(feature["prompt_ids"]), len(feature["chosen_ids"])
input_ids = feature["prompt_ids"] + feature["chosen_ids"]
attention_mask = [1] * (prompt_len + answer_len)
labels = input_ids if self.train_on_prompt else [IGNORE_INDEX] * prompt_len + feature["chosen_ids"]
chosen_features.append({"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels})
return super().__call__(chosen_features)
def calculate_ppl(
model_name_or_path: str,
save_name: str,
batch_size: int = 4,
stage: Literal["pt", "sft", "rm"] = "sft",
dataset: str = "alpaca_en_demo",
dataset_dir: str = "data",
template: str = "default",
cutoff_len: int = 1024,
max_samples: Optional[int] = None,
train_on_prompt: bool = False,
):
r"""
Calculates the ppl on the dataset of the pre-trained models.
Usage: python cal_ppl.py --model_name_or_path path_to_model --dataset alpaca_en_demo --save_name ppl.json
"""
model_args, data_args, training_args, finetuning_args, _ = get_train_args(
dict(
stage=stage,
model_name_or_path=model_name_or_path,
dataset=dataset,
dataset_dir=dataset_dir,
template=template,
cutoff_len=cutoff_len,
max_samples=max_samples,
train_on_prompt=train_on_prompt,
output_dir="dummy_dir",
overwrite_cache=True,
do_train=True,
)
)
tokenizer_module = load_tokenizer(model_args)
tokenizer = tokenizer_module["tokenizer"]
template = get_template_and_fix_tokenizer(tokenizer, data_args)
trainset = get_dataset(template, model_args, data_args, training_args, stage, **tokenizer_module)["train_dataset"]
model = load_model(tokenizer, model_args, finetuning_args, is_trainable=False)
if stage == "pt":
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
elif stage == "sft":
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, label_pad_token_id=IGNORE_INDEX)
elif stage == "rm":
data_collator = PairwiseDataCollatorWithPadding(
tokenizer=tokenizer, label_pad_token_id=IGNORE_INDEX, train_on_prompt=train_on_prompt
)
else:
raise NotImplementedError(f"Stage does not supported: {stage}.")
dataloader = DataLoader(trainset, batch_size, shuffle=False, collate_fn=data_collator, pin_memory=True)
criterion = torch.nn.CrossEntropyLoss(reduction="none")
total_ppl = 0
perplexities = []
batch: Dict[str, "torch.Tensor"]
with torch.no_grad():
for batch in tqdm(dataloader):
batch = batch.to(model.device)
outputs = model(**batch)
shift_logits: "torch.Tensor" = outputs["logits"][..., :-1, :]
shift_labels: "torch.Tensor" = batch["labels"][..., 1:]
loss_mask = shift_labels != IGNORE_INDEX
flatten_logits = shift_logits.contiguous().view(shift_labels.size(0) * shift_labels.size(1), -1)
flatten_labels = shift_labels.contiguous().view(-1)
token_logps: "torch.Tensor" = criterion(flatten_logits, flatten_labels)
token_logps = token_logps.contiguous().view(shift_logits.size(0), -1)
sentence_logps = (token_logps * loss_mask).sum(-1) / loss_mask.sum(-1)
total_ppl += sentence_logps.exp().sum().item()
perplexities.extend(sentence_logps.exp().tolist())
with open(save_name, "w", encoding="utf-8") as f:
json.dump(perplexities, f, indent=2)
print(f"Average perplexity is {total_ppl / len(perplexities):.2f}")
print(f"Perplexities have been saved at {save_name}.")
if __name__ == "__main__":
fire.Fire(calculate_ppl)
# Copyright 2024 the LlamaFactory team. # Copyright 2025 the LlamaFactory team.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -19,15 +19,10 @@ from typing import Any, Dict ...@@ -19,15 +19,10 @@ from typing import Any, Dict
import fire import fire
import torch import torch
from huggingface_hub import split_torch_state_dict_into_shards
from safetensors.torch import save_file from safetensors.torch import save_file
from tqdm import tqdm from tqdm import tqdm
from transformers.modeling_utils import ( from transformers.modeling_utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, WEIGHTS_INDEX_NAME, WEIGHTS_NAME
SAFE_WEIGHTS_INDEX_NAME,
SAFE_WEIGHTS_NAME,
WEIGHTS_INDEX_NAME,
WEIGHTS_NAME,
shard_checkpoint,
)
CONFIG_NAME = "config.json" CONFIG_NAME = "config.json"
...@@ -40,34 +35,42 @@ def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetenso ...@@ -40,34 +35,42 @@ def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetenso
shard_weight = torch.load(os.path.join(input_dir, filepath), map_location="cpu") shard_weight = torch.load(os.path.join(input_dir, filepath), map_location="cpu")
baichuan2_state_dict.update(shard_weight) baichuan2_state_dict.update(shard_weight)
llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict() llama_state_dict: Dict[str, torch.Tensor] = OrderedDict()
for key, value in tqdm(baichuan2_state_dict.items(), desc="Convert format"): for key, value in tqdm(baichuan2_state_dict.items(), desc="Convert format"):
if "W_pack" in key: if "W_pack" in key:
proj_size = value.size(0) // 3 proj_size = value.size(0) // 3
llama2_state_dict[key.replace("W_pack", "q_proj")] = value[:proj_size, :] llama_state_dict[key.replace("W_pack", "q_proj")] = value[:proj_size, :]
llama2_state_dict[key.replace("W_pack", "k_proj")] = value[proj_size : 2 * proj_size, :] llama_state_dict[key.replace("W_pack", "k_proj")] = value[proj_size : 2 * proj_size, :]
llama2_state_dict[key.replace("W_pack", "v_proj")] = value[2 * proj_size :, :] llama_state_dict[key.replace("W_pack", "v_proj")] = value[2 * proj_size :, :]
elif "lm_head" in key: elif "lm_head" in key:
llama2_state_dict[key] = torch.nn.functional.normalize(value) llama_state_dict[key] = torch.nn.functional.normalize(value)
else: else:
llama2_state_dict[key] = value llama_state_dict[key] = value
weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name) filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors")
state_dict_split = split_torch_state_dict_into_shards(
for shard_file, shard in tqdm(shards.items(), desc="Save weights"): llama_state_dict, filename_pattern=filename_pattern, max_shard_size=shard_size
)
for shard_file, tensors in tqdm(state_dict_split.filename_to_tensors.items(), desc="Save weights"):
shard = {tensor: llama_state_dict[tensor].contiguous() for tensor in tensors}
if save_safetensors: if save_safetensors:
save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"}) save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
else: else:
torch.save(shard, os.path.join(output_dir, shard_file)) torch.save(shard, os.path.join(output_dir, shard_file))
if index is None: if not state_dict_split.is_sharded:
print(f"Model weights saved in {os.path.join(output_dir, WEIGHTS_NAME)}") print(f"Model weights saved in {os.path.join(output_dir, weights_name)}.")
else: else:
index = {
"metadata": state_dict_split.metadata,
"weight_map": state_dict_split.tensor_to_filename,
}
index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f: with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
json.dump(index, f, indent=2, sort_keys=True) json.dump(index, f, indent=2, sort_keys=True)
print(f"Model weights saved in {output_dir}")
print(f"Model weights saved in {output_dir}.")
def save_config(input_dir: str, output_dir: str): def save_config(input_dir: str, output_dir: str):
...@@ -81,6 +84,7 @@ def save_config(input_dir: str, output_dir: str): ...@@ -81,6 +84,7 @@ def save_config(input_dir: str, output_dir: str):
with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f: with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f:
json.dump(llama2_config_dict, f, indent=2) json.dump(llama2_config_dict, f, indent=2)
print(f"Model config saved in {os.path.join(output_dir, CONFIG_NAME)}") print(f"Model config saved in {os.path.join(output_dir, CONFIG_NAME)}")
......
# Copyright 2024 the LlamaFactory team. # Copyright 2025 the LlamaFactory team.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -19,16 +19,11 @@ from typing import Any, Dict ...@@ -19,16 +19,11 @@ from typing import Any, Dict
import fire import fire
import torch import torch
from huggingface_hub import split_torch_state_dict_into_shards
from safetensors import safe_open from safetensors import safe_open
from safetensors.torch import save_file from safetensors.torch import save_file
from tqdm import tqdm from tqdm import tqdm
from transformers.modeling_utils import ( from transformers.modeling_utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, WEIGHTS_INDEX_NAME, WEIGHTS_NAME
SAFE_WEIGHTS_INDEX_NAME,
SAFE_WEIGHTS_NAME,
WEIGHTS_INDEX_NAME,
WEIGHTS_NAME,
shard_checkpoint,
)
from transformers.utils import check_min_version from transformers.utils import check_min_version
...@@ -49,60 +44,68 @@ def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetenso ...@@ -49,60 +44,68 @@ def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetenso
for key in f.keys(): for key in f.keys():
qwen_state_dict[key] = f.get_tensor(key) qwen_state_dict[key] = f.get_tensor(key)
llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict() llama_state_dict: Dict[str, torch.Tensor] = OrderedDict()
torch_dtype = None torch_dtype = None
for key, value in tqdm(qwen_state_dict.items(), desc="Convert format"): for key, value in tqdm(qwen_state_dict.items(), desc="Convert format"):
if torch_dtype is None: if torch_dtype is None:
torch_dtype = value.dtype torch_dtype = value.dtype
if "wte" in key: if "wte" in key:
llama2_state_dict["model.embed_tokens.weight"] = value llama_state_dict["model.embed_tokens.weight"] = value
elif "ln_f" in key: elif "ln_f" in key:
llama2_state_dict["model.norm.weight"] = value llama_state_dict["model.norm.weight"] = value
else: else:
key = key.replace("transformer.h", "model.layers") key = key.replace("transformer.h", "model.layers")
if "attn.c_attn" in key: if "attn.c_attn" in key:
proj_size = value.size(0) // 3 proj_size = value.size(0) // 3
llama2_state_dict[key.replace("attn.c_attn", "self_attn.q_proj")] = value[:proj_size, ...] llama_state_dict[key.replace("attn.c_attn", "self_attn.q_proj")] = value[:proj_size, ...]
llama2_state_dict[key.replace("attn.c_attn", "self_attn.k_proj")] = value[ llama_state_dict[key.replace("attn.c_attn", "self_attn.k_proj")] = value[
proj_size : 2 * proj_size, ... proj_size : 2 * proj_size, ...
] ]
llama2_state_dict[key.replace("attn.c_attn", "self_attn.v_proj")] = value[2 * proj_size :, ...] llama_state_dict[key.replace("attn.c_attn", "self_attn.v_proj")] = value[2 * proj_size :, ...]
elif "attn.c_proj" in key: elif "attn.c_proj" in key:
llama2_state_dict[key.replace("attn.c_proj", "self_attn.o_proj")] = value llama_state_dict[key.replace("attn.c_proj", "self_attn.o_proj")] = value
llama2_state_dict[key.replace("attn.c_proj.weight", "self_attn.o_proj.bias")] = torch.zeros_like( llama_state_dict[key.replace("attn.c_proj.weight", "self_attn.o_proj.bias")] = torch.zeros_like(
value[:, 0] value[:, 0]
).squeeze() ).squeeze()
elif "ln_1" in key: elif "ln_1" in key:
llama2_state_dict[key.replace("ln_1", "input_layernorm")] = value llama_state_dict[key.replace("ln_1", "input_layernorm")] = value
elif "ln_2" in key: elif "ln_2" in key:
llama2_state_dict[key.replace("ln_2", "post_attention_layernorm")] = value llama_state_dict[key.replace("ln_2", "post_attention_layernorm")] = value
elif "mlp.w1" in key: elif "mlp.w1" in key:
llama2_state_dict[key.replace("mlp.w1", "mlp.up_proj")] = value llama_state_dict[key.replace("mlp.w1", "mlp.up_proj")] = value
elif "mlp.w2" in key: elif "mlp.w2" in key:
llama2_state_dict[key.replace("mlp.w2", "mlp.gate_proj")] = value llama_state_dict[key.replace("mlp.w2", "mlp.gate_proj")] = value
elif "mlp.c_proj" in key: elif "mlp.c_proj" in key:
llama2_state_dict[key.replace("mlp.c_proj", "mlp.down_proj")] = value llama_state_dict[key.replace("mlp.c_proj", "mlp.down_proj")] = value
elif "lm_head" in key: elif "lm_head" in key:
llama2_state_dict[key] = value llama_state_dict[key] = value
else: else:
raise KeyError(f"Unable to process key {key}") raise KeyError(f"Unable to process key {key}")
weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name) filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors")
state_dict_split = split_torch_state_dict_into_shards(
for shard_file, shard in tqdm(shards.items(), desc="Save weights"): llama_state_dict, filename_pattern=filename_pattern, max_shard_size=shard_size
)
for shard_file, tensors in tqdm(state_dict_split.filename_to_tensors.items(), desc="Save weights"):
shard = {tensor: llama_state_dict[tensor].contiguous() for tensor in tensors}
if save_safetensors: if save_safetensors:
save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"}) save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
else: else:
torch.save(shard, os.path.join(output_dir, shard_file)) torch.save(shard, os.path.join(output_dir, shard_file))
if index is None: if not state_dict_split.is_sharded:
print(f"Model weights saved in {os.path.join(output_dir, weights_name)}") print(f"Model weights saved in {os.path.join(output_dir, weights_name)}.")
else: else:
index = {
"metadata": state_dict_split.metadata,
"weight_map": state_dict_split.tensor_to_filename,
}
index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f: with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
json.dump(index, f, indent=2, sort_keys=True) json.dump(index, f, indent=2, sort_keys=True)
print(f"Model weights saved in {output_dir}")
print(f"Model weights saved in {output_dir}.")
return str(torch_dtype).replace("torch.", "") return str(torch_dtype).replace("torch.", "")
...@@ -134,6 +137,7 @@ def save_config(input_dir: str, output_dir: str, torch_dtype: str): ...@@ -134,6 +137,7 @@ def save_config(input_dir: str, output_dir: str, torch_dtype: str):
with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f: with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f:
json.dump(llama2_config_dict, f, indent=2) json.dump(llama2_config_dict, f, indent=2)
print(f"Model config saved in {os.path.join(output_dir, CONFIG_NAME)}") print(f"Model config saved in {os.path.join(output_dir, CONFIG_NAME)}")
......
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import defaultdict
import fire
from tqdm import tqdm
from llamafactory.data import get_dataset, get_template_and_fix_tokenizer
from llamafactory.hparams import get_train_args
from llamafactory.model import load_tokenizer
def length_cdf(
model_name_or_path: str,
dataset: str = "alpaca_en_demo",
dataset_dir: str = "data",
template: str = "default",
interval: int = 1000,
):
r"""
Calculates the distribution of the input lengths in the dataset.
Usage: python length_cdf.py --model_name_or_path path_to_model --dataset alpaca_en_demo --template default
"""
model_args, data_args, training_args, _, _ = get_train_args(
dict(
stage="sft",
model_name_or_path=model_name_or_path,
dataset=dataset,
dataset_dir=dataset_dir,
template=template,
cutoff_len=1_000_000,
output_dir="dummy_dir",
overwrite_cache=True,
do_train=True,
)
)
tokenizer_module = load_tokenizer(model_args)
template = get_template_and_fix_tokenizer(tokenizer_module["tokenizer"], data_args)
trainset = get_dataset(template, model_args, data_args, training_args, "sft", **tokenizer_module)["train_dataset"]
total_num = len(trainset)
length_dict = defaultdict(int)
for sample in tqdm(trainset["input_ids"]):
length_dict[len(sample) // interval * interval] += 1
length_tuples = list(length_dict.items())
length_tuples.sort()
count_accu, prob_accu = 0, 0
for length, count in length_tuples:
count_accu += count
prob_accu += count / total_num * 100
print(f"{count_accu:d} ({prob_accu:.2f}%) samples have length < {length + interval}.")
if __name__ == "__main__":
fire.Fire(length_cdf)
# Copyright 2024 Tencent Inc. and the LlamaFactory team. # Copyright 2025 Tencent Inc. and the LlamaFactory team.
# #
# This code is inspired by the Tencent's LLaMA-Pro library. # This code is inspired by the Tencent's LLaMA-Pro library.
# https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py # https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py
...@@ -18,20 +18,15 @@ ...@@ -18,20 +18,15 @@
import json import json
import os import os
from collections import OrderedDict from collections import OrderedDict
from typing import TYPE_CHECKING from typing import TYPE_CHECKING, Dict
import fire import fire
import torch import torch
from huggingface_hub import split_torch_state_dict_into_shards
from safetensors.torch import save_file from safetensors.torch import save_file
from tqdm import tqdm from tqdm import tqdm
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, PreTrainedModel from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, PreTrainedModel
from transformers.modeling_utils import ( from transformers.modeling_utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, WEIGHTS_INDEX_NAME, WEIGHTS_NAME
SAFE_WEIGHTS_INDEX_NAME,
SAFE_WEIGHTS_NAME,
WEIGHTS_INDEX_NAME,
WEIGHTS_NAME,
shard_checkpoint,
)
if TYPE_CHECKING: if TYPE_CHECKING:
...@@ -46,41 +41,36 @@ def block_expansion( ...@@ -46,41 +41,36 @@ def block_expansion(
model_name_or_path: str, model_name_or_path: str,
output_dir: str, output_dir: str,
num_expand: int, num_expand: int,
shard_size: str = "2GB", shard_size: str = "5GB",
save_safetensors: bool = True, save_safetensors: bool = True,
): ):
r""" r"""
Performs block expansion for LLaMA, Mistral, Qwen1.5 or Yi models. Performs block expansion for LLaMA, Mistral, Qwen2 or Yi models.
Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8 Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8
""" """
config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path) config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True)
num_layers = getattr(config, "num_hidden_layers") num_layers = getattr(config, "num_hidden_layers")
if num_layers % num_expand != 0:
raise ValueError(f"`num_layers` {num_layers} should be divisible by `num_expand` {num_expand}.")
setattr(config, "num_hidden_layers", num_layers + num_expand) setattr(config, "num_hidden_layers", num_layers + num_expand)
config.save_pretrained(output_dir) config.save_pretrained(output_dir)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
tokenizer.save_pretrained(output_dir) tokenizer.save_pretrained(output_dir)
config = AutoConfig.from_pretrained(model_name_or_path) # load the original one print(f"Expanding model of {num_layers} layers to {num_layers + num_expand} layers.")
if save_safetensors:
setattr(config, "tie_word_embeddings", False) # safetensors does not allow shared weights
model = AutoModelForCausalLM.from_pretrained( model = AutoModelForCausalLM.from_pretrained(
model_name_or_path, model_name_or_path, torch_dtype="auto", device_map="cpu", trust_remote_code=True, low_cpu_mem_usage=True
config=config,
torch_dtype="auto",
trust_remote_code=True,
low_cpu_mem_usage=True,
) )
assert isinstance(model, PreTrainedModel) # type hint assert isinstance(model, PreTrainedModel) # type hint
state_dict = model.state_dict() if save_safetensors and getattr(model.config, "tie_word_embeddings", False):
del model.lm_head # safetensors does not allow shared weights
if num_layers % num_expand != 0:
raise ValueError(f"`num_layers` {num_layers} should be divisible by `num_expand` {num_expand}.")
split = num_layers // num_expand split = num_layers // num_expand
layer_cnt = 0 layer_cnt = 0
output_state_dict = OrderedDict() state_dict = model.state_dict()
output_state_dict: Dict[str, "torch.Tensor"] = OrderedDict()
for i in range(num_layers): for i in range(num_layers):
for key, value in state_dict.items(): for key, value in state_dict.items():
if f".{i:d}." in key: if f".{i:d}." in key:
...@@ -104,17 +94,24 @@ def block_expansion( ...@@ -104,17 +94,24 @@ def block_expansion(
output_state_dict[key] = value output_state_dict[key] = value
weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
shards, index = shard_checkpoint(output_state_dict, max_shard_size=shard_size, weights_name=weights_name) filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors")
state_dict_split = split_torch_state_dict_into_shards(
for shard_file, shard in tqdm(shards.items(), desc="Save weights"): output_state_dict, filename_pattern=filename_pattern, max_shard_size=shard_size
)
for shard_file, tensors in tqdm(state_dict_split.filename_to_tensors.items(), desc="Save weights"):
shard = {tensor: output_state_dict[tensor].contiguous() for tensor in tensors}
if save_safetensors: if save_safetensors:
save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"}) save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
else: else:
torch.save(shard, os.path.join(output_dir, shard_file)) torch.save(shard, os.path.join(output_dir, shard_file))
if index is None: if not state_dict_split.is_sharded:
print(f"Model weights saved in {os.path.join(output_dir, weights_name)}.") print(f"Model weights saved in {os.path.join(output_dir, weights_name)}.")
else: else:
index = {
"metadata": state_dict_split.metadata,
"weight_map": state_dict_split.tensor_to_filename,
}
index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f: with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
json.dump(index, f, indent=2, sort_keys=True) json.dump(index, f, indent=2, sort_keys=True)
......
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
from collections import OrderedDict
from typing import Any, Dict
import fire
import torch
from safetensors.torch import save_file
from tqdm import tqdm
from transformers.modeling_utils import (
SAFE_WEIGHTS_INDEX_NAME,
SAFE_WEIGHTS_NAME,
WEIGHTS_INDEX_NAME,
WEIGHTS_NAME,
shard_checkpoint,
)
CONFIG_NAME = "config.json"
def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool):
baichuan2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".bin"):
shard_weight = torch.load(os.path.join(input_dir, filepath), map_location="cpu")
baichuan2_state_dict.update(shard_weight)
llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
for key, value in tqdm(baichuan2_state_dict.items(), desc="Convert format"):
if "W_pack" in key:
proj_size = value.size(0) // 3
llama2_state_dict[key.replace("W_pack", "q_proj")] = value[:proj_size, :]
llama2_state_dict[key.replace("W_pack", "k_proj")] = value[proj_size : 2 * proj_size, :]
llama2_state_dict[key.replace("W_pack", "v_proj")] = value[2 * proj_size :, :]
elif "lm_head" in key:
llama2_state_dict[key] = torch.nn.functional.normalize(value)
else:
llama2_state_dict[key] = value
weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name)
for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
if save_safetensors:
save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
else:
torch.save(shard, os.path.join(output_dir, shard_file))
if index is None:
print(f"Model weights saved in {os.path.join(output_dir, WEIGHTS_NAME)}")
else:
index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
json.dump(index, f, indent=2, sort_keys=True)
print(f"Model weights saved in {output_dir}")
def save_config(input_dir: str, output_dir: str):
with open(os.path.join(input_dir, CONFIG_NAME), encoding="utf-8") as f:
llama2_config_dict: Dict[str, Any] = json.load(f)
llama2_config_dict["architectures"] = ["LlamaForCausalLM"]
llama2_config_dict.pop("auto_map", None)
llama2_config_dict.pop("tokenizer_class", None)
llama2_config_dict["model_type"] = "llama"
with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f:
json.dump(llama2_config_dict, f, indent=2)
print(f"Model config saved in {os.path.join(output_dir, CONFIG_NAME)}")
def llamafy_baichuan2(
input_dir: str,
output_dir: str,
shard_size: str = "2GB",
save_safetensors: bool = True,
):
r"""
Converts the Baichuan2-7B model in the same format as LLaMA2-7B.
Usage: python llamafy_baichuan2.py --input_dir input --output_dir output
Converted model: https://huggingface.co/hiyouga/Baichuan2-7B-Base-LLaMAfied
"""
try:
os.makedirs(output_dir, exist_ok=False)
except Exception as e:
raise print("Output dir already exists", e)
save_weight(input_dir, output_dir, shard_size, save_safetensors)
save_config(input_dir, output_dir)
if __name__ == "__main__":
fire.Fire(llamafy_baichuan2)
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
from collections import OrderedDict
from typing import Any, Dict
import fire
import torch
from safetensors import safe_open
from safetensors.torch import save_file
from tqdm import tqdm
from transformers.modeling_utils import (
SAFE_WEIGHTS_INDEX_NAME,
SAFE_WEIGHTS_NAME,
WEIGHTS_INDEX_NAME,
WEIGHTS_NAME,
shard_checkpoint,
)
from transformers.utils import check_min_version
try:
check_min_version("4.34.0")
except Exception:
raise ValueError("Please upgrade `transformers` to 4.34.0")
CONFIG_NAME = "config.json"
def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool) -> str:
qwen_state_dict: Dict[str, torch.Tensor] = OrderedDict()
for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".safetensors"):
with safe_open(os.path.join(input_dir, filepath), framework="pt", device="cpu") as f:
for key in f.keys():
qwen_state_dict[key] = f.get_tensor(key)
llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
torch_dtype = None
for key, value in tqdm(qwen_state_dict.items(), desc="Convert format"):
if torch_dtype is None:
torch_dtype = value.dtype
if "wte" in key:
llama2_state_dict["model.embed_tokens.weight"] = value
elif "ln_f" in key:
llama2_state_dict["model.norm.weight"] = value
else:
key = key.replace("transformer.h", "model.layers")
if "attn.c_attn" in key:
proj_size = value.size(0) // 3
llama2_state_dict[key.replace("attn.c_attn", "self_attn.q_proj")] = value[:proj_size, ...]
llama2_state_dict[key.replace("attn.c_attn", "self_attn.k_proj")] = value[
proj_size : 2 * proj_size, ...
]
llama2_state_dict[key.replace("attn.c_attn", "self_attn.v_proj")] = value[2 * proj_size :, ...]
elif "attn.c_proj" in key:
llama2_state_dict[key.replace("attn.c_proj", "self_attn.o_proj")] = value
llama2_state_dict[key.replace("attn.c_proj.weight", "self_attn.o_proj.bias")] = torch.zeros_like(
value[:, 0]
).squeeze()
elif "ln_1" in key:
llama2_state_dict[key.replace("ln_1", "input_layernorm")] = value
elif "ln_2" in key:
llama2_state_dict[key.replace("ln_2", "post_attention_layernorm")] = value
elif "mlp.w1" in key:
llama2_state_dict[key.replace("mlp.w1", "mlp.up_proj")] = value
elif "mlp.w2" in key:
llama2_state_dict[key.replace("mlp.w2", "mlp.gate_proj")] = value
elif "mlp.c_proj" in key:
llama2_state_dict[key.replace("mlp.c_proj", "mlp.down_proj")] = value
elif "lm_head" in key:
llama2_state_dict[key] = value
else:
raise KeyError(f"Unable to process key {key}")
weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name)
for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
if save_safetensors:
save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
else:
torch.save(shard, os.path.join(output_dir, shard_file))
if index is None:
print(f"Model weights saved in {os.path.join(output_dir, weights_name)}")
else:
index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
json.dump(index, f, indent=2, sort_keys=True)
print(f"Model weights saved in {output_dir}")
return str(torch_dtype).replace("torch.", "")
def save_config(input_dir: str, output_dir: str, torch_dtype: str):
with open(os.path.join(input_dir, CONFIG_NAME), encoding="utf-8") as f:
qwen_config_dict: Dict[str, Any] = json.load(f)
llama2_config_dict: Dict[str, Any] = OrderedDict()
llama2_config_dict["architectures"] = ["LlamaForCausalLM"]
llama2_config_dict["hidden_act"] = "silu"
llama2_config_dict["hidden_size"] = qwen_config_dict["hidden_size"]
llama2_config_dict["initializer_range"] = qwen_config_dict["initializer_range"]
llama2_config_dict["intermediate_size"] = qwen_config_dict["intermediate_size"] // 2
llama2_config_dict["max_position_embeddings"] = qwen_config_dict["max_position_embeddings"]
llama2_config_dict["model_type"] = "llama"
llama2_config_dict["num_attention_heads"] = qwen_config_dict["num_attention_heads"]
llama2_config_dict["num_hidden_layers"] = qwen_config_dict["num_hidden_layers"]
llama2_config_dict["num_key_value_heads"] = qwen_config_dict["hidden_size"] // qwen_config_dict["kv_channels"]
llama2_config_dict["pretraining_tp"] = 1
llama2_config_dict["rms_norm_eps"] = qwen_config_dict["layer_norm_epsilon"]
llama2_config_dict["rope_scaling"] = None
llama2_config_dict["tie_word_embeddings"] = qwen_config_dict["tie_word_embeddings"]
llama2_config_dict["torch_dtype"] = torch_dtype
llama2_config_dict["transformers_version"] = "4.34.0"
llama2_config_dict["use_cache"] = True
llama2_config_dict["vocab_size"] = qwen_config_dict["vocab_size"]
llama2_config_dict["attention_bias"] = True
with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f:
json.dump(llama2_config_dict, f, indent=2)
print(f"Model config saved in {os.path.join(output_dir, CONFIG_NAME)}")
def llamafy_qwen(
input_dir: str,
output_dir: str,
shard_size: str = "2GB",
save_safetensors: bool = False,
):
r"""
Converts the Qwen models in the same format as LLaMA2.
Usage: python llamafy_qwen.py --input_dir input --output_dir output
Converted model: https://huggingface.co/hiyouga/Qwen-14B-Chat-LLaMAfied
"""
try:
os.makedirs(output_dir, exist_ok=False)
except Exception as e:
raise print("Output dir already exists", e)
torch_dtype = save_weight(input_dir, output_dir, shard_size, save_safetensors)
save_config(input_dir, output_dir, torch_dtype)
if __name__ == "__main__":
fire.Fire(llamafy_qwen)
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # Copyright 2025 HuggingFace Inc. and the LlamaFactory team.
# #
# This code is based on the HuggingFace's PEFT library. # This code is based on the HuggingFace's PEFT library.
# https://github.com/huggingface/peft/blob/v0.10.0/examples/loftq_finetuning/quantize_save_load.py # https://github.com/huggingface/peft/blob/v0.10.0/examples/loftq_finetuning/quantize_save_load.py
......
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # Copyright 2025 HuggingFace Inc. and the LlamaFactory team.
# #
# This code is based on the HuggingFace's PEFT library. # This code is based on the HuggingFace's PEFT library.
# https://github.com/huggingface/peft/blob/v0.11.0/examples/pissa_finetuning/preprocess.py # https://github.com/huggingface/peft/blob/v0.11.0/examples/pissa_finetuning/preprocess.py
......
# Copyright 2024 Microsoft Corporation and the LlamaFactory team. # Copyright 2025 Microsoft Corporation and the LlamaFactory team.
# #
# This code is inspired by the Microsoft's DeepSpeed library. # This code is inspired by the Microsoft's DeepSpeed library.
# https://www.deepspeed.ai/tutorials/flops-profiler/ # https://www.deepspeed.ai/tutorials/flops-profiler/
......
# Copyright 2024 imoneoi and the LlamaFactory team. # Copyright 2025 imoneoi and the LlamaFactory team.
# #
# This code is inspired by the imoneoi's OpenChat library. # This code is inspired by the imoneoi's OpenChat library.
# https://github.com/imoneoi/openchat/blob/3.6.0/ochat/training_deepspeed/train.py # https://github.com/imoneoi/openchat/blob/3.6.0/ochat/training_deepspeed/train.py
......
# Copyright 2024 the LlamaFactory team. # Copyright 2025 the LlamaFactory team.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -142,21 +142,23 @@ def calculate_mfu( ...@@ -142,21 +142,23 @@ def calculate_mfu(
args["deepspeed"] = f"examples/deepspeed/ds_z{deepspeed_stage}_config.json" args["deepspeed"] = f"examples/deepspeed/ds_z{deepspeed_stage}_config.json"
run_exp(args) run_exp(args)
with open(os.path.join("saves", "test_mfu", "all_results.json"), encoding="utf-8") as f:
result = json.load(f)
if dist.is_initialized(): if dist.is_initialized():
dist.barrier()
world_size = dist.get_world_size() world_size = dist.get_world_size()
else: else:
world_size = 1 world_size = 1
total_batch_size = batch_size * world_size if int(os.getenv("LOCAL_RANK", "0")) == 0:
mfu_value = ( with open(os.path.join("saves", "test_mfu", "all_results.json"), encoding="utf-8") as f:
result["train_steps_per_second"] result = json.load(f)
* compute_model_flops(model_name_or_path, total_batch_size, seq_length)
/ compute_device_flops(world_size) total_batch_size = batch_size * world_size
) mfu_value = (
print(f"MFU: {mfu_value * 100:.2f}%") result["train_steps_per_second"]
* compute_model_flops(model_name_or_path, total_batch_size, seq_length)
/ compute_device_flops(world_size)
)
print(f"MFU: {mfu_value * 100:.2f}%")
if __name__ == "__main__": if __name__ == "__main__":
......
# Copyright 2024 the LlamaFactory team. # Copyright 2025 the LlamaFactory team.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -49,6 +49,7 @@ class PairwiseDataCollatorWithPadding(MultiModalDataCollatorForSeq2Seq): ...@@ -49,6 +49,7 @@ class PairwiseDataCollatorWithPadding(MultiModalDataCollatorForSeq2Seq):
"labels": feature["chosen_input_ids"] if self.train_on_prompt else feature["chosen_labels"], "labels": feature["chosen_input_ids"] if self.train_on_prompt else feature["chosen_labels"],
"images": feature["images"], "images": feature["images"],
"videos": feature["videos"], "videos": feature["videos"],
"audios": feature["audios"],
} }
) )
......
# Copyright 2024 the LlamaFactory team. # Copyright 2025 the LlamaFactory team.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
......
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from openai import OpenAI
from transformers.utils.versions import require_version
require_version("openai>=1.5.0", "To fix: pip install openai>=1.5.0")
def main():
client = OpenAI(
api_key="{}".format(os.environ.get("API_KEY", "0")),
base_url="http://localhost:{}/v1".format(os.environ.get("API_PORT", 8000)),
)
messages = []
messages.append(
{
"role": "user",
"content": [
{"type": "text", "text": "Output the color and number of each box."},
{
"type": "image_url",
"image_url": {"url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-VL/boxes.png"},
},
],
}
)
result = client.chat.completions.create(messages=messages, model="test")
messages.append(result.choices[0].message)
print("Round 1:", result.choices[0].message.content)
# The image shows a pyramid of colored blocks with numbers on them. Here are the colors and numbers of ...
messages.append(
{
"role": "user",
"content": [
{"type": "text", "text": "What kind of flower is this?"},
{
"type": "image_url",
"image_url": {"url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-VL/flowers.jpg"},
},
],
}
)
result = client.chat.completions.create(messages=messages, model="test")
messages.append(result.choices[0].message)
print("Round 2:", result.choices[0].message.content)
# The image shows a cluster of forget-me-not flowers. Forget-me-nots are small ...
if __name__ == "__main__":
main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment