Commit afe180a6 authored by wanglch

Initial commit
from typing import Any, Dict, Generator, Optional
import gradio as gr
from gradio.components import Component # cannot use TYPE_CHECKING here
from .chatter import WebChatModel
from .common import get_model_path, list_dataset, load_config
from .locales import LOCALES
from .manager import Manager
from .runner import Runner
from .utils import get_time
class Engine:
def __init__(self, demo_mode: Optional[bool] = False, pure_chat: Optional[bool] = False) -> None:
self.demo_mode = demo_mode
self.pure_chat = pure_chat
self.manager = Manager()
self.runner = Runner(self.manager, demo_mode=demo_mode)
self.chatter = WebChatModel(manager=self.manager, demo_mode=demo_mode, lazy_init=(not pure_chat))
    def _form_dict(self, resume_dict: Dict[str, Dict[str, Any]]) -> Dict[Component, Dict[str, Any]]:
return {self.manager.get_elem_by_name(k): gr.update(**v) for k, v in resume_dict.items()}
def resume(self) -> Generator[Dict[Component, Dict[str, Any]], None, None]:
user_config = load_config() if not self.demo_mode else {}
lang = user_config.get("lang", None) or "en"
init_dict = {"top.lang": {"value": lang}, "infer.chat_box": {"visible": self.chatter.loaded}}
if not self.pure_chat:
init_dict["train.dataset"] = {"choices": list_dataset()["choices"]}
init_dict["eval.dataset"] = {"choices": list_dataset()["choices"]}
if user_config.get("last_model", None):
init_dict["top.model_name"] = {"value": user_config["last_model"]}
init_dict["top.model_path"] = {"value": get_model_path(user_config["last_model"])}
yield self._form_dict(init_dict)
if not self.pure_chat:
if self.runner.alive:
yield {elem: gr.update(value=value) for elem, value in self.runner.running_data.items()}
if self.runner.do_train:
yield self._form_dict({"train.resume_btn": {"value": True}})
else:
yield self._form_dict({"eval.resume_btn": {"value": True}})
else:
yield self._form_dict(
{
"train.output_dir": {"value": "train_" + get_time()},
"eval.output_dir": {"value": "eval_" + get_time()},
}
)
def change_lang(self, lang: str) -> Dict[Component, Dict[str, Any]]:
return {
component: gr.update(**LOCALES[name][lang])
for elems in self.manager.all_elems.values()
for name, component in elems.items()
if name in LOCALES
}
from typing import Optional
import gradio as gr
from transformers.utils.versions import require_version
from .common import save_config
from .components import (
create_chat_box,
create_eval_tab,
create_export_tab,
create_infer_tab,
create_top,
create_train_tab,
)
from .css import CSS
from .engine import Engine
require_version("gradio>=3.38.0,<4.0.0", 'To fix: pip install "gradio>=3.38.0,<4.0.0"')
def create_ui(demo_mode: Optional[bool] = False) -> gr.Blocks:
engine = Engine(demo_mode=demo_mode, pure_chat=False)
with gr.Blocks(title="LLaMA Board", css=CSS) as demo:
if demo_mode:
gr.HTML("<h1><center>LLaMA Board: A One-stop Web UI for Getting Started with LLaMA Factory</center></h1>")
gr.HTML(
'<h3><center>Visit <a href="https://github.com/hiyouga/LLaMA-Factory" target="_blank">'
"LLaMA Factory</a> for details.</center></h3>"
)
gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
engine.manager.all_elems["top"] = create_top()
lang: "gr.Dropdown" = engine.manager.get_elem_by_name("top.lang")
with gr.Tab("Train"):
engine.manager.all_elems["train"] = create_train_tab(engine)
with gr.Tab("Evaluate & Predict"):
engine.manager.all_elems["eval"] = create_eval_tab(engine)
with gr.Tab("Chat"):
engine.manager.all_elems["infer"] = create_infer_tab(engine)
if not demo_mode:
with gr.Tab("Export"):
engine.manager.all_elems["export"] = create_export_tab(engine)
demo.load(engine.resume, outputs=engine.manager.list_elems())
lang.change(engine.change_lang, [lang], engine.manager.list_elems(), queue=False)
lang.input(save_config, inputs=[lang], queue=False)
return demo
def create_web_demo() -> gr.Blocks:
engine = Engine(pure_chat=True)
with gr.Blocks(title="Web Demo", css=CSS) as demo:
lang = gr.Dropdown(choices=["en", "zh"])
engine.manager.all_elems["top"] = dict(lang=lang)
chat_box, _, _, chat_elems = create_chat_box(engine, visible=True)
engine.manager.all_elems["infer"] = dict(chat_box=chat_box, **chat_elems)
demo.load(engine.resume, outputs=engine.manager.list_elems())
lang.change(engine.change_lang, [lang], engine.manager.list_elems(), queue=False)
lang.input(save_config, inputs=[lang], queue=False)
return demo
if __name__ == "__main__":
demo = create_ui()
demo.queue()
demo.launch(server_name="0.0.0.0", share=False, inbrowser=True)
LOCALES = {
"lang": {
"en": {
"label": "Lang",
},
"ru": {
"label": "Русский",
},
"zh": {
"label": "语言",
},
},
"model_name": {
"en": {
"label": "Model name",
},
"ru": {
"label": "Название модели",
},
"zh": {
"label": "模型名称",
},
},
"model_path": {
"en": {
"label": "Model path",
"info": "Path to pretrained model or model identifier from Hugging Face.",
},
"ru": {
"label": "Путь к модели",
"info": "Путь к предварительно обученной модели или идентификатор модели от Hugging Face.",
},
"zh": {
"label": "模型路径",
"info": "本地模型的文件路径或 Hugging Face 的模型标识符。",
},
},
"finetuning_type": {
"en": {
"label": "Finetuning method",
},
"ru": {
"label": "Метод дообучения",
},
"zh": {
"label": "微调方法",
},
},
"adapter_path": {
"en": {
"label": "Adapter path",
},
"ru": {
"label": "Путь к адаптеру",
},
"zh": {
"label": "适配器路径",
},
},
"refresh_btn": {
"en": {
"value": "Refresh adapters",
},
"ru": {
"value": "Обновить адаптеры",
},
"zh": {
"value": "刷新适配器",
},
},
"advanced_tab": {
"en": {
"label": "Advanced configurations",
},
"ru": {
"label": "Расширенные конфигурации",
},
"zh": {
"label": "高级设置",
},
},
"quantization_bit": {
"en": {
"label": "Quantization bit",
"info": "Enable 4/8-bit model quantization (QLoRA).",
},
"ru": {
"label": "Уровень квантования",
"info": "Включить 4/8-битное квантование модели (QLoRA).",
},
"zh": {
"label": "量化等级",
"info": "启用 4/8 比特模型量化(QLoRA)。",
},
},
"template": {
"en": {
"label": "Prompt template",
"info": "The template used in constructing prompts.",
},
"ru": {
"label": "Шаблон запроса",
"info": "Шаблон, используемый при формировании запросов.",
},
"zh": {
"label": "提示模板",
"info": "构建提示词时使用的模板",
},
},
"rope_scaling": {
"en": {
"label": "RoPE scaling",
},
"ru": {
"label": "Масштабирование RoPE",
},
"zh": {
"label": "RoPE 插值方法",
},
},
"booster": {
"en": {
"label": "Booster",
},
"ru": {
"label": "Ускоритель",
},
"zh": {
"label": "加速方式",
},
},
"training_stage": {
"en": {
"label": "Stage",
"info": "The stage to perform in training.",
},
"ru": {
"label": "Этап",
"info": "Этап выполнения обучения.",
},
"zh": {
"label": "训练阶段",
"info": "目前采用的训练方式。",
},
},
"dataset_dir": {
"en": {
"label": "Data dir",
"info": "Path to the data directory.",
},
"ru": {
"label": "Директория данных",
"info": "Путь к директории данных.",
},
"zh": {
"label": "数据路径",
"info": "数据文件夹的路径。",
},
},
"dataset": {
"en": {
"label": "Dataset",
},
"ru": {
"label": "Набор данных",
},
"zh": {
"label": "数据集",
},
},
"data_preview_btn": {
"en": {
"value": "Preview dataset",
},
"ru": {
"value": "Просмотреть набор данных",
},
"zh": {
"value": "预览数据集",
},
},
"preview_count": {
"en": {
"label": "Count",
},
"ru": {
"label": "Количество",
},
"zh": {
"label": "数量",
},
},
"page_index": {
"en": {
"label": "Page",
},
"ru": {
"label": "Страница",
},
"zh": {
"label": "页数",
},
},
"prev_btn": {
"en": {
"value": "Prev",
},
"ru": {
"value": "Предыдущая",
},
"zh": {
"value": "上一页",
},
},
"next_btn": {
"en": {
"value": "Next",
},
"ru": {
"value": "Следующая",
},
"zh": {
"value": "下一页",
},
},
"close_btn": {
"en": {
"value": "Close",
},
"ru": {
"value": "Закрыть",
},
"zh": {
"value": "关闭",
},
},
"preview_samples": {
"en": {
"label": "Samples",
},
"ru": {
"label": "Примеры",
},
"zh": {
"label": "样例",
},
},
"cutoff_len": {
"en": {
"label": "Cutoff length",
"info": "Max tokens in input sequence.",
},
"ru": {
"label": "Длина обрезки",
"info": "Максимальное количество токенов во входной последовательности.",
},
"zh": {
"label": "截断长度",
"info": "输入序列分词后的最大长度。",
},
},
"learning_rate": {
"en": {
"label": "Learning rate",
"info": "Initial learning rate for AdamW.",
},
"ru": {
"label": "Скорость обучения",
"info": "Начальная скорость обучения для AdamW.",
},
"zh": {
"label": "学习率",
"info": "AdamW 优化器的初始学习率。",
},
},
"num_train_epochs": {
"en": {
"label": "Epochs",
"info": "Total number of training epochs to perform.",
},
"ru": {
"label": "Эпохи",
"info": "Общее количество эпох обучения.",
},
"zh": {
"label": "训练轮数",
"info": "需要执行的训练总轮数。",
},
},
"max_samples": {
"en": {
"label": "Max samples",
"info": "Maximum samples per dataset.",
},
"ru": {
"label": "Максимальное количество образцов",
"info": "Максимальное количество образцов на набор данных.",
},
"zh": {
"label": "最大样本数",
"info": "每个数据集的最大样本数。",
},
},
"compute_type": {
"en": {
"label": "Compute type",
"info": "Whether to use mixed precision training (fp16 or bf16).",
},
"ru": {
"label": "Тип вычислений",
"info": "Использовать ли обучение смешанной точности fp16 или bf16.",
},
"zh": {
"label": "计算类型",
"info": "是否使用混合精度训练(fp16 或 bf16)。",
},
},
"batch_size": {
"en": {
"label": "Batch size",
"info": "Number of samples processed on each GPU.",
},
"ru": {
"label": "Размер пакета",
"info": "Количество образцов для обработки на каждом GPU.",
},
"zh": {
"label": "批处理大小",
"info": "每个 GPU 处理的样本数量。",
},
},
"gradient_accumulation_steps": {
"en": {
"label": "Gradient accumulation",
"info": "Number of steps for gradient accumulation.",
},
"ru": {
"label": "Накопление градиента",
"info": "Количество шагов накопления градиента.",
},
"zh": {
"label": "梯度累积",
"info": "梯度累积的步数。",
},
},
"lr_scheduler_type": {
"en": {
"label": "LR scheduler",
"info": "Name of the learning rate scheduler.",
},
"ru": {
"label": "Планировщик скорости обучения",
"info": "Название планировщика скорости обучения.",
},
"zh": {
"label": "学习率调节器",
"info": "学习率调度器的名称。",
},
},
"max_grad_norm": {
"en": {
"label": "Maximum gradient norm",
"info": "Norm for gradient clipping.",
},
"ru": {
"label": "Максимальная норма градиента",
"info": "Норма для обрезки градиента.",
},
"zh": {
"label": "最大梯度范数",
"info": "用于梯度裁剪的范数。",
},
},
"val_size": {
"en": {
"label": "Val size",
"info": "Proportion of data in the dev set.",
},
"ru": {
"label": "Размер валидации",
"info": "Пропорция данных в наборе для разработки.",
},
"zh": {
"label": "验证集比例",
"info": "验证集占全部样本的百分比。",
},
},
"extra_tab": {
"en": {
"label": "Extra configurations",
},
"ru": {
"label": "Дополнительные конфигурации",
},
"zh": {
"label": "其它参数设置",
},
},
"logging_steps": {
"en": {
"label": "Logging steps",
"info": "Number of steps between two logs.",
},
"ru": {
"label": "Шаги логирования",
"info": "Количество шагов между двумя записями в журнале.",
},
"zh": {
"label": "日志间隔",
"info": "每两次日志输出间的更新步数。",
},
},
"save_steps": {
"en": {
"label": "Save steps",
"info": "Number of steps between two checkpoints.",
},
"ru": {
"label": "Шаги сохранения",
"info": "Количество шагов между двумя контрольными точками.",
},
"zh": {
"label": "保存间隔",
"info": "每两次断点保存间的更新步数。",
},
},
"warmup_steps": {
"en": {
"label": "Warmup steps",
"info": "Number of steps used for warmup.",
},
"ru": {
"label": "Шаги прогрева",
"info": "Количество шагов, используемых для прогрева.",
},
"zh": {
"label": "预热步数",
"info": "学习率预热采用的步数。",
},
    },
    "neftune_alpha": {
        "en": {
            "label": "NEFTune Alpha",
            "info": "Magnitude of noise added to embedding vectors.",
},
"ru": {
"label": "NEFTune Alpha",
"info": "Величина шума, добавляемого к векторам вложений.",
},
"zh": {
"label": "NEFTune 噪声参数",
"info": "嵌入向量所添加的噪声大小。",
},
},
"sft_packing": {
"en": {
"label": "Pack sequences",
"info": "Pack sequences into samples of fixed length in supervised fine-tuning.",
},
"ru": {
"label": "Упаковка последовательностей",
"info": "Упаковка последовательностей в образцы фиксированной длины при контролируемой тонкой настройке.",
},
"zh": {
"label": "序列打包",
"info": "在指令监督微调阶段将序列打包为相同长度的样本。",
},
    },
    "upcast_layernorm": {
        "en": {
            "label": "Upcast LayerNorm",
            "info": "Upcast the weights of LayerNorm to float32.",
},
"ru": {
"label": "Приведение весов LayerNorm",
"info": "Приведение весов LayerNorm к float32.",
},
"zh": {
"label": "缩放归一化层",
"info": "将归一化层权重缩放至 32 位精度。",
},
},
"lora_tab": {
"en": {
"label": "LoRA configurations",
},
"ru": {
"label": "Конфигурации LoRA",
},
"zh": {
"label": "LoRA 参数设置",
},
},
"lora_rank": {
"en": {
"label": "LoRA rank",
"info": "The rank of LoRA matrices.",
},
"ru": {
"label": "Ранг матриц LoRA",
"info": "Ранг матриц LoRA.",
},
"zh": {
"label": "LoRA 秩",
"info": "LoRA 矩阵的秩。",
},
},
"lora_dropout": {
"en": {
"label": "LoRA Dropout",
"info": "Dropout ratio of LoRA weights.",
},
"ru": {
"label": "Вероятность отсева LoRA",
"info": "Вероятность отсева весов LoRA.",
},
"zh": {
"label": "LoRA 随机丢弃",
"info": "LoRA 权重随机丢弃的概率。",
},
},
"lora_target": {
"en": {
"label": "LoRA modules (optional)",
"info": "Name(s) of target modules to apply LoRA. Use commas to separate multiple modules.",
},
"ru": {
"label": "Модули LoRA (опционально)",
"info": "Имена целевых модулей для применения LoRA. Используйте запятые для разделения нескольких модулей.",
},
"zh": {
"label": "LoRA 作用模块(非必填)",
"info": "应用 LoRA 的目标模块名称。使用英文逗号分隔多个名称。",
},
},
"additional_target": {
"en": {
"label": "Additional modules (optional)",
"info": "Name(s) of modules apart from LoRA layers to be set as trainable. Use commas to separate multiple modules.",
},
"ru": {
"label": "Дополнительные модули (опционально)",
"info": (
"Имена модулей, кроме слоев LoRA, которые следует установить в качестве обучаемых. "
"Используйте запятые для разделения нескольких модулей."
),
},
"zh": {
"label": "附加模块(非必填)",
"info": "除 LoRA 层以外的可训练模块名称。使用英文逗号分隔多个名称。",
},
    },
    "create_new_adapter": {
        "en": {
            "label": "Create new adapter",
            "info": "Whether to create a new adapter with randomly initialized weights.",
},
"ru": {
"label": "Создать новый адаптер",
"info": "Создать новый адаптер с случайной инициализацией веса или нет.",
},
"zh": {
"label": "新建适配器",
"info": "是否创建一个经过随机初始化的新适配器。",
},
},
"rlhf_tab": {
"en": {
"label": "RLHF configurations",
},
"ru": {
"label": "Конфигурации RLHF",
},
"zh": {
"label": "RLHF 参数设置",
},
},
"dpo_beta": {
"en": {
"label": "DPO beta",
"info": "Value of the beta parameter in the DPO loss.",
},
"ru": {
"label": "DPO бета",
"info": "Значение параметра бета в функции потерь DPO.",
},
"zh": {
"label": "DPO beta 参数",
"info": "DPO 损失函数中 beta 超参数大小。",
},
},
"dpo_ftx": {
"en": {
"label": "DPO-ftx weight",
"info": "The weight of SFT loss in the DPO-ftx.",
},
"ru": {
"label": "Вес DPO-ftx",
"info": "Вес функции потерь SFT в DPO-ftx.",
},
"zh": {
"label": "DPO-ftx 权重",
"info": "DPO-ftx 中 SFT 损失的权重大小。",
},
    },
    "reward_model": {
        "en": {
            "label": "Reward model",
            "info": "Adapter of the reward model for PPO training. (Refresh adapters to update the choices.)",
},
"ru": {
"label": "Модель вознаграждения",
"info": "Адаптер модели вознаграждения для обучения PPO. (Необходимо обновить адаптеры)",
},
"zh": {
"label": "奖励模型",
"info": "PPO 训练中奖励模型的适配器路径。(需要刷新适配器)",
},
},
"cmd_preview_btn": {
"en": {
"value": "Preview command",
},
"ru": {
"value": "Просмотр команды",
},
"zh": {
"value": "预览命令",
},
},
"start_btn": {
"en": {
"value": "Start",
},
"ru": {
"value": "Начать",
},
"zh": {
"value": "开始",
},
},
"stop_btn": {
"en": {
"value": "Abort",
},
"ru": {
"value": "Прервать",
},
"zh": {
"value": "中断",
},
},
"output_dir": {
"en": {
"label": "Output dir",
"info": "Directory for saving results.",
},
"ru": {
"label": "Выходной каталог",
"info": "Каталог для сохранения результатов.",
},
"zh": {
"label": "输出目录",
"info": "保存结果的路径。",
},
},
"output_box": {
"en": {
"value": "Ready.",
},
"ru": {
"value": "Готово.",
},
"zh": {
"value": "准备就绪。",
},
},
"loss_viewer": {
"en": {
"label": "Loss",
},
"ru": {
"label": "Потери",
},
"zh": {
"label": "损失",
},
},
"predict": {
"en": {
"label": "Save predictions",
},
"ru": {
"label": "Сохранить предсказания",
},
"zh": {
"label": "保存预测结果",
},
},
"load_btn": {
"en": {
"value": "Load model",
},
"ru": {
"value": "Загрузить модель",
},
"zh": {
"value": "加载模型",
},
},
"unload_btn": {
"en": {
"value": "Unload model",
},
"ru": {
"value": "Выгрузить модель",
},
"zh": {
"value": "卸载模型",
},
},
"info_box": {
"en": {
"value": "Model unloaded, please load a model first.",
},
"ru": {
"value": "Модель не загружена, загрузите модель сначала.",
},
"zh": {
"value": "模型未加载,请先加载模型。",
},
},
"system": {
"en": {
"placeholder": "System prompt (optional)",
},
"ru": {
"placeholder": "Системный запрос (по желанию)",
},
"zh": {
"placeholder": "系统提示词(非必填)",
},
},
"tools": {
"en": {
"placeholder": "Tools (optional)",
},
"ru": {
"placeholder": "Инструменты (по желанию)",
},
"zh": {
"placeholder": "工具列表(非必填)",
},
},
"query": {
"en": {
"placeholder": "Input...",
},
"ru": {
"placeholder": "Ввод...",
},
"zh": {
"placeholder": "输入...",
},
},
"submit_btn": {
"en": {
"value": "Submit",
},
"ru": {
"value": "Отправить",
},
"zh": {
"value": "提交",
},
},
"clear_btn": {
"en": {
"value": "Clear history",
},
"ru": {
"value": "Очистить историю",
},
"zh": {
"value": "清空历史",
},
},
"max_length": {
"en": {
"label": "Maximum length",
},
"ru": {
"label": "Максимальная длина",
},
"zh": {
"label": "最大长度",
},
},
"max_new_tokens": {
"en": {
"label": "Maximum new tokens",
},
"ru": {
"label": "Максимальное количество новых токенов",
},
"zh": {
"label": "最大生成长度",
},
},
"top_p": {
"en": {
"label": "Top-p",
},
"ru": {
"label": "Лучшие-p",
},
"zh": {
"label": "Top-p 采样值",
},
},
"temperature": {
"en": {
"label": "Temperature",
},
"ru": {
"label": "Температура",
},
"zh": {
"label": "温度系数",
},
},
"max_shard_size": {
"en": {
"label": "Max shard size (GB)",
"info": "The maximum size for a model file.",
},
"ru": {
"label": "Максимальный размер фрагмента (ГБ)",
"info": "Максимальный размер файла модели.",
},
"zh": {
"label": "最大分块大小(GB)",
"info": "单个模型文件的最大大小。",
},
    },
    "export_quantization_bit": {
        "en": {
            "label": "Export quantization bit",
"info": "Quantizing the exported model.",
},
"ru": {
"label": "Экспорт бита квантования",
"info": "Квантование экспортируемой модели.",
},
"zh": {
"label": "导出量化等级",
"info": "量化导出模型。",
},
    },
    "export_quantization_dataset": {
        "en": {
            "label": "Export quantization dataset",
"info": "The calibration dataset used for quantization.",
},
"ru": {
"label": "Экспорт набора данных для квантования",
"info": "Набор данных калибровки, используемый для квантования.",
},
"zh": {
"label": "导出量化数据集",
"info": "量化过程中使用的校准数据集。",
},
},
"export_dir": {
"en": {
"label": "Export dir",
"info": "Directory to save exported model.",
},
"ru": {
"label": "Каталог экспорта",
"info": "Каталог для сохранения экспортированной модели.",
},
"zh": {
"label": "导出目录",
"info": "保存导出模型的文件夹路径。",
},
},
"export_btn": {
"en": {
"value": "Export",
},
"ru": {
"value": "Экспорт",
},
"zh": {
"value": "开始导出",
},
},
}
ALERTS = {
    "err_conflict": {
        "en": "A process is already running, please abort it first.",
"ru": "Процесс уже запущен, пожалуйста, сначала прервите его.",
"zh": "任务已存在,请先中断训练。",
},
"err_exists": {
"en": "You have loaded a model, please unload it first.",
"ru": "Вы загрузили модель, сначала разгрузите ее.",
"zh": "模型已存在,请先卸载模型。",
},
"err_no_model": {
"en": "Please select a model.",
"ru": "Пожалуйста, выберите модель.",
"zh": "请选择模型。",
},
"err_no_path": {
"en": "Model not found.",
"ru": "Модель не найдена.",
"zh": "模型未找到。",
},
"err_no_dataset": {
"en": "Please choose a dataset.",
"ru": "Пожалуйста, выберите набор данных.",
"zh": "请选择数据集。",
},
"err_no_adapter": {
"en": "Please select an adapter.",
"ru": "Пожалуйста, выберите адаптер.",
"zh": "请选择一个适配器。",
},
"err_no_export_dir": {
"en": "Please provide export dir.",
"ru": "Пожалуйста, укажите каталог для экспорта.",
"zh": "请填写导出目录",
},
"err_failed": {
"en": "Failed.",
"ru": "Ошибка.",
"zh": "训练出错。",
    },
    "err_demo": {
        "en": "Training is unavailable in demo mode; duplicate the space to a private one first.",
"ru": "Обучение недоступно в демонстрационном режиме, сначала скопируйте пространство в частное.",
"zh": "展示模式不支持训练,请先复制到私人空间。",
},
"err_device_count": {
"en": "Multiple GPUs are not supported yet.",
"ru": "Пока не поддерживается множественные GPU.",
"zh": "尚不支持多 GPU 训练。",
},
"err_tool_name": {
"en": "Tool name not found.",
"ru": "Имя инструмента не найдено.",
"zh": "工具名称未找到。",
},
"err_json_schema": {
"en": "Invalid JSON schema.",
"ru": "Неверная схема JSON.",
"zh": "Json 格式错误。",
},
"info_aborting": {
"en": "Aborted, wait for terminating...",
"ru": "Прервано, ожидание завершения...",
"zh": "训练中断,正在等待线程结束……",
},
"info_aborted": {
"en": "Ready.",
"ru": "Готово.",
"zh": "准备就绪。",
},
"info_finished": {
"en": "Finished.",
"ru": "Завершено.",
"zh": "训练完毕。",
},
"info_loading": {
"en": "Loading model...",
"ru": "Загрузка модели...",
"zh": "加载中……",
},
"info_unloading": {
"en": "Unloading model...",
"ru": "Выгрузка модели...",
"zh": "卸载中……",
},
"info_loaded": {
"en": "Model loaded, now you can chat with your model!",
"ru": "Модель загружена, теперь вы можете общаться с вашей моделью!",
"zh": "模型已加载,可以开始聊天了!",
},
"info_unloaded": {
"en": "Model unloaded.",
"ru": "Модель выгружена.",
"zh": "模型已卸载。",
},
"info_exporting": {
"en": "Exporting model...",
"ru": "Экспорт модели...",
"zh": "正在导出模型……",
},
"info_exported": {
"en": "Model exported.",
"ru": "Модель экспортирована.",
"zh": "模型导出完成。",
},
}
from typing import TYPE_CHECKING, Dict, List, Set
if TYPE_CHECKING:
from gradio.components import Component
class Manager:
def __init__(self) -> None:
self.all_elems: Dict[str, Dict[str, "Component"]] = {}
def get_elem_by_name(self, name: str) -> "Component":
        r"""
        Gets a single Gradio component by its dotted name, in the form ``tab.elem``.

        Example: top.lang, train.dataset
        """
tab_name, elem_name = name.split(".")
return self.all_elems[tab_name][elem_name]
def get_base_elems(self) -> Set["Component"]:
return {
self.all_elems["top"]["lang"],
self.all_elems["top"]["model_name"],
self.all_elems["top"]["model_path"],
self.all_elems["top"]["adapter_path"],
self.all_elems["top"]["finetuning_type"],
self.all_elems["top"]["quantization_bit"],
self.all_elems["top"]["template"],
self.all_elems["top"]["rope_scaling"],
self.all_elems["top"]["booster"],
}
def list_elems(self) -> List["Component"]:
return [elem for elems in self.all_elems.values() for elem in elems.values()]
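# Illustrative sketch (not part of the original file): how Manager is typically used by the
# web UI. The tab name "top" and element name "lang" follow the "tab.elem" convention shown
# in get_elem_by_name; the gr.Textbox component is only a hypothetical stand-in.
import gradio as gr

_example_manager = Manager()
_example_manager.all_elems["top"] = {"lang": gr.Textbox()}
assert _example_manager.get_elem_by_name("top.lang") is _example_manager.all_elems["top"]["lang"]
assert _example_manager.list_elems() == [_example_manager.all_elems["top"]["lang"]]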
import logging
import os
import time
from threading import Thread
from typing import TYPE_CHECKING, Any, Dict, Generator, Optional, Tuple
import gradio as gr
import transformers
from gradio.components import Component # cannot use TYPE_CHECKING here
from transformers.trainer import TRAINING_ARGS_NAME
from ..extras.callbacks import LogCallback
from ..extras.constants import TRAINING_STAGES
from ..extras.logging import LoggerHandler
from ..extras.misc import get_device_count, torch_gc
from ..train import run_exp
from .common import get_module, get_save_dir, load_config
from .locales import ALERTS
from .utils import gen_cmd, get_eval_results, update_process_bar
if TYPE_CHECKING:
from .manager import Manager
class Runner:
def __init__(self, manager: "Manager", demo_mode: Optional[bool] = False) -> None:
self.manager = manager
self.demo_mode = demo_mode
""" Resume """
self.thread: "Thread" = None
self.do_train = True
self.running_data: Dict["Component", Any] = None
""" State """
self.aborted = False
self.running = False
""" Handler """
self.logger_handler = LoggerHandler()
self.logger_handler.setLevel(logging.INFO)
logging.root.addHandler(self.logger_handler)
transformers.logging.add_handler(self.logger_handler)
@property
def alive(self) -> bool:
return self.thread is not None
def set_abort(self) -> None:
self.aborted = True
def _initialize(self, data: Dict[Component, Any], do_train: bool, from_preview: bool) -> str:
get = lambda name: data[self.manager.get_elem_by_name(name)]
lang, model_name, model_path = get("top.lang"), get("top.model_name"), get("top.model_path")
dataset = get("train.dataset") if do_train else get("eval.dataset")
if self.running:
return ALERTS["err_conflict"][lang]
if not model_name:
return ALERTS["err_no_model"][lang]
if not model_path:
return ALERTS["err_no_path"][lang]
if len(dataset) == 0:
return ALERTS["err_no_dataset"][lang]
if self.demo_mode and (not from_preview):
return ALERTS["err_demo"][lang]
if not from_preview and get_device_count() > 1:
return ALERTS["err_device_count"][lang]
self.aborted = False
self.logger_handler.reset()
self.trainer_callback = LogCallback(self)
return ""
def _finalize(self, lang: str, finish_info: str) -> str:
self.thread = None
self.running_data = None
self.running = False
torch_gc()
if self.aborted:
return ALERTS["info_aborted"][lang]
else:
return finish_info
def _parse_train_args(self, data: Dict[Component, Any]) -> Dict[str, Any]:
get = lambda name: data[self.manager.get_elem_by_name(name)]
user_config = load_config()
if get("top.adapter_path"):
adapter_name_or_path = ",".join(
[
get_save_dir(get("top.model_name"), get("top.finetuning_type"), adapter)
for adapter in get("top.adapter_path")
]
)
else:
adapter_name_or_path = None
args = dict(
stage=TRAINING_STAGES[get("train.training_stage")],
do_train=True,
model_name_or_path=get("top.model_path"),
adapter_name_or_path=adapter_name_or_path,
cache_dir=user_config.get("cache_dir", None),
finetuning_type=get("top.finetuning_type"),
quantization_bit=int(get("top.quantization_bit")) if get("top.quantization_bit") in ["8", "4"] else None,
template=get("top.template"),
rope_scaling=get("top.rope_scaling") if get("top.rope_scaling") in ["linear", "dynamic"] else None,
flash_attn=(get("top.booster") == "flash_attn"),
use_unsloth=(get("top.booster") == "unsloth"),
dataset_dir=get("train.dataset_dir"),
dataset=",".join(get("train.dataset")),
cutoff_len=get("train.cutoff_len"),
learning_rate=float(get("train.learning_rate")),
num_train_epochs=float(get("train.num_train_epochs")),
max_samples=int(get("train.max_samples")),
per_device_train_batch_size=get("train.batch_size"),
gradient_accumulation_steps=get("train.gradient_accumulation_steps"),
lr_scheduler_type=get("train.lr_scheduler_type"),
max_grad_norm=float(get("train.max_grad_norm")),
logging_steps=get("train.logging_steps"),
save_steps=get("train.save_steps"),
warmup_steps=get("train.warmup_steps"),
neftune_noise_alpha=get("train.neftune_alpha") or None,
sft_packing=get("train.sft_packing"),
upcast_layernorm=get("train.upcast_layernorm"),
lora_rank=get("train.lora_rank"),
lora_dropout=get("train.lora_dropout"),
lora_target=get("train.lora_target") or get_module(get("top.model_name")),
additional_target=get("train.additional_target") or None,
create_new_adapter=get("train.create_new_adapter"),
output_dir=get_save_dir(get("top.model_name"), get("top.finetuning_type"), get("train.output_dir")),
fp16=(get("train.compute_type") == "fp16"),
bf16=(get("train.compute_type") == "bf16"),
)
args["disable_tqdm"] = True
if TRAINING_STAGES[get("train.training_stage")] in ["rm", "ppo", "dpo"]:
args["create_new_adapter"] = args["quantization_bit"] is None
if args["stage"] == "ppo":
args["reward_model"] = get_save_dir(
get("top.model_name"), get("top.finetuning_type"), get("train.reward_model")
)
args["reward_model_type"] = "lora" if get("top.finetuning_type") == "lora" else "full"
if args["stage"] == "dpo":
args["dpo_beta"] = get("train.dpo_beta")
args["dpo_ftx"] = get("train.dpo_ftx")
if get("train.val_size") > 1e-6 and args["stage"] != "ppo":
args["val_size"] = get("train.val_size")
args["evaluation_strategy"] = "steps"
args["eval_steps"] = get("train.save_steps")
args["load_best_model_at_end"] = True
return args
def _parse_eval_args(self, data: Dict[Component, Any]) -> Dict[str, Any]:
get = lambda name: data[self.manager.get_elem_by_name(name)]
user_config = load_config()
if get("top.adapter_path"):
adapter_name_or_path = ",".join(
[
get_save_dir(get("top.model_name"), get("top.finetuning_type"), adapter)
for adapter in get("top.adapter_path")
]
)
else:
adapter_name_or_path = None
args = dict(
stage="sft",
model_name_or_path=get("top.model_path"),
adapter_name_or_path=adapter_name_or_path,
cache_dir=user_config.get("cache_dir", None),
finetuning_type=get("top.finetuning_type"),
quantization_bit=int(get("top.quantization_bit")) if get("top.quantization_bit") in ["8", "4"] else None,
template=get("top.template"),
rope_scaling=get("top.rope_scaling") if get("top.rope_scaling") in ["linear", "dynamic"] else None,
flash_attn=(get("top.booster") == "flash_attn"),
use_unsloth=(get("top.booster") == "unsloth"),
dataset_dir=get("eval.dataset_dir"),
dataset=",".join(get("eval.dataset")),
cutoff_len=get("eval.cutoff_len"),
max_samples=int(get("eval.max_samples")),
per_device_eval_batch_size=get("eval.batch_size"),
predict_with_generate=True,
max_new_tokens=get("eval.max_new_tokens"),
top_p=get("eval.top_p"),
temperature=get("eval.temperature"),
output_dir=get_save_dir(get("top.model_name"), get("top.finetuning_type"), get("eval.output_dir")),
)
if get("eval.predict"):
args["do_predict"] = True
else:
args["do_eval"] = True
return args
def _preview(
self, data: Dict[Component, Any], do_train: bool
) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
error = self._initialize(data, do_train, from_preview=True)
if error:
gr.Warning(error)
yield error, gr.update(visible=False)
else:
args = self._parse_train_args(data) if do_train else self._parse_eval_args(data)
yield gen_cmd(args), gr.update(visible=False)
def _launch(self, data: Dict[Component, Any], do_train: bool) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
error = self._initialize(data, do_train, from_preview=False)
if error:
gr.Warning(error)
yield error, gr.update(visible=False)
else:
args = self._parse_train_args(data) if do_train else self._parse_eval_args(data)
run_kwargs = dict(args=args, callbacks=[self.trainer_callback])
self.do_train, self.running_data = do_train, data
self.thread = Thread(target=run_exp, kwargs=run_kwargs)
self.thread.start()
yield from self.monitor()
def preview_train(self, data: Dict[Component, Any]) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
yield from self._preview(data, do_train=True)
def preview_eval(self, data: Dict[Component, Any]) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
yield from self._preview(data, do_train=False)
def run_train(self, data: Dict[Component, Any]) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
yield from self._launch(data, do_train=True)
def run_eval(self, data: Dict[Component, Any]) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
yield from self._launch(data, do_train=False)
def monitor(self) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
get = lambda name: self.running_data[self.manager.get_elem_by_name(name)]
self.running = True
lang = get("top.lang")
output_dir = get_save_dir(
get("top.model_name"),
get("top.finetuning_type"),
get("{}.output_dir".format("train" if self.do_train else "eval")),
)
while self.thread.is_alive():
time.sleep(2)
if self.aborted:
yield ALERTS["info_aborting"][lang], gr.update(visible=False)
else:
yield self.logger_handler.log, update_process_bar(self.trainer_callback)
if self.do_train:
if os.path.exists(os.path.join(output_dir, TRAINING_ARGS_NAME)):
finish_info = ALERTS["info_finished"][lang]
else:
finish_info = ALERTS["err_failed"][lang]
else:
if os.path.exists(os.path.join(output_dir, "all_results.json")):
finish_info = get_eval_results(os.path.join(output_dir, "all_results.json"))
else:
finish_info = ALERTS["err_failed"][lang]
yield self._finalize(lang, finish_info), gr.update(visible=False)
import json
import os
from datetime import datetime
from typing import TYPE_CHECKING, Any, Dict
import gradio as gr
from ..extras.packages import is_matplotlib_available
from ..extras.ploting import smooth
from .common import get_save_dir
from .locales import ALERTS
if TYPE_CHECKING:
from ..extras.callbacks import LogCallback
if is_matplotlib_available():
import matplotlib.figure
import matplotlib.pyplot as plt
def update_process_bar(callback: "LogCallback") -> Dict[str, Any]:
if not callback.max_steps:
return gr.update(visible=False)
percentage = round(100 * callback.cur_steps / callback.max_steps, 0) if callback.max_steps != 0 else 100.0
label = "Running {:d}/{:d}: {} < {}".format(
callback.cur_steps, callback.max_steps, callback.elapsed_time, callback.remaining_time
)
return gr.update(label=label, value=percentage, visible=True)
def get_time() -> str:
return datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
def can_quantize(finetuning_type: str) -> Dict[str, Any]:
if finetuning_type != "lora":
return gr.update(value="None", interactive=False)
else:
return gr.update(interactive=True)
def check_json_schema(text: str, lang: str) -> None:
try:
tools = json.loads(text)
for tool in tools:
assert "name" in tool
except AssertionError:
gr.Warning(ALERTS["err_tool_name"][lang])
except json.JSONDecodeError:
gr.Warning(ALERTS["err_json_schema"][lang])
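# Illustrative sketch (not part of the original file): a tools definition that passes
# check_json_schema above, i.e. a JSON list whose entries each carry a "name" field.
# The tool shown is purely hypothetical.
_example_tools = '[{"name": "get_weather", "parameters": {"location": "string"}}]'
check_json_schema(_example_tools, lang="en")  # valid: no gr.Warning is raised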
def gen_cmd(args: Dict[str, Any]) -> str:
args.pop("disable_tqdm", None)
args["plot_loss"] = args.get("do_train", None)
current_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "0")
cmd_lines = ["CUDA_VISIBLE_DEVICES={} python src/train_bash.py ".format(current_devices)]
for k, v in args.items():
if v is not None and v is not False and v != "":
cmd_lines.append(" --{} {} ".format(k, str(v)))
cmd_text = "\\\n".join(cmd_lines)
cmd_text = "```bash\n{}\n```".format(cmd_text)
return cmd_text
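# Illustrative sketch (not part of the original file): gen_cmd renders an argument dict as a
# fenced ```bash``` snippet with one "--key value" line per argument that is not None, False
# or an empty string, plus a derived plot_loss flag. The arguments below are hypothetical;
# CUDA_VISIBLE_DEVICES is taken from the environment and defaults to "0".
print(gen_cmd({"do_train": True, "model_name_or_path": "path_to_model", "learning_rate": 5e-5}))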
def get_eval_results(path: os.PathLike) -> str:
with open(path, "r", encoding="utf-8") as f:
result = json.dumps(json.load(f), indent=4)
return "```json\n{}\n```\n".format(result)
def gen_plot(base_model: str, finetuning_type: str, output_dir: str) -> "matplotlib.figure.Figure":
if not base_model:
return
log_file = get_save_dir(base_model, finetuning_type, output_dir, "trainer_log.jsonl")
if not os.path.isfile(log_file):
return
plt.close("all")
fig = plt.figure()
ax = fig.add_subplot(111)
steps, losses = [], []
with open(log_file, "r", encoding="utf-8") as f:
for line in f:
log_info = json.loads(line)
if log_info.get("loss", None):
steps.append(log_info["current_steps"])
losses.append(log_info["loss"])
if len(losses) == 0:
return None
ax.plot(steps, losses, alpha=0.4, label="original")
ax.plot(steps, smooth(losses), label="smoothed")
ax.legend()
ax.set_xlabel("step")
ax.set_ylabel("loss")
return fig
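# Illustrative sketch (not part of the original file): the trainer_log.jsonl records consumed
# by gen_plot, reduced to the two keys it actually reads. The values are hypothetical.
_example_log_line = json.dumps({"current_steps": 10, "loss": 1.234})
assert set(json.loads(_example_log_line)) >= {"current_steps", "loss"}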
from llmtuner import run_exp
def main():
run_exp()
def _mp_fn(index):
# For xla_spawn (TPUs)
main()
if __name__ == "__main__":
main()
from llmtuner import create_ui
def main():
demo = create_ui()
demo.queue()
demo.launch(server_name="0.0.0.0", share=False, inbrowser=True)
if __name__ == "__main__":
main()
from llmtuner import create_web_demo
def main():
demo = create_web_demo()
demo.queue()
demo.launch(server_name="0.0.0.0", share=False, inbrowser=True)
if __name__ == "__main__":
main()
# coding=utf-8
# Calculates the FLOPs of pre-trained models.
# Usage: python cal_flops.py --model_name_or_path path_to_model --batch_size 1 --seq_length 512
# Inspired by: https://www.deepspeed.ai/tutorials/flops-profiler/
from typing import Optional
import fire
import torch
from deepspeed.accelerator import get_accelerator # type: ignore
from deepspeed.profiling.flops_profiler import get_model_profile # type: ignore
from llmtuner import ChatModel
def calculate_flops(
model_name_or_path: str,
batch_size: Optional[int] = 1,
seq_length: Optional[int] = 256,
flash_attn: Optional[bool] = False,
):
with get_accelerator().device(0):
chat_model = ChatModel(dict(model_name_or_path=model_name_or_path, template="vanilla", flash_attn=flash_attn))
fake_input = torch.ones((batch_size, seq_length), dtype=torch.long, device=chat_model.model.device)
input_dict = {"input_ids": fake_input, "labels": fake_input.clone()}
flops, macs, params = get_model_profile(chat_model.model, kwargs=input_dict, print_profile=True, detailed=True)
print("FLOPs:", flops)
print("MACs:", macs)
print("Params:", params)
if __name__ == "__main__":
fire.Fire(calculate_flops)
# coding=utf-8
# Calculates the optimal learning rate for 7B/13B models using LLaMA's hyper-parameters.
# Usage: python cal_lr.py --model_name_or_path path_to_model --dataset alpaca_en --cutoff_len 1024 --batch_size 16
# Inspired by: https://github.com/imoneoi/openchat/blob/master/ochat/training_deepspeed/train.py
import math
from typing import Optional
import fire
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import DataCollatorForLanguageModeling, DataCollatorForSeq2Seq
from llmtuner.data import get_dataset
from llmtuner.extras.constants import IGNORE_INDEX
from llmtuner.hparams import get_train_args
from llmtuner.model import load_model_and_tokenizer
BASE_LR = 3e-4 # 1.5e-4 for 30B-70B models
BASE_BS = 4_000_000 # from llama paper
def calculate_lr(
model_name_or_path: str,
batch_size: int, # total batch size, namely (batch size * gradient accumulation * world size)
stage: Optional[str] = "sft",
dataset: Optional[str] = "alpaca_en",
dataset_dir: Optional[str] = "data",
template: Optional[str] = "default",
cutoff_len: Optional[int] = 1024, # i.e. maximum input length during training
    is_mistral: Optional[bool] = False,  # mistral model uses a smaller learning rate
):
model_args, data_args, training_args, finetuning_args, _ = get_train_args(
dict(
stage=stage,
model_name_or_path=model_name_or_path,
dataset=dataset,
dataset_dir=dataset_dir,
template=template,
cutoff_len=cutoff_len,
output_dir="dummy_dir",
overwrite_cache=True,
)
)
_, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, is_trainable=False, add_valuehead=False)
trainset = get_dataset(tokenizer, model_args, data_args, training_args, stage=stage)
if stage == "pt":
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
elif stage == "sft":
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, label_pad_token_id=IGNORE_INDEX)
else:
raise NotImplementedError
dataloader = DataLoader(
dataset=trainset, batch_size=batch_size, shuffle=True, collate_fn=data_collator, pin_memory=True
)
valid_tokens, total_tokens = 0, 0
for batch in tqdm(dataloader):
valid_tokens += torch.sum(batch["labels"] != IGNORE_INDEX).item()
total_tokens += torch.numel(batch["labels"])
batch_max_len = cutoff_len * batch_size # max tokens in a batch
valid_ratio = valid_tokens / total_tokens
batch_valid_len = batch_max_len * valid_ratio
lr = BASE_LR * math.sqrt(batch_valid_len / BASE_BS) # lr ~ sqrt(batch_size)
lr = lr / 6.0 if is_mistral else lr
    print(
        "Optimal learning rate is {:.2e} for valid ratio {:.2f}% and effective token batch size {:.2f}".format(
            lr, valid_ratio * 100, batch_valid_len
        )
    )
if __name__ == "__main__":
fire.Fire(calculate_lr)
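# Illustrative sketch (not part of cal_lr.py): the square-root scaling rule above, evaluated
# with hypothetical numbers. With cutoff_len = 1024, a total batch size of 16 and a 90%
# valid-token ratio, the effective token batch is about 14.7k tokens, and
# lr = BASE_LR * sqrt(batch_valid_len / BASE_BS) comes out to roughly 1.8e-5.
_example_batch_valid_len = 1024 * 16 * 0.9
_example_lr = 3e-4 * math.sqrt(_example_batch_valid_len / 4_000_000)
assert abs(_example_lr - 1.82e-5) < 1e-6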
# coding=utf-8
# Calculates the distribution of the input lengths in the dataset.
# Usage: python length_cdf.py --model_name_or_path path_to_model --dataset alpaca_en --template default
from collections import defaultdict
from typing import Optional
import fire
from tqdm import tqdm
from llmtuner.data import get_dataset
from llmtuner.hparams import get_train_args
from llmtuner.model import load_model_and_tokenizer
def length_cdf(
model_name_or_path: str,
dataset: Optional[str] = "alpaca_en",
dataset_dir: Optional[str] = "data",
template: Optional[str] = "default",
interval: Optional[int] = 1000,
):
model_args, data_args, training_args, finetuning_args, _ = get_train_args(
dict(
stage="sft",
model_name_or_path=model_name_or_path,
dataset=dataset,
dataset_dir=dataset_dir,
template=template,
cutoff_len=1_000_000,
output_dir="dummy_dir",
overwrite_cache=True,
)
)
_, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, is_trainable=False, add_valuehead=False)
trainset = get_dataset(tokenizer, model_args, data_args, training_args, stage="sft")
total_num = len(trainset)
length_dict = defaultdict(int)
for sample in tqdm(trainset["input_ids"]):
length_dict[len(sample) // interval * interval] += 1
length_tuples = list(length_dict.items())
length_tuples.sort()
count_accu, prob_accu = 0, 0
for length, count in length_tuples:
count_accu += count
prob_accu += count / total_num * 100
print("{:d} ({:.2f}%) samples have length < {}.".format(count_accu, prob_accu, length + interval))
if __name__ == "__main__":
fire.Fire(length_cdf)
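# Illustrative sketch (not part of length_cdf.py): how the bucketing above groups tokenized
# lengths. With a hypothetical interval of 1000, lengths 250, 980 and 1700 fall into the
# 0 and 1000 buckets, and the printed CDF reports "< bucket + interval" thresholds.
_example_buckets = defaultdict(int)
for _length in (250, 980, 1700):
    _example_buckets[_length // 1000 * 1000] += 1
assert dict(_example_buckets) == {0: 2, 1000: 1}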
# coding=utf-8
# Performs block expansion for LLaMA, Mistral or Qwen1.5 models.
# Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8
# Inspired by: https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py
import json
import os
from collections import OrderedDict
from typing import TYPE_CHECKING, Optional
import fire
import torch
from safetensors.torch import save_file
from tqdm import tqdm
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
from transformers.modeling_utils import (
SAFE_WEIGHTS_INDEX_NAME,
SAFE_WEIGHTS_NAME,
WEIGHTS_INDEX_NAME,
WEIGHTS_NAME,
shard_checkpoint,
)
if TYPE_CHECKING:
from transformers import PretrainedConfig, PreTrainedModel
def change_name(name: str, old_index: int, new_index: int) -> str:
return name.replace(".{:d}.".format(old_index), ".{:d}.".format(new_index))
def block_expansion(
model_name_or_path: str,
output_dir: str,
num_expand: int,
shard_size: Optional[str] = "2GB",
save_safetensors: Optional[bool] = False,
):
config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path)
num_layers = getattr(config, "num_hidden_layers")
setattr(config, "num_hidden_layers", num_layers + num_expand)
config.save_pretrained(output_dir)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
tokenizer.save_pretrained(output_dir)
config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path) # load the original one
if save_safetensors:
setattr(config, "tie_word_embeddings", False) # safetensors does not allow shared weights
model: "PreTrainedModel" = AutoModelForCausalLM.from_pretrained(
model_name_or_path,
config=config,
torch_dtype="auto",
trust_remote_code=True,
low_cpu_mem_usage=True,
)
state_dict = model.state_dict()
if num_layers % num_expand != 0:
raise ValueError("`num_layers` {} should be divisible by `num_expand` {}.".format(num_layers, num_expand))
split = num_layers // num_expand
layer_cnt = 0
output_state_dict = OrderedDict()
for i in range(num_layers):
for key, value in state_dict.items():
if ".{:d}.".format(i) in key:
output_state_dict[change_name(key, i, layer_cnt)] = value
print("Add layer {} copied from layer {}".format(layer_cnt, i))
layer_cnt += 1
if (i + 1) % split == 0:
for key, value in state_dict.items():
if ".{:d}.".format(i) in key:
if "down_proj" in key or "o_proj" in key:
output_state_dict[change_name(key, i, layer_cnt)] = torch.zeros_like(value)
else:
output_state_dict[change_name(key, i, layer_cnt)] = torch.clone(value)
print("Add layer {} expanded from layer {}".format(layer_cnt, i))
layer_cnt += 1
for key, value in state_dict.items():
if key not in output_state_dict:
output_state_dict[key] = value
weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
shards, index = shard_checkpoint(output_state_dict, max_shard_size=shard_size, weights_name=weights_name)
for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
if save_safetensors:
save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
else:
torch.save(shard, os.path.join(output_dir, shard_file))
if index is None:
print("Model weights saved in {}".format(os.path.join(output_dir, weights_name)))
else:
index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
json.dump(index, f, indent=2, sort_keys=True)
print("Model weights saved in {}".format(output_dir))
print("Fine-tune this model with:")
print(" --model_name_or_path {} \\".format(output_dir))
print(" --finetuning_type freeze \\")
print(" --name_module_trainable all \\")
print(" --num_layer_trainable {} \\".format(num_expand))
print(" --use_llama_pro")
if __name__ == "__main__":
fire.Fire(block_expansion)
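# Illustrative sketch (not part of llama_pro.py): the layer schedule produced by the loop in
# block_expansion. With hypothetical sizes num_layers = 32 and num_expand = 8, every
# split = 4 consecutive layers are copied and followed by one expanded layer whose o_proj and
# down_proj weights are zeroed, giving 32 + 8 = 40 layers in total.
_num_layers, _num_expand = 32, 8
_split = _num_layers // _num_expand
_layer_cnt = 0
for _i in range(_num_layers):
    _layer_cnt += 1  # copied layer
    if (_i + 1) % _split == 0:
        _layer_cnt += 1  # expanded (identity-initialized) layer
assert _layer_cnt == _num_layers + _num_expand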
# coding=utf-8
# Converts the Baichuan2-7B model into the same format as LLaMA2-7B.
# Usage: python llamafy_baichuan2.py --input_dir input --output_dir output
# Inspired by: https://huggingface.co/fireballoon/baichuan-llama-7b/blob/main/convert_baichuan_to_llama.py
# Converted model: https://huggingface.co/hiyouga/Baichuan2-7B-Base-LLaMAfied
import json
import os
from collections import OrderedDict
from typing import Any, Dict, Optional
import fire
import torch
from safetensors.torch import save_file
from tqdm import tqdm
from transformers.modeling_utils import (
SAFE_WEIGHTS_INDEX_NAME,
SAFE_WEIGHTS_NAME,
WEIGHTS_INDEX_NAME,
WEIGHTS_NAME,
shard_checkpoint,
)
CONFIG_NAME = "config.json"
def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool):
baichuan2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".bin"):
shard_weight = torch.load(os.path.join(input_dir, filepath), map_location="cpu")
baichuan2_state_dict.update(shard_weight)
llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
for key, value in tqdm(baichuan2_state_dict.items(), desc="Convert format"):
if "W_pack" in key:
proj_size = value.size(0) // 3
llama2_state_dict[key.replace("W_pack", "q_proj")] = value[:proj_size, :]
llama2_state_dict[key.replace("W_pack", "k_proj")] = value[proj_size : 2 * proj_size, :]
llama2_state_dict[key.replace("W_pack", "v_proj")] = value[2 * proj_size :, :]
elif "lm_head" in key:
llama2_state_dict[key] = torch.nn.functional.normalize(value)
else:
llama2_state_dict[key] = value
weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name)
for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
if save_safetensors:
save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
else:
torch.save(shard, os.path.join(output_dir, shard_file))
if index is None:
print("Model weights saved in {}".format(os.path.join(output_dir, WEIGHTS_NAME)))
else:
index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
json.dump(index, f, indent=2, sort_keys=True)
print("Model weights saved in {}".format(output_dir))
def save_config(input_dir: str, output_dir: str):
with open(os.path.join(input_dir, CONFIG_NAME), "r", encoding="utf-8") as f:
llama2_config_dict: Dict[str, Any] = json.load(f)
llama2_config_dict["architectures"] = ["LlamaForCausalLM"]
llama2_config_dict.pop("auto_map", None)
llama2_config_dict.pop("tokenizer_class", None)
llama2_config_dict["model_type"] = "llama"
with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f:
json.dump(llama2_config_dict, f, indent=2)
print("Model config saved in {}".format(os.path.join(output_dir, CONFIG_NAME)))
def llamafy_baichuan2(
input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False
):
    try:
        os.makedirs(output_dir, exist_ok=False)
    except FileExistsError:
        raise RuntimeError("Output dir already exists: {}".format(output_dir))
save_weight(input_dir, output_dir, shard_size, save_safetensors)
save_config(input_dir, output_dir)
if __name__ == "__main__":
fire.Fire(llamafy_baichuan2)
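# Illustrative sketch (not part of llamafy_baichuan2.py): Baichuan2 stores the query, key and
# value projections as a single packed "W_pack" matrix; save_weight slices it into three
# equally sized LLaMA-style projections. The hidden size of 8 below is purely hypothetical.
import torch

_hidden_size = 8
_w_pack = torch.randn(3 * _hidden_size, _hidden_size)
_proj_size = _w_pack.size(0) // 3
_q = _w_pack[:_proj_size, :]
_k = _w_pack[_proj_size : 2 * _proj_size, :]
_v = _w_pack[2 * _proj_size :, :]
assert _q.shape == _k.shape == _v.shape == (_hidden_size, _hidden_size)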
# coding=utf-8
# Converts the InternLM2 model into the same format as LLaMA2.
# Usage: python llamafy_internlm2.py --input_dir input --output_dir output
# Warning: We have found that the converted model cannot infer correctly. It will be fixed later.
import json
import os
from collections import OrderedDict
from typing import Any, Dict, Optional
import fire
import torch
from safetensors.torch import save_file
from tqdm import tqdm
from transformers.modeling_utils import (
SAFE_WEIGHTS_INDEX_NAME,
SAFE_WEIGHTS_NAME,
WEIGHTS_INDEX_NAME,
WEIGHTS_NAME,
shard_checkpoint,
)
CONFIG_NAME = "config.json"
def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool):
with open(os.path.join(input_dir, CONFIG_NAME), "r", encoding="utf-8") as f:
internlm2_config_dict: Dict[str, Any] = json.load(f)
internlm2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".bin"):
shard_weight = torch.load(os.path.join(input_dir, filepath), map_location="cpu")
internlm2_state_dict.update(shard_weight)
llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
for key, value in tqdm(internlm2_state_dict.items(), desc="Convert format"):
if "output" in key:
llama2_state_dict[key.replace("output", "lm_head")] = value
elif "tok_embeddings" in key:
llama2_state_dict[key.replace("tok_embeddings", "embed_tokens")] = value
elif "wqkv" in key:
num_q_heads = internlm2_config_dict["num_attention_heads"]
num_kv_heads = internlm2_config_dict["num_key_value_heads"]
q_size = value.size(0) // (num_q_heads + 2 * num_kv_heads) * num_q_heads
kv_size = value.size(0) // (num_q_heads + 2 * num_kv_heads) * num_kv_heads
llama2_state_dict[key.replace("attention.wqkv", "self_attn.q_proj")] = value[:q_size, ...]
llama2_state_dict[key.replace("attention.wqkv", "self_attn.k_proj")] = value[
q_size : q_size + kv_size, ...
]
llama2_state_dict[key.replace("attention.wqkv", "self_attn.v_proj")] = value[q_size + kv_size :, ...]
elif "wo" in key:
llama2_state_dict[key.replace("attention.wo", "self_attn.o_proj")] = value
elif "attention_norm" in key:
llama2_state_dict[key.replace("attention_norm", "input_layernorm")] = value
elif "ffn_norm" in key:
llama2_state_dict[key.replace("ffn_norm", "post_attention_layernorm")] = value
elif "w1" in key:
llama2_state_dict[key.replace("feed_forward.w1", "mlp.gate_proj")] = value
elif "w2" in key:
llama2_state_dict[key.replace("feed_forward.w2", "mlp.down_proj")] = value
elif "w3" in key:
llama2_state_dict[key.replace("feed_forward.w3", "mlp.up_proj")] = value
else:
llama2_state_dict[key] = value
weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name)
for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
if save_safetensors:
save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
else:
torch.save(shard, os.path.join(output_dir, shard_file))
if index is None:
print("Model weights saved in {}".format(os.path.join(output_dir, WEIGHTS_NAME)))
else:
index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
json.dump(index, f, indent=2, sort_keys=True)
print("Model weights saved in {}".format(output_dir))
def save_config(input_dir: str, output_dir: str):
with open(os.path.join(input_dir, CONFIG_NAME), "r", encoding="utf-8") as f:
llama2_config_dict: Dict[str, Any] = json.load(f)
llama2_config_dict["architectures"] = ["LlamaForCausalLM"]
llama2_config_dict.pop("auto_map", None)
llama2_config_dict.pop("bias", None)
llama2_config_dict.pop("rope_scaling", None)
llama2_config_dict["model_type"] = "llama"
with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f:
json.dump(llama2_config_dict, f, indent=2)
print("Model config saved in {}".format(os.path.join(output_dir, CONFIG_NAME)))
def llamafy_internlm2(
input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False
):
    try:
        os.makedirs(output_dir, exist_ok=False)
    except FileExistsError:
        raise RuntimeError("Output dir already exists: {}".format(output_dir))
save_weight(input_dir, output_dir, shard_size, save_safetensors)
save_config(input_dir, output_dir)
if __name__ == "__main__":
fire.Fire(llamafy_internlm2)
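# Illustrative sketch (not part of llamafy_internlm2.py): InternLM2 packs grouped-query
# attention into a single "wqkv" tensor. With hypothetical sizes (32 query heads, 8 key/value
# heads, head dim 128), the arithmetic in save_weight yields row counts that match the
# query and key/value head counts.
_num_q_heads, _num_kv_heads, _head_dim = 32, 8, 128
_rows = (_num_q_heads + 2 * _num_kv_heads) * _head_dim  # first dimension of the wqkv weight
_q_size = _rows // (_num_q_heads + 2 * _num_kv_heads) * _num_q_heads
_kv_size = _rows // (_num_q_heads + 2 * _num_kv_heads) * _num_kv_heads
assert _q_size == _num_q_heads * _head_dim and _kv_size == _num_kv_heads * _head_dim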
# coding=utf-8
# Converts the Qwen models into the same format as LLaMA2.
# Usage: python llamafy_qwen.py --input_dir input --output_dir output
# Converted model: https://huggingface.co/hiyouga/Qwen-14B-Chat-LLaMAfied
import json
import os
from collections import OrderedDict
from typing import Any, Dict, Optional
import fire
import torch
from safetensors import safe_open
from safetensors.torch import save_file
from tqdm import tqdm
from transformers.modeling_utils import (
SAFE_WEIGHTS_INDEX_NAME,
SAFE_WEIGHTS_NAME,
WEIGHTS_INDEX_NAME,
WEIGHTS_NAME,
shard_checkpoint,
)
from transformers.utils import check_min_version
try:
check_min_version("4.34.0")
except Exception:
raise ValueError("Please upgrade `transformers` to 4.34.0")
CONFIG_NAME = "config.json"
def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool) -> str:
qwen_state_dict: Dict[str, torch.Tensor] = OrderedDict()
for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".safetensors"):
with safe_open(os.path.join(input_dir, filepath), framework="pt", device="cpu") as f:
for key in f.keys():
qwen_state_dict[key] = f.get_tensor(key)
llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
torch_dtype = None
for key, value in tqdm(qwen_state_dict.items(), desc="Convert format"):
if torch_dtype is None:
torch_dtype = value.dtype
if "wte" in key:
llama2_state_dict["model.embed_tokens.weight"] = value
elif "ln_f" in key:
llama2_state_dict["model.norm.weight"] = value
else:
key = key.replace("transformer.h", "model.layers")
if "attn.c_attn" in key:
proj_size = value.size(0) // 3
llama2_state_dict[key.replace("attn.c_attn", "self_attn.q_proj")] = value[:proj_size, ...]
llama2_state_dict[key.replace("attn.c_attn", "self_attn.k_proj")] = value[
proj_size : 2 * proj_size, ...
]
llama2_state_dict[key.replace("attn.c_attn", "self_attn.v_proj")] = value[2 * proj_size :, ...]
elif "attn.c_proj" in key:
llama2_state_dict[key.replace("attn.c_proj", "self_attn.o_proj")] = value
llama2_state_dict[key.replace("attn.c_proj.weight", "self_attn.o_proj.bias")] = torch.zeros_like(
value[:, 0]
).squeeze()
elif "ln_1" in key:
llama2_state_dict[key.replace("ln_1", "input_layernorm")] = value
elif "ln_2" in key:
llama2_state_dict[key.replace("ln_2", "post_attention_layernorm")] = value
elif "mlp.w1" in key:
llama2_state_dict[key.replace("mlp.w1", "mlp.up_proj")] = value
elif "mlp.w2" in key:
llama2_state_dict[key.replace("mlp.w2", "mlp.gate_proj")] = value
elif "mlp.c_proj" in key:
llama2_state_dict[key.replace("mlp.c_proj", "mlp.down_proj")] = value
elif "lm_head" in key:
llama2_state_dict[key] = value
else:
raise KeyError("Unable to process key {}".format(key))
weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name)
for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
if save_safetensors:
save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
else:
torch.save(shard, os.path.join(output_dir, shard_file))
if index is None:
print("Model weights saved in {}".format(os.path.join(output_dir, weights_name)))
else:
index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
json.dump(index, f, indent=2, sort_keys=True)
print("Model weights saved in {}".format(output_dir))
return str(torch_dtype).replace("torch.", "")
def save_config(input_dir: str, output_dir: str, torch_dtype: str):
with open(os.path.join(input_dir, CONFIG_NAME), "r", encoding="utf-8") as f:
qwen_config_dict: Dict[str, Any] = json.load(f)
llama2_config_dict: Dict[str, Any] = OrderedDict()
llama2_config_dict["architectures"] = ["LlamaForCausalLM"]
llama2_config_dict["hidden_act"] = "silu"
llama2_config_dict["hidden_size"] = qwen_config_dict["hidden_size"]
llama2_config_dict["initializer_range"] = qwen_config_dict["initializer_range"]
llama2_config_dict["intermediate_size"] = qwen_config_dict["intermediate_size"] // 2
llama2_config_dict["max_position_embeddings"] = qwen_config_dict["max_position_embeddings"]
llama2_config_dict["model_type"] = "llama"
llama2_config_dict["num_attention_heads"] = qwen_config_dict["num_attention_heads"]
llama2_config_dict["num_hidden_layers"] = qwen_config_dict["num_hidden_layers"]
llama2_config_dict["num_key_value_heads"] = qwen_config_dict["hidden_size"] // qwen_config_dict["kv_channels"]
llama2_config_dict["pretraining_tp"] = 1
llama2_config_dict["rms_norm_eps"] = qwen_config_dict["layer_norm_epsilon"]
llama2_config_dict["rope_scaling"] = None
llama2_config_dict["tie_word_embeddings"] = qwen_config_dict["tie_word_embeddings"]
llama2_config_dict["torch_dtype"] = torch_dtype
llama2_config_dict["transformers_version"] = "4.34.0"
llama2_config_dict["use_cache"] = True
llama2_config_dict["vocab_size"] = qwen_config_dict["vocab_size"]
llama2_config_dict["attention_bias"] = True
with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f:
json.dump(llama2_config_dict, f, indent=2)
print("Model config saved in {}".format(os.path.join(output_dir, CONFIG_NAME)))
def llamafy_qwen(
input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False
):
try:
os.makedirs(output_dir, exist_ok=False)
except Exception as e:
        raise RuntimeError("Output dir {} already exists.".format(output_dir)) from e
torch_dtype = save_weight(input_dir, output_dir, shard_size, save_safetensors)
save_config(input_dir, output_dir, torch_dtype)
if __name__ == "__main__":
fire.Fire(llamafy_qwen)
# coding=utf-8
# Initializes LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ)
# Usage: python loftq_init.py --model_name_or_path path_to_model --save_dir output_dir
# Inspired by: https://github.com/huggingface/peft/blob/main/examples/loftq_finetuning/quantize_save_load.py
import os
from typing import TYPE_CHECKING, Optional
import fire
import torch
import torch.nn as nn
from peft import LoftQConfig, LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer
if TYPE_CHECKING:
from transformers import PreTrainedModel
class Shell(nn.Module):
def __init__(self, weight: torch.Tensor, bias: Optional[torch.Tensor] = None):
super().__init__()
self.weight = nn.Parameter(weight, requires_grad=False)
if bias is not None:
self.bias = nn.Parameter(bias, requires_grad=False)
def unwrap_model(model: nn.Module, pattern=".base_layer") -> None:
for name in {k.split(pattern)[0] for k, _ in model.named_modules() if pattern in k}:
parent_name = ".".join(name.split(".")[:-1])
child_name = name.split(".")[-1]
parent_module = model.get_submodule(parent_name)
child_module = getattr(parent_module, child_name)
base_layer = getattr(child_module, "base_layer")
weight = getattr(base_layer, "weight", None)
bias = getattr(base_layer, "bias", None)
setattr(parent_module, child_name, Shell(weight, bias))
print("Model unwrapped.")
def quantize_loftq(
model_name_or_path: str,
save_dir: str,
loftq_bits: Optional[int] = 4,
loftq_iter: Optional[int] = 1,
lora_alpha: Optional[int] = None,
lora_rank: Optional[int] = 16,
lora_target: Optional[str] = "q_proj,v_proj",
save_safetensors: Optional[bool] = False,
):
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto")
loftq_config = LoftQConfig(loftq_bits=loftq_bits, loftq_iter=loftq_iter)
lora_config = LoraConfig(
task_type=TaskType.CAUSAL_LM,
inference_mode=True,
r=lora_rank,
lora_alpha=lora_alpha if lora_alpha is not None else lora_rank * 2,
lora_dropout=0.1,
target_modules=[name.strip() for name in lora_target.split(",")],
init_lora_weights="loftq",
loftq_config=loftq_config,
)
# Init LoftQ model
lora_model = get_peft_model(model, lora_config)
base_model: "PreTrainedModel" = lora_model.get_base_model()
# Save LoftQ model
setattr(lora_model.base_model.peft_config["default"], "base_model_name_or_path", save_dir)
setattr(lora_model.base_model.peft_config["default"], "init_lora_weights", True)
lora_model.save_pretrained(os.path.join(save_dir, "adapters"), safe_serialization=save_safetensors)
# Save base model
unwrap_model(base_model)
base_model.save_pretrained(save_dir, safe_serialization=save_safetensors)
tokenizer.save_pretrained(save_dir)
if __name__ == "__main__":
fire.Fire(quantize_loftq)
import json
from typing import Sequence
from openai import OpenAI
from transformers.utils.versions import require_version
require_version("openai>=1.5.0", "To fix: pip install openai>=1.5.0")
def calculate_gpa(grades: Sequence[str], hours: Sequence[int]) -> float:
grade_to_score = {"A": 4, "B": 3, "C": 2}
total_score, total_hour = 0, 0
for grade, hour in zip(grades, hours):
total_score += grade_to_score[grade] * hour
total_hour += hour
return total_score / total_hour
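# Example (matches the request sent below): calculate_gpa(["A", "A", "B", "C"], [3, 4, 3, 2])
# = (4*3 + 4*4 + 3*3 + 2*2) / (3 + 4 + 3 + 2) = 41 / 12 ≈ 3.4167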
tool_map = {"calculate_gpa": calculate_gpa}
if __name__ == "__main__":
client = OpenAI(
api_key="0",
base_url="http://localhost:8000/v1",
)
tools = [
{
"type": "function",
"function": {
"name": "calculate_gpa",
"description": "Calculate the Grade Point Average (GPA) based on grades and credit hours",
"parameters": {
"type": "object",
"properties": {
"grades": {"type": "array", "items": {"type": "string"}, "description": "The grades"},
"hours": {"type": "array", "items": {"type": "integer"}, "description": "The credit hours"},
},
"required": ["grades", "hours"],
},
},
}
]
messages = []
messages.append({"role": "user", "content": "My grades are A, A, B, and C. The credit hours are 3, 4, 3, and 2."})
result = client.chat.completions.create(messages=messages, model="test", tools=tools)
tool_call = result.choices[0].message.tool_calls[0].function
name, arguments = tool_call.name, json.loads(tool_call.arguments)
messages.append(
{"role": "function", "content": json.dumps({"name": name, "argument": arguments}, ensure_ascii=False)}
)
tool_result = tool_map[name](**arguments)
messages.append({"role": "tool", "content": json.dumps({"gpa": tool_result}, ensure_ascii=False)})
result = client.chat.completions.create(messages=messages, model="test", tools=tools)
print(result.choices[0].message.content)
# Based on your grades and credit hours, your calculated Grade Point Average (GPA) is 3.4166666666666665.
# DISC-FinLLM
**DISC-FinLLM is a financial-domain large language model that provides users with professional, intelligent, and comprehensive financial consulting services in financial scenarios. It is developed and open-sourced by the [Fudan University Data Intelligence and Social Computing Laboratory (Fudan-DISC)](http://fudan-disc.com).**
## Paper
- Paper: [DISC-FinLLM: A Chinese Financial Large Language Model based on Multiple Experts Fine-tuning](https://arxiv.org/abs/2310.15205)
- Pretrained model on Hugging Face: <https://huggingface.co/Go4miii/DISC-FinLLM>
## Model Architecture
### DISC-FinLLM architecture
<div align="center">
<img align="center" src=images/transformer.jpg>
</div>
## Algorithm
DISC-FinLLM is a financial LLM obtained by LoRA instruction fine-tuning of the general-purpose Chinese model Baichuan-13B-Chat on DISC-Fin-SFT, a high-quality financial dataset constructed by the authors. A minimal configuration sketch follows the figure below.
<div align="center">
<img align="center" src=images/transformer.png>
</div>
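The snippet below is only an illustrative sketch of such a LoRA setup using the standard PEFT API, not the exact configuration used by this repository; the rank, alpha, and target module names are assumptions (Baichuan fuses the q/k/v projections into a single `W_pack` module), and the real hyperparameters live in the provided training scripts.
```
from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM

# Hypothetical values for illustration only.
base = AutoModelForCausalLM.from_pretrained(
    "baichuan-inc/Baichuan-13B-Chat", trust_remote_code=True, torch_dtype="auto"
)
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["W_pack"],  # assumed Baichuan attention projection module
)
model = get_peft_model(base, lora_config)
model.print_trainable_parameters()  # only the LoRA adapters remain trainable
```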
## Environment Setup
### Docker (Option 1)
```
docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-ubuntu22.04-dtk23.10.1-py310
docker run -it -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/:ro --shm-size=64G --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name DISC-FinLLM <your imageID> bash
docker exec -it DISC-FinLLM bash
cd /path/your_code_data/DISC-FinLLM
pip install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
```
### Dockerfile (Option 2)
```
cd /path/your_code_data/DISC-FinLLM/docker
docker build --no-cache -t disc-finllm:latest .
docker run --shm-size=64G --name DISC-FinLLM -v /opt/hyhal:/opt/hyhal:ro --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video -v /path/your_code_data/:/path/your_code_data/ -it disc-finllm:latest bash
```
### Anaconda (Option 3)
The DCU-specific deep-learning libraries required by this project can be downloaded and installed from the [光合 (HPCCube)](https://developer.hpccube.com/tool/) developer community.
```
DTK driver: dtk23.10
python: 3.10
torch: 2.1
torchvision: 0.16.0
apex: 1.1.0
deepspeed: 0.12.3
```
```
conda create -n DISC-FinLLM python=3.10
conda activate DISC-FinLLM
cd /path/your_code_data/DISC-FinLLM
pip install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple
```
`Tip: the DTK driver, python, torch, deepspeed and other DCU-related tool versions listed above must match each other exactly (a quick check is sketched below).`
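As an optional sanity check after installation, the key package versions can be verified from Python; whether the DCU is visible through the CUDA interface depends on the DTK build of torch:
```
import torch
import torchvision
import deepspeed

print(torch.__version__)          # expected: 2.1.x
print(torchvision.__version__)    # expected: 0.16.0
print(deepspeed.__version__)      # expected: 0.12.3
print(torch.cuda.is_available())  # on DTK builds the DCU is typically exposed via the CUDA interface
```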
## Dataset
**The datasets used for the [data-analysis evaluation](https://github.com/FudanDISC/DISC-FinLLM/tree/main/eval/computing_eval.json) and the [current-affairs-analysis evaluation](https://github.com/FudanDISC/DISC-FinLLM/tree/main/eval/retriever_eval.json) are available at these links.**
### Custom data processing
See data_processor.py:
```
import json
jsonl_file_path = '.../data/dataset_new.jsonl'
json_file_path = '../data/dataset_new.json'
data = []
# Each JSONL line holds a "context" ("Instruction: ...\nInput: ...") and a "target".
with open(jsonl_file_path, 'r', encoding='utf-8') as file:
    for line in file:
        jsonl_data = json.loads(line)
        context_lines = jsonl_data.get("context").split('\n')
        json_data = {
            "instruction": context_lines[0].replace('Instruction: ', ''),
            "input": context_lines[1].replace('Input: ', ''),
            "output": jsonl_data.get("target")
        }
        data.append(json_data)
# Write the converted records as a single JSON array.
with open(json_file_path, 'w', encoding='utf-8') as file:
    json.dump(data, file, ensure_ascii=False, indent=4)
print(data)
```
The project ships a mini dataset for trial training. The training-data directory layout is shown below; prepare the full dataset for regular training with the same layout (a sample record is sketched after the tree):
```
── data
│   ├── computing_part.json
│   ├── consulting_part.json
│   ├── retrieval_part.json
│   └── task_part.json
```
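For orientation, each training record is expected to use the same instruction/input/output schema produced by data_processor.py above. The snippet writes one hypothetical record (the field contents are invented for illustration):
```
import json

sample = {
    "instruction": "What does a rise in the benchmark interest rate usually imply for bond prices?",
    "input": "",
    "output": "Existing bond prices generally fall when the benchmark interest rate rises.",
}

with open("data/consulting_part.json", "w", encoding="utf-8") as f:
    json.dump([sample], f, ensure_ascii=False, indent=4)
```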
## Training
**Before running the training code, replace the files in the FinLLM model directory with the downloaded local FinLLM model files.**
### Single node, multiple cards
```
bash multi_dcu_train.sh
```
### Single node, single card
```
bash sft_work_dtk.sh
```
## Inference
**Before running the inference code, replace the files in the FinLLM model directory with the downloaded local FinLLM model files, and change the model path in cli_demo.py to the local model path.**
### Single node, single card
Change **model_path** in **cli_demo.py** to the local model path after replacing the model files (see the snippet below).
```
python cli_demo.py
```
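For reference, the line to edit in cli_demo.py (full source appears later in this repository) looks like the following; the path shown is a placeholder:
```
# cli_demo.py
model_path = "/path/to/local/FinLLM"  # placeholder: point this at the locally downloaded weights
```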
### Web-based interactive inference
Change **model_path** in **web_demo.py** to the local model path after replacing the model files.
```
python web_demo.py
```
## Results
DISC-FinLLM is a large language model for the financial domain: a multi-expert smart finance system composed of four modules targeting different financial scenarios, namely financial consulting, financial text analysis, financial computation, and financial knowledge retrieval QA. These modules show clear advantages in four evaluations covering financial NLP tasks, human exams, data analysis, and current-affairs analysis, demonstrating that DISC-FinLLM can provide strong support across a broad range of financial applications. DISC-FinLLM helps in different application scenarios and implements different functions:
* **Financial consulting:** holds multi-turn dialogues with users on financial topics in the Chinese financial context and explains financial knowledge; trained on the financial-consulting instructions in the dataset.
* **Financial text analysis:** helps users complete NLP tasks such as information extraction, sentiment analysis, text classification, and text generation on financial texts; trained on the financial-task instructions in the dataset.
* **Financial computation:** helps users with math-related tasks; beyond basic calculations such as interest rates and growth rates, it supports statistical analysis and financial-model computation, including the Black-Scholes option-pricing model and the EDF (Expected Default Frequency) model; trained on the financial-computation instructions in the dataset.
* **Financial knowledge retrieval QA:** provides investment advice, current-affairs analysis, and policy interpretation based on financial news, research reports, and policy documents; trained on the retrieval-augmented instructions in the dataset.
<div align="center">
<img align="center" src=images/result.png>
</div>
### Accuracy
Test data: [retrieval_part](data/retrieval_part.json); accelerator cards: V100S / K100.
The table below is filled in with the measured results:
| Accelerator | train_loss | train_runtime (s) | eval_loss | eval_runtime (s) |
| :------: | :------: | :------: | :------: | :------: |
| V100S | 0.371248 | 4445.348 | 0.06542 | 30.5495 |
| K100 | 0.671394 | 2384.0498 | 0.64843 | 19.6838 |
## Application Scenarios
Finance, education, government, research
### Algorithm Categories
Financial consulting, financial computation, text analysis
## Pretrained Weights
- Download the full-parameter model weights from [Hugging Face Go4miii/DISC-FinLLM](https://huggingface.co/Go4miii/DISC-FinLLM); a download sketch follows.
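One way to fetch the weights is via `huggingface_hub` (a sketch; the local directory name is an assumption):
```
from huggingface_hub import snapshot_download

# Download the full-parameter weights into a local folder (hypothetical path).
snapshot_download(repo_id="Go4miii/DISC-FinLLM", local_dir="./FinLLM")
```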
## References
- Upstream repository of this project: [Go4miii/DISC-FinLLM](https://github.com/FudanDISC/DISC-FinLLM)
import os
import torch
import platform
from colorama import Fore, Style
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation.utils import GenerationConfig
def init_model():
print("Initializing model...")
model_path = "/DISC-FinLLM/FinLLM"
model = AutoModelForCausalLM.from_pretrained(
model_path, torch_dtype=torch.float16, device_map="auto", trust_remote_code=True
)
model.generation_config = GenerationConfig.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(
model_path, use_fast=False, trust_remote_code=True
)
return model, tokenizer
def clear_screen():
if platform.system() == "Windows":
os.system("cls")
else:
os.system("clear")
print(
Fore.YELLOW
+ Style.BRIGHT
+ "欢迎使用复旦 DISC-FinLLM,输入进行对话,clear 清空历史,Ctrl+C 中断生成,"
+ "stream 开关流式生成,exit 结束。"
)
return []
def main(stream=True):
model, tokenizer = init_model()
messages = clear_screen()
while True:
prompt = input(Fore.GREEN + Style.BRIGHT + "\n用户:" + Style.NORMAL)
if prompt.strip() == "exit":
break
if prompt.strip() == "clear":
messages = clear_screen()
continue
print(Fore.CYAN + Style.BRIGHT + "\nDISC-FinLLM:" + Style.NORMAL, end="")
if prompt.strip() == "stream":
stream = not stream
print(
Fore.YELLOW + "({}流式生成)\n".format("开启" if stream else "关闭"),
end="",
)
continue
messages.append({"role": "user", "content": prompt})
if stream:
position = 0
try:
for response in model.chat(tokenizer, messages, stream=True):
print(response[position:], end="", flush=True)
position = len(response)
if torch.backends.mps.is_available():
torch.mps.empty_cache()
except KeyboardInterrupt:
pass
print()
else:
response = model.chat(tokenizer, messages)
print(response)
if torch.backends.mps.is_available():
torch.mps.empty_cache()
messages.append({"role": "assistant", "content": response})
print(Style.RESET_ALL)
if __name__ == "__main__":
main()