Update doc/inf_result.png, result/result.png, doc/training_loss.png,...

Update doc/inf_result.png, result/result.png, doc/training_loss.png, doc/2403.04652v1.pdf, inference/6B_single_dcu.py, result/training_loss.png, finetune/single_node.sh, finetune/multi_node.sh, finetune/data/dataset_info.json, finetune/data/identity.json, finetune/data/mllm_demo.json, finetune/data/README.md, finetune/data/README_zh.md, finetune/data/alpaca_zh_demo.json, finetune/data/c4_demo.json, finetune/data/glaive_toolcall_en_demo.json, finetune/data/glaive_toolcall_zh_demo.json, finetune/data/alpaca_en_demo.json, finetune/data/dpo_zh_demo.json, finetune/data/kto_en_demo.json, finetune/data/wiki_demo.txt, finetune/data/dpo_en_demo.json, finetune/scripts/cal_flops.py, finetune/scripts/cal_lr.py, finetune/scripts/cal_ppl.py, finetune/scripts/length_cdf.py, finetune/scripts/llamafy_baichuan2.py, finetune/scripts/loftq_init.py, finetune/scripts/llamafy_qwen.py, finetune/scripts/llama_pro.py, finetune/src/api.py, finetune/src/train.py, finetune/src/webui.py, finetune/src/llamafactory/__init__.py, finetune/src/llamafactory/cli.py, finetune/src/llamafactory/api/__init__.py, finetune/src/llamafactory/api/common.py, finetune/src/llamafactory/api/chat.py, finetune/src/llamafactory/api/protocol.py, finetune/src/llamafactory/api/app.py, finetune/src/llamafactory/chat/__init__.py, finetune/src/llamafactory/chat/base_engine.py, finetune/src/llamafactory/chat/hf_engine.py, finetune/src/llamafactory/chat/vllm_engine.py, finetune/src/llamafactory/chat/chat_model.py, finetune/src/llamafactory/data/__init__.py, finetune/src/llamafactory/data/collator.py, finetune/src/llamafactory/data/utils.py, finetune/src/llamafactory/data/aligner.py, finetune/src/llamafactory/data/formatter.py, finetune/src/llamafactory/data/preprocess.py, finetune/src/llamafactory/data/parser.py, finetune/src/llamafactory/data/loader.py, finetune/src/llamafactory/data/template.py, finetune/src/llamafactory/eval/__init__.py, finetune/src/llamafactory/eval/evaluator.py, 
finetune/src/llamafactory/eval/template.py, finetune/src/llamafactory/extras/__init__.py, finetune/src/llamafactory/extras/ploting.py, finetune/src/llamafactory/extras/logging.py, finetune/src/llamafactory/extras/constants.py, finetune/src/llamafactory/extras/misc.py, finetune/src/llamafactory/extras/packages.py, finetune/src/llamafactory/extras/callbacks.py, finetune/src/llamafactory/hparams/__init__.py, finetune/src/llamafactory/hparams/data_args.py, finetune/src/llamafactory/hparams/evaluation_args.py, finetune/src/llamafactory/hparams/generating_args.py, finetune/src/llamafactory/hparams/finetuning_args.py, finetune/src/llamafactory/hparams/parser.py, finetune/src/llamafactory/hparams/model_args.py, finetune/src/llamafactory/model/__init__.py, finetune/src/llamafactory/model/adapter.py, finetune/src/llamafactory/model/loader.py, finetune/src/llamafactory/model/patcher.py, finetune/src/llamafactory/model/utils/__init__.py, finetune/src/llamafactory/model/utils/checkpointing.py, finetune/src/llamafactory/model/utils/embedding.py, finetune/src/llamafactory/model/utils/moe.py, finetune/src/llamafactory/model/utils/attention.py, finetune/src/llamafactory/model/utils/quantization.py, finetune/src/llamafactory/model/utils/valuehead.py, finetune/src/llamafactory/model/utils/longlora.py, finetune/src/llamafactory/model/utils/visual.py, finetune/src/llamafactory/model/utils/misc.py, finetune/src/llamafactory/model/utils/mod.py, finetune/src/llamafactory/model/utils/unsloth.py, finetune/src/llamafactory/model/utils/rope.py, finetune/src/llamafactory/train/__init__.py, finetune/src/llamafactory/train/utils.py, finetune/src/llamafactory/train/tuner.py, finetune/src/llamafactory/train/dpo/__init__.py, finetune/src/llamafactory/train/dpo/trainer.py, finetune/src/llamafactory/train/dpo/workflow.py, finetune/src/llamafactory/train/kto/__init__.py, finetune/src/llamafactory/train/kto/workflow.py, finetune/src/llamafactory/train/kto/trainer.py, 
finetune/src/llamafactory/train/orpo/__init__.py, finetune/src/llamafactory/train/orpo/workflow.py, finetune/src/llamafactory/train/orpo/trainer.py, finetune/src/llamafactory/train/ppo/__init__.py, finetune/src/llamafactory/train/ppo/workflow.py, finetune/src/llamafactory/train/ppo/utils.py, finetune/src/llamafactory/train/ppo/trainer.py, finetune/src/llamafactory/train/pt/__init__.py, finetune/src/llamafactory/train/pt/trainer.py, finetune/src/llamafactory/train/pt/workflow.py, finetune/src/llamafactory/train/rm/__init__.py, finetune/src/llamafactory/train/rm/metric.py, finetune/src/llamafactory/train/rm/trainer.py, finetune/src/llamafactory/train/rm/workflow.py, finetune/src/llamafactory/train/sft/__init__.py, finetune/src/llamafactory/train/sft/workflow.py, finetune/src/llamafactory/train/sft/trainer.py, finetune/src/llamafactory/train/sft/metric.py, finetune/src/llamafactory/webui/__init__.py, finetune/src/llamafactory/webui/chatter.py, finetune/src/llamafactory/webui/common.py, finetune/src/llamafactory/webui/interface.py, finetune/src/llamafactory/webui/runner.py, finetune/src/llamafactory/webui/css.py, finetune/src/llamafactory/webui/engine.py, finetune/src/llamafactory/webui/utils.py, finetune/src/llamafactory/webui/manager.py, finetune/src/llamafactory/webui/locales.py, finetune/src/llamafactory/webui/components/__init__.py, finetune/src/llamafactory/webui/components/data.py, finetune/src/llamafactory/webui/components/chatbot.py, finetune/src/llamafactory/webui/components/train.py, finetune/src/llamafactory/webui/components/top.py, finetune/src/llamafactory/webui/components/infer.py, finetune/src/llamafactory/webui/components/eval.py, finetune/src/llamafactory/webui/components/export.py files

Update doc/inf_result.png, result/result.png, doc/training_loss.png,...
Update doc/inf_result.png, result/result.png, doc/training_loss.png, doc/2403.04652v1.pdf, inference/6B_single_dcu.py, result/training_loss.png, finetune/single_node.sh, finetune/multi_node.sh, finetune/data/dataset_info.json, finetune/data/identity.json, finetune/data/mllm_demo.json, finetune/data/README.md, finetune/data/README_zh.md, finetune/data/alpaca_zh_demo.json, finetune/data/c4_demo.json, finetune/data/glaive_toolcall_en_demo.json, finetune/data/glaive_toolcall_zh_demo.json, finetune/data/alpaca_en_demo.json, finetune/data/dpo_zh_demo.json, finetune/data/kto_en_demo.json, finetune/data/wiki_demo.txt, finetune/data/dpo_en_demo.json, finetune/scripts/cal_flops.py, finetune/scripts/cal_lr.py, finetune/scripts/cal_ppl.py, finetune/scripts/length_cdf.py, finetune/scripts/llamafy_baichuan2.py, finetune/scripts/loftq_init.py, finetune/scripts/llamafy_qwen.py, finetune/scripts/llama_pro.py, finetune/src/api.py, finetune/src/train.py, finetune/src/webui.py, finetune/src/llamafactory/__init__.py, finetune/src/llamafactory/cli.py, finetune/src/llamafactory/api/__init__.py, finetune/src/llamafactory/api/common.py, finetune/src/llamafactory/api/chat.py, finetune/src/llamafactory/api/protocol.py, finetune/src/llamafactory/api/app.py, finetune/src/llamafactory/chat/__init__.py, finetune/src/llamafactory/chat/base_engine.py, finetune/src/llamafactory/chat/hf_engine.py, finetune/src/llamafactory/chat/vllm_engine.py, finetune/src/llamafactory/chat/chat_model.py, finetune/src/llamafactory/data/__init__.py, finetune/src/llamafactory/data/collator.py, finetune/src/llamafactory/data/utils.py, finetune/src/llamafactory/data/aligner.py, finetune/src/llamafactory/data/formatter.py, finetune/src/llamafactory/data/preprocess.py, finetune/src/llamafactory/data/parser.py, finetune/src/llamafactory/data/loader.py, finetune/src/llamafactory/data/template.py, finetune/src/llamafactory/eval/__init__.py, finetune/src/llamafactory/eval/evaluator.py, 
finetune/src/llamafactory/eval/template.py, finetune/src/llamafactory/extras/__init__.py, finetune/src/llamafactory/extras/ploting.py, finetune/src/llamafactory/extras/logging.py, finetune/src/llamafactory/extras/constants.py, finetune/src/llamafactory/extras/misc.py, finetune/src/llamafactory/extras/packages.py, finetune/src/llamafactory/extras/callbacks.py, finetune/src/llamafactory/hparams/__init__.py, finetune/src/llamafactory/hparams/data_args.py, finetune/src/llamafactory/hparams/evaluation_args.py, finetune/src/llamafactory/hparams/generating_args.py, finetune/src/llamafactory/hparams/finetuning_args.py, finetune/src/llamafactory/hparams/parser.py, finetune/src/llamafactory/hparams/model_args.py, finetune/src/llamafactory/model/__init__.py, finetune/src/llamafactory/model/adapter.py, finetune/src/llamafactory/model/loader.py, finetune/src/llamafactory/model/patcher.py, finetune/src/llamafactory/model/utils/__init__.py, finetune/src/llamafactory/model/utils/checkpointing.py, finetune/src/llamafactory/model/utils/embedding.py, finetune/src/llamafactory/model/utils/moe.py, finetune/src/llamafactory/model/utils/attention.py, finetune/src/llamafactory/model/utils/quantization.py, finetune/src/llamafactory/model/utils/valuehead.py, finetune/src/llamafactory/model/utils/longlora.py, finetune/src/llamafactory/model/utils/visual.py, finetune/src/llamafactory/model/utils/misc.py, finetune/src/llamafactory/model/utils/mod.py, finetune/src/llamafactory/model/utils/unsloth.py, finetune/src/llamafactory/model/utils/rope.py, finetune/src/llamafactory/train/__init__.py, finetune/src/llamafactory/train/utils.py, finetune/src/llamafactory/train/tuner.py, finetune/src/llamafactory/train/dpo/__init__.py, finetune/src/llamafactory/train/dpo/trainer.py, finetune/src/llamafactory/train/dpo/workflow.py, finetune/src/llamafactory/train/kto/__init__.py, finetune/src/llamafactory/train/kto/workflow.py, finetune/src/llamafactory/train/kto/trainer.py, 
finetune/src/llamafactory/train/orpo/__init__.py, finetune/src/llamafactory/train/orpo/workflow.py, finetune/src/llamafactory/train/orpo/trainer.py, finetune/src/llamafactory/train/ppo/__init__.py, finetune/src/llamafactory/train/ppo/workflow.py, finetune/src/llamafactory/train/ppo/utils.py, finetune/src/llamafactory/train/ppo/trainer.py, finetune/src/llamafactory/train/pt/__init__.py, finetune/src/llamafactory/train/pt/trainer.py, finetune/src/llamafactory/train/pt/workflow.py, finetune/src/llamafactory/train/rm/__init__.py, finetune/src/llamafactory/train/rm/metric.py, finetune/src/llamafactory/train/rm/trainer.py, finetune/src/llamafactory/train/rm/workflow.py, finetune/src/llamafactory/train/sft/__init__.py, finetune/src/llamafactory/train/sft/workflow.py, finetune/src/llamafactory/train/sft/trainer.py, finetune/src/llamafactory/train/sft/metric.py, finetune/src/llamafactory/webui/__init__.py, finetune/src/llamafactory/webui/chatter.py, finetune/src/llamafactory/webui/common.py, finetune/src/llamafactory/webui/interface.py, finetune/src/llamafactory/webui/runner.py, finetune/src/llamafactory/webui/css.py, finetune/src/llamafactory/webui/engine.py, finetune/src/llamafactory/webui/utils.py, finetune/src/llamafactory/webui/manager.py, finetune/src/llamafactory/webui/locales.py, finetune/src/llamafactory/webui/components/__init__.py, finetune/src/llamafactory/webui/components/data.py, finetune/src/llamafactory/webui/components/chatbot.py, finetune/src/llamafactory/webui/components/train.py, finetune/src/llamafactory/webui/components/top.py, finetune/src/llamafactory/webui/components/infer.py, finetune/src/llamafactory/webui/components/eval.py, finetune/src/llamafactory/webui/components/export.py files
6f97a5dc · zhougaofeng · 2d76eb38 · 6f97a5dc · 6f97a5dc · 6f97a5dc
Commit 6f97a5dc authored Jul 01, 2024 by zhougaofeng
20 changed files
--- a/doc/2403.04652v1.pdf
+++ b/doc/2403.04652v1.pdf
--- a/doc/inf_result.png
+++ b/doc/inf_result.png
--- a/doc/training_loss.png
+++ b/doc/training_loss.png
--- a/finetune/data/README.md
+++ b/finetune/data/README.md
+The [dataset_info.json](dataset_info.json) contains all available datasets. If you are using a custom dataset, please **make sure** to add a *dataset description* in `dataset_info.json` and specify `dataset: dataset_name` before training to use it.
+Currently we support datasets in **alpaca** and **sharegpt** format.
+```json
+"dataset_name": {
+  "hf_hub_url": "the name of the dataset repository on the Hugging Face hub. (if specified, ignore script_url and file_name)",
+  "ms_hub_url": "the name of the dataset repository on the ModelScope hub. (if specified, ignore script_url and file_name)",
+  "script_url": "the name of the directory containing a dataset loading script. (if specified, ignore file_name)",
+  "file_name": "the name of the dataset folder or dataset file in this directory. (required if above are not specified)",
+  "formatting": "the format of the dataset. (optional, default: alpaca, can be chosen from {alpaca, sharegpt})",
+  "ranking": "whether the dataset is a preference dataset or not. (default: False)",
+  "subset": "the name of the subset. (optional, default: None)",
+  "folder": "the name of the folder of the dataset repository on the Hugging Face hub. (optional, default: None)",
+  "columns (optional)": {
+    "prompt": "the column name in the dataset containing the prompts. (default: instruction)",
+    "query": "the column name in the dataset containing the queries. (default: input)",
+    "response": "the column name in the dataset containing the responses. (default: output)",
+    "history": "the column name in the dataset containing the histories. (default: None)",
+    "messages": "the column name in the dataset containing the messages. (default: conversations)",
+    "system": "the column name in the dataset containing the system prompts. (default: None)",
+    "tools": "the column name in the dataset containing the tool description. (default: None)",
+    "images": "the column name in the dataset containing the image inputs. (default: None)",
+    "chosen": "the column name in the dataset containing the chosen answers. (default: None)",
+    "rejected": "the column name in the dataset containing the rejected answers. (default: None)",
+    "kto_tag": "the column name in the dataset containing the kto tags. (default: None)"
+  },
+  "tags (optional, used for the sharegpt format)": {
+    "role_tag": "the key in the message that represents the identity. (default: from)",
+    "content_tag": "the key in the message that represents the content. (default: value)",
+    "user_tag": "the value of the role_tag that represents the user. (default: human)",
+    "assistant_tag": "the value of the role_tag that represents the assistant. (default: gpt)",
+    "observation_tag": "the value of the role_tag that represents the tool results. (default: observation)",
+    "function_tag": "the value of the role_tag that represents the function call. (default: function_call)",
+    "system_tag": "the value of the role_tag that represents the system prompt. (default: system, can override system column)"
+  }
+}
+```
+## Alpaca Format
+### Supervised Fine-Tuning Dataset
+- [Example dataset](alpaca_en_demo.json)
+In supervised fine-tuning, the `instruction` column will be concatenated with the `input` column and used as the human prompt, i.e., the human prompt will be `instruction\ninput`. The `output` column represents the model response.
+The `system` column will be used as the system prompt if specified.
+The `history` column is a list consisting of string tuples representing prompt-response pairs in the history messages. Note that the responses in the history **will also be learned by the model** in supervised fine-tuning.
+```json
+[
+  {
+    "instruction": "human instruction (required)",
+    "input": "human input (optional)",
+    "output": "model response (required)",
+    "system": "system prompt (optional)",
+    "history": [
+      ["human instruction in the first round (optional)", "model response in the first round (optional)"],
+      ["human instruction in the second round (optional)", "model response in the second round (optional)"]
+    ]
+  }
+]
+```
+Regarding the above dataset, the *dataset description* in `dataset_info.json` should be:
+```json
+"dataset_name": {
+  "file_name": "data.json",
+  "columns": {
+    "prompt": "instruction",
+    "query": "input",
+    "response": "output",
+    "system": "system",
+    "history": "history"
+  }
+}
+```
+### Pre-training Dataset
+- [Example dataset](c4_demo.json)
+In pre-training, only the `text` column will be used for model learning.
+```json
+[
+  {"text": "document"},
+  {"text": "document"}
+]
+```
+Regarding the above dataset, the *dataset description* in `dataset_info.json` should be:
+```json
+"dataset_name": {
+  "file_name": "data.json",
+  "columns": {
+    "prompt": "text"
+  }
+}
+```
+### Preference Dataset
+Preference datasets are used for reward modeling, DPO training and ORPO training.
+They require a better response in the `chosen` column and a worse response in the `rejected` column.
+```json
+[
+  {
+    "instruction": "human instruction (required)",
+    "input": "human input (optional)",
+    "chosen": "chosen answer (required)",
+    "rejected": "rejected answer (required)"
+  }
+]
+```
+Regarding the above dataset, the *dataset description* in `dataset_info.json` should be:
+```json
+"dataset_name": {
+  "file_name": "data.json",
+  "ranking": true,
+  "columns": {
+    "prompt": "instruction",
+    "query": "input",
+    "chosen": "chosen",
+    "rejected": "rejected"
+  }
+}
+```
+### KTO Dataset
+- [Example dataset](kto_en_demo.json)
+KTO datasets require an extra `kto_tag` column containing the boolean human feedback.
+```json
+[
+  {
+    "instruction": "human instruction (required)",
+    "input": "human input (optional)",
+    "output": "model response (required)",
+    "kto_tag": "human feedback [true/false] (required)"
+  }
+]
+```
+Regarding the above dataset, the *dataset description* in `dataset_info.json` should be:
+```json
+"dataset_name": {
+  "file_name": "data.json",
+  "columns": {
+    "prompt": "instruction",
+    "query": "input",
+    "response": "output",
+    "kto_tag": "kto_tag"
+  }
+}
+```
+### Multimodal Dataset
+- [Example dataset](mllm_demo.json)
+Multimodal datasets require an `images` column containing the paths to the input images. Currently we only support one image.
+```json
+[
+  {
+    "instruction": "human instruction (required)",
+    "input": "human input (optional)",
+    "output": "model response (required)",
+    "images": [
+      "image path (required)"
+    ]
+  }
+]
+```
+Regarding the above dataset, the *dataset description* in `dataset_info.json` should be:
+```json
+"dataset_name": {
+  "file_name": "data.json",
+  "columns": {
+    "prompt": "instruction",
+    "query": "input",
+    "response": "output",
+    "images": "images"
+  }
+}
+```
+## Sharegpt Format
+### Supervised Fine-Tuning Dataset
+- [Example dataset](glaive_toolcall_en_demo.json)
+Compared to the alpaca format, the sharegpt format allows datasets to have **more roles**, such as human, gpt, observation and function. They are presented in a list of objects in the `conversations` column.
+Note that the human and observation should appear in odd positions, while gpt and function should appear in even positions.
+```json
+[
+  {
+    "conversations": [
+      {
+        "from": "human",
+        "value": "human instruction"
+      },
+      {
+        "from": "function_call",
+        "value": "tool arguments"
+      },
+      {
+        "from": "observation",
+        "value": "tool result"
+      },
+      {
+        "from": "gpt",
+        "value": "model response"
+      }
+    ],
+    "system": "system prompt (optional)",
+    "tools": "tool description (optional)"
+  }
+]
+```
+Regarding the above dataset, the *dataset description* in `dataset_info.json` should be:
+```json
+"dataset_name": {
+  "file_name": "data.json",
+  "formatting": "sharegpt",
+  "columns": {
+    "messages": "conversations",
+    "system": "system",
+    "tools": "tools"
+  }
+}
+```
+### Preference Dataset
+- [Example dataset](dpo_en_demo.json)
+Preference datasets in sharegpt format also require a better message in the `chosen` column and a worse message in the `rejected` column.
+```json
+[
+  {
+    "conversations": [
+      {
+        "from": "human",
+        "value": "human instruction"
+      },
+      {
+        "from": "gpt",
+        "value": "model response"
+      },
+      {
+        "from": "human",
+        "value": "human instruction"
+      }
+    ],
+    "chosen": {
+      "from": "gpt",
+      "value": "chosen answer (required)"
+    },
+    "rejected": {
+      "from": "gpt",
+      "value": "rejected answer (required)"
+    }
+  }
+]
+```
+Regarding the above dataset, the *dataset description* in `dataset_info.json` should be:
+```json
+"dataset_name": {
+  "file_name": "data.json",
+  "formatting": "sharegpt",
+  "ranking": true,
+  "columns": {
+    "messages": "conversations",
+    "chosen": "chosen",
+    "rejected": "rejected"
+  }
+}
+```
+### OpenAI Format
+The OpenAI format is simply a special case of the sharegpt format, where the first message may be a system prompt.
+```json
+[
+  {
+    "messages": [
+      {
+        "role": "system",
+        "content": "system prompt (optional)"
+      },
+      {
+        "role": "user",
+        "content": "human instruction"
+      },
+      {
+        "role": "assistant",
+        "content": "model response"
+      }
+    ]
+  }
+]
+```
+Regarding the above dataset, the *dataset description* in `dataset_info.json` should be:
+```json
+"dataset_name": {
+  "file_name": "data.json",
+  "formatting": "sharegpt",
+  "columns": {
+    "messages": "messages"
+  },
+  "tags": {
+    "role_tag": "role",
+    "content_tag": "content",
+    "user_tag": "user",
+    "assistant_tag": "assistant",
+    "system_tag": "system"
+  }
+}
+```
+The KTO datasets and multimodal datasets in sharegpt format are similar to the alpaca format.
+Pre-training datasets are **incompatible** with the sharegpt format.
--- a/finetune/data/README_zh.md
+++ b/finetune/data/README_zh.md
+[dataset_info.json](dataset_info.json) 包含了所有可用的数据集。如果您希望使用自定义数据集，请**务必**在 `dataset_info.json` 文件中添加*数据集描述*，并通过修改 `dataset: 数据集名称` 配置来使用数据集。
+目前我们支持 **alpaca** 格式和 **sharegpt** 格式的数据集。
+```json
+"数据集名称": {
+  "hf_hub_url": "Hugging Face 的数据集仓库地址（若指定，则忽略 script_url 和 file_name）",
+  "ms_hub_url": "ModelScope 的数据集仓库地址（若指定，则忽略 script_url 和 file_name）",
+  "script_url": "包含数据加载脚本的本地文件夹名称（若指定，则忽略 file_name）",
+  "file_name": "该目录下数据集文件的名称（若上述参数未指定，则此项必需）",
+  "formatting": "数据集格式（可选，默认：alpaca，可以为 alpaca 或 sharegpt）",
+  "ranking": "是否为偏好数据集（可选，默认：False）",
+  "subset": "数据集子集的名称（可选，默认：None）",
+  "folder": "Hugging Face 仓库的文件夹名称（可选，默认：None）",
+  "columns（可选）": {
+    "prompt": "数据集代表提示词的表头名称（默认：instruction）",
+    "query": "数据集代表请求的表头名称（默认：input）",
+    "response": "数据集代表回答的表头名称（默认：output）",
+    "history": "数据集代表历史对话的表头名称（默认：None）",
+    "messages": "数据集代表消息列表的表头名称（默认：conversations）",
+    "system": "数据集代表系统提示的表头名称（默认：None）",
+    "tools": "数据集代表工具描述的表头名称（默认：None）",
+    "images": "数据集代表图像输入的表头名称（默认：None）",
+    "chosen": "数据集代表更优回答的表头名称（默认：None）",
+    "rejected": "数据集代表更差回答的表头名称（默认：None）",
+    "kto_tag": "数据集代表 KTO 标签的表头名称（默认：None）"
+  },
+  "tags（可选，用于 sharegpt 格式）": {
+    "role_tag": "消息中代表发送者身份的键名（默认：from）",
+    "content_tag": "消息中代表文本内容的键名（默认：value）",
+    "user_tag": "消息中代表用户的 role_tag（默认：human）",
+    "assistant_tag": "消息中代表助手的 role_tag（默认：gpt）",
+    "observation_tag": "消息中代表工具返回结果的 role_tag（默认：observation）",
+    "function_tag": "消息中代表工具调用的 role_tag（默认：function_call）",
+    "system_tag": "消息中代表系统提示的 role_tag（默认：system，会覆盖 system column）"
+  }
+}
+```
+## Alpaca 格式
+### 指令监督微调数据集
+- [样例数据集](alpaca_zh_demo.json)
+在指令监督微调时，`instruction` 列对应的内容会与 `input` 列对应的内容拼接后作为人类指令，即人类指令为 `instruction\ninput`。而 `output` 列对应的内容为模型回答。
+如果指定，`system` 列对应的内容将被作为系统提示词。
+`history` 列是由多个字符串二元组构成的列表，分别代表历史消息中每轮对话的指令和回答。注意在指令监督微调时，历史消息中的回答内容**也会被用于模型学习**。
+```json
+[
+  {
+    "instruction": "人类指令（必填）",
+    "input": "人类输入（选填）",
+    "output": "模型回答（必填）",
+    "system": "系统提示词（选填）",
+    "history": [
+      ["第一轮指令（选填）", "第一轮回答（选填）"],
+      ["第二轮指令（选填）", "第二轮回答（选填）"]
+    ]
+  }
+]
+```
+对于上述格式的数据，`dataset_info.json` 中的*数据集描述*应为：
+```json
+"数据集名称": {
+  "file_name": "data.json",
+  "columns": {
+    "prompt": "instruction",
+    "query": "input",
+    "response": "output",
+    "system": "system",
+    "history": "history"
+  }
+}
+```
+### 预训练数据集
+- [样例数据集](c4_demo.json)
+在预训练时，只有 `text` 列中的内容会用于模型学习。
+```json
+[
+  {"text": "document"},
+  {"text": "document"}
+]
+```
+对于上述格式的数据，`dataset_info.json` 中的*数据集描述*应为：
+```json
+"数据集名称": {
+  "file_name": "data.json",
+  "columns": {
+    "prompt": "text"
+  }
+}
+```
+### 偏好数据集
+偏好数据集用于奖励模型训练、DPO 训练和 ORPO 训练。
+它需要在 `chosen` 列中提供更优的回答，并在 `rejected` 列中提供更差的回答。
+```json
+[
+  {
+    "instruction": "人类指令（必填）",
+    "input": "人类输入（选填）",
+    "chosen": "优质回答（必填）",
+    "rejected": "劣质回答（必填）"
+  }
+]
+```
+对于上述格式的数据，`dataset_info.json` 中的*数据集描述*应为：
+```json
+"数据集名称": {
+  "file_name": "data.json",
+  "ranking": true,
+  "columns": {
+    "prompt": "instruction",
+    "query": "input",
+    "chosen": "chosen",
+    "rejected": "rejected"
+  }
+}
+```
+### KTO 数据集
+- [样例数据集](kto_en_demo.json)
+KTO 数据集需要额外添加一个 `kto_tag` 列，包含 bool 类型的人类反馈。
+```json
+[
+  {
+    "instruction": "人类指令（必填）",
+    "input": "人类输入（选填）",
+    "output": "模型回答（必填）",
+    "kto_tag": "人类反馈 [true/false]（必填）"
+  }
+]
+```
+对于上述格式的数据，`dataset_info.json` 中的*数据集描述*应为：
+```json
+"数据集名称": {
+  "file_name": "data.json",
+  "columns": {
+    "prompt": "instruction",
+    "query": "input",
+    "response": "output",
+    "kto_tag": "kto_tag"
+  }
+}
+```
+### 多模态数据集
+- [样例数据集](mllm_demo.json)
+多模态数据集需要额外添加一个 `images` 列，包含输入图像的路径。目前我们仅支持单张图像输入。
+```json
+[
+  {
+    "instruction": "人类指令（必填）",
+    "input": "人类输入（选填）",
+    "output": "模型回答（必填）",
+    "images": [
+      "图像路径（必填）"
+    ]
+  }
+]
+```
+对于上述格式的数据，`dataset_info.json` 中的*数据集描述*应为：
+```json
+"数据集名称": {
+  "file_name": "data.json",
+  "columns": {
+    "prompt": "instruction",
+    "query": "input",
+    "response": "output",
+    "images": "images"
+  }
+}
+```
+## Sharegpt 格式
+### 指令监督微调数据集
+- [样例数据集](glaive_toolcall_zh_demo.json)
+相比 alpaca 格式的数据集，sharegpt 格式支持**更多的角色种类**，例如 human、gpt、observation、function 等等。它们构成一个对象列表呈现在 `conversations` 列中。
+注意其中 human 和 observation 必须出现在奇数位置，gpt 和 function 必须出现在偶数位置。
+```json
+[
+  {
+    "conversations": [
+      {
+        "from": "human",
+        "value": "人类指令"
+      },
+      {
+        "from": "function_call",
+        "value": "工具参数"
+      },
+      {
+        "from": "observation",
+        "value": "工具结果"
+      },
+      {
+        "from": "gpt",
+        "value": "模型回答"
+      }
+    ],
+    "system": "系统提示词（选填）",
+    "tools": "工具描述（选填）"
+  }
+]
+```
+对于上述格式的数据，`dataset_info.json` 中的*数据集描述*应为：
+```json
+"数据集名称": {
+  "file_name": "data.json",
+  "formatting": "sharegpt",
+  "columns": {
+    "messages": "conversations",
+    "system": "system",
+    "tools": "tools"
+  }
+}
+```
+### 偏好数据集
+- [样例数据集](dpo_zh_demo.json)
+Sharegpt 格式的偏好数据集同样需要在 `chosen` 列中提供更优的消息，并在 `rejected` 列中提供更差的消息。
+```json
+[
+  {
+    "conversations": [
+      {
+        "from": "human",
+        "value": "人类指令"
+      },
+      {
+        "from": "gpt",
+        "value": "模型回答"
+      },
+      {
+        "from": "human",
+        "value": "人类指令"
+      }
+    ],
+    "chosen": {
+      "from": "gpt",
+      "value": "优质回答"
+    },
+    "rejected": {
+      "from": "gpt",
+      "value": "劣质回答"
+    }
+  }
+]
+```
+对于上述格式的数据，`dataset_info.json` 中的*数据集描述*应为：
+```json
+"数据集名称": {
+  "file_name": "data.json",
+  "formatting": "sharegpt",
+  "ranking": true,
+  "columns": {
+    "messages": "conversations",
+    "chosen": "chosen",
+    "rejected": "rejected"
+  }
+}
+```
+### OpenAI 格式
+OpenAI 格式仅仅是 sharegpt 格式的一种特殊情况，其中第一条消息可能是系统提示词。
+```json
+[
+  {
+    "messages": [
+      {
+        "role": "system",
+        "content": "系统提示词（选填）"
+      },
+      {
+        "role": "user",
+        "content": "人类指令"
+      },
+      {
+        "role": "assistant",
+        "content": "模型回答"
+      }
+    ]
+  }
+]
+```
+对于上述格式的数据，`dataset_info.json` 中的*数据集描述*应为：
+```json
+"数据集名称": {
+  "file_name": "data.json",
+  "formatting": "sharegpt",
+  "columns": {
+    "messages": "messages"
+  },
+  "tags": {
+    "role_tag": "role",
+    "content_tag": "content",
+    "user_tag": "user",
+    "assistant_tag": "assistant",
+    "system_tag": "system"
+  }
+}
+```
+Sharegpt 格式中的 KTO 数据集和多模态数据集与 alpaca 格式的类似。
+预训练数据集**不支持** sharegpt 格式。
--- a/finetune/data/alpaca_en_demo.json
+++ b/finetune/data/alpaca_en_demo.json
--- a/finetune/data/alpaca_zh_demo.json
+++ b/finetune/data/alpaca_zh_demo.json
--- a/finetune/data/c4_demo.json
+++ b/finetune/data/c4_demo.json
--- a/finetune/data/dataset_info.json
+++ b/finetune/data/dataset_info.json
+{
+  "identity": {
+    "file_name": "identity.json"
+  },
+  "alpaca_en_demo": {
+    "file_name": "alpaca_en_demo.json"
+  },
+  "alpaca_zh_demo": {
+    "file_name": "alpaca_zh_demo.json"
+  },
+  "glaive_toolcall_en_demo": {
+    "file_name": "glaive_toolcall_en_demo.json",
+    "formatting": "sharegpt",
+    "columns": {
+      "messages": "conversations",
+      "tools": "tools"
+    }
+  },
+  "glaive_toolcall_zh_demo": {
+    "file_name": "glaive_toolcall_zh_demo.json",
+    "formatting": "sharegpt",
+    "columns": {
+      "messages": "conversations",
+      "tools": "tools"
+    }
+  },
+  "mllm_demo": {
+    "file_name": "mllm_demo.json",
+    "formatting": "sharegpt",
+    "columns": {
+      "messages": "messages",
+      "images": "images"
+    },
+    "tags": {
+      "role_tag": "role",
+      "content_tag": "content",
+      "user_tag": "user",
+      "assistant_tag": "assistant"
+    }
+  },
+  "alpaca_en": {
+    "hf_hub_url": "llamafactory/alpaca_en",
+    "ms_hub_url": "llamafactory/alpaca_en"
+  },
+  "alpaca_zh": {
+    "hf_hub_url": "llamafactory/alpaca_zh",
+    "ms_hub_url": "llamafactory/alpaca_zh"
+  },
+  "alpaca_gpt4_en": {
+    "hf_hub_url": "llamafactory/alpaca_gpt4_en",
+    "ms_hub_url": "llamafactory/alpaca_gpt4_en"
+  },
+  "alpaca_gpt4_zh": {
+    "hf_hub_url": "llamafactory/alpaca_gpt4_zh",
+    "ms_hub_url": "llamafactory/alpaca_gpt4_zh"
+  },
+  "glaive_toolcall_en": {
+    "hf_hub_url": "llamafactory/glaive_toolcall_en",
+    "formatting": "sharegpt",
+    "columns": {
+      "messages": "conversations",
+      "tools": "tools"
+    }
+  },
+  "glaive_toolcall_zh": {
+    "hf_hub_url": "llamafactory/glaive_toolcall_zh",
+    "formatting": "sharegpt",
+    "columns": {
+      "messages": "conversations",
+      "tools": "tools"
+    }
+  },
+  "lima": {
+    "hf_hub_url": "llamafactory/lima",
+    "formatting": "sharegpt"
+  },
+  "guanaco": {
+    "hf_hub_url": "JosephusCheung/GuanacoDataset",
+    "ms_hub_url": "AI-ModelScope/GuanacoDataset"
+  },
+  "belle_2m": {
+    "hf_hub_url": "BelleGroup/train_2M_CN",
+    "ms_hub_url": "AI-ModelScope/train_2M_CN"
+  },
+  "belle_1m": {
+    "hf_hub_url": "BelleGroup/train_1M_CN",
+    "ms_hub_url": "AI-ModelScope/train_1M_CN"
+  },
+  "belle_0.5m": {
+    "hf_hub_url": "BelleGroup/train_0.5M_CN",
+    "ms_hub_url": "AI-ModelScope/train_0.5M_CN"
+  },
+  "belle_dialog": {
+    "hf_hub_url": "BelleGroup/generated_chat_0.4M",
+    "ms_hub_url": "AI-ModelScope/generated_chat_0.4M"
+  },
+  "belle_math": {
+    "hf_hub_url": "BelleGroup/school_math_0.25M",
+    "ms_hub_url": "AI-ModelScope/school_math_0.25M"
+  },
+  "belle_multiturn": {
+    "script_url": "belle_multiturn",
+    "formatting": "sharegpt"
+  },
+  "ultra_chat": {
+    "script_url": "ultra_chat",
+    "formatting": "sharegpt"
+  },
+  "open_platypus": {
+    "hf_hub_url": "garage-bAInd/Open-Platypus",
+    "ms_hub_url": "AI-ModelScope/Open-Platypus"
+  },
+  "codealpaca": {
+    "hf_hub_url": "sahil2801/CodeAlpaca-20k",
+    "ms_hub_url": "AI-ModelScope/CodeAlpaca-20k"
+  },
+  "alpaca_cot": {
+    "hf_hub_url": "QingyiSi/Alpaca-CoT",
+    "ms_hub_url": "AI-ModelScope/Alpaca-CoT"
+  },
+  "openorca": {
+    "hf_hub_url": "Open-Orca/OpenOrca",
+    "ms_hub_url": "AI-ModelScope/OpenOrca",
+    "columns": {
+      "prompt": "question",
+      "response": "response",
+      "system": "system_prompt"
+    }
+  },
+  "slimorca": {
+    "hf_hub_url": "Open-Orca/SlimOrca",
+    "formatting": "sharegpt"
+  },
+  "mathinstruct": {
+    "hf_hub_url": "TIGER-Lab/MathInstruct",
+    "ms_hub_url": "AI-ModelScope/MathInstruct",
+    "columns": {
+      "prompt": "instruction",
+      "response": "output"
+    }
+  },
+  "firefly": {
+    "hf_hub_url": "YeungNLP/firefly-train-1.1M",
+    "columns": {
+      "prompt": "input",
+      "response": "target"
+    }
+  },
+  "wikiqa": {
+    "hf_hub_url": "wiki_qa",
+    "columns": {
+      "prompt": "question",
+      "response": "answer"
+    }
+  },
+  "webqa": {
+    "hf_hub_url": "suolyer/webqa",
+    "ms_hub_url": "AI-ModelScope/webqa",
+    "columns": {
+      "prompt": "input",
+      "response": "output"
+    }
+  },
+  "webnovel": {
+    "hf_hub_url": "zxbsmk/webnovel_cn",
+    "ms_hub_url": "AI-ModelScope/webnovel_cn"
+  },
+  "nectar_sft": {
+    "hf_hub_url": "AstraMindAI/SFT-Nectar",
+    "ms_hub_url": "AI-ModelScope/SFT-Nectar"
+  },
+  "deepctrl": {
+    "ms_hub_url": "deepctrl/deepctrl-sft-data"
+  },
+  "adgen": {
+    "hf_hub_url": "HasturOfficial/adgen",
+    "ms_hub_url": "AI-ModelScope/adgen",
+    "columns": {
+      "prompt": "content",
+      "response": "summary"
+    }
+  },
+  "sharegpt_hyper": {
+    "hf_hub_url": "totally-not-an-llm/sharegpt-hyperfiltered-3k",
+    "formatting": "sharegpt"
+  },
+  "sharegpt4": {
+    "hf_hub_url": "shibing624/sharegpt_gpt4",
+    "ms_hub_url": "AI-ModelScope/sharegpt_gpt4",
+    "formatting": "sharegpt"
+  },
+  "ultrachat_200k": {
+    "hf_hub_url": "HuggingFaceH4/ultrachat_200k",
+    "ms_hub_url": "AI-ModelScope/ultrachat_200k",
+    "formatting": "sharegpt",
+    "columns": {
+      "messages": "messages"
+    },
+    "tags": {
+      "role_tag": "role",
+      "content_tag": "content",
+      "user_tag": "user",
+      "assistant_tag": "assistant"
+    }
+  },
+  "agent_instruct": {
+    "hf_hub_url": "THUDM/AgentInstruct",
+    "ms_hub_url": "ZhipuAI/AgentInstruct",
+    "formatting": "sharegpt"
+  },
+  "lmsys_chat": {
+    "hf_hub_url": "lmsys/lmsys-chat-1m",
+    "ms_hub_url": "AI-ModelScope/lmsys-chat-1m",
+    "formatting": "sharegpt",
+    "columns": {
+      "messages": "conversation"
+    },
+    "tags": {
+      "role_tag": "role",
+      "content_tag": "content",
+      "user_tag": "human",
+      "assistant_tag": "assistant"
+    }
+  },
+  "evol_instruct": {
+    "hf_hub_url": "WizardLM/WizardLM_evol_instruct_V2_196k",
+    "ms_hub_url": "AI-ModelScope/WizardLM_evol_instruct_V2_196k",
+    "formatting": "sharegpt"
+  },
+  "glaive_toolcall_100k": {
+    "hf_hub_url": "hiyouga/glaive-function-calling-v2-sharegpt",
+    "formatting": "sharegpt",
+    "columns": {
+      "messages": "conversations",
+      "tools": "tools"
+    }
+  },
+  "cosmopedia": {
+    "hf_hub_url": "HuggingFaceTB/cosmopedia",
+    "columns": {
+      "prompt": "prompt",
+      "response": "text"
+    }
+  },
+  "stem_zh": {
+    "hf_hub_url": "hfl/stem_zh_instruction"
+  },
+  "ruozhiba_gpt4": {
+    "hf_hub_url": "hfl/ruozhiba_gpt4_turbo"
+  },
+  "llava_150k_en": {
+    "hf_hub_url": "BUAADreamer/llava-en-zh-300k",
+    "subset": "en",
+    "formatting": "sharegpt",
+    "columns": {
+      "messages": "messages",
+      "images": "images"
+    },
+    "tags": {
+      "role_tag": "role",
+      "content_tag": "content",
+      "user_tag": "user",
+      "assistant_tag": "assistant"
+    }
+  },
+  "llava_150k_zh": {
+    "hf_hub_url": "BUAADreamer/llava-en-zh-300k",
+    "subset": "zh",
+    "formatting": "sharegpt",
+    "columns": {
+      "messages": "messages",
+      "images": "images"
+    },
+    "tags": {
+      "role_tag": "role",
+      "content_tag": "content",
+      "user_tag": "user",
+      "assistant_tag": "assistant"
+    }
+  },
+  "oasst_de": {
+    "hf_hub_url": "mayflowergmbh/oasst_de"
+  },
+  "dolly_15k_de": {
+    "hf_hub_url": "mayflowergmbh/dolly-15k_de"
+  },
+  "alpaca-gpt4_de": {
+    "hf_hub_url": "mayflowergmbh/alpaca-gpt4_de"
+  },
+  "openschnabeltier_de": {
+    "hf_hub_url": "mayflowergmbh/openschnabeltier_de"
+  },
+  "evol_instruct_de": {
+    "hf_hub_url": "mayflowergmbh/evol-instruct_de"
+  },
+  "dolphin_de": {
+    "hf_hub_url": "mayflowergmbh/dolphin_de"
+  },
+  "booksum_de": {
+    "hf_hub_url": "mayflowergmbh/booksum_de"
+  },
+  "airoboros_de": {
+    "hf_hub_url": "mayflowergmbh/airoboros-3.0_de"
+  },
+  "ultrachat_de": {
+    "hf_hub_url": "mayflowergmbh/ultra-chat_de"
+  },
+  "dpo_en_demo": {
+    "file_name": "dpo_en_demo.json",
+    "ranking": true,
+    "formatting": "sharegpt",
+    "columns": {
+      "messages": "conversations",
+      "chosen": "chosen",
+      "rejected": "rejected"
+    }
+  },
+  "dpo_zh_demo": {
+    "file_name": "dpo_zh_demo.json",
+    "ranking": true,
+    "formatting": "sharegpt",
+    "columns": {
+      "messages": "conversations",
+      "chosen": "chosen",
+      "rejected": "rejected"
+    }
+  },
+  "dpo_mix_en": {
+    "hf_hub_url": "hiyouga/DPO-En-Zh-20k",
+    "subset": "en",
+    "ranking": true,
+    "formatting": "sharegpt",
+    "columns": {
+      "messages": "conversations",
+      "chosen": "chosen",
+      "rejected": "rejected"
+    }
+  },
+  "dpo_mix_zh": {
+    "hf_hub_url": "hiyouga/DPO-En-Zh-20k",
+    "subset": "zh",
+    "ranking": true,
+    "formatting": "sharegpt",
+    "columns": {
+      "messages": "conversations",
+      "chosen": "chosen",
+      "rejected": "rejected"
+    }
+  },
+  "orca_pairs": {
+    "hf_hub_url": "Intel/orca_dpo_pairs",
+    "ranking": true,
+    "columns": {
+      "prompt": "question",
+      "chosen": "chosen",
+      "rejected": "rejected",
+      "system": "system"
+    }
+  },
+  "hh_rlhf_en": {
+    "script_url": "hh_rlhf_en",
+    "ranking": true,
+    "columns": {
+      "prompt": "instruction",
+      "chosen": "chosen",
+      "rejected": "rejected",
+      "history": "history"
+    }
+  },
+  "nectar_rm": {
+    "hf_hub_url": "AstraMindAI/RLAIF-Nectar",
+    "ms_hub_url": "AI-ModelScope/RLAIF-Nectar",
+    "ranking": true
+  },
+  "orca_dpo_de": {
+    "hf_hub_url": "mayflowergmbh/intel_orca_dpo_pairs_de",
+    "ranking": true
+  },
+  "kto_en_demo": {
+    "file_name": "kto_en_demo.json",
+    "formatting": "sharegpt",
+    "columns": {
+      "messages": "messages",
+      "kto_tag": "label"
+    },
+    "tags": {
+      "role_tag": "role",
+      "content_tag": "content",
+      "user_tag": "user",
+      "assistant_tag": "assistant"
+    }
+  },
+  "kto_mix_en": {
+    "hf_hub_url": "argilla/kto-mix-15k",
+    "formatting": "sharegpt",
+    "columns": {
+      "messages": "completion",
+      "kto_tag": "label"
+    },
+    "tags": {
+      "role_tag": "role",
+      "content_tag": "content",
+      "user_tag": "user",
+      "assistant_tag": "assistant"
+    }
+  },
+  "wiki_demo": {
+    "file_name": "wiki_demo.txt",
+    "columns": {
+      "prompt": "text"
+    }
+  },
+  "c4_demo": {
+    "file_name": "c4_demo.json",
+    "columns": {
+      "prompt": "text"
+    }
+  },
+  "refinedweb": {
+    "hf_hub_url": "tiiuae/falcon-refinedweb",
+    "columns": {
+      "prompt": "content"
+    }
+  },
+  "redpajama_v2": {
+    "hf_hub_url": "togethercomputer/RedPajama-Data-V2",
+    "columns": {
+      "prompt": "raw_content"
+    },
+    "subset": "default"
+  },
+  "wikipedia_en": {
+    "hf_hub_url": "olm/olm-wikipedia-20221220",
+    "ms_hub_url": "AI-ModelScope/olm-wikipedia-20221220",
+    "columns": {
+      "prompt": "text"
+    }
+  },
+  "wikipedia_zh": {
+    "hf_hub_url": "pleisto/wikipedia-cn-20230720-filtered",
+    "ms_hub_url": "AI-ModelScope/wikipedia-cn-20230720-filtered",
+    "columns": {
+      "prompt": "completion"
+    }
+  },
+  "pile": {
+    "hf_hub_url": "monology/pile-uncopyrighted",
+    "ms_hub_url": "AI-ModelScope/pile",
+    "columns": {
+      "prompt": "text"
+    }
+  },
+  "skypile": {
+    "hf_hub_url": "Skywork/SkyPile-150B",
+    "ms_hub_url": "AI-ModelScope/SkyPile-150B",
+    "columns": {
+      "prompt": "text"
+    }
+  },
+  "the_stack": {
+    "hf_hub_url": "bigcode/the-stack",
+    "ms_hub_url": "AI-ModelScope/the-stack",
+    "columns": {
+      "prompt": "content"
+    }
+  },
+  "starcoder_python": {
+    "hf_hub_url": "bigcode/starcoderdata",
+    "ms_hub_url": "AI-ModelScope/starcoderdata",
+    "columns": {
+      "prompt": "content"
+    },
+    "folder": "python"
+  }
+}
\ No newline at end of file
--- a/finetune/data/dpo_en_demo.json
+++ b/finetune/data/dpo_en_demo.json
--- a/finetune/data/dpo_zh_demo.json
+++ b/finetune/data/dpo_zh_demo.json
--- a/finetune/data/glaive_toolcall_en_demo.json
+++ b/finetune/data/glaive_toolcall_en_demo.json
--- a/finetune/data/glaive_toolcall_zh_demo.json
+++ b/finetune/data/glaive_toolcall_zh_demo.json
--- a/finetune/data/identity.json
+++ b/finetune/data/identity.json
--- a/finetune/data/kto_en_demo.json
+++ b/finetune/data/kto_en_demo.json
--- a/finetune/data/mllm_demo.json
+++ b/finetune/data/mllm_demo.json
+[
+  {
+    "messages": [
+      {
+        "content": "Who are they?",
+        "role": "user"
+      },
+      {
+        "content": "They're Kane and Gretzka from Bayern Munich.",
+        "role": "assistant"
+      },
+      {
+        "content": "What are they doing?",
+        "role": "user"
+      },
+      {
+        "content": "They are celebrating on the soccer field.",
+        "role": "assistant"
+      }
+    ],
+    "images": [
+      "mllm_demo_data/1.jpg"
+    ]
+  },
+  {
+    "messages": [
+      {
+        "content": "Who is he?",
+        "role": "user"
+      },
+      {
+        "content": "He's Thomas Muller from Bayern Munich.",
+        "role": "assistant"
+      },
+      {
+        "content": "Why is he on the ground?",
+        "role": "user"
+      },
+      {
+        "content": "Because he's sliding on his knees to celebrate.",
+        "role": "assistant"
+      }
+    ],
+    "images": [
+      "mllm_demo_data/2.jpg"
+    ]
+  },
+  {
+    "messages": [
+      {
+        "content": "Please describe this image",
+        "role": "user"
+      },
+      {
+        "content": "Chinese astronaut Gui Haichao is giving a speech.",
+        "role": "assistant"
+      },
+      {
+        "content": "What has he accomplished?",
+        "role": "user"
+      },
+      {
+        "content": "He was appointed to be a payload specialist on Shenzhou 16 mission in June 2022, thus becoming the first Chinese civilian of Group 3 in space on 30 May 2023. He is responsible for the on-orbit operation of space science experimental payloads.",
+        "role": "assistant"
+      }
+    ],
+    "images": [
+      "mllm_demo_data/3.jpg"
+    ]
+  },
+  {
+    "messages": [
+      {
+        "content": "他们是谁？",
+        "role": "user"
+      },
+      {
+        "content": "他们是拜仁慕尼黑的凯恩和格雷茨卡。",
+        "role": "assistant"
+      },
+      {
+        "content": "他们在做什么？",
+        "role": "user"
+      },
+      {
+        "content": "他们在足球场上庆祝。",
+        "role": "assistant"
+      }
+    ],
+    "images": [
+      "mllm_demo_data/1.jpg"
+    ]
+  },
+  {
+    "messages": [
+      {
+        "content": "他是谁？",
+        "role": "user"
+      },
+      {
+        "content": "他是来自拜仁慕尼黑的托马斯·穆勒。",
+        "role": "assistant"
+      },
+      {
+        "content": "他为什么在地上？",
+        "role": "user"
+      },
+      {
+        "content": "因为他正在双膝跪地滑行庆祝。",
+        "role": "assistant"
+      }
+    ],
+    "images": [
+      "mllm_demo_data/2.jpg"
+    ]
+  },
+  {
+    "messages": [
+      {
+        "content": "请描述这张图片",
+        "role": "user"
+      },
+      {
+        "content": "中国宇航员桂海潮正在讲话。",
+        "role": "assistant"
+      },
+      {
+        "content": "他取得过哪些成就？",
+        "role": "user"
+      },
+      {
+        "content": "他于2022年6月被任命为神舟十六号任务的有效载荷专家，从而成为2023年5月30日进入太空的首位平民宇航员。他负责在轨操作空间科学实验有效载荷。",
+        "role": "assistant"
+      }
+    ],
+    "images": [
+      "mllm_demo_data/3.jpg"
+    ]
+  }
+]
\ No newline at end of file
--- a/finetune/data/wiki_demo.txt
+++ b/finetune/data/wiki_demo.txt
--- a/finetune/multi_node.sh
+++ b/finetune/multi_node.sh
+#!/bin/bash
+# also launch it on slave machine using slave_config.yaml
+NPROC_PER_NODE=2
+NNODES=1
+RANK=0
+MASTER_ADDR=127.0.0.1
+MASTER_PORT=17170
+HIP_VISIBLE_DEVICES=6,7 torchrun \
+    --nproc_per_node $NPROC_PER_NODE \
+    --nnodes $NNODES \
+    --node_rank $RANK \
+    --master_addr $MASTER_ADDR \
+    --master_port $MASTER_PORT \
+    src/train.py \
+        --stage sft --do_train \
+        --model_name_or_path /home/practice/Yi-1.5-6B-Chat <Your model path>\
+        --dataset alpaca_en_demo  --template yi --finetuning_type lora --lora_target q_proj,v_proj \
+        --output_dir saves/yi-6b/lora/sft \
+        --overwrite_output_dir \
+        --overwrite_cache --per_device_train_batch_size 2 --gradient_accumulation_steps 32 --lr_scheduler_type cosine  \
+        --logging_steps 10 --save_steps 1000 --learning_rate 1e-4 --num_train_epochs 3.0 --plot_loss --fp16
--- a/finetune/scripts/cal_flops.py
+++ b/finetune/scripts/cal_flops.py
+# coding=utf-8
+# Calculates the flops of pre-trained models.
+# Usage: python cal_flops.py --model_name_or_path path_to_model --batch_size 1 --seq_length 512
+# Inspired by: https://www.deepspeed.ai/tutorials/flops-profiler/
+import fire
+import torch
+from deepspeed.accelerator import get_accelerator  # type: ignore
+from deepspeed.profiling.flops_profiler import get_model_profile  # type: ignore
+from llamafactory.chat import ChatModel
+def calculate_flops(
+    model_name_or_path: str,
+    batch_size: int = 1,
+    seq_length: int = 256,
+    flash_attn: str = "auto",
+):
+    with get_accelerator().device(0):
+        chat_model = ChatModel(dict(model_name_or_path=model_name_or_path, template="empty", flash_attn=flash_attn))
+        fake_input = torch.ones((batch_size, seq_length), dtype=torch.long, device=chat_model.model.device)
+        input_dict = {"input_ids": fake_input, "labels": fake_input.clone()}
+        flops, macs, params = get_model_profile(chat_model.model, kwargs=input_dict, print_profile=True, detailed=True)
+        print("FLOPs:", flops)
+        print("MACs:", macs)
+        print("Params:", params)
+# Script entry point: python-fire exposes calculate_flops' parameters as command-line flags.
+if __name__ == "__main__":
+    fire.Fire(calculate_flops)
--- a/finetune/scripts/cal_lr.py
+++ b/finetune/scripts/cal_lr.py
+# coding=utf-8
+# Calculates the optimal learning rate for 7B/13B models using LLaMA's hyper-parameters.
+# Usage: python cal_lr.py --model_name_or_path path_to_model --dataset alpaca_en --cutoff_len 1024 --batch_size 16
+# Inspired by: https://github.com/imoneoi/openchat/blob/master/ochat/training_deepspeed/train.py
+import math
+from typing import Literal
+import fire
+import torch
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+from transformers import DataCollatorForLanguageModeling, DataCollatorForSeq2Seq
+from llamafactory.data import get_dataset
+from llamafactory.extras.constants import IGNORE_INDEX
+from llamafactory.hparams import get_train_args
+from llamafactory.model import load_tokenizer
+# Reference point for the square-root scaling rule applied in calculate_lr:
+# BASE_LR is the learning rate paired with a batch of BASE_BS tokens.
+BASE_LR = 3e-4  # 1.5e-4 for 30B-70B models
+BASE_BS = 4_000_000  # from llama paper
+def calculate_lr(
+    model_name_or_path: str,
+    batch_size: int,  # total batch size, namely (batch size * gradient accumulation * world size)
+    stage: Literal["pt", "sft"] = "sft",
+    dataset: str = "alpaca_en",
+    dataset_dir: str = "data",
+    template: str = "default",
+    cutoff_len: int = 1024,  # i.e. maximum input length during training
+    is_mistral: bool = False,  # mistral model uses a smaller learning rate,
+):
+    model_args, data_args, training_args, _, _ = get_train_args(
+        dict(
+            stage=stage,
+            model_name_or_path=model_name_or_path,
+            dataset=dataset,
+            dataset_dir=dataset_dir,
+            template=template,
+            cutoff_len=cutoff_len,
+            output_dir="dummy_dir",
+            overwrite_cache=True,
+        )
+    )
+    tokenizer_module = load_tokenizer(model_args)
+    tokenizer = tokenizer_module["tokenizer"]
+    trainset = get_dataset(model_args, data_args, training_args, stage, **tokenizer_module)
+    if stage == "pt":
+        data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
+    elif stage == "sft":
+        data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, label_pad_token_id=IGNORE_INDEX)
+    else:
+        raise NotImplementedError
+    dataloader = DataLoader(trainset, batch_size, shuffle=False, collate_fn=data_collator, pin_memory=True)
+    valid_tokens, total_tokens = 0, 0
+    for batch in tqdm(dataloader):
+        valid_tokens += torch.sum(batch["labels"] != IGNORE_INDEX).item()
+        total_tokens += torch.numel(batch["labels"])
+    batch_max_len = cutoff_len * batch_size  # max tokens in a batch
+    valid_ratio = valid_tokens / total_tokens
+    batch_valid_len = batch_max_len * valid_ratio
+    lr = BASE_LR * math.sqrt(batch_valid_len / BASE_BS)  # lr ~ sqrt(batch_size)
+    lr = lr / 6.0 if is_mistral else lr
+    print(
+        "Optimal learning rate is {:.2e} for valid ratio% {:.2f} and effective batch size {:.2f}".format(
+            lr, valid_ratio * 100, batch_valid_len
+        )
+    )
+# Script entry point: python-fire exposes calculate_lr's parameters as command-line flags.
+if __name__ == "__main__":
+    fire.Fire(calculate_lr)