ModelZoo / DISC-FinLLM_pytorch · Commits

Commit afe180a6, authored May 21, 2024 by wanglch
Initial commit
Pipeline #1006 canceled with stages
Changes: 258 · Pipelines: 1

Showing 20 changed files with 2602 additions and 0 deletions (+2602, -0)
LLaMA-Factory/src/llmtuner/webui/engine.py       +62    -0
LLaMA-Factory/src/llmtuner/webui/interface.py    +77    -0
LLaMA-Factory/src/llmtuner/webui/locales.py      +1008  -0
LLaMA-Factory/src/llmtuner/webui/manager.py      +33    -0
LLaMA-Factory/src/llmtuner/webui/runner.py       +270   -0
LLaMA-Factory/src/llmtuner/webui/utils.py        +100   -0
LLaMA-Factory/src/train_bash.py                  +14    -0
LLaMA-Factory/src/train_web.py                   +11    -0
LLaMA-Factory/src/web_demo.py                    +11    -0
LLaMA-Factory/tests/cal_flops.py                 +33    -0
LLaMA-Factory/tests/cal_lr.py                    +77    -0
LLaMA-Factory/tests/length_cdf.py                +52    -0
LLaMA-Factory/tests/llama_pro.py                 +115   -0
LLaMA-Factory/tests/llamafy_baichuan2.py         +92    -0
LLaMA-Factory/tests/llamafy_internlm2.py         +114   -0
LLaMA-Factory/tests/llamafy_qwen.py              +144   -0
LLaMA-Factory/tests/loftq_init.py                +82    -0
LLaMA-Factory/tests/test_toolcall.py             +57    -0
README.md                                        +169   -0
cli_demo.py                                      +81    -0
LLaMA-Factory/src/llmtuner/webui/engine.py  (new file, mode 100644)

from typing import Any, Dict, Generator, Optional

import gradio as gr
from gradio.components import Component  # cannot use TYPE_CHECKING here

from .chatter import WebChatModel
from .common import get_model_path, list_dataset, load_config
from .locales import LOCALES
from .manager import Manager
from .runner import Runner
from .utils import get_time


class Engine:
    def __init__(self, demo_mode: Optional[bool] = False, pure_chat: Optional[bool] = False) -> None:
        self.demo_mode = demo_mode
        self.pure_chat = pure_chat
        self.manager = Manager()
        self.runner = Runner(self.manager, demo_mode=demo_mode)
        self.chatter = WebChatModel(manager=self.manager, demo_mode=demo_mode, lazy_init=(not pure_chat))

    def _form_dict(self, resume_dict: Dict[str, Dict[str, Any]]):
        return {self.manager.get_elem_by_name(k): gr.update(**v) for k, v in resume_dict.items()}

    def resume(self) -> Generator[Dict[Component, Dict[str, Any]], None, None]:
        user_config = load_config() if not self.demo_mode else {}
        lang = user_config.get("lang", None) or "en"

        init_dict = {"top.lang": {"value": lang}, "infer.chat_box": {"visible": self.chatter.loaded}}

        if not self.pure_chat:
            init_dict["train.dataset"] = {"choices": list_dataset()["choices"]}
            init_dict["eval.dataset"] = {"choices": list_dataset()["choices"]}

            if user_config.get("last_model", None):
                init_dict["top.model_name"] = {"value": user_config["last_model"]}
                init_dict["top.model_path"] = {"value": get_model_path(user_config["last_model"])}

        yield self._form_dict(init_dict)

        if not self.pure_chat:
            if self.runner.alive:
                yield {elem: gr.update(value=value) for elem, value in self.runner.running_data.items()}
                if self.runner.do_train:
                    yield self._form_dict({"train.resume_btn": {"value": True}})
                else:
                    yield self._form_dict({"eval.resume_btn": {"value": True}})
            else:
                yield self._form_dict(
                    {
                        "train.output_dir": {"value": "train_" + get_time()},
                        "eval.output_dir": {"value": "eval_" + get_time()},
                    }
                )

    def change_lang(self, lang: str) -> Dict[Component, Dict[str, Any]]:
        return {
            component: gr.update(**LOCALES[name][lang])
            for elems in self.manager.all_elems.values()
            for name, component in elems.items()
            if name in LOCALES
        }
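A note on the pattern above: `resume()` is a generator, and every dict it yields maps a Gradio component to the keyword arguments of `gr.update`, which lets `demo.load` (wired up in interface.py below) stream several UI updates from one callback. A minimal self-contained sketch of that pattern, with hypothetical component names and assuming Gradio 3.38:

import gradio as gr

with gr.Blocks() as demo:
    box = gr.Textbox(label="status")
    btn = gr.Button("Start", interactive=False)

    def restore_state():
        # each yielded dict maps a component to a gr.update payload,
        # mirroring how Engine.resume() streams updates into demo.load
        yield {box: gr.update(value="restoring...")}
        yield {box: gr.update(value="ready"), btn: gr.update(interactive=True)}

    demo.load(restore_state, outputs=[box, btn])

# demo.queue().launch()  # uncomment to run locally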
LLaMA-Factory/src/llmtuner/webui/interface.py  (new file, mode 100644)

from typing import Optional

import gradio as gr
from transformers.utils.versions import require_version

from .common import save_config
from .components import (
    create_chat_box,
    create_eval_tab,
    create_export_tab,
    create_infer_tab,
    create_top,
    create_train_tab,
)
from .css import CSS
from .engine import Engine


require_version("gradio>=3.38.0,<4.0.0", 'To fix: pip install "gradio>=3.38.0,<4.0.0"')


def create_ui(demo_mode: Optional[bool] = False) -> gr.Blocks:
    engine = Engine(demo_mode=demo_mode, pure_chat=False)

    with gr.Blocks(title="LLaMA Board", css=CSS) as demo:
        if demo_mode:
            gr.HTML("<h1><center>LLaMA Board: A One-stop Web UI for Getting Started with LLaMA Factory</center></h1>")
            gr.HTML(
                '<h3><center>Visit <a href="https://github.com/hiyouga/LLaMA-Factory" target="_blank">'
                "LLaMA Factory</a> for details.</center></h3>"
            )
            gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")

        engine.manager.all_elems["top"] = create_top()
        lang: "gr.Dropdown" = engine.manager.get_elem_by_name("top.lang")

        with gr.Tab("Train"):
            engine.manager.all_elems["train"] = create_train_tab(engine)

        with gr.Tab("Evaluate & Predict"):
            engine.manager.all_elems["eval"] = create_eval_tab(engine)

        with gr.Tab("Chat"):
            engine.manager.all_elems["infer"] = create_infer_tab(engine)

        if not demo_mode:
            with gr.Tab("Export"):
                engine.manager.all_elems["export"] = create_export_tab(engine)

        demo.load(engine.resume, outputs=engine.manager.list_elems())
        lang.change(engine.change_lang, [lang], engine.manager.list_elems(), queue=False)
        lang.input(save_config, inputs=[lang], queue=False)

    return demo


def create_web_demo() -> gr.Blocks:
    engine = Engine(pure_chat=True)

    with gr.Blocks(title="Web Demo", css=CSS) as demo:
        lang = gr.Dropdown(choices=["en", "zh"])
        engine.manager.all_elems["top"] = dict(lang=lang)

        chat_box, _, _, chat_elems = create_chat_box(engine, visible=True)
        engine.manager.all_elems["infer"] = dict(chat_box=chat_box, **chat_elems)

        demo.load(engine.resume, outputs=engine.manager.list_elems())
        lang.change(engine.change_lang, [lang], engine.manager.list_elems(), queue=False)
        lang.input(save_config, inputs=[lang], queue=False)

    return demo


if __name__ == "__main__":
    demo = create_ui()
    demo.queue()
    demo.launch(server_name="0.0.0.0", share=False, inbrowser=True)
LLaMA-Factory/src/llmtuner/webui/locales.py  (new file, mode 100644)

LOCALES = {
    "lang": {
        "en": {"label": "Lang"},
        "ru": {"label": "Русский"},
        "zh": {"label": "语言"},
    },
    "model_name": {
        "en": {"label": "Model name"},
        "ru": {"label": "Название модели"},
        "zh": {"label": "模型名称"},
    },
    "model_path": {
        "en": {"label": "Model path", "info": "Path to pretrained model or model identifier from Hugging Face."},
        "ru": {"label": "Путь к модели", "info": "Путь к предварительно обученной модели или идентификатор модели от Hugging Face."},
        "zh": {"label": "模型路径", "info": "本地模型的文件路径或 Hugging Face 的模型标识符。"},
    },
    "finetuning_type": {
        "en": {"label": "Finetuning method"},
        "ru": {"label": "Метод дообучения"},
        "zh": {"label": "微调方法"},
    },
    "adapter_path": {
        "en": {"label": "Adapter path"},
        "ru": {"label": "Путь к адаптеру"},
        "zh": {"label": "适配器路径"},
    },
    "refresh_btn": {
        "en": {"value": "Refresh adapters"},
        "ru": {"value": "Обновить адаптеры"},
        "zh": {"value": "刷新适配器"},
    },
    "advanced_tab": {
        "en": {"label": "Advanced configurations"},
        "ru": {"label": "Расширенные конфигурации"},
        "zh": {"label": "高级设置"},
    },
    "quantization_bit": {
        "en": {"label": "Quantization bit", "info": "Enable 4/8-bit model quantization (QLoRA)."},
        "ru": {"label": "Уровень квантования", "info": "Включить 4/8-битное квантование модели (QLoRA)."},
        "zh": {"label": "量化等级", "info": "启用 4/8 比特模型量化(QLoRA)。"},
    },
    "template": {
        "en": {"label": "Prompt template", "info": "The template used in constructing prompts."},
        "ru": {"label": "Шаблон запроса", "info": "Шаблон, используемый при формировании запросов."},
        "zh": {"label": "提示模板", "info": "构建提示词时使用的模板"},
    },
    "rope_scaling": {
        "en": {"label": "RoPE scaling"},
        "ru": {"label": "Масштабирование RoPE"},
        "zh": {"label": "RoPE 插值方法"},
    },
    "booster": {
        "en": {"label": "Booster"},
        "ru": {"label": "Ускоритель"},
        "zh": {"label": "加速方式"},
    },
    "training_stage": {
        "en": {"label": "Stage", "info": "The stage to perform in training."},
        "ru": {"label": "Этап", "info": "Этап выполнения обучения."},
        "zh": {"label": "训练阶段", "info": "目前采用的训练方式。"},
    },
    "dataset_dir": {
        "en": {"label": "Data dir", "info": "Path to the data directory."},
        "ru": {"label": "Директория данных", "info": "Путь к директории данных."},
        "zh": {"label": "数据路径", "info": "数据文件夹的路径。"},
    },
    "dataset": {
        "en": {"label": "Dataset"},
        "ru": {"label": "Набор данных"},
        "zh": {"label": "数据集"},
    },
    "data_preview_btn": {
        "en": {"value": "Preview dataset"},
        "ru": {"value": "Просмотреть набор данных"},
        "zh": {"value": "预览数据集"},
    },
    "preview_count": {
        "en": {"label": "Count"},
        "ru": {"label": "Количество"},
        "zh": {"label": "数量"},
    },
    "page_index": {
        "en": {"label": "Page"},
        "ru": {"label": "Страница"},
        "zh": {"label": "页数"},
    },
    "prev_btn": {
        "en": {"value": "Prev"},
        "ru": {"value": "Предыдущая"},
        "zh": {"value": "上一页"},
    },
    "next_btn": {
        "en": {"value": "Next"},
        "ru": {"value": "Следующая"},
        "zh": {"value": "下一页"},
    },
    "close_btn": {
        "en": {"value": "Close"},
        "ru": {"value": "Закрыть"},
        "zh": {"value": "关闭"},
    },
    "preview_samples": {
        "en": {"label": "Samples"},
        "ru": {"label": "Примеры"},
        "zh": {"label": "样例"},
    },
    "cutoff_len": {
        "en": {"label": "Cutoff length", "info": "Max tokens in input sequence."},
        "ru": {"label": "Длина обрезки", "info": "Максимальное количество токенов во входной последовательности."},
        "zh": {"label": "截断长度", "info": "输入序列分词后的最大长度。"},
    },
    "learning_rate": {
        "en": {"label": "Learning rate", "info": "Initial learning rate for AdamW."},
        "ru": {"label": "Скорость обучения", "info": "Начальная скорость обучения для AdamW."},
        "zh": {"label": "学习率", "info": "AdamW 优化器的初始学习率。"},
    },
    "num_train_epochs": {
        "en": {"label": "Epochs", "info": "Total number of training epochs to perform."},
        "ru": {"label": "Эпохи", "info": "Общее количество эпох обучения."},
        "zh": {"label": "训练轮数", "info": "需要执行的训练总轮数。"},
    },
    "max_samples": {
        "en": {"label": "Max samples", "info": "Maximum samples per dataset."},
        "ru": {"label": "Максимальное количество образцов", "info": "Максимальное количество образцов на набор данных."},
        "zh": {"label": "最大样本数", "info": "每个数据集的最大样本数。"},
    },
    "compute_type": {
        "en": {"label": "Compute type", "info": "Whether to use mixed precision training (fp16 or bf16)."},
        "ru": {"label": "Тип вычислений", "info": "Использовать ли обучение смешанной точности fp16 или bf16."},
        "zh": {"label": "计算类型", "info": "是否使用混合精度训练(fp16 或 bf16)。"},
    },
    "batch_size": {
        "en": {"label": "Batch size", "info": "Number of samples processed on each GPU."},
        "ru": {"label": "Размер пакета", "info": "Количество образцов для обработки на каждом GPU."},
        "zh": {"label": "批处理大小", "info": "每个 GPU 处理的样本数量。"},
    },
    "gradient_accumulation_steps": {
        "en": {"label": "Gradient accumulation", "info": "Number of steps for gradient accumulation."},
        "ru": {"label": "Накопление градиента", "info": "Количество шагов накопления градиента."},
        "zh": {"label": "梯度累积", "info": "梯度累积的步数。"},
    },
    "lr_scheduler_type": {
        "en": {"label": "LR scheduler", "info": "Name of the learning rate scheduler."},
        "ru": {"label": "Планировщик скорости обучения", "info": "Название планировщика скорости обучения."},
        "zh": {"label": "学习率调节器", "info": "学习率调度器的名称。"},
    },
    "max_grad_norm": {
        "en": {"label": "Maximum gradient norm", "info": "Norm for gradient clipping."},
        "ru": {"label": "Максимальная норма градиента", "info": "Норма для обрезки градиента."},
        "zh": {"label": "最大梯度范数", "info": "用于梯度裁剪的范数。"},
    },
    "val_size": {
        "en": {"label": "Val size", "info": "Proportion of data in the dev set."},
        "ru": {"label": "Размер валидации", "info": "Пропорция данных в наборе для разработки."},
        "zh": {"label": "验证集比例", "info": "验证集占全部样本的百分比。"},
    },
    "extra_tab": {
        "en": {"label": "Extra configurations"},
        "ru": {"label": "Дополнительные конфигурации"},
        "zh": {"label": "其它参数设置"},
    },
    "logging_steps": {
        "en": {"label": "Logging steps", "info": "Number of steps between two logs."},
        "ru": {"label": "Шаги логирования", "info": "Количество шагов между двумя записями в журнале."},
        "zh": {"label": "日志间隔", "info": "每两次日志输出间的更新步数。"},
    },
    "save_steps": {
        "en": {"label": "Save steps", "info": "Number of steps between two checkpoints."},
        "ru": {"label": "Шаги сохранения", "info": "Количество шагов между двумя контрольными точками."},
        "zh": {"label": "保存间隔", "info": "每两次断点保存间的更新步数。"},
    },
    "warmup_steps": {
        "en": {"label": "Warmup steps", "info": "Number of steps used for warmup."},
        "ru": {"label": "Шаги прогрева", "info": "Количество шагов, используемых для прогрева."},
        "zh": {"label": "预热步数", "info": "学习率预热采用的步数。"},
    },
    "neftune_alpha": {
        "en": {"label": "NEFTune Alpha", "info": "Magnitude of noise adding to embedding vectors."},
        "ru": {"label": "NEFTune Alpha", "info": "Величина шума, добавляемого к векторам вложений."},
        "zh": {"label": "NEFTune 噪声参数", "info": "嵌入向量所添加的噪声大小。"},
    },
    "sft_packing": {
        "en": {"label": "Pack sequences", "info": "Pack sequences into samples of fixed length in supervised fine-tuning."},
        "ru": {"label": "Упаковка последовательностей", "info": "Упаковка последовательностей в образцы фиксированной длины при контролируемой тонкой настройке."},
        "zh": {"label": "序列打包", "info": "在指令监督微调阶段将序列打包为相同长度的样本。"},
    },
    "upcast_layernorm": {
        "en": {"label": "Upcast LayerNorm", "info": "Upcast weights of layernorm in float32."},
        "ru": {"label": "Приведение весов LayerNorm", "info": "Приведение весов LayerNorm к float32."},
        "zh": {"label": "缩放归一化层", "info": "将归一化层权重缩放至 32 位精度。"},
    },
    "lora_tab": {
        "en": {"label": "LoRA configurations"},
        "ru": {"label": "Конфигурации LoRA"},
        "zh": {"label": "LoRA 参数设置"},
    },
    "lora_rank": {
        "en": {"label": "LoRA rank", "info": "The rank of LoRA matrices."},
        "ru": {"label": "Ранг матриц LoRA", "info": "Ранг матриц LoRA."},
        "zh": {"label": "LoRA 秩", "info": "LoRA 矩阵的秩。"},
    },
    "lora_dropout": {
        "en": {"label": "LoRA Dropout", "info": "Dropout ratio of LoRA weights."},
        "ru": {"label": "Вероятность отсева LoRA", "info": "Вероятность отсева весов LoRA."},
        "zh": {"label": "LoRA 随机丢弃", "info": "LoRA 权重随机丢弃的概率。"},
    },
    "lora_target": {
        "en": {"label": "LoRA modules (optional)", "info": "Name(s) of target modules to apply LoRA. Use commas to separate multiple modules."},
        "ru": {"label": "Модули LoRA (опционально)", "info": "Имена целевых модулей для применения LoRA. Используйте запятые для разделения нескольких модулей."},
        "zh": {"label": "LoRA 作用模块(非必填)", "info": "应用 LoRA 的目标模块名称。使用英文逗号分隔多个名称。"},
    },
    "additional_target": {
        "en": {"label": "Additional modules (optional)", "info": "Name(s) of modules apart from LoRA layers to be set as trainable. Use commas to separate multiple modules."},
        "ru": {"label": "Дополнительные модули (опционально)", "info": "Имена модулей, кроме слоев LoRA, которые следует установить в качестве обучаемых. Используйте запятые для разделения нескольких модулей."},
        "zh": {"label": "附加模块(非必填)", "info": "除 LoRA 层以外的可训练模块名称。使用英文逗号分隔多个名称。"},
    },
    "create_new_adapter": {
        "en": {"label": "Create new adapter", "info": "Whether to create a new adapter with randomly initialized weight or not."},
        "ru": {"label": "Создать новый адаптер", "info": "Создать новый адаптер с случайной инициализацией веса или нет."},
        "zh": {"label": "新建适配器", "info": "是否创建一个经过随机初始化的新适配器。"},
    },
    "rlhf_tab": {
        "en": {"label": "RLHF configurations"},
        "ru": {"label": "Конфигурации RLHF"},
        "zh": {"label": "RLHF 参数设置"},
    },
    "dpo_beta": {
        "en": {"label": "DPO beta", "info": "Value of the beta parameter in the DPO loss."},
        "ru": {"label": "DPO бета", "info": "Значение параметра бета в функции потерь DPO."},
        "zh": {"label": "DPO beta 参数", "info": "DPO 损失函数中 beta 超参数大小。"},
    },
    "dpo_ftx": {
        "en": {"label": "DPO-ftx weight", "info": "The weight of SFT loss in the DPO-ftx."},
        "ru": {"label": "Вес DPO-ftx", "info": "Вес функции потерь SFT в DPO-ftx."},
        "zh": {"label": "DPO-ftx 权重", "info": "DPO-ftx 中 SFT 损失的权重大小。"},
    },
    "reward_model": {
        "en": {"label": "Reward model", "info": "Adapter of the reward model for PPO training. (Needs to refresh adapters)"},
        "ru": {"label": "Модель вознаграждения", "info": "Адаптер модели вознаграждения для обучения PPO. (Необходимо обновить адаптеры)"},
        "zh": {"label": "奖励模型", "info": "PPO 训练中奖励模型的适配器路径。(需要刷新适配器)"},
    },
    "cmd_preview_btn": {
        "en": {"value": "Preview command"},
        "ru": {"value": "Просмотр команды"},
        "zh": {"value": "预览命令"},
    },
    "start_btn": {
        "en": {"value": "Start"},
        "ru": {"value": "Начать"},
        "zh": {"value": "开始"},
    },
    "stop_btn": {
        "en": {"value": "Abort"},
        "ru": {"value": "Прервать"},
        "zh": {"value": "中断"},
    },
    "output_dir": {
        "en": {"label": "Output dir", "info": "Directory for saving results."},
        "ru": {"label": "Выходной каталог", "info": "Каталог для сохранения результатов."},
        "zh": {"label": "输出目录", "info": "保存结果的路径。"},
    },
    "output_box": {
        "en": {"value": "Ready."},
        "ru": {"value": "Готово."},
        "zh": {"value": "准备就绪。"},
    },
    "loss_viewer": {
        "en": {"label": "Loss"},
        "ru": {"label": "Потери"},
        "zh": {"label": "损失"},
    },
    "predict": {
        "en": {"label": "Save predictions"},
        "ru": {"label": "Сохранить предсказания"},
        "zh": {"label": "保存预测结果"},
    },
    "load_btn": {
        "en": {"value": "Load model"},
        "ru": {"value": "Загрузить модель"},
        "zh": {"value": "加载模型"},
    },
    "unload_btn": {
        "en": {"value": "Unload model"},
        "ru": {"value": "Выгрузить модель"},
        "zh": {"value": "卸载模型"},
    },
    "info_box": {
        "en": {"value": "Model unloaded, please load a model first."},
        "ru": {"value": "Модель не загружена, загрузите модель сначала."},
        "zh": {"value": "模型未加载,请先加载模型。"},
    },
    "system": {
        "en": {"placeholder": "System prompt (optional)"},
        "ru": {"placeholder": "Системный запрос (по желанию)"},
        "zh": {"placeholder": "系统提示词(非必填)"},
    },
    "tools": {
        "en": {"placeholder": "Tools (optional)"},
        "ru": {"placeholder": "Инструменты (по желанию)"},
        "zh": {"placeholder": "工具列表(非必填)"},
    },
    "query": {
        "en": {"placeholder": "Input..."},
        "ru": {"placeholder": "Ввод..."},
        "zh": {"placeholder": "输入..."},
    },
    "submit_btn": {
        "en": {"value": "Submit"},
        "ru": {"value": "Отправить"},
        "zh": {"value": "提交"},
    },
    "clear_btn": {
        "en": {"value": "Clear history"},
        "ru": {"value": "Очистить историю"},
        "zh": {"value": "清空历史"},
    },
    "max_length": {
        "en": {"label": "Maximum length"},
        "ru": {"label": "Максимальная длина"},
        "zh": {"label": "最大长度"},
    },
    "max_new_tokens": {
        "en": {"label": "Maximum new tokens"},
        "ru": {"label": "Максимальное количество новых токенов"},
        "zh": {"label": "最大生成长度"},
    },
    "top_p": {
        "en": {"label": "Top-p"},
        "ru": {"label": "Лучшие-p"},
        "zh": {"label": "Top-p 采样值"},
    },
    "temperature": {
        "en": {"label": "Temperature"},
        "ru": {"label": "Температура"},
        "zh": {"label": "温度系数"},
    },
    "max_shard_size": {
        "en": {"label": "Max shard size (GB)", "info": "The maximum size for a model file."},
        "ru": {"label": "Максимальный размер фрагмента (ГБ)", "info": "Максимальный размер файла модели."},
        "zh": {"label": "最大分块大小(GB)", "info": "单个模型文件的最大大小。"},
    },
    "export_quantization_bit": {
        "en": {"label": "Export quantization bit.", "info": "Quantizing the exported model."},
        "ru": {"label": "Экспорт бита квантования", "info": "Квантование экспортируемой модели."},
        "zh": {"label": "导出量化等级", "info": "量化导出模型。"},
    },
    "export_quantization_dataset": {
        "en": {"label": "Export quantization dataset.", "info": "The calibration dataset used for quantization."},
        "ru": {"label": "Экспорт набора данных для квантования", "info": "Набор данных калибровки, используемый для квантования."},
        "zh": {"label": "导出量化数据集", "info": "量化过程中使用的校准数据集。"},
    },
    "export_dir": {
        "en": {"label": "Export dir", "info": "Directory to save exported model."},
        "ru": {"label": "Каталог экспорта", "info": "Каталог для сохранения экспортированной модели."},
        "zh": {"label": "导出目录", "info": "保存导出模型的文件夹路径。"},
    },
    "export_btn": {
        "en": {"value": "Export"},
        "ru": {"value": "Экспорт"},
        "zh": {"value": "开始导出"},
    },
}


ALERTS = {
    "err_conflict": {
        "en": "A process is in running, please abort it first.",
        "ru": "Процесс уже запущен, пожалуйста, сначала прервите его.",
        "zh": "任务已存在,请先中断训练。",
    },
    "err_exists": {
        "en": "You have loaded a model, please unload it first.",
        "ru": "Вы загрузили модель, сначала разгрузите ее.",
        "zh": "模型已存在,请先卸载模型。",
    },
    "err_no_model": {
        "en": "Please select a model.",
        "ru": "Пожалуйста, выберите модель.",
        "zh": "请选择模型。",
    },
    "err_no_path": {
        "en": "Model not found.",
        "ru": "Модель не найдена.",
        "zh": "模型未找到。",
    },
    "err_no_dataset": {
        "en": "Please choose a dataset.",
        "ru": "Пожалуйста, выберите набор данных.",
        "zh": "请选择数据集。",
    },
    "err_no_adapter": {
        "en": "Please select an adapter.",
        "ru": "Пожалуйста, выберите адаптер.",
        "zh": "请选择一个适配器。",
    },
    "err_no_export_dir": {
        "en": "Please provide export dir.",
        "ru": "Пожалуйста, укажите каталог для экспорта.",
        "zh": "请填写导出目录",
    },
    "err_failed": {
        "en": "Failed.",
        "ru": "Ошибка.",
        "zh": "训练出错。",
    },
    "err_demo": {
        "en": "Training is unavailable in demo mode, duplicate the space to a private one first.",
        "ru": "Обучение недоступно в демонстрационном режиме, сначала скопируйте пространство в частное.",
        "zh": "展示模式不支持训练,请先复制到私人空间。",
    },
    "err_device_count": {
        "en": "Multiple GPUs are not supported yet.",
        "ru": "Пока не поддерживается множественные GPU.",
        "zh": "尚不支持多 GPU 训练。",
    },
    "err_tool_name": {
        "en": "Tool name not found.",
        "ru": "Имя инструмента не найдено.",
        "zh": "工具名称未找到。",
    },
    "err_json_schema": {
        "en": "Invalid JSON schema.",
        "ru": "Неверная схема JSON.",
        "zh": "Json 格式错误。",
    },
    "info_aborting": {
        "en": "Aborted, wait for terminating...",
        "ru": "Прервано, ожидание завершения...",
        "zh": "训练中断,正在等待线程结束……",
    },
    "info_aborted": {
        "en": "Ready.",
        "ru": "Готово.",
        "zh": "准备就绪。",
    },
    "info_finished": {
        "en": "Finished.",
        "ru": "Завершено.",
        "zh": "训练完毕。",
    },
    "info_loading": {
        "en": "Loading model...",
        "ru": "Загрузка модели...",
        "zh": "加载中……",
    },
    "info_unloading": {
        "en": "Unloading model...",
        "ru": "Выгрузка модели...",
        "zh": "卸载中……",
    },
    "info_loaded": {
        "en": "Model loaded, now you can chat with your model!",
        "ru": "Модель загружена, теперь вы можете общаться с вашей моделью!",
        "zh": "模型已加载,可以开始聊天了!",
    },
    "info_unloaded": {
        "en": "Model unloaded.",
        "ru": "Модель выгружена.",
        "zh": "模型已卸载。",
    },
    "info_exporting": {
        "en": "Exporting model...",
        "ru": "Экспорт модели...",
        "zh": "正在导出模型……",
    },
    "info_exported": {
        "en": "Model exported.",
        "ru": "Модель экспортирована.",
        "zh": "模型导出完成。",
    },
}
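The leaf dictionaries above are keyword arguments for `gr.update`; `Engine.change_lang` simply splats them onto the matching components when the language dropdown changes. A small illustration (assuming the package is importable as `llmtuner.webui.locales`):

import gradio as gr
from llmtuner.webui.locales import LOCALES

# Each leaf dict holds gr.update() kwargs, so switching languages is just
# re-applying the localized label/info/value to the component.
print(LOCALES["model_path"]["zh"])                  # {'label': '模型路径', 'info': '...'}
update = gr.update(**LOCALES["model_path"]["zh"])   # equivalent to gr.update(label=..., info=...)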
LLaMA-Factory/src/llmtuner/webui/manager.py  (new file, mode 100644)

from typing import TYPE_CHECKING, Dict, List, Set


if TYPE_CHECKING:
    from gradio.components import Component


class Manager:
    def __init__(self) -> None:
        self.all_elems: Dict[str, Dict[str, "Component"]] = {}

    def get_elem_by_name(self, name: str) -> "Component":
        r"""
        Example: top.lang, train.dataset
        """
        tab_name, elem_name = name.split(".")
        return self.all_elems[tab_name][elem_name]

    def get_base_elems(self) -> Set["Component"]:
        return {
            self.all_elems["top"]["lang"],
            self.all_elems["top"]["model_name"],
            self.all_elems["top"]["model_path"],
            self.all_elems["top"]["adapter_path"],
            self.all_elems["top"]["finetuning_type"],
            self.all_elems["top"]["quantization_bit"],
            self.all_elems["top"]["template"],
            self.all_elems["top"]["rope_scaling"],
            self.all_elems["top"]["booster"],
        }

    def list_elems(self) -> List["Component"]:
        return [elem for elems in self.all_elems.values() for elem in elems.values()]
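A quick sketch of how the dotted names resolve; `lang_dropdown` here is a hypothetical component standing in for what `create_top()` registers in practice:

from llmtuner.webui.manager import Manager

manager = Manager()
manager.all_elems["top"] = {"lang": lang_dropdown}   # normally filled by create_top() etc.
manager.get_elem_by_name("top.lang")                 # -> lang_dropdown (tab name, then element name)
manager.list_elems()                                 # flat list of every registered component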
LLaMA-Factory/src/llmtuner/webui/runner.py  (new file, mode 100644)

import logging
import os
import time
from threading import Thread
from typing import TYPE_CHECKING, Any, Dict, Generator, Optional, Tuple

import gradio as gr
import transformers
from gradio.components import Component  # cannot use TYPE_CHECKING here
from transformers.trainer import TRAINING_ARGS_NAME

from ..extras.callbacks import LogCallback
from ..extras.constants import TRAINING_STAGES
from ..extras.logging import LoggerHandler
from ..extras.misc import get_device_count, torch_gc
from ..train import run_exp
from .common import get_module, get_save_dir, load_config
from .locales import ALERTS
from .utils import gen_cmd, get_eval_results, update_process_bar


if TYPE_CHECKING:
    from .manager import Manager


class Runner:
    def __init__(self, manager: "Manager", demo_mode: Optional[bool] = False) -> None:
        self.manager = manager
        self.demo_mode = demo_mode
        """ Resume """
        self.thread: "Thread" = None
        self.do_train = True
        self.running_data: Dict["Component", Any] = None
        """ State """
        self.aborted = False
        self.running = False
        """ Handler """
        self.logger_handler = LoggerHandler()
        self.logger_handler.setLevel(logging.INFO)
        logging.root.addHandler(self.logger_handler)
        transformers.logging.add_handler(self.logger_handler)

    @property
    def alive(self) -> bool:
        return self.thread is not None

    def set_abort(self) -> None:
        self.aborted = True

    def _initialize(self, data: Dict[Component, Any], do_train: bool, from_preview: bool) -> str:
        get = lambda name: data[self.manager.get_elem_by_name(name)]
        lang, model_name, model_path = get("top.lang"), get("top.model_name"), get("top.model_path")
        dataset = get("train.dataset") if do_train else get("eval.dataset")

        if self.running:
            return ALERTS["err_conflict"][lang]

        if not model_name:
            return ALERTS["err_no_model"][lang]

        if not model_path:
            return ALERTS["err_no_path"][lang]

        if len(dataset) == 0:
            return ALERTS["err_no_dataset"][lang]

        if self.demo_mode and (not from_preview):
            return ALERTS["err_demo"][lang]

        if not from_preview and get_device_count() > 1:
            return ALERTS["err_device_count"][lang]

        self.aborted = False
        self.logger_handler.reset()
        self.trainer_callback = LogCallback(self)
        return ""

    def _finalize(self, lang: str, finish_info: str) -> str:
        self.thread = None
        self.running_data = None
        self.running = False
        torch_gc()
        if self.aborted:
            return ALERTS["info_aborted"][lang]
        else:
            return finish_info

    def _parse_train_args(self, data: Dict[Component, Any]) -> Dict[str, Any]:
        get = lambda name: data[self.manager.get_elem_by_name(name)]
        user_config = load_config()

        if get("top.adapter_path"):
            adapter_name_or_path = ",".join(
                [
                    get_save_dir(get("top.model_name"), get("top.finetuning_type"), adapter)
                    for adapter in get("top.adapter_path")
                ]
            )
        else:
            adapter_name_or_path = None

        args = dict(
            stage=TRAINING_STAGES[get("train.training_stage")],
            do_train=True,
            model_name_or_path=get("top.model_path"),
            adapter_name_or_path=adapter_name_or_path,
            cache_dir=user_config.get("cache_dir", None),
            finetuning_type=get("top.finetuning_type"),
            quantization_bit=int(get("top.quantization_bit")) if get("top.quantization_bit") in ["8", "4"] else None,
            template=get("top.template"),
            rope_scaling=get("top.rope_scaling") if get("top.rope_scaling") in ["linear", "dynamic"] else None,
            flash_attn=(get("top.booster") == "flash_attn"),
            use_unsloth=(get("top.booster") == "unsloth"),
            dataset_dir=get("train.dataset_dir"),
            dataset=",".join(get("train.dataset")),
            cutoff_len=get("train.cutoff_len"),
            learning_rate=float(get("train.learning_rate")),
            num_train_epochs=float(get("train.num_train_epochs")),
            max_samples=int(get("train.max_samples")),
            per_device_train_batch_size=get("train.batch_size"),
            gradient_accumulation_steps=get("train.gradient_accumulation_steps"),
            lr_scheduler_type=get("train.lr_scheduler_type"),
            max_grad_norm=float(get("train.max_grad_norm")),
            logging_steps=get("train.logging_steps"),
            save_steps=get("train.save_steps"),
            warmup_steps=get("train.warmup_steps"),
            neftune_noise_alpha=get("train.neftune_alpha") or None,
            sft_packing=get("train.sft_packing"),
            upcast_layernorm=get("train.upcast_layernorm"),
            lora_rank=get("train.lora_rank"),
            lora_dropout=get("train.lora_dropout"),
            lora_target=get("train.lora_target") or get_module(get("top.model_name")),
            additional_target=get("train.additional_target") or None,
            create_new_adapter=get("train.create_new_adapter"),
            output_dir=get_save_dir(get("top.model_name"), get("top.finetuning_type"), get("train.output_dir")),
            fp16=(get("train.compute_type") == "fp16"),
            bf16=(get("train.compute_type") == "bf16"),
        )
        args["disable_tqdm"] = True

        if TRAINING_STAGES[get("train.training_stage")] in ["rm", "ppo", "dpo"]:
            args["create_new_adapter"] = args["quantization_bit"] is None

        if args["stage"] == "ppo":
            args["reward_model"] = get_save_dir(
                get("top.model_name"), get("top.finetuning_type"), get("train.reward_model")
            )
            args["reward_model_type"] = "lora" if get("top.finetuning_type") == "lora" else "full"

        if args["stage"] == "dpo":
            args["dpo_beta"] = get("train.dpo_beta")
            args["dpo_ftx"] = get("train.dpo_ftx")

        if get("train.val_size") > 1e-6 and args["stage"] != "ppo":
            args["val_size"] = get("train.val_size")
            args["evaluation_strategy"] = "steps"
            args["eval_steps"] = get("train.save_steps")
            args["load_best_model_at_end"] = True

        return args

    def _parse_eval_args(self, data: Dict[Component, Any]) -> Dict[str, Any]:
        get = lambda name: data[self.manager.get_elem_by_name(name)]
        user_config = load_config()

        if get("top.adapter_path"):
            adapter_name_or_path = ",".join(
                [
                    get_save_dir(get("top.model_name"), get("top.finetuning_type"), adapter)
                    for adapter in get("top.adapter_path")
                ]
            )
        else:
            adapter_name_or_path = None

        args = dict(
            stage="sft",
            model_name_or_path=get("top.model_path"),
            adapter_name_or_path=adapter_name_or_path,
            cache_dir=user_config.get("cache_dir", None),
            finetuning_type=get("top.finetuning_type"),
            quantization_bit=int(get("top.quantization_bit")) if get("top.quantization_bit") in ["8", "4"] else None,
            template=get("top.template"),
            rope_scaling=get("top.rope_scaling") if get("top.rope_scaling") in ["linear", "dynamic"] else None,
            flash_attn=(get("top.booster") == "flash_attn"),
            use_unsloth=(get("top.booster") == "unsloth"),
            dataset_dir=get("eval.dataset_dir"),
            dataset=",".join(get("eval.dataset")),
            cutoff_len=get("eval.cutoff_len"),
            max_samples=int(get("eval.max_samples")),
            per_device_eval_batch_size=get("eval.batch_size"),
            predict_with_generate=True,
            max_new_tokens=get("eval.max_new_tokens"),
            top_p=get("eval.top_p"),
            temperature=get("eval.temperature"),
            output_dir=get_save_dir(get("top.model_name"), get("top.finetuning_type"), get("eval.output_dir")),
        )

        if get("eval.predict"):
            args["do_predict"] = True
        else:
            args["do_eval"] = True

        return args

    def _preview(self, data: Dict[Component, Any], do_train: bool) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
        error = self._initialize(data, do_train, from_preview=True)
        if error:
            gr.Warning(error)
            yield error, gr.update(visible=False)
        else:
            args = self._parse_train_args(data) if do_train else self._parse_eval_args(data)
            yield gen_cmd(args), gr.update(visible=False)

    def _launch(self, data: Dict[Component, Any], do_train: bool) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
        error = self._initialize(data, do_train, from_preview=False)
        if error:
            gr.Warning(error)
            yield error, gr.update(visible=False)
        else:
            args = self._parse_train_args(data) if do_train else self._parse_eval_args(data)
            run_kwargs = dict(args=args, callbacks=[self.trainer_callback])
            self.do_train, self.running_data = do_train, data
            self.thread = Thread(target=run_exp, kwargs=run_kwargs)
            self.thread.start()
            yield from self.monitor()

    def preview_train(self, data: Dict[Component, Any]) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
        yield from self._preview(data, do_train=True)

    def preview_eval(self, data: Dict[Component, Any]) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
        yield from self._preview(data, do_train=False)

    def run_train(self, data: Dict[Component, Any]) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
        yield from self._launch(data, do_train=True)

    def run_eval(self, data: Dict[Component, Any]) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
        yield from self._launch(data, do_train=False)

    def monitor(self) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
        get = lambda name: self.running_data[self.manager.get_elem_by_name(name)]
        self.running = True
        lang = get("top.lang")
        output_dir = get_save_dir(
            get("top.model_name"),
            get("top.finetuning_type"),
            get("{}.output_dir".format("train" if self.do_train else "eval")),
        )

        while self.thread.is_alive():
            time.sleep(2)
            if self.aborted:
                yield ALERTS["info_aborting"][lang], gr.update(visible=False)
            else:
                yield self.logger_handler.log, update_process_bar(self.trainer_callback)

        if self.do_train:
            if os.path.exists(os.path.join(output_dir, TRAINING_ARGS_NAME)):
                finish_info = ALERTS["info_finished"][lang]
            else:
                finish_info = ALERTS["err_failed"][lang]
        else:
            if os.path.exists(os.path.join(output_dir, "all_results.json")):
                finish_info = get_eval_results(os.path.join(output_dir, "all_results.json"))
            else:
                finish_info = ALERTS["err_failed"][lang]

        yield self._finalize(lang, finish_info), gr.update(visible=False)
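The commit does not include the `components` package that binds these methods to buttons, so the snippet below is only a plausible sketch of how `run_train` and `set_abort` would typically be attached; `start_btn`, `stop_btn`, `output_box` and `progress_bar` are assumed names. Passing a set of components as `inputs` makes Gradio hand the callback a single dict keyed by components, which is the `data` argument the Runner expects:

# hypothetical wiring; the real bindings live in llmtuner/webui/components/ (not in this diff)
start_btn.click(
    engine.runner.run_train,
    inputs=set(engine.manager.list_elems()),  # Gradio passes a {component: value} dict
    outputs=[output_box, progress_bar],       # matches the (text, bar update) tuples monitor() yields
)
stop_btn.click(engine.runner.set_abort)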
LLaMA-Factory/src/llmtuner/webui/utils.py  (new file, mode 100644)

import json
import os
from datetime import datetime
from typing import TYPE_CHECKING, Any, Dict

import gradio as gr

from ..extras.packages import is_matplotlib_available
from ..extras.ploting import smooth
from .common import get_save_dir
from .locales import ALERTS


if TYPE_CHECKING:
    from ..extras.callbacks import LogCallback

if is_matplotlib_available():
    import matplotlib.figure
    import matplotlib.pyplot as plt


def update_process_bar(callback: "LogCallback") -> Dict[str, Any]:
    if not callback.max_steps:
        return gr.update(visible=False)

    percentage = round(100 * callback.cur_steps / callback.max_steps, 0) if callback.max_steps != 0 else 100.0
    label = "Running {:d}/{:d}: {} < {}".format(
        callback.cur_steps, callback.max_steps, callback.elapsed_time, callback.remaining_time
    )
    return gr.update(label=label, value=percentage, visible=True)


def get_time() -> str:
    return datetime.now().strftime("%Y-%m-%d-%H-%M-%S")


def can_quantize(finetuning_type: str) -> Dict[str, Any]:
    if finetuning_type != "lora":
        return gr.update(value="None", interactive=False)
    else:
        return gr.update(interactive=True)


def check_json_schema(text: str, lang: str) -> None:
    try:
        tools = json.loads(text)
        for tool in tools:
            assert "name" in tool
    except AssertionError:
        gr.Warning(ALERTS["err_tool_name"][lang])
    except json.JSONDecodeError:
        gr.Warning(ALERTS["err_json_schema"][lang])


def gen_cmd(args: Dict[str, Any]) -> str:
    args.pop("disable_tqdm", None)
    args["plot_loss"] = args.get("do_train", None)
    current_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "0")
    cmd_lines = ["CUDA_VISIBLE_DEVICES={} python src/train_bash.py ".format(current_devices)]
    for k, v in args.items():
        if v is not None and v is not False and v != "":
            cmd_lines.append("    --{} {} ".format(k, str(v)))
    cmd_text = "\\\n".join(cmd_lines)
    cmd_text = "```bash\n{}\n```".format(cmd_text)
    return cmd_text


def get_eval_results(path: os.PathLike) -> str:
    with open(path, "r", encoding="utf-8") as f:
        result = json.dumps(json.load(f), indent=4)
    return "```json\n{}\n```\n".format(result)


def gen_plot(base_model: str, finetuning_type: str, output_dir: str) -> "matplotlib.figure.Figure":
    if not base_model:
        return
    log_file = get_save_dir(base_model, finetuning_type, output_dir, "trainer_log.jsonl")
    if not os.path.isfile(log_file):
        return

    plt.close("all")
    fig = plt.figure()
    ax = fig.add_subplot(111)
    steps, losses = [], []
    with open(log_file, "r", encoding="utf-8") as f:
        for line in f:
            log_info = json.loads(line)
            if log_info.get("loss", None):
                steps.append(log_info["current_steps"])
                losses.append(log_info["loss"])

    if len(losses) == 0:
        return None

    ax.plot(steps, losses, alpha=0.4, label="original")
    ax.plot(steps, smooth(losses), label="smoothed")
    ax.legend()
    ax.set_xlabel("step")
    ax.set_ylabel("loss")
    return fig
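The Markdown string returned by `gen_cmd` is what the "Preview command" button displays; a small sketch of its expected shape (the argument values below are placeholders, not taken from this commit):

from llmtuner.webui.utils import gen_cmd

args = dict(do_train=True, model_name_or_path="path_to_model", learning_rate=5e-05)
print(gen_cmd(args))
# prints a ```bash fenced block whose body looks like:
#   CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
#       --do_train True \
#       --model_name_or_path path_to_model \
#       --learning_rate 5e-05 \
#       --plot_loss True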
LLaMA-Factory/src/train_bash.py  (new file, mode 100644)

from llmtuner import run_exp


def main():
    run_exp()


def _mp_fn(index):
    # For xla_spawn (TPUs)
    main()


if __name__ == "__main__":
    main()
LLaMA-Factory/src/train_web.py  (new file, mode 100644)

from llmtuner import create_ui


def main():
    demo = create_ui()
    demo.queue()
    demo.launch(server_name="0.0.0.0", share=False, inbrowser=True)


if __name__ == "__main__":
    main()
LLaMA-Factory/src/web_demo.py  (new file, mode 100644)

from llmtuner import create_web_demo


def main():
    demo = create_web_demo()
    demo.queue()
    demo.launch(server_name="0.0.0.0", share=False, inbrowser=True)


if __name__ == "__main__":
    main()
LLaMA-Factory/tests/cal_flops.py  (new file, mode 100644)

# coding=utf-8
# Calculates the flops of pre-trained models.
# Usage: python cal_flops.py --model_name_or_path path_to_model --batch_size 1 --seq_length 512
# Inspired by: https://www.deepspeed.ai/tutorials/flops-profiler/

from typing import Optional

import fire
import torch
from deepspeed.accelerator import get_accelerator  # type: ignore
from deepspeed.profiling.flops_profiler import get_model_profile  # type: ignore

from llmtuner import ChatModel


def calculate_flops(
    model_name_or_path: str,
    batch_size: Optional[int] = 1,
    seq_length: Optional[int] = 256,
    flash_attn: Optional[bool] = False,
):
    with get_accelerator().device(0):
        chat_model = ChatModel(dict(model_name_or_path=model_name_or_path, template="vanilla", flash_attn=flash_attn))
        fake_input = torch.ones((batch_size, seq_length), dtype=torch.long, device=chat_model.model.device)
        input_dict = {"input_ids": fake_input, "labels": fake_input.clone()}
        flops, macs, params = get_model_profile(chat_model.model, kwargs=input_dict, print_profile=True, detailed=True)
        print("FLOPs:", flops)
        print("MACs:", macs)
        print("Params:", params)


if __name__ == "__main__":
    fire.Fire(calculate_flops)
LLaMA-Factory/tests/cal_lr.py  (new file, mode 100644)

# coding=utf-8
# Calculates the optimal learning rate for 7B/13B models using LLaMA's hyper-parameters.
# Usage: python cal_lr.py --model_name_or_path path_to_model --dataset alpaca_en --cutoff_len 1024 --batch_size 16
# Inspired by: https://github.com/imoneoi/openchat/blob/master/ochat/training_deepspeed/train.py

import math
from typing import Optional

import fire
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import DataCollatorForLanguageModeling, DataCollatorForSeq2Seq

from llmtuner.data import get_dataset
from llmtuner.extras.constants import IGNORE_INDEX
from llmtuner.hparams import get_train_args
from llmtuner.model import load_model_and_tokenizer


BASE_LR = 3e-4  # 1.5e-4 for 30B-70B models
BASE_BS = 4_000_000  # from llama paper


def calculate_lr(
    model_name_or_path: str,
    batch_size: int,  # total batch size, namely (batch size * gradient accumulation * world size)
    stage: Optional[str] = "sft",
    dataset: Optional[str] = "alpaca_en",
    dataset_dir: Optional[str] = "data",
    template: Optional[str] = "default",
    cutoff_len: Optional[int] = 1024,  # i.e. maximum input length during training
    is_mistral: Optional[bool] = False,  # mistral model uses a smaller learning rate,
):
    model_args, data_args, training_args, finetuning_args, _ = get_train_args(
        dict(
            stage=stage,
            model_name_or_path=model_name_or_path,
            dataset=dataset,
            dataset_dir=dataset_dir,
            template=template,
            cutoff_len=cutoff_len,
            output_dir="dummy_dir",
            overwrite_cache=True,
        )
    )
    _, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, is_trainable=False, add_valuehead=False)
    trainset = get_dataset(tokenizer, model_args, data_args, training_args, stage=stage)

    if stage == "pt":
        data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
    elif stage == "sft":
        data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, label_pad_token_id=IGNORE_INDEX)
    else:
        raise NotImplementedError

    dataloader = DataLoader(
        dataset=trainset, batch_size=batch_size, shuffle=True, collate_fn=data_collator, pin_memory=True
    )
    valid_tokens, total_tokens = 0, 0
    for batch in tqdm(dataloader):
        valid_tokens += torch.sum(batch["labels"] != IGNORE_INDEX).item()
        total_tokens += torch.numel(batch["labels"])

    batch_max_len = cutoff_len * batch_size  # max tokens in a batch
    valid_ratio = valid_tokens / total_tokens
    batch_valid_len = batch_max_len * valid_ratio
    lr = BASE_LR * math.sqrt(batch_valid_len / BASE_BS)  # lr ~ sqrt(batch_size)
    lr = lr / 6.0 if is_mistral else lr
    print(
        "Optimal learning rate is {:.2e} for valid ratio% {:.2f} and effective batch size {:.2f}".format(
            lr, valid_ratio * 100, batch_valid_len
        )
    )


if __name__ == "__main__":
    fire.Fire(calculate_lr)
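A worked instance of the scaling rule at the end of `calculate_lr`, using made-up numbers rather than a measured valid-token ratio:

import math

# cutoff_len=1024, batch_size=16, and an assumed valid-token ratio of 0.5
batch_valid_len = 1024 * 16 * 0.5                     # 8192 effective tokens per batch
lr = 3e-4 * math.sqrt(batch_valid_len / 4_000_000)    # BASE_LR * sqrt(batch / BASE_BS)
print("{:.2e}".format(lr))                            # ~1.36e-05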
LLaMA-Factory/tests/length_cdf.py  (new file, mode 100644)

# coding=utf-8
# Calculates the distribution of the input lengths in the dataset.
# Usage: python length_cdf.py --model_name_or_path path_to_model --dataset alpaca_en --template default

from collections import defaultdict
from typing import Optional

import fire
from tqdm import tqdm

from llmtuner.data import get_dataset
from llmtuner.hparams import get_train_args
from llmtuner.model import load_model_and_tokenizer


def length_cdf(
    model_name_or_path: str,
    dataset: Optional[str] = "alpaca_en",
    dataset_dir: Optional[str] = "data",
    template: Optional[str] = "default",
    interval: Optional[int] = 1000,
):
    model_args, data_args, training_args, finetuning_args, _ = get_train_args(
        dict(
            stage="sft",
            model_name_or_path=model_name_or_path,
            dataset=dataset,
            dataset_dir=dataset_dir,
            template=template,
            cutoff_len=1_000_000,
            output_dir="dummy_dir",
            overwrite_cache=True,
        )
    )
    _, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, is_trainable=False, add_valuehead=False)
    trainset = get_dataset(tokenizer, model_args, data_args, training_args, stage="sft")
    total_num = len(trainset)
    length_dict = defaultdict(int)
    for sample in tqdm(trainset["input_ids"]):
        length_dict[len(sample) // interval * interval] += 1

    length_tuples = list(length_dict.items())
    length_tuples.sort()
    count_accu, prob_accu = 0, 0
    for length, count in length_tuples:
        count_accu += count
        prob_accu += count / total_num * 100
        print("{:d} ({:.2f}%) samples have length < {}.".format(count_accu, prob_accu, length + interval))


if __name__ == "__main__":
    fire.Fire(length_cdf)
LLaMA-Factory/tests/llama_pro.py  (new file, mode 100644)

# coding=utf-8
# Performs block expansion for LLaMA, Mistral or Qwen1.5 models.
# Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8
# Inspired by: https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py

import json
import os
from collections import OrderedDict
from typing import TYPE_CHECKING, Optional

import fire
import torch
from safetensors.torch import save_file
from tqdm import tqdm
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
from transformers.modeling_utils import (
    SAFE_WEIGHTS_INDEX_NAME,
    SAFE_WEIGHTS_NAME,
    WEIGHTS_INDEX_NAME,
    WEIGHTS_NAME,
    shard_checkpoint,
)


if TYPE_CHECKING:
    from transformers import PretrainedConfig, PreTrainedModel


def change_name(name: str, old_index: int, new_index: int) -> str:
    return name.replace(".{:d}.".format(old_index), ".{:d}.".format(new_index))


def block_expansion(
    model_name_or_path: str,
    output_dir: str,
    num_expand: int,
    shard_size: Optional[str] = "2GB",
    save_safetensors: Optional[bool] = False,
):
    config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path)
    num_layers = getattr(config, "num_hidden_layers")
    setattr(config, "num_hidden_layers", num_layers + num_expand)
    config.save_pretrained(output_dir)

    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    tokenizer.save_pretrained(output_dir)

    config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path)  # load the original one
    if save_safetensors:
        setattr(config, "tie_word_embeddings", False)  # safetensors does not allow shared weights

    model: "PreTrainedModel" = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        config=config,
        torch_dtype="auto",
        trust_remote_code=True,
        low_cpu_mem_usage=True,
    )
    state_dict = model.state_dict()

    if num_layers % num_expand != 0:
        raise ValueError("`num_layers` {} should be divisible by `num_expand` {}.".format(num_layers, num_expand))

    split = num_layers // num_expand
    layer_cnt = 0
    output_state_dict = OrderedDict()
    for i in range(num_layers):
        for key, value in state_dict.items():
            if ".{:d}.".format(i) in key:
                output_state_dict[change_name(key, i, layer_cnt)] = value

        print("Add layer {} copied from layer {}".format(layer_cnt, i))
        layer_cnt += 1
        if (i + 1) % split == 0:
            for key, value in state_dict.items():
                if ".{:d}.".format(i) in key:
                    if "down_proj" in key or "o_proj" in key:
                        output_state_dict[change_name(key, i, layer_cnt)] = torch.zeros_like(value)
                    else:
                        output_state_dict[change_name(key, i, layer_cnt)] = torch.clone(value)

            print("Add layer {} expanded from layer {}".format(layer_cnt, i))
            layer_cnt += 1

    for key, value in state_dict.items():
        if key not in output_state_dict:
            output_state_dict[key] = value

    weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
    shards, index = shard_checkpoint(output_state_dict, max_shard_size=shard_size, weights_name=weights_name)

    for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
        if save_safetensors:
            save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
        else:
            torch.save(shard, os.path.join(output_dir, shard_file))

    if index is None:
        print("Model weights saved in {}".format(os.path.join(output_dir, weights_name)))
    else:
        index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
        with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2, sort_keys=True)
        print("Model weights saved in {}".format(output_dir))

    print("Fine-tune this model with:")
    print("    --model_name_or_path {} \\".format(output_dir))
    print("    --finetuning_type freeze \\")
    print("    --name_module_trainable all \\")
    print("    --num_layer_trainable {} \\".format(num_expand))
    print("    --use_llama_pro")


if __name__ == "__main__":
    fire.Fire(block_expansion)
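The interleaving performed by the loop above, traced on illustrative sizes (num_layers=32, num_expand=8, so split=4): one extra block is appended after every fourth original layer, and only the `o_proj`/`down_proj` weights of the copy are zeroed:

num_layers, num_expand = 32, 8
split = num_layers // num_expand
layout = []
for i in range(num_layers):
    layout.append("layer_{}".format(i))
    if (i + 1) % split == 0:
        layout.append("expanded_copy_of_{}".format(i))  # o_proj / down_proj zero-initialized in the copy
print(len(layout))  # 40 layers after expansion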
LLaMA-Factory/tests/llamafy_baichuan2.py  (new file, mode 100644)

# coding=utf-8
# Converts the Baichuan2-7B model in the same format as LLaMA2-7B.
# Usage: python llamafy_baichuan2.py --input_dir input --output_dir output
# Inspired by: https://huggingface.co/fireballoon/baichuan-llama-7b/blob/main/convert_baichuan_to_llama.py
# Converted model: https://huggingface.co/hiyouga/Baichuan2-7B-Base-LLaMAfied

import json
import os
from collections import OrderedDict
from typing import Any, Dict, Optional

import fire
import torch
from safetensors.torch import save_file
from tqdm import tqdm
from transformers.modeling_utils import (
    SAFE_WEIGHTS_INDEX_NAME,
    SAFE_WEIGHTS_NAME,
    WEIGHTS_INDEX_NAME,
    WEIGHTS_NAME,
    shard_checkpoint,
)


CONFIG_NAME = "config.json"


def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool):
    baichuan2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
    for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
        if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".bin"):
            shard_weight = torch.load(os.path.join(input_dir, filepath), map_location="cpu")
            baichuan2_state_dict.update(shard_weight)

    llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
    for key, value in tqdm(baichuan2_state_dict.items(), desc="Convert format"):
        if "W_pack" in key:
            proj_size = value.size(0) // 3
            llama2_state_dict[key.replace("W_pack", "q_proj")] = value[:proj_size, :]
            llama2_state_dict[key.replace("W_pack", "k_proj")] = value[proj_size : 2 * proj_size, :]
            llama2_state_dict[key.replace("W_pack", "v_proj")] = value[2 * proj_size :, :]
        elif "lm_head" in key:
            llama2_state_dict[key] = torch.nn.functional.normalize(value)
        else:
            llama2_state_dict[key] = value

    weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
    shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name)

    for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
        if save_safetensors:
            save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
        else:
            torch.save(shard, os.path.join(output_dir, shard_file))

    if index is None:
        print("Model weights saved in {}".format(os.path.join(output_dir, WEIGHTS_NAME)))
    else:
        index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
        with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2, sort_keys=True)
        print("Model weights saved in {}".format(output_dir))


def save_config(input_dir: str, output_dir: str):
    with open(os.path.join(input_dir, CONFIG_NAME), "r", encoding="utf-8") as f:
        llama2_config_dict: Dict[str, Any] = json.load(f)

    llama2_config_dict["architectures"] = ["LlamaForCausalLM"]
    llama2_config_dict.pop("auto_map", None)
    llama2_config_dict.pop("tokenizer_class", None)
    llama2_config_dict["model_type"] = "llama"

    with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f:
        json.dump(llama2_config_dict, f, indent=2)
    print("Model config saved in {}".format(os.path.join(output_dir, CONFIG_NAME)))


def llamafy_baichuan2(
    input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False
):
    try:
        os.makedirs(output_dir, exist_ok=False)
    except Exception as e:
        raise print("Output dir already exists", e)

    save_weight(input_dir, output_dir, shard_size, save_safetensors)
    save_config(input_dir, output_dir)


if __name__ == "__main__":
    fire.Fire(llamafy_baichuan2)
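What the `W_pack` branch does in terms of shapes: Baichuan2 stacks the query, key and value projections along dim 0, so a (3*h, h) matrix is sliced into three (h, h) projections. A toy-sized illustration of the same slicing:

import torch

hidden = 4
w_pack = torch.arange(3 * hidden * hidden, dtype=torch.float32).reshape(3 * hidden, hidden)
proj_size = w_pack.size(0) // 3
q = w_pack[:proj_size, :]
k = w_pack[proj_size : 2 * proj_size, :]
v = w_pack[2 * proj_size :, :]
print(q.shape, k.shape, v.shape)  # three torch.Size([4, 4]) tensors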
LLaMA-Factory/tests/llamafy_internlm2.py
0 → 100644
View file @
afe180a6
# coding=utf-8
# Converts the InternLM2 model in the same format as LLaMA2.
# Usage: python llamafy_internlm2.py --input_dir input --output_dir output
# Warning: We have found that the converted model cannot infer correctly. It will be fixed later.
import json
import os
from collections import OrderedDict
from typing import Any, Dict, Optional

import fire
import torch
from safetensors.torch import save_file
from tqdm import tqdm
from transformers.modeling_utils import (
    SAFE_WEIGHTS_INDEX_NAME,
    SAFE_WEIGHTS_NAME,
    WEIGHTS_INDEX_NAME,
    WEIGHTS_NAME,
    shard_checkpoint,
)


CONFIG_NAME = "config.json"


def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool):
    with open(os.path.join(input_dir, CONFIG_NAME), "r", encoding="utf-8") as f:
        internlm2_config_dict: Dict[str, Any] = json.load(f)

    internlm2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
    for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
        if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".bin"):
            shard_weight = torch.load(os.path.join(input_dir, filepath), map_location="cpu")
            internlm2_state_dict.update(shard_weight)

    llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
    for key, value in tqdm(internlm2_state_dict.items(), desc="Convert format"):
        if "output" in key:
            llama2_state_dict[key.replace("output", "lm_head")] = value
        elif "tok_embeddings" in key:
            llama2_state_dict[key.replace("tok_embeddings", "embed_tokens")] = value
        elif "wqkv" in key:
            num_q_heads = internlm2_config_dict["num_attention_heads"]
            num_kv_heads = internlm2_config_dict["num_key_value_heads"]
            q_size = value.size(0) // (num_q_heads + 2 * num_kv_heads) * num_q_heads
            kv_size = value.size(0) // (num_q_heads + 2 * num_kv_heads) * num_kv_heads
            llama2_state_dict[key.replace("attention.wqkv", "self_attn.q_proj")] = value[:q_size, ...]
            llama2_state_dict[key.replace("attention.wqkv", "self_attn.k_proj")] = value[q_size : q_size + kv_size, ...]
            llama2_state_dict[key.replace("attention.wqkv", "self_attn.v_proj")] = value[q_size + kv_size :, ...]
        elif "wo" in key:
            llama2_state_dict[key.replace("attention.wo", "self_attn.o_proj")] = value
        elif "attention_norm" in key:
            llama2_state_dict[key.replace("attention_norm", "input_layernorm")] = value
        elif "ffn_norm" in key:
            llama2_state_dict[key.replace("ffn_norm", "post_attention_layernorm")] = value
        elif "w1" in key:
            llama2_state_dict[key.replace("feed_forward.w1", "mlp.gate_proj")] = value
        elif "w2" in key:
            llama2_state_dict[key.replace("feed_forward.w2", "mlp.down_proj")] = value
        elif "w3" in key:
            llama2_state_dict[key.replace("feed_forward.w3", "mlp.up_proj")] = value
        else:
            llama2_state_dict[key] = value

    weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
    shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name)
    for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
        if save_safetensors:
            save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
        else:
            torch.save(shard, os.path.join(output_dir, shard_file))

    if index is None:
        print("Model weights saved in {}".format(os.path.join(output_dir, WEIGHTS_NAME)))
    else:
        index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
        with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2, sort_keys=True)
        print("Model weights saved in {}".format(output_dir))


def save_config(input_dir: str, output_dir: str):
    with open(os.path.join(input_dir, CONFIG_NAME), "r", encoding="utf-8") as f:
        llama2_config_dict: Dict[str, Any] = json.load(f)

    llama2_config_dict["architectures"] = ["LlamaForCausalLM"]
    llama2_config_dict.pop("auto_map", None)
    llama2_config_dict.pop("bias", None)
    llama2_config_dict.pop("rope_scaling", None)
    llama2_config_dict["model_type"] = "llama"

    with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f:
        json.dump(llama2_config_dict, f, indent=2)
    print("Model config saved in {}".format(os.path.join(output_dir, CONFIG_NAME)))


def llamafy_internlm2(
    input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False
):
    try:
        os.makedirs(output_dir, exist_ok=False)
    except Exception as e:
        # fail fast instead of overwriting an existing output directory
        raise RuntimeError("Output dir already exists: {}".format(output_dir)) from e
    save_weight(input_dir, output_dir, shard_size, save_safetensors)
    save_config(input_dir, output_dir)


if __name__ == "__main__":
    fire.Fire(llamafy_internlm2)
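The `wqkv` branch above splits InternLM2's fused attention projection into q/k/v by row counts derived from the head configuration. The snippet below is a small, self-contained illustration of that arithmetic only; the head counts and dimensions are assumptions for the example, not the real InternLM2 values, and the real checkpoint also interleaves heads inside the fused tensor, which is why the header warns that the converted model may not infer correctly.

```
import torch

# Assumed illustrative shapes, not the actual InternLM2 configuration.
num_q_heads, num_kv_heads, head_dim, hidden_size = 32, 8, 128, 4096
fused = torch.zeros((num_q_heads + 2 * num_kv_heads) * head_dim, hidden_size)  # stand-in wqkv weight

q_size = fused.size(0) // (num_q_heads + 2 * num_kv_heads) * num_q_heads    # 128 * 32 = 4096 rows
kv_size = fused.size(0) // (num_q_heads + 2 * num_kv_heads) * num_kv_heads  # 128 * 8 = 1024 rows

q_proj = fused[:q_size, ...]
k_proj = fused[q_size : q_size + kv_size, ...]
v_proj = fused[q_size + kv_size :, ...]
assert q_proj.size(0) + k_proj.size(0) + v_proj.size(0) == fused.size(0)
```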
LLaMA-Factory/tests/llamafy_qwen.py
# coding=utf-8
# Converts the Qwen models in the same format as LLaMA2.
# Usage: python llamafy_qwen.py --input_dir input --output_dir output
# Converted model: https://huggingface.co/hiyouga/Qwen-14B-Chat-LLaMAfied
import json
import os
from collections import OrderedDict
from typing import Any, Dict, Optional

import fire
import torch
from safetensors import safe_open
from safetensors.torch import save_file
from tqdm import tqdm
from transformers.modeling_utils import (
    SAFE_WEIGHTS_INDEX_NAME,
    SAFE_WEIGHTS_NAME,
    WEIGHTS_INDEX_NAME,
    WEIGHTS_NAME,
    shard_checkpoint,
)
from transformers.utils import check_min_version


try:
    check_min_version("4.34.0")
except Exception:
    raise ValueError("Please upgrade `transformers` to 4.34.0")


CONFIG_NAME = "config.json"


def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool) -> str:
    qwen_state_dict: Dict[str, torch.Tensor] = OrderedDict()
    for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
        if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".safetensors"):
            with safe_open(os.path.join(input_dir, filepath), framework="pt", device="cpu") as f:
                for key in f.keys():
                    qwen_state_dict[key] = f.get_tensor(key)

    llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
    torch_dtype = None
    for key, value in tqdm(qwen_state_dict.items(), desc="Convert format"):
        if torch_dtype is None:
            torch_dtype = value.dtype

        if "wte" in key:
            llama2_state_dict["model.embed_tokens.weight"] = value
        elif "ln_f" in key:
            llama2_state_dict["model.norm.weight"] = value
        else:
            key = key.replace("transformer.h", "model.layers")
            if "attn.c_attn" in key:
                proj_size = value.size(0) // 3
                llama2_state_dict[key.replace("attn.c_attn", "self_attn.q_proj")] = value[:proj_size, ...]
                llama2_state_dict[key.replace("attn.c_attn", "self_attn.k_proj")] = value[proj_size : 2 * proj_size, ...]
                llama2_state_dict[key.replace("attn.c_attn", "self_attn.v_proj")] = value[2 * proj_size :, ...]
            elif "attn.c_proj" in key:
                llama2_state_dict[key.replace("attn.c_proj", "self_attn.o_proj")] = value
                llama2_state_dict[key.replace("attn.c_proj.weight", "self_attn.o_proj.bias")] = torch.zeros_like(
                    value[:, 0]
                ).squeeze()
            elif "ln_1" in key:
                llama2_state_dict[key.replace("ln_1", "input_layernorm")] = value
            elif "ln_2" in key:
                llama2_state_dict[key.replace("ln_2", "post_attention_layernorm")] = value
            elif "mlp.w1" in key:
                llama2_state_dict[key.replace("mlp.w1", "mlp.up_proj")] = value
            elif "mlp.w2" in key:
                llama2_state_dict[key.replace("mlp.w2", "mlp.gate_proj")] = value
            elif "mlp.c_proj" in key:
                llama2_state_dict[key.replace("mlp.c_proj", "mlp.down_proj")] = value
            elif "lm_head" in key:
                llama2_state_dict[key] = value
            else:
                raise KeyError("Unable to process key {}".format(key))

    weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
    shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name)
    for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
        if save_safetensors:
            save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
        else:
            torch.save(shard, os.path.join(output_dir, shard_file))

    if index is None:
        print("Model weights saved in {}".format(os.path.join(output_dir, weights_name)))
    else:
        index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
        with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2, sort_keys=True)
        print("Model weights saved in {}".format(output_dir))

    return str(torch_dtype).replace("torch.", "")


def save_config(input_dir: str, output_dir: str, torch_dtype: str):
    with open(os.path.join(input_dir, CONFIG_NAME), "r", encoding="utf-8") as f:
        qwen_config_dict: Dict[str, Any] = json.load(f)

    llama2_config_dict: Dict[str, Any] = OrderedDict()
    llama2_config_dict["architectures"] = ["LlamaForCausalLM"]
    llama2_config_dict["hidden_act"] = "silu"
    llama2_config_dict["hidden_size"] = qwen_config_dict["hidden_size"]
    llama2_config_dict["initializer_range"] = qwen_config_dict["initializer_range"]
    llama2_config_dict["intermediate_size"] = qwen_config_dict["intermediate_size"] // 2
    llama2_config_dict["max_position_embeddings"] = qwen_config_dict["max_position_embeddings"]
    llama2_config_dict["model_type"] = "llama"
    llama2_config_dict["num_attention_heads"] = qwen_config_dict["num_attention_heads"]
    llama2_config_dict["num_hidden_layers"] = qwen_config_dict["num_hidden_layers"]
    llama2_config_dict["num_key_value_heads"] = qwen_config_dict["hidden_size"] // qwen_config_dict["kv_channels"]
    llama2_config_dict["pretraining_tp"] = 1
    llama2_config_dict["rms_norm_eps"] = qwen_config_dict["layer_norm_epsilon"]
    llama2_config_dict["rope_scaling"] = None
    llama2_config_dict["tie_word_embeddings"] = qwen_config_dict["tie_word_embeddings"]
    llama2_config_dict["torch_dtype"] = torch_dtype
    llama2_config_dict["transformers_version"] = "4.34.0"
    llama2_config_dict["use_cache"] = True
    llama2_config_dict["vocab_size"] = qwen_config_dict["vocab_size"]
    llama2_config_dict["attention_bias"] = True

    with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f:
        json.dump(llama2_config_dict, f, indent=2)
    print("Model config saved in {}".format(os.path.join(output_dir, CONFIG_NAME)))


def llamafy_qwen(
    input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False
):
    try:
        os.makedirs(output_dir, exist_ok=False)
    except Exception as e:
        # fail fast instead of overwriting an existing output directory
        raise RuntimeError("Output dir already exists: {}".format(output_dir)) from e
    torch_dtype = save_weight(input_dir, output_dir, shard_size, save_safetensors)
    save_config(input_dir, output_dir, torch_dtype)


if __name__ == "__main__":
    fire.Fire(llamafy_qwen)
LLaMA-Factory/tests/loftq_init.py
# coding=utf-8
# Initializes LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ)
# Usage: python loftq_init.py --model_name_or_path path_to_model --save_dir output_dir
# Inspired by: https://github.com/huggingface/peft/blob/main/examples/loftq_finetuning/quantize_save_load.py
import os
from typing import TYPE_CHECKING, Optional

import fire
import torch
import torch.nn as nn
from peft import LoftQConfig, LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer


if TYPE_CHECKING:
    from transformers import PreTrainedModel


class Shell(nn.Module):
    def __init__(self, weight: torch.Tensor, bias: Optional[torch.Tensor] = None):
        super().__init__()
        self.weight = nn.Parameter(weight, requires_grad=False)
        if bias is not None:
            self.bias = nn.Parameter(bias, requires_grad=False)


def unwrap_model(model: nn.Module, pattern=".base_layer") -> None:
    for name in {k.split(pattern)[0] for k, _ in model.named_modules() if pattern in k}:
        parent_name = ".".join(name.split(".")[:-1])
        child_name = name.split(".")[-1]
        parent_module = model.get_submodule(parent_name)
        child_module = getattr(parent_module, child_name)
        base_layer = getattr(child_module, "base_layer")
        weight = getattr(base_layer, "weight", None)
        bias = getattr(base_layer, "bias", None)
        setattr(parent_module, child_name, Shell(weight, bias))

    print("Model unwrapped.")


def quantize_loftq(
    model_name_or_path: str,
    save_dir: str,
    loftq_bits: Optional[int] = 4,
    loftq_iter: Optional[int] = 1,
    lora_alpha: Optional[int] = None,
    lora_rank: Optional[int] = 16,
    lora_target: Optional[str] = "q_proj,v_proj",
    save_safetensors: Optional[bool] = False,
):
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto")
    loftq_config = LoftQConfig(loftq_bits=loftq_bits, loftq_iter=loftq_iter)

    lora_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=True,
        r=lora_rank,
        lora_alpha=lora_alpha if lora_alpha is not None else lora_rank * 2,
        lora_dropout=0.1,
        target_modules=[name.strip() for name in lora_target.split(",")],
        init_lora_weights="loftq",
        loftq_config=loftq_config,
    )

    # Init LoftQ model
    lora_model = get_peft_model(model, lora_config)
    base_model: "PreTrainedModel" = lora_model.get_base_model()

    # Save LoftQ model
    setattr(lora_model.base_model.peft_config["default"], "base_model_name_or_path", save_dir)
    setattr(lora_model.base_model.peft_config["default"], "init_lora_weights", True)
    lora_model.save_pretrained(os.path.join(save_dir, "adapters"), safe_serialization=save_safetensors)

    # Save base model
    unwrap_model(base_model)
    base_model.save_pretrained(save_dir, safe_serialization=save_safetensors)
    tokenizer.save_pretrained(save_dir)


if __name__ == "__main__":
    fire.Fire(quantize_loftq)
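For context, `quantize_loftq` writes the LoftQ-initialized base model into `save_dir` and the LoRA adapters into `save_dir/adapters`. A minimal sketch of loading those artifacts back with `peft` follows; the `output_dir` path is a placeholder, and loading the base model in its saved precision via `torch_dtype="auto"` is an assumption rather than a documented requirement.

```
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

save_dir = "output_dir"  # placeholder: the --save_dir passed to quantize_loftq
tokenizer = AutoTokenizer.from_pretrained(save_dir, trust_remote_code=True)
base_model = AutoModelForCausalLM.from_pretrained(save_dir, trust_remote_code=True, torch_dtype="auto")
model = PeftModel.from_pretrained(base_model, save_dir + "/adapters")  # attach the saved LoRA adapters
```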
LLaMA-Factory/tests/test_toolcall.py
import json
from typing import Sequence

from openai import OpenAI
from transformers.utils.versions import require_version


require_version("openai>=1.5.0", "To fix: pip install openai>=1.5.0")


def calculate_gpa(grades: Sequence[str], hours: Sequence[int]) -> float:
    grade_to_score = {"A": 4, "B": 3, "C": 2}
    total_score, total_hour = 0, 0
    for grade, hour in zip(grades, hours):
        total_score += grade_to_score[grade] * hour
        total_hour += hour
    return total_score / total_hour


tool_map = {"calculate_gpa": calculate_gpa}


if __name__ == "__main__":
    client = OpenAI(
        api_key="0",
        base_url="http://localhost:8000/v1",
    )
    tools = [
        {
            "type": "function",
            "function": {
                "name": "calculate_gpa",
                "description": "Calculate the Grade Point Average (GPA) based on grades and credit hours",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "grades": {"type": "array", "items": {"type": "string"}, "description": "The grades"},
                        "hours": {"type": "array", "items": {"type": "integer"}, "description": "The credit hours"},
                    },
                    "required": ["grades", "hours"],
                },
            },
        }
    ]
    messages = []
    messages.append({"role": "user", "content": "My grades are A, A, B, and C. The credit hours are 3, 4, 3, and 2."})
    result = client.chat.completions.create(messages=messages, model="test", tools=tools)
    tool_call = result.choices[0].message.tool_calls[0].function
    name, arguments = tool_call.name, json.loads(tool_call.arguments)
    messages.append(
        {"role": "function", "content": json.dumps({"name": name, "argument": arguments}, ensure_ascii=False)}
    )
    tool_result = tool_map[name](**arguments)
    messages.append({"role": "tool", "content": json.dumps({"gpa": tool_result}, ensure_ascii=False)})
    result = client.chat.completions.create(messages=messages, model="test", tools=tools)
    print(result.choices[0].message.content)
    # Based on your grades and credit hours, your calculated Grade Point Average (GPA) is 3.4166666666666665.
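The GPA quoted in the final comment follows directly from `calculate_gpa`: (4·3 + 4·4 + 3·3 + 2·2) / (3 + 4 + 3 + 2) = 41 / 12 ≈ 3.4167. The script assumes an OpenAI-compatible chat-completions server is already listening at http://localhost:8000/v1 (the `base_url` above) and exposes a model that supports tool calls.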
README.md
# DISC-FinLLM

**DISC-FinLLM is a financial-domain large language model that provides professional, intelligent, and comprehensive financial consulting services for users in financial scenarios. It is developed and open-sourced by the [Fudan University Data Intelligence and Social Computing Lab (Fudan-DISC)](http://fudan-disc.com).**

## Paper

- Paper: [DISC-FinLLM: A Chinese Financial Large Language Model based on Multiple Experts Fine-tuning](https://arxiv.org/abs/2310.15205)
- Pretrained model on Hugging Face: <https://huggingface.co/Go4miii/DISC-FinLLM>
## Model Architecture

### DISC-FinLLM architecture

<div align="center">
<img align="center" src="images/transformer.jpg">
</div>
## Algorithm

DISC-FinLLM is obtained by LoRA instruction fine-tuning of the general-domain Chinese model Baichuan-13B-Chat on DISC-Fin-SFT, our high-quality financial instruction dataset.

<div align="center">
<img align="center" src="images/transformer.png">
</div>
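As a rough illustration of this setup, the sketch below attaches a LoRA adapter to Baichuan-13B-Chat with `peft`. The rank, alpha, dropout, and the `W_pack` target-module name are assumptions for illustration only, not the configuration actually used to train DISC-FinLLM; the project's real training runs are driven by the shell scripts listed under the Training section.

```
# Minimal LoRA fine-tuning sketch; hyperparameters are assumed, not the released training config.
from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained(
    "baichuan-inc/Baichuan-13B-Chat", trust_remote_code=True, torch_dtype="auto"
)
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=16,                       # assumed LoRA rank
    lora_alpha=32,              # assumed scaling factor
    lora_dropout=0.05,
    target_modules=["W_pack"],  # assumed fused attention projection name in Baichuan models
)
model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()  # only the low-rank adapter weights remain trainable
```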
## Environment Setup

### Docker (method 1)

```
docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-ubuntu22.04-dtk23.10.1-py310
docker run -it -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/:ro --shm-size=64G --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name DISC-FinLLM <your imageID> bash
docker exec -it DISC-FinLLM bash
cd /path/your_code_data/DISC-FinLLM
pip install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
```

### Dockerfile (method 2)

```
cd /path/your_code_data/DISC-FinLLM/docker
docker build --no-cache -t disc-finllm:latest .
docker run --shm-size=64G --name DISC-FinLLM -v /opt/hyhal:/opt/hyhal:ro --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video -v /path/your_code_data/:/path/your_code_data/ -it disc-finllm:latest bash
```

### Anaconda (method 3)

The DCU-specific deep learning libraries required by this project can be downloaded from the [Guanghe developer community](https://developer.hpccube.com/tool/).

```
DTK driver: dtk23.10
python: python3.10
torch: 2.1
torchvision: 0.16.0
apex: 1.1.0
deepspeed: 0.12.3
```

```
conda create -n DISC-FinLLM python=3.10
conda activate DISC-FinLLM
cd /path/your_code_data/DISC-FinLLM
pip install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple
```

`Tips: the versions of the DTK driver, python, torch, deepspeed, and other DCU-related tools listed above must match each other exactly.`
## Dataset

**The datasets for the [data analysis evaluation](https://github.com/FudanDISC/DISC-FinLLM/tree/main/eval/computing_eval.json) and the [current-affairs analysis evaluation](https://github.com/FudanDISC/DISC-FinLLM/tree/main/eval/retriever_eval.json) can be viewed at these links.**

### Custom data processing code

See data_processor.py:

```
import json

jsonl_file_path = '.../data/dataset_new.jsonl'
json_file_path = '../data/dataset_new.json'
data = []

with open(jsonl_file_path, 'r', encoding='utf-8') as file:
    for line in file:
        jsonl_data = json.loads(line)
        json_data = {
            "instruction": jsonl_data.get("context").split('\n')[0].replace('Instruction: ', ''),
            "input": jsonl_data.get("context").split('\n')[1].replace('Input: ', ''),
            "output": jsonl_data.get("target")
        }
        data.append(json_data)

with open(json_file_path, 'w', encoding='utf-8') as file:
    json.dump(data, file, ensure_ascii=False, indent=4)

print(data)
```
A mini dataset for trial training is already included in the project. The training data directory layout is shown below; prepare the full dataset for regular training according to the same structure (an example record format is sketched after the tree):

```
── data
│   ├── computing_part.json
│   ├── consulting_part.json
│   ├── retrieval_part.json
│   └── task_part.json
```
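Each of these JSON files is expected to follow the Alpaca-style `instruction` / `input` / `output` layout that data_processor.py produces above. The record below is a hypothetical illustration of that format, not an entry from the released dataset:

```
[
  {
    "instruction": "Calculate the year-over-year revenue growth rate.",
    "input": "Revenue was 12.0 billion CNY in 2022 and 10.0 billion CNY in 2021.",
    "output": "Year-over-year growth rate = (12.0 - 10.0) / 10.0 = 20%."
  }
]
```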
## Training

**Before running the training code, replace the files in the FinLLM model directory with the downloaded local FinLLM model files.**

### Single node, multiple DCUs

```
bash multi_dcu_train.sh
```

### Single node, single DCU

```
bash sft_work_dtk.sh
```
## Inference

**Before running the inference code, replace the files in the FinLLM model directory with the downloaded local FinLLM model files, and change the model path in cli_demo.py to the local model path.**

### Single node, single DCU

Change **model_path** in **cli_demo.py** to the local model path after the model files have been replaced:

```
python cli_demo.py
```

### Interactive web inference

Change **model_path** in **web_demo.py** to the local model path after the model files have been replaced:

```
python web_demo.py
```
## Result

DISC-FinLLM is a financial-domain large language model: a multi-expert financial system built from four modules targeting different financial scenarios, namely financial consulting, financial text analysis, financial computation, and retrieval-augmented financial Q&A. These modules show clear advantages in four evaluations covering financial NLP tasks, human exam questions, data analysis, and current-affairs analysis, demonstrating that DISC-FinLLM can provide strong support across a wide range of financial applications. It can help in different scenarios and implements the following functions:

* **Financial consulting:** This module holds multi-turn dialogues with users on financial topics in the Chinese financial context and explains financial knowledge. It is trained on the financial consulting instructions in the dataset.
* **Financial text analysis:** This module helps users complete NLP tasks on financial texts, such as information extraction, sentiment analysis, text classification, and text generation. It is trained on the financial task instructions in the dataset.
* **Financial computation:** This module helps users with math-related tasks. Beyond basic calculations such as interest rates and growth rates, it supports statistical analysis and financial-model computations, including the Black-Scholes option pricing model and the EDF expected default frequency model. It is trained on the financial computation instructions in the dataset.
* **Retrieval-augmented financial Q&A:** This module provides investment advice, current-affairs analysis, and policy interpretation based on financial news, research reports, and policy documents. It is trained on the retrieval-augmented instructions in the dataset.

<div align="center">
<img align="center" src="images/result.png">
</div>
### Accuracy

Test data: [retrieval_part](data/retrieval_part.json); accelerator cards used: V100S/K100.

Results:

| Accelerator | train_loss | train_runtime | eval_loss | eval_runtime |
| :------: | :------: | :------: | :------: | :------: |
| V100S | 0.371248 | 4445.348 | 0.06542 | 30.5495 |
| K100 | 0.671394 | 2384.0498 | 0.64843 | 19.6838 |
## Application Scenarios

Finance, education, government, scientific research

### Algorithm Categories

Financial consulting, financial computation, text analysis

## Pretrained Weights

- Download the full-parameter model weights from [Hugging Face Go4miii/DISC-FinLLM](https://huggingface.co/Go4miii/DISC-FinLLM).
## References

- Upstream project repository: [Go4miii/DISC-FinLLM](https://github.com/FudanDISC/DISC-FinLLM)
cli_demo.py
import os
import torch
import platform
from colorama import Fore, Style
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation.utils import GenerationConfig


def init_model():
    print("Initializing model...")
    model_path = "/DISC-FinLLM/FinLLM"
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )
    model.generation_config = GenerationConfig.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(
        model_path,
        use_fast=False,
        trust_remote_code=True
    )
    return model, tokenizer


def clear_screen():
    if platform.system() == "Windows":
        os.system("cls")
    else:
        os.system("clear")
    print(
        Fore.YELLOW + Style.BRIGHT
        + "欢迎使用复旦 DISC-FinLLM,输入进行对话,clear 清空历史,Ctrl+C 中断生成,"
        + "stream 开关流式生成,exit 结束。"
    )
    return []


def main(stream=True):
    model, tokenizer = init_model()
    messages = clear_screen()
    while True:
        prompt = input(Fore.GREEN + Style.BRIGHT + "\n用户:" + Style.NORMAL)
        if prompt.strip() == "exit":
            break
        if prompt.strip() == "clear":
            messages = clear_screen()
            continue
        print(Fore.CYAN + Style.BRIGHT + "\nDISC-FinLLM:" + Style.NORMAL, end="")
        if prompt.strip() == "stream":
            stream = not stream
            print(
                Fore.YELLOW + "({}流式生成)\n".format("开启" if stream else "关闭"),
                end="",
            )
            continue
        messages.append({"role": "user", "content": prompt})
        if stream:
            position = 0
            try:
                for response in model.chat(tokenizer, messages, stream=True):
                    print(response[position:], end="", flush=True)
                    position = len(response)
                    if torch.backends.mps.is_available():
                        torch.mps.empty_cache()
            except KeyboardInterrupt:
                pass
            print()
        else:
            response = model.chat(tokenizer, messages)
            print(response)
            if torch.backends.mps.is_available():
                torch.mps.empty_cache()
        messages.append({"role": "assistant", "content": response})
        print(Style.RESET_ALL)


if __name__ == "__main__":
    main()