ModelZoo / qwen2.5-coder_pytorch · Commits

Commit 53b3977b, authored Jul 11, 2025 by dongchy920

Initial commit

Pipeline #2841 failed in 0 seconds
Changes: 350 · Pipelines: 1

Showing 20 changed files on this page (the full diff spans 18 pages), with 1109 additions and 0 deletions (+1109 −0).
LLaMA-Factory/examples/train_lora/llama3_lora_reward.yaml  +40 −0
LLaMA-Factory/examples/train_lora/llama3_lora_sft.yaml  +40 −0
LLaMA-Factory/examples/train_lora/llama3_lora_sft_ds3.yaml  +41 −0
LLaMA-Factory/examples/train_lora/llama3_preprocess.yaml  +22 −0
LLaMA-Factory/examples/train_lora/llava1_5_lora_sft.yaml  +40 −0
LLaMA-Factory/examples/train_lora/qwen2vl_lora_dpo.yaml  +42 −0
LLaMA-Factory/examples/train_lora/qwen2vl_lora_sft.yaml  +40 −0
LLaMA-Factory/examples/train_lora/qwen2vl_lora_sft_custom.yaml  +41 −0
LLaMA-Factory/examples/train_lora/qwen2vl_lora_sft_offload_custom.yaml  +42 −0
LLaMA-Factory/examples/train_qlora/llama3_lora_sft_aqlm.yaml  +40 −0
LLaMA-Factory/examples/train_qlora/llama3_lora_sft_awq.yaml  +40 −0
LLaMA-Factory/examples/train_qlora/llama3_lora_sft_gptq.yaml  +40 −0
LLaMA-Factory/examples/train_qlora/llama3_lora_sft_otfq.yaml  +42 −0
LLaMA-Factory/pyproject.toml  +33 −0
LLaMA-Factory/requirements.txt  +24 −0
LLaMA-Factory/scripts/api_example/test_image.py  +65 −0
LLaMA-Factory/scripts/api_example/test_toolcall.py  +78 −0
LLaMA-Factory/scripts/convert_ckpt/llamafy_baichuan2.py  +108 −0
LLaMA-Factory/scripts/convert_ckpt/llamafy_qwen.py  +161 −0
LLaMA-Factory/scripts/llama_pro.py  +130 −0

LLaMA-Factory/examples/train_lora/llama3_lora_reward.yaml  0 → 100644

### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code: true

### method
stage: rm
do_train: true
finetuning_type: lora
lora_target: all

### dataset
dataset: dpo_en_demo
template: llama3
cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/llama3-8b/lora/reward
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500

LLaMA-Factory/examples/train_lora/llama3_lora_sft.yaml  0 → 100644

### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code: true

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all

### dataset
dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/llama3-8b/lora/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500
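
Usage note (not part of the commit): a config file like the one above is consumed whole by the LLaMA-Factory trainer; in the upstream project it is typically launched with `llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml` (that command comes from the LLaMA-Factory documentation, not from this diff). A minimal, illustrative sketch for loading and inspecting the file with PyYAML, which is already pinned in requirements.txt:

import yaml  # PyYAML, listed in LLaMA-Factory/requirements.txt

# Assumption: run from the LLaMA-Factory directory of this repository.
with open("examples/train_lora/llama3_lora_sft.yaml", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

# Per-device effective batch size = micro-batch size x gradient accumulation steps.
print(cfg["per_device_train_batch_size"] * cfg["gradient_accumulation_steps"])  # 1 * 8 = 8
print(cfg["output_dir"])  # saves/llama3-8b/lora/sft

The same pattern applies to every YAML file in this commit; only the model, dataset, method, and output settings differ.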

LLaMA-Factory/examples/train_lora/llama3_lora_sft_ds3.yaml  0 → 100644

### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code: true

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
deepspeed: examples/deepspeed/ds_z3_config.json  # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]

### dataset
dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/llama3-8b/lora/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 2
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500

LLaMA-Factory/examples/train_lora/llama3_preprocess.yaml  0 → 100644

### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code: true

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all

### dataset
dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
tokenized_path: saves/llama3-8b/dataset/sft

### output
output_dir: saves/llama3-8b/lora/sft
overwrite_output_dir: true

LLaMA-Factory/examples/train_lora/llava1_5_lora_sft.yaml  0 → 100644

### model
model_name_or_path: llava-hf/llava-1.5-7b-hf
trust_remote_code: true

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all

### dataset
dataset: mllm_demo
template: llava
cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/llava1_5-7b/lora/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500

LLaMA-Factory/examples/train_lora/qwen2vl_lora_dpo.yaml  0 → 100644

### model
model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
trust_remote_code: true

### method
stage: dpo
do_train: true
finetuning_type: lora
lora_target: all
pref_beta: 0.1
pref_loss: sigmoid  # choices: [sigmoid (dpo), orpo, simpo]

### dataset
dataset: rlhf_v
template: qwen2_vl
cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/qwen2_vl-7b/lora/dpo
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 5.0e-6
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500

LLaMA-Factory/examples/train_lora/qwen2vl_lora_sft.yaml  0 → 100644

### model
model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
trust_remote_code: true

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all

### dataset
dataset: mllm_demo,identity,alpaca_en_demo  # video: mllm_video_demo
template: qwen2_vl
cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/qwen2_vl-7b/lora/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500

LLaMA-Factory/examples/train_lora/qwen2vl_lora_sft_custom.yaml  0 → 100644

### model
model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
# quantization_bit: 4  # bitsandbytes raises an error
trust_remote_code: true

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all

### dataset
dataset: identity  # video: mllm_video_demo
template: qwen
cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/qwen2-coder-32b/lora/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 4
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500

LLaMA-Factory/examples/train_lora/qwen2vl_lora_sft_offload_custom.yaml  0 → 100644

### model
model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
# quantization_bit: 4  # bitsandbytes raises an error
trust_remote_code: true

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
deepspeed: examples/deepspeed/ds_z3_offload_config.json

### dataset
dataset: identity  # video: mllm_video_demo
template: qwen
cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/qwen2-coder-32b/lora/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 4
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500

LLaMA-Factory/examples/train_qlora/llama3_lora_sft_aqlm.yaml  0 → 100644

### model
model_name_or_path: ISTA-DASLab/Meta-Llama-3-8B-Instruct-AQLM-2Bit-1x16
trust_remote_code: true

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all

### dataset
dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/llama3-8b/lora/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500

LLaMA-Factory/examples/train_qlora/llama3_lora_sft_awq.yaml  0 → 100644

### model
model_name_or_path: TechxGenus/Meta-Llama-3-8B-Instruct-AWQ
trust_remote_code: true

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all

### dataset
dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/llama3-8b/lora/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500

LLaMA-Factory/examples/train_qlora/llama3_lora_sft_gptq.yaml  0 → 100644

### model
model_name_or_path: TechxGenus/Meta-Llama-3-8B-Instruct-GPTQ
trust_remote_code: true

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all

### dataset
dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/llama3-8b/lora/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500

LLaMA-Factory/examples/train_qlora/llama3_lora_sft_otfq.yaml  0 → 100644

### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
quantization_bit: 4
quantization_method: bitsandbytes  # choices: [bitsandbytes (4/8), hqq (2/3/4/5/6/8), eetq (8)]
trust_remote_code: true

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all

### dataset
dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/llama3-8b/lora/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500

LLaMA-Factory/pyproject.toml  0 → 100644

[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[tool.ruff]
target-version = "py38"
line-length = 119
indent-width = 4

[tool.ruff.lint]
ignore = ["C408", "C901", "E501", "E731", "E741", "W605"]
select = ["C", "E", "F", "I", "W"]

[tool.ruff.lint.isort]
lines-after-imports = 2
known-first-party = ["llamafactory"]
known-third-party = [
    "accelerate",
    "datasets",
    "gradio",
    "numpy",
    "peft",
    "torch",
    "transformers",
    "trl",
]

[tool.ruff.format]
quote-style = "double"
indent-style = "space"
docstring-code-format = true
skip-magic-trailing-comma = false
line-ending = "auto"

LLaMA-Factory/requirements.txt  0 → 100644

transformers>=4.41.2,<=4.46.1
datasets>=2.16.0,<=3.1.0
accelerate>=0.34.0,<=1.0.1
peft>=0.11.1,<=0.12.0
trl>=0.8.6,<=0.9.6
tokenizers>=0.19.0,<0.20.4
gradio>=4.0.0,<5.0.0
pandas>=2.0.0
scipy
einops
sentencepiece
tiktoken
protobuf
uvicorn
pydantic
fastapi
sse-starlette
matplotlib>=3.7.0
fire
packaging
pyyaml
numpy<2.0.0
av
tyro<0.9.0
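
The pins above are deliberately narrow (for example, transformers must stay between 4.41.2 and 4.46.1). A small, illustrative check that an installed environment matches the pinned packages, using only the standard library:

from importlib.metadata import PackageNotFoundError, version  # stdlib, Python 3.8+

for pkg in ("transformers", "datasets", "accelerate", "peft", "trl", "tokenizers"):
    try:
        print(f"{pkg}=={version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg} is not installed")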

LLaMA-Factory/scripts/api_example/test_image.py  0 → 100644

# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from openai import OpenAI
from transformers.utils.versions import require_version


require_version("openai>=1.5.0", "To fix: pip install openai>=1.5.0")


def main():
    client = OpenAI(
        api_key="{}".format(os.environ.get("API_KEY", "0")),
        base_url="http://localhost:{}/v1".format(os.environ.get("API_PORT", 8000)),
    )
    messages = []
    messages.append(
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Output the color and number of each box."},
                {
                    "type": "image_url",
                    "image_url": {"url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-VL/boxes.png"},
                },
            ],
        }
    )
    result = client.chat.completions.create(messages=messages, model="test")
    messages.append(result.choices[0].message)
    print("Round 1:", result.choices[0].message.content)
    # The image shows a pyramid of colored blocks with numbers on them. Here are the colors and numbers of ...
    messages.append(
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What kind of flower is this?"},
                {
                    "type": "image_url",
                    "image_url": {"url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-VL/flowers.jpg"},
                },
            ],
        }
    )
    result = client.chat.completions.create(messages=messages, model="test")
    messages.append(result.choices[0].message)
    print("Round 2:", result.choices[0].message.content)
    # The image shows a cluster of forget-me-not flowers. Forget-me-nots are small ...


if __name__ == "__main__":
    main()
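
The script reads API_KEY and API_PORT from the environment when it builds the OpenAI client, defaulting to "0" and 8000. A minimal, illustrative way to point it at a locally served OpenAI-compatible endpoint before running it (the values below are only the script's defaults, not requirements):

import os

# Both variables are read by test_image.py via os.environ.get(...).
os.environ.setdefault("API_KEY", "0")      # placeholder key, same default as the script
os.environ.setdefault("API_PORT", "8000")  # must match the port the API server listens on

# Then run: python LLaMA-Factory/scripts/api_example/test_image.py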

LLaMA-Factory/scripts/api_example/test_toolcall.py  0 → 100644

# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os
from typing import Sequence

from openai import OpenAI
from transformers.utils.versions import require_version


require_version("openai>=1.5.0", "To fix: pip install openai>=1.5.0")


def calculate_gpa(grades: Sequence[str], hours: Sequence[int]) -> float:
    grade_to_score = {"A": 4, "B": 3, "C": 2}
    total_score, total_hour = 0, 0
    for grade, hour in zip(grades, hours):
        total_score += grade_to_score[grade] * hour
        total_hour += hour

    return round(total_score / total_hour, 2)


def main():
    client = OpenAI(
        api_key="{}".format(os.environ.get("API_KEY", "0")),
        base_url="http://localhost:{}/v1".format(os.environ.get("API_PORT", 8000)),
    )
    tools = [
        {
            "type": "function",
            "function": {
                "name": "calculate_gpa",
                "description": "Calculate the Grade Point Average (GPA) based on grades and credit hours",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "grades": {"type": "array", "items": {"type": "string"}, "description": "The grades"},
                        "hours": {"type": "array", "items": {"type": "integer"}, "description": "The credit hours"},
                    },
                    "required": ["grades", "hours"],
                },
            },
        }
    ]
    tool_map = {"calculate_gpa": calculate_gpa}

    messages = []
    messages.append({"role": "user", "content": "My grades are A, A, B, and C. The credit hours are 3, 4, 3, and 2."})
    result = client.chat.completions.create(messages=messages, model="test", tools=tools)
    if result.choices[0].message.tool_calls is None:
        raise ValueError("Cannot retrieve function call from the response.")

    messages.append(result.choices[0].message)
    tool_call = result.choices[0].message.tool_calls[0].function
    print(tool_call)
    # Function(arguments='{"grades": ["A", "A", "B", "C"], "hours": [3, 4, 3, 2]}', name='calculate_gpa')
    name, arguments = tool_call.name, json.loads(tool_call.arguments)
    tool_result = tool_map[name](**arguments)
    messages.append({"role": "tool", "content": json.dumps({"gpa": tool_result}, ensure_ascii=False)})
    result = client.chat.completions.create(messages=messages, model="test", tools=tools)
    print(result.choices[0].message.content)
    # Based on the grades and credit hours you provided, your Grade Point Average (GPA) is 3.42.


if __name__ == "__main__":
    main()
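
For the hard-coded prompt, the tool result can be verified by hand: (4·3 + 4·4 + 3·3 + 2·2) / (3 + 4 + 3 + 2) = 41 / 12 ≈ 3.42, which matches the assistant answer quoted in the final comment. The same check as a standalone snippet, reusing the helper defined in the script:

from typing import Sequence


def calculate_gpa(grades: Sequence[str], hours: Sequence[int]) -> float:
    # Copied verbatim from test_toolcall.py so the check runs standalone.
    grade_to_score = {"A": 4, "B": 3, "C": 2}
    total_score, total_hour = 0, 0
    for grade, hour in zip(grades, hours):
        total_score += grade_to_score[grade] * hour
        total_hour += hour

    return round(total_score / total_hour, 2)


assert calculate_gpa(["A", "A", "B", "C"], [3, 4, 3, 2]) == 3.42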

LLaMA-Factory/scripts/convert_ckpt/llamafy_baichuan2.py  0 → 100644

# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os
from collections import OrderedDict
from typing import Any, Dict

import fire
import torch
from safetensors.torch import save_file
from tqdm import tqdm
from transformers.modeling_utils import (
    SAFE_WEIGHTS_INDEX_NAME,
    SAFE_WEIGHTS_NAME,
    WEIGHTS_INDEX_NAME,
    WEIGHTS_NAME,
    shard_checkpoint,
)


CONFIG_NAME = "config.json"


def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool):
    baichuan2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
    for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
        if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".bin"):
            shard_weight = torch.load(os.path.join(input_dir, filepath), map_location="cpu")
            baichuan2_state_dict.update(shard_weight)

    llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
    for key, value in tqdm(baichuan2_state_dict.items(), desc="Convert format"):
        if "W_pack" in key:
            proj_size = value.size(0) // 3
            llama2_state_dict[key.replace("W_pack", "q_proj")] = value[:proj_size, :]
            llama2_state_dict[key.replace("W_pack", "k_proj")] = value[proj_size : 2 * proj_size, :]
            llama2_state_dict[key.replace("W_pack", "v_proj")] = value[2 * proj_size :, :]
        elif "lm_head" in key:
            llama2_state_dict[key] = torch.nn.functional.normalize(value)
        else:
            llama2_state_dict[key] = value

    weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
    shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name)
    for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
        if save_safetensors:
            save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
        else:
            torch.save(shard, os.path.join(output_dir, shard_file))

    if index is None:
        print(f"Model weights saved in {os.path.join(output_dir, WEIGHTS_NAME)}")
    else:
        index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
        with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2, sort_keys=True)

        print(f"Model weights saved in {output_dir}")


def save_config(input_dir: str, output_dir: str):
    with open(os.path.join(input_dir, CONFIG_NAME), encoding="utf-8") as f:
        llama2_config_dict: Dict[str, Any] = json.load(f)

    llama2_config_dict["architectures"] = ["LlamaForCausalLM"]
    llama2_config_dict.pop("auto_map", None)
    llama2_config_dict.pop("tokenizer_class", None)
    llama2_config_dict["model_type"] = "llama"

    with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f:
        json.dump(llama2_config_dict, f, indent=2)

    print(f"Model config saved in {os.path.join(output_dir, CONFIG_NAME)}")


def llamafy_baichuan2(
    input_dir: str,
    output_dir: str,
    shard_size: str = "2GB",
    save_safetensors: bool = True,
):
    r"""
    Converts the Baichuan2-7B model in the same format as LLaMA2-7B.
    Usage: python llamafy_baichuan2.py --input_dir input --output_dir output
    Converted model: https://huggingface.co/hiyouga/Baichuan2-7B-Base-LLaMAfied
    """
    try:
        os.makedirs(output_dir, exist_ok=False)
    except Exception as e:
        raise print("Output dir already exists", e)

    save_weight(input_dir, output_dir, shard_size, save_safetensors)
    save_config(input_dir, output_dir)


if __name__ == "__main__":
    fire.Fire(llamafy_baichuan2)
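
The only non-trivial remapping in save_weight is the fused attention projection: Baichuan2 stacks the query, key, and value weights row-wise in a single W_pack matrix, and the script slices it into equal thirds. A toy illustration with a stand-in tensor (the shape is made up for the example and is far smaller than the real 7B weights):

import torch

hidden = 4  # illustrative hidden size, not the real model dimension
w_pack = torch.arange(3 * hidden * hidden, dtype=torch.float32).reshape(3 * hidden, hidden)

proj_size = w_pack.size(0) // 3                # same split rule as save_weight()
q_proj = w_pack[:proj_size, :]                 # first third  -> q_proj.weight
k_proj = w_pack[proj_size : 2 * proj_size, :]  # middle third -> k_proj.weight
v_proj = w_pack[2 * proj_size :, :]            # last third   -> v_proj.weight

assert q_proj.shape == k_proj.shape == v_proj.shape == (hidden, hidden)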

LLaMA-Factory/scripts/convert_ckpt/llamafy_qwen.py  0 → 100644

# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os
from collections import OrderedDict
from typing import Any, Dict

import fire
import torch
from safetensors import safe_open
from safetensors.torch import save_file
from tqdm import tqdm
from transformers.modeling_utils import (
    SAFE_WEIGHTS_INDEX_NAME,
    SAFE_WEIGHTS_NAME,
    WEIGHTS_INDEX_NAME,
    WEIGHTS_NAME,
    shard_checkpoint,
)
from transformers.utils import check_min_version


try:
    check_min_version("4.34.0")
except Exception:
    raise ValueError("Please upgrade `transformers` to 4.34.0")


CONFIG_NAME = "config.json"


def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool) -> str:
    qwen_state_dict: Dict[str, torch.Tensor] = OrderedDict()
    for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
        if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".safetensors"):
            with safe_open(os.path.join(input_dir, filepath), framework="pt", device="cpu") as f:
                for key in f.keys():
                    qwen_state_dict[key] = f.get_tensor(key)

    llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
    torch_dtype = None
    for key, value in tqdm(qwen_state_dict.items(), desc="Convert format"):
        if torch_dtype is None:
            torch_dtype = value.dtype

        if "wte" in key:
            llama2_state_dict["model.embed_tokens.weight"] = value
        elif "ln_f" in key:
            llama2_state_dict["model.norm.weight"] = value
        else:
            key = key.replace("transformer.h", "model.layers")
            if "attn.c_attn" in key:
                proj_size = value.size(0) // 3
                llama2_state_dict[key.replace("attn.c_attn", "self_attn.q_proj")] = value[:proj_size, ...]
                llama2_state_dict[key.replace("attn.c_attn", "self_attn.k_proj")] = value[
                    proj_size : 2 * proj_size, ...
                ]
                llama2_state_dict[key.replace("attn.c_attn", "self_attn.v_proj")] = value[2 * proj_size :, ...]
            elif "attn.c_proj" in key:
                llama2_state_dict[key.replace("attn.c_proj", "self_attn.o_proj")] = value
                llama2_state_dict[key.replace("attn.c_proj.weight", "self_attn.o_proj.bias")] = torch.zeros_like(
                    value[:, 0]
                ).squeeze()
            elif "ln_1" in key:
                llama2_state_dict[key.replace("ln_1", "input_layernorm")] = value
            elif "ln_2" in key:
                llama2_state_dict[key.replace("ln_2", "post_attention_layernorm")] = value
            elif "mlp.w1" in key:
                llama2_state_dict[key.replace("mlp.w1", "mlp.up_proj")] = value
            elif "mlp.w2" in key:
                llama2_state_dict[key.replace("mlp.w2", "mlp.gate_proj")] = value
            elif "mlp.c_proj" in key:
                llama2_state_dict[key.replace("mlp.c_proj", "mlp.down_proj")] = value
            elif "lm_head" in key:
                llama2_state_dict[key] = value
            else:
                raise KeyError(f"Unable to process key {key}")

    weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
    shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name)
    for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
        if save_safetensors:
            save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
        else:
            torch.save(shard, os.path.join(output_dir, shard_file))

    if index is None:
        print(f"Model weights saved in {os.path.join(output_dir, weights_name)}")
    else:
        index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
        with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2, sort_keys=True)

        print(f"Model weights saved in {output_dir}")

    return str(torch_dtype).replace("torch.", "")


def save_config(input_dir: str, output_dir: str, torch_dtype: str):
    with open(os.path.join(input_dir, CONFIG_NAME), encoding="utf-8") as f:
        qwen_config_dict: Dict[str, Any] = json.load(f)

    llama2_config_dict: Dict[str, Any] = OrderedDict()
    llama2_config_dict["architectures"] = ["LlamaForCausalLM"]
    llama2_config_dict["hidden_act"] = "silu"
    llama2_config_dict["hidden_size"] = qwen_config_dict["hidden_size"]
    llama2_config_dict["initializer_range"] = qwen_config_dict["initializer_range"]
    llama2_config_dict["intermediate_size"] = qwen_config_dict["intermediate_size"] // 2
    llama2_config_dict["max_position_embeddings"] = qwen_config_dict["max_position_embeddings"]
    llama2_config_dict["model_type"] = "llama"
    llama2_config_dict["num_attention_heads"] = qwen_config_dict["num_attention_heads"]
    llama2_config_dict["num_hidden_layers"] = qwen_config_dict["num_hidden_layers"]
    llama2_config_dict["num_key_value_heads"] = qwen_config_dict["hidden_size"] // qwen_config_dict["kv_channels"]
    llama2_config_dict["pretraining_tp"] = 1
    llama2_config_dict["rms_norm_eps"] = qwen_config_dict["layer_norm_epsilon"]
    llama2_config_dict["rope_scaling"] = None
    llama2_config_dict["tie_word_embeddings"] = qwen_config_dict["tie_word_embeddings"]
    llama2_config_dict["torch_dtype"] = torch_dtype
    llama2_config_dict["transformers_version"] = "4.34.0"
    llama2_config_dict["use_cache"] = True
    llama2_config_dict["vocab_size"] = qwen_config_dict["vocab_size"]
    llama2_config_dict["attention_bias"] = True

    with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f:
        json.dump(llama2_config_dict, f, indent=2)

    print(f"Model config saved in {os.path.join(output_dir, CONFIG_NAME)}")


def llamafy_qwen(
    input_dir: str,
    output_dir: str,
    shard_size: str = "2GB",
    save_safetensors: bool = False,
):
    r"""
    Converts the Qwen models in the same format as LLaMA2.
    Usage: python llamafy_qwen.py --input_dir input --output_dir output
    Converted model: https://huggingface.co/hiyouga/Qwen-14B-Chat-LLaMAfied
    """
    try:
        os.makedirs(output_dir, exist_ok=False)
    except Exception as e:
        raise print("Output dir already exists", e)

    torch_dtype = save_weight(input_dir, output_dir, shard_size, save_safetensors)
    save_config(input_dir, output_dir, torch_dtype)


if __name__ == "__main__":
    fire.Fire(llamafy_qwen)
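
Apart from the fused c_attn split (handled the same way as W_pack in the previous script), this conversion is mostly mechanical key renaming from the Qwen-1 layout to the LLaMA layout. Two illustrative inputs and the names the replace() calls above would produce:

# Pure string manipulation, mirroring the replace() chain in save_weight().
key = "transformer.h.0.attn.c_attn.weight"
key = key.replace("transformer.h", "model.layers")
print(key.replace("attn.c_attn", "self_attn.q_proj"))  # model.layers.0.self_attn.q_proj.weight

key = "transformer.h.5.mlp.w2.weight"
key = key.replace("transformer.h", "model.layers")
print(key.replace("mlp.w2", "mlp.gate_proj"))          # model.layers.5.mlp.gate_proj.weight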

LLaMA-Factory/scripts/llama_pro.py  0 → 100644

# Copyright 2024 Tencent Inc. and the LlamaFactory team.
#
# This code is inspired by the Tencent's LLaMA-Pro library.
# https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os
from collections import OrderedDict
from typing import TYPE_CHECKING

import fire
import torch
from safetensors.torch import save_file
from tqdm import tqdm
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
from transformers.modeling_utils import (
    SAFE_WEIGHTS_INDEX_NAME,
    SAFE_WEIGHTS_NAME,
    WEIGHTS_INDEX_NAME,
    WEIGHTS_NAME,
    shard_checkpoint,
)


if TYPE_CHECKING:
    from transformers import PretrainedConfig, PreTrainedModel


def change_name(name: str, old_index: int, new_index: int) -> str:
    return name.replace(f".{old_index:d}.", f".{new_index:d}.")


def block_expansion(
    model_name_or_path: str,
    output_dir: str,
    num_expand: int,
    shard_size: str = "2GB",
    save_safetensors: bool = True,
):
    r"""
    Performs block expansion for LLaMA, Mistral, Qwen1.5 or Yi models.
    Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8
    """
    config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path)
    num_layers = getattr(config, "num_hidden_layers")
    setattr(config, "num_hidden_layers", num_layers + num_expand)
    config.save_pretrained(output_dir)

    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    tokenizer.save_pretrained(output_dir)

    config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path)  # load the original one
    if save_safetensors:
        setattr(config, "tie_word_embeddings", False)  # safetensors does not allow shared weights

    model: "PreTrainedModel" = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        config=config,
        torch_dtype="auto",
        trust_remote_code=True,
        low_cpu_mem_usage=True,
    )
    state_dict = model.state_dict()

    if num_layers % num_expand != 0:
        raise ValueError(f"`num_layers` {num_layers} should be divisible by `num_expand` {num_expand}.")

    split = num_layers // num_expand
    layer_cnt = 0
    output_state_dict = OrderedDict()
    for i in range(num_layers):
        for key, value in state_dict.items():
            if f".{i:d}." in key:
                output_state_dict[change_name(key, i, layer_cnt)] = value

        print(f"Add layer {layer_cnt} copied from layer {i}")
        layer_cnt += 1
        if (i + 1) % split == 0:
            for key, value in state_dict.items():
                if f".{i:d}." in key:
                    if "down_proj" in key or "o_proj" in key:
                        output_state_dict[change_name(key, i, layer_cnt)] = torch.zeros_like(value)
                    else:
                        output_state_dict[change_name(key, i, layer_cnt)] = torch.clone(value)

            print(f"Add layer {layer_cnt} expanded from layer {i}")
            layer_cnt += 1

    for key, value in state_dict.items():
        if key not in output_state_dict:
            output_state_dict[key] = value

    weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
    shards, index = shard_checkpoint(output_state_dict, max_shard_size=shard_size, weights_name=weights_name)
    for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
        if save_safetensors:
            save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
        else:
            torch.save(shard, os.path.join(output_dir, shard_file))

    if index is None:
        print(f"Model weights saved in {os.path.join(output_dir, weights_name)}")
    else:
        index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
        with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2, sort_keys=True)

        print(f"Model weights saved in {output_dir}")

    print("- Fine-tune this model with:")
    print(f"model_name_or_path: {output_dir}")
    print("finetuning_type: freeze")
    print(f"freeze_trainable_layers: {num_expand}")
    print("use_llama_pro: true")


if __name__ == "__main__":
    fire.Fire(block_expansion)
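
The expansion schedule inserts one new block after every num_layers // num_expand original layers, and each new block starts as an identity-like copy because its o_proj and down_proj weights are zeroed, so its residual branch initially contributes nothing. A standalone sketch of the resulting layer mapping for an illustrative 8-layer model expanded by 4 (sizes chosen only for the example):

num_layers, num_expand = 8, 4      # illustrative sizes, not taken from the commit
split = num_layers // num_expand   # here: one new block after every 2 original layers

layer_cnt = 0
for i in range(num_layers):
    print(f"layer {layer_cnt} <- copy of original layer {i}")
    layer_cnt += 1
    if (i + 1) % split == 0:
        print(f"layer {layer_cnt} <- zero-init expansion of original layer {i}")
        layer_cnt += 1

print(f"total layers after expansion: {layer_cnt}")  # 12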