zhougaofeng / internlm2-math-7B · Commits

Commit 49d62d6a, authored Jun 11, 2024 by zhougaofeng
Upload New File
Parent: 7f7eae7f
Pipeline #1105: canceled

Showing 1 changed file with 92 additions and 0 deletions.

finetune/scripts/llamafy_baichuan2.py (new file, 0 → 100644)
# coding=utf-8
# Converts the Baichuan2-7B model to the same format as LLaMA2-7B.
# Usage: python llamafy_baichuan2.py --input_dir input --output_dir output
# Inspired by: https://huggingface.co/fireballoon/baichuan-llama-7b/blob/main/convert_baichuan_to_llama.py
# Converted model: https://huggingface.co/hiyouga/Baichuan2-7B-Base-LLaMAfied

import json
import os
from collections import OrderedDict
from typing import Any, Dict, Optional

import fire
import torch
from safetensors.torch import save_file
from tqdm import tqdm
from transformers.modeling_utils import (
    SAFE_WEIGHTS_INDEX_NAME,
    SAFE_WEIGHTS_NAME,
    WEIGHTS_INDEX_NAME,
    WEIGHTS_NAME,
    shard_checkpoint,
)

CONFIG_NAME = "config.json"


def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool):
    # Gather all .bin shards of the Baichuan2 checkpoint into one state dict.
    baichuan2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
    for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
        if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".bin"):
            shard_weight = torch.load(os.path.join(input_dir, filepath), map_location="cpu")
            baichuan2_state_dict.update(shard_weight)

    llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
    for key, value in tqdm(baichuan2_state_dict.items(), desc="Convert format"):
        if "W_pack" in key:
            # Baichuan2 fuses the Q/K/V projections into one W_pack matrix of
            # shape (3 * hidden_size, hidden_size); split it into the three
            # LLaMA-style projections.
            proj_size = value.size(0) // 3
            llama2_state_dict[key.replace("W_pack", "q_proj")] = value[:proj_size, :]
            llama2_state_dict[key.replace("W_pack", "k_proj")] = value[proj_size : 2 * proj_size, :]
            llama2_state_dict[key.replace("W_pack", "v_proj")] = value[2 * proj_size :, :]
        elif "lm_head" in key:
            # Baichuan2's NormHead normalizes the lm_head weight at inference
            # time; baking the normalization into the weight lets a plain
            # LLaMA head reproduce that behavior.
            llama2_state_dict[key] = torch.nn.functional.normalize(value)
        else:
            llama2_state_dict[key] = value

    weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
    shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name)
    for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
        if save_safetensors:
            save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
        else:
            torch.save(shard, os.path.join(output_dir, shard_file))

    if index is None:
        print("Model weights saved in {}".format(os.path.join(output_dir, WEIGHTS_NAME)))
    else:
        index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
        with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2, sort_keys=True)
        print("Model weights saved in {}".format(output_dir))


def save_config(input_dir: str, output_dir: str):
    with open(os.path.join(input_dir, CONFIG_NAME), "r", encoding="utf-8") as f:
        llama2_config_dict: Dict[str, Any] = json.load(f)

    # Rewrite the config so transformers loads the checkpoint through the
    # vanilla LLaMA classes instead of Baichuan's remote code.
    llama2_config_dict["architectures"] = ["LlamaForCausalLM"]
    llama2_config_dict.pop("auto_map", None)
    llama2_config_dict.pop("tokenizer_class", None)
    llama2_config_dict["model_type"] = "llama"

    with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f:
        json.dump(llama2_config_dict, f, indent=2)
    print("Model config saved in {}".format(os.path.join(output_dir, CONFIG_NAME)))


def llamafy_baichuan2(
    input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False
):
    # Refuse to overwrite an existing output directory.
    try:
        os.makedirs(output_dir, exist_ok=False)
    except FileExistsError as e:
        raise RuntimeError("Output dir already exists: {}".format(output_dir)) from e

    save_weight(input_dir, output_dir, shard_size, save_safetensors)
    save_config(input_dir, output_dir)


if __name__ == "__main__":
    fire.Fire(llamafy_baichuan2)
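
After conversion, the output directory should load through the stock LLaMA classes rather than Baichuan's remote code. A minimal smoke test, assuming the script above was run with --output_dir output (a hypothetical path; note the tokenizer files are not copied by this script and must be handled separately):

from transformers import LlamaForCausalLM

# "output" is whatever --output_dir was set to when running the converter.
model = LlamaForCausalLM.from_pretrained("output", torch_dtype="auto")
print(model.config.model_type)  # expected: "llama"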