OpenDAS / ktransformers · Commits

Commit 8db6a4d4 (unverified) · authored Feb 27, 2025 by Atream, committed via GitHub on Feb 27, 2025
Parents: cea07d19, 3c8c5805

Merge branch 'main' into main
Showing 3 changed files with 218 additions and 1 deletion:

    merge_tensors/merge_safetensor_gguf.py   +214 −0
    requirements-local_chat.txt              +3 −1
    setup.py                                 +1 −0
merge_tensors/merge_safetensor_gguf.py · new file (0 → 100644) @ 8db6a4d4
# This script merges the fp8 safetensors with the GGUF quantized tensors.
import os
# insert the project path so that ktransformers can be imported
import sys
sys.path.insert(0, "/home/azure/ktransformers")
import argparse
import torch
from ktransformers.util.custom_gguf import GGUFLoader, translate_name_to_gguf
from safetensors import safe_open
from safetensors.torch import save_file
import re
from collections import defaultdict
def read_safetensor_keys_from_folder(folder_path) -> dict:
    """
    :param folder_path: folder containing the safetensor shards
    :return: key_to_file_map mapping each tensor key to the file that holds it
    """
    # check that the folder path exists
    if not os.path.exists(folder_path):
        raise FileNotFoundError(f"Safetensor dir not found: {folder_path}")
    if os.path.isfile(folder_path):
        folder_path = os.path.dirname(folder_path)

    key_to_file_map = {}

    found_safetensor = False
    for root, dirs, files in os.walk(folder_path):
        # sort files for a deterministic traversal order
        files = sorted(files)
        for file in files:
            if file.endswith(".safetensors"):
                found_safetensor = True
                file_path = os.path.join(root, file)
                try:
                    with safe_open(file_path, framework="pt") as f:
                        for key in f.keys():
                            if "model.layers.61" in key:
                                # skip MTP layer
                                continue
                            # try:
                            #     if int(key.split('.')[2]) > 4:
                            #         continue
                            # except:
                            #     pass
                            key_to_file_map[key] = file_path
                except Exception as e:
                    print(f"Error reading Safetensor file {file_path}: {e}")

    if not found_safetensor:
        raise FileNotFoundError(f"No Safetensor files found in {folder_path}")

    return key_to_file_map
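
# Illustrative shape of the returned map (shard file names hypothetical):
#   {"model.embed_tokens.weight": "<folder>/model-00001-of-00030.safetensors",
#    "model.layers.0.mlp.experts.7.up_proj.weight": "<folder>/model-00007-of-00030.safetensors",
#    ...}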
tensor_from_gguf = []
# TODO: add keys in gguf that should be used in the final tensor

def translate_name(name: str) -> str:
    """
    :param name: name of the tensor
    :return: translated name
    """
    name = translate_name_to_gguf(name)
    name = name.replace(".up_proj.", ".ffn_up_exps.")
    name = name.replace(".down_proj.", ".ffn_down_exps.")
    name = name.replace(".gate_proj.", ".ffn_gate_exps.")
    name = name.replace(".ffn_gate_inp.e_score_correction_bias", ".exp_probs_b.bias")
    return name
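
# Illustration (tensor names hypothetical; the base mapping comes from
# ktransformers' translate_name_to_gguf): after the substitutions above, a
# fused expert projection such as "...mlp.experts.up_proj.weight" ends up
# under the GGUF-style suffix "...ffn_up_exps.weight".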
def combine_tensor_sources(safetensor_path: str, gguf_path: str):
    gguf_loader = GGUFLoader(gguf_path)
    gguf_tensor_file_map = gguf_loader.tensor_file_map
    safetensor_tensor_file_map = read_safetensor_keys_from_folder(safetensor_path)

    # build a map from each target key to the file the tensor will be read from
    target_tensor_map = {}
    for key in safetensor_tensor_file_map.keys():
        # for all experts, we use the gguf tensor
        if ".mlp.experts." in key:
            if '.weight_scale_inv' in key:
                continue
            # drop the per-expert index: keep the first five and last two components
            key = '.'.join(key.split('.')[:5] + key.split('.')[-2:])
            translated_key = translate_name(key)
            target_tensor_map[key] = gguf_tensor_file_map[translated_key]
            continue
        if any(target_key in key for target_key in tensor_from_gguf):
            target_tensor_map[key] = gguf_tensor_file_map[translate_name(key)]
        else:
            target_tensor_map[key] = safetensor_tensor_file_map[key]

    return target_tensor_map, gguf_loader
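
# Worked example of the expert-key collapsing above (key name illustrative):
#   "model.layers.3.mlp.experts.7.up_proj.weight"
#     split('.')[:5]  -> ['model', 'layers', '3', 'mlp', 'experts']
#     split('.')[-2:] -> ['up_proj', 'weight']
#   joined -> "model.layers.3.mlp.experts.up_proj.weight"  (expert index dropped)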
def write_combined_tensor(target_tensor_map: dict, output_path: str, gguf_loader: GGUFLoader):
    # Ensure output directory exists
    os.makedirs(output_path, exist_ok=True)

    # Cache for safetensor file handles and GGUF loaders
    safetensors_cache = {}
    gguf_cache = {}

    # Group tensors by layer
    layer_groups = defaultdict(list)
    non_layer_keys = []
    layer_pattern = re.compile(r'\.layers\.(\d+)\.')

    for key in target_tensor_map:
        match = layer_pattern.search(key)
        if match:
            layer_num = int(match.group(1))
            layer_groups[layer_num].append(key)
        else:
            non_layer_keys.append(key)

    # Calculate total shards
    total_shards = len(layer_groups) + (1 if non_layer_keys else 0) - 1
    if total_shards == 0:
        raise ValueError("No tensors to save")

    shard_idx = 0
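
    # shard_idx is zero-based and total_shards is the highest shard index
    # (shard count minus one), so the final file written is named
    # "model-{total_shards:05}-of-{total_shards:05}.safetensors".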
    # Save non-layer tensors to the first shard if they exist
    if non_layer_keys:
        tensors = {}
        for key in non_layer_keys:
            file_path = target_tensor_map[key]
            tensor = None
            ggml_type = None
            if file_path.endswith('.safetensors'):
                if file_path not in safetensors_cache:
                    safetensors_cache[file_path] = safe_open(file_path, framework='pt')
                f = safetensors_cache[file_path]
                tensor = f.get_tensor(key)
            elif file_path.endswith('.gguf'):
                gguf_name = translate_name(key)
                tensor, ggml_type = gguf_loader.get_undequanted_tensor_and_ggml_type(gguf_name)
            else:
                raise ValueError(f"Unsupported file format: {file_path}")
            tensors[translate_name(key)] = tensor
            if ggml_type:
                ggml_type = torch.tensor(ggml_type)
                ggml_key = translate_name(key)[:-7] + ".ggml_type" if translate_name(key).endswith(".weight") else translate_name(key) + ".ggml_type"
                tensors[ggml_key] = ggml_type

        output_file = os.path.join(output_path, f"model-{shard_idx:05}-of-{total_shards:05}.safetensors")
        print(f"Saving non-layer tensors to {output_file}")
        save_file(tensors, output_file)
        print(tensors.keys())
        shard_idx += 1
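
    # Sidecar key naming used above and below: a gguf-sourced weight such as
    # "blk.0.attn_norm.weight" (illustrative) gets its quantization type stored
    # under "blk.0.attn_norm.ggml_type" ([:-7] strips the trailing ".weight").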
    # Save each layer's tensors to subsequent shards
    for layer_num in sorted(layer_groups.keys()):
        layer_keys = layer_groups[layer_num]
        tensors = {}
        for key in layer_keys:
            file_path = target_tensor_map[key]
            tensor = None
            ggml_type = None
            if file_path.endswith('.safetensors'):
                if file_path not in safetensors_cache:
                    safetensors_cache[file_path] = safe_open(file_path, framework='pt')
                f = safetensors_cache[file_path]
                tensor = f.get_tensor(key)
                tensor_info = tensor.shape
            elif file_path.endswith('.gguf'):
                gguf_name = translate_name(key)
                tensor, ggml_type = gguf_loader.get_undequanted_tensor_and_ggml_type(gguf_name)
                # tensor_info = gguf_loader.tensor_info[gguf_name]
                # ggml_type = gguf_loader.tensor_info[gguf_name]['ggml_type']
            else:
                raise ValueError(f"Unsupported file format: {file_path}")
            tensors[translate_name(key)] = tensor
            if ggml_type:
                ggml_type = torch.tensor(ggml_type)
                ggml_key = translate_name(key)[:-7] + ".ggml_type" if translate_name(key).endswith(".weight") else translate_name(key) + ".ggml_type"
                tensors[ggml_key] = ggml_type

        output_file = os.path.join(output_path, f"model-{shard_idx:05}-of-{total_shards:05}.safetensors")
        print(f"Saving layer {layer_num} to {output_file}")
        # print(tensors.keys())
        save_file(tensors, output_file)
        shard_idx += 1

    return
def main():
    # Create the command-line argument parser
    parser = argparse.ArgumentParser(description="Read parameters from Safetensor and GGUF files")
    parser.add_argument("--safetensor_path", type=str, help="Path to the Safetensor file", default="/mnt/data/model/DeepSeek-V3")
    parser.add_argument("--gguf_path", type=str, help="Path to the GGUF file", default="/mnt/data/model/DeepseekV3-q4km-gguf")
    parser.add_argument("--output_path", type=str, help="Path to the output file", default="/mnt/data/model/ktrans-safetensors/DeepSeek-V3-q4km-fp8")

    # print all the arguments
    print("All the arguments:")
    print(parser.parse_args())

    # Parse the command-line arguments
    args = parser.parse_args()

    safetensor_path = args.safetensor_path
    gguf_path = args.gguf_path
    output_path = args.output_path

    target_tensor_map, gguf_loader = combine_tensor_sources(safetensor_path, gguf_path)
    write_combined_tensor(target_tensor_map, output_path, gguf_loader)

    return

if __name__ == "__main__":
    main()
\ No newline at end of file
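
After the merge, each output shard can be inspected with the standard safetensors API; a minimal sketch (the shard name below is hypothetical):

    from safetensors import safe_open

    # Hypothetical shard written by the script above.
    with safe_open("model-00000-of-00061.safetensors", framework="pt") as f:
        for key in f.keys():
            if key.endswith(".ggml_type"):
                # scalar sidecar recording the GGML quantization type of its tensor
                print(key, f.get_tensor(key))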
requirements-local_chat.txt @ 8db6a4d4

@@ -4,4 +4,6 @@ numpy
 torch>=2.3.0
 packaging
 cpufeature
-protobuf
\ No newline at end of file
+protobuf
+tiktoken
+blobfile
\ No newline at end of file
setup.py @ 8db6a4d4

@@ -350,6 +350,7 @@ elif MUSA_HOME is not None:
             "at::cuda": "at::musa",
             "#include <ATen/cuda/CUDAContext.h>": "#include \"torch_musa/csrc/aten/musa/MUSAContext.h\"",
             "#include <c10/cuda/CUDAGuard.h>": "#include \"torch_musa/csrc/core/MUSAGuard.h\"",
+            "nv_bfloat16": "mt_bfloat16",
         }).run()
     ops_module = MUSAExtension('KTransformersOps', [
         'ktransformers/ktransformers_ext/cuda_musa/custom_gguf/dequant.mu',
         ...
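
The setup.py hunk above extends a source-substitution map used when building against MUSA (Moore Threads' GPU stack) instead of CUDA: before compilation, CUDA identifiers and headers in the extension sources are textually rewritten to their torch_musa equivalents. A minimal standalone sketch of that idea (the `run()` plumbing lives in ktransformers' setup.py and is not reproduced here):

    # Sketch: apply the CUDA -> MUSA textual substitutions from the hunk above.
    MUSA_REPLACEMENTS = {
        "at::cuda": "at::musa",
        "#include <ATen/cuda/CUDAContext.h>": '#include "torch_musa/csrc/aten/musa/MUSAContext.h"',
        "#include <c10/cuda/CUDAGuard.h>": '#include "torch_musa/csrc/core/MUSAGuard.h"',
        "nv_bfloat16": "mt_bfloat16",
    }

    def port_cuda_source_to_musa(source: str) -> str:
        # Apply each substitution in turn; order matters only if patterns overlap.
        for cuda_token, musa_token in MUSA_REPLACEMENTS.items():
            source = source.replace(cuda_token, musa_token)
        return source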