OpenDAS / ktransformers · Commits

Unverified commit cdb6f896, authored Feb 23, 2025 by Atream, committed by GitHub on Feb 23, 2025

Merge pull request #612 from kvcache-ai/fix-bf16-load

fix bf16 load, TODO: refactor cpu dequant

Parents: 94ab2de3, 036ae25a
Showing 1 changed file with 6 additions and 2 deletions.
ktransformers/util/custom_gguf.py (+6, -2) @ cdb6f896

@@ -314,10 +314,12 @@ class GGUFLoader:
         return values
 
-    def load_gguf_tensor(self, name: str, device: str = "cpu", target_dtype = torch.get_default_dtype()) -> torch.Tensor:
+    def load_gguf_tensor(self, name: str, device: str = "cpu", target_dtype = None) -> torch.Tensor:
         t = self.tensor_info[name]
         if device.lower() == "cpu":
             print(f"loading {name} with CPU")
+        if target_dtype == None:
+            target_dtype = torch.get_default_dtype()
         shape = t["shape"]
         ggml_type = t["ggml_type"]
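The signature change above matters because a Python default argument is evaluated once, when the function is defined, not on each call. A minimal standalone sketch (not from the repository) of the difference:

```python
import torch

# A default argument is evaluated once, at function definition time.
def old_style(target_dtype=torch.get_default_dtype()):
    return target_dtype            # frozen to whatever the default dtype was at import

# Resolving the default inside the body picks up the dtype at call time instead.
def new_style(target_dtype=None):
    if target_dtype is None:
        target_dtype = torch.get_default_dtype()
    return target_dtype

torch.set_default_dtype(torch.bfloat16)
print(old_style())   # torch.float32 -- captured before set_default_dtype ran
print(new_style())   # torch.bfloat16 -- reflects the current default
torch.set_default_dtype(torch.float32)
```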
@@ -336,7 +338,7 @@ class GGUFLoader:
         blocks_per_iter = 16384
         if num_blocks > blocks_per_iter: # dequant large tensor
-            values = torch.empty((num_blocks, elements_per_block), dtype=torch.float, device=device)
+            values = torch.empty((num_blocks, elements_per_block), dtype=target_dtype, device=device)
             for i in range( (num_blocks + blocks_per_iter - 1) // blocks_per_iter):
                 blocks_begin = i * blocks_per_iter
                 blocks_end = min(blocks_begin + blocks_per_iter, num_blocks)
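For context, the loop above dequantizes a large tensor in slices of blocks_per_iter blocks, and the output buffer is now allocated directly in the requested dtype rather than float32. A rough standalone sketch of that chunked-fill pattern; the sizes and the per-chunk "dequant" below are made up, not taken from the loader:

```python
import torch

# Hypothetical sizes; the real loader derives these from the GGUF tensor metadata.
num_blocks, elements_per_block, blocks_per_iter = 50_000, 256, 16_384
target_dtype = torch.bfloat16

# Allocate the result once in the target dtype, then fill it range by range so only
# blocks_per_iter blocks worth of float32 intermediates are alive at any moment.
values = torch.empty((num_blocks, elements_per_block), dtype=target_dtype)
for i in range((num_blocks + blocks_per_iter - 1) // blocks_per_iter):
    begin = i * blocks_per_iter
    end = min(begin + blocks_per_iter, num_blocks)
    chunk = torch.randn(end - begin, elements_per_block)  # stand-in for the dequantized slice
    values[begin:end] = chunk                              # cast into target_dtype on assignment

print(values.shape, values.dtype)
```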
@@ -347,6 +349,8 @@ class GGUFLoader:
                 cur_values = torch.from_numpy(cur_values.copy())
                 cur_values = cur_values.view(-1, elements_per_block)
+                if ggml_name == "BF16":
+                    cur_values = cur_values.view(torch.bfloat16)
                 values[blocks_begin : blocks_end] = cur_values
         else:
             if "cuda" in device.lower():
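The added BF16 branch reinterprets the raw 16-bit words read from the GGUF file as bfloat16, rather than numerically converting them. A standalone sketch of why Tensor.view(torch.bfloat16) is the operation needed here; the bit patterns below are illustrative and not the loader's actual buffer handling:

```python
import numpy as np
import torch

# Bit patterns of the bfloat16 values 1.0, 2.0 and 0.5, stored as plain 16-bit integers,
# which is roughly how BF16 weights look before reinterpretation.
raw = np.array([0x3F80, 0x4000, 0x3F00], dtype=np.int16)
bits = torch.from_numpy(raw.copy())

# view(dtype) reinterprets the same 2-byte elements as bfloat16: the bits are unchanged.
as_bf16 = bits.view(torch.bfloat16)      # tensor([1., 2., 0.5], dtype=torch.bfloat16)

# A numeric cast would instead convert the integers 16256, 16384, 16128 to bfloat16,
# which is the wrong result for raw weight bits.
as_numbers = bits.to(torch.bfloat16)

print(as_bf16)
print(as_numbers)
```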