Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
2fec73ee
Commit
2fec73ee
authored
Apr 11, 2025
by
Michael Yang
Committed by
Michael Yang
Apr 16, 2025
Browse files
fix write gguf padding
parent
1e7f62cb
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
13 additions
and
16 deletions
+13
-16
convert/convert_llama.go
convert/convert_llama.go
+7
-7
convert/convert_phi3.go
convert/convert_phi3.go
+1
-2
fs/ggml/gguf.go
fs/ggml/gguf.go
+5
-7
No files found.
convert/convert_llama.go
View file @
2fec73ee
...
@@ -28,12 +28,12 @@ type llamaModel struct {
...
@@ -28,12 +28,12 @@ type llamaModel struct {
NumKeyValueHeads
uint32
`json:"num_key_value_heads"`
NumKeyValueHeads
uint32
`json:"num_key_value_heads"`
RopeTheta
float32
`json:"rope_theta"`
RopeTheta
float32
`json:"rope_theta"`
RopeScaling
struct
{
RopeScaling
struct
{
Type
string
`json:"type"`
Type
string
`json:"type"`
RopeType
string
`json:"rope_type"`
RopeType
string
`json:"rope_type"`
Factor
float32
`json:"factor"`
Factor
float32
`json:"factor"`
LowFrequencyFactor
float32
`json:"low_freq_factor"`
LowFrequencyFactor
float32
`json:"low_freq_factor"`
HighFrequencyFactor
float32
`json:"high_freq_factor"`
HighFrequencyFactor
float32
`json:"high_freq_factor"`
OriginalMaxPosition
al
Embeddings
uint32
`json:"original_max_position
al
_embeddings"`
OriginalMaxPositionEmbeddings
uint32
`json:"original_max_position_embeddings"`
factors
ropeFactor
factors
ropeFactor
}
`json:"rope_scaling"`
}
`json:"rope_scaling"`
...
@@ -84,7 +84,7 @@ func (p *llamaModel) KV(t *Tokenizer) ggml.KV {
...
@@ -84,7 +84,7 @@ func (p *llamaModel) KV(t *Tokenizer) ggml.KV {
factorLow
:=
cmp
.
Or
(
p
.
RopeScaling
.
LowFrequencyFactor
,
1.0
)
factorLow
:=
cmp
.
Or
(
p
.
RopeScaling
.
LowFrequencyFactor
,
1.0
)
factorHigh
:=
cmp
.
Or
(
p
.
RopeScaling
.
HighFrequencyFactor
,
4.0
)
factorHigh
:=
cmp
.
Or
(
p
.
RopeScaling
.
HighFrequencyFactor
,
4.0
)
original
:=
cmp
.
Or
(
p
.
RopeScaling
.
OriginalMaxPosition
al
Embeddings
,
8192
)
original
:=
cmp
.
Or
(
p
.
RopeScaling
.
OriginalMaxPositionEmbeddings
,
8192
)
lambdaLow
:=
float32
(
original
)
/
factorLow
lambdaLow
:=
float32
(
original
)
/
factorLow
lambdaHigh
:=
float32
(
original
)
/
factorHigh
lambdaHigh
:=
float32
(
original
)
/
factorHigh
...
...
convert/convert_phi3.go
View file @
2fec73ee
...
@@ -118,6 +118,5 @@ func (p *phi3Model) Replacements() []string {
...
@@ -118,6 +118,5 @@ func (p *phi3Model) Replacements() []string {
type
ropeFactor
[]
float32
type
ropeFactor
[]
float32
func
(
r
ropeFactor
)
WriteTo
(
w
io
.
Writer
)
(
int64
,
error
)
{
func
(
r
ropeFactor
)
WriteTo
(
w
io
.
Writer
)
(
int64
,
error
)
{
err
:=
binary
.
Write
(
w
,
binary
.
LittleEndian
,
r
)
return
0
,
binary
.
Write
(
w
,
binary
.
LittleEndian
,
r
)
return
0
,
err
}
}
fs/ggml/gguf.go
View file @
2fec73ee
...
@@ -235,10 +235,7 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
...
@@ -235,10 +235,7 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
// patch KV with parameter count
// patch KV with parameter count
llm
.
kv
[
"general.parameter_count"
]
=
llm
.
parameters
llm
.
kv
[
"general.parameter_count"
]
=
llm
.
parameters
alignment
,
ok
:=
llm
.
kv
[
"general.alignment"
]
.
(
uint32
)
alignment
:=
llm
.
kv
.
Uint
(
"general.alignment"
,
32
)
if
!
ok
{
alignment
=
32
}
offset
,
err
:=
rs
.
Seek
(
0
,
io
.
SeekCurrent
)
offset
,
err
:=
rs
.
Seek
(
0
,
io
.
SeekCurrent
)
if
err
!=
nil
{
if
err
!=
nil
{
...
@@ -506,6 +503,8 @@ func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
...
@@ -506,6 +503,8 @@ func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
}
}
func
WriteGGUF
(
ws
io
.
WriteSeeker
,
kv
KV
,
ts
[]
Tensor
)
error
{
func
WriteGGUF
(
ws
io
.
WriteSeeker
,
kv
KV
,
ts
[]
Tensor
)
error
{
alignment
:=
kv
.
Uint
(
"general.alignment"
,
32
)
if
err
:=
binary
.
Write
(
ws
,
binary
.
LittleEndian
,
[]
byte
(
"GGUF"
));
err
!=
nil
{
if
err
:=
binary
.
Write
(
ws
,
binary
.
LittleEndian
,
[]
byte
(
"GGUF"
));
err
!=
nil
{
return
err
return
err
}
}
...
@@ -543,16 +542,15 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
...
@@ -543,16 +542,15 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
var
s
uint64
var
s
uint64
for
_
,
t
:=
range
ts
{
for
_
,
t
:=
range
ts
{
t
.
Offset
=
s
t
.
Offset
=
s
+
uint64
(
ggufPadding
(
int64
(
s
),
int64
(
alignment
)))
if
err
:=
ggufWriteTensorInfo
(
ws
,
t
);
err
!=
nil
{
if
err
:=
ggufWriteTensorInfo
(
ws
,
t
);
err
!=
nil
{
return
err
return
err
}
}
s
+=
t
.
Size
()
s
+=
t
.
Size
()
}
}
var
alignment
int64
=
32
for
_
,
t
:=
range
ts
{
for
_
,
t
:=
range
ts
{
if
err
:=
ggufWriteTensor
(
ws
,
t
,
alignment
);
err
!=
nil
{
if
err
:=
ggufWriteTensor
(
ws
,
t
,
int64
(
alignment
)
)
;
err
!=
nil
{
return
err
return
err
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment