Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
cc6463eb
Unverified
Commit
cc6463eb
authored
Jun 24, 2025
by
Devon Rifkin
Committed by
GitHub
Jun 24, 2025
Browse files
Merge pull request #10238 from ollama/drifkin/array-head-count-simple
ggml: fix crash for array head counts
parents
405d2f62
a3f7dd3e
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
111 additions
and
31 deletions
+111
-31
fs/ggml/ggml.go
fs/ggml/ggml.go
+74
-29
fs/ggml/ggml_test.go
fs/ggml/ggml_test.go
+30
-0
llm/memory.go
llm/memory.go
+6
-1
server/sched.go
server/sched.go
+1
-1
No files found.
fs/ggml/ggml.go
View file @
cc6463eb
...
...
@@ -34,7 +34,8 @@ func (kv KV) Kind() string {
}
func
(
kv
KV
)
ParameterCount
()
uint64
{
return
keyValue
(
kv
,
"general.parameter_count"
,
uint64
(
0
))
val
,
_
:=
keyValue
(
kv
,
"general.parameter_count"
,
uint64
(
0
))
return
val
}
func
(
kv
KV
)
FileType
()
FileType
{
...
...
@@ -53,16 +54,27 @@ func (kv KV) EmbeddingLength() uint64 {
return
uint64
(
kv
.
Uint
(
"embedding_length"
))
}
func
(
kv
KV
)
HeadCount
()
uint64
{
return
uint64
(
kv
.
Uint
(
"attention.head_count"
))
func
(
kv
KV
)
HeadCountMax
()
uint64
{
// TODO(drifkin): using the max value can cause an overestimation. In the
// future if array values become more popular, we can adapt the more invasive
// <https://github.com/ollama/ollama/pull/10225>
return
uint64
(
kv
.
UintOrMaxArrayValue
(
"attention.head_count"
,
1
))
}
func
(
kv
KV
)
HeadCount
KV
()
uint64
{
return
uint64
(
kv
.
Uint
(
"attention.head_count
_kv
"
,
1
))
func
(
kv
KV
)
HeadCount
Min
()
uint64
{
return
uint64
(
kv
.
Uint
OrMinArrayValue
(
"attention.head_count"
,
1
))
}
func
(
kv
KV
)
EmbeddingHeadCount
()
uint64
{
if
heads
:=
kv
.
HeadCount
();
heads
>
0
{
func
(
kv
KV
)
HeadCountKVMax
()
uint64
{
return
uint64
(
kv
.
UintOrMaxArrayValue
(
"attention.head_count_kv"
,
1
))
}
func
(
kv
KV
)
HeadCountKVMin
()
uint64
{
return
uint64
(
kv
.
UintOrMinArrayValue
(
"attention.head_count_kv"
,
1
))
}
func
(
kv
KV
)
EmbeddingHeadCountMax
()
uint64
{
if
heads
:=
kv
.
HeadCountMin
();
heads
>
0
{
return
kv
.
EmbeddingLength
()
/
heads
}
...
...
@@ -70,15 +82,11 @@ func (kv KV) EmbeddingHeadCount() uint64 {
}
func
(
kv
KV
)
EmbeddingHeadCountK
()
uint64
{
return
uint64
(
kv
.
Uint
(
"attention.key_length"
,
uint32
(
kv
.
EmbeddingHeadCount
())))
return
uint64
(
kv
.
Uint
(
"attention.key_length"
,
uint32
(
kv
.
EmbeddingHeadCount
Max
())))
}
func
(
kv
KV
)
EmbeddingHeadCountV
()
uint64
{
return
uint64
(
kv
.
Uint
(
"attention.value_length"
,
uint32
(
kv
.
EmbeddingHeadCount
())))
}
func
(
kv
KV
)
GQA
()
uint64
{
return
kv
.
HeadCount
()
/
kv
.
HeadCountKV
()
return
uint64
(
kv
.
Uint
(
"attention.value_length"
,
uint32
(
kv
.
EmbeddingHeadCountMax
())))
}
func
(
kv
KV
)
ContextLength
()
uint64
{
...
...
@@ -90,35 +98,72 @@ func (kv KV) ChatTemplate() string {
}
func
(
kv
KV
)
String
(
key
string
,
defaultValue
...
string
)
string
{
return
keyValue
(
kv
,
key
,
append
(
defaultValue
,
""
)
...
)
val
,
_
:=
keyValue
(
kv
,
key
,
append
(
defaultValue
,
""
)
...
)
return
val
}
func
(
kv
KV
)
Uint
(
key
string
,
defaultValue
...
uint32
)
uint32
{
return
keyValue
(
kv
,
key
,
append
(
defaultValue
,
0
)
...
)
val
,
_
:=
keyValue
(
kv
,
key
,
append
(
defaultValue
,
0
)
...
)
return
val
}
func
(
kv
KV
)
Float
(
key
string
,
defaultValue
...
float32
)
float32
{
return
keyValue
(
kv
,
key
,
append
(
defaultValue
,
0
)
...
)
val
,
_
:=
keyValue
(
kv
,
key
,
append
(
defaultValue
,
0
)
...
)
return
val
}
func
(
kv
KV
)
Bool
(
key
string
,
defaultValue
...
bool
)
bool
{
return
keyValue
(
kv
,
key
,
append
(
defaultValue
,
false
)
...
)
val
,
_
:=
keyValue
(
kv
,
key
,
append
(
defaultValue
,
false
)
...
)
return
val
}
func
(
kv
KV
)
UintOrMaxArrayValue
(
key
string
,
defaultValue
uint32
)
uint32
{
_
,
max
:=
kv
.
UintOrArrayValue
(
key
,
defaultValue
)
return
max
}
func
(
kv
KV
)
UintOrMinArrayValue
(
key
string
,
defaultValue
uint32
)
uint32
{
min
,
_
:=
kv
.
UintOrArrayValue
(
key
,
defaultValue
)
return
min
}
func
(
kv
KV
)
UintOrArrayValue
(
key
string
,
defaultValue
uint32
)
(
uint32
,
uint32
)
{
if
u32
,
ok
:=
keyValue
(
kv
,
key
,
uint32
(
0
));
ok
{
return
u32
,
u32
}
else
if
u32s
,
ok
:=
keyValue
(
kv
,
key
,
&
array
[
uint32
]{});
ok
{
min
:=
slices
.
Min
(
u32s
.
values
)
max
:=
slices
.
Max
(
u32s
.
values
)
return
min
,
max
}
else
if
i32s
,
ok
:=
keyValue
(
kv
,
key
,
&
array
[
int32
]{});
ok
{
min
:=
slices
.
Min
(
i32s
.
values
)
max
:=
slices
.
Max
(
i32s
.
values
)
if
min
<
0
||
max
<
0
{
slog
.
Warn
(
"array values are unexpectedly negative"
,
"key"
,
key
,
"min"
,
min
,
"max"
,
max
)
}
return
uint32
(
min
),
uint32
(
max
)
}
return
defaultValue
,
defaultValue
}
func
(
kv
KV
)
Strings
(
key
string
,
defaultValue
...
[]
string
)
[]
string
{
return
keyValue
(
kv
,
key
,
&
array
[
string
]{
values
:
append
(
defaultValue
,
[]
string
(
nil
))[
0
]})
.
values
val
,
_
:=
keyValue
(
kv
,
key
,
&
array
[
string
]{
values
:
append
(
defaultValue
,
[]
string
(
nil
))[
0
]})
return
val
.
values
}
func
(
kv
KV
)
Ints
(
key
string
,
defaultValue
...
[]
int32
)
[]
int32
{
return
keyValue
(
kv
,
key
,
&
array
[
int32
]{
values
:
append
(
defaultValue
,
[]
int32
(
nil
))[
0
]})
.
values
val
,
_
:=
keyValue
(
kv
,
key
,
&
array
[
int32
]{
values
:
append
(
defaultValue
,
[]
int32
(
nil
))[
0
]})
return
val
.
values
}
func
(
kv
KV
)
Uints
(
key
string
,
defaultValue
...
[]
uint32
)
[]
uint32
{
return
keyValue
(
kv
,
key
,
&
array
[
uint32
]{
values
:
append
(
defaultValue
,
[]
uint32
(
nil
))[
0
]})
.
values
val
,
_
:=
keyValue
(
kv
,
key
,
&
array
[
uint32
]{
values
:
append
(
defaultValue
,
[]
uint32
(
nil
))[
0
]})
return
val
.
values
}
func
(
kv
KV
)
Floats
(
key
string
,
defaultValue
...
[]
float32
)
[]
float32
{
return
keyValue
(
kv
,
key
,
&
array
[
float32
]{
values
:
append
(
defaultValue
,
[]
float32
(
nil
))[
0
]})
.
values
val
,
_
:=
keyValue
(
kv
,
key
,
&
array
[
float32
]{
values
:
append
(
defaultValue
,
[]
float32
(
nil
))[
0
]})
return
val
.
values
}
func
(
kv
KV
)
OllamaEngineRequired
()
bool
{
...
...
@@ -143,17 +188,17 @@ type arrayValueTypes interface {
*
array
[
string
]
|
*
array
[
float32
]
|
*
array
[
float64
]
|
*
array
[
bool
]
}
func
keyValue
[
T
valueTypes
|
arrayValueTypes
](
kv
KV
,
key
string
,
defaultValue
...
T
)
T
{
func
keyValue
[
T
valueTypes
|
arrayValueTypes
](
kv
KV
,
key
string
,
defaultValue
...
T
)
(
T
,
bool
)
{
if
!
strings
.
HasPrefix
(
key
,
"tokenizer."
)
&&
!
strings
.
HasPrefix
(
key
,
"general."
)
{
key
=
kv
.
Architecture
()
+
"."
+
key
}
if
val
,
ok
:=
kv
[
key
];
ok
{
return
val
.
(
T
)
if
val
,
ok
:=
kv
[
key
]
.
(
T
)
;
ok
{
return
val
,
true
}
slog
.
Debug
(
"key not found"
,
"key"
,
key
,
"default"
,
defaultValue
[
0
])
return
defaultValue
[
0
]
slog
.
Debug
(
"key
with type
not found"
,
"key"
,
key
,
"default"
,
defaultValue
[
0
])
return
defaultValue
[
0
]
,
false
}
type
Tensors
struct
{
...
...
@@ -425,11 +470,11 @@ func Decode(rs io.ReadSeeker, maxArraySize int) (*GGML, error) {
func
(
f
GGML
)
GraphSize
(
context
,
batch
uint64
,
numParallel
int
,
kvCacheType
string
)
(
kv
[]
uint64
,
partialOffload
,
fullOffload
uint64
)
{
embedding
:=
f
.
KV
()
.
EmbeddingLength
()
heads
:=
f
.
KV
()
.
HeadCount
()
headsKV
:=
f
.
KV
()
.
HeadCountKV
()
heads
:=
f
.
KV
()
.
HeadCount
Max
()
headsKV
:=
f
.
KV
()
.
HeadCountKV
Max
()
vocab
:=
uint64
(
f
.
KV
()[
"tokenizer.ggml.tokens"
]
.
(
*
array
[
string
])
.
size
)
embeddingHeads
:=
f
.
KV
()
.
EmbeddingHeadCount
()
embeddingHeads
:=
f
.
KV
()
.
EmbeddingHeadCount
Max
()
embeddingHeadsK
:=
f
.
KV
()
.
EmbeddingHeadCountK
()
embeddingHeadsV
:=
f
.
KV
()
.
EmbeddingHeadCountV
()
...
...
fs/ggml/ggml_test.go
View file @
cc6463eb
...
...
@@ -269,3 +269,33 @@ func TestKeyValue(t *testing.T) {
t
.
Errorf
(
"unexpected uint8s (-got +want):
\n
%s"
,
diff
)
}
}
func
TestHeadCount
(
t
*
testing
.
T
)
{
valuesArray
:=
[]
int32
{
1
,
5
,
3
,
4
}
cases
:=
[]
struct
{
kv
KV
want
uint64
}{
{
kv
:
KV
{
"general.architecture"
:
"abc"
,
"abc.attention.head_count"
:
&
array
[
int32
]{
values
:
valuesArray
,
size
:
len
(
valuesArray
)},
},
want
:
uint64
(
5
),
},
{
kv
:
KV
{
"general.architecture"
:
"abc"
,
"abc.attention.head_count"
:
uint32
(
3
),
},
want
:
uint64
(
3
),
},
}
for
_
,
tt
:=
range
cases
{
got
:=
tt
.
kv
.
HeadCountMax
()
if
got
!=
tt
.
want
{
t
.
Errorf
(
"unexpected max value: got=%d want=%d"
,
got
,
tt
.
want
)
}
}
}
llm/memory.go
View file @
cc6463eb
...
...
@@ -151,7 +151,12 @@ func EstimateGPULayers(gpus []discover.GpuInfo, f *ggml.GGML, projectors []strin
}
if
graphPartialOffload
==
0
{
graphPartialOffload
=
f
.
KV
()
.
GQA
()
*
kvTotal
/
6
headsKV
:=
f
.
KV
()
.
HeadCountKVMin
()
if
headsKV
==
0
{
headsKV
=
1
}
gqa
:=
f
.
KV
()
.
HeadCountMax
()
/
headsKV
graphPartialOffload
=
gqa
*
kvTotal
/
6
}
if
graphFullOffload
==
0
{
graphFullOffload
=
graphPartialOffload
...
...
server/sched.go
View file @
cc6463eb
...
...
@@ -191,7 +191,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
}
// Load model for fitting
ggml
,
err
:=
llm
.
LoadModel
(
pending
.
model
.
ModelPath
,
0
)
ggml
,
err
:=
llm
.
LoadModel
(
pending
.
model
.
ModelPath
,
1024
)
if
err
!=
nil
{
pending
.
errCh
<-
err
break
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment