Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
4ea4d2b1
Unverified
Commit
4ea4d2b1
authored
Mar 13, 2025
by
Michael Yang
Committed by
GitHub
Mar 13, 2025
Browse files
Merge pull request #9703 from ollama/mxyng/gemma3-memory
count gemma3 vision tensors
parents
74b44fdf
8d76fa23
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
34 additions
and
21 deletions
+34
-21
fs/ggml/ggml.go
fs/ggml/ggml.go
+32
-19
llm/memory.go
llm/memory.go
+2
-2
No files found.
fs/ggml/ggml.go
View file @
4ea4d2b1
...
...
@@ -583,39 +583,52 @@ func (f GGML) GraphSize(context, batch uint64, kvCacheType string) (kv, partialO
}
func
(
llm
GGML
)
VisionGraphSize
()
(
weights
,
graphSize
uint64
)
{
switch
llm
.
KV
()
.
Architecture
()
{
case
"mllama"
:
for
_
,
layer
:=
range
llm
.
Tensors
()
.
GroupLayers
()[
"v"
]
{
weights
+=
layer
.
Size
()
}
if
llm
.
KV
()
.
Uint
(
"vision.block_count"
)
==
0
{
return
}
kv
:=
func
(
n
string
)
uint64
{
if
v
,
ok
:=
llm
.
KV
()[
"mllama.vision."
+
n
]
.
(
uint32
);
ok
{
return
uint64
(
v
)
for
name
,
layer
:=
range
llm
.
Tensors
()
.
GroupLayers
()
{
if
name
==
"v"
||
strings
.
HasPrefix
(
name
,
"v."
)
{
for
_
,
tensor
:=
range
layer
{
weights
+=
tensor
.
Size
()
}
return
0
}
}
imageSize
:=
uint64
(
llm
.
KV
()
.
Uint
(
"vision.image_size"
))
patchSize
:=
uint64
(
llm
.
KV
()
.
Uint
(
"vision.patch_size"
))
if
patchSize
==
0
{
slog
.
Warn
(
"unknown patch size for vision model"
)
return
}
imageSize
:=
kv
(
"image_size"
)
numChannels
:=
uint64
(
llm
.
KV
()
.
Uint
(
"vision.num_channels"
)
)
maxNumTiles
:=
kv
(
"max_num_tiles"
)
embeddingLength
:=
kv
(
"embedding_length"
)
headCount
:=
kv
(
"attention.head_count"
)
numPatches
:=
(
imageSize
/
patchSize
)
*
(
imageSize
/
patchSize
)
if
_
,
ok
:=
llm
.
Tensors
()
.
GroupLayers
()[
"v"
][
"class_embd"
];
ok
{
numPatches
++
}
numPatches
:=
(
imageSize
/
kv
(
"patch_size"
))
*
(
imageSize
/
kv
(
"patch_size"
))
if
_
,
ok
:=
llm
.
Tensors
()
.
GroupLayers
()[
"v"
][
"class_embd"
];
ok
{
numPatches
++
}
headCount
:=
uint64
(
llm
.
KV
()
.
Uint
(
"vision.attention.head_count"
))
embeddingLength
:=
uint64
(
llm
.
KV
()
.
Uint
(
"vision.embedding_length"
))
switch
llm
.
KV
()
.
Architecture
()
{
case
"mllama"
:
numPaddedPatches
:=
numPatches
+
8
-
(
numPatches
%
8
)
%
8
maxNumTiles
:=
uint64
(
llm
.
KV
()
.
Uint
(
"vision.max_num_tiles"
))
graphSize
=
4
*
(
8
+
imageSize
*
imageSize
*
kv
(
"num_channels"
)
*
maxNumTiles
+
imageSize
*
imageSize
*
numChannels
*
maxNumTiles
+
embeddingLength
*
numPatches
*
maxNumTiles
+
9
*
embeddingLength
*
numPaddedPatches
*
maxNumTiles
+
numPaddedPatches
*
maxNumTiles
*
numPaddedPatches
*
maxNumTiles
*
headCount
)
case
"gemma3"
:
graphSize
=
4
*
(
imageSize
*
imageSize
*
numChannels
+
embeddingLength
*
patchSize
+
numPatches
*
numPatches
*
headCount
)
}
return
weights
,
graphSize
}
...
...
llm/memory.go
View file @
4ea4d2b1
...
...
@@ -218,8 +218,8 @@ func EstimateGPULayers(gpus []discover.GpuInfo, f *ggml.GGML, projectors []strin
if
blk
,
ok
:=
layers
[
fmt
.
Sprintf
(
"blk.%d"
,
i
)];
ok
{
layerSize
=
blk
.
Size
()
layerSize
+=
kv
/
f
.
KV
()
.
BlockCount
()
memoryWeights
+=
blk
.
Size
()
}
memoryWeights
+=
layerSize
if
opts
.
NumGPU
>=
0
&&
layerCount
>=
opts
.
NumGPU
{
// Stop allocating on GPU(s) once we hit the users target NumGPU
...
...
@@ -376,7 +376,7 @@ func (m MemoryEstimate) LogValue() slog.Value {
// memory of the weights
"total"
,
format
.
HumanBytes2
(
m
.
memoryWeights
),
// memory of repeating layers
"repeating"
,
format
.
HumanBytes2
(
m
.
memoryWeights
-
m
.
memoryLayerOutput
),
"repeating"
,
format
.
HumanBytes2
(
m
.
memoryWeights
),
// memory of non-repeating layers
"nonrepeating"
,
format
.
HumanBytes2
(
m
.
memoryLayerOutput
),
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment