Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
033cec23
Commit
033cec23
authored
Mar 12, 2025
by
Michael Yang
Browse files
count gemma3 vision tensors
parent
6b45b1d6
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
10 additions
and
2 deletions
+10
-2
fs/ggml/ggml.go
fs/ggml/ggml.go
+8
-0
llm/memory.go
llm/memory.go
+2
-2
No files found.
fs/ggml/ggml.go
View file @
033cec23
...
@@ -611,6 +611,14 @@ func (llm GGML) VisionGraphSize() (weights, graphSize uint64) {
...
@@ -611,6 +611,14 @@ func (llm GGML) VisionGraphSize() (weights, graphSize uint64) {
embeddingLength
*
numPatches
*
maxNumTiles
+
embeddingLength
*
numPatches
*
maxNumTiles
+
9
*
embeddingLength
*
numPaddedPatches
*
maxNumTiles
+
9
*
embeddingLength
*
numPaddedPatches
*
maxNumTiles
+
numPaddedPatches
*
maxNumTiles
*
numPaddedPatches
*
maxNumTiles
*
headCount
)
numPaddedPatches
*
maxNumTiles
*
numPaddedPatches
*
maxNumTiles
*
headCount
)
case
"gemma3"
:
for
name
,
layer
:=
range
llm
.
Tensors
()
.
GroupLayers
()
{
if
strings
.
HasPrefix
(
name
,
"v."
)
{
for
_
,
tensor
:=
range
layer
{
weights
+=
tensor
.
Size
()
}
}
}
}
}
return
weights
,
graphSize
return
weights
,
graphSize
}
}
...
...
llm/memory.go
View file @
033cec23
...
@@ -218,8 +218,8 @@ func EstimateGPULayers(gpus []discover.GpuInfo, f *ggml.GGML, projectors []strin
...
@@ -218,8 +218,8 @@ func EstimateGPULayers(gpus []discover.GpuInfo, f *ggml.GGML, projectors []strin
if
blk
,
ok
:=
layers
[
fmt
.
Sprintf
(
"blk.%d"
,
i
)];
ok
{
if
blk
,
ok
:=
layers
[
fmt
.
Sprintf
(
"blk.%d"
,
i
)];
ok
{
layerSize
=
blk
.
Size
()
layerSize
=
blk
.
Size
()
layerSize
+=
kv
/
f
.
KV
()
.
BlockCount
()
layerSize
+=
kv
/
f
.
KV
()
.
BlockCount
()
memoryWeights
+=
blk
.
Size
()
}
}
memoryWeights
+=
layerSize
if
opts
.
NumGPU
>=
0
&&
layerCount
>=
opts
.
NumGPU
{
if
opts
.
NumGPU
>=
0
&&
layerCount
>=
opts
.
NumGPU
{
// Stop allocating on GPU(s) once we hit the users target NumGPU
// Stop allocating on GPU(s) once we hit the users target NumGPU
...
@@ -376,7 +376,7 @@ func (m MemoryEstimate) LogValue() slog.Value {
...
@@ -376,7 +376,7 @@ func (m MemoryEstimate) LogValue() slog.Value {
// memory of the weights
// memory of the weights
"total"
,
format
.
HumanBytes2
(
m
.
memoryWeights
),
"total"
,
format
.
HumanBytes2
(
m
.
memoryWeights
),
// memory of repeating layers
// memory of repeating layers
"repeating"
,
format
.
HumanBytes2
(
m
.
memoryWeights
-
m
.
memoryLayerOutput
),
"repeating"
,
format
.
HumanBytes2
(
m
.
memoryWeights
),
// memory of non-repeating layers
// memory of non-repeating layers
"nonrepeating"
,
format
.
HumanBytes2
(
m
.
memoryLayerOutput
),
"nonrepeating"
,
format
.
HumanBytes2
(
m
.
memoryLayerOutput
),
),
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment