Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
7bb7cb8a
Commit
7bb7cb8a
authored
Apr 25, 2024
by
Michael Yang
Browse files
only count output tensors
parent
5f73c087
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
18 additions
and
9 deletions
+18
-9
llm/memory.go
llm/memory.go
+18
-9
No files found.
llm/memory.go
View file @
7bb7cb8a
...
@@ -5,7 +5,6 @@ import (
...
@@ -5,7 +5,6 @@ import (
"log/slog"
"log/slog"
"os"
"os"
"strconv"
"strconv"
"strings"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/format"
...
@@ -100,8 +99,22 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
...
@@ -100,8 +99,22 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
return
0
,
0
return
0
,
0
}
}
var
layerCount
int
layers
:=
ggml
.
Tensors
()
.
Layers
()
layers
:=
ggml
.
Tensors
()
.
Layers
()
var
memoryLayerOutput
uint64
for
k
,
v
:=
range
layers
{
if
k
==
"output"
||
k
==
"output_norm"
{
memoryLayerOutput
+=
v
.
size
()
}
}
if
gpus
[
0
]
.
Library
==
"metal"
&&
opts
.
UseMMap
{
// memory is preallocated for output tensors
memoryRequiredTotal
+=
memoryLayerOutput
memoryRequiredPartial
+=
memoryLayerOutput
}
var
layerCount
int
for
i
:=
0
;
i
<
int
(
ggml
.
KV
()
.
BlockCount
());
i
++
{
for
i
:=
0
;
i
<
int
(
ggml
.
KV
()
.
BlockCount
());
i
++
{
memoryLayer
:=
layers
[
fmt
.
Sprintf
(
"blk.%d"
,
i
)]
.
size
()
memoryLayer
:=
layers
[
fmt
.
Sprintf
(
"blk.%d"
,
i
)]
.
size
()
...
@@ -115,15 +128,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
...
@@ -115,15 +128,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
}
}
}
}
var
memoryLayerOutput
uint64
if
gpus
[
0
]
.
Library
!=
"metal"
||
!
opts
.
UseMMap
{
for
k
,
v
:=
range
layers
{
// memory was not preallocated for output tensors
if
!
strings
.
HasPrefix
(
k
,
"blk."
)
{
memoryRequiredTotal
+=
memoryLayerOutput
memoryLayerOutput
+=
v
.
size
()
}
}
}
memoryRequiredTotal
+=
memoryLayerOutput
if
memoryAvailable
>
memoryRequiredTotal
{
if
memoryAvailable
>
memoryRequiredTotal
{
layerCount
=
int
(
ggml
.
KV
()
.
BlockCount
())
+
1
layerCount
=
int
(
ggml
.
KV
()
.
BlockCount
())
+
1
memoryRequiredPartial
=
memoryRequiredTotal
memoryRequiredPartial
=
memoryRequiredTotal
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment