Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
1d359e73
Commit
1d359e73
authored
May 13, 2024
by
Michael Yang
Browse files
typo
parent
50b9056e
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
13 additions
and
9 deletions
+13
-9
llm/memory.go
llm/memory.go
+13
-9
No files found.
llm/memory.go
View file @
1d359e73
...
...
@@ -54,8 +54,10 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
}
layers
:=
ggml
.
Tensors
()
.
Layers
()
// add one layer worth of memorr as a buffer
memoryMinimum
+=
layers
[
"blk.0"
]
.
size
()
// add one layer worth of memory as a buffer
if
blk0
,
ok
:=
layers
[
"blk.0"
];
ok
{
memoryMinimum
+=
blk0
.
size
()
}
// fp16 k,v = (1 (k) + 1 (v)) * sizeof(float16) * n_ctx * n_layer * n_embd / n_head * n_head_kv
var
kv
uint64
=
2
*
2
*
uint64
(
opts
.
NumCtx
)
*
ggml
.
KV
()
.
BlockCount
()
*
ggml
.
KV
()
.
EmbeddingLength
()
/
ggml
.
KV
()
.
HeadCount
()
*
ggml
.
KV
()
.
HeadCountKV
()
...
...
@@ -102,15 +104,17 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
var
layerCount
int
for
i
:=
0
;
i
<
int
(
ggml
.
KV
()
.
BlockCount
());
i
++
{
memoryLayer
:=
layers
[
fmt
.
Sprintf
(
"blk.%d"
,
i
)]
.
size
()
if
blk
,
ok
:=
layers
[
fmt
.
Sprintf
(
"blk.%d"
,
i
)];
ok
{
memoryLayer
:=
blk
.
size
()
// KV is proportional to the number of layers
memoryLayer
+=
kv
/
ggml
.
KV
()
.
BlockCount
()
// KV is proportional to the number of layers
memoryLayer
+=
kv
/
ggml
.
KV
()
.
BlockCount
()
memoryRequiredTotal
+=
memoryLayer
if
(
opts
.
NumGPU
>=
0
&&
layerCount
+
1
<=
opts
.
NumGPU
)
||
(
opts
.
NumGPU
<
0
&&
memoryAvailable
>
memoryRequiredPartial
+
memoryLayer
)
{
memoryRequiredPartial
+=
memoryLayer
layerCount
++
memoryRequiredTotal
+=
memoryLayer
if
(
opts
.
NumGPU
>=
0
&&
layerCount
+
1
<=
opts
.
NumGPU
)
||
(
opts
.
NumGPU
<
0
&&
memoryAvailable
>
memoryRequiredPartial
+
memoryLayer
)
{
memoryRequiredPartial
+=
memoryLayer
layerCount
++
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment