Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
1d359e73
Commit
1d359e73
authored
May 13, 2024
by
Michael Yang
Browse files
typo
parent
50b9056e
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
13 additions
and
9 deletions
+13
-9
llm/memory.go
llm/memory.go
+13
-9
No files found.
llm/memory.go
View file @
1d359e73
...
@@ -54,8 +54,10 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
...
@@ -54,8 +54,10 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
}
}
layers
:=
ggml
.
Tensors
()
.
Layers
()
layers
:=
ggml
.
Tensors
()
.
Layers
()
// add one layer worth of memorr as a buffer
// add one layer worth of memory as a buffer
memoryMinimum
+=
layers
[
"blk.0"
]
.
size
()
if
blk0
,
ok
:=
layers
[
"blk.0"
];
ok
{
memoryMinimum
+=
blk0
.
size
()
}
// fp16 k,v = (1 (k) + 1 (v)) * sizeof(float16) * n_ctx * n_layer * n_embd / n_head * n_head_kv
// fp16 k,v = (1 (k) + 1 (v)) * sizeof(float16) * n_ctx * n_layer * n_embd / n_head * n_head_kv
var
kv
uint64
=
2
*
2
*
uint64
(
opts
.
NumCtx
)
*
ggml
.
KV
()
.
BlockCount
()
*
ggml
.
KV
()
.
EmbeddingLength
()
/
ggml
.
KV
()
.
HeadCount
()
*
ggml
.
KV
()
.
HeadCountKV
()
var
kv
uint64
=
2
*
2
*
uint64
(
opts
.
NumCtx
)
*
ggml
.
KV
()
.
BlockCount
()
*
ggml
.
KV
()
.
EmbeddingLength
()
/
ggml
.
KV
()
.
HeadCount
()
*
ggml
.
KV
()
.
HeadCountKV
()
...
@@ -102,7 +104,8 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
...
@@ -102,7 +104,8 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
var
layerCount
int
var
layerCount
int
for
i
:=
0
;
i
<
int
(
ggml
.
KV
()
.
BlockCount
());
i
++
{
for
i
:=
0
;
i
<
int
(
ggml
.
KV
()
.
BlockCount
());
i
++
{
memoryLayer
:=
layers
[
fmt
.
Sprintf
(
"blk.%d"
,
i
)]
.
size
()
if
blk
,
ok
:=
layers
[
fmt
.
Sprintf
(
"blk.%d"
,
i
)];
ok
{
memoryLayer
:=
blk
.
size
()
// KV is proportional to the number of layers
// KV is proportional to the number of layers
memoryLayer
+=
kv
/
ggml
.
KV
()
.
BlockCount
()
memoryLayer
+=
kv
/
ggml
.
KV
()
.
BlockCount
()
...
@@ -113,6 +116,7 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
...
@@ -113,6 +116,7 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
layerCount
++
layerCount
++
}
}
}
}
}
if
gpus
[
0
]
.
Library
!=
"metal"
||
!
opts
.
UseMMap
{
if
gpus
[
0
]
.
Library
!=
"metal"
||
!
opts
.
UseMMap
{
// memory was not preallocated for output tensors
// memory was not preallocated for output tensors
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment