OpenDAS / ollama · Commits

Commit 177b69a2
authored Aug 25, 2023 by Jeffrey Morgan

add missing entries for 34B

parent dad63f08
Showing 1 changed file with 7 additions and 0 deletions
llm/llama.cpp  +7 −0
@@ -149,6 +149,7 @@ static const std::map<e_model, size_t> & MEM_REQ_SCRATCH0(int n_ctx)
         { MODEL_7B,  ((size_t) n_ctx / 16ull + 100ull) * MB },
         { MODEL_13B, ((size_t) n_ctx / 12ull + 120ull) * MB },
         { MODEL_30B, ((size_t) n_ctx /  9ull + 160ull) * MB },
+        { MODEL_34B, ((size_t) n_ctx /  9ull + 160ull) * MB },
         { MODEL_65B, ((size_t) n_ctx /  6ull + 256ull) * MB }, // guess
         { MODEL_70B, ((size_t) n_ctx /  7ull + 164ull) * MB },
     };
@@ -162,6 +163,7 @@ static const std::map<e_model, size_t> & MEM_REQ_SCRATCH1()
         { MODEL_7B,  160ull * MB },
         { MODEL_13B, 192ull * MB },
         { MODEL_30B, 256ull * MB },
+        { MODEL_34B, 256ull * MB },
         { MODEL_65B, 384ull * MB }, // guess
         { MODEL_70B, 304ull * MB },
     };
@@ -176,6 +178,7 @@ static const std::map<e_model, size_t> & MEM_REQ_EVAL()
         { MODEL_7B,  10ull * MB },
         { MODEL_13B, 12ull * MB },
         { MODEL_30B, 16ull * MB },
+        { MODEL_34B, 16ull * MB },
         { MODEL_65B, 24ull * MB }, // guess
         { MODEL_70B, 24ull * MB },
     };
@@ -191,6 +194,7 @@ static const std::map<e_model, size_t> & VRAM_REQ_SCRATCH_BASE()
         { MODEL_7B,  512ull * kB },
         { MODEL_13B, 640ull * kB },
         { MODEL_30B, 768ull * kB },
+        { MODEL_34B, 768ull * kB },
         { MODEL_65B, 1280ull * kB },
         { MODEL_70B, 1280ull * kB },
     };
@@ -206,6 +210,7 @@ static const std::map<e_model, size_t> & VRAM_REQ_SCRATCH_PER_CONTEXT()
         { MODEL_7B,  128ull },
         { MODEL_13B, 160ull },
         { MODEL_30B, 208ull },
+        { MODEL_34B, 208ull },
         { MODEL_65B, 256ull },
         { MODEL_70B, 256ull },
     };
@@ -1123,6 +1128,8 @@ static void llama_model_load_internal(
             LLAMA_LOG_WARN("%s: warning: assuming 70B model based on GQA == %d\n", __func__, n_gqa);
             model.type = e_model::MODEL_70B;
             hparams.f_ffn_mult = 1.3f; // from the params.json of the 70B model
+        } else if (model.type == e_model::MODEL_34B && n_gqa == 8) {
+            hparams.f_ffn_mult = 1.0f; // from the params.json of the 34B model
         }
 
         hparams.rope_freq_base = rope_freq_base;
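For reference, here is a small standalone sketch (not part of the commit) of what the new MODEL_34B entries evaluate to at a sample context length. The kB/MB constants below are assumptions mirroring the usual 1024-based sizes, and n_ctx = 2048 is only an illustrative value; as the diff shows, the 34B rows reuse the 30B formulas.

// Standalone sketch: evaluate the MODEL_34B sizes added above for a sample n_ctx.
// kB/MB here are assumptions mirroring the 1024-based constants used in llama.cpp.
#include <cstdio>
#include <cstddef>

static const size_t kB = 1024;
static const size_t MB = 1024 * 1024;

int main() {
    int n_ctx = 2048; // illustrative context length

    size_t scratch0  = ((size_t) n_ctx / 9ull + 160ull) * MB; // MEM_REQ_SCRATCH0: 2048/9 = 227 -> 387 MB
    size_t scratch1  = 256ull * MB;                           // MEM_REQ_SCRATCH1
    size_t mem_eval  = 16ull  * MB;                           // MEM_REQ_EVAL
    size_t vram_base = 768ull * kB;                           // VRAM_REQ_SCRATCH_BASE

    printf("scratch0:          %zu MB\n", scratch0 / MB);   // prints 387
    printf("scratch1:          %zu MB\n", scratch1 / MB);   // prints 256
    printf("eval:              %zu MB\n", mem_eval / MB);   // prints 16
    printf("vram scratch base: %zu kB\n", vram_base / kB);  // prints 768
    return 0;
}

The 34B entries deliberately mirror the 30B values, so a 34B model is loaded with the same scratch-buffer budget as a 30B one.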