OpenDAS / ollama · Commits
"docs/vscode:/vscode.git/clone" did not exist on "9a0b0c2e74ea47176805d84385c67dfdf00936f3"
Unverified commit e04c7012 authored Aug 06, 2024 by Jeffrey Morgan, committed by GitHub Aug 06, 2024
update llama.cpp submodule to `1e6f6554` (#6208)
parent d4a7216c
Showing 4 changed files with 25 additions and 45 deletions (+25, -45)
llm/ext_server/server.cpp     +11  -3
llm/llama.cpp                  +1  -1
llm/patches/09-lora.diff      +13 -21
llm/patches/10-params.diff     +0 -20
llm/ext_server/server.cpp (view file @ e04c7012)
...
@@ -403,7 +403,9 @@ struct llama_server_context
             }
         }

-        std::tie(model, ctx) = llama_init_from_gpt_params(params);
+        auto init_result = llama_init_from_gpt_params(params);
+        model = init_result.model;
+        ctx = init_result.context;
         if (model == nullptr)
         {
             LOG_ERROR("unable to load model", {{"model", params.model}});
...
@@ -2422,7 +2424,10 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
                 invalid_param = true;
                 break;
             }
-            params.lora_adapter.emplace_back(argv[i], 1.0f);
+            params.lora_adapters.push_back({
+                std::string(argv[i]),
+                1.0,
+            });
             params.use_mmap = false;
         }
         else if (arg == "--lora-scaled")
...
@@ -2438,7 +2443,10 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
                 invalid_param = true;
                 break;
             }
-            params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i]));
+            params.lora_adapters.push_back({
+                lora_adapter,
+                std::stof(argv[i])
+            });
             params.use_mmap = false;
         }
         else if (arg == "-v" || arg == "--verbose")
...
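For orientation, here is a minimal sketch (not part of the commit) of how a caller adapts to the two API changes this file tracks: llama_init_from_gpt_params now returns a result struct with model and context members instead of a std::tuple, and LoRA adapters are registered through params.lora_adapters as {path, scale} entries rather than params.lora_adapter tuples. The helper function and its name are hypothetical; it assumes the llama.cpp "common" headers at submodule revision 1e6f6554.

#include <string>
#include "common.h"   // gpt_params, llama_init_from_gpt_params (llama.cpp @ 1e6f6554)

// Hypothetical helper illustrating the post-update initialization pattern.
static bool load_with_lora(const std::string &model_path, const std::string &lora_path) {
    gpt_params params;
    params.model = model_path;

    // LoRA adapters are now a vector of {path, scale} entries.
    params.lora_adapters.push_back({ lora_path, 1.0f });
    params.use_mmap = false;   // mirrors what server_params_parse does for --lora

    // The init function now returns a struct instead of a std::tuple.
    auto init_result = llama_init_from_gpt_params(params);
    llama_model   *model = init_result.model;
    llama_context *ctx   = init_result.context;
    if (model == nullptr || ctx == nullptr) {
        return false;
    }

    // ... run inference with model/ctx ...

    llama_free(ctx);
    llama_free_model(model);
    return true;
}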
llm/llama.cpp @ 1e6f6554 (compare 6eeaeba1 ... 1e6f6554)

-Subproject commit 6eeaeba126ff701f3e8f79f246805b7023709972
+Subproject commit 1e6f6554aa11fa10160a5fda689e736c3c34169f
llm/patches/09-lora.diff (view file @ e04c7012)

Before this commit, the patch targeted common.cpp index dbb724fb..c26fe6ee:

diff --git a/common/common.cpp b/common/common.cpp
index dbb724fb..c26fe6ee 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -2087,14 +2087,27 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
     for (unsigned int i = 0; i < params.lora_adapter.size(); ++i) {
         const std::string & lora_adapter = std::get<0>(params.lora_adapter[i]);
         float lora_scale = std::get<1>(params.lora_adapter[i]);
+
+        // try to load as gguf
         auto adapter = llama_lora_adapter_init(model, lora_adapter.c_str());
         if (adapter == nullptr) {
-            fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
-            llama_free(lctx);
-            llama_free_model(model);
-            return std::make_tuple(nullptr, nullptr);
+            fprintf(stderr, "%s: error: failed to apply lora adapter, trying ggla\n", __func__);
+
+            // if that fails, try loading as ggla for compatibility
+            int err = llama_model_apply_lora_from_file(model,
+                                                       lora_adapter.c_str(),
+                                                       lora_scale,
+                                                       nullptr,
+                                                       params.n_threads);
+            if (err != 0) {
+                fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
+                llama_free(lctx);
+                llama_free_model(model);
+                return std::make_tuple(nullptr, nullptr);
+            }
+        } else {
+            llama_lora_adapter_set(lctx, adapter, lora_scale);
         }
-        llama_lora_adapter_set(lctx, adapter, lora_scale);
     }

     if (params.ignore_eos) {

After this commit, the patch targets common.cpp index 2e8374d5..70d0afde:

diff --git a/common/common.cpp b/common/common.cpp
index 2e8374d5..70d0afde 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -2110,9 +2110,21 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
         loaded_la.adapter = llama_lora_adapter_init(model, la.path.c_str());
         if (loaded_la.adapter == nullptr) {
             fprintf(stderr, "%s: error: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
-            llama_free(lctx);
-            llama_free_model(model);
-            return iparams;
+
+            // if that fails, try loading as ggla for compatibility
+            int err = llama_model_apply_lora_from_file(model,
+                                                       la.path.c_str(),
+                                                       la.scale,
+                                                       nullptr,
+                                                       params.n_threads);
+            if (err != 0) {
+                fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
+                llama_free(lctx);
+                llama_free_model(model);
+                return iparams;
+            } else {
+                break;
+            }
         }
         iparams.lora_adapters.push_back(loaded_la); // copy to list of loaded adapters
     }

Both versions also carry a hunk against include/llama.h (index 93fd77ca..b0fb37a6); the rest of the file is collapsed in this view:

diff --git a/include/llama.h b/include/llama.h
index 93fd77ca..b0fb37a6 100644
--- a/include/llama.h
...
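To summarize the control flow this patch preserves, here is a sketch (not code from the patch; the wrapper function below is hypothetical): the adapter file is first loaded as a GGUF LoRA via llama_lora_adapter_init, and only if that fails is the legacy ggla loader llama_model_apply_lora_from_file tried with the same path and scale. Both entry points are the ones the patch itself calls; the legacy one is kept available by this patch series.

#include <string>
#include "llama.h"   // llama_lora_adapter_init, llama_lora_adapter_set,
                     // llama_model_apply_lora_from_file (kept by this patch series)

// Hypothetical wrapper showing the gguf-first, ggla-fallback order.
static bool apply_lora(llama_model *model, llama_context *ctx,
                       const std::string &path, float scale, int n_threads) {
    // 1) Preferred path: the adapter is a GGUF LoRA and can be attached to the context.
    llama_lora_adapter *adapter = llama_lora_adapter_init(model, path.c_str());
    if (adapter != nullptr) {
        llama_lora_adapter_set(ctx, adapter, scale);
        return true;
    }

    // 2) Compatibility path: apply a legacy ggla adapter directly to the model weights.
    int err = llama_model_apply_lora_from_file(model, path.c_str(), scale,
                                               /*path_base_model=*/nullptr, n_threads);
    return err == 0;
}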
llm/patches/10-params.diff (deleted, 100644 → 0; view file @ d4a7216c)

The deleted patch read:

diff --git a/src/llama.cpp b/src/llama.cpp
index a207451f..fba6b175 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -4969,6 +4969,7 @@ static void llm_load_hparams(
                     hparams.attn_soft_cap = true;

                     switch (hparams.n_layer) {
+                        case 26: model.type = e_model::MODEL_2B; break;
                         case 42: model.type = e_model::MODEL_9B; break;
                         case 46: model.type = e_model::MODEL_27B; break;
                         default: model.type = e_model::MODEL_UNKNOWN;
@@ -11736,6 +11737,7 @@ struct llm_build_context {
                 // ref: https://github.com/google/gemma_pytorch/commit/03e657582d17cb5a8617ebf333c1c16f3694670e
                 switch (model.type) {
+                    case e_model::MODEL_2B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k))); break;
                     case e_model::MODEL_9B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k))); break;
                     case e_model::MODEL_27B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd / n_head))); break;
                     default: GGML_ABORT("fatal error");
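This patch had added Gemma 2 2B (26-layer) handling on top of the old submodule; it is deleted here, presumably because llama.cpp 1e6f6554 carries that support upstream. For reference, the only size-dependent part is the attention query scale, sketched below with the same expressions the patch used (the helper itself is illustrative, not from the patch):

#include <cmath>

// Illustrative helper: the 2B and 9B graphs scale Q by 1/sqrt(head size),
// while the 27B graph scales by 1/sqrt(n_embd / n_head); the two differ
// whenever n_embd_head_k * n_head != n_embd.
static float gemma2_q_scale(int n_embd, int n_head, int n_embd_head_k, bool is_27b) {
    return is_27b ? 1.0f / std::sqrt(float(n_embd) / float(n_head))
                  : 1.0f / std::sqrt(float(n_embd_head_k));
}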