OpenDAS / ollama · Commits

Commit e04c7012 (unverified)
update llama.cpp submodule to `1e6f6554` (#6208)

Authored Aug 06, 2024 by Jeffrey Morgan; committed by GitHub on Aug 06, 2024.
Parent: d4a7216c
Showing 4 changed files with 25 additions and 45 deletions:

  llm/ext_server/server.cpp     +11  -3
  llm/llama.cpp                  +1  -1
  llm/patches/09-lora.diff      +13  -21
  llm/patches/10-params.diff     +0  -20
llm/ext_server/server.cpp
...
@@ -403,7 +403,9 @@ struct llama_server_context
             }
         }
 
-        std::tie(model, ctx) = llama_init_from_gpt_params(params);
+        auto init_result = llama_init_from_gpt_params(params);
+        model = init_result.model;
+        ctx = init_result.context;
         if (model == nullptr)
         {
             LOG_ERROR("unable to load model", {{"model", params.model}});
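Context for the hunk above: upstream llama.cpp replaced the std::tuple<llama_model *, llama_context *> return of llama_init_from_gpt_params with a small result struct, so the server now reads model and context out of it instead of using std::tie. A minimal, self-contained sketch of that calling shape follows; every type here is a stand-in, and only the llama_init_result field names come from the diff.

// Sketch of the new init call shape; all types are stand-ins, not the real llama.cpp API.
#include <cstdio>

struct llama_model   { int unused; };
struct llama_context { int unused; };
struct gpt_params    { const char *model = "model.gguf"; };

// Shape implied by the diff: the init call now returns a struct, not a std::tuple.
struct llama_init_result {
    llama_model   *model   = nullptr;
    llama_context *context = nullptr;
};

static llama_init_result llama_init_from_gpt_params_stub(const gpt_params &params) {
    (void)params;
    static llama_model   m{};
    static llama_context c{};
    return {&m, &c};
}

int main() {
    gpt_params params;
    // new style: unpack the result struct ...
    auto init_result = llama_init_from_gpt_params_stub(params);
    llama_model   *model = init_result.model;
    llama_context *ctx   = init_result.context;
    // ... instead of the old: std::tie(model, ctx) = llama_init_from_gpt_params(params);
    if (model == nullptr || ctx == nullptr) {
        std::fprintf(stderr, "unable to load model: %s\n", params.model);
        return 1;
    }
    std::puts("model and context initialized");
    return 0;
}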
...
@@ -2422,7 +2424,10 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
                 invalid_param = true;
                 break;
             }
-            params.lora_adapter.emplace_back(argv[i], 1.0f);
+            params.lora_adapters.push_back({
+                std::string(argv[i]),
+                1.0,
+            });
             params.use_mmap = false;
         }
         else if (arg == "--lora-scaled")
...
@@ -2438,7 +2443,10 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
                 invalid_param = true;
                 break;
             }
-            params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i]));
+            params.lora_adapters.push_back({
+                lora_adapter,
+                std::stof(argv[i])
+            });
             params.use_mmap = false;
         }
         else if (arg == "-v" || arg == "--verbose")
...
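The two parsing hunks above track a related upstream change: --lora and --lora-scaled now append a {path, scale} record to params.lora_adapters instead of pushing a (path, scale) tuple onto the old lora_adapter vector. A self-contained sketch of that flag handling follows; the record type and its field names are assumptions inferred from the diff, not the exact upstream definitions.

// Sketch of the --lora / --lora-scaled handling shown above; types are simplified stand-ins.
#include <cstdlib>
#include <string>
#include <vector>

struct lora_adapter_info {          // assumed shape: adapter path plus scale factor
    std::string path;
    float       scale;
};

struct params_sketch {
    std::vector<lora_adapter_info> lora_adapters;
    bool use_mmap = true;
};

int main(int argc, char **argv) {
    params_sketch params;
    for (int i = 1; i < argc; ++i) {
        std::string arg = argv[i];
        if (arg == "--lora" && i + 1 < argc) {
            // --lora FILE: implicit scale of 1.0
            params.lora_adapters.push_back({std::string(argv[++i]), 1.0f});
            params.use_mmap = false;        // as in the diff: adapters disable mmap
        } else if (arg == "--lora-scaled" && i + 2 < argc) {
            // --lora-scaled FILE SCALE: scale parsed from the second argument
            std::string lora_adapter = argv[++i];
            params.lora_adapters.push_back({lora_adapter, std::strtof(argv[++i], nullptr)});
            params.use_mmap = false;
        }
    }
    return 0;
}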
llm/llama.cpp @ 1e6f6554 (compare 6eeaeba1...1e6f6554)
-Subproject commit 6eeaeba126ff701f3e8f79f246805b7023709972
+Subproject commit 1e6f6554aa11fa10160a5fda689e736c3c34169f
llm/patches/09-lora.diff
 diff --git a/common/common.cpp b/common/common.cpp
-index dbb724fb..c26fe6ee 100644
+index 2e8374d5..70d0afde 100644
 --- a/common/common.cpp
 +++ b/common/common.cpp
-@@ -2087,14 +2087,27 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
-     for (unsigned int i = 0; i < params.lora_adapter.size(); ++i) {
-         const std::string & lora_adapter = std::get<0>(params.lora_adapter[i]);
-         float lora_scale = std::get<1>(params.lora_adapter[i]);
-+
-+        // try to load as gguf
-         auto adapter = llama_lora_adapter_init(model, lora_adapter.c_str());
-         if (adapter == nullptr) {
--            fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
+@@ -2110,9 +2110,21 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
+         loaded_la.adapter = llama_lora_adapter_init(model, la.path.c_str());
+         if (loaded_la.adapter == nullptr) {
+             fprintf(stderr, "%s: error: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
 -            llama_free(lctx);
 -            llama_free_model(model);
--            return std::make_tuple(nullptr, nullptr);
-+            fprintf(stderr, "%s: error: failed to apply lora adapter, trying ggla\n", __func__);
+-            return iparams;
 +
 +            // if that fails, try loading as ggla for compatibility
 +            int err = llama_model_apply_lora_from_file(model,
-+                                                     lora_adapter.c_str(),
-+                                                     lora_scale,
++                                                     la.path.c_str(),
++                                                     la.scale,
 +                                                     nullptr,
 +                                                     params.n_threads);
 +            if (err != 0) {
 +                fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
 +                llama_free(lctx);
 +                llama_free_model(model);
-+                return std::make_tuple(nullptr, nullptr);
-+            }
-+        } else {
-+            llama_lora_adapter_set(lctx, adapter, lora_scale);
++                return iparams;
++            } else {
++                break;
++            }
          }
--        llama_lora_adapter_set(lctx, adapter, lora_scale);
+         iparams.lora_adapters.push_back(loaded_la); // copy to list of loaded adapters
      }
- 
-     if (params.ignore_eos) {
 diff --git a/include/llama.h b/include/llama.h
 index 93fd77ca..b0fb37a6 100644
 --- a/include/llama.h
...
@@ -355,4 +347,4 @@ index 80a0dd0f..9d7b0e17 100644
 +        return 1;
 +    }
 +}
 \ No newline at end of file
\ No newline at end of file
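For context, 09-lora.diff (rebased here onto the new submodule) keeps ollama's compatibility fallback: a LoRA adapter is first loaded through the GGUF-based llama_lora_adapter_init, and only if that fails is the legacy GGLA loader llama_model_apply_lora_from_file tried. A self-contained sketch of that control flow follows, with both loaders stubbed out; only the function names and the order of attempts come from the patch.

// Control-flow sketch of the GGUF-first, GGLA-fallback loading in 09-lora.diff; loaders are stubs.
#include <cstdio>
#include <string>

struct llama_model        { int unused; };
struct llama_lora_adapter { int unused; };

// Stub standing in for llama_lora_adapter_init: pretend GGUF loading fails for .ggla files.
static llama_lora_adapter *init_gguf_adapter_stub(llama_model *, const std::string &path) {
    static llama_lora_adapter a{};
    bool looks_ggla = path.size() >= 5 && path.compare(path.size() - 5, 5, ".ggla") == 0;
    return looks_ggla ? nullptr : &a;
}

// Stub standing in for llama_model_apply_lora_from_file: returns 0 on success.
static int apply_ggla_adapter_stub(llama_model *, const std::string &, float, int) {
    return 0;
}

// Returns true if the adapter was applied by either path.
static bool load_lora(llama_model *model, const std::string &path, float scale, int n_threads) {
    // try to load as gguf
    llama_lora_adapter *adapter = init_gguf_adapter_stub(model, path);
    if (adapter == nullptr) {
        std::fprintf(stderr, "failed to apply lora adapter, trying ggla\n");
        // if that fails, try loading as ggla for compatibility
        if (apply_ggla_adapter_stub(model, path, scale, n_threads) != 0) {
            std::fprintf(stderr, "failed to apply lora adapter\n");
            return false;
        }
        return true;
    }
    // in the real patch the GGUF adapter is then attached to the context
    return true;
}

int main() {
    llama_model m{};
    bool ok = load_lora(&m, "adapter.ggla", 1.0f, 4) && load_lora(&m, "adapter.gguf", 1.0f, 4);
    return ok ? 0 : 1;
}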
llm/patches/10-params.diff  deleted  100644 → 0
diff --git a/src/llama.cpp b/src/llama.cpp
index a207451f..fba6b175 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -4969,6 +4969,7 @@ static void llm_load_hparams(
                 hparams.attn_soft_cap = true;
 
                 switch (hparams.n_layer) {
+                    case 26: model.type = e_model::MODEL_2B; break;
                     case 42: model.type = e_model::MODEL_9B; break;
                     case 46: model.type = e_model::MODEL_27B; break;
                     default: model.type = e_model::MODEL_UNKNOWN;
@@ -11736,6 +11737,7 @@ struct llm_build_context {
                 // ref: https://github.com/google/gemma_pytorch/commit/03e657582d17cb5a8617ebf333c1c16f3694670e
                 switch (model.type) {
+                    case e_model::MODEL_2B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k))); break;
                     case e_model::MODEL_9B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k))); break;
                     case e_model::MODEL_27B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd / n_head))); break;
                     default: GGML_ABORT("fatal error");
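The deleted 10-params.diff had backported Gemma 2 2B support into src/llama.cpp: a 26-layer model maps to MODEL_2B, and its attention queries are scaled by 1/sqrt(n_embd_head_k) like the 9B variant. It is dropped here, presumably because the 1e6f6554 submodule already carries the equivalent change upstream. A simplified, self-contained sketch of the mapping the patch added follows; the helper functions and the sample head dimensions are illustrative, not upstream code.

// Simplified sketch of the Gemma 2 layer-count -> variant mapping from the deleted patch.
#include <cmath>
#include <cstdio>

enum class e_model { MODEL_UNKNOWN, MODEL_2B, MODEL_9B, MODEL_27B };

// hparams.n_layer -> model variant (case 26 is what the patch added)
static e_model gemma2_type_from_layers(int n_layer) {
    switch (n_layer) {
        case 26: return e_model::MODEL_2B;
        case 42: return e_model::MODEL_9B;
        case 46: return e_model::MODEL_27B;
        default: return e_model::MODEL_UNKNOWN;
    }
}

// Per-variant query scaling, mirroring the ggml_scale calls in the patch.
static float gemma2_q_scale(e_model type, int n_embd_head_k, int n_embd, int n_head) {
    switch (type) {
        case e_model::MODEL_2B:
        case e_model::MODEL_9B:  return 1.0f / std::sqrt((float)n_embd_head_k);
        case e_model::MODEL_27B: return 1.0f / std::sqrt((float)(n_embd / n_head));
        default:                 return 1.0f;   // the real code aborts on unknown types
    }
}

int main() {
    // illustrative dimensions only
    e_model type = gemma2_type_from_layers(26);
    std::printf("q scale = %.6f\n", gemma2_q_scale(type, 256, 2304, 8));
    return 0;
}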