OpenDAS / ollama · Commits · 68ee42f9
".github/vscode:/vscode.git/clone" did not exist on "8abf952a7253a3bfabd058331b01f2bef418a1c8"
Commit 68ee42f9 (unverified), authored Jul 29, 2024 by Jeffrey Morgan; committed by GitHub, Jul 29, 2024
update llama.cpp submodule to `6eeaeba1` (#6039)
Parent: f26aef9a
Showing 5 changed files with 8 additions and 89 deletions (+8 / -89):

  llm/ext_server/server.cpp                 +0  -9
  llm/llama.cpp                             +1  -1
  llm/patches/05-default-pretokenizer.diff  +5  -5
  llm/patches/09-lora.diff                  +2  -4
  llm/patches/10-llama3-rope.diff           +0  -70
llm/ext_server/server.cpp (view file @ 68ee42f9)

...
@@ -2438,15 +2438,6 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
             params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i]));
             params.use_mmap = false;
         }
-        else if (arg == "--lora-base")
-        {
-            if (++i >= argc)
-            {
-                invalid_param = true;
-                break;
-            }
-            params.lora_base = argv[i];
-        }
         else if (arg == "-v" || arg == "--verbose")
         {
             server_verbose = true;
...
llm/llama.cpp @ 6eeaeba1 (compare d94c6e0c...6eeaeba1)

-Subproject commit d94c6e0ccbd29ee1ba4f44e9caa8682ad94df9fa
+Subproject commit 6eeaeba126ff701f3e8f79f246805b7023709972
llm/patches/05-default-pretokenizer.diff (view file @ 68ee42f9)

 diff --git a/src/llama.cpp b/src/llama.cpp
-index 8fe51971..7113ba64 100644
+index a207451f..2ddf431d 100644
 --- a/src/llama.cpp
 +++ b/src/llama.cpp
-@@ -5433,16 +5433,7 @@ static void llm_load_vocab(
+@@ -5347,16 +5347,7 @@ static void llm_load_vocab(
          if (vocab.type == LLAMA_VOCAB_TYPE_BPE) {
              vocab.tokenizer_add_space_prefix = false;
              vocab.tokenizer_clean_spaces = true;
...
@@ -20,9 +20,9 @@ index 8fe51971..7113ba64 100644
              vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
          } else if (
              tokenizer_pre == "llama3" ||
-@@ -5526,7 +5517,8 @@ static void llm_load_vocab(
-                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_SMOLLM;
-                 vocab.tokenizer_clean_spaces = false;
+@@ -5443,7 +5434,8 @@ static void llm_load_vocab(
+                 tokenizer_pre == "codeshell") {
+                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_CODESHELL;
              } else {
 -                throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
 +                LLAMA_LOG_WARN("%s: missing or unrecognized pre-tokenizer type, using: 'default'\n", __func__);
...
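The net effect of this patch (which the commit only rebases onto the new submodule) is that a missing or unknown `tokenizer_pre` value no longer aborts model loading: `llm_load_vocab` logs a warning and falls back to the default BPE pre-tokenizer. Below is a minimal, self-contained C++ sketch of that fallback pattern; `resolve_pre_tokenizer` and the `pre_type` enum are illustrative stand-ins, not llama.cpp's actual symbols.

#include <cstdio>
#include <string>

// Hypothetical stand-in for llama.cpp's LLAMA_VOCAB_PRE_TYPE_* values.
enum class pre_type { DEFAULT, LLAMA3, CODESHELL };

// Without the patch an unrecognized name throws std::runtime_error and aborts
// model loading; with the patch applied it only warns and uses the default.
static pre_type resolve_pre_tokenizer(const std::string & tokenizer_pre) {
    if (tokenizer_pre == "llama3")    return pre_type::LLAMA3;
    if (tokenizer_pre == "codeshell") return pre_type::CODESHELL;
    std::fprintf(stderr,
        "%s: missing or unrecognized pre-tokenizer type, using: 'default'\n", __func__);
    return pre_type::DEFAULT;
}

int main() {
    resolve_pre_tokenizer("llama3");            // known type, no warning
    resolve_pre_tokenizer("some-future-type");  // falls back to default with a warning
    return 0;
}

The trade-off is that a model with an unlisted pre-tokenizer still loads but gets default BPE handling, which may tokenize slightly differently than intended.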
llm/patches/09-lora.diff (view file @ 68ee42f9)

...
@@ -2,7 +2,7 @@ diff --git a/common/common.cpp b/common/common.cpp
 index dbb724fb..c26fe6ee 100644
 --- a/common/common.cpp
 +++ b/common/common.cpp
-@@ -2087,14 +2087,29 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
+@@ -2087,14 +2087,27 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
      for (unsigned int i = 0; i < params.lora_adapter.size(); ++i) {
          const std::string & lora_adapter = std::get<0>(params.lora_adapter[i]);
          float lora_scale = std::get<1>(params.lora_adapter[i]);
...
@@ -20,9 +20,7 @@ index dbb724fb..c26fe6ee 100644
 +        int err = llama_model_apply_lora_from_file(model,
 +                                                   lora_adapter.c_str(),
 +                                                   lora_scale,
-+                                                   ((i > 0) || params.lora_base.empty())
-+                                                       ? NULL
-+                                                       : params.lora_base.c_str(),
++                                                   nullptr,
 +                                                   params.n_threads);
 +        if (err != 0) {
 +            fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
...
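After this change the patched loop always passes a null base-model path, matching the removal of the --lora-base handling in server.cpp above. A rough, self-contained C++ sketch of that loop shape follows; the llama_model_apply_lora_from_file body here is a hypothetical stub standing in for the real llama.cpp call so the example compiles on its own, and the adapter path in main is made up.

#include <cstdio>
#include <string>
#include <tuple>
#include <vector>

// Stand-ins so the sketch is self-contained (not the real llama.cpp API).
struct llama_model;

static int llama_model_apply_lora_from_file(llama_model * /*model*/,
                                            const char * path_lora,
                                            float scale,
                                            const char * path_base_model,
                                            int /*n_threads*/) {
    // Stub: the real function merges the adapter weights into the model.
    std::printf("apply %s (scale %.2f, base %s)\n",
                path_lora, scale, path_base_model ? path_base_model : "none");
    return 0;
}

int main() {
    llama_model * model = nullptr;  // placeholder; real code loads the model first
    std::vector<std::tuple<std::string, float>> lora_adapter = {{"adapter.gguf", 1.0f}};
    int n_threads = 4;

    for (unsigned int i = 0; i < lora_adapter.size(); ++i) {
        const std::string & path  = std::get<0>(lora_adapter[i]);
        float               scale = std::get<1>(lora_adapter[i]);
        // Base-model path is always nullptr now that the lora-base option is gone.
        int err = llama_model_apply_lora_from_file(model, path.c_str(), scale,
                                                   /*path_base_model=*/nullptr, n_threads);
        if (err != 0) {
            std::fprintf(stderr, "error: failed to apply lora adapter\n");
            return 1;
        }
    }
    return 0;
}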
llm/patches/10-llama3-rope.diff deleted (100644 → 0, view file @ f26aef9a)
From 2f872f294fb6f5c6e8f983b68c40ea656053dd92 Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Tue, 23 Jul 2024 14:33:29 -0700
Subject: [PATCH] llama 3.1 rope scaling
---
src/llama.cpp | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/src/llama.cpp b/src/llama.cpp
index 8fe51971..a9969df8 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -2472,6 +2472,7 @@ struct llama_layer {
// long rope factors
struct ggml_tensor * rope_long = nullptr;
struct ggml_tensor * rope_short = nullptr;
+ struct ggml_tensor * rope_freqs = nullptr;
// bitnet scale
struct ggml_tensor * wq_scale;
@@ -6143,6 +6144,8 @@ static bool llm_load_tensors(
layer.ffn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd});
+ layer.rope_freqs = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ROPE_FREQS, "weight"), { n_embd/n_head/2 }, llama_model_loader::TENSOR_NOT_REQUIRED | (i != 0 ? llama_model_loader::TENSOR_DUPLICATED : 0));
+
if (n_expert == 0) {
layer.ffn_gate = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff});
layer.ffn_down = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd});
@@ -8620,6 +8623,10 @@ struct llm_build_context {
// choose long/short freq factors based on the context size
const auto n_ctx_pre_seq = cparams.n_ctx / cparams.n_seq_max;
+ if (model.layers[il].rope_freqs != nullptr) {
+ return model.layers[il].rope_freqs;
+ }
+
if (n_ctx_pre_seq > hparams.n_ctx_orig_yarn) {
return model.layers[il].rope_long;
}
@@ -8814,6 +8821,9 @@ struct llm_build_context {
// self-attention
{
+ // rope freq factors for llama3; may return nullptr for llama2 and other models
+ struct ggml_tensor * rope_factors = build_rope_factors(il);
+
// compute Q and K and RoPE them
struct ggml_tensor * Qcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wq, cur);
cb(Qcur, "Qcur", il);
@@ -8837,14 +8847,14 @@ struct llm_build_context {
}
Qcur = ggml_rope_ext(
- ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos, nullptr,
+ ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos, rope_factors,
n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
ext_factor, attn_factor, beta_fast, beta_slow
);
cb(Qcur, "Qcur", il);
Kcur = ggml_rope_ext(
- ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos, nullptr,
+ ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos, rope_factors,
n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
ext_factor, attn_factor, beta_fast, beta_slow
);
--
2.45.2
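This patch is deleted by the commit, presumably because the bumped submodule (`6eeaeba1`) carries equivalent llama 3.1 rope-factor support upstream. Its core idea was: prefer a per-layer `rope_freqs` tensor when the model ships one, and only then fall back to choosing long/short factors by context size. A simplified, self-contained C++ sketch of that selection order, using stand-in types rather than llama.cpp's real structs:

// Illustrative sketch only: simplified stand-ins for llama.cpp's tensor and
// layer types, showing the selection order the patch introduced.
struct ggml_tensor { int id; };

struct layer_rope_factors {
    ggml_tensor * rope_long  = nullptr;  // long-context frequency factors
    ggml_tensor * rope_short = nullptr;  // short-context frequency factors
    ggml_tensor * rope_freqs = nullptr;  // per-layer factors (llama 3.1) added by the patch
};

// Mirrors build_rope_factors in the hunks above: the per-layer tensor wins when
// present, otherwise pick long/short factors by effective per-sequence context.
static ggml_tensor * build_rope_factors(const layer_rope_factors & layer,
                                        unsigned n_ctx_per_seq,
                                        unsigned n_ctx_orig_yarn) {
    if (layer.rope_freqs != nullptr) {
        return layer.rope_freqs;
    }
    return (n_ctx_per_seq > n_ctx_orig_yarn) ? layer.rope_long : layer.rope_short;
}

int main() {
    ggml_tensor freqs{1};
    layer_rope_factors layer;
    layer.rope_freqs = &freqs;
    // With rope_freqs present it is chosen regardless of context size.
    return build_rope_factors(layer, 8192, 4096) == &freqs ? 0 : 1;
}

In the patch itself, the selected factors were then passed to ggml_rope_ext for both Qcur and Kcur, replacing the previous nullptr argument.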