OpenDAS / ollama / commit 9adca7f7
Authored Dec 14, 2023 by Daniel Hiltgen

Bump llama.cpp to b1662 and set n_parallel=1

Parent: 89bbaafa
Showing 3 changed files, with 9 additions and 9 deletions:

  llm/ext_server.go                                                 +1 -1
  llm/llama.cpp/gguf                                                +1 -1
  llm/llama.cpp/patches/0001-Expose-callable-API-for-server.patch   +7 -7
llm/ext_server.go

@@ -160,7 +160,7 @@ func newExtServer(server extServer, model string, adapters, projectors []string,
 	sparams.n_batch = C.uint(opts.NumBatch)
 	sparams.n_gpu_layers = C.int(numGPU)
 	sparams.main_gpu = C.int(opts.MainGPU)
-	sparams.n_parallel = 2 // TODO - wire up concurrency
+	sparams.n_parallel = 1 // TODO - wire up concurrency
 
 	// Always use the value encoded in the model
 	sparams.rope_freq_base = 0.0
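For context on the one-line change above: n_parallel sets how many request slots the embedded llama.cpp server allocates, and (an assumption drawn from the server code of that era, not from this commit) the model's context window is divided evenly across slots, so extra slots shrink the context available to each request. A minimal C++ sketch of that arithmetic, illustration only:

#include <cstdint>
#include <cstdio>

int main() {
    // Assumed b16xx-era server behavior: each slot receives
    // n_ctx / n_parallel tokens of context. With n_parallel = 1,
    // a single request keeps the whole window, which matches the
    // intent of this commit (only one request is served at a time).
    const int32_t n_ctx = 2048;
    const int32_t slot_counts[] = {1, 2, 4};
    for (int32_t n_parallel : slot_counts) {
        std::printf("n_parallel=%d -> per-slot context %d\n",
                    n_parallel, n_ctx / n_parallel);
    }
    return 0;
}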
llm/llama.cpp/gguf @ 328b83de (compare a7aee47b...328b83de)

-Subproject commit a7aee47b98e45539d491071b25778b833b77e387
+Subproject commit 328b83de23b33240e28f4e74900d1d06726f5eb1
llm/llama.cpp/patches/0001-Expose-callable-API-for-server.patch

@@ -1,4 +1,4 @@
-From 087cf3300e973d7790db8f7cad01d2a790de38be Mon Sep 17 00:00:00 2001
+From b5e195803e2a989e57eef0010adce778df1e2d01 Mon Sep 17 00:00:00 2001
 From: Daniel Hiltgen <daniel@ollama.com>
 Date: Mon, 13 Nov 2023 12:25:58 -0800
 Subject: [PATCH] Expose callable API for server
@@ -46,7 +46,7 @@ index 859cd12..4ea47a7 100644
 +endif()
 \ No newline at end of file
 diff --git a/examples/server/server.cpp b/examples/server/server.cpp
-index d0cd8e1..5f5d4c5 100644
+index 0403853..2084fd8 100644
 --- a/examples/server/server.cpp
 +++ b/examples/server/server.cpp
 @@ -5,6 +5,9 @@
@@ -59,15 +59,15 @@ index d0cd8e1..5f5d4c5 100644
 #ifndef NDEBUG
 // crash the server in debug mode, otherwise send an http 500 error
-@@ -2632,6 +2635,7 @@ static void append_to_generated_text_from_generated_token_probs(llama_server_con
+@@ -2643,6 +2646,7 @@ static void append_to_generated_text_from_generated_token_probs(llama_server_con
 }
 }
 +#ifndef LLAMA_SERVER_LIBRARY
 int main(int argc, char **argv)
 {
-// own arguments required by this example
+#if SERVER_VERBOSE != 1
-@@ -3066,3 +3070,273 @@ int main(int argc, char **argv)
+@@ -3123,3 +3127,273 @@ int main(int argc, char **argv)
 llama_backend_free();
 return 0;
 }
@@ -439,10 +439,10 @@ index 0000000..d22f1b6
 +#endif // LLAMA_SERVER_LIBRARY
 \ No newline at end of file
 diff --git a/ggml-cuda.cu b/ggml-cuda.cu
-index 9e1acd3..ea64b55 100644
+index f20846f..9640cf3 100644
 --- a/ggml-cuda.cu
 +++ b/ggml-cuda.cu
-@@ -6505,6 +6505,7 @@ static cudaError_t ggml_cuda_cpy_tensor_2d(
+@@ -6757,6 +6757,7 @@ static cudaError_t ggml_cuda_cpy_tensor_2d(
 CUDA_CHECK(cudaGetDevice(&id));
 src_ptr = (char *) extra->data_device[id];
 } else {
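The renumbered hunks above just track upstream line drift in server.cpp and ggml-cuda.cu between the two llama.cpp revisions; the substance of the carried patch is unchanged. Its core trick, visible in the +#ifndef LLAMA_SERVER_LIBRARY / +#endif lines, is to fence off the server's main() so server.cpp can be compiled into a callable library (ollama's ext_server) rather than a standalone binary. A minimal sketch of that guard pattern, with a stand-in body rather than the real server code:

// When LLAMA_SERVER_LIBRARY is defined, main() is compiled out and the
// embedding program supplies its own entry point; otherwise the file
// still builds as a normal standalone executable.
#ifndef LLAMA_SERVER_LIBRARY
int main(int argc, char **argv) {
    (void)argc; // stand-in body; the real server parses args and serves here
    (void)argv;
    return 0;
}
#endif // LLAMA_SERVER_LIBRARY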