Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
c942e4a0
Unverified
Commit
c942e4a0
authored
Apr 17, 2024
by
ManniX-ITA
Committed by
GitHub
Apr 17, 2024
Browse files
Fixed startup sequence to report model loading
parent
bd54b082
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
21 additions
and
21 deletions
+21
-21
llm/ext_server/server.cpp
llm/ext_server/server.cpp
+21
-21
No files found.
llm/ext_server/server.cpp
View file @
c942e4a0
...
...
@@ -2726,7 +2726,7 @@ static json format_detokenized_response(std::string content)
static
void
log_server_request
(
const
httplib
::
Request
&
req
,
const
httplib
::
Response
&
res
)
{
// skip GH copilot requests when using default port
if
(
req
.
path
==
"/v1/health"
||
req
.
path
==
"/v1/completions"
)
if
(
req
.
path
==
"/health"
||
req
.
path
==
"/v1/health"
||
req
.
path
==
"/v1/completions"
)
{
return
;
}
...
...
@@ -3053,6 +3053,26 @@ int main(int argc, char **argv) {
log_data
[
"api_key"
]
=
"api_key: "
+
std
::
to_string
(
sparams
.
api_keys
.
size
())
+
" keys loaded"
;
}
if
(
sparams
.
n_threads_http
<
1
)
{
// +2 threads for monitoring endpoints
sparams
.
n_threads_http
=
std
::
max
(
params
.
n_parallel
+
2
,
(
int32_t
)
std
::
thread
::
hardware_concurrency
()
-
1
);
}
log_data
[
"n_threads_http"
]
=
std
::
to_string
(
sparams
.
n_threads_http
);
svr
.
new_task_queue
=
[
&
sparams
]
{
return
new
httplib
::
ThreadPool
(
sparams
.
n_threads_http
);
};
LOG_INFO
(
"HTTP server listening"
,
log_data
);
// run the HTTP server in a thread - see comment below
std
::
thread
t
([
&
]()
{
if
(
!
svr
.
listen_after_bind
())
{
state
.
store
(
SERVER_STATE_ERROR
);
return
1
;
}
return
0
;
});
// load the model
if
(
!
llama
.
load_model
(
params
))
{
...
...
@@ -3257,26 +3277,6 @@ int main(int argc, char **argv) {
}*/
//);
if
(
sparams
.
n_threads_http
<
1
)
{
// +2 threads for monitoring endpoints
sparams
.
n_threads_http
=
std
::
max
(
params
.
n_parallel
+
2
,
(
int32_t
)
std
::
thread
::
hardware_concurrency
()
-
1
);
}
log_data
[
"n_threads_http"
]
=
std
::
to_string
(
sparams
.
n_threads_http
);
svr
.
new_task_queue
=
[
&
sparams
]
{
return
new
httplib
::
ThreadPool
(
sparams
.
n_threads_http
);
};
LOG_INFO
(
"HTTP server listening"
,
log_data
);
// run the HTTP server in a thread - see comment below
std
::
thread
t
([
&
]()
{
if
(
!
svr
.
listen_after_bind
())
{
state
.
store
(
SERVER_STATE_ERROR
);
return
1
;
}
return
0
;
});
llama
.
queue_tasks
.
on_new_task
(
std
::
bind
(
&
llama_server_context
::
process_single_task
,
&
llama
,
std
::
placeholders
::
_1
));
llama
.
queue_tasks
.
on_finish_multitask
(
std
::
bind
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment