Unverified Commit c942e4a0 authored by ManniX-ITA's avatar ManniX-ITA Committed by GitHub
Browse files

Fixed startup sequence to report model loading

parent bd54b082
...@@ -2726,7 +2726,7 @@ static json format_detokenized_response(std::string content) ...@@ -2726,7 +2726,7 @@ static json format_detokenized_response(std::string content)
static void log_server_request(const httplib::Request &req, const httplib::Response &res) static void log_server_request(const httplib::Request &req, const httplib::Response &res)
{ {
// skip GH copilot requests when using default port // skip GH copilot requests when using default port
if (req.path == "/v1/health" || req.path == "/v1/completions") if (req.path == "/health" || req.path == "/v1/health" || req.path == "/v1/completions")
{ {
return; return;
} }
...@@ -3053,6 +3053,26 @@ int main(int argc, char **argv) { ...@@ -3053,6 +3053,26 @@ int main(int argc, char **argv) {
log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded"; log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded";
} }
if (sparams.n_threads_http < 1) {
// +2 threads for monitoring endpoints
sparams.n_threads_http = std::max(params.n_parallel + 2, (int32_t) std::thread::hardware_concurrency() - 1);
}
log_data["n_threads_http"] = std::to_string(sparams.n_threads_http);
svr.new_task_queue = [&sparams] { return new httplib::ThreadPool(sparams.n_threads_http); };
LOG_INFO("HTTP server listening", log_data);
// run the HTTP server in a thread - see comment below
std::thread t([&]()
{
if (!svr.listen_after_bind())
{
state.store(SERVER_STATE_ERROR);
return 1;
}
return 0;
});
// load the model // load the model
if (!llama.load_model(params)) if (!llama.load_model(params))
{ {
...@@ -3257,26 +3277,6 @@ int main(int argc, char **argv) { ...@@ -3257,26 +3277,6 @@ int main(int argc, char **argv) {
}*/ }*/
//); //);
if (sparams.n_threads_http < 1) {
// +2 threads for monitoring endpoints
sparams.n_threads_http = std::max(params.n_parallel + 2, (int32_t) std::thread::hardware_concurrency() - 1);
}
log_data["n_threads_http"] = std::to_string(sparams.n_threads_http);
svr.new_task_queue = [&sparams] { return new httplib::ThreadPool(sparams.n_threads_http); };
LOG_INFO("HTTP server listening", log_data);
// run the HTTP server in a thread - see comment below
std::thread t([&]()
{
if (!svr.listen_after_bind())
{
state.store(SERVER_STATE_ERROR);
return 1;
}
return 0;
});
llama.queue_tasks.on_new_task(std::bind( llama.queue_tasks.on_new_task(std::bind(
&llama_server_context::process_single_task, &llama, std::placeholders::_1)); &llama_server_context::process_single_task, &llama, std::placeholders::_1));
llama.queue_tasks.on_finish_multitask(std::bind( llama.queue_tasks.on_finish_multitask(std::bind(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment