OpenDAS / ollama

Commit 58876091, authored May 09, 2024 by Michael Yang
Parent: dc18eee3

log clean up

Showing 3 changed files with 30 additions and 34 deletions:

  llm/ext_server/server.cpp  +20 -24
  llm/ext_server/utils.hpp    +8  -5
  llm/server.go               +2  -5
llm/ext_server/server.cpp
@@ -66,7 +66,7 @@ struct server_params {
 };

 bool server_verbose = false;
-bool server_log_json = true;
+bool server_log_json = false;

 enum stop_type
 {
     STOP_FULL,
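This hunk flips the default log format: with server_log_json now false, the server emits plain-text log lines unless JSON is requested explicitly. Below is a toy sketch of the two output shapes the flag selects between; toy_log is a stand-in for illustration, not the project's actual server_log from llm/ext_server/utils.hpp.

// Toy stand-in showing the two output shapes that server_log_json selects
// between. Not the project's implementation.
#include <cstdio>

static bool server_log_json = false; // new default in this commit (was true)

static void toy_log(const char *level, const char *function, const char *message) {
    if (server_log_json) {
        // JSON mode: one object per line (utils.hpp builds this with nlohmann::json)
        std::printf("{\"level\":\"%s\",\"function\":\"%s\",\"msg\":\"%s\"}\n",
                    level, function, message);
    } else {
        // plain mode, roughly the "LEVEL [function] message |" layout this
        // commit introduces in utils.hpp
        std::printf("%s [%s] %s |\n", level, function, message);
    }
}

int main() {
    toy_log("INFO", "update_slots", "updating system prompt");
    return 0;
}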
@@ -266,7 +266,7 @@ struct server_slot {
         sprintf(buffer, "prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)",
                 t_prompt_processing, n_prompt_tokens_processed, t_token, n_tokens_second);
-        LOG_INFO(buffer, {
+        LOG_DEBUG(buffer, {
             {"slot_id",             id},
             {"task_id",             task_id},
             {"t_prompt_processing", t_prompt_processing},
@@ -280,7 +280,7 @@ struct server_slot {
         sprintf(buffer, "generation eval time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)",
                 t_token_generation, n_decoded, t_token, n_tokens_second);
-        LOG_INFO(buffer, {
+        LOG_DEBUG(buffer, {
             {"slot_id",            id},
             {"task_id",            task_id},
             {"t_token_generation", t_token_generation},
@@ -290,7 +290,7 @@ struct server_slot {
         });

         sprintf(buffer, "          total time = %10.2f ms", t_prompt_processing + t_token_generation);
-        LOG_INFO(buffer, {
+        LOG_DEBUG(buffer, {
             {"slot_id",             id},
             {"task_id",             task_id},
             {"t_prompt_processing", t_prompt_processing},
@@ -371,7 +371,7 @@ struct llama_server_context
     {
         if (clp_ctx)
         {
-            LOG_INFO("freeing clip model", {});
+            LOG_DEBUG("freeing clip model", {});
             clip_free(clp_ctx);
             clp_ctx = nullptr;
         }
@@ -392,7 +392,7 @@ struct llama_server_context
         params = params_;
         if (!params.mmproj.empty()) {
             multimodal = true;
-            LOG_INFO("Multi Modal Mode Enabled", {});
+            LOG_DEBUG("Multi Modal Mode Enabled", {});
             clp_ctx = clip_model_load(params.mmproj.c_str(), /*verbosity=*/ 1);
             if (clp_ctx == nullptr) {
                 LOG_ERROR("unable to load clip model", {{"model", params.mmproj}});
@@ -445,7 +445,7 @@ struct llama_server_context
         const int32_t n_ctx_slot = n_ctx / params.n_parallel;

-        LOG_INFO("initializing slots", {{"n_slots", params.n_parallel}});
+        LOG_DEBUG("initializing slots", {{"n_slots", params.n_parallel}});
         for (int i = 0; i < params.n_parallel; i++)
         {
             server_slot slot;
@@ -454,7 +454,7 @@ struct llama_server_context
             slot.n_ctx = n_ctx_slot;
             slot.n_predict = params.n_predict;

-            LOG_INFO("new slot", {
+            LOG_DEBUG("new slot", {
                 {"slot_id",    slot.id},
                 {"n_ctx_slot", slot.n_ctx}
             });
@@ -468,7 +468,7 @@ struct llama_server_context
                 //GGML_ASSERT(n_ctx_train % ga_w == 0 && "n_ctx_train must be a multiple of ga_w"); // NOLINT
                 //GGML_ASSERT(n_ctx >= n_ctx_train * ga_n && "n_ctx must be at least n_ctx_train * ga_n"); // NOLINT

-                LOG_INFO("slot self-extend", {
+                LOG_DEBUG("slot self-extend", {
                     {"slot_id", slot.id},
                     {"ga_n",    ga_n},
                     {"ga_w",    ga_w}
@@ -827,7 +827,7 @@ struct llama_server_context
         all_slots_are_idle = false;

-        LOG_INFO("slot is processing task", {
+        LOG_DEBUG("slot is processing task", {
             {"slot_id", slot->id},
             {"task_id", slot->task_id},
         });
@@ -1504,7 +1504,7 @@ struct llama_server_context
                     }
                     slots_data.push_back(slot_data);
                 }
-                LOG_INFO("slot data", {
+                LOG_DEBUG("slot data", {
                     {"task_id",            task.id},
                     {"n_idle_slots",       n_idle_slots},
                     {"n_processing_slots", n_processing_slots}
@@ -1566,7 +1566,7 @@ struct llama_server_context
     bool update_slots() {
         if (system_need_update)
         {
-            LOG_INFO("updating system prompt", {});
+            LOG_DEBUG("updating system prompt", {});
             system_prompt_update();
         }
@@ -1576,7 +1576,7 @@ struct llama_server_context
         {
             if (system_prompt.empty() && clean_kv_cache)
             {
-                LOG_INFO("all slots are idle and system prompt is empty, clear the KV cache", {});
+                LOG_DEBUG("all slots are idle and system prompt is empty, clear the KV cache", {});
                 kv_cache_clear();
             }
             return true;
@@ -1599,7 +1599,7 @@ struct llama_server_context
                     const int n_left    = (int) system_tokens.size() + slot.n_past - n_keep;
                     const int n_discard = n_left / 2;

-                    LOG_INFO("slot context shift", {
+                    LOG_DEBUG("slot context shift", {
                         {"slot_id", slot.id},
                         {"task_id", slot.task_id},
                         {"n_keep",  n_keep},
@@ -1638,7 +1638,7 @@ struct llama_server_context
                     slot.command = NONE;
                     slot.t_last_used = ggml_time_us();

-                    LOG_INFO("slot released", {
+                    LOG_DEBUG("slot released", {
                         {"slot_id", slot.id},
                         {"task_id", slot.task_id},
                         {"n_ctx",   n_ctx},
@@ -1807,7 +1807,7 @@ struct llama_server_context
                         slot.ga_i = ga_i;
                     }

-                    LOG_INFO("slot progression", {
+                    LOG_DEBUG("slot progression", {
                         {"slot_id", slot.id},
                         {"task_id", slot.task_id},
                         {"n_past",  slot.n_past},
@@ -1822,7 +1822,7 @@ struct llama_server_context
                     if (slot.n_past == slot.n_prompt_tokens && slot.n_past > 0)
                     {
                         // we have to evaluate at least 1 token to generate logits.
-                        LOG_INFO("we have to evaluate at least 1 token to generate logits", {
+                        LOG_DEBUG("we have to evaluate at least 1 token to generate logits", {
                             {"slot_id", slot.id},
                             {"task_id", slot.task_id}
                         });
@@ -1834,7 +1834,7 @@ struct llama_server_context
                     }

                     int p0 = (int) system_tokens.size() + slot.n_past;
-                    LOG_INFO("kv cache rm [p0, end)", {
+                    LOG_DEBUG("kv cache rm [p0, end)", {
                         {"slot_id", slot.id},
                         {"task_id", slot.task_id},
                         {"p0",      p0}
@@ -2491,11 +2491,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
         }
         else if (arg == "-v" || arg == "--verbose")
         {
-#if SERVER_VERBOSE != 1
-            LOG_WARNING("server.cpp is not built with verbose logging.", {});
-#else
             server_verbose = true;
-#endif
         }
         else if (arg == "--mlock")
         {
@@ -2601,7 +2597,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
         else if (arg == "--log-disable")
         {
             log_set_target(stdout);
-            LOG_INFO("logging to file is disabled.", {});
+            LOG_DEBUG("logging to file is disabled.", {});
         }
         else if (arg == "--slots-endpoint-disable")
         {
@@ -2732,7 +2728,7 @@ static void log_server_request(const httplib::Request &req, const httplib::Respo
         return;
     }

-    LOG_INFO("request", {
+    LOG_DEBUG("request", {
         {"remote_addr", req.remote_addr},
         {"remote_port", req.remote_port},
         {"status",      res.status},
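Every server.cpp hunk above follows one pattern: routine per-slot and per-request messages drop from LOG_INFO to LOG_DEBUG. Combined with the gate added to server_log in utils.hpp below, which discards DEBUG records unless server_verbose is set, these messages vanish from default output. A minimal self-contained sketch of that effect, assuming simplified macro shapes (no JSON extras):

// Minimal sketch of the demotion's effect, mirroring the gate this commit
// adds to server_log in utils.hpp. Simplified: message only, no structured fields.
#include <cstdio>
#include <cstring>

static bool server_verbose = false;

static void server_log(const char *level, const char *function, int line,
                       const char *message) {
    (void) line;
    // the commit's gate: skip DEBUG-level records unless verbose is enabled
    // (strncmp with strlen(level) matches level as a prefix of "DEBUG")
    if (std::strncmp("DEBUG", level, std::strlen(level)) == 0 && !server_verbose) {
        return;
    }
    std::printf("%s [%s] %s |\n", level, function, message);
}

#define LOG_INFO(MSG)  server_log("INFO",  __func__, __LINE__, MSG)
#define LOG_DEBUG(MSG) server_log("DEBUG", __func__, __LINE__, MSG)

int main() {
    LOG_DEBUG("slot released");  // silent by default after this commit
    LOG_INFO("model loaded");    // still printed
    server_verbose = true;
    LOG_DEBUG("slot released");  // printed only in verbose mode
    return 0;
}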
llm/ext_server/utils.hpp
@@ -55,9 +55,10 @@ extern bool server_log_json;
     } while (0)
 #endif

-#define LOG_ERROR(  MSG, ...) server_log("ERR",   __func__, __LINE__, MSG, __VA_ARGS__)
+#define LOG_ERROR(  MSG, ...) server_log("ERROR", __func__, __LINE__, MSG, __VA_ARGS__)
 #define LOG_WARNING(MSG, ...) server_log("WARN",  __func__, __LINE__, MSG, __VA_ARGS__)
 #define LOG_INFO(   MSG, ...) server_log("INFO",  __func__, __LINE__, MSG, __VA_ARGS__)
+#define LOG_DEBUG(  MSG, ...) server_log("DEBUG", __func__, __LINE__, MSG, __VA_ARGS__)

 enum server_state {
     SERVER_STATE_LOADING_MODEL, // Server is starting up, model not fully loaded yet
@@ -123,6 +124,10 @@ static inline void server_log(const char *level, const char *function, int line,
         {"timestamp", time(nullptr)},
     };

+    if (strncmp("DEBUG", level, strlen(level)) == 0 && !server_verbose)
+    {
+        return;
+    }
     if (server_log_json)
     {
         log.merge_patch({
@@ -137,14 +142,12 @@ static inline void server_log(const char *level, const char *function, int line,
         std::cout << log.dump(-1, ' ', false, json::error_handler_t::replace) << "\n" << std::flush;
     }
     else
     {
-        char buf[1024];
-        snprintf(buf, 1024, "%4s [%24s] %s", level, function, message);
-
         if (!extra.empty())
         {
             log.merge_patch(extra);
         }

         std::stringstream ss;
-        ss << buf << " |";
+        ss << level << " [" << function << "] " << message << " |";
         for (const auto &el : log.items())
         {
             const std::string value = el.value().dump(-1, ' ', false, json::error_handler_t::replace);
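With the snprintf/buf pair removed, plain-text lines now start with the full level string and untruncated function name before the structured fields. A simplified sketch of the new layout, where a vector of string pairs stands in for the nlohmann::json object the real code walks with log.items():

// Simplified sketch of the new plain-text layout. The real server_log
// iterates a nlohmann::json object; string pairs stand in for it here.
#include <iostream>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

static std::string format_plain(const std::string &level, const std::string &function,
                                const std::string &message,
                                const std::vector<std::pair<std::string, std::string>> &extra) {
    std::stringstream ss;
    // new format: level + function + message, then " |" and the structured fields
    ss << level << " [" << function << "] " << message << " |";
    for (const auto &kv : extra) {
        ss << " " << kv.first << "=" << kv.second;
    }
    return ss.str();
}

int main() {
    std::cout << format_plain("DEBUG", "update_slots", "slot released",
                              {{"slot_id", "0"}, {"task_id", "42"}})
              << "\n";
    // prints: DEBUG [update_slots] slot released | slot_id=0 task_id=42
    return 0;
}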
llm/server.go
@@ -157,11 +157,8 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
 		"--embedding",
 	}
-	if envconfig.Debug {
-		params = append(params, "--log-format", "json")
-	} else {
-		params = append(params, "--log-disable")
-	}
+
+	params = append(params, "--log-disable")
 	if opts.NumGPU >= 0 {
 		params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", opts.NumGPU))
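In the Go runner, the envconfig.Debug branch that chose between JSON logging and --log-disable is gone; the subprocess now always receives --log-disable. A reduced sketch of the flag assembly after this change; the options struct and serverParams helper are illustrative stand-ins, not ollama's actual types:

// Reduced sketch of the flag assembly after this commit: the runner always
// passes --log-disable instead of switching on envconfig.Debug.
package main

import "fmt"

type options struct {
	NumBatch int
	NumGPU   int
}

func serverParams(opts options) []string {
	params := []string{
		"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
		"--embedding",
	}

	params = append(params, "--log-disable")

	if opts.NumGPU >= 0 {
		params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", opts.NumGPU))
	}
	return params
}

func main() {
	fmt.Println(serverParams(options{NumBatch: 512, NumGPU: 33}))
}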