Unverified Commit 0204946d authored by Nicolas Patry, committed by GitHub
Browse files

Max token capacity metric (#2595)



* adding max_token_capacity_metric

* added tgi to name of metric

* Adding max capacity metric.

* Add description for the metrics

---------
Co-authored-by: Edwinhr716 <Edandres249@gmail.com>
parent d18ed5cf
...@@ -100,6 +100,7 @@ pub async fn connect_backend( ...@@ -100,6 +100,7 @@ pub async fn connect_backend(
.map_err(V3Error::Warmup)?, .map_err(V3Error::Warmup)?,
)?; )?;
tracing::info!("Setting max batch total tokens to {max_batch_total_tokens}"); tracing::info!("Setting max batch total tokens to {max_batch_total_tokens}");
metrics::gauge!("tgi_batch_max_total_tokens").set(max_batch_total_tokens);
let backend_info = BackendInfo { let backend_info = BackendInfo {
waiting_served_ratio, waiting_served_ratio,
......
...@@ -1937,6 +1937,11 @@ async fn start( ...@@ -1937,6 +1937,11 @@ async fn start(
metrics::Unit::Count, metrics::Unit::Count,
"Maximum tokens for the current batch" "Maximum tokens for the current batch"
); );
// Register the description under the same name that `connect_backend` uses when it
// sets the gauge (`tgi_batch_max_total_tokens`); describing a different name would
// leave the emitted gauge without a unit or help text.
metrics::describe_gauge!(
"tgi_batch_max_total_tokens",
metrics::Unit::Count,
"Maximum amount of tokens in total."
);
metrics::describe_histogram!( metrics::describe_histogram!(
"tgi_request_max_new_tokens", "tgi_request_max_new_tokens",
metrics::Unit::Count, metrics::Unit::Count,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment