Unverified commit 272e31c0, authored by Travis Johnson and committed by GitHub
Browse files

[Bugfix] Guard for negative counter metrics to prevent crash (#10430)


Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com>
parent 74f8c2cf
...@@ -1716,7 +1716,7 @@ class LLMEngine: ...@@ -1716,7 +1716,7 @@ class LLMEngine:
# not counted (to avoid double counting) # not counted (to avoid double counting)
actual_num_batched_tokens = scheduler_outputs.num_batched_tokens # type: ignore actual_num_batched_tokens = scheduler_outputs.num_batched_tokens # type: ignore
num_generation_tokens_from_prefill_groups = 0. num_generation_tokens_from_prefill_groups = 0
# NOTE: if scheduler_outputs.num_prefill_groups > 0 and # NOTE: if scheduler_outputs.num_prefill_groups > 0 and
# the len of scheduler_outputs.scheduled_seq_groups is != # the len of scheduler_outputs.scheduled_seq_groups is !=
# scheduler_outputs.num_prefill_groups, this means that # scheduler_outputs.num_prefill_groups, this means that
......
...@@ -512,6 +512,11 @@ class PrometheusStatLogger(StatLoggerBase): ...@@ -512,6 +512,11 @@ class PrometheusStatLogger(StatLoggerBase):
def _log_counter(self, counter, data: Union[int, float]) -> None:
    """Add ``data`` to ``counter`` under this logger's labels.

    Convenience function for logging to a counter.

    Args:
        counter: Metric object exposing ``labels(**kwargs)`` whose result
            has an ``inc(amount)`` method.
        data: Amount to add to the counter.

    A negative ``data`` is reported with a warning and skipped rather than
    forwarded, since a negative increment would raise ``ValueError``.
    """
    # Prevent ValueError from negative increment
    if data < 0:
        logger.warning("Skipping negative increment of %g to %s", data,
                       counter)
        return
    counter.labels(**self.labels).inc(data)
def _log_counter_labels(self, counter, data: CollectionsCounter, def _log_counter_labels(self, counter, data: CollectionsCounter,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment