Unverified commit 4df2e2d6, authored by Keiven C and committed by GitHub
Browse files

fix: reduce nats stats query frequency (#2847)


Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
Co-authored-by: Keiven Chang <keivenchang@users.noreply.github.com>
parent f72dc01d
......@@ -273,13 +273,12 @@ impl Component {
/// Add Prometheus metrics for this component's NATS service stats.
///
/// Starts a background task that periodically requests service statistics from NATS
- /// and updates the corresponding Prometheus metrics. The scraping interval is set to
- /// approximately 873ms (MAX_DELAY_MS), which is arbitrary but any value less than a second
- /// is fair game. This frequent scraping provides real-time service statistics updates.
+ /// and updates the corresponding Prometheus metrics. The first scrape happens immediately,
+ /// then subsequent scrapes occur at a fixed interval of 9.8 seconds (MAX_WAIT_MS),
+ /// which should be near or smaller than typical Prometheus scraping intervals to ensure
+ /// metrics are fresh when Prometheus collects them.
pub fn start_scraping_nats_service_component_metrics(&self) -> Result<()> {
- const NATS_TIMEOUT_AND_INITIAL_DELAY_MS: std::time::Duration =
- std::time::Duration::from_millis(300);
- const MAX_DELAY_MS: std::time::Duration = std::time::Duration::from_millis(873);
+ const MAX_WAIT_MS: std::time::Duration = std::time::Duration::from_millis(9800); // Should be <= Prometheus scrape interval
// If there is another component with the same service name, this will fail.
let component_metrics = ComponentNatsServerPrometheusMetrics::new(self)?;
......@@ -308,8 +307,8 @@ impl Component {
// By using the DRT's own runtime handle, we ensure the task runs in the
// correct runtime that will persist for the lifetime of the component.
c.drt().runtime().secondary().spawn(async move {
- let timeout = NATS_TIMEOUT_AND_INITIAL_DELAY_MS;
- let mut interval = tokio::time::interval(MAX_DELAY_MS);
+ let timeout = std::time::Duration::from_millis(500);
+ let mut interval = tokio::time::interval(MAX_WAIT_MS);
interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
loop {
......@@ -326,6 +325,7 @@ impl Component {
m.reset_to_zeros();
}
}
+ interval.tick().await;
}
});
......
......@@ -1527,6 +1527,10 @@ mod test_metricsregistry_nats {
}
println!("✓ Sent messages and received responses successfully");
+ println!("\n=== Waiting 500ms for metrics to update ===");
+ sleep(Duration::from_millis(500)).await;
+ println!("✓ Wait complete, getting final metrics...");
let final_drt_output = drt.prometheus_metrics_fmt().unwrap();
println!("\n=== Final Prometheus DRT output ===");
println!("{}", final_drt_output);
......@@ -1542,10 +1546,6 @@ mod test_metricsregistry_nats {
.filter_map(|line| super::test_helpers::parse_prometheus_metric(line.as_str()))
.collect();
- println!("\n=== Waiting 1 second for metrics to stabilize ===");
- sleep(Duration::from_secs(1)).await;
- println!("✓ Wait complete, checking final metrics...");
let post_expected_metric_values = [
// DRT NATS metrics
(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment