Unverified commit 4df2e2d6, authored by Keiven C and committed by GitHub
Browse files

fix: reduce nats stats query frequency (#2847)


Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
Co-authored-by: Keiven Chang <keivenchang@users.noreply.github.com>
parent f72dc01d
...@@ -273,13 +273,12 @@ impl Component { ...@@ -273,13 +273,12 @@ impl Component {
/// Add Prometheus metrics for this component's NATS service stats. /// Add Prometheus metrics for this component's NATS service stats.
/// ///
/// Starts a background task that periodically requests service statistics from NATS /// Starts a background task that periodically requests service statistics from NATS
/// and updates the corresponding Prometheus metrics. The scraping interval is set to /// and updates the corresponding Prometheus metrics. The first scrape happens immediately,
/// approximately 873ms (MAX_DELAY_MS), which is arbitrary but any value less than a second /// then subsequent scrapes occur at a fixed interval of 9.8 seconds (MAX_WAIT_MS),
/// is fair game. This frequent scraping provides real-time service statistics updates. /// which should be near or smaller than typical Prometheus scraping intervals to ensure
/// metrics are fresh when Prometheus collects them.
pub fn start_scraping_nats_service_component_metrics(&self) -> Result<()> { pub fn start_scraping_nats_service_component_metrics(&self) -> Result<()> {
const NATS_TIMEOUT_AND_INITIAL_DELAY_MS: std::time::Duration = const MAX_WAIT_MS: std::time::Duration = std::time::Duration::from_millis(9800); // Should be <= Prometheus scrape interval
std::time::Duration::from_millis(300);
const MAX_DELAY_MS: std::time::Duration = std::time::Duration::from_millis(873);
// If there is another component with the same service name, this will fail. // If there is another component with the same service name, this will fail.
let component_metrics = ComponentNatsServerPrometheusMetrics::new(self)?; let component_metrics = ComponentNatsServerPrometheusMetrics::new(self)?;
...@@ -308,8 +307,8 @@ impl Component { ...@@ -308,8 +307,8 @@ impl Component {
// By using the DRT's own runtime handle, we ensure the task runs in the // By using the DRT's own runtime handle, we ensure the task runs in the
// correct runtime that will persist for the lifetime of the component. // correct runtime that will persist for the lifetime of the component.
c.drt().runtime().secondary().spawn(async move { c.drt().runtime().secondary().spawn(async move {
let timeout = NATS_TIMEOUT_AND_INITIAL_DELAY_MS; let timeout = std::time::Duration::from_millis(500);
let mut interval = tokio::time::interval(MAX_DELAY_MS); let mut interval = tokio::time::interval(MAX_WAIT_MS);
interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
loop { loop {
...@@ -326,6 +325,7 @@ impl Component { ...@@ -326,6 +325,7 @@ impl Component {
m.reset_to_zeros(); m.reset_to_zeros();
} }
} }
interval.tick().await; interval.tick().await;
} }
}); });
......
...@@ -1527,6 +1527,10 @@ mod test_metricsregistry_nats { ...@@ -1527,6 +1527,10 @@ mod test_metricsregistry_nats {
} }
println!("✓ Sent messages and received responses successfully"); println!("✓ Sent messages and received responses successfully");
println!("\n=== Waiting 500ms for metrics to update ===");
sleep(Duration::from_millis(500)).await;
println!("✓ Wait complete, getting final metrics...");
let final_drt_output = drt.prometheus_metrics_fmt().unwrap(); let final_drt_output = drt.prometheus_metrics_fmt().unwrap();
println!("\n=== Final Prometheus DRT output ==="); println!("\n=== Final Prometheus DRT output ===");
println!("{}", final_drt_output); println!("{}", final_drt_output);
...@@ -1542,10 +1546,6 @@ mod test_metricsregistry_nats { ...@@ -1542,10 +1546,6 @@ mod test_metricsregistry_nats {
.filter_map(|line| super::test_helpers::parse_prometheus_metric(line.as_str())) .filter_map(|line| super::test_helpers::parse_prometheus_metric(line.as_str()))
.collect(); .collect();
println!("\n=== Waiting 1 second for metrics to stabilize ===");
sleep(Duration::from_secs(1)).await;
println!("✓ Wait complete, checking final metrics...");
let post_expected_metric_values = [ let post_expected_metric_values = [
// DRT NATS metrics // DRT NATS metrics
( (
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment