Unverified commit 5a00a7d6, authored by Keiven C, committed by GitHub
Browse files

fix: prevent duplicate Prometheus metrics from Python expfmt callbacks (#5761)


Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
parent 941ad640
......@@ -64,20 +64,13 @@ impl RuntimeMetrics {
})
});
// Register the callback at this hierarchy level
// Register the callback at this hierarchy level only.
// Do NOT register on parent hierarchies - combined scrapes automatically
// traverse child registries and include their callbacks.
self.hierarchy
.get_metrics_registry()
.add_expfmt_callback(callback_arc.clone());
// Also register at all parent hierarchy levels so the callback is accessible
// when prometheus_expfmt() is called on any parent (e.g., DRT)
let parents = self.hierarchy.parent_hierarchies();
for parent in parents.iter() {
parent
.get_metrics_registry()
.add_expfmt_callback(callback_arc.clone());
}
Ok(())
}
}
......
......@@ -1251,6 +1251,37 @@ mod test_metricsregistry_prefixes {
);
}
#[tokio::test]
async fn test_expfmt_callback_only_registered_on_endpoint_is_included_once() {
    // Regression guard for duplicated exposition output: a callback attached
    // solely to the endpoint's registry must show up exactly one time when the
    // scrape is taken from the root (DRT), which is expected to pick it up
    // through its traversal of child registries.
    let drt = create_test_drt_async().await;
    let ns = drt.namespace("ns_expfmt_ep_only").unwrap();
    let comp = ns.component("comp_expfmt_ep_only").unwrap();
    let ep = comp.endpoint("ep_expfmt_ep_only");

    // The callback emits this single sample line (newline-terminated).
    let metric_line = "dynamo_component_active_decode_blocks{dp_rank=\"0\"} 0\n";
    let emit_metric: PrometheusExpositionFormatCallback =
        Arc::new(move || Ok(metric_line.to_owned()));

    // Attach the callback at the endpoint level only.
    let endpoint_registry = ep.get_metrics_registry();
    endpoint_registry.add_expfmt_callback(emit_metric);

    // Scrape from the root of the hierarchy.
    let output = drt.metrics().prometheus_expfmt().unwrap();

    // `lines()` yields lines without their terminator, so compare against the
    // sample with its trailing newline stripped.
    let expected_line = metric_line.trim_end_matches('\n');
    let mut occurrences = 0usize;
    for scraped_line in output.lines() {
        if scraped_line == expected_line {
            occurrences += 1;
        }
    }

    assert_eq!(
        occurrences, 1,
        "endpoint-registered exposition callback should appear once, got {} occurrences\n\n{}",
        occurrences, output
    );
}
#[tokio::test]
async fn test_recursive_namespace() {
// Create a distributed runtime for testing
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment