Unverified Commit 6f7f6b12 authored by Tzu-Ling Kan's avatar Tzu-Ling Kan Committed by GitHub
Browse files

feat: Rename dynamo_component_concurrent_requests (#2515)


Signed-off-by: default avatarTzu-Ling Kan <tzulingk@nvidia.com>
parent 199b9a30
...@@ -40,7 +40,7 @@ As of Q2 2025, Dynamo HTTP Frontend metrics are exposed when you build container ...@@ -40,7 +40,7 @@ As of Q2 2025, Dynamo HTTP Frontend metrics are exposed when you build container
The core Dynamo backend system automatically exposes metrics with the `dynamo_component_*` prefix for all components that use the `DistributedRuntime` framework: The core Dynamo backend system automatically exposes metrics with the `dynamo_component_*` prefix for all components that use the `DistributedRuntime` framework:
- `dynamo_component_concurrent_requests`: Requests currently being processed (gauge) - `dynamo_component_inflight_requests`: Requests currently being processed (gauge)
- `dynamo_component_request_bytes_total`: Total bytes received in requests (counter) - `dynamo_component_request_bytes_total`: Total bytes received in requests (counter)
- `dynamo_component_request_duration_seconds`: Request processing time (histogram) - `dynamo_component_request_duration_seconds`: Request processing time (histogram)
- `dynamo_component_requests_total`: Total requests processed (counter) - `dynamo_component_requests_total`: Total requests processed (counter)
......
...@@ -62,7 +62,7 @@ The `dynamo_component_errors_total` metric includes the following error types: ...@@ -62,7 +62,7 @@ The `dynamo_component_errors_total` metric includes the following error types:
- `dynamo_component_request_duration_seconds` - Request processing time - `dynamo_component_request_duration_seconds` - Request processing time
### Gauges ### Gauges
- `dynamo_component_concurrent_requests` - Number of requests currently being processed - `dynamo_component_inflight_requests` - Number of requests currently being processed
### Custom Metrics (Optional) ### Custom Metrics (Optional)
- `dynamo_component_bytes_processed_total` - Total data bytes processed by system handler (example) - `dynamo_component_bytes_processed_total` - Total data bytes processed by system handler (example)
...@@ -80,9 +80,9 @@ These labels are prefixed with "dynamo_" to avoid collisions with Kubernetes and ...@@ -80,9 +80,9 @@ These labels are prefixed with "dynamo_" to avoid collisions with Kubernetes and
When the system is running, you'll see metrics from http://<ip>:<port>/metrics like this: When the system is running, you'll see metrics from http://<ip>:<port>/metrics like this:
```prometheus ```prometheus
# HELP dynamo_component_concurrent_requests Number of requests currently being processed by component endpoint # HELP dynamo_component_inflight_requests Number of requests currently being processed by component endpoint
# TYPE dynamo_component_concurrent_requests gauge # TYPE dynamo_component_inflight_requests gauge
dynamo_component_concurrent_requests{dynamo_component="example_component",dynamo_endpoint="example_endpoint9881",dynamo_namespace="example_namespace"} 0 dynamo_component_inflight_requests{dynamo_component="example_component",dynamo_endpoint="example_endpoint9881",dynamo_namespace="example_namespace"} 0
# HELP dynamo_component_bytes_processed_total Example of a custom metric. Total number of data bytes processed by system handler # HELP dynamo_component_bytes_processed_total Example of a custom metric. Total number of data bytes processed by system handler
# TYPE dynamo_component_bytes_processed_total counter # TYPE dynamo_component_bytes_processed_total counter
......
...@@ -118,7 +118,7 @@ async fn test_backend_with_metrics() -> Result<()> { ...@@ -118,7 +118,7 @@ async fn test_backend_with_metrics() -> Result<()> {
println!("{}", metrics_content); println!("{}", metrics_content);
println!("=== END METRICS CONTENT ==="); println!("=== END METRICS CONTENT ===");
// Parse and verify ingress metrics are greater than 0 (except concurrent_requests) // Parse and verify ingress metrics are greater than 0 (except inflight_requests)
verify_ingress_metrics_greater_than_0(&metrics_content); verify_ingress_metrics_greater_than_0(&metrics_content);
println!("Successfully retrieved and verified metrics!"); println!("Successfully retrieved and verified metrics!");
...@@ -143,7 +143,7 @@ async fn test_backend_with_metrics() -> Result<()> { ...@@ -143,7 +143,7 @@ async fn test_backend_with_metrics() -> Result<()> {
} }
fn verify_ingress_metrics_greater_than_0(metrics_content: &str) { fn verify_ingress_metrics_greater_than_0(metrics_content: &str) {
// Define the work handler metrics we want to verify (excluding concurrent_requests which can be 0) // Define the work handler metrics we want to verify (excluding inflight_requests which can be 0)
let metrics_to_verify = [ let metrics_to_verify = [
"my_custom_bytes_processed_total", "my_custom_bytes_processed_total",
"requests_total", "requests_total",
......
...@@ -1651,11 +1651,7 @@ mod test_metricsregistry_nats { ...@@ -1651,11 +1651,7 @@ mod test_metricsregistry_nats {
18000.0, 18000.0,
23000.0, 23000.0,
), // ~75-125% of 20660 ), // ~75-125% of 20660
( (build_metric_name(work_handler::INFLIGHT_REQUESTS), 0.0, 1.0), // 0 or very low
build_metric_name(work_handler::CONCURRENT_REQUESTS),
0.0,
1.0,
), // 0 or very low
// Histograms have _{count,sum} suffixes // Histograms have _{count,sum} suffixes
( (
format!( format!(
......
...@@ -127,7 +127,7 @@ pub mod work_handler { ...@@ -127,7 +127,7 @@ pub mod work_handler {
pub const RESPONSE_BYTES_TOTAL: &str = "response_bytes_total"; pub const RESPONSE_BYTES_TOTAL: &str = "response_bytes_total";
/// Number of requests currently being processed by work handler /// Number of requests currently being processed by work handler
pub const CONCURRENT_REQUESTS: &str = "concurrent_requests"; pub const INFLIGHT_REQUESTS: &str = "inflight_requests";
/// Time spent processing requests by work handler (histogram) /// Time spent processing requests by work handler (histogram)
pub const REQUEST_DURATION_SECONDS: &str = "request_duration_seconds"; pub const REQUEST_DURATION_SECONDS: &str = "request_duration_seconds";
......
...@@ -26,7 +26,7 @@ use tracing::Instrument; ...@@ -26,7 +26,7 @@ use tracing::Instrument;
pub struct WorkHandlerMetrics { pub struct WorkHandlerMetrics {
pub request_counter: IntCounter, pub request_counter: IntCounter,
pub request_duration: Histogram, pub request_duration: Histogram,
pub concurrent_requests: IntGauge, pub inflight_requests: IntGauge,
pub request_bytes: IntCounter, pub request_bytes: IntCounter,
pub response_bytes: IntCounter, pub response_bytes: IntCounter,
pub error_counter: IntCounterVec, pub error_counter: IntCounterVec,
...@@ -36,7 +36,7 @@ impl WorkHandlerMetrics { ...@@ -36,7 +36,7 @@ impl WorkHandlerMetrics {
pub fn new( pub fn new(
request_counter: IntCounter, request_counter: IntCounter,
request_duration: Histogram, request_duration: Histogram,
concurrent_requests: IntGauge, inflight_requests: IntGauge,
request_bytes: IntCounter, request_bytes: IntCounter,
response_bytes: IntCounter, response_bytes: IntCounter,
error_counter: IntCounterVec, error_counter: IntCounterVec,
...@@ -44,7 +44,7 @@ impl WorkHandlerMetrics { ...@@ -44,7 +44,7 @@ impl WorkHandlerMetrics {
Self { Self {
request_counter, request_counter,
request_duration, request_duration,
concurrent_requests, inflight_requests,
request_bytes, request_bytes,
response_bytes, response_bytes,
error_counter, error_counter,
...@@ -68,8 +68,8 @@ impl WorkHandlerMetrics { ...@@ -68,8 +68,8 @@ impl WorkHandlerMetrics {
None, None,
)?; )?;
let concurrent_requests = endpoint.create_intgauge( let inflight_requests = endpoint.create_intgauge(
"concurrent_requests", "inflight_requests",
"Number of requests currently being processed by work handler", "Number of requests currently being processed by work handler",
&[], &[],
)?; )?;
...@@ -96,7 +96,7 @@ impl WorkHandlerMetrics { ...@@ -96,7 +96,7 @@ impl WorkHandlerMetrics {
Ok(Self::new( Ok(Self::new(
request_counter, request_counter,
request_duration, request_duration,
concurrent_requests, inflight_requests,
request_bytes, request_bytes,
response_bytes, response_bytes,
error_counter, error_counter,
...@@ -121,7 +121,7 @@ where ...@@ -121,7 +121,7 @@ where
if let Some(m) = self.metrics() { if let Some(m) = self.metrics() {
m.request_counter.inc(); m.request_counter.inc();
m.concurrent_requests.inc(); m.inflight_requests.inc();
m.request_bytes.inc_by(payload.len() as u64); m.request_bytes.inc_by(payload.len() as u64);
} }
...@@ -289,7 +289,7 @@ where ...@@ -289,7 +289,7 @@ where
if let Some(m) = self.metrics() { if let Some(m) = self.metrics() {
let duration = start_time.elapsed(); let duration = start_time.elapsed();
m.request_duration.observe(duration.as_secs_f64()); m.request_duration.observe(duration.as_secs_f64());
m.concurrent_requests.dec(); m.inflight_requests.dec();
} }
Ok(()) Ok(())
......
...@@ -131,7 +131,7 @@ async def send_request_with_retry(url: str, payload: dict, max_retries: int = 4) ...@@ -131,7 +131,7 @@ async def send_request_with_retry(url: str, payload: dict, max_retries: int = 4)
return False return False
async def send_concurrent_requests(urls: list, payload: dict, num_requests: int): async def send_inflight_requests(urls: list, payload: dict, num_requests: int):
"""Send multiple requests concurrently, alternating between URLs if multiple provided""" """Send multiple requests concurrently, alternating between URLs if multiple provided"""
# First, send test requests with retry to ensure all systems are ready # First, send test requests with retry to ensure all systems are ready
...@@ -228,7 +228,7 @@ def test_mocker_kv_router(request, runtime_services): ...@@ -228,7 +228,7 @@ def test_mocker_kv_router(request, runtime_services):
# Use async to send requests concurrently for better performance # Use async to send requests concurrently for better performance
asyncio.run( asyncio.run(
send_concurrent_requests( send_inflight_requests(
[ [
f"http://localhost:{frontend_port}/v1/chat/completions" f"http://localhost:{frontend_port}/v1/chat/completions"
], # Pass as list ], # Pass as list
...@@ -301,7 +301,7 @@ def test_mocker_two_kv_router(request, runtime_services): ...@@ -301,7 +301,7 @@ def test_mocker_two_kv_router(request, runtime_services):
# Use async to send requests concurrently, alternating between routers # Use async to send requests concurrently, alternating between routers
asyncio.run( asyncio.run(
send_concurrent_requests( send_inflight_requests(
router_urls, router_urls,
TEST_PAYLOAD, TEST_PAYLOAD,
NUM_REQUESTS, NUM_REQUESTS,
...@@ -404,7 +404,7 @@ def test_mocker_kv_router_overload_503(request, runtime_services): ...@@ -404,7 +404,7 @@ def test_mocker_kv_router_overload_503(request, runtime_services):
# First, send one request with retry to ensure system is ready # First, send one request with retry to ensure system is ready
logger.info("Sending initial request to ensure system is ready...") logger.info("Sending initial request to ensure system is ready...")
asyncio.run(send_concurrent_requests([url], test_payload_503, 1)) asyncio.run(send_inflight_requests([url], test_payload_503, 1))
# Now send 50 concurrent requests to exhaust resources, then verify 503 # Now send 50 concurrent requests to exhaust resources, then verify 503
logger.info("Sending 50 concurrent requests to exhaust resources...") logger.info("Sending 50 concurrent requests to exhaust resources...")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment