Unverified Commit 82794761 authored by Biswa Panda's avatar Biswa Panda Committed by GitHub
Browse files

feat(perf): add pipeline instrumentation and feature gated NVTX markers (#6746)

parent ba9a8a9f
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
use super::unified_client::{ClientStats, Headers, RequestPlaneClient}; use super::unified_client::{ClientStats, Headers, RequestPlaneClient};
use crate::error::{DynamoError, ErrorType}; use crate::error::{DynamoError, ErrorType};
use crate::metrics::transport_metrics::NATS_ERRORS_TOTAL;
use anyhow::Result; use anyhow::Result;
use async_trait::async_trait; use async_trait::async_trait;
use bytes::Bytes; use bytes::Bytes;
...@@ -51,6 +52,9 @@ impl RequestPlaneClient for NatsRequestClient { ...@@ -51,6 +52,9 @@ impl RequestPlaneClient for NatsRequestClient {
.request_with_headers(address.clone(), nats_headers, payload) .request_with_headers(address.clone(), nats_headers, payload)
.await .await
.map_err(|e| { .map_err(|e| {
NATS_ERRORS_TOTAL
.with_label_values(&["request_failed"])
.inc();
anyhow::anyhow!( anyhow::anyhow!(
DynamoError::builder() DynamoError::builder()
.error_type(ErrorType::CannotConnect) .error_type(ErrorType::CannotConnect)
......
...@@ -16,7 +16,9 @@ fn is_inhibited(err: &(dyn std::error::Error + 'static)) -> bool { ...@@ -16,7 +16,9 @@ fn is_inhibited(err: &(dyn std::error::Error + 'static)) -> bool {
} }
use crate::{ use crate::{
component::{Client, Endpoint}, component::{Client, Endpoint},
dynamo_nvtx_range,
engine::{AsyncEngine, Data}, engine::{AsyncEngine, Data},
metrics::frontend_perf::STAGE_DURATION_SECONDS,
pipeline::{ pipeline::{
AddressedPushRouter, AddressedRequest, Error, ManyOut, SingleIn, AddressedPushRouter, AddressedRequest, Error, ManyOut, SingleIn,
error::{PipelineError, PipelineErrorExt}, error::{PipelineError, PipelineErrorExt},
...@@ -34,6 +36,7 @@ use std::{ ...@@ -34,6 +36,7 @@ use std::{
Arc, Arc,
atomic::{AtomicU64, Ordering}, atomic::{AtomicU64, Ordering},
}, },
time::Instant,
}; };
use tokio_stream::StreamExt; use tokio_stream::StreamExt;
use tracing::Instrument; use tracing::Instrument;
...@@ -324,6 +327,7 @@ where ...@@ -324,6 +327,7 @@ where
instance_id: u64, instance_id: u64,
request: SingleIn<T>, request: SingleIn<T>,
) -> anyhow::Result<ManyOut<U>> { ) -> anyhow::Result<ManyOut<U>> {
let route_start = Instant::now();
let request_id = request.id().to_string(); let request_id = request.id().to_string();
let route_span = if matches!(self.router_mode, RouterMode::KV) { let route_span = if matches!(self.router_mode, RouterMode::KV) {
tracing::Span::none() tracing::Span::none()
...@@ -357,7 +361,7 @@ where ...@@ -357,7 +361,7 @@ where
} }
// Get the address based on discovered transport type // Get the address based on discovered transport type
let address = { let (address, _transport_kind) = {
use crate::component::TransportType; use crate::component::TransportType;
// Get the instance and use its actual transport type // Get the instance and use its actual transport type
...@@ -376,7 +380,7 @@ where ...@@ -376,7 +380,7 @@ where
http_endpoint = %http_endpoint, http_endpoint = %http_endpoint,
"Using HTTP transport for instance" "Using HTTP transport for instance"
); );
http_endpoint.clone() (http_endpoint.clone(), "transport.http.request")
} }
TransportType::Tcp(tcp_endpoint) => { TransportType::Tcp(tcp_endpoint) => {
tracing::debug!( tracing::debug!(
...@@ -384,7 +388,7 @@ where ...@@ -384,7 +388,7 @@ where
tcp_endpoint = %tcp_endpoint, tcp_endpoint = %tcp_endpoint,
"Using TCP transport for instance" "Using TCP transport for instance"
); );
tcp_endpoint.clone() (tcp_endpoint.clone(), "transport.tcp.request")
} }
TransportType::Nats(subject) => { TransportType::Nats(subject) => {
tracing::debug!( tracing::debug!(
...@@ -392,13 +396,18 @@ where ...@@ -392,13 +396,18 @@ where
subject = %subject, subject = %subject,
"Using NATS transport for instance" "Using NATS transport for instance"
); );
subject.clone() (subject.clone(), "transport.nats.request")
} }
} }
}; };
let request = request.map(|req| AddressedRequest::new(req, address)); let request = request.map(|req| AddressedRequest::new(req, address));
STAGE_DURATION_SECONDS
.with_label_values(&["route"])
.observe(route_start.elapsed().as_secs_f64());
let _nvtx_transport = dynamo_nvtx_range!(_transport_kind);
let stream: anyhow::Result<ManyOut<U>> = self let stream: anyhow::Result<ManyOut<U>> = self
.addressed .addressed
.generate(request) .generate(request)
......
...@@ -5,6 +5,9 @@ ...@@ -5,6 +5,9 @@
//! //!
use super::unified_client::{ClientStats, Headers, RequestPlaneClient}; use super::unified_client::{ClientStats, Headers, RequestPlaneClient};
use crate::metrics::transport_metrics::{
TCP_BYTES_RECEIVED_TOTAL, TCP_BYTES_SENT_TOTAL, TCP_ERRORS_TOTAL,
};
use anyhow::Result; use anyhow::Result;
use async_trait::async_trait; use async_trait::async_trait;
use bytes::Bytes; use bytes::Bytes;
...@@ -250,6 +253,7 @@ impl TcpConnection { ...@@ -250,6 +253,7 @@ impl TcpConnection {
// With TCP_NODELAY, no need for explicit flush() // With TCP_NODELAY, no need for explicit flush()
match write_half.write_all(&req.encoded_data).await { match write_half.write_all(&req.encoded_data).await {
Ok(()) => { Ok(()) => {
TCP_BYTES_SENT_TOTAL.inc_by(req.encoded_data.len() as f64);
// Forward response channel to reader task (FIFO ordering) // Forward response channel to reader task (FIFO ordering)
if response_tx_channel.send(req.response_tx).is_err() { if response_tx_channel.send(req.response_tx).is_err() {
tracing::debug!("Reader task closed, stopping writer"); tracing::debug!("Reader task closed, stopping writer");
...@@ -505,9 +509,10 @@ impl RequestPlaneClient for TcpRequestClient { ...@@ -505,9 +509,10 @@ impl RequestPlaneClient for TcpRequestClient {
) -> Result<Bytes> { ) -> Result<Bytes> {
tracing::debug!("TCP client sending request to address: {address}"); tracing::debug!("TCP client sending request to address: {address}");
self.stats.requests_sent.fetch_add(1, Ordering::Relaxed); self.stats.requests_sent.fetch_add(1, Ordering::Relaxed);
let payload_len = payload.len();
self.stats self.stats
.bytes_sent .bytes_sent
.fetch_add(payload.len() as u64, Ordering::Relaxed); .fetch_add(payload_len as u64, Ordering::Relaxed);
let (addr, endpoint_name) = Self::parse_address(&address)?; let (addr, endpoint_name) = Self::parse_address(&address)?;
...@@ -535,6 +540,7 @@ impl RequestPlaneClient for TcpRequestClient { ...@@ -535,6 +540,7 @@ impl RequestPlaneClient for TcpRequestClient {
self.stats self.stats
.bytes_received .bytes_received
.fetch_add(response.len() as u64, Ordering::Relaxed); .fetch_add(response.len() as u64, Ordering::Relaxed);
TCP_BYTES_RECEIVED_TOTAL.inc_by(response.len() as f64);
// Return connection to pool (health check happens inside) // Return connection to pool (health check happens inside)
self.pool.return_connection(conn).await; self.pool.return_connection(conn).await;
...@@ -543,6 +549,7 @@ impl RequestPlaneClient for TcpRequestClient { ...@@ -543,6 +549,7 @@ impl RequestPlaneClient for TcpRequestClient {
} }
Ok(Err(e)) => { Ok(Err(e)) => {
self.stats.errors.fetch_add(1, Ordering::Relaxed); self.stats.errors.fetch_add(1, Ordering::Relaxed);
TCP_ERRORS_TOTAL.inc();
tracing::warn!("TCP request failed to {}: {}", addr, e); tracing::warn!("TCP request failed to {}: {}", addr, e);
// Don't return unhealthy connection to pool, let it drop // Don't return unhealthy connection to pool, let it drop
let cause = crate::error::DynamoError::from( let cause = crate::error::DynamoError::from(
...@@ -558,6 +565,7 @@ impl RequestPlaneClient for TcpRequestClient { ...@@ -558,6 +565,7 @@ impl RequestPlaneClient for TcpRequestClient {
} }
Err(_) => { Err(_) => {
self.stats.errors.fetch_add(1, Ordering::Relaxed); self.stats.errors.fetch_add(1, Ordering::Relaxed);
TCP_ERRORS_TOTAL.inc();
tracing::warn!("TCP request timeout to {addr}"); tracing::warn!("TCP request timeout to {addr}");
// Don't return timed-out connection to pool // Don't return timed-out connection to pool
Err(anyhow::anyhow!( Err(anyhow::anyhow!(
......
...@@ -4,6 +4,9 @@ ...@@ -4,6 +4,9 @@
use super::*; use super::*;
use crate::metrics::prometheus_names::work_handler; use crate::metrics::prometheus_names::work_handler;
use crate::metrics::work_handler_perf::{
WORK_HANDLER_NETWORK_TRANSIT_SECONDS, WORK_HANDLER_TIME_TO_FIRST_RESPONSE_SECONDS,
};
use crate::protocols::maybe_error::MaybeError; use crate::protocols::maybe_error::MaybeError;
use prometheus::{Histogram, IntCounter, IntCounterVec, IntGauge}; use prometheus::{Histogram, IntCounter, IntCounterVec, IntGauge};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
...@@ -137,6 +140,10 @@ where ...@@ -137,6 +140,10 @@ where
} }
async fn handle_payload(&self, payload: Bytes) -> Result<(), PipelineError> { async fn handle_payload(&self, payload: Bytes) -> Result<(), PipelineError> {
let t2_wallclock_ns = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_nanos() as u64;
let start_time = std::time::Instant::now(); let start_time = std::time::Instant::now();
// Increment inflight and ensure it's decremented on all exits via RAII guard // Increment inflight and ensure it's decremented on all exits via RAII guard
...@@ -194,6 +201,12 @@ where ...@@ -194,6 +201,12 @@ where
} }
}; };
// Compute network transit time (T2 - T1) using cross-process wall-clock timestamps
if let Some(t1_ns) = control_msg.frontend_send_ts_ns {
let transit_ns = t2_wallclock_ns.saturating_sub(t1_ns);
WORK_HANDLER_NETWORK_TRANSIT_SECONDS.observe(transit_ns as f64 / 1_000_000_000.0);
}
// extend request with context // extend request with context
tracing::trace!("received control message: {:?}", control_msg); tracing::trace!("received control message: {:?}", control_msg);
tracing::trace!("received request: {:?}", request); tracing::trace!("received request: {:?}", request);
...@@ -238,6 +251,8 @@ where ...@@ -238,6 +251,8 @@ where
Ok(stream) => { Ok(stream) => {
tracing::trace!("Successfully generated response stream; sending prologue"); tracing::trace!("Successfully generated response stream; sending prologue");
let _result = publisher.send_prologue(None).await; let _result = publisher.send_prologue(None).await;
WORK_HANDLER_TIME_TO_FIRST_RESPONSE_SECONDS
.observe(start_time.elapsed().as_secs_f64());
stream stream
} }
Err(e) => { Err(e) => {
......
...@@ -186,6 +186,20 @@ impl SharedTcpServer { ...@@ -186,6 +186,20 @@ impl SharedTcpServer {
"TCP worker processing request" "TCP worker processing request"
); );
// Compute network transit time from the transport header stamped right
// before the TCP write on the frontend side.
if let Some(t1_str) = work_item.headers.get("x-frontend-send-ts-ns")
&& let Ok(t1_ns) = t1_str.parse::<u64>()
{
let t2_ns = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_nanos() as u64;
let transit_ns = t2_ns.saturating_sub(t1_ns);
crate::metrics::work_handler_perf::WORK_HANDLER_NETWORK_TRANSIT_SECONDS
.observe(transit_ns as f64 / 1_000_000_000.0);
}
// Create span with trace context from headers // Create span with trace context from headers
let span = crate::logging::make_handle_payload_span_from_tcp_headers( let span = crate::logging::make_handle_payload_span_from_tcp_headers(
&work_item.headers, &work_item.headers,
......
...@@ -159,8 +159,21 @@ impl TcpStreamServer { ...@@ -159,8 +159,21 @@ impl TcpStreamServer {
match resolved_ip { match resolved_ip {
Ok(addr) => addr, Ok(addr) => addr,
Err(Error::LocalIpAddressNotFound) => IpAddr::from([127, 0, 0, 1]), // Only fall back to loopback when no routable IP exists at all;
Err(err) => return Err(err.into()), // propagate other resolver errors (I/O, platform) so
// misconfigured hosts fail fast instead of silently binding
// to 127.0.0.1.
Err(Error::LocalIpAddressNotFound) => {
tracing::warn!(
"No routable local IP address found; falling back to 127.0.0.1"
);
IpAddr::from([127, 0, 0, 1])
}
Err(err) => {
return Err(PipelineError::Generic(format!(
"Failed to resolve local IP address: {err}"
)));
}
} }
.to_string() .to_string()
} }
......
...@@ -51,6 +51,9 @@ pub struct Runtime { ...@@ -51,6 +51,9 @@ pub struct Runtime {
impl Runtime { impl Runtime {
fn new(runtime: RuntimeType, secondary: Option<RuntimeType>) -> anyhow::Result<Runtime> { fn new(runtime: RuntimeType, secondary: Option<RuntimeType>) -> anyhow::Result<Runtime> {
// Initialise NVTX toggle once from environment (no-op when feature is off)
crate::nvtx::init();
// worker id // worker id
let id = Arc::new(uuid::Uuid::new_v4().to_string()); let id = Arc::new(uuid::Uuid::new_v4().to_string());
...@@ -146,6 +149,12 @@ impl Runtime { ...@@ -146,6 +149,12 @@ impl Runtime {
if let (Some(pool), Some(permits)) = (&self.compute_pool, &self.block_in_place_permits) { if let (Some(pool), Some(permits)) = (&self.compute_pool, &self.block_in_place_permits) {
crate::compute::thread_local::initialize_context(Arc::clone(pool), Arc::clone(permits)); crate::compute::thread_local::initialize_context(Arc::clone(pool), Arc::clone(permits));
} }
// Name this worker thread in the Nsight Systems timeline (no-op when nvtx feature is off)
let thread_name = std::thread::current()
.name()
.map(|n| n.to_string())
.unwrap_or_else(|| format!("tokio-worker-{:?}", std::thread::current().id()));
crate::nvtx::name_current_thread_impl(&thread_name);
} }
/// Initialize thread-local compute context on all worker threads using a barrier /// Initialize thread-local compute context on all worker threads using a barrier
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment