Unverified Commit 53f3d2af authored by Keiven C's avatar Keiven C Committed by GitHub
Browse files

feat: replace polling with event-driven metrics updates (#3207)


Signed-off-by: default avatarKeiven Chang <keivenchang@users.noreply.github.com>
parent da7d1a33
...@@ -80,6 +80,7 @@ pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Resul ...@@ -80,6 +80,7 @@ pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Resul
router_config.busy_threshold, router_config.busy_threshold,
target_namespace, target_namespace,
Arc::new(http_service.clone()), Arc::new(http_service.clone()),
http_service.state().metrics_clone(),
) )
.await?; .await?;
} }
...@@ -217,7 +218,11 @@ async fn run_watcher( ...@@ -217,7 +218,11 @@ async fn run_watcher(
busy_threshold: Option<f64>, busy_threshold: Option<f64>,
target_namespace: Option<String>, target_namespace: Option<String>,
http_service: Arc<HttpService>, http_service: Arc<HttpService>,
metrics: Arc<crate::http::service::metrics::Metrics>,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
// Clone model_manager before it's moved into ModelWatcher
let model_manager_clone = model_manager.clone();
let mut watch_obj = ModelWatcher::new( let mut watch_obj = ModelWatcher::new(
runtime, runtime,
model_manager, model_manager,
...@@ -234,11 +239,22 @@ async fn run_watcher( ...@@ -234,11 +239,22 @@ async fn run_watcher(
watch_obj.set_notify_on_model_update(tx); watch_obj.set_notify_on_model_update(tx);
// Spawn a task to watch for model type changes and update HTTP service endpoints // Spawn a task to watch for model type changes and update HTTP service endpoints and metrics
let _endpoint_enabler_task = tokio::spawn(async move { let _endpoint_enabler_task = tokio::spawn(async move {
while let Some(model_type) = rx.recv().await { while let Some(model_type) = rx.recv().await {
tracing::debug!("Received model type update: {:?}", model_type); tracing::debug!("Received model type update: {:?}", model_type);
// Update HTTP endpoints (existing functionality)
update_http_endpoints(http_service.clone(), model_type); update_http_endpoints(http_service.clone(), model_type);
// Update metrics (only for added models)
update_model_metrics(
model_type,
model_manager_clone.clone(),
metrics.clone(),
Some(etcd_client.clone()),
)
.await;
} }
}); });
...@@ -271,3 +287,46 @@ fn update_http_endpoints(service: Arc<HttpService>, model_type: ModelUpdate) { ...@@ -271,3 +287,46 @@ fn update_http_endpoints(service: Arc<HttpService>, model_type: ModelUpdate) {
} }
} }
} }
/// Updates metrics for model type changes
async fn update_model_metrics(
model_type: ModelUpdate,
model_manager: Arc<ModelManager>,
metrics: Arc<crate::http::service::metrics::Metrics>,
etcd_client: Option<etcd::Client>,
) {
match model_type {
ModelUpdate::Added(model_type) => {
tracing::debug!("Updating metrics for added model type: {:?}", model_type);
// Get all model entries and update metrics for matching types
let model_entries = model_manager.get_model_entries();
for entry in model_entries {
if entry.model_type == model_type {
// Update runtime config metrics if available
if let Some(runtime_config) = &entry.runtime_config {
metrics.update_runtime_config_metrics(&entry.name, runtime_config);
}
// Update MDC metrics if etcd is available
if let Some(ref etcd) = etcd_client
&& let Err(e) = metrics
.update_metrics_from_model_entry_with_mdc(&entry, etcd)
.await
{
tracing::debug!(
model = %entry.name,
error = %e,
"Failed to update MDC metrics for newly added model"
);
}
}
}
}
ModelUpdate::Removed(model_type) => {
tracing::debug!("Model type removed: {:?}", model_type);
// Note: Metrics are typically not removed to preserve historical data
// This matches the behavior in the polling task
}
}
}
...@@ -472,37 +472,6 @@ impl Metrics { ...@@ -472,37 +472,6 @@ impl Metrics {
} }
} }
/// Update model deployment card metrics for a model
/// This should be called when model deployment card information is available
pub fn update_mdc_metrics(
&self,
model_name: &str,
context_length: u32,
kv_cache_block_size: u32,
migration_limit: u32,
) {
self.model_context_length
.with_label_values(&[model_name])
.set(context_length as i64);
self.model_kv_cache_block_size
.with_label_values(&[model_name])
.set(kv_cache_block_size as i64);
self.model_migration_limit
.with_label_values(&[model_name])
.set(migration_limit as i64);
}
/// Update metrics from a ModelEntry
/// This is a convenience method that extracts runtime config from a ModelEntry
/// and updates the appropriate metrics
pub fn update_metrics_from_model_entry(&self, model_entry: &ModelEntry) {
if let Some(runtime_config) = &model_entry.runtime_config {
self.update_runtime_config_metrics(&model_entry.name, runtime_config);
}
}
/// Update metrics from a ModelEntry and its ModelDeploymentCard /// Update metrics from a ModelEntry and its ModelDeploymentCard
/// This updates both runtime config metrics and MDC-specific metrics /// This updates both runtime config metrics and MDC-specific metrics
pub async fn update_metrics_from_model_entry_with_mdc( pub async fn update_metrics_from_model_entry_with_mdc(
...@@ -525,12 +494,19 @@ impl Metrics { ...@@ -525,12 +494,19 @@ impl Metrics {
.await .await
{ {
Ok(Some(mdc)) => { Ok(Some(mdc)) => {
self.update_mdc_metrics( // Inline MDC metrics update
&model_entry.name, self.model_context_length
mdc.context_length, .with_label_values(&[&model_entry.name])
mdc.kv_cache_block_size, .set(mdc.context_length as i64);
mdc.migration_limit,
); self.model_kv_cache_block_size
.with_label_values(&[&model_entry.name])
.set(mdc.kv_cache_block_size as i64);
self.model_migration_limit
.with_label_values(&[&model_entry.name])
.set(mdc.migration_limit as i64);
tracing::debug!( tracing::debug!(
model = %model_entry.name, model = %model_entry.name,
"Successfully updated MDC metrics" "Successfully updated MDC metrics"
...@@ -554,110 +530,6 @@ impl Metrics { ...@@ -554,110 +530,6 @@ impl Metrics {
Ok(()) Ok(())
} }
/// Start a background task that periodically updates runtime config metrics
///
/// ## Why Polling is Required
///
/// Polling is necessary because new models may come online at any time through the distributed
/// discovery system. The ModelManager is continuously updated as workers register/deregister
/// with etcd, and we need to periodically check for these changes to expose their metrics.
///
/// ## Behavior
///
/// - Polls the ModelManager for current models and updates metrics accordingly
/// - Models are never removed from metrics to preserve historical data
/// - If multiple model instances have the same name, only the first instance's metrics are used
/// - Subsequent instances with duplicate names will be skipped
///
/// ## MDC (Model Deployment Card) Behavior
///
/// Currently, we don't overwrite an MDC. The first worker to start wins, and we assume
/// that all other workers claiming to serve that model really are using the same configuration.
/// Later, every worker will have its own MDC, and the frontend will validate that they
/// checksum the same. For right now, you can assume they have the same MDC, because
/// they aren't allowed to change it.
///
/// The task will run until the provided cancellation token is cancelled.
pub fn start_runtime_config_polling_task(
metrics: Arc<Self>,
manager: Arc<crate::discovery::ModelManager>,
etcd_client: Option<dynamo_runtime::transports::etcd::Client>,
poll_interval: Duration,
cancel_token: tokio_util::sync::CancellationToken,
) -> tokio::task::JoinHandle<()> {
tokio::spawn(async move {
let mut interval = tokio::time::interval(poll_interval);
let mut known_models = std::collections::HashSet::new();
tracing::info!(
interval_secs = poll_interval.as_secs(),
"Starting runtime config metrics polling task (metrics never removed)"
);
loop {
tokio::select! {
_ = cancel_token.cancelled() => {
tracing::info!("Runtime config metrics polling task cancelled");
break;
}
_ = interval.tick() => {
// Continue with polling logic
}
}
// Get current model entries from the manager
let current_entries = manager.get_model_entries();
let mut current_models = std::collections::HashSet::new();
// Note: If multiple model instances have the same name, only the first instance's config metrics are recorded.
// Subsequent instances with duplicate names will be skipped for config updates.
// This is based on the assumption that all workers serving the same model have identical
// configuration values (MDC content, runtime config, etc.). This assumption holds because
// workers are not allowed to change their configuration after registration.
// Update configuration metrics for current models
for entry in current_entries {
// Skip config processing if we've already seen this model name
if !current_models.insert(entry.name.clone()) {
tracing::debug!(
model_name = %entry.name,
endpoint = ?entry.endpoint_id,
"Skipping duplicate model instance - only first instance config metrics are recorded"
);
continue;
}
// Update runtime config metrics if available
if let Some(runtime_config) = &entry.runtime_config {
metrics.update_runtime_config_metrics(&entry.name, runtime_config);
}
// Optionally load MDC for additional metrics if etcd is available
if let Some(ref etcd) = etcd_client
&& let Err(e) = metrics
.update_metrics_from_model_entry_with_mdc(&entry, etcd)
.await
{
tracing::debug!(
model = %entry.name,
error = %e,
"Failed to update MDC metrics (this is normal if MDC is not available)"
);
}
}
// Update our known models set
known_models.extend(current_models.iter().cloned());
tracing::trace!(
active_models = current_models.len(),
total_known_models = known_models.len(),
"Updated runtime config metrics for active models"
);
}
})
}
/// Create a new [`InflightGuard`] for the given model and annotate if its a streaming request, /// Create a new [`InflightGuard`] for the given model and annotate if its a streaming request,
/// and the kind of endpoint that was hit /// and the kind of endpoint that was hit
/// ///
......
...@@ -133,9 +133,6 @@ pub struct HttpService { ...@@ -133,9 +133,6 @@ pub struct HttpService {
tls_cert_path: Option<PathBuf>, tls_cert_path: Option<PathBuf>,
tls_key_path: Option<PathBuf>, tls_key_path: Option<PathBuf>,
route_docs: Vec<RouteDoc>, route_docs: Vec<RouteDoc>,
// Metrics polling configuration
etcd_client: Option<dynamo_runtime::transports::etcd::Client>,
} }
#[derive(Clone, Builder)] #[derive(Clone, Builder)]
...@@ -204,22 +201,6 @@ impl HttpService { ...@@ -204,22 +201,6 @@ impl HttpService {
let protocol = if self.enable_tls { "HTTPS" } else { "HTTP" }; let protocol = if self.enable_tls { "HTTPS" } else { "HTTP" };
tracing::info!(protocol, address, "Starting HTTP(S) service"); tracing::info!(protocol, address, "Starting HTTP(S) service");
// Start background task to poll runtime config metrics with proper cancellation
let poll_interval_secs = std::env::var("DYN_HTTP_SVC_CONFIG_METRICS_POLL_INTERVAL_SECS")
.ok()
.and_then(|s| s.parse::<f64>().ok())
.filter(|&secs| secs > 0.0) // Guard against zero or negative values
.unwrap_or(8.0);
let poll_interval = Duration::from_secs_f64(poll_interval_secs);
let _polling_task = super::metrics::Metrics::start_runtime_config_polling_task(
self.state.metrics_clone(),
self.state.manager_clone(),
self.etcd_client.clone(),
poll_interval,
cancel_token.child_token(),
);
let router = self.router.clone(); let router = self.router.clone();
let observer = cancel_token.child_token(); let observer = cancel_token.child_token();
...@@ -313,8 +294,8 @@ impl HttpServiceConfigBuilder { ...@@ -313,8 +294,8 @@ impl HttpServiceConfigBuilder {
let config: HttpServiceConfig = self.build_internal()?; let config: HttpServiceConfig = self.build_internal()?;
let model_manager = Arc::new(ModelManager::new()); let model_manager = Arc::new(ModelManager::new());
let etcd_client = config.etcd_client.clone(); let etcd_client = config.etcd_client;
let state = Arc::new(State::new_with_etcd(model_manager, config.etcd_client)); let state = Arc::new(State::new_with_etcd(model_manager, etcd_client));
state state
.flags .flags
...@@ -366,7 +347,6 @@ impl HttpServiceConfigBuilder { ...@@ -366,7 +347,6 @@ impl HttpServiceConfigBuilder {
tls_cert_path: config.tls_cert_path, tls_cert_path: config.tls_cert_path,
tls_key_path: config.tls_key_path, tls_key_path: config.tls_key_path,
route_docs: all_docs, route_docs: all_docs,
etcd_client,
}) })
} }
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment