//! Pipeline stages for gRPC router request processing
//!
//! This module defines the core pipeline abstraction and the individual processing stages
//! that transform a `RequestContext` through its lifecycle.
use std::{
borrow::Cow,
collections::HashMap,
sync::Arc,
time::{Instant, SystemTime, UNIX_EPOCH},
};
use async_trait::async_trait;
use axum::response::{IntoResponse, Response};
use proto::DisaggregatedParams;
use rand::Rng;
use tokio::sync::RwLock;
use tracing::{debug, error, warn};
use uuid::Uuid;
use super::{context::*, processing, responses::BackgroundTaskInfo, streaming, utils};
use crate::{
core::{ConnectionMode, Worker, WorkerRegistry, WorkerType},
grpc_client::proto,
policies::PolicyRegistry,
protocols::{
chat::{ChatCompletionRequest, ChatCompletionResponse},
common::InputIds,
generate::GenerateRequest,
},
reasoning_parser::ParserFactory as ReasoningParserFactory,
tokenizer::traits::Tokenizer,
tool_parser::ParserFactory as ToolParserFactory,
};
// ============================================================================
// Pipeline Trait
// ============================================================================
/// Trait for pipeline stages that process requests
#[async_trait]
pub trait PipelineStage: Send + Sync {
/// Execute this stage, mutating the context
///
/// Returns:
/// - `Ok(None)` - Continue to next stage
/// - `Ok(Some(response))` - Pipeline complete, return this response (e.g., streaming)
/// - `Err(response)` - Error occurred, return this error response
async fn execute(&self, ctx: &mut RequestContext) -> Result