// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 use std::collections::VecDeque; use std::time::Duration; use tokio::time::Instant; use super::single::RequestId; #[derive(Debug, Clone, Copy)] pub(super) struct PrefillLoadState { pub(super) initial_effective_prefill_tokens: usize, pub(super) expected_prefill_duration: Option, } #[derive(Debug, Default)] pub(super) struct PrefillLoadTracker { pub(super) prefill_order: VecDeque, pub(super) prefill_full_tokens_sum: usize, pub(super) anchored_prefill: Option<(RequestId, Instant)>, } impl PrefillLoadTracker { pub(super) fn insert( &mut self, request_id: &RequestId, prefill: PrefillLoadState, decay_now: Instant, ) { self.prefill_full_tokens_sum += prefill.initial_effective_prefill_tokens; let should_anchor = self.anchored_prefill.is_none(); self.prefill_order.push_back(request_id.clone()); if should_anchor { self.anchored_prefill = Some((request_id.clone(), decay_now)); } } pub(super) fn remove( &mut self, request_id: &RequestId, prefill: PrefillLoadState, decay_now: Instant, ) { self.prefill_full_tokens_sum = self .prefill_full_tokens_sum .checked_sub(prefill.initial_effective_prefill_tokens) .expect("prefill_full_tokens_sum underflow"); let removed_front = self.prefill_order.front() == Some(request_id); if removed_front { let removed = self.prefill_order.pop_front(); debug_assert_eq!(removed.as_ref(), Some(request_id)); } else { self.prefill_order .retain(|queued_request_id| queued_request_id != request_id); } if self .anchored_prefill .as_ref() .is_some_and(|(anchored_request_id, _)| anchored_request_id == request_id) { self.set_anchor_to_front(decay_now); } } pub(super) fn set_anchor_to_front(&mut self, now: Instant) { self.anchored_prefill = self .prefill_order .front() .cloned() .map(|request_id| (request_id, now)); } }