Commit 02b1c58a (unverified), authored by Yan Ru Pei, committed by GitHub
Browse files

feat(mocker): add offline disagg replay (#7617)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent 4b8826b3
This diff is collapsed.
...@@ -139,6 +139,8 @@ impl PrefillCost { ...@@ -139,6 +139,8 @@ impl PrefillCost {
pub struct OutputSignal { pub struct OutputSignal {
pub uuid: Uuid, pub uuid: Uuid,
pub completed: bool, pub completed: bool,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub handoff_delay_ms: Option<f64>,
} }
/// Preemption policy for evicting decode requests under memory pressure /// Preemption policy for evicting decode requests under memory pressure
...@@ -286,6 +288,10 @@ pub struct MockEngineArgs { ...@@ -286,6 +288,10 @@ pub struct MockEngineArgs {
#[builder(default = "WorkerType::Aggregated")] #[builder(default = "WorkerType::Aggregated")]
pub worker_type: WorkerType, pub worker_type: WorkerType,
/// Original planner profile NPZ path used to materialize `perf_model`.
#[builder(default = "None")]
pub planner_profile_data: Option<PathBuf>,
/// Performance model for timing predictions (not serialized, loaded from planner_profile_data) /// Performance model for timing predictions (not serialized, loaded from planner_profile_data)
#[serde(skip)] #[serde(skip)]
#[builder(default = "Arc::new(PerfModel::default())")] #[builder(default = "Arc::new(PerfModel::default())")]
...@@ -691,6 +697,7 @@ impl MockEngineArgs { ...@@ -691,6 +697,7 @@ impl MockEngineArgs {
&& let Some(path_str) = path_str.as_str() && let Some(path_str) = path_str.as_str()
{ {
let npz_path = PathBuf::from(path_str); let npz_path = PathBuf::from(path_str);
builder = builder.planner_profile_data(Some(npz_path.clone()));
match PerfModel::from_npz(&npz_path) { match PerfModel::from_npz(&npz_path) {
Ok(model) => { Ok(model) => {
tracing::info!("Successfully loaded performance model from: {:?}", npz_path); tracing::info!("Successfully loaded performance model from: {:?}", npz_path);
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -138,6 +138,7 @@ impl KvReplayRouter { ...@@ -138,6 +138,7 @@ impl KvReplayRouter {
args.block_size as u32, args.block_size as u32,
selector, selector,
policy, policy,
config.router_track_prefill_tokens,
CancellationToken::new(), CancellationToken::new(),
"replay", "replay",
false, false,
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment