Unverified commit 88478b4b, authored by Yan Ru Pei and committed by GitHub
Browse files

feat(replay): add agentic trace replay format (#8627)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent c8c99dd1
...@@ -61,21 +61,28 @@ impl OfflineDisaggReplayConfig { ...@@ -61,21 +61,28 @@ impl OfflineDisaggReplayConfig {
pub use entrypoints::{ pub use entrypoints::{
generate_trace_worker_artifacts_offline, simulate_concurrency_file, generate_trace_worker_artifacts_offline, simulate_concurrency_file,
simulate_concurrency_file_disagg_with_router_mode, simulate_concurrency_file_with_router_mode, simulate_concurrency_file_disagg_with_router_mode,
simulate_concurrency_live_file, simulate_concurrency_live_file_with_router_mode, simulate_concurrency_file_disagg_with_router_mode_and_format,
simulate_concurrency_live_requests, simulate_concurrency_live_requests_with_router_mode, simulate_concurrency_file_with_router_mode,
simulate_concurrency_live_workload, simulate_concurrency_live_workload_with_router_mode, simulate_concurrency_file_with_router_mode_and_format, simulate_concurrency_live_file,
simulate_concurrency_requests, simulate_concurrency_requests_disagg_with_router_mode, simulate_concurrency_live_file_with_router_mode,
simulate_concurrency_live_file_with_router_mode_and_format, simulate_concurrency_live_requests,
simulate_concurrency_live_requests_with_router_mode, simulate_concurrency_live_workload,
simulate_concurrency_live_workload_with_router_mode, simulate_concurrency_requests,
simulate_concurrency_requests_disagg_with_router_mode,
simulate_concurrency_requests_with_router_mode, simulate_concurrency_workload, simulate_concurrency_requests_with_router_mode, simulate_concurrency_workload,
simulate_concurrency_workload_disagg_with_router_mode, simulate_concurrency_workload_disagg_with_router_mode,
simulate_concurrency_workload_with_router_mode, simulate_trace_file, simulate_concurrency_workload_with_router_mode, simulate_trace_file,
simulate_trace_file_disagg_with_router_mode, simulate_trace_file_with_router_mode, simulate_trace_file_disagg_with_router_mode,
simulate_trace_live_file, simulate_trace_live_file_with_router_mode, simulate_trace_file_disagg_with_router_mode_and_format, simulate_trace_file_with_router_mode,
simulate_trace_live_requests, simulate_trace_live_requests_with_router_mode, simulate_trace_file_with_router_mode_and_format, simulate_trace_live_file,
simulate_trace_live_workload, simulate_trace_live_workload_with_router_mode, simulate_trace_live_file_with_router_mode,
simulate_trace_requests, simulate_trace_requests_disagg_with_router_mode, simulate_trace_live_file_with_router_mode_and_format, simulate_trace_live_requests,
simulate_trace_requests_with_router_mode, simulate_trace_workload, simulate_trace_live_requests_with_router_mode, simulate_trace_live_workload,
simulate_trace_workload_disagg_with_router_mode, simulate_trace_workload_with_router_mode, simulate_trace_live_workload_with_router_mode, simulate_trace_requests,
simulate_trace_requests_disagg_with_router_mode, simulate_trace_requests_with_router_mode,
simulate_trace_workload, simulate_trace_workload_disagg_with_router_mode,
simulate_trace_workload_with_router_mode,
}; };
pub use planner_handle::{PlannerReplayHandle, PlannerTickData}; pub use planner_handle::{PlannerReplayHandle, PlannerTickData};
pub use validate::validate_replay_args_mode; pub use validate::validate_replay_args_mode;
......
...@@ -479,9 +479,10 @@ impl VllmCore { ...@@ -479,9 +479,10 @@ impl VllmCore {
.get(&uuid) .get(&uuid)
.unwrap_or_else(|| panic!("schedule_request: {uuid} missing from state.requests")); .unwrap_or_else(|| panic!("schedule_request: {uuid} missing from state.requests"));
debug_assert_vllm_request_invariants(uuid, request); debug_assert_vllm_request_invariants(uuid, request);
let prefill_cost = self.kv_manager.get_prefill_cost(&request.sequence);
let cached_prefix_tokens = if request.num_computed_tokens == 0 { let cached_prefix_tokens = if request.num_computed_tokens == 0 {
prefill_cost.cached_tokens self.kv_manager
.get_prefill_cost(&request.sequence)
.cached_tokens
} else { } else {
0 0
}; };
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment