help="KV Router: Disable tracking of active blocks (blocks being used for ongoing generation). By default, active blocks are tracked for load balancing.",
help="KV Router: Disable tracking of active blocks (blocks being used for ongoing generation). By default, active blocks are tracked for load balancing.",
)
)
parser.add_argument(
"--enforce-disagg",
action="store_true",
default=False,
help="Enforce disaggregated prefill-decode. When set, unactivated prefill router will return an error instead of falling back to decode-only mode.",
tracing::debug!("Prefill router not activated, falling back to decode-only");
next.generate(context.map(|_|req)).await
}
Err(e)=>{
Err(e)=>{
ifself.enforce_disagg{
tracing::error!(
error=%e,
"Remote prefill failed, but disaggregated mode is enforced. Failing request."
);
returnErr(anyhow::anyhow!(e));
}
tracing::warn!(
tracing::warn!(
error=%e,
error=%e,
"Remote prefill failed, falling back to decode-only. This may impact performance in disaggregated deployments. Verify prefill workers are healthy and accessible."
"Remote prefill failed, falling back to decode-only. This may impact performance in disaggregated deployments. Verify prefill workers are healthy and accessible."