Unverified commit d9cc6f6b, authored by Yan Ru Pei, committed by GitHub
Browse files

feat: expected output tokens plumbing (#5181)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent f56483cb
...@@ -246,6 +246,7 @@ impl OpenAIPreprocessor { ...@@ -246,6 +246,7 @@ impl OpenAIPreprocessor {
decode_worker_id: nvext.decode_worker_id, decode_worker_id: nvext.decode_worker_id,
dp_rank: None, // dp_rank is set later in the pipeline dp_rank: None, // dp_rank is set later in the pipeline
enable_local_updates: nvext.enable_local_updates, enable_local_updates: nvext.enable_local_updates,
expected_output_tokens: nvext.expected_output_tokens,
}; };
builder.routing(Some(routing)); builder.routing(Some(routing));
} }
......
...@@ -42,6 +42,11 @@ pub struct RoutingHints { ...@@ -42,6 +42,11 @@ pub struct RoutingHints {
/// - `Some(false)`: External caller (e.g., GAIE sidecar) handles bookkeeping via C FFI /// - `Some(false)`: External caller (e.g., GAIE sidecar) handles bookkeeping via C FFI
#[serde(default, skip_serializing_if = "Option::is_none")] #[serde(default, skip_serializing_if = "Option::is_none")]
pub enable_local_updates: Option<bool>, pub enable_local_updates: Option<bool>,
/// Expected number of output tokens for this request.
/// Used as a hint for routing decisions to estimate resource requirements.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub expected_output_tokens: Option<u32>,
} }
#[derive(Serialize, Deserialize, Debug, Clone, Default)] #[derive(Serialize, Deserialize, Debug, Clone, Default)]
......
...@@ -116,6 +116,12 @@ pub struct NvExt { ...@@ -116,6 +116,12 @@ pub struct NvExt {
#[builder(default, setter(strip_option))] #[builder(default, setter(strip_option))]
#[serde(default, skip_serializing_if = "Option::is_none")] #[serde(default, skip_serializing_if = "Option::is_none")]
pub enable_local_updates: Option<bool>, pub enable_local_updates: Option<bool>,
/// Expected number of output tokens for this request.
/// Used as a hint for routing decisions to estimate resource requirements.
#[builder(default, setter(strip_option))]
#[serde(default, skip_serializing_if = "Option::is_none")]
pub expected_output_tokens: Option<u32>,
} }
impl Default for NvExt { impl Default for NvExt {
...@@ -165,6 +171,7 @@ mod tests { ...@@ -165,6 +171,7 @@ mod tests {
assert_eq!(nv_ext.prefill_worker_id, None); assert_eq!(nv_ext.prefill_worker_id, None);
assert_eq!(nv_ext.decode_worker_id, None); assert_eq!(nv_ext.decode_worker_id, None);
assert_eq!(nv_ext.enable_local_updates, None); assert_eq!(nv_ext.enable_local_updates, None);
assert_eq!(nv_ext.expected_output_tokens, None);
} }
// Test valid builder configurations // Test valid builder configurations
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment