Unverified commit 263f99dc, authored by Zhongxuan (Daniel) Wang, committed by GitHub
Browse files

feat: nvext field to OpenAI APIs and add worker_id reporting (vLLM) (#4372)


Signed-off-by: Zhongxuan Wang <daniewang@nvidia.com>
parent 684107c4
...@@ -46,6 +46,7 @@ mod tests { ...@@ -46,6 +46,7 @@ mod tests {
object: "chat.completion.chunk".to_string(), object: "chat.completion.chunk".to_string(),
usage: None, usage: None,
service_tier: None, service_tier: None,
nvext: None,
}; };
Annotated { Annotated {
...@@ -84,6 +85,7 @@ mod tests { ...@@ -84,6 +85,7 @@ mod tests {
object: "chat.completion.chunk".to_string(), object: "chat.completion.chunk".to_string(),
usage: None, usage: None,
service_tier: None, service_tier: None,
nvext: None,
}; };
Annotated { Annotated {
...@@ -126,6 +128,7 @@ mod tests { ...@@ -126,6 +128,7 @@ mod tests {
object: "chat.completion.chunk".to_string(), object: "chat.completion.chunk".to_string(),
usage: None, usage: None,
service_tier: None, service_tier: None,
nvext: None,
}; };
Annotated { Annotated {
...@@ -169,6 +172,7 @@ mod tests { ...@@ -169,6 +172,7 @@ mod tests {
object: "chat.completion.chunk".to_string(), object: "chat.completion.chunk".to_string(),
usage: None, usage: None,
service_tier: None, service_tier: None,
nvext: None,
}; };
Annotated { Annotated {
...@@ -212,6 +216,7 @@ mod tests { ...@@ -212,6 +216,7 @@ mod tests {
object: "chat.completion.chunk".to_string(), object: "chat.completion.chunk".to_string(),
usage: None, usage: None,
service_tier: None, service_tier: None,
nvext: None,
}; };
Annotated { Annotated {
...@@ -2050,6 +2055,7 @@ mod parallel_jail_tests { ...@@ -2050,6 +2055,7 @@ mod parallel_jail_tests {
object: "chat.completion.chunk".to_string(), object: "chat.completion.chunk".to_string(),
usage: None, usage: None,
service_tier: None, service_tier: None,
nvext: None,
}; };
Annotated { Annotated {
......
...@@ -36,6 +36,7 @@ fn create_mock_response_chunk( ...@@ -36,6 +36,7 @@ fn create_mock_response_chunk(
object: "chat.completion.chunk".to_string(), object: "chat.completion.chunk".to_string(),
usage: None, usage: None,
service_tier: None, service_tier: None,
nvext: None,
}; };
Annotated { Annotated {
......
...@@ -108,6 +108,7 @@ fn load_test_data(file_path: &str) -> TestData { ...@@ -108,6 +108,7 @@ fn load_test_data(file_path: &str) -> TestData {
object: "chat.completion.chunk".to_string(), object: "chat.completion.chunk".to_string(),
usage: None, usage: None,
service_tier: None, service_tier: None,
nvext: None,
}; };
Annotated { Annotated {
......
...@@ -108,6 +108,7 @@ fn build_backend_outputs_with_cached_tokens(cached_tokens: Option<u32>) -> Vec<B ...@@ -108,6 +108,7 @@ fn build_backend_outputs_with_cached_tokens(cached_tokens: Option<u32>) -> Vec<B
finish_reason: None, finish_reason: None,
index: Some(0), index: Some(0),
completion_usage: None, completion_usage: None,
disaggregated_params: None,
}, },
BackendOutput { BackendOutput {
token_ids: vec![1917], token_ids: vec![1917],
...@@ -119,6 +120,7 @@ fn build_backend_outputs_with_cached_tokens(cached_tokens: Option<u32>) -> Vec<B ...@@ -119,6 +120,7 @@ fn build_backend_outputs_with_cached_tokens(cached_tokens: Option<u32>) -> Vec<B
finish_reason: None, finish_reason: None,
index: Some(0), index: Some(0),
completion_usage: None, completion_usage: None,
disaggregated_params: None,
}, },
BackendOutput { BackendOutput {
token_ids: vec![0], token_ids: vec![0],
...@@ -139,6 +141,7 @@ fn build_backend_outputs_with_cached_tokens(cached_tokens: Option<u32>) -> Vec<B ...@@ -139,6 +141,7 @@ fn build_backend_outputs_with_cached_tokens(cached_tokens: Option<u32>) -> Vec<B
}), }),
completion_tokens_details: None, completion_tokens_details: None,
}), }),
disaggregated_params: None,
}, },
] ]
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment