Unverified Commit 09af0a7b authored by rongfu.leng, committed by GitHub
Browse files

[sgl-route] Optimize the use of constant slices and retain to simplif… (#12159)


Signed-off-by: rongfu.leng <lenronfu@gmail.com>
parent c8d385ce
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
use std::{ use std::{
any::Any, any::Any,
collections::HashSet,
sync::{atomic::AtomicBool, Arc}, sync::{atomic::AtomicBool, Arc},
time::{Duration, Instant}, time::{Duration, Instant},
}; };
...@@ -15,6 +16,7 @@ use axum::{ ...@@ -15,6 +16,7 @@ use axum::{
}; };
use dashmap::DashMap; use dashmap::DashMap;
use futures_util::StreamExt; use futures_util::StreamExt;
use once_cell::sync::Lazy;
use serde_json::{json, to_value, Value}; use serde_json::{json, to_value, Value};
use tokio::sync::mpsc; use tokio::sync::mpsc;
use tokio_stream::wrappers::UnboundedReceiverStream; use tokio_stream::wrappers::UnboundedReceiverStream;
...@@ -61,6 +63,32 @@ use crate::{ ...@@ -61,6 +63,32 @@ use crate::{
// OpenAIRouter Struct // OpenAIRouter Struct
// ============================================================================ // ============================================================================
/// Request keys that are SGLang extensions and must not reach OpenAI-compatible endpoints.
///
/// Payload objects are filtered against this set (via `retain`) so that these
/// keys are dropped before a request is forwarded upstream. The set is built
/// once, on first access, and then shared by every caller.
static SGLANG_FIELDS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
    // Kept as a flat list so adding or removing a key is a one-line change.
    const FIELD_NAMES: &[&str] = &[
        "request_id",
        "priority",
        "top_k",
        "min_p",
        "min_tokens",
        "regex",
        "ebnf",
        "stop_token_ids",
        "no_stop_trim",
        "ignore_eos",
        "continue_final_message",
        "skip_special_tokens",
        "lora_path",
        "session_params",
        "separate_reasoning",
        "stream_reasoning",
        "chat_template_kwargs",
        "return_hidden_states",
        "repetition_penalty",
        "sampling_seed",
    ];

    FIELD_NAMES.iter().copied().collect()
});
/// Cached endpoint information /// Cached endpoint information
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
struct CachedEndpoint { struct CachedEndpoint {
...@@ -547,29 +575,7 @@ impl crate::routers::RouterTrait for OpenAIRouter { ...@@ -547,29 +575,7 @@ impl crate::routers::RouterTrait for OpenAIRouter {
}; };
if let Some(obj) = payload.as_object_mut() { if let Some(obj) = payload.as_object_mut() {
// Always remove SGLang-specific fields (unsupported by OpenAI) // Always remove SGLang-specific fields (unsupported by OpenAI)
for key in [ obj.retain(|k, _| !SGLANG_FIELDS.contains(&k.as_str()));
"top_k",
"min_p",
"min_tokens",
"regex",
"ebnf",
"stop_token_ids",
"no_stop_trim",
"ignore_eos",
"continue_final_message",
"skip_special_tokens",
"lora_path",
"session_params",
"separate_reasoning",
"stream_reasoning",
"chat_template_kwargs",
"return_hidden_states",
"repetition_penalty",
"sampling_seed",
] {
obj.remove(key);
}
// Remove logprobs if false (Gemini don't accept it) // Remove logprobs if false (Gemini don't accept it)
if obj.get("logprobs").and_then(|v| v.as_bool()) == Some(false) { if obj.get("logprobs").and_then(|v| v.as_bool()) == Some(false) {
obj.remove("logprobs"); obj.remove("logprobs");
...@@ -899,30 +905,7 @@ impl crate::routers::RouterTrait for OpenAIRouter { ...@@ -899,30 +905,7 @@ impl crate::routers::RouterTrait for OpenAIRouter {
// Remove SGLang-specific fields only // Remove SGLang-specific fields only
if let Some(obj) = payload.as_object_mut() { if let Some(obj) = payload.as_object_mut() {
// Remove SGLang-specific fields (not part of OpenAI API) // Remove SGLang-specific fields (not part of OpenAI API)
for key in [ obj.retain(|k, _| !SGLANG_FIELDS.contains(&k.as_str()));
"request_id",
"priority",
"top_k",
"min_p",
"min_tokens",
"regex",
"ebnf",
"stop_token_ids",
"no_stop_trim",
"ignore_eos",
"continue_final_message",
"skip_special_tokens",
"lora_path",
"session_params",
"separate_reasoning",
"stream_reasoning",
"chat_template_kwargs",
"return_hidden_states",
"repetition_penalty",
"sampling_seed",
] {
obj.remove(key);
}
// XAI (Grok models) requires special handling of input items // XAI (Grok models) requires special handling of input items
// Check if model is a Grok model // Check if model is a Grok model
let is_grok_model = obj let is_grok_model = obj
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment