Commit f4245c99 (unverified), authored by Yan Ru Pei, committed by GitHub
Browse files

chore: clean up PreprocessedRequest (#5040)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent 37cc1f3d
...@@ -90,6 +90,12 @@ class StandaloneRouterHandler: ...@@ -90,6 +90,12 @@ class StandaloneRouterHandler:
# Wrap incoming request into PreprocessedRequest format for KvPushRouter # Wrap incoming request into PreprocessedRequest format for KvPushRouter
# The request should already have most fields, but we ensure it has the structure # The request should already have most fields, but we ensure it has the structure
# Build routing hints from request (supports both nested routing object and legacy dp_rank)
routing = request.get("routing")
dp_rank = request.get("dp_rank")
if routing is None and dp_rank is not None:
routing = {"dp_rank": dp_rank}
preprocessed_request = { preprocessed_request = {
"model": request.get("model", "unknown"), "model": request.get("model", "unknown"),
"token_ids": request["token_ids"], "token_ids": request["token_ids"],
...@@ -98,9 +104,11 @@ class StandaloneRouterHandler: ...@@ -98,9 +104,11 @@ class StandaloneRouterHandler:
"output_options": request.get("output_options", {}), "output_options": request.get("output_options", {}),
"eos_token_ids": request.get("eos_token_ids", []), "eos_token_ids": request.get("eos_token_ids", []),
"annotations": request.get("annotations", []), "annotations": request.get("annotations", []),
"disaggregated_params": request.get("disaggregated_params"), "routing": routing,
"dp_rank": request.get("dp_rank"), "router_config_override": request.get("router_config_override"),
"extra_args": request.get("extra_args", {}), "prefill_result": request.get("prefill_result"),
"bootstrap_info": request.get("bootstrap_info"),
"extra_args": request.get("extra_args"),
} }
# Route and process through KvPushRouter # Route and process through KvPushRouter
...@@ -117,6 +125,7 @@ class StandaloneRouterHandler: ...@@ -117,6 +125,7 @@ class StandaloneRouterHandler:
"log_probs": worker_output.get("log_probs"), "log_probs": worker_output.get("log_probs"),
"top_logprobs": worker_output.get("top_logprobs"), "top_logprobs": worker_output.get("top_logprobs"),
"finish_reason": worker_output.get("finish_reason"), "finish_reason": worker_output.get("finish_reason"),
"stop_reason": worker_output.get("stop_reason"),
"index": worker_output.get("index"), "index": worker_output.get("index"),
"disaggregated_params": worker_output.get("disaggregated_params"), "disaggregated_params": worker_output.get("disaggregated_params"),
"extra_args": worker_output.get("extra_args"), "extra_args": worker_output.get("extra_args"),
......
...@@ -1279,13 +1279,17 @@ impl KvPushRouter { ...@@ -1279,13 +1279,17 @@ impl KvPushRouter {
.sampling_options(sampling_options) .sampling_options(sampling_options)
.output_options(output_options) .output_options(output_options)
.router_config_override(router_config_override) .router_config_override(router_config_override)
.dp_rank(dp_rank)
.extra_args(extra_args) .extra_args(extra_args)
.tracker(Some(tracker.clone())); .tracker(Some(tracker.clone()));
// Set backend_instance_id if worker_id is provided // Set routing hints if worker_id or dp_rank is provided
if let Some(worker_id) = worker_id { if worker_id.is_some() || dp_rank.is_some() {
request_builder.backend_instance_id(Some(worker_id)); let routing = llm_rs::protocols::common::preprocessor::RoutingHints {
backend_instance_id: worker_id,
dp_rank,
..Default::default()
};
request_builder.routing(Some(routing));
} }
let request = request_builder.build().map_err(to_pyerr)?; let request = request_builder.build().map_err(to_pyerr)?;
......
...@@ -661,10 +661,7 @@ impl AsyncEngine<SingleIn<RouterRequest>, ManyOut<Annotated<RouterResponse>>, Er ...@@ -661,10 +661,7 @@ impl AsyncEngine<SingleIn<RouterRequest>, ManyOut<Annotated<RouterResponse>>, Er
let context_id = ctx.context().id().to_string(); let context_id = ctx.context().id().to_string();
// Handle different request types // Handle different request types
let response = match request { let response = match request {
RouterRequest::New { RouterRequest::New { tokens } => {
tokens,
request_extra_info: _,
} => {
let (best_worker, overlap_blocks) = self let (best_worker, overlap_blocks) = self
.find_best_match(Some(&context_id), &tokens, None, true) .find_best_match(Some(&context_id), &tokens, None, true)
.await?; .await?;
...@@ -743,57 +740,61 @@ impl AsyncEngine<SingleIn<PreprocessedRequest>, ManyOut<Annotated<LLMEngineOutpu ...@@ -743,57 +740,61 @@ impl AsyncEngine<SingleIn<PreprocessedRequest>, ManyOut<Annotated<LLMEngineOutpu
.map(|t| t.phase()) .map(|t| t.phase())
.unwrap_or(RequestPhase::Aggregated); .unwrap_or(RequestPhase::Aggregated);
// Get pre-selected worker based on phase // Get pre-selected worker based on phase, with backend_instance_id as fallback
let routing = request.routing.as_ref();
let preselected = match phase { let preselected = match phase {
RequestPhase::Prefill => request.target_prefill_worker_id, RequestPhase::Prefill => {
RequestPhase::Decode => request.target_decode_worker_id, routing.and_then(|r| r.prefill_worker_id.or(r.backend_instance_id))
RequestPhase::Aggregated => None, }
RequestPhase::Decode => {
routing.and_then(|r| r.decode_worker_id.or(r.backend_instance_id))
}
RequestPhase::Aggregated => routing.and_then(|r| r.backend_instance_id),
}; };
let block_size = self.chooser.block_size() as usize; let block_size = self.chooser.block_size() as usize;
let (instance_id, dp_rank, overlap_amount) = let (instance_id, dp_rank, overlap_amount) = if let Some(id) = preselected {
if let Some(id) = preselected.or(request.backend_instance_id) { // Route to pre-selected or explicitly specified worker
// Route to pre-selected or explicitly specified worker let dp_rank = routing.and_then(|r| r.dp_rank).unwrap_or(0);
let dp_rank = request.dp_rank.unwrap_or(0); tracing::debug!(
tracing::debug!( worker_id = id,
worker_id = id, dp_rank = dp_rank,
dp_rank = dp_rank, ?phase,
?phase, "Routing to specified worker"
"Routing to specified worker" );
);
// Compute actual overlap blocks by querying the indexer // Compute actual overlap blocks by querying the indexer
let block_hashes = let block_hashes =
compute_block_hash_for_seq(&request.token_ids, self.chooser.block_size(), None); compute_block_hash_for_seq(&request.token_ids, self.chooser.block_size(), None);
let overlap_scores = self.chooser.indexer.find_matches(block_hashes).await?; let overlap_scores = self.chooser.indexer.find_matches(block_hashes).await?;
let worker = WorkerWithDpRank::new(id, dp_rank); let worker = WorkerWithDpRank::new(id, dp_rank);
let overlap_blocks = overlap_scores.scores.get(&worker).copied().unwrap_or(0); let overlap_blocks = overlap_scores.scores.get(&worker).copied().unwrap_or(0);
if !is_query_only { if !is_query_only {
self.chooser self.chooser
.add_request( .add_request(
context_id.clone(), context_id.clone(),
&request.token_ids,
overlap_blocks,
worker,
)
.await;
}
(id, dp_rank, overlap_blocks)
} else {
// Find the best worker match
// Don't update states if this is a query-only request
let (best_worker, overlap_amount) = self
.chooser
.find_best_match(
Some(&context_id),
&request.token_ids, &request.token_ids,
request.router_config_override.as_ref(), overlap_blocks,
!is_query_only, worker,
) )
.await?; .await;
(best_worker.worker_id, best_worker.dp_rank, overlap_amount) }
}; (id, dp_rank, overlap_blocks)
} else {
// Find the best worker match
// Don't update states if this is a query-only request
let (best_worker, overlap_amount) = self
.chooser
.find_best_match(
Some(&context_id),
&request.token_ids,
request.router_config_override.as_ref(),
!is_query_only,
)
.await?;
(best_worker.worker_id, best_worker.dp_rank, overlap_amount)
};
// Record metrics in tracker: KV hit rate and worker ID based on phase // Record metrics in tracker: KV hit rate and worker ID based on phase
if let Some(ref tracker) = request.tracker { if let Some(ref tracker) = request.tracker {
...@@ -830,7 +831,7 @@ impl AsyncEngine<SingleIn<PreprocessedRequest>, ManyOut<Annotated<LLMEngineOutpu ...@@ -830,7 +831,7 @@ impl AsyncEngine<SingleIn<PreprocessedRequest>, ManyOut<Annotated<LLMEngineOutpu
// Route to worker // Route to worker
let (mut backend_input, context) = request.into_parts(); let (mut backend_input, context) = request.into_parts();
backend_input.dp_rank = Some(dp_rank); backend_input.routing_mut().dp_rank = Some(dp_rank);
let updated_request = context.map(|_| backend_input); let updated_request = context.map(|_| backend_input);
let mut response_stream = self.inner.direct(updated_request, instance_id).await?; let mut response_stream = self.inner.direct(updated_request, instance_id).await?;
......
...@@ -72,7 +72,7 @@ impl InnerPrefillRouter { ...@@ -72,7 +72,7 @@ impl InnerPrefillRouter {
/// ///
/// Supports regular Dynamo and GAIE integrated mode via query_instance_id state machine: /// Supports regular Dynamo and GAIE integrated mode via query_instance_id state machine:
/// - GAIE Stage 1: query_instance_id transitions "" -> "prefill" -> "decode", returns only worker IDs /// - GAIE Stage 1: query_instance_id transitions "" -> "prefill" -> "decode", returns only worker IDs
/// - GAIE Stage 2: target_prefill_worker_id/target_decode_worker_id are set, full execution with specified workers /// - GAIE Stage 2: routing.prefill_worker_id/routing.decode_worker_id are set, full execution with specified workers
/// - Non-GAIE: like GAIE Stage 2 but the worker ids have to be determined. /// - Non-GAIE: like GAIE Stage 2 but the worker ids have to be determined.
pub struct PrefillRouter { pub struct PrefillRouter {
prefill_router: OnceLock<InnerPrefillRouter>, prefill_router: OnceLock<InnerPrefillRouter>,
...@@ -221,7 +221,7 @@ impl PrefillRouter { ...@@ -221,7 +221,7 @@ impl PrefillRouter {
// Use pre-selected worker (GAIE Stage 2) or query for best worker // Use pre-selected worker (GAIE Stage 2) or query for best worker
let (worker_id, dp_rank) = if let Some(id) = preselected_worker { let (worker_id, dp_rank) = if let Some(id) = preselected_worker {
let dp_rank = req.dp_rank.unwrap_or(0); let dp_rank = req.routing.as_ref().and_then(|r| r.dp_rank).unwrap_or(0);
tracing::debug!( tracing::debug!(
worker_id = id, worker_id = id,
dp_rank = dp_rank, dp_rank = dp_rank,
...@@ -377,13 +377,17 @@ impl PrefillRouter { ...@@ -377,13 +377,17 @@ impl PrefillRouter {
prefill_req prefill_req
.annotations .annotations
.push(format!("query_instance_id:{}", RequestPhase::Prefill)); .push(format!("query_instance_id:{}", RequestPhase::Prefill));
} else if let Some(prefill_worker_id) = prefill_req.target_prefill_worker_id { } else if let Some(prefill_worker_id) = prefill_req
.routing
.as_ref()
.and_then(|r| r.prefill_worker_id)
{
// GAIE Stage 2: Route to pre-selected prefill worker from the stage 1 // GAIE Stage 2: Route to pre-selected prefill worker from the stage 1
tracing::debug!( tracing::debug!(
target_prefill_worker_id = prefill_worker_id, prefill_worker_id = prefill_worker_id,
"GAIE Stage 2: Routing prefill to pre-selected worker" "GAIE Stage 2: Routing prefill to pre-selected worker"
); );
prefill_req.backend_instance_id = Some(prefill_worker_id); prefill_req.routing_mut().backend_instance_id = Some(prefill_worker_id);
} }
} }
...@@ -456,8 +460,11 @@ impl ...@@ -456,8 +460,11 @@ impl
Self::prepare_prefill_for_gaie(&mut prefill_req, is_gaie_stage1); Self::prepare_prefill_for_gaie(&mut prefill_req, is_gaie_stage1);
// Try build_bootstrap_info optimization (skip for GAIE Stage 1 which needs query-only flow) // Try build_bootstrap_info optimization (skip for GAIE Stage 1 which needs query-only flow)
// For GAIE Stage 2, use target_prefill_worker_id if provided // For GAIE Stage 2, use prefill_worker_id if provided
let preselected_worker = prefill_req.target_prefill_worker_id; let preselected_worker = prefill_req
.routing
.as_ref()
.and_then(|r| r.prefill_worker_id);
let prefill_result = if !is_gaie_stage1 { let prefill_result = if !is_gaie_stage1 {
if let Some((worker_id, dp_rank, bootstrap_info)) = self if let Some((worker_id, dp_rank, bootstrap_info)) = self
.build_bootstrap_info(&prefill_req, preselected_worker) .build_bootstrap_info(&prefill_req, preselected_worker)
...@@ -466,8 +473,9 @@ impl ...@@ -466,8 +473,9 @@ impl
let bootstrap_room = bootstrap_info.bootstrap_room; let bootstrap_room = bootstrap_info.bootstrap_room;
// Prepare request with bootstrap_room and force routing to specific worker // Prepare request with bootstrap_room and force routing to specific worker
prefill_req.backend_instance_id = Some(worker_id); let routing = prefill_req.routing_mut();
prefill_req.dp_rank = Some(dp_rank); routing.backend_instance_id = Some(worker_id);
routing.dp_rank = Some(dp_rank);
let extra_args = prefill_req let extra_args = prefill_req
.extra_args .extra_args
.get_or_insert_with(|| serde_json::json!({})); .get_or_insert_with(|| serde_json::json!({}));
...@@ -578,8 +586,10 @@ impl ...@@ -578,8 +586,10 @@ impl
}); });
// GAIE Stage 2: Route to pre-selected decode worker if specified // GAIE Stage 2: Route to pre-selected decode worker if specified
if let Some(decode_worker_id) = decode_req.target_decode_worker_id { if let Some(decode_worker_id) =
decode_req.backend_instance_id = Some(decode_worker_id); decode_req.routing.as_ref().and_then(|r| r.decode_worker_id)
{
decode_req.routing_mut().backend_instance_id = Some(decode_worker_id);
tracing::debug!( tracing::debug!(
decode_worker_id = decode_worker_id, decode_worker_id = decode_worker_id,
"GAIE Stage 2: Routing decode to pre-selected worker" "GAIE Stage 2: Routing decode to pre-selected worker"
......
...@@ -36,12 +36,9 @@ impl WorkerWithDpRank { ...@@ -36,12 +36,9 @@ impl WorkerWithDpRank {
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "method", rename_all = "snake_case")] #[serde(tag = "method", rename_all = "snake_case")]
pub enum RouterRequest { pub enum RouterRequest {
// ini
#[serde(rename = "new")] #[serde(rename = "new")]
New { New {
tokens: Vec<Token>, tokens: Vec<Token>,
#[serde(default, skip_serializing_if = "Option::is_none")]
request_extra_info: Option<RequestExtraInfo>,
}, },
MarkPrefill, MarkPrefill,
MarkFree, MarkFree,
...@@ -49,10 +46,7 @@ pub enum RouterRequest { ...@@ -49,10 +46,7 @@ pub enum RouterRequest {
impl Default for RouterRequest { impl Default for RouterRequest {
fn default() -> Self { fn default() -> Self {
RouterRequest::New { RouterRequest::New { tokens: vec![] }
tokens: vec![],
request_extra_info: None,
}
} }
} }
......
...@@ -238,8 +238,12 @@ impl AsyncEngine<SingleIn<PreprocessedRequest>, ManyOut<LLMEngineOutput>, Error> ...@@ -238,8 +238,12 @@ impl AsyncEngine<SingleIn<PreprocessedRequest>, ManyOut<LLMEngineOutput>, Error>
) -> Result<ManyOut<LLMEngineOutput>, Error> { ) -> Result<ManyOut<LLMEngineOutput>, Error> {
let (request, ctx) = input.into_parts(); let (request, ctx) = input.into_parts();
// Extract dp_rank from request field (defaults to 0 if not set) // Extract dp_rank from routing hints (defaults to 0 if not set)
let dp_rank = request.dp_rank.unwrap_or(0); let dp_rank = request
.routing
.as_ref()
.and_then(|r| r.dp_rank)
.unwrap_or(0);
// Validate dp_rank // Validate dp_rank
if dp_rank >= self.engine_args.dp_size { if dp_rank >= self.engine_args.dp_size {
......
...@@ -31,7 +31,7 @@ use crate::model_card::{ModelDeploymentCard, ModelInfo}; ...@@ -31,7 +31,7 @@ use crate::model_card::{ModelDeploymentCard, ModelInfo};
use crate::preprocessor::media::MediaLoader; use crate::preprocessor::media::MediaLoader;
use crate::preprocessor::prompt::OAIChatLikeRequest; use crate::preprocessor::prompt::OAIChatLikeRequest;
use crate::protocols::common::preprocessor::{ use crate::protocols::common::preprocessor::{
MultimodalData, MultimodalDataMap, PreprocessedRequestBuilder, MultimodalData, MultimodalDataMap, PreprocessedRequestBuilder, RoutingHints,
}; };
use crate::tokenizers::Encoding; use crate::tokenizers::Encoding;
...@@ -237,13 +237,16 @@ impl OpenAIPreprocessor { ...@@ -237,13 +237,16 @@ impl OpenAIPreprocessor {
builder.output_options(request.extract_output_options()?); builder.output_options(request.extract_output_options()?);
builder.annotations(request.annotations().unwrap_or_default()); builder.annotations(request.annotations().unwrap_or_default());
builder.mdc_sum(Some(self.mdcsum.clone())); builder.mdc_sum(Some(self.mdcsum.clone()));
// Extract backend_instance_id, extra_fields, and worker IDs from nvext if present // Extract routing hints from nvext if present
if let Some(nvext) = request.nvext() { if let Some(nvext) = request.nvext() {
builder.backend_instance_id(nvext.backend_instance_id); // Build routing hints from nvext fields
builder.extra_fields(nvext.extra_fields.clone()); let routing = RoutingHints {
// GAIE Stage 2: Extract targeted worker IDs for disaggregated serving backend_instance_id: nvext.backend_instance_id,
builder.target_prefill_worker_id(nvext.prefill_worker_id); prefill_worker_id: nvext.prefill_worker_id,
builder.target_decode_worker_id(nvext.decode_worker_id); decode_worker_id: nvext.decode_worker_id,
dp_rank: None, // dp_rank is set later in the pipeline
};
builder.routing(Some(routing));
} }
Ok(builder) Ok(builder)
......
...@@ -8,11 +8,34 @@ use serde::{Deserialize, Serialize}; ...@@ -8,11 +8,34 @@ use serde::{Deserialize, Serialize};
use super::timing::RequestTracker; use super::timing::RequestTracker;
use super::{OutputOptions, SamplingOptions, StopConditions}; use super::{OutputOptions, SamplingOptions, StopConditions};
use crate::kv_router::{RouterConfigOverride, protocols::RequestExtraInfo}; use crate::kv_router::RouterConfigOverride;
#[cfg(feature = "media-nixl")] #[cfg(feature = "media-nixl")]
use crate::preprocessor::media::RdmaMediaDataDescriptor; use crate::preprocessor::media::RdmaMediaDataDescriptor;
use crate::protocols::TokenIdType; use crate::protocols::TokenIdType;
/// Routing hints for directing requests to specific workers.
/// These fields are extracted from nvext and used by the router to determine
/// which worker(s) should handle the request.
///
/// Every field is optional. With the serde attributes below, a field that is
/// `None` is omitted from the serialized form, and a field missing from the
/// input deserializes to `None`.
///
/// Besides the nvext path, callers may also construct this struct directly
/// (e.g. from an explicit worker ID and `dp_rank`) and attach it to a request
/// via the `routing` field of `PreprocessedRequest`.
#[derive(Serialize, Deserialize, Debug, Clone, Default, Builder)]
#[builder(default)]
pub struct RoutingHints {
/// General backend instance ID for direct routing (aggregated mode)
///
/// The KV router also uses this as the fallback target in the prefill and
/// decode phases when the phase-specific worker ID below is not set.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub backend_instance_id: Option<u64>,
/// Targeted prefill worker ID for disaggregated serving (GAIE Stage 2)
///
/// When set, the prefill request is routed to this worker instead of one
/// chosen by the router.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub prefill_worker_id: Option<u64>,
/// Targeted decode worker ID for disaggregated serving (GAIE Stage 2)
///
/// When set, the decode request is routed to this worker instead of one
/// chosen by the router.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub decode_worker_id: Option<u64>,
/// Data parallel rank for the request
///
/// The preprocessor leaves this as `None`; the router writes the selected
/// rank here before forwarding the request to the worker. Consumers that
/// read it treat `None` as rank 0.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub dp_rank: Option<u32>,
}
#[derive(Serialize, Deserialize, Debug, Clone, Default)] #[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct BootstrapInfo { pub struct BootstrapInfo {
/// The host address for bootstrap connection /// The host address for bootstrap connection
...@@ -85,9 +108,10 @@ pub struct PreprocessedRequest { ...@@ -85,9 +108,10 @@ pub struct PreprocessedRequest {
#[builder(default)] #[builder(default)]
pub annotations: Vec<String>, pub annotations: Vec<String>,
/// Targeted backend instance ID for the request /// Routing hints for worker targeting (backend_instance_id, prefill/decode worker IDs, dp_rank)
#[builder(default)] #[builder(default)]
pub backend_instance_id: Option<u64>, #[serde(default, skip_serializing_if = "Option::is_none")]
pub routing: Option<RoutingHints>,
/// Router configuration overrides for this specific request /// Router configuration overrides for this specific request
#[builder(default)] #[builder(default)]
...@@ -103,41 +127,15 @@ pub struct PreprocessedRequest { ...@@ -103,41 +127,15 @@ pub struct PreprocessedRequest {
#[serde(default, skip_serializing_if = "Option::is_none")] #[serde(default, skip_serializing_if = "Option::is_none")]
pub bootstrap_info: Option<BootstrapInfo>, pub bootstrap_info: Option<BootstrapInfo>,
/// Data parallel rank for the request (used with data parallelism)
#[builder(default)]
#[serde(default, skip_serializing_if = "Option::is_none")]
pub dp_rank: Option<u32>,
/// Additional arguments for extensibility /// Additional arguments for extensibility
#[builder(default)] #[builder(default)]
#[serde(default, skip_serializing_if = "Option::is_none")] #[serde(default, skip_serializing_if = "Option::is_none")]
pub extra_args: Option<serde_json::Value>, pub extra_args: Option<serde_json::Value>,
/// Extra fields requested to be included in the response's nvext
#[builder(default)]
#[serde(default, skip_serializing_if = "Option::is_none")]
pub extra_fields: Option<Vec<String>>,
/// Multimodal request-level metadata (mm_hash and token offsets)
#[builder(default)]
#[serde(default, skip_serializing_if = "Option::is_none")]
pub request_extra_info: Option<RequestExtraInfo>,
/// Optional request tracker for per-request metrics (shared with DeltaGenerator) /// Optional request tracker for per-request metrics (shared with DeltaGenerator)
#[builder(default)] #[builder(default)]
#[serde(skip)] #[serde(skip)]
pub tracker: Option<Arc<RequestTracker>>, pub tracker: Option<Arc<RequestTracker>>,
/// Targeted prefill worker ID for disaggregated serving (GAIE Stage 2)
/// When set, the prefill request will be routed to this specific worker.
#[builder(default)]
#[serde(default, skip_serializing_if = "Option::is_none")]
pub target_prefill_worker_id: Option<u64>,
/// Targeted decode worker ID for disaggregated serving (GAIE Stage 2)
/// When set, the decode request will be routed to this specific worker.
#[builder(default)]
#[serde(default, skip_serializing_if = "Option::is_none")]
pub target_decode_worker_id: Option<u64>,
} }
impl PreprocessedRequest { impl PreprocessedRequest {
...@@ -154,12 +152,15 @@ impl PreprocessedRequest { ...@@ -154,12 +152,15 @@ impl PreprocessedRequest {
.find(|a| a.starts_with(&prefix)) .find(|a| a.starts_with(&prefix))
.map(|a| a[prefix.len()..].to_string()) .map(|a| a[prefix.len()..].to_string())
} }
}
impl PreprocessedRequest {
pub fn builder() -> PreprocessedRequestBuilder { pub fn builder() -> PreprocessedRequestBuilder {
PreprocessedRequestBuilder::default() PreprocessedRequestBuilder::default()
} }
/// Get mutable access to routing hints, creating default if None
pub fn routing_mut(&mut self) -> &mut RoutingHints {
self.routing.get_or_insert_with(RoutingHints::default)
}
} }
/// [`PreprocessedEmbeddingRequest`] is the internal representation of an embedding request /// [`PreprocessedEmbeddingRequest`] is the internal representation of an embedding request
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment