Unverified commit f4aa7880, authored by Jonah Bernard and committed by GitHub

[router] Add Rust CLI flags for queue size, timeout, and rate limit for token bucket rate limiter (#11483)
Co-authored-by: Simo Lin <linsimo.mark@gmail.com>
parent 5e3f7e7f
@@ -426,6 +426,11 @@ curl -X POST http://localhost:8080/add_worker?url=http://worker3:8000&api_key=wo
 #### Authentication
 - `--api-key`: API key for router authentication (clients must provide this as Bearer token)
+#### Concurrency and Rate Limiting
+- `--queue-size`: Size of the pending-request queue when concurrency limits are reached (default: 100; set to 0 to disable queuing)
+- `--queue-timeout-secs`: Maximum time a request may wait in the queue before timing out (default: 60; must be > 0 when queue is enabled)
+- `--rate-limit-tokens-per-second`: Override token bucket refill rate for rate limiting (defaults to `--max-concurrent-requests` when omitted)
 ## Development
 ### Build Process
......
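The flags documented above are defined with clap in the router's `CliArgs` struct (see the diff further down). A minimal, self-contained sketch of just the three new arguments, assuming clap 4 with the derive feature; the `QueueArgs` struct name and the example invocation are illustrative, not part of the actual router:

```rust
use clap::Parser;

/// Illustrative sketch only: the three new arguments in isolation, using the
/// same clap attributes as the CliArgs diff below. The real CLI struct has
/// many more fields.
#[derive(Parser, Debug)]
struct QueueArgs {
    /// Size of the pending-request queue (0 disables queuing).
    #[arg(long, default_value_t = 100)]
    queue_size: usize,

    /// Maximum time a request may wait in the queue before timing out.
    #[arg(long, default_value_t = 60)]
    queue_timeout_secs: u64,

    /// Token bucket refill rate; falls back to --max-concurrent-requests when omitted.
    #[arg(long)]
    rate_limit_tokens_per_second: Option<i32>,
}

fn main() {
    // e.g. `my-router --queue-size 200 --queue-timeout-secs 30`
    let args = QueueArgs::parse();
    println!("{args:?}");
}
```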
@@ -205,6 +205,24 @@ impl ConfigValidator {
             });
         }
+        if config.queue_size > 0 && config.queue_timeout_secs == 0 {
+            return Err(ConfigError::InvalidValue {
+                field: "queue_timeout_secs".to_string(),
+                value: config.queue_timeout_secs.to_string(),
+                reason: "Must be > 0 when queue_size > 0".to_string(),
+            });
+        }
+        if let Some(tokens_per_second) = config.rate_limit_tokens_per_second {
+            if tokens_per_second <= 0 {
+                return Err(ConfigError::InvalidValue {
+                    field: "rate_limit_tokens_per_second".to_string(),
+                    value: tokens_per_second.to_string(),
+                    reason: "Must be > 0 when specified".to_string(),
+                });
+            }
+        }
         if config.worker_startup_timeout_secs == 0 {
             return Err(ConfigError::InvalidValue {
                 field: "worker_startup_timeout_secs".to_string(),
......
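The added validation enforces two constraints: the queue timeout must be positive whenever queuing is enabled, and an explicitly provided refill rate must be strictly positive. A standalone restatement of the same rules as a quick sanity check (the function name and `String` error type are illustrative; the real validator returns `ConfigError::InvalidValue` as shown above):

```rust
/// Illustrative restatement of the two new validation rules.
fn check_queue_and_rate_limit(
    queue_size: usize,
    queue_timeout_secs: u64,
    rate_limit_tokens_per_second: Option<i32>,
) -> Result<(), String> {
    // Queuing enabled but no time budget to wait: reject.
    if queue_size > 0 && queue_timeout_secs == 0 {
        return Err("queue_timeout_secs must be > 0 when queue_size > 0".into());
    }
    // An explicit refill rate must be strictly positive.
    if let Some(tps) = rate_limit_tokens_per_second {
        if tps <= 0 {
            return Err("rate_limit_tokens_per_second must be > 0 when specified".into());
        }
    }
    Ok(())
}

fn main() {
    assert!(check_queue_and_rate_limit(100, 60, None).is_ok());
    assert!(check_queue_and_rate_limit(100, 0, None).is_err()); // queue without timeout
    assert!(check_queue_and_rate_limit(0, 0, None).is_ok()); // queuing disabled entirely
    assert!(check_queue_and_rate_limit(100, 60, Some(0)).is_err()); // non-positive refill rate
}
```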
@@ -195,6 +195,15 @@ struct CliArgs {
     #[arg(long, default_value_t = -1)]
     max_concurrent_requests: i32,
+    #[arg(long, default_value_t = 100)]
+    queue_size: usize,
+    #[arg(long, default_value_t = 60)]
+    queue_timeout_secs: u64,
+    #[arg(long)]
+    rate_limit_tokens_per_second: Option<i32>,
     #[arg(long, num_args = 0..)]
     cors_allowed_origins: Vec<String>,
@@ -535,8 +544,8 @@ impl CliArgs {
                 Some(self.request_id_headers.clone())
             },
             max_concurrent_requests: self.max_concurrent_requests,
-            queue_size: 100,
-            queue_timeout_secs: 60,
+            queue_size: self.queue_size,
+            queue_timeout_secs: self.queue_timeout_secs,
             cors_allowed_origins: self.cors_allowed_origins.clone(),
             retry: RetryConfig {
                 max_retries: self.retry_max_retries,
@@ -561,7 +570,7 @@ impl CliArgs {
                 endpoint: self.health_check_endpoint.clone(),
            },
            enable_igw: self.enable_igw,
-           rate_limit_tokens_per_second: None,
+           rate_limit_tokens_per_second: self.rate_limit_tokens_per_second,
            model_path: self.model_path.clone(),
            tokenizer_path: self.tokenizer_path.clone(),
            chat_template: self.chat_template.clone(),
......
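The CLI change only passes the optional refill rate through to the config; per the README entry above, an omitted `--rate-limit-tokens-per-second` defaults to `--max-concurrent-requests`. A sketch of that resolution step under that assumption (the function name is hypothetical, and the actual fallback site is not shown in this diff):

```rust
/// Illustrative only: resolve the effective token-bucket refill rate from the
/// two settings, mirroring the documented fallback behavior.
fn effective_refill_rate(
    rate_limit_tokens_per_second: Option<i32>,
    max_concurrent_requests: i32,
) -> i32 {
    rate_limit_tokens_per_second.unwrap_or(max_concurrent_requests)
}

fn main() {
    assert_eq!(effective_refill_rate(None, 256), 256); // fall back to the concurrency limit
    assert_eq!(effective_refill_rate(Some(64), 256), 64); // explicit override wins
}
```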