Unverified Commit e63d728f authored by ryan-lempka's avatar ryan-lempka Committed by GitHub
Browse files

fix: use random port assignment for http tests (#2472)

parent 844f8819
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
/// Get a random available port for testing (prefer to hardcoding port numbers to avoid collisions)
pub async fn get_random_port() -> u16 {
let listener = tokio::net::TcpListener::bind("127.0.0.1:0")
.await
.expect("failed to bind ephemeral port");
let port = listener
.local_addr()
.expect("failed to read local_addr")
.port();
drop(listener);
port
}
......@@ -46,11 +46,14 @@ use dynamo_runtime::{
use futures::StreamExt;
use prometheus::{proto::MetricType, Registry};
use reqwest::StatusCode;
use rstest::*;
use std::{io::Cursor, sync::Arc};
use tokio::time::timeout;
use tokio_util::codec::FramedRead;
#[path = "common/ports.rs"]
mod ports;
use ports::get_random_port;
struct CounterEngine {}
// Add a new long-running test engine
......@@ -270,8 +273,9 @@ fn inc_counter(
#[allow(deprecated)]
#[tokio::test]
async fn test_http_service() {
let port = get_random_port().await;
let service = HttpService::builder()
.port(8989)
.port(port)
.enable_chat_endpoints(true)
.enable_cmpl_endpoints(true)
.build()
......@@ -335,7 +339,7 @@ async fn test_http_service() {
request.max_tokens = Some(3000);
let response = client
.post("http://localhost:8989/v1/chat/completions")
.post(format!("http://localhost:{}/v1/chat/completions", port))
.json(&request)
.send()
.await
......@@ -415,7 +419,7 @@ async fn test_http_service() {
request.max_tokens = Some(0);
let future = client
.post("http://localhost:8989/v1/chat/completions")
.post(format!("http://localhost:{}/v1/chat/completions", port))
.json(&request)
.send();
......@@ -440,7 +444,7 @@ async fn test_http_service() {
request.stream = Some(true);
let response = client
.post("http://localhost:8989/v1/chat/completions")
.post(format!("http://localhost:{}/v1/chat/completions", port))
.json(&request)
.send()
.await
......@@ -461,7 +465,7 @@ async fn test_http_service() {
request.stream = Some(false);
let response = client
.post("http://localhost:8989/v1/chat/completions")
.post(format!("http://localhost:{}/v1/chat/completions", port))
.json(&request)
.send()
.await
......@@ -486,7 +490,7 @@ async fn test_http_service() {
.unwrap();
let response = client
.post("http://localhost:8989/v1/completions")
.post(format!("http://localhost:{}/v1/completions", port))
.json(&request)
.send()
.await
......@@ -507,7 +511,7 @@ async fn test_http_service() {
request.stream = Some(true);
let response = client
.post("http://localhost:8989/v1/completions")
.post(format!("http://localhost:{}/v1/completions", port))
.json(&request)
.send()
.await
......@@ -529,7 +533,7 @@ async fn test_http_service() {
request.stream = Some(false);
let response = client
.post("http://localhost:8989/v1/chat/completions")
.post(format!("http://localhost:{}/v1/chat/completions", port))
.json(&request)
.send()
.await
......@@ -544,7 +548,7 @@ async fn test_http_service() {
// =========== Query /metrics endpoint ===========
let response = client
.get("http://localhost:8989/metrics")
.get(format!("http://localhost:{}/metrics", port))
.send()
.await
.unwrap();
......@@ -573,10 +577,8 @@ async fn wait_for_service_ready(port: u16) {
}
}
#[fixture]
fn service_with_engines(
#[default(8990)] port: u16,
) -> (HttpService, Arc<CounterEngine>, Arc<AlwaysFailEngine>) {
async fn service_with_engines() -> (HttpService, Arc<CounterEngine>, Arc<AlwaysFailEngine>, u16) {
let port = get_random_port().await;
let service = HttpService::builder()
.enable_chat_endpoints(true)
.enable_cmpl_endpoints(true)
......@@ -598,11 +600,10 @@ fn service_with_engines(
.add_completions_model("bar", failure.clone())
.unwrap();
(service, counter, failure)
(service, counter, failure, port)
}
#[fixture]
fn pure_openai_client(#[default(8990)] port: u16) -> PureOpenAIClient {
fn pure_openai_client(port: u16) -> PureOpenAIClient {
let config = HttpClientConfig {
openai_config: OpenAIConfig::new().with_api_base(format!("http://localhost:{}/v1", port)),
verbose: false,
......@@ -610,8 +611,7 @@ fn pure_openai_client(#[default(8990)] port: u16) -> PureOpenAIClient {
PureOpenAIClient::new(config)
}
#[fixture]
fn nv_custom_client(#[default(8991)] port: u16) -> NvCustomClient {
fn nv_custom_client(port: u16) -> NvCustomClient {
let config = HttpClientConfig {
openai_config: OpenAIConfig::new().with_api_base(format!("http://localhost:{}/v1", port)),
verbose: false,
......@@ -619,8 +619,7 @@ fn nv_custom_client(#[default(8991)] port: u16) -> NvCustomClient {
NvCustomClient::new(config)
}
#[fixture]
fn generic_byot_client(#[default(8992)] port: u16) -> GenericBYOTClient {
fn generic_byot_client(port: u16) -> GenericBYOTClient {
let config = HttpClientConfig {
openai_config: OpenAIConfig::new().with_api_base(format!("http://localhost:{}/v1", port)),
verbose: false,
......@@ -628,13 +627,11 @@ fn generic_byot_client(#[default(8992)] port: u16) -> GenericBYOTClient {
GenericBYOTClient::new(config)
}
#[rstest]
#[tokio::test]
async fn test_pure_openai_client(
#[with(8990)] service_with_engines: (HttpService, Arc<CounterEngine>, Arc<AlwaysFailEngine>),
#[with(8990)] pure_openai_client: PureOpenAIClient,
) {
let (service, _counter, _failure) = service_with_engines;
async fn test_pure_openai_client() {
let (service, _counter, _failure, port) = service_with_engines().await;
let pure_openai_client = pure_openai_client(port);
let token = CancellationToken::new();
let cancel_token = token.clone();
......@@ -642,7 +639,7 @@ async fn test_pure_openai_client(
let task = tokio::spawn(async move { service.run(token).await });
// Wait for service to be ready
wait_for_service_ready(8990).await;
wait_for_service_ready(port).await;
// Test successful streaming request
let request = async_openai::types::CreateChatCompletionRequestArgs::default()
......@@ -739,13 +736,11 @@ async fn test_pure_openai_client(
task.await.unwrap().unwrap();
}
#[rstest]
#[tokio::test]
async fn test_nv_custom_client(
#[with(8991)] service_with_engines: (HttpService, Arc<CounterEngine>, Arc<AlwaysFailEngine>),
#[with(8991)] nv_custom_client: NvCustomClient,
) {
let (service, _counter, _failure) = service_with_engines;
async fn test_nv_custom_client() {
let (service, _counter, _failure, port) = service_with_engines().await;
let nv_custom_client = nv_custom_client(port);
let token = CancellationToken::new();
let cancel_token = token.clone();
......@@ -753,7 +748,7 @@ async fn test_nv_custom_client(
let task = tokio::spawn(async move { service.run(token).await });
// Wait for service to be ready
wait_for_service_ready(8991).await;
wait_for_service_ready(port).await;
// Test successful streaming request
let inner_request = async_openai::types::CreateChatCompletionRequestArgs::default()
......@@ -868,13 +863,11 @@ async fn test_nv_custom_client(
task.await.unwrap().unwrap();
}
#[rstest]
#[tokio::test]
async fn test_generic_byot_client(
#[with(8992)] service_with_engines: (HttpService, Arc<CounterEngine>, Arc<AlwaysFailEngine>),
#[with(8992)] generic_byot_client: GenericBYOTClient,
) {
let (service, _counter, _failure) = service_with_engines;
async fn test_generic_byot_client() {
let (service, _counter, _failure, port) = service_with_engines().await;
let generic_byot_client = generic_byot_client(port);
let token = CancellationToken::new();
let cancel_token = token.clone();
......@@ -882,7 +875,7 @@ async fn test_generic_byot_client(
let task = tokio::spawn(async move { service.run(token).await });
// Wait for service to be ready
wait_for_service_ready(8992).await;
wait_for_service_ready(port).await;
// Test successful streaming request
let request = serde_json::json!({
......@@ -965,13 +958,13 @@ async fn test_generic_byot_client(
task.await.unwrap().unwrap();
}
#[rstest]
#[tokio::test]
async fn test_client_disconnect_cancellation_unary() {
let port = get_random_port().await;
let service = HttpService::builder()
.enable_chat_endpoints(true)
.enable_cmpl_endpoints(true)
.port(8993)
.port(port)
.build()
.unwrap();
let state = service.state_clone();
......@@ -984,7 +977,7 @@ async fn test_client_disconnect_cancellation_unary() {
let task = tokio::spawn(async move { service.run(token).await });
// Wait for service to be ready
wait_for_service_ready(8993).await;
wait_for_service_ready(port).await;
// Create a long-running engine (10 seconds)
let long_running_engine = Arc::new(LongRunningEngine::new(10_000));
......@@ -1015,7 +1008,7 @@ async fn test_client_disconnect_cancellation_unary() {
let request_future = async {
client
.post("http://localhost:8993/v1/chat/completions")
.post(format!("http://localhost:{}/v1/chat/completions", port))
.json(&request)
.send()
.await
......@@ -1054,15 +1047,15 @@ async fn test_client_disconnect_cancellation_unary() {
task.await.unwrap().unwrap();
}
#[rstest]
#[tokio::test]
async fn test_client_disconnect_cancellation_streaming() {
dynamo_runtime::logging::init();
let port = get_random_port().await;
let service = HttpService::builder()
.enable_chat_endpoints(true)
.enable_cmpl_endpoints(true)
.port(8994)
.port(port)
.build()
.unwrap();
let state = service.state_clone();
......@@ -1075,7 +1068,7 @@ async fn test_client_disconnect_cancellation_streaming() {
let task = tokio::spawn(async move { service.run(token).await });
// Wait for service to be ready
wait_for_service_ready(8994).await;
wait_for_service_ready(port).await;
// Create a long-running engine (10 seconds)
let long_running_engine = Arc::new(LongRunningEngine::new(10_000));
......@@ -1106,7 +1099,7 @@ async fn test_client_disconnect_cancellation_streaming() {
let request_future = async {
let response = client
.post("http://localhost:8994/v1/chat/completions")
.post(format!("http://localhost:{}/v1/chat/completions", port))
.json(&request)
.send()
.await
......@@ -1151,16 +1144,16 @@ async fn test_client_disconnect_cancellation_streaming() {
task.await.unwrap().unwrap();
}
#[rstest]
#[tokio::test]
async fn test_request_id_annotation() {
// TODO(ryan): make better fixtures, this is too much to test sometime so simple
dynamo_runtime::logging::init();
let port = get_random_port().await;
let service = HttpService::builder()
.enable_chat_endpoints(true)
.enable_cmpl_endpoints(true)
.port(8995)
.port(port)
.build()
.unwrap();
let state = service.state_clone();
......@@ -1173,7 +1166,7 @@ async fn test_request_id_annotation() {
let task = tokio::spawn(async move { service.run(token).await });
// Wait for service to be ready
wait_for_service_ready(8995).await;
wait_for_service_ready(port).await;
// Add a counter engine for this test
let counter_engine = Arc::new(CounterEngine {});
......@@ -1205,7 +1198,7 @@ async fn test_request_id_annotation() {
// Make the streaming request with custom header
let response = client
.post("http://localhost:8995/v1/chat/completions")
.post(format!("http://localhost:{}/v1/chat/completions", port))
.header("x-dynamo-request-id", request_uuid.to_string())
.json(&request_json)
.send()
......
......@@ -7,15 +7,20 @@ use dynamo_runtime::CancellationToken;
use serial_test::serial;
use std::{env, time::Duration};
#[path = "common/ports.rs"]
mod ports;
use ports::get_random_port;
#[tokio::test]
#[serial]
async fn metrics_prefix_default_then_env_override() {
// Case 1: default prefix
env::remove_var(metrics::METRICS_PREFIX_ENV);
let svc1 = HttpService::builder().port(9101).build().unwrap();
let p1 = get_random_port().await;
let svc1 = HttpService::builder().port(p1).build().unwrap();
let token1 = CancellationToken::new();
let _h1 = svc1.spawn(token1.clone()).await;
wait_for_metrics_ready(9101).await;
let h1 = svc1.spawn(token1.clone()).await;
wait_for_metrics_ready(p1).await;
// Populate labeled metrics
let s1 = svc1.state_clone();
......@@ -26,7 +31,7 @@ async fn metrics_prefix_default_then_env_override() {
false,
);
}
let body1 = reqwest::get("http://localhost:9101/metrics")
let body1 = reqwest::get(format!("http://localhost:{}/metrics", p1))
.await
.unwrap()
.text()
......@@ -34,13 +39,15 @@ async fn metrics_prefix_default_then_env_override() {
.unwrap();
assert!(body1.contains("dynamo_frontend_requests_total"));
token1.cancel();
let _ = h1.await; // ensure port is released
// Case 2: env override to prefix
env::set_var(metrics::METRICS_PREFIX_ENV, "custom_prefix");
let svc2 = HttpService::builder().port(9102).build().unwrap();
let p2 = get_random_port().await;
let svc2 = HttpService::builder().port(p2).build().unwrap();
let token2 = CancellationToken::new();
let _h2 = svc2.spawn(token2.clone()).await;
wait_for_metrics_ready(9102).await;
let h2 = svc2.spawn(token2.clone()).await;
wait_for_metrics_ready(p2).await;
// Populate labeled metrics
let s2 = svc2.state_clone();
......@@ -50,7 +57,7 @@ async fn metrics_prefix_default_then_env_override() {
.create_inflight_guard("test-model", Endpoint::ChatCompletions, true);
}
// Single fetch and assert
let body2 = reqwest::get("http://localhost:9102/metrics")
let body2 = reqwest::get(format!("http://localhost:{}/metrics", p2))
.await
.unwrap()
.text()
......@@ -59,13 +66,15 @@ async fn metrics_prefix_default_then_env_override() {
assert!(body2.contains("custom_prefix_requests_total"));
assert!(!body2.contains("dynamo_frontend_requests_total"));
token2.cancel();
let _ = h2.await;
// Case 3: invalid env prefix is sanitized
env::set_var(metrics::METRICS_PREFIX_ENV, "nv-llm/http service");
let svc3 = HttpService::builder().port(9103).build().unwrap();
let p3 = get_random_port().await;
let svc3 = HttpService::builder().port(p3).build().unwrap();
let token3 = CancellationToken::new();
let _h3 = svc3.spawn(token3.clone()).await;
wait_for_metrics_ready(9103).await;
let h3 = svc3.spawn(token3.clone()).await;
wait_for_metrics_ready(p3).await;
let s3 = svc3.state_clone();
{
......@@ -73,7 +82,7 @@ async fn metrics_prefix_default_then_env_override() {
s3.metrics_clone()
.create_inflight_guard("test-model", Endpoint::ChatCompletions, true);
}
let body3 = reqwest::get("http://localhost:9103/metrics")
let body3 = reqwest::get(format!("http://localhost:{}/metrics", p3))
.await
.unwrap()
.text()
......@@ -82,6 +91,7 @@ async fn metrics_prefix_default_then_env_override() {
assert!(body3.contains("nv_llm_http_service_requests_total"));
assert!(!body3.contains("dynamo_frontend_requests_total"));
token3.cancel();
let _ = h3.await;
// Cleanup env to avoid leaking state
env::remove_var(metrics::METRICS_PREFIX_ENV);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment