fix: use random port assignment for http tests (#2472)

e63d728f · ryan-lempka · GitHub · 844f8819 · e63d728f · e63d728f
Unverified Commit e63d728f authored Aug 18, 2025 by ryan-lempka Committed by GitHub Aug 18, 2025
Showing with 84 additions and 66 deletions

lib/llm/tests/common/ports.rs lib/llm/tests/common/ports.rs +15 -0

lib/llm/tests/http-service.rs lib/llm/tests/http-service.rs +47 -54

lib/llm/tests/http_metrics.rs lib/llm/tests/http_metrics.rs +22 -12

No files found.
--- a/lib/llm/tests/common/ports.rs
+++ b/lib/llm/tests/common/ports.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+/// Get a random available port for testing (prefer to hardcoding port numbers to avoid collisions)
+pub async fn get_random_port() -> u16 {
+    let listener = tokio::net::TcpListener::bind("127.0.0.1:0")
+        .await
+        .expect("failed to bind ephemeral port");
+    let port = listener
+        .local_addr()
+        .expect("failed to read local_addr")
+        .port();
+    drop(listener);
+    port
+}
--- a/lib/llm/tests/http-service.rs
+++ b/lib/llm/tests/http-service.rs
@@ -46,11 +46,14 @@ use dynamo_runtime::{
 use futures::StreamExt;
 use prometheus::{proto::MetricType, Registry};
 use reqwest::StatusCode;
-use rstest::*;
 use std::{io::Cursor, sync::Arc};
 use tokio::time::timeout;
 use tokio_util::codec::FramedRead;

+#[path = "common/ports.rs"]
+mod ports;
+use ports::get_random_port;
+
 struct CounterEngine {}

 // Add a new long-running test engine
@@ -270,8 +273,9 @@ fn inc_counter(
 #[allow(deprecated)]
 #[tokio::test]
 async fn test_http_service() {
+    let port = get_random_port().await;
    let service = HttpService::builder()
-        .port(8989)
+        .port(port)
        .enable_chat_endpoints(true)
        .enable_cmpl_endpoints(true)
        .build()
@@ -335,7 +339,7 @@ async fn test_http_service() {
    request.max_tokens = Some(3000);

    let response = client
-        .post("http://localhost:8989/v1/chat/completions")
+        .post(format!("http://localhost:{}/v1/chat/completions", port))
        .json(&request)
        .send()
        .await
@@ -415,7 +419,7 @@ async fn test_http_service() {
    request.max_tokens = Some(0);

    let future = client
-        .post("http://localhost:8989/v1/chat/completions")
+        .post(format!("http://localhost:{}/v1/chat/completions", port))
        .json(&request)
        .send();

@@ -440,7 +444,7 @@ async fn test_http_service() {
    request.stream = Some(true);

    let response = client
-        .post("http://localhost:8989/v1/chat/completions")
+        .post(format!("http://localhost:{}/v1/chat/completions", port))
        .json(&request)
        .send()
        .await
@@ -461,7 +465,7 @@ async fn test_http_service() {
    request.stream = Some(false);

    let response = client
-        .post("http://localhost:8989/v1/chat/completions")
+        .post(format!("http://localhost:{}/v1/chat/completions", port))
        .json(&request)
        .send()
        .await
@@ -486,7 +490,7 @@ async fn test_http_service() {
        .unwrap();

    let response = client
-        .post("http://localhost:8989/v1/completions")
+        .post(format!("http://localhost:{}/v1/completions", port))
        .json(&request)
        .send()
        .await
@@ -507,7 +511,7 @@ async fn test_http_service() {
    request.stream = Some(true);

    let response = client
-        .post("http://localhost:8989/v1/completions")
+        .post(format!("http://localhost:{}/v1/completions", port))
        .json(&request)
        .send()
        .await
@@ -529,7 +533,7 @@ async fn test_http_service() {
    request.stream = Some(false);

    let response = client
-        .post("http://localhost:8989/v1/chat/completions")
+        .post(format!("http://localhost:{}/v1/chat/completions", port))
        .json(&request)
        .send()
        .await
@@ -544,7 +548,7 @@ async fn test_http_service() {

    // =========== Query /metrics endpoint ===========
    let response = client
-        .get("http://localhost:8989/metrics")
+        .get(format!("http://localhost:{}/metrics", port))
        .send()
        .await
        .unwrap();
@@ -573,10 +577,8 @@ async fn wait_for_service_ready(port: u16) {
    }
 }

-#[fixture]
-fn service_with_engines(
-    #[default(8990)] port: u16,
-) -> (HttpService, Arc<CounterEngine>, Arc<AlwaysFailEngine>) {
+async fn service_with_engines() -> (HttpService, Arc<CounterEngine>, Arc<AlwaysFailEngine>, u16) {
+    let port = get_random_port().await;
    let service = HttpService::builder()
        .enable_chat_endpoints(true)
        .enable_cmpl_endpoints(true)
@@ -598,11 +600,10 @@ fn service_with_engines(
        .add_completions_model("bar", failure.clone())
        .unwrap();

-    (service, counter, failure)
+    (service, counter, failure, port)
 }

-#[fixture]
-fn pure_openai_client(#[default(8990)] port: u16) -> PureOpenAIClient {
+fn pure_openai_client(port: u16) -> PureOpenAIClient {
    let config = HttpClientConfig {
        openai_config: OpenAIConfig::new().with_api_base(format!("http://localhost:{}/v1", port)),
        verbose: false,
@@ -610,8 +611,7 @@ fn pure_openai_client(#[default(8990)] port: u16) -> PureOpenAIClient {
    PureOpenAIClient::new(config)
 }

-#[fixture]
-fn nv_custom_client(#[default(8991)] port: u16) -> NvCustomClient {
+fn nv_custom_client(port: u16) -> NvCustomClient {
    let config = HttpClientConfig {
        openai_config: OpenAIConfig::new().with_api_base(format!("http://localhost:{}/v1", port)),
        verbose: false,
@@ -619,8 +619,7 @@ fn nv_custom_client(#[default(8991)] port: u16) -> NvCustomClient {
    NvCustomClient::new(config)
 }

-#[fixture]
-fn generic_byot_client(#[default(8992)] port: u16) -> GenericBYOTClient {
+fn generic_byot_client(port: u16) -> GenericBYOTClient {
    let config = HttpClientConfig {
        openai_config: OpenAIConfig::new().with_api_base(format!("http://localhost:{}/v1", port)),
        verbose: false,
@@ -628,13 +627,11 @@ fn generic_byot_client(#[default(8992)] port: u16) -> GenericBYOTClient {
    GenericBYOTClient::new(config)
 }

-#[rstest]
 #[tokio::test]
-async fn test_pure_openai_client(
-    #[with(8990)] service_with_engines: (HttpService, Arc<CounterEngine>, Arc<AlwaysFailEngine>),
-    #[with(8990)] pure_openai_client: PureOpenAIClient,
-) {
-    let (service, _counter, _failure) = service_with_engines;
+async fn test_pure_openai_client() {
+    let (service, _counter, _failure, port) = service_with_engines().await;
+    let pure_openai_client = pure_openai_client(port);
+
    let token = CancellationToken::new();
    let cancel_token = token.clone();

@@ -642,7 +639,7 @@ async fn test_pure_openai_client(
    let task = tokio::spawn(async move { service.run(token).await });

    // Wait for service to be ready
-    wait_for_service_ready(8990).await;
+    wait_for_service_ready(port).await;

    // Test successful streaming request
    let request = async_openai::types::CreateChatCompletionRequestArgs::default()
@@ -739,13 +736,11 @@ async fn test_pure_openai_client(
    task.await.unwrap().unwrap();
 }

-#[rstest]
 #[tokio::test]
-async fn test_nv_custom_client(
-    #[with(8991)] service_with_engines: (HttpService, Arc<CounterEngine>, Arc<AlwaysFailEngine>),
-    #[with(8991)] nv_custom_client: NvCustomClient,
-) {
-    let (service, _counter, _failure) = service_with_engines;
+async fn test_nv_custom_client() {
+    let (service, _counter, _failure, port) = service_with_engines().await;
+    let nv_custom_client = nv_custom_client(port);
+
    let token = CancellationToken::new();
    let cancel_token = token.clone();

@@ -753,7 +748,7 @@ async fn test_nv_custom_client(
    let task = tokio::spawn(async move { service.run(token).await });

    // Wait for service to be ready
-    wait_for_service_ready(8991).await;
+    wait_for_service_ready(port).await;

    // Test successful streaming request
    let inner_request = async_openai::types::CreateChatCompletionRequestArgs::default()
@@ -868,13 +863,11 @@ async fn test_nv_custom_client(
    task.await.unwrap().unwrap();
 }

-#[rstest]
 #[tokio::test]
-async fn test_generic_byot_client(
-    #[with(8992)] service_with_engines: (HttpService, Arc<CounterEngine>, Arc<AlwaysFailEngine>),
-    #[with(8992)] generic_byot_client: GenericBYOTClient,
-) {
-    let (service, _counter, _failure) = service_with_engines;
+async fn test_generic_byot_client() {
+    let (service, _counter, _failure, port) = service_with_engines().await;
+    let generic_byot_client = generic_byot_client(port);
+
    let token = CancellationToken::new();
    let cancel_token = token.clone();

@@ -882,7 +875,7 @@ async fn test_generic_byot_client(
    let task = tokio::spawn(async move { service.run(token).await });

    // Wait for service to be ready
-    wait_for_service_ready(8992).await;
+    wait_for_service_ready(port).await;

    // Test successful streaming request
    let request = serde_json::json!({
@@ -965,13 +958,13 @@ async fn test_generic_byot_client(
    task.await.unwrap().unwrap();
 }

-#[rstest]
 #[tokio::test]
 async fn test_client_disconnect_cancellation_unary() {
+    let port = get_random_port().await;
    let service = HttpService::builder()
        .enable_chat_endpoints(true)
        .enable_cmpl_endpoints(true)
-        .port(8993)
+        .port(port)
        .build()
        .unwrap();
    let state = service.state_clone();
@@ -984,7 +977,7 @@ async fn test_client_disconnect_cancellation_unary() {
    let task = tokio::spawn(async move { service.run(token).await });

    // Wait for service to be ready
-    wait_for_service_ready(8993).await;
+    wait_for_service_ready(port).await;

    // Create a long-running engine (10 seconds)
    let long_running_engine = Arc::new(LongRunningEngine::new(10_000));
@@ -1015,7 +1008,7 @@ async fn test_client_disconnect_cancellation_unary() {

    let request_future = async {
        client
-            .post("http://localhost:8993/v1/chat/completions")
+            .post(format!("http://localhost:{}/v1/chat/completions", port))
            .json(&request)
            .send()
            .await
@@ -1054,15 +1047,15 @@ async fn test_client_disconnect_cancellation_unary() {
    task.await.unwrap().unwrap();
 }

-#[rstest]
 #[tokio::test]
 async fn test_client_disconnect_cancellation_streaming() {
    dynamo_runtime::logging::init();

+    let port = get_random_port().await;
    let service = HttpService::builder()
        .enable_chat_endpoints(true)
        .enable_cmpl_endpoints(true)
-        .port(8994)
+        .port(port)
        .build()
        .unwrap();
    let state = service.state_clone();
@@ -1075,7 +1068,7 @@ async fn test_client_disconnect_cancellation_streaming() {
    let task = tokio::spawn(async move { service.run(token).await });

    // Wait for service to be ready
-    wait_for_service_ready(8994).await;
+    wait_for_service_ready(port).await;

    // Create a long-running engine (10 seconds)
    let long_running_engine = Arc::new(LongRunningEngine::new(10_000));
@@ -1106,7 +1099,7 @@ async fn test_client_disconnect_cancellation_streaming() {

    let request_future = async {
        let response = client
-            .post("http://localhost:8994/v1/chat/completions")
+            .post(format!("http://localhost:{}/v1/chat/completions", port))
            .json(&request)
            .send()
            .await
@@ -1151,16 +1144,16 @@ async fn test_client_disconnect_cancellation_streaming() {
    task.await.unwrap().unwrap();
 }

-#[rstest]
 #[tokio::test]
 async fn test_request_id_annotation() {
    // TODO(ryan): make better fixtures, this is too much to test sometime so simple
    dynamo_runtime::logging::init();

+    let port = get_random_port().await;
    let service = HttpService::builder()
        .enable_chat_endpoints(true)
        .enable_cmpl_endpoints(true)
-        .port(8995)
+        .port(port)
        .build()
        .unwrap();
    let state = service.state_clone();
@@ -1173,7 +1166,7 @@ async fn test_request_id_annotation() {
    let task = tokio::spawn(async move { service.run(token).await });

    // Wait for service to be ready
-    wait_for_service_ready(8995).await;
+    wait_for_service_ready(port).await;

    // Add a counter engine for this test
    let counter_engine = Arc::new(CounterEngine {});
@@ -1205,7 +1198,7 @@ async fn test_request_id_annotation() {

    // Make the streaming request with custom header
    let response = client
-        .post("http://localhost:8995/v1/chat/completions")
+        .post(format!("http://localhost:{}/v1/chat/completions", port))
        .header("x-dynamo-request-id", request_uuid.to_string())
        .json(&request_json)
        .send()

--- a/lib/llm/tests/http_metrics.rs
+++ b/lib/llm/tests/http_metrics.rs
@@ -7,15 +7,20 @@ use dynamo_runtime::CancellationToken;
 use serial_test::serial;
 use std::{env, time::Duration};

+#[path = "common/ports.rs"]
+mod ports;
+use ports::get_random_port;
+
 #[tokio::test]
 #[serial]
 async fn metrics_prefix_default_then_env_override() {
    // Case 1: default prefix
    env::remove_var(metrics::METRICS_PREFIX_ENV);
-    let svc1 = HttpService::builder().port(9101).build().unwrap();
+    let p1 = get_random_port().await;
+    let svc1 = HttpService::builder().port(p1).build().unwrap();
    let token1 = CancellationToken::new();
-    let _h1 = svc1.spawn(token1.clone()).await;
-    wait_for_metrics_ready(9101).await;
+    let h1 = svc1.spawn(token1.clone()).await;
+    wait_for_metrics_ready(p1).await;

    // Populate labeled metrics
    let s1 = svc1.state_clone();
@@ -26,7 +31,7 @@ async fn metrics_prefix_default_then_env_override() {
            false,
        );
    }
-    let body1 = reqwest::get("http://localhost:9101/metrics")
+    let body1 = reqwest::get(format!("http://localhost:{}/metrics", p1))
        .await
        .unwrap()
        .text()
@@ -34,13 +39,15 @@ async fn metrics_prefix_default_then_env_override() {
        .unwrap();
    assert!(body1.contains("dynamo_frontend_requests_total"));
    token1.cancel();
+    let _ = h1.await; // ensure port is released

    // Case 2: env override to prefix
    env::set_var(metrics::METRICS_PREFIX_ENV, "custom_prefix");
-    let svc2 = HttpService::builder().port(9102).build().unwrap();
+    let p2 = get_random_port().await;
+    let svc2 = HttpService::builder().port(p2).build().unwrap();
    let token2 = CancellationToken::new();
-    let _h2 = svc2.spawn(token2.clone()).await;
-    wait_for_metrics_ready(9102).await;
+    let h2 = svc2.spawn(token2.clone()).await;
+    wait_for_metrics_ready(p2).await;

    // Populate labeled metrics
    let s2 = svc2.state_clone();
@@ -50,7 +57,7 @@ async fn metrics_prefix_default_then_env_override() {
                .create_inflight_guard("test-model", Endpoint::ChatCompletions, true);
    }
    // Single fetch and assert
-    let body2 = reqwest::get("http://localhost:9102/metrics")
+    let body2 = reqwest::get(format!("http://localhost:{}/metrics", p2))
        .await
        .unwrap()
        .text()
@@ -59,13 +66,15 @@ async fn metrics_prefix_default_then_env_override() {
    assert!(body2.contains("custom_prefix_requests_total"));
    assert!(!body2.contains("dynamo_frontend_requests_total"));
    token2.cancel();
+    let _ = h2.await;

    // Case 3: invalid env prefix is sanitized
    env::set_var(metrics::METRICS_PREFIX_ENV, "nv-llm/http service");
-    let svc3 = HttpService::builder().port(9103).build().unwrap();
+    let p3 = get_random_port().await;
+    let svc3 = HttpService::builder().port(p3).build().unwrap();
    let token3 = CancellationToken::new();
-    let _h3 = svc3.spawn(token3.clone()).await;
-    wait_for_metrics_ready(9103).await;
+    let h3 = svc3.spawn(token3.clone()).await;
+    wait_for_metrics_ready(p3).await;

    let s3 = svc3.state_clone();
    {
@@ -73,7 +82,7 @@ async fn metrics_prefix_default_then_env_override() {
            s3.metrics_clone()
                .create_inflight_guard("test-model", Endpoint::ChatCompletions, true);
    }
-    let body3 = reqwest::get("http://localhost:9103/metrics")
+    let body3 = reqwest::get(format!("http://localhost:{}/metrics", p3))
        .await
        .unwrap()
        .text()
@@ -82,6 +91,7 @@ async fn metrics_prefix_default_then_env_override() {
    assert!(body3.contains("nv_llm_http_service_requests_total"));
    assert!(!body3.contains("dynamo_frontend_requests_total"));
    token3.cancel();
+    let _ = h3.await;

    // Cleanup env to avoid leaking state
    env::remove_var(metrics::METRICS_PREFIX_ENV);