fix: Integration tests fixes (#2161)

Co-authored-by: Keiven Chang <keivenchang@users.noreply.github.com>

fix: Integration tests fixes (#2161)
Co-authored-by: Keiven Chang <keivenchang@users.noreply.github.com>
f10e44ca · Keiven C · GitHub · 7e3b3fab · f10e44ca · f10e44ca
Unverified Commit f10e44ca authored Jul 31, 2025 by Keiven C Committed by GitHub Jul 31, 2025
5 changed files
--- a/lib/runtime/src/component/component.rs
+++ b/lib/runtime/src/component/component.rs
@@ -86,27 +86,17 @@ mod tests {
    // todo - make a distributed runtime fixture
    // todo - two options - fully mocked or integration test
    #[tokio::test]
-    async fn test_publish() {
+    async fn test_publish_and_subscribe() {
        let rt = Runtime::from_current().unwrap();
        let dtr = DistributedRuntime::from_settings(rt.clone()).await.unwrap();
-        let ns = dtr.namespace("test".to_string()).unwrap();
-        let cp = ns.component("component".to_string()).unwrap();
-        cp.publish("test", &"test".to_string()).await.unwrap();
-        rt.shutdown();
-    }
-
-    #[tokio::test]
-    async fn test_subscribe() {
-        let rt = Runtime::from_current().unwrap();
-        let dtr = DistributedRuntime::from_settings(rt.clone()).await.unwrap();
-        let ns = dtr.namespace("test".to_string()).unwrap();
-        let cp = ns.component("component".to_string()).unwrap();
+        let ns = dtr.namespace("test_component".to_string()).unwrap();
+        let cp = ns.component("test_component".to_string()).unwrap();

-        // Create a subscriber
-        let mut subscriber = ns.subscribe("test").await.unwrap();
+        // Create a subscriber on the component
+        let mut subscriber = cp.subscribe("test_event").await.unwrap();

-        // Publish a message
-        cp.publish("test", &"test_message".to_string())
+        // Publish a message from the component
+        cp.publish("test_event", &"test_message".to_string())
            .await
            .unwrap();


--- a/lib/runtime/src/component/namespace.rs
+++ b/lib/runtime/src/component/namespace.rs
@@ -99,8 +99,8 @@ mod tests {
    async fn test_publish() {
        let rt = Runtime::from_current().unwrap();
        let dtr = DistributedRuntime::from_settings(rt.clone()).await.unwrap();
-        let ns = dtr.namespace("test".to_string()).unwrap();
-        ns.publish("test", &"test".to_string()).await.unwrap();
+        let ns = dtr.namespace("test_namespace_publish".to_string()).unwrap();
+        ns.publish("test_event", &"test".to_string()).await.unwrap();
        rt.shutdown();
    }

@@ -108,13 +108,15 @@ mod tests {
    async fn test_subscribe() {
        let rt = Runtime::from_current().unwrap();
        let dtr = DistributedRuntime::from_settings(rt.clone()).await.unwrap();
-        let ns = dtr.namespace("test".to_string()).unwrap();
+        let ns = dtr
+            .namespace("test_namespace_subscribe".to_string())
+            .unwrap();

        // Create a subscriber
-        let mut subscriber = ns.subscribe("test").await.unwrap();
+        let mut subscriber = ns.subscribe("test_event").await.unwrap();

        // Publish a message
-        ns.publish("test", &"test_message".to_string())
+        ns.publish("test_event", &"test_message".to_string())
            .await
            .unwrap();


--- a/lib/runtime/src/http_server.rs
+++ b/lib/runtime/src/http_server.rs
@@ -77,7 +77,7 @@ impl crate::traits::DistributedRuntimeProvider for HttpMetricsRegistry {

 impl MetricsRegistry for HttpMetricsRegistry {
    fn basename(&self) -> String {
-        "http_server".to_string()
+        "dynamo".to_string()
    }

    fn parent_hierarchy(&self) -> Vec<String> {
@@ -100,7 +100,7 @@ impl HttpServerState {
        // Note: This metric is created at the DRT level (no namespace); the "dynamo_" prefix is presumably
        // supplied by the registry basename ("dynamo"), so the metric name passed here omits it
        let uptime_gauge = http_metrics_registry.as_ref().create_gauge(
-            "dynamo_uptime_seconds",
+            "system_uptime_seconds",
            "Total uptime of the DistributedRuntime in seconds",
            &[],
        )?;
@@ -368,9 +368,9 @@ mod tests {
        println!("Full metrics response:\n{}", response);

        let expected = "\
-# HELP dynamo_uptime_seconds Total uptime of the DistributedRuntime in seconds
-# TYPE dynamo_uptime_seconds gauge
-dynamo_uptime_seconds{namespace=\"http_server\"} 42
+# HELP dynamo_system_uptime_seconds Total uptime of the DistributedRuntime in seconds
+# TYPE dynamo_system_uptime_seconds gauge
+dynamo_system_uptime_seconds{namespace=\"dynamo\"} 42
 ";
        assert_eq!(response, expected);
    }

--- a/lib/runtime/src/metrics.rs
+++ b/lib/runtime/src/metrics.rs
@@ -797,7 +797,7 @@ mod test_prefixes {
        println!("\n=== Testing Invalid Namespace Behavior ===");

        // Create a namespace with an invalid name ("@@123", which is stripped to an empty string)
-        let invalid_namespace = drt.namespace("test-namespace").unwrap();
+        let invalid_namespace = drt.namespace("@@123").unwrap();

        // Debug: Let's see what the hierarchy looks like
        println!(
@@ -810,15 +810,15 @@ mod test_prefixes {
        );
        println!("Invalid namespace prefix: '{}'", invalid_namespace.prefix());

-        // Try to create a metric - this should fail because the namespace name will be used in the metric name
+        // Try to create a metric - this should fail because "@@123" gets stripped to "" which is invalid
        let result = invalid_namespace.create_counter("test_counter", "A test counter", &[]);
-        println!("Result with invalid namespace 'test-namespace':");
+        println!("Result with invalid namespace '@@123':");
        println!("{:?}", result);

-        // The result should be an error from Prometheus
+        // The result should be an error because empty metric names are invalid
        assert!(
            result.is_err(),
-            "Creating metric with invalid namespace should fail"
+            "Creating metric with namespace '@@123' should fail because it gets stripped to empty string"
        );

        // For comparison, show a valid namespace works
@@ -926,15 +926,15 @@ testnamespace_testgauge{{component="testcomponent",namespace="testnamespace"}} 5
        println!("{}", namespace_output);

        let expected_namespace_output = format!(
-            r#"# HELP testintcounter A test int counter
-# TYPE testintcounter counter
-testintcounter{{namespace="testnamespace"}} 12345
-# HELP testnamespace_testcounter A test counter
+            r#"# HELP testnamespace_testcounter A test counter
 # TYPE testnamespace_testcounter counter
 testnamespace_testcounter{{component="testcomponent",endpoint="testendpoint",namespace="testnamespace"}} 123.456789
 # HELP testnamespace_testgauge A test gauge
 # TYPE testnamespace_testgauge gauge
 testnamespace_testgauge{{component="testcomponent",namespace="testnamespace"}} 50000
+# HELP testnamespace_testintcounter A test int counter
+# TYPE testnamespace_testintcounter counter
+testnamespace_testintcounter{{namespace="testnamespace"}} 12345
 "#
        );

@@ -1015,9 +1015,6 @@ testhistogram_bucket{{le="10"}} 3
 testhistogram_bucket{{le="+Inf"}} 3
 testhistogram_sum 7.5
 testhistogram_count 3
-# HELP testintcounter A test int counter
-# TYPE testintcounter counter
-testintcounter{{namespace="testnamespace"}} 12345
 # HELP testintgauge A test int gauge
 # TYPE testintgauge gauge
 testintgauge 42
@@ -1031,6 +1028,9 @@ testnamespace_testcounter{{component="testcomponent",endpoint="testendpoint",nam
 # HELP testnamespace_testgauge A test gauge
 # TYPE testnamespace_testgauge gauge
 testnamespace_testgauge{{component="testcomponent",namespace="testnamespace"}} 50000
+# HELP testnamespace_testintcounter A test int counter
+# TYPE testnamespace_testintcounter counter
+testnamespace_testintcounter{{namespace="testnamespace"}} 12345
 "#
        );


--- a/lib/runtime/tests/soak.rs
+++ b/lib/runtime/tests/soak.rs
@@ -13,6 +13,17 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+//! Soak test for the runtime: `cargo test --test soak integration::main --features integration`
+//!
+//! The test sends a batch of requests to the runtime and measures the throughput.
+//!
+//! It also measures the latency of the requests.
+//!
+//! A reasonable soak test configuration to start off is 1 minute duration with 10000 batch load:
+//! export DYN_QUEUED_UP_PROCESSING=true
+//! export DYN_SOAK_BATCH_LOAD=10000
+//! export DYN_SOAK_RUN_DURATION=60s
+//! cargo test --test soak integration::main --features integration -- --nocapture
 #[cfg(feature = "integration")]
 mod integration {

@@ -22,13 +33,17 @@ mod integration {
        logging,
        pipeline::{
            async_trait, network::Ingress, AsyncEngine, AsyncEngineContextProvider, Error, ManyOut,
-            ResponseStream, SingleIn,
+            PushRouter, ResponseStream, SingleIn,
        },
        protocols::annotated::Annotated,
-        DistributedRuntime, ErrorContext, Result, Runtime, Worker,
+        stream, DistributedRuntime, ErrorContext, Result, Runtime, Worker,
    };
    use futures::StreamExt;
-    use std::{sync::Arc, time::Duration};
+    use std::{
+        sync::atomic::{AtomicU64, Ordering},
+        sync::Arc,
+        time::Duration,
+    };
    use tokio::time::Instant;

    #[test]
@@ -45,16 +60,29 @@ mod integration {

        client.await??;
        distributed.shutdown();
-        server.await??;
+        let handler = server.await??;
+
+        // Print final backend counter value
+        let final_count = handler.backend_counter.load(Ordering::Relaxed);
+        println!(
+            "Final RequestHandler backend_counter: {} requests processed",
+            final_count
+        );

        Ok(())
    }

-    struct RequestHandler {}
+    struct RequestHandler {
+        backend_counter: AtomicU64,
+        queued_up_processing: bool,
+    }

    impl RequestHandler {
-        fn new() -> Arc<Self> {
-            Arc::new(Self {})
+        fn new(queued_up_processing: bool) -> Arc<Self> {
+            Arc::new(Self {
+                backend_counter: AtomicU64::new(0),
+                queued_up_processing,
+            })
        }
    }

@@ -63,25 +91,40 @@ mod integration {
        async fn generate(&self, input: SingleIn<String>) -> Result<ManyOut<Annotated<String>>> {
            let (data, ctx) = input.into_parts();

+            // Increment backend counter
+            self.backend_counter.fetch_add(1, Ordering::Relaxed);
+
            let chars = data
                .chars()
                .map(|c| Annotated::from_data(c.to_string()))
                .collect::<Vec<_>>();

-            let stream = async_stream::stream! {
+            if self.queued_up_processing {
+                // queued up processing - delayed response to saturate the queue
+                let async_stream = async_stream::stream! {
                    for c in chars {
                        yield c;
                        tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
                    }
                };
-
-            Ok(ResponseStream::new(Box::pin(stream), ctx.context()))
+                Ok(ResponseStream::new(Box::pin(async_stream), ctx.context()))
+            } else {
+                // normal processing - immediate response
+                let iter_stream = stream::iter(chars);
+                Ok(ResponseStream::new(Box::pin(iter_stream), ctx.context()))
+            }
        }
    }

-    async fn backend(runtime: DistributedRuntime) -> Result<()> {
+    async fn backend(runtime: DistributedRuntime) -> Result<Arc<RequestHandler>> {
+        // read the queued-up processing flag from env; defaults to false (immediate, non-delayed responses)
+        let queued_up_processing =
+            std::env::var("DYN_QUEUED_UP_PROCESSING").unwrap_or("false".to_string());
+        let queued_up_processing: bool = queued_up_processing.parse().unwrap_or(false);
+
        // attach an ingress to an engine
-        let ingress = Ingress::for_engine(RequestHandler::new())?;
+        let handler = RequestHandler::new(queued_up_processing);
+        let ingress = Ingress::for_engine(handler.clone())?;

        // // make the ingress discoverable via a component service
        // // we must first create a service, then we can attach one or more endpoints
@@ -95,27 +138,32 @@ mod integration {
            .endpoint_builder()
            .handler(ingress)
            .start()
-            .await
+            .await?;
+
+        Ok(handler)
    }

    async fn client(runtime: DistributedRuntime) -> Result<()> {
        // get the run duration from env
-        let run_duration = std::env::var("DYN_SOAK_RUN_DURATION").unwrap_or("1m".to_string());
+        let run_duration = std::env::var("DYN_SOAK_RUN_DURATION").unwrap_or("3s".to_string());
        let run_duration =
-            humantime::parse_duration(&run_duration).unwrap_or(Duration::from_secs(60));
+            humantime::parse_duration(&run_duration).unwrap_or(Duration::from_secs(3));

-        let batch_load = std::env::var("DYN_SOAK_BATCH_LOAD").unwrap_or("10000".to_string());
-        let batch_load: usize = batch_load.parse().unwrap_or(10000);
+        let batch_load = std::env::var("DYN_SOAK_BATCH_LOAD").unwrap_or("100".to_string());
+        let batch_load: usize = batch_load.parse().unwrap_or(100);

        let client = runtime
            .namespace(DEFAULT_NAMESPACE)?
            .component("backend")?
            .endpoint("generate")
-            .client::<String, Annotated<String>>()
+            .client()
            .await?;

        client.wait_for_instances().await?;
-        let client = Arc::new(client);
+        let router =
+            PushRouter::<String, Annotated<String>>::from_client(client, Default::default())
+                .await?;
+        let router = Arc::new(router);

        let start = Instant::now();
        let mut count = 0;
@@ -123,11 +171,11 @@ mod integration {
        loop {
            let mut tasks = Vec::new();
            for _ in 0..batch_load {
-                let client = client.clone();
+                let router = router.clone();
                tasks.push(tokio::spawn(async move {
                    let mut stream = tokio::time::timeout(
-                        Duration::from_secs(30),
-                        client.random("hello world".to_string().into()),
+                        Duration::from_secs(5),
+                        router.random("hello world".to_string().into()),
                    )
                    .await
                    .context("request timed out")??;
@@ -147,7 +195,9 @@ mod integration {

            let elapsed = start.elapsed();
            count += batch_load;
+            if count % 1000 == 0 {
                println!("elapsed: {:?}; count: {}", elapsed, count);
+            }

            if elapsed > run_duration {
                println!("done");