fix: Flaky cuda event test (#7747)

Signed-off-by: jthomson04 <jwillthomson19@gmail.com>

fix: Flaky cuda event test (#7747)
Signed-off-by: jthomson04 <jwillthomson19@gmail.com>
e5e65b58 · jthomson04 · GitHub · a8894467 · e5e65b58 · e5e65b58
Unverified Commit e5e65b58 authored Mar 31, 2026 by jthomson04 Committed by GitHub Apr 01, 2026
2 changed files
--- a/lib/kvbm-physical/src/transfer/notifications/cuda_event.rs
+++ b/lib/kvbm-physical/src/transfer/notifications/cuda_event.rs
@@ -32,56 +32,3 @@ impl CompletionChecker for CudaEventChecker {
        }
    }
 }
-#[cfg(all(test, feature = "testing-kvbm"))]
-mod tests {
-    use crate::manager::TransferManager;
-    use crate::transfer::tests::CudaSleep;
-    use dynamo_memory::nixl::NixlAgent;
-    use std::time::{Duration, Instant};
-    #[tokio::test]
-    async fn test_cuda_event_delayed_notification() {
-        let agent = NixlAgent::new("test_agent").unwrap();
-        let manager = TransferManager::builder()
-            .cuda_device_id(0)
-            .nixl_agent(agent)
-            .build()
-            .unwrap();
-        let stream = manager.h2d_stream();
-        let cuda_ctx = manager.cuda_context();
-        // Get or create the CudaSleep utility (compiles kernel and calibrates on first use)
-        let cuda_sleep = CudaSleep::for_context(cuda_ctx).unwrap();
-        // Test 1: Launch sleep and wait via async notification
-        let t0_queue_start = Instant::now();
-        cuda_sleep
-            .launch(Duration::from_millis(600), stream)
-            .unwrap();
-        let queue_time = t0_queue_start.elapsed();
-        let event = stream.record_event(None).unwrap();
-        let notification = manager.register_cuda_event(event);
-        notification.await.unwrap();
-        let wait_time = t0_queue_start.elapsed() - queue_time;
-        println!(
-            "GPU sleep test: queue {:?}, wait {:?}",
-            queue_time, wait_time
-        );
-        assert!(
-            queue_time < Duration::from_millis(10),
-            "launching the sleep kernel should be fast: {:?}",
-            queue_time
-        );
-        assert!(
-            wait_time >= Duration::from_millis(500),
-            "wait time should reflect >=500ms of GPU work: {:?}",
-            wait_time
-        );
-    }
-}
--- a/lib/llm/src/block_manager/v2/physical/transfer/notifications/cuda_event.rs
+++ b/lib/llm/src/block_manager/v2/physical/transfer/notifications/cuda_event.rs
@@ -56,28 +56,20 @@ mod tests {
        // Get or create the CudaSleep utility (compiles kernel and calibrates on first use)
        let cuda_sleep = CudaSleep::for_context(cuda_ctx).unwrap();
-        // Test 1: Launch sleep and wait via async notification
+        let start = Instant::now();
-        let t0_queue_start = Instant::now();
        cuda_sleep
            .launch(Duration::from_millis(600), stream)
            .unwrap();
-        let queue_time = t0_queue_start.elapsed();
        let event = stream.record_event(None).unwrap();
        let notification = manager.register_cuda_event(event);
-        notification.await.unwrap();
+        tokio::time::timeout(Duration::from_secs(5), notification)
-        let wait_time = t0_queue_start.elapsed() - queue_time;
+            .await
+            .expect("notification should complete once the CUDA event signals")
-        println!(
+            .unwrap();
-            "GPU sleep test: queue {:?}, wait {:?}",
+        let wait_time = start.elapsed();
-            queue_time, wait_time
-        );
-        assert!(
+        println!("GPU sleep test: total wait {:?}", wait_time);
-            queue_time < Duration::from_millis(10),
-            "launching the sleep kernel should be fast: {:?}",
-            queue_time
-        );
        assert!(
            wait_time >= Duration::from_millis(500),