feat: add --dry-run flag to preview VRAM-based test selection (#7491)

Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>

feat: add --dry-run flag to preview VRAM-based test selection (#7491)
Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
19fc7660 · Keiven C · GitHub · 3a925951 · 19fc7660 · 19fc7660
Unverified Commit 19fc7660 authored Mar 18, 2026 by Keiven C Committed by GitHub Mar 18, 2026
Hide whitespace changes
Inline Side-by-side

Showing with 70 additions and 6 deletions

tests/README.md tests/README.md +10 -6

tests/conftest.py tests/conftest.py +60 -0

No files found.
--- a/tests/README.md
+++ b/tests/README.md
@@ -137,18 +137,22 @@ def test_kv_cache_behavior():

 The `max_vram_gib(N)` marker records how much GPU memory a test needs. The pytest invocation can use `--max-vram-gib=N` as a **selector** to run only tests that fit on the available GPU. Tests that exceed the budget are skipped at collection time (before any test starts). Tests without a `max_vram_gib` marker always run (no constraint assumed).

+The `--max-vram-gib` selector is intended for the following use cases:
+- **MIG-partitioned GPUs:** when tests run in parallel on MIG slices (e.g., 2x 40 GiB partitions on an 80 GiB GPU), each slice has only a limited share of the GPU's VRAM.
+- **Smaller CI GPUs:** some CI jobs use L4 GPUs with only 24 GiB of VRAM.
+
 Nothing prevents you from running without this flag — but if a test needs more VRAM than is physically available, it will OOM at runtime (e.g., vLLM raises `ValueError: No available memory for the cache blocks`).

 ```bash
-# Run only tests that fit on a 48 GiB GPU — tests needing >48 GiB are skipped
-python3 -m pytest --max-vram-gib=48 tests/
+# Preview which gpu_1 vllm tests fit on a 16 GiB MIG partition (no tests are executed)
+python3 -m pytest --max-vram-gib=16 --dry-run -m "gpu_1 and vllm" tests/serve/test_vllm.py
+
+# Same, but for 24 GiB L4 CI GPUs
+python3 -m pytest --max-vram-gib=24 --dry-run -m "gpu_1 and vllm" tests/serve/test_vllm.py

 # GPU tests that have no max_vram_gib marker yet — need profiling
 # TODO: profile these tests and add max_vram_gib markers
-python3 -m pytest -m "(gpu_1 or gpu_2 or gpu_4 or gpu_8) and not max_vram_gib" tests/
-
-# No filter — run everything regardless of VRAM (tests that exceed available memory will OOM)
-python3 -m pytest tests/
+python3 -m pytest --dry-run -m "(gpu_1 or gpu_2 or gpu_4 or gpu_8) and not max_vram_gib" tests/serve/test_vllm.py
 ```

 ### Lifecycle Marker Note

--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -108,6 +108,12 @@ def pytest_addoption(parser: pytest.Parser) -> None:
        default=None,
        help="Skip tests whose @pytest.mark.max_vram_gib(N) exceeds this value (GiB).",
    )
+    parser.addoption(
+        "--dry-run",
+        action="store_true",
+        default=False,
+        help="Show which tests would run vs skip based on --max-vram-gib, then exit.",
+    )


 LOG_FORMAT = "[TEST] %(asctime)s %(levelname)s %(name)s: %(message)s"
@@ -311,6 +317,60 @@ def pytest_collection_modifyitems(config, items):
            if vram_mark and vram_mark.args and vram_mark.args[0] > vram_limit:
                item.add_marker(skip_vram)

+    # --dry-run: print run/skip breakdown and exit without executing tests
+    if config.getoption("--dry-run", default=False):
+        would_run = []
+        would_skip = []
+        unmarked = []
+        for item in items:
+            vram_mark = item.get_closest_marker("max_vram_gib")
+            vram_val = vram_mark.args[0] if vram_mark and vram_mark.args else None
+            name = item.nodeid.split("::", 1)[1] if "::" in item.nodeid else item.nodeid
+
+            skip_reasons = []
+            for marker in item.iter_markers("skip"):
+                reason = marker.kwargs.get("reason", "")
+                if not reason and marker.args:
+                    reason = marker.args[0]
+                skip_reasons.append(reason or "no reason given")
+
+            vram_skipped = (
+                vram_limit is not None
+                and vram_val is not None
+                and vram_val > vram_limit
+            )
+            if vram_skipped:
+                skip_reasons.insert(0, f"{vram_val} GiB > {vram_limit} GiB VRAM limit")
+
+            if skip_reasons:
+                would_skip.append((name, vram_val, skip_reasons))
+            elif vram_val is not None:
+                would_run.append((name, vram_val))
+            else:
+                unmarked.append(name)
+
+        print(f"\n{'=' * 60}")
+        print(
+            f"--max-vram-gib={vram_limit or 'not set'}  |  {len(items)} tests selected"
+        )
+        print(f"{'=' * 60}")
+        if would_run:
+            print(f"\nWould RUN ({len(would_run)}):")
+            for name, gib in would_run:
+                print(f"  {name}  ({gib} GiB)")
+        if would_skip:
+            print(f"\nWould SKIP ({len(would_skip)}):")
+            for name, vram_val, reasons in would_skip:
+                vram_str = f"  ({vram_val} GiB)" if vram_val is not None else ""
+                print(f"  {name}{vram_str}  -- {'; '.join(reasons)}")
+        if unmarked:
+            print(f"\nNo VRAM marker — always run ({len(unmarked)}):")
+            for name in unmarked:
+                print(f"  {name}")
+        print()
+        items.clear()
+        return
+
    # Collect models via explicit pytest mark from final filtered items only
    models_to_download = set()
    for item in items: