fix: pin sniffio dependency (#4665)

Signed-off-by: PeaBrane <yanrpei@gmail.com>
Co-authored-by: PeaBrane <yanrpei@gmail.com>

fix: pin sniffio dependency (#4665)
Signed-off-by: PeaBrane <yanrpei@gmail.com>
Co-authored-by: PeaBrane <yanrpei@gmail.com>
9fb5f03a · Biswa Panda · GitHub · fb4432e1 · 9fb5f03a · 9fb5f03a
Unverified commit 9fb5f03a, authored Dec 01, 2025 by Biswa Panda; committed by GitHub on Dec 02, 2025
7 changed files
--- a/.github/workflows/container-validation-dynamo.yml
+++ b/.github/workflows/container-validation-dynamo.yml
@@ -65,7 +65,7 @@ jobs:
          docker compose down
      - name: Run pytest (parallel tests with xdist)
        env:
-          PYTEST_MARKS: "pre_merge and parallel and not (vllm or trtllm or sglang or k8s)"
+          PYTEST_MARKS: "pre_merge and parallel"
        run: |
          docker run -w /workspace \
            --name ${{ env.CONTAINER_ID }}_pytest_parallel \
@@ -77,7 +77,7 @@ jobs:
          docker cp ${{ env.CONTAINER_ID }}_pytest_parallel:/workspace/${{ env.PYTEST_PARALLEL_XML_FILE }} . || echo "No parallel test report found"
      - name: Run pytest (sequential tests)
        env:
-          PYTEST_MARKS: "(pre_merge and not parallel and not (vllm or trtllm or sglang or k8s)) or mypy"
+          PYTEST_MARKS: "(pre_merge and not parallel) or mypy"
        run: |
          docker run -w /workspace \
            --name ${{ env.CONTAINER_ID }}_pytest \

--- a/container/Dockerfile.vllm
+++ b/container/Dockerfile.vllm
@@ -174,6 +174,9 @@ WORKDIR /workspace
 ENV DYNAMO_HOME=/opt/dynamo
 ENV VIRTUAL_ENV=/opt/dynamo/venv
 ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
+# Set CUDA_DEVICE_ORDER to ensure CUDA logical device IDs match NVML physical device IDs
+# This fixes NVML InvalidArgument errors when CUDA_VISIBLE_DEVICES is set
+ENV CUDA_DEVICE_ORDER=PCI_BUS_ID

 ARG ARCH_ALT
 ARG PYTHON_VERSION

--- a/container/deps/requirements.txt
+++ b/container/deps/requirements.txt
@@ -43,6 +43,9 @@ PyYAML==6.0.3
 scikit-learn==1.7.2
 scipy<1.14.0  # Upper bound for pmdarima compatibility
 sentencepiece==0.2.1
+# Required by kr8s
+# https://github.com/kr8s-org/kr8s/blob/750022c3ebbb7988cddb5a979aca2ee8074a1069/examples/kubectl-ng/uv.lock#L988
+sniffio==1.3.1
 tensorboard==2.19.0
 tensorboardX==2.6.2.2
 # Transformers version constraint for container builds

--- a/tests/planner/unit/test_prometheus.py
+++ b/tests/planner/unit/test_prometheus.py
@@ -140,7 +140,7 @@ def test_get_average_metric_none_result():
        mock_query.return_value = None

        result = client._get_average_metric(
-            metric_name="test_metric",
+            full_metric_name="test_metric",
            interval="60s",
            operation_name="test operation",
            model_name="test_model",
@@ -157,7 +157,7 @@ def test_get_average_metric_empty_result():
        mock_query.return_value = []

        result = client._get_average_metric(
-            metric_name="test_metric",
+            full_metric_name="test_metric",
            interval="60s",
            operation_name="test operation",
            model_name="test_model",
@@ -175,7 +175,7 @@ def test_get_average_metric_no_matching_containers(mock_prometheus_result):
        mock_query.return_value = [mock_prometheus_result[0]]

        result = client._get_average_metric(
-            metric_name="test_metric",
+            full_metric_name="test_metric",
            interval="60s",
            operation_name="test operation",
            model_name="target_model",
@@ -193,7 +193,7 @@ def test_get_average_metric_one_matching_container(mock_prometheus_result):
        mock_query.return_value = mock_prometheus_result[:2]

        result = client._get_average_metric(
-            metric_name="test_metric",
+            full_metric_name="test_metric",
            interval="60s",
            operation_name="test operation",
            model_name="target_model",
@@ -227,7 +227,7 @@ def test_get_average_metric_with_validation_error():
        mock_query.return_value = mock_result

        result = client._get_average_metric(
-            metric_name="test_metric",
+            full_metric_name="test_metric",
            interval="60s",
            operation_name="test operation",
            model_name="target_model",
@@ -245,7 +245,7 @@ def test_get_average_metric_multiple_matching_containers(mock_prometheus_result)
        mock_query.return_value = mock_prometheus_result[1:]

        result = client._get_average_metric(
-            metric_name="test_metric",
+            full_metric_name="test_metric",
            interval="60s",
            operation_name="test operation",
            model_name="target_model",

--- a/tests/serve/test_sglang.py
+++ b/tests/serve/test_sglang.py
@@ -44,7 +44,7 @@ sglang_configs = {
        name="aggregated",
        directory=sglang_dir,
        script_name="agg.sh",
-        marks=[pytest.mark.gpu_1, pytest.mark.pre_merge],
+        marks=[pytest.mark.gpu_1],
        model="Qwen/Qwen3-0.6B",
        env={},
        models_port=8000,

--- a/tests/serve/test_trtllm.py
+++ b/tests/serve/test_trtllm.py
@@ -40,7 +40,7 @@ trtllm_configs = {
        name="aggregated",
        directory=trtllm_dir,
        script_name="agg_metrics.sh",
-        marks=[pytest.mark.gpu_1, pytest.mark.trtllm, pytest.mark.pre_merge],
+        marks=[pytest.mark.gpu_1, pytest.mark.trtllm],
        model="Qwen/Qwen3-0.6B",
        models_port=8000,
        request_payloads=[
@@ -140,7 +140,6 @@ def test_deployment(trtllm_config_test, request, runtime_services, predownload_m
 # TODO make this a normal guy
 @pytest.mark.e2e
 @pytest.mark.gpu_1
-@pytest.mark.pre_merge
 @pytest.mark.trtllm
 def test_chat_only_aggregated_with_test_logits_processor(
    request, runtime_services, predownload_models, monkeypatch

--- a/tests/serve/test_vllm.py
+++ b/tests/serve/test_vllm.py
@@ -43,7 +43,7 @@ vllm_configs = {
        name="aggregated",
        directory=vllm_dir,
        script_name="agg.sh",
-        marks=[pytest.mark.gpu_1, pytest.mark.pre_merge],
+        marks=[pytest.mark.gpu_1],
        model="Qwen/Qwen3-0.6B",
        request_payloads=[
            chat_payload_default(),