test(sglang): add pre_merge diffusion CI smoke tests for T2I and T2V (#8409)

Signed-off-by: Krishnan Prashanth <kprashanth@nvidia.com>

test(sglang): add pre_merge diffusion CI smoke tests for T2I and T2V (#8409)

Signed-off-by: Krishnan Prashanth <kprashanth@nvidia.com>
8679faa3 · Krishnan Prashanth · GitHub · 19ecf46f · 8679faa3 · 8679faa3
Unverified commit 8679faa3 — authored Apr 21, 2026 by Krishnan Prashanth; committed by GitHub on Apr 21, 2026.
4 changed files
--- a/examples/backends/sglang/launch/image_diffusion.sh
+++ b/examples/backends/sglang/launch/image_diffusion.sh
@@ -15,7 +15,7 @@ source "$SCRIPT_DIR/../../../common/launch_utils.sh"
 MODEL_PATH="black-forest-labs/FLUX.1-dev"
 FS_URL="file:///tmp/dynamo_media"
 HTTP_URL=""
-HTTP_PORT="${HTTP_PORT:-8000}"
+HTTP_PORT="${DYN_HTTP_PORT:-${HTTP_PORT:-8000}}"

 # Parse command line arguments
 EXTRA_ARGS=()

--- a/examples/backends/sglang/launch/text-to-video-diffusion.sh
+++ b/examples/backends/sglang/launch/text-to-video-diffusion.sh
@@ -14,7 +14,7 @@ source "$SCRIPT_DIR/../../../common/launch_utils.sh"
 # Defaults
 WAN_SIZE="1b"
 FS_URL="file:///tmp/dynamo_media"
-HTTP_PORT="${HTTP_PORT:-8000}"
+HTTP_PORT="${DYN_HTTP_PORT:-${HTTP_PORT:-8000}}"
 NUM_FRAMES=17
 HEIGHT=480
 WIDTH=832

--- a/tests/serve/test_sglang.py
+++ b/tests/serve/test_sglang.py
@@ -30,7 +30,11 @@ from tests.utils.payload_builder import (
    responses_payload_default,
    responses_stream_payload_default,
 )
-from tests.utils.payloads import LoraTestChatPayload
+from tests.utils.payloads import (
+    ImageGenerationPayload,
+    LoraTestChatPayload,
+    VideoGenerationPayload,
+)

 logger = logging.getLogger(__name__)

@@ -457,6 +461,77 @@ sglang_configs = {
            completion_payload_default(),
        ],
    ),
+    # ── Diffusion pre_merge smoke tests ─────────────────────────────────
+    "diffusion_t2i_z_image_turbo": SGLangConfig(
+        name="diffusion_t2i_z_image_turbo",
+        directory=sglang_dir,
+        script_name="image_diffusion.sh",
+        script_args=["--model-path", "Tongyi-MAI/Z-Image-Turbo"],
+        marks=[
+            pytest.mark.gpu_1,
+            pytest.mark.profiled_vram_gib(19.3),
+            pytest.mark.timeout(240),
+            pytest.mark.pre_merge,
+        ],
+        model="Tongyi-MAI/Z-Image-Turbo",
+        env={},
+        frontend_port=DefaultPort.FRONTEND.value,
+        request_payloads=[
+            ImageGenerationPayload(
+                body={
+                    "prompt": "A red apple on a white table",
+                    "size": "512x512",
+                    "response_format": "url",
+                    "nvext": {"num_inference_steps": 4},
+                },
+                repeat_count=1,
+                expected_response=[],
+                expected_log=[],
+            ),
+        ],
+    ),
+    "diffusion_t2v_wan_1_3b": SGLangConfig(
+        name="diffusion_t2v_wan_1_3b",
+        directory=sglang_dir,
+        script_name="text-to-video-diffusion.sh",
+        script_args=[
+            "--wan-size",
+            "1b",
+            "--num-inference-steps",
+            "3",
+            "--num-frames",
+            "9",
+            "--height",
+            "256",
+            "--width",
+            "256",
+        ],
+        marks=[
+            pytest.mark.gpu_1,
+            pytest.mark.profiled_vram_gib(17.6),
+            pytest.mark.timeout(180),
+            pytest.mark.pre_merge,
+        ],
+        model="Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
+        env={},
+        frontend_port=DefaultPort.FRONTEND.value,
+        request_payloads=[
+            VideoGenerationPayload(
+                body={
+                    "prompt": "A dog running on a beach",
+                    "size": "256x256",
+                    "response_format": "url",
+                    "nvext": {
+                        "num_inference_steps": 3,
+                        "num_frames": 9,
+                    },
+                },
+                repeat_count=1,
+                expected_response=[],
+                expected_log=[],
+            ),
+        ],
+    ),
    "anthropic_messages": SGLangConfig(
        name="anthropic_messages",
        directory=sglang_dir,

--- a/tests/utils/payloads.py
+++ b/tests/utils/payloads.py
@@ -1317,3 +1317,51 @@ def completions_response_handler(response):

 def chat_completions_response_handler(response):
    return ChatPayload.extract_content(response)
+
+
+@dataclass
+class ImageGenerationPayload(BasePayload):
+    """Payload for /v1/images/generations endpoint (diffusion image generation)."""
+
+    endpoint: str = "/v1/images/generations"
+    timeout: int = 300
+
+    def response_handler(self, response: Any) -> str:
+        response.raise_for_status()
+        result = response.json()
+        assert (
+            "data" in result
+        ), f"Missing 'data' in response. Keys: {list(result.keys())}"
+        assert len(result["data"]) > 0, "Empty data in image response"
+        entry = result["data"][0]
+        if "url" in entry:
+            assert entry["url"], "Image response url is empty"
+            return entry["url"]
+        assert entry.get("b64_json"), "Image response b64_json is empty"
+        return "b64_image_returned"
+
+
+@dataclass
+class VideoGenerationPayload(BasePayload):
+    """Payload for /v1/videos endpoint (diffusion video generation)."""
+
+    endpoint: str = "/v1/videos"
+    timeout: int = 600
+
+    def response_handler(self, response: Any) -> str:
+        response.raise_for_status()
+        result = response.json()
+        assert result.get("status") == "completed", (
+            f"Video generation not completed. Status: {result.get('status')}, "
+            f"Error: {result.get('error', 'none')}"
+        )
+        assert (
+            "data" in result
+        ), f"Missing 'data' in response. Keys: {list(result.keys())}"
+        assert len(result["data"]) > 0, "Empty data in video response"
+        entry = result["data"][0]
+        if "url" in entry:
+            assert entry["url"], "Video response url is empty"
+            return entry["url"]
+        assert entry.get("b64_json"), "Video response b64_json is empty"
+        return "b64_video_returned"