Unverified Commit 1b0334c8 authored by GuanLuo's avatar GuanLuo Committed by GitHub
Browse files

feat: add TRTLLM text-to-image support (#8200)


Signed-off-by: default avatarGuan Luo <41310872+GuanLuo@users.noreply.github.com>
Signed-off-by: default avatarGuanLuo <41310872+GuanLuo@users.noreply.github.com>
parent 5135c321
......@@ -25,7 +25,7 @@ from tests.utils.payload_builder import (
metric_payload_default,
multimodal_payload_default,
)
from tests.utils.payloads import VideoGenerationPayload
from tests.utils.payloads import ImageGenerationPayload, VideoGenerationPayload
logger = logging.getLogger(__name__)
......@@ -414,6 +414,65 @@ trtllm_configs = {
),
],
),
# TensorRT-LLM image diffusion test using Flux.1-dev model.
# Validates the end-to-end image generation pipeline (frontend → worker → /v1/images/generations).
# Uses --skip-warmup (warmup at default resolution OOMs on 22 GB L4 GPU),
# --disable-torch-compile, and small default resolution (256x256)
# to fit within CI GPU memory constraints.
"image_diffusion": TRTLLMConfig(
name="image_diffusion",
directory=trtllm_dir,
script_name="agg_image_diffusion.sh",
script_args=[
"--skip-warmup",
"--disable-torch-compile",
"--default-height",
"256",
"--default-width",
"256",
"--default-num-images-per-prompt",
"1",
],
marks=[
pytest.mark.gpu_1, # 1 GPU(s) used, peak 20.0 GiB
pytest.mark.trtllm,
pytest.mark.pre_merge,
# Diffusion models don't use KV cache, so requested_trtllm_kv_tokens
# doesn't apply. requested_trtllm_vram_gib maps to
# KvCacheConfig.max_gpu_total_bytes which has no effect on the
# diffusion engine itself, but the parallel scheduler requires one
# of the KV/VRAM markers to accept the test. We set it to the
# profiled peak so the scheduler's VRAM budget is accurate.
pytest.mark.profiled_vram_gib(
20.0
), # actual nvidia-smi peak 20.0 GiB [gluo FIXME] reprofil as new model is used
pytest.mark.requested_trtllm_vram_gib(20.0),
pytest.mark.timeout(
600
), # Image generation is slow even at small resolution
],
model="black-forest-labs/FLUX.2-klein-4B",
frontend_port=DefaultPort.FRONTEND.value,
timeout=300,
delayed_start=5,
request_payloads=[
ImageGenerationPayload(
body={
"prompt": "A golden retriever running on a beach",
"size": "256x256",
"response_format": "url",
"nvext": {
"num_inference_steps": 10,
"guidance_scale": 5.0,
"seed": 42,
},
},
repeat_count=1,
expected_response=[],
expected_log=[],
),
],
),
# Aggregated multimodal with --frontend-decoding enabled.
# Verifies image URL inference works when images are decoded by the Rust
# MediaDecoder in the frontend instead of the Python backend.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment