---
# Buildkite CI pipeline: builds the CI image once, then fans out GPU test jobs
# that all depend on the `image-build` step and pull the freshly pushed image.
steps:
  - label: ":docker: Build image"
    key: image-build
    commands:
      - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
      - "docker build --file docker/Dockerfile.ci -t vllm-omni-ci ."
      - "docker tag vllm-omni-ci public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT"
      - "docker push public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT"
    agents:
      queue: "cpu_queue_premerge"

  # - label: "Test on NPU"
  #   depends_on: ~
  #   key: npu-test
  #   commands:
  #     - ".buildkite/scripts/hardware_ci/run_npu_test.sh"
  #   agents:
  #     queue: "ascend"

  - label: "Simple Unit Test"
    depends_on: image-build
    commands:
      - pytest -v -s tests/entrypoints/
      - pytest -v -s tests/diffusion/cache/
      - pytest -v -s tests/diffusion/lora/
      - pytest -v -s tests/model_executor/models/qwen2_5_omni/test_audio_length.py
      - pytest -v -s tests/worker/
      - pytest -v -s tests/distributed/omni_connectors/test_kv_flow.py
    agents:
      queue: "gpu_1_queue"
    plugins:
      - docker#v5.2.0:
          image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
          always-pull: true
          propagate-environment: true
          environment:
            - "HF_HOME=/fsx/hf_cache"
          volumes:
            - "/fsx/hf_cache:/fsx/hf_cache"

  - label: "Diffusion Model Test"
    timeout_in_minutes: 20
    depends_on: image-build
    commands:
      - pytest -s -v tests/e2e/offline_inference/test_t2i_model.py
    agents:
      queue: "gpu_1_queue"  # g6.4xlarge instance on AWS, has 1 L4 GPU
    plugins:
      - docker#v5.2.0:
          image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
          always-pull: true
          propagate-environment: true
          environment:
            - "HF_HOME=/fsx/hf_cache"
          volumes:
            - "/fsx/hf_cache:/fsx/hf_cache"

  - label: "Diffusion Images API LoRA E2E"
    timeout_in_minutes: 20
    depends_on: image-build
    commands:
      - pytest -s -v tests/e2e/online_serving/test_images_generations_lora.py
    agents:
      queue: "gpu_1_queue"  # g6.4xlarge instance on AWS, has 1 L4 GPU
    plugins:
      - docker#v5.2.0:
          image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
          always-pull: true
          propagate-environment: true
          environment:
            - "HF_HOME=/fsx/hf_cache"
          volumes:
            - "/fsx/hf_cache:/fsx/hf_cache"

  - label: "Diffusion Model CPU offloading Test"
    timeout_in_minutes: 20
    depends_on: image-build
    commands:
      - pytest -s -v tests/e2e/offline_inference/test_diffusion_cpu_offload.py
      - pytest -s -v tests/e2e/offline_inference/test_diffusion_layerwise_offload.py
    agents:
      queue: "gpu_1_queue"  # g6.4xlarge instance on AWS, has 1 L4 GPU
    plugins:
      - docker#v5.2.0:
          image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
          always-pull: true
          propagate-environment: true
          environment:
            - "HF_HOME=/fsx/hf_cache"
          volumes:
            - "/fsx/hf_cache:/fsx/hf_cache"

  - label: "Audio Generation Model Test"
    timeout_in_minutes: 20
    depends_on: image-build
    commands:
      - pytest -s -v tests/e2e/offline_inference/test_stable_audio_model.py
    agents:
      queue: "gpu_1_queue"  # g6.4xlarge instance on AWS, has 1 L4 GPU
    plugins:
      - docker#v5.2.0:
          image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
          always-pull: true
          propagate-environment: true
          environment:
            - "HF_HOME=/fsx/hf_cache"
          volumes:
            - "/fsx/hf_cache:/fsx/hf_cache"

  - label: "Diffusion Cache Backend Test"
    timeout_in_minutes: 15
    depends_on: image-build
    commands:
      - pytest -s -v tests/e2e/offline_inference/test_cache_dit.py tests/e2e/offline_inference/test_teacache.py
    agents:
      queue: "gpu_1_queue"  # g6.4xlarge instance on AWS, has 1 L4 GPU
    plugins:
      - docker#v5.2.0:
          image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
          always-pull: true
          propagate-environment: true
          environment:
            - "HF_HOME=/fsx/hf_cache"
          volumes:
            - "/fsx/hf_cache:/fsx/hf_cache"

  - label: "Diffusion Sequence Parallelism Test"
    timeout_in_minutes: 20
    depends_on: image-build
    commands:
      - pytest -s -v tests/e2e/offline_inference/test_sequence_parallel.py
    agents:
      queue: "gpu_4_queue"  # g6.12xlarge instance on AWS, has 4 L4 GPU
    plugins:
      - docker#v5.2.0:
          image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
          always-pull: true
          propagate-environment: true
          # larger /dev/shm for NCCL shared buffers in multi-GPU runs
          shm-size: "8gb"
          environment:
            - "HF_HOME=/fsx/hf_cache"
          volumes:
            - "/fsx/hf_cache:/fsx/hf_cache"

  - label: "Diffusion Tensor Parallelism Test"
    timeout_in_minutes: 20
    depends_on: image-build
    commands:
      - pytest -s -v tests/e2e/offline_inference/test_zimage_tensor_parallel.py
    agents:
      queue: "gpu_4_queue"  # g6.12xlarge instance on AWS, has 4 L4 GPU
    plugins:
      - docker#v5.2.0:
          image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
          always-pull: true
          propagate-environment: true
          shm-size: "8gb"
          environment:
            - "HF_HOME=/fsx/hf_cache"
          volumes:
            - "/fsx/hf_cache:/fsx/hf_cache"

  - label: "Diffusion GPU Worker Test"
    timeout_in_minutes: 20
    depends_on: image-build
    commands:
      - pytest -s -v tests/diffusion/test_diffusion_worker.py
    agents:
      queue: "gpu_4_queue"  # g6.12xlarge instance on AWS, has 4 L4 GPU
    plugins:
      - docker#v5.2.0:
          image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
          always-pull: true
          propagate-environment: true
          shm-size: "8gb"
          environment:
            - "HF_HOME=/fsx/hf_cache"
          volumes:
            - "/fsx/hf_cache:/fsx/hf_cache"

  - label: "Benchmark Test"
    timeout_in_minutes: 15
    depends_on: image-build
    commands:
      - export VLLM_WORKER_MULTIPROC_METHOD=spawn
      - pytest -s -v tests/benchmarks/test_serve_cli.py
    agents:
      queue: "mithril-h100-pool"
    plugins:
      - kubernetes:
          podSpec:
            containers:
              - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
                resources:
                  limits:
                    nvidia.com/gpu: 2
                volumeMounts:
                  - name: devshm
                    mountPath: /dev/shm
                  - name: hf-cache
                    mountPath: /root/.cache/huggingface
                env:
                  - name: HF_HOME
                    value: /root/.cache/huggingface
            nodeSelector:
              node.kubernetes.io/instance-type: gpu-h100-sxm
            volumes:
              - name: devshm
                emptyDir:
                  medium: Memory
              - name: hf-cache
                hostPath:
                  path: /mnt/hf-cache
                  type: DirectoryOrCreate

  - label: "Omni Model Test"
    timeout_in_minutes: 15
    depends_on: image-build
    commands:
      - export VLLM_LOGGING_LEVEL=DEBUG
      - export VLLM_WORKER_MULTIPROC_METHOD=spawn
      - pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py
    agents:
      queue: "gpu_4_queue"  # g6.12xlarge instance on AWS, has 4 L4 GPU
    plugins:
      - docker#v5.2.0:
          image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
          always-pull: true
          propagate-environment: true
          environment:
            - "HF_HOME=/fsx/hf_cache"
          volumes:
            - "/fsx/hf_cache:/fsx/hf_cache"

  # - label: "Omni Model Test with H100"
  #   timeout_in_minutes: 30
  #   depends_on: image-build
  #   commands:
  #     - export VLLM_WORKER_MULTIPROC_METHOD=spawn
  #     - export VLLM_TEST_CLEAN_GPU_MEMORY="1"
  #     - pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py
  #     - pytest -s -v tests/e2e/online_serving/test_qwen3_omni.py
  #     - pytest -s -v tests/e2e/online_serving/test_async_omni.py
  #   agents:
  #     queue: "mithril-h100-pool"
  #   plugins:
  #     - kubernetes:
  #         podSpec:
  #           containers:
  #             - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
  #               resources:
  #                 limits:
  #                   nvidia.com/gpu: 2
  #               volumeMounts:
  #                 - name: devshm
  #                   mountPath: /dev/shm
  #                 - name: hf-cache
  #                   mountPath: /root/.cache/huggingface
  #               env:
  #                 - name: HF_HOME
  #                   value: /root/.cache/huggingface
  #           nodeSelector:
  #             node.kubernetes.io/instance-type: gpu-h100-sxm
  #           volumes:
  #             - name: devshm
  #               emptyDir:
  #                 medium: Memory
  #             - name: hf-cache
  #               hostPath:
  #                 path: /mnt/hf-cache
  #                 type: DirectoryOrCreate

  - label: "Diffusion Image Edit Test with H100 (1 GPU)"
    timeout_in_minutes: 20
    depends_on: image-build
    commands:
      - export VLLM_WORKER_MULTIPROC_METHOD=spawn
      - pytest -s -v tests/e2e/online_serving/test_image_gen_edit.py
    agents:
      queue: "mithril-h100-pool"
    plugins:
      - kubernetes:
          podSpec:
            containers:
              - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
                resources:
                  limits:
                    nvidia.com/gpu: 1
                volumeMounts:
                  - name: devshm
                    mountPath: /dev/shm
                  - name: hf-cache
                    mountPath: /root/.cache/huggingface
                env:
                  - name: HF_HOME
                    value: /root/.cache/huggingface
            nodeSelector:
              node.kubernetes.io/instance-type: gpu-h100-sxm
            volumes:
              - name: devshm
                emptyDir:
                  medium: Memory
              - name: hf-cache
                hostPath:
                  path: /mnt/hf-cache
                  type: DirectoryOrCreate

  # - label: "Bagel Text2Img Model Test with H100"
  #   timeout_in_minutes: 30
  #   depends_on: image-build
  #   commands:
  #     - export VLLM_WORKER_MULTIPROC_METHOD=spawn
  #     - pytest -s -v tests/e2e/offline_inference/test_bagel_text2img.py
  #   agents:
  #     queue: "mithril-h100-pool"
  #   plugins:
  #     - kubernetes:
  #         podSpec:
  #           containers:
  #             - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
  #               resources:
  #                 limits:
  #                   nvidia.com/gpu: 1
  #               volumeMounts:
  #                 - name: devshm
  #                   mountPath: /dev/shm
  #                 - name: hf-cache
  #                   mountPath: /root/.cache/huggingface
  #               env:
  #                 - name: HF_HOME
  #                   value: /root/.cache/huggingface
  #           nodeSelector:
  #             node.kubernetes.io/instance-type: gpu-h100-sxm
  #           volumes:
  #             - name: devshm
  #               emptyDir:
  #                 medium: Memory
  #             - name: hf-cache
  #               hostPath:
  #                 path: /mnt/hf-cache
  #                 type: DirectoryOrCreate