New Pipeline Slow Test runners (#5131)

* pipeline fetcher * update script * clean up * clean up * clean up * new pipeline runner * rename tests to match modules * test actions in pr * change runner to gpu * clean up * clean up * clean up * fix report * fix reporting * clean up * show test stats in failure reports * give names to jobs * add lora tests * split torch cuda tests and add compile tests * clean up * fix tests * change push to run only on main --------- Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>

New Pipeline Slow Test runners (#5131)
* pipeline fetcher * update script * clean up * clean up * clean up * new pipeline runner * rename tests to match modules * test actions in pr * change runner to gpu * clean up * clean up * clean up * fix report * fix reporting * clean up * show test stats in failure reports * give names to jobs * add lora tests * split torch cuda tests and add compile tests * clean up * fix tests * change push to run only on main --------- Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
dd5a3629 · Dhruv Nair · GitHub · 7271f8b7 · dd5a3629 · dd5a3629
Unverified Commit dd5a3629 authored Oct 04, 2023 by Dhruv Nair Committed by GitHub Oct 04, 2023
20 changed files
--- a/.github/workflows/push_tests.yml
+++ b/.github/workflows/push_tests.yml
-name: Slow tests on main
+name: Slow Tests on main

 on:
  push:
    branches:
      - main

+
 env:
  DIFFUSERS_IS_CI: yes
  HF_HOME: /mnt/cache
@@ -12,53 +13,115 @@ env:
  MKL_NUM_THREADS: 8
  PYTEST_TIMEOUT: 600
  RUN_SLOW: yes
+  PIPELINE_USAGE_CUTOFF: 50000

 jobs:
-  run_slow_tests:
+  # Builds the list of pipeline test modules and publishes it as the
+  # `pipeline_test_matrix` job output, which the torch_pipelines_cuda_tests
+  # job expands into its strategy matrix via fromJson().
+  setup_torch_cuda_pipeline_matrix:
+    name: Setup Torch Pipelines CUDA Slow Tests Matrix
+    runs-on: docker-gpu
+    container:
+      image: diffusers/diffusers-pytorch-cpu # this is a CPU image, but we need it to fetch the matrix
+      options: --shm-size "16gb" --ipc host
+    outputs:
+      pipeline_test_matrix: ${{ steps.fetch_pipeline_matrix.outputs.pipeline_test_matrix }}
+    steps:
+      - name: Checkout diffusers
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 2
+      - name: Install dependencies
+        run: |
+          apt-get update && apt-get install libsndfile1-dev libgl1 -y
+          python -m pip install -e .[quality,test]
+          python -m pip install git+https://github.com/huggingface/accelerate.git
+
+      - name: Environment
+        run: |
+          python utils/print_env.py
+
+      - name: Fetch Pipeline Matrix
+        id: fetch_pipeline_matrix
+        run: |
+          # The script prints a JSON array on stdout; keep every expansion
+          # quoted so the shell neither word-splits nor glob-expands it.
+          matrix=$(python utils/fetch_torch_cuda_pipeline_test_matrix.py)
+          echo "$matrix"
+          echo "pipeline_test_matrix=$matrix" >> "$GITHUB_OUTPUT"
+
+      # Uploaded even on failure so the generated module list can be inspected.
+      - name: Pipeline Tests Artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: test-pipelines.json
+          path: reports
+
+  torch_pipelines_cuda_tests:
+    name: Torch Pipelines CUDA Slow Tests
+    needs: setup_torch_cuda_pipeline_matrix
    strategy:
      fail-fast: false
      max-parallel: 1
      matrix:
-        config:
-          - name: Slow PyTorch CUDA tests on Ubuntu
-            framework: pytorch
-            runner: docker-gpu
+        module: ${{ fromJson(needs.setup_torch_cuda_pipeline_matrix.outputs.pipeline_test_matrix) }}
+    runs-on: docker-gpu
+    container:
      image: diffusers/diffusers-pytorch-cuda
-            report: torch_cuda
-          - name: Slow Flax TPU tests on Ubuntu
-            framework: flax
-            runner: docker-tpu
-            image: diffusers/diffusers-flax-tpu
-            report: flax_tpu
-          - name: Slow ONNXRuntime CUDA tests on Ubuntu
-            framework: onnxruntime
-            runner: docker-gpu
-            image: diffusers/diffusers-onnxruntime-cuda
-            report: onnx_cuda
-
-    name: ${{ matrix.config.name }}
+      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0
+    steps:
+      - name: Checkout diffusers
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 2
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+      - name: Install dependencies
+        run: |
+          apt-get update && apt-get install libsndfile1-dev libgl1 -y
+          python -m pip install -e .[quality,test]
+          python -m pip install git+https://github.com/huggingface/accelerate.git
+      - name: Environment
+        run: |
+          python utils/print_env.py
+      - name: Slow PyTorch CUDA checkpoint tests on Ubuntu
+        env:
+          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
+          CUBLAS_WORKSPACE_CONFIG: :16:8
+        run: |
+          python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+            -s -v -k "not Flax and not Onnx" \
+            --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
+            tests/pipelines/${{ matrix.module }}
+      - name: Failure short reports
+        if: ${{ failure() }}
+        run: |
+          cat reports/tests_pipeline_${{ matrix.module }}_cuda_stats.txt
+          cat reports/tests_pipeline_${{ matrix.module }}_cuda_failures_short.txt

-    runs-on: ${{ matrix.config.runner }}
+      - name: Test suite reports artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: pipeline_${{ matrix.module }}_test_reports
+          path: reports

+  torch_cuda_tests:
+    name: Torch CUDA Tests
+    runs-on: docker-gpu
    container:
-      image: ${{ matrix.config.image }}
-      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ ${{ matrix.config.runner == 'docker-tpu' && '--privileged' || '--gpus 0'}}
-
+      image: diffusers/diffusers-pytorch-cuda
+      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0
    defaults:
      run:
        shell: bash
-
+    strategy:
+      matrix:
+        module: [models, schedulers, lora, others]
    steps:
    - name: Checkout diffusers
      uses: actions/checkout@v3
      with:
        fetch-depth: 2

-    - name: NVIDIA-SMI
-      if : ${{ matrix.config.runner == 'docker-gpu' }}
-      run: |
-        nvidia-smi
-
    - name: Install dependencies
      run: |
        apt-get update && apt-get install libsndfile1-dev libgl1 -y
@@ -70,47 +133,121 @@ jobs:
        python utils/print_env.py

    - name: Run slow PyTorch CUDA tests
-      if: ${{ matrix.config.framework == 'pytorch' }}
      env:
        HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
        # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
        CUBLAS_WORKSPACE_CONFIG: :16:8
-
      run: |
        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-          -s -v -k "not Flax and not Onnx and not compile" \
-          --make-reports=tests_${{ matrix.config.report }} \
-          tests/
+          -s -v -k "not Flax and not Onnx" \
+          --make-reports=tests_torch_cuda \
+          tests/${{ matrix.module }}
+
+    - name: Failure short reports
+      if: ${{ failure() }}
+      run: |
+        cat reports/tests_torch_cuda_stats.txt
+        cat reports/tests_torch_cuda_failures_short.txt
+
+    - name: Test suite reports artifacts
+      if: ${{ always() }}
+      uses: actions/upload-artifact@v2
+      with:
+        name: torch_cuda_test_reports
+        path: reports
+
+  flax_tpu_tests:
+    name: Flax TPU Tests
+    runs-on: docker-tpu
+    container:
+      image: diffusers/diffusers-flax-tpu
+      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --privileged
+    defaults:
+      run:
+        shell: bash
+    steps:
+    - name: Checkout diffusers
+      uses: actions/checkout@v3
+      with:
+        fetch-depth: 2
+
+    - name: Install dependencies
+      run: |
+        apt-get update && apt-get install libsndfile1-dev libgl1 -y
+        python -m pip install -e .[quality,test]
+        python -m pip install git+https://github.com/huggingface/accelerate.git
+
+    - name: Environment
+      run: |
+        python utils/print_env.py

    - name: Run slow Flax TPU tests
-      if: ${{ matrix.config.framework == 'flax' }}
      env:
        HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
      run: |
        python -m pytest -n 0 \
          -s -v -k "Flax" \
-          --make-reports=tests_${{ matrix.config.report }} \
+          --make-reports=tests_flax_tpu \
          tests/

+    - name: Failure short reports
+      if: ${{ failure() }}
+      run: |
+        cat reports/tests_flax_tpu_stats.txt
+        cat reports/tests_flax_tpu_failures_short.txt
+
+    - name: Test suite reports artifacts
+      if: ${{ always() }}
+      uses: actions/upload-artifact@v2
+      with:
+        name: flax_tpu_test_reports
+        path: reports
+
+  onnx_cuda_tests:
+    name: ONNX CUDA Tests
+    runs-on: docker-gpu
+    container:
+      image: diffusers/diffusers-onnxruntime-cuda
+      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0
+    defaults:
+      run:
+        shell: bash
+    steps:
+    - name: Checkout diffusers
+      uses: actions/checkout@v3
+      with:
+        fetch-depth: 2
+
+    - name: Install dependencies
+      run: |
+        apt-get update && apt-get install libsndfile1-dev libgl1 -y
+        python -m pip install -e .[quality,test]
+        python -m pip install git+https://github.com/huggingface/accelerate.git
+
+    - name: Environment
+      run: |
+        python utils/print_env.py
+
    - name: Run slow ONNXRuntime CUDA tests
-      if: ${{ matrix.config.framework == 'onnxruntime' }}
      env:
        HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
      run: |
        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
          -s -v -k "Onnx" \
-          --make-reports=tests_${{ matrix.config.report }} \
+          --make-reports=tests_onnx_cuda \
          tests/

    - name: Failure short reports
      if: ${{ failure() }}
-      run: cat reports/tests_${{ matrix.config.report }}_failures_short.txt
+      run: |
+        cat reports/tests_onnx_cuda_stats.txt
+        cat reports/tests_onnx_cuda_failures_short.txt

    - name: Test suite reports artifacts
      if: ${{ always() }}
      uses: actions/upload-artifact@v2
      with:
-        name: ${{ matrix.config.report }}_test_reports
+        name: onnx_cuda_test_reports
        path: reports

  run_torch_compile_tests:
@@ -131,21 +268,17 @@ jobs:
    - name: NVIDIA-SMI
      run: |
        nvidia-smi
-
    - name: Install dependencies
      run: |
        python -m pip install -e .[quality,test,training]
-
    - name: Environment
      run: |
        python utils/print_env.py
-
    - name: Run example tests on GPU
      env:
        HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
      run: |
        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "compile" --make-reports=tests_torch_compile_cuda tests/
-
    - name: Failure short reports
      if: ${{ failure() }}
      run: cat reports/tests_torch_compile_cuda_failures_short.txt
@@ -192,7 +325,9 @@ jobs:

    - name: Failure short reports
      if: ${{ failure() }}
-      run: cat reports/examples_torch_cuda_failures_short.txt
+      run: |
+        cat reports/examples_torch_cuda_stats.txt
+        cat reports/examples_torch_cuda_failures_short.txt

    - name: Test suite reports artifacts
      if: ${{ always() }}

--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py
@@ -213,7 +213,7 @@ class BlipDiffusionControlNetPipeline(DiffusionPipeline):
            do_center_crop=False,
            do_normalize=False,
            return_tensors="pt",
-        )["pixel_values"].to(self.device)
+        )["pixel_values"].to(device)
        image_batch_size = image.shape[0]

        if image_batch_size == 1:
@@ -365,7 +365,7 @@ class BlipDiffusionControlNetPipeline(DiffusionPipeline):
            height=height,
            batch_size=batch_size,
            num_images_per_prompt=1,
-            device=self.device,
+            device=device,
            dtype=self.controlnet.dtype,
            do_classifier_free_guidance=do_classifier_free_guidance,
        )

--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
@@ -765,8 +765,9 @@ class StableDiffusionUpscalePipeline(DiffusionPipeline, TextualInversionLoaderMi

            if needs_upcasting:
                self.upcast_vae()
-                latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)

+            # Ensure latents are always the same type as the VAE
+            latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
            image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]

            # cast back to fp16 if needed

--- a/tests/lora/test_lora_layers_old_backend.py
+++ b/tests/lora/test_lora_layers_old_backend.py
@@ -1554,7 +1554,7 @@ class UNet2DConditionLoRAModelTests(unittest.TestCase):
        torch_device != "cuda" or not is_xformers_available(),
        reason="XFormers attention is only available with CUDA and `xformers` installed",
    )
-    def test_lora_xformers_on_off(self, expected_max_diff=1e-4):
+    def test_lora_xformers_on_off(self, expected_max_diff=6e-4):
        # enable deterministic behavior for gradient checkpointing
        init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()


--- a/tests/pipelines/controlnet/test_controlnet_inpaint.py
+++ b/tests/pipelines/controlnet/test_controlnet_inpaint.py
@@ -39,6 +39,7 @@ from diffusers.utils.testing_utils import (
    enable_full_determinism,
    floats_tensor,
    load_numpy,
+    numpy_cosine_similarity_distance,
    require_torch_gpu,
    slow,
    torch_device,
@@ -550,7 +551,7 @@ class ControlNetInpaintPipelineSlowTests(unittest.TestCase):
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/boy_ray_ban.npy"
        )

-        assert np.abs(expected_image - image).max() < 0.9e-1
+        assert numpy_cosine_similarity_distance(expected_image.flatten(), image.flatten()) < 1e-2

    def test_load_local(self):
        controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny")

--- a/tests/pipelines/kandinsky_v22/__init__.py
+++ b/tests/pipelines/kandinsky_v22/__init__.py
--- a/tests/pipelines/kandinsky_v22/test_kandinsky.py
+++ b/tests/pipelines/kandinsky_v22/test_kandinsky.py
--- a/tests/pipelines/kandinsky_v22/test_kandinsky_combined.py
+++ b/tests/pipelines/kandinsky_v22/test_kandinsky_combined.py
--- a/tests/pipelines/kandinsky_v22/test_kandinsky_controlnet.py
+++ b/tests/pipelines/kandinsky_v22/test_kandinsky_controlnet.py
@@ -221,6 +221,9 @@ class KandinskyV22ControlnetPipelineFastTests(PipelineTesterMixin, unittest.Test
    def test_float16_inference(self):
        super().test_float16_inference(expected_max_diff=1e-1)

+    def test_inference_batch_single_identical(self):
+        super().test_inference_batch_single_identical(expected_max_diff=5e-4)
+

 @nightly
 @require_torch_gpu

--- a/tests/pipelines/kandinsky_v22/test_kandinsky_controlnet_img2img.py
+++ b/tests/pipelines/kandinsky_v22/test_kandinsky_controlnet_img2img.py
--- a/tests/pipelines/kandinsky_v22/test_kandinsky_img2img.py
+++ b/tests/pipelines/kandinsky_v22/test_kandinsky_img2img.py
--- a/tests/pipelines/kandinsky_v22/test_kandinsky_inpaint.py
+++ b/tests/pipelines/kandinsky_v22/test_kandinsky_inpaint.py
--- a/tests/pipelines/kandinsky_v22/test_kandinsky_prior.py
+++ b/tests/pipelines/kandinsky_v22/test_kandinsky_prior.py
--- a/tests/pipelines/kandinsky_v22/test_kandinsky_prior_emb2emb.py
+++ b/tests/pipelines/kandinsky_v22/test_kandinsky_prior_emb2emb.py
--- a/tests/pipelines/test_pipelines_flax.py
+++ b/tests/pipelines/test_pipelines_flax.py
@@ -110,7 +110,7 @@ class FlaxPipelineTests(unittest.TestCase):

        assert images.shape == (num_samples, 1, 512, 512, 3)
        if jax.device_count() == 8:
-            assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 0.05652401)) < 1e-3
+            assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 0.05652401)) < 1e-2
            assert np.abs((np.abs(images, dtype=np.float32).sum() - 2383808.2)) < 5e-1

    def test_stable_diffusion_v1_4_bfloat_16(self):
@@ -139,7 +139,7 @@ class FlaxPipelineTests(unittest.TestCase):

        assert images.shape == (num_samples, 1, 512, 512, 3)
        if jax.device_count() == 8:
-            assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 0.04003906)) < 1e-3
+            assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 0.04003906)) < 5e-2
            assert np.abs((np.abs(images, dtype=np.float32).sum() - 2373516.75)) < 5e-1

    def test_stable_diffusion_v1_4_bfloat_16_with_safety(self):
@@ -168,7 +168,7 @@ class FlaxPipelineTests(unittest.TestCase):

        assert images.shape == (num_samples, 1, 512, 512, 3)
        if jax.device_count() == 8:
-            assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 0.04003906)) < 1e-3
+            assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 0.04003906)) < 5e-2
            assert np.abs((np.abs(images, dtype=np.float32).sum() - 2373516.75)) < 5e-1

    def test_stable_diffusion_v1_4_bfloat_16_ddim(self):
@@ -212,7 +212,7 @@ class FlaxPipelineTests(unittest.TestCase):

        assert images.shape == (num_samples, 1, 512, 512, 3)
        if jax.device_count() == 8:
-            assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 0.045043945)) < 1e-3
+            assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 0.045043945)) < 5e-2
            assert np.abs((np.abs(images, dtype=np.float32).sum() - 2347693.5)) < 5e-1

    def test_jax_memory_efficient_attention(self):

--- a/tests/pipelines/text_to_video/__init__.py
+++ b/tests/pipelines/text_to_video/__init__.py
--- a/tests/pipelines/text_to_video/test_text_to_video.py
+++ b/tests/pipelines/text_to_video/test_text_to_video.py
--- a/tests/pipelines/text_to_video/test_text_to_video_zero.py
+++ b/tests/pipelines/text_to_video/test_text_to_video_zero.py
--- a/tests/pipelines/text_to_video/test_video_to_video.py
+++ b/tests/pipelines/text_to_video/test_video_to_video.py
--- a/utils/fetch_torch_cuda_pipeline_test_matrix.py
+++ b/utils/fetch_torch_cuda_pipeline_test_matrix.py
+import json
+import logging
+import os
+from collections import defaultdict
+from pathlib import Path
+
+from huggingface_hub import HfApi, ModelFilter
+
+import diffusers
+
+
+# Parent of the directory that contains this script (the script lives in utils/).
+PATH_TO_REPO = Path(__file__).parent.parent.resolve()
+# Pipeline modules that main() always adds to the test list, independent of
+# the download counts fetched from the Hub.
+ALWAYS_TEST_PIPELINE_MODULES = [
+    "controlnet",
+    "stable_diffusion",
+    "stable_diffusion_2",
+    "stable_diffusion_xl",
+    "deepfloyd_if",
+    "kandinsky",
+    "kandinsky2_2",
+    "text_to_video_synthesis",
+    "wuerstchen",
+]
+# Minimum aggregated download count a pipeline needs to be selected; taken from
+# the PIPELINE_USAGE_CUTOFF environment variable, falling back to 50000.
+PIPELINE_USAGE_CUTOFF = int(os.getenv("PIPELINE_USAGE_CUTOFF", 50000))
+
+logger = logging.getLogger(__name__)
+# Hub client used by fetch_pipeline_objects() to list models.
+api = HfApi()
+# Restricts the model listing to the "diffusers" library.
+# NOTE(review): this name shadows the built-in `filter` for the whole module.
+filter = ModelFilter(library="diffusers")
+
+
+def filter_pipelines(usage_dict, usage_cutoff=10000):
+    """Select the pipeline names that are used often enough.
+
+    Args:
+        usage_dict: Mapping of object name to its usage count.
+        usage_cutoff: Entries whose count is strictly below this value are dropped.
+
+    Returns:
+        A list of the names that contain the substring "Pipeline" and whose count
+        is not below `usage_cutoff`, in the iteration order of `usage_dict`.
+    """
+    return [
+        name
+        for name, count in usage_dict.items()
+        if not count < usage_cutoff and "Pipeline" in name
+    ]
+
+
+def fetch_pipeline_objects():
+    """Aggregate Hub download counts per `diffusers:` tag and pick the popular pipelines.
+
+    Every model returned by `api.list_models(filter=filter)` contributes its
+    `downloads` to each name found after a "diffusers:" tag prefix (presumably the
+    pipeline class name -- confirm against the Hub tagging scheme). Models without
+    such a tag are accumulated under the key "other".
+
+    Returns:
+        The result of `filter_pipelines` applied to the non-zero totals with
+        `PIPELINE_USAGE_CUTOFF` as the cutoff.
+    """
+    tag_prefix = "diffusers:"
+    totals = defaultdict(int)
+
+    for model in api.list_models(filter=filter):
+        tagged_names = [tag[len(tag_prefix) :] for tag in model.tags if tag.startswith(tag_prefix)]
+        # A model with no matching tag is still counted, under the "other" bucket.
+        for name in tagged_names or ["other"]:
+            totals[name] += model.downloads
+
+    # Entries that never accumulated a download are dropped before filtering.
+    nonzero_totals = {name: count for name, count in totals.items() if count > 0}
+
+    return filter_pipelines(nonzero_totals, PIPELINE_USAGE_CUTOFF)
+
+
+def fetch_pipeline_modules_to_test():
+    """Map the popular pipeline names to the names of the modules that define them.
+
+    Returns:
+        A list (possibly containing duplicates) with, for each selected pipeline,
+        the second-to-last component of the dotted `__module__` path of the
+        matching attribute of the `diffusers` package.
+
+    Raises:
+        RuntimeError: If fetching the pipeline names fails for any reason; the
+            original exception is attached as the cause.
+    """
+    try:
+        pipeline_objects = fetch_pipeline_objects()
+    except Exception as e:
+        logger.error(e)
+        raise RuntimeError("Unable to fetch model list from HuggingFace Hub.") from e
+
+    test_modules = []
+    for pipeline_name in pipeline_objects:
+        # Names come from Hub tags, so they are not guaranteed to be exported by
+        # the installed package; skip unknown ones instead of aborting the run.
+        pipeline_cls = getattr(diffusers, pipeline_name, None)
+        if pipeline_cls is None:
+            logger.warning(f"`{pipeline_name}` is not available in `diffusers`; skipping it.")
+            continue
+
+        test_module = pipeline_cls.__module__.split(".")[-2].strip()
+        test_modules.append(test_module)
+
+    return test_modules
+
+
+def main():
+    """Print the pipeline test modules as JSON and save them under `reports/`.
+
+    The module list is the union of the modules derived from Hub usage and
+    `ALWAYS_TEST_PIPELINE_MODULES`. The JSON array printed on stdout is what the
+    workflow step captures as the test matrix; the same list is also written to
+    `<repo>/reports/test-pipelines.json` under the key "pipeline_test_modules".
+    """
+    test_modules = fetch_pipeline_modules_to_test()
+    test_modules.extend(ALWAYS_TEST_PIPELINE_MODULES)
+
+    # Get unique modules; sort them so the printed matrix and the saved report
+    # have the same order on every run (plain set iteration order is not stable).
+    test_modules = sorted(set(test_modules))
+    print(json.dumps(test_modules))
+
+    save_path = f"{PATH_TO_REPO}/reports"
+    os.makedirs(save_path, exist_ok=True)
+
+    with open(f"{save_path}/test-pipelines.json", "w") as f:
+        json.dump({"pipeline_test_modules": test_modules}, f)
+
+
+if __name__ == "__main__":
+    main()