Unverified commit 4d2c981d, authored by Dhruv Nair, committed by GitHub

New xformers test runner (#5349)

* move xformers tests to a dedicated runner

* fix

* remove pytorch-lightning (ptl) from the test runner images
parent cf03f5b7
@@ -27,6 +27,7 @@ jobs:
           - diffusers-pytorch-cpu
           - diffusers-pytorch-cuda
           - diffusers-pytorch-compile-cuda
+          - diffusers-pytorch-xformers-cuda
           - diffusers-flax-cpu
           - diffusers-flax-tpu
           - diffusers-onnxruntime-cpu
......
@@ -290,6 +290,46 @@ jobs:
           name: torch_compile_test_reports
           path: reports

+  run_xformers_tests:
+    name: PyTorch xformers CUDA tests
+    runs-on: docker-gpu
+    container:
+      image: diffusers/diffusers-pytorch-xformers-cuda
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
+    steps:
+      - name: Checkout diffusers
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 2
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      - name: Install dependencies
+        run: |
+          python -m pip install -e .[quality,test,training]
+
+      - name: Environment
+        run: |
+          python utils/print_env.py
+
+      - name: Run PyTorch xformers CUDA tests
+        env:
+          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+        run: |
+          python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "xformers" --make-reports=tests_torch_xformers_cuda tests/
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        run: cat reports/tests_torch_xformers_cuda_failures_short.txt
+
+      - name: Test suite reports artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: torch_xformers_test_reports
+          path: reports
+
   run_examples_tests:
     name: Examples PyTorch CUDA tests on Ubuntu
......
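The new job selects its suite purely by test name: pytest's `-k "xformers"` expression keeps only tests whose names contain the substring "xformers" and deselects everything else, which is why the tests further down in this commit are renamed to include it. A minimal sketch of how that selection behaves (hypothetical test names, not part of this commit):

# selection_sketch.py -- illustrates the pytest -k "xformers" name filter
import unittest

import torch

from diffusers.utils import is_xformers_available


class ExamplePipelineTests(unittest.TestCase):
    @unittest.skipIf(
        not torch.cuda.is_available() or not is_xformers_available(),
        reason="xformers requires cuda",
    )
    def test_example_xformers_attention(self):
        # selected by `-k "xformers"`: the substring appears in the test name
        pass

    def test_example_default_attention(self):
        # deselected on the xformers runner: no "xformers" in the name
        pass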
@@ -41,8 +41,6 @@ RUN python3.9 -m pip install --no-cache-dir --upgrade pip && \
         scipy \
         tensorboard \
         transformers \
-        omegaconf \
-        pytorch-lightning \
-        xformers
+        omegaconf

 CMD ["/bin/bash"]
@@ -25,8 +25,8 @@ ENV PATH="/opt/venv/bin:$PATH"

 # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
 RUN python3 -m pip install --no-cache-dir --upgrade pip && \
     python3 -m pip install --no-cache-dir \
-        torch==2.0.1 \
-        torchvision==0.15.2 \
+        torch \
+        torchvision \
         torchaudio \
         invisible_watermark && \
     python3 -m pip install --no-cache-dir \
@@ -40,8 +40,6 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip && \
         scipy \
         tensorboard \
         transformers \
-        omegaconf \
-        pytorch-lightning \
-        xformers
+        omegaconf

 CMD ["/bin/bash"]
FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu20.04
LABEL maintainer="Hugging Face"
LABEL repository="diffusers"

ENV DEBIAN_FRONTEND=noninteractive

RUN apt update && \
    apt install -y bash \
    build-essential \
    git \
    git-lfs \
    curl \
    ca-certificates \
    libsndfile1-dev \
    libgl1 \
    python3.8 \
    python3-pip \
    python3.8-venv && \
    rm -rf /var/lib/apt/lists

# make sure to use venv
RUN python3 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
    python3 -m pip install --no-cache-dir \
        torch==2.0.1 \
        torchvision==0.15.2 \
        torchaudio \
        invisible_watermark && \
    python3 -m pip install --no-cache-dir \
        accelerate \
        datasets \
        hf-doc-builder \
        huggingface-hub \
        Jinja2 \
        librosa \
        numpy \
        scipy \
        tensorboard \
        transformers \
        omegaconf \
        xformers

CMD ["/bin/bash"]
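The new image mirrors the existing CUDA test images but keeps `torch` pinned to 2.0.1 and installs `xformers` on top. A quick sanity check one could run inside the container (a sketch; `is_xformers_available` is the same helper this commit imports in the tests):

# verify the pre-installed stack inside diffusers-pytorch-xformers-cuda
import torch

from diffusers.utils import is_xformers_available

print("torch:", torch.__version__)           # expected 2.0.1 per the Dockerfile pin
print("cuda:", torch.cuda.is_available())    # requires the container to see a GPU
print("xformers:", is_xformers_available())  # True when the xformers install succeeded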
@@ -293,8 +293,8 @@ class LoraLoaderMixinTests(unittest.TestCase):
         )
         self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors")))

-    @unittest.skipIf(not torch.cuda.is_available(), reason="xformers requires cuda")
-    def test_stable_diffusion_attn_processors(self):
+    @unittest.skipIf(not torch.cuda.is_available() or not is_xformers_available(), reason="xformers requires cuda")
+    def test_stable_diffusion_xformers_attn_processors(self):
         # disable_full_determinism()
         device = "cuda"  # ensure determinism for the device-dependent torch.Generator
         components, _ = self.get_dummy_components()
@@ -304,12 +304,23 @@ class LoraLoaderMixinTests(unittest.TestCase):
         _, _, inputs = self.get_dummy_inputs()

-        # run normal sd pipe
+        # run xformers attention
+        sd_pipe.enable_xformers_memory_efficient_attention()
         image = sd_pipe(**inputs).images
         assert image.shape == (1, 64, 64, 3)

-        # run xformers attention
-        sd_pipe.enable_xformers_memory_efficient_attention()
+    @unittest.skipIf(not torch.cuda.is_available(), reason="xformers requires cuda")
+    def test_stable_diffusion_attn_processors(self):
+        # disable_full_determinism()
+        device = "cuda"  # ensure determinism for the device-dependent torch.Generator
+        components, _ = self.get_dummy_components()
+        sd_pipe = StableDiffusionPipeline(**components)
+        sd_pipe = sd_pipe.to(device)
+        sd_pipe.set_progress_bar_config(disable=None)
+
+        _, _, inputs = self.get_dummy_inputs()
+
+        # run normal sd pipe
         image = sd_pipe(**inputs).images
         assert image.shape == (1, 64, 64, 3)
......
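The split leaves one test per attention path, each gated on exactly the capabilities it needs, and the xformers variant now lands on the dedicated runner via the `-k "xformers"` name filter. The toggle being exercised is the public pipeline API; a minimal usage sketch (the checkpoint id and prompt are placeholders, not taken from this commit):

# toggling memory-efficient attention on a pipeline
import torch

from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

# default attention processors (covered by test_stable_diffusion_attn_processors)
image = pipe("an astronaut riding a horse").images[0]

# xformers attention (covered by test_stable_diffusion_xformers_attn_processors)
pipe.enable_xformers_memory_efficient_attention()
image = pipe("an astronaut riding a horse").images[0]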
@@ -30,7 +30,7 @@ from requests.exceptions import HTTPError
 from diffusers.models import UNet2DConditionModel
 from diffusers.models.attention_processor import AttnProcessor, AttnProcessor2_0, XFormersAttnProcessor
 from diffusers.training_utils import EMAModel
-from diffusers.utils import logging
+from diffusers.utils import is_xformers_available, logging
 from diffusers.utils.testing_utils import (
     CaptureLogger,
     require_python39_or_higher,
@@ -269,6 +269,32 @@ class ModelTesterMixin:
         assert str(error.exception) == f"'{type(model).__name__}' object has no attribute 'does_not_exist'"

+    @unittest.skipIf(
+        torch_device != "cuda" or not is_xformers_available(),
+        reason="XFormers attention is only available with CUDA and `xformers` installed",
+    )
+    def test_set_xformers_attn_processor_for_determinism(self):
+        torch.use_deterministic_algorithms(False)
+        init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
+        model = self.model_class(**init_dict)
+        model.to(torch_device)
+
+        if not hasattr(model, "set_attn_processor"):
+            # the model does not expose `set_attn_processor`; nothing to test
+            return
+
+        model.set_default_attn_processor()
+        assert all(type(proc) == AttnProcessor for proc in model.attn_processors.values())
+        with torch.no_grad():
+            output = model(**inputs_dict)[0]
+
+        model.enable_xformers_memory_efficient_attention()
+        assert all(type(proc) == XFormersAttnProcessor for proc in model.attn_processors.values())
+        with torch.no_grad():
+            output_2 = model(**inputs_dict)[0]
+
+        assert torch.allclose(output, output_2, atol=self.base_precision)
+
     @require_torch_gpu
     def test_set_attn_processor_for_determinism(self):
         torch.use_deterministic_algorithms(False)
@@ -292,7 +318,7 @@ class ModelTesterMixin:
         model.enable_xformers_memory_efficient_attention()
         assert all(type(proc) == XFormersAttnProcessor for proc in model.attn_processors.values())
         with torch.no_grad():
-            output_3 = model(**inputs_dict)[0]
+            model(**inputs_dict)[0]

         model.set_attn_processor(AttnProcessor2_0())
         assert all(type(proc) == AttnProcessor2_0 for proc in model.attn_processors.values())
@@ -313,7 +339,6 @@ class ModelTesterMixin:

         # make sure that outputs match
         assert torch.allclose(output_2, output_1, atol=self.base_precision)
-        assert torch.allclose(output_2, output_3, atol=self.base_precision)
         assert torch.allclose(output_2, output_4, atol=self.base_precision)
         assert torch.allclose(output_2, output_5, atol=self.base_precision)
         assert torch.allclose(output_2, output_6, atol=self.base_precision)
......
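The new `test_set_xformers_attn_processor_for_determinism` isolates the comparison that the removed `output_3` lines used to perform inside the mixed-processor test: run the model once with the default processors, once with `XFormersAttnProcessor`, and assert the outputs agree. A self-contained sketch of that pattern against a small `UNet2DConditionModel` (the config and the 1e-3 tolerance are illustrative assumptions; requires CUDA and xformers):

# determinism check: default attention vs. xformers attention
import torch

from diffusers import UNet2DConditionModel
from diffusers.models.attention_processor import AttnProcessor, XFormersAttnProcessor

model = UNet2DConditionModel(
    block_out_channels=(32, 64),  # small, test-sized config (assumed)
    layers_per_block=2,
    sample_size=32,
    in_channels=4,
    out_channels=4,
    down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
    up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
    cross_attention_dim=32,
).to("cuda")

sample = torch.randn(1, 4, 32, 32, device="cuda")
encoder_hidden_states = torch.randn(1, 77, 32, device="cuda")

model.set_default_attn_processor()
assert all(type(p) == AttnProcessor for p in model.attn_processors.values())
with torch.no_grad():
    out_default = model(sample, timestep=1, encoder_hidden_states=encoder_hidden_states).sample

model.enable_xformers_memory_efficient_attention()
assert all(type(p) == XFormersAttnProcessor for p in model.attn_processors.values())
with torch.no_grad():
    out_xformers = model(sample, timestep=1, encoder_hidden_states=encoder_hidden_states).sample

# the two attention backends should agree within tolerance
assert torch.allclose(out_default, out_xformers, atol=1e-3)  # tolerance is an assumption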