Unverified Commit 8ee21915 authored by Lewington-pitsos's avatar Lewington-pitsos Committed by GitHub
Browse files

Integration tests precision improvement for inpainting (#1052)



* improve test precision

get tests passing with greater precision using lewington images

* make old numpy load function a wrapper around a more flexible numpy loading function

* adhere to black formatting

* add more black formatting

* adhere to isort

* loosen precision and replace path
Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
parent 86087957
...@@ -42,6 +42,7 @@ from .outputs import BaseOutput ...@@ -42,6 +42,7 @@ from .outputs import BaseOutput
if is_torch_available(): if is_torch_available():
from .testing_utils import ( from .testing_utils import (
floats_tensor, floats_tensor,
load_hf_numpy,
load_image, load_image,
load_numpy, load_numpy,
parse_flag_from_env, parse_flag_from_env,
......
...@@ -139,6 +139,29 @@ def require_onnxruntime(test_case): ...@@ -139,6 +139,29 @@ def require_onnxruntime(test_case):
return unittest.skipUnless(is_onnx_available(), "test requires onnxruntime")(test_case) return unittest.skipUnless(is_onnx_available(), "test requires onnxruntime")(test_case)
def load_numpy(arry: Union[str, np.ndarray]) -> np.ndarray:
    """Load a numpy array from a URL or local path, or pass an ndarray through unchanged.

    Args:
        arry: An ``http(s)://`` URL pointing at a ``.npy`` file, a local file
            path, or an already-loaded ``np.ndarray``.

    Returns:
        The loaded (or passed-through) ``np.ndarray``.

    Raises:
        ValueError: If ``arry`` is neither a str nor an ndarray, or if a str
            is neither a valid URL nor an existing file path.
    """
    # Already an array: nothing to load.
    if isinstance(arry, np.ndarray):
        return arry

    if not isinstance(arry, str):
        raise ValueError(
            "Incorrect format used for numpy ndarray. Should be an url linking to an image, a local path, or a"
            " ndarray."
        )

    # Remote file: fetch the bytes and deserialize in memory.
    if arry.startswith("http://") or arry.startswith("https://"):
        response = requests.get(arry)
        response.raise_for_status()
        return np.load(BytesIO(response.content))

    # Local file on disk.
    if os.path.isfile(arry):
        return np.load(arry)

    raise ValueError(
        f"Incorrect path or url, URLs must start with `http://` or `https://`, and {arry} is not a valid path"
    )
def load_image(image: Union[str, PIL.Image.Image]) -> PIL.Image.Image: def load_image(image: Union[str, PIL.Image.Image]) -> PIL.Image.Image:
""" """
Args: Args:
...@@ -168,17 +191,13 @@ def load_image(image: Union[str, PIL.Image.Image]) -> PIL.Image.Image: ...@@ -168,17 +191,13 @@ def load_image(image: Union[str, PIL.Image.Image]) -> PIL.Image.Image:
return image return image
def load_hf_numpy(path) -> np.ndarray:
    """Load a numpy array from the HF ``fusing/diffusers-testing`` dataset.

    Relative paths are resolved against the dataset's ``resolve/main`` base
    URL (with URL-quoting); absolute ``http(s)://`` URLs are used as-is.

    Args:
        path: A path relative to the testing dataset, or a full URL.

    Returns:
        The ``np.ndarray`` fetched and loaded via :func:`load_numpy`.
    """
    # Bug fix: the original condition `not path.startswith("http://") or
    # path.startswith("https://")` parses as `(not A) or B`, so any https://
    # URL was wrongly prefixed with the dataset base URL. Parenthesize so
    # only non-URL (relative) paths are prefixed.
    if not (path.startswith("http://") or path.startswith("https://")):
        path = os.path.join(
            "https://huggingface.co/datasets/fusing/diffusers-testing/resolve/main", urllib.parse.quote(path)
        )

    return load_numpy(path)
# --- pytest conf functions --- # # --- pytest conf functions --- #
......
...@@ -21,7 +21,15 @@ import unittest ...@@ -21,7 +21,15 @@ import unittest
import torch import torch
from diffusers import UNet2DConditionModel, UNet2DModel from diffusers import UNet2DConditionModel, UNet2DModel
from diffusers.utils import floats_tensor, load_numpy, logging, require_torch_gpu, slow, torch_all_close, torch_device from diffusers.utils import (
floats_tensor,
load_hf_numpy,
logging,
require_torch_gpu,
slow,
torch_all_close,
torch_device,
)
from parameterized import parameterized from parameterized import parameterized
from ..test_modeling_common import ModelTesterMixin from ..test_modeling_common import ModelTesterMixin
...@@ -423,7 +431,7 @@ class UNet2DConditionModelIntegrationTests(unittest.TestCase): ...@@ -423,7 +431,7 @@ class UNet2DConditionModelIntegrationTests(unittest.TestCase):
def get_latents(self, seed=0, shape=(4, 4, 64, 64), fp16=False): def get_latents(self, seed=0, shape=(4, 4, 64, 64), fp16=False):
dtype = torch.float16 if fp16 else torch.float32 dtype = torch.float16 if fp16 else torch.float32
image = torch.from_numpy(load_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype) image = torch.from_numpy(load_hf_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype)
return image return image
def get_unet_model(self, fp16=False, model_id="CompVis/stable-diffusion-v1-4"): def get_unet_model(self, fp16=False, model_id="CompVis/stable-diffusion-v1-4"):
...@@ -439,7 +447,7 @@ class UNet2DConditionModelIntegrationTests(unittest.TestCase): ...@@ -439,7 +447,7 @@ class UNet2DConditionModelIntegrationTests(unittest.TestCase):
def get_encoder_hidden_states(self, seed=0, shape=(4, 77, 768), fp16=False): def get_encoder_hidden_states(self, seed=0, shape=(4, 77, 768), fp16=False):
dtype = torch.float16 if fp16 else torch.float32 dtype = torch.float16 if fp16 else torch.float32
hidden_states = torch.from_numpy(load_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype) hidden_states = torch.from_numpy(load_hf_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype)
return hidden_states return hidden_states
@parameterized.expand( @parameterized.expand(
......
...@@ -20,7 +20,7 @@ import torch ...@@ -20,7 +20,7 @@ import torch
from diffusers import AutoencoderKL from diffusers import AutoencoderKL
from diffusers.modeling_utils import ModelMixin from diffusers.modeling_utils import ModelMixin
from diffusers.utils import floats_tensor, load_numpy, require_torch_gpu, slow, torch_all_close, torch_device from diffusers.utils import floats_tensor, load_hf_numpy, require_torch_gpu, slow, torch_all_close, torch_device
from parameterized import parameterized from parameterized import parameterized
from ..test_modeling_common import ModelTesterMixin from ..test_modeling_common import ModelTesterMixin
...@@ -147,7 +147,7 @@ class AutoencoderKLIntegrationTests(unittest.TestCase): ...@@ -147,7 +147,7 @@ class AutoencoderKLIntegrationTests(unittest.TestCase):
def get_sd_image(self, seed=0, shape=(4, 3, 512, 512), fp16=False): def get_sd_image(self, seed=0, shape=(4, 3, 512, 512), fp16=False):
dtype = torch.float16 if fp16 else torch.float32 dtype = torch.float16 if fp16 else torch.float32
image = torch.from_numpy(load_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype) image = torch.from_numpy(load_hf_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype)
return image return image
def get_sd_vae_model(self, model_id="CompVis/stable-diffusion-v1-4", fp16=False): def get_sd_vae_model(self, model_id="CompVis/stable-diffusion-v1-4", fp16=False):
......
...@@ -28,7 +28,7 @@ from diffusers import ( ...@@ -28,7 +28,7 @@ from diffusers import (
UNet2DModel, UNet2DModel,
VQModel, VQModel,
) )
from diffusers.utils import floats_tensor, load_image, slow, torch_device from diffusers.utils import floats_tensor, load_image, load_numpy, slow, torch_device
from diffusers.utils.testing_utils import require_torch_gpu from diffusers.utils.testing_utils import require_torch_gpu
from PIL import Image from PIL import Image
from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer
...@@ -278,11 +278,10 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase): ...@@ -278,11 +278,10 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase):
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
"/in_paint/overture-creations-5sI6fQgYIuo_mask.png" "/in_paint/overture-creations-5sI6fQgYIuo_mask.png"
) )
expected_image = load_image( expected_image = load_numpy(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/in_paint"
"/in_paint/yellow_cat_sitting_on_a_park_bench.png" "/yellow_cat_sitting_on_a_park_bench.npy"
) )
expected_image = np.array(expected_image, dtype=np.float32) / 255.0
model_id = "runwayml/stable-diffusion-inpainting" model_id = "runwayml/stable-diffusion-inpainting"
pipe = StableDiffusionInpaintPipeline.from_pretrained( pipe = StableDiffusionInpaintPipeline.from_pretrained(
...@@ -307,7 +306,7 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase): ...@@ -307,7 +306,7 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase):
image = output.images[0] image = output.images[0]
assert image.shape == (512, 512, 3) assert image.shape == (512, 512, 3)
assert np.abs(expected_image - image).max() < 1e-2 assert np.abs(expected_image - image).max() < 1e-3
def test_stable_diffusion_inpaint_pipeline_fp16(self): def test_stable_diffusion_inpaint_pipeline_fp16(self):
init_image = load_image( init_image = load_image(
...@@ -318,11 +317,10 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase): ...@@ -318,11 +317,10 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase):
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
"/in_paint/overture-creations-5sI6fQgYIuo_mask.png" "/in_paint/overture-creations-5sI6fQgYIuo_mask.png"
) )
expected_image = load_image( expected_image = load_numpy(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/in_paint"
"/in_paint/yellow_cat_sitting_on_a_park_bench_fp16.png" "/yellow_cat_sitting_on_a_park_bench_fp16.npy"
) )
expected_image = np.array(expected_image, dtype=np.float32) / 255.0
model_id = "runwayml/stable-diffusion-inpainting" model_id = "runwayml/stable-diffusion-inpainting"
pipe = StableDiffusionInpaintPipeline.from_pretrained( pipe = StableDiffusionInpaintPipeline.from_pretrained(
...@@ -360,11 +358,10 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase): ...@@ -360,11 +358,10 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase):
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
"/in_paint/overture-creations-5sI6fQgYIuo_mask.png" "/in_paint/overture-creations-5sI6fQgYIuo_mask.png"
) )
expected_image = load_image( expected_image = load_numpy(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/in_paint"
"/in_paint/yellow_cat_sitting_on_a_park_bench_pndm.png" "/yellow_cat_sitting_on_a_park_bench_pndm.npy"
) )
expected_image = np.array(expected_image, dtype=np.float32) / 255.0
model_id = "runwayml/stable-diffusion-inpainting" model_id = "runwayml/stable-diffusion-inpainting"
pndm = PNDMScheduler.from_config(model_id, subfolder="scheduler") pndm = PNDMScheduler.from_config(model_id, subfolder="scheduler")
...@@ -388,4 +385,4 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase): ...@@ -388,4 +385,4 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase):
image = output.images[0] image = output.images[0]
assert image.shape == (512, 512, 3) assert image.shape == (512, 512, 3)
assert np.abs(expected_image - image).max() < 1e-2 assert np.abs(expected_image - image).max() < 1e-3
...@@ -31,7 +31,7 @@ from diffusers import ( ...@@ -31,7 +31,7 @@ from diffusers import (
VQModel, VQModel,
) )
from diffusers.utils import floats_tensor, load_image, slow, torch_device from diffusers.utils import floats_tensor, load_image, slow, torch_device
from diffusers.utils.testing_utils import require_torch_gpu from diffusers.utils.testing_utils import load_numpy, require_torch_gpu
from PIL import Image from PIL import Image
from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer
...@@ -358,11 +358,10 @@ class StableDiffusionInpaintLegacyPipelineIntegrationTests(unittest.TestCase): ...@@ -358,11 +358,10 @@ class StableDiffusionInpaintLegacyPipelineIntegrationTests(unittest.TestCase):
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
"/in_paint/overture-creations-5sI6fQgYIuo_mask.png" "/in_paint/overture-creations-5sI6fQgYIuo_mask.png"
) )
expected_image = load_image( expected_image = load_numpy(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/in_paint"
"/in_paint/red_cat_sitting_on_a_park_bench.png" "/red_cat_sitting_on_a_park_bench.npy"
) )
expected_image = np.array(expected_image, dtype=np.float32) / 255.0
model_id = "CompVis/stable-diffusion-v1-4" model_id = "CompVis/stable-diffusion-v1-4"
pipe = StableDiffusionInpaintPipeline.from_pretrained( pipe = StableDiffusionInpaintPipeline.from_pretrained(
...@@ -389,7 +388,7 @@ class StableDiffusionInpaintLegacyPipelineIntegrationTests(unittest.TestCase): ...@@ -389,7 +388,7 @@ class StableDiffusionInpaintLegacyPipelineIntegrationTests(unittest.TestCase):
image = output.images[0] image = output.images[0]
assert image.shape == (512, 512, 3) assert image.shape == (512, 512, 3)
assert np.abs(expected_image - image).max() < 1e-2 assert np.abs(expected_image - image).max() < 1e-3
def test_stable_diffusion_inpaint_legacy_pipeline_k_lms(self): def test_stable_diffusion_inpaint_legacy_pipeline_k_lms(self):
# TODO(Anton, Patrick) - I think we can remove this test soon # TODO(Anton, Patrick) - I think we can remove this test soon
...@@ -401,11 +400,10 @@ class StableDiffusionInpaintLegacyPipelineIntegrationTests(unittest.TestCase): ...@@ -401,11 +400,10 @@ class StableDiffusionInpaintLegacyPipelineIntegrationTests(unittest.TestCase):
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
"/in_paint/overture-creations-5sI6fQgYIuo_mask.png" "/in_paint/overture-creations-5sI6fQgYIuo_mask.png"
) )
expected_image = load_image( expected_image = load_numpy(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/in_paint"
"/in_paint/red_cat_sitting_on_a_park_bench_k_lms.png" "/red_cat_sitting_on_a_park_bench_k_lms.npy"
) )
expected_image = np.array(expected_image, dtype=np.float32) / 255.0
model_id = "CompVis/stable-diffusion-v1-4" model_id = "CompVis/stable-diffusion-v1-4"
lms = LMSDiscreteScheduler.from_config(model_id, subfolder="scheduler") lms = LMSDiscreteScheduler.from_config(model_id, subfolder="scheduler")
...@@ -434,7 +432,7 @@ class StableDiffusionInpaintLegacyPipelineIntegrationTests(unittest.TestCase): ...@@ -434,7 +432,7 @@ class StableDiffusionInpaintLegacyPipelineIntegrationTests(unittest.TestCase):
image = output.images[0] image = output.images[0]
assert image.shape == (512, 512, 3) assert image.shape == (512, 512, 3)
assert np.abs(expected_image - image).max() < 1e-2 assert np.abs(expected_image - image).max() < 1e-3
def test_stable_diffusion_inpaint_legacy_intermediate_state(self): def test_stable_diffusion_inpaint_legacy_intermediate_state(self):
number_of_steps = 0 number_of_steps = 0
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment