"git@developer.sourcefind.cn:OpenDAS/torch-harmonics.git" did not exist on "652c4ab2761536310c77b96fe91b7280d7b002a7"
Unverified Commit 4af76d0d authored by Sayak Paul, committed by GitHub

[tests] Changes to the `torch.compile()` CI and tests (#11508)

* remove compile cuda docker.

* replace compile cuda docker path.

* better manage compilation cache (see the sketch below).

* propagate similar to the pipeline tests.

* remove unneeded compile test.

* small.

* don't check for deleted files.
parent b5c2050a
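The cache-management changes below all follow one pattern: reset the compiler state before a test runs and compile inside a throwaway inductor cache, so artifacts left by one test cannot hide recompilations in the next. A minimal sketch of that pattern, assuming a generic `model` and `inputs_dict` (placeholder names, not part of this commit):

import gc

import torch


def run_isolated_compile_check(model, inputs_dict):
    # Start from a clean slate: drop every graph dynamo has cached so far.
    torch.compiler.reset()
    gc.collect()

    compiled_model = torch.compile(model, fullgraph=True)
    with (
        torch._inductor.utils.fresh_inductor_cache(),  # compile into a temporary cache dir
        torch._dynamo.config.patch(error_on_recompile=True),  # any recompilation fails loudly
        torch.no_grad(),
    ):
        _ = compiled_model(**inputs_dict)  # first call triggers compilation
        _ = compiled_model(**inputs_dict)  # second call must reuse the compiled graph

The `TorchCompileTesterMixin` and the LoRA hot-swap tests below apply this same combination through their setUp/tearDown hooks and context managers.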
@@ -23,7 +23,7 @@ jobs:
     runs-on:
       group: aws-g6-4xlarge-plus
     container:
-      image: diffusers/diffusers-pytorch-compile-cuda
+      image: diffusers/diffusers-pytorch-cuda
       options: --shm-size "16gb" --ipc host --gpus 0
     steps:
       - name: Checkout diffusers
...
@@ -41,6 +41,12 @@ jobs:
         run: |
           CHANGED_FILES="${{ steps.file_changes.outputs.all }}"
           for FILE in $CHANGED_FILES; do
+            # skip anything that isn't still on disk
+            if [[ ! -f "$FILE" ]]; then
+              echo "Skipping removed file $FILE"
+              continue
+            fi
+
             if [[ "$FILE" == docker/*Dockerfile ]]; then
               DOCKER_PATH="${FILE%/Dockerfile}"
               DOCKER_TAG=$(basename "$DOCKER_PATH")
@@ -65,7 +71,7 @@ jobs:
         image-name:
           - diffusers-pytorch-cpu
           - diffusers-pytorch-cuda
-          - diffusers-pytorch-compile-cuda
+          - diffusers-pytorch-cuda
           - diffusers-pytorch-xformers-cuda
           - diffusers-pytorch-minimum-cuda
           - diffusers-flax-cpu
...
@@ -188,7 +188,7 @@ jobs:
       group: aws-g4dn-2xlarge
     container:
-      image: diffusers/diffusers-pytorch-compile-cuda
+      image: diffusers/diffusers-pytorch-cuda
       options: --gpus 0 --shm-size "16gb" --ipc host
     steps:
...
@@ -262,7 +262,7 @@ jobs:
       group: aws-g4dn-2xlarge
     container:
-      image: diffusers/diffusers-pytorch-compile-cuda
+      image: diffusers/diffusers-pytorch-cuda
       options: --gpus 0 --shm-size "16gb" --ipc host
     steps:
...
@@ -316,7 +316,7 @@ jobs:
       group: aws-g4dn-2xlarge
     container:
-      image: diffusers/diffusers-pytorch-compile-cuda
+      image: diffusers/diffusers-pytorch-cuda
       options: --gpus 0 --shm-size "16gb" --ipc host
     steps:
...
-FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
-LABEL maintainer="Hugging Face"
-LABEL repository="diffusers"
-
-ENV DEBIAN_FRONTEND=noninteractive
-
-RUN apt-get -y update \
-    && apt-get install -y software-properties-common \
-    && add-apt-repository ppa:deadsnakes/ppa
-
-RUN apt install -y bash \
-    build-essential \
-    git \
-    git-lfs \
-    curl \
-    ca-certificates \
-    libsndfile1-dev \
-    libgl1 \
-    python3.10 \
-    python3.10-dev \
-    python3-pip \
-    python3.10-venv && \
-    rm -rf /var/lib/apt/lists
-
-# make sure to use venv
-RUN python3.10 -m venv /opt/venv
-ENV PATH="/opt/venv/bin:$PATH"
-
-# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
-RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
-    python3.10 -m uv pip install --no-cache-dir \
-    torch \
-    torchvision \
-    torchaudio \
-    invisible_watermark && \
-    python3.10 -m pip install --no-cache-dir \
-    accelerate \
-    datasets \
-    hf-doc-builder \
-    huggingface-hub \
-    hf_transfer \
-    Jinja2 \
-    librosa \
-    numpy==1.26.4 \
-    scipy \
-    tensorboard \
-    transformers \
-    hf_transfer
-
-CMD ["/bin/bash"]
@@ -1748,14 +1748,14 @@ class TorchCompileTesterMixin:
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
-        torch._dynamo.reset()
+        torch.compiler.reset()
         gc.collect()
         backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test in case of CUDA runtime errors
         super().tearDown()
-        torch._dynamo.reset()
+        torch.compiler.reset()
         gc.collect()
         backend_empty_cache(torch_device)
@@ -1764,13 +1764,17 @@ class TorchCompileTesterMixin:
     @is_torch_compile
     @slow
     def test_torch_compile_recompilation_and_graph_break(self):
-        torch._dynamo.reset()
+        torch.compiler.reset()
         init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
         model = self.model_class(**init_dict).to(torch_device)
         model = torch.compile(model, fullgraph=True)

-        with torch._dynamo.config.patch(error_on_recompile=True), torch.no_grad():
+        with (
+            torch._inductor.utils.fresh_inductor_cache(),
+            torch._dynamo.config.patch(error_on_recompile=True),
+            torch.no_grad(),
+        ):
             _ = model(**inputs_dict)
             _ = model(**inputs_dict)
@@ -1798,7 +1802,7 @@ class LoraHotSwappingForModelTesterMixin:
         # It is critical that the dynamo cache is reset for each test. Otherwise, if the test re-uses the same model,
         # there will be recompilation errors, as torch caches the model when run in the same process.
         super().tearDown()
-        torch._dynamo.reset()
+        torch.compiler.reset()
         gc.collect()
         backend_empty_cache(torch_device)
@@ -1915,7 +1919,7 @@ class LoraHotSwappingForModelTesterMixin:
     def test_hotswapping_compiled_model_linear(self, rank0, rank1):
         # It's important to add this context to raise an error on recompilation
         target_modules = ["to_q", "to_k", "to_v", "to_out.0"]
-        with torch._dynamo.config.patch(error_on_recompile=True):
+        with torch._dynamo.config.patch(error_on_recompile=True), torch._inductor.utils.fresh_inductor_cache():
             self.check_model_hotswap(do_compile=True, rank0=rank0, rank1=rank1, target_modules0=target_modules)

     @parameterized.expand([(11, 11), (7, 13), (13, 7)])  # important to test small to large and vice versa
@@ -1925,7 +1929,7 @@ class LoraHotSwappingForModelTesterMixin:
         # It's important to add this context to raise an error on recompilation
         target_modules = ["conv", "conv1", "conv2"]
-        with torch._dynamo.config.patch(error_on_recompile=True):
+        with torch._dynamo.config.patch(error_on_recompile=True), torch._inductor.utils.fresh_inductor_cache():
             self.check_model_hotswap(do_compile=True, rank0=rank0, rank1=rank1, target_modules0=target_modules)

     @parameterized.expand([(11, 11), (7, 13), (13, 7)])  # important to test small to large and vice versa
@@ -1935,7 +1939,7 @@ class LoraHotSwappingForModelTesterMixin:
         # It's important to add this context to raise an error on recompilation
         target_modules = ["to_q", "conv"]
-        with torch._dynamo.config.patch(error_on_recompile=True):
+        with torch._dynamo.config.patch(error_on_recompile=True), torch._inductor.utils.fresh_inductor_cache():
             self.check_model_hotswap(do_compile=True, rank0=rank0, rank1=rank1, target_modules0=target_modules)

     @parameterized.expand([(11, 11), (7, 13), (13, 7)])  # important to test small to large and vice versa
...
@@ -19,20 +19,16 @@ import torch
 from diffusers import HunyuanVideoTransformer3DModel
 from diffusers.utils.testing_utils import (
     enable_full_determinism,
-    is_torch_compile,
-    require_torch_2,
-    require_torch_gpu,
-    slow,
     torch_device,
 )

-from ..test_modeling_common import ModelTesterMixin
+from ..test_modeling_common import ModelTesterMixin, TorchCompileTesterMixin


 enable_full_determinism()


-class HunyuanVideoTransformer3DTests(ModelTesterMixin, unittest.TestCase):
+class HunyuanVideoTransformer3DTests(ModelTesterMixin, TorchCompileTesterMixin, unittest.TestCase):
     model_class = HunyuanVideoTransformer3DModel
     main_input_name = "hidden_states"
     uses_custom_attn_processor = True
@@ -96,23 +92,8 @@ class HunyuanVideoTransformer3DTests(ModelTesterMixin, unittest.TestCase):
         expected_set = {"HunyuanVideoTransformer3DModel"}
         super().test_gradient_checkpointing_is_applied(expected_set=expected_set)

-    @require_torch_gpu
-    @require_torch_2
-    @is_torch_compile
-    @slow
-    def test_torch_compile_recompilation_and_graph_break(self):
-        torch._dynamo.reset()
-        init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
-        model = self.model_class(**init_dict).to(torch_device)
-        model = torch.compile(model, fullgraph=True)
-
-        with torch._dynamo.config.patch(error_on_recompile=True), torch.no_grad():
-            _ = model(**inputs_dict)
-            _ = model(**inputs_dict)
-

-class HunyuanSkyreelsImageToVideoTransformer3DTests(ModelTesterMixin, unittest.TestCase):
+class HunyuanSkyreelsImageToVideoTransformer3DTests(ModelTesterMixin, TorchCompileTesterMixin, unittest.TestCase):
     model_class = HunyuanVideoTransformer3DModel
     main_input_name = "hidden_states"
     uses_custom_attn_processor = True
@@ -179,23 +160,8 @@ class HunyuanSkyreelsImageToVideoTransformer3DTests(ModelTesterMixin, unittest.TestCase):
         expected_set = {"HunyuanVideoTransformer3DModel"}
         super().test_gradient_checkpointing_is_applied(expected_set=expected_set)

-    @require_torch_gpu
-    @require_torch_2
-    @is_torch_compile
-    @slow
-    def test_torch_compile_recompilation_and_graph_break(self):
-        torch._dynamo.reset()
-        init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
-        model = self.model_class(**init_dict).to(torch_device)
-        model = torch.compile(model, fullgraph=True)
-
-        with torch._dynamo.config.patch(error_on_recompile=True), torch.no_grad():
-            _ = model(**inputs_dict)
-            _ = model(**inputs_dict)
-

-class HunyuanVideoImageToVideoTransformer3DTests(ModelTesterMixin, unittest.TestCase):
+class HunyuanVideoImageToVideoTransformer3DTests(ModelTesterMixin, TorchCompileTesterMixin, unittest.TestCase):
     model_class = HunyuanVideoTransformer3DModel
     main_input_name = "hidden_states"
     uses_custom_attn_processor = True
@@ -260,23 +226,10 @@ class HunyuanVideoImageToVideoTransformer3DTests(ModelTesterMixin, unittest.TestCase):
         expected_set = {"HunyuanVideoTransformer3DModel"}
         super().test_gradient_checkpointing_is_applied(expected_set=expected_set)

-    @require_torch_gpu
-    @require_torch_2
-    @is_torch_compile
-    @slow
-    def test_torch_compile_recompilation_and_graph_break(self):
-        torch._dynamo.reset()
-        init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
-        model = self.model_class(**init_dict).to(torch_device)
-        model = torch.compile(model, fullgraph=True)
-
-        with torch._dynamo.config.patch(error_on_recompile=True), torch.no_grad():
-            _ = model(**inputs_dict)
-            _ = model(**inputs_dict)
-

-class HunyuanVideoTokenReplaceImageToVideoTransformer3DTests(ModelTesterMixin, unittest.TestCase):
+class HunyuanVideoTokenReplaceImageToVideoTransformer3DTests(
+    ModelTesterMixin, TorchCompileTesterMixin, unittest.TestCase
+):
     model_class = HunyuanVideoTransformer3DModel
     main_input_name = "hidden_states"
     uses_custom_attn_processor = True
@@ -342,18 +295,3 @@ class HunyuanVideoTokenReplaceImageToVideoTransformer3DTests(ModelTesterMixin, unittest.TestCase):
     def test_gradient_checkpointing_is_applied(self):
         expected_set = {"HunyuanVideoTransformer3DModel"}
         super().test_gradient_checkpointing_is_applied(expected_set=expected_set)
-
-    @require_torch_gpu
-    @require_torch_2
-    @is_torch_compile
-    @slow
-    def test_torch_compile_recompilation_and_graph_break(self):
-        torch._dynamo.reset()
-        init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
-        model = self.model_class(**init_dict).to(torch_device)
-        model = torch.compile(model, fullgraph=True)
-
-        with torch._dynamo.config.patch(error_on_recompile=True), torch.no_grad():
-            _ = model(**inputs_dict)
-            _ = model(**inputs_dict)
@@ -19,20 +19,16 @@ import torch
 from diffusers import WanTransformer3DModel
 from diffusers.utils.testing_utils import (
     enable_full_determinism,
-    is_torch_compile,
-    require_torch_2,
-    require_torch_gpu,
-    slow,
     torch_device,
 )

-from ..test_modeling_common import ModelTesterMixin
+from ..test_modeling_common import ModelTesterMixin, TorchCompileTesterMixin


 enable_full_determinism()


-class WanTransformer3DTests(ModelTesterMixin, unittest.TestCase):
+class WanTransformer3DTests(ModelTesterMixin, TorchCompileTesterMixin, unittest.TestCase):
     model_class = WanTransformer3DModel
     main_input_name = "hidden_states"
     uses_custom_attn_processor = True
@@ -86,18 +82,3 @@ class WanTransformer3DTests(ModelTesterMixin, unittest.TestCase):
     def test_gradient_checkpointing_is_applied(self):
         expected_set = {"WanTransformer3DModel"}
         super().test_gradient_checkpointing_is_applied(expected_set=expected_set)
-
-    @require_torch_gpu
-    @require_torch_2
-    @is_torch_compile
-    @slow
-    def test_torch_compile_recompilation_and_graph_break(self):
-        torch._dynamo.reset()
-        init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
-        model = self.model_class(**init_dict).to(torch_device)
-        model = torch.compile(model, fullgraph=True)
-
-        with torch._dynamo.config.patch(error_on_recompile=True), torch.no_grad():
-            _ = model(**inputs_dict)
-            _ = model(**inputs_dict)
@@ -15,7 +15,6 @@
 import gc
 import tempfile
-import traceback
 import unittest

 import numpy as np
@@ -39,13 +38,9 @@ from diffusers.utils.testing_utils import (
     backend_reset_max_memory_allocated,
     backend_reset_peak_memory_stats,
     enable_full_determinism,
-    get_python_version,
-    is_torch_compile,
     load_image,
     load_numpy,
-    require_torch_2,
     require_torch_accelerator,
-    run_test_in_subprocess,
     slow,
     torch_device,
 )
@@ -68,52 +63,6 @@ from ..test_pipelines_common import (
 enable_full_determinism()


-# Will be run via run_test_in_subprocess
-def _test_stable_diffusion_compile(in_queue, out_queue, timeout):
-    error = None
-    try:
-        _ = in_queue.get(timeout=timeout)
-
-        controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny")
-        pipe = StableDiffusionControlNetPipeline.from_pretrained(
-            "stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
-        )
-        pipe.to(torch_device)
-        pipe.set_progress_bar_config(disable=None)
-
-        pipe.unet.to(memory_format=torch.channels_last)
-        pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
-
-        pipe.controlnet.to(memory_format=torch.channels_last)
-        pipe.controlnet = torch.compile(pipe.controlnet, mode="reduce-overhead", fullgraph=True)
-
-        generator = torch.Generator(device="cpu").manual_seed(0)
-        prompt = "bird"
-        image = load_image(
-            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png"
-        ).resize((512, 512))
-
-        output = pipe(prompt, image, num_inference_steps=10, generator=generator, output_type="np")
-        image = output.images[0]
-
-        assert image.shape == (512, 512, 3)
-
-        expected_image = load_numpy(
-            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny_out_full.npy"
-        )
-        expected_image = np.resize(expected_image, (512, 512, 3))
-
-        assert np.abs(expected_image - image).max() < 1.0
-
-    except Exception:
-        error = f"{traceback.format_exc()}"
-
-    results = {"error": error}
-    out_queue.put(results, timeout=timeout)
-    out_queue.join()
-
-
 class ControlNetPipelineFastTests(
     IPAdapterTesterMixin,
     PipelineLatentTesterMixin,
@@ -1053,15 +1002,6 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
         expected_slice = np.array([0.1655, 0.1721, 0.1623, 0.1685, 0.1711, 0.1646, 0.1651, 0.1631, 0.1494])
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

-    @is_torch_compile
-    @require_torch_2
-    @unittest.skipIf(
-        get_python_version == (3, 12),
-        reason="Torch Dynamo isn't yet supported for Python 3.12.",
-    )
-    def test_stable_diffusion_compile(self):
-        run_test_in_subprocess(test_case=self, target_func=_test_stable_diffusion_compile, inputs=None)
-
     def test_v11_shuffle_global_pool_conditions(self):
         controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11e_sd15_shuffle")
...
@@ -14,7 +14,6 @@
 # limitations under the License.

 import gc
-import traceback
 import unittest

 import numpy as np
@@ -36,13 +35,9 @@ from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.testing_utils import (
     backend_empty_cache,
     enable_full_determinism,
-    is_torch_compile,
     load_image,
-    load_numpy,
     require_accelerator,
-    require_torch_2,
     require_torch_accelerator,
-    run_test_in_subprocess,
     slow,
     torch_device,
 )
@@ -78,53 +73,6 @@ def to_np(tensor):
     return tensor


-# Will be run via run_test_in_subprocess
-def _test_stable_diffusion_compile(in_queue, out_queue, timeout):
-    error = None
-    try:
-        _ = in_queue.get(timeout=timeout)
-
-        controlnet = ControlNetXSAdapter.from_pretrained(
-            "UmerHA/Testing-ConrolNetXS-SD2.1-canny", torch_dtype=torch.float16
-        )
-        pipe = StableDiffusionControlNetXSPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-2-1-base",
-            controlnet=controlnet,
-            safety_checker=None,
-            torch_dtype=torch.float16,
-        )
-        pipe.to(torch_device)
-        pipe.set_progress_bar_config(disable=None)
-
-        pipe.unet.to(memory_format=torch.channels_last)
-        pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
-
-        generator = torch.Generator(device="cpu").manual_seed(0)
-        prompt = "bird"
-        image = load_image(
-            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png"
-        ).resize((512, 512))
-
-        output = pipe(prompt, image, num_inference_steps=10, generator=generator, output_type="np")
-        image = output.images[0]
-
-        assert image.shape == (512, 512, 3)
-
-        expected_image = load_numpy(
-            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny_out_full.npy"
-        )
-        expected_image = np.resize(expected_image, (512, 512, 3))
-
-        assert np.abs(expected_image - image).max() < 1.0
-
-    except Exception:
-        error = f"{traceback.format_exc()}"
-
-    results = {"error": error}
-    out_queue.put(results, timeout=timeout)
-    out_queue.join()
-
-
 class ControlNetXSPipelineFastTests(
     PipelineLatentTesterMixin,
     PipelineKarrasSchedulerTesterMixin,
@@ -402,8 +350,3 @@ class ControlNetXSPipelineSlowTests(unittest.TestCase):
         original_image = image[-3:, -3:, -1].flatten()
         expected_image = np.array([0.4844, 0.4937, 0.4956, 0.4663, 0.5039, 0.5044, 0.4565, 0.4883, 0.4941])
         assert np.allclose(original_image, expected_image, atol=1e-04)
-
-    @is_torch_compile
-    @require_torch_2
-    def test_stable_diffusion_compile(self):
-        run_test_in_subprocess(test_case=self, target_func=_test_stable_diffusion_compile, inputs=None)
@@ -17,7 +17,6 @@
 import gc
 import tempfile
 import time
-import traceback
 import unittest

 import numpy as np
@@ -49,16 +48,12 @@ from diffusers.utils.testing_utils import (
     backend_reset_max_memory_allocated,
     backend_reset_peak_memory_stats,
     enable_full_determinism,
-    is_torch_compile,
-    load_image,
     load_numpy,
     nightly,
     numpy_cosine_similarity_distance,
     require_accelerate_version_greater,
-    require_torch_2,
     require_torch_accelerator,
     require_torch_multi_accelerator,
-    run_test_in_subprocess,
     skip_mps,
     slow,
     torch_device,
@@ -81,39 +76,6 @@ from ..test_pipelines_common import (
 enable_full_determinism()


-# Will be run via run_test_in_subprocess
-def _test_stable_diffusion_compile(in_queue, out_queue, timeout):
-    error = None
-    try:
-        inputs = in_queue.get(timeout=timeout)
-        torch_device = inputs.pop("torch_device")
-        seed = inputs.pop("seed")
-        inputs["generator"] = torch.Generator(device=torch_device).manual_seed(seed)
-
-        sd_pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", safety_checker=None)
-        sd_pipe.scheduler = DDIMScheduler.from_config(sd_pipe.scheduler.config)
-        sd_pipe = sd_pipe.to(torch_device)
-
-        sd_pipe.unet.to(memory_format=torch.channels_last)
-        sd_pipe.unet = torch.compile(sd_pipe.unet, mode="reduce-overhead", fullgraph=True)
-
-        sd_pipe.set_progress_bar_config(disable=None)
-
-        image = sd_pipe(**inputs).images
-        image_slice = image[0, -3:, -3:, -1].flatten()
-
-        assert image.shape == (1, 512, 512, 3)
-        expected_slice = np.array([0.38019, 0.28647, 0.27321, 0.40377, 0.38290, 0.35446, 0.39218, 0.38165, 0.42239])
-        assert np.abs(image_slice - expected_slice).max() < 5e-3
-    except Exception:
-        error = f"{traceback.format_exc()}"
-
-    results = {"error": error}
-    out_queue.put(results, timeout=timeout)
-    out_queue.join()
-
-
 class StableDiffusionPipelineFastTests(
     IPAdapterTesterMixin,
     PipelineLatentTesterMixin,
@@ -1224,40 +1186,6 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
         max_diff = np.abs(expected_image - image).max()
         assert max_diff < 8e-1

-    @is_torch_compile
-    @require_torch_2
-    def test_stable_diffusion_compile(self):
-        seed = 0
-        inputs = self.get_inputs(torch_device, seed=seed)
-        # Can't pickle a Generator object
-        del inputs["generator"]
-        inputs["torch_device"] = torch_device
-        inputs["seed"] = seed
-        run_test_in_subprocess(test_case=self, target_func=_test_stable_diffusion_compile, inputs=inputs)
-
-    def test_stable_diffusion_lcm(self):
-        unet = UNet2DConditionModel.from_pretrained("SimianLuo/LCM_Dreamshaper_v7", subfolder="unet")
-        sd_pipe = StableDiffusionPipeline.from_pretrained("Lykon/dreamshaper-7", unet=unet).to(torch_device)
-        sd_pipe.scheduler = LCMScheduler.from_config(sd_pipe.scheduler.config)
-        sd_pipe.set_progress_bar_config(disable=None)
-
-        inputs = self.get_inputs(torch_device)
-        inputs["num_inference_steps"] = 6
-        inputs["output_type"] = "pil"
-        image = sd_pipe(**inputs).images[0]
-
-        expected_image = load_image(
-            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/lcm_full/stable_diffusion_lcm.png"
-        )
-
-        image = sd_pipe.image_processor.pil_to_numpy(image)
-        expected_image = sd_pipe.image_processor.pil_to_numpy(expected_image)
-
-        max_diff = numpy_cosine_similarity_distance(image.flatten(), expected_image.flatten())
-
-        assert max_diff < 1e-2
-
 @slow
 @require_torch_accelerator
...
@@ -15,7 +15,6 @@
 import gc
 import random
-import traceback
 import unittest

 import numpy as np
@@ -41,13 +40,10 @@ from diffusers.utils.testing_utils import (
     backend_reset_peak_memory_stats,
     enable_full_determinism,
     floats_tensor,
-    is_torch_compile,
     load_image,
     load_numpy,
     nightly,
-    require_torch_2,
     require_torch_accelerator,
-    run_test_in_subprocess,
     skip_mps,
     slow,
     torch_device,
@@ -70,38 +66,6 @@ from ..test_pipelines_common import (
 enable_full_determinism()


-# Will be run via run_test_in_subprocess
-def _test_img2img_compile(in_queue, out_queue, timeout):
-    error = None
-    try:
-        inputs = in_queue.get(timeout=timeout)
-        torch_device = inputs.pop("torch_device")
-        seed = inputs.pop("seed")
-        inputs["generator"] = torch.Generator(device=torch_device).manual_seed(seed)
-
-        pipe = StableDiffusionImg2ImgPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", safety_checker=None)
-        pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
-        pipe.unet.set_default_attn_processor()
-        pipe.to(torch_device)
-        pipe.set_progress_bar_config(disable=None)
-        pipe.unet.to(memory_format=torch.channels_last)
-        pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
-
-        image = pipe(**inputs).images
-        image_slice = image[0, -3:, -3:, -1].flatten()
-
-        assert image.shape == (1, 512, 768, 3)
-        expected_slice = np.array([0.0606, 0.0570, 0.0805, 0.0579, 0.0628, 0.0623, 0.0843, 0.1115, 0.0806])
-        assert np.abs(expected_slice - image_slice).max() < 1e-3
-    except Exception:
-        error = f"{traceback.format_exc()}"
-
-    results = {"error": error}
-    out_queue.put(results, timeout=timeout)
-    out_queue.join()
-
-
 class StableDiffusionImg2ImgPipelineFastTests(
     IPAdapterTesterMixin,
     PipelineLatentTesterMixin,
@@ -654,17 +618,6 @@ class StableDiffusionImg2ImgPipelineSlowTests(unittest.TestCase):
         assert out.nsfw_content_detected[0], f"Safety checker should work for prompt: {inputs['prompt']}"
         assert np.abs(out.images[0]).sum() < 1e-5  # should be all zeros

-    @is_torch_compile
-    @require_torch_2
-    def test_img2img_compile(self):
-        seed = 0
-        inputs = self.get_inputs(torch_device, seed=seed)
-        # Can't pickle a Generator object
-        del inputs["generator"]
-        inputs["torch_device"] = torch_device
-        inputs["seed"] = seed
-        run_test_in_subprocess(test_case=self, target_func=_test_img2img_compile, inputs=inputs)
-
 @nightly
 @require_torch_accelerator
...
@@ -15,7 +15,6 @@
 import gc
 import random
-import traceback
 import unittest

 import numpy as np
@@ -44,13 +43,10 @@ from diffusers.utils.testing_utils import (
     backend_reset_peak_memory_stats,
     enable_full_determinism,
     floats_tensor,
-    is_torch_compile,
     load_image,
     load_numpy,
     nightly,
-    require_torch_2,
     require_torch_accelerator,
-    run_test_in_subprocess,
     slow,
     torch_device,
 )
@@ -71,40 +67,6 @@ from ..test_pipelines_common import (
 enable_full_determinism()


-# Will be run via run_test_in_subprocess
-def _test_inpaint_compile(in_queue, out_queue, timeout):
-    error = None
-    try:
-        inputs = in_queue.get(timeout=timeout)
-        torch_device = inputs.pop("torch_device")
-        seed = inputs.pop("seed")
-        inputs["generator"] = torch.Generator(device=torch_device).manual_seed(seed)
-
-        pipe = StableDiffusionInpaintPipeline.from_pretrained(
-            "botp/stable-diffusion-v1-5-inpainting", safety_checker=None
-        )
-        pipe.unet.set_default_attn_processor()
-        pipe.scheduler = PNDMScheduler.from_config(pipe.scheduler.config)
-        pipe.to(torch_device)
-        pipe.set_progress_bar_config(disable=None)
-        pipe.unet.to(memory_format=torch.channels_last)
-        pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
-
-        image = pipe(**inputs).images
-        image_slice = image[0, 253:256, 253:256, -1].flatten()
-
-        assert image.shape == (1, 512, 512, 3)
-        expected_slice = np.array([0.0689, 0.0699, 0.0790, 0.0536, 0.0470, 0.0488, 0.041, 0.0508, 0.04179])
-        assert np.abs(expected_slice - image_slice).max() < 3e-3
-    except Exception:
-        error = f"{traceback.format_exc()}"
-
-    results = {"error": error}
-    out_queue.put(results, timeout=timeout)
-    out_queue.join()
-
-
 class StableDiffusionInpaintPipelineFastTests(
     IPAdapterTesterMixin,
     PipelineLatentTesterMixin,
@@ -727,17 +689,6 @@ class StableDiffusionInpaintPipelineSlowTests(unittest.TestCase):
         # make sure that less than 2.2 GB is allocated
         assert mem_bytes < 2.2 * 10**9

-    @is_torch_compile
-    @require_torch_2
-    def test_inpaint_compile(self):
-        seed = 0
-        inputs = self.get_inputs(torch_device, seed=seed)
-        # Can't pickle a Generator object
-        del inputs["generator"]
-        inputs["torch_device"] = torch_device
-        inputs["seed"] = seed
-        run_test_in_subprocess(test_case=self, target_func=_test_inpaint_compile, inputs=inputs)
-
     def test_stable_diffusion_inpaint_pil_input_resolution_test(self):
         pipe = StableDiffusionInpaintPipeline.from_pretrained(
             "botp/stable-diffusion-v1-5-inpainting", safety_checker=None
@@ -964,11 +915,6 @@ class StableDiffusionInpaintPipelineAsymmetricAutoencoderKLSlowTests(unittest.TestCase):
         # make sure that less than 2.45 GB is allocated
         assert mem_bytes < 2.45 * 10**9

-    @is_torch_compile
-    @require_torch_2
-    def test_inpaint_compile(self):
-        pass
-
     def test_stable_diffusion_inpaint_pil_input_resolution_test(self):
         vae = AsymmetricAutoencoderKL.from_pretrained(
             "cross-attention/asymmetric-autoencoder-kl-x-1-5",
...
@@ -2006,7 +2006,9 @@ class PipelineSlowTests(unittest.TestCase):
         reason="Torch Dynamo isn't yet supported for Python 3.12.",
     )
     def test_from_save_pretrained_dynamo(self):
-        run_test_in_subprocess(test_case=self, target_func=_test_from_save_pretrained_dynamo, inputs=None)
+        torch.compiler.reset()
+        with torch._inductor.utils.fresh_inductor_cache():
+            run_test_in_subprocess(test_case=self, target_func=_test_from_save_pretrained_dynamo, inputs=None)

     def test_from_pretrained_hub(self):
         model_path = "google/ddpm-cifar10-32"
@@ -2218,7 +2220,7 @@ class TestLoraHotSwappingForPipeline(unittest.TestCase):
         # It is critical that the dynamo cache is reset for each test. Otherwise, if the test re-uses the same model,
         # there will be recompilation errors, as torch caches the model when run in the same process.
         super().tearDown()
-        torch._dynamo.reset()
+        torch.compiler.reset()
         gc.collect()
         backend_empty_cache(torch_device)
@@ -2343,21 +2345,21 @@ class TestLoraHotSwappingForPipeline(unittest.TestCase):
     def test_hotswapping_compiled_pipline_linear(self, rank0, rank1):
         # It's important to add this context to raise an error on recompilation
         target_modules = ["to_q", "to_k", "to_v", "to_out.0"]
-        with torch._dynamo.config.patch(error_on_recompile=True):
+        with torch._dynamo.config.patch(error_on_recompile=True), torch._inductor.utils.fresh_inductor_cache():
             self.check_pipeline_hotswap(do_compile=True, rank0=rank0, rank1=rank1, target_modules0=target_modules)

     @parameterized.expand([(11, 11), (7, 13), (13, 7)])  # important to test small to large and vice versa
     def test_hotswapping_compiled_pipline_conv2d(self, rank0, rank1):
         # It's important to add this context to raise an error on recompilation
         target_modules = ["conv", "conv1", "conv2"]
-        with torch._dynamo.config.patch(error_on_recompile=True):
+        with torch._dynamo.config.patch(error_on_recompile=True), torch._inductor.utils.fresh_inductor_cache():
             self.check_pipeline_hotswap(do_compile=True, rank0=rank0, rank1=rank1, target_modules0=target_modules)

     @parameterized.expand([(11, 11), (7, 13), (13, 7)])  # important to test small to large and vice versa
     def test_hotswapping_compiled_pipline_both_linear_and_conv2d(self, rank0, rank1):
         # It's important to add this context to raise an error on recompilation
         target_modules = ["to_q", "conv"]
-        with torch._dynamo.config.patch(error_on_recompile=True):
+        with torch._dynamo.config.patch(error_on_recompile=True), torch._inductor.utils.fresh_inductor_cache():
             self.check_pipeline_hotswap(do_compile=True, rank0=rank0, rank1=rank1, target_modules0=target_modules)

     def test_enable_lora_hotswap_called_after_adapter_added_raises(self):
...
@@ -1111,14 +1111,14 @@ class PipelineTesterMixin:
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
-        torch._dynamo.reset()
+        torch.compiler.reset()
         gc.collect()
         backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test in case of CUDA runtime errors
         super().tearDown()
-        torch._dynamo.reset()
+        torch.compiler.reset()
         gc.collect()
         backend_empty_cache(torch_device)
...
 import gc
 import random
-import traceback
 import unittest

 import numpy as np
@@ -27,9 +26,7 @@ from diffusers.utils.testing_utils import (
     floats_tensor,
     load_image,
     nightly,
-    require_torch_2,
     require_torch_accelerator,
-    run_test_in_subprocess,
     torch_device,
 )
 from diffusers.utils.torch_utils import randn_tensor
@@ -45,38 +42,6 @@ from ..test_pipelines_common import PipelineKarrasSchedulerTesterMixin, Pipeline
 enable_full_determinism()


-# Will be run via run_test_in_subprocess
-def _test_unidiffuser_compile(in_queue, out_queue, timeout):
-    error = None
-    try:
-        inputs = in_queue.get(timeout=timeout)
-        torch_device = inputs.pop("torch_device")
-        seed = inputs.pop("seed")
-        inputs["generator"] = torch.Generator(device=torch_device).manual_seed(seed)
-
-        pipe = UniDiffuserPipeline.from_pretrained("thu-ml/unidiffuser-v1")
-        # pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
-        pipe = pipe.to(torch_device)
-        pipe.unet.to(memory_format=torch.channels_last)
-        pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
-        pipe.set_progress_bar_config(disable=None)
-
-        image = pipe(**inputs).images
-        image_slice = image[0, -3:, -3:, -1].flatten()
-
-        assert image.shape == (1, 512, 512, 3)
-        expected_slice = np.array([0.2402, 0.2375, 0.2285, 0.2378, 0.2407, 0.2263, 0.2354, 0.2307, 0.2520])
-        assert np.abs(image_slice - expected_slice).max() < 1e-1
-    except Exception:
-        error = f"{traceback.format_exc()}"
-
-    results = {"error": error}
-    out_queue.put(results, timeout=timeout)
-    out_queue.join()
-
-
 class UniDiffuserPipelineFastTests(
     PipelineTesterMixin, PipelineLatentTesterMixin, PipelineKarrasSchedulerTesterMixin, unittest.TestCase
 ):
@@ -690,19 +655,6 @@ class UniDiffuserPipelineSlowTests(unittest.TestCase):
         expected_text_prefix = "An astronaut"
         assert text[0][: len(expected_text_prefix)] == expected_text_prefix

-    @unittest.skip(reason="Skip torch.compile test to speed up the slow test suite.")
-    @require_torch_2
-    def test_unidiffuser_compile(self, seed=0):
-        inputs = self.get_inputs(torch_device, seed=seed, generate_latents=True)
-        # Delete prompt and image for joint inference.
-        del inputs["prompt"]
-        del inputs["image"]
-        # Can't pickle a Generator object
-        del inputs["generator"]
-        inputs["torch_device"] = torch_device
-        inputs["seed"] = seed
-        run_test_in_subprocess(test_case=self, target_func=_test_unidiffuser_compile, inputs=inputs)
-
 @nightly
 @require_torch_accelerator
...