Unverified commit 474a248f, authored by Aryan and committed by GitHub

[tests] Fix HunyuanVideo Framepack device tests (#11789)

update
parent 7bc0a07b
@@ -71,7 +71,6 @@ class HunyuanVideoFramepackPipelineFastTests(
     )
     supports_dduf = False
-    # there is no xformers processor for Flux
     test_xformers_attention = False
     test_layerwise_casting = True
     test_group_offloading = True
@@ -360,6 +359,30 @@ class HunyuanVideoFramepackPipelineFastTests(
         "VAE tiling should not affect the inference results",
     )

+    def test_float16_inference(self, expected_max_diff=0.2):
+        # NOTE: this test needs a higher tolerance because of multiple forwards through
+        # the model, which compounds the overall fp32 vs fp16 numerical differences. The
+        # results shouldn't be expected to match exactly, so we bump the tolerance.
+        return super().test_float16_inference(expected_max_diff)
+
+    @unittest.skip("The image_encoder uses SiglipVisionModel, which does not support sequential CPU offloading.")
+    def test_sequential_cpu_offload_forward_pass(self):
+        # https://github.com/huggingface/transformers/blob/21cb353b7b4f77c6f5f5c3341d660f86ff416d04/src/transformers/models/siglip/modeling_siglip.py#L803
+        # This is because it instantiates its attention layer from torch.nn.MultiheadAttention, which calls
+        # `torch.nn.functional.multi_head_attention_forward` with the weights and bias. Since the hook is never
+        # triggered with a forward pass call, the weights stay on the CPU. There are more examples where we skip
+        # this test because of MHA (example: HunyuanDiT because of its AttentionPooling layer).
+        pass
+
+    @unittest.skip("The image_encoder uses SiglipVisionModel, which does not support sequential CPU offloading.")
+    def test_sequential_offload_forward_pass_twice(self):
+        # https://github.com/huggingface/transformers/blob/21cb353b7b4f77c6f5f5c3341d660f86ff416d04/src/transformers/models/siglip/modeling_siglip.py#L803
+        # This is because it instantiates its attention layer from torch.nn.MultiheadAttention, which calls
+        # `torch.nn.functional.multi_head_attention_forward` with the weights and bias. Since the hook is never
+        # triggered with a forward pass call, the weights stay on the CPU. There are more examples where we skip
+        # this test because of MHA (example: HunyuanDiT because of its AttentionPooling layer).
+        pass
+
     # TODO(aryan): Create a dummy gemma model with smol vocab size
     @unittest.skip(
         "A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to an embedding lookup error. This test uses a long prompt that causes the error."
......
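The bumped `expected_max_diff` in `test_float16_inference` above exists because fp16 rounding error compounds as the pipeline runs the model several times. Below is a toy, self-contained illustration of that compounding; the tensors and the elementwise "layer" are made up and are not part of the commit:

```python
import torch

torch.manual_seed(0)
x = torch.randn(1024)
scale = torch.randn(1024) * 0.1 + 1.0

# Apply the same elementwise "layer" repeatedly in fp32 and fp16.
# The max abs difference between the two accumulates across passes, which is
# why a multi-forward pipeline needs a looser fp16-vs-fp32 tolerance than a
# single-forward one.
ref, half = x.clone(), x.half()
for step in range(1, 9):
    ref = ref * scale + 0.5
    half = half * scale.half() + 0.5
    max_diff = (ref - half.float()).abs().max().item()
    print(f"pass {step}: max abs diff {max_diff:.6f}")
```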
@@ -124,14 +124,22 @@ class HunyuanDiTPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         max_diff = np.abs(image_slice.flatten() - expected_slice).max()
         self.assertLessEqual(max_diff, 1e-3)

-    @unittest.skip("Not supported.")
+    @unittest.skip("The HunyuanDiT Attention pooling layer does not support sequential CPU offloading.")
     def test_sequential_cpu_offload_forward_pass(self):
         # TODO(YiYi) need to fix later
+        # This is because it instantiates its attention layer from torch.nn.MultiheadAttention, which calls
+        # `torch.nn.functional.multi_head_attention_forward` with the weights and bias. Since the hook is never
+        # triggered with a forward pass call, the weights stay on the CPU. There are more examples where we skip
+        # this test because of MHA (example: HunyuanVideo Framepack).
         pass

-    @unittest.skip("Not supported.")
+    @unittest.skip("The HunyuanDiT Attention pooling layer does not support sequential CPU offloading.")
     def test_sequential_offload_forward_pass_twice(self):
         # TODO(YiYi) need to fix later
+        # This is because it instantiates its attention layer from torch.nn.MultiheadAttention, which calls
+        # `torch.nn.functional.multi_head_attention_forward` with the weights and bias. Since the hook is never
+        # triggered with a forward pass call, the weights stay on the CPU. There are more examples where we skip
+        # this test because of MHA (example: HunyuanVideo Framepack).
         pass

     def test_inference_batch_single_identical(self):
......
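Both skips in this file and the Framepack skips above point at the same root cause: sequential CPU offloading hooks a module's forward pass and moves its weights to the accelerator just in time, but `torch.nn.MultiheadAttention` hands weights and biases straight to `torch.nn.functional.multi_head_attention_forward`, so the hooked sub-layers never run their own forward and their weights are left on the CPU. A rough sketch of that failure mode follows, using a hand-rolled pre-forward hook and made-up module names rather than the actual accelerate/diffusers hooks:

```python
import torch
import torch.nn.functional as F


class FunctionalCaller(torch.nn.Module):
    """Uses its submodule's parameters through a functional call,
    so the submodule's own forward (and any hooks on it) never run."""

    def __init__(self):
        super().__init__()
        self.proj = torch.nn.Linear(8, 8)

    def forward(self, x):
        # Bypasses self.proj.forward, similar in spirit to how
        # nn.MultiheadAttention passes its weights directly to
        # F.multi_head_attention_forward.
        return F.linear(x, self.proj.weight, self.proj.bias)


def offload_hook(module, args):
    # Toy stand-in for a sequential-offload hook: record that the module's
    # forward was intercepted (a real hook would move the weights from the
    # CPU to the accelerator here).
    module.hook_ran = True


model = FunctionalCaller()
model.proj.hook_ran = False
model.proj.register_forward_pre_hook(offload_hook)

model(torch.randn(2, 8))
print(model.proj.hook_ran)  # False: the hook never fired, so the weights would stay on the CPU
```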
@@ -2270,9 +2270,10 @@ class PipelineTesterMixin:
                 if hasattr(module, "_diffusers_hook")
             )
         )
-        for component_name in ["vae", "vqvae"]:
-            if hasattr(pipe, component_name):
-                getattr(pipe, component_name).to(torch_device)
+        for component_name in ["vae", "vqvae", "image_encoder"]:
+            component = getattr(pipe, component_name, None)
+            if isinstance(component, torch.nn.Module):
+                component.to(torch_device)

         def run_forward(pipe):
             torch.manual_seed(0)
......
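The tester change above also swaps a `hasattr` check for `getattr(..., None)` plus an `isinstance` guard. That guard matters because a pipeline can expose a component attribute (for example `image_encoder`) that is set to `None` or to a non-module object, and calling `.to()` on those would fail. A small illustration with a made-up pipeline class, not the real tester code:

```python
import torch


class DummyPipeline:
    def __init__(self):
        self.vae = torch.nn.Conv2d(3, 3, kernel_size=1)
        self.image_encoder = None  # attribute exists but holds no module


pipe = DummyPipeline()

for component_name in ["vae", "vqvae", "image_encoder"]:
    # hasattr(pipe, "image_encoder") is True even though it is None, so
    # calling .to() on it would raise; the isinstance check skips it safely.
    component = getattr(pipe, component_name, None)
    if isinstance(component, torch.nn.Module):
        component.to("cpu")
        print(f"moved {component_name}")
```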