Unverified Commit ba06124e authored by Aryan, committed by GitHub

Remove CogVideoX mentions from single file docs; Test updates (#9444)

* remove mentions from single file

* update tests

* update
parent bb1b0fa1
@@ -22,9 +22,6 @@ The [`~loaders.FromSingleFileMixin.from_single_file`] method allows you to load:
 
 ## Supported pipelines
 
-- [`CogVideoXPipeline`]
-- [`CogVideoXImageToVideoPipeline`]
-- [`CogVideoXVideoToVideoPipeline`]
 - [`StableDiffusionPipeline`]
 - [`StableDiffusionImg2ImgPipeline`]
 - [`StableDiffusionInpaintPipeline`]
@@ -52,7 +49,6 @@ The [`~loaders.FromSingleFileMixin.from_single_file`] method allows you to load:
 - [`UNet2DConditionModel`]
 - [`StableCascadeUNet`]
 - [`AutoencoderKL`]
-- [`AutoencoderKLCogVideoX`]
 - [`ControlNetModel`]
 - [`SD3Transformer2DModel`]
 - [`FluxTransformer2DModel`]
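For context, the lists trimmed above describe what [`~loaders.FromSingleFileMixin.from_single_file`] can load. Below is a minimal sketch of the call for a pipeline that remains supported; the checkpoint path is a placeholder, not part of this commit.

# Minimal sketch of from_single_file for a still-supported pipeline.
# The checkpoint path is a placeholder (any SD v1.x single-file
# .safetensors checkpoint would do).
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_single_file(
    "./v1-5-pruned-emaonly.safetensors",  # placeholder path
    torch_dtype=torch.float16,
)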
@@ -57,6 +57,7 @@ class CogVideoXPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             "callback_on_step_end_tensor_inputs",
         ]
     )
+    test_xformers_attention = False
 
     def get_dummy_components(self):
         torch.manual_seed(0)
@@ -71,8 +72,8 @@ class CogVideoXPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             time_embed_dim=2,
             text_embed_dim=32,  # Must match with tiny-random-t5
             num_layers=1,
-            sample_width=16,  # latent width: 2 -> final width: 16
-            sample_height=16,  # latent height: 2 -> final height: 16
+            sample_width=2,  # latent width: 2 -> final width: 16
+            sample_height=2,  # latent height: 2 -> final height: 16
             sample_frames=9,  # latent frames: (9 - 1) / 4 + 1 = 3 -> final frames: 9
             patch_size=2,
             temporal_compression_ratio=4,
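The inline comments encode the compression arithmetic behind the fix: the sample_* values are latent sizes, not pixel sizes. A worked check of that arithmetic, assuming the spatial scale factor of 8 used by the usual CogVideoX VAE (the temporal ratio of 4 is the temporal_compression_ratio configured above):

# Worked check of the comment arithmetic above. The spatial scale
# factor of 8 is an assumption from the usual CogVideoX VAE; the
# temporal ratio of 4 matches temporal_compression_ratio above.
vae_scale_factor_spatial = 8
temporal_compression_ratio = 4

latent_width = latent_height = 2
final_width = latent_width * vae_scale_factor_spatial    # 2 * 8 = 16
final_height = latent_height * vae_scale_factor_spatial  # 2 * 8 = 16

final_frames = 9
latent_frames = (final_frames - 1) // temporal_compression_ratio + 1  # (9 - 1) / 4 + 1 = 3

assert (final_width, final_height, latent_frames) == (16, 16, 3)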
@@ -280,10 +281,6 @@ class CogVideoXPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             "VAE tiling should not affect the inference results",
         )
 
-    @unittest.skip("xformers attention processor does not exist for CogVideoX")
-    def test_xformers_attention_forwardGenerator_pass(self):
-        pass
-
     def test_fused_qkv_projections(self):
         device = "cpu"  # ensure determinism for the device-dependent torch.Generator
         components = self.get_dummy_components()
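In both test files, the change swaps the @unittest.skip override removed above for the class-level test_xformers_attention flag that the tester mixin consults. A hedged sketch of that flag-gating pattern; the real PipelineTesterMixin internals in diffusers may differ in detail:

import unittest

# Hedged sketch of flag-gated tests; PipelineTesterMixinSketch is a
# stand-in, not the real diffusers PipelineTesterMixin.
class PipelineTesterMixinSketch:
    test_xformers_attention = True  # subclasses set False to opt out

    def test_xformers_attention_forwardGenerator_pass(self):
        if not self.test_xformers_attention:
            self.skipTest("xformers attention not supported for this pipeline")
        ...  # would compare outputs with and without xformers attention

class CogVideoXLikeFastTests(PipelineTesterMixinSketch, unittest.TestCase):
    test_xformers_attention = False  # replaces the @unittest.skip override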
@@ -269,8 +269,9 @@ class CogVideoXPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         generator_device = "cpu"
         components = self.get_dummy_components()
 
-        # The reason to modify it this way is because I2V Transformer limits the generation to resolutions.
-        # See the if-statement on "self.use_learned_positional_embeddings"
+        # The reason to modify it this way is because the I2V Transformer limits generation to the resolutions used during initialization.
+        # This limitation comes from using learned positional embeddings, which cannot be generated on the fly like sincos or RoPE embeddings.
+        # See the if-statement on "self.use_learned_positional_embeddings" in diffusers/models/embeddings.py
         components["transformer"] = CogVideoXTransformer3DModel.from_config(
             components["transformer"].config,
             sample_height=16,
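The expanded comment is the key point of this hunk: a learned positional embedding is a fixed-size parameter created at init, so the I2V transformer only accepts the resolution it was built with, and the test therefore rebuilds the transformer via from_config at the target size. A minimal illustration of why the learned table is resolution-bound (all dimensions here are hypothetical):

import torch
import torch.nn as nn

# Why a learned positional embedding pins the resolution: the table is
# a fixed-size Parameter sized at init (dimensions are hypothetical).
frames, height, width, patch, dim = 3, 16, 16, 2, 32
num_patches = frames * (height // patch) * (width // patch)
pos_embedding = nn.Parameter(torch.zeros(1, num_patches, dim))

tokens = torch.randn(1, num_patches, dim)
out = tokens + pos_embedding  # only valid when the token count matches

# Sincos or RoPE embeddings can instead be computed for any sequence
# length on the fly, so they impose no such resolution limit.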
@@ -51,6 +51,7 @@ class CogVideoXVideoToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             "callback_on_step_end_tensor_inputs",
         ]
     )
+    test_xformers_attention = False
 
     def get_dummy_components(self):
         torch.manual_seed(0)
@@ -65,8 +66,8 @@ class CogVideoXVideoToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             time_embed_dim=2,
             text_embed_dim=32,  # Must match with tiny-random-t5
             num_layers=1,
-            sample_width=16,  # latent width: 2 -> final width: 16
-            sample_height=16,  # latent height: 2 -> final height: 16
+            sample_width=2,  # latent width: 2 -> final width: 16
+            sample_height=2,  # latent height: 2 -> final height: 16
             sample_frames=9,  # latent frames: (9 - 1) / 4 + 1 = 3 -> final frames: 9
             patch_size=2,
             temporal_compression_ratio=4,
@@ -285,10 +286,6 @@ class CogVideoXVideoToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             "VAE tiling should not affect the inference results",
         )
 
-    @unittest.skip("xformers attention processor does not exist for CogVideoX")
-    def test_xformers_attention_forwardGenerator_pass(self):
-        pass
-
     def test_fused_qkv_projections(self):
         device = "cpu"  # ensure determinism for the device-dependent torch.Generator
         components = self.get_dummy_components()