Unverified commit b02e2113 authored by Aritra Roy Gosthipaty, committed by GitHub

[Tests] reduce the model size in the amused fast test (#7804)



* chore: reducing model sizes

* chore: shrinks further

* chore: shrinks further

* chore: shrinking model for img2img pipeline

* chore: reducing size of model for inpaint pipeline

---------
Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
parent 21f023ec
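
The diff below shrinks the dummy UVit2DModel, VQModel, and CLIP text encoder used by the Amused fast tests, mostly from width 32 down to width 8. A quick way to confirm the shrinkage is to count the learnable parameters of each component; a minimal sketch (the param_count helper is illustrative and not part of this PR):

```python
import torch

def param_count(module: torch.nn.Module) -> int:
    """Total number of learnable parameters in a module."""
    return sum(p.numel() for p in module.parameters())

# Hypothetical usage inside one of the fast tests below, assuming
# get_dummy_components() returns a dict of pipeline components:
#   components = self.get_dummy_components()
#   for name, component in components.items():
#       if isinstance(component, torch.nn.Module):
#           print(f"{name}: {param_count(component):,} parameters")
```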

tests/pipelines/amused/test_amused.py
@@ -38,17 +38,17 @@ class AmusedPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     def get_dummy_components(self):
         torch.manual_seed(0)
         transformer = UVit2DModel(
-            hidden_size=32,
+            hidden_size=8,
             use_bias=False,
             hidden_dropout=0.0,
-            cond_embed_dim=32,
+            cond_embed_dim=8,
             micro_cond_encode_dim=2,
             micro_cond_embed_dim=10,
-            encoder_hidden_size=32,
+            encoder_hidden_size=8,
             vocab_size=32,
-            codebook_size=32,
-            in_channels=32,
-            block_out_channels=32,
+            codebook_size=8,
+            in_channels=8,
+            block_out_channels=8,
             num_res_blocks=1,
             downsample=True,
             upsample=True,
@@ -56,7 +56,7 @@ class AmusedPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             num_hidden_layers=1,
             num_attention_heads=1,
             attention_dropout=0.0,
-            intermediate_size=32,
+            intermediate_size=8,
             layer_norm_eps=1e-06,
             ln_elementwise_affine=True,
         )
@@ -64,17 +64,17 @@ class AmusedPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         torch.manual_seed(0)
         vqvae = VQModel(
             act_fn="silu",
-            block_out_channels=[32],
+            block_out_channels=[8],
             down_block_types=[
                 "DownEncoderBlock2D",
             ],
             in_channels=3,
-            latent_channels=32,
-            layers_per_block=2,
-            norm_num_groups=32,
-            num_vq_embeddings=32,
+            latent_channels=8,
+            layers_per_block=1,
+            norm_num_groups=8,
+            num_vq_embeddings=8,
             out_channels=3,
-            sample_size=32,
+            sample_size=8,
             up_block_types=[
                 "UpDecoderBlock2D",
             ],
@@ -85,14 +85,14 @@ class AmusedPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         text_encoder_config = CLIPTextConfig(
             bos_token_id=0,
             eos_token_id=2,
-            hidden_size=32,
-            intermediate_size=64,
+            hidden_size=8,
+            intermediate_size=8,
             layer_norm_eps=1e-05,
-            num_attention_heads=8,
-            num_hidden_layers=3,
+            num_attention_heads=1,
+            num_hidden_layers=1,
             pad_token_id=1,
             vocab_size=1000,
-            projection_dim=32,
+            projection_dim=8,
         )
         text_encoder = CLIPTextModelWithProjection(text_encoder_config)
         tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

tests/pipelines/amused/test_amused_img2img.py
@@ -42,17 +42,17 @@ class AmusedImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     def get_dummy_components(self):
         torch.manual_seed(0)
         transformer = UVit2DModel(
-            hidden_size=32,
+            hidden_size=8,
             use_bias=False,
             hidden_dropout=0.0,
-            cond_embed_dim=32,
+            cond_embed_dim=8,
             micro_cond_encode_dim=2,
             micro_cond_embed_dim=10,
-            encoder_hidden_size=32,
+            encoder_hidden_size=8,
             vocab_size=32,
-            codebook_size=32,
-            in_channels=32,
-            block_out_channels=32,
+            codebook_size=8,
+            in_channels=8,
+            block_out_channels=8,
             num_res_blocks=1,
             downsample=True,
             upsample=True,
@@ -60,7 +60,7 @@ class AmusedImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             num_hidden_layers=1,
             num_attention_heads=1,
             attention_dropout=0.0,
-            intermediate_size=32,
+            intermediate_size=8,
             layer_norm_eps=1e-06,
             ln_elementwise_affine=True,
         )
@@ -68,17 +68,17 @@ class AmusedImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         torch.manual_seed(0)
         vqvae = VQModel(
             act_fn="silu",
-            block_out_channels=[32],
+            block_out_channels=[8],
             down_block_types=[
                 "DownEncoderBlock2D",
             ],
             in_channels=3,
-            latent_channels=32,
-            layers_per_block=2,
-            norm_num_groups=32,
-            num_vq_embeddings=32,
+            latent_channels=8,
+            layers_per_block=1,
+            norm_num_groups=8,
+            num_vq_embeddings=32,  # reducing this to 16 or 8 -> RuntimeError: "cdist_cuda" not implemented for 'Half'
             out_channels=3,
-            sample_size=32,
+            sample_size=8,
             up_block_types=[
                 "UpDecoderBlock2D",
             ],
@@ -89,14 +89,14 @@ class AmusedImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         text_encoder_config = CLIPTextConfig(
             bos_token_id=0,
             eos_token_id=2,
-            hidden_size=32,
-            intermediate_size=64,
+            hidden_size=8,
+            intermediate_size=8,
             layer_norm_eps=1e-05,
-            num_attention_heads=8,
-            num_hidden_layers=3,
+            num_attention_heads=1,
+            num_hidden_layers=1,
             pad_token_id=1,
             vocab_size=1000,
-            projection_dim=32,
+            projection_dim=8,
         )
         text_encoder = CLIPTextModelWithProjection(text_encoder_config)
         tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
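
The inline comment on num_vq_embeddings above refers to the nearest-neighbour codebook lookup in the VQ quantizer, which computes pairwise distances between latents and codebook entries; torch.cdist has no float16 kernel on CUDA, hence the error when the smaller configuration exercises that path. A minimal sketch of such a lookup (an illustrative reimplementation under that assumption, not diffusers' exact code):

```python
import torch

def nearest_codebook_indices(z: torch.Tensor, codebook: torch.Tensor) -> torch.Tensor:
    """z: (N, D) latents, codebook: (K, D) entries -> (N,) nearest-entry indices."""
    # torch.cdist raises RuntimeError: "cdist_cuda" not implemented for 'Half',
    # so half-precision inputs are upcast to float32 before computing distances.
    distances = torch.cdist(z.float(), codebook.float())  # (N, K) pairwise L2 distances
    return distances.argmin(dim=1)
```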

tests/pipelines/amused/test_amused_inpaint.py
@@ -42,17 +42,17 @@ class AmusedInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     def get_dummy_components(self):
         torch.manual_seed(0)
         transformer = UVit2DModel(
-            hidden_size=32,
+            hidden_size=8,
             use_bias=False,
             hidden_dropout=0.0,
-            cond_embed_dim=32,
+            cond_embed_dim=8,
             micro_cond_encode_dim=2,
             micro_cond_embed_dim=10,
-            encoder_hidden_size=32,
+            encoder_hidden_size=8,
             vocab_size=32,
-            codebook_size=32,
-            in_channels=32,
-            block_out_channels=32,
+            codebook_size=32,  # codebook size needs to be consistent with num_vq_embeddings for inpaint tests
+            in_channels=8,
+            block_out_channels=8,
             num_res_blocks=1,
             downsample=True,
             upsample=True,
@@ -60,7 +60,7 @@ class AmusedInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             num_hidden_layers=1,
             num_attention_heads=1,
             attention_dropout=0.0,
-            intermediate_size=32,
+            intermediate_size=8,
             layer_norm_eps=1e-06,
             ln_elementwise_affine=True,
         )
@@ -68,17 +68,17 @@ class AmusedInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         torch.manual_seed(0)
         vqvae = VQModel(
             act_fn="silu",
-            block_out_channels=[32],
+            block_out_channels=[8],
             down_block_types=[
                 "DownEncoderBlock2D",
             ],
             in_channels=3,
-            latent_channels=32,
-            layers_per_block=2,
-            norm_num_groups=32,
-            num_vq_embeddings=32,
+            latent_channels=8,
+            layers_per_block=1,
+            norm_num_groups=8,
+            num_vq_embeddings=32,  # reducing this to 16 or 8 -> RuntimeError: "cdist_cuda" not implemented for 'Half'
             out_channels=3,
-            sample_size=32,
+            sample_size=8,
             up_block_types=[
                 "UpDecoderBlock2D",
             ],
@@ -89,14 +89,14 @@ class AmusedInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         text_encoder_config = CLIPTextConfig(
             bos_token_id=0,
             eos_token_id=2,
-            hidden_size=32,
-            intermediate_size=64,
+            hidden_size=8,
+            intermediate_size=8,
             layer_norm_eps=1e-05,
-            num_attention_heads=8,
-            num_hidden_layers=3,
+            num_attention_heads=1,
+            num_hidden_layers=1,
             pad_token_id=1,
             vocab_size=1000,
-            projection_dim=32,
+            projection_dim=8,
         )
         text_encoder = CLIPTextModelWithProjection(text_encoder_config)
         tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
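
The inpaint diff above keeps codebook_size=32 in step with num_vq_embeddings=32: the transformer predicts token ids over the VQ codebook, so the two sizes must agree or the predicted ids can index outside the codebook. A hypothetical sanity check along those lines (not part of this PR; assumes both models expose these values on their .config):

```python
def check_codebook_consistency(transformer, vqvae) -> None:
    # The transformer's output vocabulary must match the VQ-VAE's codebook,
    # otherwise predicted token ids cannot be decoded back into latents.
    assert transformer.config.codebook_size == vqvae.config.num_vq_embeddings, (
        f"codebook_size ({transformer.config.codebook_size}) != "
        f"num_vq_embeddings ({vqvae.config.num_vq_embeddings})"
    )
```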