Unverified commit ad8068e4, authored by Arka, committed by GitHub
Browse files

Changed channel parameters for UNet and VAE; changed config parameters of CLIPText (#5370)



* Changed channel parameters for UNet and VAE. In the CLIPText config, decreased the number of hidden layers and increased the number of attention heads and the intermediate size

* changed the assertion check range

* clean up

---------
Co-authored-by: Dhruv Nair <dhruv.nair@gmail.com>
parent b4cbbd5e
...@@ -82,6 +82,7 @@ def _test_stable_diffusion_compile(in_queue, out_queue, timeout): ...@@ -82,6 +82,7 @@ def _test_stable_diffusion_compile(in_queue, out_queue, timeout):
assert image.shape == (1, 512, 512, 3) assert image.shape == (1, 512, 512, 3)
expected_slice = np.array([0.38019, 0.28647, 0.27321, 0.40377, 0.38290, 0.35446, 0.39218, 0.38165, 0.42239]) expected_slice = np.array([0.38019, 0.28647, 0.27321, 0.40377, 0.38290, 0.35446, 0.39218, 0.38165, 0.42239])
assert np.abs(image_slice - expected_slice).max() < 5e-3 assert np.abs(image_slice - expected_slice).max() < 5e-3
except Exception: except Exception:
error = f"{traceback.format_exc()}" error = f"{traceback.format_exc()}"
...@@ -103,14 +104,15 @@ class StableDiffusionPipelineFastTests( ...@@ -103,14 +104,15 @@ class StableDiffusionPipelineFastTests(
def get_dummy_components(self): def get_dummy_components(self):
torch.manual_seed(0) torch.manual_seed(0)
unet = UNet2DConditionModel( unet = UNet2DConditionModel(
block_out_channels=(32, 64), block_out_channels=(4, 8),
layers_per_block=2, layers_per_block=1,
sample_size=32, sample_size=32,
in_channels=4, in_channels=4,
out_channels=4, out_channels=4,
down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
cross_attention_dim=32, cross_attention_dim=32,
norm_num_groups=2,
) )
scheduler = DDIMScheduler( scheduler = DDIMScheduler(
beta_start=0.00085, beta_start=0.00085,
...@@ -121,22 +123,23 @@ class StableDiffusionPipelineFastTests( ...@@ -121,22 +123,23 @@ class StableDiffusionPipelineFastTests(
) )
torch.manual_seed(0) torch.manual_seed(0)
vae = AutoencoderKL( vae = AutoencoderKL(
block_out_channels=[32, 64], block_out_channels=[4, 8],
in_channels=3, in_channels=3,
out_channels=3, out_channels=3,
down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
latent_channels=4, latent_channels=4,
norm_num_groups=2,
) )
torch.manual_seed(0) torch.manual_seed(0)
text_encoder_config = CLIPTextConfig( text_encoder_config = CLIPTextConfig(
bos_token_id=0, bos_token_id=0,
eos_token_id=2, eos_token_id=2,
hidden_size=32, hidden_size=32,
intermediate_size=37, intermediate_size=64,
layer_norm_eps=1e-05, layer_norm_eps=1e-05,
num_attention_heads=4, num_attention_heads=8,
num_hidden_layers=5, num_hidden_layers=3,
pad_token_id=1, pad_token_id=1,
vocab_size=1000, vocab_size=1000,
) )
...@@ -183,7 +186,7 @@ class StableDiffusionPipelineFastTests( ...@@ -183,7 +186,7 @@ class StableDiffusionPipelineFastTests(
image_slice = image[0, -3:, -3:, -1] image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 64, 64, 3) assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.5756, 0.6118, 0.5005, 0.5041, 0.5471, 0.4726, 0.4976, 0.4865, 0.4864]) expected_slice = np.array([0.3203, 0.4555, 0.4711, 0.3505, 0.3973, 0.4650, 0.5137, 0.3392, 0.4045])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
...@@ -317,7 +320,7 @@ class StableDiffusionPipelineFastTests( ...@@ -317,7 +320,7 @@ class StableDiffusionPipelineFastTests(
image_slice = image[0, -3:, -3:, -1] image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 136, 136, 3) assert image.shape == (1, 136, 136, 3)
expected_slice = np.array([0.5524, 0.5626, 0.6069, 0.4727, 0.386, 0.3995, 0.4613, 0.4328, 0.4269]) expected_slice = np.array([0.4346, 0.5621, 0.5016, 0.3926, 0.4533, 0.4134, 0.5625, 0.5632, 0.5265])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
...@@ -335,7 +338,7 @@ class StableDiffusionPipelineFastTests( ...@@ -335,7 +338,7 @@ class StableDiffusionPipelineFastTests(
image_slice = image[0, -3:, -3:, -1] image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 64, 64, 3) assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.5122, 0.5712, 0.4825, 0.5053, 0.5646, 0.4769, 0.5179, 0.4894, 0.4994]) expected_slice = np.array([0.3411, 0.5032, 0.4704, 0.3135, 0.4323, 0.4740, 0.5150, 0.3498, 0.4022])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
...@@ -375,7 +378,7 @@ class StableDiffusionPipelineFastTests( ...@@ -375,7 +378,7 @@ class StableDiffusionPipelineFastTests(
image_slice = image[0, -3:, -3:, -1] image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 64, 64, 3) assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.4873, 0.5443, 0.4845, 0.5004, 0.5549, 0.4850, 0.5191, 0.4941, 0.5065]) expected_slice = np.array([0.3149, 0.5246, 0.4796, 0.3218, 0.4469, 0.4729, 0.5151, 0.3597, 0.3954])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
...@@ -394,7 +397,7 @@ class StableDiffusionPipelineFastTests( ...@@ -394,7 +397,7 @@ class StableDiffusionPipelineFastTests(
image_slice = image[0, -3:, -3:, -1] image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 64, 64, 3) assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.4872, 0.5444, 0.4846, 0.5003, 0.5549, 0.4850, 0.5189, 0.4941, 0.5067]) expected_slice = np.array([0.3151, 0.5243, 0.4794, 0.3217, 0.4468, 0.4728, 0.5152, 0.3598, 0.3954])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
...@@ -413,7 +416,7 @@ class StableDiffusionPipelineFastTests( ...@@ -413,7 +416,7 @@ class StableDiffusionPipelineFastTests(
image_slice = image[0, -3:, -3:, -1] image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 64, 64, 3) assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.4873, 0.5443, 0.4845, 0.5004, 0.5549, 0.4850, 0.5191, 0.4941, 0.5065]) expected_slice = np.array([0.3149, 0.5246, 0.4796, 0.3218, 0.4469, 0.4729, 0.5151, 0.3597, 0.3954])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
...@@ -485,7 +488,7 @@ class StableDiffusionPipelineFastTests( ...@@ -485,7 +488,7 @@ class StableDiffusionPipelineFastTests(
image_slice = image[0, -3:, -3:, -1] image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 64, 64, 3) assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.5114, 0.5706, 0.4772, 0.5028, 0.5637, 0.4732, 0.5169, 0.4881, 0.4977]) expected_slice = np.array([0.3458, 0.5120, 0.4800, 0.3116, 0.4348, 0.4802, 0.5237, 0.3467, 0.3991])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
...@@ -638,7 +641,7 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase): ...@@ -638,7 +641,7 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
image_slice = image[0, -3:, -3:, -1].flatten() image_slice = image[0, -3:, -3:, -1].flatten()
assert image.shape == (1, 512, 512, 3) assert image.shape == (1, 512, 512, 3)
expected_slice = np.array([0.43625, 0.43554, 0.36670, 0.40660, 0.39703, 0.38658, 0.43936, 0.43557, 0.40592]) expected_slice = np.array([0.3149, 0.5246, 0.4796, 0.3218, 0.4469, 0.4729, 0.5151, 0.3597, 0.3954])
assert np.abs(image_slice - expected_slice).max() < 3e-3 assert np.abs(image_slice - expected_slice).max() < 3e-3
def test_stable_diffusion_v1_4_with_freeu(self): def test_stable_diffusion_v1_4_with_freeu(self):
...@@ -665,7 +668,7 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase): ...@@ -665,7 +668,7 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
image_slice = image[0, -3:, -3:, -1].flatten() image_slice = image[0, -3:, -3:, -1].flatten()
assert image.shape == (1, 512, 512, 3) assert image.shape == (1, 512, 512, 3)
expected_slice = np.array([0.57400, 0.47841, 0.31625, 0.63583, 0.58306, 0.55056, 0.50825, 0.56306, 0.55748]) expected_slice = np.array([0.3458, 0.5120, 0.4800, 0.3116, 0.4348, 0.4802, 0.5237, 0.3467, 0.3991])
assert np.abs(image_slice - expected_slice).max() < 3e-3 assert np.abs(image_slice - expected_slice).max() < 3e-3
def test_stable_diffusion_ddim(self): def test_stable_diffusion_ddim(self):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment