Unverified Commit 52c4d32d authored by Chanchana Sornsoontorn's avatar Chanchana Sornsoontorn Committed by GitHub
Browse files

Fix typo and format BasicTransformerBlock attributes (#2953)

* chore(train_controlnet): fix typo in logger message

* chore(models): reorder module attributes to match their calling order

When printing the BasicTransformerBlock to stdout, I think it's crucial that the attributes are shown in the proper order. Also, the "3. Feed Forward" comment previously didn't make sense: it should have been next to self.ff, but it was instead next to self.norm3.

* correct many tests

* remove bogus file

* make style

* correct more tests

* finish tests

* fix one more

* make style

* make unclip deterministic

* chore(models/attention): reorganize comments in BasicTransformerBlock class

---------
Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
parent c6180a31
...@@ -47,6 +47,7 @@ class StableUnCLIPImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCas ...@@ -47,6 +47,7 @@ class StableUnCLIPImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCas
feature_extractor = CLIPImageProcessor(crop_size=32, size=32) feature_extractor = CLIPImageProcessor(crop_size=32, size=32)
torch.manual_seed(0)
image_encoder = CLIPVisionModelWithProjection( image_encoder = CLIPVisionModelWithProjection(
CLIPVisionConfig( CLIPVisionConfig(
hidden_size=embedder_hidden_size, hidden_size=embedder_hidden_size,
...@@ -119,16 +120,16 @@ class StableUnCLIPImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCas ...@@ -119,16 +120,16 @@ class StableUnCLIPImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCas
components = { components = {
# image encoding components # image encoding components
"feature_extractor": feature_extractor, "feature_extractor": feature_extractor,
"image_encoder": image_encoder, "image_encoder": image_encoder.eval(),
# image noising components # image noising components
"image_normalizer": image_normalizer, "image_normalizer": image_normalizer.eval(),
"image_noising_scheduler": image_noising_scheduler, "image_noising_scheduler": image_noising_scheduler,
# regular denoising components # regular denoising components
"tokenizer": tokenizer, "tokenizer": tokenizer,
"text_encoder": text_encoder, "text_encoder": text_encoder.eval(),
"unet": unet, "unet": unet.eval(),
"scheduler": scheduler, "scheduler": scheduler,
"vae": vae, "vae": vae.eval(),
} }
return components return components
...@@ -169,9 +170,7 @@ class StableUnCLIPImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCas ...@@ -169,9 +170,7 @@ class StableUnCLIPImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCas
image_slice = image[0, -3:, -3:, -1] image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 32, 32, 3) assert image.shape == (1, 32, 32, 3)
expected_slice = np.array( expected_slice = np.array([0.3872, 0.7224, 0.5601, 0.4741, 0.6872, 0.5814, 0.4636, 0.3867, 0.5078])
[0.34588397, 0.7747054, 0.5453714, 0.5227859, 0.57656777, 0.6532228, 0.5177634, 0.49932978, 0.56626225]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
......
...@@ -135,7 +135,7 @@ class TextToVideoSDPipelineFastTests(PipelineTesterMixin, unittest.TestCase): ...@@ -135,7 +135,7 @@ class TextToVideoSDPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
image_slice = frames[0][-3:, -3:, -1] image_slice = frames[0][-3:, -3:, -1]
assert frames[0].shape == (64, 64, 3) assert frames[0].shape == (64, 64, 3)
expected_slice = np.array([166, 184, 167, 118, 102, 123, 108, 93, 114]) expected_slice = np.array([158.0, 160.0, 153.0, 125.0, 100.0, 121.0, 111.0, 93.0, 113.0])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
......
...@@ -143,7 +143,7 @@ class VQDiffusionPipelineFastTests(unittest.TestCase): ...@@ -143,7 +143,7 @@ class VQDiffusionPipelineFastTests(unittest.TestCase):
assert image.shape == (1, 24, 24, 3) assert image.shape == (1, 24, 24, 3)
expected_slice = np.array([0.6583, 0.6410, 0.5325, 0.5635, 0.5563, 0.4234, 0.6008, 0.5491, 0.4880]) expected_slice = np.array([0.6551, 0.6168, 0.5008, 0.5676, 0.5659, 0.4295, 0.6073, 0.5599, 0.4992])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2 assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
...@@ -187,7 +187,7 @@ class VQDiffusionPipelineFastTests(unittest.TestCase): ...@@ -187,7 +187,7 @@ class VQDiffusionPipelineFastTests(unittest.TestCase):
assert image.shape == (1, 24, 24, 3) assert image.shape == (1, 24, 24, 3)
expected_slice = np.array([0.6647, 0.6531, 0.5303, 0.5891, 0.5726, 0.4439, 0.6304, 0.5564, 0.4912]) expected_slice = np.array([0.6693, 0.6075, 0.4959, 0.5701, 0.5583, 0.4333, 0.6171, 0.5684, 0.4988])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2 assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
......
...@@ -411,10 +411,7 @@ class Transformer2DModelTests(unittest.TestCase): ...@@ -411,10 +411,7 @@ class Transformer2DModelTests(unittest.TestCase):
assert attention_scores.shape == (1, 64, 64, 64) assert attention_scores.shape == (1, 64, 64, 64)
output_slice = attention_scores[0, -1, -3:, -3:] output_slice = attention_scores[0, -1, -3:, -3:]
expected_slice = torch.tensor([0.0143, -0.6909, -2.1547, -1.8893, 1.4097, 0.1359, -0.2521, -1.3359, 0.2598])
expected_slice = torch.tensor(
[-0.2555, -0.8877, -2.4739, -2.2251, 1.2714, 0.0807, -0.4161, -1.6408, -0.0471], device=torch_device
)
assert torch.allclose(output_slice.flatten(), expected_slice, atol=1e-3) assert torch.allclose(output_slice.flatten(), expected_slice, atol=1e-3)
def test_spatial_transformer_timestep(self): def test_spatial_transformer_timestep(self):
...@@ -445,14 +442,12 @@ class Transformer2DModelTests(unittest.TestCase): ...@@ -445,14 +442,12 @@ class Transformer2DModelTests(unittest.TestCase):
output_slice_1 = attention_scores_1[0, -1, -3:, -3:] output_slice_1 = attention_scores_1[0, -1, -3:, -3:]
output_slice_2 = attention_scores_2[0, -1, -3:, -3:] output_slice_2 = attention_scores_2[0, -1, -3:, -3:]
expected_slice_1 = torch.tensor( expected_slice = torch.tensor([-0.3923, -1.0923, -1.7144, -1.5570, 1.4154, 0.1738, -0.1157, -1.2998, -0.1703])
[-0.1874, -0.9704, -1.4290, -1.3357, 1.5138, 0.3036, -0.0976, -1.1667, 0.1283], device=torch_device
)
expected_slice_2 = torch.tensor( expected_slice_2 = torch.tensor(
[-0.3493, -1.0924, -1.6161, -1.5016, 1.4245, 0.1367, -0.2526, -1.3109, -0.0547], device=torch_device [-0.4311, -1.1376, -1.7732, -1.5997, 1.3450, 0.0964, -0.1569, -1.3590, -0.2348]
) )
assert torch.allclose(output_slice_1.flatten(), expected_slice_1, atol=1e-3) assert torch.allclose(output_slice_1.flatten(), expected_slice, atol=1e-3)
assert torch.allclose(output_slice_2.flatten(), expected_slice_2, atol=1e-3) assert torch.allclose(output_slice_2.flatten(), expected_slice_2, atol=1e-3)
def test_spatial_transformer_dropout(self): def test_spatial_transformer_dropout(self):
......
...@@ -57,7 +57,7 @@ class CrossAttnDownBlock2DTests(UNetBlockTesterMixin, unittest.TestCase): ...@@ -57,7 +57,7 @@ class CrossAttnDownBlock2DTests(UNetBlockTesterMixin, unittest.TestCase):
return init_dict, inputs_dict return init_dict, inputs_dict
def test_output(self): def test_output(self):
expected_slice = [0.2440, -0.6953, -0.2140, -0.3874, 0.1966, 1.2077, 0.0441, -0.7718, 0.2800] expected_slice = [0.2238, -0.7396, -0.2255, -0.3829, 0.1925, 1.1665, 0.0603, -0.7295, 0.1983]
super().test_output(expected_slice) super().test_output(expected_slice)
...@@ -175,7 +175,7 @@ class UNetMidBlock2DCrossAttnTests(UNetBlockTesterMixin, unittest.TestCase): ...@@ -175,7 +175,7 @@ class UNetMidBlock2DCrossAttnTests(UNetBlockTesterMixin, unittest.TestCase):
return init_dict, inputs_dict return init_dict, inputs_dict
def test_output(self): def test_output(self):
expected_slice = [0.1879, 2.2653, 0.5987, 1.1568, -0.8454, -1.6109, -0.8919, 0.8306, 1.6758] expected_slice = [0.0187, 2.4220, 0.4484, 1.1203, -0.6121, -1.5122, -0.8270, 0.7851, 1.8335]
super().test_output(expected_slice) super().test_output(expected_slice)
...@@ -237,7 +237,7 @@ class CrossAttnUpBlock2DTests(UNetBlockTesterMixin, unittest.TestCase): ...@@ -237,7 +237,7 @@ class CrossAttnUpBlock2DTests(UNetBlockTesterMixin, unittest.TestCase):
return init_dict, inputs_dict return init_dict, inputs_dict
def test_output(self): def test_output(self):
expected_slice = [-0.2796, -0.4364, -0.1067, -0.2693, 0.1894, 0.3869, -0.3470, 0.4584, 0.5091] expected_slice = [-0.1403, -0.3515, -0.0420, -0.1425, 0.3167, 0.5094, -0.2181, 0.5931, 0.5582]
super().test_output(expected_slice) super().test_output(expected_slice)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment