"src/vscode:/vscode.git/clone" did not exist on "470f51cd26c75974ef88c697c0a94412a20f2264"
Unverified Commit 80bc0c0c authored by Will Berman, committed by GitHub

config fixes (#3060)

parent 091a0582
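The pattern throughout this diff: hyperparameters that a diffusers scheduler or model registers in `__init__` should be read through its `config` object rather than as direct attributes. A minimal sketch of the access pattern, using `DDIMScheduler` purely for illustration:

```python
# Minimal sketch: diffusers schedulers register their __init__ arguments
# on a frozen `config` object; reading them via `.config` is the
# supported path (DDIMScheduler is chosen here only as an example).
from diffusers import DDIMScheduler

scheduler = DDIMScheduler(prediction_type="v_prediction")

# Preferred access: through the registered config.
if scheduler.config.prediction_type == "v_prediction":
    print("v-prediction parameterization")
```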
@@ -105,7 +105,7 @@ class StableDiffusionPipeline(DiffusionPipeline):
         )
         model = ModelWrapper(unet, scheduler.alphas_cumprod)
-        if scheduler.prediction_type == "v_prediction":
+        if scheduler.config.prediction_type == "v_prediction":
             self.k_diffusion_model = CompVisVDenoiser(model)
         else:
             self.k_diffusion_model = CompVisDenoiser(model)
...
@@ -60,9 +60,9 @@ class AudioDiffusionPipeline(DiffusionPipeline):
         input_module = self.vqvae if self.vqvae is not None else self.unet
         # For backwards compatibility
         sample_size = (
-            (input_module.sample_size, input_module.sample_size)
-            if type(input_module.sample_size) == int
-            else input_module.sample_size
+            (input_module.config.sample_size, input_module.config.sample_size)
+            if type(input_module.config.sample_size) == int
+            else input_module.config.sample_size
         )
         return sample_size
...
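The `AudioDiffusionPipeline` hunk above keeps backwards compatibility: an older config may store `sample_size` as a single int, a newer one as a `(height, width)` pair, and callers always want the pair. A standalone sketch of that normalization; `normalize_sample_size` is a hypothetical helper, not diffusers API:

```python
# Hypothetical helper mirroring the backwards-compatibility expression
# in the hunk above: scalar sample sizes become square (height, width).
def normalize_sample_size(sample_size):
    if isinstance(sample_size, int):
        return (sample_size, sample_size)
    return sample_size

assert normalize_sample_size(64) == (64, 64)
assert normalize_sample_size((64, 128)) == (64, 128)
```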
@@ -113,7 +113,7 @@ class StableDiffusionKDiffusionPipeline(DiffusionPipeline, TextualInversionLoade
         self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
         model = ModelWrapper(unet, scheduler.alphas_cumprod)
-        if scheduler.prediction_type == "v_prediction":
+        if scheduler.config.prediction_type == "v_prediction":
             self.k_diffusion_model = CompVisVDenoiser(model)
         else:
             self.k_diffusion_model = CompVisDenoiser(model)
...
@@ -115,8 +115,11 @@ class PipelineFastTests(unittest.TestCase):
         output = pipe(generator=generator, steps=4, return_dict=False)
         image_from_tuple = output[0][0]
-        assert audio.shape == (1, (self.dummy_unet.sample_size[1] - 1) * mel.hop_length)
-        assert image.height == self.dummy_unet.sample_size[0] and image.width == self.dummy_unet.sample_size[1]
+        assert audio.shape == (1, (self.dummy_unet.config.sample_size[1] - 1) * mel.hop_length)
+        assert (
+            image.height == self.dummy_unet.config.sample_size[0]
+            and image.width == self.dummy_unet.config.sample_size[1]
+        )
         image_slice = np.frombuffer(image.tobytes(), dtype="uint8")[:10]
         image_from_tuple_slice = np.frombuffer(image_from_tuple.tobytes(), dtype="uint8")[:10]
         expected_slice = np.array([69, 255, 255, 255, 0, 0, 77, 181, 12, 127])
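The shape assertion in this test encodes how a mel spectrogram maps back to audio: an image `W` frames wide decodes to `(W - 1) * hop_length` samples. A quick check with assumed values (the real test reads both from the pipeline's config rather than hard-coding them):

```python
# Assumed illustrative values, not taken from the test suite.
hop_length = 512          # samples between spectrogram frames
sample_size = (64, 64)    # (height, width) of the generated image

expected_audio_len = (sample_size[1] - 1) * hop_length
print(expected_audio_len)  # 32256
```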
@@ -133,14 +136,14 @@ class PipelineFastTests(unittest.TestCase):
         pipe.set_progress_bar_config(disable=None)
         np.random.seed(0)
-        raw_audio = np.random.uniform(-1, 1, ((dummy_vqvae_and_unet[0].sample_size[1] - 1) * mel.hop_length,))
+        raw_audio = np.random.uniform(-1, 1, ((dummy_vqvae_and_unet[0].config.sample_size[1] - 1) * mel.hop_length,))
         generator = torch.Generator(device=device).manual_seed(42)
         output = pipe(raw_audio=raw_audio, generator=generator, start_step=5, steps=10)
         image = output.images[0]
         assert (
-            image.height == self.dummy_vqvae_and_unet[0].sample_size[0]
-            and image.width == self.dummy_vqvae_and_unet[0].sample_size[1]
+            image.height == self.dummy_vqvae_and_unet[0].config.sample_size[0]
+            and image.width == self.dummy_vqvae_and_unet[0].config.sample_size[1]
         )
         image_slice = np.frombuffer(image.tobytes(), dtype="uint8")[:10]
         expected_slice = np.array([120, 117, 110, 109, 138, 167, 138, 148, 132, 121])
@@ -183,8 +186,8 @@ class PipelineIntegrationTests(unittest.TestCase):
         audio = output.audios[0]
         image = output.images[0]
-        assert audio.shape == (1, (pipe.unet.sample_size[1] - 1) * pipe.mel.hop_length)
-        assert image.height == pipe.unet.sample_size[0] and image.width == pipe.unet.sample_size[1]
+        assert audio.shape == (1, (pipe.unet.config.sample_size[1] - 1) * pipe.mel.hop_length)
+        assert image.height == pipe.unet.config.sample_size[0] and image.width == pipe.unet.config.sample_size[1]
         image_slice = np.frombuffer(image.tobytes(), dtype="uint8")[:10]
         expected_slice = np.array([151, 167, 154, 144, 122, 134, 121, 105, 70, 26])
...
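Each of these tests ends with the same golden-slice check: the first 10 raw bytes of the rendered image are compared against stored reference values as a cheap determinism guard. A self-contained sketch of the mechanic; the solid-gray image and the reference slice are stand-ins, not values from the test suite:

```python
# Stand-in demonstration of the golden-slice comparison used above.
import numpy as np
from PIL import Image

image = Image.new("L", (4, 4), color=127)            # stand-in image
image_slice = np.frombuffer(image.tobytes(), dtype="uint8")[:10]
expected_slice = np.full(10, 127, dtype="uint8")     # stand-in reference
assert np.array_equal(image_slice, expected_slice)
```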