Unverified commit e97a633b, authored by M. Tolga Cangöz and committed by GitHub

Update access of configuration attributes (#7343)


Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
parent 01ac37b3
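
The change is mechanical but spans docs, community pipelines, schedulers, and tests: wherever a configuration value such as `in_channels`, `sample_size`, `cross_attention_dim`, `use_karras_sigmas`, or `sigma_min` was read directly off a model or scheduler instance, it now goes through the frozen `.config` object. A minimal sketch of the before/after pattern (the tiny-UNet kwargs below are illustrative assumptions, not values taken from this commit):

```py
from diffusers import UNet2DConditionModel

# Build a deliberately tiny UNet so the sketch runs quickly; these kwargs
# are hypothetical and chosen only for illustration.
unet = UNet2DConditionModel(
    block_out_channels=(32, 64),
    layers_per_block=1,
    sample_size=32,
    in_channels=4,
    out_channels=4,
    down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
    up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
    cross_attention_dim=32,
)

# Old, deprecated access pattern (still works in recent diffusers releases,
# but emits a deprecation warning):
#   unet.in_channels
# New, supported access pattern, as used throughout this commit:
print(unet.config.in_channels)          # 4
print(unet.config.sample_size)          # 32
print(unet.config.cross_attention_dim)  # 32
```
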
@@ -355,7 +355,7 @@ unet_traced = torch.jit.load("unet_traced.pt")
 class TracedUNet(torch.nn.Module):
     def __init__(self):
         super().__init__()
-        self.in_channels = pipe.unet.in_channels
+        self.in_channels = pipe.unet.config.in_channels
         self.device = pipe.unet.device

     def forward(self, latent_model_input, t, encoder_hidden_states):

@@ -210,7 +210,7 @@ Stable Diffusion is a text-to-image *latent diffusion* model. latent di
 ```py
 >>> latents = torch.randn(
-...     (batch_size, unet.in_channels, height // 8, width // 8),
+...     (batch_size, unet.config.in_channels, height // 8, width // 8),
 ...     generator=generator,
 ...     device=torch_device,
 ... )

@@ -224,7 +224,7 @@ class StableDiffusionIPEXPipeline(
         # 5. Prepare latent variables
         latents = self.prepare_latents(
             batch_size * num_images_per_prompt,
-            self.unet.in_channels,
+            self.unet.config.in_channels,
             height,
             width,
             prompt_embeds.dtype,
@@ -679,7 +679,7 @@ class StableDiffusionIPEXPipeline(
         timesteps = self.scheduler.timesteps

         # 5. Prepare latent variables
-        num_channels_latents = self.unet.in_channels
+        num_channels_latents = self.unet.config.in_channels
         latents = self.prepare_latents(
             batch_size * num_images_per_prompt,
             num_channels_latents,

@@ -917,7 +917,7 @@ class TensorRTStableDiffusionPipeline(StableDiffusionPipeline):
         text_embeddings = self.__encode_prompt(prompt, negative_prompt)

         # Pre-initialize latents
-        num_channels_latents = self.unet.in_channels
+        num_channels_latents = self.unet.config.in_channels
         latents = self.prepare_latents(
             batch_size,
             num_channels_latents,

@@ -1195,9 +1195,9 @@ def superres_check_against_original(dump_path, unet_checkpoint_path):
     if_II_model = IFStageIII(device="cuda", dir_or_name=orig_path, model_kwargs={"precision": "fp32"}).model

     batch_size = 1
-    channels = model.in_channels // 2
-    height = model.sample_size
-    width = model.sample_size
+    channels = model.config.in_channels // 2
+    height = model.config.sample_size
+    width = model.config.sample_size
     height = 1024
     width = 1024

@@ -613,7 +613,7 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
         for image_ in image:
             image_ = image_.convert("RGB")
-            image_ = resize(image_, self.unet.sample_size)
+            image_ = resize(image_, self.unet.config.sample_size)
             image_ = np.array(image_)
             image_ = image_.astype(np.float32)
             image_ = image_ / 127.5 - 1

@@ -662,7 +662,7 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
         for image_ in image:
             image_ = image_.convert("RGB")
-            image_ = resize(image_, self.unet.sample_size)
+            image_ = resize(image_, self.unet.config.sample_size)
             image_ = np.array(image_)
             image_ = image_.astype(np.float32)
             image_ = image_ / 127.5 - 1

@@ -654,7 +654,7 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
         for image_ in image:
             image_ = image_.convert("RGB")
-            image_ = resize(image_, self.unet.sample_size)
+            image_ = resize(image_, self.unet.config.sample_size)
             image_ = np.array(image_)
             image_ = image_.astype(np.float32)
             image_ = image_ / 127.5 - 1
@@ -701,7 +701,7 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
         for mask_image_ in mask_image:
             mask_image_ = mask_image_.convert("L")
-            mask_image_ = resize(mask_image_, self.unet.sample_size)
+            mask_image_ = resize(mask_image_, self.unet.config.sample_size)
             mask_image_ = np.array(mask_image_)
             mask_image_ = mask_image_[None, None, :]
             new_mask_image.append(mask_image_)

@@ -698,7 +698,7 @@ class IFInpaintingSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
         for image_ in image:
             image_ = image_.convert("RGB")
-            image_ = resize(image_, self.unet.sample_size)
+            image_ = resize(image_, self.unet.config.sample_size)
             image_ = np.array(image_)
             image_ = image_.astype(np.float32)
             image_ = image_ / 127.5 - 1
@@ -778,7 +778,7 @@ class IFInpaintingSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
         for mask_image_ in mask_image:
             mask_image_ = mask_image_.convert("L")
-            mask_image_ = resize(mask_image_, self.unet.sample_size)
+            mask_image_ = resize(mask_image_, self.unet.config.sample_size)
             mask_image_ = np.array(mask_image_)
             mask_image_ = mask_image_[None, None, :]
             new_mask_image.append(mask_image_)

@@ -469,7 +469,7 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
         latents = self.prepare_latents(
             batch_size * num_images_per_prompt,
-            self.num_latent_channels,
+            self.config.num_latent_channels,
             height,
             width,
             latents_dtype,
@@ -498,12 +498,12 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
         # 7. Check that sizes of image and latents match
         num_channels_image = image.shape[1]
-        if self.num_latent_channels + num_channels_image != self.num_unet_input_channels:
+        if self.config.num_latent_channels + num_channels_image != self.config.num_unet_input_channels:
             raise ValueError(
                 "Incorrect configuration settings! The config of `pipeline.unet` expects"
-                f" {self.num_unet_input_channels} but received `num_channels_latents`: {self.num_latent_channels} +"
+                f" {self.config.num_unet_input_channels} but received `num_channels_latents`: {self.config.num_latent_channels} +"
                 f" `num_channels_image`: {num_channels_image} "
-                f" = {self.num_latent_channels + num_channels_image}. Please verify the config of"
+                f" = {self.config.num_latent_channels + num_channels_image}. Please verify the config of"
                 " `pipeline.unet` or your `image` input."
             )

@@ -680,7 +680,7 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin):
         timesteps = self.scheduler.timesteps

         # 5. Prepare latent variables
-        num_channels_latents = self.unet.in_channels
+        num_channels_latents = self.unet.config.in_channels
         latents = self.prepare_latents(
             batch_size * num_images_per_prompt,
             num_channels_latents,
@@ -713,7 +713,7 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin):
         boxes = torch.zeros(max_objs, 4, device=device, dtype=self.text_encoder.dtype)
         boxes[:n_objs] = torch.tensor(gligen_boxes)
         text_embeddings = torch.zeros(
-            max_objs, self.unet.cross_attention_dim, device=device, dtype=self.text_encoder.dtype
+            max_objs, self.unet.config.cross_attention_dim, device=device, dtype=self.text_encoder.dtype
         )
         text_embeddings[:n_objs] = _text_embeddings
         # Generate a mask for each object that is entity described by phrases

@@ -847,7 +847,7 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, StableDiffusionM
         timesteps = self.scheduler.timesteps

         # 5. Prepare latent variables
-        num_channels_latents = self.unet.in_channels
+        num_channels_latents = self.unet.config.in_channels
         latents = self.prepare_latents(
             batch_size * num_images_per_prompt,
             num_channels_latents,

@@ -233,7 +233,7 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
             sigmas = self._convert_to_karras(ramp)
             timesteps = self.sigma_to_t(sigmas)

-        sigmas = np.concatenate([sigmas, [self.sigma_min]]).astype(np.float32)
+        sigmas = np.concatenate([sigmas, [self.config.sigma_min]]).astype(np.float32)
         self.sigmas = torch.from_numpy(sigmas).to(device=device)

         if str(device).startswith("mps"):

@@ -233,7 +233,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
         """
        # Clipping the minimum of all lambda(t) for numerical stability.
        # This is critical for cosine (squaredcos_cap_v2) noise schedule.
-        clipped_idx = torch.searchsorted(torch.flip(self.lambda_t, [0]), self.lambda_min_clipped).item()
+        clipped_idx = torch.searchsorted(torch.flip(self.lambda_t, [0]), self.config.lambda_min_clipped).item()
         self.noisiest_timestep = self.config.num_train_timesteps - 1 - clipped_idx

         # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://arxiv.org/abs/2305.08891

@@ -325,7 +325,7 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
         log_sigmas = np.log(sigmas)
         sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)

-        if self.use_karras_sigmas:
+        if self.config.use_karras_sigmas:
             sigmas = self._convert_to_karras(in_sigmas=sigmas)
             timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])

@@ -343,7 +343,7 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
                 " 'linear' or 'log_linear'"
             )

-        if self.use_karras_sigmas:
+        if self.config.use_karras_sigmas:
             sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
             timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])

@@ -288,7 +288,7 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
         log_sigmas = np.log(sigmas)
         sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)

-        if self.use_karras_sigmas:
+        if self.config.use_karras_sigmas:
             sigmas = self._convert_to_karras(in_sigmas=sigmas)
             timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])

@@ -782,7 +782,7 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.Test
         # update inputs_dict for ip-adapter
         batch_size = inputs_dict["encoder_hidden_states"].shape[0]
         # for ip-adapter image_embeds has shape [batch_size, num_image, embed_dim]
-        image_embeds = floats_tensor((batch_size, 1, model.cross_attention_dim)).to(torch_device)
+        image_embeds = floats_tensor((batch_size, 1, model.config.cross_attention_dim)).to(torch_device)
         inputs_dict["added_cond_kwargs"] = {"image_embeds": [image_embeds]}

         # make ip_adapter_1 and ip_adapter_2
@@ -854,7 +854,7 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.Test
         # update inputs_dict for ip-adapter
         batch_size = inputs_dict["encoder_hidden_states"].shape[0]
         # for ip-adapter-plus image_embeds has shape [batch_size, num_image, sequence_length, embed_dim]
-        image_embeds = floats_tensor((batch_size, 1, 1, model.cross_attention_dim)).to(torch_device)
+        image_embeds = floats_tensor((batch_size, 1, 1, model.config.cross_attention_dim)).to(torch_device)
         inputs_dict["added_cond_kwargs"] = {"image_embeds": [image_embeds]}

         # make ip_adapter_1 and ip_adapter_2

@@ -272,17 +272,17 @@ class ConfigTester(unittest.TestCase):
         # now loading it with SampleObject2 should put f into `_use_default_values`
         config = SampleObject2.from_config(tmpdirname)
-        assert "f" in config._use_default_values
-        assert config.f == [1, 3]
+        assert "f" in config.config._use_default_values
+        assert config.config.f == [1, 3]

         # now loading the config, should **NOT** use [1, 3] for `f`, but the default [1, 4] value
-        # **BECAUSE** it is part of `config._use_default_values`
+        # **BECAUSE** it is part of `config.config._use_default_values`
         new_config = SampleObject4.from_config(config.config)
-        assert new_config.f == [5, 4]
+        assert new_config.config.f == [5, 4]

         config.config._use_default_values.pop()
         new_config_2 = SampleObject4.from_config(config.config)
-        assert new_config_2.f == [1, 3]
+        assert new_config_2.config.f == [1, 3]

         # Nevertheless "e" should still be correctly loaded to [1, 3] from SampleObject2 instead of defaulting to [1, 5]
-        assert new_config_2.e == [1, 3]
+        assert new_config_2.config.e == [1, 3]

@@ -137,7 +137,7 @@ class PipelineIntegrationTests(unittest.TestCase):
         audio_slice = audio[0, -3:, -3:]

-        assert audio.shape == (1, 2, pipe.unet.sample_size)
+        assert audio.shape == (1, 2, pipe.unet.config.sample_size)
         expected_slice = np.array([-0.0192, -0.0231, -0.0318, -0.0059, 0.0002, -0.0020])

         assert np.abs(audio_slice.flatten() - expected_slice).max() < 1e-2
@@ -155,7 +155,7 @@ class PipelineIntegrationTests(unittest.TestCase):
         audio_slice = audio[0, -3:, -3:]

-        assert audio.shape == (1, 2, pipe.unet.sample_size)
+        assert audio.shape == (1, 2, pipe.unet.config.sample_size)
         expected_slice = np.array([-0.0367, -0.0488, -0.0771, -0.0525, -0.0444, -0.0341])

         assert np.abs(audio_slice.flatten() - expected_slice).max() < 1e-2