Unverified commit 7271f8b7, authored by Patrick von Platen and committed by GitHub
Browse files

Fix UniPC scheduler for 1D (#5276)

parent dfcce3ca
...@@ -276,13 +276,13 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin): ...@@ -276,13 +276,13 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
https://arxiv.org/abs/2205.11487 https://arxiv.org/abs/2205.11487
""" """
dtype = sample.dtype dtype = sample.dtype
batch_size, channels, height, width = sample.shape batch_size, channels, *remaining_dims = sample.shape
if dtype not in (torch.float32, torch.float64): if dtype not in (torch.float32, torch.float64):
sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half
# Flatten sample for doing quantile calculation along each image # Flatten sample for doing quantile calculation along each image
sample = sample.reshape(batch_size, channels * height * width) sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))
abs_sample = sample.abs() # "a certain percentile absolute pixel value" abs_sample = sample.abs() # "a certain percentile absolute pixel value"
...@@ -290,11 +290,10 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin): ...@@ -290,11 +290,10 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
s = torch.clamp( s = torch.clamp(
s, min=1, max=self.config.sample_max_value s, min=1, max=self.config.sample_max_value
) # When clamped to min=1, equivalent to standard clipping to [-1, 1] ) # When clamped to min=1, equivalent to standard clipping to [-1, 1]
s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0
sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s"
sample = sample.reshape(batch_size, channels, height, width) sample = sample.reshape(batch_size, channels, *remaining_dims)
sample = sample.to(dtype) sample = sample.to(dtype)
return sample return sample
......
...@@ -298,13 +298,13 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin): ...@@ -298,13 +298,13 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
https://arxiv.org/abs/2205.11487 https://arxiv.org/abs/2205.11487
""" """
dtype = sample.dtype dtype = sample.dtype
batch_size, channels, height, width = sample.shape batch_size, channels, *remaining_dims = sample.shape
if dtype not in (torch.float32, torch.float64): if dtype not in (torch.float32, torch.float64):
sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half
# Flatten sample for doing quantile calculation along each image # Flatten sample for doing quantile calculation along each image
sample = sample.reshape(batch_size, channels * height * width) sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))
abs_sample = sample.abs() # "a certain percentile absolute pixel value" abs_sample = sample.abs() # "a certain percentile absolute pixel value"
...@@ -312,11 +312,10 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin): ...@@ -312,11 +312,10 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
s = torch.clamp( s = torch.clamp(
s, min=1, max=self.config.sample_max_value s, min=1, max=self.config.sample_max_value
) # When clamped to min=1, equivalent to standard clipping to [-1, 1] ) # When clamped to min=1, equivalent to standard clipping to [-1, 1]
s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0
sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s"
sample = sample.reshape(batch_size, channels, height, width) sample = sample.reshape(batch_size, channels, *remaining_dims)
sample = sample.to(dtype) sample = sample.to(dtype)
return sample return sample
......
...@@ -330,13 +330,13 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin): ...@@ -330,13 +330,13 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
https://arxiv.org/abs/2205.11487 https://arxiv.org/abs/2205.11487
""" """
dtype = sample.dtype dtype = sample.dtype
batch_size, channels, height, width = sample.shape batch_size, channels, *remaining_dims = sample.shape
if dtype not in (torch.float32, torch.float64): if dtype not in (torch.float32, torch.float64):
sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half
# Flatten sample for doing quantile calculation along each image # Flatten sample for doing quantile calculation along each image
sample = sample.reshape(batch_size, channels * height * width) sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))
abs_sample = sample.abs() # "a certain percentile absolute pixel value" abs_sample = sample.abs() # "a certain percentile absolute pixel value"
...@@ -344,11 +344,10 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin): ...@@ -344,11 +344,10 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
s = torch.clamp( s = torch.clamp(
s, min=1, max=self.config.sample_max_value s, min=1, max=self.config.sample_max_value
) # When clamped to min=1, equivalent to standard clipping to [-1, 1] ) # When clamped to min=1, equivalent to standard clipping to [-1, 1]
s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0
sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s"
sample = sample.reshape(batch_size, channels, height, width) sample = sample.reshape(batch_size, channels, *remaining_dims)
sample = sample.to(dtype) sample = sample.to(dtype)
return sample return sample
......
...@@ -344,13 +344,13 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin): ...@@ -344,13 +344,13 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
https://arxiv.org/abs/2205.11487 https://arxiv.org/abs/2205.11487
""" """
dtype = sample.dtype dtype = sample.dtype
batch_size, channels, height, width = sample.shape batch_size, channels, *remaining_dims = sample.shape
if dtype not in (torch.float32, torch.float64): if dtype not in (torch.float32, torch.float64):
sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half
# Flatten sample for doing quantile calculation along each image # Flatten sample for doing quantile calculation along each image
sample = sample.reshape(batch_size, channels * height * width) sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))
abs_sample = sample.abs() # "a certain percentile absolute pixel value" abs_sample = sample.abs() # "a certain percentile absolute pixel value"
...@@ -358,11 +358,10 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin): ...@@ -358,11 +358,10 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
s = torch.clamp( s = torch.clamp(
s, min=1, max=self.config.sample_max_value s, min=1, max=self.config.sample_max_value
) # When clamped to min=1, equivalent to standard clipping to [-1, 1] ) # When clamped to min=1, equivalent to standard clipping to [-1, 1]
s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0
sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s"
sample = sample.reshape(batch_size, channels, height, width) sample = sample.reshape(batch_size, channels, *remaining_dims)
sample = sample.to(dtype) sample = sample.to(dtype)
return sample return sample
......
...@@ -268,13 +268,13 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin): ...@@ -268,13 +268,13 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
https://arxiv.org/abs/2205.11487 https://arxiv.org/abs/2205.11487
""" """
dtype = sample.dtype dtype = sample.dtype
batch_size, channels, height, width = sample.shape batch_size, channels, *remaining_dims = sample.shape
if dtype not in (torch.float32, torch.float64): if dtype not in (torch.float32, torch.float64):
sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half
# Flatten sample for doing quantile calculation along each image # Flatten sample for doing quantile calculation along each image
sample = sample.reshape(batch_size, channels * height * width) sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))
abs_sample = sample.abs() # "a certain percentile absolute pixel value" abs_sample = sample.abs() # "a certain percentile absolute pixel value"
...@@ -282,11 +282,10 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin): ...@@ -282,11 +282,10 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
s = torch.clamp( s = torch.clamp(
s, min=1, max=self.config.sample_max_value s, min=1, max=self.config.sample_max_value
) # When clamped to min=1, equivalent to standard clipping to [-1, 1] ) # When clamped to min=1, equivalent to standard clipping to [-1, 1]
s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0
sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s"
sample = sample.reshape(batch_size, channels, height, width) sample = sample.reshape(batch_size, channels, *remaining_dims)
sample = sample.to(dtype) sample = sample.to(dtype)
return sample return sample
......
...@@ -288,13 +288,13 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): ...@@ -288,13 +288,13 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
https://arxiv.org/abs/2205.11487 https://arxiv.org/abs/2205.11487
""" """
dtype = sample.dtype dtype = sample.dtype
batch_size, channels, height, width = sample.shape batch_size, channels, *remaining_dims = sample.shape
if dtype not in (torch.float32, torch.float64): if dtype not in (torch.float32, torch.float64):
sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half
# Flatten sample for doing quantile calculation along each image # Flatten sample for doing quantile calculation along each image
sample = sample.reshape(batch_size, channels * height * width) sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))
abs_sample = sample.abs() # "a certain percentile absolute pixel value" abs_sample = sample.abs() # "a certain percentile absolute pixel value"
...@@ -302,11 +302,10 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): ...@@ -302,11 +302,10 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
s = torch.clamp( s = torch.clamp(
s, min=1, max=self.config.sample_max_value s, min=1, max=self.config.sample_max_value
) # When clamped to min=1, equivalent to standard clipping to [-1, 1] ) # When clamped to min=1, equivalent to standard clipping to [-1, 1]
s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0
sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s"
sample = sample.reshape(batch_size, channels, height, width) sample = sample.reshape(batch_size, channels, *remaining_dims)
sample = sample.to(dtype) sample = sample.to(dtype)
return sample return sample
......
...@@ -298,13 +298,13 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin): ...@@ -298,13 +298,13 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
https://arxiv.org/abs/2205.11487 https://arxiv.org/abs/2205.11487
""" """
dtype = sample.dtype dtype = sample.dtype
batch_size, channels, height, width = sample.shape batch_size, channels, *remaining_dims = sample.shape
if dtype not in (torch.float32, torch.float64): if dtype not in (torch.float32, torch.float64):
sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half
# Flatten sample for doing quantile calculation along each image # Flatten sample for doing quantile calculation along each image
sample = sample.reshape(batch_size, channels * height * width) sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))
abs_sample = sample.abs() # "a certain percentile absolute pixel value" abs_sample = sample.abs() # "a certain percentile absolute pixel value"
...@@ -312,11 +312,10 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin): ...@@ -312,11 +312,10 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
s = torch.clamp( s = torch.clamp(
s, min=1, max=self.config.sample_max_value s, min=1, max=self.config.sample_max_value
) # When clamped to min=1, equivalent to standard clipping to [-1, 1] ) # When clamped to min=1, equivalent to standard clipping to [-1, 1]
s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0
sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s"
sample = sample.reshape(batch_size, channels, height, width) sample = sample.reshape(batch_size, channels, *remaining_dims)
sample = sample.to(dtype) sample = sample.to(dtype)
return sample return sample
......
...@@ -302,13 +302,13 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin): ...@@ -302,13 +302,13 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
https://arxiv.org/abs/2205.11487 https://arxiv.org/abs/2205.11487
""" """
dtype = sample.dtype dtype = sample.dtype
batch_size, channels, height, width = sample.shape batch_size, channels, *remaining_dims = sample.shape
if dtype not in (torch.float32, torch.float64): if dtype not in (torch.float32, torch.float64):
sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half
# Flatten sample for doing quantile calculation along each image # Flatten sample for doing quantile calculation along each image
sample = sample.reshape(batch_size, channels * height * width) sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))
abs_sample = sample.abs() # "a certain percentile absolute pixel value" abs_sample = sample.abs() # "a certain percentile absolute pixel value"
...@@ -316,11 +316,10 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin): ...@@ -316,11 +316,10 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
s = torch.clamp( s = torch.clamp(
s, min=1, max=self.config.sample_max_value s, min=1, max=self.config.sample_max_value
) # When clamped to min=1, equivalent to standard clipping to [-1, 1] ) # When clamped to min=1, equivalent to standard clipping to [-1, 1]
s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0
sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s"
sample = sample.reshape(batch_size, channels, height, width) sample = sample.reshape(batch_size, channels, *remaining_dims)
sample = sample.to(dtype) sample = sample.to(dtype)
return sample return sample
......
...@@ -282,13 +282,13 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin): ...@@ -282,13 +282,13 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
https://arxiv.org/abs/2205.11487 https://arxiv.org/abs/2205.11487
""" """
dtype = sample.dtype dtype = sample.dtype
batch_size, channels, height, width = sample.shape batch_size, channels, *remaining_dims = sample.shape
if dtype not in (torch.float32, torch.float64): if dtype not in (torch.float32, torch.float64):
sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half
# Flatten sample for doing quantile calculation along each image # Flatten sample for doing quantile calculation along each image
sample = sample.reshape(batch_size, channels * height * width) sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))
abs_sample = sample.abs() # "a certain percentile absolute pixel value" abs_sample = sample.abs() # "a certain percentile absolute pixel value"
...@@ -296,11 +296,10 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin): ...@@ -296,11 +296,10 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
s = torch.clamp( s = torch.clamp(
s, min=1, max=self.config.sample_max_value s, min=1, max=self.config.sample_max_value
) # When clamped to min=1, equivalent to standard clipping to [-1, 1] ) # When clamped to min=1, equivalent to standard clipping to [-1, 1]
s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0
sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s"
sample = sample.reshape(batch_size, channels, height, width) sample = sample.reshape(batch_size, channels, *remaining_dims)
sample = sample.to(dtype) sample = sample.to(dtype)
return sample return sample
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment