@@ -344,7 +344,7 @@ class ControlNetModel(ModelMixin, ConfigMixin):
             `processor (`dict` of `AttentionProcessor` or `AttentionProcessor`):
                 The instantiated processor class or a dictionary of processor classes that will be set as the processor
                 of **all** `Attention` layers.
-            In case `processor` is a dict, the key needs to define the path to the corresponding cross attention processor. This is strongly recommended when setting trainablae attention processors.:
+            In case `processor` is a dict, the key needs to define the path to the corresponding cross attention processor. This is strongly recommended when setting trainable attention processors.:

         """
         count = len(self.attn_processors.keys())
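
Reviewer aid, not part of the patch: a minimal sketch of the two calling conventions this docstring describes. It assumes diffusers' public ControlNetModel and AttnProcessor (the processor class has lived in different modules across diffusers versions) and a real example checkpoint:

from diffusers import ControlNetModel
from diffusers.models.attention_processor import AttnProcessor  # module path varies by diffusers version

controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny")

# A single instance is applied to **all** Attention layers.
controlnet.set_attn_processor(AttnProcessor())

# A dict needs one entry per layer, keyed by the processor's module path,
# e.g. "down_blocks.0.attentions.0.transformer_blocks.0.attn1.processor".
controlnet.set_attn_processor({name: AttnProcessor() for name in controlnet.attn_processors})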
...
@@ -379,24 +379,24 @@ class ControlNetModel(ModelMixin, ConfigMixin):
         Args:
             slice_size (`str` or `int` or `list(int)`, *optional*, defaults to `"auto"`):
                 When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If
-                `"max"`, maxium amount of memory will be saved by running only one slice at a time. If a number is
+                `"max"`, maximum amount of memory will be saved by running only one slice at a time. If a number is
                 provided, uses as many slices as `attention_head_dim // slice_size`. In this case, `attention_head_dim`
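
A second aside, not part of the patch: what the three accepted `slice_size` forms mean in practice, sketched against the model-level set_attention_slice method (the 64-dim head size is illustrative):

# Assuming `model` is e.g. the ControlNetModel above, with attention_head_dim = 64:
model.set_attention_slice("auto")  # halves the head dimension, so attention runs in two steps
model.set_attention_slice("max")   # one slice at a time: maximum memory savings, slowest
model.set_attention_slice(16)      # attention_head_dim // slice_size = 4 slices per layer;
                                   # 64 must be divisible by 16 here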
...
@@ -105,7 +105,7 @@ class Transformer2DModel(ModelMixin, ConfigMixin):
         self.attention_head_dim = attention_head_dim
         inner_dim = num_attention_heads * attention_head_dim

-        # 1. Transformer2DModel can process both standard continous images of shape `(batch_size, num_channels, width, height)` as well as quantized image embeddings of shape `(batch_size, num_image_vectors)`
+        # 1. Transformer2DModel can process both standard continuous images of shape `(batch_size, num_channels, width, height)` as well as quantized image embeddings of shape `(batch_size, num_image_vectors)`
         # Define whether input is continuous or discrete depending on configuration
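
Another aside, not part of the patch: the two input modes this comment distinguishes, sketched with diffusers' Transformer2DModel constructor (the sizes are illustrative):

from diffusers import Transformer2DModel

# Continuous: `in_channels` is set; forward() takes latents of shape (batch_size, num_channels, height, width).
continuous = Transformer2DModel(num_attention_heads=8, attention_head_dim=64, in_channels=320)

# Discrete: `num_vector_embeds` is set; forward() takes integer indices of shape (batch_size, num_image_vectors).
discrete = Transformer2DModel(
    num_attention_heads=8,
    attention_head_dim=64,
    num_vector_embeds=4097,  # VQ codebook size plus one class for the masked latent pixel
    sample_size=32,          # required in the discrete case
)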
...
@@ -197,7 +197,7 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
             timestep_input_dim = block_out_channels[0]
         else:
             raise ValueError(
-                f"{time_embedding_type} does not exist. Pleaes make sure to use one of `fourier` or `positional`."
+                f"{time_embedding_type} does not exist. Please make sure to use one of `fourier` or `positional`."
             )

         self.time_embedding = TimestepEmbedding(
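
One last aside, not part of the patch: a quick sketch of the accepted `time_embedding_type` values and the error path this hunk touches (default constructor arguments otherwise):

from diffusers import UNet2DConditionModel

unet = UNet2DConditionModel(time_embedding_type="positional")  # default: sinusoidal Timesteps projection
# time_embedding_type="fourier" selects GaussianFourierProjection instead.

try:
    UNet2DConditionModel(time_embedding_type="spectral")  # hypothetical invalid value
except ValueError as err:
    print(err)  # spectral does not exist. Please make sure to use one of `fourier` or `positional`.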
...
@@ -391,7 +391,7 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
             `processor (`dict` of `AttentionProcessor` or `AttentionProcessor`):
                 The instantiated processor class or a dictionary of processor classes that will be set as the processor
                 of **all** `Attention` layers.
-            In case `processor` is a dict, the key needs to define the path to the corresponding cross attention processor. This is strongly recommended when setting trainablae attention processors.:
+            In case `processor` is a dict, the key needs to define the path to the corresponding cross attention processor. This is strongly recommended when setting trainable attention processors.:

         """
         count = len(self.attn_processors.keys())
...
@@ -425,24 +425,24 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
         Args:
             slice_size (`str` or `int` or `list(int)`, *optional*, defaults to `"auto"`):
                 When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If
-                `"max"`, maxium amount of memory will be saved by running only one slice at a time. If a number is
+                `"max"`, maximum amount of memory will be saved by running only one slice at a time. If a number is
                 provided, uses as many slices as `attention_head_dim // slice_size`. In this case, `attention_head_dim`
...
@@ -287,7 +287,7 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
             timestep_input_dim = block_out_channels[0]
         else:
             raise ValueError(
-                f"{time_embedding_type} does not exist. Pleaes make sure to use one of `fourier` or `positional`."
+                f"{time_embedding_type} does not exist. Please make sure to use one of `fourier` or `positional`."
             )

         self.time_embedding = TimestepEmbedding(
...
@@ -481,7 +481,7 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
             `processor (`dict` of `AttentionProcessor` or `AttentionProcessor`):
                 The instantiated processor class or a dictionary of processor classes that will be set as the processor
                 of **all** `Attention` layers.
-            In case `processor` is a dict, the key needs to define the path to the corresponding cross attention processor. This is strongly recommended when setting trainablae attention processors.:
+            In case `processor` is a dict, the key needs to define the path to the corresponding cross attention processor. This is strongly recommended when setting trainable attention processors.:

         """
         count = len(self.attn_processors.keys())
...
@@ -515,24 +515,24 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
         Args:
             slice_size (`str` or `int` or `list(int)`, *optional*, defaults to `"auto"`):
                 When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If
-                `"max"`, maxium amount of memory will be saved by running only one slice at a time. If a number is
+                `"max"`, maximum amount of memory will be saved by running only one slice at a time. If a number is
                 provided, uses as many slices as `attention_head_dim // slice_size`. In this case, `attention_head_dim`