Unverified Commit 86294d3c authored by co63oc, committed by GitHub

Fix typos in docs and comments (#11416)



* Fix typos in docs and comments

* Apply style fixes

---------
Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
parent d70f8ee1
......@@ -1782,7 +1782,7 @@ class KolorsControlNetInpaintPipeline(
)
if guess_mode and self.do_classifier_free_guidance:
# Infered ControlNet only for the conditional batch.
# Inferred ControlNet only for the conditional batch.
# To apply the output of ControlNet to both the unconditional and conditional batches,
# add 0 to the unconditional batch to keep it unchanged.
down_block_res_samples = [torch.cat([torch.zeros_like(d), d]) for d in down_block_res_samples]
......
......@@ -559,7 +559,7 @@ class FabricPipeline(DiffusionPipeline):
End point for providing feedback (between 0 and 1).
min_weight (`float`, *optional*, defaults to `.05`):
Minimum weight for feedback.
max_weight (`float`, *optional*, defults tp `1.0`):
max_weight (`float`, *optional*, defaults to `1.0`):
Maximum weight for feedback.
neg_scale (`float`, *optional*, defaults to `.5`):
Scale factor for negative feedback.
......
......@@ -118,7 +118,7 @@ EXAMPLE_DOC_STRING = """
>>> # Here we need to use the pipeline's internal unet model
>>> pipe.unet = pipe.unet_model.from_pretrained(model_id, subfolder="unet", variant="fp16", use_safetensors=True)
>>>
>>> # Load aditional layers to the model
>>> # Load additional layers to the model
>>> pipe.unet.load_additional_layers(weight_path="proc_data/faithdiff/FaithDiff.bin", dtype=dtype)
>>>
>>> # Enable vae tiling
......
......@@ -72,7 +72,7 @@ class GaussianSmoothing(nn.Module):
"""
Copied from official repo: https://github.com/showlab/BoxDiff/blob/master/utils/gaussian_smoothing.py
Apply gaussian smoothing on a
1d, 2d or 3d tensor. Filtering is performed seperately for each channel
1d, 2d or 3d tensor. Filtering is performed separately for each channel
in the input using a depthwise convolution.
Arguments:
channels (int, sequence): Number of channels of the input tensors. Output will
......
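For reference, the docstring above describes standard depthwise Gaussian smoothing: each channel is filtered independently by setting `groups` equal to the channel count. A minimal 2d sketch of that idea (not the BoxDiff implementation; the kernel size and sigma below are illustrative):

```python
import torch
import torch.nn.functional as F


def gaussian_blur_2d(x: torch.Tensor, kernel_size: int = 3, sigma: float = 0.5) -> torch.Tensor:
    """Smooth a (batch, channels, height, width) tensor, one channel at a time."""
    coords = torch.arange(kernel_size, dtype=x.dtype, device=x.device) - (kernel_size - 1) / 2
    gauss = torch.exp(-(coords**2) / (2 * sigma**2))
    gauss = gauss / gauss.sum()
    kernel_2d = torch.outer(gauss, gauss)  # separable 1d kernels combined into a 2d kernel
    channels = x.shape[1]
    weight = kernel_2d.view(1, 1, kernel_size, kernel_size).repeat(channels, 1, 1, 1)
    # groups=channels makes the convolution depthwise, i.e. per-channel filtering.
    return F.conv2d(x, weight, padding=kernel_size // 2, groups=channels)
```

For example, `gaussian_blur_2d(torch.randn(1, 4, 64, 64))` returns a smoothed tensor of the same shape.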
......@@ -1509,7 +1509,7 @@ class StableDiffusionXL_AE_Pipeline(
add_time_ids = add_time_ids.repeat(batch_size, 1).to(DEVICE)
# interative sampling
# interactive sampling
self.scheduler.set_timesteps(num_inference_steps)
latents_list = [latents]
pred_x0_list = []
......@@ -1548,7 +1548,7 @@ class StableDiffusionXL_AE_Pipeline(
x: torch.FloatTensor,
):
"""
predict the sampe the next step in the denoise process.
predict the sample at the next step of the denoising process.
"""
ref_noise = model_output[:1, :, :, :].expand(model_output.shape)
alpha_prod_t = self.scheduler.alphas_cumprod[timestep]
......
......@@ -132,7 +132,7 @@ def _preprocess_adapter_image(image, height, width):
image = torch.cat(image, dim=0)
else:
raise ValueError(
f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but recive: {image[0].ndim}"
f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but receive: {image[0].ndim}"
)
return image
......
......@@ -150,7 +150,7 @@ def _preprocess_adapter_image(image, height, width):
image = torch.cat(image, dim=0)
else:
raise ValueError(
f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but recive: {image[0].ndim}"
f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but receive: {image[0].ndim}"
)
return image
......
......@@ -220,7 +220,7 @@ class RegionalPromptingStableDiffusionPipeline(StableDiffusionPipeline):
revers = True
def pcallback(s_self, step: int, timestep: int, latents: torch.Tensor, selfs=None):
if "PRO" in mode: # in Prompt mode, make masks from sum of attension maps
if "PRO" in mode: # in Prompt mode, make masks from sum of attention maps
self.step = step
if len(self.attnmaps_sizes) > 3:
......@@ -552,9 +552,9 @@ def get_attn_maps(self, attn):
def reset_attnmaps(self): # init parameters in every batch
self.step = 0
self.attnmaps = {} # maked from attention maps
self.attnmaps = {} # made from attention maps
self.attnmaps_sizes = [] # height,width set of u-net blocks
self.attnmasks = {} # maked from attnmaps for regions
self.attnmasks = {} # made from attnmaps for regions
self.maskready = False
self.history = {}
......
......@@ -97,7 +97,7 @@ class SdeDragPipeline(DiffusionPipeline):
steps (`int`, *optional*, defaults to 200):
The number of sampling iterations.
step_size (`int`, *optional*, defaults to 2):
The drag diatance of each drag step.
The drag distance of each drag step.
image_scale (`float`, *optional*, defaults to 0.3):
To avoid duplicating the content, use image_scale to perturb the source.
adapt_radius (`int`, *optional*, defaults to 5):
......
......@@ -284,7 +284,7 @@ class UnCLIPImageInterpolationPipeline(DiffusionPipeline):
)
else:
raise AssertionError(
f"Expected 'image' or 'image_embeddings' to be not None with types List[PIL.Image] or torch.Tensor respectively. Received {type(image)} and {type(image_embeddings)} repsectively"
f"Expected 'image' or 'image_embeddings' to be not None with types List[PIL.Image] or torch.Tensor respectively. Received {type(image)} and {type(image_embeddings)} respectively"
)
original_image_embeddings = self._encode_image(
......
......@@ -1012,7 +1012,7 @@ def main(args):
unet = get_peft_model(unet, lora_config)
# 9. Handle mixed precision and device placement
# For mixed precision training we cast all non-trainable weigths to half-precision
# For mixed precision training we cast all non-trainable weights to half-precision
# as these weights are only used for inference, keeping weights in full precision is not required.
weight_dtype = torch.float32
if accelerator.mixed_precision == "fp16":
......
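The comment in this hunk refers to the usual pattern in these training scripts: derive an inference dtype from `accelerator.mixed_precision` and cast only the frozen modules to it. A hedged sketch of that pattern (the helper name and the `frozen_modules` argument are illustrative, not part of this diff):

```python
import torch


def cast_frozen_modules(accelerator, frozen_modules):
    """Move non-trainable modules to the mixed-precision dtype; trainable params stay fp32."""
    weight_dtype = torch.float32
    if accelerator.mixed_precision == "fp16":
        weight_dtype = torch.float16
    elif accelerator.mixed_precision == "bf16":
        weight_dtype = torch.bfloat16
    # Frozen weights are only used for inference, so half precision is sufficient.
    for module in frozen_modules:
        module.to(accelerator.device, dtype=weight_dtype)
    return weight_dtype
```

In the scripts touched here the same cast is done inline, typically on modules such as the VAE and text encoders, while the parameters being trained are kept in full precision.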
......@@ -829,7 +829,7 @@ def main(args):
)
# 8. Handle mixed precision and device placement
# For mixed precision training we cast all non-trainable weigths to half-precision
# For mixed precision training we cast all non-trainable weights to half-precision
# as these weights are only used for inference, keeping weights in full precision is not required.
weight_dtype = torch.float32
if accelerator.mixed_precision == "fp16":
......
......@@ -1026,7 +1026,7 @@ def main(args):
unet = get_peft_model(unet, lora_config)
# 9. Handle mixed precision and device placement
# For mixed precision training we cast all non-trainable weigths to half-precision
# For mixed precision training we cast all non-trainable weights to half-precision
# as these weights are only used for inference, keeping weights in full precision is not required.
weight_dtype = torch.float32
if accelerator.mixed_precision == "fp16":
......
......@@ -962,7 +962,7 @@ def main(args):
)
# 9. Handle mixed precision and device placement
# For mixed precision training we cast all non-trainable weigths to half-precision
# For mixed precision training we cast all non-trainable weights to half-precision
# as these weights are only used for inference, keeping weights in full precision is not required.
weight_dtype = torch.float32
if accelerator.mixed_precision == "fp16":
......
......@@ -1021,7 +1021,7 @@ def main(args):
)
# 9. Handle mixed precision and device placement
# For mixed precision training we cast all non-trainable weigths to half-precision
# For mixed precision training we cast all non-trainable weights to half-precision
# as these weights are only used for inference, keeping weights in full precision is not required.
weight_dtype = torch.float32
if accelerator.mixed_precision == "fp16":
......
......@@ -411,7 +411,7 @@ export CAPTION_COLUMN='caption_column'
export CACHE_DIR="/data/train_csr/.cache/huggingface/"
export OUTPUT_DIR='/data/train_csr/FLUX/MODEL_OUT/'$MODEL_TYPE
# The first step is to use Python to precompute all caches.Replace the first line below with this line. (I am not sure why using acclerate would cause problems.)
# The first step is to use Python to precompute all caches. Replace the first line below with this line. (I am not sure why using accelerate would cause problems.)
CUDA_VISIBLE_DEVICES=0 python3 train_controlnet_flux.py \
......
......@@ -173,13 +173,13 @@ accelerate launch train_dreambooth_lora_flux.py \
### Target Modules
When LoRA was first adapted from language models to diffusion models, it was applied to the cross-attention layers in the Unet that relate the image representations with the prompts that describe them.
More recently, SOTA text-to-image diffusion models replaced the Unet with a diffusion Transformer(DiT). With this change, we may also want to explore
applying LoRA training onto different types of layers and blocks. To allow more flexibility and control over the targeted modules we added `--lora_layers`- in which you can specify in a comma seperated string
applying LoRA training onto different types of layers and blocks. To allow more flexibility and control over the targeted modules we added `--lora_layers`- in which you can specify in a comma separated string
the exact modules for LoRA training. Here are some examples of target modules you can provide:
- for attention only layers: `--lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0"`
- to train the same modules as in the fal trainer: `--lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0,attn.add_k_proj,attn.add_q_proj,attn.add_v_proj,attn.to_add_out,ff.net.0.proj,ff.net.2,ff_context.net.0.proj,ff_context.net.2"`
- to train the same modules as in ostris ai-toolkit / replicate trainer: `--lora_blocks="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0,attn.add_k_proj,attn.add_q_proj,attn.add_v_proj,attn.to_add_out,ff.net.0.proj,ff.net.2,ff_context.net.0.proj,ff_context.net.2,norm1_context.linear, norm1.linear,norm.linear,proj_mlp,proj_out"`
> [!NOTE]
> `--lora_layers` can also be used to specify which **blocks** to apply LoRA training to. To do so, simply add a block prefix to each layer in the comma seperated string:
> `--lora_layers` can also be used to specify which **blocks** to apply LoRA training to. To do so, simply add a block prefix to each layer in the comma separated string:
> **single DiT blocks**: to target the ith single transformer block, add the prefix `single_transformer_blocks.i`, e.g. - `single_transformer_blocks.i.attn.to_k`
> **MMDiT blocks**: to target the ith MMDiT block, add the prefix `transformer_blocks.i`, e.g. - `transformer_blocks.i.attn.to_k`
> [!NOTE]
......
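For context, a comma-separated `--lora_layers` value of this kind is typically split on commas and passed to peft's `LoraConfig` as `target_modules`; a minimal sketch under that assumption (the rank and the example layer list are illustrative, not taken from this diff):

```python
from peft import LoraConfig

lora_layers = "attn.to_k,attn.to_q,attn.to_v,attn.to_out.0"  # e.g. the attention-only preset above
target_modules = [layer.strip() for layer in lora_layers.split(",")]

lora_config = LoraConfig(
    r=16,
    lora_alpha=16,
    init_lora_weights="gaussian",
    target_modules=target_modules,
)
# transformer.add_adapter(lora_config) would then attach LoRA weights to exactly these modules.
```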
......@@ -107,7 +107,7 @@ To better track our training experiments, we're using the following flags in the
Additionally, we welcome you to explore the following CLI arguments:
* `--lora_layers`: The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only.
* `--lora_layers`: The transformer modules to apply LoRA training on. Please specify the layers in a comma-separated string. E.g. - "to_k,to_q,to_v" will result in LoRA training of attention layers only.
* `--rank`: The rank of the LoRA layers. The higher the rank, the more parameters are trained. The default is 16.
We provide several options for memory optimization:
......
......@@ -113,7 +113,7 @@ To better track our training experiments, we're using the following flags in the
Additionally, we welcome you to explore the following CLI arguments:
* `--lora_layers`: The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only.
* `--lora_layers`: The transformer modules to apply LoRA training on. Please specify the layers in a comma-separated string. E.g. - "to_k,to_q,to_v" will result in LoRA training of attention layers only.
* `--system_prompt`: A custom system prompt to provide additional personality to the model.
* `--max_sequence_length`: Maximum sequence length to use for text embeddings.
......
......@@ -113,7 +113,7 @@ To better track our training experiments, we're using the following flags in the
Additionally, we welcome you to explore the following CLI arguments:
* `--lora_layers`: The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only.
* `--lora_layers`: The transformer modules to apply LoRA training on. Please specify the layers in a comma-separated string. E.g. - "to_k,to_q,to_v" will result in LoRA training of attention layers only.
* `--complex_human_instruction`: Instructions for complex human attention as shown in [here](https://github.com/NVlabs/Sana/blob/main/configs/sana_app_config/Sana_1600M_app.yaml#L55).
* `--max_sequence_length`: Maximum sequence length to use for text embeddings.
......