Unverified Commit 86294d3c authored by co63oc, committed by GitHub

Fix typos in docs and comments (#11416)



* Fix typos in docs and comments

* Apply style fixes

---------
Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
parent d70f8ee1
......@@ -1782,7 +1782,7 @@ class KolorsControlNetInpaintPipeline(
)
if guess_mode and self.do_classifier_free_guidance:
# Infered ControlNet only for the conditional batch.
# Inferred ControlNet only for the conditional batch.
# To apply the output of ControlNet to both the unconditional and conditional batches,
# add 0 to the unconditional batch to keep it unchanged.
down_block_res_samples = [torch.cat([torch.zeros_like(d), d]) for d in down_block_res_samples]
......
......@@ -559,7 +559,7 @@ class FabricPipeline(DiffusionPipeline):
End point for providing feedback (between 0 and 1).
min_weight (`float`, *optional*, defaults to `.05`):
Minimum weight for feedback.
max_weight (`float`, *optional*, defults tp `1.0`):
max_weight (`float`, *optional*, defaults to `1.0`):
Maximum weight for feedback.
neg_scale (`float`, *optional*, defaults to `.5`):
Scale factor for negative feedback.
......
......@@ -118,7 +118,7 @@ EXAMPLE_DOC_STRING = """
>>> # Here we need to use the pipeline's internal unet model
>>> pipe.unet = pipe.unet_model.from_pretrained(model_id, subfolder="unet", variant="fp16", use_safetensors=True)
>>>
>>> # Load aditional layers to the model
>>> # Load additional layers to the model
>>> pipe.unet.load_additional_layers(weight_path="proc_data/faithdiff/FaithDiff.bin", dtype=dtype)
>>>
>>> # Enable vae tiling
......
......@@ -72,7 +72,7 @@ class GaussianSmoothing(nn.Module):
"""
Copied from official repo: https://github.com/showlab/BoxDiff/blob/master/utils/gaussian_smoothing.py
Apply gaussian smoothing on a
1d, 2d or 3d tensor. Filtering is performed seperately for each channel
1d, 2d or 3d tensor. Filtering is performed separately for each channel
in the input using a depthwise convolution.
Arguments:
channels (int, sequence): Number of channels of the input tensors. Output will
......
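For reference, the docstring above describes standard depthwise Gaussian smoothing: each channel is filtered independently by setting `groups` equal to the channel count. A minimal 2d sketch of that idea (not the BoxDiff implementation; the kernel size and sigma below are illustrative):

```python
import torch
import torch.nn.functional as F


def gaussian_blur_2d(x: torch.Tensor, kernel_size: int = 3, sigma: float = 0.5) -> torch.Tensor:
    """Smooth a (batch, channels, height, width) tensor, one channel at a time."""
    coords = torch.arange(kernel_size, dtype=x.dtype, device=x.device) - (kernel_size - 1) / 2
    gauss = torch.exp(-(coords**2) / (2 * sigma**2))
    gauss = gauss / gauss.sum()
    kernel_2d = torch.outer(gauss, gauss)  # separable 1d kernels combined into a 2d kernel
    channels = x.shape[1]
    weight = kernel_2d.view(1, 1, kernel_size, kernel_size).repeat(channels, 1, 1, 1)
    # groups=channels makes the convolution depthwise, i.e. per-channel filtering.
    return F.conv2d(x, weight, padding=kernel_size // 2, groups=channels)
```

For example, `gaussian_blur_2d(torch.randn(1, 4, 64, 64))` returns a smoothed tensor of the same shape.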
......@@ -1509,7 +1509,7 @@ class StableDiffusionXL_AE_Pipeline(
add_time_ids = add_time_ids.repeat(batch_size, 1).to(DEVICE)
# interative sampling
# interactive sampling
self.scheduler.set_timesteps(num_inference_steps)
latents_list = [latents]
pred_x0_list = []
......@@ -1548,7 +1548,7 @@ class StableDiffusionXL_AE_Pipeline(
x: torch.FloatTensor,
):
"""
predict the sampe the next step in the denoise process.
predict the sample at the next step of the denoising process.
"""
ref_noise = model_output[:1, :, :, :].expand(model_output.shape)
alpha_prod_t = self.scheduler.alphas_cumprod[timestep]
......
......@@ -132,7 +132,7 @@ def _preprocess_adapter_image(image, height, width):
image = torch.cat(image, dim=0)
else:
raise ValueError(
f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but recive: {image[0].ndim}"
f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but receive: {image[0].ndim}"
)
return image
......
......@@ -150,7 +150,7 @@ def _preprocess_adapter_image(image, height, width):
image = torch.cat(image, dim=0)
else:
raise ValueError(
f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but recive: {image[0].ndim}"
f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but receive: {image[0].ndim}"
)
return image
......
......@@ -220,7 +220,7 @@ class RegionalPromptingStableDiffusionPipeline(StableDiffusionPipeline):
revers = True
def pcallback(s_self, step: int, timestep: int, latents: torch.Tensor, selfs=None):
if "PRO" in mode: # in Prompt mode, make masks from sum of attension maps
if "PRO" in mode: # in Prompt mode, make masks from sum of attention maps
self.step = step
if len(self.attnmaps_sizes) > 3:
......@@ -552,9 +552,9 @@ def get_attn_maps(self, attn):
def reset_attnmaps(self): # init parameters in every batch
self.step = 0
self.attnmaps = {} # maked from attention maps
self.attnmaps = {} # made from attention maps
self.attnmaps_sizes = [] # height,width set of u-net blocks
self.attnmasks = {} # maked from attnmaps for regions
self.attnmasks = {} # made from attnmaps for regions
self.maskready = False
self.history = {}
......
......@@ -97,7 +97,7 @@ class SdeDragPipeline(DiffusionPipeline):
steps (`int`, *optional*, defaults to 200):
The number of sampling iterations.
step_size (`int`, *optional*, defaults to 2):
The drag diatance of each drag step.
The drag distance of each drag step.
image_scale (`float`, *optional*, defaults to 0.3):
To avoid duplicating the content, use image_scale to perturb the source.
adapt_radius (`int`, *optional*, defaults to 5):
......
......@@ -284,7 +284,7 @@ class UnCLIPImageInterpolationPipeline(DiffusionPipeline):
)
else:
raise AssertionError(
f"Expected 'image' or 'image_embeddings' to be not None with types List[PIL.Image] or torch.Tensor respectively. Received {type(image)} and {type(image_embeddings)} repsectively"
f"Expected 'image' or 'image_embeddings' to be not None with types List[PIL.Image] or torch.Tensor respectively. Received {type(image)} and {type(image_embeddings)} respectively"
)
original_image_embeddings = self._encode_image(
......
......@@ -1012,7 +1012,7 @@ def main(args):
unet = get_peft_model(unet, lora_config)
# 9. Handle mixed precision and device placement
# For mixed precision training we cast all non-trainable weigths to half-precision
# For mixed precision training we cast all non-trainable weights to half-precision
# as these weights are only used for inference, keeping weights in full precision is not required.
weight_dtype = torch.float32
if accelerator.mixed_precision == "fp16":
......
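The comment in this hunk refers to the usual pattern in these training scripts: derive an inference dtype from `accelerator.mixed_precision` and cast only the frozen modules to it. A hedged sketch of that pattern (the helper name and the `frozen_modules` argument are illustrative, not part of this diff):

```python
import torch


def cast_frozen_modules(accelerator, frozen_modules):
    """Move non-trainable modules to the mixed-precision dtype; trainable params stay fp32."""
    weight_dtype = torch.float32
    if accelerator.mixed_precision == "fp16":
        weight_dtype = torch.float16
    elif accelerator.mixed_precision == "bf16":
        weight_dtype = torch.bfloat16
    # Frozen weights are only used for inference, so half precision is sufficient.
    for module in frozen_modules:
        module.to(accelerator.device, dtype=weight_dtype)
    return weight_dtype
```

In the scripts touched here the same cast is done inline, typically on modules such as the VAE and text encoders, while the parameters being trained are kept in full precision.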
......@@ -829,7 +829,7 @@ def main(args):
)
# 8. Handle mixed precision and device placement
# For mixed precision training we cast all non-trainable weigths to half-precision
# For mixed precision training we cast all non-trainable weights to half-precision
# as these weights are only used for inference, keeping weights in full precision is not required.
weight_dtype = torch.float32
if accelerator.mixed_precision == "fp16":
......
......@@ -1026,7 +1026,7 @@ def main(args):
unet = get_peft_model(unet, lora_config)
# 9. Handle mixed precision and device placement
# For mixed precision training we cast all non-trainable weigths to half-precision
# For mixed precision training we cast all non-trainable weights to half-precision
# as these weights are only used for inference, keeping weights in full precision is not required.
weight_dtype = torch.float32
if accelerator.mixed_precision == "fp16":
......
......@@ -962,7 +962,7 @@ def main(args):
)
# 9. Handle mixed precision and device placement
# For mixed precision training we cast all non-trainable weigths to half-precision
# For mixed precision training we cast all non-trainable weights to half-precision
# as these weights are only used for inference, keeping weights in full precision is not required.
weight_dtype = torch.float32
if accelerator.mixed_precision == "fp16":
......
......@@ -1021,7 +1021,7 @@ def main(args):
)
# 9. Handle mixed precision and device placement
# For mixed precision training we cast all non-trainable weigths to half-precision
# For mixed precision training we cast all non-trainable weights to half-precision
# as these weights are only used for inference, keeping weights in full precision is not required.
weight_dtype = torch.float32
if accelerator.mixed_precision == "fp16":
......
......@@ -411,7 +411,7 @@ export CAPTION_COLUMN='caption_column'
export CACHE_DIR="/data/train_csr/.cache/huggingface/"
export OUTPUT_DIR='/data/train_csr/FLUX/MODEL_OUT/'$MODEL_TYPE
# The first step is to use Python to precompute all caches.Replace the first line below with this line. (I am not sure why using acclerate would cause problems.)
# The first step is to use Python to precompute all caches. Replace the first line below with this line. (I am not sure why using accelerate would cause problems.)
CUDA_VISIBLE_DEVICES=0 python3 train_controlnet_flux.py \
......
......@@ -173,13 +173,13 @@ accelerate launch train_dreambooth_lora_flux.py \
### Target Modules
When LoRA was first adapted from language models to diffusion models, it was applied to the cross-attention layers in the Unet that relate the image representations with the prompts that describe them.
More recently, SOTA text-to-image diffusion models replaced the Unet with a diffusion Transformer(DiT). With this change, we may also want to explore
applying LoRA training onto different types of layers and blocks. To allow more flexibility and control over the targeted modules we added `--lora_layers`- in which you can specify in a comma seperated string
applying LoRA training onto different types of layers and blocks. To allow more flexibility and control over the targeted modules we added `--lora_layers`- in which you can specify in a comma separated string
the exact modules for LoRA training. Here are some examples of target modules you can provide:
- for attention only layers: `--lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0"`
- to train the same modules as in the fal trainer: `--lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0,attn.add_k_proj,attn.add_q_proj,attn.add_v_proj,attn.to_add_out,ff.net.0.proj,ff.net.2,ff_context.net.0.proj,ff_context.net.2"`
- to train the same modules as in ostris ai-toolkit / replicate trainer: `--lora_blocks="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0,attn.add_k_proj,attn.add_q_proj,attn.add_v_proj,attn.to_add_out,ff.net.0.proj,ff.net.2,ff_context.net.0.proj,ff_context.net.2,norm1_context.linear, norm1.linear,norm.linear,proj_mlp,proj_out"`
> [!NOTE]
> `--lora_layers` can also be used to specify which **blocks** to apply LoRA training to. To do so, simply add a block prefix to each layer in the comma seperated string:
> `--lora_layers` can also be used to specify which **blocks** to apply LoRA training to. To do so, simply add a block prefix to each layer in the comma separated string:
> **single DiT blocks**: to target the ith single transformer block, add the prefix `single_transformer_blocks.i`, e.g. - `single_transformer_blocks.i.attn.to_k`
> **MMDiT blocks**: to target the ith MMDiT block, add the prefix `transformer_blocks.i`, e.g. - `transformer_blocks.i.attn.to_k`
> [!NOTE]
......
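For context, a comma-separated `--lora_layers` value of this kind is typically split on commas and passed to peft's `LoraConfig` as `target_modules`; a minimal sketch under that assumption (the rank and the example layer list are illustrative, not taken from this diff):

```python
from peft import LoraConfig

lora_layers = "attn.to_k,attn.to_q,attn.to_v,attn.to_out.0"  # e.g. the attention-only preset above
target_modules = [layer.strip() for layer in lora_layers.split(",")]

lora_config = LoraConfig(
    r=16,
    lora_alpha=16,
    init_lora_weights="gaussian",
    target_modules=target_modules,
)
# transformer.add_adapter(lora_config) would then attach LoRA weights to exactly these modules.
```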
......@@ -107,7 +107,7 @@ To better track our training experiments, we're using the following flags in the
Additionally, we welcome you to explore the following CLI arguments:
* `--lora_layers`: The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only.
* `--lora_layers`: The transformer modules to apply LoRA training on. Please specify the layers in a comma-separated string. E.g. - "to_k,to_q,to_v" will result in LoRA training of attention layers only.
* `--rank`: The rank of the LoRA layers. The higher the rank, the more parameters are trained. The default is 16.
We provide several options for memory optimization:
......
......@@ -113,7 +113,7 @@ To better track our training experiments, we're using the following flags in the
Additionally, we welcome you to explore the following CLI arguments:
* `--lora_layers`: The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only.
* `--lora_layers`: The transformer modules to apply LoRA training on. Please specify the layers in a comma-separated string. E.g. - "to_k,to_q,to_v" will result in LoRA training of attention layers only.
* `--system_prompt`: A custom system prompt to provide additional personality to the model.
* `--max_sequence_length`: Maximum sequence length to use for text embeddings.
......
......@@ -113,7 +113,7 @@ To better track our training experiments, we're using the following flags in the
Additionally, we welcome you to explore the following CLI arguments:
* `--lora_layers`: The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only.
* `--lora_layers`: The transformer modules to apply LoRA training on. Please specify the layers in a comma-separated string. E.g. - "to_k,to_q,to_v" will result in LoRA training of attention layers only.
* `--complex_human_instruction`: Instructions for complex human attention as shown in [here](https://github.com/NVlabs/Sana/blob/main/configs/sana_app_config/Sana_1600M_app.yaml#L55).
* `--max_sequence_length`: Maximum sequence length to use for text embeddings.
......