enable stable-xl textual inversion (#6421)

* enable stable-xl textual inversion * check if optimizer_2 exists * check text_encoder_2 before using * add textual inversion for sdxl in a single file * fix style * fix example style * reset for error changes * add readme for sdxl * fix style * disable autocast as it will cause cast error when weight_dtype=bf16 * fix spelling error * fix style and readme and 8bit optimizer * add README_sdxl.md link * add tracker key on log_validation * run style * rm the second center crop --------- Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>

enable stable-xl textual inversion (#6421)
* enable stable-xl textual inversion * check if optimizer_2 exists * check text_encoder_2 before using * add textual inversion for sdxl in a single file * fix style * fix example style * reset for error changes * add readme for sdxl * fix style * disable autocast as it will cause cast error when weight_dtype=bf16 * fix spelling error * fix style and readme and 8bit optimizer * add README_sdxl.md link * add tracker key on log_validation * run style * rm the second center crop --------- Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
aa1797e1 · jiqing-feng · GitHub · 5bacc2f5 · aa1797e1 · aa1797e1
Unverified Commit aa1797e1 authored Jan 09, 2024 by jiqing-feng Committed by GitHub Jan 09, 2024
4 changed files
--- a/examples/textual_inversion/README.md
+++ b/examples/textual_inversion/README.md
@@ -60,6 +60,8 @@ Now we can launch the training using:

 **___Note: Change the `resolution` to 768 if you are using the [stable-diffusion-2](https://huggingface.co/stabilityai/stable-diffusion-2) 768x768 model.___**

+**___Note: If you are using [stable-diffusion-xl](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0), follow [README_sdxl.md](./README_sdxl.md) instead.___**
+
 ```bash
 export MODEL_NAME="runwayml/stable-diffusion-v1-5"
 export DATA_DIR="./cat"

--- a/examples/textual_inversion/README_sdxl.md
+++ b/examples/textual_inversion/README_sdxl.md
+## Textual Inversion fine-tuning example for SDXL
+
+```bash
+export MODEL_NAME="stabilityai/stable-diffusion-xl-base-1.0"
+export DATA_DIR="./cat"
+
+accelerate launch textual_inversion_sdxl.py \
+  --pretrained_model_name_or_path=$MODEL_NAME \
+  --train_data_dir=$DATA_DIR \
+  --learnable_property="object" \
+  --placeholder_token="<cat-toy>" \
+  --initializer_token="toy" \
+  --mixed_precision="bf16" \
+  --resolution=768 \
+  --train_batch_size=1 \
+  --gradient_accumulation_steps=4 \
+  --max_train_steps=500 \
+  --learning_rate=5.0e-04 \
+  --scale_lr \
+  --lr_scheduler="constant" \
+  --lr_warmup_steps=0 \
+  --save_as_full_pipeline \
+  --output_dir="./textual_inversion_cat_sdxl"
+```
+
+For now, only training of the first text encoder is supported; the second text encoder is not trained.
\ No newline at end of file
--- a/examples/textual_inversion/test_textual_inversion_sdxl.py
+++ b/examples/textual_inversion/test_textual_inversion_sdxl.py
+# coding=utf-8
+# Copyright 2023 HuggingFace Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import os
+import sys
+import tempfile
+
+
+sys.path.append("..")
+from test_examples_utils import ExamplesTestsAccelerate, run_command  # noqa: E402
+
+
+logging.basicConfig(level=logging.DEBUG)
+
+logger = logging.getLogger()
+stream_handler = logging.StreamHandler(sys.stdout)
+logger.addHandler(stream_handler)
+
+
+class TextualInversionSdxl(ExamplesTestsAccelerate):
+    def test_textual_inversion_sdxl(self):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            test_args = f"""
+                examples/textual_inversion/textual_inversion_sdxl.py
+                --pretrained_model_name_or_path hf-internal-testing/tiny-sdxl-pipe
+                --train_data_dir docs/source/en/imgs
+                --learnable_property object
+                --placeholder_token <cat-toy>
+                --initializer_token a
+                --save_steps 1
+                --num_vectors 2
+                --resolution 64
+                --train_batch_size 1
+                --gradient_accumulation_steps 1
+                --max_train_steps 2
+                --learning_rate 5.0e-04
+                --scale_lr
+                --lr_scheduler constant
+                --lr_warmup_steps 0
+                --output_dir {tmpdir}
+                """.split()
+
+            run_command(self._launch_args + test_args)
+            # save_pretrained smoke test
+            self.assertTrue(os.path.isfile(os.path.join(tmpdir, "learned_embeds.safetensors")))
+
+    def test_textual_inversion_sdxl_checkpointing(self):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            test_args = f"""
+                examples/textual_inversion/textual_inversion_sdxl.py
+                --pretrained_model_name_or_path hf-internal-testing/tiny-sdxl-pipe
+                --train_data_dir docs/source/en/imgs
+                --learnable_property object
+                --placeholder_token <cat-toy>
+                --initializer_token a
+                --save_steps 1
+                --num_vectors 2
+                --resolution 64
+                --train_batch_size 1
+                --gradient_accumulation_steps 1
+                --max_train_steps 3
+                --learning_rate 5.0e-04
+                --scale_lr
+                --lr_scheduler constant
+                --lr_warmup_steps 0
+                --output_dir {tmpdir}
+                --checkpointing_steps=1
+                --checkpoints_total_limit=2
+                """.split()
+
+            run_command(self._launch_args + test_args)
+
+            # check checkpoint directories exist
+            self.assertEqual(
+                {x for x in os.listdir(tmpdir) if "checkpoint" in x},
+                {"checkpoint-2", "checkpoint-3"},
+            )
+
+    def test_textual_inversion_sdxl_checkpointing_checkpoints_total_limit_removes_multiple_checkpoints(self):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            test_args = f"""
+                examples/textual_inversion/textual_inversion_sdxl.py
+                --pretrained_model_name_or_path hf-internal-testing/tiny-sdxl-pipe
+                --train_data_dir docs/source/en/imgs
+                --learnable_property object
+                --placeholder_token <cat-toy>
+                --initializer_token a
+                --save_steps 1
+                --num_vectors 2
+                --resolution 64
+                --train_batch_size 1
+                --gradient_accumulation_steps 1
+                --max_train_steps 2
+                --learning_rate 5.0e-04
+                --scale_lr
+                --lr_scheduler constant
+                --lr_warmup_steps 0
+                --output_dir {tmpdir}
+                --checkpointing_steps=1
+                """.split()
+
+            run_command(self._launch_args + test_args)
+
+            # check checkpoint directories exist
+            self.assertEqual(
+                {x for x in os.listdir(tmpdir) if "checkpoint" in x},
+                {"checkpoint-1", "checkpoint-2"},
+            )
+
+            resume_run_args = f"""
+                examples/textual_inversion/textual_inversion_sdxl.py
+                --pretrained_model_name_or_path hf-internal-testing/tiny-sdxl-pipe
+                --train_data_dir docs/source/en/imgs
+                --learnable_property object
+                --placeholder_token <cat-toy>
+                --initializer_token a
+                --save_steps 1
+                --num_vectors 2
+                --resolution 64
+                --train_batch_size 1
+                --gradient_accumulation_steps 1
+                --max_train_steps 2
+                --learning_rate 5.0e-04
+                --scale_lr
+                --lr_scheduler constant
+                --lr_warmup_steps 0
+                --output_dir {tmpdir}
+                --checkpointing_steps=1
+                --resume_from_checkpoint=checkpoint-2
+                --checkpoints_total_limit=2
+                """.split()
+
+            run_command(self._launch_args + resume_run_args)
+
+            # check checkpoint directories exist
+            self.assertEqual(
+                {x for x in os.listdir(tmpdir) if "checkpoint" in x},
+                {"checkpoint-2", "checkpoint-3"},
+            )
--- a/examples/textual_inversion/textual_inversion_sdxl.py
+++ b/examples/textual_inversion/textual_inversion_sdxl.py