Unverified commit eade4308 authored by apolinário, committed by GitHub

Update IF name to XL (#3262)


Co-authored-by: multimodalart <joaopaulo.passos+multimodal@gmail.com>
parent fa31da29
@@ -29,7 +29,7 @@ Our work underscores the potential of larger UNet architectures in the first sta
 Before you can use IF, you need to accept its usage conditions. To do so:
 1. Make sure to have a [Hugging Face account](https://huggingface.co/join) and be logged in
-2. Accept the license on the model card of [DeepFloyd/IF-I-IF-v1.0](https://huggingface.co/DeepFloyd/IF-I-IF-v1.0). Accepting the license on the stage I model card will auto accept for the other IF models.
+2. Accept the license on the model card of [DeepFloyd/IF-I-XL-v1.0](https://huggingface.co/DeepFloyd/IF-I-XL-v1.0). Accepting the license on the stage I model card will auto accept for the other IF models.
 3. Make sure to login locally. Install `huggingface_hub`
 ```sh
 pip install huggingface_hub --upgrade
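The hunk above stops right after the `pip install` line, before the login call step 3 refers to. A minimal sketch of that step, assuming the standard `huggingface_hub` login flow (not part of this diff):

```py
from huggingface_hub import login

# Prompts for a Hugging Face access token and stores it locally
login()
```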
@@ -62,7 +62,7 @@ The following sections give more in-detail examples of how to use IF. Specifical
 **Available checkpoints**
 - *Stage-1*
-- [DeepFloyd/IF-I-IF-v1.0](https://huggingface.co/DeepFloyd/IF-I-IF-v1.0)
+- [DeepFloyd/IF-I-XL-v1.0](https://huggingface.co/DeepFloyd/IF-I-XL-v1.0)
 - [DeepFloyd/IF-I-L-v1.0](https://huggingface.co/DeepFloyd/IF-I-L-v1.0)
 - [DeepFloyd/IF-I-M-v1.0](https://huggingface.co/DeepFloyd/IF-I-M-v1.0)
@@ -90,7 +90,7 @@ from diffusers.utils import pt_to_pil
 import torch
 # stage 1
-stage_1 = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+stage_1 = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 stage_1.enable_model_cpu_offload()
 # stage 2
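The hunk ends at the `# stage 2` comment. For orientation, loading the later stages follows the same pattern; a sketch (not part of this diff), assuming the stage-2 checkpoint that appears elsewhere in this commit (`DeepFloyd/IF-II-L-v1.0`) and reuse of the stage-1 text encoder:

```py
# stage 2 (super-resolution); the T5 text encoder from stage 1 is reused, so it is not loaded again here
stage_2 = DiffusionPipeline.from_pretrained(
    "DeepFloyd/IF-II-L-v1.0", text_encoder=None, variant="fp16", torch_dtype=torch.float16
)
stage_2.enable_model_cpu_offload()
```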
@@ -162,7 +162,7 @@ original_image = Image.open(BytesIO(response.content)).convert("RGB")
 original_image = original_image.resize((768, 512))
 # stage 1
-stage_1 = IFImg2ImgPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+stage_1 = IFImg2ImgPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 stage_1.enable_model_cpu_offload()
 # stage 2
@@ -244,7 +244,7 @@ mask_image = Image.open(BytesIO(response.content))
 mask_image = mask_image
 # stage 1
-stage_1 = IFInpaintingPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+stage_1 = IFInpaintingPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 stage_1.enable_model_cpu_offload()
 # stage 2
@@ -305,7 +305,7 @@ In addition to being loaded with `from_pretrained`, Pipelines can also be loaded
 ```python
 from diffusers import IFPipeline, IFSuperResolutionPipeline
-pipe_1 = IFPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0")
+pipe_1 = IFPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0")
 pipe_2 = IFSuperResolutionPipeline.from_pretrained("DeepFloyd/IF-II-L-v1.0")
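The next hunk's context line shows only the tail end of this component-reuse pattern. A rough illustration of what it looks like in full (the actual lines sit outside the shown hunks; the class names below are the standard `diffusers` IF pipelines, used here as an assumption):

```py
from diffusers import IFImg2ImgPipeline, IFImg2ImgSuperResolutionPipeline

# Re-instantiate different pipeline classes from the already-loaded components
# instead of downloading and loading the weights a second time.
pipe_1 = IFImg2ImgPipeline(**pipe_1.components)
pipe_2 = IFImg2ImgSuperResolutionPipeline(**pipe_2.components)
```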
@@ -326,7 +326,7 @@ pipe_2 = IFInpaintingSuperResolutionPipeline(**pipe_2.components)
 The simplest optimization to run IF faster is to move all model components to the GPU.
 ```py
-pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 pipe.to("cuda")
 ```
@@ -352,7 +352,7 @@ the input image which also determines how many steps to run in the denoising pro
 A smaller number will vary the image less but run faster.
 ```py
-pipe = IFImg2ImgPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+pipe = IFImg2ImgPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 pipe.to("cuda")
 image = pipe(image=image, prompt="<prompt>", strength=0.3).images
@@ -364,7 +364,7 @@ with IF and it might not give expected results.
 ```py
 import torch
-pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 pipe.to("cuda")
 pipe.text_encoder = torch.compile(pipe.text_encoder)
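The hunk cuts off after the text-encoder line; in principle the same `torch.compile` call can be applied to the other large component as well. A sketch, with the same "not fully tested with IF" caveat the surrounding prose gives:

```py
# Compile the UNet too; like the text encoder, this is optional and may not
# interact well with every IF code path.
pipe.unet = torch.compile(pipe.unet)
```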
@@ -378,14 +378,14 @@ When optimizing for GPU memory, we can use the standard diffusers cpu offloading
 Either the model based CPU offloading,
 ```py
-pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 pipe.enable_model_cpu_offload()
 ```
 or the more aggressive layer based CPU offloading.
 ```py
-pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 pipe.enable_sequential_cpu_offload()
 ```
@@ -395,13 +395,13 @@ Additionally, T5 can be loaded in 8bit precision
 from transformers import T5EncoderModel
 text_encoder = T5EncoderModel.from_pretrained(
-    "DeepFloyd/IF-I-IF-v1.0", subfolder="text_encoder", device_map="auto", load_in_8bit=True, variant="8bit"
+    "DeepFloyd/IF-I-XL-v1.0", subfolder="text_encoder", device_map="auto", load_in_8bit=True, variant="8bit"
 )
 from diffusers import DiffusionPipeline
 pipe = DiffusionPipeline.from_pretrained(
-    "DeepFloyd/IF-I-IF-v1.0",
+    "DeepFloyd/IF-I-XL-v1.0",
     text_encoder=text_encoder,  # pass the previously instantiated 8bit text encoder
     unet=None,
     device_map="auto",
@@ -422,13 +422,13 @@ from transformers import T5EncoderModel
 from diffusers.utils import pt_to_pil
 text_encoder = T5EncoderModel.from_pretrained(
-    "DeepFloyd/IF-I-IF-v1.0", subfolder="text_encoder", device_map="auto", load_in_8bit=True, variant="8bit"
+    "DeepFloyd/IF-I-XL-v1.0", subfolder="text_encoder", device_map="auto", load_in_8bit=True, variant="8bit"
 )
 # text to image
 pipe = DiffusionPipeline.from_pretrained(
-    "DeepFloyd/IF-I-IF-v1.0",
+    "DeepFloyd/IF-I-XL-v1.0",
     text_encoder=text_encoder,  # pass the previously instantiated 8bit text encoder
     unet=None,
     device_map="auto",
@@ -444,7 +444,7 @@ gc.collect()
 torch.cuda.empty_cache()
 pipe = IFPipeline.from_pretrained(
-    "DeepFloyd/IF-I-IF-v1.0", text_encoder=None, variant="fp16", torch_dtype=torch.float16, device_map="auto"
+    "DeepFloyd/IF-I-XL-v1.0", text_encoder=None, variant="fp16", torch_dtype=torch.float16, device_map="auto"
 )
 generator = torch.Generator().manual_seed(0)
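These hunks show only fragments of the low-memory example: the prompt is encoded with the 8-bit T5 pipeline, that pipeline is freed, and the UNet-only pipeline then runs on the precomputed embeddings. A sketch of that final call (not part of this diff), assuming `prompt_embeds` and `negative_embeds` were produced earlier outside the shown hunks:

```py
# Generate with precomputed text embeddings; no text encoder is loaded in this pipeline
image = pipe(
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=negative_embeds,
    generator=generator,
    output_type="pt",
).images
```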
...
@@ -41,7 +41,7 @@ EXAMPLE_DOC_STRING = """
 >>> from diffusers.utils import pt_to_pil
 >>> import torch
->>> pipe = IFPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+>>> pipe = IFPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 >>> pipe.enable_model_cpu_offload()
 >>> prompt = 'a photo of a kangaroo wearing an orange hoodie and blue sunglasses standing in front of the eiffel tower holding a sign that says "very deep learning"'
...
@@ -70,7 +70,7 @@ EXAMPLE_DOC_STRING = """
 >>> original_image = original_image.resize((768, 512))
 >>> pipe = IFImg2ImgPipeline.from_pretrained(
-... "DeepFloyd/IF-I-IF-v1.0",
+... "DeepFloyd/IF-I-XL-v1.0",
 ... variant="fp16",
 ... torch_dtype=torch.float16,
 ... )
...
@@ -73,7 +73,7 @@ EXAMPLE_DOC_STRING = """
 >>> original_image = original_image.resize((768, 512))
 >>> pipe = IFImg2ImgPipeline.from_pretrained(
-... "DeepFloyd/IF-I-IF-v1.0",
+... "DeepFloyd/IF-I-XL-v1.0",
 ... variant="fp16",
 ... torch_dtype=torch.float16,
 ... )
...
@@ -76,7 +76,7 @@ EXAMPLE_DOC_STRING = """
 >>> mask_image = mask_image
 >>> pipe = IFInpaintingPipeline.from_pretrained(
-... "DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16
+... "DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16
 ... )
 >>> pipe.enable_model_cpu_offload()
...
@@ -78,7 +78,7 @@ EXAMPLE_DOC_STRING = """
 >>> mask_image = mask_image
 >>> pipe = IFInpaintingPipeline.from_pretrained(
-... "DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16
+... "DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16
 ... )
 >>> pipe.enable_model_cpu_offload()
...
@@ -45,7 +45,7 @@ EXAMPLE_DOC_STRING = """
 >>> from diffusers.utils import pt_to_pil
 >>> import torch
->>> pipe = IFPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+>>> pipe = IFPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 >>> pipe.enable_model_cpu_offload()
 >>> prompt = 'a photo of a kangaroo wearing an orange hoodie and blue sunglasses standing in front of the eiffel tower holding a sign that says "very deep learning"'
...
@@ -94,7 +94,7 @@ class IFPipelineSlowTests(unittest.TestCase):
     def test_all(self):
         # if
-        pipe_1 = IFPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+        pipe_1 = IFPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
         pipe_2 = IFSuperResolutionPipeline.from_pretrained(
             "DeepFloyd/IF-II-L-v1.0", variant="fp16", torch_dtype=torch.float16, text_encoder=None, tokenizer=None
...