Unverified Commit 45171174 authored by w4ffl35, committed by GitHub

Prevent online access when desired when using download_from_original_stable_diffusion_ckpt (#4271)

Prevent online access when desired:

- Adds a `config_files` option to `download_from_original_stable_diffusion_ckpt` so the original `.yaml` config files can be supplied locally instead of fetched over the network
- Adds `local_files_only` flags to all `from_pretrained` calls
parent 4c4fe042
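
For reference, a minimal sketch of the intended offline usage, assuming a v1-style checkpoint. Both file paths are hypothetical placeholders, and `config_files` values are assumed here to be local `.yaml` paths:

```python
from diffusers.pipelines.stable_diffusion.convert_from_ckpt import (
    download_from_original_stable_diffusion_ckpt,
)

# Convert a local CompVis-style checkpoint without touching the network.
# Assumes any Hub models referenced during conversion are already in the local cache.
pipe = download_from_original_stable_diffusion_ckpt(
    checkpoint_path="models/my-model.safetensors",  # hypothetical path
    from_safetensors=True,
    config_files={"v1": "configs/v1-inference.yaml"},  # hypothetical path; skips the config download
    local_files_only=True,  # from_pretrained calls resolve against the local cache only
)
pipe.save_pretrained("models/my-model-diffusers")
```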
@@ -778,7 +778,7 @@ def convert_ldm_bert_checkpoint(checkpoint, config):
 def convert_ldm_clip_checkpoint(checkpoint, local_files_only=False, text_encoder=None):
     if text_encoder is None:
         config_name = "openai/clip-vit-large-patch14"
-        config = CLIPTextConfig.from_pretrained(config_name)
+        config = CLIPTextConfig.from_pretrained(config_name, local_files_only=local_files_only)
 
         ctx = init_empty_weights if is_accelerate_available() else nullcontext
         with ctx():
@@ -832,8 +832,8 @@ protected = {re.escape(x[0]): x[1] for x in textenc_transformer_conversion_lst}
 textenc_pattern = re.compile("|".join(protected.keys()))
 
 
-def convert_paint_by_example_checkpoint(checkpoint):
-    config = CLIPVisionConfig.from_pretrained("openai/clip-vit-large-patch14")
+def convert_paint_by_example_checkpoint(checkpoint, local_files_only=False):
+    config = CLIPVisionConfig.from_pretrained("openai/clip-vit-large-patch14", local_files_only=local_files_only)
     model = PaintByExampleImageEncoder(config)
 
     keys = list(checkpoint.keys())
@@ -900,13 +900,13 @@ def convert_paint_by_example_checkpoint(checkpoint):
 def convert_open_clip_checkpoint(
-    checkpoint, config_name, prefix="cond_stage_model.model.", has_projection=False, **config_kwargs
+    checkpoint, config_name, prefix="cond_stage_model.model.", has_projection=False, local_files_only=False, **config_kwargs
 ):
     # text_model = CLIPTextModel.from_pretrained("stabilityai/stable-diffusion-2", subfolder="text_encoder")
     # text_model = CLIPTextModelWithProjection.from_pretrained(
     #     "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", projection_dim=1280
     # )
-    config = CLIPTextConfig.from_pretrained(config_name, **config_kwargs)
+    config = CLIPTextConfig.from_pretrained(config_name, **config_kwargs, local_files_only=local_files_only)
 
     ctx = init_empty_weights if is_accelerate_available() else nullcontext
     with ctx():
@@ -971,7 +971,7 @@ def convert_open_clip_checkpoint(
     return text_model
 
 
-def stable_unclip_image_encoder(original_config):
+def stable_unclip_image_encoder(original_config, local_files_only=False):
     """
     Returns the image processor and clip image encoder for the img2img unclip pipeline.
@@ -989,13 +989,13 @@ def stable_unclip_image_encoder(original_config):
         if clip_model_name == "ViT-L/14":
             feature_extractor = CLIPImageProcessor()
-            image_encoder = CLIPVisionModelWithProjection.from_pretrained("openai/clip-vit-large-patch14")
+            image_encoder = CLIPVisionModelWithProjection.from_pretrained("openai/clip-vit-large-patch14", local_files_only=local_files_only)
         else:
             raise NotImplementedError(f"Unknown CLIP checkpoint name in stable diffusion checkpoint {clip_model_name}")
 
     elif sd_clip_image_embedder_class == "FrozenOpenCLIPImageEmbedder":
         feature_extractor = CLIPImageProcessor()
-        image_encoder = CLIPVisionModelWithProjection.from_pretrained("laion/CLIP-ViT-H-14-laion2B-s32B-b79K")
+        image_encoder = CLIPVisionModelWithProjection.from_pretrained("laion/CLIP-ViT-H-14-laion2B-s32B-b79K", local_files_only=local_files_only)
     else:
         raise NotImplementedError(
             f"Unknown CLIP image embedder class in stable diffusion checkpoint {sd_clip_image_embedder_class}"
@@ -1116,6 +1116,7 @@ def download_from_original_stable_diffusion_ckpt(
     vae=None,
     text_encoder=None,
     tokenizer=None,
+    config_files=None,
 ) -> DiffusionPipeline:
     """
     Load a Stable Diffusion pipeline object from a CompVis-style `.ckpt`/`.safetensors` file and (ideally) a `.yaml`
@@ -1175,6 +1176,14 @@ def download_from_original_stable_diffusion_ckpt(
             [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer)
             to use. If this parameter is `None`, the function will load a new instance of [CLIPTokenizer] by itself, if
             needed.
+        config_files (`Dict[str, str]`, *optional*, defaults to `None`):
+            A dictionary mapping from config file names to their contents. If this parameter is `None`, the function
+            will load the config files by itself, if needed. Valid keys are:
+
+            - `v1`: Config file for Stable Diffusion v1
+            - `v2`: Config file for Stable Diffusion v2
+            - `xl`: Config file for Stable Diffusion XL
+            - `xl_refiner`: Config file for Stable Diffusion XL Refiner
     return: A StableDiffusionPipeline object representing the passed-in `.ckpt`/`.safetensors` file.
     """
@@ -1396,14 +1405,14 @@ def download_from_original_stable_diffusion_ckpt(
         else:
             vae.load_state_dict(converted_vae_checkpoint)
     elif vae is None:
-        vae = AutoencoderKL.from_pretrained(vae_path)
+        vae = AutoencoderKL.from_pretrained(vae_path, local_files_only=local_files_only)
 
     if model_type == "FrozenOpenCLIPEmbedder":
         config_name = "stabilityai/stable-diffusion-2"
         config_kwargs = {"subfolder": "text_encoder"}
 
         text_model = convert_open_clip_checkpoint(checkpoint, config_name, **config_kwargs)
-        tokenizer = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2", subfolder="tokenizer")
+        tokenizer = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2", subfolder="tokenizer", local_files_only=local_files_only)
 
         if stable_unclip is None:
             if controlnet:
@@ -1455,12 +1464,12 @@ def download_from_original_stable_diffusion_ckpt(
         elif stable_unclip == "txt2img":
             if stable_unclip_prior is None or stable_unclip_prior == "karlo":
                 karlo_model = "kakaobrain/karlo-v1-alpha"
-                prior = PriorTransformer.from_pretrained(karlo_model, subfolder="prior")
+                prior = PriorTransformer.from_pretrained(karlo_model, subfolder="prior", local_files_only=local_files_only)
 
-                prior_tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
-                prior_text_model = CLIPTextModelWithProjection.from_pretrained("openai/clip-vit-large-patch14")
+                prior_tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14", local_files_only=local_files_only)
+                prior_text_model = CLIPTextModelWithProjection.from_pretrained("openai/clip-vit-large-patch14", local_files_only=local_files_only)
 
-                prior_scheduler = UnCLIPScheduler.from_pretrained(karlo_model, subfolder="prior_scheduler")
+                prior_scheduler = UnCLIPScheduler.from_pretrained(karlo_model, subfolder="prior_scheduler", local_files_only=local_files_only)
                 prior_scheduler = DDPMScheduler.from_config(prior_scheduler.config)
             else:
                 raise NotImplementedError(f"unknown prior for stable unclip model: {stable_unclip_prior}")
@@ -1486,8 +1495,8 @@ def download_from_original_stable_diffusion_ckpt(
             raise NotImplementedError(f"unknown `stable_unclip` type: {stable_unclip}")
     elif model_type == "PaintByExample":
         vision_model = convert_paint_by_example_checkpoint(checkpoint)
-        tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
-        feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker")
+        tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14", local_files_only=local_files_only)
+        feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker", local_files_only=local_files_only)
         pipe = PaintByExamplePipeline(
             vae=vae,
             image_encoder=vision_model,
@@ -1500,11 +1509,11 @@ def download_from_original_stable_diffusion_ckpt(
         text_model = convert_ldm_clip_checkpoint(
             checkpoint, local_files_only=local_files_only, text_encoder=text_encoder
         )
-        tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14") if tokenizer is None else tokenizer
+        tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14", local_files_only=local_files_only) if tokenizer is None else tokenizer
 
         if load_safety_checker:
-            safety_checker = StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker")
-            feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker")
+            safety_checker = StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker", local_files_only=local_files_only)
+            feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker", local_files_only=local_files_only)
         else:
             safety_checker = None
             feature_extractor = None
@@ -1532,9 +1541,9 @@ def download_from_original_stable_diffusion_ckpt(
         )
     elif model_type in ["SDXL", "SDXL-Refiner"]:
         if model_type == "SDXL":
-            tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
+            tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14", local_files_only=local_files_only)
             text_encoder = convert_ldm_clip_checkpoint(checkpoint, local_files_only=local_files_only)
-            tokenizer_2 = CLIPTokenizer.from_pretrained("laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", pad_token="!")
+            tokenizer_2 = CLIPTokenizer.from_pretrained("laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", pad_token="!", local_files_only=local_files_only)
 
             config_name = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
             config_kwargs = {"projection_dim": 1280}
@@ -1555,7 +1564,7 @@ def download_from_original_stable_diffusion_ckpt(
         else:
             tokenizer = None
             text_encoder = None
-            tokenizer_2 = CLIPTokenizer.from_pretrained("laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", pad_token="!")
+            tokenizer_2 = CLIPTokenizer.from_pretrained("laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", pad_token="!", local_files_only=local_files_only)
 
             config_name = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
             config_kwargs = {"projection_dim": 1280}
@@ -1577,7 +1586,7 @@ def download_from_original_stable_diffusion_ckpt(
     else:
         text_config = create_ldm_bert_config(original_config)
         text_model = convert_ldm_bert_checkpoint(checkpoint, text_config)
-        tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")
+        tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased", local_files_only=local_files_only)
         pipe = LDMTextToImagePipeline(vqvae=vae, bert=text_model, tokenizer=tokenizer, unet=unet, scheduler=scheduler)
 
     return pipe