Unverified Commit 17a55534 authored by Sylvain Gugger's avatar Sylvain Gugger Committed by GitHub
Browse files

Enable code-specific revision for code on the Hub (#23799)

* Enable code-specific revision for code on the Hub

* invalidate old revision
parent edf77728
...@@ -316,7 +316,7 @@ def get_cached_module_file( ...@@ -316,7 +316,7 @@ def get_cached_module_file(
) )
new_files.append(f"{module_needed}.py") new_files.append(f"{module_needed}.py")
if len(new_files) > 0: if len(new_files) > 0 and revision is None:
new_files = "\n".join([f"- {f}" for f in new_files]) new_files = "\n".join([f"- {f}" for f in new_files])
repo_type_str = "" if repo_type is None else f"{repo_type}s/" repo_type_str = "" if repo_type is None else f"{repo_type}s/"
url = f"https://huggingface.co/{repo_type_str}{pretrained_model_name_or_path}" url = f"https://huggingface.co/{repo_type_str}{pretrained_model_name_or_path}"
...@@ -340,6 +340,7 @@ def get_class_from_dynamic_module( ...@@ -340,6 +340,7 @@ def get_class_from_dynamic_module(
revision: Optional[str] = None, revision: Optional[str] = None,
local_files_only: bool = False, local_files_only: bool = False,
repo_type: Optional[str] = None, repo_type: Optional[str] = None,
code_revision: Optional[str] = None,
**kwargs, **kwargs,
): ):
""" """
...@@ -391,6 +392,10 @@ def get_class_from_dynamic_module( ...@@ -391,6 +392,10 @@ def get_class_from_dynamic_module(
If `True`, will only try to load the tokenizer configuration from local files. If `True`, will only try to load the tokenizer configuration from local files.
repo_type (`str`, *optional*): repo_type (`str`, *optional*):
Specify the repo type (useful when downloading from a space for instance). Specify the repo type (useful when downloading from a space for instance).
code_revision (`str`, *optional*, defaults to `"main"`):
The specific revision to use for the code on the Hub, if the code lives in a different repository than the
rest of the model. It can be a branch name, a tag name, or a commit id, since we use a git-based system for
storing models and other artifacts on huggingface.co, so `code_revision` can be any identifier allowed by git.
<Tip> <Tip>
...@@ -415,12 +420,12 @@ def get_class_from_dynamic_module( ...@@ -415,12 +420,12 @@ def get_class_from_dynamic_module(
# Catch the name of the repo if it's specified in `class_reference` # Catch the name of the repo if it's specified in `class_reference`
if "--" in class_reference: if "--" in class_reference:
repo_id, class_reference = class_reference.split("--") repo_id, class_reference = class_reference.split("--")
# Invalidate revision since it's not relevant for this repo
revision = "main"
else: else:
repo_id = pretrained_model_name_or_path repo_id = pretrained_model_name_or_path
module_file, class_name = class_reference.split(".") module_file, class_name = class_reference.split(".")
if code_revision is None and pretrained_model_name_or_path == repo_id:
code_revision = revision
# And lastly we get the class inside our newly created module # And lastly we get the class inside our newly created module
final_module = get_cached_module_file( final_module = get_cached_module_file(
repo_id, repo_id,
...@@ -430,7 +435,7 @@ def get_class_from_dynamic_module( ...@@ -430,7 +435,7 @@ def get_class_from_dynamic_module(
resume_download=resume_download, resume_download=resume_download,
proxies=proxies, proxies=proxies,
use_auth_token=use_auth_token, use_auth_token=use_auth_token,
revision=revision, revision=code_revision,
local_files_only=local_files_only, local_files_only=local_files_only,
repo_type=repo_type, repo_type=repo_type,
) )
......
...@@ -128,6 +128,11 @@ FROM_PRETRAINED_TORCH_DOCSTRING = """ ...@@ -128,6 +128,11 @@ FROM_PRETRAINED_TORCH_DOCSTRING = """
Whether or not to allow for custom models defined on the Hub in their own modeling files. This option Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
should only be set to `True` for repositories you trust and in which you have read the code, as it will should only be set to `True` for repositories you trust and in which you have read the code, as it will
execute code present on the Hub on your local machine. execute code present on the Hub on your local machine.
code_revision (`str`, *optional*, defaults to `"main"`):
The specific revision to use for the code on the Hub, if the code lives in a different repository than
the rest of the model. It can be a branch name, a tag name, or a commit id, since we use a git-based
system for storing models and other artifacts on huggingface.co, so `code_revision` can be any identifier
allowed by git.
kwargs (additional keyword arguments, *optional*): kwargs (additional keyword arguments, *optional*):
Can be used to update the configuration object (after it being loaded) and initiate the model (e.g., Can be used to update the configuration object (after it being loaded) and initiate the model (e.g.,
`output_attentions=True`). Behaves differently depending on whether a `config` is provided or `output_attentions=True`). Behaves differently depending on whether a `config` is provided or
...@@ -224,6 +229,11 @@ FROM_PRETRAINED_TF_DOCSTRING = """ ...@@ -224,6 +229,11 @@ FROM_PRETRAINED_TF_DOCSTRING = """
Whether or not to allow for custom models defined on the Hub in their own modeling files. This option Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
should only be set to `True` for repositories you trust and in which you have read the code, as it will should only be set to `True` for repositories you trust and in which you have read the code, as it will
execute code present on the Hub on your local machine. execute code present on the Hub on your local machine.
code_revision (`str`, *optional*, defaults to `"main"`):
The specific revision to use for the code on the Hub, if the code lives in a different repository than
the rest of the model. It can be a branch name, a tag name, or a commit id, since we use a git-based
system for storing models and other artifacts on huggingface.co, so `code_revision` can be any identifier
allowed by git.
kwargs (additional keyword arguments, *optional*): kwargs (additional keyword arguments, *optional*):
Can be used to update the configuration object (after it being loaded) and initiate the model (e.g., Can be used to update the configuration object (after it being loaded) and initiate the model (e.g.,
`output_attentions=True`). Behaves differently depending on whether a `config` is provided or `output_attentions=True`). Behaves differently depending on whether a `config` is provided or
...@@ -320,6 +330,11 @@ FROM_PRETRAINED_FLAX_DOCSTRING = """ ...@@ -320,6 +330,11 @@ FROM_PRETRAINED_FLAX_DOCSTRING = """
Whether or not to allow for custom models defined on the Hub in their own modeling files. This option Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
should only be set to `True` for repositories you trust and in which you have read the code, as it will should only be set to `True` for repositories you trust and in which you have read the code, as it will
execute code present on the Hub on your local machine. execute code present on the Hub on your local machine.
code_revision (`str`, *optional*, defaults to `"main"`):
The specific revision to use for the code on the Hub, if the code lives in a different repository than
the rest of the model. It can be a branch name, a tag name, or a commit id, since we use a git-based
system for storing models and other artifacts on huggingface.co, so `code_revision` can be any identifier
allowed by git.
kwargs (additional keyword arguments, *optional*): kwargs (additional keyword arguments, *optional*):
Can be used to update the configuration object (after it being loaded) and initiate the model (e.g., Can be used to update the configuration object (after it being loaded) and initiate the model (e.g.,
`output_attentions=True`). Behaves differently depending on whether a `config` is provided or `output_attentions=True`). Behaves differently depending on whether a `config` is provided or
...@@ -408,6 +423,7 @@ class _BaseAutoModelClass: ...@@ -408,6 +423,7 @@ class _BaseAutoModelClass:
else: else:
repo_id = config.name_or_path repo_id = config.name_or_path
model_class = get_class_from_dynamic_module(class_ref, repo_id, **kwargs) model_class = get_class_from_dynamic_module(class_ref, repo_id, **kwargs)
_ = kwargs.pop("code_revision", None)
return model_class._from_config(config, **kwargs) return model_class._from_config(config, **kwargs)
elif type(config) in cls._model_mapping.keys(): elif type(config) in cls._model_mapping.keys():
model_class = _get_model_class(config, cls._model_mapping) model_class = _get_model_class(config, cls._model_mapping)
...@@ -425,6 +441,7 @@ class _BaseAutoModelClass: ...@@ -425,6 +441,7 @@ class _BaseAutoModelClass:
kwargs["_from_auto"] = True kwargs["_from_auto"] = True
hub_kwargs_names = [ hub_kwargs_names = [
"cache_dir", "cache_dir",
"code_revision",
"force_download", "force_download",
"local_files_only", "local_files_only",
"proxies", "proxies",
...@@ -464,6 +481,7 @@ class _BaseAutoModelClass: ...@@ -464,6 +481,7 @@ class _BaseAutoModelClass:
model_class = get_class_from_dynamic_module( model_class = get_class_from_dynamic_module(
class_ref, pretrained_model_name_or_path, **hub_kwargs, **kwargs class_ref, pretrained_model_name_or_path, **hub_kwargs, **kwargs
) )
_ = hub_kwargs.pop("code_revision", None)
return model_class.from_pretrained( return model_class.from_pretrained(
pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
) )
......
...@@ -938,6 +938,7 @@ class AutoConfig: ...@@ -938,6 +938,7 @@ class AutoConfig:
) )
class_ref = config_dict["auto_map"]["AutoConfig"] class_ref = config_dict["auto_map"]["AutoConfig"]
config_class = get_class_from_dynamic_module(class_ref, pretrained_model_name_or_path, **kwargs) config_class = get_class_from_dynamic_module(class_ref, pretrained_model_name_or_path, **kwargs)
_ = kwargs.pop("code_revision", None)
return config_class.from_pretrained(pretrained_model_name_or_path, **kwargs) return config_class.from_pretrained(pretrained_model_name_or_path, **kwargs)
elif "model_type" in config_dict: elif "model_type" in config_dict:
config_class = CONFIG_MAPPING[config_dict["model_type"]] config_class = CONFIG_MAPPING[config_dict["model_type"]]
......
...@@ -337,6 +337,7 @@ class AutoFeatureExtractor: ...@@ -337,6 +337,7 @@ class AutoFeatureExtractor:
feature_extractor_class = get_class_from_dynamic_module( feature_extractor_class = get_class_from_dynamic_module(
feature_extractor_auto_map, pretrained_model_name_or_path, **kwargs feature_extractor_auto_map, pretrained_model_name_or_path, **kwargs
) )
_ = kwargs.pop("code_revision", None)
else: else:
feature_extractor_class = feature_extractor_class_from_name(feature_extractor_class) feature_extractor_class = feature_extractor_class_from_name(feature_extractor_class)
......
...@@ -361,6 +361,7 @@ class AutoImageProcessor: ...@@ -361,6 +361,7 @@ class AutoImageProcessor:
image_processor_class = get_class_from_dynamic_module( image_processor_class = get_class_from_dynamic_module(
image_processor_auto_map, pretrained_model_name_or_path, **kwargs image_processor_auto_map, pretrained_model_name_or_path, **kwargs
) )
_ = kwargs.pop("code_revision", None)
else: else:
image_processor_class = image_processor_class_from_name(image_processor_class) image_processor_class = image_processor_class_from_name(image_processor_class)
......
...@@ -259,6 +259,7 @@ class AutoProcessor: ...@@ -259,6 +259,7 @@ class AutoProcessor:
processor_class = get_class_from_dynamic_module( processor_class = get_class_from_dynamic_module(
processor_auto_map, pretrained_model_name_or_path, **kwargs processor_auto_map, pretrained_model_name_or_path, **kwargs
) )
_ = kwargs.pop("code_revision", None)
else: else:
processor_class = processor_class_from_name(processor_class) processor_class = processor_class_from_name(processor_class)
......
...@@ -678,6 +678,7 @@ class AutoTokenizer: ...@@ -678,6 +678,7 @@ class AutoTokenizer:
else: else:
class_ref = tokenizer_auto_map[0] class_ref = tokenizer_auto_map[0]
tokenizer_class = get_class_from_dynamic_module(class_ref, pretrained_model_name_or_path, **kwargs) tokenizer_class = get_class_from_dynamic_module(class_ref, pretrained_model_name_or_path, **kwargs)
_ = kwargs.pop("code_revision", None)
elif use_fast and not config_tokenizer_class.endswith("Fast"): elif use_fast and not config_tokenizer_class.endswith("Fast"):
tokenizer_class_candidate = f"{config_tokenizer_class}Fast" tokenizer_class_candidate = f"{config_tokenizer_class}Fast"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment