"git@developer.sourcefind.cn:OpenDAS/ollama.git" did not exist on "8e1234b7580ab88a7950f43197090fc1f63a1f32"
Unverified Commit 936ab7ba authored by oOraph's avatar oOraph Committed by GitHub
Browse files

fix from_pretrained in offline mode when model is preloaded in cache (#31010)



* Unit test to verify fix
Signed-off-by: default avatarRaphael Glon <oOraph@users.noreply.github.com>

* fix from_pretrained in offline mode when model is preloaded in cache
Signed-off-by: default avatarRaphael Glon <oOraph@users.noreply.github.com>

* minor: fmt
Signed-off-by: default avatarRaphael Glon <oOraph@users.noreply.github.com>

---------
Signed-off-by: default avatarRaphael Glon <oOraph@users.noreply.github.com>
Co-authored-by: default avatarRaphael Glon <oOraph@users.noreply.github.com>
parent 537deb78
...@@ -3392,70 +3392,70 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix ...@@ -3392,70 +3392,70 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
) )
if resolved_archive_file is not None: if resolved_archive_file is not None:
is_sharded = True is_sharded = True
if not local_files_only and not is_offline_mode():
if not local_files_only and resolved_archive_file is not None: if resolved_archive_file is not None:
if filename in [WEIGHTS_NAME, WEIGHTS_INDEX_NAME]: if filename in [WEIGHTS_NAME, WEIGHTS_INDEX_NAME]:
# If the PyTorch file was found, check if there is a safetensors file on the repository # If the PyTorch file was found, check if there is a safetensors file on the repository
# If there is no safetensors file on the repositories, start an auto conversion # If there is no safetensors file on the repositories, start an auto conversion
safe_weights_name = SAFE_WEIGHTS_INDEX_NAME if is_sharded else SAFE_WEIGHTS_NAME safe_weights_name = SAFE_WEIGHTS_INDEX_NAME if is_sharded else SAFE_WEIGHTS_NAME
has_file_kwargs = {
"revision": revision,
"proxies": proxies,
"token": token,
}
cached_file_kwargs = {
"cache_dir": cache_dir,
"force_download": force_download,
"resume_download": resume_download,
"local_files_only": local_files_only,
"user_agent": user_agent,
"subfolder": subfolder,
"_raise_exceptions_for_gated_repo": False,
"_raise_exceptions_for_missing_entries": False,
"_commit_hash": commit_hash,
**has_file_kwargs,
}
if not has_file(pretrained_model_name_or_path, safe_weights_name, **has_file_kwargs):
Thread(
target=auto_conversion,
args=(pretrained_model_name_or_path,),
kwargs={"ignore_errors_during_conversion": True, **cached_file_kwargs},
name="Thread-autoconversion",
).start()
else:
# Otherwise, no PyTorch file was found, maybe there is a TF or Flax model file.
# We try those to give a helpful error message.
has_file_kwargs = { has_file_kwargs = {
"revision": revision, "revision": revision,
"proxies": proxies, "proxies": proxies,
"token": token, "token": token,
} }
cached_file_kwargs = { if has_file(pretrained_model_name_or_path, TF2_WEIGHTS_NAME, **has_file_kwargs):
"cache_dir": cache_dir, raise EnvironmentError(
"force_download": force_download, f"{pretrained_model_name_or_path} does not appear to have a file named"
"resume_download": resume_download, f" {_add_variant(WEIGHTS_NAME, variant)} but there is a file for TensorFlow weights."
"local_files_only": local_files_only, " Use `from_tf=True` to load this model from those weights."
"user_agent": user_agent, )
"subfolder": subfolder, elif has_file(pretrained_model_name_or_path, FLAX_WEIGHTS_NAME, **has_file_kwargs):
"_raise_exceptions_for_gated_repo": False, raise EnvironmentError(
"_raise_exceptions_for_missing_entries": False, f"{pretrained_model_name_or_path} does not appear to have a file named"
"_commit_hash": commit_hash, f" {_add_variant(WEIGHTS_NAME, variant)} but there is a file for Flax weights. Use"
**has_file_kwargs, " `from_flax=True` to load this model from those weights."
} )
if not has_file(pretrained_model_name_or_path, safe_weights_name, **has_file_kwargs): elif variant is not None and has_file(
Thread( pretrained_model_name_or_path, WEIGHTS_NAME, **has_file_kwargs
target=auto_conversion, ):
args=(pretrained_model_name_or_path,), raise EnvironmentError(
kwargs={"ignore_errors_during_conversion": True, **cached_file_kwargs}, f"{pretrained_model_name_or_path} does not appear to have a file named"
name="Thread-autoconversion", f" {_add_variant(WEIGHTS_NAME, variant)} but there is a file without the variant"
).start() f" {variant}. Use `variant=None` to load this model from those weights."
else: )
# Otherwise, no PyTorch file was found, maybe there is a TF or Flax model file. else:
# We try those to give a helpful error message. raise EnvironmentError(
has_file_kwargs = { f"{pretrained_model_name_or_path} does not appear to have a file named"
"revision": revision, f" {_add_variant(WEIGHTS_NAME, variant)}, {_add_variant(SAFE_WEIGHTS_NAME, variant)},"
"proxies": proxies, f" {TF2_WEIGHTS_NAME}, {TF_WEIGHTS_NAME} or {FLAX_WEIGHTS_NAME}."
"token": token, )
}
if has_file(pretrained_model_name_or_path, TF2_WEIGHTS_NAME, **has_file_kwargs):
raise EnvironmentError(
f"{pretrained_model_name_or_path} does not appear to have a file named"
f" {_add_variant(WEIGHTS_NAME, variant)} but there is a file for TensorFlow weights."
" Use `from_tf=True` to load this model from those weights."
)
elif has_file(pretrained_model_name_or_path, FLAX_WEIGHTS_NAME, **has_file_kwargs):
raise EnvironmentError(
f"{pretrained_model_name_or_path} does not appear to have a file named"
f" {_add_variant(WEIGHTS_NAME, variant)} but there is a file for Flax weights. Use"
" `from_flax=True` to load this model from those weights."
)
elif variant is not None and has_file(
pretrained_model_name_or_path, WEIGHTS_NAME, **has_file_kwargs
):
raise EnvironmentError(
f"{pretrained_model_name_or_path} does not appear to have a file named"
f" {_add_variant(WEIGHTS_NAME, variant)} but there is a file without the variant"
f" {variant}. Use `variant=None` to load this model from those weights."
)
else:
raise EnvironmentError(
f"{pretrained_model_name_or_path} does not appear to have a file named"
f" {_add_variant(WEIGHTS_NAME, variant)}, {_add_variant(SAFE_WEIGHTS_NAME, variant)},"
f" {TF2_WEIGHTS_NAME}, {TF_WEIGHTS_NAME} or {FLAX_WEIGHTS_NAME}."
)
except EnvironmentError: except EnvironmentError:
# Raise any environment error raise by `cached_file`. It will have a helpful error message adapted # Raise any environment error raise by `cached_file`. It will have a helpful error message adapted
# to the original exception. # to the original exception.
......
...@@ -33,6 +33,7 @@ from requests.exceptions import HTTPError ...@@ -33,6 +33,7 @@ from requests.exceptions import HTTPError
from transformers import ( from transformers import (
AutoConfig, AutoConfig,
AutoModel, AutoModel,
AutoModelForImageClassification,
AutoModelForSequenceClassification, AutoModelForSequenceClassification,
OwlViTForObjectDetection, OwlViTForObjectDetection,
PretrainedConfig, PretrainedConfig,
...@@ -76,7 +77,6 @@ sys.path.append(str(Path(__file__).parent.parent / "utils")) ...@@ -76,7 +77,6 @@ sys.path.append(str(Path(__file__).parent.parent / "utils"))
from test_module.custom_configuration import CustomConfig, NoSuperInitConfig # noqa E402 from test_module.custom_configuration import CustomConfig, NoSuperInitConfig # noqa E402
if is_torch_available(): if is_torch_available():
import torch import torch
from safetensors.torch import save_file as safe_save_file from safetensors.torch import save_file as safe_save_file
...@@ -194,6 +194,97 @@ if is_torch_available(): ...@@ -194,6 +194,97 @@ if is_torch_available():
attention_mask = _prepare_4d_attention_mask(mask, dtype=inputs_embeds.dtype) attention_mask = _prepare_4d_attention_mask(mask, dtype=inputs_embeds.dtype)
return attention_mask return attention_mask
class TestOffline(unittest.TestCase):
    """Verify `from_pretrained` behavior with an empty vs. pre-populated cache
    when network access is disabled (offline mode / local_files_only)."""

    def test_offline(self):
        # Ugly setup with monkeypatches, amending env vars here is too late as libs have already been imported
        from huggingface_hub import constants

        from transformers.utils import hub

        # Snapshot the module-level state we are about to monkeypatch so the
        # finally-block can restore it exactly.
        saved_offline_flag = hub._is_offline_mode
        saved_hub_cache = constants.HF_HUB_CACHE
        saved_hf_hub_cache = constants.HUGGINGFACE_HUB_CACHE
        saved_default_cache = constants.default_cache_path
        saved_transformers_cache = hub.TRANSFORMERS_CACHE

        try:
            hub._is_offline_mode = True

            with tempfile.TemporaryDirectory() as tmpdir:
                LOG.info("Temporary cache dir %s", tmpdir)
                # Redirect every cache-location constant to the empty temp dir.
                constants.HF_HUB_CACHE = tmpdir
                constants.HUGGINGFACE_HUB_CACHE = tmpdir
                constants.default_cache_path = tmpdir
                hub.TRANSFORMERS_CACHE = tmpdir

                # First offline load should fail
                try:
                    AutoModelForImageClassification.from_pretrained(
                        TINY_IMAGE_CLASSIF, revision="main", use_auth_token=None
                    )
                except OSError:
                    LOG.info("Loading model %s in offline mode failed as expected", TINY_IMAGE_CLASSIF)
                else:
                    self.fail("Loading model {} in offline mode should fail".format(TINY_IMAGE_CLASSIF))

                # Download model -> Huggingface Hub not concerned by our offline mode
                LOG.info("Downloading %s for offline tests", TINY_IMAGE_CLASSIF)
                hub_api = HfApi()
                local_dir = hub_api.snapshot_download(TINY_IMAGE_CLASSIF, cache_dir=tmpdir)

                LOG.info("Model %s downloaded in %s", TINY_IMAGE_CLASSIF, local_dir)

                # With the cache now pre-populated, the same offline load must succeed.
                AutoModelForImageClassification.from_pretrained(
                    TINY_IMAGE_CLASSIF, revision="main", use_auth_token=None
                )
        finally:
            # Tear down: reset env as it was before calling this test
            hub._is_offline_mode = saved_offline_flag
            constants.HF_HUB_CACHE = saved_hub_cache
            constants.HUGGINGFACE_HUB_CACHE = saved_hf_hub_cache
            constants.default_cache_path = saved_default_cache
            hub.TRANSFORMERS_CACHE = saved_transformers_cache

    def test_local_files_only(self):
        # Ugly setup with monkeypatches, amending env vars here is too late as libs have already been imported
        from huggingface_hub import constants

        from transformers.utils import hub

        # Snapshot module-level cache paths for restoration in the finally-block.
        saved_hub_cache = constants.HF_HUB_CACHE
        saved_hf_hub_cache = constants.HUGGINGFACE_HUB_CACHE
        saved_default_cache = constants.default_cache_path
        saved_transformers_cache = hub.TRANSFORMERS_CACHE

        try:
            with tempfile.TemporaryDirectory() as tmpdir:
                LOG.info("Temporary cache dir %s", tmpdir)
                # Redirect every cache-location constant to the empty temp dir.
                constants.HF_HUB_CACHE = tmpdir
                constants.HUGGINGFACE_HUB_CACHE = tmpdir
                constants.default_cache_path = tmpdir
                hub.TRANSFORMERS_CACHE = tmpdir

                # local_files_only with an empty cache must raise.
                try:
                    AutoModelForImageClassification.from_pretrained(
                        TINY_IMAGE_CLASSIF, revision="main", use_auth_token=None, local_files_only=True
                    )
                except OSError:
                    LOG.info("Loading model %s in offline mode failed as expected", TINY_IMAGE_CLASSIF)
                else:
                    self.fail("Loading model {} in offline mode should fail".format(TINY_IMAGE_CLASSIF))

                LOG.info("Downloading %s for offline tests", TINY_IMAGE_CLASSIF)
                hub_api = HfApi()
                local_dir = hub_api.snapshot_download(TINY_IMAGE_CLASSIF, cache_dir=tmpdir)

                LOG.info("Model %s downloaded in %s", TINY_IMAGE_CLASSIF, local_dir)

                # With the cache pre-populated, local_files_only must now succeed.
                AutoModelForImageClassification.from_pretrained(
                    TINY_IMAGE_CLASSIF, revision="main", use_auth_token=None, local_files_only=True
                )
        finally:
            # Tear down: reset env as it was before calling this test
            constants.HF_HUB_CACHE = saved_hub_cache
            constants.HUGGINGFACE_HUB_CACHE = saved_hf_hub_cache
            constants.default_cache_path = saved_default_cache
            hub.TRANSFORMERS_CACHE = saved_transformers_cache
if is_flax_available(): if is_flax_available():
from transformers import FlaxBertModel from transformers import FlaxBertModel
...@@ -205,6 +296,9 @@ if is_tf_available(): ...@@ -205,6 +296,9 @@ if is_tf_available():
TINY_T5 = "patrickvonplaten/t5-tiny-random" TINY_T5 = "patrickvonplaten/t5-tiny-random"
TINY_BERT_FOR_TOKEN_CLASSIFICATION = "hf-internal-testing/tiny-bert-for-token-classification" TINY_BERT_FOR_TOKEN_CLASSIFICATION = "hf-internal-testing/tiny-bert-for-token-classification"
TINY_MISTRAL = "hf-internal-testing/tiny-random-MistralForCausalLM" TINY_MISTRAL = "hf-internal-testing/tiny-random-MistralForCausalLM"
TINY_IMAGE_CLASSIF = "hf-internal-testing/tiny-random-SiglipForImageClassification"
LOG = logging.get_logger(__name__)
def check_models_equal(model1, model2): def check_models_equal(model1, model2):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment