Unverified commit 67239f73, authored by Lysandre Debut, committed by GitHub

Revert falcon exception (#26472)

* Revert "Falcon: fix revision propagation (#26006)"

This reverts commit 118c676ef3124423e5d062b665f05cde55bc9a90.

* Revert "Put Falcon back (#25960)"

This reverts commit 22a69f1d.
parent 0b192de1
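The reverted change special-cased the four `tiiuae/falcon-*` checkpoints: when `trust_remote_code` was not set, a `sanitize_code_revision` helper silently pinned `revision` to a Hub commit compatible with the in-library Falcon implementation. After this revert, `from_pretrained` forwards the caller's `revision` unchanged. A minimal sketch of the post-revert usage, reusing a pinned hash from the deleted helper shown below:

```python
from transformers import AutoModelForCausalLM

# Post-revert default: no implicit revision rewriting takes place.
model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-7b")

# Callers who still want the old pinning behavior can pass the
# revision explicitly (hash copied from the removed helper).
pinned = AutoModelForCausalLM.from_pretrained(
    "tiiuae/falcon-7b",
    revision="4e2d06f0a7c6370ebabbc30c6f59377ae8f73d76",
)
```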
@@ -32,12 +32,7 @@ from ...utils import (
     logging,
     requires_backends,
 )
-from .configuration_auto import (
-    AutoConfig,
-    model_type_to_module_name,
-    replace_list_option_in_docstrings,
-    sanitize_code_revision,
-)
+from .configuration_auto import AutoConfig, model_type_to_module_name, replace_list_option_in_docstrings
 
 
 logger = logging.get_logger(__name__)
@@ -471,9 +466,6 @@ class _BaseAutoModelClass:
         commit_hash = kwargs.pop("_commit_hash", None)
         adapter_kwargs = kwargs.pop("adapter_kwargs", None)
 
-        revision = hub_kwargs.pop("revision", None)
-        hub_kwargs["revision"] = sanitize_code_revision(pretrained_model_name_or_path, revision, trust_remote_code)
-
         token = hub_kwargs.pop("token", None)
         use_auth_token = hub_kwargs.pop("use_auth_token", None)
         if use_auth_token is not None:
...
@@ -1031,9 +1031,6 @@ class AutoConfig:
         trust_remote_code = kwargs.pop("trust_remote_code", None)
         code_revision = kwargs.pop("code_revision", None)
 
-        revision = kwargs.pop("revision", None)
-        kwargs["revision"] = sanitize_code_revision(pretrained_model_name_or_path, revision, trust_remote_code)
-
         config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)
         has_remote_code = "auto_map" in config_dict and "AutoConfig" in config_dict["auto_map"]
         has_local_code = "model_type" in config_dict and config_dict["model_type"] in CONFIG_MAPPING
@@ -1081,24 +1078,3 @@ class AutoConfig:
                 "match!"
             )
         CONFIG_MAPPING.register(model_type, config, exist_ok=exist_ok)
-
-
-def sanitize_code_revision(pretrained_model_name_or_path, revision, trust_remote_code):
-    if revision in ["main", None] and not trust_remote_code:
-        revision_dict = {
-            "tiiuae/falcon-7b": "4e2d06f0a7c6370ebabbc30c6f59377ae8f73d76",
-            "tiiuae/falcon-7b-instruct": "f8dac3fff96d5debd43edf56fb4e1abcfffbef28",
-            "tiiuae/falcon-40b": "f1ba7d328c06aa6fbb4a8afd3c756f46d7e6b232",
-            "tiiuae/falcon-40b-instruct": "7475ff8cfc36ed9a962b658ae3c33391566a85a5",
-        }
-        if isinstance(pretrained_model_name_or_path, str) and pretrained_model_name_or_path.lower() in revision_dict:
-            revision = revision_dict.get(pretrained_model_name_or_path.lower())
-            logger.warning(
-                "The Falcon model was initialized without `trust_remote_code=True`, and will therefore leverage the "
-                f"transformers library implementation. {pretrained_model_name_or_path}'s revision is set to a version that doesn't "
-                f"leverage remote code ({revision}).\n\nIn order to override this, please set a revision manually or set "
-                "`trust_remote_code=True`."
-            )
-
-    return revision
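The deleted helper above is self-contained, so its behavior is easy to replicate in user code if still wanted. A minimal standalone sketch, with hashes copied from the deleted mapping (`pick_falcon_revision` is a name introduced here purely for illustration):

```python
# Pinned, in-library-compatible revisions, copied from the deleted helper.
PINNED_FALCON_REVISIONS = {
    "tiiuae/falcon-7b": "4e2d06f0a7c6370ebabbc30c6f59377ae8f73d76",
    "tiiuae/falcon-7b-instruct": "f8dac3fff96d5debd43edf56fb4e1abcfffbef28",
    "tiiuae/falcon-40b": "f1ba7d328c06aa6fbb4a8afd3c756f46d7e6b232",
    "tiiuae/falcon-40b-instruct": "7475ff8cfc36ed9a962b658ae3c33391566a85a5",
}


def pick_falcon_revision(name, revision=None, trust_remote_code=False):
    """Mirror the deleted sanitize_code_revision: return a pinned commit
    for known Falcon checkpoints unless the caller opted into remote code
    or chose a non-default revision themselves."""
    if revision in (None, "main") and not trust_remote_code and isinstance(name, str):
        return PINNED_FALCON_REVISIONS.get(name.lower(), revision)
    return revision
```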
@@ -13,12 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Falcon configuration"""
-import os
-from typing import Optional, Union
-
 from ...configuration_utils import PretrainedConfig
 from ...utils import logging
-from ..auto.configuration_auto import sanitize_code_revision
 
 
 logger = logging.get_logger(__name__)
@@ -193,26 +189,3 @@ class FalconConfig(PretrainedConfig):
             )
         if rope_scaling_factor is None or not isinstance(rope_scaling_factor, float) or rope_scaling_factor <= 1.0:
             raise ValueError(f"`rope_scaling`'s factor field must be an float > 1, got {rope_scaling_factor}")
-
-    @classmethod
-    def from_pretrained(
-        cls,
-        pretrained_model_name_or_path: Union[str, os.PathLike],
-        cache_dir: Optional[Union[str, os.PathLike]] = None,
-        force_download: bool = False,
-        local_files_only: bool = False,
-        token: Optional[Union[str, bool]] = None,
-        revision: str = "main",
-        **kwargs,
-    ) -> "PretrainedConfig":
-        revision = sanitize_code_revision(pretrained_model_name_or_path, revision, kwargs.get("trust_remote_code"))
-
-        return super().from_pretrained(
-            pretrained_model_name_or_path,
-            cache_dir=cache_dir,
-            force_download=force_download,
-            local_files_only=local_files_only,
-            token=token,
-            revision=revision,
-            **kwargs,
-        )
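With the override gone, `FalconConfig.from_pretrained` resolves through the plain `PretrainedConfig.from_pretrained` path again, so an explicit `revision` is honored as-is. For example, pinning to the in-library-compatible commit from the deleted mapping:

```python
from transformers import FalconConfig

# The revision passed by the caller is the revision that is fetched;
# hash copied from the deleted sanitize_code_revision mapping.
config = FalconConfig.from_pretrained(
    "tiiuae/falcon-7b",
    revision="4e2d06f0a7c6370ebabbc30c6f59377ae8f73d76",
)
```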
@@ -15,7 +15,6 @@
 """PyTorch Falcon model."""
 
 import math
-import os
 from typing import Optional, Tuple, Union
 
 import torch
...
@@ -39,7 +38,6 @@ from ...utils import (
     is_flash_attn_available,
     logging,
 )
-from ..auto.configuration_auto import sanitize_code_revision
 from .configuration_falcon import FalconConfig
@@ -977,37 +975,6 @@ class FalconPreTrainedModel(PreTrainedModel):
             for layer_past in past_key_value
         )
 
-    @classmethod
-    def from_pretrained(
-        cls,
-        pretrained_model_name_or_path: Optional[Union[str, os.PathLike]],
-        *model_args,
-        config: Optional[Union[str, os.PathLike]] = None,
-        cache_dir: Optional[Union[str, os.PathLike]] = None,
-        ignore_mismatched_sizes: bool = False,
-        force_download: bool = False,
-        local_files_only: bool = False,
-        token: Optional[Union[str, bool]] = None,
-        revision: str = "main",
-        use_safetensors: bool = None,
-        **kwargs,
-    ):
-        revision = sanitize_code_revision(pretrained_model_name_or_path, revision, kwargs.get("trust_remote_code"))
-
-        return super().from_pretrained(
-            pretrained_model_name_or_path,
-            *model_args,
-            config=config,
-            cache_dir=cache_dir,
-            ignore_mismatched_sizes=ignore_mismatched_sizes,
-            force_download=force_download,
-            local_files_only=local_files_only,
-            token=token,
-            revision=revision,
-            use_safetensors=use_safetensors,
-            **kwargs,
-        )
-
 
 @add_start_docstrings(
     "The bare Falcon Model transformer outputting raw hidden-states without any specific head on top.",
...
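The model-side override is removed the same way, so `FalconPreTrainedModel` subclasses inherit the stock `from_pretrained`. As the comment in the deleted tests below notes, `trust_remote_code` only takes effect through the auto classes; a concrete class always uses the in-library implementation. A hedged sketch of that distinction:

```python
from transformers import AutoModelForCausalLM, FalconForCausalLM

# Auto classes resolve trust_remote_code and may load the checkpoint's
# own modeling code from the Hub.
remote = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-7b", trust_remote_code=True)

# A concrete class always instantiates the transformers implementation;
# passing trust_remote_code here has no effect on which code runs.
local = FalconForCausalLM.from_pretrained("tiiuae/falcon-7b")
```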
@@ -20,16 +20,13 @@ import unittest
 from parameterized import parameterized
 
 from transformers import (
-    AutoConfig,
-    AutoModel,
     AutoModelForCausalLM,
     AutoTokenizer,
     FalconConfig,
     is_torch_available,
     set_seed,
 )
-from transformers.testing_utils import CaptureLogger, require_bitsandbytes, require_torch, slow, tooslow, torch_device
-from transformers.utils import logging as transformers_logging
+from transformers.testing_utils import require_bitsandbytes, require_torch, slow, torch_device
 
 from ...generation.test_utils import GenerationTesterMixin
 from ...test_configuration_common import ConfigTester
@@ -538,132 +535,3 @@ class FalconLanguageGenerationTest(unittest.TestCase):
         self.assertLess(unpadded_inputs.input_ids.shape[-1], padded_inputs.input_ids.shape[-1])  # left-padding exists
         self.assertEqual(unpadded_gen_text[0], expected_output)
         self.assertEqual(padded_gen_text[0], expected_output)
-
-
-# TODO Lysandre: Remove this in version v4.34
-class FalconOverrideTest(unittest.TestCase):
-    supported_checkpoints = [
-        "tiiuae/falcon-7b",
-        "tiiuae/falcon-7b-instruct",
-        "tiiuae/falcon-40b",
-        "tiiuae/falcon-40b-instruct",
-    ]
-
-    latest_revisions = {
-        "tiiuae/falcon-7b": "f7796529e36b2d49094450fb038cc7c4c86afa44",
-        "tiiuae/falcon-7b-instruct": "eb410fb6ffa9028e97adb801f0d6ec46d02f8b07",
-        "tiiuae/falcon-40b": "561820f7eef0cc56a31ea38af15ca1acb07fab5d",
-        "tiiuae/falcon-40b-instruct": "ca78eac0ed45bf64445ff0687fabba1598daebf3",
-    }
-
-    def test_config_without_remote_code(self):
-        logger_ = transformers_logging.get_logger("transformers.models.auto.configuration_auto")
-        for supported_checkpoint in self.supported_checkpoints:
-            with CaptureLogger(logger_) as cm:
-                config1 = FalconConfig.from_pretrained(supported_checkpoint, trust_remote_code=False)
-                config2 = FalconConfig.from_pretrained(supported_checkpoint)
-
-            self.assertIn(
-                "The Falcon model was initialized without `trust_remote_code=True`, and will therefore leverage the "
-                "transformers library implementation.",
-                cm.out,
-            )
-
-            self.assertEqual(config1.to_dict(), config2.to_dict())
-
-    def test_auto_config_without_remote_code(self):
-        logger_ = transformers_logging.get_logger("transformers.models.auto.configuration_auto")
-        for supported_checkpoint in self.supported_checkpoints:
-            with CaptureLogger(logger_) as cm:
-                config1 = AutoConfig.from_pretrained(supported_checkpoint, trust_remote_code=False)
-                config2 = AutoConfig.from_pretrained(supported_checkpoint)
-
-            self.assertIn(
-                "The Falcon model was initialized without `trust_remote_code=True`, and will therefore leverage the "
-                "transformers library implementation.",
-                cm.out,
-            )
-
-            self.assertEqual(config1.to_dict(), config2.to_dict())
-
-    def test_config_with_remote_code(self):
-        for supported_checkpoint in self.supported_checkpoints:
-            config = FalconConfig.from_pretrained(supported_checkpoint, trust_remote_code=True)
-            self.assertIn(config.model_type, ["RefinedWebModel", "RefinedWeb"])
-
-    def test_auto_config_with_remote_code(self):
-        for supported_checkpoint in self.supported_checkpoints:
-            config = AutoConfig.from_pretrained(supported_checkpoint, trust_remote_code=True)
-            self.assertIn(config.model_type, ["RefinedWebModel", "RefinedWeb"])
-
-    def test_config_with_specific_revision(self):
-        for supported_checkpoint in self.supported_checkpoints:
-            config = FalconConfig.from_pretrained(
-                supported_checkpoint, revision=self.latest_revisions[supported_checkpoint], trust_remote_code=True
-            )
-            self.assertIn(config.model_type, ["RefinedWebModel", "RefinedWeb"])
-
-    def test_auto_config_with_specific_revision(self):
-        for supported_checkpoint in self.supported_checkpoints:
-            config = AutoConfig.from_pretrained(
-                supported_checkpoint, revision=self.latest_revisions[supported_checkpoint], trust_remote_code=True
-            )
-            self.assertIn(config.model_type, ["RefinedWebModel", "RefinedWeb"])
-
-    @tooslow
-    def test_model_without_remote_code(self):
-        logger_ = transformers_logging.get_logger("transformers.models.auto.configuration_auto")
-        for supported_checkpoint in self.supported_checkpoints:
-            with CaptureLogger(logger_) as cm:
-                config1 = FalconModel.from_pretrained(supported_checkpoint, trust_remote_code=False).config
-                config2 = FalconModel.from_pretrained(supported_checkpoint).config
-
-                # trust_remote_code only works with Auto Classes !
-                config3 = FalconModel.from_pretrained(supported_checkpoint, trust_remote_code=True).config
-
-            self.assertIn(
-                "The Falcon model was initialized without `trust_remote_code=True`, and will therefore leverage the "
-                "transformers library implementation.",
-                cm.out,
-            )
-
-            self.assertEqual(config1.to_dict(), config2.to_dict())
-            self.assertEqual(config1.to_dict(), config3.to_dict())
-
-    @tooslow
-    def test_auto_model_without_remote_code(self):
-        logger_ = transformers_logging.get_logger("transformers.models.auto.configuration_auto")
-        for supported_checkpoint in self.supported_checkpoints:
-            with CaptureLogger(logger_) as cm:
-                config1 = AutoModel.from_pretrained(supported_checkpoint, trust_remote_code=False).config
-                config2 = AutoModel.from_pretrained(supported_checkpoint).config
-
-            self.assertIn(
-                "The Falcon model was initialized without `trust_remote_code=True`, and will therefore leverage the "
-                "transformers library implementation.",
-                cm.out,
-            )
-
-            self.assertEqual(config1.to_dict(), config2.to_dict())
-
-    @tooslow
-    def test_auto_model_with_remote_code(self):
-        for supported_checkpoint in self.supported_checkpoints:
-            config = AutoModel.from_pretrained(supported_checkpoint, trust_remote_code=True).config
-            self.assertIn(config.model_type, ["RefinedWebModel", "RefinedWeb"])
-
-    @tooslow
-    def test_auto_model_with_specific_revision(self):
-        for supported_checkpoint in self.supported_checkpoints:
-            config = AutoModel.from_pretrained(
-                supported_checkpoint, revision=self.latest_revisions[supported_checkpoint], trust_remote_code=True
-            ).config
-            self.assertIn(config.model_type, ["RefinedWebModel", "RefinedWeb"])
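Nothing in this commit replaces the deleted `FalconOverrideTest`; a user wanting an equivalent smoke check after the revert can compare the two loading paths directly. A minimal sketch modeled on the deleted assertions (the `RefinedWebModel`/`RefinedWeb` names are the historical remote-code values, and what each path reports depends on the checkpoint's current config on the Hub):

```python
from transformers import AutoConfig

checkpoint = "tiiuae/falcon-7b"

# In-library path: resolves to the transformers Falcon implementation.
local_config = AutoConfig.from_pretrained(checkpoint)
print(local_config.model_type)  # "falcon" when the Hub config targets the in-library model

# Remote-code path: uses whatever configuration class the Hub repo maps to.
remote_config = AutoConfig.from_pretrained(checkpoint, trust_remote_code=True)
print(remote_config.model_type)
```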