Unverified commit defdcd28, authored by Samuel Xu, committed by GitHub

Remove Roberta Dependencies from XLM Roberta Flax and Tensorflow models (#21047)

* Added flax model code

* Added tf changes

* missed some

* Added copy comments

* Added style hints

* Fixed copy statements

* Added suggested fixes

* Made some fixes

* Style fixup

* Added necessary copy statements

* Fixing copy statements

* Added more copies

* Final copy fix

* Some bugfixes

* Adding imports to init

* Fixed up all make fixup errors

* Fixed doc errors

* Auto model changes
parent 023f51fe
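The net effect of this commit: the TensorFlow and Flax XLM-RoBERTa models are now defined standalone instead of aliasing the RoBERTa implementations, and both frameworks gain a causal-LM head (`TFXLMRobertaForCausalLM` / `FlaxXLMRobertaForCausalLM`) plus exported `...PreTrainedModel` base classes. A minimal usage sketch of the new TF head follows; the checkpoint name and the `is_decoder` flag are illustrative and not part of this diff:

```python
# Hypothetical quick check of the newly exported TF causal-LM head.
# "xlm-roberta-base" is an encoder checkpoint, used here only for
# illustration; is_decoder=True makes the attention mask causal.
from transformers import AutoTokenizer, TFXLMRobertaForCausalLM

tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
model = TFXLMRobertaForCausalLM.from_pretrained("xlm-roberta-base", is_decoder=True)

inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
outputs = model(inputs)
print(outputs.logits.shape)  # (batch_size, sequence_length, vocab_size)
```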
@@ -146,6 +146,11 @@ A list of official Hugging Face and community (indicated by 🌎) resources to h
 [[autodoc]] TFXLMRobertaModel
     - call
 
+## TFXLMRobertaForCausalLM
+
+[[autodoc]] TFXLMRobertaForCausalLM
+    - call
+
 ## TFXLMRobertaForMaskedLM
 
 [[autodoc]] TFXLMRobertaForMaskedLM
@@ -176,6 +181,11 @@ A list of official Hugging Face and community (indicated by 🌎) resources to h
 [[autodoc]] FlaxXLMRobertaModel
     - __call__
 
+## FlaxXLMRobertaForCausalLM
+
+[[autodoc]] FlaxXLMRobertaForCausalLM
+    - __call__
+
 ## FlaxXLMRobertaForMaskedLM
 
 [[autodoc]] FlaxXLMRobertaForMaskedLM
...
@@ -3153,12 +3153,14 @@ else:
     _import_structure["models.xlm_roberta"].extend(
         [
             "TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST",
+            "TFXLMRobertaForCausalLM",
             "TFXLMRobertaForMaskedLM",
             "TFXLMRobertaForMultipleChoice",
             "TFXLMRobertaForQuestionAnswering",
             "TFXLMRobertaForSequenceClassification",
             "TFXLMRobertaForTokenClassification",
             "TFXLMRobertaModel",
+            "TFXLMRobertaPreTrainedModel",
         ]
     )
     _import_structure["models.xlnet"].extend(
@@ -3435,12 +3437,15 @@ else:
     )
     _import_structure["models.xlm_roberta"].extend(
         [
+            "FLAX_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST",
             "FlaxXLMRobertaForMaskedLM",
             "FlaxXLMRobertaForMultipleChoice",
             "FlaxXLMRobertaForQuestionAnswering",
             "FlaxXLMRobertaForSequenceClassification",
             "FlaxXLMRobertaForTokenClassification",
             "FlaxXLMRobertaModel",
+            "FlaxXLMRobertaForCausalLM",
+            "FlaxXLMRobertaPreTrainedModel",
         ]
     )
@@ -6022,12 +6027,14 @@ if TYPE_CHECKING:
     )
     from .models.xlm_roberta import (
         TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST,
+        TFXLMRobertaForCausalLM,
         TFXLMRobertaForMaskedLM,
         TFXLMRobertaForMultipleChoice,
         TFXLMRobertaForQuestionAnswering,
         TFXLMRobertaForSequenceClassification,
         TFXLMRobertaForTokenClassification,
         TFXLMRobertaModel,
+        TFXLMRobertaPreTrainedModel,
     )
     from .models.xlnet import (
         TF_XLNET_PRETRAINED_MODEL_ARCHIVE_LIST,
@@ -6240,12 +6247,15 @@ if TYPE_CHECKING:
     )
     from .models.xglm import FlaxXGLMForCausalLM, FlaxXGLMModel, FlaxXGLMPreTrainedModel
     from .models.xlm_roberta import (
+        FLAX_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST,
+        FlaxXLMRobertaForCausalLM,
         FlaxXLMRobertaForMaskedLM,
         FlaxXLMRobertaForMultipleChoice,
         FlaxXLMRobertaForQuestionAnswering,
         FlaxXLMRobertaForSequenceClassification,
         FlaxXLMRobertaForTokenClassification,
         FlaxXLMRobertaModel,
+        FlaxXLMRobertaPreTrainedModel,
     )
 else:
...
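These hunks follow the top-level `__init__.py` convention: every public name is listed once in `_import_structure`, from which it is imported lazily on first attribute access, and once under `TYPE_CHECKING` so static analyzers still see it. A rough sketch of the lazy-import idea, not the actual `transformers.utils._LazyModule` implementation:

```python
# Rough sketch of the lazy-import pattern behind _import_structure
# (the real implementation is transformers.utils._LazyModule; this
# just shows the underlying idea via a PEP 562 module __getattr__).
import importlib

_import_structure = {
    "models.xlm_roberta": ["TFXLMRobertaForCausalLM", "TFXLMRobertaPreTrainedModel"],
}

# Invert the table: public attribute name -> defining submodule.
_attr_to_module = {attr: mod for mod, attrs in _import_structure.items() for attr in attrs}


def __getattr__(name):
    # Import the (heavy) submodule only when one of its names is first used.
    if name in _attr_to_module:
        module = importlib.import_module(f".{_attr_to_module[name]}", __package__)
        return getattr(module, name)
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
```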
@@ -142,6 +142,7 @@ FLAX_MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = OrderedDict(
         ("roberta", "FlaxRobertaForCausalLM"),
         ("roberta-prelayernorm", "FlaxRobertaPreLayerNormForCausalLM"),
         ("xglm", "FlaxXGLMForCausalLM"),
+        ("xlm-roberta", "FlaxXLMRobertaForCausalLM"),
     ]
 )
...
@@ -180,6 +180,7 @@ TF_MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = OrderedDict(
         ("transfo-xl", "TFTransfoXLLMHeadModel"),
         ("xglm", "TFXGLMForCausalLM"),
         ("xlm", "TFXLMWithLMHeadModel"),
+        ("xlm-roberta", "TFXLMRobertaForCausalLM"),
         ("xlnet", "TFXLNetLMHeadModel"),
     ]
 )
...
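Both auto-model registries (the Flax one above and this TF one) key on `config.model_type`, so adding an `("xlm-roberta", ...)` pair is all it takes for the generic entry points to resolve the new heads. A hedged usage sketch, with the checkpoint name again purely illustrative:

```python
# With the mapping entry above in place, the auto class resolves any
# config whose model_type is "xlm-roberta" to the new causal-LM head.
from transformers import TFAutoModelForCausalLM

model = TFAutoModelForCausalLM.from_pretrained("xlm-roberta-base", is_decoder=True)
print(type(model).__name__)  # expected: TFXLMRobertaForCausalLM
```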
@@ -79,12 +79,14 @@ except OptionalDependencyNotAvailable:
 else:
     _import_structure["modeling_tf_xlm_roberta"] = [
         "TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST",
+        "TFXLMRobertaForCausalLM",
         "TFXLMRobertaForMaskedLM",
         "TFXLMRobertaForMultipleChoice",
         "TFXLMRobertaForQuestionAnswering",
         "TFXLMRobertaForSequenceClassification",
         "TFXLMRobertaForTokenClassification",
         "TFXLMRobertaModel",
+        "TFXLMRobertaPreTrainedModel",
     ]
 
 try:
@@ -94,12 +96,15 @@ except OptionalDependencyNotAvailable:
     pass
 else:
     _import_structure["modeling_flax_xlm_roberta"] = [
+        "FLAX_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST",
         "FlaxXLMRobertaForMaskedLM",
+        "FlaxXLMRobertaForCausalLM",
         "FlaxXLMRobertaForMultipleChoice",
         "FlaxXLMRobertaForQuestionAnswering",
         "FlaxXLMRobertaForSequenceClassification",
         "FlaxXLMRobertaForTokenClassification",
         "FlaxXLMRobertaModel",
+        "FlaxXLMRobertaPreTrainedModel",
     ]
 
 if TYPE_CHECKING:
@@ -151,12 +156,14 @@ if TYPE_CHECKING:
     else:
         from .modeling_tf_xlm_roberta import (
             TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST,
+            TFXLMRobertaForCausalLM,
             TFXLMRobertaForMaskedLM,
             TFXLMRobertaForMultipleChoice,
             TFXLMRobertaForQuestionAnswering,
             TFXLMRobertaForSequenceClassification,
             TFXLMRobertaForTokenClassification,
             TFXLMRobertaModel,
+            TFXLMRobertaPreTrainedModel,
         )
 
     try:
@@ -166,12 +173,15 @@ if TYPE_CHECKING:
         pass
     else:
         from .modeling_flax_xlm_roberta import (
+            FLAX_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST,
+            FlaxXLMRobertaForCausalLM,
             FlaxXLMRobertaForMaskedLM,
             FlaxXLMRobertaForMultipleChoice,
             FlaxXLMRobertaForQuestionAnswering,
             FlaxXLMRobertaForSequenceClassification,
             FlaxXLMRobertaForTokenClassification,
             FlaxXLMRobertaModel,
+            FlaxXLMRobertaPreTrainedModel,
         )
 else:
...
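The per-model `__init__.py` gates each framework's exports behind an availability check, so a missing backend yields dummy placeholders rather than an import-time crash. A simplified sketch of that guard pattern; the real file additionally routes the names through `_import_structure`:

```python
# Simplified sketch of the optional-backend guard used in this __init__.
# OptionalDependencyNotAvailable and is_tf_available exist in
# transformers.utils; the control flow is the pattern shown in the diff.
from transformers.utils import OptionalDependencyNotAvailable, is_tf_available

try:
    if not is_tf_available():
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    # TensorFlow is missing: export a dummy that raises a clear error
    # only when the class is actually instantiated.
    from transformers.utils.dummy_tf_objects import TFXLMRobertaForCausalLM
else:
    from transformers.models.xlm_roberta.modeling_tf_xlm_roberta import (
        TFXLMRobertaForCausalLM,
    )
```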
@@ -286,7 +286,7 @@ class XLMRobertaSelfAttention(nn.Module):
         return outputs
 
 
-# Copied from transformers.models.roberta.modeling_roberta.RobertaSelfOutput
+# Copied from transformers.models.roberta.modeling_roberta.RobertaSelfOutput with Roberta->XLMRoberta
 class XLMRobertaSelfOutput(nn.Module):
     def __init__(self, config):
         super().__init__()
@@ -351,7 +351,7 @@ class XLMRobertaAttention(nn.Module):
         return outputs
 
 
-# Copied from transformers.models.roberta.modeling_roberta.RobertaIntermediate
+# Copied from transformers.models.roberta.modeling_roberta.RobertaIntermediate with Roberta->XLMRoberta
 class XLMRobertaIntermediate(nn.Module):
     def __init__(self, config):
         super().__init__()
@@ -367,7 +367,7 @@ class XLMRobertaIntermediate(nn.Module):
         return hidden_states
 
 
-# Copied from transformers.models.roberta.modeling_roberta.RobertaOutput
+# Copied from transformers.models.roberta.modeling_roberta.RobertaOutput with Roberta->XLMRoberta
 class XLMRobertaOutput(nn.Module):
     def __init__(self, config):
         super().__init__()
@@ -567,7 +567,7 @@ class XLMRobertaEncoder(nn.Module):
     )
 
 
-# Copied from transformers.models.roberta.modeling_roberta.RobertaPooler
+# Copied from transformers.models.roberta.modeling_roberta.RobertaPooler with Roberta->XLMRoberta
 class XLMRobertaPooler(nn.Module):
     def __init__(self, config):
         super().__init__()
@@ -1455,7 +1455,7 @@ class XLMRobertaForTokenClassification(XLMRobertaPreTrainedModel):
     )
 
 
-# Copied from transformers.models.roberta.modeling_roberta.RobertaClassificationHead
+# Copied from transformers.models.roberta.modeling_roberta.RobertaClassificationHead with Roberta->XLMRoberta
 class XLMRobertaClassificationHead(nn.Module):
     """Head for sentence-level classification tasks."""
...
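The `# Copied from ... with Roberta->XLMRoberta` annotations are what let the copy checker keep these classes in sync: it re-derives each annotated class from the RoBERTa source by applying the stated substitution and compares the result. A toy illustration of that mechanism, not the actual `utils/check_copies.py` logic (run in the repo via `make fix-copies`):

```python
# Toy illustration of the "# Copied from ... with Roberta->XLMRoberta"
# mechanism: regenerate the annotated class from its reference source
# by applying the declared substitution, then compare.
import inspect

from transformers.models.roberta import modeling_roberta
from transformers.models.xlm_roberta import modeling_xlm_roberta

reference = inspect.getsource(modeling_roberta.RobertaSelfOutput)
target = inspect.getsource(modeling_xlm_roberta.XLMRobertaSelfOutput)

# Apply the substitution declared after "with" in the comment.
expected = reference.replace("Roberta", "XLMRoberta")

assert target == expected, "XLMRobertaSelfOutput drifted from RobertaSelfOutput"
```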
@@ -1152,6 +1152,16 @@ class FlaxXGLMPreTrainedModel(metaclass=DummyObject):
         requires_backends(self, ["flax"])
 
 
+FLAX_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = None
+
+
+class FlaxXLMRobertaForCausalLM(metaclass=DummyObject):
+    _backends = ["flax"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["flax"])
+
+
 class FlaxXLMRobertaForMaskedLM(metaclass=DummyObject):
     _backends = ["flax"]
@@ -1192,3 +1202,10 @@ class FlaxXLMRobertaModel(metaclass=DummyObject):
 
     def __init__(self, *args, **kwargs):
         requires_backends(self, ["flax"])
+
+
+class FlaxXLMRobertaPreTrainedModel(metaclass=DummyObject):
+    _backends = ["flax"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["flax"])
@@ -2646,6 +2646,13 @@ class TFXLMWithLMHeadModel(metaclass=DummyObject):
 TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = None
 
 
+class TFXLMRobertaForCausalLM(metaclass=DummyObject):
+    _backends = ["tf"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["tf"])
+
+
 class TFXLMRobertaForMaskedLM(metaclass=DummyObject):
     _backends = ["tf"]
@@ -2688,6 +2695,13 @@ class TFXLMRobertaModel(metaclass=DummyObject):
         requires_backends(self, ["tf"])
 
 
+class TFXLMRobertaPreTrainedModel(metaclass=DummyObject):
+    _backends = ["tf"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["tf"])
+
+
 TF_XLNET_PRETRAINED_MODEL_ARCHIVE_LIST = None
...
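These dummy modules give every backend-gated name a placeholder, so importing from `transformers` always succeeds and a clear "requires tf/flax" error surfaces only when the object is actually used. A self-contained sketch of how `DummyObject` and `requires_backends` behave, simplified from `transformers.utils` (the real `requires_backends` checks each backend's availability and raises with install instructions):

```python
# Self-contained sketch of the DummyObject/requires_backends pattern,
# simplified from transformers.utils.
def requires_backends(obj, backends):
    name = obj.__name__ if hasattr(obj, "__name__") else type(obj).__name__
    raise ImportError(f"{name} requires the following backends: {', '.join(backends)}")


class DummyObject(type):
    """Metaclass: attribute access on the class itself also raises."""

    def __getattribute__(cls, key):
        if key.startswith("_"):
            return super().__getattribute__(key)
        requires_backends(cls, cls._backends)


class TFXLMRobertaForCausalLM(metaclass=DummyObject):
    _backends = ["tf"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["tf"])


# Importing the dummy is free; instantiating it raises a helpful error
# instead of an opaque ImportError deep inside TensorFlow.
try:
    TFXLMRobertaForCausalLM()
except ImportError as err:
    print(err)  # TFXLMRobertaForCausalLM requires the following backends: tf
```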