Unverified commit c89bdfbe, authored by Sylvain Gugger and committed by GitHub

Reorganize repo (#8580)

* Put models in subfolders

* Styling

* Fix imports in tests

* More fixes in test imports

* Sneaky hidden imports

* Fix imports in doc files

* More sneaky imports

* Finish fixing tests

* Fix examples

* Fix path for copies

* More fixes for examples

* Fix dummy files

* More fixes for example

* More model import fixes

* Is this why you're unhappy GitHub?

* Fix imports in convert command
parent 90150733
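This commit moves each model's modules from the top level of the transformers package into per-model subpackages under transformers/models/, which is why the hunks below mostly rewrite relative imports: one leading dot becomes three for shared utilities, and cross-model imports go through the sibling subpackage. The public top-level API is untouched. A minimal before/after sketch of the import pattern, assuming a transformers install that includes this reorganization:

# Before this commit, model modules sat directly under the package root:
# from transformers.modeling_longformer import LongformerModel

# After this commit, the same module lives in a per-model subpackage:
from transformers.models.longformer.modeling_longformer import LongformerModel

# Downstream code should keep using the stable re-exports from the package root,
# as the updated example script in the first hunk below does:
from transformers import LongformerForQuestionAnswering, LongformerModel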
@@ -20,7 +20,7 @@ import argparse
import pytorch_lightning as pl
import torch
-from transformers.modeling_longformer import LongformerForQuestionAnswering, LongformerModel
+from transformers import LongformerForQuestionAnswering, LongformerModel
class LightningModel(pl.LightningModule):
......
@@ -24,23 +24,23 @@ import torch.nn as nn
from torch.nn import CrossEntropyLoss, MSELoss
from torch.nn import functional as F
-from .activations import ACT2FN, gelu
-from .configuration_longformer import LongformerConfig
-from .file_utils import (
+from ...activations import ACT2FN, gelu
+from ...file_utils import (
    ModelOutput,
    add_code_sample_docstrings,
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
    replace_return_docstrings,
)
-from .modeling_outputs import MaskedLMOutput, SequenceClassifierOutput, TokenClassifierOutput
-from .modeling_utils import (
+from ...modeling_outputs import MaskedLMOutput, SequenceClassifierOutput, TokenClassifierOutput
+from ...modeling_utils import (
    PreTrainedModel,
    apply_chunking_to_forward,
    find_pruneable_heads_and_indices,
    prune_linear_layer,
)
-from .utils import logging
+from ...utils import logging
+from .configuration_longformer import LongformerConfig
logger = logging.get_logger(__name__)
@@ -285,7 +285,7 @@ def _compute_global_attention_mask(input_ids, sep_token_id, before_sep_token=Tru
return attention_mask
-# Copied from transformers.modeling_roberta.create_position_ids_from_input_ids
+# Copied from transformers.models.roberta.modeling_roberta.create_position_ids_from_input_ids
def create_position_ids_from_input_ids(input_ids, padding_idx):
"""
Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
@@ -307,7 +307,7 @@ class LongformerEmbeddings(nn.Module):
Same as BertEmbeddings with a tiny tweak for positional embeddings indexing.
"""
-# Copied from transformers.modeling_bert.BertEmbeddings.__init__
+# Copied from transformers.models.bert.modeling_bert.BertEmbeddings.__init__
def __init__(self, config):
super().__init__()
self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id)
@@ -336,7 +336,7 @@ class LongformerEmbeddings(nn.Module):
else:
position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds)
-# Copied from transformers.modeling_bert.BertEmbeddings.forward
+# Copied from transformers.models.bert.modeling_bert.BertEmbeddings.forward
if input_ids is not None:
input_shape = input_ids.size()
else:
@@ -918,7 +918,7 @@ class LongformerSelfAttention(nn.Module):
return global_attn_output, global_attn_probs
-# Copied from transformers.modeling_bert.BertSelfOutput
+# Copied from transformers.models.bert.modeling_bert.BertSelfOutput
class LongformerSelfOutput(nn.Module):
def __init__(self, config):
super().__init__()
@@ -973,7 +973,7 @@ class LongformerAttention(nn.Module):
return outputs
-# Copied from transformers.modeling_bert.BertIntermediate
+# Copied from transformers.models.bert.modeling_bert.BertIntermediate
class LongformerIntermediate(nn.Module):
def __init__(self, config):
super().__init__()
@@ -989,7 +989,7 @@ class LongformerIntermediate(nn.Module):
return hidden_states
-# Copied from transformers.modeling_bert.BertOutput
+# Copied from transformers.models.bert.modeling_bert.BertOutput
class LongformerOutput(nn.Module):
def __init__(self, config):
super().__init__()
@@ -1114,7 +1114,7 @@ class LongformerEncoder(nn.Module):
)
-# Copied from transformers.modeling_bert.BertPooler
+# Copied from transformers.models.bert.modeling_bert.BertPooler
class LongformerPooler(nn.Module):
def __init__(self, config):
super().__init__()
@@ -1130,7 +1130,7 @@ class LongformerPooler(nn.Module):
return pooled_output
-# Copied from transformers.modeling_roberta.RobertaLMHead with Roberta->Longformer
+# Copied from transformers.models.roberta.modeling_roberta.RobertaLMHead with Roberta->Longformer
class LongformerLMHead(nn.Module):
"""Longformer Head for masked language modeling."""
......
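Most of the hunks in this file only touch `# Copied from ...` comments. These markers are not plain documentation: a repository consistency check resolves the dotted path and verifies that the local copy still matches the referenced source, so the paths have to follow the new transformers.models.<model>.<module> layout (the "Fix path for copies" step in the commit message above). A rough, hypothetical sketch of how such a check could collect the markers — this is not the repository's actual script:

import re

# Match "# Copied from <dotted.path>" markers, optionally followed by
# extra qualifiers such as "with Roberta->Longformer".
_COPIED_FROM = re.compile(r"#\s*Copied from\s+(transformers\.[\w.]+)")

def copied_from_targets(source_code: str) -> list:
    """Return the dotted paths referenced by '# Copied from' comments."""
    return _COPIED_FROM.findall(source_code)

line = "# Copied from transformers.models.bert.modeling_bert.BertSelfOutput"
assert copied_from_targets(line) == ["transformers.models.bert.modeling_bert.BertSelfOutput"]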
@@ -21,15 +21,14 @@ import tensorflow as tf
from transformers.activations_tf import get_tf_activation
-from .configuration_longformer import LongformerConfig
-from .file_utils import (
+from ...file_utils import (
    ModelOutput,
    add_code_sample_docstrings,
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
)
-from .modeling_tf_outputs import TFMaskedLMOutput, TFQuestionAnsweringModelOutput
-from .modeling_tf_utils import (
+from ...modeling_tf_outputs import TFMaskedLMOutput, TFQuestionAnsweringModelOutput
+from ...modeling_tf_utils import (
TFMaskedLanguageModelingLoss,
TFPreTrainedModel,
TFQuestionAnsweringLoss,
@@ -37,8 +36,9 @@ from .modeling_tf_utils import (
    keras_serializable,
    shape_list,
)
-from .tokenization_utils import BatchEncoding
-from .utils import logging
+from ...tokenization_utils import BatchEncoding
+from ...utils import logging
+from .configuration_longformer import LongformerConfig
logger = logging.get_logger(__name__)
@@ -226,7 +226,7 @@ def _compute_global_attention_mask(input_ids_shape, sep_token_indices, before_se
return attention_mask
-# Copied from transformers.modeling_tf_roberta.TFRobertaLMHead
+# Copied from transformers.models.roberta.modeling_tf_roberta.TFRobertaLMHead
class TFLongformerLMHead(tf.keras.layers.Layer):
"""Roberta Head for masked language modeling."""
@@ -260,7 +260,7 @@ class TFLongformerLMHead(tf.keras.layers.Layer):
return x
-# Copied from transformers.modeling_tf_roberta.TFRobertaEmbeddings
+# Copied from transformers.models.roberta.modeling_tf_roberta.TFRobertaEmbeddings
class TFLongformerEmbeddings(tf.keras.layers.Layer):
"""
Same as BertEmbeddings with a tiny tweak for positional embeddings indexing.
@@ -420,7 +420,7 @@ class TFLongformerEmbeddings(tf.keras.layers.Layer):
return tf.reshape(logits, [batch_size, length, self.vocab_size])
-# Copied from transformers.modeling_tf_bert.TFBertIntermediate
+# Copied from transformers.models.bert.modeling_tf_bert.TFBertIntermediate
class TFLongformerIntermediate(tf.keras.layers.Layer):
def __init__(self, config, **kwargs):
super().__init__(**kwargs)
@@ -441,7 +441,7 @@ class TFLongformerIntermediate(tf.keras.layers.Layer):
return hidden_states
-# Copied from transformers.modeling_tf_bert.TFBertOutput
+# Copied from transformers.models.bert.modeling_tf_bert.TFBertOutput
class TFLongformerOutput(tf.keras.layers.Layer):
def __init__(self, config, **kwargs):
super().__init__(**kwargs)
@@ -460,7 +460,7 @@ class TFLongformerOutput(tf.keras.layers.Layer):
return hidden_states
-# Copied from transformers.modeling_tf_bert.TFBertPooler
+# Copied from transformers.models.bert.modeling_tf_bert.TFBertPooler
class TFLongformerPooler(tf.keras.layers.Layer):
def __init__(self, config, **kwargs):
super().__init__(**kwargs)
@@ -481,7 +481,7 @@ class TFLongformerPooler(tf.keras.layers.Layer):
return pooled_output
-# Copied from transformers.modeling_tf_bert.TFBertSelfOutput
+# Copied from transformers.models.bert.modeling_tf_bert.TFBertSelfOutput
class TFLongformerSelfOutput(tf.keras.layers.Layer):
def __init__(self, config, **kwargs):
super().__init__(**kwargs)
......
@@ -13,8 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from .tokenization_roberta import RobertaTokenizer
-from .utils import logging
+from ...utils import logging
+from ..roberta.tokenization_roberta import RobertaTokenizer
logger = logging.get_logger(__name__)
......
@@ -13,9 +13,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+from ...utils import logging
+from ..roberta.tokenization_roberta_fast import RobertaTokenizerFast
from .tokenization_longformer import LongformerTokenizer
-from .tokenization_roberta_fast import RobertaTokenizerFast
-from .utils import logging
logger = logging.get_logger(__name__)
......
# flake8: noqa
# There's no way to ignore "F401 '...' imported but unused" warnings in this
# module, but to preserve other warnings. So, don't check this module at all.
from ...file_utils import is_tf_available, is_tokenizers_available, is_torch_available
from .configuration_lxmert import LXMERT_PRETRAINED_CONFIG_ARCHIVE_MAP, LxmertConfig
from .tokenization_lxmert import LxmertTokenizer

if is_tokenizers_available():
    from .tokenization_lxmert_fast import LxmertTokenizerFast

if is_torch_available():
    from .modeling_lxmert import (
        LxmertEncoder,
        LxmertForPreTraining,
        LxmertForQuestionAnswering,
        LxmertModel,
        LxmertPreTrainedModel,
        LxmertVisualFeatureEncoder,
        LxmertXLayer,
    )

if is_tf_available():
    from .modeling_tf_lxmert import (
        TF_LXMERT_PRETRAINED_MODEL_ARCHIVE_LIST,
        TFLxmertForPreTraining,
        TFLxmertMainLayer,
        TFLxmertModel,
        TFLxmertPreTrainedModel,
        TFLxmertVisualFeatureEncoder,
    )
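The new per-model __init__.py above re-exports the model's public classes and guards each framework-specific import behind an availability check, so importing the subpackage never forces both PyTorch and TensorFlow to be installed. Calling code can lean on the same guards; a small usage sketch, assuming a post-reorganization transformers install:

from transformers.file_utils import is_tf_available, is_torch_available

# Pick whichever LXMERT implementation matches the installed framework;
# both classes are re-exported from the top-level transformers package.
if is_torch_available():
    from transformers import LxmertModel as LxmertBackbone
elif is_tf_available():
    from transformers import TFLxmertModel as LxmertBackbone
else:
    raise ImportError("LXMERT requires either PyTorch or TensorFlow to be installed.")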
@@ -15,8 +15,8 @@
""" LXMERT model configuration """
-from .configuration_utils import PretrainedConfig
-from .utils import logging
+from ...configuration_utils import PretrainedConfig
+from ...utils import logging
logger = logging.get_logger(__name__)
......
@@ -25,17 +25,17 @@ import torch
from torch import nn
from torch.nn import CrossEntropyLoss, SmoothL1Loss
-from .activations import ACT2FN, gelu
-from .configuration_lxmert import LxmertConfig
-from .file_utils import (
+from ...activations import ACT2FN, gelu
+from ...file_utils import (
    ModelOutput,
    add_code_sample_docstrings,
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
    replace_return_docstrings,
)
-from .modeling_utils import PreTrainedModel
-from .utils import logging
+from ...modeling_utils import PreTrainedModel
+from ...utils import logging
+from .configuration_lxmert import LxmertConfig
logger = logging.get_logger(__name__)
......
@@ -22,18 +22,18 @@ from typing import Dict, Optional, Tuple
import tensorflow as tf
-from .activations_tf import get_tf_activation
-from .configuration_lxmert import LxmertConfig
-from .file_utils import (
+from ...activations_tf import get_tf_activation
+from ...file_utils import (
    ModelOutput,
    add_code_sample_docstrings,
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
    replace_return_docstrings,
)
-from .modeling_tf_utils import TFPreTrainedModel, get_initializer, keras_serializable, shape_list
-from .tokenization_utils_base import BatchEncoding
-from .utils import logging
+from ...modeling_tf_utils import TFPreTrainedModel, get_initializer, keras_serializable, shape_list
+from ...tokenization_utils_base import BatchEncoding
+from ...utils import logging
+from .configuration_lxmert import LxmertConfig
logger = logging.get_logger(__name__)
......
@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from .tokenization_bert import BertTokenizer
+from ..bert.tokenization_bert import BertTokenizer
####################################################
......
@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from .tokenization_bert_fast import BertTokenizerFast
+from ..bert.tokenization_bert_fast import BertTokenizerFast
from .tokenization_lxmert import LxmertTokenizer
......
# flake8: noqa
# There's no way to ignore "F401 '...' imported but unused" warnings in this
# module, but to preserve other warnings. So, don't check this module at all.
from ...file_utils import is_sentencepiece_available, is_tf_available, is_torch_available
from .configuration_marian import MarianConfig

if is_sentencepiece_available():
    from .tokenization_marian import MarianTokenizer

if is_torch_available():
    from .modeling_marian import MarianMTModel

if is_tf_available():
    from .modeling_tf_marian import TFMarianMTModel
@@ -14,7 +14,7 @@
# limitations under the License.
""" Marian model configuration """
-from .configuration_bart import BartConfig
+from ..bart.configuration_bart import BartConfig
PRETRAINED_CONFIG_ARCHIVE_MAP = {
......
@@ -3,7 +3,7 @@ import os
from pathlib import Path
from typing import List, Tuple
-from transformers.convert_marian_to_pytorch import (
+from transformers.models.marian.convert_marian_to_pytorch import (
FRONT_MATTER_TEMPLATE,
_parse_readme,
convert_all_sentencepiece_models,
......
@@ -15,8 +15,8 @@
"""PyTorch MarianMTModel model, ported from the Marian C++ repo."""
+from ..bart.modeling_bart import BartForConditionalGeneration
from .configuration_marian import MarianConfig
-from .modeling_bart import BartForConditionalGeneration
# See all Marian models at https://huggingface.co/models?search=Helsinki-NLP
......
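The hunk above shows that the PyTorch Marian implementation builds directly on Bart, now imported from the sibling bart subpackage instead of a flat module. Conceptually the model is a thin subclass that mainly swaps in its own configuration class; a hedged sketch of that relationship under the new layout (illustrative only, not the file's full contents):

from transformers.models.bart.modeling_bart import BartForConditionalGeneration
from transformers.models.marian.configuration_marian import MarianConfig

class MarianLikeModel(BartForConditionalGeneration):
    # A Marian-style seq2seq model: reuse Bart's conditional-generation
    # forward pass and attach the Marian configuration class.
    config_class = MarianConfig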
@@ -14,10 +14,10 @@
# limitations under the License.
"""TF Marian model, ported from the fairseq repo."""
+from ...file_utils import add_start_docstrings, is_tf_available
+from ...utils import logging
+from ..bart.modeling_tf_bart import BART_START_DOCSTRING, LARGE_NEGATIVE, TFBartForConditionalGeneration
from .configuration_marian import MarianConfig
-from .file_utils import add_start_docstrings, is_tf_available
-from .modeling_tf_bart import BART_START_DOCSTRING, LARGE_NEGATIVE, TFBartForConditionalGeneration
-from .utils import logging
if is_tf_available():
......
@@ -7,9 +7,9 @@ from typing import Dict, List, Optional, Tuple, Union
import sentencepiece
-from .file_utils import add_start_docstrings
-from .tokenization_utils import BatchEncoding, PreTrainedTokenizer
-from .tokenization_utils_base import PREPARE_SEQ2SEQ_BATCH_DOCSTRING
+from ...file_utils import add_start_docstrings
+from ...tokenization_utils import BatchEncoding, PreTrainedTokenizer
+from ...tokenization_utils_base import PREPARE_SEQ2SEQ_BATCH_DOCSTRING
vocab_files_names = {
......
# flake8: noqa
# There's no way to ignore "F401 '...' imported but unused" warnings in this
# module, but to preserve other warnings. So, don't check this module at all.
from ...file_utils import is_sentencepiece_available, is_tf_available, is_tokenizers_available, is_torch_available
from .configuration_mbart import MBartConfig

if is_sentencepiece_available():
    from .tokenization_mbart import MBartTokenizer

if is_tokenizers_available():
    from .tokenization_mbart_fast import MBartTokenizerFast

if is_torch_available():
    from .modeling_mbart import MBartForConditionalGeneration

if is_tf_available():
    from .modeling_tf_mbart import TFMBartForConditionalGeneration
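Taken together, the hunks in this commit imply the new source layout: every model owns a subpackage under src/transformers/models/ holding its configuration, modeling, tokenization, and conversion modules plus an __init__.py with guarded re-exports, while shared infrastructure (file_utils, configuration_utils, modeling_utils, utils.logging, ...) stays at the package root and is reached with `...` relative imports. A sketch of the layout for the models touched here, reconstructed from the import paths in the diff (other files omitted):

src/transformers/
    configuration_utils.py
    file_utils.py
    modeling_utils.py
    modeling_tf_utils.py
    tokenization_utils.py
    models/
        bart/        configuration_bart.py, modeling_bart.py, modeling_tf_bart.py
        bert/        tokenization_bert.py, tokenization_bert_fast.py, modeling_bert.py, modeling_tf_bert.py
        longformer/  configuration_longformer.py, modeling_longformer.py, modeling_tf_longformer.py, tokenization_longformer.py, tokenization_longformer_fast.py
        lxmert/      __init__.py, configuration_lxmert.py, modeling_lxmert.py, modeling_tf_lxmert.py, tokenization_lxmert.py, tokenization_lxmert_fast.py
        marian/      __init__.py, configuration_marian.py, convert_marian_to_pytorch.py, modeling_marian.py, modeling_tf_marian.py, tokenization_marian.py
        mbart/       __init__.py, configuration_mbart.py, modeling_mbart.py, modeling_tf_mbart.py, tokenization_mbart.py, tokenization_mbart_fast.py
        roberta/     modeling_roberta.py, modeling_tf_roberta.py, tokenization_roberta.py, tokenization_roberta_fast.py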