Unverified commit c89bdfbe, authored by Sylvain Gugger and committed by GitHub
Browse files

Reorganize repo (#8580)

* Put models in subfolders

* Styling

* Fix imports in tests

* More fixes in test imports

* Sneaky hidden imports

* Fix imports in doc files

* More sneaky imports

* Finish fixing tests

* Fix examples

* Fix path for copies

* More fixes for examples

* Fix dummy files

* More fixes for example

* More model import fixes

* Is this why you're unhappy GitHub?

* Fix imports in convert command
parent 90150733
......@@ -17,7 +17,7 @@ import json
import os
import unittest
from transformers.tokenization_ctrl import VOCAB_FILES_NAMES, CTRLTokenizer
from transformers.models.ctrl.tokenization_ctrl import VOCAB_FILES_NAMES, CTRLTokenizer
from .test_tokenization_common import TokenizerTesterMixin
......
......@@ -18,8 +18,8 @@ import re
import unittest
from typing import Tuple
from transformers.models.deberta.tokenization_deberta import DebertaTokenizer
from transformers.testing_utils import require_torch
from transformers.tokenization_deberta import DebertaTokenizer
from .test_tokenization_common import TokenizerTesterMixin
......
......@@ -19,8 +19,8 @@ import os
import unittest
from transformers.file_utils import cached_property
from transformers.models.fsmt.tokenization_fsmt import VOCAB_FILES_NAMES, FSMTTokenizer
from transformers.testing_utils import slow
from transformers.tokenization_fsmt import VOCAB_FILES_NAMES, FSMTTokenizer
from .test_tokenization_common import TokenizerTesterMixin
......
......@@ -18,8 +18,8 @@ import os
import unittest
from transformers import FunnelTokenizer, FunnelTokenizerFast
from transformers.models.funnel.tokenization_funnel import VOCAB_FILES_NAMES
from transformers.testing_utils import require_tokenizers
from transformers.tokenization_funnel import VOCAB_FILES_NAMES
from .test_tokenization_common import TokenizerTesterMixin
......
......@@ -19,8 +19,8 @@ import os
import unittest
from transformers import GPT2Tokenizer, GPT2TokenizerFast
from transformers.models.gpt2.tokenization_gpt2 import VOCAB_FILES_NAMES
from transformers.testing_utils import require_tokenizers
from transformers.tokenization_gpt2 import VOCAB_FILES_NAMES
from .test_tokenization_common import TokenizerTesterMixin
......
......@@ -19,8 +19,8 @@ import os
import unittest
from transformers import HerbertTokenizer, HerbertTokenizerFast
from transformers.models.herbert.tokenization_herbert import VOCAB_FILES_NAMES
from transformers.testing_utils import get_tests_dir, require_tokenizers, slow
from transformers.tokenization_herbert import VOCAB_FILES_NAMES
from .test_tokenization_common import TokenizerTesterMixin
......
......@@ -18,8 +18,8 @@ import os
import unittest
from transformers import LayoutLMTokenizer, LayoutLMTokenizerFast
from transformers.models.layoutlm.tokenization_layoutlm import VOCAB_FILES_NAMES
from transformers.testing_utils import require_tokenizers
from transformers.tokenization_layoutlm import VOCAB_FILES_NAMES
from .test_tokenization_common import TokenizerTesterMixin
......
......@@ -18,8 +18,8 @@ import os
import unittest
from transformers import LxmertTokenizer, LxmertTokenizerFast
from transformers.models.bert.tokenization_bert import VOCAB_FILES_NAMES
from transformers.testing_utils import require_tokenizers
from transformers.tokenization_bert import VOCAB_FILES_NAMES
from .test_tokenization_common import TokenizerTesterMixin
......
......@@ -25,7 +25,7 @@ from transformers.testing_utils import _sentencepiece_available, _torch_availabl
if _sentencepiece_available:
from transformers.tokenization_marian import save_json, vocab_files_names
from transformers.models.marian.tokenization_marian import save_json, vocab_files_names
from .test_tokenization_common import TokenizerTesterMixin
......
......@@ -24,7 +24,7 @@ if _sentencepiece_available:
if is_torch_available():
from transformers.modeling_bart import shift_tokens_right
from transformers.models.bart.modeling_bart import shift_tokens_right
EN_CODE = 250004
RO_CODE = 250020
......
......@@ -19,8 +19,8 @@ import os
import unittest
from transformers import OpenAIGPTTokenizer, OpenAIGPTTokenizerFast
from transformers.models.openai.tokenization_openai import VOCAB_FILES_NAMES
from transformers.testing_utils import require_tokenizers
from transformers.tokenization_openai import VOCAB_FILES_NAMES
from .test_tokenization_common import TokenizerTesterMixin
......
......@@ -16,7 +16,7 @@
import os
import unittest
from transformers.tokenization_phobert import VOCAB_FILES_NAMES, PhobertTokenizer
from transformers.models.phobert.tokenization_phobert import VOCAB_FILES_NAMES, PhobertTokenizer
from .test_tokenization_common import TokenizerTesterMixin
......
......@@ -18,15 +18,15 @@ import os
import unittest
from transformers import BatchEncoding
from transformers.testing_utils import require_torch, slow
from transformers.tokenization_bert import (
from transformers.models.bert.tokenization_bert import (
BasicTokenizer,
WordpieceTokenizer,
_is_control,
_is_punctuation,
_is_whitespace,
)
from transformers.tokenization_prophetnet import VOCAB_FILES_NAMES, ProphetNetTokenizer
from transformers.models.prophetnet.tokenization_prophetnet import VOCAB_FILES_NAMES, ProphetNetTokenizer
from transformers.testing_utils import require_torch, slow
from .test_tokenization_common import TokenizerTesterMixin
......
......@@ -5,17 +5,17 @@ import tempfile
from unittest import TestCase
from transformers import BartTokenizer, BartTokenizerFast, DPRQuestionEncoderTokenizer, DPRQuestionEncoderTokenizerFast
from transformers.configuration_bart import BartConfig
from transformers.configuration_dpr import DPRConfig
from transformers.file_utils import is_datasets_available, is_faiss_available, is_torch_available
from transformers.models.bart.configuration_bart import BartConfig
from transformers.models.bert.tokenization_bert import VOCAB_FILES_NAMES as DPR_VOCAB_FILES_NAMES
from transformers.models.dpr.configuration_dpr import DPRConfig
from transformers.models.roberta.tokenization_roberta import VOCAB_FILES_NAMES as BART_VOCAB_FILES_NAMES
from transformers.testing_utils import require_datasets, require_faiss, require_tokenizers, require_torch, slow
from transformers.tokenization_bert import VOCAB_FILES_NAMES as DPR_VOCAB_FILES_NAMES
from transformers.tokenization_roberta import VOCAB_FILES_NAMES as BART_VOCAB_FILES_NAMES
if is_torch_available() and is_datasets_available() and is_faiss_available():
from transformers.configuration_rag import RagConfig
from transformers.tokenization_rag import RagTokenizer
from transformers.models.rag.configuration_rag import RagConfig
from transformers.models.rag.tokenization_rag import RagTokenizer
@require_faiss
......
......@@ -19,8 +19,8 @@ import os
import unittest
from transformers import AddedToken, RobertaTokenizer, RobertaTokenizerFast
from transformers.models.roberta.tokenization_roberta import VOCAB_FILES_NAMES
from transformers.testing_utils import require_tokenizers, slow
from transformers.tokenization_roberta import VOCAB_FILES_NAMES
from .test_tokenization_common import TokenizerTesterMixin
......
......@@ -17,7 +17,7 @@
import os
import unittest
from transformers.tokenization_transfo_xl import VOCAB_FILES_NAMES, TransfoXLTokenizer
from transformers.models.transfo_xl.tokenization_transfo_xl import VOCAB_FILES_NAMES, TransfoXLTokenizer
from .test_tokenization_common import TokenizerTesterMixin
......
......@@ -19,8 +19,8 @@ from typing import Callable, Optional
import numpy as np
from transformers import BatchEncoding, BertTokenizer, BertTokenizerFast, PreTrainedTokenizer, TensorType, TokenSpan
from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer
from transformers.testing_utils import require_tf, require_tokenizers, require_torch, slow
from transformers.tokenization_gpt2 import GPT2Tokenizer
class TokenizerUtilsTest(unittest.TestCase):
......
......@@ -18,8 +18,8 @@ import json
import os
import unittest
from transformers.models.xlm.tokenization_xlm import VOCAB_FILES_NAMES, XLMTokenizer
from transformers.testing_utils import slow
from transformers.tokenization_xlm import VOCAB_FILES_NAMES, XLMTokenizer
from .test_tokenization_common import TokenizerTesterMixin
......
......@@ -18,8 +18,8 @@ import os
import unittest
from transformers.file_utils import cached_property
from transformers.models.xlm_prophetnet.tokenization_xlm_prophetnet import SPIECE_UNDERLINE, XLMProphetNetTokenizer
from transformers.testing_utils import require_sentencepiece, slow
from transformers.tokenization_xlm_prophetnet import SPIECE_UNDERLINE, XLMProphetNetTokenizer
from .test_tokenization_common import TokenizerTesterMixin
......
......@@ -37,10 +37,11 @@ REFERENCE_CODE = """ def __init__(self, config):
class CopyCheckTester(unittest.TestCase):
def setUp(self):
self.transformer_dir = tempfile.mkdtemp()
os.makedirs(os.path.join(self.transformer_dir, "models/bert/"))
check_copies.TRANSFORMER_PATH = self.transformer_dir
shutil.copy(
os.path.join(git_repo_path, "src/transformers/modeling_bert.py"),
os.path.join(self.transformer_dir, "modeling_bert.py"),
os.path.join(git_repo_path, "src/transformers/models/bert/modeling_bert.py"),
os.path.join(self.transformer_dir, "models/bert/modeling_bert.py"),
)
def tearDown(self):
......@@ -62,27 +63,27 @@ class CopyCheckTester(unittest.TestCase):
self.assertTrue(f.read(), expected)
def test_find_code_in_transformers(self):
code = check_copies.find_code_in_transformers("modeling_bert.BertLMPredictionHead")
code = check_copies.find_code_in_transformers("models.bert.modeling_bert.BertLMPredictionHead")
self.assertEqual(code, REFERENCE_CODE)
def test_is_copy_consistent(self):
# Base copy consistency
self.check_copy_consistency(
"# Copied from transformers.modeling_bert.BertLMPredictionHead",
"# Copied from transformers.models.bert.modeling_bert.BertLMPredictionHead",
"BertLMPredictionHead",
REFERENCE_CODE + "\n",
)
# With no empty line at the end
self.check_copy_consistency(
"# Copied from transformers.modeling_bert.BertLMPredictionHead",
"# Copied from transformers.models.bert.modeling_bert.BertLMPredictionHead",
"BertLMPredictionHead",
REFERENCE_CODE,
)
# Copy consistency with rename
self.check_copy_consistency(
"# Copied from transformers.modeling_bert.BertLMPredictionHead with Bert->TestModel",
"# Copied from transformers.models.bert.modeling_bert.BertLMPredictionHead with Bert->TestModel",
"TestModelLMPredictionHead",
re.sub("Bert", "TestModel", REFERENCE_CODE),
)
......@@ -90,14 +91,14 @@ class CopyCheckTester(unittest.TestCase):
# Copy consistency with a really long name
long_class_name = "TestModelWithAReallyLongNameBecauseSomePeopleLikeThatForSomeReasonIReallyDontUnderstand"
self.check_copy_consistency(
f"# Copied from transformers.modeling_bert.BertLMPredictionHead with Bert->{long_class_name}",
f"# Copied from transformers.models.bert.modeling_bert.BertLMPredictionHead with Bert->{long_class_name}",
f"{long_class_name}LMPredictionHead",
re.sub("Bert", long_class_name, REFERENCE_CODE),
)
# Copy consistency with overwrite
self.check_copy_consistency(
"# Copied from transformers.modeling_bert.BertLMPredictionHead with Bert->TestModel",
"# Copied from transformers.models.bert.modeling_bert.BertLMPredictionHead with Bert->TestModel",
"TestModelLMPredictionHead",
REFERENCE_CODE,
overwrite_result=re.sub("Bert", "TestModel", REFERENCE_CODE),
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment