Unverified commit 26f8b2cb authored by Sylvain Gugger, committed by GitHub

Make Barthez tokenizer tests a bit faster (#10399)

* Make Barthez tokenizer tests a bit faster

* Quality
parent b040e6ef
@@ -33,8 +33,9 @@ class BarthezTokenizationTest(TokenizerTesterMixin, unittest.TestCase):

     def setUp(self):
         super().setUp()
-        tokenizer = BarthezTokenizer.from_pretrained("moussaKam/mbarthez")
+        tokenizer = BarthezTokenizerFast.from_pretrained("moussaKam/mbarthez")
         tokenizer.save_pretrained(self.tmpdirname)
+        tokenizer.save_pretrained(self.tmpdirname, legacy_format=False)
         self.tokenizer = tokenizer

     @require_torch
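For context, here is a minimal standalone sketch of the setUp pattern the hunk above switches to (not the test itself): build only the fast tokenizer, then save it in both layouts so later loads of either the slow or the fast class stay local. It assumes a recent transformers with sentencepiece and tokenizers installed plus network access for the first download; the tmpdir variable and the file names in the comments are illustrative.

import tempfile

from transformers import BarthezTokenizer, BarthezTokenizerFast

tmpdir = tempfile.mkdtemp()

# The expensive step (downloading and converting the sentencepiece model) happens once.
fast = BarthezTokenizerFast.from_pretrained("moussaKam/mbarthez")

# Legacy layout (sentencepiece model + tokenizer config), then the fast-only tokenizer.json.
fast.save_pretrained(tmpdir)
fast.save_pretrained(tmpdir, legacy_format=False)

# Both classes can now be rebuilt from the local directory without re-conversion.
slow = BarthezTokenizer.from_pretrained(tmpdir)
fast_again = BarthezTokenizerFast.from_pretrained(tmpdir)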
@@ -238,7 +238,7 @@ class TokenizerTesterMixin:
         tokenizer = self.get_rust_tokenizer()

         for parameter_name, parameter in signature.parameters.items():
-            if parameter.default != inspect.Parameter.empty:
+            if parameter.default != inspect.Parameter.empty and parameter_name != "tokenizer_file":
                 self.assertIn(parameter_name, tokenizer.init_kwargs)

     def test_rust_and_python_full_tokenizers(self):
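The second hunk loosens a signature check in the shared tester. The sketch below is a rough standalone rendering of that check, not the test harness itself: it inspects a fast tokenizer's __init__ signature and reports whether each defaulted parameter was recorded in init_kwargs, skipping tokenizer_file, which is absent when the fast tokenizer was built from a legacy-format save. It reuses the checkpoint from the hunk above and needs network access; outside the tester a parameter may legitimately be missing, so it prints rather than asserts.

import inspect

from transformers import BarthezTokenizerFast

signature = inspect.signature(BarthezTokenizerFast.__init__)
tokenizer = BarthezTokenizerFast.from_pretrained("moussaKam/mbarthez")

for parameter_name, parameter in signature.parameters.items():
    # Only defaulted parameters are expected back in init_kwargs; tokenizer_file is the
    # exception this commit carves out.
    if parameter.default != inspect.Parameter.empty and parameter_name != "tokenizer_file":
        print(parameter_name, parameter_name in tokenizer.init_kwargs)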
@@ -12,18 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
 import tempfile
 import unittest

 from transformers import SPIECE_UNDERLINE, BatchEncoding, MBartTokenizer, MBartTokenizerFast, is_torch_available
-from transformers.file_utils import is_sentencepiece_available
 from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch

 from .test_tokenization_common import TokenizerTesterMixin


-if is_sentencepiece_available():
-    from .test_tokenization_xlm_roberta import SAMPLE_VOCAB
+SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")


 if is_torch_available():
@@ -17,14 +17,12 @@ import tempfile
 import unittest

 from transformers import SPIECE_UNDERLINE, BatchEncoding, MBart50Tokenizer, MBart50TokenizerFast, is_torch_available
-from transformers.file_utils import is_sentencepiece_available
 from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch

 from .test_tokenization_common import TokenizerTesterMixin


-if is_sentencepiece_available():
-    SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")
+SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")


 if is_torch_available():
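The last two hunks make the same change in the MBart and MBart-50 test modules: SAMPLE_VOCAB becomes a plain path to the local sentencepiece fixture, computed unconditionally, instead of being defined behind is_sentencepiece_available(). Building the string has no sentencepiece dependency; the library is only needed once a tokenizer is actually constructed from the file, and the tests that do so are already gated by @require_sentencepiece. Below is a small sketch of that split, with a helper name of my own choosing.

import os

# Module import stays dependency-free: SAMPLE_VOCAB is just a string.
SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")


def build_sample_tokenizer():
    # sentencepiece is only required here, when a tokenizer is actually built from the fixture.
    from transformers import MBartTokenizer

    return MBartTokenizer(SAMPLE_VOCAB)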