chenpangpang / transformers

Commit 6dacc79d: fix python2 tests
Authored Jul 05, 2019 by thomwolf
Parent commit: 36bca545
Showing 2 changed files with 7 additions and 5 deletions (+7 -5):

pytorch_transformers/tests/tokenization_tests_commons.py  +2 -4
pytorch_transformers/tokenization_utils.py                 +5 -1
pytorch_transformers/tests/tokenization_tests_commons.py

@@ -12,9 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 import os
 import sys

@@ -47,7 +45,7 @@ def create_and_check_save_and_load_tokenizer(tester, tokenizer_class, *inputs, *
 def create_and_check_pickle_tokenizer(tester, tokenizer_class, *inputs, **kwargs):
     tokenizer = tokenizer_class(*inputs, **kwargs)
-    text = "Munich and Berlin are nice cities"
+    text = u"Munich and Berlin are nice cities"
     filename = u"/tmp/tokenizer.bin"
     subwords = tokenizer.tokenize(text)
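The only change in this test file, besides the consolidated __future__ import line, is the u prefix on a string literal. That only matters under Python 2, where a bare literal is a byte string; with the newly imported unicode_literals (or an explicit u"" prefix) the literal is unicode on both interpreter versions. A small illustrative snippet, not part of the commit:

# Illustrative only: how the literal type differs between Python 2 and 3.
from __future__ import unicode_literals

bare = "Munich and Berlin are nice cities"  # unicode on Python 2 thanks to unicode_literals
explicit = u"/tmp/tokenizer.bin"            # u"" prefix is valid on Python 2 and Python 3.3+

# Prints 'unicode' on Python 2 and 'str' on Python 3.
print(type(bare).__name__)
print(type(explicit).__name__)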
pytorch_transformers/tokenization_utils.py

@@ -101,8 +101,12 @@ class PreTrainedTokenizer(object):
             max_len = cls.max_model_input_sizes[pretrained_model_name_or_path]
             kwargs['max_len'] = min(kwargs.get('max_len', int(1e12)), max_len)
 
+        # Merge resolved_vocab_files arguments in kwargs.
+        for args_name, file_path in resolved_vocab_files.items():
+            kwargs[args_name] = file_path
+
         # Instantiate tokenizer.
-        tokenizer = cls(*inputs, **resolved_vocab_files, **kwargs)
+        tokenizer = cls(*inputs, **kwargs)
 
         return tokenizer
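The removed line, cls(*inputs, **resolved_vocab_files, **kwargs), uses two ** unpackings in a single call. That is PEP 448 syntax, available only on Python 3.5+; under Python 2 it is a SyntaxError, which is likely why the Python 2 tests failed. The commit instead merges the resolved file paths into kwargs before a single ** unpacking. A minimal sketch of that portable pattern, using made-up names (DummyTokenizer, build) rather than the library's real API:

# Hypothetical stand-in for a tokenizer class; not part of the library.
class DummyTokenizer(object):
    def __init__(self, vocab_file=None, max_len=None):
        self.vocab_file = vocab_file
        self.max_len = max_len

def build(cls, inputs, resolved_vocab_files, **kwargs):
    # Fold the resolved file paths into kwargs first, then use a single
    # ** unpacking, which is valid on Python 2 and Python 3 alike.
    for args_name, file_path in resolved_vocab_files.items():
        kwargs[args_name] = file_path
    return cls(*inputs, **kwargs)

tok = build(DummyTokenizer, (), {'vocab_file': '/tmp/vocab.txt'}, max_len=512)
print(tok.vocab_file)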