chenpangpang / transformers · Commits

Commit c832f43a authored Sep 24, 2019 by LysandreJik
`output_token_type` -> `token_type_ids`
parent 3927d775
Showing 3 changed files with 5 additions and 5 deletions (+5 -5)

examples/utils_glue.py (+1 -1)
pytorch_transformers/tests/tokenization_tests_commons.py (+1 -1)
pytorch_transformers/tokenization_utils.py (+3 -3)
examples/utils_glue.py
@@ -413,7 +413,7 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
             max_length=max_seq_length,
             truncate_first_sequence=True  # We're truncating the first sequence as a priority
         )
-        input_ids, segment_ids = inputs["input_ids"], inputs["output_token_type"]
+        input_ids, segment_ids = inputs["input_ids"], inputs["token_type_ids"]
         # The mask has 1 for real tokens and 0 for padding tokens. Only real
         # tokens are attended to.
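After this rename, callers read the segment mask from the "token_type_ids" key. A minimal sketch of the consuming pattern, assuming a BERT tokenizer (the pretrained checkpoint name is illustrative, and output_token_type is passed explicitly rather than relying on its default):

    from pytorch_transformers import BertTokenizer

    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

    # encode_plus returns a dict; the segment mask now lives under
    # "token_type_ids" instead of the old "output_token_type" key.
    inputs = tokenizer.encode_plus(
        "First sequence.",
        "Second sequence.",
        add_special_tokens=True,
        output_token_type=True,
    )
    input_ids, segment_ids = inputs["input_ids"], inputs["token_type_ids"]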
pytorch_transformers/tests/tokenization_tests_commons.py
@@ -197,7 +197,7 @@ class CommonTestCases:
             seq_0 = "Test this method."
             seq_1 = "With these inputs."
             information = tokenizer.encode_plus(seq_0, seq_1, add_special_tokens=True, output_token_type=True)
-            sequences, mask = information["input_ids"], information["output_token_type"]
+            sequences, mask = information["input_ids"], information["token_type_ids"]
             assert len(sequences) == len(mask)

         def test_number_of_added_tokens(self):
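The assertion checks one invariant: every input id gets exactly one token type id. Sketched with hypothetical id values for a BERT-style pair encoding (the actual ids depend on the tokenizer's vocabulary):

    # [CLS] seq_0 tokens [SEP] seq_1 tokens [SEP]  (values are made up)
    information = {
        "input_ids":      [101, 3231, 2023, 102, 2007, 2122, 102],
        "token_type_ids": [0,   0,    0,    0,   1,    1,    1],
    }
    sequences, mask = information["input_ids"], information["token_type_ids"]
    assert len(sequences) == len(mask)  # one segment id per input id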
pytorch_transformers/tokenization_utils.py
@@ -765,7 +765,7 @@ class PreTrainedTokenizer(object):
             information["input_ids"] = sequence_tokens
             if output_token_type:
-                information["output_token_type"] = [0] * len(information["input_ids"])
+                information["token_type_ids"] = [0] * len(information["input_ids"])
         else:
             first_sentence_tokens = get_input_ids(text)
             second_sentence_tokens = get_input_ids(text_pair)
@@ -780,7 +780,7 @@ class PreTrainedTokenizer(object):
             )
             if output_token_type:
-                information["output_token_type"] = self.create_mask_from_sequences(text, text_pair)
+                information["token_type_ids"] = self.create_mask_from_sequences(text, text_pair)
         else:
             logger.warning("No special tokens were added. The two sequences have been concatenated.")
             sequence = first_sentence_tokens + second_sentence_tokens
@@ -789,7 +789,7 @@ class PreTrainedTokenizer(object):
             information["overflowing_tokens"] = sequence[max_length - stride:]
             sequence = sequence[:max_length]
             if output_token_type:
-                information["output_token_type"] = [0] * len(sequence)
+                information["token_type_ids"] = [0] * len(sequence)
             information["input_ids"] = sequence
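Taken together, the three changed branches populate information["token_type_ids"] in two ways: all zeros for a single sequence (or a truncated concatenation without special tokens), and a 0/1 segment split via create_mask_from_sequences for a pair with special tokens. A standalone sketch of that logic, where the helper body is an assumption about BERT-style behaviour rather than the actual create_mask_from_sequences implementation:

    def token_type_ids_for(first_len, second_len=None):
        # Single sequence: every position belongs to segment 0,
        # mirroring information["token_type_ids"] = [0] * len(...).
        if second_len is None:
            return [0] * first_len
        # Pair: segment 0 for the first sequence (and its special tokens),
        # segment 1 for the second, as a BERT-style mask is built.
        return [0] * first_len + [1] * second_len

    assert token_type_ids_for(5) == [0, 0, 0, 0, 0]
    assert token_type_ids_for(3, 2) == [0, 0, 0, 1, 1]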