Force the return of token type IDs (#3439)

ffcffebe · Lysandre Debut · GitHub · 010e0460 · ffcffebe · ffcffebe
Unverified commit ffcffebe, authored Mar 26, 2020 by Lysandre Debut; committed by GitHub on Mar 26, 2020
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 4 additions and 1 deletion

examples/utils_multiple_choice.py +3 -1

src/transformers/data/processors/squad.py +1 -0

No files found.
--- a/examples/utils_multiple_choice.py
+++ b/examples/utils_multiple_choice.py
@@ -320,7 +320,9 @@ def convert_examples_to_features(
            else:
                text_b = example.question + " " + ending
-            inputs = tokenizer.encode_plus(text_a, text_b, add_special_tokens=True, max_length=max_length,)
+            inputs = tokenizer.encode_plus(
+                text_a, text_b, add_special_tokens=True, max_length=max_length, return_token_type_ids=True
+            )
            if "num_truncated_tokens" in inputs and inputs["num_truncated_tokens"] > 0:
                logger.info(
                    "Attention! you are cropping tokens (swag task is ok). "

--- a/src/transformers/data/processors/squad.py
+++ b/src/transformers/data/processors/squad.py
@@ -139,6 +139,7 @@ def squad_convert_example_to_features(example, max_seq_length, doc_stride, max_q
            pad_to_max_length=True,
            stride=max_seq_length - doc_stride - len(truncated_query) - sequence_pair_added_tokens,
            truncation_strategy="only_second" if tokenizer.padding_side == "right" else "only_first",
+            return_token_type_ids=True,
        )
        paragraph_len = min(