Unverified commit ffcffebe, authored by Lysandre Debut and committed by GitHub
Browse files

Force the return of token type IDs (#3439)

parent 010e0460
...@@ -320,7 +320,9 @@ def convert_examples_to_features( ...@@ -320,7 +320,9 @@ def convert_examples_to_features(
else: else:
text_b = example.question + " " + ending text_b = example.question + " " + ending
inputs = tokenizer.encode_plus(text_a, text_b, add_special_tokens=True, max_length=max_length,) inputs = tokenizer.encode_plus(
text_a, text_b, add_special_tokens=True, max_length=max_length, return_token_type_ids=True
)
if "num_truncated_tokens" in inputs and inputs["num_truncated_tokens"] > 0: if "num_truncated_tokens" in inputs and inputs["num_truncated_tokens"] > 0:
logger.info( logger.info(
"Attention! you are cropping tokens (swag task is ok). " "Attention! you are cropping tokens (swag task is ok). "
......
...@@ -139,6 +139,7 @@ def squad_convert_example_to_features(example, max_seq_length, doc_stride, max_q ...@@ -139,6 +139,7 @@ def squad_convert_example_to_features(example, max_seq_length, doc_stride, max_q
pad_to_max_length=True, pad_to_max_length=True,
stride=max_seq_length - doc_stride - len(truncated_query) - sequence_pair_added_tokens, stride=max_seq_length - doc_stride - len(truncated_query) - sequence_pair_added_tokens,
truncation_strategy="only_second" if tokenizer.padding_side == "right" else "only_first", truncation_strategy="only_second" if tokenizer.padding_side == "right" else "only_first",
return_token_type_ids=True,
) )
paragraph_len = min( paragraph_len = min(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment