Commit 348e19aa authored by Morgan Funtowicz

Expose attention_masks and input_lengths arguments to batch_encode_plus

parent c2407fdd
@@ -149,14 +149,11 @@ class QuestionAnsweringPipeline(Pipeline):
         # Map to tuple (question, context)
         texts = [(text['question'], text['context']) for text in texts]
         inputs = self.tokenizer.batch_encode_plus(
-            texts, add_special_tokens=True, return_tensors='tf' if is_tf_available() else 'pt'
+            texts, add_special_tokens=False, return_tensors='tf' if is_tf_available() else 'pt',
+            return_attention_masks=True, return_input_lengths=False
         )
-        # Remove special_tokens_mask to avoid KeyError
-        special_tokens_mask, input_len = inputs.pop('special_tokens_mask'), inputs.pop('input_len')
 
         # TODO : Harmonize model arguments across all model
         inputs['attention_mask'] = inputs.pop('encoder_attention_mask')
...
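A minimal sketch of the resulting call, assuming the tokenizer API as it stood at this commit (where `batch_encode_plus` accepts `return_attention_masks` / `return_input_lengths` and returns the mask under `encoder_attention_mask`); the checkpoint name is only an illustrative example, and newer library versions use different argument names:

```python
# Sketch (not part of this commit): how the pipeline's call behaves after this change.
from transformers import AutoTokenizer, is_tf_available

# Example checkpoint; any compatible tokenizer would do.
tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')

# (question, context) pairs, as built by QuestionAnsweringPipeline.
texts = [('Who wrote the book?', 'The book was written by Jane Doe.')]

inputs = tokenizer.batch_encode_plus(
    texts,
    add_special_tokens=False,
    return_tensors='tf' if is_tf_available() else 'pt',
    return_attention_masks=True,   # mask comes back directly in the dict
    return_input_lengths=False,    # so there is no 'input_len' entry to pop
)

# With the flags above there is nothing left to strip out of `inputs`;
# the pipeline only renames the mask to the argument name the models expect.
inputs['attention_mask'] = inputs.pop('encoder_attention_mask')
```

The point of the change is that the tokenizer itself now controls which auxiliary keys are returned, so the pipeline no longer has to pop `special_tokens_mask` and `input_len` defensively after the fact.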