Commit 3dce7c9f authored by Myle Ott, committed by Facebook Github Bot
Browse files

Add --input option to interactive.py to support reading from file

Summary: Pull Request resolved: https://github.com/pytorch/fairseq/pull/484

Differential Revision: D13880636

Pulled By: myleott

fbshipit-source-id: 984b2e1c3b281c28243102eb971ea45ec891d94e
parent 42be3ebd
......@@ -379,6 +379,8 @@ def add_interactive_args(parser):
# fmt: off
group.add_argument('--buffer-size', default=0, type=int, metavar='N',
help='read this many sentences into a buffer before processing them')
group.add_argument('--input', default='-', type=str, metavar='FILE',
help='file to read from; use - for stdin')
# fmt: on
......
......@@ -10,9 +10,10 @@ Translate raw text with a trained model. Batches data on-the-fly.
"""
from collections import namedtuple
import numpy as np
import fileinput
import sys
import numpy as np
import torch
from fairseq import data, options, tasks, tokenizer, utils
......@@ -23,9 +24,9 @@ Batch = namedtuple('Batch', 'srcs tokens lengths')
Translation = namedtuple('Translation', 'src_str hypos pos_scores alignments')
def buffered_read(buffer_size):
def buffered_read(input, buffer_size):
buffer = []
for src_str in sys.stdin:
for src_str in fileinput.input(files=[input], openhook=fileinput.hook_encoded("utf-8")):
buffer.append(src_str.strip())
if len(buffer) >= buffer_size:
yield buffer
......@@ -165,12 +166,12 @@ def main(args):
if args.buffer_size > 1:
print('| Sentence buffer size:', args.buffer_size)
print('| Type the input sentence and press return:')
for inputs in buffered_read(args.buffer_size):
for inputs in buffered_read(args.input, args.buffer_size):
indices = []
results = []
for batch, batch_indices in make_batches(inputs, args, task, max_positions):
indices.extend(batch_indices)
results += process_batch(batch)
results.extend(process_batch(batch))
for i in np.argsort(indices):
result = results[i]
......
......@@ -282,6 +282,7 @@ def generate_main(data_dir, extra_flags=None):
# evaluate model interactively
generate_args.buffer_size = 0
generate_args.input = '-'
generate_args.max_sentences = None
orig_stdin = sys.stdin
sys.stdin = StringIO('h e l l o\n')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment