Commit 7f538f54 authored by Myle Ott

Fix --prefix-size

parent 2a681d99
@@ -236,7 +236,7 @@ def add_generation_args(parser):
     group.add_argument('--score-reference', action='store_true',
                        help='just score the reference translation')
     group.add_argument('--prefix-size', default=0, type=int, metavar='PS',
-                       help=('initialize generation by target prefix of given length'))
+                       help='initialize generation by target prefix of given length')
     group.add_argument('--sampling', action='store_true',
                        help='sample hypotheses instead of using beam search')
     return group
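For context, a minimal sketch (outside fairseq; the argument group name and the parsed test value are illustrative assumptions) of how the `--prefix-size` flag defined above is registered and consumed:

```python
import argparse

parser = argparse.ArgumentParser()
group = parser.add_argument_group('Generation')  # illustrative group name
group.add_argument('--prefix-size', default=0, type=int, metavar='PS',
                   help='initialize generation by target prefix of given length')

args = parser.parse_args(['--prefix-size', '2'])  # assumed example value
print(args.prefix_size)  # 2: force the first 2 target tokens of each hypothesis
```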
@@ -322,7 +322,6 @@ class SequenceGenerator(object):
             else:
                 # take the best 2 x beam_size predictions. We'll choose the first
                 # beam_size of these which don't predict eos to continue with.
                 torch.topk(
                     probs.view(bsz, -1),
                     k=min(cand_size, probs.view(bsz, -1).size(1) - 1),  # -1 so we never select pad
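The `torch.topk` call above selects candidates over the flattened (beam, vocab) axis. A self-contained sketch of that selection step, with assumed toy shapes (`bsz`, `beam_size`, and `vocab` are illustrative; the beam/token decoding of the flat indices mirrors, but is not copied from, the surrounding code):

```python
import torch

bsz, beam_size, vocab = 2, 3, 10          # assumed toy dimensions
cand_size = 2 * beam_size                 # 2x beam so eos candidates can be skipped later
probs = torch.rand(bsz * beam_size, vocab)

flat = probs.view(bsz, -1)                # (bsz, beam_size * vocab)
cand_scores, cand_indices = torch.topk(
    flat,
    k=min(cand_size, flat.size(1) - 1),   # -1 so we never select pad
)
cand_beams = cand_indices // vocab        # which beam each candidate extends
cand_tokens = cand_indices % vocab        # which token it predicts
print(cand_scores.shape)                  # torch.Size([2, 6])
```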
@@ -375,10 +374,9 @@ class SequenceGenerator(object):
             assert step < maxlen

             if len(finalized_sents) > 0:
-                # construct batch_idxs which holds indices of batches to keep for the next pass
                 new_bsz = bsz - len(finalized_sents)

                 # construct batch_idxs which holds indices of batches to keep for the next pass
                 batch_mask = torch.ones(bsz).type_as(cand_indices)
                 batch_mask[cand_indices.new(finalized_sents)] = 0
                 batch_idxs = batch_mask.nonzero().squeeze(-1)
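The mask-and-nonzero pattern above shrinks the batch once some sentences finish. A runnable sketch with assumed values (`bsz` and `finalized_sents` are illustrative):

```python
import torch

bsz = 4
finalized_sents = [1, 3]                      # assumed: sentences that just finished
new_bsz = bsz - len(finalized_sents)

batch_mask = torch.ones(bsz, dtype=torch.long)
batch_mask[torch.tensor(finalized_sents)] = 0
batch_idxs = batch_mask.nonzero().squeeze(-1)
print(batch_idxs)                             # tensor([0, 2]): batches kept for the next pass
assert batch_idxs.numel() == new_bsz
```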
@@ -390,6 +388,8 @@ class SequenceGenerator(object):
                 cand_scores = cand_scores[batch_idxs]
                 cand_indices = cand_indices[batch_idxs]
+                if prefix_tokens is not None:
+                    prefix_tokens = prefix_tokens[batch_idxs]
                 scores = scores.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1)
                 scores_buf.resize_as_(scores)
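The two added lines are the actual fix: `prefix_tokens` is a per-sentence tensor, so when finished sentences are dropped it must be gathered with `batch_idxs` just like `cand_scores` and `cand_indices`, or the surviving sentences would be forced with another sentence's prefix. A sketch with assumed tensor contents:

```python
import torch

prefix_tokens = torch.tensor([[5, 6],         # assumed prefixes, one row per sentence
                              [7, 8],
                              [9, 10],
                              [11, 12]])
batch_idxs = torch.tensor([0, 2])             # assumed survivors after finalization

if prefix_tokens is not None:
    prefix_tokens = prefix_tokens[batch_idxs]
print(prefix_tokens)                          # tensor([[ 5,  6], [ 9, 10]])
```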