#!/usr/bin/env python3 -u
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the LICENSE file in
# the root directory of this source tree. An additional grant of patent rights
# can be found in the PATENTS file in the same directory.

import sys
import torch
from torch.autograd import Variable

from fairseq import options, tokenizer, utils
from fairseq.sequence_generator import SequenceGenerator


def main(args):
    """Translate sentences read from stdin with a loaded model ensemble.

    For every input line the source text is echoed as ``O\\t<source>``, and
    each of the top ``args.nbest`` hypotheses is printed as
    ``H\\t<score>\\t<hypothesis>`` followed by ``A\\t<alignment>``.

    Args:
        args: parsed generation options. The attributes read here are
            ``sampling``, ``nbest``, ``beam``, ``cpu``, ``path``, ``data``,
            ``no_beamable_mm``, ``no_early_stop``, ``unnormalized``,
            ``lenpen``, ``unkpen``, ``replace_unk`` and ``remove_bpe``.

    Raises:
        AssertionError: if ``args.sampling`` is set while ``args.nbest``
            differs from ``args.beam``.
    """
    print(args)
    assert not args.sampling or args.nbest == args.beam, \
        '--sampling requires --nbest to be equal to --beam'

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Load ensemble
    print('| loading model(s) from {}'.format(', '.join(args.path)))
    models, model_args = utils.load_ensemble_for_inference(args.path, data_dir=args.data)
    # The dictionaries are taken from the first model of the ensemble.
    src_dict, dst_dict = models[0].src_dict, models[0].dst_dict

    print('| [{}] dictionary: {} types'.format(model_args.source_lang, len(src_dict)))
    print('| [{}] dictionary: {} types'.format(model_args.target_lang, len(dst_dict)))

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam,
        )

    # Initialize generator
    translator = SequenceGenerator(
        models, beam_size=args.beam, stop_early=(not args.no_early_stop),
        normalize_scores=(not args.unnormalized), len_penalty=args.lenpen,
        unk_penalty=args.unkpen)
    if use_cuda:
        translator.cuda()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    print('| Type the input sentence and press return:')
    for src_str in sys.stdin:
        src_str = src_str.strip()
        src_tokens = tokenizer.Tokenizer.tokenize(src_str, src_dict, add_if_not_exist=False).long()
        if use_cuda:
            src_tokens = src_tokens.cuda()
        # One sentence per call: a single-row batch plus a one-element
        # length tensor created on the same device/type as the tokens.
        src_lengths = src_tokens.new([src_tokens.numel()])
        translations = translator.generate(
            Variable(src_tokens.view(1, -1)),
            Variable(src_lengths.view(-1)),
        )
        hypos = translations[0]
        print('O\t{}'.format(src_str))

        # Process top predictions; slicing already clamps to len(hypos).
        for hypo in hypos[:args.nbest]:
            _, hypo_str, alignment = utils.post_process_prediction(
                hypo_tokens=hypo['tokens'].int().cpu(),
                src_str=src_str,
                alignment=hypo['alignment'].int().cpu(),
                align_dict=align_dict,
                dst_dict=dst_dict,
                remove_bpe=args.remove_bpe,
            )
            print('H\t{}\t{}'.format(hypo['score'], hypo_str))
            print('A\t{}'.format(' '.join(map(str, alignment))))


if __name__ == '__main__':
    # Script entry point: parse the generation options from the command
    # line and start the interactive translation loop.
    parser = options.get_generation_parser()
    args = parser.parse_args()
    main(args)