score.py 3.08 KB
Newer Older
Louis Martin's avatar
Louis Martin committed
1
#!/usr/bin/env python3
Sergey Edunov's avatar
Sergey Edunov committed
2
3
4
5
6
7
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the LICENSE file in
# the root directory of this source tree. An additional grant of patent rights
# can be found in the PATENTS file in the same directory.
Myle Ott's avatar
Myle Ott committed
8
9
10
"""
BLEU scoring of generated translations against reference translations.
"""
Sergey Edunov's avatar
Sergey Edunov committed
11
12
13
14
15

import argparse
import os
import sys

16
from fairseq import bleu
alexeib's avatar
alexeib committed
17
from fairseq.data import dictionary
Sergey Edunov's avatar
Sergey Edunov committed
18
19


Myle Ott's avatar
Myle Ott committed
20
def get_parser():
    """Build the command-line argument parser for BLEU scoring.

    Returns:
        argparse.ArgumentParser: parser defining ``--sys``, ``--ref``,
        ``--order``, ``--ignore-case``, ``--sacrebleu`` and
        ``--sentence-bleu``.
    """
    parser = argparse.ArgumentParser(description='Command-line script for BLEU scoring.')
    # fmt: off
    # '-' (the default) is the value main() treats as "read from stdin".
    parser.add_argument('-s', '--sys', default='-', help='system output')
    parser.add_argument('-r', '--ref', required=True, help='references')
    parser.add_argument('-o', '--order', default=4, metavar='N',
                        type=int, help='consider ngrams up to this order')
    parser.add_argument('--ignore-case', action='store_true',
                        help='case-insensitive scoring')
    parser.add_argument('--sacrebleu', action='store_true',
                        help='score with sacrebleu')
    parser.add_argument('--sentence-bleu', action='store_true',
                        help='report sentence-level BLEUs (i.e., with +1 smoothing)')
    # fmt: on
    return parser
Sergey Edunov's avatar
Sergey Edunov committed
35

Myle Ott's avatar
Myle Ott committed
36
37
38

def main():
    """Parse the command line and print BLEU for system output vs. references.

    One of three scoring paths is chosen from the parsed flags:

    * ``--sacrebleu``: hand both streams to ``sacrebleu.corpus_bleu`` and
      print its result.
    * ``--sentence-bleu``: print one indexed result line per sentence pair,
      resetting the scorer with ``one_init=True`` before each pair.
    * otherwise: accumulate every sentence pair into one scorer and print a
      single result.

    System output is read from stdin when ``--sys`` is ``'-'``, otherwise
    from the named file.

    Raises:
        FileNotFoundError: if the system output path (when not ``'-'``) or
            the reference path does not exist.
    """
    parser = get_parser()
    args = parser.parse_args()
    print(args)

    # Explicit checks rather than ``assert``: assertions are stripped under
    # ``python -O``, which would silently skip this validation.
    if args.sys != '-' and not os.path.exists(args.sys):
        raise FileNotFoundError(
            "System output file {} does not exist".format(args.sys))
    if not os.path.exists(args.ref):
        raise FileNotFoundError(
            "Reference file {} does not exist".format(args.ref))

    # Named ``tgt_dict`` so the builtin ``dict`` is not shadowed.
    tgt_dict = dictionary.Dictionary()

    def readlines(fd):
        """Yield the lines of ``fd``, lower-cased when --ignore-case is set."""
        # Iterate the file object directly so input is streamed instead of
        # being loaded in full by ``fd.readlines()``.
        for line in fd:
            yield line.lower() if args.ignore_case else line

    if args.sacrebleu:
        import sacrebleu

        def score(fdsys):
            """Print the sacrebleu corpus-level result for ``fdsys``."""
            with open(args.ref) as fdref:
                # Forward --ignore-case; without this the flag had no effect
                # on the sacrebleu path.
                print(sacrebleu.corpus_bleu(
                    fdsys, [fdref], lowercase=args.ignore_case))
    elif args.sentence_bleu:
        def score(fdsys):
            """Print one indexed BLEU result line per sentence pair."""
            with open(args.ref) as fdref:
                scorer = bleu.Scorer(
                    tgt_dict.pad(), tgt_dict.eos(), tgt_dict.unk())
                # zip() stops at the shorter of the two inputs.
                pairs = zip(readlines(fdsys), readlines(fdref))
                for i, (sys_line, ref_line) in enumerate(pairs):
                    # Start each sentence from fresh statistics.
                    scorer.reset(one_init=True)
                    sys_tok = tgt_dict.encode_line(sys_line)
                    ref_tok = tgt_dict.encode_line(ref_line)
                    scorer.add(ref_tok, sys_tok)
                    print(i, scorer.result_string(args.order))
    else:
        def score(fdsys):
            """Print a single BLEU result accumulated over all sentence pairs."""
            with open(args.ref) as fdref:
                scorer = bleu.Scorer(
                    tgt_dict.pad(), tgt_dict.eos(), tgt_dict.unk())
                # zip() stops at the shorter of the two inputs.
                for sys_line, ref_line in zip(readlines(fdsys), readlines(fdref)):
                    sys_tok = tgt_dict.encode_line(sys_line)
                    ref_tok = tgt_dict.encode_line(ref_line)
                    scorer.add(ref_tok, sys_tok)
                print(scorer.result_string(args.order))

    if args.sys == '-':
        score(sys.stdin)
    else:
        with open(args.sys, 'r') as f:
            score(f)


# Run the scorer only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()