#!/usr/bin/env python3
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the LICENSE file in
# the root directory of this source tree. An additional grant of patent rights
# can be found in the PATENTS file in the same directory.
"""
BLEU scoring of generated translations against reference translations.
"""

import argparse
import os
import sys

from fairseq import bleu
from fairseq.data import dictionary


def get_parser():
    """Build the command-line argument parser for the BLEU scoring script.

    Returns:
        argparse.ArgumentParser: a parser that accepts

        * ``-s/--sys``: system output (defaults to ``'-'``),
        * ``-r/--ref``: references (required),
        * ``-o/--order``: maximum n-gram order, an int (defaults to 4),
        * ``--ignore-case``: flag requesting case-insensitive scoring,
        * ``--sacrebleu``: flag requesting scoring with sacrebleu.
    """
    parser = argparse.ArgumentParser(description='Command-line script for BLEU scoring.')
    # fmt: off
    parser.add_argument('-s', '--sys', default='-', help='system output')
    parser.add_argument('-r', '--ref', required=True, help='references')
    parser.add_argument('-o', '--order', default=4, metavar='N',
                        type=int, help='consider ngrams up to this order')
    parser.add_argument('--ignore-case', action='store_true',
                        help='case-insensitive scoring')
    parser.add_argument('--sacrebleu', action='store_true',
                        help='score with sacrebleu')
    # fmt: on
    return parser

def main():
    """Parse the command line and print a corpus-level BLEU score.

    The system output is read from the file given by ``--sys``, or from
    standard input when ``--sys`` is ``'-'``; the references are read from
    ``--ref``. The parsed arguments are printed first, followed by the score.

    With ``--sacrebleu`` the score is computed by ``sacrebleu.corpus_bleu``;
    otherwise each line pair is encoded with a fresh fairseq ``Dictionary``
    and accumulated in a ``bleu.Scorer``, and the result for n-grams up to
    ``--order`` is printed. ``--ignore-case`` lower-cases every line on both
    scoring paths.

    Raises:
        FileNotFoundError: if the system output file (when not ``'-'``) or
            the reference file does not exist.
    """
    parser = get_parser()
    args = parser.parse_args()
    print(args)

    # Validate with explicit raises rather than ``assert``: assertions are
    # stripped under ``python -O``, which would silently skip these checks.
    if args.sys != '-' and not os.path.exists(args.sys):
        raise FileNotFoundError(
            "System output file {} does not exist".format(args.sys)
        )
    if not os.path.exists(args.ref):
        raise FileNotFoundError(
            "Reference file {} does not exist".format(args.ref)
        )

    def readlines(fd):
        # Iterate the file lazily instead of loading it with fd.readlines().
        for line in fd:
            yield line.lower() if args.ignore_case else line

    if args.sacrebleu:
        import sacrebleu

        def score(fdsys):
            with open(args.ref) as fdref:
                # Go through readlines() so --ignore-case is honoured here
                # too, and pass lists rather than open file objects so the
                # call does not rely on sacrebleu accepting arbitrary
                # iterables.
                hypotheses = list(readlines(fdsys))
                references = list(readlines(fdref))
                print(sacrebleu.corpus_bleu(hypotheses, [references]))
    else:
        # Named tgt_dict to avoid shadowing the ``dict`` builtin.
        tgt_dict = dictionary.Dictionary()

        def score(fdsys):
            with open(args.ref) as fdref:
                scorer = bleu.Scorer(tgt_dict.pad(), tgt_dict.eos(), tgt_dict.unk())
                for sys_line, ref_line in zip(readlines(fdsys), readlines(fdref)):
                    sys_tok = tgt_dict.encode_line(sys_line)
                    ref_tok = tgt_dict.encode_line(ref_line)
                    scorer.add(ref_tok, sys_tok)
                print(scorer.result_string(args.order))

    if args.sys == '-':
        score(sys.stdin)
    else:
        with open(args.sys, 'r') as f:
            score(f)


# Run the scorer only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()