#!/usr/bin/env python3
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the LICENSE file in
# the root directory of this source tree. An additional grant of patent rights
# can be found in the PATENTS file in the same directory.
"""
BLEU scoring of generated translations against reference translations.
"""

import argparse
import os
import sys

from fairseq import bleu, tokenizer
from fairseq.data import dictionary


def get_parser():
    """Build the command-line argument parser for the BLEU scoring script.

    Options defined:
        -s / --sys      path of the system output; defaults to '-'.
        -r / --ref      path of the references (required).
        -o / --order    maximum n-gram order to consider (int, default 4).
        --ignore-case   flag requesting case-insensitive scoring.

    Returns:
        argparse.ArgumentParser: the configured parser (arguments are not
        parsed here).
    """
    parser = argparse.ArgumentParser(description='Command-line script for BLEU scoring.')
    parser.add_argument('-s', '--sys', default='-', help='system output')
    parser.add_argument('-r', '--ref', required=True, help='references')
    parser.add_argument('-o', '--order', default=4, metavar='N',
                        type=int, help='consider ngrams up to this order')
    parser.add_argument('--ignore-case', action='store_true',
                        help='case-insensitive scoring')
    return parser


def main():
    """Score system output against a reference file with BLEU and print it.

    Parses the command line, echoes the parsed arguments, checks that the
    input paths exist, then feeds line-aligned (reference, system) pairs to
    a ``bleu.Scorer`` and prints ``scorer.result_string(args.order)``.

    The system output is read from standard input when ``--sys`` is ``-``,
    otherwise from the named file.

    Raises:
        AssertionError: if the system output file (when not ``-``) or the
            reference file does not exist.
    """
    parser = get_parser()
    args = parser.parse_args()
    print(args)

    assert args.sys == '-' or os.path.exists(args.sys), \
        "System output file {} does not exist".format(args.sys)
    assert os.path.exists(args.ref), \
        "Reference file {} does not exist".format(args.ref)

    # Named `vocab` rather than `dict` so the builtin is not shadowed.
    vocab = dictionary.Dictionary()

    def readlines(fd):
        """Yield each line of ``fd`` exactly once, lowercased if requested."""
        for line in fd:
            # Exactly one yield per input line. Previously the lowercased
            # line was followed by the original one (missing `else`), so
            # --ignore-case scored every sentence twice, once in its
            # original case.
            yield line.lower() if args.ignore_case else line

    def score(fdsys):
        """Accumulate BLEU statistics for ``fdsys`` against ``args.ref``."""
        with open(args.ref) as fdref:
            scorer = bleu.Scorer(vocab.pad(), vocab.eos(), vocab.unk())
            # zip() pairs lines positionally and stops at the shorter input.
            for sys_line, ref_line in zip(readlines(fdsys), readlines(fdref)):
                sys_tok = tokenizer.Tokenizer.tokenize(sys_line, vocab)
                ref_tok = tokenizer.Tokenizer.tokenize(ref_line, vocab)
                scorer.add(ref_tok, sys_tok)
            print(scorer.result_string(args.order))

    if args.sys == '-':
        score(sys.stdin)
    else:
        with open(args.sys, 'r') as f:
            score(f)


# Run the scorer only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()