Commit ecf13b0e authored by A. Unique TensorFlower
Browse files

Internal change

PiperOrigin-RevId: 317908656
parent 67996f87
...@@ -92,7 +92,11 @@ def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False): ...@@ -92,7 +92,11 @@ def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False):
tf.io.gfile.GFile(ref_filename).read()).strip().splitlines() tf.io.gfile.GFile(ref_filename).read()).strip().splitlines()
hyp_lines = tokenizer.native_to_unicode( hyp_lines = tokenizer.native_to_unicode(
tf.io.gfile.GFile(hyp_filename).read()).strip().splitlines() tf.io.gfile.GFile(hyp_filename).read()).strip().splitlines()
return bleu_on_list(ref_lines, hyp_lines, case_sensitive)
def bleu_on_list(ref_lines, hyp_lines, case_sensitive=False):
"""Compute BLEU for two list of strings (reference and hypothesis)."""
if len(ref_lines) != len(hyp_lines): if len(ref_lines) != len(hyp_lines):
raise ValueError( raise ValueError(
"Reference and translation files have different number of " "Reference and translation files have different number of "
......
...@@ -59,6 +59,14 @@ class ComputeBleuTest(tf.test.TestCase): ...@@ -59,6 +59,14 @@ class ComputeBleuTest(tf.test.TestCase):
tokenized = compute_bleu.bleu_tokenize(s) tokenized = compute_bleu.bleu_tokenize(s)
self.assertEqual(["Test0", ",", "1", "two", ",", "3"], tokenized) self.assertEqual(["Test0", ",", "1", "two", ",", "3"], tokenized)
def test_bleu_list(self):
  """Check bleu_on_list on in-memory lists with and without case sensitivity.

  The hypotheses match the references except for the capitalization of
  "More" in the second sentence, so the case-insensitive score is expected
  to be exactly 100 while the case-sensitive score must fall below 100.
  """
  references = ["test 1 two 3", "more tests!"]
  hypotheses = ["test 1 two 3", "More tests!"]

  # Third positional argument is `case_sensitive`.
  score_ignoring_case = compute_bleu.bleu_on_list(
      references, hypotheses, False)
  score_respecting_case = compute_bleu.bleu_on_list(
      references, hypotheses, True)

  self.assertEqual(score_ignoring_case, 100)
  self.assertLess(score_respecting_case, 100)
if __name__ == "__main__": if __name__ == "__main__":
tf.test.main() tf.test.main()
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment