Commit 27207a27 authored by A. Unique TensorFlower
Browse files

Merge pull request #8311 from stagedml:bleu-numoflines

PiperOrigin-RevId: 301924393
parents d697041a cdfc1ddd
...@@ -47,7 +47,9 @@ class UnicodeRegex(object): ...@@ -47,7 +47,9 @@ class UnicodeRegex(object):
self.symbol_re = re.compile("([" + self.property_chars("S") + "])") self.symbol_re = re.compile("([" + self.property_chars("S") + "])")
def property_chars(self, prefix): def property_chars(self, prefix):
return "".join(six.unichr(x) for x in range(sys.maxunicode) return "".join(
six.unichr(x)
for x in range(sys.maxunicode)
if unicodedata.category(six.unichr(x)).startswith(prefix)) if unicodedata.category(six.unichr(x)).startswith(prefix))
...@@ -92,9 +94,10 @@ def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False): ...@@ -92,9 +94,10 @@ def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False):
tf.io.gfile.GFile(hyp_filename).read()).strip().splitlines() tf.io.gfile.GFile(hyp_filename).read()).strip().splitlines()
if len(ref_lines) != len(hyp_lines): if len(ref_lines) != len(hyp_lines):
raise ValueError("Reference and translation files have different number of " raise ValueError(
"lines. If training only a few steps (100-200), the " "Reference and translation files have different number of "
"translation may be empty.") "lines (%d VS %d). If training only a few steps (100-200), the "
"translation may be empty." % (len(ref_lines), len(hyp_lines)))
if not case_sensitive: if not case_sensitive:
ref_lines = [x.lower() for x in ref_lines] ref_lines = [x.lower() for x in ref_lines]
hyp_lines = [x.lower() for x in hyp_lines] hyp_lines = [x.lower() for x in hyp_lines]
...@@ -116,18 +119,23 @@ def main(unused_argv): ...@@ -116,18 +119,23 @@ def main(unused_argv):
def define_compute_bleu_flags(): def define_compute_bleu_flags():
"""Add flags for computing BLEU score.""" """Add flags for computing BLEU score."""
flags.DEFINE_string( flags.DEFINE_string(
name="translation", default=None, name="translation",
default=None,
help=flags_core.help_wrap("File containing translated text.")) help=flags_core.help_wrap("File containing translated text."))
flags.mark_flag_as_required("translation") flags.mark_flag_as_required("translation")
flags.DEFINE_string( flags.DEFINE_string(
name="reference", default=None, name="reference",
default=None,
help=flags_core.help_wrap("File containing reference translation.")) help=flags_core.help_wrap("File containing reference translation."))
flags.mark_flag_as_required("reference") flags.mark_flag_as_required("reference")
flags.DEFINE_enum( flags.DEFINE_enum(
name="bleu_variant", short_name="bv", default="both", name="bleu_variant",
enum_values=["both", "uncased", "cased"], case_sensitive=False, short_name="bv",
default="both",
enum_values=["both", "uncased", "cased"],
case_sensitive=False,
help=flags_core.help_wrap( help=flags_core.help_wrap(
"Specify one or more BLEU variants to calculate. Variants: \"cased\"" "Specify one or more BLEU variants to calculate. Variants: \"cased\""
", \"uncased\", or \"both\".")) ", \"uncased\", or \"both\"."))
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment