Fix BLEU computation when ratio=0.0

PiperOrigin-RevId: 387502039

Fix BLEU computation when ratio=0.0
PiperOrigin-RevId: 387502039
04ef7772 · Yuexin Wu · A. Unique TensorFlower · 24f28eef · 04ef7772
Commit 04ef7772, authored Jul 28, 2021 by Yuexin Wu; committed by A. Unique TensorFlower on Jul 28, 2021.
Hide whitespace changes
Inline Side-by-side

Showing with 15 additions and 14 deletions

official/nlp/metrics/bleu.py official/nlp/metrics/bleu.py +15 -14

No files found.
--- a/official/nlp/metrics/bleu.py
+++ b/official/nlp/metrics/bleu.py
@@ -89,8 +89,7 @@ def _get_ngrams_with_counter(segment, max_order):
  Args:
    segment: text segment from which n-grams will be extracted.
-    max_order: maximum length in tokens of the n-grams returned by this
+    max_order: maximum length in tokens of the n-grams returned by this method.
-        methods.
  Returns:
    The Counter containing all n-grams up to max_order in segment
@@ -104,15 +103,17 @@ def _get_ngrams_with_counter(segment, max_order):
  return ngram_counts
-def compute_bleu(reference_corpus, translation_corpus, max_order=4,
+def compute_bleu(reference_corpus,
+                 translation_corpus,
+                 max_order=4,
                 use_bp=True):
  """Computes BLEU score of translated segments against one or more references.
  Args:
-    reference_corpus: list of references for each translation. Each
+    reference_corpus: list of references for each translation. Each reference
-        reference should be tokenized into a list of tokens.
+      should be tokenized into a list of tokens.
-    translation_corpus: list of translations to score. Each translation
+    translation_corpus: list of translations to score. Each translation should
-        should be tokenized into a list of tokens.
+      be tokenized into a list of tokens.
    max_order: Maximum n-gram order to use when computing BLEU score.
    use_bp: boolean, whether to apply brevity penalty.
@@ -134,15 +135,14 @@ def compute_bleu(reference_corpus, translation_corpus, max_order=4,
    ref_ngram_counts = _get_ngrams_with_counter(references, max_order)
    translation_ngram_counts = _get_ngrams_with_counter(translations, max_order)
-    overlap = dict((ngram,
+    overlap = dict((ngram, min(count, translation_ngram_counts[ngram]))
-                    min(count, translation_ngram_counts[ngram]))
                   for ngram, count in ref_ngram_counts.items())
    for ngram in overlap:
      matches_by_order[len(ngram) - 1] += overlap[ngram]
    for ngram in translation_ngram_counts:
-      possible_matches_by_order[len(ngram) - 1] += translation_ngram_counts[
+      possible_matches_by_order[len(ngram) -
-          ngram]
+                                1] += translation_ngram_counts[ngram]
  precisions = [0] * max_order
  smooth = 1.0
@@ -151,8 +151,8 @@ def compute_bleu(reference_corpus, translation_corpus, max_order=4,
    if possible_matches_by_order[i] > 0:
      precisions[i] = float(matches_by_order[i]) / possible_matches_by_order[i]
      if matches_by_order[i] > 0:
-        precisions[i] = float(matches_by_order[i]) / possible_matches_by_order[
+        precisions[i] = float(
-            i]
+            matches_by_order[i]) / possible_matches_by_order[i]
      else:
        smooth *= 2
        precisions[i] = 1.0 / (smooth * possible_matches_by_order[i])
@@ -165,7 +165,8 @@ def compute_bleu(reference_corpus, translation_corpus, max_order=4,
  if use_bp:
    ratio = translation_length / reference_length
-    bp = math.exp(1 - 1. / ratio) if ratio < 1.0 else 1.0
+    bp = 0. if ratio < 1e-6 else math.exp(1 -
+                                          1. / ratio) if ratio < 1.0 else 1.0
  bleu = geo_mean * bp
  return np.float32(bleu)