"client/src/app.tsx" did not exist on "2f4aa42d2ee0a0a1763dd6779dc3353ee6fb8f7f"
Commit 7cd29f8c authored by André Araujo, committed by aquariusjay

Some refactoring + Google Landmarks dataset scripts (#7014)

* Merged commit includes the following changes:
253126424  by Andre Araujo:

    Scripts to compute metrics for Google Landmarks dataset.

    Also, a small fix to metric in retrieval case: avoids duplicate predicted images.

--
253118971  by Andre Araujo:

    Metrics for Google Landmarks dataset.

--
253106953  by Andre Araujo:

    Library to read files from Google Landmarks challenges.

--
250700636  by Andre Araujo:

    Handle case of aggregation extraction with empty set of input features.

--
250516819  by Andre Araujo:

    Add minimum size for DELF extractor.

--
250435822  by Andre Araujo:

    Add max_image_size/min_image_size for open-source DELF proto / module.

--
250414606  by Andre Araujo:

    Refactor extract_aggregation to allow reuse with different datasets.

--
250356863  by Andre Araujo:

    Remove unnecessary cmd_args variable from boxes_and_features_extraction.

--
249783379  by Andre Araujo:

    Create directory for writing mapping file if it does not exist.

--
249581591  by Andre Araujo:

    Refactor scripts to extract boxes and features from images in Revisited datasets.
    Also, change tf.logging.info --> print for easier logging in open source code.

--
249511821  by Andre Araujo:

    Small change to function for file/directory handling.

--
249289499  by Andre Araujo:

    Internal change.

--

PiperOrigin-RevId: 253126424

* Updating DELF init to adjust to latest changes

* Editing init files for python packages

* Edit D2R dataset reader to work with py3.

PiperOrigin-RevId: 253135576

* DELF package: fix import ordering
# Copyright 2019 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Python module to compute metrics for Google Landmarks dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
def _CountPositives(solution):
"""Counts number of test images with non-empty ground-truth in `solution`.
Args:
solution: Dict mapping test image ID to list of ground-truth IDs.
Returns:
count: Number of test images with non-empty ground-truth.
"""
count = 0
for v in solution.values():
if v:
count += 1
return count
def GlobalAveragePrecision(predictions,
recognition_solution,
ignore_non_gt_test_images=False):
"""Computes global average precision for recognition prediction.
Args:
predictions: Dict mapping test image ID to a dict with keys 'class'
(integer) and 'score' (float).
recognition_solution: Dict mapping test image ID to list of ground-truth
landmark IDs.
ignore_non_gt_test_images: If True, ignore test images which do not have
associated ground-truth landmark IDs. For the Google Landmark Recognition
challenge, this should be set to False.
Returns:
gap: Global average precision score (float).
"""
# Compute number of expected results.
num_positives = _CountPositives(recognition_solution)
gap = 0.0
total_predictions = 0
correct_predictions = 0
# Sort predictions according to Kaggle's convention:
# - first by score (descending);
# - then by key (ascending);
# - then by class (ascending).
sorted_predictions_by_key_class = sorted(
predictions.items(), key=lambda item: (item[0], item[1]['class']))
sorted_predictions = sorted(
sorted_predictions_by_key_class,
key=lambda item: item[1]['score'],
reverse=True)
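  # Note: Python's sort is stable, so the second sort above keeps the
  # (key, class) ordering as the tie-breaker among equal scores.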
  # Loop over sorted predictions (descending order) and compute GAPs.
  for key, prediction in sorted_predictions:
    if ignore_non_gt_test_images and not recognition_solution[key]:
      continue

    total_predictions += 1
    if prediction['class'] in recognition_solution[key]:
      correct_predictions += 1
      gap += correct_predictions / total_predictions

  gap /= num_positives

  return gap


def Top1Accuracy(predictions, recognition_solution):
  """Computes top-1 accuracy for recognition prediction.

  Note that test images without ground-truth are ignored.

  Args:
    predictions: Dict mapping test image ID to a dict with keys 'class'
      (integer) and 'score' (float).
    recognition_solution: Dict mapping test image ID to list of ground-truth
      landmark IDs.

  Returns:
    accuracy: Top-1 accuracy (float).
  """
  # Loop over test images in solution. If it has at least one class label, we
  # check if the prediction is correct.
  num_correct_predictions = 0
  num_test_images_with_ground_truth = 0
  for key, ground_truth in recognition_solution.items():
    if ground_truth:
      num_test_images_with_ground_truth += 1
      if key in predictions:
        if predictions[key]['class'] in ground_truth:
          num_correct_predictions += 1

  return num_correct_predictions / num_test_images_with_ground_truth


def MeanAveragePrecision(predictions, retrieval_solution, max_predictions=100):
  """Computes mean average precision for retrieval prediction.

  Args:
    predictions: Dict mapping test image ID to a list of strings corresponding
      to index image IDs.
    retrieval_solution: Dict mapping test image ID to list of ground-truth image
      IDs.
    max_predictions: Maximum number of predictions per query to take into
      account. For the Google Landmark Retrieval challenge, this should be set
      to 100.

  Returns:
    mean_ap: Mean average precision score (float).

  Raises:
    ValueError: If a test image in `predictions` is not included in
      `retrieval_solution`.
  """
  # Compute number of test images.
  num_test_images = len(retrieval_solution.keys())

  # Loop over predictions for each query and compute mAP.
  mean_ap = 0.0
  for key, prediction in predictions.items():
    if key not in retrieval_solution:
      raise ValueError('Test image %s is not part of retrieval_solution' % key)

    # Loop over predicted images, keeping track of those which were already
    # used (duplicates are skipped).
    ap = 0.0
    already_predicted = set()
    num_expected_retrieved = min(len(retrieval_solution[key]), max_predictions)
    num_correct = 0
    for i in range(min(len(prediction), max_predictions)):
      if prediction[i] not in already_predicted:
        if prediction[i] in retrieval_solution[key]:
          num_correct += 1
          ap += num_correct / (i + 1)
        already_predicted.add(prediction[i])

    ap /= num_expected_retrieved
    mean_ap += ap

  mean_ap /= num_test_images

  return mean_ap


def MeanPrecisions(predictions, retrieval_solution, max_predictions=100):
  """Computes mean precisions for retrieval prediction.

  Args:
    predictions: Dict mapping test image ID to a list of strings corresponding
      to index image IDs.
    retrieval_solution: Dict mapping test image ID to list of ground-truth image
      IDs.
    max_predictions: Maximum number of predictions per query to take into
      account.

  Returns:
    mean_precisions: NumPy array with mean precisions at ranks 1 through
      `max_predictions`.

  Raises:
    ValueError: If a test image in `predictions` is not included in
      `retrieval_solution`.
  """
  # Compute number of test images.
  num_test_images = len(retrieval_solution.keys())

  # Loop over predictions for each query and compute precisions@k.
  precisions = np.zeros((num_test_images, max_predictions))
  count_test_images = 0
  for key, prediction in predictions.items():
    if key not in retrieval_solution:
      raise ValueError('Test image %s is not part of retrieval_solution' % key)

    # Loop over predicted images, keeping track of those which were already
    # used (duplicates are skipped).
    already_predicted = set()
    num_correct = 0
    for i in range(max_predictions):
      if i < len(prediction):
        if prediction[i] not in already_predicted:
          if prediction[i] in retrieval_solution[key]:
            num_correct += 1
          already_predicted.add(prediction[i])
      precisions[count_test_images, i] = num_correct / (i + 1)
    count_test_images += 1

  mean_precisions = np.mean(precisions, axis=0)

  return mean_precisions


def MeanMedianPosition(predictions, retrieval_solution, max_predictions=100):
  """Computes mean and median positions of first correct image.

  Args:
    predictions: Dict mapping test image ID to a list of strings corresponding
      to index image IDs.
    retrieval_solution: Dict mapping test image ID to list of ground-truth image
      IDs.
    max_predictions: Maximum number of predictions per query to take into
      account.

  Returns:
    mean_position: Float.
    median_position: Float.

  Raises:
    ValueError: If a test image in `predictions` is not included in
      `retrieval_solution`.
  """
  # Compute number of test images.
  num_test_images = len(retrieval_solution.keys())

  # Loop over predictions for each query to find first correct ranked image.
  positions = (max_predictions + 1) * np.ones((num_test_images))
  count_test_images = 0
  for key, prediction in predictions.items():
    if key not in retrieval_solution:
      raise ValueError('Test image %s is not part of retrieval_solution' % key)

    for i in range(min(len(prediction), max_predictions)):
      if prediction[i] in retrieval_solution[key]:
        positions[count_test_images] = i + 1
        break

    count_test_images += 1

  mean_position = np.mean(positions)
  median_position = np.median(positions)

  return mean_position, median_position
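A minimal usage sketch of the module above, not part of the committed files: it assumes the delf package is installed, uses the import path that appears in the test module below, and the query/solution dictionaries are invented for illustration.

# Hypothetical toy inputs, for illustration only.
from delf.python.google_landmarks_dataset import metrics

# Recognition: each test image gets a single (class, score) prediction.
recognition_predictions = {
    'query_a': {'class': 7, 'score': 2.5},
    'query_b': {'class': 3, 'score': 1.0},
}
recognition_solution = {'query_a': [7], 'query_b': [4, 5], 'query_c': []}
print(metrics.GlobalAveragePrecision(recognition_predictions,
                                     recognition_solution))

# Retrieval: each query maps to a ranked list of index image IDs.
retrieval_predictions = {'query_a': ['img_1', 'img_2'], 'query_b': ['img_3']}
retrieval_solution = {'query_a': ['img_2'], 'query_b': ['img_3', 'img_4']}
print(metrics.MeanAveragePrecision(retrieval_predictions, retrieval_solution))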
# Copyright 2019 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for Google Landmarks dataset metric computation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from delf.python.google_landmarks_dataset import metrics
def _CreateRecognitionSolution():
"""Creates recognition solution to be used in tests.
Returns:
solution: Dict mapping test image ID to list of ground-truth landmark IDs.
"""
return {
'0123456789abcdef': [0, 12],
'0223456789abcdef': [100, 200, 300],
'0323456789abcdef': [1],
'0423456789abcdef': [],
'0523456789abcdef': [],
}
def _CreateRecognitionPredictions():
"""Creates recognition predictions to be used in tests.
Returns:
predictions: Dict mapping test image ID to a dict with keys 'class'
(integer) and 'score' (float).
"""
return {
'0223456789abcdef': {
'class': 0,
'score': 0.01
},
'0323456789abcdef': {
'class': 1,
'score': 10.0
},
'0423456789abcdef': {
'class': 150,
'score': 15.0
},
}
def _CreateRetrievalSolution():
"""Creates retrieval solution to be used in tests.
Returns:
solution: Dict mapping test image ID to list of ground-truth image IDs.
"""
return {
'0123456789abcdef': ['fedcba9876543210', 'fedcba9876543220'],
'0223456789abcdef': ['fedcba9876543210'],
'0323456789abcdef': [
'fedcba9876543230', 'fedcba9876543240', 'fedcba9876543250'
],
'0423456789abcdef': ['fedcba9876543230'],
}
def _CreateRetrievalPredictions():
"""Creates retrieval predictions to be used in tests.
Returns:
predictions: Dict mapping test image ID to a list with predicted index image
ids.
"""
return {
'0223456789abcdef': ['fedcba9876543200', 'fedcba9876543210'],
'0323456789abcdef': ['fedcba9876543240'],
'0423456789abcdef': ['fedcba9876543230', 'fedcba9876543240'],
}
class MetricsTest(tf.test.TestCase):
def testGlobalAveragePrecisionWorks(self):
# Define input.
predictions = _CreateRecognitionPredictions()
solution = _CreateRecognitionSolution()
# Run tested function.
gap = metrics.GlobalAveragePrecision(predictions, solution)
# Define expected results.
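    # With the fixtures above, predictions sorted by descending score are
    # '04234...' (wrong, empty ground-truth), '03234...' (correct) and
    # '02234...' (wrong); three solution images have non-empty ground-truth,
    # so GAP = (0 + 1/2 + 0) / 3 ~= 0.166667.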
    expected_gap = 0.166667

    # Compare actual and expected results.
    self.assertAllClose(gap, expected_gap)

  def testGlobalAveragePrecisionIgnoreNonGroundTruthWorks(self):
    # Define input.
    predictions = _CreateRecognitionPredictions()
    solution = _CreateRecognitionSolution()

    # Run tested function.
    gap = metrics.GlobalAveragePrecision(
        predictions, solution, ignore_non_gt_test_images=True)

    # Define expected results.
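    # Ignoring test images without ground-truth drops '04234...', leaving
    # '03234...' (correct at rank 1) and '02234...' (wrong at rank 2), so
    # GAP = (1 + 0) / 3 ~= 0.333333.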
    expected_gap = 0.333333

    # Compare actual and expected results.
    self.assertAllClose(gap, expected_gap)

  def testTop1AccuracyWorks(self):
    # Define input.
    predictions = _CreateRecognitionPredictions()
    solution = _CreateRecognitionSolution()

    # Run tested function.
    accuracy = metrics.Top1Accuracy(predictions, solution)

    # Define expected results.
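    # Of the three solution images with non-empty ground-truth, only
    # '03234...' is predicted correctly ('01234...' has no prediction and
    # '02234...' is predicted incorrectly), so accuracy = 1/3.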
    expected_accuracy = 0.333333

    # Compare actual and expected results.
    self.assertAllClose(accuracy, expected_accuracy)

  def testMeanAveragePrecisionWorks(self):
    # Define input.
    predictions = _CreateRetrievalPredictions()
    solution = _CreateRetrievalSolution()

    # Run tested function.
    mean_ap = metrics.MeanAveragePrecision(predictions, solution)

    # Define expected results.
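    # Per-query average precisions: '02234...' -> 1/2, '03234...' -> 1/3,
    # '04234...' -> 1 and '01234...' (no predictions) -> 0; averaged over the
    # four solution queries this gives 11/24 ~= 0.458333.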
    expected_mean_ap = 0.458333

    # Compare actual and expected results.
    self.assertAllClose(mean_ap, expected_mean_ap)

  def testMeanAveragePrecisionMaxPredictionsWorks(self):
    # Define input.
    predictions = _CreateRetrievalPredictions()
    solution = _CreateRetrievalSolution()

    # Run tested function.
    mean_ap = metrics.MeanAveragePrecision(
        predictions, solution, max_predictions=1)

    # Define expected results.
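    # With max_predictions=1 only the top prediction counts: '03234...' and
    # '04234...' rank a correct image first (AP = 1 each), '02234...' does
    # not, and '01234...' has no predictions, so mAP = 2/4 = 0.5.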
    expected_mean_ap = 0.5

    # Compare actual and expected results.
    self.assertAllClose(mean_ap, expected_mean_ap)

  def testMeanPrecisionsWorks(self):
    # Define input.
    predictions = _CreateRetrievalPredictions()
    solution = _CreateRetrievalSolution()

    # Run tested function.
    mean_precisions = metrics.MeanPrecisions(
        predictions, solution, max_predictions=2)

    # Define expected results.
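    # Precision@1 per query is 0, 1, 1 and 0 ('01234...' has no predictions),
    # giving a mean of 0.5; precision@2 is 1/2, 1/2, 1/2 and 0, giving a mean
    # of 0.375.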
    expected_mean_precisions = [0.5, 0.375]

    # Compare actual and expected results.
    self.assertAllClose(mean_precisions, expected_mean_precisions)

  def testMeanMedianPositionWorks(self):
    # Define input.
    predictions = _CreateRetrievalPredictions()
    solution = _CreateRetrievalSolution()

    # Run tested function.
    mean_position, median_position = metrics.MeanMedianPosition(
        predictions, solution)

    # Define expected results.
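    # First correct positions are 2 ('02234...'), 1 ('03234...'), 1
    # ('04234...') and 101 ('01234...' has no predictions, so it gets
    # max_predictions + 1); hence mean = 26.25 and median = 1.5.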
    expected_mean_position = 26.25
    expected_median_position = 1.5

    # Compare actual and expected results.
    self.assertAllClose(mean_position, expected_mean_position)
    self.assertAllClose(median_position, expected_median_position)

  def testMeanMedianPositionMaxPredictionsWorks(self):
    # Define input.
    predictions = _CreateRetrievalPredictions()
    solution = _CreateRetrievalSolution()

    # Run tested function.
    mean_position, median_position = metrics.MeanMedianPosition(
        predictions, solution, max_predictions=1)

    # Define expected results.
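    # With max_predictions=1 the positions are [2, 1, 1, 2]: '02234...' misses
    # within its single allowed prediction and '01234...' has none, so both
    # default to max_predictions + 1 = 2; mean = median = 1.5.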
    expected_mean_position = 1.5
    expected_median_position = 1.5

    # Compare actual and expected results.
    self.assertAllClose(mean_position, expected_mean_position)
    self.assertAllClose(median_position, expected_median_position)


if __name__ == '__main__':
  tf.test.main()