utils.py

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Contains a collection of util functions for training and evaluating."""

from absl import logging
import numpy as np
import tensorflow as tf
from official.vision.dataloaders import tfexample_utils


def Dequantize(feat_vector, max_quantized_value=2, min_quantized_value=-2):
  """Dequantize the feature from the byte format to the float format.

  Args:
    feat_vector: the input 1-d vector.
    max_quantized_value: the maximum of the quantized value.
    min_quantized_value: the minimum of the quantized value.

  Returns:
    A float vector which has the same shape as feat_vector.
  """
  assert max_quantized_value > min_quantized_value
  quantized_range = max_quantized_value - min_quantized_value
  scalar = quantized_range / 255.0
  bias = (quantized_range / 512.0) + min_quantized_value
  return feat_vector * scalar + bias


def MakeSummary(name, value):
  """Creates a tf.Summary proto with the given name and value."""
  summary = tf.Summary()
  val = summary.value.add()
  val.tag = str(name)
  val.simple_value = float(value)
  return summary


def AddGlobalStepSummary(summary_writer,
                         global_step_val,
                         global_step_info_dict,
                         summary_scope="Eval"):
  """Add the global_step summary to the Tensorboard.

  Args:
    summary_writer: Tensorflow summary_writer.
    global_step_val: a int value of the global step.
    global_step_info_dict: a dictionary of the evaluation metrics calculated for
      a mini-batch.
    summary_scope: Train or Eval.

  Returns:
    A string of this global_step summary
  """
  this_hit_at_one = global_step_info_dict["hit_at_one"]
  this_perr = global_step_info_dict["perr"]
  this_loss = global_step_info_dict["loss"]
  examples_per_second = global_step_info_dict.get("examples_per_second", -1)

  summary_writer.add_summary(
      MakeSummary("GlobalStep/" + summary_scope + "_Hit@1", this_hit_at_one),
      global_step_val)
  summary_writer.add_summary(
      MakeSummary("GlobalStep/" + summary_scope + "_Perr", this_perr),
      global_step_val)
  summary_writer.add_summary(
      MakeSummary("GlobalStep/" + summary_scope + "_Loss", this_loss),
      global_step_val)

  if examples_per_second != -1:
    summary_writer.add_summary(
        MakeSummary("GlobalStep/" + summary_scope + "_Example_Second",
                    examples_per_second), global_step_val)

  summary_writer.flush()
  info = (
      "global_step {0} | Batch Hit@1: {1:.3f} | Batch PERR: {2:.3f} | Batch "
      "Loss: {3:.3f} | Examples_per_sec: {4:.3f}").format(
          global_step_val, this_hit_at_one, this_perr, this_loss,
          examples_per_second)
  return info


def AddEpochSummary(summary_writer,
                    global_step_val,
                    epoch_info_dict,
                    summary_scope="Eval"):
  """Add the epoch summary to the Tensorboard.

  Args:
    summary_writer: Tensorflow summary_writer.
    global_step_val: a int value of the global step.
    epoch_info_dict: a dictionary of the evaluation metrics calculated for the
      whole epoch.
    summary_scope: Train or Eval.

  Returns:
    A string of this global_step summary
  """
  epoch_id = epoch_info_dict["epoch_id"]
  avg_hit_at_one = epoch_info_dict["avg_hit_at_one"]
  avg_perr = epoch_info_dict["avg_perr"]
  avg_loss = epoch_info_dict["avg_loss"]
  aps = epoch_info_dict["aps"]
  gap = epoch_info_dict["gap"]
  mean_ap = np.mean(aps)

  summary_writer.add_summary(
      MakeSummary("Epoch/" + summary_scope + "_Avg_Hit@1", avg_hit_at_one),
      global_step_val)
  summary_writer.add_summary(
      MakeSummary("Epoch/" + summary_scope + "_Avg_Perr", avg_perr),
      global_step_val)
  summary_writer.add_summary(
      MakeSummary("Epoch/" + summary_scope + "_Avg_Loss", avg_loss),
      global_step_val)
  summary_writer.add_summary(
      MakeSummary("Epoch/" + summary_scope + "_MAP", mean_ap), global_step_val)
  summary_writer.add_summary(
      MakeSummary("Epoch/" + summary_scope + "_GAP", gap), global_step_val)
  summary_writer.flush()

  info = ("epoch/eval number {0} | Avg_Hit@1: {1:.3f} | Avg_PERR: {2:.3f} "
          "| MAP: {3:.3f} | GAP: {4:.3f} | Avg_Loss: {5:3f} | num_classes: {6}"
         ).format(epoch_id, avg_hit_at_one, avg_perr, mean_ap, gap, avg_loss,
                  len(aps))
  return info


def GetListOfFeatureNamesAndSizes(feature_names, feature_sizes):
  """Extract the list of feature names and the dimensionality.

  Args:
    feature_names: string containing comma separated list of feature names
    feature_sizes: string containing comma separated list of feature sizes

  Returns:
    List of the feature names and list of the dimensionality of each feature.
    Elements in the first/second list are strings/integers.
  """
  list_of_feature_names = [
      feature_names.strip() for feature_names in feature_names.split(",")
  ]
  list_of_feature_sizes = [
      int(feature_sizes) for feature_sizes in feature_sizes.split(",")
  ]
  if len(list_of_feature_names) != len(list_of_feature_sizes):
    logging.error(
        "length of the feature names (=%r) != length of feature "
        "sizes (=%r)", str(len(list_of_feature_names)),
        str(len(list_of_feature_sizes)))

  return list_of_feature_names, list_of_feature_sizes


def ClipGradientNorms(gradients_to_variables, max_norm):
  """Clips the gradients by the given value.

  Args:
    gradients_to_variables: A list of gradient to variable pairs (tuples).
    max_norm: the maximum norm value.

  Returns:
    A list of clipped gradient to variable pairs.
  """
  clipped_grads_and_vars = []
  for grad, var in gradients_to_variables:
    if grad is not None:
      if isinstance(grad, tf.IndexedSlices):
        tmp = tf.clip_by_norm(grad.values, max_norm)
        grad = tf.IndexedSlices(tmp, grad.indices, grad.dense_shape)
      else:
        grad = tf.clip_by_norm(grad, max_norm)
    clipped_grads_and_vars.append((grad, var))
  return clipped_grads_and_vars


def CombineGradients(tower_grads):
  """Calculate the combined gradient for each shared variable across all towers.

  Note that this function provides a synchronization point across all towers.

  Args:
    tower_grads: List of lists of (gradient, variable) tuples. The outer list is
      over individual gradients. The inner list is over the gradient calculation
      for each tower.

  Returns:
     List of pairs of (gradient, variable) where the gradient has been summed
     across all towers.
  """
  filtered_grads = [
      [x for x in grad_list if x[0] is not None] for grad_list in tower_grads
  ]
  final_grads = []
  for i in range(len(filtered_grads[0])):
    grads = [filtered_grads[t][i] for t in range(len(filtered_grads))]
    grad = tf.stack([x[0] for x in grads], 0)
    grad = tf.reduce_sum(grad, 0)
    final_grads.append((
        grad,
        filtered_grads[0][i][1],
    ))

  return final_grads


def MakeYt8mExample(num_segment: int = 5) -> tf.train.SequenceExample:
  """Generate fake data for unit tests."""
  rgb = np.random.randint(low=256, size=1024, dtype=np.uint8)
  audio = np.random.randint(low=256, size=128, dtype=np.uint8)

  seq_example = tf.train.SequenceExample()
  seq_example.context.feature["id"].bytes_list.value[:] = [b"id001"]
  seq_example.context.feature["labels"].int64_list.value[:] = [1, 2, 3, 4]
  seq_example.context.feature["segment_labels"].int64_list.value[:] = (
      [4] * num_segment)
  seq_example.context.feature["segment_start_times"].int64_list.value[:] = [
      i * 5 for i in range(num_segment)
  ]
  seq_example.context.feature["segment_scores"].float_list.value[:] = (
      [0.] * num_segment)
  tfexample_utils.put_bytes_list_to_feature(
      seq_example, rgb.tobytes(), key="rgb", repeat_num=120)
  tfexample_utils.put_bytes_list_to_feature(
      seq_example, audio.tobytes(), key="audio", repeat_num=120)

  return seq_example


# TODO(yeqing): Move the test related functions to test_utils.
def MakeExampleWithFloatFeatures(
    num_segment: int = 5) -> tf.train.SequenceExample:
  """Generate fake data for unit tests."""
  rgb = np.random.rand(1, 2048).astype(np.float32)
  audio = np.random.rand(256).astype(np.float32)

  seq_example = tf.train.SequenceExample()
  seq_example.context.feature["id"].bytes_list.value[:] = [b"id001"]
  seq_example.context.feature["labels"].int64_list.value[:] = [1, 2, 3, 4]
  seq_example.context.feature["segment_labels"].int64_list.value[:] = (
      [4] * num_segment)
  seq_example.context.feature["segment_start_times"].int64_list.value[:] = [
      i * 5 for i in range(num_segment)
  ]
  seq_example.context.feature["segment_scores"].float_list.value[:] = (
      [0.] * num_segment)
  seq_example.context.feature[
      "VIDEO_EMBEDDING/context_feature/floats"].float_list.value[:] = (
          audio.tolist())

  tfexample_utils.put_float_list_to_feature(
      seq_example, rgb.tolist(), key="FEATURE/feature/floats")

  return seq_example