Unverified commit 93f22151 authored by Vishnu Banna, committed by GitHub

Merge pull request #1 from PurdueCAM2Project/linting

Linting
parents 9a1b8990 5dfe788d
......@@ -26,4 +26,7 @@
* [shapemask](vision/detection): An object detection and instance segmentation model using shape priors. See [Tensorboard.dev training metrics](https://tensorboard.dev/experiment/ZbXgVoc6Rf6mBRlPj0JpLA).
## Recommendation
* [dlrm](recommendation/ranking): [Deep Learning Recommendation Model for
Personalization and Recommendation Systems](https://arxiv.org/abs/1906.00091).
* [dcn v2](recommendation/ranking): [Improved Deep & Cross Network and Practical Lessons for Web-scale Learning to Rank Systems](https://arxiv.org/abs/2008.13535).
* [ncf](recommendation): Neural Collaborative Filtering. See [Tensorboard.dev training metrics](https://tensorboard.dev/experiment/0k3gKjZlR1ewkVTRyLB6IQ).
......@@ -180,11 +180,15 @@ class EMAConfig(BaseOptimizerConfig):
Attributes:
name: 'str', name of the optimizer.
trainable_weights_only: 'bool', if True, only model trainable weights will
be updated. Otherwise, all model weights will be updated. This mainly
affects batch normalization parameters.
average_decay: 'float', average decay value.
start_step: 'int', start step to apply moving average.
dynamic_decay: 'bool', whether to apply dynamic decay or not.
"""
name: str = "ExponentialMovingAverage"
trainable_weights_only: bool = True
average_decay: float = 0.99
start_step: int = 0
dynamic_decay: bool = True
......
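For reference, a minimal sketch of constructing the config above. The import path is an assumption based on the Model Garden layout and may differ:

```python
# A minimal sketch, assuming EMAConfig is exposed from the Model Garden's
# optimizer configs module (import path unverified).
from official.modeling.optimization.configs.optimizer_config import EMAConfig

ema = EMAConfig(
    average_decay=0.9999,          # slower decay for long training runs
    trainable_weights_only=False,  # also average batch-norm statistics
    start_step=0,                  # apply the moving average from step 0
    dynamic_decay=True,            # step-dependent decay (see docstring)
)
```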
......@@ -48,6 +48,7 @@ class ExponentialMovingAverage(tf.keras.optimizers.Optimizer):
def __init__(self,
optimizer: tf.keras.optimizers.Optimizer,
trainable_weights_only: bool = True,
average_decay: float = 0.99,
start_step: int = 0,
dynamic_decay: bool = True,
......@@ -58,6 +59,9 @@ class ExponentialMovingAverage(tf.keras.optimizers.Optimizer):
Args:
optimizer: `tf.keras.optimizers.Optimizer` that will be
used to compute and apply gradients.
trainable_weights_only: bool. If True, only model trainable weights will
be updated; otherwise, all model weights will be updated. This mainly
affects batch normalization parameters.
average_decay: float. Decay to use to maintain the moving averages
of trained variables.
start_step: int. The step at which to start the moving average.
......@@ -72,6 +76,7 @@ class ExponentialMovingAverage(tf.keras.optimizers.Optimizer):
"""
super().__init__(name, **kwargs)
self._average_decay = average_decay
self._trainable_weights_only = trainable_weights_only
self._start_step = tf.constant(start_step, tf.float32)
self._dynamic_decay = dynamic_decay
self._optimizer = optimizer
......@@ -81,12 +86,17 @@ class ExponentialMovingAverage(tf.keras.optimizers.Optimizer):
def shadow_copy(self, model: tf.keras.Model):
"""Creates shadow variables for the given model weights."""
for var in model.weights:
if self._trainable_weights_only:
self._model_weights = model.trainable_variables
else:
self._model_weights = model.variables
for var in self._model_weights:
self.add_slot(var, 'average', initializer='zeros')
self._average_weights = [
self.get_slot(var, 'average') for var in model.weights
self.get_slot(var, 'average') for var in self._model_weights
]
self._model_weights = model.weights
@property
def has_shadow_copy(self):
......
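To make `trainable_weights_only` concrete, here is a small sketch using plain Keras (no Model Garden imports) of the two variable sets `shadow_copy` would average:

```python
import tensorflow as tf

# Sketch: the variable sets shadow_copy selects under each setting.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(2, input_shape=(4,)),
    tf.keras.layers.BatchNormalization(),
])

# trainable_weights_only=True -> kernel, bias, gamma, beta
print([v.name for v in model.trainable_variables])
# trainable_weights_only=False -> also moving_mean, moving_variance,
# which is why the setting mainly affects batch normalization.
print([v.name for v in model.variables])
```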
......@@ -1316,8 +1316,8 @@ class AXgProcessor(DataProcessor):
return examples
class SuperGLUERTEProcessor(DataProcessor):
"""Processor for the RTE dataset (SuperGLUE version)."""
class SuperGLUEDataProcessor(DataProcessor):
"""Processor for the SuperGLUE dataset."""
def get_train_examples(self, data_dir):
"""See base class."""
......@@ -1334,6 +1334,70 @@ class SuperGLUERTEProcessor(DataProcessor):
return self._create_examples(
self._read_jsonl(os.path.join(data_dir, "test.jsonl")), "test")
def _create_examples(self, lines, set_type):
"""Creates examples for the training/dev/test sets."""
raise NotImplementedError()
class BoolQProcessor(SuperGLUEDataProcessor):
"""Processor for the BoolQ dataset (SuperGLUE diagnostics dataset)."""
def get_labels(self):
"""See base class."""
return ["True", "False"]
@staticmethod
def get_processor_name():
"""See base class."""
return "BoolQ"
def _create_examples(self, lines, set_type):
"""Creates examples for the training/dev/test sets."""
examples = []
for line in lines:
guid = "%s-%s" % (set_type, self.process_text_fn(str(line["idx"])))
text_a = self.process_text_fn(line["question"])
text_b = self.process_text_fn(line["passage"])
if set_type == "test":
label = "False"
else:
label = str(line["label"])
examples.append(
InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
return examples
class CBProcessor(SuperGLUEDataProcessor):
"""Processor for the CB dataset (SuperGLUE diagnostics dataset)."""
def get_labels(self):
"""See base class."""
return ["entailment", "neutral", "contradiction"]
@staticmethod
def get_processor_name():
"""See base class."""
return "CB"
def _create_examples(self, lines, set_type):
"""Creates examples for the training/dev/test sets."""
examples = []
for line in lines:
guid = "%s-%s" % (set_type, self.process_text_fn(str(line["idx"])))
text_a = self.process_text_fn(line["premise"])
text_b = self.process_text_fn(line["hypothesis"])
if set_type == "test":
label = "entailment"
else:
label = self.process_text_fn(line["label"])
examples.append(
InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
return examples
class SuperGLUERTEProcessor(SuperGLUEDataProcessor):
"""Processor for the RTE dataset (SuperGLUE version)."""
def get_labels(self):
"""See base class."""
# All datasets are converted to 2-class split, where for 3-class datasets we
......
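For context, a hedged sketch of the record shape these processors consume. Only the field names (`idx`, `question`, `passage`, `label`) come from the processor code above; the values are invented:

```python
import json

# Hypothetical BoolQ-style jsonl record; field names taken from
# BoolQProcessor._create_examples, values made up for illustration.
line = json.loads('{"idx": 0, "question": "is the sky blue", '
                  '"passage": "On a clear day the sky appears blue.", '
                  '"label": true}')
guid = "train-%s" % str(line["idx"])
text_a, text_b = line["question"], line["passage"]
label = str(line["label"])  # JSON true -> "True", matching get_labels()
```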
......@@ -50,7 +50,7 @@ flags.DEFINE_enum(
"classification_task_name", "MNLI", [
"AX", "COLA", "IMDB", "MNLI", "MRPC", "PAWS-X", "QNLI", "QQP", "RTE",
"SST-2", "STS-B", "WNLI", "XNLI", "XTREME-XNLI", "XTREME-PAWS-X",
"AX-g", "SUPERGLUE-RTE"
"AX-g", "SUPERGLUE-RTE", "CB", "BoolQ"
], "The name of the task to train BERT classifier. The "
"difference between XTREME-XNLI and XNLI is: 1. the format "
"of input tsv files; 2. the dev set for XTREME is english "
......@@ -243,7 +243,11 @@ def generate_classifier_dataset():
"ax-g":
classifier_data_lib.AXgProcessor,
"superglue-rte":
classifier_data_lib.SuperGLUERTEProcessor
classifier_data_lib.SuperGLUERTEProcessor,
"cb":
classifier_data_lib.CBProcessor,
"boolq":
classifier_data_lib.BoolQProcessor,
}
task_name = FLAGS.classification_task_name.lower()
if task_name not in processors:
......
......@@ -27,6 +27,7 @@ import tensorflow as tf
from official.common import distribute_utils
# Imports registered experiment configs.
from official.common import registry_imports # pylint: disable=unused-import
from official.core import exp_factory
from official.core import task_factory
from official.core import train_lib
......@@ -64,6 +65,8 @@ EVAL_METRIC_MAP = {
AXG_CLASS_NAMES = ['entailment', 'not_entailment']
RTE_CLASS_NAMES = ['entailment', 'not_entailment']
CB_CLASS_NAMES = ['entailment', 'neutral', 'contradiction']
BOOLQ_CLASS_NAMES = ['True', 'False']
def _override_exp_config_by_file(exp_config, exp_config_files):
......@@ -153,7 +156,9 @@ def _write_submission_file(task, seq_length):
write_fn = binary_helper.write_superglue_classification
write_fn_map = {
'RTE': functools.partial(write_fn, class_names=RTE_CLASS_NAMES),
'AX-g': functools.partial(write_fn, class_names=AXG_CLASS_NAMES)
'AX-g': functools.partial(write_fn, class_names=AXG_CLASS_NAMES),
'CB': functools.partial(write_fn, class_names=CB_CLASS_NAMES),
'BoolQ': functools.partial(write_fn, class_names=BOOLQ_CLASS_NAMES)
}
logging.info('Predicting %s', FLAGS.test_input_path)
write_fn_map[FLAGS.task_name](
......
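The `write_fn_map` above pre-binds each task's class names with `functools.partial`, so dispatch by task name needs no extra arguments. A standalone sketch with a hypothetical stand-in writer:

```python
import functools

def write_fn(output_path, class_names):
  """Hypothetical stand-in for the SuperGLUE submission writer."""
  print(output_path, class_names)

write_fn_map = {
    'CB': functools.partial(
        write_fn, class_names=['entailment', 'neutral', 'contradiction']),
    'BoolQ': functools.partial(write_fn, class_names=['True', 'False']),
}
write_fn_map['BoolQ']('predictions.tsv')  # predictions.tsv ['True', 'False']
```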
......@@ -16,8 +16,8 @@ When training on TPUs we use
[TPUEmbedding layer](https://github.com/tensorflow/recommenders/blob/main/tensorflow_recommenders/layers/embedding/tpu_embedding_layer.py)
for categorical features. TPU embedding supports large embedding tables with
fast lookup; the size of the embedding tables scales linearly with the size of the TPU
pod. We can have up to 96 GB embedding tables for TPU v3-8 and 6.14 TB for
v3-512 and 24.6 TB for TPU Pod v3-2048.
pod. We can have up to 90 GB embedding tables for TPU v3-8, 5.6 TB for
v3-512, and 22.4 TB for TPU Pod v3-2048.
The Model code is in
[TensorFlow Recommenders](https://github.com/tensorflow/recommenders/tree/main/tensorflow_recommenders/experimental/models)
......@@ -25,16 +25,30 @@ library, while input pipeline, configuration and training loop is here.
## Prerequisites
To get started, download the code from TensorFlow models GitHub repository or
use the pre-installed Google Cloud VM. We also need to install [TensorFlow
Recommenders](https://www.tensorflow.org/recommenders) library.
use the pre-installed Google Cloud VM.
```bash
git clone https://github.com/tensorflow/models.git
pip install -r models/official/requirements.txt
export PYTHONPATH=$PYTHONPATH:$(pwd)/models
```
Make sure to use TensorFlow 2.4+.
We also need to install
[TensorFlow Recommenders](https://www.tensorflow.org/recommenders) library.
If you are using [tf-nightly](https://pypi.org/project/tf-nightly/), make
sure to install
[tensorflow-recommenders](https://pypi.org/project/tensorflow-recommenders/)
without its dependencies by passing the `--no-deps` argument.
For tf-nightly:
```bash
pip install tensorflow-recommenders --no-deps
```
For stable TensorFlow 2.4+ [releases](https://pypi.org/project/tensorflow/):
```bash
pip install tensorflow-recommenders
```
## Dataset
......@@ -98,10 +112,10 @@ export EXPERIMENT_NAME=my_experiment_name
export BUCKET_NAME="gs://my_dlrm_bucket"
export DATA_DIR="${BUCKET_NAME}/data"
python3 official/recommendation/ranking/main.py --mode=train_and_eval \
python3 models/official/recommendation/ranking/train.py --mode=train_and_eval \
--model_dir=${BUCKET_NAME}/model_dirs/${EXPERIMENT_NAME} --params_override="
runtime:
distribution_strategy='tpu'
distribution_strategy: 'tpu'
task:
use_synthetic_data: false
train_data:
......@@ -125,7 +139,7 @@ trainer:
checkpoint_interval: 100000
validation_steps: 5440
train_steps: 256054
steps_per_execution: 1000
steps_per_loop: 1000
"
```
......
......@@ -12,7 +12,6 @@ tensorflow-hub>=0.6.0
tensorflow-model-optimization>=0.4.1
tensorflow-datasets
tensorflow-addons
tensorflow-recommenders>=0.5.0
dataclasses;python_version<"3.7"
gin-config
tf_slim>=1.1.0
......
......@@ -31,8 +31,8 @@ py_test() {
local exit_code=0
echo "===========Running Python test============"
for test_file in `find official/ -name '*test.py' -print`
# Skip Ranking tests; TODO(b/189265753): remove this once the issue is fixed.
for test_file in `find official/ -name '*test.py' -print | grep -v 'official/recommendation/ranking'`
do
echo "####=======Testing ${test_file}=======####"
${PY_BINARY} "${test_file}"
......
......@@ -80,6 +80,7 @@ trainer:
optimizer_config:
ema:
average_decay: 0.9999
trainable_weights_only: false
learning_rate:
cosine:
decay_steps: 73682
......
......@@ -227,7 +227,8 @@ def image_classification_imagenet_resnetrs() -> cfg.ExperimentConfig:
}
},
'ema': {
'average_decay': 0.9999
'average_decay': 0.9999,
'trainable_weights_only': False,
},
'learning_rate': {
'type': 'cosine',
......
......@@ -254,7 +254,12 @@ def video_classification_ucf101() -> cfg.ExperimentConfig:
'task.validation_data.is_training != None',
'task.train_data.num_classes == task.validation_data.num_classes',
])
add_trainer(config, train_batch_size=64, eval_batch_size=16, train_epochs=100)
add_trainer(
config,
train_batch_size=64,
eval_batch_size=16,
learning_rate=0.8,
train_epochs=100)
return config
......
......@@ -331,13 +331,13 @@ class YoloPAN(tf.keras.layers.Layer):
Args:
minimum_depth: `int` depth of the smallest branch of the FPN.
inputs: `dict[str, tf.InputSpec]` of the shape of input args as a dictionary of
lists.
inputs: `dict[str, tf.InputSpec]` of the shape of input args as a
dictionary of lists.
Returns:
The unscaled depths of the FPN branches.
"""
depths = []
if len(inputs.keys()) > 3 or self._fpn_filter_scale > 1:
for i in range(self._min_level, self._max_level + 1):
......@@ -386,8 +386,8 @@ class YoloDecoder(tf.keras.Model):
kernel_regularizer=None,
bias_regularizer=None,
**kwargs):
"""Yolo Decoder initialization function. A unified model that ties all decoder
components into a conditionally build YOLO decder.
"""Yolo Decoder initialization function. A unified model that ties all
decoder components into a conditionally build YOLO decoder.
Args:
input_specs: `dict[str, tf.InputSpec]`: input specs of each of the inputs
......@@ -409,7 +409,7 @@ class YoloDecoder(tf.keras.Model):
zero.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
**kwargs: keyword arguments to be passed.
"""
......
......@@ -1152,8 +1152,8 @@ class SAM(tf.keras.layers.Layer):
def call(self, inputs, training=None):
if self._use_pooling:
depth_max = tf.reduce_max(inputs, axis=-1, keep_dims=True)
depth_avg = tf.reduce_mean(inputs, axis=-1, keep_dims=True)
depth_max = tf.reduce_max(inputs, axis=-1, keepdims=True)
depth_avg = tf.reduce_mean(inputs, axis=-1, keepdims=True)
input_maps = tf.concat([depth_avg, depth_max], axis=-1)
else:
input_maps = inputs
......@@ -1545,7 +1545,7 @@ class DarkRouteProcess(tf.keras.layers.Layer):
elif layer == 'spp':
self.layers.append(self._spp(self._filters, dark_conv_args))
elif layer == 'sam':
self.layers.append(self._sam(-1, _args))
self.layers.append(self._sam(-1, dark_conv_args))
self._lim = len(self.layers)
super().build(input_shape)
......
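The `keepdims` fix above matters because each pooled map must keep a channel axis of size 1 so the two can be concatenated into a two-channel attention input. A standalone sketch of just that pooling step:

```python
import tensorflow as tf

# Standalone sketch of the channel-pooling step in SAM.call above.
inputs = tf.random.normal([1, 8, 8, 32])                    # NHWC features
depth_max = tf.reduce_max(inputs, axis=-1, keepdims=True)   # (1, 8, 8, 1)
depth_avg = tf.reduce_mean(inputs, axis=-1, keepdims=True)  # (1, 8, 8, 1)
input_maps = tf.concat([depth_avg, depth_max], axis=-1)     # (1, 8, 8, 2)
print(input_maps.shape)
```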