Unverified commit 5ffcc5b6 authored by Anirudh Vegesana, committed by GitHub

Merge branch 'purdue-yolo' into detection_generator_pr

parents 0b81a843 76e0c014
......@@ -15,7 +15,7 @@
"""Sampling module for top_k, top_p and greedy decoding."""
import abc
from typing import Any, Callable, Dict
from typing import Any, Callable, Dict, Optional
import numpy as np
import tensorflow as tf
......@@ -98,10 +98,10 @@ def sample_top_p(logits, top_p):
], -1)
# Scatter sorted indices to original indexes.
indices_to_remove = scatter_values_on_batch_indices(
sorted_indices_to_remove, sorted_indices)
top_p_logits = set_tensor_by_indices_to_value(
logits, indices_to_remove, np.NINF)
indices_to_remove = scatter_values_on_batch_indices(sorted_indices_to_remove,
sorted_indices)
top_p_logits = set_tensor_by_indices_to_value(logits, indices_to_remove,
np.NINF)
return top_p_logits
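For readers skimming the hunk above: a minimal standalone sketch of the nucleus (top-p) filtering idea that `sample_top_p` implements, assuming float32 logits of shape (batch, vocab). `top_p_filter` is a hypothetical helper written for illustration, not the module's function.

```python
import tensorflow as tf

def top_p_filter(logits: tf.Tensor, top_p: float) -> tf.Tensor:
  """Masks logits outside the smallest set whose cumulative prob reaches top_p."""
  sorted_logits = tf.sort(logits, direction='DESCENDING', axis=-1)
  probs = tf.nn.softmax(sorted_logits, axis=-1)
  cum_probs = tf.cumsum(probs, axis=-1)
  # A token is dropped when the cumulative mass *before* it already exceeds
  # top_p, so at least one token always survives per row.
  exceeded = (cum_probs - probs) > top_p
  pos_inf = tf.fill(tf.shape(sorted_logits), float('inf'))
  # The smallest kept sorted logit acts as a per-row threshold.
  threshold = tf.reduce_min(
      tf.where(exceeded, pos_inf, sorted_logits), axis=-1, keepdims=True)
  return tf.where(logits < threshold,
                  tf.fill(tf.shape(logits), float('-inf')), logits)

logits = tf.constant([[2.0, 1.0, 0.1, -1.0]])
print(top_p_filter(logits, top_p=0.9).numpy())  # tail logit -1.0 -> -inf
```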
......@@ -121,13 +121,12 @@ def scatter_values_on_batch_indices(values, batch_indices):
tensor_shape = decoding_module.shape_list(batch_indices)
broad_casted_batch_dims = tf.reshape(
tf.broadcast_to(
tf.expand_dims(tf.range(tensor_shape[0]), axis=-1),
tensor_shape), [1, -1])
tf.expand_dims(tf.range(tensor_shape[0]), axis=-1), tensor_shape),
[1, -1])
pair_indices = tf.transpose(
tf.concat([broad_casted_batch_dims,
tf.reshape(batch_indices, [1, -1])], 0))
return tf.scatter_nd(pair_indices,
tf.reshape(values, [-1]), tensor_shape)
return tf.scatter_nd(pair_indices, tf.reshape(values, [-1]), tensor_shape)
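A toy demonstration (not part of the change) of the scatter pattern above: `values[b, j]` lands at column `batch_indices[b, j]` of row `b`, which is how a removal mask computed on sorted logits gets mapped back to the original vocabulary order.

```python
import tensorflow as tf

values = tf.constant([[10., 20.], [30., 40.]])
batch_indices = tf.constant([[1, 0], [0, 1]])
shape = tf.shape(batch_indices)
# Pair every value with its (row, target-column) destination.
rows = tf.repeat(tf.range(shape[0]), shape[1])
pair_indices = tf.stack([rows, tf.reshape(batch_indices, [-1])], axis=1)
out = tf.scatter_nd(pair_indices, tf.reshape(values, [-1]), shape)
print(out.numpy())  # [[20. 10.] [30. 40.]]
```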
def set_tensor_by_indices_to_value(input_tensor, indices, value):
......@@ -137,6 +136,7 @@ def set_tensor_by_indices_to_value(input_tensor, indices, value):
input_tensor: float (batch_size, dim)
indices: bool (batch_size, dim)
value: float scalar
Returns:
output_tensor: same shape as input_tensor.
"""
......@@ -150,11 +150,12 @@ class SamplingModule(decoding_module.DecodingModule, metaclass=abc.ABCMeta):
def __init__(self,
symbols_to_logits_fn,
length_normalization_fn: Callable[[int, tf.DType], float],
vocab_size: int,
max_decode_length: int,
eos_id: int,
padded_decode: bool,
length_normalization_fn: Optional[Callable[[int, tf.DType],
float]] = None,
top_k=0,
top_p=1.0,
sample_temperature=0.0,
......@@ -170,8 +171,8 @@ class SamplingModule(decoding_module.DecodingModule, metaclass=abc.ABCMeta):
self.max_decode_length = max_decode_length
self.top_k = tf.convert_to_tensor(top_k, dtype=tf.int32)
self.top_p = tf.convert_to_tensor(top_p, dtype=tf.float32)
self.sample_temperature = tf.convert_to_tensor(sample_temperature,
dtype=tf.float32)
self.sample_temperature = tf.convert_to_tensor(
sample_temperature, dtype=tf.float32)
self.enable_greedy = enable_greedy
super(SamplingModule, self).__init__(
length_normalization_fn=length_normalization_fn, dtype=dtype)
......@@ -330,12 +331,9 @@ class SamplingModule(decoding_module.DecodingModule, metaclass=abc.ABCMeta):
return state, state_shape_invariants
def _get_new_alive_state(
self,
new_seq: tf.Tensor,
new_log_probs: tf.Tensor,
new_finished_flags: tf.Tensor,
new_cache: Dict[str, tf.Tensor]) -> Dict[str, Any]:
def _get_new_alive_state(self, new_seq: tf.Tensor, new_log_probs: tf.Tensor,
new_finished_flags: tf.Tensor,
new_cache: Dict[str, tf.Tensor]) -> Dict[str, Any]:
"""Gather the sequences that are still alive.
This function resets the sequences in the alive_state that are finished.
......@@ -360,9 +358,7 @@ class SamplingModule(decoding_module.DecodingModule, metaclass=abc.ABCMeta):
decoding_module.StateKeys.ALIVE_CACHE: new_cache
}
def _get_new_finished_state(self,
state: Dict[str, Any],
new_seq: tf.Tensor,
def _get_new_finished_state(self, state: Dict[str, Any], new_seq: tf.Tensor,
new_log_probs: tf.Tensor,
new_finished_flags: tf.Tensor,
batch_size: int) -> Dict[str, tf.Tensor]:
......@@ -421,10 +417,9 @@ class SamplingModule(decoding_module.DecodingModule, metaclass=abc.ABCMeta):
length_norm = self.length_normalization_fn(self.max_decode_length + 1,
self.dtype)
alive_log_probs = alive_log_probs / length_norm
seq_cond = decoding_module.expand_to_same_rank(
finished_cond, finished_seq)
score_cond = decoding_module.expand_to_same_rank(
finished_cond, finished_scores)
seq_cond = decoding_module.expand_to_same_rank(finished_cond, finished_seq)
score_cond = decoding_module.expand_to_same_rank(finished_cond,
finished_scores)
finished_seq = tf.where(seq_cond, finished_seq, alive_seq)
finished_scores = tf.where(score_cond, finished_scores, alive_log_probs)
return finished_seq, finished_scores
......
......@@ -66,4 +66,5 @@ def main(_):
if __name__ == '__main__':
tfm_flags.define_flags()
flags.mark_flags_as_required(['experiment', 'mode', 'model_dir'])
app.run(main)
......@@ -111,6 +111,7 @@ export TPU_NAME=my-dlrm-tpu
export EXPERIMENT_NAME=my_experiment_name
export BUCKET_NAME="gs://my_dlrm_bucket"
export DATA_DIR="${BUCKET_NAME}/data"
export EMBEDDING_DIM=32
python3 models/official/recommendation/ranking/train.py --mode=train_and_eval \
--model_dir=${BUCKET_NAME}/model_dirs/${EXPERIMENT_NAME} --params_override="
......@@ -126,8 +127,8 @@ task:
global_batch_size: 16384
model:
num_dense_features: 13
bottom_mlp: [512,256,128]
embedding_dim: 128
bottom_mlp: [512,256,${EMBEDDING_DIM}]
embedding_dim: ${EMBEDDING_DIM}
top_mlp: [1024,1024,512,256,1]
interaction: 'dot'
vocab_sizes: [39884406, 39043, 17289, 7420, 20263, 3, 7120, 1543, 63,
......@@ -135,8 +136,8 @@ task:
39979771, 25641295, 39664984, 585935, 12972, 108, 36]
trainer:
use_orbit: true
validation_interval: 90000
checkpoint_interval: 100000
validation_interval: 85352
checkpoint_interval: 85352
validation_steps: 5440
train_steps: 256054
steps_per_loop: 1000
......@@ -154,7 +155,9 @@ Training on GPUs is similar to TPU training. Only the distribution strategy needs
to be updated and the number of GPUs provided (for 4 GPUs):
```shell
python3 official/recommendation/ranking/main.py --mode=train_and_eval \
export EMBEDDING_DIM=8
python3 official/recommendation/ranking/train.py --mode=train_and_eval \
--model_dir=${BUCKET_NAME}/model_dirs/${EXPERIMENT_NAME} --params_override="
runtime:
distribution_strategy: 'mirrored'
......
......@@ -12,6 +12,3 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helper functions for running models in a distributed setting."""
# pylint: disable=wildcard-import
from official.common.distribute_utils import *
......@@ -13,7 +13,7 @@
# limitations under the License.
"""Ranking Model configuration definition."""
from typing import Optional, List
from typing import Optional, List, Union
import dataclasses
from official.core import exp_factory
......@@ -59,7 +59,13 @@ class ModelConfig(hyperparams.Config):
num_dense_features: Number of dense features.
vocab_sizes: Vocab sizes for each of the sparse features. The order agrees
with the order of the input data.
embedding_dim: Embedding dimension.
embedding_dim: An integer or a list of embedding table dimensions.
If it's an integer then all tables will have the same embedding dimension.
If it's a list then its length must match `vocab_sizes`.
size_threshold: A threshold for table sizes below which a keras
embedding layer is used, and above which a TPU embedding layer is used.
If it's -1 then only the Keras embedding layer will be used for all tables;
if it's 0 then only the TPU embedding layer will be used.
bottom_mlp: The sizes of hidden layers for bottom MLP applied to dense
features.
top_mlp: The sizes of hidden layers for top MLP.
......@@ -68,7 +74,8 @@ class ModelConfig(hyperparams.Config):
"""
num_dense_features: int = 13
vocab_sizes: List[int] = dataclasses.field(default_factory=list)
embedding_dim: int = 8
embedding_dim: Union[int, List[int]] = 8
size_threshold: int = 50_000
bottom_mlp: List[int] = dataclasses.field(default_factory=list)
top_mlp: List[int] = dataclasses.field(default_factory=list)
interaction: str = 'dot'
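A hypothetical configuration using the new fields (values are illustrative only): a scalar `embedding_dim` still broadcasts to every table, a list pins one dimension per table and must match `vocab_sizes` in length, and `size_threshold` routes small tables to the Keras embedding path.

```python
from official.recommendation.ranking.configs import config

model = config.ModelConfig(
    vocab_sizes=[1_000_000, 40_000, 200],
    embedding_dim=[32, 32, 32],  # or simply 32; list length == len(vocab_sizes)
    size_threshold=50_000,       # tables with vocab < 50k use Keras embeddings
    bottom_mlp=[512, 256, 32],
    top_mlp=[1024, 512, 1])
```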
......@@ -188,7 +195,7 @@ def default_config() -> Config:
runtime=cfg.RuntimeConfig(),
task=Task(
model=ModelConfig(
embedding_dim=4,
embedding_dim=8,
vocab_sizes=vocab_sizes,
bottom_mlp=[64, 32, 4],
top_mlp=[64, 32, 1]),
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
......@@ -136,7 +136,7 @@ class CriteoTsvReader:
num_replicas = ctx.num_replicas_in_sync if ctx else 1
if params.is_training:
dataset_size = 10000 * batch_size * num_replicas
dataset_size = 1000 * batch_size * num_replicas
else:
dataset_size = 1000 * batch_size * num_replicas
dense_tensor = tf.random.uniform(
......@@ -169,6 +169,7 @@ class CriteoTsvReader:
'sparse_features': sparse_tensor_elements}, label_tensor
dataset = tf.data.Dataset.from_tensor_slices(input_elem)
dataset = dataset.cache()
if params.is_training:
dataset = dataset.repeat()
......
......@@ -17,8 +17,8 @@
from absl.testing import parameterized
import tensorflow as tf
from official.recommendation.ranking import data_pipeline
from official.recommendation.ranking.configs import config
from official.recommendation.ranking.data import data_pipeline
class DataPipelineTest(parameterized.TestCase, tf.test.TestCase):
......
......@@ -15,7 +15,7 @@
"""Task for the Ranking model."""
import math
from typing import Dict, List, Optional
from typing import Dict, List, Optional, Union
import tensorflow as tf
import tensorflow_recommenders as tfrs
......@@ -23,36 +23,49 @@ import tensorflow_recommenders as tfrs
from official.core import base_task
from official.core import config_definitions
from official.recommendation.ranking import common
from official.recommendation.ranking import data_pipeline
from official.recommendation.ranking.configs import config
from official.recommendation.ranking.data import data_pipeline
RuntimeConfig = config_definitions.RuntimeConfig
def _get_tpu_embedding_feature_config(
vocab_sizes: List[int],
embedding_dim: int,
embedding_dim: Union[int, List[int]],
table_name_prefix: str = 'embedding_table'
) -> Dict[str, tf.tpu.experimental.embedding.FeatureConfig]:
"""Returns TPU embedding feature config.
The i'th table config will have a vocab size of vocab_sizes[i] and an
embedding dimension of embedding_dim (if embedding_dim is an int) or
embedding_dim[i] (if embedding_dim is a list).
Args:
vocab_sizes: List of sizes of categories/id's in the table.
embedding_dim: Embedding dimension.
embedding_dim: An integer or a list of embedding table dimensions.
table_name_prefix: a prefix for embedding tables.
Returns:
A dictionary of feature_name, FeatureConfig pairs.
"""
if isinstance(embedding_dim, List):
if len(vocab_sizes) != len(embedding_dim):
raise ValueError(
f'length of vocab_sizes: {len(vocab_sizes)} is not equal to the '
f'length of embedding_dim: {len(embedding_dim)}')
elif isinstance(embedding_dim, int):
embedding_dim = [embedding_dim] * len(vocab_sizes)
else:
raise ValueError('embedding_dim is neither a list nor an int, got '
f'{type(embedding_dim)}')
feature_config = {}
for i, vocab_size in enumerate(vocab_sizes):
table_config = tf.tpu.experimental.embedding.TableConfig(
vocabulary_size=vocab_size,
dim=embedding_dim,
dim=embedding_dim[i],
combiner='mean',
initializer=tf.initializers.TruncatedNormal(
mean=0.0, stddev=1 / math.sqrt(embedding_dim)),
mean=0.0, stddev=1 / math.sqrt(embedding_dim[i])),
name=table_name_prefix + '_%s' % i)
feature_config[str(i)] = tf.tpu.experimental.embedding.FeatureConfig(
table=table_config)
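The validation and broadcast logic above, restated as a self-contained sketch (hypothetical helper name) for anyone who wants to exercise the behavior outside the task:

```python
from typing import List, Union

def normalize_embedding_dims(
    vocab_sizes: List[int],
    embedding_dim: Union[int, List[int]]) -> List[int]:
  """Broadcasts a scalar dim to every table, or validates a per-table list."""
  if isinstance(embedding_dim, int):
    return [embedding_dim] * len(vocab_sizes)
  if isinstance(embedding_dim, list):
    if len(vocab_sizes) != len(embedding_dim):
      raise ValueError('embedding_dim length must equal vocab_sizes length')
    return embedding_dim
  raise ValueError(f'embedding_dim must be int or list, got {type(embedding_dim)}')

assert normalize_embedding_dims([100, 200, 300], 8) == [8, 8, 8]
assert normalize_embedding_dims([100, 200], [16, 8]) == [16, 8]
```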
......@@ -72,7 +85,7 @@ class RankingTask(base_task.Task):
"""Task initialization.
Args:
params: the RannkingModel task configuration instance.
params: the RankingModel task configuration instance.
optimizer_config: Optimizer configuration instance.
logging_dir: a string pointing to where the model, summaries etc. will be
saved.
......@@ -125,15 +138,18 @@ class RankingTask(base_task.Task):
self.optimizer_config.embedding_optimizer)
embedding_optimizer.learning_rate = lr_callable
emb_feature_config = _get_tpu_embedding_feature_config(
vocab_sizes=self.task_config.model.vocab_sizes,
embedding_dim=self.task_config.model.embedding_dim)
feature_config = _get_tpu_embedding_feature_config(
embedding_dim=self.task_config.model.embedding_dim,
vocab_sizes=self.task_config.model.vocab_sizes)
tpu_embedding = tfrs.layers.embedding.TPUEmbedding(
emb_feature_config, embedding_optimizer)
embedding_layer = tfrs.experimental.layers.embedding.PartialTPUEmbedding(
feature_config=feature_config,
optimizer=embedding_optimizer,
size_threshold=self.task_config.model.size_threshold)
if self.task_config.model.interaction == 'dot':
feature_interaction = tfrs.layers.feature_interaction.DotInteraction()
feature_interaction = tfrs.layers.feature_interaction.DotInteraction(
skip_gather=True)
elif self.task_config.model.interaction == 'cross':
feature_interaction = tf.keras.Sequential([
tf.keras.layers.Concatenate(),
......@@ -145,7 +161,7 @@ class RankingTask(base_task.Task):
f'is not supported; it must be either \'dot\' or \'cross\'.')
model = tfrs.experimental.models.Ranking(
embedding_layer=tpu_embedding,
embedding_layer=embedding_layer,
bottom_stack=tfrs.layers.blocks.MLP(
units=self.task_config.model.bottom_mlp, final_activation='relu'),
feature_interaction=feature_interaction,
......@@ -184,3 +200,5 @@ class RankingTask(base_task.Task):
@property
def optimizer_config(self) -> config.OptimizationConfig:
return self._optimizer_config
......@@ -18,8 +18,8 @@ from absl.testing import parameterized
import tensorflow as tf
from official.core import exp_factory
from official.recommendation.ranking import data_pipeline
from official.recommendation.ranking import task
from official.recommendation.ranking.data import data_pipeline
class TaskTest(parameterized.TestCase, tf.test.TestCase):
......@@ -34,6 +34,8 @@ class TaskTest(parameterized.TestCase, tf.test.TestCase):
params.task.train_data.global_batch_size = 16
params.task.validation_data.global_batch_size = 16
params.task.model.vocab_sizes = [40, 12, 11, 13, 2, 5]
params.task.model.embedding_dim = 8
params.task.model.bottom_mlp = [64, 32, 8]
params.task.use_synthetic_data = True
params.task.model.num_dense_features = 5
......
......@@ -20,15 +20,14 @@ from absl import app
from absl import flags
from absl import logging
import orbit
import tensorflow as tf
from official.common import distribute_utils
from official.core import base_trainer
from official.core import train_lib
from official.core import train_utils
from official.recommendation.ranking import common
from official.recommendation.ranking.task import RankingTask
from official.utils.misc import distribution_utils
from official.utils.misc import keras_utils
FLAGS = flags.FLAGS
......@@ -86,7 +85,7 @@ def main(_) -> None:
enable_tensorboard = params.trainer.callbacks.enable_tensorboard
strategy = distribution_utils.get_distribution_strategy(
strategy = distribute_utils.get_distribution_strategy(
distribution_strategy=params.runtime.distribution_strategy,
all_reduce_alg=params.runtime.all_reduce_alg,
num_gpus=params.runtime.num_gpus,
......@@ -95,6 +94,21 @@ def main(_) -> None:
with strategy.scope():
model = task.build_model()
def get_dataset_fn(params):
return lambda input_context: task.build_inputs(params, input_context)
train_dataset = None
if 'train' in mode:
train_dataset = strategy.distribute_datasets_from_function(
get_dataset_fn(params.task.train_data),
options=tf.distribute.InputOptions(experimental_fetch_to_device=False))
validation_dataset = None
if 'eval' in mode:
validation_dataset = strategy.distribute_datasets_from_function(
get_dataset_fn(params.task.validation_data),
options=tf.distribute.InputOptions(experimental_fetch_to_device=False))
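For context, a minimal standalone sketch of the `distribute_datasets_from_function` pattern the new block uses (toy dataset and batch size are assumptions for illustration):

```python
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()

def dataset_fn(ctx: tf.distribute.InputContext) -> tf.data.Dataset:
  # Each input pipeline gets its own shard and per-replica batch size.
  per_replica_batch = ctx.get_per_replica_batch_size(64)
  ds = tf.data.Dataset.range(1024).shard(
      ctx.num_input_pipelines, ctx.input_pipeline_id)
  return ds.batch(per_replica_batch)

dist_ds = strategy.distribute_datasets_from_function(
    dataset_fn,
    options=tf.distribute.InputOptions(experimental_fetch_to_device=False))
for batch in dist_ds:
  break  # each element is a per-replica batch
```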
if params.trainer.use_orbit:
with strategy.scope():
checkpoint_exporter = train_utils.maybe_create_best_ckpt_exporter(
......@@ -106,6 +120,8 @@ def main(_) -> None:
optimizer=model.optimizer,
train='train' in mode,
evaluate='eval' in mode,
train_dataset=train_dataset,
validation_dataset=validation_dataset,
checkpoint_exporter=checkpoint_exporter)
train_lib.run_experiment(
......@@ -117,16 +133,6 @@ def main(_) -> None:
trainer=trainer)
else: # Compile/fit
train_dataset = None
if 'train' in mode:
train_dataset = orbit.utils.make_distributed_dataset(
strategy, task.build_inputs, params.task.train_data)
eval_dataset = None
if 'eval' in mode:
eval_dataset = orbit.utils.make_distributed_dataset(
strategy, task.build_inputs, params.task.validation_data)
checkpoint = tf.train.Checkpoint(model=model, optimizer=model.optimizer)
latest_checkpoint = tf.train.latest_checkpoint(model_dir)
......@@ -169,7 +175,7 @@ def main(_) -> None:
initial_epoch=initial_epoch,
epochs=num_epochs,
steps_per_epoch=params.trainer.validation_interval,
validation_data=eval_dataset,
validation_data=validation_dataset,
validation_steps=eval_steps,
callbacks=callbacks,
)
......@@ -177,7 +183,7 @@ def main(_) -> None:
logging.info('Train history: %s', history.history)
elif mode == 'eval':
logging.info('Evaluation started')
validation_output = model.evaluate(eval_dataset, steps=eval_steps)
validation_output = model.evaluate(validation_dataset, steps=eval_steps)
logging.info('Evaluation output: %s', validation_output)
else:
raise NotImplementedError('The mode is not implemented: %s' % mode)
......
......@@ -40,6 +40,8 @@ def _get_params_override(vocab_sizes,
'task': {
'model': {
'vocab_sizes': vocab_sizes,
'embedding_dim': [8] * len(vocab_sizes),
'bottom_mlp': [64, 32, 8],
'interaction': interaction,
},
'train_data': {
......
six
google-api-python-client>=1.6.7
google-cloud-bigquery>=0.31.0
kaggle>=1.3.9
numpy>=1.15.4
oauth2client
......
This directory contains the new design of the TF Model Garden vision framework.
Stay tuned.
......@@ -80,6 +80,11 @@ class SpineNetMobile(hyperparams.Config):
expand_ratio: int = 6
min_level: int = 3
max_level: int = 7
# If use_keras_upsampling_2d is True, the model uses the Keras UpSampling2D
# layer instead of the optimized custom TF op, which makes the model more
# Keras-style. We set this flag to True when applying QAT from the Model
# Optimization Toolkit, which requires the model to use Keras layers.
use_keras_upsampling_2d: bool = False
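Hypothetical usage of the new flag (the `backbones` import path is an assumption based on the surrounding config code):

```python
from official.vision.beta.configs import backbones

# Assumption: SpineNetMobile lives in the vision backbones config module.
backbone = backbones.SpineNetMobile(
    model_id='49',
    use_keras_upsampling_2d=True)  # required when applying QAT with Keras layers
```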
@dataclasses.dataclass
......
......@@ -78,6 +78,7 @@ class DataConfig(cfg.DataConfig):
parser: Parser = Parser()
shuffle_buffer_size: int = 10000
file_type: str = 'tfrecord'
drop_remainder: bool = True
@dataclasses.dataclass
......@@ -215,7 +216,8 @@ class Losses(hyperparams.Config):
class MaskRCNNTask(cfg.TaskConfig):
model: MaskRCNN = MaskRCNN()
train_data: DataConfig = DataConfig(is_training=True)
validation_data: DataConfig = DataConfig(is_training=False)
validation_data: DataConfig = DataConfig(is_training=False,
drop_remainder=False)
losses: Losses = Losses()
init_checkpoint: Optional[str] = None
init_checkpoint_modules: str = 'all' # all or backbone
......@@ -260,7 +262,8 @@ def fasterrcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
validation_data=DataConfig(
input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
is_training=False,
global_batch_size=eval_batch_size)),
global_batch_size=eval_batch_size,
drop_remainder=False)),
trainer=cfg.TrainerConfig(
train_steps=22500,
validation_steps=coco_val_samples // eval_batch_size,
......@@ -324,7 +327,8 @@ def maskrcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
validation_data=DataConfig(
input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
is_training=False,
global_batch_size=eval_batch_size)),
global_batch_size=eval_batch_size,
drop_remainder=False)),
trainer=cfg.TrainerConfig(
train_steps=22500,
validation_steps=coco_val_samples // eval_batch_size,
......@@ -401,7 +405,8 @@ def maskrcnn_spinenet_coco() -> cfg.ExperimentConfig:
validation_data=DataConfig(
input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
is_training=False,
global_batch_size=eval_batch_size)),
global_batch_size=eval_batch_size,
drop_remainder=False)),
trainer=cfg.TrainerConfig(
train_steps=steps_per_epoch * 350,
validation_steps=coco_val_samples // eval_batch_size,
......@@ -486,7 +491,8 @@ def cascadercnn_spinenet_coco() -> cfg.ExperimentConfig:
validation_data=DataConfig(
input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
is_training=False,
global_batch_size=eval_batch_size)),
global_batch_size=eval_batch_size,
drop_remainder=False)),
trainer=cfg.TrainerConfig(
train_steps=steps_per_epoch * 500,
validation_steps=coco_val_samples // eval_batch_size,
......
......@@ -130,6 +130,13 @@ class RetinaNet(hyperparams.Config):
norm_activation: common.NormActivation = common.NormActivation()
@dataclasses.dataclass
class ExportConfig(hyperparams.Config):
output_normalized_coordinates: bool = False
cast_num_detections_to_float: bool = False
cast_detection_classes_to_float: bool = False
@dataclasses.dataclass
class RetinaNetTask(cfg.TaskConfig):
model: RetinaNet = RetinaNet()
......@@ -140,6 +147,7 @@ class RetinaNetTask(cfg.TaskConfig):
init_checkpoint_modules: str = 'all' # all or backbone
annotation_file: Optional[str] = None
per_category_metrics: bool = False
export_config: ExportConfig = ExportConfig()
@exp_factory.register_config_factory('retinanet')
......@@ -338,7 +346,8 @@ def retinanet_spinenet_mobile_coco() -> cfg.ExperimentConfig:
model_id='49',
stochastic_depth_drop_rate=0.2,
min_level=3,
max_level=7)),
max_level=7,
use_keras_upsampling_2d=False)),
decoder=decoders.Decoder(
type='identity', identity=decoders.Identity()),
head=RetinaNetHead(num_filters=48, use_separable_conv=True),
......
......@@ -3,17 +3,19 @@
# Processes the COCO few-shot benchmark into TFRecord files. Requires `wget`.
tmp_dir=$(mktemp -d -t coco-XXXXXXXXXX)
base_image_dir="/tmp/coco_images"
output_dir="/tmp/coco_few_shot"
while getopts "o:" o; do
while getopts ":i:o:" o; do
case "${o}" in
o) output_dir=${OPTARG} ;;
*) echo "Usage: ${0} [-o <output_dir>]" 1>&2; exit 1 ;;
i) base_image_dir=${OPTARG} ;;
*) echo "Usage: ${0} [-i <base_image_dir>] [-o <output_dir>]" 1>&2; exit 1 ;;
esac
done
cocosplit_url="dl.yf.io/fs-det/datasets/cocosplit"
wget --recursive --no-parent -q --show-progress --progress=bar:force:noscroll \
-P "${tmp_dir}" -A "5k.json,*10shot*.json,*30shot*.json" \
-P "${tmp_dir}" -A "trainvalno5k.json,5k.json,*10shot*.json,*30shot*.json" \
"http://${cocosplit_url}/"
mv "${tmp_dir}/${cocosplit_url}/"* "${tmp_dir}"
rm -rf "${tmp_dir}/${cocosplit_url}/"
......@@ -25,8 +27,8 @@ for seed in {0..9}; do
for shots in 10 30; do
python create_coco_tf_record.py \
--logtostderr \
--image_dir=/namespace/vale-project/datasets/mscoco_raw/images/train2014 \
--image_dir=/namespace/vale-project/datasets/mscoco_raw/images/val2014 \
--image_dir="${base_image_dir}/train2014" \
--image_dir="${base_image_dir}/val2014" \
--image_info_file="${tmp_dir}/${shots}shot_seed${seed}.json" \
--object_annotations_file="${tmp_dir}/${shots}shot_seed${seed}.json" \
--caption_annotations_file="" \
......@@ -37,12 +39,32 @@ done
python create_coco_tf_record.py \
--logtostderr \
--image_dir=/namespace/vale-project/datasets/mscoco_raw/images/train2014 \
--image_dir=/namespace/vale-project/datasets/mscoco_raw/images/val2014 \
--image_dir="${base_image_dir}/train2014" \
--image_dir="${base_image_dir}/val2014" \
--image_info_file="${tmp_dir}/datasplit/5k.json" \
--object_annotations_file="${tmp_dir}/datasplit/5k.json" \
--caption_annotations_file="" \
--output_file_prefix="${output_dir}/5k" \
--num_shards=10
python create_coco_tf_record.py \
--logtostderr \
--image_dir="${base_image_dir}/train2014" \
--image_dir="${base_image_dir}/val2014" \
--image_info_file="${tmp_dir}/datasplit/trainvalno5k_base.json" \
--object_annotations_file="${tmp_dir}/datasplit/trainvalno5k_base.json" \
--caption_annotations_file="" \
--output_file_prefix="${output_dir}/trainvalno5k_base" \
--num_shards=200
python create_coco_tf_record.py \
--logtostderr \
--image_dir="${base_image_dir}/train2014" \
--image_dir="${base_image_dir}/val2014" \
--image_info_file="${tmp_dir}/datasplit/5k_base.json" \
--object_annotations_file="${tmp_dir}/datasplit/5k_base.json" \
--caption_annotations_file="" \
--output_file_prefix="${output_dir}/5k_base" \
--num_shards=10
rm -rf "${tmp_dir}"
......@@ -76,10 +76,30 @@ for _seed, _shots in itertools.product(SEEDS, SHOTS):
_shots,
_category))
# Base class IDs, as defined in
# https://github.com/ucbdrive/few-shot-object-detection/blob/master/fsdet/evaluation/coco_evaluation.py#L60-L65
BASE_CLASS_IDS = [8, 10, 11, 13, 14, 15, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
35, 36, 37, 38, 39, 40, 41, 42, 43, 46, 47, 48, 49, 50, 51,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 65, 70, 73, 74, 75,
76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
def main(unused_argv):
workdir = FLAGS.workdir
# Filter novel class annotations from the training and validation sets.
for name in ('trainvalno5k', '5k'):
file_path = os.path.join(workdir, 'datasplit', '{}.json'.format(name))
with tf.io.gfile.GFile(file_path, 'r') as f:
json_dict = json.load(f)
json_dict['annotations'] = [a for a in json_dict['annotations']
if a['category_id'] in BASE_CLASS_IDS]
output_path = os.path.join(
workdir, 'datasplit', '{}_base.json'.format(name))
with tf.io.gfile.GFile(output_path, 'w') as f:
json.dump(json_dict, f)
for seed, shots in itertools.product(SEEDS, SHOTS):
# Retrieve all examples for a given seed and shots setting.
file_paths = [os.path.join(workdir, suffix)
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""TFDS factory functions."""
from official.vision.beta.dataloaders import decoder as base_decoder
from official.vision.beta.dataloaders import tfds_detection_decoders
from official.vision.beta.dataloaders import tfds_segmentation_decoders
from official.vision.beta.dataloaders import tfds_classification_decoders
def get_classification_decoder(tfds_name: str) -> base_decoder.Decoder:
"""Gets classification decoder.
Args:
tfds_name: `str`, name of the tfds classification decoder.
Returns:
`base_decoder.Decoder` instance.
Raises:
ValueError if the tfds_name doesn't exist in the available decoders.
"""
if tfds_name in tfds_classification_decoders.TFDS_ID_TO_DECODER_MAP:
decoder = tfds_classification_decoders.TFDS_ID_TO_DECODER_MAP[tfds_name]()
else:
raise ValueError(
f'TFDS Classification {tfds_name} is not supported')
return decoder
def get_detection_decoder(tfds_name: str) -> base_decoder.Decoder:
"""Gets detection decoder.
Args:
tfds_name: `str`, name of the tfds detection decoder.
Returns:
`base_decoder.Decoder` instance.
Raises:
ValueError if the tfds_name doesn't exist in the available decoders.
"""
if tfds_name in tfds_detection_decoders.TFDS_ID_TO_DECODER_MAP:
decoder = tfds_detection_decoders.TFDS_ID_TO_DECODER_MAP[tfds_name]()
else:
raise ValueError(f'TFDS Detection {tfds_name} is not supported')
return decoder
def get_segmentation_decoder(tfds_name: str) -> base_decoder.Decoder:
"""Gets segmentation decoder.
Args:
tfds_name: `str`, name of the tfds segmentation decoder.
Returns:
`base_decoder.Decoder` instance.
Raises:
ValueError if the tfds_name doesn't exist in the available decoders.
"""
if tfds_name in tfds_segmentation_decoders.TFDS_ID_TO_DECODER_MAP:
decoder = tfds_segmentation_decoders.TFDS_ID_TO_DECODER_MAP[tfds_name]()
else:
raise ValueError(f'TFDS Segmentation {tfds_name} is not supported')
return decoder
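A hedged usage sketch of the factory above; the module name `tfds_factory` and the `'coco/2017'` registry key are assumptions based on the surrounding code, not confirmed by this diff.

```python
from official.vision.beta.dataloaders import tfds_factory

# Raises ValueError if the name is not in TFDS_ID_TO_DECODER_MAP.
decoder = tfds_factory.get_detection_decoder('coco/2017')
# decoded_tensors = decoder.decode(serialized_tfds_example)
```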