"git@developer.sourcefind.cn:zhaoyu6/sglang.git" did not exist on "f1cf6eefbec615bfec1c026f29c0f5bb06f00ba6"
Commit c8354cb4 authored by Kaushik Shivakumar's avatar Kaushik Shivakumar
Browse files

remove wrong files

parent 9efe44f1
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""A Beam job to generate embedding data for camera trap images.
This tool runs inference with an exported Object Detection model in
`saved_model` format and produces raw embeddings for camera trap data. These
embeddings contain an object-centric feature embedding from Faster R-CNN, the
datetime that the image was taken (normalized in a specific way), and the
position of the object of interest. By default, only the highest-scoring object
embedding is included.
Steps to generate an embedding dataset:
1. Use object_detection/export_inference_graph.py to get a Faster R-CNN
`saved_model` for inference. The input node must accept a tf.Example proto.
2. Run this tool with the `saved_model` from step 1 and a TFRecord of tf.Example
protos containing images for inference.
Example Usage:
--------------
python tensorflow_models/object_detection/export_inference_graph.py \
--alsologtostderr \
--input_type tf_example \
--pipeline_config_path path/to/faster_rcnn_model.config \
--trained_checkpoint_prefix path/to/model.ckpt \
--output_directory path/to/exported_model_directory \
--additional_output_tensor_names detection_features
python generate_embedding_data.py \
--alsologtostderr \
--embedding_input_tfrecord path/to/input_tfrecords* \
--embedding_output_tfrecord path/to/output_tfrecords \
--embedding_model_dir path/to/exported_model_directory/saved_model
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import datetime
import os
import threading
import numpy as np
import six
import tensorflow.compat.v1 as tf
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
class GenerateEmbeddingDataFn(beam.DoFn):
"""Generates embedding data for camera trap images.
This Beam DoFn performs inference with an object detection `saved_model` and
produces contextual embedding vectors.
"""
session_lock = threading.Lock()
def __init__(self, model_dir, top_k_embedding_count,
bottom_k_embedding_count):
"""Initialization function.
Args:
model_dir: A directory containing the saved model.
top_k_embedding_count: The number of high-confidence embeddings to store.
bottom_k_embedding_count: The number of low-confidence embeddings to store.
"""
self._model_dir = model_dir
self._session = None
self._num_examples_processed = beam.metrics.Metrics.counter(
'embedding_data_generation', 'num_tf_examples_processed')
self._top_k_embedding_count = top_k_embedding_count
self._bottom_k_embedding_count = bottom_k_embedding_count
def start_bundle(self):
self._load_inference_model()
def _load_inference_model(self):
# Because initialization of the tf.Session is expensive we share
# one instance across all threads in the worker. This is possible since
# tf.Session.run() is thread safe.
with self.session_lock:
if self._session is None:
graph = tf.Graph()
self._session = tf.Session(graph=graph)
with graph.as_default():
meta_graph = tf.saved_model.loader.load(
self._session, [tf.saved_model.tag_constants.SERVING],
self._model_dir)
signature = meta_graph.signature_def['serving_default']
input_tensor_name = signature.inputs['input_tensor'].name
detection_features_name = signature.outputs['detection_features'].name
detection_boxes_name = signature.outputs['detection_boxes'].name
num_detections_name = signature.outputs['num_detections'].name
self._input = graph.get_tensor_by_name(input_tensor_name)
self._embedding_node = graph.get_tensor_by_name(detection_features_name)
self._box_node = graph.get_tensor_by_name(detection_boxes_name)
self._scores_node = graph.get_tensor_by_name(
signature.outputs['detection_scores'].name)
self._num_detections = graph.get_tensor_by_name(num_detections_name)
tf.logging.info(signature.outputs['detection_features'].name)
tf.logging.info(signature.outputs['detection_boxes'].name)
tf.logging.info(signature.outputs['num_detections'].name)
def process(self, tfrecord_entry):
return self._run_inference_and_generate_embedding(tfrecord_entry)
def _run_inference_and_generate_embedding(self, tfrecord_entry):
input_example = tf.train.Example.FromString(tfrecord_entry)
# Convert date_captured datetime string to unix time integer and store
def get_date_captured(example):
date_captured = datetime.datetime.strptime(
six.ensure_str(
example.features.feature[
'image/date_captured'].bytes_list.value[0]),
'%Y-%m-%d %H:%M:%S')
return date_captured
try:
date_captured = get_date_captured(input_example)
except Exception: # pylint: disable=broad-except
# we require date_captured to be available for all images
return []
def embed_date_captured(date_captured):
"""Encodes the datetime of the image."""
embedded_date_captured = []
month_max = 12.0
day_max = 31.0
hour_max = 24.0
minute_max = 60.0
min_year = 1990.0
max_year = 2030.0
year = (date_captured.year-min_year)/float(max_year-min_year)
embedded_date_captured.append(year)
month = (date_captured.month-1)/month_max
embedded_date_captured.append(month)
day = (date_captured.day-1)/day_max
embedded_date_captured.append(day)
hour = date_captured.hour/hour_max
embedded_date_captured.append(hour)
minute = date_captured.minute/minute_max
embedded_date_captured.append(minute)
return np.asarray(embedded_date_captured)
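# Worked example (hypothetical input): an image captured at
# '2020-07-01 12:30:00' embeds as
# [(2020-1990)/40, (7-1)/12, (1-1)/31, 12/24, 30/60] = [0.75, 0.5, 0.0, 0.5, 0.5].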
def embed_position_and_size(box):
"""Encodes the bounding box of the object of interest."""
ymin = box[0]
xmin = box[1]
ymax = box[2]
xmax = box[3]
w = xmax - xmin
h = ymax - ymin
x = xmin + w / 2.0
y = ymin + h / 2.0
return np.asarray([x, y, w, h])
unix_time = (
(date_captured - datetime.datetime.fromtimestamp(0)).total_seconds())
example = tf.train.Example()
example.features.feature['image/unix_time'].float_list.value.extend(
[unix_time])
(detection_features, detection_boxes, num_detections,
detection_scores) = self._session.run(
[
self._embedding_node, self._box_node, self._num_detections[0],
self._scores_node
],
feed_dict={self._input: [tfrecord_entry]})
num_detections = int(num_detections)
embed_all = []
score_all = []
detection_features = np.asarray(detection_features)
def get_bb_embedding(detection_features, detection_boxes, detection_scores,
index):
embedding = detection_features[0][index]
pooled_embedding = np.mean(np.mean(embedding, axis=1), axis=0)
box = detection_boxes[0][index]
position_embedding = embed_position_and_size(box)
score = detection_scores[0][index]
return np.concatenate((pooled_embedding, position_embedding)), score
temporal_embedding = embed_date_captured(date_captured)
embedding_count = 0
for index in range(min(num_detections, self._top_k_embedding_count)):
bb_embedding, score = get_bb_embedding(
detection_features, detection_boxes, detection_scores, index)
embed_all.extend(bb_embedding)
embed_all.extend(temporal_embedding)
score_all.append(score)
embedding_count += 1
for index in range(
max(0, num_detections - 1),
max(-1, num_detections - 1 - self._bottom_k_embedding_count), -1):
bb_embedding, score = get_bb_embedding(
detection_features, detection_boxes, detection_scores, index)
embed_all.extend(bb_embedding)
embed_all.extend(temporal_embedding)
score_all.append(score)
embedding_count += 1
if embedding_count == 0:
bb_embedding, score = get_bb_embedding(
detection_features, detection_boxes, detection_scores, 0)
embed_all.extend(bb_embedding)
embed_all.extend(temporal_embedding)
score_all.append(score)
# Takes max in case embedding_count is 0.
embedding_length = len(embed_all) // max(1, embedding_count)
embed_all = np.asarray(embed_all)
example.features.feature['image/embedding'].float_list.value.extend(
embed_all)
example.features.feature['image/embedding_score'].float_list.value.extend(
score_all)
example.features.feature['image/embedding_length'].int64_list.value.append(
embedding_length)
example.features.feature['image/embedding_count'].int64_list.value.append(
embedding_count)
# Add other essential example attributes
example.features.feature['image/encoded'].bytes_list.value.extend(
input_example.features.feature['image/encoded'].bytes_list.value)
example.features.feature['image/height'].int64_list.value.extend(
input_example.features.feature['image/height'].int64_list.value)
example.features.feature['image/width'].int64_list.value.extend(
input_example.features.feature['image/width'].int64_list.value)
example.features.feature['image/source_id'].bytes_list.value.extend(
input_example.features.feature['image/source_id'].bytes_list.value)
example.features.feature['image/location'].bytes_list.value.extend(
input_example.features.feature['image/location'].bytes_list.value)
example.features.feature['image/date_captured'].bytes_list.value.extend(
input_example.features.feature['image/date_captured'].bytes_list.value)
example.features.feature['image/class/text'].bytes_list.value.extend(
input_example.features.feature['image/class/text'].bytes_list.value)
example.features.feature['image/class/label'].int64_list.value.extend(
input_example.features.feature['image/class/label'].int64_list.value)
example.features.feature['image/seq_id'].bytes_list.value.extend(
input_example.features.feature['image/seq_id'].bytes_list.value)
example.features.feature['image/seq_num_frames'].int64_list.value.extend(
input_example.features.feature['image/seq_num_frames'].int64_list.value)
example.features.feature['image/seq_frame_num'].int64_list.value.extend(
input_example.features.feature['image/seq_frame_num'].int64_list.value)
example.features.feature['image/object/bbox/ymax'].float_list.value.extend(
input_example.features.feature[
'image/object/bbox/ymax'].float_list.value)
example.features.feature['image/object/bbox/ymin'].float_list.value.extend(
input_example.features.feature[
'image/object/bbox/ymin'].float_list.value)
example.features.feature['image/object/bbox/xmax'].float_list.value.extend(
input_example.features.feature[
'image/object/bbox/xmax'].float_list.value)
example.features.feature['image/object/bbox/xmin'].float_list.value.extend(
input_example.features.feature[
'image/object/bbox/xmin'].float_list.value)
example.features.feature[
'image/object/class/score'].float_list.value.extend(
input_example.features.feature[
'image/object/class/score'].float_list.value)
example.features.feature[
'image/object/class/label'].int64_list.value.extend(
input_example.features.feature[
'image/object/class/label'].int64_list.value)
example.features.feature[
'image/object/class/text'].bytes_list.value.extend(
input_example.features.feature[
'image/object/class/text'].bytes_list.value)
self._num_examples_processed.inc(1)
return [example]
def construct_pipeline(pipeline, input_tfrecord, output_tfrecord, model_dir,
top_k_embedding_count, bottom_k_embedding_count,
num_shards):
"""Returns a beam pipeline to run object detection inference.
Args:
pipeline: Initialized beam pipeline.
input_tfrecord: A TFRecord of tf.train.Example protos containing images.
output_tfrecord: A TFRecord of tf.train.Example protos that contain images
in the input TFRecord and the detections from the model.
model_dir: Path to `saved_model` to use for inference.
top_k_embedding_count: The number of high-confidence embeddings to store.
bottom_k_embedding_count: The number of low-confidence embeddings to store.
num_shards: The number of output shards.
"""
input_collection = (
pipeline | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
input_tfrecord,
coder=beam.coders.BytesCoder()))
output_collection = input_collection | 'ExtractEmbedding' >> beam.ParDo(
GenerateEmbeddingDataFn(model_dir, top_k_embedding_count,
bottom_k_embedding_count))
output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
_ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord,
num_shards=num_shards,
coder=beam.coders.ProtoCoder(tf.train.Example))
def parse_args(argv):
"""Command-line argument parser.
Args:
argv: command line arguments
Returns:
beam_args: Arguments for the beam pipeline.
pipeline_args: Arguments for the pipeline options, such as runner type.
"""
parser = argparse.ArgumentParser()
parser.add_argument(
'--embedding_input_tfrecord',
dest='embedding_input_tfrecord',
required=True,
help='TFRecord containing images in tf.Example format for object '
'detection.')
parser.add_argument(
'--embedding_output_tfrecord',
dest='embedding_output_tfrecord',
required=True,
help='TFRecord containing embeddings in tf.Example format.')
parser.add_argument(
'--embedding_model_dir',
dest='embedding_model_dir',
required=True,
help='Path to directory containing an object detection SavedModel with '
'detection_box_classifier_features in the output.')
parser.add_argument(
'--top_k_embedding_count',
dest='top_k_embedding_count',
default=1,
help='The number of top k embeddings to add to the memory bank.')
parser.add_argument(
'--bottom_k_embedding_count',
dest='bottom_k_embedding_count',
default=0,
help='The number of bottom k embeddings to add to the memory bank.')
parser.add_argument(
'--num_shards',
dest='num_shards',
default=0,
help='Number of output shards.')
beam_args, pipeline_args = parser.parse_known_args(argv)
return beam_args, pipeline_args
def main(argv=None, save_main_session=True):
"""Runs the Beam pipeline that performs inference.
Args:
argv: Command line arguments.
save_main_session: Whether to save the main session.
"""
args, pipeline_args = parse_args(argv)
pipeline_options = beam.options.pipeline_options.PipelineOptions(
pipeline_args)
pipeline_options.view_as(
beam.options.pipeline_options.SetupOptions).save_main_session = (
save_main_session)
dirname = os.path.dirname(args.embedding_output_tfrecord)
tf.io.gfile.makedirs(dirname)
p = beam.Pipeline(options=pipeline_options)
construct_pipeline(
p,
args.embedding_input_tfrecord,
args.embedding_output_tfrecord,
args.embedding_model_dir,
args.top_k_embedding_count,
args.bottom_k_embedding_count,
args.num_shards)
p.run()
if __name__ == '__main__':
main()
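# Hedged usage sketch (not part of the original tool): each output tf.Example
# stores a flat `image/embedding` float list together with
# `image/embedding_count` and `image/embedding_length`, so the per-detection
# vectors can be recovered with a reshape. Field names match the writer code
# above; the file path below is illustrative only.
#
#   import numpy as np
#   import tensorflow.compat.v1 as tf
#
#   for record in tf.python_io.tf_record_iterator(
#       'path/to/output_tfrecords-00000-of-00010'):
#     example = tf.train.Example.FromString(record)
#     flat = np.asarray(
#         example.features.feature['image/embedding'].float_list.value)
#     count = example.features.feature[
#         'image/embedding_count'].int64_list.value[0]
#     length = example.features.feature[
#         'image/embedding_length'].int64_list.value[0]
#     embeddings = flat.reshape([count, length])  # one row per stored detection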
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""A Beam job to generate embedding data for camera trap images.
This tool runs inference with an exported Object Detection model in
`saved_model` format and produces raw embeddings for camera trap data. These
embeddings contain an object-centric feature embedding from Faster R-CNN, the
datetime that the image was taken (normalized in a specific way), and the
position of the object of interest. By default, only the highest-scoring object
embedding is included.
Steps to generate an embedding dataset:
1. Use object_detection/export_inference_graph.py to get a Faster R-CNN
`saved_model` for inference. The input node must accept a tf.Example proto.
2. Run this tool with the `saved_model` from step 1 and a TFRecord of tf.Example
protos containing images for inference.
Example Usage:
--------------
python tensorflow_models/object_detection/export_inference_graph.py \
--alsologtostderr \
--input_type tf_example \
--pipeline_config_path path/to/faster_rcnn_model.config \
--trained_checkpoint_prefix path/to/model.ckpt \
--output_directory path/to/exported_model_directory \
--additional_output_tensor_names detection_features
python generate_embedding_data.py \
--alsologtostderr \
--embedding_input_tfrecord path/to/input_tfrecords* \
--embedding_output_tfrecord path/to/output_tfrecords \
--embedding_model_dir path/to/exported_model_directory/saved_model
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import datetime
import os
import threading
import numpy as np
import six
import tensorflow.compat.v1 as tf
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
def _load_inference_model(args):
# Because initialization of the tf.Session is expensive we share
# one instance across all threads in the worker. This is possible since
# tf.Session.run() is thread safe.
print(args)
args = vars(args)
session_lock = threading.Lock()
session = None
with session_lock:
if session is None:
graph = tf.Graph()
session = tf.Session(graph=graph)
with graph.as_default():
meta_graph = tf.saved_model.loader.load(
session, [tf.saved_model.tag_constants.SERVING],
args['embedding_model_dir'])
signature = meta_graph.signature_def['serving_default']
print(signature.inputs)
print(type(signature.inputs))
input_tensor_name = signature.inputs['input_tensor'].name
print(input_tensor_name)
_input = graph.get_tensor_by_name(input_tensor_name)
print(_input.shape)
detection_features_name = signature.outputs['detection_features'].name
detection_boxes_name = signature.outputs['detection_boxes'].name
num_detections_name = signature.outputs['num_detections'].name
# Look up the output tensors locally; this standalone debugging variant has no
# class instance, so there is no `self` to attach them to.
embedding_node = graph.get_tensor_by_name(detection_features_name)
box_node = graph.get_tensor_by_name(detection_boxes_name)
scores_node = graph.get_tensor_by_name(
signature.outputs['detection_scores'].name)
num_detections = graph.get_tensor_by_name(num_detections_name)
tf.logging.info(signature.outputs['detection_features'].name)
tf.logging.info(signature.outputs['detection_boxes'].name)
tf.logging.info(signature.outputs['num_detections'].name)
print("Hello")
def parse_args(argv):
"""Command-line argument parser.
Args:
argv: command line arguments
Returns:
beam_args: Arguments for the beam pipeline.
pipeline_args: Arguments for the pipeline options, such as runner type.
"""
parser = argparse.ArgumentParser()
parser.add_argument(
'--embedding_input_tfrecord',
dest='embedding_input_tfrecord',
required=True,
help='TFRecord containing images in tf.Example format for object '
'detection.')
parser.add_argument(
'--embedding_output_tfrecord',
dest='embedding_output_tfrecord',
required=True,
help='TFRecord containing embeddings in tf.Example format.')
parser.add_argument(
'--embedding_model_dir',
dest='embedding_model_dir',
required=True,
help='Path to directory containing an object detection SavedModel with '
'detection_box_classifier_features in the output.')
parser.add_argument(
'--top_k_embedding_count',
dest='top_k_embedding_count',
default=1,
help='The number of top k embeddings to add to the memory bank.')
parser.add_argument(
'--bottom_k_embedding_count',
dest='bottom_k_embedding_count',
default=0,
help='The number of bottom k embeddings to add to the memory bank.')
parser.add_argument(
'--num_shards',
dest='num_shards',
default=0,
help='Number of output shards.')
beam_args, pipeline_args = parser.parse_known_args(argv)
return beam_args, pipeline_args
def main(argv=None, save_main_session=True):
"""Runs the Beam pipeline that performs inference.
Args:
argv: Command line arguments.
save_main_session: Whether to save the main session.
"""
args, pipeline_args = parse_args(argv)
_load_inference_model(args)
if __name__ == '__main__':
main()
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Library functions for ContextRCNN."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow.compat.v1 as tf
import tf_slim as slim
# The negative value used in padding the invalid weights.
_NEGATIVE_PADDING_VALUE = -100000
def filter_weight_value(weights, values, valid_mask):
"""Filters weights and values based on valid_mask.
_NEGATIVE_PADDING_VALUE will be added to invalid elements in the weights to
avoid their contribution in softmax. 0 will be set for the invalid elements in
the values.
Args:
weights: A float Tensor of shape [batch_size, input_size, context_size].
values: A float Tensor of shape [batch_size, context_size,
projected_dimension].
valid_mask: A boolean Tensor of shape [batch_size, context_size]. True means
valid and False means invalid.
Returns:
weights: A float Tensor of shape [batch_size, input_size, context_size].
values: A float Tensor of shape [batch_size, context_size,
projected_dimension].
Raises:
ValueError: If the shapes of the inputs don't match.
"""
w_batch_size, _, w_context_size = weights.shape
v_batch_size, v_context_size, _ = values.shape
m_batch_size, m_context_size = valid_mask.shape
if w_batch_size != v_batch_size or v_batch_size != m_batch_size:
raise ValueError("Please make sure the first dimension of the input"
" tensors are the same.")
if w_context_size != v_context_size:
raise ValueError("Please make sure the third dimension of weights matches"
" the second dimension of values.")
if w_context_size != m_context_size:
raise ValueError("Please make sure the third dimension of the weights"
" matches the second dimension of the valid_mask.")
valid_mask = valid_mask[..., tf.newaxis]
# Force the invalid weights to be very negative so it won't contribute to
# the softmax.
weights += tf.transpose(
tf.cast(tf.math.logical_not(valid_mask), weights.dtype) *
_NEGATIVE_PADDING_VALUE,
perm=[0, 2, 1])
# Force the invalid values to be 0.
values *= tf.cast(valid_mask, values.dtype)
return weights, values
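# Example (mirrors the unit test below): for weights of shape [2, 3, 2] filled
# with 4, values of shape [2, 2, 4] filled with 1, and
# valid_mask = [[True, True], [True, False]], the invalid second context entry
# of the second batch element has _NEGATIVE_PADDING_VALUE added to its weights
# and its values zeroed out.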
def compute_valid_mask(num_valid_elements, num_elements):
"""Computes mask of valid entries within padded context feature.
Args:
num_valid_elements: An int32 Tensor of shape [batch_size].
num_elements: An int32 Tensor.
Returns:
A boolean Tensor of the shape [batch_size, num_elements]. True means
valid and False means invalid.
"""
batch_size = num_valid_elements.shape[0]
element_idxs = tf.range(num_elements, dtype=tf.int32)
batch_element_idxs = tf.tile(element_idxs[tf.newaxis, ...], [batch_size, 1])
num_valid_elements = num_valid_elements[..., tf.newaxis]
valid_mask = tf.less(batch_element_idxs, num_valid_elements)
return valid_mask
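# Example (mirrors the unit test below): with num_valid_elements = [1, 2] and
# num_elements = 3, the returned mask is
# [[True, False, False], [True, True, False]].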
def project_features(features, projection_dimension, is_training, normalize):
"""Projects features to another feature space.
Args:
features: A float Tensor of shape [batch_size, features_size,
num_features].
projection_dimension: An int32 Tensor.
is_training: A boolean Tensor (affecting batch normalization).
normalize: A boolean Tensor. If true, the output features will be l2
normalized on the last dimension.
Returns:
A float Tensor of shape [batch, features_size, projection_dimension].
"""
# TODO(guanhangwu) Figure out a better way of specifying the batch norm
# params.
batch_norm_params = {
"is_training": is_training,
"decay": 0.97,
"epsilon": 0.001,
"center": True,
"scale": True
}
batch_size, _, num_features = features.shape
features = tf.reshape(features, [-1, num_features])
projected_features = slim.fully_connected(
features,
num_outputs=projection_dimension,
activation_fn=tf.nn.relu6,
normalizer_fn=slim.batch_norm,
normalizer_params=batch_norm_params)
projected_features = tf.reshape(projected_features,
[batch_size, -1, projection_dimension])
if normalize:
projected_features = tf.math.l2_normalize(projected_features, axis=-1)
return projected_features
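# Example (shapes as exercised by the unit test below): features of shape
# [2, 3, 4] projected with projection_dimension = 2 yield a Tensor of shape
# [2, 3, 2].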
def attention_block(input_features, context_features, bottleneck_dimension,
output_dimension, attention_temperature, valid_mask,
is_training):
"""Generic attention block.
Args:
input_features: A float Tensor of shape [batch_size, input_size,
num_input_features].
context_features: A float Tensor of shape [batch_size, context_size,
num_context_features].
bottleneck_dimension: An int32 Tensor representing the bottleneck dimension
for intermediate projections.
output_dimension: An int32 Tensor representing the last dimension of the
output feature.
attention_temperature: A float Tensor. It controls the temperature of the
softmax used for the weights calculation. The formula for the calculation is:
weights = exp(weights / temperature) / sum(exp(weights / temperature))
valid_mask: A boolean Tensor of shape [batch_size, context_size].
is_training: A boolean Tensor (affecting batch normalization).
Returns:
A float Tensor of shape [batch_size, input_size, output_dimension].
"""
with tf.variable_scope("AttentionBlock"):
queries = project_features(
input_features, bottleneck_dimension, is_training, normalize=True)
keys = project_features(
context_features, bottleneck_dimension, is_training, normalize=True)
values = project_features(
context_features, bottleneck_dimension, is_training, normalize=True)
weights = tf.matmul(queries, keys, transpose_b=True)
weights, values = filter_weight_value(weights, values, valid_mask)
weights = tf.nn.softmax(weights / attention_temperature)
features = tf.matmul(weights, values)
output_features = project_features(
features, output_dimension, is_training, normalize=False)
return output_features
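# Example (shapes as exercised by the unit test below): input_features of shape
# [2, 3, 4] attended over context_features of shape [2, 2, 3] with a boolean
# valid_mask of shape [2, 2] produce output_features of shape
# [2, 3, output_dimension].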
def compute_box_context_attention(box_features, context_features,
valid_context_size, bottleneck_dimension,
attention_temperature, is_training):
"""Computes the attention feature from the context given a batch of box.
Args:
box_features: A float Tensor of shape [batch_size, max_num_proposals,
height, width, channels]. It is pooled features from first stage
proposals.
context_features: A float Tensor of shape [batch_size, context_size,
num_context_features].
valid_context_size: An int32 Tensor of shape [batch_size].
bottleneck_dimension: An int32 Tensor representing the bottleneck dimension
for intermediate projections.
attention_temperature: A float Tensor. It controls the temperature of the
softmax used for the weights calculation. The formula for the calculation is:
weights = exp(weights / temperature) / sum(exp(weights / temperature))
is_training: A boolean Tensor (affecting batch normalization).
Returns:
A float Tensor of shape [batch_size, max_num_proposals, 1, 1, channels].
"""
_, context_size, _ = context_features.shape
valid_mask = compute_valid_mask(valid_context_size, context_size)
channels = box_features.shape[-1]
# Average pools over height and width dimension so that the shape of
# box_features becomes [batch_size, max_num_proposals, channels].
box_features = tf.reduce_mean(box_features, [2, 3])
output_features = attention_block(box_features, context_features,
bottleneck_dimension, channels.value,
attention_temperature, valid_mask,
is_training)
# Expands the dimension back to match with the original feature map.
output_features = output_features[:, :, tf.newaxis, tf.newaxis, :]
return output_features
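# Minimal usage sketch (values taken from the unit test below); all tensors
# here are placeholders for real pooled proposal features and per-image
# context features:
#
#   box_features = tf.ones([2, 3, 4, 4, 4], tf.float32)
#   context_features = tf.ones([2, 5, 6], tf.float32)
#   valid_context_size = tf.constant((2, 3), tf.int32)
#   attention = compute_box_context_attention(
#       box_features, context_features, valid_context_size,
#       bottleneck_dimension=10, attention_temperature=1, is_training=False)
#   # `attention` has shape [2, 3, 1, 1, 4], matching box_features' channels.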
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for context_rcnn_lib."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
from absl.testing import parameterized
import tensorflow.compat.v1 as tf
from object_detection.meta_architectures import context_rcnn_lib
from object_detection.utils import test_case
from object_detection.utils import tf_version
_NEGATIVE_PADDING_VALUE = -100000
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class ContextRcnnLibTest(parameterized.TestCase, test_case.TestCase,
tf.test.TestCase):
"""Tests for the functions in context_rcnn_lib."""
def test_compute_valid_mask(self):
num_elements = tf.constant(3, tf.int32)
num_valid_elementss = tf.constant((1, 2), tf.int32)
valid_mask = context_rcnn_lib.compute_valid_mask(num_valid_elementss,
num_elements)
expected_valid_mask = tf.constant([[1, 0, 0], [1, 1, 0]], tf.float32)
self.assertAllEqual(valid_mask, expected_valid_mask)
def test_filter_weight_value(self):
weights = tf.ones((2, 3, 2), tf.float32) * 4
values = tf.ones((2, 2, 4), tf.float32)
valid_mask = tf.constant([[True, True], [True, False]], tf.bool)
filtered_weights, filtered_values = context_rcnn_lib.filter_weight_value(
weights, values, valid_mask)
expected_weights = tf.constant([[[4, 4], [4, 4], [4, 4]],
[[4, _NEGATIVE_PADDING_VALUE + 4],
[4, _NEGATIVE_PADDING_VALUE + 4],
[4, _NEGATIVE_PADDING_VALUE + 4]]])
expected_values = tf.constant([[[1, 1, 1, 1], [1, 1, 1, 1]],
[[1, 1, 1, 1], [0, 0, 0, 0]]])
self.assertAllEqual(filtered_weights, expected_weights)
self.assertAllEqual(filtered_values, expected_values)
# Changes the valid_mask so the results will be different.
valid_mask = tf.constant([[True, True], [False, False]], tf.bool)
filtered_weights, filtered_values = context_rcnn_lib.filter_weight_value(
weights, values, valid_mask)
expected_weights = tf.constant(
[[[4, 4], [4, 4], [4, 4]],
[[_NEGATIVE_PADDING_VALUE + 4, _NEGATIVE_PADDING_VALUE + 4],
[_NEGATIVE_PADDING_VALUE + 4, _NEGATIVE_PADDING_VALUE + 4],
[_NEGATIVE_PADDING_VALUE + 4, _NEGATIVE_PADDING_VALUE + 4]]])
expected_values = tf.constant([[[1, 1, 1, 1], [1, 1, 1, 1]],
[[0, 0, 0, 0], [0, 0, 0, 0]]])
self.assertAllEqual(filtered_weights, expected_weights)
self.assertAllEqual(filtered_values, expected_values)
@parameterized.parameters((2, True, True), (2, False, True),
(10, True, False), (10, False, False))
def test_project_features(self, projection_dimension, is_training, normalize):
features = tf.ones([2, 3, 4], tf.float32)
projected_features = context_rcnn_lib.project_features(
features,
projection_dimension,
is_training=is_training,
normalize=normalize)
# Makes sure the shape is correct.
self.assertAllEqual(projected_features.shape, [2, 3, projection_dimension])
@parameterized.parameters(
(2, 10, 1),
(3, 10, 2),
(4, 20, 3),
(5, 20, 4),
(7, 20, 5),
)
def test_attention_block(self, bottleneck_dimension, output_dimension,
attention_temperature):
input_features = tf.ones([2, 3, 4], tf.float32)
context_features = tf.ones([2, 2, 3], tf.float32)
valid_mask = tf.constant([[True, True], [False, False]], tf.bool)
is_training = False
output_features = context_rcnn_lib.attention_block(
input_features, context_features, bottleneck_dimension,
output_dimension, attention_temperature, valid_mask, is_training)
# Makes sure the shape is correct.
self.assertAllEqual(output_features.shape, [2, 3, output_dimension])
@parameterized.parameters(True, False)
def test_compute_box_context_attention(self, is_training):
box_features = tf.ones([2, 3, 4, 4, 4], tf.float32)
context_features = tf.ones([2, 5, 6], tf.float32)
valid_context_size = tf.constant((2, 3), tf.int32)
bottleneck_dimension = 10
attention_temperature = 1
attention_features = context_rcnn_lib.compute_box_context_attention(
box_features, context_features, valid_context_size,
bottleneck_dimension, attention_temperature, is_training)
# Makes sure the shape is correct.
self.assertAllEqual(attention_features.shape, [2, 3, 1, 1, 4])
if __name__ == '__main__':
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Context R-CNN meta-architecture definition.
This adds the ability to use attention over contextual features within the
Faster R-CNN object detection framework to improve object detection performance.
See https://arxiv.org/abs/1912.03538 for more information.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
from object_detection.core import standard_fields as fields
from object_detection.meta_architectures import context_rcnn_lib
from object_detection.meta_architectures import faster_rcnn_meta_arch
class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
"""Context R-CNN Meta-architecture definition."""
def __init__(self,
is_training,
num_classes,
image_resizer_fn,
feature_extractor,
number_of_stages,
first_stage_anchor_generator,
first_stage_target_assigner,
first_stage_atrous_rate,
first_stage_box_predictor_arg_scope_fn,
first_stage_box_predictor_kernel_size,
first_stage_box_predictor_depth,
first_stage_minibatch_size,
first_stage_sampler,
first_stage_non_max_suppression_fn,
first_stage_max_proposals,
first_stage_localization_loss_weight,
first_stage_objectness_loss_weight,
crop_and_resize_fn,
initial_crop_size,
maxpool_kernel_size,
maxpool_stride,
second_stage_target_assigner,
second_stage_mask_rcnn_box_predictor,
second_stage_batch_size,
second_stage_sampler,
second_stage_non_max_suppression_fn,
second_stage_score_conversion_fn,
second_stage_localization_loss_weight,
second_stage_classification_loss_weight,
second_stage_classification_loss,
second_stage_mask_prediction_loss_weight=1.0,
hard_example_miner=None,
parallel_iterations=16,
add_summaries=True,
clip_anchors_to_image=False,
use_static_shapes=False,
resize_masks=True,
freeze_batchnorm=False,
return_raw_detections_during_predict=False,
output_final_box_features=False,
attention_bottleneck_dimension=None,
attention_temperature=None):
"""ContextRCNNMetaArch Constructor.
Args:
is_training: A boolean indicating whether the training version of the
computation graph should be constructed.
num_classes: Number of classes. Note that num_classes *does not*
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
image_resizer_fn: A callable for image resizing. This callable
takes a rank-3 image tensor of shape [height, width, channels]
(corresponding to a single image), an optional rank-3 instance mask
tensor of shape [num_masks, height, width] and returns a resized rank-3
image tensor, a resized mask tensor if one was provided in the input. In
addition this callable must also return a 1-D tensor of the form
[height, width, channels] containing the size of the true image, as the
image resizer can perform zero padding. See protos/image_resizer.proto.
feature_extractor: A FasterRCNNFeatureExtractor object.
number_of_stages: An integer taking values in {1, 2, 3}. If
1, the function will construct only the Region Proposal Network (RPN)
part of the model. If 2, the function will perform box refinement and
other auxiliary predictions all in the second stage. If 3, it will
extract features from refined boxes and perform the auxiliary
predictions on the non-maximum suppressed refined boxes.
If is_training is true and the value of number_of_stages is 3, it is
reduced to 2 since all the model heads are trained in parallel in second
stage during training.
first_stage_anchor_generator: An anchor_generator.AnchorGenerator object
(note that currently we only support
grid_anchor_generator.GridAnchorGenerator objects)
first_stage_target_assigner: Target assigner to use for first stage of
Faster R-CNN (RPN).
first_stage_atrous_rate: A single integer indicating the atrous rate for
the single convolution op which is applied to the `rpn_features_to_crop`
tensor to obtain a tensor to be used for box prediction. Some feature
extractors optionally allow for producing feature maps computed at
denser resolutions. The atrous rate is used to compensate for the
denser feature maps by using an effectively larger receptive field.
(This should typically be set to 1).
first_stage_box_predictor_arg_scope_fn: Either a
Keras layer hyperparams object or a function to construct tf-slim
arg_scope for conv2d, separable_conv2d and fully_connected ops. Used
for the RPN box predictor. If it is a keras hyperparams object the
RPN box predictor will be a Keras model. If it is a function to
construct an arg scope it will be a tf-slim box predictor.
first_stage_box_predictor_kernel_size: Kernel size to use for the
convolution op just prior to RPN box predictions.
first_stage_box_predictor_depth: Output depth for the convolution op
just prior to RPN box predictions.
first_stage_minibatch_size: The "batch size" to use for computing the
objectness and location loss of the region proposal network. This
"batch size" refers to the number of anchors selected as contributing
to the loss function for any given image within the image batch and is
only called "batch_size" due to terminology from the Faster R-CNN paper.
first_stage_sampler: Sampler to use for first stage loss (RPN loss).
first_stage_non_max_suppression_fn: batch_multiclass_non_max_suppression
callable that takes `boxes`, `scores` and optional `clip_window`(with
all other inputs already set) and returns a dictionary containing
tensors with keys: `detection_boxes`, `detection_scores`,
`detection_classes`, `num_detections`. This is used to perform non max
suppression on the boxes predicted by the Region Proposal Network
(RPN).
See `post_processing.batch_multiclass_non_max_suppression` for the type
and shape of these tensors.
first_stage_max_proposals: Maximum number of boxes to retain after
performing Non-Max Suppression (NMS) on the boxes predicted by the
Region Proposal Network (RPN).
first_stage_localization_loss_weight: A float
first_stage_objectness_loss_weight: A float
crop_and_resize_fn: A differentiable resampler to use for cropping RPN
proposal features.
initial_crop_size: A single integer indicating the output size
(width and height are set to be the same) of the initial bilinear
interpolation based cropping during ROI pooling.
maxpool_kernel_size: A single integer indicating the kernel size of the
max pool op on the cropped feature map during ROI pooling.
maxpool_stride: A single integer indicating the stride of the max pool
op on the cropped feature map during ROI pooling.
second_stage_target_assigner: Target assigner to use for second stage of
Faster R-CNN. If the model is configured with multiple prediction heads,
this target assigner is used to generate targets for all heads (with the
correct `unmatched_class_label`).
second_stage_mask_rcnn_box_predictor: Mask R-CNN box predictor to use for
the second stage.
second_stage_batch_size: The batch size used for computing the
classification and refined location loss of the box classifier. This
"batch size" refers to the number of proposals selected as contributing
to the loss function for any given image within the image batch and is
only called "batch_size" due to terminology from the Faster R-CNN paper.
second_stage_sampler: Sampler to use for second stage loss (box
classifier loss).
second_stage_non_max_suppression_fn: batch_multiclass_non_max_suppression
callable that takes `boxes`, `scores`, optional `clip_window` and
optional (kwarg) `mask` inputs (with all other inputs already set)
and returns a dictionary containing tensors with keys:
`detection_boxes`, `detection_scores`, `detection_classes`,
`num_detections`, and (optionally) `detection_masks`. See
`post_processing.batch_multiclass_non_max_suppression` for the type and
shape of these tensors.
second_stage_score_conversion_fn: Callable elementwise nonlinearity
(that takes tensors as inputs and returns tensors). This is usually
used to convert logits to probabilities.
second_stage_localization_loss_weight: A float indicating the scale factor
for second stage localization loss.
second_stage_classification_loss_weight: A float indicating the scale
factor for second stage classification loss.
second_stage_classification_loss: Classification loss used by the second
stage classifier. Either losses.WeightedSigmoidClassificationLoss or
losses.WeightedSoftmaxClassificationLoss.
second_stage_mask_prediction_loss_weight: A float indicating the scale
factor for second stage mask prediction loss. This is applicable only if
second stage box predictor is configured to predict masks.
hard_example_miner: A losses.HardExampleMiner object (can be None).
parallel_iterations: (Optional) The number of iterations allowed to run
in parallel for calls to tf.map_fn.
add_summaries: boolean (default: True) controlling whether summary ops
should be added to tensorflow graph.
clip_anchors_to_image: Normally, anchors generated for a given image size
are pruned during training if they lie outside the image window. This
option clips the anchors to be within the image instead of pruning.
use_static_shapes: If True, uses implementation of ops with static shape
guarantees.
resize_masks: Indicates whether the masks present in the groundtruth
should be resized in the model with `image_resizer_fn`.
freeze_batchnorm: Whether to freeze batch norm parameters in the first
stage box predictor during training or not. When training with a small
batch size (e.g. 1), it is desirable to freeze batch norm update and
use pretrained batch norm params.
return_raw_detections_during_predict: Whether to return raw detection
boxes in the predict() method. These are decoded boxes that have not
been through postprocessing (i.e. NMS). Default False.
output_final_box_features: Whether to output final box features. If true,
it crops the feature map based on the final box prediction and returns
in the dict as detection_features.
attention_bottleneck_dimension: A single integer. The bottleneck feature
dimension of the attention block.
attention_temperature: A single float. The attention temperature.
Raises:
ValueError: If `second_stage_batch_size` > `first_stage_max_proposals` at
training time.
ValueError: If first_stage_anchor_generator is not of type
grid_anchor_generator.GridAnchorGenerator.
"""
super(ContextRCNNMetaArch, self).__init__(
is_training,
num_classes,
image_resizer_fn,
feature_extractor,
number_of_stages,
first_stage_anchor_generator,
first_stage_target_assigner,
first_stage_atrous_rate,
first_stage_box_predictor_arg_scope_fn,
first_stage_box_predictor_kernel_size,
first_stage_box_predictor_depth,
first_stage_minibatch_size,
first_stage_sampler,
first_stage_non_max_suppression_fn,
first_stage_max_proposals,
first_stage_localization_loss_weight,
first_stage_objectness_loss_weight,
crop_and_resize_fn,
initial_crop_size,
maxpool_kernel_size,
maxpool_stride,
second_stage_target_assigner,
second_stage_mask_rcnn_box_predictor,
second_stage_batch_size,
second_stage_sampler,
second_stage_non_max_suppression_fn,
second_stage_score_conversion_fn,
second_stage_localization_loss_weight,
second_stage_classification_loss_weight,
second_stage_classification_loss,
second_stage_mask_prediction_loss_weight=(
second_stage_mask_prediction_loss_weight),
hard_example_miner=hard_example_miner,
parallel_iterations=parallel_iterations,
add_summaries=add_summaries,
clip_anchors_to_image=clip_anchors_to_image,
use_static_shapes=use_static_shapes,
resize_masks=resize_masks,
freeze_batchnorm=freeze_batchnorm,
return_raw_detections_during_predict=(
return_raw_detections_during_predict),
output_final_box_features=output_final_box_features)
self._context_feature_extract_fn = functools.partial(
context_rcnn_lib.compute_box_context_attention,
bottleneck_dimension=attention_bottleneck_dimension,
attention_temperature=attention_temperature,
is_training=is_training)
@staticmethod
def get_side_inputs(features):
"""Overrides the get_side_inputs function in the base class.
This function returns context_features and valid_context_size, which will be
used in the _compute_second_stage_input_feature_maps function.
Args:
features: A dictionary of tensors.
Returns:
A dictionary of tensors containing context_features and valid_context_size.
Raises:
ValueError: If context_features or valid_context_size is not in the
features.
"""
if (fields.InputDataFields.context_features not in features or
fields.InputDataFields.valid_context_size not in features):
raise ValueError(
"Please make sure context_features and valid_context_size are in the "
"features")
return {
fields.InputDataFields.context_features:
features[fields.InputDataFields.context_features],
fields.InputDataFields.valid_context_size:
features[fields.InputDataFields.valid_context_size]
}
def _compute_second_stage_input_feature_maps(self, features_to_crop,
proposal_boxes_normalized,
context_features,
valid_context_size):
"""Crops to a set of proposals from the feature map for a batch of images.
This function overrides the one in the FasterRCNNMetaArch. Aside from
cropping and resizing the feature maps, which is done in the parent class,
it adds context attention features to the box features.
Args:
features_to_crop: A float32 Tensor with shape [batch_size, height, width,
depth]
proposal_boxes_normalized: A float32 Tensor with shape [batch_size,
num_proposals, box_code_size] containing proposal boxes in normalized
coordinates.
context_features: A float Tensor of shape [batch_size, context_size,
num_context_features].
valid_context_size: An int32 Tensor of shape [batch_size].
Returns:
A float32 Tensor with shape [K, new_height, new_width, depth].
"""
box_features = self._crop_and_resize_fn(
features_to_crop, proposal_boxes_normalized,
[self._initial_crop_size, self._initial_crop_size])
attention_features = self._context_feature_extract_fn(
box_features=box_features,
context_features=context_features,
valid_context_size=valid_context_size)
# Adds the attention features to the box features.
box_features += attention_features
flattened_feature_maps = self._flatten_first_two_dimensions(box_features)
return self._maxpool_layer(flattened_feature_maps)
import abc
import collections
import functools
import numpy as np
import tensorflow.compat.v1 as tf
import tensorflow.compat.v2 as tf2
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import keypoint_ops
from object_detection.core import model
from object_detection.core import standard_fields as fields
from object_detection.core import target_assigner
from object_detection.utils import ops
from object_detection.utils import shape_utils
from object_detection.models import faster_rcnn_resnet_keras_feature_extractor
from object_detection.meta_architectures import detr_transformer
from object_detection.matchers import hungarian_matcher
class DETRMetaArch(model.DetectionModel):
def __init__(self):
# Number of learned object queries and the transformer hidden dimension.
self.num_queries = 100
self.hidden_dimension = 100
# Reuses the Faster R-CNN ResNet-50 Keras feature extractor as the backbone.
self.feature_extractor = faster_rcnn_resnet_keras_feature_extractor.FasterRCNNResnet50KerasFeatureExtractor(is_training=False)
self.first_stage = self.feature_extractor.get_proposal_feature_extractor_model()
self.target_assigner = target_assigner.create_target_assigner('DETR', 'detection')
self.transformer = detr_transformer.Transformer()
self.ffn = self.feature_extractor.get_box_classifier_feature_extractor_model()
# Prediction heads for box coordinates and class logits.
self.bboxes = tf.keras.layers.Dense(4)
self.cls = tf.keras.layers.Dense(2)
# Learned object queries (random-normal initialization assumed here).
self.queries = tf.Variable(tf.random.normal([self.num_queries, self.hidden_dimension]))
def predict(self, preprocessed_inputs, true_image_shapes, **side_inputs):
# Extract backbone features and flatten the spatial dimensions into a sequence.
x = self.first_stage(preprocessed_inputs)
x = tf.reshape(x, [x.shape[0], x.shape[1] * x.shape[2], x.shape[3]])
# Run the transformer over the feature sequence and the tiled object queries.
x = self.transformer([x, tf.repeat(tf.expand_dims(self.queries, 0), x.shape[0], axis=0)])
x = self.ffn(x)
# Per-query box coordinate and class logit predictions.
return self.bboxes(x), self.cls(x)
# Placeholder loss; superseded by the full implementation further below.
def loss(self, prediction_dict, true_image_shapes, scope=None):
return 1
def preprocess(self, inputs):
"""Feature-extractor specific preprocessing.
See base class.
For Faster R-CNN, we perform image resizing in the base class --- each
class subclassing FasterRCNNMetaArch is responsible for any additional
preprocessing (e.g., scaling pixel values to be in [-1, 1]).
Args:
inputs: a [batch, height_in, width_in, channels] float tensor representing
a batch of images with values between 0 and 255.0.
Returns:
preprocessed_inputs: a [batch, height_out, width_out, channels] float
tensor representing a batch of images.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
Raises:
ValueError: if inputs tensor does not have type tf.float32
"""
with tf.name_scope('Preprocessor'):
(resized_inputs,
true_image_shapes) = shape_utils.resize_images_and_return_shapes(
inputs, self._image_resizer_fn)
return (self.feature_extractor.preprocess(resized_inputs),
true_image_shapes)
def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
raise NotImplementedError("Model restoration not implemented yet.")
def restore_map(self,
fine_tune_checkpoint_type='detection',
load_all_detection_checkpoint_vars=False):
raise NotImplementedError("Model restoration not implemented yet.")
def loss(self, prediction_dict, true_image_shapes, scope=None):
"""Compute scalar loss tensors given prediction tensors.
If number_of_stages=1, only RPN related losses are computed (i.e.,
`rpn_localization_loss` and `rpn_objectness_loss`). Otherwise all
losses are computed.
Args:
prediction_dict: a dictionary holding prediction tensors (see the
documentation for the predict method. If number_of_stages=1, we
expect prediction_dict to contain `rpn_box_encodings`,
`rpn_objectness_predictions_with_background`, `rpn_features_to_crop`,
`image_shape`, and `anchors` fields. Otherwise we expect
prediction_dict to additionally contain `refined_box_encodings`,
`class_predictions_with_background`, `num_proposals`, and
`proposal_boxes` fields.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
scope: Optional scope name.
Returns:
a dictionary mapping loss keys (`first_stage_localization_loss`,
`first_stage_objectness_loss`, 'second_stage_localization_loss',
'second_stage_classification_loss') to scalar tensors representing
corresponding loss values.
"""
with tf.name_scope(scope, 'Loss', prediction_dict.values()):
(groundtruth_boxlists, groundtruth_classes_with_background_list,
groundtruth_masks_list, groundtruth_weights_list
) = self._format_groundtruth_data(
self._image_batch_shape_2d(prediction_dict['image_shape']))
loss_dict = self._loss_box_classifier(
prediction_dict['refined_box_encodings'],
prediction_dict['class_predictions_with_background'],
prediction_dict['proposal_boxes'],
prediction_dict['num_proposals'], groundtruth_boxlists,
groundtruth_classes_with_background_list,
groundtruth_weights_list, prediction_dict['image_shape'],
prediction_dict.get('mask_predictions'), groundtruth_masks_list,
prediction_dict.get(
fields.DetectionResultFields.detection_boxes),
prediction_dict.get(
fields.DetectionResultFields.num_detections))
return loss_dict
def _loss_box_classifier(self,
refined_box_encodings,
class_predictions_with_background,
proposal_boxes,
num_proposals,
groundtruth_boxlists,
groundtruth_classes_with_background_list,
groundtruth_weights_list,
image_shape,
prediction_masks=None,
groundtruth_masks_list=None,
detection_boxes=None,
num_detections=None):
"""Computes scalar box classifier loss tensors.
Uses self._detector_target_assigner to obtain regression and classification
targets for the second stage box classifier, optionally performs
hard mining, and returns losses. All losses are computed independently
for each image and then averaged across the batch.
Please note that for boxes and masks with multiple labels, the box
regression and mask prediction losses are only computed for one label.
This function assumes that the proposal boxes in the "padded" regions are
actually zero (and thus should not be matched to).
Args:
refined_box_encodings: a 3-D tensor with shape
[total_num_proposals, num_classes, box_coder.code_size] representing
predicted (final) refined box encodings. If using a shared box across
classes this will instead have shape
[total_num_proposals, 1, box_coder.code_size].
class_predictions_with_background: a 2-D tensor with shape
[total_num_proposals, num_classes + 1] containing class
predictions (logits) for each of the anchors. Note that this tensor
*includes* background class predictions (at class index 0).
proposal_boxes: [batch_size, self.max_num_proposals, 4] representing
decoded proposal bounding boxes.
num_proposals: A Tensor of type `int32`. A 1-D tensor of shape [batch]
representing the number of proposals predicted for each image in
the batch.
groundtruth_boxlists: a list of BoxLists containing coordinates of the
groundtruth boxes.
groundtruth_classes_with_background_list: a list of 2-D one-hot
(or k-hot) tensors of shape [num_boxes, num_classes + 1] containing the
class targets with the 0th index assumed to map to the background class.
groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
[num_boxes] containing weights for groundtruth boxes.
image_shape: a 1-D tensor of shape [4] representing the image shape.
prediction_masks: an optional 4-D tensor with shape [total_num_proposals,
num_classes, mask_height, mask_width] containing the instance masks for
each box.
groundtruth_masks_list: an optional list of 3-D tensors of shape
[num_boxes, image_height, image_width] containing the instance masks for
each of the boxes.
detection_boxes: 3-D float tensor of shape [batch,
max_total_detections, 4] containing post-processed detection boxes in
normalized co-ordinates.
num_detections: 1-D int32 tensor of shape [batch] containing number of
valid detections in `detection_boxes`.
Returns:
a dictionary mapping loss keys ('second_stage_localization_loss',
'second_stage_classification_loss') to scalar tensors representing
corresponding loss values.
Raises:
ValueError: if `predict_instance_masks` in
second_stage_mask_rcnn_box_predictor is True and
`groundtruth_masks_list` is not provided.
"""
with tf.name_scope('BoxClassifierLoss'):
paddings_indicator = self._padded_batched_proposals_indicator(
num_proposals, proposal_boxes.shape[1])
proposal_boxlists = [
box_list.BoxList(proposal_boxes_single_image)
for proposal_boxes_single_image in tf.unstack(proposal_boxes)]
batch_size = len(proposal_boxlists)
num_proposals_or_one = tf.cast(tf.expand_dims(
tf.maximum(num_proposals, tf.ones_like(num_proposals)), 1),
dtype=tf.float32)
normalizer = tf.tile(num_proposals_or_one,
[1, self.max_num_proposals]) * batch_size
(batch_cls_targets_with_background, batch_cls_weights, batch_reg_targets,
batch_reg_weights, _) = target_assigner.batch_assign_targets(
target_assigner=self._detector_target_assigner,
anchors_batch=proposal_boxlists,
gt_box_batch=groundtruth_boxlists,
gt_class_targets_batch=groundtruth_classes_with_background_list,
unmatched_class_label=tf.constant(
[1] + self._num_classes * [0], dtype=tf.float32),
gt_weights_batch=groundtruth_weights_list)
class_predictions_with_background = tf.reshape(
class_predictions_with_background,
[batch_size, self.max_num_proposals, -1])
flat_cls_targets_with_background = tf.reshape(
batch_cls_targets_with_background,
[batch_size * self.max_num_proposals, -1])
one_hot_flat_cls_targets_with_background = tf.argmax(
flat_cls_targets_with_background, axis=1)
one_hot_flat_cls_targets_with_background = tf.one_hot(
one_hot_flat_cls_targets_with_background,
flat_cls_targets_with_background.get_shape()[1])
# If using a shared box across classes use directly
if refined_box_encodings.shape[1] == 1:
reshaped_refined_box_encodings = tf.reshape(
refined_box_encodings,
[batch_size, self.max_num_proposals, self._box_coder.code_size])
# For anchors with multiple labels, picks refined_location_encodings
# for just one class to avoid over-counting for regression loss and
# (optionally) mask loss.
else:
reshaped_refined_box_encodings = (
self._get_refined_encodings_for_postitive_class(
refined_box_encodings,
one_hot_flat_cls_targets_with_background, batch_size))
losses_mask = None
if self.groundtruth_has_field(fields.InputDataFields.is_annotated):
losses_mask = tf.stack(self.groundtruth_lists(
fields.InputDataFields.is_annotated))
second_stage_loc_losses = self._second_stage_localization_loss(
reshaped_refined_box_encodings,
batch_reg_targets,
weights=batch_reg_weights,
losses_mask=losses_mask) / normalizer
second_stage_cls_losses = ops.reduce_sum_trailing_dimensions(
self._second_stage_classification_loss(
class_predictions_with_background,
batch_cls_targets_with_background,
weights=batch_cls_weights,
losses_mask=losses_mask),
ndims=2) / normalizer
second_stage_loc_loss = tf.reduce_sum(
second_stage_loc_losses * tf.cast(paddings_indicator,
dtype=tf.float32))
second_stage_cls_loss = tf.reduce_sum(
second_stage_cls_losses * tf.cast(paddings_indicator,
dtype=tf.float32))
if self._hard_example_miner:
(second_stage_loc_loss, second_stage_cls_loss
) = self._unpad_proposals_and_apply_hard_mining(
proposal_boxlists, second_stage_loc_losses,
second_stage_cls_losses, num_proposals)
localization_loss = tf.multiply(self._second_stage_loc_loss_weight,
second_stage_loc_loss,
name='localization_loss')
classification_loss = tf.multiply(self._second_stage_cls_loss_weight,
second_stage_cls_loss,
name='classification_loss')
loss_dict = {'Loss/BoxClassifierLoss/localization_loss':
localization_loss,
'Loss/BoxClassifierLoss/classification_loss':
classification_loss}
return loss_dict
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.meta_architectures.rfcn_meta_arch."""
import tensorflow.compat.v1 as tf
from object_detection.meta_architectures import faster_rcnn_meta_arch_test_lib
from object_detection.meta_architectures import rfcn_meta_arch
class RFCNMetaArchTest(
faster_rcnn_meta_arch_test_lib.FasterRCNNMetaArchTestBase):
def _get_second_stage_box_predictor_text_proto(
self, share_box_across_classes=False):
del share_box_across_classes
box_predictor_text_proto = """
rfcn_box_predictor {
conv_hyperparams {
op: CONV
activation: NONE
regularizer {
l2_regularizer {
weight: 0.0005
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
}
"""
return box_predictor_text_proto
def _get_model(self, box_predictor, **common_kwargs):
return rfcn_meta_arch.RFCNMetaArch(
second_stage_rfcn_box_predictor=box_predictor, **common_kwargs)
def _get_box_classifier_features_shape(self,
image_size,
batch_size,
max_num_proposals,
initial_crop_size,
maxpool_stride,
num_features):
return (batch_size, image_size, image_size, num_features)
if __name__ == '__main__':
tf.test.main()