Unverified Commit 09d9656f authored by Srihari Humbarwadi, committed by GitHub

Merge branch 'panoptic-segmentation' into panoptic-deeplab-modeling

parents ac671306 49a5706c
......@@ -20,7 +20,7 @@ import tensorflow as tf
import tensorflow_datasets as tfds
from official.modeling import tf_utils
from official.nlp.projects.triviaqa import dataset # pylint: disable=unused-import
from official.projects.triviaqa import dataset # pylint: disable=unused-import
def _flatten_dims(tensor: tf.Tensor,
......
......@@ -27,9 +27,9 @@ import tensorflow_datasets as tfds
import sentencepiece as spm
from official.nlp.configs import encoders # pylint: disable=unused-import
from official.nlp.projects.triviaqa import evaluation
from official.nlp.projects.triviaqa import inputs
from official.nlp.projects.triviaqa import prediction
from official.projects.triviaqa import evaluation
from official.projects.triviaqa import inputs
from official.projects.triviaqa import prediction
flags.DEFINE_string('data_dir', None, 'TensorFlow Datasets directory.')
......
......@@ -30,8 +30,8 @@ import numpy as np
import tensorflow.io.gfile as gfile
import sentencepiece as spm
from official.nlp.projects.triviaqa import evaluation
from official.nlp.projects.triviaqa import sentencepiece_pb2
from official.projects.triviaqa import evaluation
from official.projects.triviaqa import sentencepiece_pb2
@dataclasses.dataclass
......
......@@ -30,10 +30,10 @@ import tensorflow_datasets as tfds
import sentencepiece as spm
from official.nlp import optimization as nlp_optimization
from official.nlp.configs import encoders
from official.nlp.projects.triviaqa import evaluation
from official.nlp.projects.triviaqa import inputs
from official.nlp.projects.triviaqa import modeling
from official.nlp.projects.triviaqa import prediction
from official.projects.triviaqa import evaluation
from official.projects.triviaqa import inputs
from official.projects.triviaqa import modeling
from official.projects.triviaqa import prediction
flags.DEFINE_string('data_dir', None, 'Data directory for TensorFlow Datasets.')
......
......@@ -155,6 +155,7 @@ class SegmentationHead3D(tf.keras.layers.Layer):
- key: A `str` of the level of the multilevel features.
- values: A `tf.Tensor` of the feature map tensors, whose shape is
[batch, height_l, width_l, channels].
The first is backbone endpoints, and the second is decoder endpoints.
Returns:
segmentation prediction mask: A `tf.Tensor` of the segmentation mask
scores predicted from input features.
......
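The docstring above describes the head's input as a pair of multilevel feature dicts. A minimal illustration of that structure, with the level key and tensor shapes chosen purely for the example (they are not taken from this diff):

```python
# Hypothetical illustration of the `inputs` structure described in the docstring:
# a (backbone_output, decoder_output) pair of {level: 5-D feature tensor} dicts.
import tensorflow as tf

backbone_output = {'3': tf.random.normal([2, 16, 16, 16, 128])}  # assumed level/shape
decoder_output = {'3': tf.random.normal([2, 16, 16, 16, 64])}    # assumed level/shape
inputs = (backbone_output, decoder_output)  # backbone endpoints first, then decoder
```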
......@@ -47,10 +47,10 @@ class SegmentationNetworkUNet3DTest(parameterized.TestCase, tf.test.TestCase):
model = segmentation_model.SegmentationModel(
backbone=backbone, decoder=decoder, head=head)
logits = model(inputs)
outputs = model(inputs)
self.assertAllEqual(
[2, input_size[0], input_size[0], input_size[1], num_classes],
logits.numpy().shape)
outputs['logits'].numpy().shape)
def test_serialize_deserialize(self):
"""Validate the network can be serialized and deserialized."""
......
......@@ -56,4 +56,4 @@ class SegmentationModule(export_base.ExportModule):
outputs = self.inference_step(images)
output_key = 'logits' if self.params.task.model.head.output_logits else 'probs'
return {output_key: outputs}
return {output_key: outputs['logits']}
......@@ -104,7 +104,8 @@ class SemanticSegmentationExportTest(tf.test.TestCase, parameterized.TestCase):
# outputs equal.
expected_output = module.model(image_tensor, training=False)
out = segmentation_fn(tf.constant(images))
self.assertAllClose(out['logits'].numpy(), expected_output.numpy())
self.assertAllClose(out['logits'].numpy(),
expected_output['logits'].numpy())
if __name__ == '__main__':
......
......@@ -198,6 +198,8 @@ class SemanticSegmentation3DTask(base_task.Task):
# Casting output layer as float32 is necessary when mixed_precision is
# mixed_float16 or mixed_bfloat16 to ensure output is casted as float32.
outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), outputs)
outputs = outputs['logits']
if self.task_config.model.head.output_logits:
outputs = tf.nn.softmax(outputs)
......@@ -258,6 +260,7 @@ class SemanticSegmentation3DTask(base_task.Task):
outputs = self.inference_step(features, model)
outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), outputs)
outputs = outputs['logits']
if self.task_config.model.head.output_logits:
outputs = tf.nn.softmax(outputs)
......@@ -268,8 +271,8 @@ class SemanticSegmentation3DTask(base_task.Task):
# Compute dice score metrics on CPU.
for metric in self.metrics:
labels = tf.cast(labels, tf.float32)
outputs = tf.cast(outputs, tf.float32)
logs.update({metric.name: (labels, outputs)})
logits = tf.cast(outputs, tf.float32)
logs.update({metric.name: (labels, logits)})
return logs
......
......@@ -68,7 +68,7 @@ Note that the dataset is large (~1TB).
### Preprocess the data
Follow the instructions in [Data Preprocessing](data/preprocessing) to
Follow the instructions in [Data Preprocessing](./preprocessing) to
preprocess the Criteo Terabyte dataset.
Data preprocessing steps are summarized below.
......@@ -87,7 +87,8 @@ Categorical features:
function such as modulus will suffice, i.e. feature_value % MAX_INDEX.
The vocabulary sizes resulting from pre-processing are passed in to the model
trainer using the model.vocab_sizes config.
trainer using the model.vocab_sizes config. Note that the values provided in the
sample below are only valid for the Criteo Terabyte dataset.
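A minimal sketch of the modulus remapping described above; `MAX_INDEX` is an illustrative cap chosen for the example, not a value taken from this repository.

```python
# Sketch of remapping large categorical IDs with a modulus, as described above.
# MAX_INDEX is an assumed illustrative cap.
MAX_INDEX = 5_000_000

def remap_categorical(feature_value: int) -> int:
  """Maps an arbitrarily large categorical ID into [0, MAX_INDEX)."""
  return feature_value % MAX_INDEX

print(remap_categorical(123_456_789_012))  # -> 1789012
```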
The full dataset is composed of 24 directories. Partition the data into training
and eval sets, for example days 1-23 for training and day 24 for evaluation.
......
......@@ -69,7 +69,9 @@ python3 criteo_preprocess.py \
--vocab_gen_mode --runner DataflowRunner --max_vocab_size 5000000 \
--project ${PROJECT} --region ${REGION}
```
A vocabulary for each feature will be generated under the
`${STORAGE_BUCKET}/criteo_vocab/tftransform_tmp/feature_??_vocab` files.
The vocabulary size of each feature can be obtained with `wc -l <feature_vocab_file>`.
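The `wc -l` step can also be scripted; a hedged sketch that counts each generated vocabulary file via `tf.io.gfile` (the bucket path is a placeholder for `${STORAGE_BUCKET}`, and the files are assumed to be newline-delimited):

```python
# Hedged sketch: derive model.vocab_sizes by counting lines in each generated
# vocabulary file. The GCS path is a placeholder, not a value from this repo.
import tensorflow as tf

vocab_pattern = 'gs://my-bucket/criteo_vocab/tftransform_tmp/feature_*_vocab'
vocab_sizes = []
for path in sorted(tf.io.gfile.glob(vocab_pattern)):
  with tf.io.gfile.GFile(path) as f:
    vocab_sizes.append(sum(1 for _ in f))
print(vocab_sizes)  # values to supply through the model.vocab_sizes config
```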
Preprocess training and test data:
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
......@@ -98,6 +98,13 @@ class RevNet(hyperparams.Config):
model_id: int = 56
@dataclasses.dataclass
class MobileDet(hyperparams.Config):
"""Mobiledet config."""
model_id: str = 'MobileDetCPU'
filter_size_scale: float = 1.0
@dataclasses.dataclass
class Backbone(hyperparams.OneOfConfig):
"""Configuration for backbones.
......@@ -111,6 +118,7 @@ class Backbone(hyperparams.OneOfConfig):
spinenet: spinenet backbone config.
spinenet_mobile: mobile spinenet backbone config.
mobilenet: mobilenet backbone config.
mobiledet: mobiledet backbone config.
"""
type: Optional[str] = None
resnet: ResNet = ResNet()
......@@ -120,3 +128,5 @@ class Backbone(hyperparams.OneOfConfig):
spinenet: SpineNet = SpineNet()
spinenet_mobile: SpineNetMobile = SpineNetMobile()
mobilenet: MobileNet = MobileNet()
mobiledet: MobileDet = MobileDet()
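A minimal sketch of selecting the new backbone through the one-of config added above; the import path is an assumption, while the field names come from the dataclasses in the diff.

```python
# Sketch only: pick the MobileDet backbone via the Backbone one-of config.
# The import path is assumed, not confirmed by this diff.
from official.vision.beta.configs import backbones

backbone = backbones.Backbone(
    type='mobiledet',
    mobiledet=backbones.MobileDet(model_id='MobileDetCPU', filter_size_scale=1.0))
```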
......@@ -14,10 +14,10 @@
# Lint as: python3
"""Decoders configurations."""
from typing import Optional, List
import dataclasses
from typing import List, Optional
# Import libraries
import dataclasses
from official.modeling import hyperparams
......@@ -53,6 +53,8 @@ class ASPP(hyperparams.Config):
num_filters: int = 256
use_depthwise_convolution: bool = False
pool_kernel_size: Optional[List[int]] = None # Use global average pooling.
spp_layer_version: str = 'v1'
output_tensor: bool = False
@dataclasses.dataclass
......
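The two new ASPP fields can be set the same way; a sketch assuming the decoder configs live in a `decoders` module alongside the dataclass shown above.

```python
# Sketch only: instantiate the ASPP decoder config with the newly added fields.
# The import path is assumed.
from official.vision.beta.configs import decoders

aspp = decoders.ASPP(
    num_filters=256,
    use_depthwise_convolution=False,
    spp_layer_version='v1',  # new field from the diff above
    output_tensor=False)     # new field from the diff above
```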
# MobileNetV3-large_1.0 ImageNet classification: 74.96% top-1.
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
model:
num_classes: 1001
input_size: [224, 224, 3]
backbone:
type: 'mobilenet'
mobilenet:
model_id: 'MobileNetV3Large'
filter_size_scale: 1.0
dropout_rate: 0.2
losses:
l2_weight_decay: 0.00001
one_hot: true
label_smoothing: 0.1
train_data:
input_path: 'imagenet-2012-tfrecord/train*'
is_training: true
global_batch_size: 4096
dtype: 'bfloat16'
# Enables Inception-style pre-processing.
decode_jpeg_only: false
validation_data:
input_path: 'imagenet-2012-tfrecord/valid*'
is_training: false
global_batch_size: 4096
dtype: 'bfloat16'
drop_remainder: false
# Enables Inception-style pre-processing.
decode_jpeg_only: false
trainer:
train_steps: 156000 # 500 epochs
validation_steps: 13
validation_interval: 312
steps_per_loop: 312 # NUM_EXAMPLES (1281167) // global_batch_size
summary_interval: 312
checkpoint_interval: 312
optimizer_config:
learning_rate:
type: 'cosine'
cosine:
alpha: 0.0
decay_steps: 156000
initial_learning_rate: 0.5
name: CosineDecay
offset: 0
warmup:
type: 'linear'
linear:
warmup_steps: 5000
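The schedule values in this config follow from the ImageNet example count and batch size noted in the comments; a quick check of that arithmetic:

```python
# Quick check of the schedule arithmetic noted in the YAML comments above.
num_examples = 1281167        # ImageNet-2012 training set size
global_batch_size = 4096
steps_per_epoch = num_examples // global_batch_size
print(steps_per_epoch)        # 312 -> steps_per_loop, summary and checkpoint interval
print(steps_per_epoch * 500)  # 156000 -> train_steps (~500 epochs)
```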
# --experiment_type=retinanet_mobile_coco
# COCO AP 27.0%
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
losses:
l2_weight_decay: 3.0e-05
model:
anchor:
anchor_size: 3
aspect_ratios: [0.5, 1.0, 2.0]
num_scales: 3
backbone:
mobilenet:
model_id: 'MobileDetCPU'
filter_size_scale: 1.0
type: 'mobiledet'
decoder:
type: 'fpn'
fpn:
num_filters: 128
use_separable_conv: true
head:
num_convs: 4
num_filters: 128
use_separable_conv: true
input_size: [320, 320, 3]
max_level: 6
min_level: 3
norm_activation:
activation: 'relu6'
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
train_data:
dtype: 'bfloat16'
global_batch_size: 256
is_training: true
parser:
aug_rand_hflip: true
aug_scale_max: 2.0
aug_scale_min: 0.5
validation_data:
dtype: 'bfloat16'
global_batch_size: 8
is_training: false
trainer:
optimizer_config:
learning_rate:
stepwise:
boundaries: [263340, 272580]
values: [0.32, 0.032, 0.0032]
type: 'stepwise'
warmup:
linear:
warmup_learning_rate: 0.0067
warmup_steps: 2000
steps_per_loop: 462
train_steps: 277200
validation_interval: 462
validation_steps: 625
......@@ -55,9 +55,14 @@ class Parser(hyperparams.Config):
aug_rand_hflip: bool = False
aug_scale_min: float = 1.0
aug_scale_max: float = 1.0
aug_policy: Optional[str] = None
skip_crowd_during_training: bool = True
max_num_instances: int = 100
# Can choose AutoAugment and RandAugment.
# TODO(b/205346436) Support RandAugment.
aug_type: Optional[common.Augmentation] = None
# Keep for backward compatibility. Not used.
aug_policy: Optional[str] = None
@dataclasses.dataclass
......
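A sketch of choosing an augmentation through the new `aug_type` one-of field; the exact structure of `common.Augmentation` and `common.AutoAugment` is assumed from the type annotation above and may differ in detail.

```python
# Sketch only: select AutoAugment via the new aug_type field. The structure of
# common.Augmentation / common.AutoAugment is assumed, not shown in this diff.
from official.vision.beta.configs import common

aug = common.Augmentation(type='autoaug', autoaug=common.AutoAugment())
# parser = Parser(aug_type=aug, aug_rand_hflip=True)  # hypothetical usage
```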