Unverified Commit 09d9656f authored by Srihari Humbarwadi, committed by GitHub

Merge branch 'panoptic-segmentation' into panoptic-deeplab-modeling

parents ac671306 49a5706c
......@@ -20,7 +20,7 @@ import tensorflow as tf
import tensorflow_datasets as tfds
from official.modeling import tf_utils
from official.nlp.projects.triviaqa import dataset # pylint: disable=unused-import
from official.projects.triviaqa import dataset # pylint: disable=unused-import
def _flatten_dims(tensor: tf.Tensor,
......
......@@ -27,9 +27,9 @@ import tensorflow_datasets as tfds
import sentencepiece as spm
from official.nlp.configs import encoders # pylint: disable=unused-import
from official.nlp.projects.triviaqa import evaluation
from official.nlp.projects.triviaqa import inputs
from official.nlp.projects.triviaqa import prediction
from official.projects.triviaqa import evaluation
from official.projects.triviaqa import inputs
from official.projects.triviaqa import prediction
flags.DEFINE_string('data_dir', None, 'TensorFlow Datasets directory.')
......
......@@ -30,8 +30,8 @@ import numpy as np
import tensorflow.io.gfile as gfile
import sentencepiece as spm
from official.nlp.projects.triviaqa import evaluation
from official.nlp.projects.triviaqa import sentencepiece_pb2
from official.projects.triviaqa import evaluation
from official.projects.triviaqa import sentencepiece_pb2
@dataclasses.dataclass
......
......@@ -30,10 +30,10 @@ import tensorflow_datasets as tfds
import sentencepiece as spm
from official.nlp import optimization as nlp_optimization
from official.nlp.configs import encoders
from official.nlp.projects.triviaqa import evaluation
from official.nlp.projects.triviaqa import inputs
from official.nlp.projects.triviaqa import modeling
from official.nlp.projects.triviaqa import prediction
from official.projects.triviaqa import evaluation
from official.projects.triviaqa import inputs
from official.projects.triviaqa import modeling
from official.projects.triviaqa import prediction
flags.DEFINE_string('data_dir', None, 'Data directory for TensorFlow Datasets.')
......
......@@ -155,6 +155,7 @@ class SegmentationHead3D(tf.keras.layers.Layer):
- key: A `str` of the level of the multilevel features.
- values: A `tf.Tensor` of the feature map tensors, whose shape is
[batch, height_l, width_l, channels].
The first is backbone endpoints, and the second is decoder endpoints.
Returns:
segmentation prediction mask: A `tf.Tensor` of the segmentation mask
scores predicted from input features.
......
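The docstring above describes the head's input as a pair of multilevel feature dicts. A minimal illustration of that structure, with the level key and tensor shapes chosen purely for the example (they are not taken from this diff):

```python
# Hypothetical illustration of the `inputs` structure described in the docstring:
# a (backbone_output, decoder_output) pair of {level: 5-D feature tensor} dicts.
import tensorflow as tf

backbone_output = {'3': tf.random.normal([2, 16, 16, 16, 128])}  # assumed level/shape
decoder_output = {'3': tf.random.normal([2, 16, 16, 16, 64])}    # assumed level/shape
inputs = (backbone_output, decoder_output)  # backbone endpoints first, then decoder
```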
......@@ -47,10 +47,10 @@ class SegmentationNetworkUNet3DTest(parameterized.TestCase, tf.test.TestCase):
model = segmentation_model.SegmentationModel(
backbone=backbone, decoder=decoder, head=head)
logits = model(inputs)
outputs = model(inputs)
self.assertAllEqual(
[2, input_size[0], input_size[0], input_size[1], num_classes],
logits.numpy().shape)
outputs['logits'].numpy().shape)
def test_serialize_deserialize(self):
"""Validate the network can be serialized and deserialized."""
......
......@@ -56,4 +56,4 @@ class SegmentationModule(export_base.ExportModule):
outputs = self.inference_step(images)
output_key = 'logits' if self.params.task.model.head.output_logits else 'probs'
return {output_key: outputs}
return {output_key: outputs['logits']}
......@@ -104,7 +104,8 @@ class SemanticSegmentationExportTest(tf.test.TestCase, parameterized.TestCase):
# outputs equal.
expected_output = module.model(image_tensor, training=False)
out = segmentation_fn(tf.constant(images))
self.assertAllClose(out['logits'].numpy(), expected_output.numpy())
self.assertAllClose(out['logits'].numpy(),
expected_output['logits'].numpy())
if __name__ == '__main__':
......
......@@ -198,6 +198,8 @@ class SemanticSegmentation3DTask(base_task.Task):
# Casting output layer as float32 is necessary when mixed_precision is
# mixed_float16 or mixed_bfloat16 to ensure output is casted as float32.
outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), outputs)
outputs = outputs['logits']
if self.task_config.model.head.output_logits:
outputs = tf.nn.softmax(outputs)
......@@ -258,6 +260,7 @@ class SemanticSegmentation3DTask(base_task.Task):
outputs = self.inference_step(features, model)
outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), outputs)
outputs = outputs['logits']
if self.task_config.model.head.output_logits:
outputs = tf.nn.softmax(outputs)
......@@ -268,8 +271,8 @@ class SemanticSegmentation3DTask(base_task.Task):
# Compute dice score metrics on CPU.
for metric in self.metrics:
labels = tf.cast(labels, tf.float32)
outputs = tf.cast(outputs, tf.float32)
logs.update({metric.name: (labels, outputs)})
logits = tf.cast(outputs, tf.float32)
logs.update({metric.name: (labels, logits)})
return logs
......
......@@ -68,7 +68,7 @@ Note that the dataset is large (~1TB).
### Preprocess the data
Follow the instructions in [Data Preprocessing](data/preprocessing) to
Follow the instructions in [Data Preprocessing](./preprocessing) to
preprocess the Criteo Terabyte dataset.
Data preprocessing steps are summarized below.
......@@ -87,7 +87,8 @@ Categorical features:
function such as modulus will suffice, i.e. feature_value % MAX_INDEX.
The vocabulary sizes resulting from pre-processing are passed in to the model
trainer using the model.vocab_sizes config.
trainer using the model.vocab_sizes config. Note that the values provided in the
sample below are only valid for the Criteo Terabyte dataset.
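A minimal sketch of the modulus remapping described above; `MAX_INDEX` is an illustrative cap chosen for the example, not a value taken from this repository.

```python
# Sketch of remapping large categorical IDs with a modulus, as described above.
# MAX_INDEX is an assumed illustrative cap.
MAX_INDEX = 5_000_000

def remap_categorical(feature_value: int) -> int:
  """Maps an arbitrarily large categorical ID into [0, MAX_INDEX)."""
  return feature_value % MAX_INDEX

print(remap_categorical(123_456_789_012))  # -> 1789012
```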
The full dataset is composed of 24 directories. Partition the data into training
and eval sets, for example days 1-23 for training and day 24 for evaluation.
......
......@@ -69,7 +69,9 @@ python3 criteo_preprocess.py \
--vocab_gen_mode --runner DataflowRunner --max_vocab_size 5000000 \
--project ${PROJECT} --region ${REGION}
```
A vocabulary for each feature will be generated under the
`${STORAGE_BUCKET}/criteo_vocab/tftransform_tmp/feature_??_vocab` files.
The vocabulary size of each feature can be obtained with `wc -l <feature_vocab_file>`.
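The `wc -l` step can also be scripted; a hedged sketch that counts each generated vocabulary file via `tf.io.gfile` (the bucket path is a placeholder for `${STORAGE_BUCKET}`, and the files are assumed to be newline-delimited):

```python
# Hedged sketch: derive model.vocab_sizes by counting lines in each generated
# vocabulary file. The GCS path is a placeholder, not a value from this repo.
import tensorflow as tf

vocab_pattern = 'gs://my-bucket/criteo_vocab/tftransform_tmp/feature_*_vocab'
vocab_sizes = []
for path in sorted(tf.io.gfile.glob(vocab_pattern)):
  with tf.io.gfile.GFile(path) as f:
    vocab_sizes.append(sum(1 for _ in f))
print(vocab_sizes)  # values to supply through the model.vocab_sizes config
```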
Preprocess training and test data:
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
......@@ -98,6 +98,13 @@ class RevNet(hyperparams.Config):
model_id: int = 56
@dataclasses.dataclass
class MobileDet(hyperparams.Config):
"""Mobiledet config."""
model_id: str = 'MobileDetCPU'
filter_size_scale: float = 1.0
@dataclasses.dataclass
class Backbone(hyperparams.OneOfConfig):
"""Configuration for backbones.
......@@ -111,6 +118,7 @@ class Backbone(hyperparams.OneOfConfig):
spinenet: spinenet backbone config.
spinenet_mobile: mobile spinenet backbone config.
mobilenet: mobilenet backbone config.
mobiledet: mobiledet backbone config.
"""
type: Optional[str] = None
resnet: ResNet = ResNet()
......@@ -120,3 +128,5 @@ class Backbone(hyperparams.OneOfConfig):
spinenet: SpineNet = SpineNet()
spinenet_mobile: SpineNetMobile = SpineNetMobile()
mobilenet: MobileNet = MobileNet()
mobiledet: MobileDet = MobileDet()
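A minimal sketch of selecting the new backbone through the one-of config added above; the import path is an assumption, while the field names come from the dataclasses in the diff.

```python
# Sketch only: pick the MobileDet backbone via the Backbone one-of config.
# The import path is assumed, not confirmed by this diff.
from official.vision.beta.configs import backbones

backbone = backbones.Backbone(
    type='mobiledet',
    mobiledet=backbones.MobileDet(model_id='MobileDetCPU', filter_size_scale=1.0))
```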
......@@ -14,10 +14,10 @@
# Lint as: python3
"""Decoders configurations."""
from typing import Optional, List
import dataclasses
from typing import List, Optional
# Import libraries
import dataclasses
from official.modeling import hyperparams
......@@ -53,6 +53,8 @@ class ASPP(hyperparams.Config):
num_filters: int = 256
use_depthwise_convolution: bool = False
pool_kernel_size: Optional[List[int]] = None # Use global average pooling.
spp_layer_version: str = 'v1'
output_tensor: bool = False
@dataclasses.dataclass
......
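The two new ASPP fields can be set the same way; a sketch assuming the decoder configs live in a `decoders` module alongside the dataclass shown above.

```python
# Sketch only: instantiate the ASPP decoder config with the newly added fields.
# The import path is assumed.
from official.vision.beta.configs import decoders

aspp = decoders.ASPP(
    num_filters=256,
    use_depthwise_convolution=False,
    spp_layer_version='v1',  # new field from the diff above
    output_tensor=False)     # new field from the diff above
```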
# MobileNetV3-large_1.0 ImageNet classification: 74.96% top-1.
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
model:
num_classes: 1001
input_size: [224, 224, 3]
backbone:
type: 'mobilenet'
mobilenet:
model_id: 'MobileNetV3Large'
filter_size_scale: 1.0
dropout_rate: 0.2
losses:
l2_weight_decay: 0.00001
one_hot: true
label_smoothing: 0.1
train_data:
input_path: 'imagenet-2012-tfrecord/train*'
is_training: true
global_batch_size: 4096
dtype: 'bfloat16'
# Enables Inception-style pre-processing.
decode_jpeg_only: false
validation_data:
input_path: 'imagenet-2012-tfrecord/valid*'
is_training: false
global_batch_size: 4096
dtype: 'bfloat16'
drop_remainder: false
# Enables Inception-style pre-processing.
decode_jpeg_only: false
trainer:
train_steps: 156000 # 500 epochs
validation_steps: 13
validation_interval: 312
steps_per_loop: 312 # NUM_EXAMPLES (1281167) // global_batch_size
summary_interval: 312
checkpoint_interval: 312
optimizer_config:
learning_rate:
type: 'cosine'
cosine:
alpha: 0.0
decay_steps: 156000
initial_learning_rate: 0.5
name: CosineDecay
offset: 0
warmup:
type: 'linear'
linear:
warmup_steps: 5000
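The schedule values in this config follow from the ImageNet example count and batch size noted in the comments; a quick check of that arithmetic:

```python
# Quick check of the schedule arithmetic noted in the YAML comments above.
num_examples = 1281167        # ImageNet-2012 training set size
global_batch_size = 4096
steps_per_epoch = num_examples // global_batch_size
print(steps_per_epoch)        # 312 -> steps_per_loop, summary and checkpoint interval
print(steps_per_epoch * 500)  # 156000 -> train_steps (~500 epochs)
```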
# --experiment_type=retinanet_mobile_coco
# COCO AP 27.0%
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
losses:
l2_weight_decay: 3.0e-05
model:
anchor:
anchor_size: 3
aspect_ratios: [0.5, 1.0, 2.0]
num_scales: 3
backbone:
mobilenet:
model_id: 'MobileDetCPU'
filter_size_scale: 1.0
type: 'mobiledet'
decoder:
type: 'fpn'
fpn:
num_filters: 128
use_separable_conv: true
head:
num_convs: 4
num_filters: 128
use_separable_conv: true
input_size: [320, 320, 3]
max_level: 6
min_level: 3
norm_activation:
activation: 'relu6'
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
train_data:
dtype: 'bfloat16'
global_batch_size: 256
is_training: true
parser:
aug_rand_hflip: true
aug_scale_max: 2.0
aug_scale_min: 0.5
validation_data:
dtype: 'bfloat16'
global_batch_size: 8
is_training: false
trainer:
optimizer_config:
learning_rate:
stepwise:
boundaries: [263340, 272580]
values: [0.32, 0.032, 0.0032]
type: 'stepwise'
warmup:
linear:
warmup_learning_rate: 0.0067
warmup_steps: 2000
steps_per_loop: 462
train_steps: 277200
validation_interval: 462
validation_steps: 625
......@@ -55,9 +55,14 @@ class Parser(hyperparams.Config):
aug_rand_hflip: bool = False
aug_scale_min: float = 1.0
aug_scale_max: float = 1.0
aug_policy: Optional[str] = None
skip_crowd_during_training: bool = True
max_num_instances: int = 100
# Can choose AutoAugment and RandAugment.
# TODO(b/205346436) Support RandAugment.
aug_type: Optional[common.Augmentation] = None
# Keep for backward compatibility. Not used.
aug_policy: Optional[str] = None
@dataclasses.dataclass
......
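A sketch of choosing an augmentation through the new `aug_type` one-of field; the exact structure of `common.Augmentation` and `common.AutoAugment` is assumed from the type annotation above and may differ in detail.

```python
# Sketch only: select AutoAugment via the new aug_type field. The structure of
# common.Augmentation / common.AutoAugment is assumed, not shown in this diff.
from official.vision.beta.configs import common

aug = common.Augmentation(type='autoaug', autoaug=common.AutoAugment())
# parser = Parser(aug_type=aug, aug_rand_hflip=True)  # hypothetical usage
```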