Unverified Commit 7479dbb8 authored by Srihari Humbarwadi's avatar Srihari Humbarwadi Committed by GitHub
Browse files

Merge branch 'tensorflow:master' into panoptic-deeplab-modeling

parents 8b60a5a8 9c8cbd0c
...@@ -3,7 +3,6 @@ ...@@ -3,7 +3,6 @@
# mask head. This config is only trained on masks from the VOC classes in COCO # mask head. This config is only trained on masks from the VOC classes in COCO
# and achieves a mask mAP of 32.5% on non-VOC classes. # and achieves a mask mAP of 32.5% on non-VOC classes.
# [1]: https://arxiv.org/abs/2104.00613 # [1]: https://arxiv.org/abs/2104.00613
# [2]: https://arxiv.org/abs/1904.07850
# Train on TPU-32 # Train on TPU-32
...@@ -55,6 +54,7 @@ model { ...@@ -55,6 +54,7 @@ model {
classification_loss { classification_loss {
weighted_sigmoid {} weighted_sigmoid {}
} }
use_only_last_stage: true
allowed_masked_classes_ids: [ allowed_masked_classes_ids: [
1, # person 1, # person
......
...@@ -32,6 +32,8 @@ def context_float_feature(ndarray): ...@@ -32,6 +32,8 @@ def context_float_feature(ndarray):
""" """
feature = tf.train.Feature() feature = tf.train.Feature()
for val in ndarray: for val in ndarray:
if isinstance(val, np.ndarray):
val = val.item()
feature.float_list.value.append(val) feature.float_list.value.append(val)
return feature return feature
...@@ -47,6 +49,8 @@ def context_int64_feature(ndarray): ...@@ -47,6 +49,8 @@ def context_int64_feature(ndarray):
""" """
feature = tf.train.Feature() feature = tf.train.Feature()
for val in ndarray: for val in ndarray:
if isinstance(val, np.ndarray):
val = val.item()
feature.int64_list.value.append(val) feature.int64_list.value.append(val)
return feature return feature
...@@ -81,7 +85,7 @@ def sequence_float_feature(ndarray): ...@@ -81,7 +85,7 @@ def sequence_float_feature(ndarray):
for row in ndarray: for row in ndarray:
feature = feature_list.feature.add() feature = feature_list.feature.add()
if row.size: if row.size:
feature.float_list.value[:] = row feature.float_list.value[:] = np.ravel(row)
return feature_list return feature_list
...@@ -98,7 +102,7 @@ def sequence_int64_feature(ndarray): ...@@ -98,7 +102,7 @@ def sequence_int64_feature(ndarray):
for row in ndarray: for row in ndarray:
feature = feature_list.feature.add() feature = feature_list.feature.add()
if row.size: if row.size:
feature.int64_list.value[:] = row feature.int64_list.value[:] = np.ravel(row)
return feature_list return feature_list
...@@ -118,7 +122,7 @@ def sequence_bytes_feature(ndarray): ...@@ -118,7 +122,7 @@ def sequence_bytes_feature(ndarray):
feature = feature_list.feature.add() feature = feature_list.feature.add()
if row: if row:
row = [tf.compat.as_bytes(val) for val in row] row = [tf.compat.as_bytes(val) for val in row]
feature.bytes_list.value[:] = row feature.bytes_list.value[:] = np.ravel(row)
return feature_list return feature_list
......
...@@ -1164,3 +1164,7 @@ def eval_continuously( ...@@ -1164,3 +1164,7 @@ def eval_continuously(
postprocess_on_cpu=postprocess_on_cpu, postprocess_on_cpu=postprocess_on_cpu,
global_step=global_step, global_step=global_step,
) )
if global_step.numpy() == configs['train_config'].num_steps:
tf.logging.info('Exiting evaluation at step %d', global_step.numpy())
return
...@@ -19,13 +19,16 @@ from __future__ import absolute_import ...@@ -19,13 +19,16 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from tensorflow.python.keras.applications import resnet
import tensorflow.compat.v1 as tf import tensorflow.compat.v1 as tf
from object_detection.core import freezable_batch_norm from object_detection.core import freezable_batch_norm
from object_detection.models.keras_models import model_utils from object_detection.models.keras_models import model_utils
try:
from keras.applications import resnet # pylint:disable=g-import-not-at-top
except ImportError:
from tensorflow.python.keras.applications import resnet # pylint:disable=g-import-not-at-top
def _fixed_padding(inputs, kernel_size, rate=1): # pylint: disable=invalid-name def _fixed_padding(inputs, kernel_size, rate=1): # pylint: disable=invalid-name
"""Pads the input along the spatial dimensions independently of input size. """Pads the input along the spatial dimensions independently of input size.
......
...@@ -220,8 +220,8 @@ message CenterNet { ...@@ -220,8 +220,8 @@ message CenterNet {
// scores * exp((-distances^2) / (2 * sigma^2)) // scores * exp((-distances^2) / (2 * sigma^2))
// where 'distances' is the distance between the heatmap peak location and // where 'distances' is the distance between the heatmap peak location and
// the regressed joint location and 'sigma' is the Gaussian standard // the regressed joint location and 'sigma' is the Gaussian standard
// deviation used in generating the Gaussian heatmap target multiplied by the // deviation used in generating the Gaussian heatmap target multiplied by
// 'std_dev_multiplier'. // the 'std_dev_multiplier'.
optional float std_dev_multiplier = 29 [default = 1.0]; optional float std_dev_multiplier = 29 [default = 1.0];
// The radius (in the unit of output pixel) around heatmap peak to assign // The radius (in the unit of output pixel) around heatmap peak to assign
...@@ -415,7 +415,7 @@ message CenterNet { ...@@ -415,7 +415,7 @@ message CenterNet {
optional int32 dim = 3 [default = 256]; optional int32 dim = 3 [default = 256];
// The dimension of the per-pixel embedding // The dimension of the per-pixel embedding
optional int32 pixel_embedding_dim = 4 [default=16]; optional int32 pixel_embedding_dim = 4 [default = 16];
// If set, masks are only kept for classes listed here. Masks are deleted // If set, masks are only kept for classes listed here. Masks are deleted
// for all other classes. Note that this is only done at training time, eval // for all other classes. Note that this is only done at training time, eval
...@@ -424,75 +424,86 @@ message CenterNet { ...@@ -424,75 +424,86 @@ message CenterNet {
// The size of cropped pixel embedding that goes into the 2D mask prediction // The size of cropped pixel embedding that goes into the 2D mask prediction
// network (RoI align). // network (RoI align).
optional int32 mask_size = 6 [default=32]; optional int32 mask_size = 6 [default = 32];
// If set to a positive value, we subsample instances by this amount to // If set to a positive value, we subsample instances by this amount to
// save memory during training. // save memory during training.
optional int32 mask_num_subsamples = 67[default=-1]; optional int32 mask_num_subsamples = 67 [default = -1];
// Whether or not to use (x, y) coordinates as input to mask net. // Whether or not to use (x, y) coordinates as input to mask net.
optional bool use_xy = 8 [default=true]; optional bool use_xy = 8 [default = true];
// Defines the kind of architecture we want to use for mask network. // Defines the kind of architecture we want to use for mask network.
optional string network_type = 9 [default="hourglass52"]; optional string network_type = 9 [default = "hourglass52"];
// Whether or not we want to use instance embedding in mask network. // Whether or not we want to use instance embedding in mask network.
optional bool use_instance_embedding = 10 [default=true]; optional bool use_instance_embedding = 10 [default = true];
// Number of channels in the initial block of the mask prediction network. // Number of channels in the initial block of the mask prediction network.
optional int32 num_init_channels = 11 [default=64]; optional int32 num_init_channels = 11 [default = 64];
// Whether or not to predict masks at full resolution. If true, we predict // Whether or not to predict masks at full resolution. If true, we predict
// masks at the resolution of the output stride. Otherwise, masks are // masks at the resolution of the output stride. Otherwise, masks are
// predicted at resolution defined by mask_size // predicted at resolution defined by mask_size
optional bool predict_full_resolution_masks = 12 [default=false]; optional bool predict_full_resolution_masks = 12 [default = false];
// If predict_full_resolution_masks is set, this parameter controls the size // If predict_full_resolution_masks is set, this parameter controls the size
// of cropped masks returned by post-process. To be compatible with the rest // of cropped masks returned by post-process. To be compatible with the rest
// of the API, masks are always cropped and resized according to detected // of the API, masks are always cropped and resized according to detected
// boxes in postprocess. // boxes in postprocess.
optional int32 postprocess_crop_size = 13 [default=256]; optional int32 postprocess_crop_size = 13 [default = 256];
// The maximum relative amount by which boxes will be jittered before // The maximum relative amount by which boxes will be jittered before
// RoI crop happens. The x and y coordinates of the box are jittered // RoI crop happens. The x and y coordinates of the box are jittered
// relative to width and height respectively. // relative to width and height respectively.
optional float max_roi_jitter_ratio = 14 [default=0.0]; optional float max_roi_jitter_ratio = 14 [default = 0.0];
// The mode for jittering box ROIs. See RandomJitterBoxes in // The mode for jittering box ROIs. See RandomJitterBoxes in
// preprocessor.proto for more details // preprocessor.proto for more details
optional RandomJitterBoxes.JitterMode jitter_mode = 15 [default=DEFAULT]; optional RandomJitterBoxes.JitterMode jitter_mode = 15 [default = DEFAULT];
// Weight for the box consistency loss as described in the BoxInst paper // Weight for the box consistency loss as described in the BoxInst paper
// https://arxiv.org/abs/2012.02310 // https://arxiv.org/abs/2012.02310
optional float box_consistency_loss_weight = 16 [default=0.0]; optional float box_consistency_loss_weight = 16 [default = 0.0];
optional float color_consistency_threshold = 17 [default=0.4]; optional float color_consistency_threshold = 17 [default = 0.4];
optional int32 color_consistency_dilation = 18 [default=2]; optional int32 color_consistency_dilation = 18 [default = 2];
optional float color_consistency_loss_weight = 19 [default=0.0]; optional float color_consistency_loss_weight = 19 [default = 0.0];
optional LossNormalize box_consistency_loss_normalize = 20 [ optional LossNormalize box_consistency_loss_normalize = 20
default=NORMALIZE_AUTO]; [default = NORMALIZE_AUTO];
// If set, will use the bounding box tightness prior approach. This means // If set, will use the bounding box tightness prior approach. This means
// that the max will be restricted to only be inside the box for both // that the max will be restricted to only be inside the box for both
// dimensions. See details here: // dimensions. See details here:
// https://papers.nips.cc/paper/2019/hash/e6e713296627dff6475085cc6a224464-Abstract.html // https://papers.nips.cc/paper/2019/hash/e6e713296627dff6475085cc6a224464-Abstract.html
optional bool box_consistency_tightness = 21 [default=false]; optional bool box_consistency_tightness = 21 [default = false];
optional int32 color_consistency_warmup_steps = 22 [default=0]; optional int32 color_consistency_warmup_steps = 22 [default = 0];
optional int32 color_consistency_warmup_start = 23 [default=0]; optional int32 color_consistency_warmup_start = 23 [default = 0];
// This flag controls whether or not we use the outputs from only the
// last stage of the hourglass for training the mask-heads.
// DeepMAC has been refactored to process the entire batch at once, // DeepMAC has been refactored to process the entire batch at once,
// instead of the previous (simple) approach of processing one sample at // instead of the previous (simple) approach of processing one sample at
// a time. Because of this, the memory consumption has increased and // a time. Because of this, we need to set this flag to continue using
// it's crucial to only feed the mask head the last stage outputs // the old models with the same training hardware.
// from the hourglass. Doing so halves the memory requirement of the
// mask head and does not cause a drop in evaluation metrics. // This flag is not needed for 1024x1024 models. The performance and
optional bool use_only_last_stage = 24 [default=false]; // memory usage are same as before.
// For 512x512 models
// - Setting this flag to true will let the model train on TPU-v3 32
// chips. We observed a small (0.26 mAP) performance drop when doing so.
// - Setting this flag to false (default) increases the TPU requirement
// to TPU-v3 128 and reproduces previously demonstrated performance
// within error bars.
optional bool use_only_last_stage = 24 [default = false];
} }
optional DeepMACMaskEstimation deepmac_mask_estimation = 14; optional DeepMACMaskEstimation deepmac_mask_estimation = 14;
...@@ -506,7 +517,7 @@ message CenterNet { ...@@ -506,7 +517,7 @@ message CenterNet {
} }
enum LossNormalize { enum LossNormalize {
NORMALIZE_AUTO = 0; // SUM for 2D inputs (dice loss) and MEAN for others. NORMALIZE_AUTO = 0; // SUM for 2D inputs (dice loss) and MEAN for others.
NORMALIZE_GROUNDTRUTH_COUNT = 1; NORMALIZE_GROUNDTRUTH_COUNT = 1;
NORMALIZE_BALANCED = 3; NORMALIZE_BALANCED = 3;
} }
...@@ -547,4 +558,3 @@ message CenterNetFeatureExtractor { ...@@ -547,4 +558,3 @@ message CenterNetFeatureExtractor {
optional string upsampling_interpolation = 11 [default = 'nearest']; optional string upsampling_interpolation = 11 [default = 'nearest'];
} }
...@@ -20,4 +20,4 @@ from tensorflow_models import vision ...@@ -20,4 +20,4 @@ from tensorflow_models import vision
from official import core from official import core
from official.modeling import hyperparams from official.modeling import hyperparams
from official.modeling import optimization from official.modeling import optimization
from official.modeling import tf_utils from official.modeling import tf_utils as utils
...@@ -13,5 +13,5 @@ ...@@ -13,5 +13,5 @@
# limitations under the License. # limitations under the License.
"""TensorFlow Models Vision Libraries.""" """TensorFlow Models Vision Libraries."""
from official.vision.beta import configs from official.vision import configs
from official.vision.beta.modeling import * from official.vision.modeling import *
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment