Unverified commit 4a0ee4a2, authored by aquariusjay and committed by GitHub

Merge pull request #4877 from huihui-personal/master

PiperOrigin-RevId: 205684720
parents 6c210845 c961e92d
@@ -33,10 +33,10 @@ works:
 * DeepLabv3+:

 ```
-@article{deeplabv3plus2018,
+@inproceedings{deeplabv3plus2018,
   title={Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation},
   author={Liang-Chieh Chen and Yukun Zhu and George Papandreou and Florian Schroff and Hartwig Adam},
-  journal={arXiv:1802.02611},
+  booktitle={ECCV},
   year={2018}
 }
 ```
@@ -45,7 +45,7 @@ works:
 ```
 @inproceedings{mobilenetv22018,
-  title={Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation},
+  title={MobileNetV2: Inverted Residuals and Linear Bottlenecks},
   author={Mark Sandler and Andrew Howard and Menglong Zhu and Andrey Zhmoginov and Liang-Chieh Chen},
   booktitle={CVPR},
   year={2018}
@@ -78,6 +78,7 @@ Some segmentation results on Flickr images:
 * Liang-Chieh Chen, github: [aquariusjay](https://github.com/aquariusjay)
 * YuKun Zhu, github: [yknzhu](https://github.com/YknZhu)
 * George Papandreou, github: [gpapan](https://github.com/gpapan)
+* Hui Hui, github: [huihui-personal](https://github.com/huihui-personal)

 ## Tables of Contents
@@ -154,8 +155,8 @@ and Cityscapes.
    [[link]](http://arxiv.org/abs/1706.05587). arXiv: 1706.05587, 2017.

 4. **Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation**<br />
-   Liang-Chieh Chen, Yukun Zhu, George Papandreou, Florian Schroff, Hartwig Adam. arXiv: 1802.02611.<br />
-   [[link]](https://arxiv.org/abs/1802.02611). arXiv: 1802.02611, 2018.
+   Liang-Chieh Chen, Yukun Zhu, George Papandreou, Florian Schroff, Hartwig Adam.<br />
+   [[link]](https://arxiv.org/abs/1802.02611). In ECCV, 2018.

 5. **ParseNet: Looking Wider to See Better**<br />
    Wei Liu, Andrew Rabinovich, Alexander C Berg<br />
@@ -169,9 +170,9 @@ and Cityscapes.
    Sergey Ioffe, Christian Szegedy <br />
    [[link]](https://arxiv.org/abs/1502.03167). In ICML, 2015.

-8. **Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation**<br />
+8. **MobileNetV2: Inverted Residuals and Linear Bottlenecks**<br />
    Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen<br />
-   [[link]](https://arxiv.org/abs/1801.04381). arXiv:1801.04381, 2018.
+   [[link]](https://arxiv.org/abs/1801.04381). In CVPR, 2018.

 9. **Xception: Deep Learning with Depthwise Separable Convolutions**<br />
    François Chollet<br />
...
@@ -17,6 +17,7 @@
 Common flags from train/eval/vis/export_model.py are collected in this script.
 """
 import collections
+import copy

 import tensorflow as tf
@@ -51,6 +52,12 @@ flags.DEFINE_multi_float('image_pyramid', None,
 flags.DEFINE_boolean('add_image_level_feature', True,
                      'Add image level feature.')

+flags.DEFINE_multi_integer(
+    'image_pooling_crop_size', None,
+    'Image pooling crop size [height, width] used in the ASPP module. When '
+    'value is None, the model performs image pooling with "crop_size". This '
+    'flag is useful when one wants to use different image pooling sizes.')
+
 flags.DEFINE_boolean('aspp_with_batch_norm', True,
                      'Use batch norm parameters for ASPP or not.')
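Note on the new flag's wire format: DEFINE_multi_integer collects one integer per flag occurrence, so [height, width] is supplied by repeating the flag. A hedged sketch using absl.flags directly (which tf.app.flags wraps in contemporary TF 1.x releases; the standalone parsing call below is illustrative):

```
# Sketch: how a multi-integer flag such as image_pooling_crop_size parses.
from absl import flags

FLAGS = flags.FLAGS
flags.DEFINE_multi_integer('image_pooling_crop_size', None,
                           'Image pooling crop size [height, width].')

# Each occurrence appends one value, yielding [height, width].
FLAGS(['prog',
       '--image_pooling_crop_size=513',
       '--image_pooling_crop_size=513'])
print(FLAGS.image_pooling_crop_size)  # [513, 513]
```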
@@ -106,6 +113,7 @@ class ModelOptions(
         'output_stride',
         'merge_method',
         'add_image_level_feature',
+        'image_pooling_crop_size',
         'aspp_with_batch_norm',
         'aspp_with_separable_conv',
         'multi_grid',
@@ -140,7 +148,13 @@ class ModelOptions(
     return super(ModelOptions, cls).__new__(
         cls, outputs_to_num_classes, crop_size, atrous_rates, output_stride,
         FLAGS.merge_method, FLAGS.add_image_level_feature,
-        FLAGS.aspp_with_batch_norm, FLAGS.aspp_with_separable_conv,
-        FLAGS.multi_grid, FLAGS.decoder_output_stride,
-        FLAGS.decoder_use_separable_conv, FLAGS.logits_kernel_size,
-        FLAGS.model_variant, FLAGS.depth_multiplier)
+        FLAGS.image_pooling_crop_size, FLAGS.aspp_with_batch_norm,
+        FLAGS.aspp_with_separable_conv, FLAGS.multi_grid,
+        FLAGS.decoder_output_stride, FLAGS.decoder_use_separable_conv,
+        FLAGS.logits_kernel_size, FLAGS.model_variant, FLAGS.depth_multiplier)
+
+  def __deepcopy__(self, memo):
+    return ModelOptions(copy.deepcopy(self.outputs_to_num_classes),
+                        self.crop_size,
+                        self.atrous_rates,
+                        self.output_stride)
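Why the hand-written __deepcopy__ is needed: copy.deepcopy reconstructs a namedtuple by passing all of its fields back to __new__, but ModelOptions overrides __new__ with a narrower signature (the remaining fields are filled from FLAGS), so the default path would raise a TypeError. A minimal standalone sketch with hypothetical names:

```
import collections
import copy

class Options(collections.namedtuple(
    'Options', ['outputs_to_num_classes', 'crop_size', 'merge_method'])):

  def __new__(cls, outputs_to_num_classes, crop_size=None):
    # The third field comes from a global default, mirroring FLAGS above.
    return super(Options, cls).__new__(
        cls, outputs_to_num_classes, crop_size, 'max')

  def __deepcopy__(self, memo):
    # Without this, deepcopy would call __new__ with all three fields.
    return Options(copy.deepcopy(self.outputs_to_num_classes, memo),
                   self.crop_size)

opts = Options({'semantic': 21})
opts_copy = copy.deepcopy(opts)
opts_copy.outputs_to_num_classes['semantic'] = 22
print(opts.outputs_to_num_classes['semantic'])  # 21: original untouched
```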
@@ -14,6 +14,7 @@
 # ==============================================================================

 """Tests for common.py."""
+import copy

 import tensorflow as tf
@@ -29,6 +30,23 @@ class CommonTest(tf.test.TestCase):
     self.assertEqual(model_options.outputs_to_num_classes[common.OUTPUT_TYPE],
                      num_classes)

+  def testDeepcopy(self):
+    num_classes = 21
+    model_options = common.ModelOptions(
+        outputs_to_num_classes={common.OUTPUT_TYPE: num_classes})
+    model_options_new = copy.deepcopy(model_options)
+    self.assertEqual(
+        model_options_new.outputs_to_num_classes[common.OUTPUT_TYPE],
+        num_classes)
+
+    num_classes_new = 22
+    model_options_new.outputs_to_num_classes[common.OUTPUT_TYPE] = (
+        num_classes_new)
+    self.assertEqual(model_options.outputs_to_num_classes[common.OUTPUT_TYPE],
+                     num_classes)
+    self.assertEqual(
+        model_options_new.outputs_to_num_classes[common.OUTPUT_TYPE],
+        num_classes_new)
+
 if __name__ == '__main__':
   tf.test.main()
@@ -95,6 +95,7 @@ def preprocess_image_and_label(image,
   original_image = tf.identity(processed_image)

   # Data augmentation by randomly scaling the inputs.
+  if is_training:
     scale = preprocess_utils.get_random_scale(
         min_scale_factor, max_scale_factor, scale_factor_step_size)
     processed_image, label = preprocess_utils.randomly_scale_image_and_label(
...
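The effect of the new is_training guard in input_preprocess.py is that random-scale augmentation only runs during training; eval and vis see the image at its native scale. A minimal sketch of the pattern in TF 1.x (the helper name below is illustrative, not the real preprocess_utils API):

```
import tensorflow as tf

def maybe_random_scale(image, min_scale=0.5, max_scale=2.0, is_training=True):
  """Randomly scales the image during training; identity otherwise."""
  if not is_training:
    return image
  scale = tf.random_uniform([], minval=min_scale, maxval=max_scale)
  new_size = tf.to_int32(tf.to_float(tf.shape(image)[0:2]) * scale)
  # A paired label would be resized with nearest neighbor to keep class ids.
  return tf.image.resize_images(image, new_size)
```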
@@ -237,9 +237,6 @@ def multi_scale_logits(images,
   # Setup default values.
   if not image_pyramid:
     image_pyramid = [1.0]
-  if model_options.crop_size is None and model_options.add_image_level_feature:
-    raise ValueError(
-        'Crop size must be specified for using image-level feature.')

   crop_height = (
       model_options.crop_size[0]
       if model_options.crop_size else tf.shape(images)[1])
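Dropping the ValueError is safe because extract_features (next hunk) now falls back to global pooling when crop_size is None. The context lines above show the recurring fallback pattern, sketched in isolation here (hypothetical helper name):

```
import tensorflow as tf

def crop_hw(images, crop_size):
  # Static dimensions when the crop size is known, dynamic ones otherwise.
  if crop_size:
    return crop_size[0], crop_size[1]
  return tf.shape(images)[1], tf.shape(images)[2]
```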
@@ -378,18 +375,39 @@ def extract_features(images,
   branch_logits = []

   if model_options.add_image_level_feature:
-    pool_height = scale_dimension(model_options.crop_size[0],
-                                  1. / model_options.output_stride)
-    pool_width = scale_dimension(model_options.crop_size[1],
-                                 1. / model_options.output_stride)
-    image_feature = slim.avg_pool2d(
-        features, [pool_height, pool_width], [pool_height, pool_width],
-        padding='VALID')
+    if model_options.crop_size is not None:
+      image_pooling_crop_size = model_options.image_pooling_crop_size
+      # If image_pooling_crop_size is not specified, use crop_size.
+      if image_pooling_crop_size is None:
+        image_pooling_crop_size = model_options.crop_size
+      pool_height = scale_dimension(image_pooling_crop_size[0],
+                                    1. / model_options.output_stride)
+      pool_width = scale_dimension(image_pooling_crop_size[1],
+                                   1. / model_options.output_stride)
+      image_feature = slim.avg_pool2d(
+          features, [pool_height, pool_width], [1, 1], padding='VALID')
+      resize_height = scale_dimension(model_options.crop_size[0],
+                                      1. / model_options.output_stride)
+      resize_width = scale_dimension(model_options.crop_size[1],
+                                     1. / model_options.output_stride)
+    else:
+      # If crop_size is None, we simply do global pooling.
+      pool_height = tf.shape(features)[1]
+      pool_width = tf.shape(features)[2]
+      image_feature = tf.reduce_mean(features, axis=[1, 2])[:, tf.newaxis,
+                                                            tf.newaxis]
+      resize_height = pool_height
+      resize_width = pool_width
     image_feature = slim.conv2d(
         image_feature, depth, 1, scope=IMAGE_POOLING_SCOPE)
     image_feature = tf.image.resize_bilinear(
-        image_feature, [pool_height, pool_width], align_corners=True)
-    image_feature.set_shape([None, pool_height, pool_width, depth])
+        image_feature, [resize_height, resize_width], align_corners=True)
+    # Set shape for resize_height/resize_width if they are not Tensor.
+    if isinstance(resize_height, tf.Tensor):
+      resize_height = None
+    if isinstance(resize_width, tf.Tensor):
+      resize_width = None
+    image_feature.set_shape([None, resize_height, resize_width, depth])
     branch_logits.append(image_feature)

   # Employ a 1x1 convolution.
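The pooling and resize sizes above depend on scale_dimension mapping a crop dimension to the feature-map size at a given output_stride. A sketch of its integer branch (the real helper also handles tf.Tensor inputs); DeepLab crop sizes have the form 32k + 1, hence the minus/plus one:

```
def scale_dimension(dim, scale):
  # Integer branch only; assumes dim is a Python number.
  return int((float(dim) - 1.0) * scale + 1.0)

# With crop_size 513 and output_stride 16, image pooling spans the full
# 33x33 feature map; image_pooling_crop_size lets the pooled window differ
# from the resize target derived from crop_size.
print(scale_dimension(513, 1.0 / 16))  # 33
print(scale_dimension(257, 1.0 / 16))  # 17
```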
@@ -453,9 +471,14 @@ def _get_logits(images,
       fine_tune_batch_norm=fine_tune_batch_norm)

   if model_options.decoder_output_stride is not None:
-    decoder_height = scale_dimension(model_options.crop_size[0],
-                                     1.0 / model_options.decoder_output_stride)
-    decoder_width = scale_dimension(model_options.crop_size[1],
-                                    1.0 / model_options.decoder_output_stride)
+    if model_options.crop_size is None:
+      height = tf.shape(images)[1]
+      width = tf.shape(images)[2]
+    else:
+      height, width = model_options.crop_size
+    decoder_height = scale_dimension(height,
+                                     1.0 / model_options.decoder_output_stride)
+    decoder_width = scale_dimension(width,
+                                    1.0 / model_options.decoder_output_stride)
     features = refine_by_decoder(
         features,
@@ -557,8 +580,11 @@ def refine_by_decoder(features,
         for j, feature in enumerate(decoder_features_list):
           decoder_features_list[j] = tf.image.resize_bilinear(
               feature, [decoder_height, decoder_width], align_corners=True)
-          decoder_features_list[j].set_shape(
-              [None, decoder_height, decoder_width, None])
+          h = (None if isinstance(decoder_height, tf.Tensor)
+               else decoder_height)
+          w = (None if isinstance(decoder_width, tf.Tensor)
+               else decoder_width)
+          decoder_features_list[j].set_shape([None, h, w, None])
         decoder_depth = 256
         if decoder_use_separable_conv:
           decoder_features = split_separable_conv2d(
...
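Both set_shape call sites in this file rely on the same rule: static TensorShape dimensions must be Python ints or None, so a dimension that is still a tf.Tensor at graph-construction time is recorded as unknown. A small TF 1.x sketch (hypothetical helper name):

```
import tensorflow as tf

def set_static_hw(x, height, width):
  h = None if isinstance(height, tf.Tensor) else height
  w = None if isinstance(width, tf.Tensor) else width
  x.set_shape([None, h, w, None])
  return x

x1 = tf.placeholder(tf.float32, [None, None, None, 3])
print(set_static_hw(x1, 129, 129).shape)  # (?, 129, 129, 3)

x2 = tf.placeholder(tf.float32, [None, None, None, 3])
print(set_static_hw(x2, tf.shape(x2)[1], tf.shape(x2)[2]).shape)  # (?, ?, ?, 3)
```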
@@ -21,6 +21,8 @@ defined by the different datasets. Supported colormaps are:

 * Cityscapes dataset (https://www.cityscapes-dataset.com).

+* Mapillary Vistas (https://research.mapillary.com).
+
 * PASCAL VOC 2012 (http://host.robots.ox.ac.uk/pascal/VOC/).
 """
...
@@ -112,10 +112,12 @@ def get_model_init_fn(train_logdir,

   variables_to_restore = slim.get_variables_to_restore(exclude=exclude_list)

+  if variables_to_restore:
     return slim.assign_from_checkpoint_fn(
         tf_initial_checkpoint,
         variables_to_restore,
         ignore_missing_vars=ignore_missing_vars)
+  return None


 def get_model_gradient_multipliers(last_layers, last_layer_gradient_multiplier):
...
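The guard in train_utils.py matters when exclude_list filters out every variable (for example, training entirely from scratch): slim.assign_from_checkpoint_fn would otherwise be handed an empty restore list, and callers can now treat a None init_fn as "nothing to restore". A stubbed, dependency-free sketch of the contract (not the real slim API):

```
def get_init_fn(variables_to_restore, checkpoint_path):
  if variables_to_restore:
    def init_fn(unused_session):
      # Real code calls slim.assign_from_checkpoint_fn(...) here.
      print('Restoring %d variables from %s'
            % (len(variables_to_restore), checkpoint_path))
    return init_fn
  return None  # Nothing to restore; caller skips checkpoint initialization.

print(get_init_fn([], '/tmp/model.ckpt'))  # None -> train from scratch.
```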