Commit a17394cc authored by Liang-Chieh Chen, committed by Hui Hui

PiperOrigin-RevId: 205684720

parent 7922c9eb
@@ -33,10 +33,10 @@ works:
* DeepLabv3+:
```
@article{deeplabv3plus2018,
@inproceedings{deeplabv3plus2018,
title={Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation},
author={Liang-Chieh Chen and Yukun Zhu and George Papandreou and Florian Schroff and Hartwig Adam},
journal={arXiv:1802.02611},
booktitle={ECCV},
year={2018}
}
```
@@ -45,7 +45,7 @@ works:
```
@inproceedings{mobilenetv22018,
title={Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation},
title={MobileNetV2: Inverted Residuals and Linear Bottlenecks},
author={Mark Sandler and Andrew Howard and Menglong Zhu and Andrey Zhmoginov and Liang-Chieh Chen},
booktitle={CVPR},
year={2018}
@@ -78,6 +78,7 @@ Some segmentation results on Flickr images:
* Liang-Chieh Chen, github: [aquariusjay](https://github.com/aquariusjay)
* YuKun Zhu, github: [yknzhu](https://github.com/YknZhu)
* George Papandreou, github: [gpapan](https://github.com/gpapan)
* Hui Hui, github: [huihui-personal](https://github.com/huihui-personal)
## Table of Contents
@@ -154,8 +155,8 @@ and Cityscapes.
[[link]](http://arxiv.org/abs/1706.05587). arXiv: 1706.05587, 2017.
4. **Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation**<br />
Liang-Chieh Chen, Yukun Zhu, George Papandreou, Florian Schroff, Hartwig Adam. arXiv: 1802.02611.<br />
[[link]](https://arxiv.org/abs/1802.02611). arXiv: 1802.02611, 2018.
Liang-Chieh Chen, Yukun Zhu, George Papandreou, Florian Schroff, Hartwig Adam.<br />
[[link]](https://arxiv.org/abs/1802.02611). In ECCV, 2018.
5. **ParseNet: Looking Wider to See Better**<br />
Wei Liu, Andrew Rabinovich, Alexander C Berg<br />
@@ -169,9 +170,9 @@ and Cityscapes.
Sergey Ioffe, Christian Szegedy <br />
[[link]](https://arxiv.org/abs/1502.03167). In ICML, 2015.
8. **Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation**<br />
8. **MobileNetV2: Inverted Residuals and Linear Bottlenecks**<br />
Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen<br />
[[link]](https://arxiv.org/abs/1801.04381). arXiv:1801.04381, 2018.
[[link]](https://arxiv.org/abs/1801.04381). In CVPR, 2018.
9. **Xception: Deep Learning with Depthwise Separable Convolutions**<br />
François Chollet<br />
......
@@ -17,6 +17,7 @@
Common flags from train/eval/vis/export_model.py are collected in this script.
"""
import collections
import copy
import tensorflow as tf
@@ -51,6 +52,12 @@ flags.DEFINE_multi_float('image_pyramid', None,
flags.DEFINE_boolean('add_image_level_feature', True,
'Add image level feature.')
flags.DEFINE_multi_integer(
'image_pooling_crop_size', None,
'Image pooling crop size [height, width] used in the ASPP module. When '
'value is None, the model performs image pooling with "crop_size". This '
'flag is useful when one wants to use different image pooling sizes.')
flags.DEFINE_boolean('aspp_with_batch_norm', True,
'Use batch norm parameters for ASPP or not.')
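For intuition, the new `image_pooling_crop_size` flag feeds the ASPP pooling-window arithmetic in model.py. A minimal sketch of that arithmetic, assuming `scale_dimension` follows the repo's `(dim - 1) * scale + 1` convention and using hypothetical sizes:

```python
# Sketch of how image_pooling_crop_size turns into an ASPP pooling window.
# scale_dimension here mirrors model.py's integer branch; sizes are hypothetical.
def scale_dimension(dim, scale):
  return int((float(dim) - 1.0) * scale + 1.0)

output_stride = 16
image_pooling_crop_size = [257, 257]  # smaller than a 513x513 training crop

pool_height = scale_dimension(image_pooling_crop_size[0], 1.0 / output_stride)
pool_width = scale_dimension(image_pooling_crop_size[1], 1.0 / output_stride)
print(pool_height, pool_width)  # 17 17 -- the avg-pool kernel in feature space
```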
@@ -106,6 +113,7 @@ class ModelOptions(
'output_stride',
'merge_method',
'add_image_level_feature',
'image_pooling_crop_size',
'aspp_with_batch_norm',
'aspp_with_separable_conv',
'multi_grid',
@@ -140,7 +148,13 @@ class ModelOptions(
return super(ModelOptions, cls).__new__(
cls, outputs_to_num_classes, crop_size, atrous_rates, output_stride,
FLAGS.merge_method, FLAGS.add_image_level_feature,
FLAGS.aspp_with_batch_norm, FLAGS.aspp_with_separable_conv,
FLAGS.multi_grid, FLAGS.decoder_output_stride,
FLAGS.decoder_use_separable_conv, FLAGS.logits_kernel_size,
FLAGS.model_variant, FLAGS.depth_multiplier)
FLAGS.image_pooling_crop_size, FLAGS.aspp_with_batch_norm,
FLAGS.aspp_with_separable_conv, FLAGS.multi_grid,
FLAGS.decoder_output_stride, FLAGS.decoder_use_separable_conv,
FLAGS.logits_kernel_size, FLAGS.model_variant, FLAGS.depth_multiplier)
def __deepcopy__(self, memo):
return ModelOptions(copy.deepcopy(self.outputs_to_num_classes),
self.crop_size,
self.atrous_rates,
self.output_stride)
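Context for the `__deepcopy__` override: `ModelOptions` is a namedtuple subclass whose `__new__` takes fewer arguments than the tuple has fields (the rest come from FLAGS), so the default deepcopy path, which rebuilds the tuple from all of its fields, would raise a TypeError. A self-contained sketch of the same pattern with a made-up, smaller field set:

```python
import collections
import copy

class Options(collections.namedtuple(
    'Options', ['outputs_to_num_classes', 'crop_size', 'merge_method'])):

  def __new__(cls, outputs_to_num_classes, crop_size=None):
    # merge_method comes from a default, not from the caller -- so the
    # signature no longer matches the field list, and copy.deepcopy's
    # default tuple reconstruction (cls.__new__(cls, *all_fields)) would fail.
    return super(Options, cls).__new__(
        cls, outputs_to_num_classes, crop_size, 'max')

  def __deepcopy__(self, memo):
    # Rebuild through the real constructor; only the mutable dict needs
    # a deep copy, matching the ModelOptions change above.
    return Options(copy.deepcopy(self.outputs_to_num_classes), self.crop_size)

opts = Options({'semantic': 21})
clone = copy.deepcopy(opts)
clone.outputs_to_num_classes['semantic'] = 22
assert opts.outputs_to_num_classes['semantic'] == 21  # original is untouched
```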
@@ -14,6 +14,7 @@
# ==============================================================================
"""Tests for common.py."""
import copy
import tensorflow as tf
@@ -29,6 +30,23 @@ class CommonTest(tf.test.TestCase):
self.assertEqual(model_options.outputs_to_num_classes[common.OUTPUT_TYPE],
num_classes)
def testDeepcopy(self):
num_classes = 21
model_options = common.ModelOptions(
outputs_to_num_classes={common.OUTPUT_TYPE: num_classes})
model_options_new = copy.deepcopy(model_options)
self.assertEqual((model_options_new.
outputs_to_num_classes[common.OUTPUT_TYPE]),
num_classes)
num_classes_new = 22
model_options_new.outputs_to_num_classes[common.OUTPUT_TYPE] = (
num_classes_new)
self.assertEqual(model_options.outputs_to_num_classes[common.OUTPUT_TYPE],
num_classes)
self.assertEqual((model_options_new.
outputs_to_num_classes[common.OUTPUT_TYPE]),
num_classes_new)
if __name__ == '__main__':
tf.test.main()
@@ -294,13 +294,13 @@
" try:\n",
" f = urllib.request.urlopen(url)\n",
" jpeg_str = f.read()\n",
" original_im = Image.open(BytesIO(jpeg_str))\n",
" orignal_im = Image.open(BytesIO(jpeg_str))\n",
" except IOError:\n",
" print('Cannot retrieve image. Please check url: ' + url)\n",
" return\n",
"\n",
" print('running deeplab on image %s...' % url)\n",
" resized_im, seg_map = MODEL.run(original_im)\n",
" resized_im, seg_map = MODEL.run(orignal_im)\n",
"\n",
" vis_segmentation(resized_im, seg_map)\n",
"\n",
......
@@ -95,6 +95,7 @@ def preprocess_image_and_label(image,
original_image = tf.identity(processed_image)
# Data augmentation by randomly scaling the inputs.
if is_training:
scale = preprocess_utils.get_random_scale(
min_scale_factor, max_scale_factor, scale_factor_step_size)
processed_image, label = preprocess_utils.randomly_scale_image_and_label(
......
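As background for the augmentation step above: the scale factor is drawn between `min_scale_factor` and `max_scale_factor`, optionally snapped to a step grid. A rough NumPy sketch of that sampling logic (the real `preprocess_utils.get_random_scale` builds TF graph ops, so this is illustrative only):

```python
import numpy as np

def get_random_scale(min_scale, max_scale, step_size):
  # Illustrative re-implementation; mirrors the intended sampling, not
  # the exact TF ops in preprocess_utils.
  if min_scale == max_scale:
    return min_scale
  if step_size == 0:
    return float(np.random.uniform(min_scale, max_scale))
  # Discrete grid: min, min + step, ..., max.
  num_steps = int((max_scale - min_scale) / step_size + 1)
  return float(np.random.choice(np.linspace(min_scale, max_scale, num_steps)))

print(get_random_scale(0.5, 2.0, 0.25))  # one of 0.5, 0.75, ..., 2.0
```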
@@ -237,9 +237,6 @@ def multi_scale_logits(images,
# Setup default values.
if not image_pyramid:
image_pyramid = [1.0]
if model_options.crop_size is None and model_options.add_image_level_feature:
raise ValueError(
'Crop size must be specified for using image-level feature.')
crop_height = (
model_options.crop_size[0]
if model_options.crop_size else tf.shape(images)[1])
@@ -378,18 +375,39 @@ def extract_features(images,
branch_logits = []
if model_options.add_image_level_feature:
pool_height = scale_dimension(model_options.crop_size[0],
if model_options.crop_size is not None:
image_pooling_crop_size = model_options.image_pooling_crop_size
# If image_pooling_crop_size is not specified, use crop_size.
if image_pooling_crop_size is None:
image_pooling_crop_size = model_options.crop_size
pool_height = scale_dimension(image_pooling_crop_size[0],
1. / model_options.output_stride)
pool_width = scale_dimension(model_options.crop_size[1],
pool_width = scale_dimension(image_pooling_crop_size[1],
1. / model_options.output_stride)
image_feature = slim.avg_pool2d(
features, [pool_height, pool_width], [pool_height, pool_width],
padding='VALID')
features, [pool_height, pool_width], [1, 1], padding='VALID')
resize_height = scale_dimension(model_options.crop_size[0],
1. / model_options.output_stride)
resize_width = scale_dimension(model_options.crop_size[1],
1. / model_options.output_stride)
else:
# If crop_size is None, we simply do global pooling.
pool_height = tf.shape(features)[1]
pool_width = tf.shape(features)[2]
image_feature = tf.reduce_mean(features, axis=[1, 2])[:, tf.newaxis,
tf.newaxis]
resize_height = pool_height
resize_width = pool_width
image_feature = slim.conv2d(
image_feature, depth, 1, scope=IMAGE_POOLING_SCOPE)
image_feature = tf.image.resize_bilinear(
image_feature, [pool_height, pool_width], align_corners=True)
image_feature.set_shape([None, pool_height, pool_width, depth])
image_feature, [resize_height, resize_width], align_corners=True)
# Set shape for resize_height/resize_width if they are not Tensor.
if isinstance(resize_height, tf.Tensor):
resize_height = None
if isinstance(resize_width, tf.Tensor):
resize_width = None
image_feature.set_shape([None, resize_height, resize_width, depth])
branch_logits.append(image_feature)
# Employ a 1x1 convolution.
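The behavioral change worth noting in this hunk: `avg_pool2d` previously used the window size as its stride, collapsing the feature map to a single image-level vector, while the new code pools with stride `[1, 1]`, so a window smaller than the feature map yields a grid of local averages. A shape-only sketch with assumed sizes:

```python
def valid_pool_out(size, kernel, stride):
  # Output length of a VALID-padded pooling op.
  return (size - kernel) // stride + 1

feat = 33            # e.g. a 513 crop at output_stride 16 -> 33x33 features
print(valid_pool_out(feat, kernel=33, stride=33))  # old: 1 (global average)
print(valid_pool_out(feat, kernel=17, stride=1))   # new: 17x17 local averages
```

Either result is then passed through the 1x1 convolution and bilinearly resized to the crop-derived 33x33 before joining the other ASPP branches.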
@@ -453,9 +471,14 @@ def _get_logits(images,
fine_tune_batch_norm=fine_tune_batch_norm)
if model_options.decoder_output_stride is not None:
decoder_height = scale_dimension(model_options.crop_size[0],
if model_options.crop_size is None:
height = tf.shape(images)[1]
width = tf.shape(images)[2]
else:
height, width = model_options.crop_size
decoder_height = scale_dimension(height,
1.0 / model_options.decoder_output_stride)
decoder_width = scale_dimension(model_options.crop_size[1],
decoder_width = scale_dimension(width,
1.0 / model_options.decoder_output_stride)
features = refine_by_decoder(
features,
@@ -557,8 +580,11 @@ def refine_by_decoder(features,
for j, feature in enumerate(decoder_features_list):
decoder_features_list[j] = tf.image.resize_bilinear(
feature, [decoder_height, decoder_width], align_corners=True)
decoder_features_list[j].set_shape(
[None, decoder_height, decoder_width, None])
h = (None if isinstance(decoder_height, tf.Tensor)
else decoder_height)
w = (None if isinstance(decoder_width, tf.Tensor)
else decoder_width)
decoder_features_list[j].set_shape([None, h, w, None])
decoder_depth = 256
if decoder_use_separable_conv:
decoder_features = split_separable_conv2d(
......
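A pattern repeated in both model.py hunks: a spatial dimension may now be either a Python int (crop_size known) or a tf.Tensor (derived from tf.shape when crop_size is None), and set_shape must receive None in the dynamic case. A minimal TF1-style sketch of the guard:

```python
import tensorflow as tf

def static_or_none(dim):
  # set_shape accepts only ints or None; a tf.Tensor dimension is not
  # known at graph-construction time, so it must become None.
  return None if isinstance(dim, tf.Tensor) else dim

images = tf.placeholder(tf.float32, [None, None, None, 256])
height = tf.shape(images)[1]  # dynamic
width = 65                    # static, e.g. from a known crop size

resized = tf.image.resize_bilinear(images, [height, width], align_corners=True)
resized.set_shape([None, static_or_none(height), static_or_none(width), 256])
print(resized.get_shape())  # (?, ?, 65, 256)
```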
@@ -21,6 +21,8 @@ defined by the different datasets. Supported colormaps are:
* Cityscapes dataset (https://www.cityscapes-dataset.com).
* Mapillary Vistas (https://research.mapillary.com).
* PASCAL VOC 2012 (http://host.robots.ox.ac.uk/pascal/VOC/).
"""
......
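For reference, the PASCAL palette this module exposes is generated bit-by-bit from the label index. A sketch of the standard algorithm (this mirrors what the DeepLab demo notebook ships; treat it as illustrative rather than the module's exact code):

```python
import numpy as np

def create_pascal_label_colormap():
  # Spread each label's bits across the high-order bits of R, G, B.
  colormap = np.zeros((256, 3), dtype=int)
  ind = np.arange(256, dtype=int)
  for shift in reversed(range(8)):
    for channel in range(3):
      colormap[:, channel] |= ((ind >> channel) & 1) << shift
    ind >>= 3
  return colormap

print(create_pascal_label_colormap()[15])  # person -> [192 128 128]
```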
@@ -112,10 +112,12 @@ def get_model_init_fn(train_logdir,
variables_to_restore = slim.get_variables_to_restore(exclude=exclude_list)
if variables_to_restore:
return slim.assign_from_checkpoint_fn(
tf_initial_checkpoint,
variables_to_restore,
ignore_missing_vars=ignore_missing_vars)
return None
def get_model_gradient_multipliers(last_layers, last_layer_gradient_multiplier):
......
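Since get_model_init_fn can now return None (nothing to restore), call sites should tolerate that. A hedged usage sketch; slim.learning.train already accepts init_fn=None, and the argument names beyond train_logdir are assumptions for illustration:

```python
# Hypothetical call site (train.py-style); names other than train_logdir
# are assumptions, not confirmed by the hunk above.
init_fn = train_utils.get_model_init_fn(
    train_logdir,
    tf_initial_checkpoint,
    initialize_last_layer=True,
    last_layers=last_layers,
    ignore_missing_vars=True)

# slim.learning.train treats init_fn=None as "no checkpoint restore",
# so the new None return needs no extra branching here.
slim.learning.train(
    train_op,
    logdir=train_logdir,
    init_fn=init_fn)
```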