Commit 83490227 authored by Hui Hui

Source code for `Searching for Efficient Multi-Scale Architectures for Dense Image Prediction`

parent c961e92d
...@@ -52,6 +52,18 @@ works:
}
```
* Architecture search for dense prediction cell:
```
@inproceedings{dpc2018,
title={Searching for Efficient Multi-Scale Architectures for Dense Image Prediction},
author={Liang-Chieh Chen and Maxwell D. Collins and Yukun Zhu and George Papandreou and Barret Zoph and Florian Schroff and Hartwig Adam and Jonathon Shlens},
booktitle={NIPS},
year={2018}
}
```
In the current implementation, we support adopting the following network
backbones:
...@@ -114,6 +126,10 @@ with "deeplab".
## Change Logs
### September 5, 2018
Released Cityscapes pretrained checkpoints with the best dense prediction cell found by the architecture search.
### May 26, 2018
Updated ADE20K pretrained checkpoint.
......
...@@ -18,6 +18,7 @@ Common flags from train/eval/vis/export_model.py are collected in this script.
"""
import collections
import copy
import json
import tensorflow as tf
...@@ -85,6 +86,11 @@ flags.DEFINE_boolean('decoder_use_separable_conv', True,
flags.DEFINE_enum('merge_method', 'max', ['max', 'avg'],
                  'Scheme to merge multi scale features.')
flags.DEFINE_string(
'dense_prediction_cell_json',
'',
'A JSON file that specifies the dense prediction cell.')
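# Example: the released Cityscapes cell is specified in
# deeplab/core/dense_prediction_cell_branch5_top1_cityscapes.json, a JSON list
# of branch specifications such as
#   {"kernel": 3, "rate": [1, 6], "op": "conv", "input": -1}
# and is selected by passing
#   --dense_prediction_cell_json=deeplab/core/dense_prediction_cell_branch5_top1_cityscapes.json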
FLAGS = flags.FLAGS
# Constants
...@@ -122,6 +128,7 @@ class ModelOptions(
'logits_kernel_size',
'model_variant',
'depth_multiplier',
'dense_prediction_cell_config',
])):
"""Immutable class to hold model options."""
...@@ -145,13 +152,19 @@ class ModelOptions(
Returns:
A new ModelOptions instance.
"""
dense_prediction_cell_config = None
if FLAGS.dense_prediction_cell_json:
with tf.gfile.Open(FLAGS.dense_prediction_cell_json, 'r') as f:
dense_prediction_cell_config = json.load(f)
return super(ModelOptions, cls).__new__(
    cls, outputs_to_num_classes, crop_size, atrous_rates, output_stride,
    FLAGS.merge_method, FLAGS.add_image_level_feature,
    FLAGS.image_pooling_crop_size, FLAGS.aspp_with_batch_norm,
    FLAGS.aspp_with_separable_conv, FLAGS.multi_grid,
    FLAGS.decoder_output_stride, FLAGS.decoder_use_separable_conv,
    FLAGS.logits_kernel_size, FLAGS.model_variant, FLAGS.depth_multiplier,
    dense_prediction_cell_config)
def __deepcopy__(self, memo):
return ModelOptions(copy.deepcopy(self.outputs_to_num_classes),
......
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Dense Prediction Cell class that can be evolved in semantic segmentation.
DensePredictionCell is used as a `layer` in semantic segmentation whose
architecture is determined by `config`, a list of dictionaries, each
specifying one operation (branch) of the cell.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from deeplab.core import utils
slim = tf.contrib.slim
# Local constants.
_META_ARCHITECTURE_SCOPE = 'meta_architecture'
_CONCAT_PROJECTION_SCOPE = 'concat_projection'
_OP = 'op'
_CONV = 'conv'
_PYRAMID_POOLING = 'pyramid_pooling'
_KERNEL = 'kernel'
_RATE = 'rate'
_GRID_SIZE = 'grid_size'
_TARGET_SIZE = 'target_size'
_INPUT = 'input'
def dense_prediction_cell_hparams():
"""DensePredictionCell HParams.
Returns:
A dictionary of hyper-parameters used for dense prediction cell with keys:
- reduction_size: Integer, the number of output filters for each operation
inside the cell.
- dropout_on_concat_features: Boolean, apply dropout on the concatenated
features or not.
- dropout_on_projection_features: Boolean, apply dropout on the projection
features or not.
- dropout_keep_prob: Float, when `dropout_on_concat_features` or
`dropout_on_projection_features` is True, the `keep_prob` value used
in the dropout operation.
- concat_channels: Integer, the concatenated features will be
channel-reduced to `concat_channels` channels.
- conv_rate_multiplier: Integer, used to multiply the convolution rates.
This is useful when the output_stride is changed from 16 to 8, in which
case the convolution rates need to be doubled correspondingly.
"""
return {
'reduction_size': 256,
'dropout_on_concat_features': True,
'dropout_on_projection_features': False,
'dropout_keep_prob': 0.9,
'concat_channels': 256,
'conv_rate_multiplier': 1,
}
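# Example usage (a sketch): callers usually override only a subset of these
# defaults. For instance, model.py doubles the convolution rates when the
# output stride is reduced from 16 to 8:
#
#   hparams = dense_prediction_cell_hparams()
#   hparams.update({'conv_rate_multiplier': 16 // output_stride})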
class DensePredictionCell(object):
"""DensePredictionCell class used as a 'layer' in semantic segmentation."""
def __init__(self, config, hparams=None):
"""Initializes the dense prediction cell.
Args:
config: A list of dictionaries, each specifying one operation of the
dense prediction cell.
hparams: A dictionary of hyper-parameters, provided by users. This
dictionary will be used to update the default dictionary returned by
dense_prediction_cell_hparams().
Raises:
ValueError: If `conv_rate_multiplier` has value < 1.
"""
self.hparams = dense_prediction_cell_hparams()
if hparams is not None:
self.hparams.update(hparams)
self.config = config
# Check values in hparams are valid or not.
if self.hparams['conv_rate_multiplier'] < 1:
raise ValueError('conv_rate_multiplier cannot have value < 1.')
def _get_pyramid_pooling_arguments(
self, crop_size, output_stride, image_grid, image_pooling_crop_size=None):
"""Gets arguments for pyramid pooling.
Args:
crop_size: A list of two integers, [crop_height, crop_width] specifying
whole patch crop size.
output_stride: Integer, output stride value for extracted features.
image_grid: A list of two integers, [image_grid_height, image_grid_width],
specifying the grid over which pyramid pooling is performed.
image_pooling_crop_size: A list of two integers, [crop_height, crop_width]
specifying the crop size for image pooling operations. Note that we
decouple whole patch crop_size and image_pooling_crop_size as one could
perform the image_pooling with different crop sizes.
Returns:
A tuple ([resize_height, resize_width], [pooled_height, pooled_width]).
"""
resize_height = utils.scale_dimension(crop_size[0], 1. / output_stride)
resize_width = utils.scale_dimension(crop_size[1], 1. / output_stride)
# If image_pooling_crop_size is not specified, use crop_size.
if image_pooling_crop_size is None:
image_pooling_crop_size = crop_size
pooled_height = utils.scale_dimension(
image_pooling_crop_size[0], 1. / (output_stride * image_grid[0]))
pooled_width = utils.scale_dimension(
image_pooling_crop_size[1], 1. / (output_stride * image_grid[1]))
return ([resize_height, resize_width], [pooled_height, pooled_width])
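  # Worked example (mirroring dense_prediction_cell_test.py): with
  # crop_size=[513, 513], output_stride=16 and image_grid=[4, 4], the resized
  # feature size is scale_dimension(513, 1/16) = 33 per side, and each pooling
  # kernel is scale_dimension(513, 1/(16*4)) = 9 per side, i.e. the 33x33
  # features are average-pooled with 9x9 kernels arranged on a 4x4 grid.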
def _parse_operation(self, config, crop_size, output_stride,
image_pooling_crop_size=None):
"""Parses one operation.
When the operation is `pyramid_pooling`, we compute the required
hyper-parameters and save them in config.
Args:
config: A dictionary storing required hyper-parameters for one
operation.
crop_size: A list of two integers, [crop_height, crop_width] specifying
whole patch crop size.
output_stride: Integer, output stride value for extracted features.
image_pooling_crop_size: A list of two integers, [crop_height, crop_width]
specifying the crop size for image pooling operations. Note that we
decouple whole patch crop_size and image_pooling_crop_size as one could
perform the image_pooling with different crop sizes.
Returns:
A dictionary storing the related information for the operation.
"""
if config[_OP] == _PYRAMID_POOLING:
(config[_TARGET_SIZE],
config[_KERNEL]) = self._get_pyramid_pooling_arguments(
crop_size=crop_size,
output_stride=output_stride,
image_grid=config[_GRID_SIZE],
image_pooling_crop_size=image_pooling_crop_size)
return config
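  # Worked example (mirroring dense_prediction_cell_test.py): parsing
  #   {_OP: _PYRAMID_POOLING, _GRID_SIZE: [2, 2]}
  # with crop_size=[513, 513] and output_stride=16 fills in
  #   _TARGET_SIZE: [33, 33] and _KERNEL: [17, 17],
  # while conv operations pass through unchanged.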
def build_cell(self,
features,
output_stride=16,
crop_size=None,
image_pooling_crop_size=None,
weight_decay=0.00004,
reuse=None,
is_training=False,
fine_tune_batch_norm=False,
scope=None):
"""Builds the dense prediction cell based on the config.
Args:
features: Input feature map of size [batch, height, width, channels].
output_stride: Int, output stride at which the features were extracted.
crop_size: A list [crop_height, crop_width], determining the input
features resolution.
image_pooling_crop_size: A list of two integers, [crop_height, crop_width]
specifying the crop size for image pooling operations. Note that we
decouple whole patch crop_size and image_pooling_crop_size as one could
perform the image_pooling with different crop sizes.
weight_decay: Float, the weight decay for model variables.
reuse: Reuse the model variables or not.
is_training: Boolean, is training or not.
fine_tune_batch_norm: Boolean, fine-tuning batch norm parameters or not.
scope: Optional string, specifying the variable scope.
Returns:
Features after passing through the constructed dense prediction cell with
shape = [batch, height, width, channels] where channels are determined
by `reduction_size` returned by dense_prediction_cell_hparams().
Raises:
ValueError: If the convolution kernel size is neither 1x1 nor 3x3, or
the operation is not recognized.
"""
batch_norm_params = {
'is_training': is_training and fine_tune_batch_norm,
'decay': 0.9997,
'epsilon': 1e-5,
'scale': True,
}
hparams = self.hparams
with slim.arg_scope(
[slim.conv2d, slim.separable_conv2d],
weights_regularizer=slim.l2_regularizer(weight_decay),
activation_fn=tf.nn.relu,
normalizer_fn=slim.batch_norm,
padding='SAME',
stride=1,
reuse=reuse):
with slim.arg_scope([slim.batch_norm], **batch_norm_params):
with tf.variable_scope(scope, _META_ARCHITECTURE_SCOPE, [features]):
depth = hparams['reduction_size']
branch_logits = []
for i, current_config in enumerate(self.config):
scope = 'branch%d' % i
current_config = self._parse_operation(
config=current_config,
crop_size=crop_size,
output_stride=output_stride,
image_pooling_crop_size=image_pooling_crop_size)
tf.logging.info(current_config)
if current_config[_INPUT] < 0:
operation_input = features
else:
operation_input = branch_logits[current_config[_INPUT]]
if current_config[_OP] == _CONV:
if current_config[_KERNEL] == [1, 1] or current_config[
_KERNEL] == 1:
branch_logits.append(
slim.conv2d(operation_input, depth, 1, scope=scope))
else:
conv_rate = [r * hparams['conv_rate_multiplier']
for r in current_config[_RATE]]
branch_logits.append(
utils.split_separable_conv2d(
operation_input,
filters=depth,
kernel_size=current_config[_KERNEL],
rate=conv_rate,
weight_decay=weight_decay,
scope=scope))
elif current_config[_OP] == _PYRAMID_POOLING:
pooled_features = slim.avg_pool2d(
operation_input,
kernel_size=current_config[_KERNEL],
stride=[1, 1],
padding='VALID')
pooled_features = slim.conv2d(
pooled_features,
depth,
1,
scope=scope)
pooled_features = tf.image.resize_bilinear(
pooled_features,
current_config[_TARGET_SIZE],
align_corners=True)
# Set shape for resize_height/resize_width if they are not Tensor.
resize_height = current_config[_TARGET_SIZE][0]
resize_width = current_config[_TARGET_SIZE][1]
if isinstance(resize_height, tf.Tensor):
resize_height = None
if isinstance(resize_width, tf.Tensor):
resize_width = None
pooled_features.set_shape(
[None, resize_height, resize_width, depth])
branch_logits.append(pooled_features)
else:
raise ValueError('Unrecognized operation.')
# Merge branch logits.
concat_logits = tf.concat(branch_logits, 3)
if self.hparams['dropout_on_concat_features']:
concat_logits = slim.dropout(
concat_logits,
keep_prob=self.hparams['dropout_keep_prob'],
is_training=is_training,
scope=_CONCAT_PROJECTION_SCOPE + '_dropout')
concat_logits = slim.conv2d(concat_logits,
self.hparams['concat_channels'],
1,
scope=_CONCAT_PROJECTION_SCOPE)
if self.hparams['dropout_on_projection_features']:
concat_logits = slim.dropout(
concat_logits,
keep_prob=self.hparams['dropout_keep_prob'],
is_training=is_training,
scope=_CONCAT_PROJECTION_SCOPE + '_dropout')
return concat_logits
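# Example usage (a sketch following dense_prediction_cell_test.py): build a
# two-branch cell on random features; names mirror the constants above.
#
#   cell = DensePredictionCell(
#       config=[
#           {_INPUT: -1, _OP: _CONV, _KERNEL: 1},
#           {_INPUT: 0, _OP: _CONV, _KERNEL: 3, _RATE: [1, 3]},
#       ],
#       hparams={'conv_rate_multiplier': 2})
#   features = tf.random_normal([2, 33, 33, 5])
#   outputs = cell.build_cell(features, output_stride=8, crop_size=[257, 257])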
[{"kernel": 3, "rate": [1, 6], "op": "conv", "input": -1}, {"kernel": 3, "rate": [18, 15], "op": "conv", "input": 0}, {"kernel": 3, "rate": [6, 3], "op": "conv", "input": 1}, {"kernel": 3, "rate": [1, 1], "op": "conv", "input": 0}, {"kernel": 3, "rate": [6, 21], "op": "conv", "input": 0}]
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for dense_prediction_cell."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from deeplab.core import dense_prediction_cell
class DensePredictionCellTest(tf.test.TestCase):
def setUp(self):
self.segmentation_layer = dense_prediction_cell.DensePredictionCell(
config=[
{
dense_prediction_cell._INPUT: -1,
dense_prediction_cell._OP: dense_prediction_cell._CONV,
dense_prediction_cell._KERNEL: 1,
},
{
dense_prediction_cell._INPUT: 0,
dense_prediction_cell._OP: dense_prediction_cell._CONV,
dense_prediction_cell._KERNEL: 3,
dense_prediction_cell._RATE: [1, 3],
},
{
dense_prediction_cell._INPUT: 1,
dense_prediction_cell._OP: (
dense_prediction_cell._PYRAMID_POOLING),
dense_prediction_cell._GRID_SIZE: [1, 2],
},
],
hparams={'conv_rate_multiplier': 2})
def testPyramidPoolingArguments(self):
features_size, pooled_kernel = (
self.segmentation_layer._get_pyramid_pooling_arguments(
crop_size=[513, 513],
output_stride=16,
image_grid=[4, 4]))
self.assertListEqual(features_size, [33, 33])
self.assertListEqual(pooled_kernel, [9, 9])
def testPyramidPoolingArgumentsWithImageGrid1x1(self):
features_size, pooled_kernel = (
self.segmentation_layer._get_pyramid_pooling_arguments(
crop_size=[257, 257],
output_stride=16,
image_grid=[1, 1]))
self.assertListEqual(features_size, [17, 17])
self.assertListEqual(pooled_kernel, [17, 17])
def testParseOperationStringWithConv1x1(self):
operation = self.segmentation_layer._parse_operation(
config={
dense_prediction_cell._OP: dense_prediction_cell._CONV,
dense_prediction_cell._KERNEL: [1, 1],
},
crop_size=[513, 513], output_stride=16)
self.assertEqual(operation[dense_prediction_cell._OP],
dense_prediction_cell._CONV)
self.assertListEqual(operation[dense_prediction_cell._KERNEL], [1, 1])
def testParseOperationStringWithConv3x3(self):
operation = self.segmentation_layer._parse_operation(
config={
dense_prediction_cell._OP: dense_prediction_cell._CONV,
dense_prediction_cell._KERNEL: [3, 3],
dense_prediction_cell._RATE: [9, 6],
},
crop_size=[513, 513], output_stride=16)
self.assertEqual(operation[dense_prediction_cell._OP],
dense_prediction_cell._CONV)
self.assertListEqual(operation[dense_prediction_cell._KERNEL], [3, 3])
self.assertEqual(operation[dense_prediction_cell._RATE], [9, 6])
def testParseOperationStringWithPyramidPooling2x2(self):
operation = self.segmentation_layer._parse_operation(
config={
dense_prediction_cell._OP: dense_prediction_cell._PYRAMID_POOLING,
dense_prediction_cell._GRID_SIZE: [2, 2],
},
crop_size=[513, 513],
output_stride=16)
self.assertEqual(operation[dense_prediction_cell._OP],
dense_prediction_cell._PYRAMID_POOLING)
# The feature maps of size [33, 33] should be covered by 2x2 kernels with
# size [17, 17].
self.assertListEqual(
operation[dense_prediction_cell._TARGET_SIZE], [33, 33])
self.assertListEqual(operation[dense_prediction_cell._KERNEL], [17, 17])
def testBuildCell(self):
with self.test_session(graph=tf.Graph()) as sess:
features = tf.random_normal([2, 33, 33, 5])
concat_logits = self.segmentation_layer.build_cell(
features,
output_stride=8,
crop_size=[257, 257])
sess.run(tf.global_variables_initializer())
concat_logits = sess.run(concat_logits)
self.assertTrue(concat_logits.any())
def testBuildCellWithImagePoolingCropSize(self):
with self.test_session(graph=tf.Graph()) as sess:
features = tf.random_normal([2, 33, 33, 5])
concat_logits = self.segmentation_layer.build_cell(
features,
output_stride=8,
crop_size=[257, 257],
image_pooling_crop_size=[129, 129])
sess.run(tf.global_variables_initializer())
concat_logits = sess.run(concat_logits)
self.assertTrue(concat_logits.any())
if __name__ == '__main__':
tf.test.main()
...@@ -126,7 +126,7 @@ networks_to_feature_maps = {
},
'xception_71': {
DECODER_END_POINTS: [
'entry_flow/block3/unit_1/xception_module/'
'separable_conv2_pointwise',
],
},
......
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""This script contains utility functions."""
import tensorflow as tf
slim = tf.contrib.slim
def scale_dimension(dim, scale):
"""Scales the input dimension.
Args:
dim: Input dimension (a scalar or a scalar Tensor).
scale: The amount of scaling applied to the input.
Returns:
Scaled dimension.
"""
if isinstance(dim, tf.Tensor):
return tf.cast((tf.to_float(dim) - 1.0) * scale + 1.0, dtype=tf.int32)
else:
return int((float(dim) - 1.0) * scale + 1.0)
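# For example (values from utils_test.py): scale_dimension(321, 0.5) = 161 and
# scale_dimension(321, 0.75) = 241. The "(dim - 1) * scale + 1" form keeps
# dimensions of the form k * stride + 1 (e.g. 321, 513) aligned after scaling.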
def split_separable_conv2d(inputs,
filters,
kernel_size=3,
rate=1,
weight_decay=0.00004,
depthwise_weights_initializer_stddev=0.33,
pointwise_weights_initializer_stddev=0.06,
scope=None):
"""Splits a separable conv2d into depthwise and pointwise conv2d.
This operation differs from `tf.layers.separable_conv2d` as it applies an
activation function between the depthwise and pointwise conv2d.
Args:
inputs: Input tensor with shape [batch, height, width, channels].
filters: Number of filters in the 1x1 pointwise convolution.
kernel_size: A list of length 2: [kernel_height, kernel_width] of the
filters. Can be an int if both values are the same.
rate: Atrous convolution rate for the depthwise convolution.
weight_decay: The weight decay to use for regularizing the model.
depthwise_weights_initializer_stddev: The standard deviation of the
truncated normal weight initializer for depthwise convolution.
pointwise_weights_initializer_stddev: The standard deviation of the
truncated normal weight initializer for pointwise convolution.
scope: Optional scope for the operation.
Returns:
Computed features after split separable conv2d.
"""
outputs = slim.separable_conv2d(
inputs,
None,
kernel_size=kernel_size,
depth_multiplier=1,
rate=rate,
weights_initializer=tf.truncated_normal_initializer(
stddev=depthwise_weights_initializer_stddev),
weights_regularizer=None,
scope=scope + '_depthwise')
return slim.conv2d(
outputs,
filters,
1,
weights_initializer=tf.truncated_normal_initializer(
stddev=pointwise_weights_initializer_stddev),
weights_regularizer=slim.l2_regularizer(weight_decay),
scope=scope + '_pointwise')
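# Example usage (a sketch): an atrous 3x3 separable convolution producing 256
# channels, as employed by the ASPP branches in model.py:
#
#   aspp_features = split_separable_conv2d(
#       features, filters=256, rate=6, weight_decay=0.00004, scope='aspp1')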
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for utils.py."""
import tensorflow as tf
from deeplab.core import utils
class UtilsTest(tf.test.TestCase):
def testScaleDimensionOutput(self):
self.assertEqual(161, utils.scale_dimension(321, 0.5))
self.assertEqual(193, utils.scale_dimension(321, 0.6))
self.assertEqual(241, utils.scale_dimension(321, 0.75))
if __name__ == '__main__':
tf.test.main()
...@@ -79,6 +79,14 @@ ${PATH_TO_DATASET} is the directory in which the Cityscapes dataset resides.
3. Users may skip the flag `decoder_output_stride` if they do not want to
use the decoder structure.
4. Change and add the following flags to use the provided dense prediction
cell.
```bash
--model_variant="xception_71"
--dense_prediction_cell_json="deeplab/core/dense_prediction_cell_branch5_top1_cityscapes.json"
```
A local evaluation job using `xception_65` can be run with the following
command:
......
...@@ -30,8 +30,8 @@ Checkpoint name | Network backbone | Pretrained dataset | ASPP | D
--------------------------- | :--------------: | :-----------------: | :---: | :-----:
mobilenetv2_coco_voc_trainaug | MobileNet-v2 | MS-COCO <br> VOC 2012 train_aug set | N/A | N/A
mobilenetv2_coco_voc_trainval | MobileNet-v2 | MS-COCO <br> VOC 2012 train_aug + trainval sets | N/A | N/A
xception65_coco_voc_trainaug | Xception_65 | MS-COCO <br> VOC 2012 train_aug set | [6,12,18] for OS=16 <br> [12,24,36] for OS=8 | OS = 4
xception65_coco_voc_trainval | Xception_65 | MS-COCO <br> VOC 2012 train_aug + trainval sets | [6,12,18] for OS=16 <br> [12,24,36] for OS=8 | OS = 4

In the table, **OS** denotes output stride.
...@@ -39,8 +39,8 @@ Checkpoint name
------------------------------------------------------------------------------------------------------------------------ | :-------: | :------------------------: | :-------------: | :------------------: | :------------: | :----------------------------: | :-------:
[mobilenetv2_coco_voc_trainaug](http://download.tensorflow.org/models/deeplabv3_mnv2_pascal_train_aug_2018_01_29.tar.gz) | 16 <br> 8 | [1.0] <br> [0.5:0.25:1.75] | No <br> Yes | 2.75B <br> 152.59B | 0.1 <br> 26.9 | 75.32% (val) <br> 77.33% (val) | 23MB
[mobilenetv2_coco_voc_trainval](http://download.tensorflow.org/models/deeplabv3_mnv2_pascal_trainval_2018_01_29.tar.gz) | 8 | [0.5:0.25:1.75] | Yes | 152.59B | 26.9 | 80.25% (**test**) | 23MB
[xception65_coco_voc_trainaug](http://download.tensorflow.org/models/deeplabv3_pascal_train_aug_2018_01_04.tar.gz) | 16 <br> 8 | [1.0] <br> [0.5:0.25:1.75] | No <br> Yes | 54.17B <br> 3055.35B | 0.7 <br> 223.2 | 82.20% (val) <br> 83.58% (val) | 439MB
[xception65_coco_voc_trainval](http://download.tensorflow.org/models/deeplabv3_pascal_trainval_2018_01_04.tar.gz) | 8 | [0.5:0.25:1.75] | Yes | 3055.35B | 223.2 | 87.80% (**test**) | 439MB

In the table, we report both computation complexity (in terms of Multiply-Adds
and CPU Runtime) and segmentation performance (in terms of mIOU) on the PASCAL
...@@ -61,14 +61,20 @@ dataset and does not employ ASPP and decoder modules for fast computation.
Checkpoint name | Network backbone | Pretrained dataset | ASPP | Decoder
------------------------------------- | :--------------: | :-------------------------------------: | :----------------------------------------------: | :-----:
mobilenetv2_coco_cityscapes_trainfine | MobileNet-v2 | MS-COCO <br> Cityscapes train_fine set | N/A | N/A
xception65_cityscapes_trainfine | Xception_65 | ImageNet <br> Cityscapes train_fine set | [6, 12, 18] for OS=16 <br> [12, 24, 36] for OS=8 | OS = 4
xception71_dpc_cityscapes_trainfine | Xception_71 | ImageNet <br> MS-COCO <br> Cityscapes train_fine set | Dense Prediction Cell | OS = 4
xception71_dpc_cityscapes_trainval | Xception_71 | ImageNet <br> MS-COCO <br> Cityscapes trainval_fine and coarse set | Dense Prediction Cell | OS = 4
In the table, **OS** denotes output stride.

Checkpoint name | Eval OS | Eval scales | Left-right Flip | Multiply-Adds | Runtime (sec) | Cityscapes mIOU | File Size
-------------------------------------------------------------------------------------------------------------------------------- | :-------: | :-------------------------: | :-------------: | :-------------------: | :------------: | :----------------------------: | :-------:
[mobilenetv2_coco_cityscapes_trainfine](http://download.tensorflow.org/models/deeplabv3_mnv2_cityscapes_train_2018_02_05.tar.gz) | 16 <br> 8 | [1.0] <br> [0.75:0.25:1.25] | No <br> Yes | 21.27B <br> 433.24B | 0.8 <br> 51.12 | 70.71% (val) <br> 73.57% (val) | 23MB
[xception65_cityscapes_trainfine](http://download.tensorflow.org/models/deeplabv3_cityscapes_train_2018_02_06.tar.gz) | 16 <br> 8 | [1.0] <br> [0.75:0.25:1.25] | No <br> Yes | 418.64B <br> 8677.92B | 5.0 <br> 422.8 | 78.79% (val) <br> 80.42% (val) | 439MB
[xception71_dpc_cityscapes_trainfine](http://download.tensorflow.org/models/deeplab_cityscapes_xception71_trainfine_2018_09_08.tar.gz) | 16 | [1.0] | No | 502.07B | - | 80.31% (val) | 445MB
[xception71_dpc_cityscapes_trainval](http://download.tensorflow.org/models/deeplab_cityscapes_xception71_trainvalfine_2018_09_08.tar.gz) | 8 | [0.75:0.25:2] | Yes | - | - | 82.66% (**test**) | 446MB
## DeepLab models trained on ADE20K
...@@ -80,11 +86,11 @@ dataset rule.
Checkpoint name | Network backbone | Pretrained dataset | ASPP | Decoder
------------------------------------- | :--------------: | :-------------------------------------: | :----------------------------------------------: | :-----:
xception65_ade20k_train | Xception_65 | ImageNet <br> ADE20K training set | [6, 12, 18] for OS=16 <br> [12, 24, 36] for OS=8 | OS = 4

Checkpoint name | Eval OS | Eval scales | Left-right Flip | mIOU | Pixel-wise Accuracy | File Size
------------------------------------- | :-------: | :-------------------------: | :-------------: | :-------------------: | :-------------------: | :-------:
[xception65_ade20k_train](http://download.tensorflow.org/models/deeplabv3_xception_ade20k_train_2018_05_29.tar.gz) | 8 | [0.5:0.25:1.75] | Yes | 45.65% (val) | 82.52% (val) | 439MB

## Checkpoints pretrained on ImageNet
...@@ -170,4 +176,4 @@ Model name
12. **Scene Parsing through ADE20K Dataset**<br />
    Bolei Zhou, Hang Zhao, Xavier Puig, Sanja Fidler, Adela Barriuso, Antonio Torralba<br />
    [[link]](http://groups.csail.mit.edu/vision/datasets/ADE20K/). In CVPR,
    2017.
...@@ -52,7 +52,10 @@ Alan L. Yuille (* equal contribution)
(https://arxiv.org/abs/1412.7062)
"""
import tensorflow as tf
from deeplab.core import dense_prediction_cell
from deeplab.core import feature_extractor
from deeplab.core import utils
slim = tf.contrib.slim
...@@ -62,7 +65,10 @@ IMAGE_POOLING_SCOPE = 'image_pooling'
ASPP_SCOPE = 'aspp'
CONCAT_PROJECTION_SCOPE = 'concat_projection'
DECODER_SCOPE = 'decoder'
META_ARCHITECTURE_SCOPE = 'meta_architecture'
scale_dimension = utils.scale_dimension
split_separable_conv2d = utils.split_separable_conv2d
def get_extra_layer_scopes(last_layers_contain_logits_only=False):
"""Gets the scopes for extra layers.
...@@ -83,6 +89,7 @@ def get_extra_layer_scopes(last_layers_contain_logits_only=False):
ASPP_SCOPE,
CONCAT_PROJECTION_SCOPE,
DECODER_SCOPE,
META_ARCHITECTURE_SCOPE,
]
...@@ -186,20 +193,20 @@ def predict_labels(images, model_options, image_pyramid=None):
return predictions
def _resize_bilinear(images, size, output_dtype=tf.float32):
  """Returns resized images as output_dtype.

  Args:
    images: A tensor of size [batch, height_in, width_in, channels].
    size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new
      size for the images.
    output_dtype: The destination type.

  Returns:
    A tensor of size [batch, height_out, width_out, channels] as a dtype of
      output_dtype.
  """
  images = tf.image.resize_bilinear(images, size, align_corners=True)
  return tf.cast(images, dtype=output_dtype)
def multi_scale_logits(images,
...@@ -355,92 +362,120 @@ def extract_features(images,
  if not model_options.aspp_with_batch_norm:
    return features, end_points
  else:
    if model_options.dense_prediction_cell_config is not None:
      tf.logging.info('Using dense prediction cell config.')
      dense_prediction_layer = dense_prediction_cell.DensePredictionCell(
          config=model_options.dense_prediction_cell_config,
          hparams={
              'conv_rate_multiplier': 16 // model_options.output_stride,
          })
      concat_logits = dense_prediction_layer.build_cell(
          features,
          output_stride=model_options.output_stride,
          crop_size=model_options.crop_size,
          image_pooling_crop_size=model_options.image_pooling_crop_size,
          weight_decay=weight_decay,
          reuse=reuse,
          is_training=is_training,
          fine_tune_batch_norm=fine_tune_batch_norm)
      return concat_logits, end_points
    else:
      # The following code employs the DeepLabv3 ASPP module. Note that we
      # could express the ASPP module as one particular dense prediction
      # cell architecture. We do not do so, but leave the following code in
      # place for backward compatibility.
      batch_norm_params = {
          'is_training': is_training and fine_tune_batch_norm,
          'decay': 0.9997,
          'epsilon': 1e-5,
          'scale': True,
      }
      with slim.arg_scope(
          [slim.conv2d, slim.separable_conv2d],
          weights_regularizer=slim.l2_regularizer(weight_decay),
          activation_fn=tf.nn.relu,
          normalizer_fn=slim.batch_norm,
          padding='SAME',
          stride=1,
          reuse=reuse):
        with slim.arg_scope([slim.batch_norm], **batch_norm_params):
          depth = 256
          branch_logits = []

          if model_options.add_image_level_feature:
            if model_options.crop_size is not None:
              image_pooling_crop_size = model_options.image_pooling_crop_size
              # If image_pooling_crop_size is not specified, use crop_size.
              if image_pooling_crop_size is None:
                image_pooling_crop_size = model_options.crop_size
              pool_height = scale_dimension(
                  image_pooling_crop_size[0],
                  1. / model_options.output_stride)
              pool_width = scale_dimension(
                  image_pooling_crop_size[1],
                  1. / model_options.output_stride)
              image_feature = slim.avg_pool2d(
                  features, [pool_height, pool_width], [1, 1],
                  padding='VALID')
              resize_height = scale_dimension(
                  model_options.crop_size[0],
                  1. / model_options.output_stride)
              resize_width = scale_dimension(
                  model_options.crop_size[1],
                  1. / model_options.output_stride)
            else:
              # If crop_size is None, we simply do global pooling.
              pool_height = tf.shape(features)[1]
              pool_width = tf.shape(features)[2]
              image_feature = tf.reduce_mean(
                  features, axis=[1, 2], keepdims=True)
              resize_height = pool_height
              resize_width = pool_width
            image_feature = slim.conv2d(
                image_feature, depth, 1, scope=IMAGE_POOLING_SCOPE)
            image_feature = _resize_bilinear(
                image_feature,
                [resize_height, resize_width],
                image_feature.dtype)
            # Set shape for resize_height/resize_width if they are not Tensor.
            if isinstance(resize_height, tf.Tensor):
              resize_height = None
            if isinstance(resize_width, tf.Tensor):
              resize_width = None
            image_feature.set_shape([None, resize_height, resize_width, depth])
            branch_logits.append(image_feature)

          # Employ a 1x1 convolution.
          branch_logits.append(slim.conv2d(features, depth, 1,
                                           scope=ASPP_SCOPE + str(0)))

          if model_options.atrous_rates:
            # Employ 3x3 convolutions with different atrous rates.
            for i, rate in enumerate(model_options.atrous_rates, 1):
              scope = ASPP_SCOPE + str(i)
              if model_options.aspp_with_separable_conv:
                aspp_features = split_separable_conv2d(
                    features,
                    filters=depth,
                    rate=rate,
                    weight_decay=weight_decay,
                    scope=scope)
              else:
                aspp_features = slim.conv2d(
                    features, depth, 3, rate=rate, scope=scope)
              branch_logits.append(aspp_features)

          # Merge branch logits.
          concat_logits = tf.concat(branch_logits, 3)
          concat_logits = slim.conv2d(
              concat_logits, depth, 1, scope=CONCAT_PROJECTION_SCOPE)
          concat_logits = slim.dropout(
              concat_logits,
              keep_prob=0.9,
              is_training=is_training,
              scope=CONCAT_PROJECTION_SCOPE + '_dropout')

          return concat_logits, end_points
def _get_logits(images,
...@@ -672,52 +707,3 @@ def get_branch_logits(features,
scope=scope))
return tf.add_n(branch_logits)
...@@ -23,11 +23,6 @@ from deeplab import model
class DeeplabModelTest(tf.test.TestCase):
def testWrongDeepLabVariant(self):
model_options = common.ModelOptions([])._replace(
model_variant='no_such_variant')
...@@ -115,6 +110,37 @@ class DeeplabModelTest(tf.test.TestCase):
for logits in scales_to_logits.values():
self.assertTrue(logits.any())
def testBuildDeepLabWithDensePredictionCell(self):
batch_size = 1
crop_size = [33, 33]
outputs_to_num_classes = {'semantic': 2}
expected_endpoints = ['merged_logits']
dense_prediction_cell_config = [
{'kernel': 3, 'rate': [1, 6], 'op': 'conv', 'input': -1},
{'kernel': 3, 'rate': [18, 15], 'op': 'conv', 'input': 0},
]
model_options = common.ModelOptions(
outputs_to_num_classes,
crop_size,
output_stride=16)._replace(
aspp_with_batch_norm=True,
model_variant='mobilenet_v2',
dense_prediction_cell_config=dense_prediction_cell_config)
g = tf.Graph()
with g.as_default():
with self.test_session(graph=g):
inputs = tf.random_uniform(
(batch_size, crop_size[0], crop_size[1], 3))
outputs_to_scales_to_model_results = model.multi_scale_logits(
inputs,
model_options,
image_pyramid=[1.0])
for output in outputs_to_num_classes:
scales_to_model_results = outputs_to_scales_to_model_results[output]
self.assertListEqual(list(scales_to_model_results.keys()),
                     expected_endpoints)
self.assertEqual(len(scales_to_model_results), 1)
if __name__ == '__main__':
tf.test.main()