Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
2310bc34
Unverified
Commit
2310bc34
authored
Jun 14, 2018
by
Yukun Zhu
Committed by
GitHub
Jun 14, 2018
Browse files
Merge pull request #4534 from huihui-personal/master
PiperOrigin-RevId: 200493322
parents
1f82c227
e2e820c1
Changes
14
Show whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
248 additions
and
193 deletions
+248
-193
research/deeplab/README.md
research/deeplab/README.md
+5
-0
research/deeplab/common.py
research/deeplab/common.py
+6
-4
research/deeplab/common_test.py
research/deeplab/common_test.py
+34
-0
research/deeplab/core/feature_extractor.py
research/deeplab/core/feature_extractor.py
+4
-10
research/deeplab/core/resnet_v1_beta.py
research/deeplab/core/resnet_v1_beta.py
+32
-102
research/deeplab/core/resnet_v1_beta_test.py
research/deeplab/core/resnet_v1_beta_test.py
+0
-2
research/deeplab/datasets/segmentation_dataset.py
research/deeplab/datasets/segmentation_dataset.py
+1
-0
research/deeplab/g3doc/ade20k.md
research/deeplab/g3doc/ade20k.md
+1
-1
research/deeplab/g3doc/model_zoo.md
research/deeplab/g3doc/model_zoo.md
+1
-1
research/deeplab/model.py
research/deeplab/model.py
+64
-69
research/deeplab/train.py
research/deeplab/train.py
+4
-2
research/deeplab/utils/get_dataset_colormap.py
research/deeplab/utils/get_dataset_colormap.py
+84
-0
research/deeplab/utils/get_dataset_colormap_test.py
research/deeplab/utils/get_dataset_colormap_test.py
+5
-0
research/deeplab/utils/train_utils.py
research/deeplab/utils/train_utils.py
+7
-2
No files found.
research/deeplab/README.md
View file @
2310bc34
...
...
@@ -113,6 +113,11 @@ with "deeplab".
## Change Logs
### May 26, 2018
Updated ADE20K pretrained checkpoint.
### May 18, 2018
1. Added builders for ResNet-v1 and Xception model variants.
1. Added ADE20K support, including colormap and pretrained Xception_65 checkpoint.
...
...
research/deeplab/common.py
View file @
2310bc34
...
...
@@ -40,10 +40,10 @@ flags.DEFINE_integer('logits_kernel_size', 1,
'generates logits.'
)
# When using 'mobilenet_v2', we set atrous_rates = decoder_output_stride = None.
# When using 'xception_65'
, we set atrous_rates = [6, 12, 18] (output stride 16)
# and decoder_output_stride = 4.
flags
.
DEFINE_enum
(
'
model
_
variant
'
,
'mobilenet_v2'
,
[
'xception_65
'
,
'mobilenet_v2'
]
,
'DeepLab model variant.'
)
# When using 'xception_65' or 'resnet_v1' model variants, we set
# atrous_rates = [6, 12, 18] (output stride 16) and decoder_output_stride = 4.
# See core/feature_extractor.py for supported model variants.
flags
.
DEFINE_string
(
'model_variant
'
,
'mobilenet_v2'
,
'DeepLab model variant.'
)
flags
.
DEFINE_multi_float
(
'image_pyramid'
,
None
,
'Input scales for multi-scale feature extraction.'
)
...
...
@@ -57,6 +57,8 @@ flags.DEFINE_boolean('aspp_with_batch_norm', True,
flags
.
DEFINE_boolean
(
'aspp_with_separable_conv'
,
True
,
'Use separable convolution for ASPP or not.'
)
# Defaults to None. Set multi_grid = [1, 2, 4] when using provided
# 'resnet_v1_{50,101}_beta' checkpoints.
flags
.
DEFINE_multi_integer
(
'multi_grid'
,
None
,
'Employ a hierarchy of atrous rates for ResNet.'
)
...
...
research/deeplab/common_test.py
0 → 100644
View file @
2310bc34
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for common.py."""
import
tensorflow
as
tf
from
deeplab
import
common
class CommonTest(tf.test.TestCase):
  """Unit tests for `common.ModelOptions`."""

  def testOutputsToNumClasses(self):
    """The mapping passed to ModelOptions can be read back unchanged."""
    expected_num_classes = 21
    outputs_to_num_classes = {common.OUTPUT_TYPE: expected_num_classes}
    model_options = common.ModelOptions(
        outputs_to_num_classes=outputs_to_num_classes)
    # Look the value up through the options object rather than the local dict.
    stored_num_classes = model_options.outputs_to_num_classes[
        common.OUTPUT_TYPE]
    self.assertEqual(stored_num_classes, expected_num_classes)
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
research/deeplab/core/feature_extractor.py
View file @
2310bc34
...
...
@@ -98,8 +98,7 @@ DECODER_END_POINTS = 'decoder_end_points'
# A dictionary from network name to a map of end point features.
networks_to_feature_maps
=
{
'mobilenet_v2'
:
{
# The provided checkpoint does not include decoder module.
DECODER_END_POINTS
:
None
,
DECODER_END_POINTS
:
[
'layer_4/depthwise_output'
],
},
'resnet_v1_50'
:
{
DECODER_END_POINTS
:
[
'block1/unit_2/bottleneck_v1/conv3'
],
...
...
@@ -211,8 +210,7 @@ def extract_features(images,
regularize_depthwise
=
False
,
preprocess_images
=
True
,
num_classes
=
None
,
global_pool
=
False
,
use_bounded_activations
=
False
):
global_pool
=
False
):
"""Extracts features by the particular model_variant.
Args:
...
...
@@ -237,8 +235,6 @@ def extract_features(images,
to None for dense prediction tasks.
global_pool: Global pooling for image classification task. Defaults to
False, since dense prediction tasks do not use this.
use_bounded_activations: Whether or not to use bounded activations. Bounded
activations better lend themselves to quantized inference.
Returns:
features: A tensor of size [batch, feature_height, feature_width,
...
...
@@ -255,8 +251,7 @@ def extract_features(images,
weight_decay
=
weight_decay
,
batch_norm_decay
=
0.95
,
batch_norm_epsilon
=
1e-5
,
batch_norm_scale
=
True
,
activation_fn
=
tf
.
nn
.
relu6
if
use_bounded_activations
else
tf
.
nn
.
relu
)
batch_norm_scale
=
True
)
features
,
end_points
=
get_network
(
model_variant
,
preprocess_images
,
arg_scope
)(
inputs
=
images
,
...
...
@@ -266,8 +261,7 @@ def extract_features(images,
output_stride
=
output_stride
,
multi_grid
=
multi_grid
,
reuse
=
reuse
,
scope
=
name_scope
[
model_variant
],
use_bounded_activations
=
use_bounded_activations
)
scope
=
name_scope
[
model_variant
])
elif
'xception'
in
model_variant
:
arg_scope
=
arg_scopes_map
[
model_variant
](
weight_decay
=
weight_decay
,
...
...
research/deeplab/core/resnet_v1_beta.py
View file @
2310bc34
...
...
@@ -44,8 +44,7 @@ def bottleneck(inputs,
unit_rate
=
1
,
rate
=
1
,
outputs_collections
=
None
,
scope
=
None
,
use_bounded_activations
=
True
):
scope
=
None
):
"""Bottleneck residual unit variant with BN after convolutions.
This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for
...
...
@@ -65,8 +64,6 @@ def bottleneck(inputs,
rate: An integer, rate for atrous convolution.
outputs_collections: Collection to add the ResNet unit output.
scope: Optional variable_scope.
use_bounded_activations: Whether or not to use bounded activations. Bounded
activations better lend themselves to quantized inference.
Returns:
The ResNet unit's output.
...
...
@@ -81,7 +78,7 @@ def bottleneck(inputs,
depth
,
[
1
,
1
],
stride
=
stride
,
activation_fn
=
tf
.
nn
.
relu6
if
use_bounded_activations
else
None
,
activation_fn
=
None
,
scope
=
'shortcut'
)
residual
=
slim
.
conv2d
(
inputs
,
depth_bottleneck
,
[
1
,
1
],
stride
=
1
,
...
...
@@ -90,12 +87,6 @@ def bottleneck(inputs,
rate
=
rate
*
unit_rate
,
scope
=
'conv2'
)
residual
=
slim
.
conv2d
(
residual
,
depth
,
[
1
,
1
],
stride
=
1
,
activation_fn
=
None
,
scope
=
'conv3'
)
if
use_bounded_activations
:
# Use clip_by_value to simulate bandpass activation.
residual
=
tf
.
clip_by_value
(
residual
,
-
6.0
,
6.0
)
output
=
tf
.
nn
.
relu6
(
shortcut
+
residual
)
else
:
output
=
tf
.
nn
.
relu
(
shortcut
+
residual
)
return
slim
.
utils
.
collect_named_outputs
(
outputs_collections
,
...
...
@@ -129,8 +120,6 @@ def resnet_v1_beta(inputs,
global_pool
=
True
,
output_stride
=
None
,
root_block_fn
=
None
,
store_non_strided_activations
=
False
,
use_bounded_activations
=
False
,
reuse
=
None
,
scope
=
None
):
"""Generator for v1 ResNet models (beta variant).
...
...
@@ -159,14 +148,6 @@ def resnet_v1_beta(inputs,
root_block_fn: The function consisting of convolution operations applied to
the root input. If root_block_fn is None, use the original setting of
ResNet-v1, which is simply one convolution with 7x7 kernel and stride=2.
store_non_strided_activations: If True, we compute non-strided (undecimated)
activations at the last unit of each block and store them in the
`outputs_collections` before subsampling them. This gives us access to
higher resolution intermediate activations which are useful in some
dense prediction problems but increases 4x the computation and memory cost
at the last unit of each block.
use_bounded_activations: Whether or not to use bounded activations. Bounded
activations better lend themselves to quantized inference.
reuse: whether or not the network and its variables should be reused. To be
able to reuse 'scope' must be given.
scope: Optional variable_scope.
...
...
@@ -196,8 +177,6 @@ def resnet_v1_beta(inputs,
with
slim
.
arg_scope
([
slim
.
conv2d
,
bottleneck
,
resnet_utils
.
stack_blocks_dense
],
outputs_collections
=
end_points_collection
):
with
slim
.
arg_scope
(
[
bottleneck
],
use_bounded_activations
=
use_bounded_activations
):
if
is_training
is
not
None
:
arg_scope
=
slim
.
arg_scope
([
slim
.
batch_norm
],
is_training
=
is_training
)
else
:
...
...
@@ -210,8 +189,7 @@ def resnet_v1_beta(inputs,
output_stride
/=
4
net
=
root_block_fn
(
net
)
net
=
slim
.
max_pool2d
(
net
,
3
,
stride
=
2
,
padding
=
'SAME'
,
scope
=
'pool1'
)
net
=
resnet_utils
.
stack_blocks_dense
(
net
,
blocks
,
output_stride
,
store_non_strided_activations
)
net
=
resnet_utils
.
stack_blocks_dense
(
net
,
blocks
,
output_stride
)
if
global_pool
:
# Global average pooling.
...
...
@@ -258,9 +236,7 @@ def resnet_v1_50(inputs,
is_training
=
None
,
global_pool
=
False
,
output_stride
=
None
,
store_non_strided_activations
=
False
,
multi_grid
=
None
,
use_bounded_activations
=
False
,
reuse
=
None
,
scope
=
'resnet_v1_50'
):
"""Resnet v1 50.
...
...
@@ -275,15 +251,7 @@ def resnet_v1_50(inputs,
output_stride: If None, then the output will be computed at the nominal
network stride. If output_stride is not None, it specifies the requested
ratio of input to output spatial resolution.
store_non_strided_activations: If True, we compute non-strided (undecimated)
activations at the last unit of each block and store them in the
`outputs_collections` before subsampling them. This gives us access to
higher resolution intermediate activations which are useful in some
dense prediction problems but increases 4x the computation and memory cost
at the last unit of each block.
multi_grid: Employ a hierarchy of different atrous rates within network.
use_bounded_activations: Whether or not to use bounded activations. Bounded
activations better lend themselves to quantized inference.
reuse: whether or not the network and its variables should be reused. To be
able to reuse 'scope' must be given.
scope: Optional variable_scope.
...
...
@@ -328,10 +296,8 @@ def resnet_v1_50(inputs,
is_training
=
is_training
,
global_pool
=
global_pool
,
output_stride
=
output_stride
,
store_non_strided_activations
=
store_non_strided_activations
,
reuse
=
reuse
,
scope
=
scope
,
use_bounded_activations
=
use_bounded_activations
)
scope
=
scope
)
def
resnet_v1_50_beta
(
inputs
,
...
...
@@ -339,9 +305,7 @@ def resnet_v1_50_beta(inputs,
is_training
=
None
,
global_pool
=
False
,
output_stride
=
None
,
store_non_strided_activations
=
False
,
multi_grid
=
None
,
use_bounded_activations
=
False
,
reuse
=
None
,
scope
=
'resnet_v1_50'
):
"""Resnet v1 50 beta variant.
...
...
@@ -360,15 +324,7 @@ def resnet_v1_50_beta(inputs,
output_stride: If None, then the output will be computed at the nominal
network stride. If output_stride is not None, it specifies the requested
ratio of input to output spatial resolution.
store_non_strided_activations: If True, we compute non-strided (undecimated)
activations at the last unit of each block and store them in the
`outputs_collections` before subsampling them. This gives us access to
higher resolution intermediate activations which are useful in some
dense prediction problems but increases 4x the computation and memory cost
at the last unit of each block.
multi_grid: Employ a hierarchy of different atrous rates within network.
use_bounded_activations: Whether or not to use bounded activations. Bounded
activations better lend themselves to quantized inference.
reuse: whether or not the network and its variables should be reused. To be
able to reuse 'scope' must be given.
scope: Optional variable_scope.
...
...
@@ -414,10 +370,8 @@ def resnet_v1_50_beta(inputs,
global_pool
=
global_pool
,
output_stride
=
output_stride
,
root_block_fn
=
functools
.
partial
(
root_block_fn_for_beta_variant
),
store_non_strided_activations
=
store_non_strided_activations
,
reuse
=
reuse
,
scope
=
scope
,
use_bounded_activations
=
use_bounded_activations
)
scope
=
scope
)
def
resnet_v1_101
(
inputs
,
...
...
@@ -425,9 +379,7 @@ def resnet_v1_101(inputs,
is_training
=
None
,
global_pool
=
False
,
output_stride
=
None
,
store_non_strided_activations
=
False
,
multi_grid
=
None
,
use_bounded_activations
=
False
,
reuse
=
None
,
scope
=
'resnet_v1_101'
):
"""Resnet v1 101.
...
...
@@ -442,15 +394,7 @@ def resnet_v1_101(inputs,
output_stride: If None, then the output will be computed at the nominal
network stride. If output_stride is not None, it specifies the requested
ratio of input to output spatial resolution.
store_non_strided_activations: If True, we compute non-strided (undecimated)
activations at the last unit of each block and store them in the
`outputs_collections` before subsampling them. This gives us access to
higher resolution intermediate activations which are useful in some
dense prediction problems but increases 4x the computation and memory cost
at the last unit of each block.
multi_grid: Employ a hierarchy of different atrous rates within network.
use_bounded_activations: Whether or not to use bounded activations. Bounded
activations better lend themselves to quantized inference.
reuse: whether or not the network and its variables should be reused. To be
able to reuse 'scope' must be given.
scope: Optional variable_scope.
...
...
@@ -495,10 +439,8 @@ def resnet_v1_101(inputs,
is_training
=
is_training
,
global_pool
=
global_pool
,
output_stride
=
output_stride
,
store_non_strided_activations
=
store_non_strided_activations
,
reuse
=
reuse
,
scope
=
scope
,
use_bounded_activations
=
use_bounded_activations
)
scope
=
scope
)
def
resnet_v1_101_beta
(
inputs
,
...
...
@@ -506,9 +448,7 @@ def resnet_v1_101_beta(inputs,
is_training
=
None
,
global_pool
=
False
,
output_stride
=
None
,
store_non_strided_activations
=
False
,
multi_grid
=
None
,
use_bounded_activations
=
False
,
reuse
=
None
,
scope
=
'resnet_v1_101'
):
"""Resnet v1 101 beta variant.
...
...
@@ -527,15 +467,7 @@ def resnet_v1_101_beta(inputs,
output_stride: If None, then the output will be computed at the nominal
network stride. If output_stride is not None, it specifies the requested
ratio of input to output spatial resolution.
store_non_strided_activations: If True, we compute non-strided (undecimated)
activations at the last unit of each block and store them in the
`outputs_collections` before subsampling them. This gives us access to
higher resolution intermediate activations which are useful in some
dense prediction problems but increases 4x the computation and memory cost
at the last unit of each block.
multi_grid: Employ a hierarchy of different atrous rates within network.
use_bounded_activations: Whether or not to use bounded activations. Bounded
activations better lend themselves to quantized inference.
reuse: whether or not the network and its variables should be reused. To be
able to reuse 'scope' must be given.
scope: Optional variable_scope.
...
...
@@ -581,7 +513,5 @@ def resnet_v1_101_beta(inputs,
global_pool
=
global_pool
,
output_stride
=
output_stride
,
root_block_fn
=
functools
.
partial
(
root_block_fn_for_beta_variant
),
store_non_strided_activations
=
store_non_strided_activations
,
use_bounded_activations
=
use_bounded_activations
,
reuse
=
reuse
,
scope
=
scope
)
research/deeplab/core/resnet_v1_beta_test.py
View file @
2310bc34
...
...
@@ -53,7 +53,6 @@ class ResnetCompleteNetworkTest(tf.test.TestCase):
is_training
=
True
,
global_pool
=
True
,
output_stride
=
None
,
store_non_strided_activations
=
False
,
multi_grid
=
None
,
reuse
=
None
,
scope
=
'resnet_v1_small'
):
...
...
@@ -84,7 +83,6 @@ class ResnetCompleteNetworkTest(tf.test.TestCase):
output_stride
=
output_stride
,
root_block_fn
=
functools
.
partial
(
resnet_v1_beta
.
root_block_fn_for_beta_variant
),
store_non_strided_activations
=
store_non_strided_activations
,
reuse
=
reuse
,
scope
=
scope
)
...
...
research/deeplab/datasets/segmentation_dataset.py
View file @
2310bc34
...
...
@@ -89,6 +89,7 @@ _CITYSCAPES_INFORMATION = DatasetDescriptor(
_PASCAL_VOC_SEG_INFORMATION
=
DatasetDescriptor
(
splits_to_sizes
=
{
'train'
:
1464
,
'train_aug'
:
10582
,
'trainval'
:
2913
,
'val'
:
1449
,
},
...
...
research/deeplab/g3doc/ade20k.md
View file @
2310bc34
...
...
@@ -49,7 +49,7 @@ A local training job using `xception_65` can be run with the following command:
# From tensorflow/models/research/
python deeplab/train.py
\
--logtostderr
\
--training_number_of_steps
=
9
0000
\
--training_number_of_steps
=
15
0000
\
--train_split
=
"train"
\
--model_variant
=
"xception_65"
\
--atrous_rates
=
6
\
...
...
research/deeplab/g3doc/model_zoo.md
View file @
2310bc34
...
...
@@ -84,7 +84,7 @@ xception_ade20k_train | Xception_65 | ImageNet <br> ADE20K
Checkpoint name | Eval OS | Eval scales | Left-right Flip | mIOU | Pixel-wise Accuracy | File Size
------------------------------------- | :-------: | :-------------------------: | :-------------: | :-------------------: | :-------------------: | :-------:
[xception_ade20k_train](http://download.tensorflow.org/models/deeplabv3_xception_ade20k_train_2018_05_14.tar.gz) | 16 | [0.5:0.25:1.75] | Yes | 43.54% (val) | 81.74% (val) | 439MB
[xception_ade20k_train](http://download.tensorflow.org/models/deeplabv3_xception_ade20k_train_2018_05_29.tar.gz) | 8 | [0.5:0.25:1.75] | Yes | 45.65% (val) | 82.52% (val) | 439MB
## Checkpoints pretrained on ImageNet
...
...
research/deeplab/model.py
View file @
2310bc34
...
...
@@ -56,16 +56,12 @@ from deeplab.core import feature_extractor
slim
=
tf
.
contrib
.
slim
_LOGITS_SCOPE_NAME
=
'logits'
_MERGED_LOGITS_SCOPE
=
'merged_logits'
_IMAGE_POOLING_SCOPE
=
'image_pooling'
_ASPP_SCOPE
=
'aspp'
_CONCAT_PROJECTION_SCOPE
=
'concat_projection'
_DECODER_SCOPE
=
'decoder'
def
get_merged_logits_scope
():
return
_MERGED_LOGITS_SCOPE
LOGITS_SCOPE_NAME
=
'logits'
MERGED_LOGITS_SCOPE
=
'merged_logits'
IMAGE_POOLING_SCOPE
=
'image_pooling'
ASPP_SCOPE
=
'aspp'
CONCAT_PROJECTION_SCOPE
=
'concat_projection'
DECODER_SCOPE
=
'decoder'
def
get_extra_layer_scopes
(
last_layers_contain_logits_only
=
False
):
...
...
@@ -79,14 +75,14 @@ def get_extra_layer_scopes(last_layers_contain_logits_only=False):
A list of scopes for extra layers.
"""
if
last_layers_contain_logits_only
:
return
[
_
LOGITS_SCOPE_NAME
]
return
[
LOGITS_SCOPE_NAME
]
else
:
return
[
_
LOGITS_SCOPE_NAME
,
_
IMAGE_POOLING_SCOPE
,
_
ASPP_SCOPE
,
_
CONCAT_PROJECTION_SCOPE
,
_
DECODER_SCOPE
,
LOGITS_SCOPE_NAME
,
IMAGE_POOLING_SCOPE
,
ASPP_SCOPE
,
CONCAT_PROJECTION_SCOPE
,
DECODER_SCOPE
,
]
...
...
@@ -133,7 +129,7 @@ def predict_labels_multi_scale(images,
for
output
in
sorted
(
outputs_to_scales_to_logits
):
scales_to_logits
=
outputs_to_scales_to_logits
[
output
]
logits
=
tf
.
image
.
resize_bilinear
(
scales_to_logits
[
_
MERGED_LOGITS_SCOPE
],
scales_to_logits
[
MERGED_LOGITS_SCOPE
],
tf
.
shape
(
images
)[
1
:
3
],
align_corners
=
True
)
outputs_to_predictions
[
output
].
append
(
...
...
@@ -143,7 +139,7 @@ def predict_labels_multi_scale(images,
scales_to_logits_reversed
=
(
outputs_to_scales_to_logits_reversed
[
output
])
logits_reversed
=
tf
.
image
.
resize_bilinear
(
tf
.
reverse_v2
(
scales_to_logits_reversed
[
_
MERGED_LOGITS_SCOPE
],
[
2
]),
tf
.
reverse_v2
(
scales_to_logits_reversed
[
MERGED_LOGITS_SCOPE
],
[
2
]),
tf
.
shape
(
images
)[
1
:
3
],
align_corners
=
True
)
outputs_to_predictions
[
output
].
append
(
...
...
@@ -182,7 +178,7 @@ def predict_labels(images, model_options, image_pyramid=None):
for
output
in
sorted
(
outputs_to_scales_to_logits
):
scales_to_logits
=
outputs_to_scales_to_logits
[
output
]
logits
=
tf
.
image
.
resize_bilinear
(
scales_to_logits
[
_
MERGED_LOGITS_SCOPE
],
scales_to_logits
[
MERGED_LOGITS_SCOPE
],
tf
.
shape
(
images
)[
1
:
3
],
align_corners
=
True
)
predictions
[
output
]
=
tf
.
argmax
(
logits
,
3
)
...
...
@@ -221,7 +217,6 @@ def multi_scale_logits(images,
images: A tensor of size [batch, height, width, channels].
model_options: A ModelOptions instance to configure models.
image_pyramid: Input image scales for multi-scale feature extraction.
weight_decay: The weight decay for model variables.
is_training: Is training or not.
fine_tune_batch_norm: Fine-tune the batch norm parameters or not.
...
...
@@ -242,17 +237,9 @@ def multi_scale_logits(images,
# Setup default values.
if
not
image_pyramid
:
image_pyramid
=
[
1.0
]
if
model_options
.
crop_size
is
None
and
model_options
.
add_image_level_feature
:
raise
ValueError
(
'Crop size must be specified for using image-level feature.'
)
if
model_options
.
model_variant
==
'mobilenet_v2'
:
if
(
model_options
.
atrous_rates
is
not
None
or
model_options
.
decoder_output_stride
is
not
None
):
# Output a warning and users should make sure if the setting is desired.
tf
.
logging
.
warning
(
'Our provided mobilenet_v2 checkpoint does not '
'include ASPP and decoder modules.'
)
crop_height
=
(
model_options
.
crop_size
[
0
]
if
model_options
.
crop_size
else
tf
.
shape
(
images
)[
1
])
...
...
@@ -277,7 +264,7 @@ def multi_scale_logits(images,
for
k
in
model_options
.
outputs_to_num_classes
}
for
count
,
image_scale
in
enumerate
(
image_pyramid
)
:
for
image_scale
in
image_pyramid
:
if
image_scale
!=
1.0
:
scaled_height
=
scale_dimension
(
crop_height
,
image_scale
)
scaled_width
=
scale_dimension
(
crop_width
,
image_scale
)
...
...
@@ -295,7 +282,7 @@ def multi_scale_logits(images,
scaled_images
,
updated_options
,
weight_decay
=
weight_decay
,
reuse
=
True
if
count
else
None
,
reuse
=
tf
.
AUTO_REUSE
,
is_training
=
is_training
,
fine_tune_batch_norm
=
fine_tune_batch_norm
)
...
...
@@ -309,7 +296,7 @@ def multi_scale_logits(images,
if
len
(
image_pyramid
)
==
1
:
for
output
in
sorted
(
model_options
.
outputs_to_num_classes
):
outputs_to_scales_to_logits
[
output
][
_
MERGED_LOGITS_SCOPE
]
=
outputs_to_logits
[
output
]
MERGED_LOGITS_SCOPE
]
=
outputs_to_logits
[
output
]
return
outputs_to_scales_to_logits
# Save logits to the output map.
...
...
@@ -328,13 +315,13 @@ def multi_scale_logits(images,
merge_fn
=
(
tf
.
reduce_max
if
model_options
.
merge_method
==
'max'
else
tf
.
reduce_mean
)
outputs_to_scales_to_logits
[
output
][
_
MERGED_LOGITS_SCOPE
]
=
merge_fn
(
outputs_to_scales_to_logits
[
output
][
MERGED_LOGITS_SCOPE
]
=
merge_fn
(
all_logits
,
axis
=
4
)
return
outputs_to_scales_to_logits
def
_
extract_features
(
images
,
def
extract_features
(
images
,
model_options
,
weight_decay
=
0.0001
,
reuse
=
None
,
...
...
@@ -399,7 +386,7 @@ def _extract_features(images,
features
,
[
pool_height
,
pool_width
],
[
pool_height
,
pool_width
],
padding
=
'VALID'
)
image_feature
=
slim
.
conv2d
(
image_feature
,
depth
,
1
,
scope
=
_
IMAGE_POOLING_SCOPE
)
image_feature
,
depth
,
1
,
scope
=
IMAGE_POOLING_SCOPE
)
image_feature
=
tf
.
image
.
resize_bilinear
(
image_feature
,
[
pool_height
,
pool_width
],
align_corners
=
True
)
image_feature
.
set_shape
([
None
,
pool_height
,
pool_width
,
depth
])
...
...
@@ -407,14 +394,14 @@ def _extract_features(images,
# Employ a 1x1 convolution.
branch_logits
.
append
(
slim
.
conv2d
(
features
,
depth
,
1
,
scope
=
_
ASPP_SCOPE
+
str
(
0
)))
scope
=
ASPP_SCOPE
+
str
(
0
)))
if
model_options
.
atrous_rates
:
# Employ 3x3 convolutions with different atrous rates.
for
i
,
rate
in
enumerate
(
model_options
.
atrous_rates
,
1
):
scope
=
_
ASPP_SCOPE
+
str
(
i
)
scope
=
ASPP_SCOPE
+
str
(
i
)
if
model_options
.
aspp_with_separable_conv
:
aspp_features
=
_
split_separable_conv2d
(
aspp_features
=
split_separable_conv2d
(
features
,
filters
=
depth
,
rate
=
rate
,
...
...
@@ -428,12 +415,12 @@ def _extract_features(images,
# Merge branch logits.
concat_logits
=
tf
.
concat
(
branch_logits
,
3
)
concat_logits
=
slim
.
conv2d
(
concat_logits
,
depth
,
1
,
scope
=
_
CONCAT_PROJECTION_SCOPE
)
concat_logits
,
depth
,
1
,
scope
=
CONCAT_PROJECTION_SCOPE
)
concat_logits
=
slim
.
dropout
(
concat_logits
,
keep_prob
=
0.9
,
is_training
=
is_training
,
scope
=
_
CONCAT_PROJECTION_SCOPE
+
'_dropout'
)
scope
=
CONCAT_PROJECTION_SCOPE
+
'_dropout'
)
return
concat_logits
,
end_points
...
...
@@ -457,7 +444,7 @@ def _get_logits(images,
Returns:
outputs_to_logits: A map from output_type to logits.
"""
features
,
end_points
=
_
extract_features
(
features
,
end_points
=
extract_features
(
images
,
model_options
,
weight_decay
=
weight_decay
,
...
...
@@ -484,7 +471,7 @@ def _get_logits(images,
outputs_to_logits
=
{}
for
output
in
sorted
(
model_options
.
outputs_to_num_classes
):
outputs_to_logits
[
output
]
=
_
get_branch_logits
(
outputs_to_logits
[
output
]
=
get_branch_logits
(
features
,
model_options
.
outputs_to_num_classes
[
output
],
model_options
.
atrous_rates
,
...
...
@@ -543,7 +530,7 @@ def refine_by_decoder(features,
stride
=
1
,
reuse
=
reuse
):
with
slim
.
arg_scope
([
slim
.
batch_norm
],
**
batch_norm_params
):
with
tf
.
variable_scope
(
_
DECODER_SCOPE
,
_
DECODER_SCOPE
,
[
features
]):
with
tf
.
variable_scope
(
DECODER_SCOPE
,
DECODER_SCOPE
,
[
features
]):
feature_list
=
feature_extractor
.
networks_to_feature_maps
[
model_variant
][
feature_extractor
.
DECODER_END_POINTS
]
if
feature_list
is
None
:
...
...
@@ -553,6 +540,11 @@ def refine_by_decoder(features,
decoder_features
=
features
for
i
,
name
in
enumerate
(
feature_list
):
decoder_features_list
=
[
decoder_features
]
# MobileNet variants use different naming convention.
if
'mobilenet'
in
model_variant
:
feature_name
=
name
else
:
feature_name
=
'{}/{}'
.
format
(
feature_extractor
.
name_scope
[
model_variant
],
name
)
decoder_features_list
.
append
(
...
...
@@ -569,13 +561,13 @@ def refine_by_decoder(features,
[
None
,
decoder_height
,
decoder_width
,
None
])
decoder_depth
=
256
if
decoder_use_separable_conv
:
decoder_features
=
_
split_separable_conv2d
(
decoder_features
=
split_separable_conv2d
(
tf
.
concat
(
decoder_features_list
,
3
),
filters
=
decoder_depth
,
rate
=
1
,
weight_decay
=
weight_decay
,
scope
=
'decoder_conv0'
)
decoder_features
=
_
split_separable_conv2d
(
decoder_features
=
split_separable_conv2d
(
decoder_features
,
filters
=
decoder_depth
,
rate
=
1
,
...
...
@@ -593,7 +585,7 @@ def refine_by_decoder(features,
return
decoder_features
def
_
get_branch_logits
(
features
,
def
get_branch_logits
(
features
,
num_classes
,
atrous_rates
=
None
,
aspp_with_batch_norm
=
False
,
...
...
@@ -624,7 +616,7 @@ def _get_branch_logits(features,
ValueError: Upon invalid input kernel_size value.
"""
# When using batch normalization with ASPP, ASPP has been applied before
# in
_
extract_features, and thus we simply apply 1x1 convolution here.
# in extract_features, and thus we simply apply 1x1 convolution here.
if
aspp_with_batch_norm
or
atrous_rates
is
None
:
if
kernel_size
!=
1
:
raise
ValueError
(
'Kernel size must be 1 when atrous_rates is None or '
...
...
@@ -636,7 +628,7 @@ def _get_branch_logits(features,
weights_regularizer
=
slim
.
l2_regularizer
(
weight_decay
),
weights_initializer
=
tf
.
truncated_normal_initializer
(
stddev
=
0.01
),
reuse
=
reuse
):
with
tf
.
variable_scope
(
_
LOGITS_SCOPE_NAME
,
_
LOGITS_SCOPE_NAME
,
[
features
]):
with
tf
.
variable_scope
(
LOGITS_SCOPE_NAME
,
LOGITS_SCOPE_NAME
,
[
features
]):
branch_logits
=
[]
for
i
,
rate
in
enumerate
(
atrous_rates
):
scope
=
scope_suffix
...
...
@@ -656,8 +648,9 @@ def _get_branch_logits(features,
return
tf
.
add_n
(
branch_logits
)
def
_
split_separable_conv2d
(
inputs
,
def
split_separable_conv2d
(
inputs
,
filters
,
kernel_size
=
3
,
rate
=
1
,
weight_decay
=
0.00004
,
depthwise_weights_initializer_stddev
=
0.33
,
...
...
@@ -671,6 +664,8 @@ def _split_separable_conv2d(inputs,
Args:
inputs: Input tensor with shape [batch, height, width, channels].
filters: Number of filters in the 1x1 pointwise convolution.
kernel_size: A list of length 2: [kernel_height, kernel_width] of
the filters. Can be an int if both values are the same.
rate: Atrous convolution rate for the depthwise convolution.
weight_decay: The weight decay to use for regularizing the model.
depthwise_weights_initializer_stddev: The standard deviation of the
...
...
@@ -685,7 +680,7 @@ def _split_separable_conv2d(inputs,
outputs
=
slim
.
separable_conv2d
(
inputs
,
None
,
3
,
kernel_size
=
kernel_size
,
depth_multiplier
=
1
,
rate
=
rate
,
weights_initializer
=
tf
.
truncated_normal_initializer
(
...
...
research/deeplab/train.py
View file @
2310bc34
...
...
@@ -101,6 +101,8 @@ flags.DEFINE_float('momentum', 0.9, 'The momentum value to use')
flags
.
DEFINE_integer
(
'train_batch_size'
,
8
,
'The number of images in each batch during training.'
)
# For weight_decay, use 0.00004 for MobileNet-V2 or Xception model variants.
# Use 0.0001 for ResNet model variants.
flags
.
DEFINE_float
(
'weight_decay'
,
0.00004
,
'The value of the weight decay for training.'
)
...
...
@@ -206,8 +208,8 @@ def _build_deeplab(inputs_queue, outputs_to_num_classes, ignore_label):
# Add name to graph node so we can add to summary.
output_type_dict
=
outputs_to_scales_to_logits
[
common
.
OUTPUT_TYPE
]
output_type_dict
[
model
.
get_merged_logits_scope
()
]
=
tf
.
identity
(
output_type_dict
[
model
.
get_merged_logits_scope
()
],
output_type_dict
[
model
.
MERGED_LOGITS_SCOPE
]
=
tf
.
identity
(
output_type_dict
[
model
.
MERGED_LOGITS_SCOPE
],
name
=
common
.
OUTPUT_TYPE
)
for
output
,
num_classes
in
six
.
iteritems
(
outputs_to_num_classes
):
...
...
research/deeplab/utils/get_dataset_colormap.py
View file @
2310bc34
...
...
@@ -29,12 +29,14 @@ import numpy as np
# Dataset names.
_ADE20K
=
'ade20k'
_CITYSCAPES
=
'cityscapes'
_MAPILLARY_VISTAS
=
'mapillary_vistas'
_PASCAL
=
'pascal'
# Max number of entries in the colormap for each dataset.
_DATASET_MAX_ENTRIES
=
{
_ADE20K
:
151
,
_CITYSCAPES
:
19
,
_MAPILLARY_VISTAS
:
66
,
_PASCAL
:
256
,
}
...
...
@@ -229,6 +231,82 @@ def create_cityscapes_label_colormap():
])
def create_mapillary_vistas_label_colormap():
  """Builds the label colormap for the Mapillary Vistas segmentation benchmark.

  Returns:
    A 2-D integer array with one [R, G, B] row per label index (66 rows),
    for visualizing segmentation results.
  """
  # Row i holds the RGB color of label i; the order must not change.
  palette = (
      (165, 42, 42),
      (0, 192, 0),
      (196, 196, 196),
      (190, 153, 153),
      (180, 165, 180),
      (102, 102, 156),
      (102, 102, 156),
      (128, 64, 255),
      (140, 140, 200),
      (170, 170, 170),
      (250, 170, 160),
      (96, 96, 96),
      (230, 150, 140),
      (128, 64, 128),
      (110, 110, 110),
      (244, 35, 232),
      (150, 100, 100),
      (70, 70, 70),
      (150, 120, 90),
      (220, 20, 60),
      (255, 0, 0),
      (255, 0, 0),
      (255, 0, 0),
      (200, 128, 128),
      (255, 255, 255),
      (64, 170, 64),
      (128, 64, 64),
      (70, 130, 180),
      (255, 255, 255),
      (152, 251, 152),
      (107, 142, 35),
      (0, 170, 30),
      (255, 255, 128),
      (250, 0, 30),
      (0, 0, 0),
      (220, 220, 220),
      (170, 170, 170),
      (222, 40, 40),
      (100, 170, 30),
      (40, 40, 40),
      (33, 33, 33),
      (170, 170, 170),
      (0, 0, 142),
      (170, 170, 170),
      (210, 170, 100),
      (153, 153, 153),
      (128, 128, 128),
      (0, 0, 142),
      (250, 170, 30),
      (192, 192, 192),
      (220, 220, 0),
      (180, 165, 180),
      (119, 11, 32),
      (0, 0, 142),
      (0, 60, 100),
      (0, 0, 142),
      (0, 0, 90),
      (0, 0, 230),
      (0, 80, 100),
      (128, 64, 64),
      (0, 0, 110),
      (0, 0, 70),
      (0, 0, 192),
      (32, 32, 32),
      (0, 0, 0),
      (0, 0, 0),
  )
  colormap = np.asarray(palette)
  return colormap
def
create_pascal_label_colormap
():
"""Creates a label colormap used in PASCAL VOC segmentation benchmark.
...
...
@@ -254,6 +332,10 @@ def get_cityscapes_name():
return
_CITYSCAPES
def get_mapillary_vistas_name():
  """Returns the module-level dataset-name constant `_MAPILLARY_VISTAS`."""
  return _MAPILLARY_VISTAS
def
get_pascal_name
():
return
_PASCAL
...
...
@@ -287,6 +369,8 @@ def create_label_colormap(dataset=_PASCAL):
return
create_ade20k_label_colormap
()
elif
dataset
==
_CITYSCAPES
:
return
create_cityscapes_label_colormap
()
elif
dataset
==
_MAPILLARY_VISTAS
:
return
create_mapillary_vistas_label_colormap
()
elif
dataset
==
_PASCAL
:
return
create_pascal_label_colormap
()
else
:
...
...
research/deeplab/utils/get_dataset_colormap_test.py
View file @
2310bc34
...
...
@@ -86,6 +86,11 @@ class VisualizationUtilTest(tf.test.TestCase):
label
,
get_dataset_colormap
.
get_ade20k_name
())
self
.
assertTrue
(
np
.
array_equal
(
colored_label
,
expected_result
))
def testMapillaryVistasColorMapValue(self):
  """Spot-checks two rows of the Mapillary Vistas colormap."""
  colormap = get_dataset_colormap.create_mapillary_vistas_label_colormap()
  # (label index, expected RGB) pairs, checked in the same order as before.
  expected_rows = (
      (3, [190, 153, 153]),
      (6, [102, 102, 156]),
  )
  for label_index, expected_rgb in expected_rows:
    row_matches = np.array_equal(expected_rgb, colormap[label_index, :])
    self.assertTrue(row_matches)
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
research/deeplab/utils/train_utils.py
View file @
2310bc34
...
...
@@ -17,6 +17,7 @@
import
six
import
tensorflow
as
tf
from
deeplab.core
import
preprocess_utils
slim
=
tf
.
contrib
.
slim
...
...
@@ -54,12 +55,16 @@ def add_softmax_cross_entropy_loss_for_each_scale(scales_to_logits,
if
upsample_logits
:
# Label is not downsampled, and instead we upsample logits.
logits
=
tf
.
image
.
resize_bilinear
(
logits
,
tf
.
shape
(
labels
)[
1
:
3
],
align_corners
=
True
)
logits
,
preprocess_utils
.
resolve_shape
(
labels
,
4
)[
1
:
3
],
align_corners
=
True
)
scaled_labels
=
labels
else
:
# Label is downsampled to the same size as logits.
scaled_labels
=
tf
.
image
.
resize_nearest_neighbor
(
labels
,
tf
.
shape
(
logits
)[
1
:
3
],
align_corners
=
True
)
labels
,
preprocess_utils
.
resolve_shape
(
logits
,
4
)[
1
:
3
],
align_corners
=
True
)
scaled_labels
=
tf
.
reshape
(
scaled_labels
,
shape
=
[
-
1
])
not_ignore_mask
=
tf
.
to_float
(
tf
.
not_equal
(
scaled_labels
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment