ModelZoo / ResNet50_tensorflow / Commits / 8caa269d

Commit 8caa269d, authored Jun 14, 2018 by Liang-Chieh Chen; committed by huihui on Jun 14, 2018.

PiperOrigin-RevId: 200493322
Parent: 1f82c227
Showing 15 changed files with 250 additions and 195 deletions (+250 −195):

research/deeplab/README.md                            +5   −0
research/deeplab/common.py                            +6   −4
research/deeplab/common_test.py (new file)            +34  −0
research/deeplab/core/feature_extractor.py            +4   −10
research/deeplab/core/resnet_v1_beta.py               +32  −102
research/deeplab/core/resnet_v1_beta_test.py          +0   −2
research/deeplab/datasets/segmentation_dataset.py     +1   −0
research/deeplab/deeplab_demo.ipynb                   +2   −2
research/deeplab/g3doc/ade20k.md                      +1   −1
research/deeplab/g3doc/model_zoo.md                   +1   −1
research/deeplab/model.py                             +64  −69
research/deeplab/train.py                             +4   −2
research/deeplab/utils/get_dataset_colormap.py        +84  −0
research/deeplab/utils/get_dataset_colormap_test.py   +5   −0
research/deeplab/utils/train_utils.py                 +7   −2
research/deeplab/README.md (+5 −0)

@@ -113,6 +113,11 @@ with "deeplab".
 ## Change Logs
 
+### May 26, 2018
+
+Updated ADE20K pretrained checkpoint.
+
 ### May 18, 2018
 1. Added builders for ResNet-v1 and Xception model variants.
 1. Added ADE20K support, including colormap and pretrained Xception_65 checkpoint.
research/deeplab/common.py (+6 −4)

@@ -40,10 +40,10 @@ flags.DEFINE_integer('logits_kernel_size', 1,
                      'generates logits.')
 
 # When using 'mobilent_v2', we set atrous_rates = decoder_output_stride = None.
-# When using 'xception_65', we set atrous_rates = [6, 12, 18] (output stride 16)
-# and decoder_output_stride = 4.
-flags.DEFINE_enum('model_variant', 'mobilenet_v2',
-                  ['xception_65', 'mobilenet_v2'], 'DeepLab model variant.')
+# When using 'xception_65' or 'resnet_v1' model variants, we set
+# atrous_rates = [6, 12, 18] (output stride 16) and decoder_output_stride = 4.
+# See core/feature_extractor.py for supported model variants.
+flags.DEFINE_string('model_variant', 'mobilenet_v2', 'DeepLab model variant.')
 
 flags.DEFINE_multi_float('image_pyramid', None,
                          'Input scales for multi-scale feature extraction.')

@@ -57,6 +57,8 @@ flags.DEFINE_boolean('aspp_with_batch_norm', True,
 flags.DEFINE_boolean('aspp_with_separable_conv', True,
                      'Use separable convolution for ASPP or not.')
 
+# Defaults to None. Set multi_grid = [1, 2, 4] when using provided
+# 'resnet_v1_{50,101}_beta' checkpoints.
 flags.DEFINE_multi_integer('multi_grid', None,
                            'Employ a hierarchy of atrous rates for ResNet.')
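A minimal sketch of what the flag change above enables: because model_variant is now a DEFINE_string rather than a two-value DEFINE_enum, ResNet variants listed in core/feature_extractor.py can be passed directly, and multi_grid can be set per the new comment. The snippet below is illustrative only (it assumes the deeplab package is on PYTHONPATH and a TensorFlow 1.x release where tf.app.flags is backed by absl.flags); it is not part of this commit.

# Hedged sketch: exercising the updated flags from common.py outside train.py.
import tensorflow as tf
from deeplab import common  # registers the flags shown in the hunks above

FLAGS = tf.app.flags.FLAGS

if __name__ == '__main__':
  # 'resnet_v1_50_beta' would have been rejected by the old DEFINE_enum;
  # with DEFINE_string any variant known to core/feature_extractor.py works.
  FLAGS(['example',
         '--model_variant=resnet_v1_50_beta',
         '--multi_grid=1', '--multi_grid=2', '--multi_grid=4'])
  print(FLAGS.model_variant, FLAGS.multi_grid)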
research/deeplab/common_test.py (new file, mode 100644, +34 −0)

# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Tests for common.py."""

import tensorflow as tf

from deeplab import common


class CommonTest(tf.test.TestCase):

  def testOutputsToNumClasses(self):
    num_classes = 21
    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: num_classes})
    self.assertEqual(
        model_options.outputs_to_num_classes[common.OUTPUT_TYPE], num_classes)


if __name__ == '__main__':
  tf.test.main()
research/deeplab/core/feature_extractor.py (+4 −10)

@@ -98,8 +98,7 @@ DECODER_END_POINTS = 'decoder_end_points'
 # A dictionary from network name to a map of end point features.
 networks_to_feature_maps = {
     'mobilenet_v2': {
-        # The provided checkpoint does not include decoder module.
-        DECODER_END_POINTS: None,
+        DECODER_END_POINTS: ['layer_4/depthwise_output'],
     },
     'resnet_v1_50': {
         DECODER_END_POINTS: ['block1/unit_2/bottleneck_v1/conv3'],

@@ -211,8 +210,7 @@ def extract_features(images,
                      regularize_depthwise=False,
                      preprocess_images=True,
                      num_classes=None,
-                     global_pool=False,
-                     use_bounded_activations=False):
+                     global_pool=False):
   """Extracts features by the particular model_variant.
 
   Args:

@@ -237,8 +235,6 @@ def extract_features(images,
       to None for dense prediction tasks.
     global_pool: Global pooling for image classification task. Defaults to
       False, since dense prediction tasks do not use this.
-    use_bounded_activations: Whether or not to use bounded activations. Bounded
-      activations better lend themselves to quantized inference.
 
   Returns:
     features: A tensor of size [batch, feature_height, feature_width,

@@ -255,8 +251,7 @@ def extract_features(images,
        weight_decay=weight_decay,
        batch_norm_decay=0.95,
        batch_norm_epsilon=1e-5,
-        batch_norm_scale=True,
-        activation_fn=tf.nn.relu6 if use_bounded_activations else tf.nn.relu)
+        batch_norm_scale=True)
    features, end_points = get_network(
        model_variant, preprocess_images, arg_scope)(
            inputs=images,

@@ -266,8 +261,7 @@ def extract_features(images,
            output_stride=output_stride,
            multi_grid=multi_grid,
            reuse=reuse,
-            scope=name_scope[model_variant],
-            use_bounded_activations=use_bounded_activations)
+            scope=name_scope[model_variant])
  elif 'xception' in model_variant:
    arg_scope = arg_scopes_map[model_variant](
        weight_decay=weight_decay,
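For orientation, a minimal sketch of calling extract_features with the post-change signature (no use_bounded_activations argument). Keyword names follow the hunks above; the input size and other defaults are assumptions, and this snippet is not part of the commit.

# Hedged sketch: building a feature extractor graph after this change.
import tensorflow as tf
from deeplab.core import feature_extractor

images = tf.placeholder(tf.float32, [1, 513, 513, 3])  # arbitrary crop size
features, end_points = feature_extractor.extract_features(
    images,
    output_stride=16,
    multi_grid=[1, 2, 4],               # only meaningful for ResNet variants
    model_variant='resnet_v1_50_beta',  # assumed available per feature_extractor.py
    weight_decay=0.0001)
print(features.get_shape())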
research/deeplab/core/resnet_v1_beta.py (+32 −102)

@@ -44,8 +44,7 @@ def bottleneck(inputs,
                unit_rate=1,
                rate=1,
                outputs_collections=None,
-               scope=None,
-               use_bounded_activations=True):
+               scope=None):
   """Bottleneck residual unit variant with BN after convolutions.
 
   This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for

@@ -65,8 +64,6 @@ def bottleneck(inputs,
     rate: An integer, rate for atrous convolution.
     outputs_collections: Collection to add the ResNet unit output.
     scope: Optional variable_scope.
-    use_bounded_activations: Whether or not to use bounded activations. Bounded
-      activations better lend themselves to quantized inference.
 
   Returns:
     The ResNet unit's output.

@@ -81,7 +78,7 @@ def bottleneck(inputs,
                              depth,
                              [1, 1],
                              stride=stride,
-                             activation_fn=tf.nn.relu6 if use_bounded_activations else None,
+                             activation_fn=None,
                              scope='shortcut')
     residual = slim.conv2d(inputs, depth_bottleneck, [1, 1], stride=1,

@@ -90,13 +87,7 @@ def bottleneck(inputs,
                            rate=rate*unit_rate, scope='conv2')
     residual = slim.conv2d(residual, depth, [1, 1], stride=1,
                            activation_fn=None, scope='conv3')
 
-    if use_bounded_activations:
-      # Use clip_by_value to simulate bandpass activation.
-      residual = tf.clip_by_value(residual, -6.0, 6.0)
-      output = tf.nn.relu6(shortcut + residual)
-    else:
-      output = tf.nn.relu(shortcut + residual)
+    output = tf.nn.relu(shortcut + residual)
 
     return slim.utils.collect_named_outputs(outputs_collections,
                                             sc.name,

@@ -129,8 +120,6 @@ def resnet_v1_beta(inputs,
                    global_pool=True,
                    output_stride=None,
                    root_block_fn=None,
-                   store_non_strided_activations=False,
-                   use_bounded_activations=False,
                    reuse=None,
                    scope=None):
   """Generator for v1 ResNet models (beta variant).

@@ -159,14 +148,6 @@ def resnet_v1_beta(inputs,
     root_block_fn: The function consisting of convolution operations applied to
       the root input. If root_block_fn is None, use the original setting of
       RseNet-v1, which is simply one convolution with 7x7 kernel and stride=2.
-    store_non_strided_activations: If True, we compute non-strided (undecimated)
-      activations at the last unit of each block and store them in the
-      `outputs_collections` before subsampling them. This gives us access to
-      higher resolution intermediate activations which are useful in some
-      dense prediction problems but increases 4x the computation and memory cost
-      at the last unit of each block.
-    use_bounded_activations: Whether or not to use bounded activations. Bounded
-      activations better lend themselves to quantized inference.
     reuse: whether or not the network and its variables should be reused. To be
       able to reuse 'scope' must be given.
     scope: Optional variable_scope.

@@ -196,35 +177,32 @@ def resnet_v1_beta(inputs,
     with slim.arg_scope([slim.conv2d, bottleneck,
                          resnet_utils.stack_blocks_dense],
                         outputs_collections=end_points_collection):
-      with slim.arg_scope(
-          [bottleneck], use_bounded_activations=use_bounded_activations):
-        if is_training is not None:
-          arg_scope = slim.arg_scope([slim.batch_norm], is_training=is_training)
-        else:
-          arg_scope = slim.arg_scope([])
-        with arg_scope:
-          net = inputs
-          if output_stride is not None:
-            if output_stride % 4 != 0:
-              raise ValueError('The output_stride needs to be a multiple of 4.')
-            output_stride /= 4
-          net = root_block_fn(net)
-          net = slim.max_pool2d(net, 3, stride=2, padding='SAME', scope='pool1')
-          net = resnet_utils.stack_blocks_dense(net, blocks, output_stride,
-                                                store_non_strided_activations)
-          if global_pool:
-            # Global average pooling.
-            net = tf.reduce_mean(net, [1, 2], name='pool5', keepdims=True)
-          if num_classes is not None:
-            net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
-                              normalizer_fn=None, scope='logits')
-          # Convert end_points_collection into a dictionary of end_points.
-          end_points = slim.utils.convert_collection_to_dict(
-              end_points_collection)
-          if num_classes is not None:
-            end_points['predictions'] = slim.softmax(net, scope='predictions')
-          return net, end_points
+      if is_training is not None:
+        arg_scope = slim.arg_scope([slim.batch_norm], is_training=is_training)
+      else:
+        arg_scope = slim.arg_scope([])
+      with arg_scope:
+        net = inputs
+        if output_stride is not None:
+          if output_stride % 4 != 0:
+            raise ValueError('The output_stride needs to be a multiple of 4.')
+          output_stride /= 4
+        net = root_block_fn(net)
+        net = slim.max_pool2d(net, 3, stride=2, padding='SAME', scope='pool1')
+        net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)
+        if global_pool:
+          # Global average pooling.
+          net = tf.reduce_mean(net, [1, 2], name='pool5', keepdims=True)
+        if num_classes is not None:
+          net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
+                            normalizer_fn=None, scope='logits')
+        # Convert end_points_collection into a dictionary of end_points.
+        end_points = slim.utils.convert_collection_to_dict(
+            end_points_collection)
+        if num_classes is not None:
+          end_points['predictions'] = slim.softmax(net, scope='predictions')
+        return net, end_points
 
 
 def resnet_v1_beta_block(scope, base_depth, num_units, stride):

@@ -258,9 +236,7 @@ def resnet_v1_50(inputs,
                  is_training=None,
                  global_pool=False,
                  output_stride=None,
-                 store_non_strided_activations=False,
                  multi_grid=None,
-                 use_bounded_activations=False,
                  reuse=None,
                  scope='resnet_v1_50'):
   """Resnet v1 50.

@@ -275,15 +251,7 @@ def resnet_v1_50(inputs,
     output_stride: If None, then the output will be computed at the nominal
       network stride. If output_stride is not None, it specifies the requested
       ratio of input to output spatial resolution.
-    store_non_strided_activations: If True, we compute non-strided (undecimated)
-      activations at the last unit of each block and store them in the
-      `outputs_collections` before subsampling them. This gives us access to
-      higher resolution intermediate activations which are useful in some
-      dense prediction problems but increases 4x the computation and memory cost
-      at the last unit of each block.
     multi_grid: Employ a hierarchy of different atrous rates within network.
-    use_bounded_activations: Whether or not to use bounded activations. Bounded
-      activations better lend themselves to quantized inference.
     reuse: whether or not the network and its variables should be reused. To be
       able to reuse 'scope' must be given.
     scope: Optional variable_scope.

@@ -328,10 +296,8 @@ def resnet_v1_50(inputs,
       is_training=is_training,
       global_pool=global_pool,
       output_stride=output_stride,
-      store_non_strided_activations=store_non_strided_activations,
       reuse=reuse,
-      scope=scope,
-      use_bounded_activations=use_bounded_activations)
+      scope=scope)
 
 
 def resnet_v1_50_beta(inputs,

@@ -339,9 +305,7 @@ def resnet_v1_50_beta(inputs,
                       is_training=None,
                       global_pool=False,
                       output_stride=None,
-                      store_non_strided_activations=False,
                       multi_grid=None,
-                      use_bounded_activations=False,
                       reuse=None,
                       scope='resnet_v1_50'):
   """Resnet v1 50 beta variant.

@@ -360,15 +324,7 @@ def resnet_v1_50_beta(inputs,
     output_stride: If None, then the output will be computed at the nominal
       network stride. If output_stride is not None, it specifies the requested
       ratio of input to output spatial resolution.
-    store_non_strided_activations: If True, we compute non-strided (undecimated)
-      activations at the last unit of each block and store them in the
-      `outputs_collections` before subsampling them. This gives us access to
-      higher resolution intermediate activations which are useful in some
-      dense prediction problems but increases 4x the computation and memory cost
-      at the last unit of each block.
     multi_grid: Employ a hierarchy of different atrous rates within network.
-    use_bounded_activations: Whether or not to use bounded activations. Bounded
-      activations better lend themselves to quantized inference.
     reuse: whether or not the network and its variables should be reused. To be
       able to reuse 'scope' must be given.
     scope: Optional variable_scope.

@@ -414,10 +370,8 @@ def resnet_v1_50_beta(inputs,
       global_pool=global_pool,
       output_stride=output_stride,
       root_block_fn=functools.partial(root_block_fn_for_beta_variant),
-      store_non_strided_activations=store_non_strided_activations,
       reuse=reuse,
-      scope=scope,
-      use_bounded_activations=use_bounded_activations)
+      scope=scope)
 
 
 def resnet_v1_101(inputs,

@@ -425,9 +379,7 @@ def resnet_v1_101(inputs,
                   is_training=None,
                   global_pool=False,
                   output_stride=None,
-                  store_non_strided_activations=False,
                   multi_grid=None,
-                  use_bounded_activations=False,
                   reuse=None,
                   scope='resnet_v1_101'):
   """Resnet v1 101.

@@ -442,15 +394,7 @@ def resnet_v1_101(inputs,
     output_stride: If None, then the output will be computed at the nominal
       network stride. If output_stride is not None, it specifies the requested
       ratio of input to output spatial resolution.
-    store_non_strided_activations: If True, we compute non-strided (undecimated)
-      activations at the last unit of each block and store them in the
-      `outputs_collections` before subsampling them. This gives us access to
-      higher resolution intermediate activations which are useful in some
-      dense prediction problems but increases 4x the computation and memory cost
-      at the last unit of each block.
     multi_grid: Employ a hierarchy of different atrous rates within network.
-    use_bounded_activations: Whether or not to use bounded activations. Bounded
-      activations better lend themselves to quantized inference.
     reuse: whether or not the network and its variables should be reused. To be
       able to reuse 'scope' must be given.
     scope: Optional variable_scope.

@@ -495,10 +439,8 @@ def resnet_v1_101(inputs,
       is_training=is_training,
       global_pool=global_pool,
       output_stride=output_stride,
-      store_non_strided_activations=store_non_strided_activations,
       reuse=reuse,
-      scope=scope,
-      use_bounded_activations=use_bounded_activations)
+      scope=scope)
 
 
 def resnet_v1_101_beta(inputs,

@@ -506,9 +448,7 @@ def resnet_v1_101_beta(inputs,
                        is_training=None,
                        global_pool=False,
                        output_stride=None,
-                       store_non_strided_activations=False,
                        multi_grid=None,
-                       use_bounded_activations=False,
                        reuse=None,
                        scope='resnet_v1_101'):
   """Resnet v1 101 beta variant.

@@ -527,15 +467,7 @@ def resnet_v1_101_beta(inputs,
     output_stride: If None, then the output will be computed at the nominal
       network stride. If output_stride is not None, it specifies the requested
       ratio of input to output spatial resolution.
-    store_non_strided_activations: If True, we compute non-strided (undecimated)
-      activations at the last unit of each block and store them in the
-      `outputs_collections` before subsampling them. This gives us access to
-      higher resolution intermediate activations which are useful in some
-      dense prediction problems but increases 4x the computation and memory cost
-      at the last unit of each block.
     multi_grid: Employ a hierarchy of different atrous rates within network.
-    use_bounded_activations: Whether or not to use bounded activations. Bounded
-      activations better lend themselves to quantized inference.
     reuse: whether or not the network and its variables should be reused. To be
       able to reuse 'scope' must be given.
     scope: Optional variable_scope.

@@ -581,7 +513,5 @@ def resnet_v1_101_beta(inputs,
       global_pool=global_pool,
       output_stride=output_stride,
       root_block_fn=functools.partial(root_block_fn_for_beta_variant),
-      store_non_strided_activations=store_non_strided_activations,
-      use_bounded_activations=use_bounded_activations,
       reuse=reuse,
       scope=scope)
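The bounded-activation branch removed above is worth keeping in mind when targeting quantized inference. Below is a standalone sketch that re-expresses the deleted logic so its effect is easy to see; it is illustrative only and, after this commit, not part of the DeepLab API.

# Hedged sketch: what the removed use_bounded_activations path computed.
import tensorflow as tf

def add_residual(shortcut, residual, use_bounded_activations=False):
  """Combines a shortcut and a residual branch, optionally with bounded range."""
  if use_bounded_activations:
    # Clip the residual to [-6, 6] and apply relu6 so intermediate values stay
    # in a fixed range, which is friendlier to 8-bit quantized inference.
    residual = tf.clip_by_value(residual, -6.0, 6.0)
    return tf.nn.relu6(shortcut + residual)
  return tf.nn.relu(shortcut + residual)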
research/deeplab/core/resnet_v1_beta_test.py (+0 −2)

@@ -53,7 +53,6 @@ class ResnetCompleteNetworkTest(tf.test.TestCase):
                     is_training=True,
                     global_pool=True,
                     output_stride=None,
-                    store_non_strided_activations=False,
                     multi_grid=None,
                     reuse=None,
                     scope='resnet_v1_small'):

@@ -84,7 +83,6 @@ class ResnetCompleteNetworkTest(tf.test.TestCase):
         output_stride=output_stride,
         root_block_fn=functools.partial(
             resnet_v1_beta.root_block_fn_for_beta_variant),
-        store_non_strided_activations=store_non_strided_activations,
         reuse=reuse,
         scope=scope)
research/deeplab/datasets/segmentation_dataset.py (+1 −0)

@@ -89,6 +89,7 @@ _CITYSCAPES_INFORMATION = DatasetDescriptor(
 _PASCAL_VOC_SEG_INFORMATION = DatasetDescriptor(
     splits_to_sizes={
         'train': 1464,
+        'train_aug': 10582,
         'trainval': 2913,
         'val': 1449,
     },
research/deeplab/deeplab_demo.ipynb (+2 −2)

@@ -294,13 +294,13 @@
     "  try:\n",
     "    f = urllib.request.urlopen(url)\n",
     "    jpeg_str = f.read()\n",
-    "    original_im = Image.open(BytesIO(jpeg_str))\n",
+    "    orignal_im = Image.open(BytesIO(jpeg_str))\n",
     "  except IOError:\n",
     "    print('Cannot retrieve image. Please check url: ' + url)\n",
     "    return\n",
     "\n",
     "  print('running deeplab on image %s...' % url)\n",
-    "  resized_im, seg_map = MODEL.run(original_im)\n",
+    "  resized_im, seg_map = MODEL.run(orignal_im)\n",
     "\n",
     "  vis_segmentation(resized_im, seg_map)\n",
     "\n",
research/deeplab/g3doc/ade20k.md (+1 −1)

@@ -49,7 +49,7 @@ A local training job using `xception_65` can be run with the following command:
 # From tensorflow/models/research/
 python deeplab/train.py \
     --logtostderr \
-    --training_number_of_steps=90000 \
+    --training_number_of_steps=150000 \
     --train_split="train" \
     --model_variant="xception_65" \
     --atrous_rates=6 \
research/deeplab/g3doc/model_zoo.md (+1 −1)

@@ -84,7 +84,7 @@ xception_ade20k_train | Xception_65 | ImageNet <br> ADE20K
 Checkpoint name | Eval OS | Eval scales | Left-right Flip | mIOU | Pixel-wise Accuracy | File Size
 ------------------------------------- | :-------: | :-------------------------: | :-------------: | :-------------------: | :-------------------: | :-------:
-[xception_ade20k_train](http://download.tensorflow.org/models/deeplabv3_xception_ade20k_train_2018_05_14.tar.gz) | 16 | [0.5:0.25:1.75] | Yes | 43.54% (val) | 81.74% (val) | 439MB
+[xception_ade20k_train](http://download.tensorflow.org/models/deeplabv3_xception_ade20k_train_2018_05_29.tar.gz) | 8 | [0.5:0.25:1.75] | Yes | 45.65% (val) | 82.52% (val) | 439MB
 
 ## Checkpoints pretrained on ImageNet
research/deeplab/model.py (+64 −69)

@@ -56,16 +56,12 @@ from deeplab.core import feature_extractor
 slim = tf.contrib.slim
 
-_LOGITS_SCOPE_NAME = 'logits'
-_MERGED_LOGITS_SCOPE = 'merged_logits'
-_IMAGE_POOLING_SCOPE = 'image_pooling'
-_ASPP_SCOPE = 'aspp'
-_CONCAT_PROJECTION_SCOPE = 'concat_projection'
-_DECODER_SCOPE = 'decoder'
-
-
-def get_merged_logits_scope():
-  return _MERGED_LOGITS_SCOPE
+LOGITS_SCOPE_NAME = 'logits'
+MERGED_LOGITS_SCOPE = 'merged_logits'
+IMAGE_POOLING_SCOPE = 'image_pooling'
+ASPP_SCOPE = 'aspp'
+CONCAT_PROJECTION_SCOPE = 'concat_projection'
+DECODER_SCOPE = 'decoder'
 
 
 def get_extra_layer_scopes(last_layers_contain_logits_only=False):

@@ -79,14 +75,14 @@ def get_extra_layer_scopes(last_layers_contain_logits_only=False):
     A list of scopes for extra layers.
   """
   if last_layers_contain_logits_only:
-    return [_LOGITS_SCOPE_NAME]
+    return [LOGITS_SCOPE_NAME]
   else:
     return [
-        _LOGITS_SCOPE_NAME,
-        _IMAGE_POOLING_SCOPE,
-        _ASPP_SCOPE,
-        _CONCAT_PROJECTION_SCOPE,
-        _DECODER_SCOPE,
+        LOGITS_SCOPE_NAME,
+        IMAGE_POOLING_SCOPE,
+        ASPP_SCOPE,
+        CONCAT_PROJECTION_SCOPE,
+        DECODER_SCOPE,
     ]

@@ -133,7 +129,7 @@ def predict_labels_multi_scale(images,
   for output in sorted(outputs_to_scales_to_logits):
     scales_to_logits = outputs_to_scales_to_logits[output]
     logits = tf.image.resize_bilinear(
-        scales_to_logits[_MERGED_LOGITS_SCOPE],
+        scales_to_logits[MERGED_LOGITS_SCOPE],
         tf.shape(images)[1:3],
         align_corners=True)
     outputs_to_predictions[output].append(

@@ -143,7 +139,7 @@ def predict_labels_multi_scale(images,
     scales_to_logits_reversed = (
         outputs_to_scales_to_logits_reversed[output])
     logits_reversed = tf.image.resize_bilinear(
-        tf.reverse_v2(scales_to_logits_reversed[_MERGED_LOGITS_SCOPE], [2]),
+        tf.reverse_v2(scales_to_logits_reversed[MERGED_LOGITS_SCOPE], [2]),
         tf.shape(images)[1:3],
         align_corners=True)
     outputs_to_predictions[output].append(

@@ -182,7 +178,7 @@ def predict_labels(images, model_options, image_pyramid=None):
   for output in sorted(outputs_to_scales_to_logits):
     scales_to_logits = outputs_to_scales_to_logits[output]
     logits = tf.image.resize_bilinear(
-        scales_to_logits[_MERGED_LOGITS_SCOPE],
+        scales_to_logits[MERGED_LOGITS_SCOPE],
         tf.shape(images)[1:3],
         align_corners=True)
     predictions[output] = tf.argmax(logits, 3)

@@ -221,7 +217,6 @@ def multi_scale_logits(images,
     images: A tensor of size [batch, height, width, channels].
     model_options: A ModelOptions instance to configure models.
     image_pyramid: Input image scales for multi-scale feature extraction.
-
     weight_decay: The weight decay for model variables.
     is_training: Is training or not.
     fine_tune_batch_norm: Fine-tune the batch norm parameters or not.

@@ -242,17 +237,9 @@ def multi_scale_logits(images,
   # Setup default values.
   if not image_pyramid:
     image_pyramid = [1.0]
-
   if model_options.crop_size is None and model_options.add_image_level_feature:
     raise ValueError(
         'Crop size must be specified for using image-level feature.')
-  if model_options.model_variant == 'mobilenet_v2':
-    if (model_options.atrous_rates is not None or
-        model_options.decoder_output_stride is not None):
-      # Output a warning and users should make sure if the setting is desired.
-      tf.logging.warning('Our provided mobilenet_v2 checkpoint does not '
-                         'include ASPP and decoder modules.')
-
   crop_height = (
       model_options.crop_size[0]
       if model_options.crop_size else tf.shape(images)[1])

@@ -277,7 +264,7 @@ def multi_scale_logits(images,
       for k in model_options.outputs_to_num_classes
   }
 
-  for count, image_scale in enumerate(image_pyramid):
+  for image_scale in image_pyramid:
     if image_scale != 1.0:
       scaled_height = scale_dimension(crop_height, image_scale)
       scaled_width = scale_dimension(crop_width, image_scale)

@@ -295,7 +282,7 @@ def multi_scale_logits(images,
         scaled_images,
         updated_options,
         weight_decay=weight_decay,
-        reuse=True if count else None,
+        reuse=tf.AUTO_REUSE,
         is_training=is_training,
         fine_tune_batch_norm=fine_tune_batch_norm)

@@ -309,7 +296,7 @@ def multi_scale_logits(images,
     if len(image_pyramid) == 1:
       for output in sorted(model_options.outputs_to_num_classes):
         outputs_to_scales_to_logits[output][
-            _MERGED_LOGITS_SCOPE] = outputs_to_logits[output]
+            MERGED_LOGITS_SCOPE] = outputs_to_logits[output]
       return outputs_to_scales_to_logits
 
     # Save logits to the output map.

@@ -328,18 +315,18 @@ def multi_scale_logits(images,
     merge_fn = (
         tf.reduce_max
         if model_options.merge_method == 'max' else tf.reduce_mean)
-    outputs_to_scales_to_logits[output][_MERGED_LOGITS_SCOPE] = merge_fn(
+    outputs_to_scales_to_logits[output][MERGED_LOGITS_SCOPE] = merge_fn(
         all_logits, axis=4)
 
   return outputs_to_scales_to_logits
 
 
-def _extract_features(images,
-                      model_options,
-                      weight_decay=0.0001,
-                      reuse=None,
-                      is_training=False,
-                      fine_tune_batch_norm=False):
+def extract_features(images,
+                     model_options,
+                     weight_decay=0.0001,
+                     reuse=None,
+                     is_training=False,
+                     fine_tune_batch_norm=False):
   """Extracts features by the particular model_variant.
 
   Args:

@@ -399,7 +386,7 @@ def _extract_features(images,
             features, [pool_height, pool_width], [pool_height, pool_width],
             padding='VALID')
         image_feature = slim.conv2d(
-            image_feature, depth, 1, scope=_IMAGE_POOLING_SCOPE)
+            image_feature, depth, 1, scope=IMAGE_POOLING_SCOPE)
         image_feature = tf.image.resize_bilinear(
             image_feature, [pool_height, pool_width], align_corners=True)
         image_feature.set_shape([None, pool_height, pool_width, depth])

@@ -407,14 +394,14 @@ def _extract_features(images,
       # Employ a 1x1 convolution.
       branch_logits.append(slim.conv2d(features, depth, 1,
-                                       scope=_ASPP_SCOPE + str(0)))
+                                       scope=ASPP_SCOPE + str(0)))
 
       if model_options.atrous_rates:
         # Employ 3x3 convolutions with different atrous rates.
         for i, rate in enumerate(model_options.atrous_rates, 1):
-          scope = _ASPP_SCOPE + str(i)
+          scope = ASPP_SCOPE + str(i)
           if model_options.aspp_with_separable_conv:
-            aspp_features = _split_separable_conv2d(
+            aspp_features = split_separable_conv2d(
                 features,
                 filters=depth,
                 rate=rate,

@@ -428,12 +415,12 @@ def _extract_features(images,
       # Merge branch logits.
       concat_logits = tf.concat(branch_logits, 3)
       concat_logits = slim.conv2d(
-          concat_logits, depth, 1, scope=_CONCAT_PROJECTION_SCOPE)
+          concat_logits, depth, 1, scope=CONCAT_PROJECTION_SCOPE)
       concat_logits = slim.dropout(
           concat_logits,
           keep_prob=0.9,
           is_training=is_training,
-          scope=_CONCAT_PROJECTION_SCOPE + '_dropout')
+          scope=CONCAT_PROJECTION_SCOPE + '_dropout')
 
       return concat_logits, end_points

@@ -457,7 +444,7 @@ def _get_logits(images,
   Returns:
     outputs_to_logits: A map from output_type to logits.
   """
-  features, end_points = _extract_features(
+  features, end_points = extract_features(
       images,
       model_options,
       weight_decay=weight_decay,

@@ -484,7 +471,7 @@ def _get_logits(images,
   outputs_to_logits = {}
   for output in sorted(model_options.outputs_to_num_classes):
-    outputs_to_logits[output] = _get_branch_logits(
+    outputs_to_logits[output] = get_branch_logits(
         features,
         model_options.outputs_to_num_classes[output],
         model_options.atrous_rates,

@@ -543,7 +530,7 @@ def refine_by_decoder(features,
       stride=1,
       reuse=reuse):
     with slim.arg_scope([slim.batch_norm], **batch_norm_params):
-      with tf.variable_scope(_DECODER_SCOPE, _DECODER_SCOPE, [features]):
+      with tf.variable_scope(DECODER_SCOPE, DECODER_SCOPE, [features]):
         feature_list = feature_extractor.networks_to_feature_maps[
             model_variant][feature_extractor.DECODER_END_POINTS]
         if feature_list is None:

@@ -553,8 +540,13 @@ def refine_by_decoder(features,
         decoder_features = features
         for i, name in enumerate(feature_list):
           decoder_features_list = [decoder_features]
-          feature_name = '{}/{}'.format(
-              feature_extractor.name_scope[model_variant], name)
+
+          # MobileNet variants use different naming convention.
+          if 'mobilenet' in model_variant:
+            feature_name = name
+          else:
+            feature_name = '{}/{}'.format(
+                feature_extractor.name_scope[model_variant], name)
           decoder_features_list.append(
               slim.conv2d(
                   end_points[feature_name],

@@ -569,13 +561,13 @@ def refine_by_decoder(features,
                 [None, decoder_height, decoder_width, None])
           decoder_depth = 256
           if decoder_use_separable_conv:
-            decoder_features = _split_separable_conv2d(
+            decoder_features = split_separable_conv2d(
                 tf.concat(decoder_features_list, 3),
                 filters=decoder_depth,
                 rate=1,
                 weight_decay=weight_decay,
                 scope='decoder_conv0')
-            decoder_features = _split_separable_conv2d(
+            decoder_features = split_separable_conv2d(
                 decoder_features,
                 filters=decoder_depth,
                 rate=1,

@@ -593,14 +585,14 @@ def refine_by_decoder(features,
   return decoder_features
 
 
-def _get_branch_logits(features,
-                       num_classes,
-                       atrous_rates=None,
-                       aspp_with_batch_norm=False,
-                       kernel_size=1,
-                       weight_decay=0.0001,
-                       reuse=None,
-                       scope_suffix=''):
+def get_branch_logits(features,
+                      num_classes,
+                      atrous_rates=None,
+                      aspp_with_batch_norm=False,
+                      kernel_size=1,
+                      weight_decay=0.0001,
+                      reuse=None,
+                      scope_suffix=''):
   """Gets the logits from each model's branch.
 
   The underlying model is branched out in the last layer when atrous

@@ -624,7 +616,7 @@ def _get_branch_logits(features,
     ValueError: Upon invalid input kernel_size value.
   """
   # When using batch normalization with ASPP, ASPP has been applied before
-  # in _extract_features, and thus we simply apply 1x1 convolution here.
+  # in extract_features, and thus we simply apply 1x1 convolution here.
   if aspp_with_batch_norm or atrous_rates is None:
     if kernel_size != 1:
       raise ValueError('Kernel size must be 1 when atrous_rates is None or '

@@ -636,7 +628,7 @@ def _get_branch_logits(features,
       weights_regularizer=slim.l2_regularizer(weight_decay),
       weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
       reuse=reuse):
-    with tf.variable_scope(_LOGITS_SCOPE_NAME, _LOGITS_SCOPE_NAME, [features]):
+    with tf.variable_scope(LOGITS_SCOPE_NAME, LOGITS_SCOPE_NAME, [features]):
       branch_logits = []
       for i, rate in enumerate(atrous_rates):
         scope = scope_suffix

@@ -656,13 +648,14 @@ def _get_branch_logits(features,
       return tf.add_n(branch_logits)
 
 
-def _split_separable_conv2d(inputs,
-                            filters,
-                            rate=1,
-                            weight_decay=0.00004,
-                            depthwise_weights_initializer_stddev=0.33,
-                            pointwise_weights_initializer_stddev=0.06,
-                            scope=None):
+def split_separable_conv2d(inputs,
+                           filters,
+                           kernel_size=3,
+                           rate=1,
+                           weight_decay=0.00004,
+                           depthwise_weights_initializer_stddev=0.33,
+                           pointwise_weights_initializer_stddev=0.06,
+                           scope=None):
   """Splits a separable conv2d into depthwise and pointwise conv2d.
 
   This operation differs from `tf.layers.separable_conv2d` as this operation

@@ -671,6 +664,8 @@ def _split_separable_conv2d(inputs,
   Args:
     inputs: Input tensor with shape [batch, height, width, channels].
     filters: Number of filters in the 1x1 pointwise convolution.
+    kernel_size: A list of length 2: [kernel_height, kernel_width] of
+      of the filters. Can be an int if both values are the same.
     rate: Atrous convolution rate for the depthwise convolution.
     weight_decay: The weight decay to use for regularizing the model.
     depthwise_weights_initializer_stddev: The standard deviation of the

@@ -685,7 +680,7 @@ def _split_separable_conv2d(inputs,
   outputs = slim.separable_conv2d(
       inputs,
       None,
-      3,
+      kernel_size=kernel_size,
       depth_multiplier=1,
       rate=rate,
       weights_initializer=tf.truncated_normal_initializer(
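A short sketch of how the helpers made public above can now be used directly from model.py: the scope constants no longer need an accessor, and split_separable_conv2d takes the new kernel_size argument. Input sizes and the scope name below are illustrative assumptions, not part of the commit.

# Hedged sketch: using the now-public model.py helpers.
import tensorflow as tf
from deeplab import model

print(model.MERGED_LOGITS_SCOPE)  # 'merged_logits'; replaces get_merged_logits_scope()

inputs = tf.placeholder(tf.float32, [1, 65, 65, 256])
outputs = model.split_separable_conv2d(
    inputs,
    filters=256,
    kernel_size=3,   # the argument added in this commit
    rate=2,
    weight_decay=0.00004,
    scope='example_aspp_branch')
print(outputs.get_shape())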
research/deeplab/train.py (+4 −2)

@@ -101,6 +101,8 @@ flags.DEFINE_float('momentum', 0.9, 'The momentum value to use')
 flags.DEFINE_integer('train_batch_size', 8,
                      'The number of images in each batch during training.')
 
+# For weight_decay, use 0.00004 for MobileNet-V2 or Xcpetion model variants.
+# Use 0.0001 for ResNet model variants.
 flags.DEFINE_float('weight_decay', 0.00004,
                    'The value of the weight decay for training.')

@@ -206,8 +208,8 @@ def _build_deeplab(inputs_queue, outputs_to_num_classes, ignore_label):
   # Add name to graph node so we can add to summary.
   output_type_dict = outputs_to_scales_to_logits[common.OUTPUT_TYPE]
-  output_type_dict[model.get_merged_logits_scope()] = tf.identity(
-      output_type_dict[model.get_merged_logits_scope()],
+  output_type_dict[model.MERGED_LOGITS_SCOPE] = tf.identity(
+      output_type_dict[model.MERGED_LOGITS_SCOPE],
       name=common.OUTPUT_TYPE)
 
   for output, num_classes in six.iteritems(outputs_to_num_classes):
research/deeplab/utils/get_dataset_colormap.py (+84 −0)

@@ -29,12 +29,14 @@ import numpy as np
 # Dataset names.
 _ADE20K = 'ade20k'
 _CITYSCAPES = 'cityscapes'
+_MAPILLARY_VISTAS = 'mapillary_vistas'
 _PASCAL = 'pascal'
 
 # Max number of entries in the colormap for each dataset.
 _DATASET_MAX_ENTRIES = {
     _ADE20K: 151,
     _CITYSCAPES: 19,
+    _MAPILLARY_VISTAS: 66,
     _PASCAL: 256,
 }

@@ -229,6 +231,82 @@ def create_cityscapes_label_colormap():
   ])
 
 
+def create_mapillary_vistas_label_colormap():
+  """Creates a label colormap used in Mapillary Vistas segmentation benchmark.
+
+  Returns:
+    A colormap for visualizing segmentation results.
+  """
+  return np.asarray([
+      [165, 42, 42],
+      [0, 192, 0],
+      [196, 196, 196],
+      [190, 153, 153],
+      [180, 165, 180],
+      [102, 102, 156],
+      [102, 102, 156],
+      [128, 64, 255],
+      [140, 140, 200],
+      [170, 170, 170],
+      [250, 170, 160],
+      [96, 96, 96],
+      [230, 150, 140],
+      [128, 64, 128],
+      [110, 110, 110],
+      [244, 35, 232],
+      [150, 100, 100],
+      [70, 70, 70],
+      [150, 120, 90],
+      [220, 20, 60],
+      [255, 0, 0],
+      [255, 0, 0],
+      [255, 0, 0],
+      [200, 128, 128],
+      [255, 255, 255],
+      [64, 170, 64],
+      [128, 64, 64],
+      [70, 130, 180],
+      [255, 255, 255],
+      [152, 251, 152],
+      [107, 142, 35],
+      [0, 170, 30],
+      [255, 255, 128],
+      [250, 0, 30],
+      [0, 0, 0],
+      [220, 220, 220],
+      [170, 170, 170],
+      [222, 40, 40],
+      [100, 170, 30],
+      [40, 40, 40],
+      [33, 33, 33],
+      [170, 170, 170],
+      [0, 0, 142],
+      [170, 170, 170],
+      [210, 170, 100],
+      [153, 153, 153],
+      [128, 128, 128],
+      [0, 0, 142],
+      [250, 170, 30],
+      [192, 192, 192],
+      [220, 220, 0],
+      [180, 165, 180],
+      [119, 11, 32],
+      [0, 0, 142],
+      [0, 60, 100],
+      [0, 0, 142],
+      [0, 0, 90],
+      [0, 0, 230],
+      [0, 80, 100],
+      [128, 64, 64],
+      [0, 0, 110],
+      [0, 0, 70],
+      [0, 0, 192],
+      [32, 32, 32],
+      [0, 0, 0],
+      [0, 0, 0],
+      ])
+
+
 def create_pascal_label_colormap():
   """Creates a label colormap used in PASCAL VOC segmentation benchmark.

@@ -254,6 +332,10 @@ def get_cityscapes_name():
   return _CITYSCAPES
 
 
+def get_mapillary_vistas_name():
+  return _MAPILLARY_VISTAS
+
+
 def get_pascal_name():
   return _PASCAL

@@ -287,6 +369,8 @@ def create_label_colormap(dataset=_PASCAL):
     return create_ade20k_label_colormap()
   elif dataset == _CITYSCAPES:
     return create_cityscapes_label_colormap()
+  elif dataset == _MAPILLARY_VISTAS:
+    return create_mapillary_vistas_label_colormap()
   elif dataset == _PASCAL:
     return create_pascal_label_colormap()
   else:
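For reference, a minimal sketch of colorizing a label map with the newly added Mapillary Vistas colormap. label_to_color_image is the existing helper in this module exercised by the tests below (assumed unchanged by this commit); the dummy label sizes are arbitrary.

# Hedged sketch: visualizing a dummy Mapillary Vistas label map.
import numpy as np
from deeplab.utils import get_dataset_colormap

label = np.random.randint(0, 66, size=(10, 10))  # 66 classes, per the new entry
colored = get_dataset_colormap.label_to_color_image(
    label, get_dataset_colormap.get_mapillary_vistas_name())
print(colored.shape)  # (10, 10, 3)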
research/deeplab/utils/get_dataset_colormap_test.py (+5 −0)

@@ -86,6 +86,11 @@ class VisualizationUtilTest(tf.test.TestCase):
         label, get_dataset_colormap.get_ade20k_name())
     self.assertTrue(np.array_equal(colored_label, expected_result))
 
+  def testMapillaryVistasColorMapValue(self):
+    colormap = get_dataset_colormap.create_mapillary_vistas_label_colormap()
+    self.assertTrue(np.array_equal([190, 153, 153], colormap[3, :]))
+    self.assertTrue(np.array_equal([102, 102, 156], colormap[6, :]))
+
 
 if __name__ == '__main__':
   tf.test.main()
research/deeplab/utils/train_utils.py (+7 −2)

@@ -17,6 +17,7 @@
 import six
 import tensorflow as tf
 
+from deeplab.core import preprocess_utils
 slim = tf.contrib.slim

@@ -54,12 +55,16 @@ def add_softmax_cross_entropy_loss_for_each_scale(scales_to_logits,
     if upsample_logits:
       # Label is not downsampled, and instead we upsample logits.
       logits = tf.image.resize_bilinear(
-          logits, tf.shape(labels)[1:3], align_corners=True)
+          logits,
+          preprocess_utils.resolve_shape(labels, 4)[1:3],
+          align_corners=True)
       scaled_labels = labels
     else:
       # Label is downsampled to the same size as logits.
       scaled_labels = tf.image.resize_nearest_neighbor(
-          labels, tf.shape(logits)[1:3], align_corners=True)
+          labels,
+          preprocess_utils.resolve_shape(logits, 4)[1:3],
+          align_corners=True)
 
     scaled_labels = tf.reshape(scaled_labels, shape=[-1])
     not_ignore_mask = tf.to_float(tf.not_equal(scaled_labels,
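The switch above from tf.shape to preprocess_utils.resolve_shape lets the resize keep statically known height/width when they are available, instead of always falling back to a dynamic shape tensor. Below is a sketch of the idea only; it is an illustration under that assumption, not the actual deeplab preprocess_utils implementation.

# Hedged sketch: prefer static dimensions, fall back to dynamic ones.
import tensorflow as tf

def resolve_shape_sketch(tensor, rank):
  """Returns a list of dims, static where available, dynamic otherwise."""
  static = tensor.get_shape().with_rank(rank).as_list()
  dynamic = tf.unstack(tf.shape(tensor), rank)
  return [s if s is not None else d for s, d in zip(static, dynamic)]

labels = tf.placeholder(tf.float32, [None, 513, 513, 1])
logits = tf.placeholder(tf.float32, [None, None, None, 21])
resized = tf.image.resize_bilinear(
    logits, resolve_shape_sketch(labels, 4)[1:3], align_corners=True)
print(resized.get_shape())  # height/width resolve statically to 513 x 513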