Commit 7a2f1a3e authored by Liang-Chieh Chen, committed by yukun

PiperOrigin-RevId: 190154671

parent 67d65c69
......@@ -28,7 +28,9 @@ features:
convolution to trade-off precision and runtime.
If you find the code useful for your research, please consider citing our latest
work:
works:
* DeepLabv3+:
```
@article{deeplabv3plus2018,
......@@ -39,11 +41,21 @@ work:
}
```
* MobileNetv2:
```
@inproceedings{mobilenetv22018,
title={Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation},
author={Mark Sandler and Andrew Howard and Menglong Zhu and Andrey Zhmoginov and Liang-Chieh Chen},
booktitle={CVPR},
year={2018}
}
```
In the current implementation, we support adopting the following network
backbones:
1. MobileNetv2 [8]: A fast network structure designed for mobile devices. **We
will provide MobileNetv2 support in the next update. Please stay tuned.**
1. MobileNetv2 [8]: A fast network structure designed for mobile devices.
2. Xception [9, 10]: A powerful network structure intended for server-side
deployment.
......@@ -71,7 +83,7 @@ Some segmentation results on Flickr images:
Demo:
* <a href='deeplab_demo.ipynb'>Jupyter notebook for off-the-shelf inference.</a><br>
* <a href='https://colab.sandbox.google.com/github/tensorflow/models/blob/master/research/deeplab/deeplab_demo.ipynb'>Colab notebook for off-the-shelf inference.</a><br>
Running:
......
......@@ -39,11 +39,11 @@ flags.DEFINE_integer('logits_kernel_size', 1,
'The kernel size for the convolutional kernel that '
'generates logits.')
# We will support `mobilenet_v2' in the coming update. When using
# 'xception_65', we set atrous_rates = [6, 12, 18] (output stride 16) and
# decoder_output_stride = 4.
flags.DEFINE_enum('model_variant', 'xception_65', ['xception_65'],
'DeepLab model variants.')
# When using 'mobilenet_v2', we set atrous_rates = decoder_output_stride = None.
# When using 'xception_65', we set atrous_rates = [6, 12, 18] (output stride 16)
# and decoder_output_stride = 4.
flags.DEFINE_enum('model_variant', 'mobilenet_v2',
['xception_65', 'mobilenet_v2'], 'DeepLab model variant.')
flags.DEFINE_multi_float('image_pyramid', None,
'Input scales for multi-scale feature extraction.')
......@@ -60,7 +60,12 @@ flags.DEFINE_boolean('aspp_with_separable_conv', True,
flags.DEFINE_multi_integer('multi_grid', None,
'Employ a hierarchy of atrous rates for ResNet.')
# For `xception_65`, use decoder_output_stride = 4.
flags.DEFINE_float('depth_multiplier', 1.0,
'Multiplier for the depth (number of channels) for all '
'convolution ops used in MobileNet.')
# For `xception_65`, use decoder_output_stride = 4. For `mobilenet_v2`, use
# decoder_output_stride = None.
flags.DEFINE_integer('decoder_output_stride', None,
'The ratio of input to output spatial resolution when '
'employing decoder to refine segmentation results.')
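# For quick reference, the per-variant settings described in the comments
# above can be summarized as below. This dict is illustrative only and is
# not part of the codebase.
_ILLUSTRATIVE_SETTINGS = {
    'xception_65': {
        'atrous_rates': [6, 12, 18],     # with output_stride = 16
        'output_stride': 16,
        'decoder_output_stride': 4,
    },
    'mobilenet_v2': {
        'atrous_rates': None,            # provided checkpoints skip ASPP
        'output_stride': 16,
        'decoder_output_stride': None,   # provided checkpoints skip decoder
    },
}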
......
......@@ -18,18 +18,62 @@ import functools
import tensorflow as tf
from deeplab.core import xception
from nets.mobilenet import mobilenet as mobilenet_lib
from nets.mobilenet import mobilenet_v2
slim = tf.contrib.slim
# Default end point for MobileNetv2.
_MOBILENET_V2_FINAL_ENDPOINT = 'layer_18'
def _mobilenet_v2(net,
depth_multiplier,
output_stride,
reuse=None,
scope=None,
final_endpoint=None):
"""Auxiliary function to add support for 'reuse' to mobilenet_v2.
Args:
net: Input tensor of shape [batch_size, height, width, channels].
depth_multiplier: Float multiplier for the depth (number of channels)
for all convolution ops. The value must be greater than zero. Typical
usage will be to set this value in (0, 1) to reduce the number of
parameters or computation cost of the model.
output_stride: An integer that specifies the requested ratio of input to
output spatial resolution. If not None, then we invoke atrous convolution
if necessary to prevent the network from reducing the spatial resolution
of the activation maps. Allowed values are 8 (accurate fully convolutional
mode), 16 (fast fully convolutional mode), 32 (classification mode).
reuse: Reuse model variables.
scope: Optional variable scope.
final_endpoint: The endpoint to construct the network up to.
Returns:
Features extracted by MobileNetv2.
"""
with tf.variable_scope(
scope, 'MobilenetV2', [net], reuse=reuse) as scope:
return mobilenet_lib.mobilenet_base(
net,
conv_defs=mobilenet_v2.V2_DEF,
multiplier=depth_multiplier,
final_endpoint=final_endpoint or _MOBILENET_V2_FINAL_ENDPOINT,
output_stride=output_stride,
scope=scope)
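# A usage sketch for the helper above. This function is illustrative only
# and is not part of the original file.
def _example_mobilenet_v2_usage():
  """Extracts MobileNetv2 features from a dummy 513x513 input batch."""
  images = tf.placeholder(tf.float32, shape=[1, 513, 513, 3])
  # output_stride = 16 is the fast fully convolutional mode; with 'SAME'
  # padding, a 513x513 input yields feature maps of spatial size
  # ceil(513 / 16) = 33.
  features, end_points = _mobilenet_v2(
      images, depth_multiplier=1.0, output_stride=16, scope='MobilenetV2')
  return features, end_points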
# A map from network name to network function.
networks_map = {
'mobilenet_v2': _mobilenet_v2,
'xception_65': xception.xception_65,
}
# A map from network name to network arg scope.
arg_scopes_map = {
'mobilenet_v2': mobilenet_v2.training_scope,
'xception_65': xception.xception_arg_scope,
}
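# A sketch of how a dispatcher over the two maps above might look; the
# actual get_network() used later in this file may differ in details.
def _get_network_sketch(network_name, preprocess_images, arg_scope=None):
  """Illustrative helper: returns a callable for the named backbone."""
  func = networks_map[network_name]
  def network_fn(inputs, **kwargs):
    if preprocess_images:
      # _PREPROCESS_FN is defined further below in this file.
      inputs = _PREPROCESS_FN[network_name](inputs)
    with slim.arg_scope(arg_scope or {}):
      return func(inputs, **kwargs)
  return network_fn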
......@@ -38,6 +82,10 @@ DECODER_END_POINTS = 'decoder_end_points'
# A dictionary from network name to a map of end point features.
networks_to_feature_maps = {
'mobilenet_v2': {
# The provided checkpoint does not include the decoder module.
DECODER_END_POINTS: None,
},
'xception_65': {
DECODER_END_POINTS: [
'entry_flow/block2/unit_1/xception_module/'
......@@ -49,6 +97,7 @@ networks_to_feature_maps = {
# A map from feature extractor name to the network name scope used in the
# ImageNet pretrained versions of these models.
name_scope = {
'mobilenet_v2': 'MobilenetV2',
'xception_65': 'xception_65',
}
......@@ -68,6 +117,7 @@ def _preprocess_zero_mean_unit_range(inputs):
_PREPROCESS_FN = {
'mobilenet_v2': _preprocess_zero_mean_unit_range,
'xception_65': _preprocess_zero_mean_unit_range,
}
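# For reference, a sketch of the zero-mean unit-range transform named in the
# map above; it maps pixel values from [0, 255] to [-1, 1]. The name suffix
# marks this as an illustrative copy, not the original definition.
def _preprocess_zero_mean_unit_range_sketch(inputs):
  """Maps image values from [0, 255] to [-1, 1]."""
  return (2.0 / 255.0) * tf.to_float(inputs) - 1.0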
......@@ -99,6 +149,8 @@ def mean_pixel(model_variant=None):
def extract_features(images,
output_stride=8,
multi_grid=None,
depth_multiplier=1.0,
final_endpoint=None,
model_variant=None,
weight_decay=0.0001,
reuse=None,
......@@ -114,6 +166,9 @@ def extract_features(images,
images: A tensor of size [batch, height, width, channels].
output_stride: The ratio of input to output spatial resolution.
multi_grid: Employ a hierarchy of different atrous rates within network.
depth_multiplier: Float multiplier for the depth (number of channels)
for all convolution ops used in MobileNet.
final_endpoint: The MobileNet endpoint to construct the network up to.
model_variant: Model variant for feature extraction.
weight_decay: The weight decay for model variables.
reuse: Reuse the model variables or not.
......@@ -159,7 +214,17 @@ def extract_features(images,
reuse=reuse,
scope=name_scope[model_variant])
elif 'mobilenet' in model_variant:
raise ValueError('MobileNetv2 support is coming soon.')
arg_scope = arg_scopes_map[model_variant](
is_training=(is_training and fine_tune_batch_norm),
weight_decay=weight_decay)
features, end_points = get_network(
model_variant, preprocess_images, arg_scope)(
inputs=images,
depth_multiplier=depth_multiplier,
output_stride=output_stride,
reuse=reuse,
scope=name_scope[model_variant],
final_endpoint=final_endpoint)
else:
raise ValueError('Unknown model variant %s.' % model_variant)
......
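# A usage sketch for extract_features() with the new MobileNetv2 backbone.
# Illustrative only; argument names follow the signature shown above.
def _example_extract_features():
  """Runs feature extraction with the mobilenet_v2 variant."""
  images = tf.placeholder(tf.float32, shape=[1, 513, 513, 3])
  # The provided MobileNetv2 checkpoints skip the ASPP and decoder modules,
  # so only output_stride and depth_multiplier need to be chosen here.
  features, end_points = extract_features(
      images,
      output_stride=16,
      depth_multiplier=1.0,
      model_variant='mobilenet_v2')
  return features, end_points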
......@@ -14,19 +14,21 @@
# limitations under the License.
# ==============================================================================
#
# Script to preprocess the Cityscapes dataset. Note (1) the users should register
# the Cityscapes dataset website: https://www.cityscapes-dataset.com/downloads/ to
# download the dataset, and (2) the users should run the script provided by Cityscapes
# `preparation/createTrainIdLabelImgs.py` to generate the training groundtruth.
# Script to preprocess the Cityscapes dataset. Note that (1) users should
# register at the Cityscapes dataset website,
# https://www.cityscapes-dataset.com/downloads/, to download the dataset,
# and (2) users should download the utility scripts provided by
# Cityscapes at https://github.com/mcordts/cityscapesScripts.
#
# Usage:
# bash ./preprocess_cityscapes.sh
#
# The folder structure is assumed to be:
# + data
# + datasets
# - build_cityscapes_data.py
# - convert_cityscapes.sh
# + cityscapes
# + cityscapesscripts
# + cityscapesscripts (downloaded scripts)
# + gtFine
# + leftImg8bit
#
......@@ -37,17 +39,18 @@ set -e
CURRENT_DIR=$(pwd)
WORK_DIR="."
cd "${CURRENT_DIR}"
# Root path for PASCAL VOC 2012 dataset.
# Root path for Cityscapes dataset.
CITYSCAPES_ROOT="${WORK_DIR}/cityscapes"
# Create training labels.
python "${CITYSCAPES_ROOT}/cityscapesscripts/preparation/createTrainIdLabelImgs.py"
# Build TFRecords of the dataset.
# First, create output directory for storing TFRecords.
OUTPUT_DIR="${CITYSCAPES_ROOT}/tfrecord"
mkdir -p "${OUTPUT_DIR}"
BUILD_SCRIPT="${WORK_DIR}/build_cityscapes_data.py"
BUILD_SCRIPT="${CURRENT_DIR}/build_cityscapes_data.py"
echo "Converting Cityscapes dataset..."
python "${BUILD_SCRIPT}" \
......
......@@ -20,15 +20,16 @@
# bash ./download_and_preprocess_voc2012.sh
#
# The folder structure is assumed to be:
# + data
# + datasets
# - build_data.py
# - build_voc2012_data.py
# - download_and_preprocess_voc2012.sh
# - remove_gt_colormap.py
# + VOCdevkit
# + VOC2012
# + JPEGImages
# + SegmentationClass
# + pascal_voc_seg
# + VOCdevkit
# + VOC2012
# + JPEGImages
# + SegmentationClass
#
# Exit immediately if a command exits with a non-zero status.
......
This diff is collapsed.
......@@ -50,8 +50,8 @@ flags.DEFINE_integer('eval_interval_secs', 60 * 5,
'How often (in seconds) to run evaluation.')
# For `xception_65`, use atrous_rates = [12, 24, 36] if output_stride = 8, or
# rates = [6, 12, 18] if output_stride = 16. Note one could use different
# atrous_rates/output_stride during training/evaluation.
# rates = [6, 12, 18] if output_stride = 16. For `mobilenet_v2`, use None. Note
# one could use different atrous_rates/output_stride during training/evaluation.
flags.DEFINE_multi_integer('atrous_rates', None,
'Atrous rates for atrous spatial pyramid pooling.')
......
......@@ -54,7 +54,7 @@ python deeplab/train.py \
--train_crop_size=769 \
--train_batch_size=1 \
--dataset="cityscapes" \
--tf_initial_checkpoints=${PATH_TO_INITIAL_CHECKPOINT} \
--tf_initial_checkpoint=${PATH_TO_INITIAL_CHECKPOINT} \
--train_logdir=${PATH_TO_TRAIN_DIR} \
--dataset_dir=${PATH_TO_DATASET}
```
......
......@@ -18,12 +18,49 @@ A: We have not tried this. The interested users could take a look at Philipp Kr
___
Q5: What if I want to train the model and fine-tune the batch normalization parameters?
A: Fine-tuning batch normalization requires large batch size, and thus in the train.py we suggest setting `num_clones` (number of GPUs on one machine) and `train_batch_size` to be as large as possible.
A: Given the limited resources at hand, we would suggest you simply fine-tune
from our provided checkpoint, whose batch-norm parameters have already been
trained (i.e., train with a smaller learning rate, set
`fine_tune_batch_norm = false`, and employ longer training iterations since the
learning rate is small). If you really would like to train the model yourself,
we would suggest the following:
1. Set `output_stride = 16` or maybe even `32` (remember to change the flag
`atrous_rates` accordingly, e.g., `atrous_rates = [3, 6, 9]` for
`output_stride = 32`; see the sketch after this list).
2. Use as many GPUs as possible (change the flag `num_clones` in train.py) and
set `train_batch_size` as large as possible.
3. Adjust the `train_crop_size` in train.py. Maybe set it to be smaller, e.g.,
513x513 (or even 321x321), so that you could use a larger batch size.
4. Use a smaller network backbone, such as MobileNet-v2.
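For step 1, the rule of thumb is that the atrous rates shrink by the same
factor that `output_stride` grows, so the effective field of view stays fixed.
A minimal sketch (illustrative only, not code from this repository):
```python
# Scale the output_stride = 8 rates [12, 24, 36] to other output strides.
def scaled_atrous_rates(output_stride, base_rates=(12, 24, 36),
                        base_output_stride=8):
  factor = output_stride // base_output_stride
  return [rate // factor for rate in base_rates]

print(scaled_atrous_rates(16))  # [6, 12, 18]
print(scaled_atrous_rates(32))  # [3, 6, 9]
```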
___
Q6: How can I train the model asynchronously?
A: In train.py, users could set `num_replicas` (number of machines for training) and `num_ps_tasks` (we usually set `num_ps_tasks` = `num_replicas` / 2). See slim.deployment.model_deploy for more details.
___
Q7: I could not reproduce the performance even with the provided checkpoints.
A: Please try running
```bash
# Run the simple test with Xception_65 as network backbone.
sh local_test.sh
```
or
```bash
# Run the simple test with MobileNet-v2 as network backbone.
sh local_test_mobilenetv2.sh
```
First, make sure you can reproduce the results with our provided settings.
After that, start making changes one at a time to help debug.
___
## References
1. **Deep Residual Learning for Image Recognition**<br />
......
......@@ -10,7 +10,8 @@ Un-tar'ed directory includes:
* a frozen inference graph (`frozen_inference_graph.pb`). All frozen inference
graphs use output stride of 8 and a single eval scale of 1.0. No left-right
flips are used.
flips are used, and MobileNet-v2 based models do not include the decoder
module.
* a checkpoint (`model.ckpt.data-00000-of-00001`, `model.ckpt.index`)
......@@ -21,10 +22,13 @@ set or train_aug + trainval set. In the former case, one could train their model
with smaller batch size and freeze batch normalization when limited GPU memory
is available, since we have already fine-tuned the batch normalization for you.
In the latter case, one could directly evaluate the checkpoints on VOC 2012 test
set or use this checkpoint for demo.
set or use this checkpoint for demo. Note that *MobileNet-v2* based models do
not employ ASPP or decoder modules, for fast computation.
Checkpoint name | Network backbone | Pretrained dataset | ASPP | Decoder
--------------------------- | :--------------: | :-----------------: | :---: | :-----:
mobilenetv2_coco_voc_trainaug | MobileNet-v2 | MS-COCO <br> VOC 2012 train_aug set| N/A | N/A
mobilenetv2_coco_voc_trainval | MobileNet-v2 | MS-COCO <br> VOC 2012 train_aug + trainval sets | N/A | N/A
xception_coco_voc_trainaug | Xception_65 | MS-COCO <br> VOC 2012 train_aug set| [6,12,18] for OS=16 <br> [12,24,36] for OS=8 | OS = 4
xception_coco_voc_trainval | Xception_65 | MS-COCO <br> VOC 2012 train_aug + trainval sets | [6,12,18] for OS=16 <br> [12,24,36] for OS=8 | OS = 4
......@@ -32,6 +36,8 @@ In the table, **OS** denotes output stride.
Checkpoint name | Eval OS | Eval scales | Left-right Flip | Multiply-Adds | Runtime (sec) | PASCAL mIOU | File Size
------------------------------------------------------------------------------------------------------------------------ | :-------: | :------------------------: | :-------------: | :------------------: | :------------: | :----------------------------: | :-------:
[mobilenetv2_coco_voc_trainaug](http://download.tensorflow.org/models/deeplabv3_mnv2_pascal_train_aug_2018_01_29.tar.gz) | 16 <br> 8 | [1.0] <br> [0.5:0.25:1.75] | No <br> Yes | 2.75B <br> 152.59B | 0.1 <br> 26.9 | 75.32% (val) <br> 77.33% (val) | 23MB
[mobilenetv2_coco_voc_trainval](http://download.tensorflow.org/models/deeplabv3_mnv2_pascal_trainval_2018_01_29.tar.gz) | 8 | [0.5:0.25:1.75] | Yes | 152.59B | 26.9 | 80.25% (**test**) | 23MB
[xception_coco_voc_trainaug](http://download.tensorflow.org/models/deeplabv3_pascal_train_aug_2018_01_04.tar.gz) | 16 <br> 8 | [1.0] <br> [0.5:0.25:1.75] | No <br> Yes | 54.17B <br> 3055.35B | 0.7 <br> 223.2 | 82.20% (val) <br> 83.58% (val) | 439MB
[xception_coco_voc_trainval](http://download.tensorflow.org/models/deeplabv3_pascal_trainval_2018_01_04.tar.gz) | 8 | [0.5:0.25:1.75] | Yes | 3055.35B | 223.2 | 87.80% (**test**) | 439MB
......@@ -48,16 +54,19 @@ for real-time applications.
### Model details
We provide several checkpoints that have been pretrained on Cityscapes
train_fine set.
train_fine set. Note that the *MobileNet-v2* based model has been pretrained
on the MS-COCO dataset and does not employ ASPP or decoder modules, for fast
computation.
Checkpoint name | Network backbone | Pretrained dataset | ASPP | Decoder
------------------------------------- | :--------------: | :-------------------------------------: | :----------------------------------------------: | :-----:
mobilenetv2_coco_cityscapes_trainfine | MobileNet-v2 | MS-COCO <br> Cityscapes train_fine set | N/A | N/A
xception_cityscapes_trainfine | Xception_65 | ImageNet <br> Cityscapes train_fine set | [6, 12, 18] for OS=16 <br> [12, 24, 36] for OS=8 | OS = 4
In the table, **OS** denotes output stride.
Checkpoint name | Eval OS | Eval scales | Left-right Flip | Multiply-Adds | Runtime (sec) | Cityscapes mIOU | File Size
-------------------------------------------------------------------------------------------------------------------------------- | :-------: | :-------------------------: | :-------------: | :-------------------: | :------------: | :----------------------------: | :-------:
[mobilenetv2_coco_cityscapes_trainfine](http://download.tensorflow.org/models/deeplabv3_mnv2_cityscapes_train_2018_02_05.tar.gz) | 16 <br> 8 | [1.0] <br> [0.75:0.25:1.25] | No <br> Yes | 21.27B <br> 433.24B | 0.8 <br> 51.12 | 70.71% (val) <br> 73.57% (val) | 23MB
[xception_cityscapes_trainfine](http://download.tensorflow.org/models/deeplabv3_cityscapes_train_2018_02_06.tar.gz) | 16 <br> 8 | [1.0] <br> [0.75:0.25:1.25] | No <br> Yes | 418.64B <br> 8677.92B | 5.0 <br> 422.8 | 78.79% (val) <br> 80.42% (val) | 439MB
## Checkpoints pretrained on ImageNet
......@@ -71,6 +80,10 @@ Un-tar'ed directory includes:
We also provide some checkpoints that are only pretrained on ImageNet, so
that one could use them for training one's own models.
* mobilenet_v2: We refer interested users to the TensorFlow open-source
[MobileNet-V2](https://github.com/tensorflow/models/tree/master/research/slim/nets/mobilenet)
implementation for details.
* xception: We adapt the original Xception model to the task of semantic
segmentation with the following changes: (1) more layers, (2) all max
pooling operations are replaced by strided (atrous) separable convolutions,
......
......@@ -56,7 +56,7 @@ python deeplab/train.py \
--train_crop_size=513 \
--train_batch_size=1 \
--dataset="pascal_voc_seg" \
--tf_initial_checkpoints=${PATH_TO_INITIAL_CHECKPOINT} \
--tf_initial_checkpoint=${PATH_TO_INITIAL_CHECKPOINT} \
--train_logdir=${PATH_TO_TRAIN_DIR} \
--dataset_dir=${PATH_TO_DATASET}
```
......
#!/bin/bash
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
#
# This script is used to run local test on PASCAL VOC 2012 using MobileNet-v2.
# Users could also modify this script for their own use case.
#
# Usage:
# # From the tensorflow/models/research/deeplab directory.
# sh ./local_test_mobilenetv2.sh
#
#
# Exit immediately if a command exits with a non-zero status.
set -e
# Move one-level up to tensorflow/models/research directory.
cd ..
# Update PYTHONPATH.
export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim
# Set up the working environment.
CURRENT_DIR=$(pwd)
WORK_DIR="${CURRENT_DIR}/deeplab"
# Run model_test first to make sure the PYTHONPATH is correctly set.
python "${WORK_DIR}"/model_test.py -v
# Go to datasets folder and download PASCAL VOC 2012 segmentation dataset.
DATASET_DIR="datasets"
cd "${WORK_DIR}/${DATASET_DIR}"
sh download_and_convert_voc2012.sh
# Go back to original directory.
cd "${CURRENT_DIR}"
# Set up the working directories.
PASCAL_FOLDER="pascal_voc_seg"
EXP_FOLDER="exp/train_on_trainval_set_mobilenetv2"
INIT_FOLDER="${WORK_DIR}/${DATASET_DIR}/${PASCAL_FOLDER}/init_models"
TRAIN_LOGDIR="${WORK_DIR}/${DATASET_DIR}/${PASCAL_FOLDER}/${EXP_FOLDER}/train"
EVAL_LOGDIR="${WORK_DIR}/${DATASET_DIR}/${PASCAL_FOLDER}/${EXP_FOLDER}/eval"
VIS_LOGDIR="${WORK_DIR}/${DATASET_DIR}/${PASCAL_FOLDER}/${EXP_FOLDER}/vis"
EXPORT_DIR="${WORK_DIR}/${DATASET_DIR}/${PASCAL_FOLDER}/${EXP_FOLDER}/export"
mkdir -p "${INIT_FOLDER}"
mkdir -p "${TRAIN_LOGDIR}"
mkdir -p "${EVAL_LOGDIR}"
mkdir -p "${VIS_LOGDIR}"
mkdir -p "${EXPORT_DIR}"
# Copy locally the trained checkpoint as the initial checkpoint.
TF_INIT_ROOT="http://download.tensorflow.org/models"
CKPT_NAME="deeplabv3_mnv2_pascal_train_aug"
TF_INIT_CKPT="${CKPT_NAME}_2018_01_29.tar.gz"
cd "${INIT_FOLDER}"
wget -nd -c "${TF_INIT_ROOT}/${TF_INIT_CKPT}"
tar -xf "${TF_INIT_CKPT}"
cd "${CURRENT_DIR}"
PASCAL_DATASET="${WORK_DIR}/${DATASET_DIR}/${PASCAL_FOLDER}/tfrecord"
# Train 10 iterations.
NUM_ITERATIONS=10
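# Note that --train_crop_size (and similarly --eval_crop_size, --vis_crop_size
# and --crop_size below) is passed twice on purpose: the flag is multi-valued
# and takes the crop height and the crop width in turn.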
python "${WORK_DIR}"/train.py \
--logtostderr \
--train_split="trainval" \
--model_variant="mobilenet_v2" \
--output_stride=16 \
--train_crop_size=513 \
--train_crop_size=513 \
--train_batch_size=4 \
--training_number_of_steps="${NUM_ITERATIONS}" \
--fine_tune_batch_norm=true \
--tf_initial_checkpoint="${INIT_FOLDER}/${CKPT_NAME}/model.ckpt-30000" \
--train_logdir="${TRAIN_LOGDIR}" \
--dataset_dir="${PASCAL_DATASET}"
# Run evaluation. This performs eval over the full val split (1449 images) and
# will take a while.
# Using the provided checkpoint, one should expect mIOU=75.34%.
python "${WORK_DIR}"/eval.py \
--logtostderr \
--eval_split="val" \
--model_variant="mobilenet_v2" \
--eval_crop_size=513 \
--eval_crop_size=513 \
--checkpoint_dir="${TRAIN_LOGDIR}" \
--eval_logdir="${EVAL_LOGDIR}" \
--dataset_dir="${PASCAL_DATASET}" \
--max_number_of_evaluations=1
# Visualize the results.
python "${WORK_DIR}"/vis.py \
--logtostderr \
--vis_split="val" \
--model_variant="mobilenet_v2" \
--vis_crop_size=513 \
--vis_crop_size=513 \
--checkpoint_dir="${TRAIN_LOGDIR}" \
--vis_logdir="${VIS_LOGDIR}" \
--dataset_dir="${PASCAL_DATASET}" \
--max_number_of_iterations=1
# Export the trained checkpoint.
CKPT_PATH="${TRAIN_LOGDIR}/model.ckpt-${NUM_ITERATIONS}"
EXPORT_PATH="${EXPORT_DIR}/frozen_inference_graph.pb"
python "${WORK_DIR}"/export_model.py \
--logtostderr \
--checkpoint_path="${CKPT_PATH}" \
--export_path="${EXPORT_PATH}" \
--model_variant="mobilenet_v2" \
--num_classes=21 \
--crop_size=513 \
--crop_size=513 \
--inference_scales=1.0
# Run inference with the exported checkpoint.
# Please refer to the provided deeplab_demo.ipynb for an example.
......@@ -226,8 +226,7 @@ def multi_scale_logits(images,
Raises:
ValueError: If model_options doesn't specify crop_size and its
add_image_level_feature = True, since add_image_level_feature requires
crop_size information. Or, if model_options has model_variant =
'mobilenet_v2' but atrous_rates or decoder_output_stride are not None.
crop_size information.
"""
# Setup default values.
if not image_pyramid:
......@@ -236,6 +235,12 @@ def multi_scale_logits(images,
if model_options.crop_size is None and model_options.add_image_level_feature:
raise ValueError(
'Crop size must be specified for using image-level feature.')
if model_options.model_variant == 'mobilenet_v2':
if (model_options.atrous_rates is not None or
model_options.decoder_output_stride is not None):
# Output a warning; users should make sure the setting is desired.
tf.logging.warning('Our provided mobilenet_v2 checkpoint does not '
'include ASPP and decoder modules.')
crop_height = (
model_options.crop_size[0]
......
......@@ -42,7 +42,7 @@ class DeeplabModelTest(tf.test.TestCase):
image_pyramids = [[1], [0.5, 1]]
# Test two model variants.
model_variants = ['xception_65']
model_variants = ['xception_65', 'mobilenet_v2']
# Test with two output_types.
outputs_to_num_classes = {'semantic': 3,
......@@ -87,16 +87,12 @@ class DeeplabModelTest(tf.test.TestCase):
model_options = common.ModelOptions(
outputs_to_num_classes,
crop_size,
atrous_rates=[6],
output_stride=16
)._replace(
add_image_level_feature=True,
aspp_with_batch_norm=True,
aspp_with_separable_conv=True,
decoder_output_stride=4,
decoder_use_separable_conv=True,
logits_kernel_size=1,
model_variant='xception_65')
model_variant='mobilenet_v2') # Employ MobileNetv2 for fast test.
g = tf.Graph()
with g.as_default():
......
......@@ -139,8 +139,8 @@ flags.DEFINE_float('scale_factor_step_size', 0.25,
'Scale factor step size for data augmentation.')
# For `xception_65`, use atrous_rates = [12, 24, 36] if output_stride = 8, or
# rates = [6, 12, 18] if output_stride = 16. Note one could use different
# atrous_rates/output_stride during training/evaluation.
# rates = [6, 12, 18] if output_stride = 16. For `mobilenet_v2`, use None. Note
# one could use different atrous_rates/output_stride during training/evaluation.
flags.DEFINE_multi_integer('atrous_rates', None,
'Atrous rates for atrous spatial pyramid pooling.')
......
......@@ -54,8 +54,8 @@ flags.DEFINE_integer('eval_interval_secs', 60 * 5,
'How often (in seconds) to run evaluation.')
# For `xception_65`, use atrous_rates = [12, 24, 36] if output_stride = 8, or
# rates = [6, 12, 18] if output_stride = 16. Note one could use different
# atrous_rates/output_stride during training/evaluation.
# rates = [6, 12, 18] if output_stride = 16. For `mobilenet_v2`, use None. Note
# one could use different atrous_rates/output_stride during training/evaluation.
flags.DEFINE_multi_integer('atrous_rates', None,
'Atrous rates for atrous spatial pyramid pooling.')
......