Unverified Commit 05ccaf88 authored by Lukasz Kaiser's avatar Lukasz Kaiser Committed by GitHub
Browse files

Merge pull request #3521 from YknZhu/master

Add deeplab model in tensorflow models
parents 6571d16d 1e9b07d8
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Exports trained model to TensorFlow frozen graph."""
import os
import tensorflow as tf
from tensorflow.python.tools import freeze_graph
from deeplab import common
from deeplab import input_preprocess
from deeplab import model
slim = tf.contrib.slim
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('checkpoint_path', None, 'Checkpoint path')
flags.DEFINE_string('export_path', None,
'Path to output Tensorflow frozen graph.')
flags.DEFINE_integer('num_classes', 21, 'Number of classes.')
flags.DEFINE_multi_integer('crop_size', [513, 513],
'Crop size [height, width].')
# For `xception_65`, use atrous_rates = [12, 24, 36] if output_stride = 8, or
# rates = [6, 12, 18] if output_stride = 16. For `mobilenet_v2`, use None. Note
# one could use different atrous_rates/output_stride during training/evaluation.
flags.DEFINE_multi_integer('atrous_rates', None,
'Atrous rates for atrous spatial pyramid pooling.')
flags.DEFINE_integer('output_stride', 8,
'The ratio of input to output spatial resolution.')
# Change to [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] for multi-scale inference.
flags.DEFINE_multi_float('inference_scales', [1.0],
'The scales to resize images for inference.')
flags.DEFINE_bool('add_flipped_images', False,
'Add flipped images during inference or not.')
# Input name of the exported model.
_INPUT_NAME = 'ImageTensor'
# Output name of the exported model.
_OUTPUT_NAME = 'SemanticPredictions'
def _create_input_tensors():
"""Creates and prepares input tensors for DeepLab model.
This method creates a 4-D uint8 image tensor 'ImageTensor' with shape
[1, None, None, 3]. The actual input tensor name to use during inference is
'ImageTensor:0'.
Returns:
image: Preprocessed 4-D float32 tensor with shape [1, crop_height,
crop_width, 3].
original_image_size: Original image shape tensor [height, width].
resized_image_size: Resized image shape tensor [height, width].
"""
# input_preprocess takes 4-D image tensor as input.
input_image = tf.placeholder(tf.uint8, [1, None, None, 3], name=_INPUT_NAME)
original_image_size = tf.shape(input_image)[1:3]
# Squeeze the dimension in axis=0 since `preprocess_image_and_label` assumes
# image to be 3-D.
image = tf.squeeze(input_image, axis=0)
resized_image, image, _ = input_preprocess.preprocess_image_and_label(
image,
label=None,
crop_height=FLAGS.crop_size[0],
crop_width=FLAGS.crop_size[1],
min_resize_value=FLAGS.min_resize_value,
max_resize_value=FLAGS.max_resize_value,
resize_factor=FLAGS.resize_factor,
is_training=False,
model_variant=FLAGS.model_variant)
resized_image_size = tf.shape(resized_image)[:2]
# Expand the dimension in axis=0, since the following operations assume the
# image to be 4-D.
image = tf.expand_dims(image, 0)
return image, original_image_size, resized_image_size
def main(unused_argv):
tf.logging.set_verbosity(tf.logging.INFO)
tf.logging.info('Prepare to export model to: %s', FLAGS.export_path)
with tf.Graph().as_default():
image, image_size, resized_image_size = _create_input_tensors()
model_options = common.ModelOptions(
outputs_to_num_classes={common.OUTPUT_TYPE: FLAGS.num_classes},
crop_size=FLAGS.crop_size,
atrous_rates=FLAGS.atrous_rates,
output_stride=FLAGS.output_stride)
if tuple(FLAGS.inference_scales) == (1.0,):
tf.logging.info('Exported model performs single-scale inference.')
predictions = model.predict_labels(
image,
model_options=model_options,
image_pyramid=FLAGS.image_pyramid)
else:
tf.logging.info('Exported model performs multi-scale inference.')
predictions = model.predict_labels_multi_scale(
image,
model_options=model_options,
eval_scales=FLAGS.inference_scales,
add_flipped_images=FLAGS.add_flipped_images)
# Crop the valid regions from the predictions.
semantic_predictions = tf.slice(
predictions[common.OUTPUT_TYPE],
[0, 0, 0],
[1, resized_image_size[0], resized_image_size[1]])
# Resize back the prediction to the original image size.
def _resize_label(label, label_size):
# Expand dimension of label to [1, height, width, 1] for resize operation.
label = tf.expand_dims(label, 3)
resized_label = tf.image.resize_images(
label,
label_size,
method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
align_corners=True)
return tf.squeeze(resized_label, 3)
semantic_predictions = _resize_label(semantic_predictions, image_size)
semantic_predictions = tf.identity(semantic_predictions, name=_OUTPUT_NAME)
saver = tf.train.Saver(tf.model_variables())
tf.gfile.MakeDirs(os.path.dirname(FLAGS.export_path))
freeze_graph.freeze_graph_with_def_protos(
tf.get_default_graph().as_graph_def(add_shapes=True),
saver.as_saver_def(),
FLAGS.checkpoint_path,
_OUTPUT_NAME,
restore_op_name=None,
filename_tensor_name=None,
output_graph=FLAGS.export_path,
clear_devices=True,
initializer_nodes=None)
if __name__ == '__main__':
flags.mark_flag_as_required('checkpoint_path')
flags.mark_flag_as_required('export_path')
tf.app.run()
# Running DeepLab on Cityscapes Semantic Segmentation Dataset
This page walks through the steps required to run DeepLab on Cityscapes on a
local machine.
## Download dataset and convert to TFRecord
We have prepared the script (under the folder `datasets`) to convert Cityscapes
dataset to TFRecord. The users are required to download the dataset beforehand
by registering the [website](https://www.cityscapes-dataset.com/).
```bash
# From the tensorflow/models/research/deeplab/datasets directory.
sh convert_cityscapes.sh
```
The converted dataset will be saved at ./deeplab/datasets/cityscapes/tfrecord.
## Recommended Directory Structure for Training and Evaluation
```
+ datasets
+ cityscapes
+ leftImg8bit
+ gtFine
+ tfrecord
+ exp
+ train_on_train_set
+ train
+ eval
+ vis
```
where the folder `train_on_train_set` stores the train/eval/vis events and
results (when training DeepLab on the Cityscapes train set).
## Running the train/eval/vis jobs
A local training job using `xception_65` can be run with the following command:
```bash
# From tensorflow/models/research/
python deeplab/train.py \
--logtostderr \
--train_split="train" \
--model_variant="xception_65" \
--atrous_rates=6 \
--atrous_rates=12 \
--atrous_rates=18 \
--output_stride=16 \
--decoder_output_stride=4 \
--train_crop_size=769 \
--train_crop_size=769 \
--train_batch_size=1 \
--tf_initial_checkpoints=${PATH_TO_INITIAL_CHECKPOINT} \
--train_logdir=${PATH_TO_TRAIN_DIR} \
--dataset_dir=${PATH_TO_DATASET}
```
where ${PATH_TO_INITIAL_CHECKPOINT} is the path to the initial checkpoint
(usually an ImageNet pretrained checkpoint), ${PATH_TO_TRAIN_DIR} is the
directory in which training checkpoints and events will be written to, and
${PATH_TO_DATASET} is the directory in which the Cityscapes dataset resides.
Note that for {train,eval,vis}.py:
1. We use small batch size during training. The users could change it based on
the available GPU memory and also set `fine_tune_batch_norm` to be False or
True depending on the use case.
2. The users should change atrous_rates from [6, 12, 18] to [12, 24, 36] if
setting output_stride=8.
3. The users could skip the flag, `decoder_output_stride`, if you do not want
to use the decoder structure.
A local evaluation job using `xception_65` can be run with the following
command:
```bash
# From tensorflow/models/research/
python deeplab/eval.py \
--logtostderr \
--eval_split="val" \
--model_variant="xception_65" \
--atrous_rates=6 \
--atrous_rates=12 \
--atrous_rates=18 \
--output_stride=16 \
--decoder_output_stride=4 \
--eval_crop_size=1025 \
--eval_crop_size=2049 \
--checkpoint_dir=${PATH_TO_CHECKPOINT} \
--eval_logdir=${PATH_TO_EVAL_DIR} \
--dataset_dir=${PATH_TO_DATASET}
```
where ${PATH_TO_CHECKPOINT} is the path to the trained checkpoint (i.e., the
path to train_logdir), ${PATH_TO_EVAL_DIR} is the directory in which evaluation
events will be written to, and ${PATH_TO_DATASET} is the directory in which the
Cityscapes dataset resides.
A local visualization job using `xception_65` can be run with the following
command:
```bash
# From tensorflow/models/research/
python deeplab/vis.py \
--logtostderr \
--vis_split="val" \
--model_variant="xception_65" \
--atrous_rates=6 \
--atrous_rates=12 \
--atrous_rates=18 \
--output_stride=16 \
--decoder_output_stride=4 \
--vis_crop_size=1025 \
--vis_crop_size=2049 \
--colormap_type="cityscapes" \
--checkpoint_dir=${PATH_TO_CHECKPOINT} \
--vis_logdir=${PATH_TO_VIS_DIR} \
--dataset_dir=${PATH_TO_DATASET}
```
where ${PATH_TO_CHECKPOINT} is the path to the trained checkpoint (i.e., the
path to train_logdir), ${PATH_TO_VIS_DIR} is the directory in which evaluation
events will be written to, and ${PATH_TO_DATASET} is the directory in which the
Cityscapes dataset resides. Note that if the users would like to save the
segmentation results for evaluation server, set also_save_raw_predictions =
True.
## Running Tensorboard
Progress for training and evaluation jobs can be inspected using Tensorboard. If
using the recommended directory structure, Tensorboard can be run using the
following command:
```bash
tensorboard --logdir=${PATH_TO_LOG_DIRECTORY}
```
where `${PATH_TO_LOG_DIRECTORY}` points to the directory that contains the
train, eval, and vis directories (e.g., the folder `train_on_train_set` in the
above example). Please note it may take Tensorboard a couple minutes to populate
with data.
# Export trained deeplab model to frozen inference graph
After model training finishes, you could export it to a frozen TensorFlow
inference graph proto. Your trained model checkpoint usually includes the
following files:
* model.ckpt-${CHECKPOINT_NUMBER}.data-00000-of-00001,
* model.ckpt-${CHECKPOINT_NUMBER}.index
* model.ckpt-${CHECKPOINT_NUMBER}.meta
After you have identified a candidate checkpoint to export, you can run the
following commandline to export to a frozen graph:
```bash
# From tensorflow/models/research/
# Assume all checkpoint files share the same path prefix `${CHECKPOINT_PATH}`.
python deeplab/export_model.py \
--checkpoint_path=${CHECKPOINT_PATH} \
--export_path=${OUTPUT_DIR}/frozen_inference_graph.pb
```
Please also add other model specific flags as you use for training, such as
`model_variant`, `add_image_level_feature`, etc.
# FAQ
___
Q1: What if I want to use other network backbones, such as ResNet [1], instead of only those provided ones (e.g., Xception)?
A: The users could modify the provided core/feature_extractor.py to support more network backbones.
___
Q2: What if I want to train the model on other datasets?
A: The users could modify the provided dataset/build_{cityscapes,voc2012}_data.py and dataset/segmentation_dataset.py to build their own dataset.
___
Q3: Where can I download the PASCAL VOC augmented training set?
A: The PASCAL VOC augmented training set is provided by Bharath Hariharan et al. [2] Please refer to their [website](http://home.bharathh.info/pubs/codes/SBD/download.html) for details and consider citing their paper if using the dataset.
___
Q4: Why the implementation does not include DenseCRF [3]?
A: We have not tried this. The interested users could take a look at Philipp Krähenbühl's [website](http://graphics.stanford.edu/projects/densecrf/) and [paper](https://arxiv.org/abs/1210.5644) for details.
___
Q5: What if I want to train the model and fine-tune the batch normalization parameters?
A: Fine-tuning batch normalization requires large batch size, and thus in the train.py we suggest setting `num_clones` (number of GPUs on one machine) and `train_batch_size` to be as large as possible.
___
Q6: How can I train the model asynchronously?
A: In the train.py, the users could set `num_replicas` (number of machines for training) and `num_ps_tasks` (we usually set `num_ps_tasks` = `num_replicas` / 2). See slim.deployment.model_deploy for more details.
___
## References
1. **Deep Residual Learning for Image Recognition**<br />
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun<br />
[[link]](https://arxiv.org/abs/1512.03385), In CVPR, 2016.
2. **Semantic Contours from Inverse Detectors**<br />
Bharath Hariharan, Pablo Arbelaez, Lubomir Bourdev, Subhransu Maji, Jitendra Malik<br />
[[link]](http://home.bharathh.info/pubs/codes/SBD/download.html), In ICCV, 2011.
3. **Efficient Inference in Fully Connected CRFs with Gaussian Edge Potentials**<br />
Philipp Krähenbühl, Vladlen Koltun<br />
[[link]](http://graphics.stanford.edu/projects/densecrf/), In NIPS, 2011.
Image provenance:
image1.jpg: Philippe Put,
https://www.flickr.com/photos/34547181@N00/14499172124
image2.jpg: Peretz Partensky
https://www.flickr.com/photos/ifl/3926001309
image3.jpg: Peter Harrison
https://www.flickr.com/photos/devcentre/392585679
vis[1-3].png: Showing original image together with DeepLab segmentation map.
# Installation
## Dependencies
DeepLab depends on the following libraries:
* Numpy
* Pillow 1.0
* tf Slim (which is included in the "tensorflow/models/research/" checkout)
* Jupyter notebook
* Matplotlib
* Tensorflow
For detailed steps to install Tensorflow, follow the [Tensorflow installation
instructions](https://www.tensorflow.org/install/). A typical user can install
Tensorflow using one of the following commands:
```bash
# For CPU
pip install tensorflow
# For GPU
pip install tensorflow-gpu
```
The remaining libraries can be installed on Ubuntu 14.04 using via apt-get:
```bash
sudo apt-get install python-pil python-numpy
sudo pip install jupyter
sudo pip install matplotlib
```
## Add Libraries to PYTHONPATH
When running locally, the tensorflow/models/research/ and slim directories
should be appended to PYTHONPATH. This can be done by running the following from
tensorflow/models/research/:
```bash
# From tensorflow/models/research/
export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim
```
Note: This command needs to run from every new terminal you start. If you wish
to avoid running this manually, you can add it as a new line to the end of your
~/.bashrc file.
# Testing the Installation
You can test if you have successfully installed the Tensorflow DeepLab by
running the following commands:
Quick test by running model_test.py:
```bash
# From tensorflow/models/research/
python deeplab/model_test.py
```
Quick running the whole code on the PASCAL VOC 2012 dataset:
```bash
# From tensorflow/models/research/deeplab
sh local_test.sh
```
# TensorFlow DeepLab Model Zoo
We provide deeplab models pretrained on PASCAL VOC 2012 and Cityscapes datasets
for reproducing our results, as well as some checkpoints that are only
pretrained on ImageNet for training your own models.
## DeepLab models trained on PASCAL VOC 2012
Un-tar'ed directory includes:
* a frozen inference graph (`frozen_inference_graph.pb`). All frozen inference
graphs use output stride of 8 and a single eval scale of 1.0. No left-right
flips are used.
* a checkpoint (`model.ckpt.data-00000-of-00001`, `model.ckpt.index`)
### Model details
We provide several checkpoints that have been pretrained on VOC 2012 train_aug
set or train_aug + trainval set. In the former case, one could train their model
with smaller batch size and freeze batch normalization when limited GPU memory
is available, since we have already fine-tuned the batch normalization for you.
In the latter case, one could directly evaluate the checkpoints on VOC 2012 test
set or use this checkpoint for demo.
Checkpoint name | Network backbone | Pretrained dataset | ASPP | Decoder
--------------------------- | :--------------: | :-----------------: | :---: | :-----:
xception_coco_voc_trainaug | Xception_65 | MS-COCO <br> VOC 2012 train_aug set| [6,12,18] for OS=16 <br> [12,24,36] for OS=8 | OS = 4
xception_coco_voc_trainval | Xception_65 | MS-COCO <br> VOC 2012 train_aug + trainval sets | [6,12,18] for OS=16 <br> [12,24,36] for OS=8 | OS = 4
In the table, **OS** denotes output stride.
Checkpoint name | Eval OS | Eval scales | Left-right Flip | Multiply-Adds | Runtime (sec) | PASCAL mIOU | File Size
------------------------------------------------------------------------------------------------------------------------ | :-------: | :------------------------: | :-------------: | :------------------: | :------------: | :----------------------------: | :-------:
[xception_coco_voc_trainaug](http://download.tensorflow.org/models/deeplabv3_pascal_train_aug_2018_01_04.tar.gz) | 16 <br> 8 | [1.0] <br> [0.5:0.25:1.75] | No <br> Yes | 54.17B <br> 3055.35B | 0.7 <br> 223.2 | 82.20% (val) <br> 83.58% (val) | 439MB
[xception_coco_voc_trainval](http://download.tensorflow.org/models/deeplabv3_pascal_trainval_2018_01_04.tar.gz) | 8 | [0.5:0.25:1.75] | Yes | 3055.35B | 223.2 | 87.80% (**test**) | 439MB
In the table, we report both computation complexity (in terms of Multiply-Adds
and CPU Runtime) and segmentation performance (in terms of mIOU) on the PASCAL
VOC val or test set. The reported runtime is calculated by tfprof on a
workstation with CPU E5-1650 v3 @ 3.50GHz and 32GB memory. Note that applying
multi-scale inputs and left-right flips increases the segmentation performance
but also significantly increases the computation and thus may not be suitable
for real-time applications.
## DeepLab models trained on Cityscapes
### Model details
We provide several checkpoints that have been pretrained on Cityscapes
train_fine set.
Checkpoint name | Network backbone | Pretrained dataset | ASPP | Decoder
------------------------------------- | :--------------: | :-------------------------------------: | :----------------------------------------------: | :-----:
xception_cityscapes_trainfine | Xception_65 | ImageNet <br> Cityscapes train_fine set | [6, 12, 18] for OS=16 <br> [12, 24, 36] for OS=8 | OS = 4
In the table, **OS** denotes output stride.
Checkpoint name | Eval OS | Eval scales | Left-right Flip | Multiply-Adds | Runtime (sec) | Cityscapes mIOU | File Size
-------------------------------------------------------------------------------------------------------------------------------- | :-------: | :-------------------------: | :-------------: | :-------------------: | :------------: | :----------------------------: | :-------:
[xception_cityscapes_trainfine](http://download.tensorflow.org/models/deeplabv3_cityscapes_train_2018_02_06.tar.gz) | 16 <br> 8 | [1.0] <br> [0.75:0.25:1.25] | No <br> Yes | 418.64B <br> 8677.92B | 5.0 <br> 422.8 | 78.79% (val) <br> 80.42% (val) | 439MB
## Checkpoints pretrained on ImageNet
Un-tar'ed directory includes:
* model checkpoint (`model.ckpt.data-00000-of-00001`, `model.ckpt.index`).
### Model details
We also provide some checkpoints that are only pretrained on ImageNet so that
one could use this for training your own models.
* xception: We adapt the original Xception model to the task of semantic
segmentation with the following changes: (1) more layers, (2) all max
pooling operations are replaced by strided (atrous) separable convolutions,
and (3) extra batch-norm and ReLU after each 3x3 depthwise convolution are
added.
Model name | File Size
-------------------------------------------------------------------------------------- | :-------:
[xception](http://download.tensorflow.org/models/deeplabv3_xception_2018_01_04.tar.gz) | 447MB
## References
1. **Mobilenets: Efficient convolutional neural networks for mobile vision applications**<br />
Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, Hartwig Adam<br />
[[link]](https://arxiv.org/abs/1704.04861). arXiv:1704.04861, 2017.
2. **Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation**<br />
Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen<br />
[[link]](https://arxiv.org/abs/1801.04381). arXiv:1801.04381, 2018.
3. **Xception: Deep Learning with Depthwise Separable Convolutions**<br />
François Chollet<br />
[[link]](https://arxiv.org/abs/1610.02357). In the Proc. of CVPR, 2017.
4. **Deformable Convolutional Networks -- COCO Detection and Segmentation Challenge 2017 Entry**<br />
Haozhi Qi, Zheng Zhang, Bin Xiao, Han Hu, Bowen Cheng, Yichen Wei, Jifeng Dai<br />
[[link]](http://presentations.cocodataset.org/COCO17-Detect-MSRA.pdf). ICCV COCO Challenge
Workshop, 2017.
5. **The Pascal Visual Object Classes Challenge: A Retrospective**<br />
Mark Everingham, S. M. Ali Eslami, Luc Van Gool, Christopher K. I. Williams, John M. Winn, Andrew Zisserman<br />
[[link]](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/). IJCV, 2014.
6. **Semantic Contours from Inverse Detectors**<br />
Bharath Hariharan, Pablo Arbelaez, Lubomir Bourdev, Subhransu Maji, Jitendra Malik<br />
[[link]](http://home.bharathh.info/pubs/codes/SBD/download.html). In the Proc. of ICCV, 2011.
7. **The Cityscapes Dataset for Semantic Urban Scene Understanding**<br />
Cordts, Marius, Mohamed Omran, Sebastian Ramos, Timo Rehfeld, Markus Enzweiler, Rodrigo Benenson, Uwe Franke, Stefan Roth, Bernt Schiele. <br />
[[link]](https://www.cityscapes-dataset.com/). In the Proc. of CVPR, 2016.
8. **Microsoft COCO: Common Objects in Context**<br />
Tsung-Yi Lin, Michael Maire, Serge Belongie, Lubomir Bourdev, Ross Girshick, James Hays, Pietro Perona, Deva Ramanan, C. Lawrence Zitnick, Piotr Dollar<br />
[[link]](http://cocodataset.org/). In the Proc. of ECCV, 2014.
9. **ImageNet Large Scale Visual Recognition Challenge**<br />
Olga Russakovsky, Jia Deng, Hao Su, Jonathan Krause, Sanjeev Satheesh, Sean Ma, Zhiheng Huang, Andrej Karpathy, Aditya Khosla, Michael Bernstein, Alexander C. Berg, Li Fei-Fei<br />
[[link]](http://www.image-net.org/). IJCV, 2015.
# Running DeepLab on PASCAL VOC 2012 Semantic Segmentation Dataset
This page walks through the steps required to run DeepLab on PASCAL VOC 2012 on
a local machine.
## Download dataset and convert to TFRecord
We have prepared the script (under the folder `datasets`) to download and
convert PASCAL VOC 2012 semantic segmentation dataset to TFRecord.
```bash
# From the tensorflow/models/research/deeplab/datasets directory.
sh download_and_convert_voc2012.sh
```
The converted dataset will be saved at
./deeplab/datasets/pascal_voc_seg/tfrecord
## Recommended Directory Structure for Training and Evaluation
```
+ datasets
+ pascal_voc_seg
+ VOCdevkit
+ VOC2012
+ JPEGImages
+ SegmentationClass
+ tfrecord
+ exp
+ train_on_train_set
+ train
+ eval
+ vis
```
where the folder `train_on_train_set` stores the train/eval/vis events and
results (when training DeepLab on the PASCAL VOC 2012 train set).
## Running the train/eval/vis jobs
A local training job using `xception_65` can be run with the following command:
```bash
# From tensorflow/models/research/
python deeplab/train.py \
--logtostderr \
--train_split="train" \
--model_variant="xception_65" \
--atrous_rates=6 \
--atrous_rates=12 \
--atrous_rates=18 \
--output_stride=16 \
--decoder_output_stride=4 \
--train_crop_size=513 \
--train_crop_size=513 \
--train_batch_size=1 \
--tf_initial_checkpoints=${PATH_TO_INITIAL_CHECKPOINT} \
--train_logdir=${PATH_TO_TRAIN_DIR} \
--dataset_dir=${PATH_TO_DATASET}
```
where ${PATH_TO_INITIAL_CHECKPOINT} is the path to the initial checkpoint
(usually an ImageNet pretrained checkpoint), ${PATH_TO_TRAIN_DIR} is the
directory in which training checkpoints and events will be written to, and
${PATH_TO_DATASET} is the directory in which the PASCAL VOC 2012 dataset
resides.
Note that for {train,eval,vis}.py:
1. We use small batch size during training. The users could change it based on
the available GPU memory and also set `fine_tune_batch_norm` to be False or
True depending on the use case.
2. The users should change atrous_rates from [6, 12, 18] to [12, 24, 36] if
setting output_stride=8.
3. The users could skip the flag, `decoder_output_stride`, if you do not want
to use the decoder structure.
A local evaluation job using `xception_65` can be run with the following
command:
```bash
# From tensorflow/models/research/
python deeplab/eval.py \
--logtostderr \
--eval_split="val" \
--model_variant="xception_65" \
--atrous_rates=6 \
--atrous_rates=12 \
--atrous_rates=18 \
--output_stride=16 \
--decoder_output_stride=4 \
--eval_crop_size=513 \
--eval_crop_size=513 \
--checkpoint_dir=${PATH_TO_CHECKPOINT} \
--eval_logdir=${PATH_TO_EVAL_DIR} \
--dataset_dir=${PATH_TO_DATASET}
```
where ${PATH_TO_CHECKPOINT} is the path to the trained checkpoint (i.e., the
path to train_logdir), ${PATH_TO_EVAL_DIR} is the directory in which evaluation
events will be written to, and ${PATH_TO_DATASET} is the directory in which the
PASCAL VOC 2012 dataset resides.
A local visualization job using `xception_65` can be run with the following
command:
```bash
# From tensorflow/models/research/
python deeplab/vis.py \
--logtostderr \
--vis_split="val" \
--model_variant="xception_65" \
--atrous_rates=6 \
--atrous_rates=12 \
--atrous_rates=18 \
--output_stride=16 \
--decoder_output_stride=4 \
--vis_crop_size=513 \
--vis_crop_size=513 \
--checkpoint_dir=${PATH_TO_CHECKPOINT} \
--vis_logdir=${PATH_TO_VIS_DIR} \
--dataset_dir=${PATH_TO_DATASET}
```
where ${PATH_TO_CHECKPOINT} is the path to the trained checkpoint (i.e., the
path to train_logdir), ${PATH_TO_VIS_DIR} is the directory in which evaluation
events will be written to, and ${PATH_TO_DATASET} is the directory in which the
PASCAL VOC 2012 dataset resides. Note that if the users would like to save the
segmentation results for evaluation server, set also_save_raw_predictions =
True.
## Running Tensorboard
Progress for training and evaluation jobs can be inspected using Tensorboard. If
using the recommended directory structure, Tensorboard can be run using the
following command:
```bash
tensorboard --logdir=${PATH_TO_LOG_DIRECTORY}
```
where `${PATH_TO_LOG_DIRECTORY}` points to the directory that contains the
train, eval, and vis directories (e.g., the folder `train_on_train_set` in the
above example). Please note it may take Tensorboard a couple minutes to populate
with data.
## Example
We provide a script to run the {train,eval,vis,export_model}.py on the PASCAL VOC
2012 dataset as an example. See the code in local_test.sh for details.
```bash
# From tensorflow/models/research/deeplab
sh local_test.sh
```
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Prepares the data used for DeepLab training/evaluation."""
import tensorflow as tf
from deeplab.core import feature_extractor
from deeplab.core import preprocess_utils
# The probability of flipping the images and labels
# left-right during training
_PROB_OF_FLIP = 0.5
def preprocess_image_and_label(image,
label,
crop_height,
crop_width,
min_resize_value=None,
max_resize_value=None,
resize_factor=None,
min_scale_factor=1.,
max_scale_factor=1.,
scale_factor_step_size=0,
ignore_label=255,
is_training=True,
model_variant=None):
"""Preprocesses the image and label.
Args:
image: Input image.
label: Ground truth annotation label.
crop_height: The height value used to crop the image and label.
crop_width: The width value used to crop the image and label.
min_resize_value: Desired size of the smaller image side.
max_resize_value: Maximum allowed size of the larger image side.
resize_factor: Resized dimensions are multiple of factor plus one.
min_scale_factor: Minimum scale factor value.
max_scale_factor: Maximum scale factor value.
scale_factor_step_size: The step size from min scale factor to max scale
factor. The input is randomly scaled based on the value of
(min_scale_factor, max_scale_factor, scale_factor_step_size).
ignore_label: The label value which will be ignored for training and
evaluation.
is_training: If the preprocessing is used for training or not.
model_variant: Model variant (string) for choosing how to mean-subtract the
images. See feature_extractor.network_map for supported model variants.
Returns:
original_image: Original image (could be resized).
processed_image: Preprocessed image.
label: Preprocessed ground truth segmentation label.
Raises:
ValueError: Ground truth label not provided during training.
"""
if is_training and label is None:
raise ValueError('During training, label must be provided.')
if model_variant is None:
tf.logging.warning('Default mean-subtraction is performed. Please specify '
'a model_variant. See feature_extractor.network_map for '
'supported model variants.')
# Keep reference to original image.
original_image = image
processed_image = tf.cast(image, tf.float32)
if label is not None:
label = tf.cast(label, tf.int32)
# Resize image and label to the desired range.
if min_resize_value is not None or max_resize_value is not None:
[processed_image, label] = (
preprocess_utils.resize_to_range(
image=processed_image,
label=label,
min_size=min_resize_value,
max_size=max_resize_value,
factor=resize_factor,
align_corners=True))
# The `original_image` becomes the resized image.
original_image = tf.identity(processed_image)
# Data augmentation by randomly scaling the inputs.
scale = preprocess_utils.get_random_scale(
min_scale_factor, max_scale_factor, scale_factor_step_size)
processed_image, label = preprocess_utils.randomly_scale_image_and_label(
processed_image, label, scale)
processed_image.set_shape([None, None, 3])
# Pad image and label to have dimensions >= [crop_height, crop_width]
image_shape = tf.shape(processed_image)
image_height = image_shape[0]
image_width = image_shape[1]
target_height = image_height + tf.maximum(crop_height - image_height, 0)
target_width = image_width + tf.maximum(crop_width - image_width, 0)
# Pad image with mean pixel value.
mean_pixel = tf.reshape(
feature_extractor.mean_pixel(model_variant), [1, 1, 3])
processed_image = preprocess_utils.pad_to_bounding_box(
processed_image, 0, 0, target_height, target_width, mean_pixel)
if label is not None:
label = preprocess_utils.pad_to_bounding_box(
label, 0, 0, target_height, target_width, ignore_label)
# Randomly crop the image and label.
if is_training and label is not None:
processed_image, label = preprocess_utils.random_crop(
[processed_image, label], crop_height, crop_width)
processed_image.set_shape([crop_height, crop_width, 3])
if label is not None:
label.set_shape([crop_height, crop_width, 1])
if is_training:
# Randomly left-right flip the image and label.
processed_image, label, _ = preprocess_utils.flip_dim(
[processed_image, label], _PROB_OF_FLIP, dim=1)
return original_image, processed_image, label
#!/bin/bash
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
#
# This script is used to run local test on PASCAL VOC 2012. Users could also
# modify from this script for their use case.
#
# Usage:
# # From the tensorflow/models/research/deeplab directory.
# sh ./local_test.sh
#
#
# Exit immediately if a command exits with a non-zero status.
set -e
# Move one-level up to tensorflow/models/research directory.
cd ..
# Update PYTHONPATH.
export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim
# Set up the working environment.
CURRENT_DIR=$(pwd)
WORK_DIR="${CURRENT_DIR}/deeplab"
# Run model_test first to make sure the PYTHONPATH is correctly set.
python "${WORK_DIR}"/model_test.py -v
# Go to datasets folder and download PASCAL VOC 2012 segmentation dataset.
DATASET_DIR="datasets"
cd "${WORK_DIR}/${DATASET_DIR}"
sh download_and_convert_voc2012.sh
# Go back to original directory.
cd "${CURRENT_DIR}"
# Set up the working directories.
PASCAL_FOLDER="pascal_voc_seg"
EXP_FOLDER="exp/train_on_trainval_set"
INIT_FOLDER="${WORK_DIR}/${DATASET_DIR}/${PASCAL_FOLDER}/init_models"
TRAIN_LOGDIR="${WORK_DIR}/${DATASET_DIR}/${PASCAL_FOLDER}/${EXP_FOLDER}/train"
EVAL_LOGDIR="${WORK_DIR}/${DATASET_DIR}/${PASCAL_FOLDER}/${EXP_FOLDER}/eval"
VIS_LOGDIR="${WORK_DIR}/${DATASET_DIR}/${PASCAL_FOLDER}/${EXP_FOLDER}/vis"
EXPORT_DIR="${WORK_DIR}/${DATASET_DIR}/${PASCAL_FOLDER}/${EXP_FOLDER}/export"
mkdir -p "${INIT_FOLDER}"
mkdir -p "${TRAIN_LOGDIR}"
mkdir -p "${EVAL_LOGDIR}"
mkdir -p "${VIS_LOGDIR}"
mkdir -p "${EXPORT_DIR}"
# Copy locally the trained checkpoint as the initial checkpoint.
TF_INIT_ROOT="http://download.tensorflow.org/models"
TF_INIT_CKPT="deeplabv3_pascal_train_aug_2018_01_04.tar.gz"
cd "${INIT_FOLDER}"
wget -nd -c "${TF_INIT_ROOT}/${TF_INIT_CKPT}"
tar -xf "${TF_INIT_CKPT}"
cd "${CURRENT_DIR}"
PASCAL_DATASET="${WORK_DIR}/${DATASET_DIR}/${PASCAL_FOLDER}/tfrecord"
# Train 10 iterations.
NUM_ITERATIONS=10
python "${WORK_DIR}"/train.py \
--logtostderr \
--train_split="trainval" \
--model_variant="xception_65" \
--atrous_rates=6 \
--atrous_rates=12 \
--atrous_rates=18 \
--output_stride=16 \
--decoder_output_stride=4 \
--train_crop_size=513 \
--train_crop_size=513 \
--train_batch_size=4 \
--training_number_of_steps="${NUM_ITERATIONS}" \
--fine_tune_batch_norm=true \
--tf_initial_checkpoint="${INIT_FOLDER}/deeplabv3_pascal_train_aug/model.ckpt" \
--train_logdir="${TRAIN_LOGDIR}" \
--dataset_dir="${PASCAL_DATASET}"
# Run evaluation. This performs eval over the full val split (1449 images) and
# will take a while.
# Using the provided checkpoint, one should expect mIOU=82.20%.
python "${WORK_DIR}"/eval.py \
--logtostderr \
--eval_split="val" \
--model_variant="xception_65" \
--atrous_rates=6 \
--atrous_rates=12 \
--atrous_rates=18 \
--output_stride=16 \
--decoder_output_stride=4 \
--eval_crop_size=513 \
--eval_crop_size=513 \
--checkpoint_dir="${TRAIN_LOGDIR}" \
--eval_logdir="${EVAL_LOGDIR}" \
--dataset_dir="${PASCAL_DATASET}" \
--max_number_of_evaluations=1
# Visualize the results.
python "${WORK_DIR}"/vis.py \
--logtostderr \
--vis_split="val" \
--model_variant="xception_65" \
--atrous_rates=6 \
--atrous_rates=12 \
--atrous_rates=18 \
--output_stride=16 \
--decoder_output_stride=4 \
--vis_crop_size=513 \
--vis_crop_size=513 \
--checkpoint_dir="${TRAIN_LOGDIR}" \
--vis_logdir="${VIS_LOGDIR}" \
--dataset_dir="${PASCAL_DATASET}" \
--max_number_of_iterations=1
# Export the trained checkpoint.
CKPT_PATH="${TRAIN_LOGDIR}/model.ckpt-${NUM_ITERATIONS}"
EXPORT_PATH="${EXPORT_DIR}/frozen_inference_graph.pb"
python "${WORK_DIR}"/export_model.py \
--logtostderr \
--checkpoint_path="${CKPT_PATH}" \
--export_path="${EXPORT_PATH}" \
--model_variant="xception_65" \
--atrous_rates=6 \
--atrous_rates=12 \
--atrous_rates=18 \
--output_stride=16 \
--decoder_output_stride=4 \
--num_classes=21 \
--crop_size=513 \
--crop_size=513 \
--inference_scales=1.0
# Run inference with the exported checkpoint.
# Please refer to the provided deeplab_demo.ipynb for an example.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment