Unverified Commit 6741cfce authored by aquariusjay, committed by GitHub

Merge pull request #3853 from walkerlala/add-ade20k

add ADE20K dataset
parents 18e06438 13c9de39
......@@ -90,6 +90,7 @@ Running:
* <a href='g3doc/installation.md'>Installation.</a><br>
* <a href='g3doc/pascal.md'>Running DeepLab on PASCAL VOC 2012 semantic segmentation dataset.</a><br>
* <a href='g3doc/cityscapes.md'>Running DeepLab on Cityscapes semantic segmentation dataset.</a><br>
* <a href='g3doc/ade20k.md'>Running DeepLab on ADE20K semantic segmentation dataset.</a><br>
Models:
......
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import math
import os
import random
import sys
import build_data
import tensorflow as tf
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string(
'train_image_folder',
'./ADE20K/ADEChallengeData2016/images/training',
'Folder containing training images')
tf.app.flags.DEFINE_string(
'train_image_label_folder',
'./ADE20K/ADEChallengeData2016/annotations/training',
'Folder containing annotations for training images')
tf.app.flags.DEFINE_string(
'val_image_folder',
'./ADE20K/ADEChallengeData2016/images/validation',
'Folder containing validation images')
tf.app.flags.DEFINE_string(
'val_image_label_folder',
'./ADE20K/ADEChallengeData2016/annotations/validation',
'Folder containing annotations for validation')
tf.app.flags.DEFINE_string(
'output_dir', './ADE20K/tfrecord',
'Path to save the converted TFRecord of TensorFlow examples')
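# Number of TFRecord shards each dataset split is written to.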
_NUM_SHARDS = 4
def _convert_dataset(dataset_split, dataset_dir, dataset_label_dir):
""" Converts the ADE20k dataset into into tfrecord format (SSTable).
Args:
dataset_split: Dataset split (e.g., train, val).
dataset_dir: Dir in which the dataset locates.
dataset_label_dir: Dir in which the annotations locates.
Raises:
RuntimeError: If loaded image and label have different shape.
"""
img_names = tf.gfile.Glob(os.path.join(dataset_dir, '*.jpg'))
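  # Shuffle so that the examples are spread randomly across the shards.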
random.shuffle(img_names)
seg_names = []
for f in img_names:
    # Get the filename without the extension.
    basename = os.path.basename(f).split('.')[0]
    # Derive the path of its corresponding annotation *.png.
    seg = os.path.join(dataset_label_dir, basename + '.png')
seg_names.append(seg)
num_images = len(img_names)
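  # Distribute the examples as evenly as possible over _NUM_SHARDS files.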
num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))
image_reader = build_data.ImageReader('jpeg', channels=3)
label_reader = build_data.ImageReader('png', channels=1)
for shard_id in range(_NUM_SHARDS):
output_filename = os.path.join(
FLAGS.output_dir,
'%s-%05d-of-%05d.tfrecord' % (dataset_split, shard_id, _NUM_SHARDS))
with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
start_idx = shard_id * num_per_shard
end_idx = min((shard_id + 1) * num_per_shard, num_images)
for i in range(start_idx, end_idx):
sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
i + 1, num_images, shard_id))
sys.stdout.flush()
# Read the image.
image_filename = img_names[i]
        image_data = tf.gfile.FastGFile(image_filename, 'rb').read()
height, width = image_reader.read_image_dims(image_data)
# Read the semantic segmentation annotation.
seg_filename = seg_names[i]
        seg_data = tf.gfile.FastGFile(seg_filename, 'rb').read()
seg_height, seg_width = label_reader.read_image_dims(seg_data)
if height != seg_height or width != seg_width:
          raise RuntimeError('Shape mismatch between image and label.')
# Convert to tf example.
example = build_data.image_seg_to_tfexample(
image_data, img_names[i], height, width, seg_data)
tfrecord_writer.write(example.SerializeToString())
sys.stdout.write('\n')
sys.stdout.flush()
def main(unused_argv):
tf.gfile.MakeDirs(FLAGS.output_dir)
  _convert_dataset(
      'train', FLAGS.train_image_folder, FLAGS.train_image_label_folder)
  _convert_dataset('val', FLAGS.val_image_folder, FLAGS.val_image_label_folder)
if __name__ == '__main__':
tf.app.run()
......@@ -50,7 +50,6 @@ The Example proto contains the following fields:
image/segmentation/class/encoded: encoded semantic segmentation content.
image/segmentation/class/format: semantic segmentation file format.
"""
-import glob
import math
import os.path
import sys
......@@ -133,7 +132,7 @@ def _convert_dataset(dataset_split):
def main(unused_argv):
-  dataset_splits = glob.glob(os.path.join(FLAGS.list_folder, '*.txt'))
+  dataset_splits = tf.gfile.Glob(os.path.join(FLAGS.list_folder, '*.txt'))
for dataset_split in dataset_splits:
_convert_dataset(dataset_split)
......
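For context (not part of this diff), here is a minimal sketch (TF 1.x) of
reading back one serialized Example from a generated shard and inspecting the
fields listed in the docstring above; the shard path is illustrative:

```python
import tensorflow as tf

# Illustrative path: the first validation shard produced by the ADE20K script.
record_path = './ADE20K/tfrecord/val-00000-of-00004.tfrecord'
serialized = next(tf.python_io.tf_record_iterator(record_path))
example = tf.train.Example.FromString(serialized)
feature = example.features.feature
print(feature['image/filename'].bytes_list.value[0])
print(feature['image/segmentation/class/format'].bytes_list.value[0])  # 'png'
```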
#!/bin/bash
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
#
# Script to download and preprocess the ADE20K dataset.
#
# Usage:
# bash ./download_and_convert_ade20k.sh
#
# The folder structure is assumed to be:
# + datasets
# - build_data.py
# - build_ade20k_data.py
# - download_and_convert_ade20k.sh
# + ADE20K
# + tfrecord
# + ADEChallengeData2016
# + annotations
# + training
# + validation
# + images
# + training
# + validation
# Exit immediately if a command exits with a non-zero status.
set -e
CURRENT_DIR=$(pwd)
WORK_DIR="./ADE20K"
mkdir -p "${WORK_DIR}"
cd "${WORK_DIR}"
# Helper function to download and unpack ADE20K dataset.
download_and_uncompress() {
local BASE_URL=${1}
local FILENAME=${2}
if [ ! -f "${FILENAME}" ]; then
echo "Downloading ${FILENAME} to ${WORK_DIR}"
wget -nd -c "${BASE_URL}/${FILENAME}"
fi
echo "Uncompressing ${FILENAME}"
unzip "${FILENAME}"
}
# Download the images.
BASE_URL="http://data.csail.mit.edu/places/ADEchallenge"
FILENAME="ADEChallengeData2016.zip"
download_and_uncompress "${BASE_URL}" "${FILENAME}"
cd "${CURRENT_DIR}"
# Root path for ADE20K dataset.
ADE20K_ROOT="${WORK_DIR}/ADEChallengeData2016"
# Build TFRecords of the dataset.
# First, create output directory for storing TFRecords.
OUTPUT_DIR="${WORK_DIR}/tfrecord"
mkdir -p "${OUTPUT_DIR}"
echo "Converting ADE20K dataset..."
python ./build_ade20k_data.py \
--train_image_folder="${ADE20K_ROOT}/images/training/" \
--train_image_label_folder="${ADE20K_ROOT}/annotations/training/" \
--val_image_folder="${ADE20K_ROOT}/images/validation/" \
--val_image_label_folder="${ADE20K_ROOT}/annotations/validation/" \
--output_dir="${OUTPUT_DIR}"
......@@ -17,13 +17,13 @@
# Script to download and preprocess the PASCAL VOC 2012 dataset.
#
# Usage:
-# bash ./download_and_preprocess_voc2012.sh
+# bash ./download_and_convert_voc2012.sh
#
# The folder structure is assumed to be:
# + datasets
# - build_data.py
# - build_voc2012_data.py
-# - download_and_preprocess_voc2012.sh
+# - download_and_convert_voc2012.sh
# - remove_gt_colormap.py
# + pascal_voc_seg
# + VOCdevkit
......@@ -37,27 +37,27 @@ set -e
CURRENT_DIR=$(pwd)
WORK_DIR="./pascal_voc_seg"
-mkdir -p ${WORK_DIR}
-cd ${WORK_DIR}
+mkdir -p "${WORK_DIR}"
+cd "${WORK_DIR}"
# Helper function to download and unpack VOC 2012 dataset.
download_and_uncompress() {
local BASE_URL=${1}
local FILENAME=${2}
-  if [ ! -f ${FILENAME} ]; then
+  if [ ! -f "${FILENAME}" ]; then
echo "Downloading ${FILENAME} to ${WORK_DIR}"
wget -nd -c "${BASE_URL}/${FILENAME}"
fi
echo "Uncompressing ${FILENAME}"
-  tar -xf ${FILENAME}
+  tar -xf "${FILENAME}"
}
# Download the images.
BASE_URL="http://host.robots.ox.ac.uk/pascal/VOC/voc2012/"
FILENAME="VOCtrainval_11-May-2012.tar"
-download_and_uncompress ${BASE_URL} ${FILENAME}
+download_and_uncompress "${BASE_URL}" "${FILENAME}"
cd "${CURRENT_DIR}"
......
......@@ -31,6 +31,11 @@ images for the training, validation and test respectively.
The Cityscapes dataset contains 19 semantic labels (such as road, person, car,
and so on) for urban street scenes.
3. ADE20K dataset (http://groups.csail.mit.edu/vision/datasets/ADE20K)
The ADE20K dataset contains 150 semantic labels covering both urban street
scenes and indoor scenes.
References:
M. Everingham, S. M. A. Eslami, L. V. Gool, C. K. I. Williams, J. Winn,
and A. Zisserman, The PASCAL visual object classes challenge: a retrospective.
......@@ -39,6 +44,9 @@ References:
M. Cordts, M. Omran, S. Ramos, T. Rehfeld, M. Enzweiler, R. Benenson,
U. Franke, S. Roth, and B. Schiele, "The cityscapes dataset for semantic urban
scene understanding," In Proc. of CVPR, 2016.
B. Zhou, H. Zhao, X. Puig, S. Fidler, A. Barriuso, A. Torralba, "Scene Parsing
through ADE20K dataset", In Proc. of CVPR, 2017.
"""
import collections
import os.path
......@@ -85,10 +93,22 @@ _PASCAL_VOC_SEG_INFORMATION = DatasetDescriptor(
ignore_label=255,
)
# The numbers of samples below (per split, e.g., 'train'/'val') have to be
# hard-coded; you are required to figure them out for your own dataset.
_ADE20K_INFORMATION = DatasetDescriptor(
    splits_to_sizes={
'train': 20210, # num of samples in images/training
'val': 2000, # num of samples in images/validation
},
num_classes=150,
ignore_label=255,
)
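# Mapping from the value of the --dataset flag to the dataset descriptor.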
_DATASETS_INFORMATION = {
'cityscapes': _CITYSCAPES_INFORMATION,
'pascal_voc_seg': _PASCAL_VOC_SEG_INFORMATION,
'ade20k': _ADE20K_INFORMATION,
}
# Default file pattern of TFRecord of TensorFlow Example.
......
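For context (not part of this diff): the registered descriptor is what
train.py and eval.py look up through `segmentation_dataset.get_dataset`. A
minimal sketch, assuming it is run from `tensorflow/models/research` after the
TFRecords have been built into the default output directory:

```python
from deeplab.datasets import segmentation_dataset

# 'ade20k' selects _ADE20K_INFORMATION registered above; the split name must
# be a key of its splits_to_sizes ('train' or 'val').
dataset = segmentation_dataset.get_dataset(
    'ade20k', 'val', dataset_dir='./deeplab/datasets/ADE20K/tfrecord')
print(dataset.num_samples)  # 2000
print(dataset.num_classes)  # 150
```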
# Running DeepLab on ADE20K Semantic Segmentation Dataset
This page walks through the steps required to run DeepLab on the ADE20K
dataset on a local machine.
## Download dataset and convert to TFRecord
We have prepared the script (under the folder `datasets`) to download and
convert the ADE20K semantic segmentation dataset to TFRecord.
```bash
# From the tensorflow/models/research/deeplab/datasets directory.
bash download_and_convert_ade20k.sh
```
The converted dataset will be saved at `./deeplab/datasets/ADE20K/tfrecord`.
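To sanity-check the conversion, here is a minimal sketch (TF 1.x) that counts
the records in the generated shards; the glob pattern follows the
`%s-%05d-of-%05d.tfrecord` naming used by `build_ade20k_data.py`, and the
expected counts are 20210 for `train` and 2000 for `val`:

```python
import os
import tensorflow as tf

tfrecord_dir = './deeplab/datasets/ADE20K/tfrecord'
for split in ('train', 'val'):
  shards = tf.gfile.Glob(os.path.join(tfrecord_dir, '%s-*.tfrecord' % split))
  num_records = sum(
      1 for shard in shards for _ in tf.python_io.tf_record_iterator(shard))
  print('%s: %d records in %d shards' % (split, num_records, len(shards)))
```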
## Recommended Directory Structure for Training and Evaluation
```
+ datasets
- build_data.py
- build_ade20k_data.py
- download_and_convert_ade20k.sh
+ ADE20K
+ tfrecord
+ exp
+ train_on_train_set
+ train
+ eval
+ vis
+ ADEChallengeData2016
+ annotations
+ training
+ validation
+ images
+ training
+ validation
```
where the folder `train_on_train_set` stores the train/eval/vis events and
results (when training DeepLab on the ADE20K train set).
## Running the train/eval/vis jobs
A local training job using `xception_65` can be run with the following command:
```bash
# From tensorflow/models/research/
python deeplab/train.py \
--logtostderr \
--training_number_of_steps=50000 \
--train_split="train" \
--model_variant="xception_65" \
    --atrous_rates=6 \
    --atrous_rates=12 \
    --atrous_rates=18 \
--output_stride=16 \
--decoder_output_stride=4 \
--train_crop_size=513 \
--train_crop_size=513 \
--train_batch_size=4 \
--min_resize_value=350 \
--max_resize_value=500 \
--resize_factor=16 \
--fine_tune_batch_norm=False \
--dataset="ade20k" \
--initialize_last_layer=False \
--last_layers_contain_logits_only=True \
--tf_initial_checkpoint=${PATH_TO_INITIAL_CHECKPOINT} \
    --train_logdir=${PATH_TO_TRAIN_DIR} \
--dataset_dir=${PATH_TO_DATASET}
```
where `${PATH_TO_INITIAL_CHECKPOINT}` is the path to the initial checkpoint.
For example, if you are using the deeplabv3_pascal_train_aug checkpoint, you
will set it to `/path/to/deeplabv3_pascal_train_aug/model.ckpt`.
`${PATH_TO_TRAIN_DIR}` is the directory to which training checkpoints and
events will be written (it is recommended to set it to the
`train_on_train_set/train` directory above), and `${PATH_TO_DATASET}` is the
directory in which the ADE20K dataset resides (the `tfrecord` directory
above).
**Note that for train.py:**

1.  In order to fine-tune the batch norm layers, one needs to use a large
    batch size (> 12) and set fine_tune_batch_norm = True. Here, we simply
    use a small batch size during training for the purpose of demonstration.
    If you have limited GPU memory at hand, please fine-tune from our
    provided checkpoints, whose batch norm parameters have already been
    trained, and use a smaller learning rate with
    fine_tune_batch_norm = False.
2.  Users should fine-tune `min_resize_value` and `max_resize_value` to get
    better results. Note that `resize_factor` has to be equal to
    `output_stride`.
3.  Users should change `atrous_rates` from [6, 12, 18] to [12, 24, 36] if
    setting `output_stride=8`.
4.  Users could omit the flag `decoder_output_stride` if they do not want to
    use the decoder structure.
Currently there is no fine-tuned checkpoint available for the ADE20K dataset.
## Running TensorBoard

Progress for training and evaluation jobs can be inspected using TensorBoard.
If using the recommended directory structure, TensorBoard can be run with the
following command:
```bash
tensorboard --logdir=${PATH_TO_LOG_DIRECTORY}
```
where `${PATH_TO_LOG_DIRECTORY}` points to the directory that contains the
train directory (e.g., the folder `train_on_train_set` in the above example).
Please note it may take TensorBoard a couple of minutes to populate with data.
......@@ -64,19 +64,26 @@ _CONCAT_PROJECTION_SCOPE = 'concat_projection'
_DECODER_SCOPE = 'decoder'
-def get_extra_layer_scopes():
+def get_extra_layer_scopes(last_layers_contain_logits_only=False):
"""Gets the scopes for extra layers.
Args:
    last_layers_contain_logits_only: Boolean, True if only the logits are
      considered to be the last layer (i.e., exclude the ASPP module, decoder
      module, and so on).
Returns:
A list of scopes for extra layers.
"""
-  return [
-      _LOGITS_SCOPE_NAME,
-      _IMAGE_POOLING_SCOPE,
-      _ASPP_SCOPE,
-      _CONCAT_PROJECTION_SCOPE,
-      _DECODER_SCOPE,
-  ]
+  if last_layers_contain_logits_only:
+    return [_LOGITS_SCOPE_NAME]
+  else:
+    return [
+        _LOGITS_SCOPE_NAME,
+        _IMAGE_POOLING_SCOPE,
+        _ASPP_SCOPE,
+        _CONCAT_PROJECTION_SCOPE,
+        _DECODER_SCOPE,
+    ]
def predict_labels_multi_scale(images,
......
......@@ -122,6 +122,9 @@ flags.DEFINE_string('tf_initial_checkpoint', None,
flags.DEFINE_boolean('initialize_last_layer', True,
'Initialize the last layer.')
flags.DEFINE_boolean('last_layers_contain_logits_only', False,
                     'Whether to only consider the logits as the last layers.')
flags.DEFINE_integer('slow_start_step', 0,
'Training model with small learning rate for few steps.')
......@@ -322,7 +325,7 @@ def main(unused_argv):
summaries.add(tf.summary.scalar('total_loss', total_loss))
# Modify the gradients for biases and last layer variables.
-  last_layers = model.get_extra_layer_scopes()
+  last_layers = model.get_extra_layer_scopes(FLAGS.last_layers_contain_logits_only)
grad_mult = train_utils.get_model_gradient_multipliers(
last_layers, FLAGS.last_layer_gradient_multiplier)
if grad_mult:
......
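For context (not part of this diff): a minimal sketch of how
`initialize_last_layer` and `last_layers_contain_logits_only` interact when
restoring from the initial checkpoint. It mirrors the logic in
`deeplab/utils/train_utils.py` (TF 1.x slim API); the helper name is
illustrative:

```python
import tensorflow as tf

slim = tf.contrib.slim


def pick_variables_to_restore(initialize_last_layer, last_layers):
  """Illustrative helper: selects the variables restored from a checkpoint."""
  exclude_list = ['global_step']
  if not initialize_last_layer:
    # Exclude the last-layer scopes so that, e.g., a 21-class PASCAL
    # checkpoint can initialize the backbone while the 150-class ADE20K
    # logits are trained from scratch.
    exclude_list.extend(last_layers)
  return slim.get_variables_to_restore(exclude=exclude_list)
```

With `last_layers_contain_logits_only=True`, `last_layers` holds only the
logits scope, so the pre-trained ASPP and decoder weights are still restored;
with `False`, those modules are reinitialized as well.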