Commit 17ba1ca4 authored by Yubin Ruan

add ADE20K dataset

parent 5281c9a0
...@@ -90,6 +90,7 @@ Running:
* <a href='g3doc/installation.md'>Installation.</a><br>
* <a href='g3doc/pascal.md'>Running DeepLab on PASCAL VOC 2012 semantic segmentation dataset.</a><br>
* <a href='g3doc/cityscapes.md'>Running DeepLab on Cityscapes semantic segmentation dataset.</a><br>
* <a href='g3doc/ade20k.md'>Running DeepLab on ADE20K semantic segmentation dataset.</a><br>
Models:
......
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import glob
import math
import os
import random
import string
import sys
from PIL import Image
import build_data
import tensorflow as tf
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string(
    'train_image_folder',
    './ADE20K/ADEChallengeData2016/images/training',
    'Folder containing training images')
tf.app.flags.DEFINE_string(
    'train_image_label_folder',
    './ADE20K/ADEChallengeData2016/annotations/training',
    'Folder containing annotations for training images')
tf.app.flags.DEFINE_string(
    'val_image_folder',
    './ADE20K/ADEChallengeData2016/images/validation',
    'Folder containing validation images')
tf.app.flags.DEFINE_string(
    'val_image_label_folder',
    './ADE20K/ADEChallengeData2016/annotations/validation',
    'Folder containing annotations for validation images')
tf.app.flags.DEFINE_string(
    'output_dir', './ADE20K/tfrecord',
    'Path to save converted TFRecords of TensorFlow examples')
_NUM_SHARDS = 4
def _convert_dataset(dataset_split, dataset_dir, dataset_label_dir):
  """Converts the ADE20K dataset split into TFRecord format.

  Args:
    dataset_split: Dataset split (e.g., train, val).
    dataset_dir: Directory containing the images for this split.
    dataset_label_dir: Directory containing the annotations for this split.

  Raises:
    RuntimeError: If loaded image and label have different shape.
  """
  img_names = glob.glob(os.path.join(dataset_dir, '*.jpg'))
  random.shuffle(img_names)
  seg_names = []
  for f in img_names:
    # Get the filename without the extension.
    basename = os.path.basename(f).split('.')[0]
    # The corresponding annotation has the same basename with a .png extension.
    seg = os.path.join(dataset_label_dir, basename + '.png')
    seg_names.append(seg)

  num_images = len(img_names)
  num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))
  image_reader = build_data.ImageReader('jpeg', channels=3)
  label_reader = build_data.ImageReader('png', channels=1)

  # Spread the examples of each split over _NUM_SHARDS TFRecord files.
  for shard_id in range(_NUM_SHARDS):
    output_filename = os.path.join(
        FLAGS.output_dir,
        '%s-%05d-of-%05d.tfrecord' % (dataset_split, shard_id, _NUM_SHARDS))
    with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
      start_idx = shard_id * num_per_shard
      end_idx = min((shard_id + 1) * num_per_shard, num_images)
      for i in range(start_idx, end_idx):
        sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
            i + 1, num_images, shard_id))
        sys.stdout.flush()
        # Read the image.
        image_filename = img_names[i]
        image_data = tf.gfile.FastGFile(image_filename, 'rb').read()
        height, width = image_reader.read_image_dims(image_data)
        # Read the semantic segmentation annotation.
        seg_filename = seg_names[i]
        seg_data = tf.gfile.FastGFile(seg_filename, 'rb').read()
        seg_height, seg_width = label_reader.read_image_dims(seg_data)
        if height != seg_height or width != seg_width:
          raise RuntimeError('Shape mismatched between image and label.')
        # Convert to tf example.
        example = build_data.image_seg_to_tfexample(
            image_data, img_names[i], height, width, seg_data)
        tfrecord_writer.write(example.SerializeToString())
    sys.stdout.write('\n')
    sys.stdout.flush()
def main(unused_argv):
  tf.gfile.MakeDirs(FLAGS.output_dir)
  _convert_dataset('train', FLAGS.train_image_folder,
                   FLAGS.train_image_label_folder)
  _convert_dataset('val', FLAGS.val_image_folder,
                   FLAGS.val_image_label_folder)


if __name__ == '__main__':
  tf.app.run()
#!/bin/bash
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
#
# Script to download and preprocess the ADE20K dataset.
#
# Usage:
# bash ./download_and_convert_ade20k.sh
#
# The folder structure is assumed to be:
#  + datasets
#    - build_data.py
#    - build_ade20k_data.py
#    - download_and_convert_ade20k.sh
#    + ADE20K
#      + tfrecord
#      + ADEChallengeData2016
#        + annotations
#          + training
#          + validation
#        + images
#          + training
#          + validation
# Exit immediately if a command exits with a non-zero status.
set -e
CURRENT_DIR=$(pwd)
WORK_DIR="./ADE20K"
mkdir -p ${WORK_DIR}
cd ${WORK_DIR}
# Helper function to download and unpack ADE20K dataset.
download_and_uncompress() {
  local BASE_URL=${1}
  local FILENAME=${2}

  if [ ! -f "${FILENAME}" ]; then
    echo "Downloading ${FILENAME} to ${WORK_DIR}"
    wget -nd -c "${BASE_URL}/${FILENAME}"
  fi
  echo "Uncompressing ${FILENAME}"
  unzip "${FILENAME}"
}
# Download the images.
BASE_URL="http://data.csail.mit.edu/places/ADEchallenge"
FILENAME="ADEChallengeData2016.zip"
download_and_uncompress ${BASE_URL} ${FILENAME}
cd "${CURRENT_DIR}"
# Root path for ADE20K dataset.
ADE20K_ROOT="${WORK_DIR}/ADEChallengeData2016"
# Build TFRecords of the dataset.
# First, create output directory for storing TFRecords.
OUTPUT_DIR="${WORK_DIR}/tfrecord"
mkdir -p "${OUTPUT_DIR}"
echo "Converting ADE20K dataset..."
python ./build_ade20k_data.py \
  --train_image_folder="${ADE20K_ROOT}/images/training/" \
  --train_image_label_folder="${ADE20K_ROOT}/annotations/training/" \
  --val_image_folder="${ADE20K_ROOT}/images/validation/" \
  --val_image_label_folder="${ADE20K_ROOT}/annotations/validation/" \
  --output_dir="${OUTPUT_DIR}"
...@@ -17,13 +17,13 @@
# Script to download and preprocess the PASCAL VOC 2012 dataset.
#
# Usage:
-#   bash ./download_and_preprocess_voc2012.sh
+#   bash ./download_and_convert_voc2012.sh
#
# The folder structure is assumed to be:
#  + datasets
#    - build_data.py
#    - build_voc2012_data.py
-#    - download_and_preprocess_voc2012.sh
+#    - download_and_convert_voc2012.sh
#    - remove_gt_colormap.py
#  + pascal_voc_seg
#    + VOCdevkit
......
...@@ -85,10 +85,24 @@ _PASCAL_VOC_SEG_INFORMATION = DatasetDescriptor(
    ignore_label=255,
)
# These split sizes (i.e., 'train'/'val') have to be hard coded for now; update
# them to match your own dataset (one way to derive them automatically is
# sketched after this hunk).
_ADE20K_INFORMATION = DatasetDescriptor(
    splits_to_sizes={
        'train': 20210,  # num of samples in images/training
        'val': 2000,  # num of samples in images/validation
        'eval': 2,
    },
    num_classes=150,
    ignore_label=255,
)
_DATASETS_INFORMATION = {
    'cityscapes': _CITYSCAPES_INFORMATION,
    'pascal_voc_seg': _PASCAL_VOC_SEG_INFORMATION,
    'ade20k': _ADE20K_INFORMATION,
}
# Default file pattern of TFRecord of TensorFlow Example.
......
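As a side note on the hard-coded split sizes above: they can be checked by counting the images in each split of the extracted dataset. The commands below are only a sketch, assuming the default `ADEChallengeData2016` layout produced by `download_and_convert_ade20k.sh`:

```bash
# From the tensorflow/models/research/deeplab/datasets directory.
# Count the images per split; the official release yields 20210 and 2000.
ls ./ADE20K/ADEChallengeData2016/images/training/*.jpg | wc -l
ls ./ADE20K/ADEChallengeData2016/images/validation/*.jpg | wc -l
```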
# Running DeepLab on ADE20K Semantic Segmentation Dataset
This page walks through the steps required to run DeepLab on the ADE20K dataset
on a local machine.
## Download dataset and convert to TFRecord
We have prepared the script (under the folder `datasets`) to download and
convert the ADE20K semantic segmentation dataset to TFRecord.
```bash
# From the tensorflow/models/research/deeplab/datasets directory.
bash download_and_convert_ade20k.sh
```
The converted dataset will be saved at `./deeplab/datasets/ADE20K/tfrecord`.
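The conversion writes each split into a fixed number of shards (`_NUM_SHARDS = 4` in `build_ade20k_data.py`), so a quick sanity check is to list the output directory; the expected file names below assume that default:

```bash
# From the tensorflow/models/research/deeplab/datasets directory.
ls -1 ./ADE20K/tfrecord
# Expected output (with the default _NUM_SHARDS = 4):
#   train-00000-of-00004.tfrecord ... train-00003-of-00004.tfrecord
#   val-00000-of-00004.tfrecord   ... val-00003-of-00004.tfrecord
```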
## Recommended Directory Structure for Training and Evaluation
```
+ datasets
  - build_data.py
  - build_ade20k_data.py
  - download_and_convert_ade20k.sh
  + ADE20K
    + tfrecord
    + exp
      + train_on_train_set
        + train
        + eval
        + vis
    + ADEChallengeData2016
      + annotations
        + training
        + validation
      + images
        + training
        + validation
```
where the folder `train_on_train_set` stores the train/eval/vis events and
results (when training DeepLab on the ADE20K train set).
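If you want to follow this layout, the experiment folders under `exp` can be created up front; this is merely the suggested convention from the tree above, not something the provided scripts create for you:

```bash
# From the tensorflow/models/research/deeplab/datasets directory.
mkdir -p ./ADE20K/exp/train_on_train_set/{train,eval,vis}
```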
## Running the train/eval/vis jobs
A local training job using `xception_65` can be run with the following command:
```bash
# From tensorflow/models/research/
python deeplab/train.py \
    --logtostderr \
    --training_number_of_steps=50000 \
    --train_split="train" \
    --model_variant="xception_65" \
    --atrous_rates=6 \
    --atrous_rates=12 \
    --atrous_rates=18 \
    --output_stride=16 \
    --decoder_output_stride=4 \
    --train_crop_size=513 \
    --train_crop_size=513 \
    --train_batch_size=4 \
    --min_resize_value=350 \
    --max_resize_value=500 \
    --resize_factor=16 \
    --fine_tune_batch_norm=False \
    --dataset="ade20k" \
    --initialize_last_layer=False \
    --tf_initial_checkpoint=${PATH_TO_INITIAL_CHECKPOINT} \
    --train_logdir=${PATH_TO_TRAIN_DIR} \
    --dataset_dir=${PATH_TO_DATASET}
```
where `${PATH_TO_INITIAL_CHECKPOINT}` is the path to the initial checkpoint.
For example, if you are using the deeplabv3_pascal_train_aug checkpoint, you
will set this to `/path/to/deeplabv3_pascal_train_aug/model.ckpt`.
`${PATH_TO_TRAIN_DIR}` is the directory to which training checkpoints and
events will be written (it is recommended to set it to the
`train_on_train_set/train` folder above), and `${PATH_TO_DATASET}` is the
directory in which the ADE20K dataset resides (the `tfrecord` folder above).
**Note that for train.py:**

1.  In order to fine tune the batch norm layers, one needs to use a large batch
    size (> 12) and set fine_tune_batch_norm = True. Here, we simply use a
    small batch size during training for the purpose of demonstration. If the
    users have limited GPU memory at hand, please fine-tune from our provided
    checkpoints whose batch norm parameters have been trained, and use a
    smaller learning rate with fine_tune_batch_norm = False.

2.  Users should fine tune `min_resize_value` and `max_resize_value` to get
    better results. Note that `resize_factor` has to be equal to
    `output_stride`.

3.  Users should change atrous_rates from [6, 12, 18] to [12, 24, 36] if
    setting output_stride=8.

4.  Users could skip the flag `decoder_output_stride` if they do not want to
    use the decoder structure.
Currently there are no fine-tuned checkpoints for the ADE20K dataset.
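For the eval and vis jobs mentioned above, the commands follow the same pattern as for PASCAL VOC 2012. The evaluation command below is only a sketch: the flags mirror the training command above, and the crop and resize values in particular are assumptions that may need tuning for ADE20K.

```bash
# From tensorflow/models/research/
python deeplab/eval.py \
    --logtostderr \
    --eval_split="val" \
    --model_variant="xception_65" \
    --atrous_rates=6 \
    --atrous_rates=12 \
    --atrous_rates=18 \
    --output_stride=16 \
    --decoder_output_stride=4 \
    --eval_crop_size=513 \
    --eval_crop_size=513 \
    --min_resize_value=350 \
    --max_resize_value=500 \
    --resize_factor=16 \
    --dataset="ade20k" \
    --checkpoint_dir=${PATH_TO_TRAIN_DIR} \
    --eval_logdir=${PATH_TO_EVAL_DIR} \
    --dataset_dir=${PATH_TO_DATASET}
```

where `${PATH_TO_EVAL_DIR}` would be the `train_on_train_set/eval` folder in the recommended directory structure.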
## Running Tensorboard
Progress for training and evaluation jobs can be inspected using Tensorboard. If
using the recommended directory structure, Tensorboard can be run using the
following command:
```bash
tensorboard --logdir=${PATH_TO_LOG_DIRECTORY}
```
where `${PATH_TO_LOG_DIRECTORY}` points to the directory that contains the
train, eval, and vis directories (e.g., the folder `train_on_train_set` in the
above example). Please note it may take Tensorboard a couple of minutes to
populate with data.
...@@ -99,7 +99,7 @@ def get_model_init_fn(train_logdir,
  tf.logging.info('Initializing model from path: %s', tf_initial_checkpoint)
  # Variables that will not be restored.
-  exclude_list = ['global_step']
+  exclude_list = ['global_step', 'logits']
  if not initialize_last_layer:
    exclude_list.extend(last_layers)
......