Commit 17ba1ca4 authored by Yubin Ruan

add ADE20K dataset

parent 5281c9a0
...@@ -90,6 +90,7 @@ Running:
* <a href='g3doc/installation.md'>Installation.</a><br>
* <a href='g3doc/pascal.md'>Running DeepLab on PASCAL VOC 2012 semantic segmentation dataset.</a><br>
* <a href='g3doc/cityscapes.md'>Running DeepLab on Cityscapes semantic segmentation dataset.</a><br>
* <a href='g3doc/ade20k.md'>Running DeepLab on ADE20K semantic segmentation dataset.</a><br>
Models:
......
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import glob
import math
import os
import random
import string
import sys
from PIL import Image
import build_data
import tensorflow as tf
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string(
    'train_image_folder',
    './ADE20K/ADEChallengeData2016/images/training',
    'Folder containing training images')
tf.app.flags.DEFINE_string(
    'train_image_label_folder',
    './ADE20K/ADEChallengeData2016/annotations/training',
    'Folder containing annotations for training images')
tf.app.flags.DEFINE_string(
    'val_image_folder',
    './ADE20K/ADEChallengeData2016/images/validation',
    'Folder containing validation images')
tf.app.flags.DEFINE_string(
    'val_image_label_folder',
    './ADE20K/ADEChallengeData2016/annotations/validation',
    'Folder containing annotations for validation images')
tf.app.flags.DEFINE_string(
    'output_dir', './ADE20K/tfrecord',
    'Path to save converted TFRecords of TensorFlow examples')
_NUM_SHARDS = 4
def _convert_dataset(dataset_split, dataset_dir, dataset_label_dir):
  """Converts the ADE20K dataset split into TFRecord format.

  Args:
    dataset_split: Dataset split (e.g., train, val).
    dataset_dir: Directory containing the images for this split.
    dataset_label_dir: Directory containing the annotations for this split.

  Raises:
    RuntimeError: If loaded image and label have different shape.
  """
  img_names = glob.glob(os.path.join(dataset_dir, '*.jpg'))
  random.shuffle(img_names)
  seg_names = []
  for f in img_names:
    # Get the filename without the extension.
    basename = os.path.basename(f).split('.')[0]
    # The corresponding annotation has the same basename with a .png extension.
    seg = os.path.join(dataset_label_dir, basename + '.png')
    seg_names.append(seg)

  num_images = len(img_names)
  num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))
  image_reader = build_data.ImageReader('jpeg', channels=3)
  label_reader = build_data.ImageReader('png', channels=1)

  # Spread the examples of each split over _NUM_SHARDS TFRecord files.
  for shard_id in range(_NUM_SHARDS):
    output_filename = os.path.join(
        FLAGS.output_dir,
        '%s-%05d-of-%05d.tfrecord' % (dataset_split, shard_id, _NUM_SHARDS))
    with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
      start_idx = shard_id * num_per_shard
      end_idx = min((shard_id + 1) * num_per_shard, num_images)
      for i in range(start_idx, end_idx):
        sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
            i + 1, num_images, shard_id))
        sys.stdout.flush()
        # Read the image.
        image_filename = img_names[i]
        image_data = tf.gfile.FastGFile(image_filename, 'rb').read()
        height, width = image_reader.read_image_dims(image_data)
        # Read the semantic segmentation annotation.
        seg_filename = seg_names[i]
        seg_data = tf.gfile.FastGFile(seg_filename, 'rb').read()
        seg_height, seg_width = label_reader.read_image_dims(seg_data)
        if height != seg_height or width != seg_width:
          raise RuntimeError('Shape mismatched between image and label.')
        # Convert to tf example.
        example = build_data.image_seg_to_tfexample(
            image_data, img_names[i], height, width, seg_data)
        tfrecord_writer.write(example.SerializeToString())
    sys.stdout.write('\n')
    sys.stdout.flush()
def main(unused_argv):
  tf.gfile.MakeDirs(FLAGS.output_dir)
  _convert_dataset('train', FLAGS.train_image_folder,
                   FLAGS.train_image_label_folder)
  _convert_dataset('val', FLAGS.val_image_folder,
                   FLAGS.val_image_label_folder)


if __name__ == '__main__':
  tf.app.run()
#!/bin/bash
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
#
# Script to download and preprocess the ADE20K dataset.
#
# Usage:
# bash ./download_and_convert_ade20k.sh
#
# The folder structure is assumed to be:
#  + datasets
#    - build_data.py
#    - build_ade20k_data.py
#    - download_and_convert_ade20k.sh
#    + ADE20K
#      + tfrecord
#      + ADEChallengeData2016
#        + annotations
#          + training
#          + validation
#        + images
#          + training
#          + validation
# Exit immediately if a command exits with a non-zero status.
set -e
CURRENT_DIR=$(pwd)
WORK_DIR="./ADE20K"
mkdir -p ${WORK_DIR}
cd ${WORK_DIR}
# Helper function to download and unpack ADE20K dataset.
download_and_uncompress() {
  local BASE_URL=${1}
  local FILENAME=${2}

  if [ ! -f "${FILENAME}" ]; then
    echo "Downloading ${FILENAME} to ${WORK_DIR}"
    wget -nd -c "${BASE_URL}/${FILENAME}"
  fi
  echo "Uncompressing ${FILENAME}"
  unzip "${FILENAME}"
}
# Download the images.
BASE_URL="http://data.csail.mit.edu/places/ADEchallenge"
FILENAME="ADEChallengeData2016.zip"
download_and_uncompress ${BASE_URL} ${FILENAME}
cd "${CURRENT_DIR}"
# Root path for ADE20K dataset.
ADE20K_ROOT="${WORK_DIR}/ADEChallengeData2016"
# Build TFRecords of the dataset.
# First, create output directory for storing TFRecords.
OUTPUT_DIR="${WORK_DIR}/tfrecord"
mkdir -p "${OUTPUT_DIR}"
echo "Converting ADE20K dataset..."
python ./build_ade20k_data.py \
  --train_image_folder="${ADE20K_ROOT}/images/training/" \
  --train_image_label_folder="${ADE20K_ROOT}/annotations/training/" \
  --val_image_folder="${ADE20K_ROOT}/images/validation/" \
  --val_image_label_folder="${ADE20K_ROOT}/annotations/validation/" \
  --output_dir="${OUTPUT_DIR}"
...@@ -17,13 +17,13 @@
# Script to download and preprocess the PASCAL VOC 2012 dataset.
#
# Usage:
-#   bash ./download_and_preprocess_voc2012.sh
+#   bash ./download_and_convert_voc2012.sh
#
# The folder structure is assumed to be:
#  + datasets
#    - build_data.py
#    - build_voc2012_data.py
-#    - download_and_preprocess_voc2012.sh
+#    - download_and_convert_voc2012.sh
#    - remove_gt_colormap.py
#  + pascal_voc_seg
#    + VOCdevkit
......
...@@ -85,10 +85,24 @@ _PASCAL_VOC_SEG_INFORMATION = DatasetDescriptor(
    ignore_label=255,
)
# These split sizes (i.e., 'train'/'val') have to be hard coded for now; update
# them to match your own dataset (one way to derive them automatically is
# sketched after this hunk).
_ADE20K_INFORMATION = DatasetDescriptor(
    splits_to_sizes={
        'train': 20210,  # num of samples in images/training
        'val': 2000,  # num of samples in images/validation
        'eval': 2,
    },
    num_classes=150,
    ignore_label=255,
)
_DATASETS_INFORMATION = {
    'cityscapes': _CITYSCAPES_INFORMATION,
    'pascal_voc_seg': _PASCAL_VOC_SEG_INFORMATION,
    'ade20k': _ADE20K_INFORMATION,
}
# Default file pattern of TFRecord of TensorFlow Example.
......
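As a side note on the hard-coded split sizes above: they can be checked by counting the images in each split of the extracted dataset. The commands below are only a sketch, assuming the default `ADEChallengeData2016` layout produced by `download_and_convert_ade20k.sh`:

```bash
# From the tensorflow/models/research/deeplab/datasets directory.
# Count the images per split; the official release yields 20210 and 2000.
ls ./ADE20K/ADEChallengeData2016/images/training/*.jpg | wc -l
ls ./ADE20K/ADEChallengeData2016/images/validation/*.jpg | wc -l
```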
# Running DeepLab on ADE20K Semantic Segmentation Dataset
This page walks through the steps required to run DeepLab on the ADE20K dataset
on a local machine.
## Download dataset and convert to TFRecord
We have prepared the script (under the folder `datasets`) to download and
convert the ADE20K semantic segmentation dataset to TFRecord.
```bash
# From the tensorflow/models/research/deeplab/datasets directory.
bash download_and_convert_ade20k.sh
```
The converted dataset will be saved at `./deeplab/datasets/ADE20K/tfrecord`.
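The conversion writes each split into a fixed number of shards (`_NUM_SHARDS = 4` in `build_ade20k_data.py`), so a quick sanity check is to list the output directory; the expected file names below assume that default:

```bash
# From the tensorflow/models/research/deeplab/datasets directory.
ls -1 ./ADE20K/tfrecord
# Expected output (with the default _NUM_SHARDS = 4):
#   train-00000-of-00004.tfrecord ... train-00003-of-00004.tfrecord
#   val-00000-of-00004.tfrecord   ... val-00003-of-00004.tfrecord
```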
## Recommended Directory Structure for Training and Evaluation
```
+ datasets
  - build_data.py
  - build_ade20k_data.py
  - download_and_convert_ade20k.sh
  + ADE20K
    + tfrecord
    + exp
      + train_on_train_set
        + train
        + eval
        + vis
    + ADEChallengeData2016
      + annotations
        + training
        + validation
      + images
        + training
        + validation
```
where the folder `train_on_train_set` stores the train/eval/vis events and
results (when training DeepLab on the ADE20K train set).
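If you want to follow this layout, the experiment folders under `exp` can be created up front; this is merely the suggested convention from the tree above, not something the provided scripts create for you:

```bash
# From the tensorflow/models/research/deeplab/datasets directory.
mkdir -p ./ADE20K/exp/train_on_train_set/{train,eval,vis}
```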
## Running the train/eval/vis jobs
A local training job using `xception_65` can be run with the following command:
```bash
# From tensorflow/models/research/
python deeplab/train.py \
    --logtostderr \
    --training_number_of_steps=50000 \
    --train_split="train" \
    --model_variant="xception_65" \
    --atrous_rates=6 \
    --atrous_rates=12 \
    --atrous_rates=18 \
    --output_stride=16 \
    --decoder_output_stride=4 \
    --train_crop_size=513 \
    --train_crop_size=513 \
    --train_batch_size=4 \
    --min_resize_value=350 \
    --max_resize_value=500 \
    --resize_factor=16 \
    --fine_tune_batch_norm=False \
    --dataset="ade20k" \
    --initialize_last_layer=False \
    --tf_initial_checkpoint=${PATH_TO_INITIAL_CHECKPOINT} \
    --train_logdir=${PATH_TO_TRAIN_DIR} \
    --dataset_dir=${PATH_TO_DATASET}
```
where `${PATH_TO_INITIAL_CHECKPOINT}` is the path to the initial checkpoint.
For example, if you are using the deeplabv3_pascal_train_aug checkpoint, you
will set this to `/path/to/deeplabv3_pascal_train_aug/model.ckpt`.
`${PATH_TO_TRAIN_DIR}` is the directory to which training checkpoints and
events will be written (it is recommended to set it to the
`train_on_train_set/train` folder above), and `${PATH_TO_DATASET}` is the
directory in which the ADE20K dataset resides (the `tfrecord` folder above).
**Note that for train.py:**

1.  In order to fine tune the batch norm layers, one needs to use a large batch
    size (> 12) and set fine_tune_batch_norm = True. Here, we simply use a
    small batch size during training for the purpose of demonstration. If the
    users have limited GPU memory at hand, please fine-tune from our provided
    checkpoints whose batch norm parameters have been trained, and use a
    smaller learning rate with fine_tune_batch_norm = False.

2.  Users should fine tune `min_resize_value` and `max_resize_value` to get
    better results. Note that `resize_factor` has to be equal to
    `output_stride`.

3.  Users should change atrous_rates from [6, 12, 18] to [12, 24, 36] if
    setting output_stride=8.

4.  Users could skip the flag `decoder_output_stride` if they do not want to
    use the decoder structure.
Currently there are no fine-tuned checkpoints for the ADE20K dataset.
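For the eval and vis jobs mentioned above, the commands follow the same pattern as for PASCAL VOC 2012. The evaluation command below is only a sketch: the flags mirror the training command above, and the crop and resize values in particular are assumptions that may need tuning for ADE20K.

```bash
# From tensorflow/models/research/
python deeplab/eval.py \
    --logtostderr \
    --eval_split="val" \
    --model_variant="xception_65" \
    --atrous_rates=6 \
    --atrous_rates=12 \
    --atrous_rates=18 \
    --output_stride=16 \
    --decoder_output_stride=4 \
    --eval_crop_size=513 \
    --eval_crop_size=513 \
    --min_resize_value=350 \
    --max_resize_value=500 \
    --resize_factor=16 \
    --dataset="ade20k" \
    --checkpoint_dir=${PATH_TO_TRAIN_DIR} \
    --eval_logdir=${PATH_TO_EVAL_DIR} \
    --dataset_dir=${PATH_TO_DATASET}
```

where `${PATH_TO_EVAL_DIR}` would be the `train_on_train_set/eval` folder in the recommended directory structure.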
## Running Tensorboard
Progress for training and evaluation jobs can be inspected using Tensorboard. If
using the recommended directory structure, Tensorboard can be run using the
following command:
```bash
tensorboard --logdir=${PATH_TO_LOG_DIRECTORY}
```
where `${PATH_TO_LOG_DIRECTORY}` points to the directory that contains the
train, eval, and vis directories (e.g., the folder `train_on_train_set` in the
above example). Please note it may take Tensorboard a couple of minutes to
populate with data.
...@@ -99,7 +99,7 @@ def get_model_init_fn(train_logdir,
  tf.logging.info('Initializing model from path: %s', tf_initial_checkpoint)
  # Variables that will not be restored.
-  exclude_list = ['global_step']
+  exclude_list = ['global_step', 'logits']
  if not initialize_last_layer:
    exclude_list.extend(last_layers)
......