Unverified Commit 6741cfce authored by aquariusjay, committed by GitHub

Merge pull request #3853 from walkerlala/add-ade20k

add ADE20K dataset
parents 18e06438 13c9de39
......@@ -90,6 +90,7 @@ Running:
* <a href='g3doc/installation.md'>Installation.</a><br>
* <a href='g3doc/pascal.md'>Running DeepLab on PASCAL VOC 2012 semantic segmentation dataset.</a><br>
* <a href='g3doc/cityscapes.md'>Running DeepLab on Cityscapes semantic segmentation dataset.</a><br>
* <a href='g3doc/ade20k.md'>Running DeepLab on ADE20K semantic segmentation dataset.</a><br>
Models:
......
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import math
import os
import random
import sys
import build_data
import tensorflow as tf
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string(
'train_image_folder',
'./ADE20K/ADEChallengeData2016/images/training',
'Folder containing training images')
tf.app.flags.DEFINE_string(
'train_image_label_folder',
'./ADE20K/ADEChallengeData2016/annotations/training',
'Folder containing annotations for training images')
tf.app.flags.DEFINE_string(
'val_image_folder',
'./ADE20K/ADEChallengeData2016/images/validation',
'Folder containing validation images')
tf.app.flags.DEFINE_string(
'val_image_label_folder',
'./ADE20K/ADEChallengeData2016/annotations/validation',
'Folder containing annotations for validation')
tf.app.flags.DEFINE_string(
'output_dir', './ADE20K/tfrecord',
'Path to save the converted TFRecord of TensorFlow examples')
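# Number of TFRecord shards each dataset split is written to.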
_NUM_SHARDS = 4
def _convert_dataset(dataset_split, dataset_dir, dataset_label_dir):
""" Converts the ADE20k dataset into into tfrecord format (SSTable).
Args:
dataset_split: Dataset split (e.g., train, val).
dataset_dir: Dir in which the dataset locates.
dataset_label_dir: Dir in which the annotations locates.
Raises:
RuntimeError: If loaded image and label have different shape.
"""
img_names = tf.gfile.Glob(os.path.join(dataset_dir, '*.jpg'))
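  # Shuffle so that the examples are spread randomly across the shards.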
random.shuffle(img_names)
seg_names = []
for f in img_names:
    # Get the filename without the extension.
    basename = os.path.basename(f).split('.')[0]
    # Derive the path of its corresponding annotation *.png.
    seg = os.path.join(dataset_label_dir, basename + '.png')
seg_names.append(seg)
num_images = len(img_names)
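  # Distribute the examples as evenly as possible over _NUM_SHARDS files.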
num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))
image_reader = build_data.ImageReader('jpeg', channels=3)
label_reader = build_data.ImageReader('png', channels=1)
for shard_id in range(_NUM_SHARDS):
output_filename = os.path.join(
FLAGS.output_dir,
'%s-%05d-of-%05d.tfrecord' % (dataset_split, shard_id, _NUM_SHARDS))
with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
start_idx = shard_id * num_per_shard
end_idx = min((shard_id + 1) * num_per_shard, num_images)
for i in range(start_idx, end_idx):
sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
i + 1, num_images, shard_id))
sys.stdout.flush()
# Read the image.
image_filename = img_names[i]
        image_data = tf.gfile.FastGFile(image_filename, 'rb').read()
height, width = image_reader.read_image_dims(image_data)
# Read the semantic segmentation annotation.
seg_filename = seg_names[i]
        seg_data = tf.gfile.FastGFile(seg_filename, 'rb').read()
seg_height, seg_width = label_reader.read_image_dims(seg_data)
if height != seg_height or width != seg_width:
          raise RuntimeError('Shape mismatch between image and label.')
# Convert to tf example.
example = build_data.image_seg_to_tfexample(
image_data, img_names[i], height, width, seg_data)
tfrecord_writer.write(example.SerializeToString())
sys.stdout.write('\n')
sys.stdout.flush()
def main(unused_argv):
tf.gfile.MakeDirs(FLAGS.output_dir)
  _convert_dataset(
      'train', FLAGS.train_image_folder, FLAGS.train_image_label_folder)
  _convert_dataset('val', FLAGS.val_image_folder, FLAGS.val_image_label_folder)
if __name__ == '__main__':
tf.app.run()
......@@ -50,7 +50,6 @@ The Example proto contains the following fields:
image/segmentation/class/encoded: encoded semantic segmentation content.
image/segmentation/class/format: semantic segmentation file format.
"""
-import glob
import math
import os.path
import sys
......@@ -133,7 +132,7 @@ def _convert_dataset(dataset_split):
def main(unused_argv):
-  dataset_splits = glob.glob(os.path.join(FLAGS.list_folder, '*.txt'))
+  dataset_splits = tf.gfile.Glob(os.path.join(FLAGS.list_folder, '*.txt'))
for dataset_split in dataset_splits:
_convert_dataset(dataset_split)
......
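For context (not part of this diff), here is a minimal sketch (TF 1.x) of
reading back one serialized Example from a generated shard and inspecting the
fields listed in the docstring above; the shard path is illustrative:

```python
import tensorflow as tf

# Illustrative path: the first validation shard produced by the ADE20K script.
record_path = './ADE20K/tfrecord/val-00000-of-00004.tfrecord'
serialized = next(tf.python_io.tf_record_iterator(record_path))
example = tf.train.Example.FromString(serialized)
feature = example.features.feature
print(feature['image/filename'].bytes_list.value[0])
print(feature['image/segmentation/class/format'].bytes_list.value[0])  # 'png'
```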
#!/bin/bash
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
#
# Script to download and preprocess the ADE20K dataset.
#
# Usage:
# bash ./download_and_convert_ade20k.sh
#
# The folder structure is assumed to be:
# + datasets
# - build_data.py
# - build_ade20k_data.py
# - download_and_convert_ade20k.sh
# + ADE20K
# + tfrecord
# + ADEChallengeData2016
# + annotations
# + training
# + validation
# + images
# + training
# + validation
# Exit immediately if a command exits with a non-zero status.
set -e
CURRENT_DIR=$(pwd)
WORK_DIR="./ADE20K"
mkdir -p "${WORK_DIR}"
cd "${WORK_DIR}"
# Helper function to download and unpack ADE20K dataset.
download_and_uncompress() {
local BASE_URL=${1}
local FILENAME=${2}
if [ ! -f "${FILENAME}" ]; then
echo "Downloading ${FILENAME} to ${WORK_DIR}"
wget -nd -c "${BASE_URL}/${FILENAME}"
fi
echo "Uncompressing ${FILENAME}"
unzip "${FILENAME}"
}
# Download the images.
BASE_URL="http://data.csail.mit.edu/places/ADEchallenge"
FILENAME="ADEChallengeData2016.zip"
download_and_uncompress "${BASE_URL}" "${FILENAME}"
cd "${CURRENT_DIR}"
# Root path for ADE20K dataset.
ADE20K_ROOT="${WORK_DIR}/ADEChallengeData2016"
# Build TFRecords of the dataset.
# First, create output directory for storing TFRecords.
OUTPUT_DIR="${WORK_DIR}/tfrecord"
mkdir -p "${OUTPUT_DIR}"
echo "Converting ADE20K dataset..."
python ./build_ade20k_data.py \
--train_image_folder="${ADE20K_ROOT}/images/training/" \
--train_image_label_folder="${ADE20K_ROOT}/annotations/training/" \
--val_image_folder="${ADE20K_ROOT}/images/validation/" \
--val_image_label_folder="${ADE20K_ROOT}/annotations/validation/" \
--output_dir="${OUTPUT_DIR}"
......@@ -17,13 +17,13 @@
# Script to download and preprocess the PASCAL VOC 2012 dataset.
#
# Usage:
-# bash ./download_and_preprocess_voc2012.sh
+# bash ./download_and_convert_voc2012.sh
#
# The folder structure is assumed to be:
# + datasets
# - build_data.py
# - build_voc2012_data.py
-# - download_and_preprocess_voc2012.sh
+# - download_and_convert_voc2012.sh
# - remove_gt_colormap.py
# + pascal_voc_seg
# + VOCdevkit
......@@ -37,27 +37,27 @@ set -e
CURRENT_DIR=$(pwd)
WORK_DIR="./pascal_voc_seg"
-mkdir -p ${WORK_DIR}
-cd ${WORK_DIR}
+mkdir -p "${WORK_DIR}"
+cd "${WORK_DIR}"
# Helper function to download and unpack VOC 2012 dataset.
download_and_uncompress() {
local BASE_URL=${1}
local FILENAME=${2}
-  if [ ! -f ${FILENAME} ]; then
+  if [ ! -f "${FILENAME}" ]; then
echo "Downloading ${FILENAME} to ${WORK_DIR}"
wget -nd -c "${BASE_URL}/${FILENAME}"
fi
echo "Uncompressing ${FILENAME}"
-  tar -xf ${FILENAME}
+  tar -xf "${FILENAME}"
}
# Download the images.
BASE_URL="http://host.robots.ox.ac.uk/pascal/VOC/voc2012/"
FILENAME="VOCtrainval_11-May-2012.tar"
-download_and_uncompress ${BASE_URL} ${FILENAME}
+download_and_uncompress "${BASE_URL}" "${FILENAME}"
cd "${CURRENT_DIR}"
......
......@@ -31,6 +31,11 @@ images for the training, validation and test respectively.
The Cityscapes dataset contains 19 semantic labels (such as road, person, car,
and so on) for urban street scenes.
3. ADE20K dataset (http://groups.csail.mit.edu/vision/datasets/ADE20K)
The ADE20K dataset contains 150 semantic labels covering both urban street
scenes and indoor scenes.
References:
M. Everingham, S. M. A. Eslami, L. V. Gool, C. K. I. Williams, J. Winn,
and A. Zisserman, The PASCAL visual object classes challenge: a retrospective.
......@@ -39,6 +44,9 @@ References:
M. Cordts, M. Omran, S. Ramos, T. Rehfeld, M. Enzweiler, R. Benenson,
U. Franke, S. Roth, and B. Schiele, "The cityscapes dataset for semantic urban
scene understanding," In Proc. of CVPR, 2016.
B. Zhou, H. Zhao, X. Puig, S. Fidler, A. Barriuso, A. Torralba, "Scene Parsing
through ADE20K dataset", In Proc. of CVPR, 2017.
"""
import collections
import os.path
......@@ -85,10 +93,22 @@ _PASCAL_VOC_SEG_INFORMATION = DatasetDescriptor(
ignore_label=255,
)
# The numbers of samples below (per split, e.g., 'train'/'val') have to be
# hard-coded; you are required to figure them out for your own dataset.
_ADE20K_INFORMATION = DatasetDescriptor(
    splits_to_sizes={
'train': 20210, # num of samples in images/training
'val': 2000, # num of samples in images/validation
},
num_classes=150,
ignore_label=255,
)
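# Mapping from the value of the --dataset flag to the dataset descriptor.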
_DATASETS_INFORMATION = {
'cityscapes': _CITYSCAPES_INFORMATION,
'pascal_voc_seg': _PASCAL_VOC_SEG_INFORMATION,
'ade20k': _ADE20K_INFORMATION,
}
# Default file pattern of TFRecord of TensorFlow Example.
......
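For context (not part of this diff): the registered descriptor is what
train.py and eval.py look up through `segmentation_dataset.get_dataset`. A
minimal sketch, assuming it is run from `tensorflow/models/research` after the
TFRecords have been built into the default output directory:

```python
from deeplab.datasets import segmentation_dataset

# 'ade20k' selects _ADE20K_INFORMATION registered above; the split name must
# be a key of its splits_to_sizes ('train' or 'val').
dataset = segmentation_dataset.get_dataset(
    'ade20k', 'val', dataset_dir='./deeplab/datasets/ADE20K/tfrecord')
print(dataset.num_samples)  # 2000
print(dataset.num_classes)  # 150
```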
# Running DeepLab on ADE20K Semantic Segmentation Dataset
This page walks through the steps required to run DeepLab on the ADE20K
dataset on a local machine.
## Download dataset and convert to TFRecord
We have prepared the script (under the folder `datasets`) to download and
convert the ADE20K semantic segmentation dataset to TFRecord.
```bash
# From the tensorflow/models/research/deeplab/datasets directory.
bash download_and_convert_ade20k.sh
```
The converted dataset will be saved at `./deeplab/datasets/ADE20K/tfrecord`.
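To sanity-check the conversion, here is a minimal sketch (TF 1.x) that counts
the records in the generated shards; the glob pattern follows the
`%s-%05d-of-%05d.tfrecord` naming used by `build_ade20k_data.py`, and the
expected counts are 20210 for `train` and 2000 for `val`:

```python
import os
import tensorflow as tf

tfrecord_dir = './deeplab/datasets/ADE20K/tfrecord'
for split in ('train', 'val'):
  shards = tf.gfile.Glob(os.path.join(tfrecord_dir, '%s-*.tfrecord' % split))
  num_records = sum(
      1 for shard in shards for _ in tf.python_io.tf_record_iterator(shard))
  print('%s: %d records in %d shards' % (split, num_records, len(shards)))
```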
## Recommended Directory Structure for Training and Evaluation
```
+ datasets
- build_data.py
- build_ade20k_data.py
- download_and_convert_ade20k.sh
+ ADE20K
+ tfrecord
+ exp
+ train_on_train_set
+ train
+ eval
+ vis
+ ADEChallengeData2016
+ annotations
+ training
+ validation
+ images
+ training
+ validation
```
where the folder `train_on_train_set` stores the train/eval/vis events and
results (when training DeepLab on the ADE20K train set).
## Running the train/eval/vis jobs
A local training job using `xception_65` can be run with the following command:
```bash
# From tensorflow/models/research/
python deeplab/train.py \
--logtostderr \
--training_number_of_steps=50000 \
--train_split="train" \
--model_variant="xception_65" \
    --atrous_rates=6 \
    --atrous_rates=12 \
    --atrous_rates=18 \
--output_stride=16 \
--decoder_output_stride=4 \
--train_crop_size=513 \
--train_crop_size=513 \
--train_batch_size=4 \
--min_resize_value=350 \
--max_resize_value=500 \
--resize_factor=16 \
--fine_tune_batch_norm=False \
--dataset="ade20k" \
--initialize_last_layer=False \
--last_layers_contain_logits_only=True \
--tf_initial_checkpoint=${PATH_TO_INITIAL_CHECKPOINT} \
    --train_logdir=${PATH_TO_TRAIN_DIR} \
--dataset_dir=${PATH_TO_DATASET}
```
where `${PATH_TO_INITIAL_CHECKPOINT}` is the path to the initial checkpoint.
For example, if you are using the deeplabv3_pascal_train_aug checkpoint, you
will set it to `/path/to/deeplabv3_pascal_train_aug/model.ckpt`.
`${PATH_TO_TRAIN_DIR}` is the directory to which training checkpoints and
events will be written (it is recommended to set it to the
`train_on_train_set/train` directory above), and `${PATH_TO_DATASET}` is the
directory in which the ADE20K dataset resides (the `tfrecord` directory
above).
**Note that for train.py:**

1.  In order to fine-tune the batch norm layers, one needs to use a large
    batch size (> 12) and set fine_tune_batch_norm = True. Here, we simply
    use a small batch size during training for the purpose of demonstration.
    If you have limited GPU memory at hand, please fine-tune from our
    provided checkpoints, whose batch norm parameters have already been
    trained, and use a smaller learning rate with
    fine_tune_batch_norm = False.
2.  Users should fine-tune `min_resize_value` and `max_resize_value` to get
    better results. Note that `resize_factor` has to be equal to
    `output_stride`.
3.  Users should change `atrous_rates` from [6, 12, 18] to [12, 24, 36] if
    setting `output_stride=8`.
4.  Users could omit the flag `decoder_output_stride` if they do not want to
    use the decoder structure.
Currently there is no fine-tuned checkpoint available for the ADE20K dataset.
## Running TensorBoard

Progress for training and evaluation jobs can be inspected using TensorBoard.
If using the recommended directory structure, TensorBoard can be run with the
following command:
```bash
tensorboard --logdir=${PATH_TO_LOG_DIRECTORY}
```
where `${PATH_TO_LOG_DIRECTORY}` points to the directory that contains the
train directory (e.g., the folder `train_on_train_set` in the above example).
Please note it may take TensorBoard a couple of minutes to populate with data.
......@@ -64,19 +64,26 @@ _CONCAT_PROJECTION_SCOPE = 'concat_projection'
_DECODER_SCOPE = 'decoder'
-def get_extra_layer_scopes():
+def get_extra_layer_scopes(last_layers_contain_logits_only=False):
"""Gets the scopes for extra layers.
Args:
    last_layers_contain_logits_only: Boolean, True if only the logits are
      considered to be the last layer (i.e., exclude the ASPP module, decoder
      module, and so on).
Returns:
A list of scopes for extra layers.
"""
-  return [
-      _LOGITS_SCOPE_NAME,
-      _IMAGE_POOLING_SCOPE,
-      _ASPP_SCOPE,
-      _CONCAT_PROJECTION_SCOPE,
-      _DECODER_SCOPE,
-  ]
+  if last_layers_contain_logits_only:
+    return [_LOGITS_SCOPE_NAME]
+  else:
+    return [
+        _LOGITS_SCOPE_NAME,
+        _IMAGE_POOLING_SCOPE,
+        _ASPP_SCOPE,
+        _CONCAT_PROJECTION_SCOPE,
+        _DECODER_SCOPE,
+    ]
def predict_labels_multi_scale(images,
......
......@@ -122,6 +122,9 @@ flags.DEFINE_string('tf_initial_checkpoint', None,
flags.DEFINE_boolean('initialize_last_layer', True,
'Initialize the last layer.')
flags.DEFINE_boolean('last_layers_contain_logits_only', False,
                     'Whether to only consider the logits as the last layers.')
flags.DEFINE_integer('slow_start_step', 0,
'Training model with small learning rate for few steps.')
......@@ -322,7 +325,7 @@ def main(unused_argv):
summaries.add(tf.summary.scalar('total_loss', total_loss))
# Modify the gradients for biases and last layer variables.
-  last_layers = model.get_extra_layer_scopes()
+  last_layers = model.get_extra_layer_scopes(FLAGS.last_layers_contain_logits_only)
grad_mult = train_utils.get_model_gradient_multipliers(
last_layers, FLAGS.last_layer_gradient_multiplier)
if grad_mult:
......
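For context (not part of this diff): a minimal sketch of how
`initialize_last_layer` and `last_layers_contain_logits_only` interact when
restoring from the initial checkpoint. It mirrors the logic in
`deeplab/utils/train_utils.py` (TF 1.x slim API); the helper name is
illustrative:

```python
import tensorflow as tf

slim = tf.contrib.slim


def pick_variables_to_restore(initialize_last_layer, last_layers):
  """Illustrative helper: selects the variables restored from a checkpoint."""
  exclude_list = ['global_step']
  if not initialize_last_layer:
    # Exclude the last-layer scopes so that, e.g., a 21-class PASCAL
    # checkpoint can initialize the backbone while the 150-class ADE20K
    # logits are trained from scratch.
    exclude_list.extend(last_layers)
  return slim.get_variables_to_restore(exclude=exclude_list)
```

With `last_layers_contain_logits_only=True`, `last_layers` holds only the
logits scope, so the pre-trained ASPP and decoder weights are still restored;
with `False`, those modules are reinitialized as well.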