Commit 6741cfce (unverified), authored Apr 09, 2018 by aquariusjay, committed by GitHub on Apr 09, 2018.

Merge pull request #3853 from walkerlala/add-ade20k

add ADE20K dataset

Parents: 18e06438, 13c9de39
Showing 9 changed files with 357 additions and 18 deletions:
- research/deeplab/README.md (+1, -0)
- research/deeplab/datasets/build_ade20k_data.py (+113, -0)
- research/deeplab/datasets/build_voc2012_data.py (+1, -2)
- research/deeplab/datasets/download_and_convert_ade20k.sh (+80, -0)
- research/deeplab/datasets/download_and_convert_voc2012.sh (+7, -7)
- research/deeplab/datasets/segmentation_dataset.py (+20, -0)
- research/deeplab/g3doc/ade20k.md (+116, -0)
- research/deeplab/model.py (+15, -8)
- research/deeplab/train.py (+4, -1)
research/deeplab/README.md
```diff
@@ -90,6 +90,7 @@ Running:
 * <a href='g3doc/installation.md'>Installation.</a><br>
 * <a href='g3doc/pascal.md'>Running DeepLab on PASCAL VOC 2012 semantic segmentation dataset.</a><br>
 * <a href='g3doc/cityscapes.md'>Running DeepLab on Cityscapes semantic segmentation dataset.</a><br>
+* <a href='g3doc/ade20k.md'>Running DeepLab on ADE20K semantic segmentation dataset.</a><br>
 Models:
```
research/deeplab/datasets/build_ade20k_data.py (new file, mode 100644)
```python
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import math
import os
import random
import string
import sys
import build_data
import tensorflow as tf

FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string(
    'train_image_folder',
    './ADE20K/ADEChallengeData2016/images/training',
    'Folder containing training images')

tf.app.flags.DEFINE_string(
    'train_image_label_folder',
    './ADE20K/ADEChallengeData2016/annotations/training',
    'Folder containing annotations for training images')

tf.app.flags.DEFINE_string(
    'val_image_folder',
    './ADE20K/ADEChallengeData2016/images/validation',
    'Folder containing validation images')

tf.app.flags.DEFINE_string(
    'val_image_label_folder',
    './ADE20K/ADEChallengeData2016/annotations/validation',
    'Folder containing annotations for validation')

tf.app.flags.DEFINE_string(
    'output_dir',
    './ADE20K/tfrecord',
    'Path to save converted SSTable of TensorFlow examples')

_NUM_SHARDS = 4


def _convert_dataset(dataset_split, dataset_dir, dataset_label_dir):
  """Converts the ADE20K dataset into tfrecord format (SSTable).

  Args:
    dataset_split: Dataset split (e.g., train, val).
    dataset_dir: Directory in which the dataset is located.
    dataset_label_dir: Directory in which the annotations are located.

  Raises:
    RuntimeError: If loaded image and label have different shape.
  """
  img_names = tf.gfile.Glob(os.path.join(dataset_dir, '*.jpg'))
  random.shuffle(img_names)
  seg_names = []
  for f in img_names:
    # Get the filename without the extension.
    basename = os.path.basename(f).split('.')[0]
    # Cover its corresponding *_seg.png.
    seg = os.path.join(dataset_label_dir, basename + '.png')
    seg_names.append(seg)

  num_images = len(img_names)
  num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))

  image_reader = build_data.ImageReader('jpeg', channels=3)
  label_reader = build_data.ImageReader('png', channels=1)

  for shard_id in range(_NUM_SHARDS):
    output_filename = os.path.join(
        FLAGS.output_dir,
        '%s-%05d-of-%05d.tfrecord' % (dataset_split, shard_id, _NUM_SHARDS))
    with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
      start_idx = shard_id * num_per_shard
      end_idx = min((shard_id + 1) * num_per_shard, num_images)
      for i in range(start_idx, end_idx):
        sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
            i + 1, num_images, shard_id))
        sys.stdout.flush()
        # Read the image.
        image_filename = img_names[i]
        image_data = tf.gfile.FastGFile(image_filename, 'r').read()
        height, width = image_reader.read_image_dims(image_data)
        # Read the semantic segmentation annotation.
        seg_filename = seg_names[i]
        seg_data = tf.gfile.FastGFile(seg_filename, 'r').read()
        seg_height, seg_width = label_reader.read_image_dims(seg_data)
        if height != seg_height or width != seg_width:
          raise RuntimeError('Shape mismatched between image and label.')
        # Convert to tf example.
        example = build_data.image_seg_to_tfexample(
            image_data, img_names[i], height, width, seg_data)
        tfrecord_writer.write(example.SerializeToString())
    sys.stdout.write('\n')
    sys.stdout.flush()


def main(unused_argv):
  tf.gfile.MakeDirs(FLAGS.output_dir)
  _convert_dataset('train', FLAGS.train_image_folder,
                   FLAGS.train_image_label_folder)
  _convert_dataset('val', FLAGS.val_image_folder,
                   FLAGS.val_image_label_folder)


if __name__ == '__main__':
  tf.app.run()
```
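Not part of this commit, but a quick way to sanity-check the converted shards is to iterate over one of them with TF 1.x's record reader. A minimal sketch, assuming the Example feature keys follow the proto documented in build_voc2012_data.py ('image/segmentation/class/format' is documented there; 'image/height' and 'image/width' are assumed to use the same naming convention):

```python
import tensorflow as tf

# Count the records in one shard and print a few fields of the first one.
shard = './ADE20K/tfrecord/train-00000-of-00004.tfrecord'
count = 0
for raw in tf.python_io.tf_record_iterator(shard):
    if count == 0:
        example = tf.train.Example()
        example.ParseFromString(raw)
        feats = example.features.feature
        print('height:', feats['image/height'].int64_list.value[0])
        print('width:', feats['image/width'].int64_list.value[0])
        print('label format:',
              feats['image/segmentation/class/format'].bytes_list.value[0])
    count += 1
print('records in shard:', count)
```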
research/deeplab/datasets/build_voc2012_data.py
```diff
@@ -50,7 +50,6 @@ The Example proto contains the following fields:
   image/segmentation/class/encoded: encoded semantic segmentation content.
   image/segmentation/class/format: semantic segmentation file format.
 """
-import glob
 import math
 import os.path
 import sys
```

```diff
@@ -133,7 +132,7 @@ def _convert_dataset(dataset_split):
 def main(unused_argv):
-  dataset_splits = glob.glob(os.path.join(FLAGS.list_folder, '*.txt'))
+  dataset_splits = tf.gfile.Glob(os.path.join(FLAGS.list_folder, '*.txt'))
   for dataset_split in dataset_splits:
     _convert_dataset(dataset_split)
```
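A side note on why this one-line change matters (general `tf.gfile` behavior in TF 1.x, not something introduced by this commit): `tf.gfile.Glob` goes through TensorFlow's filesystem layer, so `list_folder` is no longer restricted to the local disk.

```python
import tensorflow as tf

# Same call shape as glob.glob, but patterns such as 'gs://bucket/lists/*.txt'
# can also work when the corresponding filesystem support is compiled in.
print(tf.gfile.Glob('/tmp/*.txt'))
```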
research/deeplab/datasets/download_and_convert_ade20k.sh (new file, mode 100644)
```bash
#!/bin/bash
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
#
# Script to download and preprocess the ADE20K dataset.
#
# Usage:
#   bash ./download_and_convert_ade20k.sh
#
# The folder structure is assumed to be:
#  + datasets
#     - build_data.py
#     - build_ade20k_data.py
#     - download_and_convert_ade20k.sh
#     + ADE20K
#       + tfrecord
#       + ADEChallengeData2016
#         + annotations
#           + training
#           + validation
#         + images
#           + training
#           + validation

# Exit immediately if a command exits with a non-zero status.
set -e

CURRENT_DIR=$(pwd)
WORK_DIR="./ADE20K"
mkdir -p "${WORK_DIR}"
cd "${WORK_DIR}"

# Helper function to download and unpack the ADE20K dataset.
download_and_uncompress() {
  local BASE_URL=${1}
  local FILENAME=${2}

  if [ ! -f "${FILENAME}" ]; then
    echo "Downloading ${FILENAME} to ${WORK_DIR}"
    wget -nd -c "${BASE_URL}/${FILENAME}"
  fi
  echo "Uncompressing ${FILENAME}"
  unzip "${FILENAME}"
}

# Download the images.
BASE_URL="http://data.csail.mit.edu/places/ADEchallenge"
FILENAME="ADEChallengeData2016.zip"

download_and_uncompress "${BASE_URL}" "${FILENAME}"

cd "${CURRENT_DIR}"

# Root path for ADE20K dataset.
ADE20K_ROOT="${WORK_DIR}/ADEChallengeData2016"

# Build TFRecords of the dataset.
# First, create output directory for storing TFRecords.
OUTPUT_DIR="${WORK_DIR}/tfrecord"
mkdir -p "${OUTPUT_DIR}"

echo "Converting ADE20K dataset..."
python ./build_ade20k_data.py \
  --train_image_folder="${ADE20K_ROOT}/images/training/" \
  --train_image_label_folder="${ADE20K_ROOT}/annotations/training/" \
  --val_image_folder="${ADE20K_ROOT}/images/validation/" \
  --val_image_label_folder="${ADE20K_ROOT}/annotations/validation/" \
  --output_dir="${OUTPUT_DIR}"
```
research/deeplab/datasets/download_and_convert_voc2012.sh
```diff
@@ -17,13 +17,13 @@
 # Script to download and preprocess the PASCAL VOC 2012 dataset.
 #
 # Usage:
-#   bash ./download_and_preprocess_voc2012.sh
+#   bash ./download_and_convert_voc2012.sh
 #
 # The folder structure is assumed to be:
 #  + datasets
 #     - build_data.py
 #     - build_voc2012_data.py
-#     - download_and_preprocess_voc2012.sh
+#     - download_and_convert_voc2012.sh
 #     - remove_gt_colormap.py
 #     + pascal_voc_seg
 #       + VOCdevkit
```

```diff
@@ -37,27 +37,27 @@ set -e
 CURRENT_DIR=$(pwd)
 WORK_DIR="./pascal_voc_seg"
-mkdir -p ${WORK_DIR}
-cd ${WORK_DIR}
+mkdir -p "${WORK_DIR}"
+cd "${WORK_DIR}"
 
 # Helper function to download and unpack VOC 2012 dataset.
 download_and_uncompress() {
   local BASE_URL=${1}
   local FILENAME=${2}
 
-  if [ ! -f ${FILENAME} ]; then
+  if [ ! -f "${FILENAME}" ]; then
     echo "Downloading ${FILENAME} to ${WORK_DIR}"
     wget -nd -c "${BASE_URL}/${FILENAME}"
   fi
   echo "Uncompressing ${FILENAME}"
-  tar -xf ${FILENAME}
+  tar -xf "${FILENAME}"
 }
 
 # Download the images.
 BASE_URL="http://host.robots.ox.ac.uk/pascal/VOC/voc2012/"
 FILENAME="VOCtrainval_11-May-2012.tar"
 
-download_and_uncompress ${BASE_URL} ${FILENAME}
+download_and_uncompress "${BASE_URL}" "${FILENAME}"
 
 cd "${CURRENT_DIR}"
```
research/deeplab/datasets/segmentation_dataset.py
```diff
@@ -31,6 +31,11 @@ images for the training, validation and test respectively.
 The Cityscapes dataset contains 19 semantic labels (such as road, person, car,
 and so on) for urban street scenes.
 
+3. ADE20K dataset (http://groups.csail.mit.edu/vision/datasets/ADE20K)
+
+The ADE20K dataset contains 150 semantic labels covering both urban street
+scenes and indoor scenes.
+
 References:
   M. Everingham, S. M. A. Eslami, L. V. Gool, C. K. I. Williams, J. Winn,
   and A. Zisserman, The pascal visual object classes challenge a retrospective.
```

```diff
@@ -39,6 +44,9 @@ References:
   M. Cordts, M. Omran, S. Ramos, T. Rehfeld, M. Enzweiler, R. Benenson,
   U. Franke, S. Roth, and B. Schiele, "The cityscapes dataset for semantic urban
   scene understanding," In Proc. of CVPR, 2016.
+
+  B. Zhou, H. Zhao, X. Puig, S. Fidler, A. Barriuso, A. Torralba, "Scene Parsing
+  through ADE20K dataset", In Proc. of CVPR, 2017.
 """
 
 import collections
 import os.path
```
```diff
@@ -85,10 +93,22 @@ _PASCAL_VOC_SEG_INFORMATION = DatasetDescriptor(
     ignore_label=255,
 )
 
+# These numbers (i.e., 'train'/'val') seem to have to be hard-coded.
+# You are required to figure them out for your training/testing example.
+_ADE20K_INFORMATION = DatasetDescriptor(
+    splits_to_sizes={
+        'train': 20210,  # num of samples in images/training
+        'val': 2000,  # num of samples in images/validation
+    },
+    num_classes=150,
+    ignore_label=255,
+)
+
 _DATASETS_INFORMATION = {
     'cityscapes': _CITYSCAPES_INFORMATION,
     'pascal_voc_seg': _PASCAL_VOC_SEG_INFORMATION,
+    'ade20k': _ADE20K_INFORMATION,
 }
 
 # Default file pattern of TFRecord of TensorFlow Example.
```
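With the descriptor registered, the new dataset becomes selectable by name. A minimal sketch, assuming `segmentation_dataset.get_dataset(dataset_name, split_name, dataset_dir)` keeps its existing signature and the TFRecords sit in the default output location:

```python
from deeplab.datasets import segmentation_dataset

# 'ade20k' now resolves to _ADE20K_INFORMATION; an unknown name raises.
dataset = segmentation_dataset.get_dataset(
    'ade20k', 'train', './deeplab/datasets/ADE20K/tfrecord')
print(dataset.num_classes)   # 150
print(dataset.ignore_label)  # 255
```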
research/deeplab/g3doc/ade20k.md (new file, mode 100644)
# Running DeepLab on ADE20K Semantic Segmentation Dataset

This page walks through the steps required to run DeepLab on the ADE20K dataset on a local machine.

## Download dataset and convert to TFRecord

We have prepared the script (under the folder `datasets`) to download and convert the ADE20K semantic segmentation dataset to TFRecord.

```bash
# From the tensorflow/models/research/deeplab/datasets directory.
bash download_and_convert_ade20k.sh
```

The converted dataset will be saved at `./deeplab/datasets/ADE20K/tfrecord`.
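For reference, build_ade20k_data.py writes four shards per split (`_NUM_SHARDS = 4`), so the expected output files can be listed up front:

```python
# Shard names follow the '%s-%05d-of-%05d.tfrecord' pattern from
# build_ade20k_data.py.
for split in ('train', 'val'):
    for shard_id in range(4):
        print('%s-%05d-of-%05d.tfrecord' % (split, shard_id, 4))
# train-00000-of-00004.tfrecord ... val-00003-of-00004.tfrecord
```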
## Recommended Directory Structure for Training and Evaluation

```
+ datasets
  - build_data.py
  - build_ade20k_data.py
  - download_and_convert_ade20k.sh
  + ADE20K
    + tfrecord
    + exp
      + train_on_train_set
        + train
        + eval
        + vis
    + ADEChallengeData2016
      + annotations
        + training
        + validation
      + images
        + training
        + validation
```

where the folder `train_on_train_set` stores the train/eval/vis events and results (when training DeepLab on the ADE20K train set).
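A small convenience sketch (a hypothetical helper, not part of this commit) for creating the experiment directories above before launching the jobs:

```python
import os

base = './deeplab/datasets/ADE20K/exp/train_on_train_set'
for sub in ('train', 'eval', 'vis'):
    path = os.path.join(base, sub)
    if not os.path.isdir(path):
        os.makedirs(path)  # also creates intermediate directories
print('experiment directories ready under ' + base)
```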
## Running the train/eval/vis jobs

A local training job using `xception_65` can be run with the following command:

```bash
# From tensorflow/models/research/
python deeplab/train.py \
    --logtostderr \
    --training_number_of_steps=50000 \
    --train_split="train" \
    --model_variant="xception_65" \
    --atrous_rates=6 \
    --atrous_rates=12 \
    --atrous_rates=18 \
    --output_stride=16 \
    --decoder_output_stride=4 \
    --train_crop_size=513 \
    --train_crop_size=513 \
    --train_batch_size=4 \
    --min_resize_value=350 \
    --max_resize_value=500 \
    --resize_factor=16 \
    --fine_tune_batch_norm=False \
    --dataset="ade20k" \
    --initialize_last_layer=False \
    --last_layers_contain_logits_only=True \
    --tf_initial_checkpoint=${PATH_TO_INITIAL_CHECKPOINT} \
    --train_logdir=${PATH_TO_TRAIN_DIR} \
    --dataset_dir=${PATH_TO_DATASET}
```
where `${PATH_TO_INITIAL_CHECKPOINT}` is the path to the initial checkpoint. For example, if you are using the deeplabv3_pascal_train_aug checkpoint, you will set this to `/path/to/deeplabv3_pascal_train_aug/model.ckpt`. `${PATH_TO_TRAIN_DIR}` is the directory to which training checkpoints and events will be written (it is recommended to set it to the `train_on_train_set/train` directory above), and `${PATH_TO_DATASET}` is the directory in which the ADE20K dataset resides (the `tfrecord` directory above).
**Note that for train.py:**

1. In order to fine-tune the BN layers, one needs to use a large batch size (> 12) and set fine_tune_batch_norm=True. Here, we simply use a small batch size during training for the purpose of demonstration. If the users have limited GPU memory at hand, please fine-tune from our provided checkpoints, whose batch norm parameters have been trained, and use a smaller learning rate with fine_tune_batch_norm=False.

2. Users should fine-tune `min_resize_value` and `max_resize_value` to get better results. Note that `resize_factor` has to be equal to `output_stride`.

3. The users should change atrous_rates from [6, 12, 18] to [12, 24, 36] if setting output_stride=8.

4. The users could skip the flag `decoder_output_stride` if they do not want to use the decoder structure.

Currently there are no fine-tuned checkpoints for the ADE20K dataset.
## Running Tensorboard

Progress for training and evaluation jobs can be inspected using Tensorboard. If using the recommended directory structure, Tensorboard can be run using the following command:

```bash
tensorboard --logdir=${PATH_TO_LOG_DIRECTORY}
```

where `${PATH_TO_LOG_DIRECTORY}` points to the directory that contains the train directory (e.g., the folder `train_on_train_set` in the above example). Please note it may take Tensorboard a couple of minutes to populate with data.
research/deeplab/model.py
```diff
@@ -64,19 +64,26 @@ _CONCAT_PROJECTION_SCOPE = 'concat_projection'
 _DECODER_SCOPE = 'decoder'
 
 
-def get_extra_layer_scopes():
+def get_extra_layer_scopes(last_layers_contain_logits_only=False):
   """Gets the scopes for extra layers.
 
+  Args:
+    last_layers_contain_logits_only: Boolean, True if only consider logits as
+      the last layer (i.e., exclude ASPP module, decoder module and so on).
+
   Returns:
     A list of scopes for extra layers.
   """
-  return [
-      _LOGITS_SCOPE_NAME,
-      _IMAGE_POOLING_SCOPE,
-      _ASPP_SCOPE,
-      _CONCAT_PROJECTION_SCOPE,
-      _DECODER_SCOPE,
-  ]
+  if last_layers_contain_logits_only:
+    return [_LOGITS_SCOPE_NAME]
+  else:
+    return [
+        _LOGITS_SCOPE_NAME,
+        _IMAGE_POOLING_SCOPE,
+        _ASPP_SCOPE,
+        _CONCAT_PROJECTION_SCOPE,
+        _DECODER_SCOPE,
+    ]
 
 
 def predict_labels_multi_scale(images,
```
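A quick illustration of the two return values (assuming the scope-name constants keep the values visible above, e.g. `_DECODER_SCOPE = 'decoder'`):

```python
from deeplab import model

# Default: every extra-layer scope (logits, image pooling, ASPP,
# concat projection, decoder).
print(model.get_extra_layer_scopes())
# With the new behavior: only the logits scope, so only the prediction
# head is treated as a "last layer" by callers.
print(model.get_extra_layer_scopes(last_layers_contain_logits_only=True))
```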
research/deeplab/train.py
```diff
@@ -122,6 +122,9 @@ flags.DEFINE_string('tf_initial_checkpoint', None,
 flags.DEFINE_boolean('initialize_last_layer', True,
                      'Initialize the last layer.')
 
+flags.DEFINE_boolean('last_layers_contain_logits_only', False,
+                     'Only consider logits as last layers or not.')
+
 flags.DEFINE_integer('slow_start_step', 0,
                      'Training model with small learning rate for few steps.')
```

```diff
@@ -322,7 +325,7 @@ def main(unused_argv):
     summaries.add(tf.summary.scalar('total_loss', total_loss))
 
     # Modify the gradients for biases and last layer variables.
-    last_layers = model.get_extra_layer_scopes()
+    last_layers = model.get_extra_layer_scopes(FLAGS.last_layers_contain_logits_only)
     grad_mult = train_utils.get_model_gradient_multipliers(
         last_layers, FLAGS.last_layer_gradient_multiplier)
     if grad_mult:
```
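The scope list matters here because `get_model_gradient_multipliers` derives per-variable gradient multipliers from it. A self-contained toy mirror of that scope-based selection (illustrative only; the variable names are made up and the real logic lives in train_utils):

```python
def multipliers(var_names, last_layers, mult):
    # Variables whose name falls under one of the given scopes get `mult`.
    return {name: mult for name in var_names
            if any(scope in name for scope in last_layers)}

names = ['xception_65/entry_flow/conv1_1/weights',
         'logits/semantic/weights',
         'aspp0/weights',
         'decoder/feature_projection0/weights']
# Flag set: only the logits variable is boosted.
print(multipliers(names, ['logits'], 10.0))
# Flag unset (conceptually): ASPP and decoder variables are boosted too.
print(multipliers(names, ['logits', 'aspp', 'decoder'], 10.0))
```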