Commit b668f594 authored by pkulzc

Sync to latest master.

parents d5fc3ef0 32aa6563
["1.8.0-dev20180325", "v1.7.0-rc1-750-g6c1737e6c8"] ["1.8.0-dev20180408", "v1.7.0-1345-gb874783ccd"]
\ No newline at end of file \ No newline at end of file
#!/bin/bash #!/bin/bash
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Presubmit script that runs tests and lint under the local environment.
# Make sure that tensorflow and pylint are installed.
......
@@ -26,6 +26,7 @@ import tensorflow as tf  # pylint: disable=g-bad-import-order
from official.utils.arg_parsers import parsers
from official.utils.logs import hooks_helper
from official.utils.misc import model_helpers
_CSV_COLUMNS = [
    'age', 'workclass', 'fnlwgt', 'education', 'education_num',

@@ -211,12 +212,17 @@ def main(argv):
    for key in sorted(results):
      print('%s: %s' % (key, results[key]))
    if model_helpers.past_stop_threshold(
        flags.stop_threshold, results['accuracy']):
      break

class WideDeepArgParser(argparse.ArgumentParser):
  """Argument parser for running the wide deep model."""

  def __init__(self):
-    super(WideDeepArgParser, self).__init__(parents=[parsers.BaseParser()])
+    super(WideDeepArgParser, self).__init__(parents=[
+        parsers.BaseParser(multi_gpu=False, num_gpu=False)])
    self.add_argument(
        '--model_type', '-mt', type=str, default='wide_deep',
        choices=['wide', 'deep', 'wide_deep'],
......
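The early-stopping hunk above relies on `model_helpers.past_stop_threshold` to end the train/evaluate loop once accuracy is good enough. A minimal sketch of the check that helper is assumed to perform (the real implementation lives in `official/utils/misc/model_helpers.py`):

```python
def past_stop_threshold(stop_threshold, eval_metric):
  """Sketch: returns True once eval_metric reaches stop_threshold."""
  # No threshold configured: never stop early.
  if stop_threshold is None:
    return False
  # Stop once the monitored metric (here, accuracy) meets the threshold.
  return eval_metric >= stop_threshold
```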
@@ -90,6 +90,7 @@ Running:
* <a href='g3doc/installation.md'>Installation.</a><br>
* <a href='g3doc/pascal.md'>Running DeepLab on PASCAL VOC 2012 semantic segmentation dataset.</a><br>
* <a href='g3doc/cityscapes.md'>Running DeepLab on Cityscapes semantic segmentation dataset.</a><br>
* <a href='g3doc/ade20k.md'>Running DeepLab on ADE20K semantic segmentation dataset.</a><br>

Models:
......
@@ -160,7 +160,7 @@ def extract_features(images,
                     preprocess_images=True,
                     num_classes=None,
                     global_pool=False):
-  """Extracts features by the parituclar model_variant.
+  """Extracts features by the particular model_variant.

  Args:
    images: A tensor of size [batch, height, width, channels].
......
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import math
import os
import random
import string
import sys
import build_data
import tensorflow as tf
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string(
    'train_image_folder',
    './ADE20K/ADEChallengeData2016/images/training',
    'Folder containing training images.')

tf.app.flags.DEFINE_string(
    'train_image_label_folder',
    './ADE20K/ADEChallengeData2016/annotations/training',
    'Folder containing annotations for training images.')

tf.app.flags.DEFINE_string(
    'val_image_folder',
    './ADE20K/ADEChallengeData2016/images/validation',
    'Folder containing validation images.')

tf.app.flags.DEFINE_string(
    'val_image_label_folder',
    './ADE20K/ADEChallengeData2016/annotations/validation',
    'Folder containing annotations for validation.')

tf.app.flags.DEFINE_string(
    'output_dir', './ADE20K/tfrecord',
    'Path to save converted TFRecord of TensorFlow examples.')
_NUM_SHARDS = 4

def _convert_dataset(dataset_split, dataset_dir, dataset_label_dir):
  """Converts the ADE20k dataset into tfrecord format (SSTable).

  Args:
    dataset_split: Dataset split (e.g., train, val).
    dataset_dir: Dir in which the dataset is located.
    dataset_label_dir: Dir in which the annotations are located.

  Raises:
    RuntimeError: If loaded image and label have different shape.
  """
  img_names = tf.gfile.Glob(os.path.join(dataset_dir, '*.jpg'))
  random.shuffle(img_names)
  seg_names = []
  for f in img_names:
    # Get the filename without the extension.
    basename = os.path.basename(f).split('.')[0]
    # Derive the path of its corresponding annotation (*.png).
    seg = os.path.join(dataset_label_dir, basename + '.png')
    seg_names.append(seg)

  num_images = len(img_names)
  num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))
  image_reader = build_data.ImageReader('jpeg', channels=3)
  label_reader = build_data.ImageReader('png', channels=1)
  for shard_id in range(_NUM_SHARDS):
    output_filename = os.path.join(
        FLAGS.output_dir,
        '%s-%05d-of-%05d.tfrecord' % (dataset_split, shard_id, _NUM_SHARDS))
    with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
      start_idx = shard_id * num_per_shard
      end_idx = min((shard_id + 1) * num_per_shard, num_images)
      for i in range(start_idx, end_idx):
        sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
            i + 1, num_images, shard_id))
        sys.stdout.flush()
        # Read the image.
        image_filename = img_names[i]
        image_data = tf.gfile.FastGFile(image_filename, 'r').read()
        height, width = image_reader.read_image_dims(image_data)
        # Read the semantic segmentation annotation.
        seg_filename = seg_names[i]
        seg_data = tf.gfile.FastGFile(seg_filename, 'r').read()
        seg_height, seg_width = label_reader.read_image_dims(seg_data)
        if height != seg_height or width != seg_width:
          raise RuntimeError('Shape mismatched between image and label.')
        # Convert to tf example.
        example = build_data.image_seg_to_tfexample(
            image_data, img_names[i], height, width, seg_data)
        tfrecord_writer.write(example.SerializeToString())
    sys.stdout.write('\n')
    sys.stdout.flush()

def main(unused_argv):
  tf.gfile.MakeDirs(FLAGS.output_dir)
  _convert_dataset(
      'train', FLAGS.train_image_folder, FLAGS.train_image_label_folder)
  _convert_dataset('val', FLAGS.val_image_folder, FLAGS.val_image_label_folder)


if __name__ == '__main__':
  tf.app.run()
@@ -30,6 +30,7 @@ The Example proto contains the following fields:
  image/segmentation/class/format: semantic segmentation file format.
"""
import collections
import six
import tensorflow as tf

FLAGS = tf.app.flags.FLAGS
@@ -126,7 +127,7 @@ def _bytes_list_feature(values):
    A TF-Feature.
  """
  def norm2bytes(value):
-    return value.encode() if isinstance(value, str) else value
+    return value.encode() if isinstance(value, str) and six.PY3 else value

  return tf.train.Feature(
      bytes_list=tf.train.BytesList(value=[norm2bytes(values)]))
......
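The `six.PY3` guard above matters because `tf.train.BytesList` only accepts bytes: under Python 3 a `str` must be encoded first, while under Python 2 a `str` is already a byte string. A minimal self-contained illustration (the literal value is a made-up example):

```python
import six
import tensorflow as tf

value = 'jpeg'  # example payload; str under Py3, bytes-like under Py2
if isinstance(value, str) and six.PY3:
  value = value.encode()  # BytesList requires bytes under Py3
feature = tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
```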
@@ -50,7 +50,6 @@ The Example proto contains the following fields:
  image/segmentation/class/encoded: encoded semantic segmentation content.
  image/segmentation/class/format: semantic segmentation file format.
"""
-import glob
import math
import os.path
import sys
@@ -133,7 +132,7 @@ def _convert_dataset(dataset_split):

def main(unused_argv):
-  dataset_splits = glob.glob(os.path.join(FLAGS.list_folder, '*.txt'))
+  dataset_splits = tf.gfile.Glob(os.path.join(FLAGS.list_folder, '*.txt'))
  for dataset_split in dataset_splits:
    _convert_dataset(dataset_split)
......
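The swap from `glob.glob` to `tf.gfile.Glob` routes the listing through TensorFlow's filesystem layer, so the same call also works on non-local paths such as `gs://` buckets. A short sketch, with an assumed example folder:

```python
import os
import tensorflow as tf

list_folder = '/tmp/pascal_voc_seg/lists'  # assumed example path
# Unlike glob.glob, this works identically for local and remote paths.
dataset_splits = tf.gfile.Glob(os.path.join(list_folder, '*.txt'))
```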
#!/bin/bash
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
#
# Script to download and preprocess the ADE20K dataset.
#
# Usage:
# bash ./download_and_convert_ade20k.sh
#
# The folder structure is assumed to be:
#  + datasets
#    - build_data.py
#    - build_ade20k_data.py
#    - download_and_convert_ade20k.sh
#    + ADE20K
#      + tfrecord
#      + ADEChallengeData2016
#        + annotations
#          + training
#          + validation
#        + images
#          + training
#          + validation
# Exit immediately if a command exits with a non-zero status.
set -e
CURRENT_DIR=$(pwd)
WORK_DIR="./ADE20K"
mkdir -p "${WORK_DIR}"
cd "${WORK_DIR}"
# Helper function to download and unpack ADE20K dataset.
download_and_uncompress() {
  local BASE_URL=${1}
  local FILENAME=${2}

  if [ ! -f "${FILENAME}" ]; then
    echo "Downloading ${FILENAME} to ${WORK_DIR}"
    wget -nd -c "${BASE_URL}/${FILENAME}"
  fi
  echo "Uncompressing ${FILENAME}"
  unzip "${FILENAME}"
}
# Download the images.
BASE_URL="http://data.csail.mit.edu/places/ADEchallenge"
FILENAME="ADEChallengeData2016.zip"
download_and_uncompress "${BASE_URL}" "${FILENAME}"
cd "${CURRENT_DIR}"
# Root path for ADE20K dataset.
ADE20K_ROOT="${WORK_DIR}/ADEChallengeData2016"
# Build TFRecords of the dataset.
# First, create output directory for storing TFRecords.
OUTPUT_DIR="${WORK_DIR}/tfrecord"
mkdir -p "${OUTPUT_DIR}"
echo "Converting ADE20K dataset..."
python ./build_ade20k_data.py \
  --train_image_folder="${ADE20K_ROOT}/images/training/" \
  --train_image_label_folder="${ADE20K_ROOT}/annotations/training/" \
  --val_image_folder="${ADE20K_ROOT}/images/validation/" \
  --val_image_label_folder="${ADE20K_ROOT}/annotations/validation/" \
  --output_dir="${OUTPUT_DIR}"
@@ -17,13 +17,13 @@
# Script to download and preprocess the PASCAL VOC 2012 dataset.
#
# Usage:
-#   bash ./download_and_preprocess_voc2012.sh
+#   bash ./download_and_convert_voc2012.sh
#
# The folder structure is assumed to be:
#  + datasets
#    - build_data.py
#    - build_voc2012_data.py
-#    - download_and_preprocess_voc2012.sh
+#    - download_and_convert_voc2012.sh
#    - remove_gt_colormap.py
#    + pascal_voc_seg
#      + VOCdevkit

@@ -37,27 +37,27 @@ set -e
CURRENT_DIR=$(pwd)
WORK_DIR="./pascal_voc_seg"
-mkdir -p ${WORK_DIR}
-cd ${WORK_DIR}
+mkdir -p "${WORK_DIR}"
+cd "${WORK_DIR}"

# Helper function to download and unpack VOC 2012 dataset.
download_and_uncompress() {
  local BASE_URL=${1}
  local FILENAME=${2}

-  if [ ! -f ${FILENAME} ]; then
+  if [ ! -f "${FILENAME}" ]; then
    echo "Downloading ${FILENAME} to ${WORK_DIR}"
    wget -nd -c "${BASE_URL}/${FILENAME}"
  fi
  echo "Uncompressing ${FILENAME}"
-  tar -xf ${FILENAME}
+  tar -xf "${FILENAME}"
}

# Download the images.
BASE_URL="http://host.robots.ox.ac.uk/pascal/VOC/voc2012/"
FILENAME="VOCtrainval_11-May-2012.tar"

-download_and_uncompress ${BASE_URL} ${FILENAME}
+download_and_uncompress "${BASE_URL}" "${FILENAME}"

cd "${CURRENT_DIR}"
......
@@ -31,6 +31,11 @@ images for the training, validation and test respectively.
The Cityscapes dataset contains 19 semantic labels (such as road, person, car,
and so on) for urban street scenes.

3. ADE20K dataset (http://groups.csail.mit.edu/vision/datasets/ADE20K)
The ADE20K dataset contains 150 semantic labels for both urban street scenes
and indoor scenes.
References:
  M. Everingham, S. M. A. Eslami, L. V. Gool, C. K. I. Williams, J. Winn,
  and A. Zisserman, The pascal visual object classes challenge a retrospective.

@@ -39,6 +44,9 @@ References:
  M. Cordts, M. Omran, S. Ramos, T. Rehfeld, M. Enzweiler, R. Benenson,
  U. Franke, S. Roth, and B. Schiele, "The cityscapes dataset for semantic urban
  scene understanding," In Proc. of CVPR, 2016.

  B. Zhou, H. Zhao, X. Puig, S. Fidler, A. Barriuso, A. Torralba, "Scene Parsing
  through ADE20K dataset", In Proc. of CVPR, 2017.
"""
import collections
import os.path
@@ -85,10 +93,22 @@ _PASCAL_VOC_SEG_INFORMATION = DatasetDescriptor(
    ignore_label=255,
)

# These numbers (i.e., the 'train'/'val' split sizes) seem to have to be
# hard-coded; you are required to figure them out for your own
# training/testing setup.
_ADE20K_INFORMATION = DatasetDescriptor(
    splits_to_sizes={
        'train': 20210,  # num of samples in images/training
        'val': 2000,  # num of samples in images/validation
    },
    num_classes=150,
    ignore_label=255,
)

_DATASETS_INFORMATION = {
    'cityscapes': _CITYSCAPES_INFORMATION,
    'pascal_voc_seg': _PASCAL_VOC_SEG_INFORMATION,
    'ade20k': _ADE20K_INFORMATION,
}

# Default file pattern of TFRecord of TensorFlow Example.
......
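For readers outside this file, a minimal self-contained sketch of the `DatasetDescriptor` pattern the hunk above extends (assumption: the namedtuple fields mirror the real definition earlier in segmentation_dataset.py):

```python
import collections

DatasetDescriptor = collections.namedtuple(
    'DatasetDescriptor', ['splits_to_sizes', 'num_classes', 'ignore_label'])

_ADE20K_INFORMATION = DatasetDescriptor(
    splits_to_sizes={'train': 20210, 'val': 2000},
    num_classes=150,
    ignore_label=255)

# A training loop can then derive, e.g., how many samples one epoch covers.
print(_ADE20K_INFORMATION.splits_to_sizes['train'])  # 20210
```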
@@ -126,7 +126,7 @@ def main(unused_argv):
    weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

    # Set ignore_label regions to label 0, because metrics.mean_iou requires
-    # range of labels = [0, dataset.num_classes). Note the ignore_lable regions
+    # range of labels = [0, dataset.num_classes). Note the ignore_label regions
    # are not evaluated since the corresponding regions contain weights = 0.
    labels = tf.where(
        tf.equal(labels, dataset.ignore_label), tf.zeros_like(labels), labels)
......
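To see why the remapping above is safe, here is a small runnable sketch of the idiom (TF 1.x API, values made up): pixels equal to `ignore_label` receive weight 0, so relabeling them to 0 keeps all labels inside `[0, num_classes)` without affecting the metric.

```python
import tensorflow as tf

ignore_label = 255
labels = tf.constant([[0, 5, 255]])  # example label row
weights = tf.to_float(tf.not_equal(labels, ignore_label))  # [[1., 1., 0.]]
labels = tf.where(
    tf.equal(labels, ignore_label), tf.zeros_like(labels), labels)  # [[0, 5, 0]]
```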
# Running DeepLab on ADE20K Semantic Segmentation Dataset
This page walks through the steps required to run DeepLab on the ADE20K dataset
on a local machine.
## Download dataset and convert to TFRecord
We have prepared the script (under the folder `datasets`) to download and
convert the ADE20K semantic segmentation dataset to TFRecord.
```bash
# From the tensorflow/models/research/deeplab/datasets directory.
bash download_and_convert_ade20k.sh
```
The converted dataset will be saved at `./deeplab/datasets/ADE20K/tfrecord`.
## Recommended Directory Structure for Training and Evaluation
```
+ datasets
  - build_data.py
  - build_ade20k_data.py
  - download_and_convert_ade20k.sh
  + ADE20K
    + tfrecord
    + exp
      + train_on_train_set
        + train
        + eval
        + vis
    + ADEChallengeData2016
      + annotations
        + training
        + validation
      + images
        + training
        + validation
```
where the folder `train_on_train_set` stores the train/eval/vis events and
results (when training DeepLab on the ADE20K train set).
## Running the train/eval/vis jobs
A local training job using `xception_65` can be run with the following command:
```bash
# From tensorflow/models/research/
python deeplab/train.py \
    --logtostderr \
    --training_number_of_steps=50000 \
    --train_split="train" \
    --model_variant="xception_65" \
    --atrous_rates=6 \
    --atrous_rates=12 \
    --atrous_rates=18 \
    --output_stride=16 \
    --decoder_output_stride=4 \
    --train_crop_size=513 \
    --train_crop_size=513 \
    --train_batch_size=4 \
    --min_resize_value=350 \
    --max_resize_value=500 \
    --resize_factor=16 \
    --fine_tune_batch_norm=False \
    --dataset="ade20k" \
    --initialize_last_layer=False \
    --last_layers_contain_logits_only=True \
    --tf_initial_checkpoint=${PATH_TO_INITIAL_CHECKPOINT} \
    --train_logdir=${PATH_TO_TRAIN_DIR} \
    --dataset_dir=${PATH_TO_DATASET}
```
where `${PATH_TO_INITIAL_CHECKPOINT}` is the path to the initial checkpoint.
For example, if you are using the deeplabv3_pascal_train_aug checkpoint, you
will set this to `/path/to/deeplabv3_pascal_train_aug/model.ckpt`.
`${PATH_TO_TRAIN_DIR}` is the directory to which training checkpoints and
events will be written (it is recommended to set it to the
`train_on_train_set/train` directory above), and `${PATH_TO_DATASET}` is the
directory in which the ADE20K dataset resides (the `tfrecord` directory above).
**Note that for train.py:**
1. In order to fine-tune the BN layers, one needs to use a large batch size
   (> 12) and set fine_tune_batch_norm = True. Here, we simply use a small
   batch size during training for the purpose of demonstration. If you have
   limited GPU memory at hand, please fine-tune from our provided checkpoints,
   whose batch-norm parameters have already been trained, and use a smaller
   learning rate with fine_tune_batch_norm = False.

2. Users should fine-tune `min_resize_value` and `max_resize_value` to get a
   better result. Note that `resize_factor` has to be equal to
   `output_stride`.

3. Users should change atrous_rates from [6, 12, 18] to [12, 24, 36] if
   setting output_stride=8.

4. Users can skip the flag `decoder_output_stride` if they do not want to use
   the decoder structure.

Currently there are no fine-tuned checkpoints for the ADE20K dataset.
## Running Tensorboard
Progress for training and evaluation jobs can be inspected using Tensorboard. If
using the recommended directory structure, Tensorboard can be run using the
following command:
```bash
tensorboard --logdir=${PATH_TO_LOG_DIRECTORY}
```
where `${PATH_TO_LOG_DIRECTORY}` points to the directory that contains the
train, eval, and vis directories (e.g., the folder `train_on_train_set` in the
above example). Please note it may take Tensorboard a couple of minutes to
populate with data.
@@ -64,12 +64,19 @@ _CONCAT_PROJECTION_SCOPE = 'concat_projection'
_DECODER_SCOPE = 'decoder'

-def get_extra_layer_scopes():
+def get_extra_layer_scopes(last_layers_contain_logits_only=False):
  """Gets the scopes for extra layers.

  Args:
    last_layers_contain_logits_only: Boolean, True if only consider logits as
      the last layer (i.e., exclude ASPP module, decoder module and so on).

  Returns:
    A list of scopes for extra layers.
  """
  if last_layers_contain_logits_only:
    return [_LOGITS_SCOPE_NAME]
  else:
    return [
        _LOGITS_SCOPE_NAME,
        _IMAGE_POOLING_SCOPE,
......
@@ -122,6 +122,9 @@ flags.DEFINE_string('tf_initial_checkpoint', None,
flags.DEFINE_boolean('initialize_last_layer', True,
                     'Initialize the last layer.')

flags.DEFINE_boolean('last_layers_contain_logits_only', False,
                     'Only consider logits as last layers or not.')

flags.DEFINE_integer('slow_start_step', 0,
                     'Training model with small learning rate for few steps.')

@@ -322,7 +325,7 @@ def main(unused_argv):
    summaries.add(tf.summary.scalar('total_loss', total_loss))

    # Modify the gradients for biases and last layer variables.
-    last_layers = model.get_extra_layer_scopes()
+    last_layers = model.get_extra_layer_scopes(
+        FLAGS.last_layers_contain_logits_only)
    grad_mult = train_utils.get_model_gradient_multipliers(
        last_layers, FLAGS.last_layer_gradient_multiplier)
    if grad_mult:
......
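Taken together, the two hunks above mean the new flag controls which variable scopes count as "last layers" (and thus receive the boosted gradient multiplier). A hedged sketch of the effect, with the scope-name strings assumed from the module-level constants in model.py:

```python
def get_extra_layer_scopes(last_layers_contain_logits_only=False):
  # Scope names assumed; the real values are module-level constants.
  if last_layers_contain_logits_only:
    return ['logits']
  return ['logits', 'image_pooling', 'aspp', 'concat_projection', 'decoder']

# Useful when fine-tuning on a new dataset (e.g., ADE20K) where only the
# logits layer is reinitialized, as in the ade20k.md training command.
assert get_extra_layer_scopes(True) == ['logits']
```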
@@ -21,6 +21,24 @@ Hyeonwoo Noh, Andre Araujo, Jack Sim, Tobias Weyand, Bohyung Han,
Proc. ICCV'17
```

## News

- DELF achieved state-of-the-art results in a CVPR'18 image retrieval paper:
  [Radenovic et al., "Revisiting Oxford and Paris: Large-Scale Image Retrieval
  Benchmarking"](https://arxiv.org/abs/1803.11285).
- DELF was featured in
  [ModelDepot](https://modeldepot.io/mikeshi/delf/overview).
- DELF is now available in
  [TF-Hub](https://www.tensorflow.org/hub/modules/google/delf/1).
## Dataset
The Google-Landmarks dataset has been released as part of two Kaggle challenges:
[Landmark Recognition](https://www.kaggle.com/c/landmark-recognition-challenge)
and [Landmark Retrieval](https://www.kaggle.com/c/landmark-retrieval-challenge).
If you make use of the dataset in your research, please consider citing the
paper mentioned above.
## Installation

To be able to use this code, please follow [these

@@ -70,14 +88,6 @@ feature extraction and matching:
- `delf_config_example.pbtxt` shows an example instantiation of the DelfConfig
  proto, used for DELF feature extraction.
-## Dataset
-
-The Google Landmarks dataset has been released as part of two Kaggle challenges:
-[Landmark Recognition](https://www.kaggle.com/c/landmark-recognition-challenge)
-and [Landmark Retrieval](https://www.kaggle.com/c/landmark-retrieval-challenge).
-If you make use of the dataset in your research, please consider citing the
-paper mentioned above.
## Maintainers

Andr&eacute; Araujo (@andrefaraujo)
......
@@ -30,6 +30,7 @@ Training a model requires the following:
    inventory*. The inventory describes the specific relationships that you'd
    like the model to differentiate (e.g. *part of* versus *composed of* versus
    *purpose*), and generally may consist of tens of classes.
    You can download the dataset used in the paper from
    [here](https://vered1986.github.io/papers/Tratz2011_Dataset.tar.gz).
2.  You'll need a collection of word embeddings: the path-based model uses the
    word embeddings as part of the path representation, and the distributional
    models use the word embeddings directly as prediction features.
@@ -130,3 +131,8 @@ train, dev, and test sets, and will include a confusion matrix for each.
If you have any questions, issues, or suggestions, feel free to contact either
@vered1986 or @waterson.

If you use this code for any published research, please include the following
citation:

Olive Oil Is Made of Olives, Baby Oil Is Made for Babies: Interpreting Noun
Compounds Using Paraphrases in a Neural Model. Vered Shwartz and Chris
Waterson. NAACL 2018. [link](https://arxiv.org/pdf/1803.08073.pdf)
@@ -104,6 +104,8 @@ def dict_to_tf_example(data,
  truncated = []
  poses = []
  difficult_obj = []
  if data.has_key('object'):
    for obj in data['object']:
      difficult = bool(int(obj['difficult']))
      if ignore_difficult_instances and difficult:
......
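One caveat worth noting: `dict.has_key()` exists only in Python 2. An equivalent, version-agnostic guard uses the `in` operator (a minimal sketch with made-up data):

```python
data = {'object': [{'difficult': '0'}]}  # example parsed-XML dict
if 'object' in data:  # same check as data.has_key('object'), works in Py2 and Py3
  for obj in data['object']:
    difficult = bool(int(obj['difficult']))
```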
@@ -31,7 +31,7 @@ while for the Weighted PASCAL VOC metric the final mAP value will be influenced
Similar to pascal voc 2007 detection metric, but computes the intersection over
union based on the object masks instead of object boxes.

-## Weighted PASCAL VOC detection metric
+## Weighted PASCAL VOC instance segmentation metric

`EvalConfig.metrics_set='weighted_pascal_voc_instance_segmentation_metrics'`
......
@@ -118,7 +118,8 @@ def batch_from_random(batch_size, output_height=224, output_width=224,
      [batch_size, output_height, output_width, num_channels]
  """
  shape = [batch_size, output_height, output_width, num_channels]
-  return np.random.random_sample(shape)
+  # Make sure we return float32, as float64 will not get cast automatically.
+  return np.random.random_sample(shape).astype(np.float32)

################################################################################
......
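A quick self-contained demonstration of the dtype fix above: `np.random.random_sample` returns float64, which TensorFlow will not silently cast to a model's float32 inputs, so the batch is cast explicitly.

```python
import numpy as np

shape = [2, 224, 224, 3]  # example batch shape
batch = np.random.random_sample(shape)  # dtype: float64
batch = batch.astype(np.float32)        # matches float32 model inputs
assert batch.dtype == np.float32
```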