Unverified Commit abd50423 authored by pkulzc, committed by GitHub

Merge pull request #3846 from pkulzc/master

Internal changes for object detection
parents c3b26603 143464d2
......@@ -29,6 +29,7 @@ https://scholar.googleusercontent.com/scholar.bib?q=info:l291WsrB-hQJ:scholar.go
* Jonathan Huang, github: [jch1](https://github.com/jch1)
* Vivek Rathod, github: [tombstone](https://github.com/tombstone)
* Ronny Votel, github: [ronnyvotel](https://github.com/ronnyvotel)
* Derek Chow, github: [derekjchow](https://github.com/derekjchow)
* Chen Sun, github: [jesu9](https://github.com/jesu9)
* Menglong Zhu, github: [dreamdragon](https://github.com/dreamdragon)
......@@ -89,6 +90,16 @@ reporting an issue.
## Release information
### April 2, 2018
Supercharge your mobile phones with the next generation mobile object detector!
We are adding support for MobileNet V2 with SSDLite presented in
[MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381).
This model is 35% faster than Mobilenet V1 SSD on a Google Pixel phone CPU (200ms vs. 270ms) at the same accuracy.
Along with the model definition, we are also releasing a model checkpoint trained on the COCO dataset.
<b>Thanks to contributors</b>: Menglong Zhu, Mark Sandler, Zhichao Lu, Vivek Rathod, Jonathan Huang
### February 9, 2018
We now support instance segmentation!! In this API update we support a number of instance segmentation models similar to those discussed in the [Mask R-CNN paper](https://arxiv.org/abs/1703.06870). For further details refer to
......
......@@ -30,6 +30,7 @@ from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
from object_detection.models import faster_rcnn_inception_v2_feature_extractor as frcnn_inc_v2
from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas
from object_detection.models import faster_rcnn_pnas_feature_extractor as frcnn_pnas
from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1
from object_detection.models import ssd_resnet_v1_fpn_feature_extractor as ssd_resnet_v1_fpn
from object_detection.models.embedded_ssd_mobilenet_v1_feature_extractor import EmbeddedSSDMobileNetV1FeatureExtractor
......@@ -55,6 +56,8 @@ SSD_FEATURE_EXTRACTOR_CLASS_MAP = {
FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP = {
'faster_rcnn_nas':
frcnn_nas.FasterRCNNNASFeatureExtractor,
'faster_rcnn_pnas':
frcnn_pnas.FasterRCNNPNASFeatureExtractor,
'faster_rcnn_inception_resnet_v2':
frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor,
'faster_rcnn_inception_v2':
......@@ -95,13 +98,19 @@ def build(model_config, is_training, add_summaries=True):
def _build_ssd_feature_extractor(feature_extractor_config, is_training,
reuse_weights=None):
reuse_weights=None,
inplace_batchnorm_update=False):
"""Builds a ssd_meta_arch.SSDFeatureExtractor based on config.
Args:
feature_extractor_config: A SSDFeatureExtractor proto config from ssd.proto.
is_training: True if this feature extractor is being built for training.
reuse_weights: if the feature extractor should reuse weights.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs. When
this is false, the user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for the train/loss op in order to update the batch
norm moving average parameters.
Returns:
ssd_meta_arch.SSDFeatureExtractor based on config.
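# A minimal, self-contained sketch (illustration only, not part of this change)
# of the tf.GraphKeys.UPDATE_OPS pattern referenced in the docstring above:
# when inplace_batchnorm_update is False, the train op must take a control
# dependency on the batch norm update ops, otherwise the moving mean and
# variance are never refreshed. All names and shapes here are assumed.
import tensorflow as tf

images = tf.random_uniform([8, 32, 32, 3])
labels = tf.one_hot(tf.random_uniform([8], maxval=10, dtype=tf.int32), 10)
net = tf.layers.conv2d(images, 16, 3, padding='same')
net = tf.layers.batch_normalization(net, training=True)  # registers UPDATE_OPS
logits = tf.layers.dense(tf.reduce_mean(net, axis=[1, 2]), 10)
loss = tf.losses.softmax_cross_entropy(labels, logits)

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
  train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)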
......@@ -126,7 +135,8 @@ def _build_ssd_feature_extractor(feature_extractor_config, is_training,
return feature_extractor_class(is_training, depth_multiplier, min_depth,
pad_to_multiple, conv_hyperparams,
batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise)
use_explicit_padding, use_depthwise,
inplace_batchnorm_update)
def _build_ssd_model(ssd_config, is_training, add_summaries):
......@@ -140,6 +150,7 @@ def _build_ssd_model(ssd_config, is_training, add_summaries):
Returns:
SSDMetaArch based on the config.
Raises:
ValueError: If ssd_config.type is not recognized (i.e. not registered in
model_class_map).
......@@ -147,8 +158,10 @@ def _build_ssd_model(ssd_config, is_training, add_summaries):
num_classes = ssd_config.num_classes
# Feature extractor
feature_extractor = _build_ssd_feature_extractor(ssd_config.feature_extractor,
is_training)
feature_extractor = _build_ssd_feature_extractor(
feature_extractor_config=ssd_config.feature_extractor,
is_training=is_training,
inplace_batchnorm_update=ssd_config.inplace_batchnorm_update)
box_coder = box_coder_builder.build(ssd_config.box_coder)
matcher = matcher_builder.build(ssd_config.matcher)
......@@ -194,7 +207,8 @@ def _build_ssd_model(ssd_config, is_training, add_summaries):
def _build_faster_rcnn_feature_extractor(
feature_extractor_config, is_training, reuse_weights=None):
feature_extractor_config, is_training, reuse_weights=None,
inplace_batchnorm_update=False):
"""Builds a faster_rcnn_meta_arch.FasterRCNNFeatureExtractor based on config.
Args:
......@@ -202,6 +216,11 @@ def _build_faster_rcnn_feature_extractor(
faster_rcnn.proto.
is_training: True if this feature extractor is being built for training.
reuse_weights: if the feature extractor should reuse weights.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs. When
this is false, the user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for the train/loss op in order to update the batch
norm moving average parameters.
Returns:
faster_rcnn_meta_arch.FasterRCNNFeatureExtractor based on config.
......@@ -209,6 +228,8 @@ def _build_faster_rcnn_feature_extractor(
Raises:
ValueError: On invalid feature extractor type.
"""
if inplace_batchnorm_update:
raise ValueError('inplace batchnorm updates not supported.')
feature_type = feature_extractor_config.type
first_stage_features_stride = (
feature_extractor_config.first_stage_features_stride)
......@@ -238,6 +259,7 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
Returns:
FasterRCNNMetaArch based on the config.
Raises:
ValueError: If frcnn_config.type is not recognized (i.e. not registered in
model_class_map).
......@@ -246,7 +268,8 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)
feature_extractor = _build_faster_rcnn_feature_extractor(
frcnn_config.feature_extractor, is_training)
frcnn_config.feature_extractor, is_training,
inplace_batchnorm_update=frcnn_config.inplace_batchnorm_update)
number_of_stages = frcnn_config.number_of_stages
first_stage_anchor_generator = anchor_generator_builder.build(
......
......@@ -25,6 +25,7 @@ from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
from object_detection.models import faster_rcnn_inception_v2_feature_extractor as frcnn_inc_v2
from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas
from object_detection.models import faster_rcnn_pnas_feature_extractor as frcnn_pnas
from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1
from object_detection.models import ssd_resnet_v1_fpn_feature_extractor as ssd_resnet_v1_fpn
from object_detection.models.embedded_ssd_mobilenet_v1_feature_extractor import EmbeddedSSDMobileNetV1FeatureExtractor
......@@ -297,6 +298,7 @@ class ModelBuilderTest(tf.test.TestCase):
def test_create_ssd_mobilenet_v1_model_from_config(self):
model_text_proto = """
ssd {
inplace_batchnorm_update: true
feature_extractor {
type: 'ssd_mobilenet_v1'
conv_hyperparams {
......@@ -519,6 +521,7 @@ class ModelBuilderTest(tf.test.TestCase):
def test_create_faster_rcnn_resnet_v1_models_from_config(self):
model_text_proto = """
faster_rcnn {
inplace_batchnorm_update: true
num_classes: 3
image_resizer {
keep_aspect_ratio_resizer {
......@@ -726,6 +729,73 @@ class ModelBuilderTest(tf.test.TestCase):
model._feature_extractor,
frcnn_nas.FasterRCNNNASFeatureExtractor)
def test_create_faster_rcnn_pnas_model_from_config(self):
model_text_proto = """
faster_rcnn {
num_classes: 3
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_pnas'
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
initial_crop_size: 17
maxpool_kernel_size: 1
maxpool_stride: 1
second_stage_box_predictor {
mask_rcnn_box_predictor {
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.01
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
model = model_builder.build(model_proto, is_training=True)
self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch)
self.assertIsInstance(
model._feature_extractor,
frcnn_pnas.FasterRCNNPNASFeatureExtractor)
def test_create_faster_rcnn_inception_resnet_v2_model_from_config(self):
model_text_proto = """
faster_rcnn {
......
......@@ -17,6 +17,7 @@
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import errors
from tensorflow.python.framework import ops
from object_detection.core import box_list
from object_detection.core import box_list_ops
......@@ -509,9 +510,13 @@ class BoxListOpsTest(tf.test.TestCase):
with self.assertRaises(ValueError):
box_list_ops.sort_by_field(boxes, 'misc')
with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError,
'Incorrect field size'):
sess.run(box_list_ops.sort_by_field(boxes, 'weights').get())
if ops._USE_C_API:
with self.assertRaises(ValueError):
box_list_ops.sort_by_field(boxes, 'weights')
else:
with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError,
'Incorrect field size'):
sess.run(box_list_ops.sort_by_field(boxes, 'weights').get())
def test_visualize_boxes_in_image(self):
image = tf.zeros((6, 4, 3))
......
......@@ -2279,7 +2279,11 @@ def resize_image(image,
return new_masks
def reshape_masks_branch():
new_masks = tf.reshape(masks, [0, new_size[0], new_size[1]])
# The shape function will be computed for both branches of the
# condition, regardless of which branch is actually taken. Make sure
# that we don't trigger an assertion in the shape function when trying
# to reshape a non empty tensor into an empty one.
new_masks = tf.reshape(masks, [-1, new_size[0], new_size[1]])
return new_masks
masks = tf.cond(num_instances > 0, resize_masks_branch,
......
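# A standalone sketch (assumed example, not part of this change) of the tf.cond
# behavior described in the comment above: the static shape function runs for
# both branches even though only one is taken, so the "empty" branch must also
# describe a valid reshape. A leading dimension of -1 works for both empty and
# non-empty mask tensors, whereas a hard-coded 0 does not.
import tensorflow as tf

masks = tf.placeholder(tf.float32, [None, 8, 8])
num_instances = tf.shape(masks)[0]

def non_empty_branch():
  return tf.identity(masks)  # stand-in for the real resize branch

def empty_branch():
  return tf.reshape(masks, [-1, 8, 8])  # only executed when num_instances == 0

new_masks = tf.cond(num_instances > 0, non_empty_branch, empty_branch)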
......@@ -64,7 +64,7 @@ cd ${SCRATCH_DIR}
# Download the images.
BASE_IMAGE_URL="http://images.cocodataset.org/zips"
# TRAIN_IMAGE_FILE="train2017.zip"
TRAIN_IMAGE_FILE="train2017.zip"
download_and_unzip ${BASE_IMAGE_URL} ${TRAIN_IMAGE_FILE}
TRAIN_IMAGE_DIR="${SCRATCH_DIR}/train2017"
......@@ -91,7 +91,7 @@ download_and_unzip ${BASE_IMAGE_INFO_URL} ${IMAGE_INFO_FILE}
TESTDEV_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/image_info_test-dev2017.json"
# # Build TFRecords of the image data.
# Build TFRecords of the image data.
cd "${CURRENT_DIR}"
python object_detection/dataset_tools/create_coco_tf_record.py \
--logtostderr \
......
......@@ -79,7 +79,7 @@ def visualize_detection_results(result_dict,
data corresponding to each image being evaluated. The following keys
are required:
'original_image': a numpy array representing the image with shape
[1, height, width, 3]
[1, height, width, 3] or [1, height, width, 1]
'detection_boxes': a numpy array of shape [N, 4]
'detection_scores': a numpy array of shape [N]
'detection_classes': a numpy array of shape [N]
......@@ -133,6 +133,8 @@ def visualize_detection_results(result_dict,
category_index = label_map_util.create_category_index(categories)
image = np.squeeze(result_dict[input_fields.original_image], axis=0)
if image.shape[2] == 1: # If one channel image, repeat in RGB.
image = np.tile(image, [1, 1, 3])
detection_boxes = result_dict[detection_fields.detection_boxes]
detection_scores = result_dict[detection_fields.detection_scores]
detection_classes = np.int32((result_dict[
......
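# Tiny illustrative check (assumed values, not part of this change) of the
# single-channel handling above: a [H, W, 1] grayscale image is tiled across
# the channel axis so the visualization code can treat it as RGB.
import numpy as np

gray = np.zeros((4, 6, 1), dtype=np.uint8)
rgb = np.tile(gray, [1, 1, 3])
assert rgb.shape == (4, 6, 3)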
......@@ -94,14 +94,24 @@ def _extract_predictions_and_losses(model,
if fields.InputDataFields.groundtruth_group_of in input_dict:
groundtruth[fields.InputDataFields.groundtruth_group_of] = (
input_dict[fields.InputDataFields.groundtruth_group_of])
groundtruth_masks_list = None
if fields.DetectionResultFields.detection_masks in detections:
groundtruth[fields.InputDataFields.groundtruth_instance_masks] = (
input_dict[fields.InputDataFields.groundtruth_instance_masks])
groundtruth_masks_list = [
input_dict[fields.InputDataFields.groundtruth_instance_masks]]
groundtruth_keypoints_list = None
if fields.DetectionResultFields.detection_keypoints in detections:
groundtruth[fields.InputDataFields.groundtruth_keypoints] = (
input_dict[fields.InputDataFields.groundtruth_keypoints])
groundtruth_keypoints_list = [
input_dict[fields.InputDataFields.groundtruth_keypoints]]
label_id_offset = 1
model.provide_groundtruth(
[input_dict[fields.InputDataFields.groundtruth_boxes]],
[tf.one_hot(input_dict[fields.InputDataFields.groundtruth_classes]
- label_id_offset, depth=model.num_classes)])
- label_id_offset, depth=model.num_classes)],
groundtruth_masks_list, groundtruth_keypoints_list)
losses_dict.update(model.loss(prediction_dict, true_image_shapes))
result_dict = eval_util.result_dict_for_single_example(
......@@ -205,7 +215,7 @@ def evaluate(create_input_dict_fn, create_model_fn, eval_config, categories,
except tf.errors.InvalidArgumentError:
logging.info('Skipping image')
counters['skipped'] += 1
return {}
return {}, {}
global_step = tf.train.global_step(sess, tf.train.get_global_step())
if batch_index < eval_config.num_visualizations:
tag = 'image-{}'.format(batch_index)
......
......@@ -19,7 +19,9 @@ In the table below, we list each such pre-trained model including:
aware that these timings depend highly on one's specific hardware
configuration (these timings were performed using an Nvidia
GeForce GTX TITAN X card) and should be treated more as relative timings in
many cases.
many cases. Also note that desktop GPU timing does not always reflect mobile
run time. For example, MobileNet V2 is faster on mobile devices than
MobileNet V1, but is slightly slower on a desktop GPU.
* detector performance on a subset of the COCO validation set or Open Images test split, as measured by the dataset-specific mAP measure.
Here, higher is better, and we only report bounding box mAP rounded to the
nearest integer.
......@@ -68,6 +70,7 @@ Some remarks on frozen inference graphs:
| Model name | Speed (ms) | COCO mAP[^1] | Outputs |
| ------------ | :--------------: | :--------------: | :-------------: |
| [ssd_mobilenet_v1_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2017_11_17.tar.gz) | 30 | 21 | Boxes |
| [ssd_mobilenet_v2_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_coco_2018_03_29.tar.gz) | 31 | 22 | Boxes |
| [ssd_inception_v2_coco](http://download.tensorflow.org/models/object_detection/ssd_inception_v2_coco_2017_11_17.tar.gz) | 42 | 24 | Boxes |
| [faster_rcnn_inception_v2_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz) | 58 | 28 | Boxes |
| [faster_rcnn_resnet50_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_coco_2018_01_28.tar.gz) | 89 | 30 | Boxes |
......
......@@ -37,7 +37,7 @@ environment variable below:
export YOUR_GCS_BUCKET=${YOUR_GCS_BUCKET}
```
It is also possible to run locally by following
It is also possible to run locally by following
[the running locally instructions](running_locally.md).
## Installing Tensorflow and the Tensorflow Object Detection API
......
......@@ -58,7 +58,8 @@ def transform_input_data(tensor_dict,
Data transformation functions are applied in the following order.
1. data_augmentation_fn (optional): applied on tensor_dict.
2. model_preprocess_fn: applied only on image tensor in tensor_dict.
3. image_resizer_fn: applied only on instance mask tensor in tensor_dict.
3. image_resizer_fn: applied on original image and instance mask tensor in
tensor_dict.
4. one_hot_encoding: applied to classes tensor in tensor_dict.
5. merge_multiple_boxes (optional): when groundtruth boxes are exactly the
same they can be merged into a single box with an associated k-hot class
......@@ -70,10 +71,11 @@ def transform_input_data(tensor_dict,
model_preprocess_fn: model's preprocess function to apply on image tensor.
This function must take in a 4-D float tensor and return a 4-D preprocess
float tensor and a tensor containing the true image shape.
image_resizer_fn: image resizer function to apply on groundtruth instance
masks. This function must take a 4-D float tensor of image and a 4-D
tensor of instances masks and return resized version of these along with
the true shapes.
image_resizer_fn: image resizer function to apply on original image (if
`retain_original_image` is True) and groundtruth instance masks. This
function must take a 3-D float tensor of an image and a 3-D tensor of
instance masks and return a resized version of these along with the true
shapes.
num_classes: number of max classes to one-hot (or k-hot) encode the class
labels.
data_augmentation_fn: (optional) data augmentation function to apply on
......@@ -88,17 +90,19 @@ def transform_input_data(tensor_dict,
after applying all the transformations.
"""
if retain_original_image:
tensor_dict[fields.InputDataFields.
original_image] = tensor_dict[fields.InputDataFields.image]
original_image_resized, _ = image_resizer_fn(
tensor_dict[fields.InputDataFields.image])
tensor_dict[fields.InputDataFields.original_image] = tf.cast(
original_image_resized, tf.uint8)
# Apply data augmentation ops.
if data_augmentation_fn is not None:
tensor_dict = data_augmentation_fn(tensor_dict)
# Apply model preprocessing ops and resize instance masks.
image = tf.expand_dims(
tf.to_float(tensor_dict[fields.InputDataFields.image]), axis=0)
preprocessed_resized_image, true_image_shape = model_preprocess_fn(image)
image = tensor_dict[fields.InputDataFields.image]
preprocessed_resized_image, true_image_shape = model_preprocess_fn(
tf.expand_dims(tf.to_float(image), axis=0))
tensor_dict[fields.InputDataFields.image] = tf.squeeze(
preprocessed_resized_image, axis=0)
tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze(
......@@ -156,6 +160,52 @@ def augment_input_data(tensor_dict, data_augmentation_options):
return tensor_dict
def _get_labels_dict(input_dict):
"""Extracts labels dict from input dict."""
required_label_keys = [
fields.InputDataFields.num_groundtruth_boxes,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
fields.InputDataFields.groundtruth_weights
]
labels_dict = {}
for key in required_label_keys:
labels_dict[key] = input_dict[key]
optional_label_keys = [
fields.InputDataFields.groundtruth_keypoints,
fields.InputDataFields.groundtruth_instance_masks,
fields.InputDataFields.groundtruth_area,
fields.InputDataFields.groundtruth_is_crowd,
fields.InputDataFields.groundtruth_difficult
]
for key in optional_label_keys:
if key in input_dict:
labels_dict[key] = input_dict[key]
if fields.InputDataFields.groundtruth_difficult in labels_dict:
labels_dict[fields.InputDataFields.groundtruth_difficult] = tf.cast(
labels_dict[fields.InputDataFields.groundtruth_difficult], tf.int32)
return labels_dict
def _get_features_dict(input_dict):
"""Extracts features dict from input dict."""
hash_from_source_id = tf.string_to_hash_bucket_fast(
input_dict[fields.InputDataFields.source_id], HASH_BINS)
features = {
fields.InputDataFields.image:
input_dict[fields.InputDataFields.image],
HASH_KEY: tf.cast(hash_from_source_id, tf.int32),
fields.InputDataFields.true_image_shape:
input_dict[fields.InputDataFields.true_image_shape]
}
if fields.InputDataFields.original_image in input_dict:
features[fields.InputDataFields.original_image] = input_dict[
fields.InputDataFields.original_image]
return features
def create_train_input_fn(train_config, train_input_config,
model_config):
"""Creates a train `input` function for `Estimator`.
......@@ -184,6 +234,8 @@ def create_train_input_fn(train_config, train_input_config,
features[fields.InputDataFields.true_image_shape] is a [batch_size, 3]
int32 tensor representing the true image shapes, as preprocessed
images could be padded.
features[fields.InputDataFields.original_image] (optional) is a
[batch_size, H, W, C] float32 tensor with original images.
labels: Dictionary of groundtruth tensors.
labels[fields.InputDataFields.num_groundtruth_boxes] is a [batch_size]
int32 tensor indicating the number of groundtruth boxes.
......@@ -233,7 +285,8 @@ def create_train_input_fn(train_config, train_input_config,
transform_input_data, model_preprocess_fn=model.preprocess,
image_resizer_fn=image_resizer_fn,
num_classes=config_util.get_number_of_classes(model_config),
data_augmentation_fn=data_augmentation_fn)
data_augmentation_fn=data_augmentation_fn,
retain_original_image=train_config.retain_original_images)
dataset = INPUT_BUILDER_UTIL_MAP['dataset_build'](
train_input_config,
transform_input_data_fn=transform_data_fn,
......@@ -242,35 +295,8 @@ def create_train_input_fn(train_config, train_input_config,
num_classes=config_util.get_number_of_classes(model_config),
spatial_image_shape=config_util.get_spatial_image_size(
image_resizer_config))
tensor_dict = dataset_util.make_initializable_iterator(dataset).get_next()
hash_from_source_id = tf.string_to_hash_bucket_fast(
tensor_dict[fields.InputDataFields.source_id], HASH_BINS)
features = {
fields.InputDataFields.image: tensor_dict[fields.InputDataFields.image],
HASH_KEY: tf.cast(hash_from_source_id, tf.int32),
fields.InputDataFields.true_image_shape: tensor_dict[
fields.InputDataFields.true_image_shape]
}
labels = {
fields.InputDataFields.num_groundtruth_boxes: tensor_dict[
fields.InputDataFields.num_groundtruth_boxes],
fields.InputDataFields.groundtruth_boxes: tensor_dict[
fields.InputDataFields.groundtruth_boxes],
fields.InputDataFields.groundtruth_classes: tensor_dict[
fields.InputDataFields.groundtruth_classes],
fields.InputDataFields.groundtruth_weights: tensor_dict[
fields.InputDataFields.groundtruth_weights]
}
if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
labels[fields.InputDataFields.groundtruth_keypoints] = tensor_dict[
fields.InputDataFields.groundtruth_keypoints]
if fields.InputDataFields.groundtruth_instance_masks in tensor_dict:
labels[fields.InputDataFields.groundtruth_instance_masks] = tensor_dict[
fields.InputDataFields.groundtruth_instance_masks]
return features, labels
input_dict = dataset_util.make_initializable_iterator(dataset).get_next()
return (_get_features_dict(input_dict), _get_labels_dict(input_dict))
return _train_input_fn
......@@ -345,7 +371,7 @@ def create_eval_input_fn(eval_config, eval_input_config, model_config):
image_resizer_fn=image_resizer_fn,
num_classes=num_classes,
data_augmentation_fn=None,
retain_original_image=True)
retain_original_image=eval_config.retain_original_images)
dataset = INPUT_BUILDER_UTIL_MAP['dataset_build'](
eval_input_config,
transform_input_data_fn=transform_data_fn,
......@@ -355,36 +381,7 @@ def create_eval_input_fn(eval_config, eval_input_config, model_config):
image_resizer_config))
input_dict = dataset_util.make_initializable_iterator(dataset).get_next()
hash_from_source_id = tf.string_to_hash_bucket_fast(
input_dict[fields.InputDataFields.source_id], HASH_BINS)
features = {
fields.InputDataFields.image:
input_dict[fields.InputDataFields.image],
fields.InputDataFields.original_image:
input_dict[fields.InputDataFields.original_image],
HASH_KEY: tf.cast(hash_from_source_id, tf.int32),
fields.InputDataFields.true_image_shape:
input_dict[fields.InputDataFields.true_image_shape]
}
labels = {
fields.InputDataFields.groundtruth_boxes:
input_dict[fields.InputDataFields.groundtruth_boxes],
fields.InputDataFields.groundtruth_classes:
input_dict[fields.InputDataFields.groundtruth_classes],
fields.InputDataFields.groundtruth_area:
input_dict[fields.InputDataFields.groundtruth_area],
fields.InputDataFields.groundtruth_is_crowd:
input_dict[fields.InputDataFields.groundtruth_is_crowd],
fields.InputDataFields.groundtruth_difficult:
tf.cast(input_dict[fields.InputDataFields.groundtruth_difficult],
tf.int32)
}
if fields.InputDataFields.groundtruth_instance_masks in input_dict:
labels[fields.InputDataFields.groundtruth_instance_masks] = input_dict[
fields.InputDataFields.groundtruth_instance_masks]
return features, labels
return (_get_features_dict(input_dict), _get_labels_dict(input_dict))
return _eval_input_fn
......
......@@ -34,16 +34,12 @@ FLAGS = tf.flags.FLAGS
def _get_configs_for_model(model_name):
"""Returns configurations for model."""
fname = os.path.join(
FLAGS.test_srcdir,
('google3/third_party/tensorflow_models/'
'object_detection/samples/configs/' + model_name + '.config'))
label_map_path = os.path.join(FLAGS.test_srcdir,
('google3/third_party/tensorflow_models/'
'object_detection/data/pet_label_map.pbtxt'))
data_path = os.path.join(FLAGS.test_srcdir,
('google3/third_party/tensorflow_models/'
'object_detection/test_data/pets_examples.record'))
fname = os.path.join(tf.resource_loader.get_data_files_path(),
'samples/configs/' + model_name + '.config')
label_map_path = os.path.join(tf.resource_loader.get_data_files_path(),
'data/pet_label_map.pbtxt')
data_path = os.path.join(tf.resource_loader.get_data_files_path(),
'test_data/pets_examples.record')
configs = config_util.get_configs_from_pipeline_file(fname)
return config_util.merge_external_params_with_configs(
configs,
......@@ -462,22 +458,31 @@ class DataTransformationFnTest(tf.test.TestCase):
fields.InputDataFields.groundtruth_classes:
tf.constant(np.array([3, 1], np.int32))
}
def fake_image_resizer_fn(image, masks):
def fake_image_resizer_fn(image, masks=None):
resized_image = tf.image.resize_images(image, [8, 8])
resized_masks = tf.transpose(
tf.image.resize_images(tf.transpose(masks, [1, 2, 0]), [8, 8]),
[2, 0, 1])
return resized_image, resized_masks, tf.shape(resized_image)
results = [resized_image]
if masks is not None:
resized_masks = tf.transpose(
tf.image.resize_images(tf.transpose(masks, [1, 2, 0]), [8, 8]),
[2, 0, 1])
results.append(resized_masks)
results.append(tf.shape(resized_image))
return results
num_classes = 3
input_transformation_fn = functools.partial(
inputs.transform_input_data,
model_preprocess_fn=_fake_model_preprocessor_fn,
image_resizer_fn=fake_image_resizer_fn,
num_classes=num_classes)
num_classes=num_classes,
retain_original_image=True)
with self.test_session() as sess:
transformed_inputs = sess.run(
input_transformation_fn(tensor_dict=tensor_dict))
self.assertAllEqual(transformed_inputs[
fields.InputDataFields.original_image].dtype, tf.uint8)
self.assertAllEqual(transformed_inputs[
fields.InputDataFields.original_image].shape, [8, 8, 3])
self.assertAllEqual(transformed_inputs[
fields.InputDataFields.groundtruth_instance_masks].shape, [2, 8, 8])
......
......@@ -46,7 +46,8 @@ class SSDFeatureExtractor(object):
batch_norm_trainable=True,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
inplace_batchnorm_update=False):
"""Constructor.
Args:
......@@ -64,6 +65,10 @@ class SSDFeatureExtractor(object):
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch norm moving average
values inplace. When this is false, the train op must add a control
dependency on the tf.GraphKeys.UPDATE_OPS collection in order to update
the batch norm statistics.
"""
self._is_training = is_training
self._depth_multiplier = depth_multiplier
......@@ -71,6 +76,7 @@ class SSDFeatureExtractor(object):
self._pad_to_multiple = pad_to_multiple
self._conv_hyperparams = conv_hyperparams
self._batch_norm_trainable = batch_norm_trainable
self._inplace_batchnorm_update = inplace_batchnorm_update
self._reuse_weights = reuse_weights
self._use_explicit_padding = use_explicit_padding
self._use_depthwise = use_depthwise
......@@ -108,7 +114,29 @@ class SSDFeatureExtractor(object):
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
pass
batchnorm_updates_collections = (None if self._inplace_batchnorm_update
else tf.GraphKeys.UPDATE_OPS)
with slim.arg_scope([slim.batch_norm],
updates_collections=batchnorm_updates_collections):
return self._extract_features(preprocessed_inputs)
@abstractmethod
def _extract_features(self, preprocessed_inputs):
"""Extracts features from preprocessed inputs.
This function is responsible for extracting feature maps from preprocessed
images.
Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
raise NotImplementedError
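# Hypothetical minimal subclass (illustration only, not part of this change)
# showing the new template-method split: concrete extractors now override
# _extract_features, while the base class's extract_features wraps the call in
# the batch norm updates_collections arg_scope above.
class _ToySSDFeatureExtractor(SSDFeatureExtractor):

  def preprocess(self, resized_inputs):
    return (2.0 / 255.0) * resized_inputs - 1.0

  def _extract_features(self, preprocessed_inputs):
    # A single conv feature map is enough to satisfy the interface.
    return [slim.conv2d(preprocessed_inputs, 32, [3, 3])]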
class SSDMetaArch(model.DetectionModel):
......
......@@ -49,8 +49,8 @@ tf.flags.DEFINE_string('model_dir', None, 'Path to output model directory '
'where event and checkpoint files will be written.')
tf.flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config '
'file.')
tf.flags.DEFINE_integer('num_train_steps', 500000, 'Number of train steps.')
tf.flags.DEFINE_integer('num_eval_steps', 10000, 'Number of train steps.')
tf.flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
tf.flags.DEFINE_integer('num_eval_steps', None, 'Number of eval steps.')
FLAGS = tf.flags.FLAGS
......@@ -225,7 +225,14 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
labels,
unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors)
elif mode == tf.estimator.ModeKeys.EVAL:
labels = unstack_batch(labels, unpad_groundtruth_tensors=False)
# When evaluating on training data, it is necessary to check whether the
# groundtruth must be unpadded.
boxes_shape = (
labels[fields.InputDataFields.groundtruth_boxes].get_shape()
.as_list())
unpad_groundtruth_tensors = True if boxes_shape[1] is not None else False
labels = unstack_batch(
labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)
if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
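# A small sketch (assumed shapes, not part of this change) of the static shape
# check above: padded groundtruth from the train input pipeline has a known
# second dimension, while eval-style input leaves it dynamic (None).
import tensorflow as tf

padded_boxes = tf.placeholder(tf.float32, [2, 100, 4])    # padded  -> unpad
dynamic_boxes = tf.placeholder(tf.float32, [2, None, 4])   # already unpadded
print(padded_boxes.get_shape().as_list()[1] is not None)   # True
print(dynamic_boxes.get_shape().as_list()[1] is not None)  # False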
......@@ -241,7 +248,9 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
groundtruth_boxes_list=gt_boxes_list,
groundtruth_classes_list=gt_classes_list,
groundtruth_masks_list=gt_masks_list,
groundtruth_keypoints_list=gt_keypoints_list)
groundtruth_keypoints_list=gt_keypoints_list,
groundtruth_weights_list=labels[
fields.InputDataFields.groundtruth_weights])
preprocessed_images = features[fields.InputDataFields.image]
prediction_dict = detection_model.predict(
......@@ -250,14 +259,6 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
prediction_dict, features[fields.InputDataFields.true_image_shape])
if mode == tf.estimator.ModeKeys.TRAIN:
if not train_config.fine_tune_checkpoint_type:
# train_config.from_detection_checkpoint field is deprecated. For
# backward compatibility, sets finetune_checkpoint_type based on
# from_detection_checkpoint.
if train_config.from_detection_checkpoint:
train_config.fine_tune_checkpoint_type = 'detection'
else:
train_config.fine_tune_checkpoint_type = 'classification'
if train_config.fine_tune_checkpoint and hparams.load_pretrained:
if not train_config.fine_tune_checkpoint_type:
# train_config.from_detection_checkpoint field is deprecated. For
......@@ -341,17 +342,16 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
}
eval_metric_ops = None
if mode == tf.estimator.ModeKeys.EVAL:
# Detection summaries during eval.
if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
class_agnostic = (fields.DetectionResultFields.detection_classes
not in detections)
groundtruth = _get_groundtruth_data(detection_model, class_agnostic)
use_original_images = fields.InputDataFields.original_image in features
eval_images = (
original_images = (
features[fields.InputDataFields.original_image] if use_original_images
else features[fields.InputDataFields.image])
eval_dict = eval_util.result_dict_for_single_example(
eval_images[0:1],
original_images[0:1],
features[inputs.HASH_KEY][0],
detections,
groundtruth,
......@@ -363,21 +363,26 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
else:
category_index = label_map_util.create_category_index_from_labelmap(
eval_input_config.label_map_path)
img_summary = None
if not use_tpu and use_original_images:
detection_and_groundtruth = (
vis_utils.draw_side_by_side_evaluation_image(
eval_dict, category_index, max_boxes_to_draw=20,
min_score_thresh=0.2))
tf.summary.image('Detections_Left_Groundtruth_Right',
detection_and_groundtruth)
# Eval metrics on a single image.
eval_metrics = eval_config.metrics_set
if not eval_metrics:
eval_metrics = ['coco_detection_metrics']
eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
eval_metrics, category_index.values(), eval_dict,
include_metrics_per_category=False)
img_summary = tf.summary.image('Detections_Left_Groundtruth_Right',
detection_and_groundtruth)
if mode == tf.estimator.ModeKeys.EVAL:
# Eval metrics on a single example.
eval_metrics = eval_config.metrics_set
if not eval_metrics:
eval_metrics = ['coco_detection_metrics']
eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
eval_metrics, category_index.values(), eval_dict,
include_metrics_per_category=False)
if img_summary is not None:
eval_metric_ops['Detections_Left_Groundtruth_Right'] = (
img_summary, tf.no_op())
if use_tpu:
return tf.contrib.tpu.TPUEstimatorSpec(
......
......@@ -32,20 +32,19 @@ from object_detection.builders import model_builder
from object_detection.core import standard_fields as fields
from object_detection.utils import config_util
FLAGS = tf.flags.FLAGS
MODEL_NAME_FOR_TEST = model_test_util.SSD_INCEPTION_MODEL_NAME
def _get_data_path():
"""Returns an absolute path to TFRecord file."""
return os.path.join(FLAGS.test_srcdir, model_test_util.PATH_BASE, 'test_data',
return os.path.join(tf.resource_loader.get_data_files_path(), 'test_data',
'pets_examples.record')
def _get_labelmap_path():
"""Returns an absolute path to label map file."""
return os.path.join(FLAGS.test_srcdir, model_test_util.PATH_BASE, 'data',
return os.path.join(tf.resource_loader.get_data_files_path(), 'data',
'pet_label_map.pbtxt')
......
......@@ -28,13 +28,12 @@ FLAGS = tf.flags.FLAGS
FASTER_RCNN_MODEL_NAME = 'faster_rcnn_resnet50_pets'
SSD_INCEPTION_MODEL_NAME = 'ssd_inception_v2_pets'
PATH_BASE = 'google3/third_party/tensorflow_models/object_detection/'
def GetPipelineConfigPath(model_name):
"""Returns path to the local pipeline config file."""
return os.path.join(FLAGS.test_srcdir, PATH_BASE, 'samples', 'configs',
model_name + '.config')
return os.path.join(tf.resource_loader.get_data_files_path(), 'samples',
'configs', model_name + '.config')
def InitializeFlags(model_name_for_test):
......
......@@ -53,7 +53,8 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
batch_norm_trainable=True,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
inplace_batchnorm_update=False):
"""MobileNetV1 Feature Extractor for Embedded-friendly SSD Models.
Args:
......@@ -71,6 +72,11 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, the user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for the train/loss op in order to update the
batch norm moving average parameters.
Raises:
ValueError: upon invalid `pad_to_multiple` values.
......@@ -82,9 +88,9 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
super(EmbeddedSSDMobileNetV1FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise)
use_explicit_padding, use_depthwise, inplace_batchnorm_update)
def extract_features(self, preprocessed_inputs):
def _extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
......
......@@ -22,30 +22,6 @@ from nets import mobilenet_v1
slim = tf.contrib.slim
def _batch_norm_arg_scope(list_ops,
use_batch_norm=True,
batch_norm_decay=0.9997,
batch_norm_epsilon=0.001,
batch_norm_scale=False,
train_batch_norm=False):
"""Slim arg scope for Mobilenet V1 batch norm."""
if use_batch_norm:
batch_norm_params = {
'is_training': train_batch_norm,
'scale': batch_norm_scale,
'decay': batch_norm_decay,
'epsilon': batch_norm_epsilon
}
normalizer_fn = slim.batch_norm
else:
normalizer_fn = None
batch_norm_params = None
return slim.arg_scope(list_ops,
normalizer_fn=normalizer_fn,
normalizer_params=batch_norm_params)
class FasterRCNNMobilenetV1FeatureExtractor(
faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
"""Faster R-CNN Mobilenet V1 feature extractor implementation."""
......@@ -121,18 +97,19 @@ class FasterRCNNMobilenetV1FeatureExtractor(
['image size must at least be 33 in both height and width.'])
with tf.control_dependencies([shape_assert]):
with tf.variable_scope('MobilenetV1',
reuse=self._reuse_weights) as scope:
with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d],
batch_norm_scale=True,
train_batch_norm=self._train_batch_norm):
with slim.arg_scope(
mobilenet_v1.mobilenet_v1_arg_scope(
is_training=self._train_batch_norm,
weight_decay=self._weight_decay)):
with tf.variable_scope('MobilenetV1',
reuse=self._reuse_weights) as scope:
_, activations = mobilenet_v1.mobilenet_v1_base(
preprocessed_inputs,
final_endpoint='Conv2d_13_pointwise',
final_endpoint='Conv2d_11_pointwise',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
scope=scope)
return activations['Conv2d_13_pointwise'], activations
return activations['Conv2d_11_pointwise'], activations
def _extract_box_classifier_features(self, proposal_feature_maps, scope):
"""Extracts second stage box classifier features.
......@@ -152,9 +129,10 @@ class FasterRCNNMobilenetV1FeatureExtractor(
depth = lambda d: max(int(d * 1.0), 16)
with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights):
with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d],
batch_norm_scale=True,
train_batch_norm=self._train_batch_norm):
with slim.arg_scope(
mobilenet_v1.mobilenet_v1_arg_scope(
is_training=self._train_batch_norm,
weight_decay=self._weight_decay)):
with slim.arg_scope(
[slim.conv2d, slim.separable_conv2d], padding='SAME'):
net = slim.separable_conv2d(
......
......@@ -44,7 +44,7 @@ class FasterRcnnMobilenetV1FeatureExtractorTest(tf.test.TestCase):
with self.test_session() as sess:
sess.run(init_op)
features_shape_out = sess.run(features_shape)
self.assertAllEqual(features_shape_out, [4, 7, 7, 1024])
self.assertAllEqual(features_shape_out, [4, 14, 14, 512])
def test_extract_proposal_features_stride_eight(self):
feature_extractor = self._build_feature_extractor(
......@@ -59,7 +59,7 @@ class FasterRcnnMobilenetV1FeatureExtractorTest(tf.test.TestCase):
with self.test_session() as sess:
sess.run(init_op)
features_shape_out = sess.run(features_shape)
self.assertAllEqual(features_shape_out, [4, 7, 7, 1024])
self.assertAllEqual(features_shape_out, [4, 14, 14, 512])
def test_extract_proposal_features_half_size_input(self):
feature_extractor = self._build_feature_extractor(
......@@ -74,7 +74,7 @@ class FasterRcnnMobilenetV1FeatureExtractorTest(tf.test.TestCase):
with self.test_session() as sess:
sess.run(init_op)
features_shape_out = sess.run(features_shape)
self.assertAllEqual(features_shape_out, [1, 4, 4, 1024])
self.assertAllEqual(features_shape_out, [1, 7, 7, 512])
def test_extract_proposal_features_dies_on_invalid_stride(self):
with self.assertRaises(ValueError):
......
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""PNASNet Faster R-CNN implementation.
Based on PNASNet model: https://arxiv.org/abs/1712.00559
"""
import tensorflow as tf
from object_detection.meta_architectures import faster_rcnn_meta_arch
from nets.nasnet import nasnet_utils
from nets.nasnet import pnasnet
arg_scope = tf.contrib.framework.arg_scope
slim = tf.contrib.slim
def pnasnet_large_arg_scope_for_detection(is_batch_norm_training=False):
"""Defines the default arg scope for the PNASNet Large for object detection.
This provides a small edit to switch batch norm training on and off.
Args:
is_batch_norm_training: Boolean indicating whether to train with batch norm.
Returns:
An `arg_scope` to use for the PNASNet Large Model.
"""
imagenet_scope = pnasnet.pnasnet_large_arg_scope()
with arg_scope(imagenet_scope):
with arg_scope([slim.batch_norm], is_training=is_batch_norm_training) as sc:
return sc
def _filter_scaling(reduction_indices, start_cell_num):
"""Compute the expected filter scaling at given PNASNet cell start_cell_num.
In the pnasnet.py code, filter_scaling starts at 1.0. We instead
adapt filter scaling to depend on the starting cell.
For the first cells, before any reduction cell, filter_scaling is 1.0. After
passing each reduction cell, filter_scaling is multiplied by 2.
Args:
reduction_indices: list of int indices.
start_cell_num: int.
Returns:
filter_scaling: float.
"""
filter_scaling = 1.0
for ind in reduction_indices:
if ind < start_cell_num:
filter_scaling *= 2.0
return filter_scaling
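# Worked example (indices assumed for illustration): with reduction cells at
# [4, 8] and start_cell_num=8, exactly one reduction cell precedes the start
# cell, so _filter_scaling(reduction_indices=[4, 8], start_cell_num=8)
# returns 1.0 * 2.0 = 2.0.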
# Note: This is largely a copy of _build_pnasnet_base inside pnasnet.py but
# with special edits to remove instantiation of the stem and the special
# ability to receive as input a pair of hidden states. It constructs only
# a sub-network from the original PNASNet model, starting from the
# start_cell_num cell and with modified final layer.
def _build_pnasnet_base(
hidden_previous, hidden, normal_cell, hparams, true_cell_num,
start_cell_num):
"""Constructs a PNASNet image model for proposal classifier features."""
# Find where to place the reduction cells or stride normal cells
reduction_indices = nasnet_utils.calc_reduction_layers(
hparams.num_cells, hparams.num_reduction_layers)
filter_scaling = _filter_scaling(reduction_indices, start_cell_num)
# Note: The None is prepended to match the behavior of _imagenet_stem()
cell_outputs = [None, hidden_previous, hidden]
net = hidden
# Run the cells
for cell_num in range(start_cell_num, hparams.num_cells):
is_reduction = cell_num in reduction_indices
stride = 2 if is_reduction else 1
if is_reduction: filter_scaling *= hparams.filter_scaling_rate
prev_layer = cell_outputs[-2]
net = normal_cell(
net,
scope='cell_{}'.format(cell_num),
filter_scaling=filter_scaling,
stride=stride,
prev_layer=prev_layer,
cell_num=true_cell_num)
true_cell_num += 1
cell_outputs.append(net)
# Final nonlinearity.
# Note that we have dropped the final pooling, dropout and softmax layers
# from the default pnasnet version.
with tf.variable_scope('final_layer'):
net = tf.nn.relu(net)
return net
# TODO(shlens): Only fixed_shape_resizer is currently supported for PNASNet
# featurization. The reason for this is that pnasnet.py only supports
# inputs with fully known shapes. We need to update pnasnet.py to handle
# shapes not known at compile time.
class FasterRCNNPNASFeatureExtractor(
faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
"""Faster R-CNN with PNASNet feature extractor implementation."""
def __init__(self,
is_training,
first_stage_features_stride,
batch_norm_trainable=False,
reuse_weights=None,
weight_decay=0.0):
"""Constructor.
Args:
is_training: See base class.
first_stage_features_stride: See base class.
batch_norm_trainable: See base class.
reuse_weights: See base class.
weight_decay: See base class.
Raises:
ValueError: If `first_stage_features_stride` is not 16.
"""
if first_stage_features_stride != 16:
raise ValueError('`first_stage_features_stride` must be 16.')
super(FasterRCNNPNASFeatureExtractor, self).__init__(
is_training, first_stage_features_stride, batch_norm_trainable,
reuse_weights, weight_decay)
def preprocess(self, resized_inputs):
"""Faster R-CNN with PNAS preprocessing.
Maps pixel values to the range [-1, 1].
Args:
resized_inputs: A [batch, height_in, width_in, channels] float32 tensor
representing a batch of images with values between 0 and 255.0.
Returns:
preprocessed_inputs: A [batch, height_out, width_out, channels] float32
tensor representing a batch of images.
"""
return (2.0 / 255.0) * resized_inputs - 1.0
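# Quick check of the mapping above: pixel value 0 maps to
# (2.0 / 255.0) * 0.0 - 1.0 = -1.0 and 255 maps to
# (2.0 / 255.0) * 255.0 - 1.0 = 1.0, so [0, 255] becomes [-1, 1].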
def _extract_proposal_features(self, preprocessed_inputs, scope):
"""Extracts first stage RPN features.
Extracts features using the first half of the PNASNet network.
We construct the network in `align_feature_maps=True` mode, which means
that all VALID paddings in the network are changed to SAME padding so that
the feature maps are aligned.
Args:
preprocessed_inputs: A [batch, height, width, channels] float32 tensor
representing a batch of images.
scope: A scope name.
Returns:
rpn_feature_map: A tensor with shape [batch, height, width, depth]
end_points: A dictionary mapping feature extractor tensor names to tensors
Raises:
ValueError: If the created network is missing the required activation.
"""
del scope
if len(preprocessed_inputs.get_shape().as_list()) != 4:
raise ValueError('`preprocessed_inputs` must be 4 dimensional, got a '
'tensor of shape %s' % preprocessed_inputs.get_shape())
with slim.arg_scope(pnasnet_large_arg_scope_for_detection(
is_batch_norm_training=self._train_batch_norm)):
with arg_scope([slim.conv2d,
slim.batch_norm,
slim.separable_conv2d],
reuse=self._reuse_weights):
_, end_points = pnasnet.build_pnasnet_large(
preprocessed_inputs, num_classes=None,
is_training=self._is_training,
final_endpoint='Cell_7')
# Note that both 'Cell_6' and 'Cell_7' have equal depth = 2160.
# Cell_7 is the last cell before second reduction.
rpn_feature_map = tf.concat([end_points['Cell_6'],
end_points['Cell_7']], 3)
# pnasnet.py does not maintain the batch size in the first dimension.
# This workaround lets us restore the known batch size for use below.
batch = preprocessed_inputs.get_shape().as_list()[0]
shape_without_batch = rpn_feature_map.get_shape().as_list()[1:]
rpn_feature_map_shape = [batch] + shape_without_batch
rpn_feature_map.set_shape(rpn_feature_map_shape)
return rpn_feature_map, end_points
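# Standalone sketch (shapes assumed for illustration, not part of this change)
# of the set_shape workaround above: reattach a statically known batch size
# that an op has dropped from the inferred shape.
import tensorflow as tf

x = tf.placeholder(tf.float32, [8, 32, 32, 3])
y = tf.reshape(x, tf.stack([tf.shape(x)[0], 32, 32, 3]))
# Static shape is now partially unknown; restore the known batch dimension.
y.set_shape([x.get_shape().as_list()[0]] + y.get_shape().as_list()[1:])
print(y.get_shape().as_list())  # [8, 32, 32, 3]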
def _extract_box_classifier_features(self, proposal_feature_maps, scope):
"""Extracts second stage box classifier features.
This function reconstructs the "second half" of the PNASNet
network after the part defined in `_extract_proposal_features`.
Args:
proposal_feature_maps: A 4-D float tensor with shape
[batch_size * self.max_num_proposals, crop_height, crop_width, depth]
representing the feature map cropped to each proposal.
scope: A scope name.
Returns:
proposal_classifier_features: A 4-D float tensor with shape
[batch_size * self.max_num_proposals, height, width, depth]
representing box classifier features for each proposal.
"""
del scope
# Number of used stem cells.
num_stem_cells = 2
# Note that we always feed into 2 layers of equal depth
# where the first N channels correspond to the previous hidden layer
# and the second N channels correspond to the final hidden layer.
hidden_previous, hidden = tf.split(proposal_feature_maps, 2, axis=3)
# Note that what follows is largely a copy of build_pnasnet_large() within
# pnasnet.py. We are copying to minimize code pollution in slim.
# TODO(shlens,skornblith): Determine the appropriate drop path schedule.
# For now the schedule is the default (1.0->0.7 over 250,000 train steps).
hparams = pnasnet.large_imagenet_config()
if not self._is_training:
hparams.set_hparam('drop_path_keep_prob', 1.0)
# Calculate the total number of cells in the network
total_num_cells = hparams.num_cells + num_stem_cells
normal_cell = pnasnet.PNasNetNormalCell(
hparams.num_conv_filters, hparams.drop_path_keep_prob,
total_num_cells, hparams.total_training_steps)
with arg_scope([slim.dropout, nasnet_utils.drop_path],
is_training=self._is_training):
with arg_scope([slim.batch_norm], is_training=self._train_batch_norm):
with arg_scope([slim.avg_pool2d,
slim.max_pool2d,
slim.conv2d,
slim.batch_norm,
slim.separable_conv2d,
nasnet_utils.factorized_reduction,
nasnet_utils.global_avg_pool,
nasnet_utils.get_channel_index,
nasnet_utils.get_channel_dim],
data_format=hparams.data_format):
# This corresponds to the cell number just past 'Cell_7' used by
# _extract_proposal_features().
start_cell_num = 8
true_cell_num = start_cell_num + num_stem_cells
with slim.arg_scope(pnasnet.pnasnet_large_arg_scope()):
net = _build_pnasnet_base(
hidden_previous,
hidden,
normal_cell=normal_cell,
hparams=hparams,
true_cell_num=true_cell_num,
start_cell_num=start_cell_num)
proposal_classifier_features = net
return proposal_classifier_features
def restore_from_classification_checkpoint_fn(
self,
first_stage_feature_extractor_scope,
second_stage_feature_extractor_scope):
"""Returns a map of variables to load from a foreign checkpoint.
Note that this overrides the default implementation in
faster_rcnn_meta_arch.FasterRCNNFeatureExtractor which does not work for
PNASNet checkpoints.
Args:
first_stage_feature_extractor_scope: A scope name for the first stage
feature extractor.
second_stage_feature_extractor_scope: A scope name for the second stage
feature extractor.
Returns:
A dict mapping variable names (to load from a checkpoint) to variables in
the model graph.
"""
variables_to_restore = {}
for variable in tf.global_variables():
if variable.op.name.startswith(
first_stage_feature_extractor_scope):
var_name = variable.op.name.replace(
first_stage_feature_extractor_scope + '/', '')
var_name += '/ExponentialMovingAverage'
variables_to_restore[var_name] = variable
if variable.op.name.startswith(
second_stage_feature_extractor_scope):
var_name = variable.op.name.replace(
second_stage_feature_extractor_scope + '/', '')
var_name += '/ExponentialMovingAverage'
variables_to_restore[var_name] = variable
return variables_to_restore
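# Worked example (names assumed for illustration) of the mapping above: a model
# variable under the first stage scope is restored from the corresponding
# moving-average entry in the PNASNet classification checkpoint.
scope = 'FirstStageFeatureExtractor'
op_name = scope + '/cell_stem_0/1x1/weights'
ckpt_name = op_name.replace(scope + '/', '') + '/ExponentialMovingAverage'
print(ckpt_name)  # cell_stem_0/1x1/weights/ExponentialMovingAverage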