Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
fd7b6887
Unverified
Commit
fd7b6887
authored
Feb 09, 2018
by
Jonathan Huang
Committed by
GitHub
Feb 09, 2018
Browse files
Merge pull request #3293 from pkulzc/master
Internal changes of object_detection
parents
f98ec55e
1efe98bb
Changes
200
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1785 additions
and
200 deletions
+1785
-200
research/object_detection/model.py
research/object_detection/model.py
+490
-0
research/object_detection/model_hparams.py
research/object_detection/model_hparams.py
+44
-0
research/object_detection/model_test.py
research/object_detection/model_test.py
+266
-0
research/object_detection/model_test_util.py
research/object_detection/model_test_util.py
+54
-0
research/object_detection/model_tpu.py
research/object_detection/model_tpu.py
+262
-0
research/object_detection/models/BUILD
research/object_detection/models/BUILD
+57
-20
research/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor.py
...ion/models/embedded_ssd_mobilenet_v1_feature_extractor.py
+30
-10
research/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor_test.py
...odels/embedded_ssd_mobilenet_v1_feature_extractor_test.py
+24
-12
research/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor.py
...dels/faster_rcnn_inception_resnet_v2_feature_extractor.py
+1
-1
research/object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py
...tection/models/faster_rcnn_resnet_v1_feature_extractor.py
+2
-1
research/object_detection/models/feature_map_generators.py
research/object_detection/models/feature_map_generators.py
+58
-3
research/object_detection/models/feature_map_generators_test.py
...ch/object_detection/models/feature_map_generators_test.py
+29
-1
research/object_detection/models/ssd_feature_extractor_test.py
...rch/object_detection/models/ssd_feature_extractor_test.py
+36
-30
research/object_detection/models/ssd_inception_v2_feature_extractor.py
...ct_detection/models/ssd_inception_v2_feature_extractor.py
+28
-23
research/object_detection/models/ssd_inception_v2_feature_extractor_test.py
...tection/models/ssd_inception_v2_feature_extractor_test.py
+29
-17
research/object_detection/models/ssd_inception_v3_feature_extractor.py
...ct_detection/models/ssd_inception_v3_feature_extractor.py
+27
-23
research/object_detection/models/ssd_inception_v3_feature_extractor_test.py
...tection/models/ssd_inception_v3_feature_extractor_test.py
+29
-17
research/object_detection/models/ssd_mobilenet_v1_feature_extractor.py
...ct_detection/models/ssd_mobilenet_v1_feature_extractor.py
+30
-24
research/object_detection/models/ssd_mobilenet_v1_feature_extractor_test.py
...tection/models/ssd_mobilenet_v1_feature_extractor_test.py
+30
-18
research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor.py
...t_detection/models/ssd_resnet_v1_fpn_feature_extractor.py
+259
-0
No files found.
research/object_detection/model.py
0 → 100644
View file @
fd7b6887
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r
"""Creates and runs `Experiment` for object detection model.
This uses the TF.learn framework to define and run an object detection model
wrapped in an `Estimator`.
Note that this module is only compatible with SSD Meta architecture at the
moment.
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
functools
import
os
import
tensorflow
as
tf
from
google.protobuf
import
text_format
from
tensorflow.contrib.learn.python.learn
import
learn_runner
from
tensorflow.contrib.tpu.python.tpu
import
tpu_optimizer
from
object_detection
import
eval_util
from
object_detection
import
inputs
from
object_detection
import
model_hparams
from
object_detection.builders
import
model_builder
from
object_detection.builders
import
optimizer_builder
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.metrics
import
coco_evaluation
from
object_detection.utils
import
config_util
from
object_detection.utils
import
label_map_util
from
object_detection.utils
import
shape_utils
from
object_detection.utils
import
variables_helper
from
object_detection.utils
import
visualization_utils
as
vis_utils
# Command-line flags for the train/eval binary.
tf.flags.DEFINE_string('model_dir', None, 'Path to output model directory '
                       'where event and checkpoint files will be written.')
tf.flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config '
                       'file.')
tf.flags.DEFINE_integer('num_train_steps', 500000, 'Number of train steps.')
# Fixed copy-paste bug: this flag's help text previously read
# 'Number of train steps.' even though it controls evaluation.
tf.flags.DEFINE_integer('num_eval_steps', 10000, 'Number of eval steps.')

FLAGS = tf.flags.FLAGS
def _get_groundtruth_data(detection_model, class_agnostic):
  """Extracts groundtruth data from detection_model.

  Args:
    detection_model: A `DetectionModel` object.
    class_agnostic: Whether the detections are class_agnostic.

  Returns:
    A groundtruth dictionary with the following fields:
      'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
        normalized coordinates.
      'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      'groundtruth_instance_masks': 3D float32 tensor of instance masks
        (only present if provided in the model's groundtruth).
  """
  input_data_fields = fields.InputDataFields()
  boxes = detection_model.groundtruth_lists(fields.BoxListFields.boxes)[0]
  if class_agnostic:
    # For class-agnostic models, groundtruth one-hot encodings collapse to
    # a single all-ones column (one box per row).
    num_boxes = tf.shape(boxes)[0]
    one_hot_classes = tf.ones([num_boxes, 1])
  else:
    one_hot_classes = detection_model.groundtruth_lists(
        fields.BoxListFields.classes)[0]
  # Convert one-hot encodings back to 1-indexed class ids; the +1 applies
  # the label id offset (b/63711816).
  label_id_offset = 1
  classes = tf.argmax(one_hot_classes, axis=1) + label_id_offset
  groundtruth = {
      input_data_fields.groundtruth_boxes: boxes,
      input_data_fields.groundtruth_classes: classes,
  }
  # Instance masks are optional groundtruth; only attach them when present.
  if detection_model.groundtruth_has_field(fields.BoxListFields.masks):
    groundtruth[input_data_fields.groundtruth_instance_masks] = (
        detection_model.groundtruth_lists(fields.BoxListFields.masks)[0])
  return groundtruth
def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
  """Unstacks all tensors in `tensor_dict` along 0th dimension.

  Unstacks tensor from the tensor dict along 0th dimension and returns a
  tensor_dict containing values that are lists of unstacked tensors.

  Tensors in the `tensor_dict` are expected to be of one of the three shapes:
  1. [batch_size]
  2. [batch_size, height, width, channels]
  3. [batch_size, num_boxes, d1, d2, ... dn]

  When unpad_tensors is set to true, unstacked tensors of form 3 above are
  sliced along the `num_boxes` dimension using the value in tensor
  field.InputDataFields.num_groundtruth_boxes.

  Note that this function has a static list of input data fields and has to be
  kept in sync with the InputDataFields defined in core/standard_fields.py

  Args:
    tensor_dict: A dictionary of batched groundtruth tensors.
    unpad_groundtruth_tensors: Whether to remove padding along `num_boxes`
      dimension of the groundtruth tensors.

  Returns:
    A dictionary where the keys are from fields.InputDataFields and values are
    a list of unstacked (optionally unpadded) tensors.

  Raises:
    ValueError: If unpad_tensors is True and `tensor_dict` does not contain
      `num_groundtruth_boxes` tensor.
  """
  # Split every batched tensor into a per-example list along axis 0.
  unbatched_tensor_dict = {key: tf.unstack(tensor)
                           for key, tensor in tensor_dict.items()}
  if unpad_groundtruth_tensors:
    # Unpadding requires the per-example box counts to know where padding
    # starts in each groundtruth tensor.
    if (fields.InputDataFields.num_groundtruth_boxes
        not in unbatched_tensor_dict):
      raise ValueError('`num_groundtruth_boxes` not found in tensor_dict. '
                       'Keys available: {}'.format(
                           unbatched_tensor_dict.keys()))
    unbatched_unpadded_tensor_dict = {}
    unpad_keys = set([
        # List of input data fields that are padded along the num_boxes
        # dimension. This list has to be kept in sync with InputDataFields in
        # standard_fields.py.
        fields.InputDataFields.groundtruth_instance_masks,
        fields.InputDataFields.groundtruth_classes,
        fields.InputDataFields.groundtruth_boxes,
        fields.InputDataFields.groundtruth_keypoints,
        fields.InputDataFields.groundtruth_group_of,
        fields.InputDataFields.groundtruth_difficult,
        fields.InputDataFields.groundtruth_is_crowd,
        fields.InputDataFields.groundtruth_area,
        fields.InputDataFields.groundtruth_weights
    ]).intersection(set(unbatched_tensor_dict.keys()))

    for key in unpad_keys:
      unpadded_tensor_list = []
      # Pair each example's padded tensor with its true box count.
      for num_gt, padded_tensor in zip(
          unbatched_tensor_dict[fields.InputDataFields.num_groundtruth_boxes],
          unbatched_tensor_dict[key]):
        tensor_shape = shape_utils.combined_static_and_dynamic_shape(
            padded_tensor)
        # Slice [0:num_gt] along the boxes dimension; keep every trailing
        # dimension whole (-1 where the static dim is unknown).
        slice_begin = tf.zeros([len(tensor_shape)], dtype=tf.int32)
        slice_size = tf.stack(
            [num_gt] + [-1 if dim is None else dim for dim in tensor_shape[1:]])
        unpadded_tensor = tf.slice(padded_tensor, slice_begin, slice_size)
        unpadded_tensor_list.append(unpadded_tensor)
      unbatched_unpadded_tensor_dict[key] = unpadded_tensor_list

    # Replace the padded entries with their unpadded counterparts; keys not
    # in unpad_keys (e.g. images) keep their unstacked-but-padded values.
    unbatched_tensor_dict.update(unbatched_unpadded_tensor_dict)

  return unbatched_tensor_dict
def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
  """Creates a model function for `Estimator`.

  Args:
    detection_model_fn: Function that returns a `DetectionModel` instance.
    configs: Dictionary of pipeline config objects.
    hparams: `HParams` object.
    use_tpu: Boolean indicating whether model should be constructed for
        use on TPU.

  Returns:
    `model_fn` for `Estimator`.
  """
  train_config = configs['train_config']
  eval_input_config = configs['eval_input_config']

  def model_fn(features, labels, mode, params=None):
    """Constructs the object detection model.

    Args:
      features: Dictionary of feature tensors, returned from `input_fn`.
      labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
        otherwise None.
      mode: Mode key from tf.estimator.ModeKeys.
      params: Parameter dictionary passed from the estimator.

    Returns:
      An `EstimatorSpec` that encapsulates the model and its serving
        configurations.
    """
    params = params or {}
    total_loss, train_op, detections, export_outputs = None, None, None, None
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    # Summaries are added outside the model on TPU (they are unsupported
    # inside the TPU computation).
    detection_model = detection_model_fn(is_training=is_training,
                                         add_summaries=(not use_tpu))
    scaffold_fn = None

    if mode == tf.estimator.ModeKeys.TRAIN:
      labels = unstack_batch(
          labels,
          unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors)
    elif mode == tf.estimator.ModeKeys.EVAL:
      # Eval inputs are not padded, so there is nothing to unpad.
      labels = unstack_batch(labels, unpad_groundtruth_tensors=False)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
      gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
      gt_classes_list = labels[fields.InputDataFields.groundtruth_classes]
      # Masks and keypoints are optional groundtruth fields.
      gt_masks_list = None
      if fields.InputDataFields.groundtruth_instance_masks in labels:
        gt_masks_list = labels[fields.InputDataFields.groundtruth_instance_masks]
      gt_keypoints_list = None
      if fields.InputDataFields.groundtruth_keypoints in labels:
        gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints]
      detection_model.provide_groundtruth(
          groundtruth_boxes_list=gt_boxes_list,
          groundtruth_classes_list=gt_classes_list,
          groundtruth_masks_list=gt_masks_list,
          groundtruth_keypoints_list=gt_keypoints_list)

    preprocessed_images = features[fields.InputDataFields.image]
    prediction_dict = detection_model.predict(
        preprocessed_images, features[fields.InputDataFields.true_image_shape])
    detections = detection_model.postprocess(
        prediction_dict, features[fields.InputDataFields.true_image_shape])

    if mode == tf.estimator.ModeKeys.TRAIN:
      if train_config.fine_tune_checkpoint and hparams.load_pretrained:
        asg_map = detection_model.restore_map(
            from_detection_checkpoint=train_config.from_detection_checkpoint,
            load_all_detection_checkpoint_vars=(
                train_config.load_all_detection_checkpoint_vars))
        # Only restore variables that actually exist in the checkpoint.
        available_var_map = (
            variables_helper.get_variables_available_in_checkpoint(
                asg_map, train_config.fine_tune_checkpoint,
                include_global_step=False))
        if use_tpu:
          # On TPU, checkpoint restoration has to happen inside a scaffold_fn.
          def tpu_scaffold():
            tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                          available_var_map)
            return tf.train.Scaffold()

          scaffold_fn = tpu_scaffold
        else:
          tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                        available_var_map)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
      losses_dict = detection_model.loss(
          prediction_dict, features[fields.InputDataFields.true_image_shape])
      # Bug fix: the original used `losses_dict.itervalues()`, which only
      # exists on Python 2 dicts and raises AttributeError on Python 3.
      # `.values()` is equivalent here and works on both.
      losses = list(losses_dict.values())
      total_loss = tf.add_n(losses, name='total_loss')

    if mode == tf.estimator.ModeKeys.TRAIN:
      global_step = tf.train.get_or_create_global_step()
      training_optimizer, optimizer_summary_vars = optimizer_builder.build(
          train_config.optimizer)
      if use_tpu:
        training_optimizer = tpu_optimizer.CrossShardOptimizer(
            training_optimizer)

      # Optionally freeze some layers by setting their gradients to be zero.
      trainable_variables = None
      if train_config.freeze_variables:
        trainable_variables = tf.contrib.framework.filter_variables(
            tf.trainable_variables(),
            exclude_patterns=train_config.freeze_variables)

      clip_gradients_value = None
      if train_config.gradient_clipping_by_norm > 0:
        clip_gradients_value = train_config.gradient_clipping_by_norm

      if not use_tpu:
        for var in optimizer_summary_vars:
          tf.summary.scalar(var.op.name, var)
      summaries = [] if use_tpu else None
      train_op = tf.contrib.layers.optimize_loss(
          loss=total_loss,
          global_step=global_step,
          learning_rate=None,
          clip_gradients=clip_gradients_value,
          optimizer=training_optimizer,
          variables=trainable_variables,
          summaries=summaries,
          name='')  # Preventing scope prefix on all variables.

    if mode == tf.estimator.ModeKeys.PREDICT:
      export_outputs = {
          tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
              tf.estimator.export.PredictOutput(detections)
      }

    eval_metric_ops = None
    if mode == tf.estimator.ModeKeys.EVAL:
      # Detection summaries during eval.
      class_agnostic = (fields.DetectionResultFields.detection_classes
                        not in detections)
      groundtruth = _get_groundtruth_data(detection_model, class_agnostic)
      # Visualize only the first example in the batch.
      eval_dict = eval_util.result_dict_for_single_example(
          tf.expand_dims(features[fields.InputDataFields.original_image][0], 0),
          features[inputs.HASH_KEY][0],
          detections,
          groundtruth,
          class_agnostic=class_agnostic,
          scale_to_absolute=False)

      if class_agnostic:
        category_index = label_map_util.create_class_agnostic_category_index()
      else:
        category_index = label_map_util.create_category_index_from_labelmap(
            eval_input_config.label_map_path)
      detection_and_groundtruth = vis_utils.draw_side_by_side_evaluation_image(
          eval_dict, category_index, max_boxes_to_draw=20, min_score_thresh=0.2)
      if not use_tpu:
        tf.summary.image('Detections_Left_Groundtruth_Right',
                         detection_and_groundtruth)

      # Eval metrics on a single image.
      detection_fields = fields.DetectionResultFields()
      input_data_fields = fields.InputDataFields()
      coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
          category_index.values())
      eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(
          image_id=eval_dict[input_data_fields.key],
          groundtruth_boxes=eval_dict[input_data_fields.groundtruth_boxes],
          groundtruth_classes=eval_dict[input_data_fields.groundtruth_classes],
          detection_boxes=eval_dict[detection_fields.detection_boxes],
          detection_scores=eval_dict[detection_fields.detection_scores],
          detection_classes=eval_dict[detection_fields.detection_classes])

    if use_tpu:
      return tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          scaffold_fn=scaffold_fn,
          predictions=detections,
          loss=total_loss,
          train_op=train_op,
          eval_metrics=eval_metric_ops,
          export_outputs=export_outputs)
    else:
      return tf.estimator.EstimatorSpec(
          mode=mode,
          predictions=detections,
          loss=total_loss,
          train_op=train_op,
          eval_metric_ops=eval_metric_ops,
          export_outputs=export_outputs)

  return model_fn
def _build_experiment_fn(train_steps, eval_steps):
  """Returns a function that creates an `Experiment`."""

  def build_experiment(run_config, hparams):
    """Builds an `Experiment` from configuration and hyperparameters.

    Args:
      run_config: A `RunConfig`.
      hparams: A `HParams`.

    Returns:
      An `Experiment` object.
    """
    # The pipeline config path comes from the command-line flags; the step
    # counts are closed over from the outer function.
    return populate_experiment(
        run_config,
        hparams,
        FLAGS.pipeline_config_path,
        train_steps=train_steps,
        eval_steps=eval_steps)

  return build_experiment
def populate_experiment(run_config,
                        hparams,
                        pipeline_config_path,
                        train_steps=None,
                        eval_steps=None,
                        model_fn_creator=create_model_fn,
                        **kwargs):
  """Populates an `Experiment` object.

  Args:
    run_config: A `RunConfig`.
    hparams: A `HParams`.
    pipeline_config_path: A path to a pipeline config file.
    train_steps: Number of training steps. If None, the number of training
      steps is set from the `TrainConfig` proto.
    eval_steps: Number of evaluation steps per evaluation cycle. If None, the
      number of evaluation steps is set from the `EvalConfig` proto.
    model_fn_creator: A function that creates a `model_fn` for `Estimator`.
      Follows the signature:
      * Args:
        * `detection_model_fn`: Function that returns `DetectionModel`
          instance.
        * `configs`: Dictionary of pipeline config objects.
        * `hparams`: `HParams` object.
      * Returns:
        `model_fn` for `Estimator`.
    **kwargs: Additional keyword arguments for configuration override.

  Returns:
    An `Experiment` that defines all aspects of training, evaluation, and
    export.
  """
  # Load the pipeline config, then layer on caller-supplied overrides.
  configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
  configs = config_util.merge_external_params_with_configs(
      configs,
      hparams,
      train_steps=train_steps,
      eval_steps=eval_steps,
      **kwargs)
  model_config = configs['model']
  train_config = configs['train_config']
  train_input_config = configs['train_input_config']
  eval_config = configs['eval_config']
  eval_input_config = configs['eval_input_config']

  # Fall back to the step counts from the config protos when the caller did
  # not specify them (proto value 0 is treated as "unset").
  if train_steps is None:
    train_steps = train_config.num_steps if train_config.num_steps else None
  if eval_steps is None:
    eval_steps = eval_config.num_examples if eval_config.num_examples else None

  detection_model_fn = functools.partial(
      model_builder.build, model_config=model_config)

  # Create the input functions for TRAIN/EVAL.
  train_input_fn = inputs.create_train_input_fn(
      train_config=train_config,
      train_input_config=train_input_config,
      model_config=model_config)
  eval_input_fn = inputs.create_eval_input_fn(
      eval_config=eval_config,
      eval_input_config=eval_input_config,
      model_config=model_config)

  # SavedModel export for serving.
  export_strategies = [
      tf.contrib.learn.utils.saved_model_export_utils.make_export_strategy(
          serving_input_fn=inputs.create_predict_input_fn(
              model_config=model_config))
  ]

  estimator = tf.estimator.Estimator(
      model_fn=model_fn_creator(detection_model_fn, configs, hparams),
      config=run_config)

  if run_config.is_chief:
    # Store the final pipeline config for traceability.
    pipeline_config_final = config_util.create_pipeline_proto_from_configs(
        configs)
    pipeline_config_final_path = os.path.join(estimator.model_dir,
                                              'pipeline.config')
    config_text = text_format.MessageToString(pipeline_config_final)
    with tf.gfile.Open(pipeline_config_final_path, 'wb') as f:
      tf.logging.info('Writing as-run pipeline config file to %s',
                      pipeline_config_final_path)
      f.write(config_text)

  return tf.contrib.learn.Experiment(
      estimator=estimator,
      train_input_fn=train_input_fn,
      eval_input_fn=eval_input_fn,
      train_steps=train_steps,
      eval_steps=eval_steps,
      export_strategies=export_strategies,
      eval_delay_secs=120,)
def main(unused_argv):
  """Binary entry point: builds and runs the train/eval `Experiment`."""
  # Both output and config locations are mandatory for a run.
  for required_flag in ('model_dir', 'pipeline_config_path'):
    tf.flags.mark_flag_as_required(required_flag)
  run_config = tf.contrib.learn.RunConfig(model_dir=FLAGS.model_dir)
  experiment_fn = _build_experiment_fn(FLAGS.num_train_steps,
                                       FLAGS.num_eval_steps)
  learn_runner.run(
      experiment_fn=experiment_fn,
      run_config=run_config,
      hparams=model_hparams.create_hparams())


if __name__ == '__main__':
  tf.app.run()
research/object_detection/model_hparams.py
0 → 100644
View file @
fd7b6887
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Hyperparameters for the object detection model in TF.learn.
This file consolidates and documents the hyperparameters used by the model.
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
tensorflow
as
tf
def create_hparams(hparams_overrides=None):
  """Returns hyperparameters, including any flag value overrides.

  Args:
    hparams_overrides: Optional hparams overrides, represented as a
      string containing comma-separated hparam_name=value pairs.

  Returns:
    The hyperparameters as a tf.HParams object.
  """
  hparams = tf.contrib.training.HParams(
      # Whether a fine tuning checkpoint (provided in the pipeline config)
      # should be loaded for training.
      load_pretrained=True)
  # Apply any caller-supplied overrides on top of the defaults.
  return hparams.parse(hparams_overrides) if hparams_overrides else hparams
research/object_detection/model_test.py
0 → 100644
View file @
fd7b6887
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object detection model."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
functools
import
os
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection
import
inputs
from
object_detection
import
model
from
object_detection
import
model_hparams
from
object_detection
import
model_test_util
from
object_detection.builders
import
model_builder
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.utils
import
config_util
# Test flags (e.g. --test_srcdir) shared with the test utilities.
FLAGS = tf.flags.FLAGS

# Name of the model configuration exercised by these tests.
MODEL_NAME_FOR_TEST = model_test_util.SSD_INCEPTION_MODEL_NAME
def _get_data_path():
  """Returns an absolute path to the pets TFRecord test fixture."""
  source_root = os.path.join(FLAGS.test_srcdir, model_test_util.PATH_BASE)
  return os.path.join(source_root, 'test_data', 'pets_examples.record')
def _get_labelmap_path():
  """Returns an absolute path to the pet label map file."""
  source_root = os.path.join(FLAGS.test_srcdir, model_test_util.PATH_BASE)
  return os.path.join(source_root, 'data', 'pet_label_map.pbtxt')
def _get_configs_for_model(model_name):
  """Returns pipeline configurations for `model_name`, pointed at test data."""
  pipeline_config_file = model_test_util.GetPipelineConfigPath(model_name)
  configs = config_util.get_configs_from_pipeline_file(pipeline_config_file)
  # Redirect both input pipelines and the label map at the test fixtures.
  record_path = _get_data_path()
  return config_util.merge_external_params_with_configs(
      configs,
      train_input_path=record_path,
      eval_input_path=record_path,
      label_map_path=_get_labelmap_path())
def setUpModule():
  # Module-level setup run once by the test framework before any test case:
  # initializes the shared test flags for the model under test.
  model_test_util.InitializeFlags(MODEL_NAME_FOR_TEST)
class ModelTflearnTest(tf.test.TestCase):
  """Tests the TF.learn `model_fn` in TRAIN, EVAL and PREDICT modes."""

  @classmethod
  def setUpClass(cls):
    # Start from a clean graph so state from other test modules cannot leak in.
    tf.reset_default_graph()

  def _assert_outputs_for_train_eval(self, configs, mode, class_agnostic=False):
    """Builds model_fn for `mode` and asserts on its `EstimatorSpec` outputs.

    Args:
      configs: Dictionary of pipeline config objects.
      mode: A tf.estimator.ModeKeys mode (TRAIN or EVAL).
      class_agnostic: Whether detections are expected to omit
        'detection_classes'.

    Returns:
      The `EstimatorSpec` produced by the model function.
    """
    model_config = configs['model']
    train_config = configs['train_config']
    with tf.Graph().as_default():
      if mode == tf.estimator.ModeKeys.TRAIN:
        features, labels = inputs.create_train_input_fn(
            configs['train_config'],
            configs['train_input_config'],
            configs['model'])()
        batch_size = train_config.batch_size
      else:
        # Eval input pipeline always produces single-example batches.
        features, labels = inputs.create_eval_input_fn(
            configs['eval_config'],
            configs['eval_input_config'],
            configs['model'])()
        batch_size = 1
      detection_model_fn = functools.partial(
          model_builder.build, model_config=model_config, is_training=True)
      # Skip checkpoint loading: the test has no pretrained weights on disk.
      hparams = model_hparams.create_hparams(
          hparams_overrides='load_pretrained=false')
      model_fn = model.create_model_fn(detection_model_fn, configs, hparams)
      estimator_spec = model_fn(features, labels, mode)
      self.assertIsNotNone(estimator_spec.loss)
      self.assertIsNotNone(estimator_spec.predictions)
      if class_agnostic:
        self.assertNotIn('detection_classes', estimator_spec.predictions)
      else:
        detection_classes = estimator_spec.predictions['detection_classes']
        self.assertEqual(batch_size, detection_classes.shape.as_list()[0])
        self.assertEqual(tf.float32, detection_classes.dtype)
      # Boxes, scores and counts must be batched float32 tensors.
      detection_boxes = estimator_spec.predictions['detection_boxes']
      detection_scores = estimator_spec.predictions['detection_scores']
      num_detections = estimator_spec.predictions['num_detections']
      self.assertEqual(batch_size, detection_boxes.shape.as_list()[0])
      self.assertEqual(tf.float32, detection_boxes.dtype)
      self.assertEqual(batch_size, detection_scores.shape.as_list()[0])
      self.assertEqual(tf.float32, detection_scores.dtype)
      self.assertEqual(tf.float32, num_detections.dtype)
      if mode == tf.estimator.ModeKeys.TRAIN:
        self.assertIsNotNone(estimator_spec.train_op)
      return estimator_spec

  def _assert_outputs_for_predict(self, configs):
    """Builds model_fn in PREDICT mode and asserts on its outputs."""
    model_config = configs['model']
    with tf.Graph().as_default():
      features, _ = inputs.create_eval_input_fn(
          configs['eval_config'],
          configs['eval_input_config'],
          configs['model'])()
      detection_model_fn = functools.partial(
          model_builder.build, model_config=model_config, is_training=False)
      hparams = model_hparams.create_hparams(
          hparams_overrides='load_pretrained=false')
      model_fn = model.create_model_fn(detection_model_fn, configs, hparams)
      # PREDICT mode receives no labels.
      estimator_spec = model_fn(features, None, tf.estimator.ModeKeys.PREDICT)
      self.assertIsNone(estimator_spec.loss)
      self.assertIsNone(estimator_spec.train_op)
      self.assertIsNotNone(estimator_spec.predictions)
      self.assertIsNotNone(estimator_spec.export_outputs)
      self.assertIn(tf.saved_model.signature_constants.PREDICT_METHOD_NAME,
                    estimator_spec.export_outputs)

  def testModelFnInTrainMode(self):
    """Tests the model function in TRAIN mode."""
    configs = _get_configs_for_model(MODEL_NAME_FOR_TEST)
    self._assert_outputs_for_train_eval(configs, tf.estimator.ModeKeys.TRAIN)

  def testModelFnInEvalMode(self):
    """Tests the model function in EVAL mode."""
    configs = _get_configs_for_model(MODEL_NAME_FOR_TEST)
    self._assert_outputs_for_train_eval(configs, tf.estimator.ModeKeys.EVAL)

  def testModelFnInPredictMode(self):
    """Tests the model function in PREDICT mode."""
    configs = _get_configs_for_model(MODEL_NAME_FOR_TEST)
    self._assert_outputs_for_predict(configs)

  def testExperiment(self):
    """Tests that the `Experiment` object is constructed correctly."""
    experiment = model_test_util.BuildExperiment()
    model_dir = experiment.estimator.model_dir
    # populate_experiment writes the as-run config into the model directory.
    pipeline_config_path = os.path.join(model_dir, 'pipeline.config')
    self.assertTrue(tf.gfile.Exists(pipeline_config_path))
class UnbatchTensorsTest(tf.test.TestCase):
  """Tests `model.unstack_batch` with and without groundtruth unpadding."""

  def test_unbatch_without_unpadding(self):
    # Batch of 2; trailing dims left dynamic to exercise unknown shapes.
    image_placeholder = tf.placeholder(tf.float32, [2, None, None, None])
    groundtruth_boxes_placeholder = tf.placeholder(tf.float32, [2, None, None])
    groundtruth_classes_placeholder = tf.placeholder(tf.float32,
                                                    [2, None, None])
    groundtruth_weights_placeholder = tf.placeholder(tf.float32, [2, None])
    tensor_dict = {
        fields.InputDataFields.image:
            image_placeholder,
        fields.InputDataFields.groundtruth_boxes:
            groundtruth_boxes_placeholder,
        fields.InputDataFields.groundtruth_classes:
            groundtruth_classes_placeholder,
        fields.InputDataFields.groundtruth_weights:
            groundtruth_weights_placeholder
    }
    unbatched_tensor_dict = model.unstack_batch(
        tensor_dict, unpad_groundtruth_tensors=False)

    with self.test_session() as sess:
      unbatched_tensor_dict_out = sess.run(
          unbatched_tensor_dict,
          feed_dict={
              image_placeholder:
                  np.random.rand(2, 4, 4, 3).astype(np.float32),
              groundtruth_boxes_placeholder:
                  np.random.rand(2, 5, 4).astype(np.float32),
              groundtruth_classes_placeholder:
                  np.random.rand(2, 5, 6).astype(np.float32),
              groundtruth_weights_placeholder:
                  np.random.rand(2, 5).astype(np.float32)
          })
      # Without unpadding, each unstacked tensor keeps the padded num_boxes
      # dimension (5) intact.
      for image_out in unbatched_tensor_dict_out[fields.InputDataFields.image]:
        self.assertAllEqual(image_out.shape, [4, 4, 3])
      for groundtruth_boxes_out in unbatched_tensor_dict_out[
          fields.InputDataFields.groundtruth_boxes]:
        self.assertAllEqual(groundtruth_boxes_out.shape, [5, 4])
      for groundtruth_classes_out in unbatched_tensor_dict_out[
          fields.InputDataFields.groundtruth_classes]:
        self.assertAllEqual(groundtruth_classes_out.shape, [5, 6])
      for groundtruth_weights_out in unbatched_tensor_dict_out[
          fields.InputDataFields.groundtruth_weights]:
        self.assertAllEqual(groundtruth_weights_out.shape, [5])

  def test_unbatch_and_unpad_groundtruth_tensors(self):
    # Groundtruth tensors padded to 5 boxes; the true count is fed below.
    image_placeholder = tf.placeholder(tf.float32, [2, None, None, None])
    groundtruth_boxes_placeholder = tf.placeholder(tf.float32, [2, 5, None])
    groundtruth_classes_placeholder = tf.placeholder(tf.float32, [2, 5, None])
    groundtruth_weights_placeholder = tf.placeholder(tf.float32, [2, 5])
    num_groundtruth_placeholder = tf.placeholder(tf.int32, [2])
    tensor_dict = {
        fields.InputDataFields.image:
            image_placeholder,
        fields.InputDataFields.groundtruth_boxes:
            groundtruth_boxes_placeholder,
        fields.InputDataFields.groundtruth_classes:
            groundtruth_classes_placeholder,
        fields.InputDataFields.groundtruth_weights:
            groundtruth_weights_placeholder,
        fields.InputDataFields.num_groundtruth_boxes:
            num_groundtruth_placeholder
    }
    unbatched_tensor_dict = model.unstack_batch(
        tensor_dict, unpad_groundtruth_tensors=True)
    with self.test_session() as sess:
      unbatched_tensor_dict_out = sess.run(
          unbatched_tensor_dict,
          feed_dict={
              image_placeholder:
                  np.random.rand(2, 4, 4, 3).astype(np.float32),
              groundtruth_boxes_placeholder:
                  np.random.rand(2, 5, 4).astype(np.float32),
              groundtruth_classes_placeholder:
                  np.random.rand(2, 5, 6).astype(np.float32),
              groundtruth_weights_placeholder:
                  np.random.rand(2, 5).astype(np.float32),
              # Each example really contains 3 of the 5 padded boxes.
              num_groundtruth_placeholder:
                  np.array([3, 3], np.int32)
          })
      # Images are not groundtruth and therefore keep their full shape.
      for image_out in unbatched_tensor_dict_out[fields.InputDataFields.image]:
        self.assertAllEqual(image_out.shape, [4, 4, 3])
      # Groundtruth tensors must be sliced down from 5 to 3 boxes.
      for groundtruth_boxes_out in unbatched_tensor_dict_out[
          fields.InputDataFields.groundtruth_boxes]:
        self.assertAllEqual(groundtruth_boxes_out.shape, [3, 4])
      for groundtruth_classes_out in unbatched_tensor_dict_out[
          fields.InputDataFields.groundtruth_classes]:
        self.assertAllEqual(groundtruth_classes_out.shape, [3, 6])
      for groundtruth_weights_out in unbatched_tensor_dict_out[
          fields.InputDataFields.groundtruth_weights]:
        self.assertAllEqual(groundtruth_weights_out.shape, [3])
if __name__ == '__main__':
  # Discover and run all tf.test.TestCase tests defined in this module.
  tf.test.main()
research/object_detection/model_test_util.py
0 → 100644
View file @
fd7b6887
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Common utils for tests for object detection tflearn model."""
from
__future__
import
absolute_import
import
os
import
tempfile
import
tensorflow
as
tf
from
object_detection
import
model
from
object_detection
import
model_hparams
FLAGS
=
tf
.
flags
.
FLAGS
FASTER_RCNN_MODEL_NAME
=
'faster_rcnn_resnet50_pets'
SSD_INCEPTION_MODEL_NAME
=
'ssd_inception_v2_pets'
PATH_BASE
=
'google3/third_party/tensorflow_models/object_detection/'
def GetPipelineConfigPath(model_name):
  """Returns the path to the local pipeline config file.

  Args:
    model_name: Base name of a sample config (without the '.config'
      extension), e.g. 'ssd_inception_v2_pets'.

  Returns:
    Path to the named config under the test source directory.
  """
  config_filename = model_name + '.config'
  return os.path.join(
      FLAGS.test_srcdir, PATH_BASE, 'samples', 'configs', config_filename)
def InitializeFlags(model_name_for_test):
  """Points the model_dir and pipeline_config_path flags at test resources.

  Args:
    model_name_for_test: Base name of the sample pipeline config to use.
  """
  # A fresh temporary directory keeps test runs isolated from each other.
  temp_model_dir = tempfile.mkdtemp()
  FLAGS.model_dir = temp_model_dir
  FLAGS.pipeline_config_path = GetPipelineConfigPath(model_name_for_test)
def BuildExperiment():
  """Builds an Experiment object for testing purposes.

  Returns:
    A tf.contrib.learn Experiment built from a default RunConfig and
    hparams that disable loading pretrained weights.
  """
  default_run_config = tf.contrib.learn.RunConfig()
  # Skip pretrained-checkpoint loading so the test has no data dependency.
  test_hparams = model_hparams.create_hparams(
      hparams_overrides='load_pretrained=false')

  # pylint: disable=protected-access
  experiment_fn = model._build_experiment_fn(10, 10)
  # pylint: enable=protected-access
  return experiment_fn(default_run_config, test_hparams)
research/object_detection/model_tpu.py
0 → 100644
View file @
fd7b6887
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r
"""Creates and runs `Estimator` for object detection model on TPUs.
This uses the TPUEstimator API to define and run a model in TRAIN/EVAL modes.
"""
# pylint: enable=line-too-long
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
functools
import
os
import
tensorflow
as
tf
from
tensorflow.contrib.tpu.python.tpu
import
tpu_config
from
tensorflow.contrib.tpu.python.tpu
import
tpu_estimator
from
tensorflow.contrib.training.python.training
import
evaluation
from
object_detection
import
inputs
from
object_detection
import
model
from
object_detection
import
model_hparams
from
object_detection.builders
import
model_builder
from
object_detection.utils
import
config_util
# Whether to run on real TPU hardware; set False for CPU-only debugging.
tf.flags.DEFINE_bool('use_tpu', True, 'Use TPUs rather than plain CPUs')

# Cloud TPU Cluster Resolvers
tf.flags.DEFINE_string(
    'gcp_project',
    default=None,
    help='Project name for the Cloud TPU-enabled project. If not specified, we '
    'will attempt to automatically detect the GCE project from metadata.')
tf.flags.DEFINE_string(
    'tpu_zone',
    default=None,
    help='GCE zone where the Cloud TPU is located in. If not specified, we '
    'will attempt to automatically detect the GCE project from metadata.')
tf.flags.DEFINE_string(
    'tpu_name',
    default=None,
    help='Name of the Cloud TPU for Cluster Resolvers. You must specify either '
    'this flag or --master.')

# --master takes precedence over --tpu_name when both are set (see main()).
tf.flags.DEFINE_string(
    'master',
    default=None,
    help='GRPC URL of the master (e.g. grpc://ip.address.of.tpu:8470). You '
    'must specify either this flag or --tpu_name.')

tf.flags.DEFINE_integer('num_shards', 8, 'Number of shards (TPU cores).')
tf.flags.DEFINE_integer('iterations_per_loop', 100,
                        'Number of iterations per TPU training loop.')
# For mode=train_and_eval, evaluation occurs after training is finished.
# Note: independently of steps_per_checkpoint, estimator will save the most
# recent checkpoint every 10 minutes by default for train_and_eval
tf.flags.DEFINE_string('mode', 'train_and_eval',
                       'Mode to run: train, eval, train_and_eval')
# Default is 32 per core across 8 cores (global batch size).
tf.flags.DEFINE_integer('train_batch_size', 32 * 8, 'Batch size for training.')

# For EVAL.
tf.flags.DEFINE_integer('min_eval_interval_secs', 180,
                        'Minimum seconds between evaluations.')
tf.flags.DEFINE_integer(
    'eval_timeout_secs', None,
    'Maximum seconds between checkpoints before evaluation terminates.')

FLAGS = tf.flags.FLAGS
def create_estimator(run_config,
                     hparams,
                     pipeline_config_path,
                     train_steps=None,
                     eval_steps=None,
                     train_batch_size=None,
                     model_fn_creator=model.create_model_fn,
                     use_tpu=False,
                     num_shards=1,
                     params=None,
                     **kwargs):
  """Creates a `TPUEstimator` object along with its input fns and step counts.

  Args:
    run_config: A `RunConfig` (a tpu_config.RunConfig for TPU runs).
    hparams: A `HParams`.
    pipeline_config_path: A path to a pipeline config file.
    train_steps: Number of training steps. If None, the number of training
      steps is set from the `TrainConfig` proto.
    eval_steps: Number of evaluation steps per evaluation cycle. If None, the
      number of evaluation steps is set from the `EvalConfig` proto.
    train_batch_size: Training batch size. If none, use batch size from
      `TrainConfig` proto.
    model_fn_creator: A function that creates a `model_fn` for `Estimator`.
      Follows the signature:
      * Args:
        * `detection_model_fn`: Function that returns `DetectionModel`
          instance.
        * `configs`: Dictionary of pipeline config objects.
        * `hparams`: `HParams` object.
      * Returns:
        `model_fn` for `Estimator`.
    use_tpu: Boolean, whether training and evaluation should run on TPU.
    num_shards: Number of shards (TPU cores).
    params: Parameter dictionary passed from the estimator. Defaults to an
      empty dict.
    **kwargs: Additional keyword arguments for configuration override.

  Returns:
    Estimator: A estimator object used for training and evaluation
    train_input_fn: Input function for the training loop
    eval_input_fn: Input function for the evaluation run
    train_steps: Number of training steps either from arg `train_steps` or
      `TrainConfig` proto
    eval_steps: Number of evaluation steps either from arg `eval_steps` or
      `EvalConfig` proto
  """
  # Load the pipeline config and fold in hparams/CLI overrides before
  # reading any values from it.
  configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
  configs = config_util.merge_external_params_with_configs(
      configs,
      hparams,
      train_steps=train_steps,
      eval_steps=eval_steps,
      batch_size=train_batch_size,
      **kwargs)
  model_config = configs['model']
  train_config = configs['train_config']
  train_input_config = configs['train_input_config']
  eval_config = configs['eval_config']
  eval_input_config = configs['eval_input_config']

  if params is None:
    params = {}

  # Fall back to the proto-configured step counts when not given explicitly.
  # A proto value of 0 is treated as "unset" and mapped to None.
  if train_steps is None:
    train_steps = train_config.num_steps if train_config.num_steps else None
  if eval_steps is None:
    eval_steps = eval_config.num_examples if eval_config.num_examples else None

  detection_model_fn = functools.partial(
      model_builder.build, model_config=model_config)

  # Create the input functions for TRAIN/EVAL.
  train_input_fn = inputs.create_train_input_fn(
      train_config=train_config,
      train_input_config=train_input_config,
      model_config=model_config)
  eval_input_fn = inputs.create_eval_input_fn(
      eval_config=eval_config,
      eval_input_config=eval_input_config,
      model_config=model_config)

  estimator = tpu_estimator.TPUEstimator(
      model_fn=model_fn_creator(detection_model_fn, configs, hparams, use_tpu),
      train_batch_size=train_config.batch_size,
      # For each core, only batch size 1 is supported for eval.
      eval_batch_size=num_shards * 1 if use_tpu else 1,
      use_tpu=use_tpu,
      config=run_config,
      params=params)
  return estimator, train_input_fn, eval_input_fn, train_steps, eval_steps
def main(unused_argv):
  """Entry point: trains and/or evaluates the detection model on TPU.

  Modes (--mode):
    * 'train': train for `train_steps`.
    * 'train_and_eval': train, then run a single evaluation.
    * 'eval': continuously evaluate every new checkpoint in --model_dir
      until the final-training-step checkpoint is seen or the
      checkpoints_iterator times out.

  Raises:
    RuntimeError: if neither --master nor --tpu_name is specified.
  """
  tf.flags.mark_flag_as_required('model_dir')
  tf.flags.mark_flag_as_required('pipeline_config_path')

  if FLAGS.master is None and FLAGS.tpu_name is None:
    raise RuntimeError('You must specify either --master or --tpu_name.')

  # Resolve the TPU master address: --master wins over --tpu_name.
  if FLAGS.master is not None:
    if FLAGS.tpu_name is not None:
      tf.logging.warn('Both --master and --tpu_name are set. Ignoring '
                      '--tpu_name and using --master.')
    tpu_grpc_url = FLAGS.master
  else:
    tpu_cluster_resolver = (
        tf.contrib.cluster_resolver.python.training.TPUClusterResolver(
            tpu_names=[FLAGS.tpu_name],
            zone=FLAGS.tpu_zone,
            project=FLAGS.gcp_project))
    tpu_grpc_url = tpu_cluster_resolver.get_master()

  config = tpu_config.RunConfig(
      master=tpu_grpc_url,
      evaluation_master=tpu_grpc_url,
      model_dir=FLAGS.model_dir,
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_shards))

  params = {}
  estimator, train_input_fn, eval_input_fn, train_steps, eval_steps = (
      create_estimator(
          config,
          model_hparams.create_hparams(),
          FLAGS.pipeline_config_path,
          train_steps=FLAGS.num_train_steps,
          eval_steps=FLAGS.num_eval_steps,
          train_batch_size=FLAGS.train_batch_size,
          use_tpu=FLAGS.use_tpu,
          num_shards=FLAGS.num_shards,
          params=params))

  if FLAGS.mode in ['train', 'train_and_eval']:
    estimator.train(input_fn=train_input_fn, max_steps=train_steps)
  if FLAGS.mode == 'train_and_eval':
    # Eval one time.
    eval_results = estimator.evaluate(input_fn=eval_input_fn,
                                      steps=eval_steps)
    # Lazy %-args: the message is only formatted if INFO logging is enabled.
    tf.logging.info('Eval results: %s', eval_results)

  # Continuously evaluating.
  if FLAGS.mode == 'eval':

    def terminate_eval():
      tf.logging.info('Terminating eval after %d seconds of no checkpoints',
                      FLAGS.eval_timeout_secs)
      return True

    # Run evaluation when there's a new checkpoint.
    for ckpt in evaluation.checkpoints_iterator(
        FLAGS.model_dir,
        min_interval_secs=FLAGS.min_eval_interval_secs,
        timeout=FLAGS.eval_timeout_secs,
        timeout_fn=terminate_eval):

      tf.logging.info('Starting to evaluate.')
      try:
        eval_results = estimator.evaluate(
            input_fn=eval_input_fn, steps=eval_steps, checkpoint_path=ckpt)
        tf.logging.info('Eval results: %s', eval_results)

        # Terminate eval job when final checkpoint is reached.
        # BUGFIX: train_steps can be None when TrainConfig.num_steps is
        # unset; comparing against None either terminated the loop after
        # the first checkpoint (Python 2) or raised TypeError (Python 3).
        # With train_steps None we keep evaluating until timeout instead.
        current_step = int(os.path.basename(ckpt).split('-')[1])
        if train_steps is not None and current_step >= train_steps:
          tf.logging.info('Evaluation finished after training step %d',
                          current_step)
          break
      except tf.errors.NotFoundError:
        # Checkpoints are garbage-collected by the trainer; a vanished one
        # is expected and simply skipped.
        tf.logging.info('Checkpoint %s no longer exists, skipping checkpoint',
                        ckpt)
if __name__ == '__main__':
  # tf.app.run() parses flags and invokes main(unused_argv).
  tf.app.run()
research/object_detection/models/BUILD
View file @
fd7b6887
...
@@ -15,6 +15,7 @@ py_library(
...
@@ -15,6 +15,7 @@ py_library(
],
],
deps
=
[
deps
=
[
"//tensorflow"
,
"//tensorflow"
,
"//tensorflow/models/research/object_detection/utils:ops"
,
],
],
)
)
...
@@ -36,6 +37,7 @@ py_library(
...
@@ -36,6 +37,7 @@ py_library(
],
],
deps
=
[
deps
=
[
"//tensorflow"
,
"//tensorflow"
,
"//tensorflow/models/research/object_detection/utils:test_case"
,
],
],
)
)
...
@@ -47,9 +49,10 @@ py_library(
...
@@ -47,9 +49,10 @@ py_library(
deps
=
[
deps
=
[
":feature_map_generators"
,
":feature_map_generators"
,
"//tensorflow"
,
"//tensorflow"
,
"//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch"
,
"//tensorflow/models/research/object_detection/meta_architectures:ssd_meta_arch"
,
"//tensorflow_models/object_detection/utils:ops"
,
"//tensorflow/models/research/object_detection/utils:ops"
,
"//tensorflow_models/slim:inception_v2"
,
"//tensorflow/models/research/object_detection/utils:shape_utils"
,
"//third_party/tensorflow_models/slim:inception_v2"
,
],
],
)
)
...
@@ -61,9 +64,10 @@ py_library(
...
@@ -61,9 +64,10 @@ py_library(
deps
=
[
deps
=
[
":feature_map_generators"
,
":feature_map_generators"
,
"//tensorflow"
,
"//tensorflow"
,
"//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch"
,
"//tensorflow/models/research/object_detection/meta_architectures:ssd_meta_arch"
,
"//tensorflow_models/object_detection/utils:ops"
,
"//tensorflow/models/research/object_detection/utils:ops"
,
"//tensorflow_models/slim:inception_v3"
,
"//tensorflow/models/research/object_detection/utils:shape_utils"
,
"//third_party/tensorflow_models/slim:inception_v3"
,
],
],
)
)
...
@@ -73,9 +77,10 @@ py_library(
...
@@ -73,9 +77,10 @@ py_library(
deps
=
[
deps
=
[
":feature_map_generators"
,
":feature_map_generators"
,
"//tensorflow"
,
"//tensorflow"
,
"//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch"
,
"//tensorflow/models/research/object_detection/meta_architectures:ssd_meta_arch"
,
"//tensorflow_models/object_detection/utils:ops"
,
"//tensorflow/models/research/object_detection/utils:ops"
,
"//tensorflow_models/slim:mobilenet_v1"
,
"//tensorflow/models/research/object_detection/utils:shape_utils"
,
"//third_party/tensorflow_models/slim:mobilenet_v1"
,
],
],
)
)
...
@@ -86,8 +91,40 @@ py_library(
...
@@ -86,8 +91,40 @@ py_library(
":feature_map_generators"
,
":feature_map_generators"
,
":ssd_mobilenet_v1_feature_extractor"
,
":ssd_mobilenet_v1_feature_extractor"
,
"//tensorflow"
,
"//tensorflow"
,
"//tensorflow_models/object_detection/utils:ops"
,
"//tensorflow/models/research/object_detection/utils:ops"
,
"//tensorflow_models/slim:mobilenet_v1"
,
"//third_party/tensorflow_models/slim:mobilenet_v1"
,
],
)
py_library
(
name
=
"ssd_resnet_v1_fpn_feature_extractor"
,
srcs
=
[
"ssd_resnet_v1_fpn_feature_extractor.py"
],
deps
=
[
":feature_map_generators"
,
"//tensorflow"
,
"//tensorflow/models/research/object_detection/meta_architectures:ssd_meta_arch"
,
"//tensorflow/models/research/object_detection/utils:ops"
,
"//tensorflow/models/research/object_detection/utils:shape_utils"
,
"//third_party/tensorflow_models/slim:resnet_v1"
,
],
)
py_library
(
name
=
"ssd_resnet_v1_fpn_feature_extractor_testbase"
,
srcs
=
[
"ssd_resnet_v1_fpn_feature_extractor_testbase.py"
],
deps
=
[
"//tensorflow/models/research/object_detection/models:ssd_feature_extractor_test"
,
],
)
py_test
(
name
=
"ssd_resnet_v1_fpn_feature_extractor_test"
,
timeout
=
"long"
,
srcs
=
[
"ssd_resnet_v1_fpn_feature_extractor_test.py"
],
deps
=
[
":ssd_resnet_v1_fpn_feature_extractor"
,
":ssd_resnet_v1_fpn_feature_extractor_testbase"
,
"//tensorflow"
,
],
],
)
)
...
@@ -153,8 +190,8 @@ py_library(
...
@@ -153,8 +190,8 @@ py_library(
],
],
deps
=
[
deps
=
[
"//tensorflow"
,
"//tensorflow"
,
"//tensorflow
_
models/object_detection/meta_architectures:faster_rcnn_meta_arch"
,
"//tensorflow
/
models/
research/
object_detection/meta_architectures:faster_rcnn_meta_arch"
,
"//tensorflow_models/slim:nasnet"
,
"//
third_party/
tensorflow_models/slim:nasnet"
,
],
],
)
)
...
@@ -165,8 +202,8 @@ py_library(
...
@@ -165,8 +202,8 @@ py_library(
],
],
deps
=
[
deps
=
[
"//tensorflow"
,
"//tensorflow"
,
"//tensorflow
_
models/object_detection/meta_architectures:faster_rcnn_meta_arch"
,
"//tensorflow
/
models/
research/
object_detection/meta_architectures:faster_rcnn_meta_arch"
,
"//tensorflow_models/slim:inception_resnet_v2"
,
"//
third_party/
tensorflow_models/slim:inception_resnet_v2"
,
],
],
)
)
...
@@ -188,8 +225,8 @@ py_library(
...
@@ -188,8 +225,8 @@ py_library(
],
],
deps
=
[
deps
=
[
"//tensorflow"
,
"//tensorflow"
,
"//tensorflow
_
models/object_detection/meta_architectures:faster_rcnn_meta_arch"
,
"//tensorflow
/
models/
research/
object_detection/meta_architectures:faster_rcnn_meta_arch"
,
"//tensorflow_models/slim:inception_v2"
,
"//
third_party/
tensorflow_models/slim:inception_v2"
,
],
],
)
)
...
@@ -211,9 +248,9 @@ py_library(
...
@@ -211,9 +248,9 @@ py_library(
],
],
deps
=
[
deps
=
[
"//tensorflow"
,
"//tensorflow"
,
"//tensorflow
_
models/object_detection/meta_architectures:faster_rcnn_meta_arch"
,
"//tensorflow
/
models/
research/
object_detection/meta_architectures:faster_rcnn_meta_arch"
,
"//tensorflow_models/slim:resnet_utils"
,
"//
third_party/
tensorflow_models/slim:resnet_utils"
,
"//tensorflow_models/slim:resnet_v1"
,
"//
third_party/
tensorflow_models/slim:resnet_v1"
,
],
],
)
)
...
...
research/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor.py
View file @
fd7b6887
...
@@ -51,7 +51,9 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
...
@@ -51,7 +51,9 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
pad_to_multiple
,
pad_to_multiple
,
conv_hyperparams
,
conv_hyperparams
,
batch_norm_trainable
=
True
,
batch_norm_trainable
=
True
,
reuse_weights
=
None
):
reuse_weights
=
None
,
use_explicit_padding
=
False
,
use_depthwise
=
False
):
"""MobileNetV1 Feature Extractor for Embedded-friendly SSD Models.
"""MobileNetV1 Feature Extractor for Embedded-friendly SSD Models.
Args:
Args:
...
@@ -66,6 +68,9 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
...
@@ -66,6 +68,9 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
(e.g. 1), it is desirable to disable batch norm update and use
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
Raises:
Raises:
ValueError: upon invalid `pad_to_multiple` values.
ValueError: upon invalid `pad_to_multiple` values.
...
@@ -76,7 +81,8 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
...
@@ -76,7 +81,8 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
super
(
EmbeddedSSDMobileNetV1FeatureExtractor
,
self
).
__init__
(
super
(
EmbeddedSSDMobileNetV1FeatureExtractor
,
self
).
__init__
(
is_training
,
depth_multiplier
,
min_depth
,
pad_to_multiple
,
is_training
,
depth_multiplier
,
min_depth
,
pad_to_multiple
,
conv_hyperparams
,
batch_norm_trainable
,
reuse_weights
)
conv_hyperparams
,
batch_norm_trainable
,
reuse_weights
,
use_explicit_padding
,
use_depthwise
)
def
extract_features
(
self
,
preprocessed_inputs
):
def
extract_features
(
self
,
preprocessed_inputs
):
"""Extract features from preprocessed inputs.
"""Extract features from preprocessed inputs.
...
@@ -88,13 +94,25 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
...
@@ -88,13 +94,25 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
Returns:
Returns:
feature_maps: a list of tensors where the ith tensor has shape
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
[batch, height_i, width_i, depth_i]
Raises:
ValueError: if image height or width are not 256 pixels.
"""
"""
preprocessed_inputs
.
get_shape
().
assert_has_rank
(
4
)
image_shape
=
preprocessed_inputs
.
get_shape
()
shape_assert
=
tf
.
Assert
(
image_shape
.
assert_has_rank
(
4
)
tf
.
logical_and
(
image_height
=
image_shape
[
1
].
value
tf
.
equal
(
tf
.
shape
(
preprocessed_inputs
)[
1
],
256
),
image_width
=
image_shape
[
2
].
value
tf
.
equal
(
tf
.
shape
(
preprocessed_inputs
)[
2
],
256
)),
[
'image size must be 256 in both height and width.'
])
if
image_height
is
None
or
image_width
is
None
:
shape_assert
=
tf
.
Assert
(
tf
.
logical_and
(
tf
.
equal
(
tf
.
shape
(
preprocessed_inputs
)[
1
],
256
),
tf
.
equal
(
tf
.
shape
(
preprocessed_inputs
)[
2
],
256
)),
[
'image size must be 256 in both height and width.'
])
with
tf
.
control_dependencies
([
shape_assert
]):
preprocessed_inputs
=
tf
.
identity
(
preprocessed_inputs
)
elif
image_height
!=
256
or
image_width
!=
256
:
raise
ValueError
(
'image size must be = 256 in both height and width;'
' image dim = %d,%d'
%
(
image_height
,
image_width
))
feature_map_layout
=
{
feature_map_layout
=
{
'from_layer'
:
[
'from_layer'
:
[
...
@@ -102,10 +120,12 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
...
@@ -102,10 +120,12 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
],
],
'layer_depth'
:
[
-
1
,
-
1
,
512
,
256
,
256
],
'layer_depth'
:
[
-
1
,
-
1
,
512
,
256
,
256
],
'conv_kernel_size'
:
[
-
1
,
-
1
,
3
,
3
,
2
],
'conv_kernel_size'
:
[
-
1
,
-
1
,
3
,
3
,
2
],
'use_explicit_padding'
:
self
.
_use_explicit_padding
,
'use_depthwise'
:
self
.
_use_depthwise
,
}
}
with
tf
.
control_dependencies
([
shape_assert
]
):
with
slim
.
arg_scope
(
self
.
_conv_hyperparams
):
with
slim
.
arg_scope
(
self
.
_conv_hyperparams
):
with
slim
.
arg_scope
(
[
slim
.
batch_norm
],
fused
=
False
):
with
tf
.
variable_scope
(
'MobilenetV1'
,
with
tf
.
variable_scope
(
'MobilenetV1'
,
reuse
=
self
.
_reuse_weights
)
as
scope
:
reuse
=
self
.
_reuse_weights
)
as
scope
:
_
,
image_features
=
mobilenet_v1
.
mobilenet_v1_base
(
_
,
image_features
=
mobilenet_v1
.
mobilenet_v1_base
(
...
...
research/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor_test.py
View file @
fd7b6887
...
@@ -22,7 +22,7 @@ from object_detection.models import ssd_feature_extractor_test
...
@@ -22,7 +22,7 @@ from object_detection.models import ssd_feature_extractor_test
class
EmbeddedSSDMobileNetV1FeatureExtractorTest
(
class
EmbeddedSSDMobileNetV1FeatureExtractorTest
(
ssd_feature_extractor_test
.
SsdFeatureExtractorTestBase
,
tf
.
test
.
TestCase
):
ssd_feature_extractor_test
.
SsdFeatureExtractorTestBase
):
def
_create_feature_extractor
(
self
,
depth_multiplier
,
pad_to_multiple
,
def
_create_feature_extractor
(
self
,
depth_multiplier
,
pad_to_multiple
,
is_training
=
True
,
batch_norm_trainable
=
True
):
is_training
=
True
,
batch_norm_trainable
=
True
):
...
@@ -51,11 +51,23 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest(
...
@@ -51,11 +51,23 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest(
image_width
=
256
image_width
=
256
depth_multiplier
=
1.0
depth_multiplier
=
1.0
pad_to_multiple
=
1
pad_to_multiple
=
1
expected_feature_map_shape
=
[(
4
,
16
,
16
,
512
),
(
4
,
8
,
8
,
1024
),
expected_feature_map_shape
=
[(
2
,
16
,
16
,
512
),
(
2
,
8
,
8
,
1024
),
(
4
,
4
,
4
,
512
),
(
4
,
2
,
2
,
256
),
(
2
,
4
,
4
,
512
),
(
2
,
2
,
2
,
256
),
(
4
,
1
,
1
,
256
)]
(
2
,
1
,
1
,
256
)]
self
.
check_extract_features_returns_correct_shape
(
self
.
check_extract_features_returns_correct_shape
(
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
2
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
expected_feature_map_shape
)
def
test_extract_features_returns_correct_shapes_with_dynamic_inputs
(
self
):
image_height
=
256
image_width
=
256
depth_multiplier
=
1.0
pad_to_multiple
=
1
expected_feature_map_shape
=
[(
2
,
16
,
16
,
512
),
(
2
,
8
,
8
,
1024
),
(
2
,
4
,
4
,
512
),
(
2
,
2
,
2
,
256
),
(
2
,
1
,
1
,
256
)]
self
.
check_extract_features_returns_correct_shapes_with_dynamic_inputs
(
2
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
expected_feature_map_shape
)
expected_feature_map_shape
)
def
test_extract_features_returns_correct_shapes_enforcing_min_depth
(
self
):
def
test_extract_features_returns_correct_shapes_enforcing_min_depth
(
self
):
...
@@ -63,10 +75,10 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest(
...
@@ -63,10 +75,10 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest(
image_width
=
256
image_width
=
256
depth_multiplier
=
0.5
**
12
depth_multiplier
=
0.5
**
12
pad_to_multiple
=
1
pad_to_multiple
=
1
expected_feature_map_shape
=
[(
4
,
16
,
16
,
32
),
(
4
,
8
,
8
,
32
),
(
4
,
4
,
4
,
32
),
expected_feature_map_shape
=
[(
2
,
16
,
16
,
32
),
(
2
,
8
,
8
,
32
),
(
2
,
4
,
4
,
32
),
(
4
,
2
,
2
,
32
),
(
4
,
1
,
1
,
32
)]
(
2
,
2
,
2
,
32
),
(
2
,
1
,
1
,
32
)]
self
.
check_extract_features_returns_correct_shape
(
self
.
check_extract_features_returns_correct_shape
(
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
2
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
expected_feature_map_shape
)
expected_feature_map_shape
)
def
test_extract_features_returns_correct_shapes_with_pad_to_multiple_of_1
(
def
test_extract_features_returns_correct_shapes_with_pad_to_multiple_of_1
(
...
@@ -75,11 +87,11 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest(
...
@@ -75,11 +87,11 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest(
image_width
=
256
image_width
=
256
depth_multiplier
=
1.0
depth_multiplier
=
1.0
pad_to_multiple
=
1
pad_to_multiple
=
1
expected_feature_map_shape
=
[(
4
,
16
,
16
,
512
),
(
4
,
8
,
8
,
1024
),
expected_feature_map_shape
=
[(
2
,
16
,
16
,
512
),
(
2
,
8
,
8
,
1024
),
(
4
,
4
,
4
,
512
),
(
4
,
2
,
2
,
256
),
(
2
,
4
,
4
,
512
),
(
2
,
2
,
2
,
256
),
(
4
,
1
,
1
,
256
)]
(
2
,
1
,
1
,
256
)]
self
.
check_extract_features_returns_correct_shape
(
self
.
check_extract_features_returns_correct_shape
(
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
2
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
expected_feature_map_shape
)
expected_feature_map_shape
)
def
test_extract_features_raises_error_with_pad_to_multiple_not_1
(
self
):
def
test_extract_features_raises_error_with_pad_to_multiple_not_1
(
self
):
...
...
research/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor.py
View file @
fd7b6887
...
@@ -180,7 +180,7 @@ class FasterRCNNInceptionResnetV2FeatureExtractor(
...
@@ -180,7 +180,7 @@ class FasterRCNNInceptionResnetV2FeatureExtractor(
faster_rcnn_meta_arch.FasterRCNNFeatureExtractor which does not work for
faster_rcnn_meta_arch.FasterRCNNFeatureExtractor which does not work for
InceptionResnetV2 checkpoints.
InceptionResnetV2 checkpoints.
TODO: revisit whether it's possible to force the
TODO
(jonathanhuang,rathodv)
: revisit whether it's possible to force the
`Repeat` namescope as created in `_extract_box_classifier_features` to
`Repeat` namescope as created in `_extract_box_classifier_features` to
start counting at 2 (e.g. `Repeat_2`) so that the default restore_fn can
start counting at 2 (e.g. `Repeat_2`) so that the default restore_fn can
be used.
be used.
...
...
research/object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py
View file @
fd7b6887
...
@@ -111,7 +111,8 @@ class FasterRCNNResnetV1FeatureExtractor(
...
@@ -111,7 +111,8 @@ class FasterRCNNResnetV1FeatureExtractor(
with
tf
.
control_dependencies
([
shape_assert
]):
with
tf
.
control_dependencies
([
shape_assert
]):
# Disables batchnorm for fine-tuning with smaller batch sizes.
# Disables batchnorm for fine-tuning with smaller batch sizes.
# TODO: Figure out if it is needed when image batch size is bigger.
# TODO: Figure out if it is needed when image
# batch size is bigger.
with
slim
.
arg_scope
(
with
slim
.
arg_scope
(
resnet_utils
.
resnet_arg_scope
(
resnet_utils
.
resnet_arg_scope
(
batch_norm_epsilon
=
1e-5
,
batch_norm_epsilon
=
1e-5
,
...
...
research/object_detection/models/feature_map_generators.py
View file @
fd7b6887
...
@@ -25,6 +25,7 @@ of final feature maps.
...
@@ -25,6 +25,7 @@ of final feature maps.
"""
"""
import
collections
import
collections
import
tensorflow
as
tf
import
tensorflow
as
tf
from
object_detection.utils
import
ops
slim
=
tf
.
contrib
.
slim
slim
=
tf
.
contrib
.
slim
...
@@ -115,6 +116,9 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
...
@@ -115,6 +116,9 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
feature_map_keys
=
[]
feature_map_keys
=
[]
feature_maps
=
[]
feature_maps
=
[]
base_from_layer
=
''
base_from_layer
=
''
use_explicit_padding
=
False
if
'use_explicit_padding'
in
feature_map_layout
:
use_explicit_padding
=
feature_map_layout
[
'use_explicit_padding'
]
use_depthwise
=
False
use_depthwise
=
False
if
'use_depthwise'
in
feature_map_layout
:
if
'use_depthwise'
in
feature_map_layout
:
use_depthwise
=
feature_map_layout
[
'use_depthwise'
]
use_depthwise
=
feature_map_layout
[
'use_depthwise'
]
...
@@ -139,16 +143,21 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
...
@@ -139,16 +143,21 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
padding
=
'SAME'
,
padding
=
'SAME'
,
stride
=
1
,
stride
=
1
,
scope
=
layer_name
)
scope
=
layer_name
)
stride
=
2
layer_name
=
'{}_2_Conv2d_{}_{}x{}_s2_{}'
.
format
(
layer_name
=
'{}_2_Conv2d_{}_{}x{}_s2_{}'
.
format
(
base_from_layer
,
index
,
conv_kernel_size
,
conv_kernel_size
,
base_from_layer
,
index
,
conv_kernel_size
,
conv_kernel_size
,
depth_fn
(
layer_depth
))
depth_fn
(
layer_depth
))
stride
=
2
padding
=
'SAME'
if
use_explicit_padding
:
padding
=
'VALID'
intermediate_layer
=
ops
.
fixed_padding
(
intermediate_layer
,
conv_kernel_size
)
if
use_depthwise
:
if
use_depthwise
:
feature_map
=
slim
.
separable_conv2d
(
feature_map
=
slim
.
separable_conv2d
(
intermediate_layer
,
intermediate_layer
,
None
,
[
conv_kernel_size
,
conv_kernel_size
],
None
,
[
conv_kernel_size
,
conv_kernel_size
],
depth_multiplier
=
1
,
depth_multiplier
=
1
,
padding
=
'SAME'
,
padding
=
padding
,
stride
=
stride
,
stride
=
stride
,
scope
=
layer_name
+
'_depthwise'
)
scope
=
layer_name
+
'_depthwise'
)
feature_map
=
slim
.
conv2d
(
feature_map
=
slim
.
conv2d
(
...
@@ -161,10 +170,56 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
...
@@ -161,10 +170,56 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
feature_map
=
slim
.
conv2d
(
feature_map
=
slim
.
conv2d
(
intermediate_layer
,
intermediate_layer
,
depth_fn
(
layer_depth
),
[
conv_kernel_size
,
conv_kernel_size
],
depth_fn
(
layer_depth
),
[
conv_kernel_size
,
conv_kernel_size
],
padding
=
'SAME'
,
padding
=
padding
,
stride
=
stride
,
stride
=
stride
,
scope
=
layer_name
)
scope
=
layer_name
)
feature_map_keys
.
append
(
layer_name
)
feature_map_keys
.
append
(
layer_name
)
feature_maps
.
append
(
feature_map
)
feature_maps
.
append
(
feature_map
)
return
collections
.
OrderedDict
(
return
collections
.
OrderedDict
(
[(
x
,
y
)
for
(
x
,
y
)
in
zip
(
feature_map_keys
,
feature_maps
)])
[(
x
,
y
)
for
(
x
,
y
)
in
zip
(
feature_map_keys
,
feature_maps
)])
def
fpn_top_down_feature_maps
(
image_features
,
depth
,
scope
=
None
):
"""Generates `top-down` feature maps for Feature Pyramid Networks.
See https://arxiv.org/abs/1612.03144 for details.
Args:
image_features: list of image feature tensors. Spatial resolutions of
succesive tensors must reduce exactly by a factor of 2.
depth: depth of output feature maps.
scope: A scope name to wrap this op under.
Returns:
feature_maps: an OrderedDict mapping keys (feature map names) to
tensors where each tensor has shape [batch, height_i, width_i, depth_i].
"""
with
tf
.
variable_scope
(
scope
,
'top_down'
,
image_features
):
num_levels
=
len
(
image_features
)
output_feature_maps_list
=
[]
output_feature_map_keys
=
[]
with
slim
.
arg_scope
(
[
slim
.
conv2d
],
activation_fn
=
None
,
normalizer_fn
=
None
,
padding
=
'SAME'
,
stride
=
1
):
top_down
=
slim
.
conv2d
(
image_features
[
-
1
],
depth
,
[
1
,
1
],
scope
=
'projection_%d'
%
num_levels
)
output_feature_maps_list
.
append
(
top_down
)
output_feature_map_keys
.
append
(
'top_down_feature_map_%d'
%
(
num_levels
-
1
))
for
level
in
reversed
(
range
(
num_levels
-
1
)):
top_down
=
ops
.
nearest_neighbor_upsampling
(
top_down
,
2
)
residual
=
slim
.
conv2d
(
image_features
[
level
],
depth
,
[
1
,
1
],
scope
=
'projection_%d'
%
(
level
+
1
))
top_down
=
0.5
*
top_down
+
0.5
*
residual
output_feature_maps_list
.
append
(
slim
.
conv2d
(
top_down
,
depth
,
[
3
,
3
],
activation_fn
=
None
,
scope
=
'smoothing_%d'
%
(
level
+
1
)))
output_feature_map_keys
.
append
(
'top_down_feature_map_%d'
%
level
)
return
collections
.
OrderedDict
(
reversed
(
zip
(
output_feature_map_keys
,
output_feature_maps_list
)))
research/object_detection/models/feature_map_generators_test.py
View file @
fd7b6887
...
@@ -40,7 +40,7 @@ EMBEDDED_SSD_MOBILENET_V1_LAYOUT = {
...
@@ -40,7 +40,7 @@ EMBEDDED_SSD_MOBILENET_V1_LAYOUT = {
}
}
# TODO
(rathodv)
: add tests with different anchor strides.
# TODO: add tests with different anchor strides.
class
MultiResolutionFeatureMapGeneratorTest
(
tf
.
test
.
TestCase
):
class
MultiResolutionFeatureMapGeneratorTest
(
tf
.
test
.
TestCase
):
def
test_get_expected_feature_map_shapes_with_inception_v2
(
self
):
def
test_get_expected_feature_map_shapes_with_inception_v2
(
self
):
...
@@ -134,6 +134,34 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
...
@@ -134,6 +134,34 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
self
.
assertDictEqual
(
out_feature_map_shapes
,
expected_feature_map_shapes
)
self
.
assertDictEqual
(
out_feature_map_shapes
,
expected_feature_map_shapes
)
class
FPNFeatureMapGeneratorTest
(
tf
.
test
.
TestCase
):
def
test_get_expected_feature_map_shapes
(
self
):
image_features
=
[
tf
.
random_uniform
([
4
,
8
,
8
,
256
],
dtype
=
tf
.
float32
),
tf
.
random_uniform
([
4
,
4
,
4
,
256
],
dtype
=
tf
.
float32
),
tf
.
random_uniform
([
4
,
2
,
2
,
256
],
dtype
=
tf
.
float32
),
tf
.
random_uniform
([
4
,
1
,
1
,
256
],
dtype
=
tf
.
float32
),
]
feature_maps
=
feature_map_generators
.
fpn_top_down_feature_maps
(
image_features
=
image_features
,
depth
=
128
)
expected_feature_map_shapes
=
{
'top_down_feature_map_0'
:
(
4
,
8
,
8
,
128
),
'top_down_feature_map_1'
:
(
4
,
4
,
4
,
128
),
'top_down_feature_map_2'
:
(
4
,
2
,
2
,
128
),
'top_down_feature_map_3'
:
(
4
,
1
,
1
,
128
)
}
init_op
=
tf
.
global_variables_initializer
()
with
self
.
test_session
()
as
sess
:
sess
.
run
(
init_op
)
out_feature_maps
=
sess
.
run
(
feature_maps
)
out_feature_map_shapes
=
{
key
:
value
.
shape
for
key
,
value
in
out_feature_maps
.
items
()}
self
.
assertDictEqual
(
out_feature_map_shapes
,
expected_feature_map_shapes
)
class
GetDepthFunctionTest
(
tf
.
test
.
TestCase
):
class
GetDepthFunctionTest
(
tf
.
test
.
TestCase
):
def
test_return_min_depth_when_multiplier_is_small
(
self
):
def
test_return_min_depth_when_multiplier_is_small
(
self
):
...
...
research/object_detection/models/ssd_feature_extractor_test.py
View file @
fd7b6887
...
@@ -17,33 +17,14 @@
...
@@ -17,33 +17,14 @@
from
abc
import
abstractmethod
from
abc
import
abstractmethod
import
itertools
import
numpy
as
np
import
numpy
as
np
import
tensorflow
as
tf
import
tensorflow
as
tf
from
object_detection.utils
import
test_case
class
SsdFeatureExtractorTestBase
(
object
):
def
_validate_features_shape
(
self
,
class
SsdFeatureExtractorTestBase
(
test_case
.
TestCase
):
feature_extractor
,
preprocessed_inputs
,
expected_feature_map_shapes
):
"""Checks the extracted features are of correct shape.
Args:
feature_extractor: The feature extractor to test.
preprocessed_inputs: A [batch, height, width, 3] tensor to extract
features with.
expected_feature_map_shapes: The expected shape of the extracted features.
"""
feature_maps
=
feature_extractor
.
extract_features
(
preprocessed_inputs
)
feature_map_shapes
=
[
tf
.
shape
(
feature_map
)
for
feature_map
in
feature_maps
]
init_op
=
tf
.
global_variables_initializer
()
with
self
.
test_session
()
as
sess
:
sess
.
run
(
init_op
)
feature_map_shapes_out
=
sess
.
run
(
feature_map_shapes
)
for
shape_out
,
exp_shape_out
in
zip
(
feature_map_shapes_out
,
expected_feature_map_shapes
):
self
.
assertAllEqual
(
shape_out
,
exp_shape_out
)
@
abstractmethod
@
abstractmethod
def
_create_feature_extractor
(
self
,
depth_multiplier
,
pad_to_multiple
):
def
_create_feature_extractor
(
self
,
depth_multiplier
,
pad_to_multiple
):
...
@@ -59,14 +40,39 @@ class SsdFeatureExtractorTestBase(object):
...
@@ -59,14 +40,39 @@ class SsdFeatureExtractorTestBase(object):
pass
pass
def
check_extract_features_returns_correct_shape
(
def
check_extract_features_returns_correct_shape
(
self
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
self
,
batch_size
,
image_height
,
image_width
,
depth_multiplier
,
expected_feature_map_shapes_out
):
pad_to_multiple
,
expected_feature_map_shapes
):
feature_extractor
=
self
.
_create_feature_extractor
(
depth_multiplier
,
def
graph_fn
(
image_tensor
):
pad_to_multiple
)
feature_extractor
=
self
.
_create_feature_extractor
(
depth_multiplier
,
preprocessed_inputs
=
tf
.
random_uniform
(
pad_to_multiple
)
[
4
,
image_height
,
image_width
,
3
],
dtype
=
tf
.
float32
)
feature_maps
=
feature_extractor
.
extract_features
(
image_tensor
)
self
.
_validate_features_shape
(
return
feature_maps
feature_extractor
,
preprocessed_inputs
,
expected_feature_map_shapes_out
)
image_tensor
=
np
.
random
.
rand
(
batch_size
,
image_height
,
image_width
,
3
).
astype
(
np
.
float32
)
feature_maps
=
self
.
execute
(
graph_fn
,
[
image_tensor
])
for
feature_map
,
expected_shape
in
itertools
.
izip
(
feature_maps
,
expected_feature_map_shapes
):
self
.
assertAllEqual
(
feature_map
.
shape
,
expected_shape
)
def
check_extract_features_returns_correct_shapes_with_dynamic_inputs
(
self
,
batch_size
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
expected_feature_map_shapes
):
def
graph_fn
(
image_height
,
image_width
):
feature_extractor
=
self
.
_create_feature_extractor
(
depth_multiplier
,
pad_to_multiple
)
image_tensor
=
tf
.
random_uniform
([
batch_size
,
image_height
,
image_width
,
3
],
dtype
=
tf
.
float32
)
feature_maps
=
feature_extractor
.
extract_features
(
image_tensor
)
return
feature_maps
feature_maps
=
self
.
execute_cpu
(
graph_fn
,
[
np
.
array
(
image_height
,
dtype
=
np
.
int32
),
np
.
array
(
image_width
,
dtype
=
np
.
int32
)
])
for
feature_map
,
expected_shape
in
itertools
.
izip
(
feature_maps
,
expected_feature_map_shapes
):
self
.
assertAllEqual
(
feature_map
.
shape
,
expected_shape
)
def
check_extract_features_raises_error_with_invalid_image_size
(
def
check_extract_features_raises_error_with_invalid_image_size
(
self
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
):
self
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
):
...
...
research/object_detection/models/ssd_inception_v2_feature_extractor.py
View file @
fd7b6887
...
@@ -19,6 +19,7 @@ import tensorflow as tf
...
@@ -19,6 +19,7 @@ import tensorflow as tf
from
object_detection.meta_architectures
import
ssd_meta_arch
from
object_detection.meta_architectures
import
ssd_meta_arch
from
object_detection.models
import
feature_map_generators
from
object_detection.models
import
feature_map_generators
from
object_detection.utils
import
ops
from
object_detection.utils
import
ops
from
object_detection.utils
import
shape_utils
from
nets
import
inception_v2
from
nets
import
inception_v2
slim
=
tf
.
contrib
.
slim
slim
=
tf
.
contrib
.
slim
...
@@ -34,7 +35,9 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
...
@@ -34,7 +35,9 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
pad_to_multiple
,
pad_to_multiple
,
conv_hyperparams
,
conv_hyperparams
,
batch_norm_trainable
=
True
,
batch_norm_trainable
=
True
,
reuse_weights
=
None
):
reuse_weights
=
None
,
use_explicit_padding
=
False
,
use_depthwise
=
False
):
"""InceptionV2 Feature Extractor for SSD Models.
"""InceptionV2 Feature Extractor for SSD Models.
Args:
Args:
...
@@ -49,10 +52,14 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
...
@@ -49,10 +52,14 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
(e.g. 1), it is desirable to disable batch norm update and use
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
"""
"""
super
(
SSDInceptionV2FeatureExtractor
,
self
).
__init__
(
super
(
SSDInceptionV2FeatureExtractor
,
self
).
__init__
(
is_training
,
depth_multiplier
,
min_depth
,
pad_to_multiple
,
is_training
,
depth_multiplier
,
min_depth
,
pad_to_multiple
,
conv_hyperparams
,
batch_norm_trainable
,
reuse_weights
)
conv_hyperparams
,
batch_norm_trainable
,
reuse_weights
,
use_explicit_padding
,
use_depthwise
)
def
preprocess
(
self
,
resized_inputs
):
def
preprocess
(
self
,
resized_inputs
):
"""SSD preprocessing.
"""SSD preprocessing.
...
@@ -80,32 +87,30 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
...
@@ -80,32 +87,30 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
feature_maps: a list of tensors where the ith tensor has shape
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
[batch, height_i, width_i, depth_i]
"""
"""
preprocessed_inputs
.
get_shape
().
assert_has_rank
(
4
)
preprocessed_inputs
=
shape_utils
.
check_min_image_dim
(
shape_assert
=
tf
.
Assert
(
33
,
preprocessed_inputs
)
tf
.
logical_and
(
tf
.
greater_equal
(
tf
.
shape
(
preprocessed_inputs
)[
1
],
33
),
tf
.
greater_equal
(
tf
.
shape
(
preprocessed_inputs
)[
2
],
33
)),
[
'image size must at least be 33 in both height and width.'
])
feature_map_layout
=
{
feature_map_layout
=
{
'from_layer'
:
[
'Mixed_4c'
,
'Mixed_5c'
,
''
,
''
,
''
,
''
],
'from_layer'
:
[
'Mixed_4c'
,
'Mixed_5c'
,
''
,
''
,
''
,
''
],
'layer_depth'
:
[
-
1
,
-
1
,
512
,
256
,
256
,
128
],
'layer_depth'
:
[
-
1
,
-
1
,
512
,
256
,
256
,
128
],
'use_explicit_padding'
:
self
.
_use_explicit_padding
,
'use_depthwise'
:
self
.
_use_depthwise
,
}
}
with
tf
.
control_dependencies
([
shape_assert
]):
with
slim
.
arg_scope
(
self
.
_conv_hyperparams
):
with
slim
.
arg_scope
(
self
.
_conv_hyperparams
):
with
tf
.
variable_scope
(
'InceptionV2'
,
with
tf
.
variable_scope
(
'InceptionV2'
,
reuse
=
self
.
_reuse_weights
)
as
scope
:
reuse
=
self
.
_reuse_weights
)
as
scope
:
_
,
image_features
=
inception_v2
.
inception_v2_base
(
_
,
image_features
=
inception_v2
.
inception_v2_base
(
ops
.
pad_to_multiple
(
preprocessed_inputs
,
self
.
_pad_to_multiple
),
ops
.
pad_to_multiple
(
preprocessed_inputs
,
self
.
_pad_to_multiple
),
final_endpoint
=
'Mixed_5c'
,
final_endpoint
=
'Mixed_5c'
,
min_depth
=
self
.
_min_depth
,
min_depth
=
self
.
_min_depth
,
depth_multiplier
=
self
.
_depth_multiplier
,
depth_multiplier
=
self
.
_depth_multiplier
,
scope
=
scope
)
scope
=
scope
)
feature_maps
=
feature_map_generators
.
multi_resolution_feature_maps
(
feature_maps
=
feature_map_generators
.
multi_resolution_feature_maps
(
feature_map_layout
=
feature_map_layout
,
feature_map_layout
=
feature_map_layout
,
depth_multiplier
=
self
.
_depth_multiplier
,
depth_multiplier
=
self
.
_depth_multiplier
,
min_depth
=
self
.
_min_depth
,
min_depth
=
self
.
_min_depth
,
insert_1x1_conv
=
True
,
insert_1x1_conv
=
True
,
image_features
=
image_features
)
image_features
=
image_features
)
return
feature_maps
.
values
()
return
feature_maps
.
values
()
research/object_detection/models/ssd_inception_v2_feature_extractor_test.py
View file @
fd7b6887
...
@@ -22,7 +22,7 @@ from object_detection.models import ssd_inception_v2_feature_extractor
...
@@ -22,7 +22,7 @@ from object_detection.models import ssd_inception_v2_feature_extractor
class
SsdInceptionV2FeatureExtractorTest
(
class
SsdInceptionV2FeatureExtractorTest
(
ssd_feature_extractor_test
.
SsdFeatureExtractorTestBase
,
tf
.
test
.
TestCase
):
ssd_feature_extractor_test
.
SsdFeatureExtractorTestBase
):
def
_create_feature_extractor
(
self
,
depth_multiplier
,
pad_to_multiple
,
def
_create_feature_extractor
(
self
,
depth_multiplier
,
pad_to_multiple
,
is_training
=
True
,
batch_norm_trainable
=
True
):
is_training
=
True
,
batch_norm_trainable
=
True
):
...
@@ -49,11 +49,23 @@ class SsdInceptionV2FeatureExtractorTest(
...
@@ -49,11 +49,23 @@ class SsdInceptionV2FeatureExtractorTest(
image_width
=
128
image_width
=
128
depth_multiplier
=
1.0
depth_multiplier
=
1.0
pad_to_multiple
=
1
pad_to_multiple
=
1
expected_feature_map_shape
=
[(
4
,
8
,
8
,
576
),
(
4
,
4
,
4
,
1024
),
expected_feature_map_shape
=
[(
2
,
8
,
8
,
576
),
(
2
,
4
,
4
,
1024
),
(
4
,
2
,
2
,
512
),
(
4
,
1
,
1
,
256
),
(
2
,
2
,
2
,
512
),
(
2
,
1
,
1
,
256
),
(
4
,
1
,
1
,
256
),
(
4
,
1
,
1
,
128
)]
(
2
,
1
,
1
,
256
),
(
2
,
1
,
1
,
128
)]
self
.
check_extract_features_returns_correct_shape
(
self
.
check_extract_features_returns_correct_shape
(
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
2
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
expected_feature_map_shape
)
def
test_extract_features_returns_correct_shapes_with_dynamic_inputs
(
self
):
image_height
=
128
image_width
=
128
depth_multiplier
=
1.0
pad_to_multiple
=
1
expected_feature_map_shape
=
[(
2
,
8
,
8
,
576
),
(
2
,
4
,
4
,
1024
),
(
2
,
2
,
2
,
512
),
(
2
,
1
,
1
,
256
),
(
2
,
1
,
1
,
256
),
(
2
,
1
,
1
,
128
)]
self
.
check_extract_features_returns_correct_shapes_with_dynamic_inputs
(
2
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
expected_feature_map_shape
)
expected_feature_map_shape
)
def
test_extract_features_returns_correct_shapes_299
(
self
):
def
test_extract_features_returns_correct_shapes_299
(
self
):
...
@@ -61,11 +73,11 @@ class SsdInceptionV2FeatureExtractorTest(
...
@@ -61,11 +73,11 @@ class SsdInceptionV2FeatureExtractorTest(
image_width
=
299
image_width
=
299
depth_multiplier
=
1.0
depth_multiplier
=
1.0
pad_to_multiple
=
1
pad_to_multiple
=
1
expected_feature_map_shape
=
[(
4
,
19
,
19
,
576
),
(
4
,
10
,
10
,
1024
),
expected_feature_map_shape
=
[(
2
,
19
,
19
,
576
),
(
2
,
10
,
10
,
1024
),
(
4
,
5
,
5
,
512
),
(
4
,
3
,
3
,
256
),
(
2
,
5
,
5
,
512
),
(
2
,
3
,
3
,
256
),
(
4
,
2
,
2
,
256
),
(
4
,
1
,
1
,
128
)]
(
2
,
2
,
2
,
256
),
(
2
,
1
,
1
,
128
)]
self
.
check_extract_features_returns_correct_shape
(
self
.
check_extract_features_returns_correct_shape
(
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
2
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
expected_feature_map_shape
)
expected_feature_map_shape
)
def
test_extract_features_returns_correct_shapes_enforcing_min_depth
(
self
):
def
test_extract_features_returns_correct_shapes_enforcing_min_depth
(
self
):
...
@@ -73,11 +85,11 @@ class SsdInceptionV2FeatureExtractorTest(
...
@@ -73,11 +85,11 @@ class SsdInceptionV2FeatureExtractorTest(
image_width
=
299
image_width
=
299
depth_multiplier
=
0.5
**
12
depth_multiplier
=
0.5
**
12
pad_to_multiple
=
1
pad_to_multiple
=
1
expected_feature_map_shape
=
[(
4
,
19
,
19
,
128
),
(
4
,
10
,
10
,
128
),
expected_feature_map_shape
=
[(
2
,
19
,
19
,
128
),
(
2
,
10
,
10
,
128
),
(
4
,
5
,
5
,
32
),
(
4
,
3
,
3
,
32
),
(
2
,
5
,
5
,
32
),
(
2
,
3
,
3
,
32
),
(
4
,
2
,
2
,
32
),
(
4
,
1
,
1
,
32
)]
(
2
,
2
,
2
,
32
),
(
2
,
1
,
1
,
32
)]
self
.
check_extract_features_returns_correct_shape
(
self
.
check_extract_features_returns_correct_shape
(
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
2
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
expected_feature_map_shape
)
expected_feature_map_shape
)
def
test_extract_features_returns_correct_shapes_with_pad_to_multiple
(
self
):
def
test_extract_features_returns_correct_shapes_with_pad_to_multiple
(
self
):
...
@@ -85,11 +97,11 @@ class SsdInceptionV2FeatureExtractorTest(
...
@@ -85,11 +97,11 @@ class SsdInceptionV2FeatureExtractorTest(
image_width
=
299
image_width
=
299
depth_multiplier
=
1.0
depth_multiplier
=
1.0
pad_to_multiple
=
32
pad_to_multiple
=
32
expected_feature_map_shape
=
[(
4
,
20
,
20
,
576
),
(
4
,
10
,
10
,
1024
),
expected_feature_map_shape
=
[(
2
,
20
,
20
,
576
),
(
2
,
10
,
10
,
1024
),
(
4
,
5
,
5
,
512
),
(
4
,
3
,
3
,
256
),
(
2
,
5
,
5
,
512
),
(
2
,
3
,
3
,
256
),
(
4
,
2
,
2
,
256
),
(
4
,
1
,
1
,
128
)]
(
2
,
2
,
2
,
256
),
(
2
,
1
,
1
,
128
)]
self
.
check_extract_features_returns_correct_shape
(
self
.
check_extract_features_returns_correct_shape
(
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
2
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
expected_feature_map_shape
)
expected_feature_map_shape
)
def
test_extract_features_raises_error_with_invalid_image_size
(
self
):
def
test_extract_features_raises_error_with_invalid_image_size
(
self
):
...
...
research/object_detection/models/ssd_inception_v3_feature_extractor.py
View file @
fd7b6887
...
@@ -19,6 +19,7 @@ import tensorflow as tf
...
@@ -19,6 +19,7 @@ import tensorflow as tf
from
object_detection.meta_architectures
import
ssd_meta_arch
from
object_detection.meta_architectures
import
ssd_meta_arch
from
object_detection.models
import
feature_map_generators
from
object_detection.models
import
feature_map_generators
from
object_detection.utils
import
ops
from
object_detection.utils
import
ops
from
object_detection.utils
import
shape_utils
from
nets
import
inception_v3
from
nets
import
inception_v3
slim
=
tf
.
contrib
.
slim
slim
=
tf
.
contrib
.
slim
...
@@ -34,7 +35,9 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
...
@@ -34,7 +35,9 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
pad_to_multiple
,
pad_to_multiple
,
conv_hyperparams
,
conv_hyperparams
,
batch_norm_trainable
=
True
,
batch_norm_trainable
=
True
,
reuse_weights
=
None
):
reuse_weights
=
None
,
use_explicit_padding
=
False
,
use_depthwise
=
False
):
"""InceptionV3 Feature Extractor for SSD Models.
"""InceptionV3 Feature Extractor for SSD Models.
Args:
Args:
...
@@ -49,10 +52,14 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
...
@@ -49,10 +52,14 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
(e.g. 1), it is desirable to disable batch norm update and use
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
"""
"""
super
(
SSDInceptionV3FeatureExtractor
,
self
).
__init__
(
super
(
SSDInceptionV3FeatureExtractor
,
self
).
__init__
(
is_training
,
depth_multiplier
,
min_depth
,
pad_to_multiple
,
is_training
,
depth_multiplier
,
min_depth
,
pad_to_multiple
,
conv_hyperparams
,
batch_norm_trainable
,
reuse_weights
)
conv_hyperparams
,
batch_norm_trainable
,
reuse_weights
,
use_explicit_padding
,
use_depthwise
)
def
preprocess
(
self
,
resized_inputs
):
def
preprocess
(
self
,
resized_inputs
):
"""SSD preprocessing.
"""SSD preprocessing.
...
@@ -80,32 +87,29 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
...
@@ -80,32 +87,29 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
feature_maps: a list of tensors where the ith tensor has shape
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
[batch, height_i, width_i, depth_i]
"""
"""
preprocessed_inputs
.
get_shape
().
assert_has_rank
(
4
)
preprocessed_inputs
=
shape_utils
.
check_min_image_dim
(
shape_assert
=
tf
.
Assert
(
33
,
preprocessed_inputs
)
tf
.
logical_and
(
tf
.
greater_equal
(
tf
.
shape
(
preprocessed_inputs
)[
1
],
33
),
tf
.
greater_equal
(
tf
.
shape
(
preprocessed_inputs
)[
2
],
33
)),
[
'image size must at least be 33 in both height and width.'
])
feature_map_layout
=
{
feature_map_layout
=
{
'from_layer'
:
[
'Mixed_5d'
,
'Mixed_6e'
,
'Mixed_7c'
,
''
,
''
,
''
],
'from_layer'
:
[
'Mixed_5d'
,
'Mixed_6e'
,
'Mixed_7c'
,
''
,
''
,
''
],
'layer_depth'
:
[
-
1
,
-
1
,
-
1
,
512
,
256
,
128
],
'layer_depth'
:
[
-
1
,
-
1
,
-
1
,
512
,
256
,
128
],
'use_explicit_padding'
:
self
.
_use_explicit_padding
,
'use_depthwise'
:
self
.
_use_depthwise
,
}
}
with
tf
.
control_dependencies
([
shape_assert
]):
with
slim
.
arg_scope
(
self
.
_conv_hyperparams
):
with
slim
.
arg_scope
(
self
.
_conv_hyperparams
):
with
tf
.
variable_scope
(
'InceptionV3'
,
reuse
=
self
.
_reuse_weights
)
as
scope
:
with
tf
.
variable_scope
(
'InceptionV3'
,
_
,
image_features
=
inception_v3
.
inception_v3_base
(
reuse
=
self
.
_reuse_weights
)
as
scope
:
ops
.
pad_to_multiple
(
preprocessed_inputs
,
self
.
_pad_to_multiple
),
_
,
image_features
=
inception_v3
.
inception_v3_base
(
final_endpoint
=
'Mixed_7c'
,
ops
.
pad_to_multiple
(
preprocessed_inputs
,
self
.
_pad_to_multiple
),
min_depth
=
self
.
_min_depth
,
final_endpoint
=
'Mixed_7c'
,
depth_multiplier
=
self
.
_depth_multiplier
,
min_depth
=
self
.
_min_depth
,
scope
=
scope
)
depth_multiplier
=
self
.
_depth_multiplier
,
feature_maps
=
feature_map_generators
.
multi_resolution_feature_maps
(
scope
=
scope
)
feature_map_layout
=
feature_map_layout
,
feature_maps
=
feature_map_generators
.
multi_resolution_feature_maps
(
depth_multiplier
=
self
.
_depth_multiplier
,
feature_map_layout
=
feature_map_layout
,
min_depth
=
self
.
_min_depth
,
depth_multiplier
=
self
.
_depth_multiplier
,
insert_1x1_conv
=
True
,
min_depth
=
self
.
_min_depth
,
image_features
=
image_features
)
insert_1x1_conv
=
True
,
image_features
=
image_features
)
return
feature_maps
.
values
()
return
feature_maps
.
values
()
research/object_detection/models/ssd_inception_v3_feature_extractor_test.py
View file @
fd7b6887
...
@@ -22,7 +22,7 @@ from object_detection.models import ssd_inception_v3_feature_extractor
...
@@ -22,7 +22,7 @@ from object_detection.models import ssd_inception_v3_feature_extractor
class
SsdInceptionV3FeatureExtractorTest
(
class
SsdInceptionV3FeatureExtractorTest
(
ssd_feature_extractor_test
.
SsdFeatureExtractorTestBase
,
tf
.
test
.
TestCase
):
ssd_feature_extractor_test
.
SsdFeatureExtractorTestBase
):
def
_create_feature_extractor
(
self
,
depth_multiplier
,
pad_to_multiple
,
def
_create_feature_extractor
(
self
,
depth_multiplier
,
pad_to_multiple
,
is_training
=
True
,
batch_norm_trainable
=
True
):
is_training
=
True
,
batch_norm_trainable
=
True
):
...
@@ -49,11 +49,23 @@ class SsdInceptionV3FeatureExtractorTest(
...
@@ -49,11 +49,23 @@ class SsdInceptionV3FeatureExtractorTest(
image_width
=
128
image_width
=
128
depth_multiplier
=
1.0
depth_multiplier
=
1.0
pad_to_multiple
=
1
pad_to_multiple
=
1
expected_feature_map_shape
=
[(
4
,
13
,
13
,
288
),
(
4
,
6
,
6
,
768
),
expected_feature_map_shape
=
[(
2
,
13
,
13
,
288
),
(
2
,
6
,
6
,
768
),
(
4
,
2
,
2
,
2048
),
(
4
,
1
,
1
,
512
),
(
2
,
2
,
2
,
2048
),
(
2
,
1
,
1
,
512
),
(
4
,
1
,
1
,
256
),
(
4
,
1
,
1
,
128
)]
(
2
,
1
,
1
,
256
),
(
2
,
1
,
1
,
128
)]
self
.
check_extract_features_returns_correct_shape
(
self
.
check_extract_features_returns_correct_shape
(
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
2
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
expected_feature_map_shape
)
def
test_extract_features_returns_correct_shapes_with_dynamic_inputs
(
self
):
image_height
=
128
image_width
=
128
depth_multiplier
=
1.0
pad_to_multiple
=
1
expected_feature_map_shape
=
[(
2
,
13
,
13
,
288
),
(
2
,
6
,
6
,
768
),
(
2
,
2
,
2
,
2048
),
(
2
,
1
,
1
,
512
),
(
2
,
1
,
1
,
256
),
(
2
,
1
,
1
,
128
)]
self
.
check_extract_features_returns_correct_shapes_with_dynamic_inputs
(
2
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
expected_feature_map_shape
)
expected_feature_map_shape
)
def
test_extract_features_returns_correct_shapes_299
(
self
):
def
test_extract_features_returns_correct_shapes_299
(
self
):
...
@@ -61,11 +73,11 @@ class SsdInceptionV3FeatureExtractorTest(
...
@@ -61,11 +73,11 @@ class SsdInceptionV3FeatureExtractorTest(
image_width
=
299
image_width
=
299
depth_multiplier
=
1.0
depth_multiplier
=
1.0
pad_to_multiple
=
1
pad_to_multiple
=
1
expected_feature_map_shape
=
[(
4
,
35
,
35
,
288
),
(
4
,
17
,
17
,
768
),
expected_feature_map_shape
=
[(
2
,
35
,
35
,
288
),
(
2
,
17
,
17
,
768
),
(
4
,
8
,
8
,
2048
),
(
4
,
4
,
4
,
512
),
(
2
,
8
,
8
,
2048
),
(
2
,
4
,
4
,
512
),
(
4
,
2
,
2
,
256
),
(
4
,
1
,
1
,
128
)]
(
2
,
2
,
2
,
256
),
(
2
,
1
,
1
,
128
)]
self
.
check_extract_features_returns_correct_shape
(
self
.
check_extract_features_returns_correct_shape
(
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
2
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
expected_feature_map_shape
)
expected_feature_map_shape
)
def
test_extract_features_returns_correct_shapes_enforcing_min_depth
(
self
):
def
test_extract_features_returns_correct_shapes_enforcing_min_depth
(
self
):
...
@@ -73,11 +85,11 @@ class SsdInceptionV3FeatureExtractorTest(
...
@@ -73,11 +85,11 @@ class SsdInceptionV3FeatureExtractorTest(
image_width
=
299
image_width
=
299
depth_multiplier
=
0.5
**
12
depth_multiplier
=
0.5
**
12
pad_to_multiple
=
1
pad_to_multiple
=
1
expected_feature_map_shape
=
[(
4
,
35
,
35
,
128
),
(
4
,
17
,
17
,
128
),
expected_feature_map_shape
=
[(
2
,
35
,
35
,
128
),
(
2
,
17
,
17
,
128
),
(
4
,
8
,
8
,
192
),
(
4
,
4
,
4
,
32
),
(
2
,
8
,
8
,
192
),
(
2
,
4
,
4
,
32
),
(
4
,
2
,
2
,
32
),
(
4
,
1
,
1
,
32
)]
(
2
,
2
,
2
,
32
),
(
2
,
1
,
1
,
32
)]
self
.
check_extract_features_returns_correct_shape
(
self
.
check_extract_features_returns_correct_shape
(
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
2
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
expected_feature_map_shape
)
expected_feature_map_shape
)
def
test_extract_features_returns_correct_shapes_with_pad_to_multiple
(
self
):
def
test_extract_features_returns_correct_shapes_with_pad_to_multiple
(
self
):
...
@@ -85,11 +97,11 @@ class SsdInceptionV3FeatureExtractorTest(
...
@@ -85,11 +97,11 @@ class SsdInceptionV3FeatureExtractorTest(
image_width
=
299
image_width
=
299
depth_multiplier
=
1.0
depth_multiplier
=
1.0
pad_to_multiple
=
32
pad_to_multiple
=
32
expected_feature_map_shape
=
[(
4
,
37
,
37
,
288
),
(
4
,
18
,
18
,
768
),
expected_feature_map_shape
=
[(
2
,
37
,
37
,
288
),
(
2
,
18
,
18
,
768
),
(
4
,
8
,
8
,
2048
),
(
4
,
4
,
4
,
512
),
(
2
,
8
,
8
,
2048
),
(
2
,
4
,
4
,
512
),
(
4
,
2
,
2
,
256
),
(
4
,
1
,
1
,
128
)]
(
2
,
2
,
2
,
256
),
(
2
,
1
,
1
,
128
)]
self
.
check_extract_features_returns_correct_shape
(
self
.
check_extract_features_returns_correct_shape
(
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
2
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
expected_feature_map_shape
)
expected_feature_map_shape
)
def
test_extract_features_raises_error_with_invalid_image_size
(
self
):
def
test_extract_features_raises_error_with_invalid_image_size
(
self
):
...
...
research/object_detection/models/ssd_mobilenet_v1_feature_extractor.py
View file @
fd7b6887
...
@@ -20,6 +20,7 @@ import tensorflow as tf
...
@@ -20,6 +20,7 @@ import tensorflow as tf
from
object_detection.meta_architectures
import
ssd_meta_arch
from
object_detection.meta_architectures
import
ssd_meta_arch
from
object_detection.models
import
feature_map_generators
from
object_detection.models
import
feature_map_generators
from
object_detection.utils
import
ops
from
object_detection.utils
import
ops
from
object_detection.utils
import
shape_utils
from
nets
import
mobilenet_v1
from
nets
import
mobilenet_v1
slim
=
tf
.
contrib
.
slim
slim
=
tf
.
contrib
.
slim
...
@@ -35,7 +36,9 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
...
@@ -35,7 +36,9 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
pad_to_multiple
,
pad_to_multiple
,
conv_hyperparams
,
conv_hyperparams
,
batch_norm_trainable
=
True
,
batch_norm_trainable
=
True
,
reuse_weights
=
None
):
reuse_weights
=
None
,
use_explicit_padding
=
False
,
use_depthwise
=
False
):
"""MobileNetV1 Feature Extractor for SSD Models.
"""MobileNetV1 Feature Extractor for SSD Models.
Args:
Args:
...
@@ -50,10 +53,14 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
...
@@ -50,10 +53,14 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
(e.g. 1), it is desirable to disable batch norm update and use
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
"""
"""
super
(
SSDMobileNetV1FeatureExtractor
,
self
).
__init__
(
super
(
SSDMobileNetV1FeatureExtractor
,
self
).
__init__
(
is_training
,
depth_multiplier
,
min_depth
,
pad_to_multiple
,
is_training
,
depth_multiplier
,
min_depth
,
pad_to_multiple
,
conv_hyperparams
,
batch_norm_trainable
,
reuse_weights
)
conv_hyperparams
,
batch_norm_trainable
,
reuse_weights
,
use_explicit_padding
,
use_depthwise
)
def
preprocess
(
self
,
resized_inputs
):
def
preprocess
(
self
,
resized_inputs
):
"""SSD preprocessing.
"""SSD preprocessing.
...
@@ -81,34 +88,33 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
...
@@ -81,34 +88,33 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
feature_maps: a list of tensors where the ith tensor has shape
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
[batch, height_i, width_i, depth_i]
"""
"""
preprocessed_inputs
.
get_shape
().
assert_has_rank
(
4
)
preprocessed_inputs
=
shape_utils
.
check_min_image_dim
(
shape_assert
=
tf
.
Assert
(
33
,
preprocessed_inputs
)
tf
.
logical_and
(
tf
.
greater_equal
(
tf
.
shape
(
preprocessed_inputs
)[
1
],
33
),
tf
.
greater_equal
(
tf
.
shape
(
preprocessed_inputs
)[
2
],
33
)),
[
'image size must at least be 33 in both height and width.'
])
feature_map_layout
=
{
feature_map_layout
=
{
'from_layer'
:
[
'Conv2d_11_pointwise'
,
'Conv2d_13_pointwise'
,
''
,
''
,
'from_layer'
:
[
'Conv2d_11_pointwise'
,
'Conv2d_13_pointwise'
,
''
,
''
,
''
,
''
],
''
,
''
],
'layer_depth'
:
[
-
1
,
-
1
,
512
,
256
,
256
,
128
],
'layer_depth'
:
[
-
1
,
-
1
,
512
,
256
,
256
,
128
],
'use_explicit_padding'
:
self
.
_use_explicit_padding
,
'use_depthwise'
:
self
.
_use_depthwise
,
}
}
with
tf
.
control_dependencies
([
shape_assert
]
):
with
slim
.
arg_scope
(
self
.
_conv_hyperparams
):
with
slim
.
arg_scope
(
self
.
_conv_hyperparams
):
# TODO: Enable fused batch norm once quantization supports it.
with
slim
.
arg_scope
([
slim
.
batch_norm
],
fused
=
False
):
with
slim
.
arg_scope
([
slim
.
batch_norm
],
fused
=
False
):
with
tf
.
variable_scope
(
'MobilenetV1'
,
with
tf
.
variable_scope
(
'MobilenetV1'
,
reuse
=
self
.
_reuse_weights
)
as
scope
:
reuse
=
self
.
_reuse_weights
)
as
scope
:
_
,
image_features
=
mobilenet_v1
.
mobilenet_v1_base
(
_
,
image_features
=
mobilenet_v1
.
mobilenet_v1_base
(
ops
.
pad_to_multiple
(
preprocessed_inputs
,
self
.
_pad_to_multiple
),
ops
.
pad_to_multiple
(
preprocessed_inputs
,
self
.
_pad_to_multiple
),
final_endpoint
=
'Conv2d_13_pointwise'
,
final_endpoint
=
'Conv2d_13_pointwise'
,
min_depth
=
self
.
_min_depth
,
min_depth
=
self
.
_min_depth
,
depth_multiplier
=
self
.
_depth_multiplier
,
depth_multiplier
=
self
.
_depth_multiplier
,
scope
=
scope
)
scope
=
scope
)
feature_maps
=
feature_map_generators
.
multi_resolution_feature_maps
(
feature_maps
=
feature_map_generators
.
multi_resolution_feature_maps
(
feature_map_layout
=
feature_map_layout
,
feature_map_layout
=
feature_map_layout
,
depth_multiplier
=
self
.
_depth_multiplier
,
depth_multiplier
=
self
.
_depth_multiplier
,
min_depth
=
self
.
_min_depth
,
min_depth
=
self
.
_min_depth
,
insert_1x1_conv
=
True
,
insert_1x1_conv
=
True
,
image_features
=
image_features
)
image_features
=
image_features
)
return
feature_maps
.
values
()
return
feature_maps
.
values
()
research/object_detection/models/ssd_mobilenet_v1_feature_extractor_test.py
View file @
fd7b6887
...
@@ -24,7 +24,7 @@ slim = tf.contrib.slim
...
@@ -24,7 +24,7 @@ slim = tf.contrib.slim
class
SsdMobilenetV1FeatureExtractorTest
(
class
SsdMobilenetV1FeatureExtractorTest
(
ssd_feature_extractor_test
.
SsdFeatureExtractorTestBase
,
tf
.
test
.
TestCase
):
ssd_feature_extractor_test
.
SsdFeatureExtractorTestBase
):
def
_create_feature_extractor
(
self
,
depth_multiplier
,
pad_to_multiple
,
def
_create_feature_extractor
(
self
,
depth_multiplier
,
pad_to_multiple
,
is_training
=
True
,
batch_norm_trainable
=
True
):
is_training
=
True
,
batch_norm_trainable
=
True
):
...
@@ -52,11 +52,11 @@ class SsdMobilenetV1FeatureExtractorTest(
...
@@ -52,11 +52,11 @@ class SsdMobilenetV1FeatureExtractorTest(
image_width
=
128
image_width
=
128
depth_multiplier
=
1.0
depth_multiplier
=
1.0
pad_to_multiple
=
1
pad_to_multiple
=
1
expected_feature_map_shape
=
[(
4
,
8
,
8
,
512
),
(
4
,
4
,
4
,
1024
),
expected_feature_map_shape
=
[(
2
,
8
,
8
,
512
),
(
2
,
4
,
4
,
1024
),
(
4
,
2
,
2
,
512
),
(
4
,
1
,
1
,
256
),
(
2
,
2
,
2
,
512
),
(
2
,
1
,
1
,
256
),
(
4
,
1
,
1
,
256
),
(
4
,
1
,
1
,
128
)]
(
2
,
1
,
1
,
256
),
(
2
,
1
,
1
,
128
)]
self
.
check_extract_features_returns_correct_shape
(
self
.
check_extract_features_returns_correct_shape
(
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
2
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
expected_feature_map_shape
)
expected_feature_map_shape
)
def
test_extract_features_returns_correct_shapes_299
(
self
):
def
test_extract_features_returns_correct_shapes_299
(
self
):
...
@@ -64,11 +64,23 @@ class SsdMobilenetV1FeatureExtractorTest(
...
@@ -64,11 +64,23 @@ class SsdMobilenetV1FeatureExtractorTest(
image_width
=
299
image_width
=
299
depth_multiplier
=
1.0
depth_multiplier
=
1.0
pad_to_multiple
=
1
pad_to_multiple
=
1
expected_feature_map_shape
=
[(
4
,
19
,
19
,
512
),
(
4
,
10
,
10
,
1024
),
expected_feature_map_shape
=
[(
2
,
19
,
19
,
512
),
(
2
,
10
,
10
,
1024
),
(
4
,
5
,
5
,
512
),
(
4
,
3
,
3
,
256
),
(
2
,
5
,
5
,
512
),
(
2
,
3
,
3
,
256
),
(
4
,
2
,
2
,
256
),
(
4
,
1
,
1
,
128
)]
(
2
,
2
,
2
,
256
),
(
2
,
1
,
1
,
128
)]
self
.
check_extract_features_returns_correct_shape
(
self
.
check_extract_features_returns_correct_shape
(
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
2
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
expected_feature_map_shape
)
def test_extract_features_with_dynamic_image_shape(self):
    """Feature map shapes must be correct when the input shape is dynamic."""
    batch = 2
    height, width = 128, 128
    depth_multiplier = 1.0
    pad_to_multiple = 1
    # Expected SSD feature pyramid shapes for a 128x128 input.
    expected_shapes = [(batch, 8, 8, 512),
                       (batch, 4, 4, 1024),
                       (batch, 2, 2, 512),
                       (batch, 1, 1, 256),
                       (batch, 1, 1, 256),
                       (batch, 1, 1, 128)]
    self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
        batch, height, width, depth_multiplier, pad_to_multiple,
        expected_shapes)
expected_feature_map_shape
)
def
test_extract_features_returns_correct_shapes_enforcing_min_depth
(
self
):
def
test_extract_features_returns_correct_shapes_enforcing_min_depth
(
self
):
...
@@ -76,11 +88,11 @@ class SsdMobilenetV1FeatureExtractorTest(
...
@@ -76,11 +88,11 @@ class SsdMobilenetV1FeatureExtractorTest(
image_width
=
299
image_width
=
299
depth_multiplier
=
0.5
**
12
depth_multiplier
=
0.5
**
12
pad_to_multiple
=
1
pad_to_multiple
=
1
expected_feature_map_shape
=
[(
4
,
19
,
19
,
32
),
(
4
,
10
,
10
,
32
),
expected_feature_map_shape
=
[(
2
,
19
,
19
,
32
),
(
2
,
10
,
10
,
32
),
(
4
,
5
,
5
,
32
),
(
4
,
3
,
3
,
32
),
(
2
,
5
,
5
,
32
),
(
2
,
3
,
3
,
32
),
(
4
,
2
,
2
,
32
),
(
4
,
1
,
1
,
32
)]
(
2
,
2
,
2
,
32
),
(
2
,
1
,
1
,
32
)]
self
.
check_extract_features_returns_correct_shape
(
self
.
check_extract_features_returns_correct_shape
(
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
2
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
expected_feature_map_shape
)
expected_feature_map_shape
)
def
test_extract_features_returns_correct_shapes_with_pad_to_multiple
(
self
):
def
test_extract_features_returns_correct_shapes_with_pad_to_multiple
(
self
):
...
@@ -88,11 +100,11 @@ class SsdMobilenetV1FeatureExtractorTest(
...
@@ -88,11 +100,11 @@ class SsdMobilenetV1FeatureExtractorTest(
image_width
=
299
image_width
=
299
depth_multiplier
=
1.0
depth_multiplier
=
1.0
pad_to_multiple
=
32
pad_to_multiple
=
32
expected_feature_map_shape
=
[(
4
,
20
,
20
,
512
),
(
4
,
10
,
10
,
1024
),
expected_feature_map_shape
=
[(
2
,
20
,
20
,
512
),
(
2
,
10
,
10
,
1024
),
(
4
,
5
,
5
,
512
),
(
4
,
3
,
3
,
256
),
(
2
,
5
,
5
,
512
),
(
2
,
3
,
3
,
256
),
(
4
,
2
,
2
,
256
),
(
4
,
1
,
1
,
128
)]
(
2
,
2
,
2
,
256
),
(
2
,
1
,
1
,
128
)]
self
.
check_extract_features_returns_correct_shape
(
self
.
check_extract_features_returns_correct_shape
(
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
2
,
image_height
,
image_width
,
depth_multiplier
,
pad_to_multiple
,
expected_feature_map_shape
)
expected_feature_map_shape
)
def
test_extract_features_raises_error_with_invalid_image_size
(
self
):
def
test_extract_features_raises_error_with_invalid_image_size
(
self
):
...
@@ -108,7 +120,7 @@ class SsdMobilenetV1FeatureExtractorTest(
...
@@ -108,7 +120,7 @@ class SsdMobilenetV1FeatureExtractorTest(
image_width
=
128
image_width
=
128
depth_multiplier
=
1
depth_multiplier
=
1
pad_to_multiple
=
1
pad_to_multiple
=
1
test_image
=
np
.
random
.
rand
(
4
,
image_height
,
image_width
,
3
)
test_image
=
np
.
random
.
rand
(
2
,
image_height
,
image_width
,
3
)
feature_extractor
=
self
.
_create_feature_extractor
(
depth_multiplier
,
feature_extractor
=
self
.
_create_feature_extractor
(
depth_multiplier
,
pad_to_multiple
)
pad_to_multiple
)
preprocessed_image
=
feature_extractor
.
preprocess
(
test_image
)
preprocessed_image
=
feature_extractor
.
preprocess
(
test_image
)
...
...
research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor.py
0 → 100644
View file @
fd7b6887
"""SSD Feature Pyramid Network (FPN) feature extractors based on Resnet v1.
See https://arxiv.org/abs/1708.02002 for details.
"""
import
tensorflow
as
tf
from
object_detection.meta_architectures
import
ssd_meta_arch
from
object_detection.models
import
feature_map_generators
from
object_detection.utils
import
ops
from
object_detection.utils
import
shape_utils
from
nets
import
resnet_v1
slim
=
tf
.
contrib
.
slim
class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
  """SSD FPN feature extractor based on Resnet v1 architecture."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams,
               resnet_base_fn,
               resnet_scope_name,
               fpn_scope_name,
               batch_norm_trainable=True,
               reuse_weights=None,
               use_explicit_padding=False,
               use_depthwise=False):
    """SSD FPN feature extractor based on Resnet v1 architecture.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for feature extractor.
        UNUSED currently; only 1.0 is accepted.
      min_depth: minimum feature extractor depth. UNUSED currently.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
      resnet_base_fn: base resnet network to use.
      resnet_scope_name: scope name under which to construct resnet.
      fpn_scope_name: scope name under which to construct the feature pyramid
        network.
      batch_norm_trainable: Whether to update batch norm parameters during
        training or not. When training with a small batch size
        (e.g. 1), it is desirable to disable batch norm update and use
        pretrained batch norm params.
      reuse_weights: Whether to reuse variables. Default is None.
      use_explicit_padding: Whether to use explicit padding when extracting
        features. Default is False. UNUSED currently; must be False.
      use_depthwise: Whether to use depthwise convolutions. UNUSED currently.

    Raises:
      ValueError: On supplying invalid arguments for unused arguments.
    """
    super(_SSDResnetV1FpnFeatureExtractor, self).__init__(
        is_training, depth_multiplier, min_depth, pad_to_multiple,
        conv_hyperparams, batch_norm_trainable, reuse_weights,
        use_explicit_padding)
    # The FPN below hard-codes 256-d maps on top of the resnet's nominal
    # block depths, so a depth multiplier cannot be honored.
    if self._depth_multiplier != 1.0:
      raise ValueError('Only depth 1.0 is supported, found: {}'.format(
          self._depth_multiplier))
    if self._use_explicit_padding:
      raise ValueError('Explicit padding is not a valid option.')
    self._resnet_base_fn = resnet_base_fn
    self._resnet_scope_name = resnet_scope_name
    self._fpn_scope_name = fpn_scope_name

  def preprocess(self, resized_inputs):
    """SSD preprocessing.

    VGG style channel mean subtraction as described here:
    https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-mdnge.

    Args:
      resized_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.
    """
    channel_means = [123.68, 116.779, 103.939]
    return resized_inputs - [[channel_means]]

  def _filter_features(self, image_features):
    """Keeps only the resnet block endpoints needed by the FPN.

    Endpoint keys come back fully scoped (e.g. 'scope/block2'); this strips
    the scope prefix and retains blocks 2-4.

    Args:
      image_features: dict of scoped endpoint name to feature tensor.

    Returns:
      A dict mapping 'block2'/'block3'/'block4' to their feature tensors.
    """
    # TODO: Change resnet endpoint to strip scope prefixes instead
    # of munging the scope here.
    filtered_image_features = {}
    for key, feature in image_features.items():
      feature_name = key.split('/')[-1]
      if feature_name in ['block2', 'block3', 'block4']:
        filtered_image_features[feature_name] = feature
    return filtered_image_features

  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: depth multiplier is not supported.
    """
    # Defensive re-check; __init__ already rejects multipliers != 1.0.
    if self._depth_multiplier != 1.0:
      raise ValueError('Depth multiplier not supported.')

    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    with tf.variable_scope(
        self._resnet_scope_name, reuse=self._reuse_weights) as scope:
      with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        _, image_features = self._resnet_base_fn(
            inputs=ops.pad_to_multiple(preprocessed_inputs,
                                       self._pad_to_multiple),
            num_classes=None,
            is_training=self._is_training and self._batch_norm_trainable,
            global_pool=False,
            output_stride=None,
            store_non_strided_activations=True,
            scope=scope)
      image_features = self._filter_features(image_features)
      last_feature_map = image_features['block4']
    with tf.variable_scope(self._fpn_scope_name, reuse=self._reuse_weights):
      with slim.arg_scope(self._conv_hyperparams):
        # Extend the bottom-up pathway with two extra stride-2 convs
        # ('block5', 'block6') before building the top-down pyramid.
        for i in range(5, 7):
          last_feature_map = slim.conv2d(
              last_feature_map,
              num_outputs=256,
              kernel_size=[3, 3],
              stride=2,
              padding='SAME',
              scope='block{}'.format(i))
          image_features['bottomup_{}'.format(i)] = last_feature_map
        feature_maps = feature_map_generators.fpn_top_down_feature_maps(
            [image_features[key] for key in
             ['block2', 'block3', 'block4', 'bottomup_5', 'bottomup_6']],
            depth=256,
            scope='top_down_features')
    return feature_maps.values()
class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
  """SSD FPN feature extractor built on Resnet50 v1."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams,
               batch_norm_trainable=True,
               reuse_weights=None,
               use_explicit_padding=False,
               use_depthwise=False):
    """Resnet50 v1 FPN Feature Extractor for SSD Models.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for feature extractor.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
      batch_norm_trainable: whether batch norm parameters are updated during
        training. Freezing them (and keeping pretrained params) is preferable
        when training with very small batch sizes (e.g. 1).
      reuse_weights: whether to reuse variables. Default is None.
      use_explicit_padding: whether to use explicit padding when extracting
        features. Default is False. UNUSED currently.
      use_depthwise: whether to use depthwise convolutions. UNUSED currently.
    """
    # Bind the Resnet50 v1 base network and its scope names into the shared
    # FPN extractor implementation.
    super(SSDResnet50V1FpnFeatureExtractor, self).__init__(
        is_training,
        depth_multiplier,
        min_depth,
        pad_to_multiple,
        conv_hyperparams,
        resnet_v1.resnet_v1_50,
        'resnet_v1_50',
        'fpn',
        batch_norm_trainable=batch_norm_trainable,
        reuse_weights=reuse_weights,
        use_explicit_padding=use_explicit_padding)
class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
  """SSD FPN feature extractor built on Resnet101 v1."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams,
               batch_norm_trainable=True,
               reuse_weights=None,
               use_explicit_padding=False,
               use_depthwise=False):
    """Resnet101 v1 FPN Feature Extractor for SSD Models.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for feature extractor.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
      batch_norm_trainable: whether batch norm parameters are updated during
        training. Freezing them (and keeping pretrained params) is preferable
        when training with very small batch sizes (e.g. 1).
      reuse_weights: whether to reuse variables. Default is None.
      use_explicit_padding: whether to use explicit padding when extracting
        features. Default is False. UNUSED currently.
      use_depthwise: whether to use depthwise convolutions. UNUSED currently.
    """
    # Bind the Resnet101 v1 base network and its scope names into the shared
    # FPN extractor implementation.
    super(SSDResnet101V1FpnFeatureExtractor, self).__init__(
        is_training,
        depth_multiplier,
        min_depth,
        pad_to_multiple,
        conv_hyperparams,
        resnet_v1.resnet_v1_101,
        'resnet_v1_101',
        'fpn',
        batch_norm_trainable=batch_norm_trainable,
        reuse_weights=reuse_weights,
        use_explicit_padding=use_explicit_padding)
class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
  """SSD FPN feature extractor built on Resnet152 v1."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams,
               batch_norm_trainable=True,
               reuse_weights=None,
               use_explicit_padding=False,
               use_depthwise=False):
    """Resnet152 v1 FPN Feature Extractor for SSD Models.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for feature extractor.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
      batch_norm_trainable: whether batch norm parameters are updated during
        training. Freezing them (and keeping pretrained params) is preferable
        when training with very small batch sizes (e.g. 1).
      reuse_weights: whether to reuse variables. Default is None.
      use_explicit_padding: whether to use explicit padding when extracting
        features. Default is False. UNUSED currently.
      use_depthwise: whether to use depthwise convolutions. UNUSED currently.
    """
    # Bind the Resnet152 v1 base network and its scope names into the shared
    # FPN extractor implementation.
    super(SSDResnet152V1FpnFeatureExtractor, self).__init__(
        is_training,
        depth_multiplier,
        min_depth,
        pad_to_multiple,
        conv_hyperparams,
        resnet_v1.resnet_v1_152,
        'resnet_v1_152',
        'fpn',
        batch_norm_trainable=batch_norm_trainable,
        reuse_weights=reuse_weights,
        use_explicit_padding=use_explicit_padding)
Prev
1
2
3
4
5
6
7
8
9
10
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment