ModelZoo / ResNet50_tensorflow / Commits / 47bc1813

Commit 47bc1813, authored Jul 01, 2020 by syiming
Merge remote-tracking branch 'upstream/master' into add_multilevel_crop_and_resize
Parents: d8611151, b035a227

Changes: 329
Showing 20 changed files with 2184 additions and 155 deletions (+2184 −155)
research/object_detection/metrics/calibration_metrics_tf1_test.py  (+3 −0)
research/object_detection/metrics/coco_evaluation.py  (+533 −0)
research/object_detection/metrics/coco_evaluation_test.py  (+221 −0)
research/object_detection/metrics/coco_tools.py  (+4 −1)
research/object_detection/metrics/offline_eval_map_corloc.py  (+2 −2)
research/object_detection/model_lib.py  (+2 −2)
research/object_detection/model_lib_tf1_test.py  (+6 −7)
research/object_detection/model_lib_tf2_test.py  (+38 −27)
research/object_detection/model_lib_v2.py  (+70 −115)
research/object_detection/model_main_tf2.py  (+99 −0)
research/object_detection/models/bidirectional_feature_pyramid_generators.py  (+486 −0)
research/object_detection/models/bidirectional_feature_pyramid_generators_tf2_test.py  (+167 −0)
research/object_detection/models/center_net_hourglass_feature_extractor.py  (+75 −0)
research/object_detection/models/center_net_hourglass_feature_extractor_tf2_test.py  (+44 −0)
research/object_detection/models/center_net_resnet_feature_extractor.py  (+149 −0)
research/object_detection/models/center_net_resnet_feature_extractor_tf2_test.py  (+54 −0)
research/object_detection/models/center_net_resnet_v1_fpn_feature_extractor.py  (+176 −0)
research/object_detection/models/center_net_resnet_v1_fpn_feature_extractor_tf2_test.py  (+49 −0)
research/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor_tf1_test.py  (+3 −0)
research/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor_tf1_test.py  (+3 −1)
research/object_detection/metrics/calibration_metrics_test.py → research/object_detection/metrics/calibration_metrics_tf1_test.py

@@ -18,11 +18,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

 import unittest
 import numpy as np
 import tensorflow.compat.v1 as tf

 from object_detection.metrics import calibration_metrics
 from object_detection.utils import tf_version


 @unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
 class CalibrationLibTest(tf.test.TestCase):

   @staticmethod
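The renamed *_tf1_test.py files in this commit all rely on the same guard: the test class is skipped whenever the installed TensorFlow is 2.x. A minimal standalone sketch of that pattern (the class name below is made up purely for illustration and is not part of the commit):

import unittest

import tensorflow.compat.v1 as tf

from object_detection.utils import tf_version


@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class ExampleTf1OnlyTest(tf.test.TestCase):  # hypothetical class, for illustration only

  def test_trivial(self):
    # Runs under TF1; silently skipped under TF2.
    self.assertEqual(2 + 2, 4)


if __name__ == '__main__':
  tf.test.main()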
research/object_detection/metrics/coco_evaluation.py

@@ -24,6 +24,7 @@ import tensorflow.compat.v1 as tf
 from object_detection.core import standard_fields
 from object_detection.metrics import coco_tools
 from object_detection.utils import json_utils
 from object_detection.utils import np_mask_ops
 from object_detection.utils import object_detection_evaluation

@@ -1263,3 +1264,535 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
        eval_metric_ops[metric_name] = (tf.py_func(
            value_func_factory(metric_name), [], np.float32), update_op)

    return eval_metric_ops


class CocoPanopticSegmentationEvaluator(
    object_detection_evaluation.DetectionEvaluator):
  """Class to evaluate PQ (panoptic quality) metric on COCO dataset.

  More details about this metric: https://arxiv.org/pdf/1801.00868.pdf.
  """

  def __init__(self,
               categories,
               include_metrics_per_category=False,
               iou_threshold=0.5,
               ioa_threshold=0.5):
    """Constructor.

    Args:
      categories: A list of dicts, each of which has the following keys -
        'id': (required) an integer id uniquely identifying this category.
        'name': (required) string representing category name e.g., 'cat', 'dog'.
      include_metrics_per_category: If True, include metrics for each category.
      iou_threshold: intersection-over-union threshold for mask matching (with
        normal groundtruths).
      ioa_threshold: intersection-over-area threshold for mask matching with
        "is_crowd" groundtruths.
    """
    super(CocoPanopticSegmentationEvaluator, self).__init__(categories)
    self._groundtruth_masks = {}
    self._groundtruth_class_labels = {}
    self._groundtruth_is_crowd = {}
    self._predicted_masks = {}
    self._predicted_class_labels = {}
    self._include_metrics_per_category = include_metrics_per_category
    self._iou_threshold = iou_threshold
    self._ioa_threshold = ioa_threshold

  def clear(self):
    """Clears the state to prepare for a fresh evaluation."""
    self._groundtruth_masks.clear()
    self._groundtruth_class_labels.clear()
    self._groundtruth_is_crowd.clear()
    self._predicted_masks.clear()
    self._predicted_class_labels.clear()

  def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
    """Adds groundtruth for a single image to be used for evaluation.

    If the image has already been added, a warning is logged, and groundtruth is
    ignored.

    Args:
      image_id: A unique string/integer identifier for the image.
      groundtruth_dict: A dictionary containing -
        InputDataFields.groundtruth_classes: integer numpy array of shape
          [num_masks] containing 1-indexed groundtruth classes for the mask.
        InputDataFields.groundtruth_instance_masks: uint8 numpy array of shape
          [num_masks, image_height, image_width] containing groundtruth masks.
          The elements of the array must be in {0, 1}.
        InputDataFields.groundtruth_is_crowd (optional): integer numpy array of
          shape [num_boxes] containing iscrowd flag for groundtruth boxes.
    """
    if image_id in self._groundtruth_masks:
      tf.logging.warning(
          'Ignoring groundtruth with image %s, since it has already been '
          'added to the ground truth database.', image_id)
      return

    self._groundtruth_masks[image_id] = groundtruth_dict[
        standard_fields.InputDataFields.groundtruth_instance_masks]
    self._groundtruth_class_labels[image_id] = groundtruth_dict[
        standard_fields.InputDataFields.groundtruth_classes]
    groundtruth_is_crowd = groundtruth_dict.get(
        standard_fields.InputDataFields.groundtruth_is_crowd)
    # Drop groundtruth_is_crowd if empty tensor.
    if groundtruth_is_crowd is not None and not groundtruth_is_crowd.size > 0:
      groundtruth_is_crowd = None
    if groundtruth_is_crowd is not None:
      self._groundtruth_is_crowd[image_id] = groundtruth_is_crowd

  def add_single_detected_image_info(self, image_id, detections_dict):
    """Adds detections for a single image to be used for evaluation.

    If a detection has already been added for this image id, a warning is
    logged, and the detection is skipped.

    Args:
      image_id: A unique string/integer identifier for the image.
      detections_dict: A dictionary containing -
        DetectionResultFields.detection_classes: integer numpy array of shape
          [num_masks] containing 1-indexed detection classes for the masks.
        DetectionResultFields.detection_masks: optional uint8 numpy array of
          shape [num_masks, image_height, image_width] containing instance
          masks. The elements of the array must be in {0, 1}.

    Raises:
      ValueError: If results and groundtruth shape don't match.
    """
    if image_id not in self._groundtruth_masks:
      raise ValueError('Missing groundtruth for image id: {}'.format(image_id))

    detection_masks = detections_dict[
        standard_fields.DetectionResultFields.detection_masks]
    self._predicted_masks[image_id] = detection_masks
    self._predicted_class_labels[image_id] = detections_dict[
        standard_fields.DetectionResultFields.detection_classes]
    groundtruth_mask_shape = self._groundtruth_masks[image_id].shape
    if groundtruth_mask_shape[1:] != detection_masks.shape[1:]:
      raise ValueError("The shape of results doesn't match groundtruth.")

  def evaluate(self):
    """Evaluates the detection masks and returns a dictionary of coco metrics.

    Returns:
      A dictionary holding -

      1. summary_metric:
      'PanopticQuality@%.2fIOU': mean panoptic quality averaged over classes at
        the required IOU.
      'SegmentationQuality@%.2fIOU': mean segmentation quality averaged over
        classes at the required IOU.
      'RecognitionQuality@%.2fIOU': mean recognition quality averaged over
        classes at the required IOU.
      'NumValidClasses': number of valid classes. A valid class should have at
        least one normal (is_crowd=0) groundtruth mask or one predicted mask.
      'NumTotalClasses': number of total classes.

      2. per_category_pq: if include_metrics_per_category is True, category
      specific results with keys of the form:
      'PanopticQuality@%.2fIOU_ByCategory/category'.
    """
    # Evaluate and accumulate the iou/tp/fp/fn.
    sum_tp_iou, sum_num_tp, sum_num_fp, sum_num_fn = self._evaluate_all_masks()
    # Compute PQ metric for each category and average over all classes.
    mask_metrics = self._compute_panoptic_metrics(sum_tp_iou, sum_num_tp,
                                                  sum_num_fp, sum_num_fn)
    return mask_metrics

  def get_estimator_eval_metric_ops(self, eval_dict):
    """Returns a dictionary of eval metric ops.

    Note that once value_op is called, the detections and groundtruth added via
    update_op are cleared.

    Args:
      eval_dict: A dictionary that holds tensors for evaluating object detection
        performance. For single-image evaluation, this dictionary may be
        produced from eval_util.result_dict_for_single_example(). If multi-image
        evaluation, `eval_dict` should contain the fields
        'num_gt_masks_per_image' and 'num_det_masks_per_image' to properly unpad
        the tensors from the batch.

    Returns:
      a dictionary of metric names to tuple of value_op and update_op that can
      be used as eval metric ops in tf.estimator.EstimatorSpec. Note that all
      update ops must be run together and similarly all value ops must be run
      together to guarantee correct behaviour.
    """

    def update_op(image_id_batched, groundtruth_classes_batched,
                  groundtruth_instance_masks_batched,
                  groundtruth_is_crowd_batched, num_gt_masks_per_image,
                  detection_classes_batched, detection_masks_batched,
                  num_det_masks_per_image):
      """Update op for metrics."""
      for (image_id, groundtruth_classes, groundtruth_instance_masks,
           groundtruth_is_crowd, num_gt_mask, detection_classes,
           detection_masks, num_det_mask) in zip(
               image_id_batched, groundtruth_classes_batched,
               groundtruth_instance_masks_batched, groundtruth_is_crowd_batched,
               num_gt_masks_per_image, detection_classes_batched,
               detection_masks_batched, num_det_masks_per_image):
        self.add_single_ground_truth_image_info(
            image_id, {
                'groundtruth_classes':
                    groundtruth_classes[:num_gt_mask],
                'groundtruth_instance_masks':
                    groundtruth_instance_masks[:num_gt_mask],
                'groundtruth_is_crowd':
                    groundtruth_is_crowd[:num_gt_mask]
            })
        self.add_single_detected_image_info(
            image_id, {
                'detection_classes': detection_classes[:num_det_mask],
                'detection_masks': detection_masks[:num_det_mask]
            })

    # Unpack items from the evaluation dictionary.
    (image_id, groundtruth_classes, groundtruth_instance_masks,
     groundtruth_is_crowd, num_gt_masks_per_image, detection_classes,
     detection_masks, num_det_masks_per_image
    ) = self._unpack_evaluation_dictionary_items(eval_dict)

    update_op = tf.py_func(update_op, [
        image_id, groundtruth_classes, groundtruth_instance_masks,
        groundtruth_is_crowd, num_gt_masks_per_image, detection_classes,
        detection_masks, num_det_masks_per_image
    ], [])

    metric_names = [
        'PanopticQuality@%.2fIOU' % self._iou_threshold,
        'SegmentationQuality@%.2fIOU' % self._iou_threshold,
        'RecognitionQuality@%.2fIOU' % self._iou_threshold
    ]
    if self._include_metrics_per_category:
      for category_dict in self._categories:
        metric_names.append('PanopticQuality@%.2fIOU_ByCategory/%s' %
                            (self._iou_threshold, category_dict['name']))

    def first_value_func():
      self._metrics = self.evaluate()
      self.clear()
      return np.float32(self._metrics[metric_names[0]])

    def value_func_factory(metric_name):

      def value_func():
        return np.float32(self._metrics[metric_name])

      return value_func

    # Ensure that the metrics are only evaluated once.
    first_value_op = tf.py_func(first_value_func, [], tf.float32)
    eval_metric_ops = {metric_names[0]: (first_value_op, update_op)}
    with tf.control_dependencies([first_value_op]):
      for metric_name in metric_names[1:]:
        eval_metric_ops[metric_name] = (tf.py_func(
            value_func_factory(metric_name), [], np.float32), update_op)
    return eval_metric_ops

  def _evaluate_all_masks(self):
    """Evaluate all masks and compute sum iou/TP/FP/FN."""

    sum_num_tp = {category['id']: 0 for category in self._categories}
    sum_num_fp = sum_num_tp.copy()
    sum_num_fn = sum_num_tp.copy()
    sum_tp_iou = sum_num_tp.copy()

    for image_id in self._groundtruth_class_labels:
      # Separate normal and is_crowd groundtruth
      crowd_gt_indices = self._groundtruth_is_crowd.get(image_id)
      (normal_gt_masks, normal_gt_classes, crowd_gt_masks,
       crowd_gt_classes) = self._separate_normal_and_crowd_labels(
           crowd_gt_indices, self._groundtruth_masks[image_id],
           self._groundtruth_class_labels[image_id])

      # Mask matching to normal GT.
      predicted_masks = self._predicted_masks[image_id]
      predicted_class_labels = self._predicted_class_labels[image_id]
      (overlaps, pred_matched,
       gt_matched) = self._match_predictions_to_groundtruths(
           predicted_masks,
           predicted_class_labels,
           normal_gt_masks,
           normal_gt_classes,
           self._iou_threshold,
           is_crowd=False,
           with_replacement=False)

      # Accumulate true positives.
      for (class_id, is_matched, overlap) in zip(predicted_class_labels,
                                                 pred_matched, overlaps):
        if is_matched:
          sum_num_tp[class_id] += 1
          sum_tp_iou[class_id] += overlap

      # Accumulate false negatives.
      for (class_id, is_matched) in zip(normal_gt_classes, gt_matched):
        if not is_matched:
          sum_num_fn[class_id] += 1

      # Match remaining predictions to crowd gt.
      remained_pred_indices = np.logical_not(pred_matched)
      remained_pred_masks = predicted_masks[remained_pred_indices, :, :]
      remained_pred_classes = predicted_class_labels[remained_pred_indices]
      _, pred_matched, _ = self._match_predictions_to_groundtruths(
          remained_pred_masks,
          remained_pred_classes,
          crowd_gt_masks,
          crowd_gt_classes,
          self._ioa_threshold,
          is_crowd=True,
          with_replacement=True)

      # Accumulate false positives
      for (class_id, is_matched) in zip(remained_pred_classes, pred_matched):
        if not is_matched:
          sum_num_fp[class_id] += 1
    return sum_tp_iou, sum_num_tp, sum_num_fp, sum_num_fn

  def _compute_panoptic_metrics(self, sum_tp_iou, sum_num_tp, sum_num_fp,
                                sum_num_fn):
    """Compute PQ metric for each category and average over all classes.

    Args:
      sum_tp_iou: dict, summed true positive intersection-over-union (IoU) for
        each class, keyed by class_id.
      sum_num_tp: the total number of true positives for each class, keyed by
        class_id.
      sum_num_fp: the total number of false positives for each class, keyed by
        class_id.
      sum_num_fn: the total number of false negatives for each class, keyed by
        class_id.

    Returns:
      mask_metrics: a dictionary containing averaged metrics over all classes,
        and per-category metrics if required.
    """
    mask_metrics = {}
    sum_pq = 0
    sum_sq = 0
    sum_rq = 0
    num_valid_classes = 0
    for category in self._categories:
      class_id = category['id']
      (panoptic_quality, segmentation_quality,
       recognition_quality) = self._compute_panoptic_metrics_single_class(
           sum_tp_iou[class_id], sum_num_tp[class_id], sum_num_fp[class_id],
           sum_num_fn[class_id])
      if panoptic_quality is not None:
        sum_pq += panoptic_quality
        sum_sq += segmentation_quality
        sum_rq += recognition_quality
        num_valid_classes += 1
        if self._include_metrics_per_category:
          mask_metrics['PanopticQuality@%.2fIOU_ByCategory/%s' %
                       (self._iou_threshold,
                        category['name'])] = panoptic_quality
    mask_metrics['PanopticQuality@%.2fIOU' %
                 self._iou_threshold] = sum_pq / num_valid_classes
    mask_metrics['SegmentationQuality@%.2fIOU' %
                 self._iou_threshold] = sum_sq / num_valid_classes
    mask_metrics['RecognitionQuality@%.2fIOU' %
                 self._iou_threshold] = sum_rq / num_valid_classes
    mask_metrics['NumValidClasses'] = num_valid_classes
    mask_metrics['NumTotalClasses'] = len(self._categories)
    return mask_metrics

  def _compute_panoptic_metrics_single_class(self, sum_tp_iou, num_tp, num_fp,
                                             num_fn):
    """Compute panoptic metrics: panoptic/segmentation/recognition quality.

    More computation details in https://arxiv.org/pdf/1801.00868.pdf.

    Args:
      sum_tp_iou: summed true positive intersection-over-union (IoU) for a
        specific class.
      num_tp: the total number of true positives for a specific class.
      num_fp: the total number of false positives for a specific class.
      num_fn: the total number of false negatives for a specific class.

    Returns:
      panoptic_quality: sum_tp_iou / (num_tp + 0.5*num_fp + 0.5*num_fn).
      segmentation_quality: sum_tp_iou / num_tp.
      recognition_quality: num_tp / (num_tp + 0.5*num_fp + 0.5*num_fn).
    """
    denominator = num_tp + 0.5 * num_fp + 0.5 * num_fn
    # Calculate metric only if there is at least one GT or one prediction.
    if denominator > 0:
      recognition_quality = num_tp / denominator
      if num_tp > 0:
        segmentation_quality = sum_tp_iou / num_tp
      else:
        # If there is no TP for this category.
        segmentation_quality = 0
      panoptic_quality = segmentation_quality * recognition_quality
      return panoptic_quality, segmentation_quality, recognition_quality
    else:
      return None, None, None

  def _separate_normal_and_crowd_labels(self, crowd_gt_indices,
                                        groundtruth_masks, groundtruth_classes):
    """Separate normal and crowd groundtruth class_labels and masks.

    Args:
      crowd_gt_indices: None or array of shape [num_groundtruths]. If None, all
        groundtruths are treated as normal ones.
      groundtruth_masks: array of shape [num_groundtruths, height, width].
      groundtruth_classes: array of shape [num_groundtruths].

    Returns:
      normal_gt_masks: array of shape [num_normal_groundtruths, height, width].
      normal_gt_classes: array of shape [num_normal_groundtruths].
      crowd_gt_masks: array of shape [num_crowd_groundtruths, height, width].
      crowd_gt_classes: array of shape [num_crowd_groundtruths].

    Raises:
      ValueError: if the shape of groundtruth classes doesn't match groundtruth
        masks or if the shape of crowd_gt_indices.
    """
    if groundtruth_masks.shape[0] != groundtruth_classes.shape[0]:
      raise ValueError(
          "The number of masks doesn't match the number of labels.")
    if crowd_gt_indices is None:
      # All gts are treated as normal
      crowd_gt_indices = np.zeros(groundtruth_masks.shape, dtype=np.bool)
    else:
      if groundtruth_masks.shape[0] != crowd_gt_indices.shape[0]:
        raise ValueError(
            "The number of masks doesn't match the number of is_crowd labels.")
      crowd_gt_indices = crowd_gt_indices.astype(np.bool)
    normal_gt_indices = np.logical_not(crowd_gt_indices)
    if normal_gt_indices.size:
      normal_gt_masks = groundtruth_masks[normal_gt_indices, :, :]
      normal_gt_classes = groundtruth_classes[normal_gt_indices]
      crowd_gt_masks = groundtruth_masks[crowd_gt_indices, :, :]
      crowd_gt_classes = groundtruth_classes[crowd_gt_indices]
    else:
      # No groundtruths available, groundtruth_masks.shape = (0, h, w)
      normal_gt_masks = groundtruth_masks
      normal_gt_classes = groundtruth_classes
      crowd_gt_masks = groundtruth_masks
      crowd_gt_classes = groundtruth_classes
    return normal_gt_masks, normal_gt_classes, crowd_gt_masks, crowd_gt_classes

  def _match_predictions_to_groundtruths(self,
                                         predicted_masks,
                                         predicted_classes,
                                         groundtruth_masks,
                                         groundtruth_classes,
                                         matching_threshold,
                                         is_crowd=False,
                                         with_replacement=False):
    """Match the predicted masks to groundtruths.

    Args:
      predicted_masks: array of shape [num_predictions, height, width].
      predicted_classes: array of shape [num_predictions].
      groundtruth_masks: array of shape [num_groundtruths, height, width].
      groundtruth_classes: array of shape [num_groundtruths].
      matching_threshold: if the overlap between a prediction and a groundtruth
        is larger than this threshold, the prediction is true positive.
      is_crowd: whether the groundtruths are crowd annotation or not. If True,
        use intersection over area (IoA) as the overlapping metric; otherwise
        use intersection over union (IoU).
      with_replacement: whether a groundtruth can be matched to multiple
        predictions. By default, for normal groundtruths, only 1-1 matching is
        allowed for normal groundtruths; for crowd groundtruths, 1-to-many must
        be allowed.

    Returns:
      best_overlaps: array of shape [num_predictions]. Values representing the
        IoU or IoA with best matched groundtruth.
      pred_matched: array of shape [num_predictions]. Boolean value representing
        whether the ith prediction is matched to a groundtruth.
      gt_matched: array of shape [num_groundtruth]. Boolean value representing
        whether the ith groundtruth is matched to a prediction.

    Raises:
      ValueError: if the shape of groundtruth/predicted masks doesn't match
        groundtruth/predicted classes.
    """
    if groundtruth_masks.shape[0] != groundtruth_classes.shape[0]:
      raise ValueError(
          "The number of GT masks doesn't match the number of labels.")
    if predicted_masks.shape[0] != predicted_classes.shape[0]:
      raise ValueError(
          "The number of predicted masks doesn't match the number of labels.")
    gt_matched = np.zeros(groundtruth_classes.shape, dtype=np.bool)
    pred_matched = np.zeros(predicted_classes.shape, dtype=np.bool)
    best_overlaps = np.zeros(predicted_classes.shape)
    for pid in range(predicted_classes.shape[0]):
      best_overlap = 0
      matched_gt_id = -1
      for gid in range(groundtruth_classes.shape[0]):
        if predicted_classes[pid] == groundtruth_classes[gid]:
          if (not with_replacement) and gt_matched[gid]:
            continue
          if not is_crowd:
            overlap = np_mask_ops.iou(predicted_masks[pid:pid + 1],
                                      groundtruth_masks[gid:gid + 1])[0, 0]
          else:
            overlap = np_mask_ops.ioa(groundtruth_masks[gid:gid + 1],
                                      predicted_masks[pid:pid + 1])[0, 0]
          if overlap >= matching_threshold and overlap > best_overlap:
            matched_gt_id = gid
            best_overlap = overlap
      if matched_gt_id >= 0:
        gt_matched[matched_gt_id] = True
        pred_matched[pid] = True
        best_overlaps[pid] = best_overlap
    return best_overlaps, pred_matched, gt_matched

  def _unpack_evaluation_dictionary_items(self, eval_dict):
    """Unpack items from the evaluation dictionary."""
    input_data_fields = standard_fields.InputDataFields
    detection_fields = standard_fields.DetectionResultFields
    image_id = eval_dict[input_data_fields.key]
    groundtruth_classes = eval_dict[input_data_fields.groundtruth_classes]
    groundtruth_instance_masks = eval_dict[
        input_data_fields.groundtruth_instance_masks]
    groundtruth_is_crowd = eval_dict.get(input_data_fields.groundtruth_is_crowd,
                                         None)
    num_gt_masks_per_image = eval_dict.get(
        input_data_fields.num_groundtruth_boxes, None)
    detection_classes = eval_dict[detection_fields.detection_classes]
    detection_masks = eval_dict[detection_fields.detection_masks]
    num_det_masks_per_image = eval_dict.get(detection_fields.num_detections,
                                            None)
    if groundtruth_is_crowd is None:
      groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool)

    if not image_id.shape.as_list():
      # Apply a batch dimension to all tensors.
      image_id = tf.expand_dims(image_id, 0)
      groundtruth_classes = tf.expand_dims(groundtruth_classes, 0)
      groundtruth_instance_masks = tf.expand_dims(groundtruth_instance_masks, 0)
      groundtruth_is_crowd = tf.expand_dims(groundtruth_is_crowd, 0)
      detection_classes = tf.expand_dims(detection_classes, 0)
      detection_masks = tf.expand_dims(detection_masks, 0)

      if num_gt_masks_per_image is None:
        num_gt_masks_per_image = tf.shape(groundtruth_classes)[1:2]
      else:
        num_gt_masks_per_image = tf.expand_dims(num_gt_masks_per_image, 0)

      if num_det_masks_per_image is None:
        num_det_masks_per_image = tf.shape(detection_classes)[1:2]
      else:
        num_det_masks_per_image = tf.expand_dims(num_det_masks_per_image, 0)
    else:
      if num_gt_masks_per_image is None:
        num_gt_masks_per_image = tf.tile(
            tf.shape(groundtruth_classes)[1:2],
            multiples=tf.shape(groundtruth_classes)[0:1])
      if num_det_masks_per_image is None:
        num_det_masks_per_image = tf.tile(
            tf.shape(detection_classes)[1:2],
            multiples=tf.shape(detection_classes)[0:1])
    return (image_id, groundtruth_classes, groundtruth_instance_masks,
            groundtruth_is_crowd, num_gt_masks_per_image, detection_classes,
            detection_masks, num_det_masks_per_image)
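A quick way to sanity-check the per-class arithmetic in _compute_panoptic_metrics_single_class above is to compute it standalone. The following minimal NumPy sketch (not part of the commit) reproduces the PQ = SQ × RQ decomposition named in the docstring:

import numpy as np


def panoptic_quality(sum_tp_iou, num_tp, num_fp, num_fn):
  """PQ/SQ/RQ for one class, mirroring the formulas in the docstring above."""
  denominator = num_tp + 0.5 * num_fp + 0.5 * num_fn
  if denominator == 0:
    # Class has no groundtruth and no predictions: not a valid class.
    return None, None, None
  recognition_quality = num_tp / denominator                       # RQ
  segmentation_quality = sum_tp_iou / num_tp if num_tp else 0.0    # SQ: mean IoU of TPs
  return (segmentation_quality * recognition_quality,              # PQ = SQ * RQ
          segmentation_quality, recognition_quality)


# One matched mask with IoU 0.8, two false positives, one false negative
# (this is exactly the 'person' category in the unit test that follows):
pq, sq, rq = panoptic_quality(0.8, num_tp=1, num_fp=2, num_fn=1)
assert np.isclose(pq, 0.32) and np.isclose(sq, 0.8) and np.isclose(rq, 0.4)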
research/object_detection/metrics/coco_evaluation_test.py

@@ -18,10 +18,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

 import unittest
 import numpy as np
 import tensorflow.compat.v1 as tf

 from object_detection.core import standard_fields
 from object_detection.metrics import coco_evaluation
 from object_detection.utils import tf_version


 def _get_categories_list():

@@ -250,6 +252,7 @@ class CocoDetectionEvaluationTest(tf.test.TestCase):
        })


 @unittest.skipIf(tf_version.is_tf2(), 'Only Supported in TF1.X')
 class CocoEvaluationPyFuncTest(tf.test.TestCase):

   def testGetOneMAPWithMatchingGroundtruthAndDetections(self):

@@ -926,6 +929,7 @@ class CocoKeypointEvaluationTest(tf.test.TestCase):
        -1.0)


 @unittest.skipIf(tf_version.is_tf2(), 'Only Supported in TF1.X')
 class CocoKeypointEvaluationPyFuncTest(tf.test.TestCase):

   def testGetOneMAPWithMatchingKeypoints(self):

@@ -1438,6 +1442,7 @@ class CocoMaskEvaluationTest(tf.test.TestCase):
    self.assertFalse(coco_evaluator._detection_masks_list)


 @unittest.skipIf(tf_version.is_tf2(), 'Only Supported in TF1.X')
 class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):

   def testAddEvalDict(self):

@@ -1716,5 +1721,221 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
    self.assertFalse(coco_evaluator._detection_masks_list)


def _get_panoptic_test_data():
  # image1 contains 3 people in gt, (2 normal annotation and 1 "is_crowd"
  # annotation), and 3 people in prediction.
  gt_masks1 = np.zeros((3, 50, 50), dtype=np.uint8)
  result_masks1 = np.zeros((3, 50, 50), dtype=np.uint8)
  gt_masks1[0, 10:20, 20:30] = 1
  result_masks1[0, 10:18, 20:30] = 1
  gt_masks1[1, 25:30, 25:35] = 1
  result_masks1[1, 18:25, 25:30] = 1
  gt_masks1[2, 40:50, 40:50] = 1
  result_masks1[2, 47:50, 47:50] = 1
  gt_class1 = np.array([1, 1, 1])
  gt_is_crowd1 = np.array([0, 0, 1])
  result_class1 = np.array([1, 1, 1])

  # image2 contains 1 dog and 1 cat in gt, while 1 person and 1 dog in
  # prediction.
  gt_masks2 = np.zeros((2, 30, 40), dtype=np.uint8)
  result_masks2 = np.zeros((2, 30, 40), dtype=np.uint8)
  gt_masks2[0, 5:15, 20:35] = 1
  gt_masks2[1, 20:30, 0:10] = 1
  result_masks2[0, 20:25, 10:15] = 1
  result_masks2[1, 6:15, 15:35] = 1
  gt_class2 = np.array([2, 3])
  gt_is_crowd2 = np.array([0, 0])
  result_class2 = np.array([1, 2])

  gt_class = [gt_class1, gt_class2]
  gt_masks = [gt_masks1, gt_masks2]
  gt_is_crowd = [gt_is_crowd1, gt_is_crowd2]
  result_class = [result_class1, result_class2]
  result_masks = [result_masks1, result_masks2]
  return gt_class, gt_masks, gt_is_crowd, result_class, result_masks


class CocoPanopticEvaluationTest(tf.test.TestCase):

  def test_panoptic_quality(self):
    pq_evaluator = coco_evaluation.CocoPanopticSegmentationEvaluator(
        _get_categories_list(), include_metrics_per_category=True)
    (gt_class, gt_masks, gt_is_crowd, result_class,
     result_masks) = _get_panoptic_test_data()

    for i in range(2):
      pq_evaluator.add_single_ground_truth_image_info(
          image_id='image%d' % i,
          groundtruth_dict={
              standard_fields.InputDataFields.groundtruth_classes:
                  gt_class[i],
              standard_fields.InputDataFields.groundtruth_instance_masks:
                  gt_masks[i],
              standard_fields.InputDataFields.groundtruth_is_crowd:
                  gt_is_crowd[i]
          })

      pq_evaluator.add_single_detected_image_info(
          image_id='image%d' % i,
          detections_dict={
              standard_fields.DetectionResultFields.detection_classes:
                  result_class[i],
              standard_fields.DetectionResultFields.detection_masks:
                  result_masks[i]
          })

    metrics = pq_evaluator.evaluate()
    self.assertAlmostEqual(
        metrics['PanopticQuality@0.50IOU_ByCategory/person'], 0.32)
    self.assertAlmostEqual(
        metrics['PanopticQuality@0.50IOU_ByCategory/dog'], 135.0 / 195)
    self.assertAlmostEqual(
        metrics['PanopticQuality@0.50IOU_ByCategory/cat'], 0)
    self.assertAlmostEqual(metrics['SegmentationQuality@0.50IOU'],
                           (0.8 + 135.0 / 195) / 3)
    self.assertAlmostEqual(metrics['RecognitionQuality@0.50IOU'], (0.4 + 1) / 3)
    self.assertAlmostEqual(metrics['PanopticQuality@0.50IOU'],
                           (0.32 + 135.0 / 195) / 3)
    self.assertEqual(metrics['NumValidClasses'], 3)
    self.assertEqual(metrics['NumTotalClasses'], 3)


@unittest.skipIf(tf_version.is_tf2(), 'Only Supported in TF1.X')
class CocoPanopticEvaluationPyFuncTest(tf.test.TestCase):

  def testPanopticQualityNoBatch(self):
    pq_evaluator = coco_evaluation.CocoPanopticSegmentationEvaluator(
        _get_categories_list(), include_metrics_per_category=True)

    image_id = tf.placeholder(tf.string, shape=())
    groundtruth_classes = tf.placeholder(tf.int32, shape=(None))
    groundtruth_masks = tf.placeholder(tf.uint8, shape=(None, None, None))
    groundtruth_is_crowd = tf.placeholder(tf.int32, shape=(None))
    detection_classes = tf.placeholder(tf.int32, shape=(None))
    detection_masks = tf.placeholder(tf.uint8, shape=(None, None, None))

    input_data_fields = standard_fields.InputDataFields
    detection_fields = standard_fields.DetectionResultFields
    eval_dict = {
        input_data_fields.key: image_id,
        input_data_fields.groundtruth_classes: groundtruth_classes,
        input_data_fields.groundtruth_instance_masks: groundtruth_masks,
        input_data_fields.groundtruth_is_crowd: groundtruth_is_crowd,
        detection_fields.detection_classes: detection_classes,
        detection_fields.detection_masks: detection_masks,
    }

    eval_metric_ops = pq_evaluator.get_estimator_eval_metric_ops(eval_dict)
    _, update_op = eval_metric_ops['PanopticQuality@0.50IOU']

    (gt_class, gt_masks, gt_is_crowd, result_class,
     result_masks) = _get_panoptic_test_data()

    with self.test_session() as sess:
      for i in range(2):
        sess.run(
            update_op,
            feed_dict={
                image_id: 'image%d' % i,
                groundtruth_classes: gt_class[i],
                groundtruth_masks: gt_masks[i],
                groundtruth_is_crowd: gt_is_crowd[i],
                detection_classes: result_class[i],
                detection_masks: result_masks[i]
            })
      metrics = {}
      for key, (value_op, _) in eval_metric_ops.items():
        metrics[key] = value_op
      metrics = sess.run(metrics)
      self.assertAlmostEqual(metrics['PanopticQuality@0.50IOU'],
                             (0.32 + 135.0 / 195) / 3)

  def testPanopticQualityBatched(self):
    pq_evaluator = coco_evaluation.CocoPanopticSegmentationEvaluator(
        _get_categories_list(), include_metrics_per_category=True)
    batch_size = 2
    image_id = tf.placeholder(tf.string, shape=(batch_size))
    groundtruth_classes = tf.placeholder(tf.int32, shape=(batch_size, None))
    groundtruth_masks = tf.placeholder(
        tf.uint8, shape=(batch_size, None, None, None))
    groundtruth_is_crowd = tf.placeholder(tf.int32, shape=(batch_size, None))
    detection_classes = tf.placeholder(tf.int32, shape=(batch_size, None))
    detection_masks = tf.placeholder(
        tf.uint8, shape=(batch_size, None, None, None))
    num_gt_masks_per_image = tf.placeholder(tf.int32, shape=(batch_size))
    num_det_masks_per_image = tf.placeholder(tf.int32, shape=(batch_size))

    input_data_fields = standard_fields.InputDataFields
    detection_fields = standard_fields.DetectionResultFields
    eval_dict = {
        input_data_fields.key: image_id,
        input_data_fields.groundtruth_classes: groundtruth_classes,
        input_data_fields.groundtruth_instance_masks: groundtruth_masks,
        input_data_fields.groundtruth_is_crowd: groundtruth_is_crowd,
        input_data_fields.num_groundtruth_boxes: num_gt_masks_per_image,
        detection_fields.detection_classes: detection_classes,
        detection_fields.detection_masks: detection_masks,
        detection_fields.num_detections: num_det_masks_per_image,
    }

    eval_metric_ops = pq_evaluator.get_estimator_eval_metric_ops(eval_dict)
    _, update_op = eval_metric_ops['PanopticQuality@0.50IOU']

    (gt_class, gt_masks, gt_is_crowd, result_class,
     result_masks) = _get_panoptic_test_data()

    with self.test_session() as sess:
      sess.run(
          update_op,
          feed_dict={
              image_id: ['image0', 'image1'],
              groundtruth_classes:
                  np.stack([
                      gt_class[0],
                      np.pad(gt_class[1], (0, 1), mode='constant')
                  ], axis=0),
              groundtruth_masks:
                  np.stack([
                      np.pad(gt_masks[0], ((0, 0), (0, 10), (0, 10)),
                             mode='constant'),
                      np.pad(gt_masks[1], ((0, 1), (0, 30), (0, 20)),
                             mode='constant'),
                  ], axis=0),
              groundtruth_is_crowd:
                  np.stack([
                      gt_is_crowd[0],
                      np.pad(gt_is_crowd[1], (0, 1), mode='constant')
                  ], axis=0),
              num_gt_masks_per_image: np.array([3, 2]),
              detection_classes:
                  np.stack([
                      result_class[0],
                      np.pad(result_class[1], (0, 1), mode='constant')
                  ], axis=0),
              detection_masks:
                  np.stack([
                      np.pad(result_masks[0], ((0, 0), (0, 10), (0, 10)),
                             mode='constant'),
                      np.pad(result_masks[1], ((0, 1), (0, 30), (0, 20)),
                             mode='constant'),
                  ], axis=0),
              num_det_masks_per_image: np.array([3, 2]),
          })
      metrics = {}
      for key, (value_op, _) in eval_metric_ops.items():
        metrics[key] = value_op
      metrics = sess.run(metrics)
      self.assertAlmostEqual(metrics['PanopticQuality@0.50IOU'],
                             (0.32 + 135.0 / 195) / 3)


if __name__ == '__main__':
  tf.test.main()
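For reference, the 135.0 / 195 constant asserted above falls directly out of the synthetic dog masks defined in _get_panoptic_test_data. A small standalone check (not part of the commit):

import numpy as np

gt_dog = np.zeros((30, 40), dtype=np.uint8)
pred_dog = np.zeros((30, 40), dtype=np.uint8)
gt_dog[5:15, 20:35] = 1    # 10 x 15 = 150 pixels
pred_dog[6:15, 15:35] = 1  # 9 x 20 = 180 pixels

intersection = np.logical_and(gt_dog, pred_dog).sum()  # 9 x 15 = 135
union = np.logical_or(gt_dog, pred_dog).sum()          # 150 + 180 - 135 = 195
print(intersection / union)  # 135/195, the dog PQ when TP=1 and FP=FN=0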
research/object_detection/metrics/coco_tools.py

@@ -52,6 +52,7 @@ from pycocotools import coco
 from pycocotools import cocoeval
 from pycocotools import mask

 import six
 from six.moves import range
 from six.moves import zip
 import tensorflow.compat.v1 as tf

@@ -353,7 +354,9 @@ def _RleCompress(masks):
   Returns:
     A pycocotools Run-length encoding of the mask.
   """
-  return mask.encode(np.asfortranarray(masks))
+  rle = mask.encode(np.asfortranarray(masks))
+  rle['counts'] = six.ensure_str(rle['counts'])
+  return rle


 def ExportSingleImageGroundtruthToCoco(image_id,
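For context on the _RleCompress change: on Python 3, pycocotools returns the run-length 'counts' field as bytes, which the downstream JSON export of detection results can fail to serialize, hence the six.ensure_str conversion. A small sketch of the behaviour (assumes pycocotools is installed; not part of the commit):

import numpy as np
import six
from pycocotools import mask

binary_mask = np.zeros((4, 4), dtype=np.uint8)
binary_mask[1:3, 1:3] = 1

rle = mask.encode(np.asfortranarray(binary_mask))
print(type(rle['counts']))             # bytes on Python 3
rle['counts'] = six.ensure_str(rle['counts'])
print(type(rle['counts']))             # str, safe to write out as JSON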
research/object_detection/metrics/offline_eval_map_corloc.py

@@ -36,8 +36,8 @@ import os
 import re
 import tensorflow.compat.v1 as tf

+from object_detection import eval_util
 from object_detection.core import standard_fields
-from object_detection.legacy import evaluator
 from object_detection.metrics import tf_example_parser
 from object_detection.utils import config_util
 from object_detection.utils import label_map_util

@@ -94,7 +94,7 @@ def read_data_and_evaluate(input_config, eval_config):
      categories = label_map_util.create_categories_from_labelmap(
          input_config.label_map_path)

-      object_detection_evaluators = evaluator.get_evaluators(
+      object_detection_evaluators = eval_util.get_evaluators(
          eval_config, categories)
      # Support a single evaluator
      object_detection_evaluator = object_detection_evaluators[0]
research/object_detection/model_lib.py

@@ -23,9 +23,9 @@ import functools
 import os

 import tensorflow.compat.v1 as tf
 import tensorflow.compat.v2 as tf2
 import tf_slim as slim

 from object_detection import eval_util
 from object_detection import exporter as exporter_lib
 from object_detection import inputs

@@ -349,7 +349,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
      from tensorflow.python.keras.engine import base_layer_utils  # pylint: disable=g-import-not-at-top
      # Enable v2 behavior, as `mixed_bfloat16` is only supported in TF 2.0.
      base_layer_utils.enable_v2_dtype_behavior()
-      tf.compat.v2.keras.mixed_precision.experimental.set_policy(
+      tf2.keras.mixed_precision.experimental.set_policy(
          'mixed_bfloat16')
    detection_model = detection_model_fn(
        is_training=is_training, add_summaries=(not use_tpu))
research/object_detection/model_lib_test.py → research/object_detection/model_lib_tf1_test.py

@@ -20,19 +20,17 @@ from __future__ import print_function
 import functools
 import os
 import unittest
 import numpy as np
 import tensorflow.compat.v1 as tf

 from tensorflow.contrib.tpu.python.tpu import tpu_config
 from tensorflow.contrib.tpu.python.tpu import tpu_estimator

 from object_detection import inputs
 from object_detection import model_hparams
 from object_detection import model_lib
 from object_detection.builders import model_builder
 from object_detection.core import standard_fields as fields
 from object_detection.utils import config_util
 from object_detection.utils import tf_version

 # Model for test. Options are:

@@ -122,6 +120,7 @@ def _make_initializable_iterator(dataset):
  return iterator


 @unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
 class ModelLibTest(tf.test.TestCase):

   @classmethod

@@ -337,8 +336,7 @@ class ModelLibTest(tf.test.TestCase):
  def test_create_tpu_estimator_and_inputs(self):
    """Tests that number of train/eval defaults to config values."""
-    run_config = tpu_config.RunConfig()
+    run_config = tf.estimator.tpu.RunConfig()
    hparams = model_hparams.create_hparams(
        hparams_overrides='load_pretrained=false')
    pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)

@@ -352,7 +350,7 @@ class ModelLibTest(tf.test.TestCase):
    estimator = train_and_eval_dict['estimator']
    train_steps = train_and_eval_dict['train_steps']
-    self.assertIsInstance(estimator, tpu_estimator.TPUEstimator)
+    self.assertIsInstance(estimator, tf.estimator.tpu.TPUEstimator)
    self.assertEqual(20, train_steps)

  def test_create_train_and_eval_specs(self):

@@ -406,6 +404,7 @@ class ModelLibTest(tf.test.TestCase):
    self.assertEqual(None, experiment.eval_steps)


 @unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
 class UnbatchTensorsTest(tf.test.TestCase):

   def test_unbatch_without_unpadding(self):
research/object_detection/model_lib_v2_test.py → research/object_detection/model_lib_tf2_test.py

@@ -20,18 +20,19 @@ from __future__ import print_function
 import os
 import tempfile
 import unittest
 import numpy as np
 import six
 import tensorflow.compat.v1 as tf
 import tensorflow.compat.v2 as tf2

 from object_detection import inputs
 from object_detection import model_hparams
 from object_detection import model_lib_v2
 from object_detection.builders import model_builder
 from object_detection.core import model
 from object_detection.protos import train_pb2
 from object_detection.utils import config_util
 from object_detection.utils import tf_version

 if six.PY2:
   import mock  # pylint: disable=g-importing-member,g-import-not-at-top

@@ -72,6 +73,7 @@ def _get_config_kwarg_overrides():
  }


 @unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
 class ModelLibTest(tf.test.TestCase):

   @classmethod

@@ -80,24 +82,25 @@ class ModelLibTest(tf.test.TestCase):
  def test_train_loop_then_eval_loop(self):
    """Tests that Estimator and input function are constructed correctly."""
-    hparams = model_hparams.create_hparams(
-        hparams_overrides='load_pretrained=false')
+    model_dir = tf.test.get_temp_dir()
    pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
+    new_pipeline_config_path = os.path.join(model_dir, 'new_pipeline.config')
+    config_util.clear_fine_tune_checkpoint(pipeline_config_path,
+                                           new_pipeline_config_path)
    config_kwarg_overrides = _get_config_kwarg_overrides()
-    model_dir = tf.test.get_temp_dir()
    train_steps = 2
-    model_lib_v2.train_loop(
-        hparams,
-        pipeline_config_path,
-        model_dir=model_dir,
-        train_steps=train_steps,
-        checkpoint_every_n=1,
-        **config_kwarg_overrides)
+    strategy = tf2.distribute.OneDeviceStrategy(device='/cpu:0')
+    with strategy.scope():
+      model_lib_v2.train_loop(
+          new_pipeline_config_path,
+          model_dir=model_dir,
+          train_steps=train_steps,
+          checkpoint_every_n=1,
+          **config_kwarg_overrides)

    model_lib_v2.eval_continuously(
-        hparams,
-        pipeline_config_path,
+        new_pipeline_config_path,
        model_dir=model_dir,
        checkpoint_dir=model_dir,
        train_steps=train_steps,

@@ -120,6 +123,9 @@ class SimpleModel(model.DetectionModel):
    return []

  def restore_map(self, *args, **kwargs):
    pass

+  def restore_from_objects(self, fine_tune_checkpoint_type):
+    return {'model': self}
+
  def preprocess(self, _):

@@ -139,27 +145,31 @@ class SimpleModel(model.DetectionModel):
    return []


+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
 class ModelCheckpointTest(tf.test.TestCase):
   """Test for model checkpoint related functionality."""

   def test_checkpoint_max_to_keep(self):
     """Test that only the most recent checkpoints are kept."""
+    strategy = tf2.distribute.OneDeviceStrategy(device='/cpu:0')
     with mock.patch.object(
         model_builder, 'build', autospec=True) as mock_builder:
-      mock_builder.return_value = SimpleModel()
-      hparams = model_hparams.create_hparams(
-          hparams_overrides='load_pretrained=false')
+      with strategy.scope():
+        mock_builder.return_value = SimpleModel()
+        model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
        pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
+        new_pipeline_config_path = os.path.join(model_dir,
+                                                'new_pipeline.config')
+        config_util.clear_fine_tune_checkpoint(pipeline_config_path,
+                                               new_pipeline_config_path)
        config_kwarg_overrides = _get_config_kwarg_overrides()
-      model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())

-      model_lib_v2.train_loop(
-          hparams, pipeline_config_path, model_dir=model_dir, train_steps=20,
-          checkpoint_every_n=2, checkpoint_max_to_keep=3,
-          **config_kwarg_overrides)
+      with strategy.scope():
+        model_lib_v2.train_loop(
+            new_pipeline_config_path, model_dir=model_dir, train_steps=20,
+            checkpoint_every_n=2, checkpoint_max_to_keep=3,
+            **config_kwarg_overrides)
      ckpt_files = tf.io.gfile.glob(os.path.join(model_dir, 'ckpt-*.index'))
      self.assertEqual(len(ckpt_files), 3,
                       '{} not of length 3.'.format(ckpt_files))

@@ -167,10 +177,11 @@ class ModelCheckpointTest(tf.test.TestCase):
 class IncompatibleModel(SimpleModel):

-  def restore_map(self, *args, **kwargs):
+  def restore_from_objects(self, *args, **kwargs):
    return {'weight': self.weight}


 @unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
 class CheckpointV2Test(tf.test.TestCase):

   def setUp(self):

@@ -199,7 +210,6 @@ class CheckpointV2Test(tf.test.TestCase):
    model_lib_v2.load_fine_tune_checkpoint(
        self._model, self._ckpt_path, checkpoint_type='',
        checkpoint_version=train_pb2.CheckpointVersion.V2,
-        load_all_detection_checkpoint_vars=True,
        input_dataset=self._train_input_fn(),
        unpad_groundtruth_tensors=True)

    np.testing.assert_allclose(self._model.weight.numpy(), 42)

@@ -212,8 +222,9 @@ class CheckpointV2Test(tf.test.TestCase):
      model_lib_v2.load_fine_tune_checkpoint(
          IncompatibleModel(), self._ckpt_path, checkpoint_type='',
          checkpoint_version=train_pb2.CheckpointVersion.V2,
-          load_all_detection_checkpoint_vars=True,
          input_dataset=self._train_input_fn(),
          unpad_groundtruth_tensors=True)


 if __name__ == '__main__':
   tf.test.main()
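These test updates reflect the new calling convention for model_lib_v2.train_loop: the HParams argument is gone and the call is expected to run inside a tf.distribute strategy scope. A rough sketch of a driver script under that convention (the paths and step counts below are placeholders for illustration, not values from this commit):

import tensorflow.compat.v2 as tf

from object_detection import model_lib_v2

PIPELINE_CONFIG = '/tmp/pipeline.config'  # placeholder path
MODEL_DIR = '/tmp/model_dir'              # placeholder path

# Build the distribution strategy first, then train inside its scope.
strategy = tf.distribute.OneDeviceStrategy(device='/cpu:0')
with strategy.scope():
  model_lib_v2.train_loop(
      pipeline_config_path=PIPELINE_CONFIG,
      model_dir=MODEL_DIR,
      train_steps=1000,
      checkpoint_every_n=100)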
research/object_detection/model_lib_v2.py
View file @
47bc1813
...
...
@@ -34,7 +34,6 @@ from object_detection.protos import train_pb2
from
object_detection.utils
import
config_util
from
object_detection.utils
import
label_map_util
from
object_detection.utils
import
ops
from
object_detection.utils
import
variables_helper
from
object_detection.utils
import
visualization_utils
as
vutils
# pylint: disable=g-import-not-at-top
...
...
@@ -47,13 +46,6 @@ except ImportError:
MODEL_BUILD_UTIL_MAP
=
model_lib
.
MODEL_BUILD_UTIL_MAP
### NOTE: This file is a wip.
### TODO(kaftan): Explore adding unit tests for individual methods
### TODO(kaftan): Add unit test that checks training on a single image w/
#### groundtruth, and verfiy that loss goes to zero.
#### Possibly have version that takes it as the whole train & eval dataset,
#### & verify the loss output from the eval_loop method.
### TODO(kaftan): Make sure the unit tests run in TAP presubmits or Kokoro
RESTORE_MAP_ERROR_TEMPLATE
=
(
'Since we are restoring a v2 style checkpoint'
...
...
@@ -277,14 +269,21 @@ def validate_tf_v2_checkpoint_restore_map(checkpoint_restore_map):
"""
for
key
,
value
in
checkpoint_restore_map
.
items
():
if
not
(
isinstance
(
key
,
str
)
and
isinstance
(
value
,
tf
.
Module
)):
if
not
(
isinstance
(
key
,
str
)
and
(
isinstance
(
value
,
tf
.
Module
)
or
isinstance
(
value
,
tf
.
train
.
Checkpoint
))):
raise
TypeError
(
RESTORE_MAP_ERROR_TEMPLATE
.
format
(
key
.
__class__
.
__name__
,
value
.
__class__
.
__name__
))
def
is_object_based_checkpoint
(
checkpoint_path
):
"""Returns true if `checkpoint_path` points to an object-based checkpoint."""
var_names
=
[
var
[
0
]
for
var
in
tf
.
train
.
list_variables
(
checkpoint_path
)]
return
'_CHECKPOINTABLE_OBJECT_GRAPH'
in
var_names
def
load_fine_tune_checkpoint
(
model
,
checkpoint_path
,
checkpoint_type
,
checkpoint_version
,
load_all_detection_checkpoint_vars
,
input_dataset
,
model
,
checkpoint_path
,
checkpoint_type
,
checkpoint_version
,
input_dataset
,
unpad_groundtruth_tensors
):
"""Load a fine tuning classification or detection checkpoint.
...
...
@@ -292,8 +291,7 @@ def load_fine_tune_checkpoint(
the model by computing a dummy loss. (Models might not have built their
variables before their first execution)
It then loads a variable-name based classification or detection checkpoint
that comes from converted TF 1.x slim model checkpoints.
It then loads an object-based classification or detection checkpoint.
This method updates the model in-place and does not return a value.
...
...
@@ -306,14 +304,22 @@ def load_fine_tune_checkpoint(
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`.
checkpoint_version: train_pb2.CheckpointVersion.V1 or V2 enum indicating
whether to load checkpoints in V1 style or V2 style.
load_all_detection_checkpoint_vars: whether to load all variables (when
`fine_tune_checkpoint_type` is `detection`). If False, only variables
within the feature extractor scopes are included. Default False.
whether to load checkpoints in V1 style or V2 style. In this binary
we only support V2 style (object-based) checkpoints.
input_dataset: The tf.data Dataset the model is being trained on. Needed
to get the shapes for the dummy loss computation.
unpad_groundtruth_tensors: A parameter passed to unstack_batch.
Raises:
IOError: if `checkpoint_path` does not point at a valid object-based
checkpoint
ValueError: if `checkpoint_version` is not train_pb2.CheckpointVersion.V2
"""
if
not
is_object_based_checkpoint
(
checkpoint_path
):
raise
IOError
(
'Checkpoint is expected to be an object-based checkpoint.'
)
if
checkpoint_version
==
train_pb2
.
CheckpointVersion
.
V1
:
raise
ValueError
(
'Checkpoint version should be V2'
)
features
,
labels
=
iter
(
input_dataset
).
next
()
@
tf
.
function
...
...
@@ -330,35 +336,20 @@ def load_fine_tune_checkpoint(
labels
)
strategy
=
tf
.
compat
.
v2
.
distribute
.
get_strategy
()
strategy
.
run
(
strategy
.
experimental_run_v2
(
_dummy_computation_fn
,
args
=
(
features
,
labels
,
))
if
checkpoint_version
==
train_pb2
.
CheckpointVersion
.
V1
:
var_map
=
model
.
restore_map
(
fine_tune_checkpoint_type
=
checkpoint_type
,
load_all_detection_checkpoint_vars
=
(
load_all_detection_checkpoint_vars
))
available_var_map
=
variables_helper
.
get_variables_available_in_checkpoint
(
var_map
,
checkpoint_path
,
include_global_step
=
False
)
tf
.
train
.
init_from_checkpoint
(
checkpoint_path
,
available_var_map
)
elif
checkpoint_version
==
train_pb2
.
CheckpointVersion
.
V2
:
restore_map
=
model
.
restore_map
(
fine_tune_checkpoint_type
=
checkpoint_type
,
load_all_detection_checkpoint_vars
=
(
load_all_detection_checkpoint_vars
))
validate_tf_v2_checkpoint_restore_map
(
restore_map
)
ckpt
=
tf
.
train
.
Checkpoint
(
**
restore_map
)
ckpt
.
restore
(
checkpoint_path
).
assert_existing_objects_matched
()
def
_get_filepath
(
strategy
,
filepath
):
restore_from_objects_dict
=
model
.
restore_from_objects
(
fine_tune_checkpoint_type
=
checkpoint_type
)
validate_tf_v2_checkpoint_restore_map
(
restore_from_objects_dict
)
ckpt
=
tf
.
train
.
Checkpoint
(
**
restore_from_objects_dict
)
ckpt
.
restore
(
checkpoint_path
).
assert_existing_objects_matched
()
def
get_filepath
(
strategy
,
filepath
):
"""Get appropriate filepath for worker.
Args:
...
...
@@ -377,7 +368,7 @@ def _get_filepath(strategy, filepath):
return
os
.
path
.
join
(
filepath
,
'temp_worker_{:03d}'
.
format
(
task_id
))
def
_
clean_temporary_directories
(
strategy
,
filepath
):
def
clean_temporary_directories
(
strategy
,
filepath
):
"""Temporary directory clean up for MultiWorker Mirrored Strategy.
This is needed for all non-chief workers.
...
...
@@ -392,14 +383,12 @@ def _clean_temporary_directories(strategy, filepath):
def
train_loop
(
hparams
,
pipeline_config_path
,
model_dir
,
config_override
=
None
,
train_steps
=
None
,
use_tpu
=
False
,
save_final_config
=
False
,
export_to_tpu
=
None
,
checkpoint_every_n
=
1000
,
checkpoint_max_to_keep
=
7
,
**
kwargs
):
...
...
@@ -417,7 +406,6 @@ def train_loop(
8. Logs the training metrics as TensorBoard summaries.
Args:
hparams: A `HParams`.
pipeline_config_path: A path to a pipeline config file.
model_dir:
The directory to save checkpoints and summaries to.
...
...
@@ -428,10 +416,6 @@ def train_loop(
use_tpu: Boolean, whether training and evaluation should run on TPU.
save_final_config: Whether to save final config (obtained after applying
overrides) to `model_dir`.
export_to_tpu: When use_tpu and export_to_tpu are true,
`export_savedmodel()` exports a metagraph for serving on TPU besides the
one on CPU. If export_to_tpu is not provided, we will look for it in
hparams too.
checkpoint_every_n:
Checkpoint every n training steps.
checkpoint_max_to_keep:
...
...
@@ -453,7 +437,7 @@ def train_loop(
'use_bfloat16'
:
configs
[
'train_config'
].
use_bfloat16
and
use_tpu
})
configs
=
merge_external_params_with_configs
(
configs
,
hparams
,
kwargs_dict
=
kwargs
)
configs
,
None
,
kwargs_dict
=
kwargs
)
model_config
=
configs
[
'model'
]
train_config
=
configs
[
'train_config'
]
train_input_config
=
configs
[
'train_input_config'
]
...
...
@@ -468,33 +452,14 @@ def train_loop(
if
train_steps
is
None
and
train_config
.
num_steps
!=
0
:
train_steps
=
train_config
.
num_steps
# Read export_to_tpu from hparams if not passed.
if
export_to_tpu
is
None
:
export_to_tpu
=
hparams
.
get
(
'export_to_tpu'
,
False
)
tf
.
logging
.
info
(
'train_loop: use_tpu %s, export_to_tpu %s'
,
use_tpu
,
export_to_tpu
)
if
kwargs
[
'use_bfloat16'
]:
tf
.
compat
.
v2
.
keras
.
mixed_precision
.
experimental
.
set_policy
(
'mixed_bfloat16'
)
# Parse the checkpoint fine tuning configs
if
hparams
.
load_pretrained
:
fine_tune_checkpoint_path
=
train_config
.
fine_tune_checkpoint
else
:
fine_tune_checkpoint_path
=
None
load_all_detection_checkpoint_vars
=
(
train_config
.
load_all_detection_checkpoint_vars
)
# TODO(kaftan) (or anyone else): move this piece of config munging to
## utils/config_util.py
if
not
train_config
.
fine_tune_checkpoint_type
:
# train_config.from_detection_checkpoint field is deprecated. For
# backward compatibility, set train_config.fine_tune_checkpoint_type
# based on train_config.from_detection_checkpoint.
if
train_config
.
from_detection_checkpoint
:
train_config
.
fine_tune_checkpoint_type
=
'detection'
else
:
train_config
.
fine_tune_checkpoint_type
=
'classification'
if
train_config
.
load_all_detection_checkpoint_vars
:
raise
ValueError
(
'train_pb2.load_all_detection_checkpoint_vars '
'unsupported in TF2'
)
config_util
.
update_fine_tune_checkpoint_type
(
train_config
)
fine_tune_checkpoint_type
=
train_config
.
fine_tune_checkpoint_type
fine_tune_checkpoint_version
=
train_config
.
fine_tune_checkpoint_version
...
...
@@ -539,8 +504,8 @@ def train_loop(
## Train the model
# Get the appropriate filepath (temporary or not) based on whether the worker
# is the chief.
summary_writer_filepath
=
_
get_filepath
(
strategy
,
os
.
path
.
join
(
model_dir
,
'train'
))
summary_writer_filepath
=
get_filepath
(
strategy
,
os
.
path
.
join
(
model_dir
,
'train'
))
summary_writer
=
tf
.
compat
.
v2
.
summary
.
create_file_writer
(
summary_writer_filepath
)
...
...
@@ -556,18 +521,18 @@ def train_loop(
with
tf
.
compat
.
v2
.
summary
.
record_if
(
lambda
:
global_step
%
num_steps_per_iteration
==
0
):
# Load a fine-tuning checkpoint.
if
fine_tune_checkpoint_path
:
load_fine_tune_checkpoint
(
detection_model
,
fine_tune_checkpoint_path
,
if
train_config
.
fine_tune_checkpoint
:
load_fine_tune_checkpoint
(
detection_model
,
train_config
.
fine_tune_checkpoint
,
fine_tune_checkpoint_type
,
fine_tune_checkpoint_version
,
load_all_detection_checkpoint_vars
,
train_input
,
unpad_groundtruth_tensors
)
ckpt
=
tf
.
compat
.
v2
.
train
.
Checkpoint
(
step
=
global_step
,
model
=
detection_model
,
optimizer
=
optimizer
)
manager_dir
=
_
get_filepath
(
strategy
,
model_dir
)
manager_dir
=
get_filepath
(
strategy
,
model_dir
)
if
not
strategy
.
extended
.
should_checkpoint
:
checkpoint_max_to_keep
=
1
manager
=
tf
.
compat
.
v2
.
train
.
CheckpointManager
(
...
...
@@ -597,7 +562,7 @@ def train_loop(
def
_sample_and_train
(
strategy
,
train_step_fn
,
data_iterator
):
features
,
labels
=
data_iterator
.
next
()
per_replica_losses
=
strategy
.
run
(
per_replica_losses
=
strategy
.
experimental_run_v2
(
train_step_fn
,
args
=
(
features
,
labels
))
# TODO(anjalisridhar): explore if it is safe to remove the
## num_replicas scaling of the loss and switch this to a ReduceOp.Mean
...
...
@@ -615,6 +580,10 @@ def train_loop(
return
_sample_and_train
(
strategy
,
train_step_fn
,
data_iterator
)
train_input_iter
=
iter
(
train_input
)
if
int
(
global_step
.
value
())
==
0
:
manager
.
save
()
checkpointed_step
=
int
(
global_step
.
value
())
logged_step
=
global_step
.
value
()
...
...
@@ -646,8 +615,8 @@ def train_loop(
# Remove the checkpoint directories of the non-chief workers that
# MultiWorkerMirroredStrategy forces us to save during sync distributed
# training.
_
clean_temporary_directories
(
strategy
,
manager_dir
)
_
clean_temporary_directories
(
strategy
,
summary_writer_filepath
)
clean_temporary_directories
(
strategy
,
manager_dir
)
clean_temporary_directories
(
strategy
,
summary_writer_filepath
)
def
eager_eval_loop
(
@@ -767,28 +736,25 @@ def eager_eval_loop(
     return eval_dict, losses_dict, class_agnostic

+  agnostic_categories = label_map_util.create_class_agnostic_category_index()
+  per_class_categories = label_map_util.create_category_index_from_labelmap(
+      eval_input_config.label_map_path)
+  keypoint_edges = [
+      (kp.start, kp.end) for kp in eval_config.keypoint_edge]
+
   for i, (features, labels) in enumerate(eval_dataset):
     eval_dict, losses_dict, class_agnostic = compute_eval_dict(features, labels)

+    if class_agnostic:
+      category_index = agnostic_categories
+    else:
+      category_index = per_class_categories
+
     if i % 100 == 0:
       tf.logging.info('Finished eval step %d', i)

     use_original_images = fields.InputDataFields.original_image in features
     if not use_tpu and use_original_images:
       # Summary for input images.
       tf.compat.v2.summary.image(
           name='eval_input_images',
           step=global_step,
           data=eval_dict['original_image'],
           max_outputs=1)
-      # Summary for prediction/groundtruth side-by-side images.
-      if class_agnostic:
-        category_index = label_map_util.create_class_agnostic_category_index()
-      else:
-        category_index = label_map_util.create_category_index_from_labelmap(
-            eval_input_config.label_map_path)
-      keypoint_edges = [
-          (kp.start, kp.end) for kp in eval_config.keypoint_edge]
       if use_original_images and i < eval_config.num_visualizations:
         sbys_image_list = vutils.draw_side_by_side_evaluation_image(
             eval_dict,
             category_index=category_index,

@@ -798,10 +764,10 @@ def eager_eval_loop(
             keypoint_edges=keypoint_edges or None)
         sbys_images = tf.concat(sbys_image_list, axis=0)
         tf.compat.v2.summary.image(
-            name='eval_side_by_side',
+            name='eval_side_by_side_' + str(i),
             step=global_step,
             data=sbys_images,
-            max_outputs=eval_config.num_visualizations)
+            max_outputs=1)

     if evaluators is None:
       if class_agnostic:

@@ -830,14 +796,15 @@ def eager_eval_loop(
       eval_metrics[loss_key] = loss_metrics[loss_key].result()

   eval_metrics = {str(k): v for k, v in eval_metrics.items()}
+  tf.logging.info('Eval metrics at step %d', global_step)
   for k in eval_metrics:
     tf.compat.v2.summary.scalar(k, eval_metrics[k], step=global_step)
     tf.logging.info('\t+ %s: %f', k, eval_metrics[k])
   return eval_metrics


 def eval_continuously(
-    hparams,
     pipeline_config_path,
     config_override=None,
     train_steps=None,
@@ -846,7 +813,6 @@ def eval_continuously(
     use_tpu=False,
     override_eval_num_epochs=True,
     postprocess_on_cpu=False,
-    export_to_tpu=None,
     model_dir=None,
     checkpoint_dir=None,
     wait_interval=180,

@@ -859,7 +825,6 @@ def eval_continuously(
   on the evaluation data.

   Args:
-    hparams: A `HParams`.
     pipeline_config_path: A path to a pipeline config file.
     config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to
       override the config from `pipeline_config_path`.

@@ -875,10 +840,6 @@ def eval_continuously(
       eval_input.
     postprocess_on_cpu: When use_tpu and postprocess_on_cpu are true,
       postprocess is scheduled on the host cpu.
-    export_to_tpu: When use_tpu and export_to_tpu are true,
-      `export_savedmodel()` exports a metagraph for serving on TPU besides the
-      one on CPU. If export_to_tpu is not provided, we will look for it in
-      hparams too.
     model_dir: Directory to output resulting evaluation summaries to.
     checkpoint_dir: Directory that contains the training checkpoints.
     wait_interval: The mimmum number of seconds to wait before checking for a

@@ -906,7 +867,7 @@ def eval_continuously(
     tf.logging.warning('Forced number of epochs for all eval validations to be 1.')
   configs = merge_external_params_with_configs(
-      configs, hparams, kwargs_dict=kwargs)
+      configs, None, kwargs_dict=kwargs)
   model_config = configs['model']
   train_input_config = configs['train_input_config']
   eval_config = configs['eval_config']

@@ -938,12 +899,6 @@ def eval_continuously(
         model=detection_model)
     eval_inputs.append((eval_input_config.name, next_eval_input))

-  # Read export_to_tpu from hparams if not passed.
-  if export_to_tpu is None:
-    export_to_tpu = hparams.get('export_to_tpu', False)
-  tf.logging.info('eval_continuously: use_tpu %s, export_to_tpu %s',
-                  use_tpu, export_to_tpu)
-
   global_step = tf.compat.v2.Variable(
       0, trainable=False, dtype=tf.compat.v2.dtypes.int64)

@@ -956,7 +911,7 @@ def eval_continuously(
   for eval_name, eval_input in eval_inputs:
     summary_writer = tf.compat.v2.summary.create_file_writer(
-        model_dir + '/eval' + eval_name)
+        os.path.join(model_dir, 'eval', eval_name))
     with summary_writer.as_default():
       eager_eval_loop(
           detection_model,
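The last hunk swaps string concatenation for os.path.join when building the per-input eval summary directory. A small illustration of the difference (directory and input names are placeholders):

import os

model_dir, eval_name = '/tmp/model_outputs', 'coco_eval'  # placeholders
# Old form: 'eval' and the input name run together in one path segment.
old_path = model_dir + '/eval' + eval_name             # '/tmp/model_outputs/evalcoco_eval'
# New form: each component becomes its own path segment.
new_path = os.path.join(model_dir, 'eval', eval_name)  # '/tmp/model_outputs/eval/coco_eval'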
research/object_detection/model_main_tf2.py  0 → 100644

# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Creates and runs TF2 object detection models.

For local training/evaluation run:
  PIPELINE_CONFIG_PATH=path/to/pipeline.config
  MODEL_DIR=/tmp/model_outputs
  NUM_TRAIN_STEPS=10000
  SAMPLE_1_OF_N_EVAL_EXAMPLES=1
  python model_main_tf2.py -- \
    --model_dir=$MODEL_DIR --num_train_steps=$NUM_TRAIN_STEPS \
    --sample_1_of_n_eval_examples=$SAMPLE_1_OF_N_EVAL_EXAMPLES \
    --pipeline_config_path=$PIPELINE_CONFIG_PATH \
    --alsologtostderr
"""
from absl import flags
import tensorflow.compat.v2 as tf
from object_detection import model_lib_v2

flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config '
                    'file.')
flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
flags.DEFINE_bool('eval_on_train_data', False, 'Enable evaluating on train '
                  'data (only supported in distributed training).')
flags.DEFINE_integer('sample_1_of_n_eval_examples', None, 'Will sample one of '
                     'every n eval input examples, where n is provided.')
flags.DEFINE_integer('sample_1_of_n_eval_on_train_examples', 5, 'Will sample '
                     'one of every n train input examples for evaluation, '
                     'where n is provided. This is only used if '
                     '`eval_training_data` is True.')
flags.DEFINE_string('model_dir', None, 'Path to output model directory '
                    'where event and checkpoint files will be written.')
flags.DEFINE_string('checkpoint_dir', None, 'Path to directory holding a checkpoint. If '
                    '`checkpoint_dir` is provided, this binary operates in eval-only mode, '
                    'writing resulting metrics to `model_dir`.')
flags.DEFINE_integer('eval_timeout', 3600, 'Number of seconds to wait for an '
                     'evaluation checkpoint before exiting.')
flags.DEFINE_bool('use_tpu', False, 'Whether the job is executing on a TPU.')
flags.DEFINE_integer('num_workers', 1, 'When num_workers > 1, training uses '
                     'MultiWorkerMirroredStrategy. When num_workers = 1 it uses '
                     'MirroredStrategy.')
FLAGS = flags.FLAGS


def main(unused_argv):
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')
  tf.config.set_soft_device_placement(True)

  if FLAGS.checkpoint_dir:
    model_lib_v2.eval_continuously(
        pipeline_config_path=FLAGS.pipeline_config_path,
        model_dir=FLAGS.model_dir,
        train_steps=FLAGS.num_train_steps,
        sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
        sample_1_of_n_eval_on_train_examples=(
            FLAGS.sample_1_of_n_eval_on_train_examples),
        checkpoint_dir=FLAGS.checkpoint_dir,
        wait_interval=300,
        timeout=FLAGS.eval_timeout)
  else:
    if FLAGS.use_tpu:
      resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
      tf.config.experimental_connect_to_cluster(resolver)
      tf.tpu.experimental.initialize_tpu_system(resolver)
      strategy = tf.distribute.experimental.TPUStrategy(resolver)
    elif FLAGS.num_workers > 1:
      strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
    else:
      strategy = tf.compat.v2.distribute.MirroredStrategy()

    with strategy.scope():
      model_lib_v2.train_loop(
          pipeline_config_path=FLAGS.pipeline_config_path,
          model_dir=FLAGS.model_dir,
          train_steps=FLAGS.num_train_steps,
          use_tpu=FLAGS.use_tpu)

if __name__ == '__main__':
  tf.compat.v1.app.run()
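When --checkpoint_dir is supplied, main() runs in eval-only mode by forwarding the flags to model_lib_v2.eval_continuously. A minimal programmatic equivalent of that branch (all paths and values below are placeholders) looks like:

from object_detection import model_lib_v2

# Eval-only mode: read checkpoints from checkpoint_dir and write metrics and
# summaries under model_dir. Placeholder values, mirroring main() above.
model_lib_v2.eval_continuously(
    pipeline_config_path='path/to/pipeline.config',
    model_dir='/tmp/model_outputs',
    checkpoint_dir='/tmp/model_outputs',
    sample_1_of_n_eval_examples=1,
    wait_interval=300,
    timeout=3600)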
research/object_detection/models/bidirectional_feature_pyramid_generators.py  0 → 100644

# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to generate bidirectional feature pyramids based on image features.

Provides bidirectional feature pyramid network (BiFPN) generators that can be
used to build object detection feature extractors, as proposed by Tan et al.
See https://arxiv.org/abs/1911.09070 for more details.
"""
import collections
import functools
from six.moves import range
from six.moves import zip
import tensorflow as tf

from object_detection.utils import bifpn_utils


def _create_bifpn_input_config(fpn_min_level,
                               fpn_max_level,
                               input_max_level,
                               level_scales=None):
  """Creates a BiFPN input config for the input levels from a backbone network.

  Args:
    fpn_min_level: the minimum pyramid level (highest feature map resolution) to
      use in the BiFPN.
    fpn_max_level: the maximum pyramid level (lowest feature map resolution) to
      use in the BiFPN.
    input_max_level: the maximum pyramid level that will be provided as input to
      the BiFPN. Accordingly, the BiFPN will compute additional pyramid levels
      from input_max_level, up to the desired fpn_max_level.
    level_scales: a list of pyramid level scale factors. If 'None', each level's
      scale is set to 2^level by default, which corresponds to each successive
      feature map scaling by a factor of 2.

  Returns:
    A list of dictionaries for each feature map expected as input to the BiFPN,
    where each has entries for the feature map 'name' and 'scale'.
  """
  if not level_scales:
    level_scales = [2**i for i in range(fpn_min_level, fpn_max_level + 1)]

  bifpn_input_params = []
  for i in range(fpn_min_level, min(fpn_max_level, input_max_level) + 1):
    bifpn_input_params.append({
        'name': '0_up_lvl_{}'.format(i),
        'scale': level_scales[i - fpn_min_level]
    })

  return bifpn_input_params
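# Editorial example (not part of the original file): with fpn_min_level=3,
# fpn_max_level=7 and input_max_level=5, the default level_scales are
# [8, 16, 32, 64, 128] and _create_bifpn_input_config returns one entry per
# backbone level provided as input:
#   [{'name': '0_up_lvl_3', 'scale': 8},
#    {'name': '0_up_lvl_4', 'scale': 16},
#    {'name': '0_up_lvl_5', 'scale': 32}]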
def _get_bifpn_output_node_names(fpn_min_level, fpn_max_level, node_config):
  """Returns a list of BiFPN output node names, given a BiFPN node config.

  Args:
    fpn_min_level: the minimum pyramid level (highest feature map resolution)
      used by the BiFPN.
    fpn_max_level: the maximum pyramid level (lowest feature map resolution)
      used by the BiFPN.
    node_config: the BiFPN node_config, a list of dictionaries corresponding to
      each node in the BiFPN computation graph, where each entry should have an
      associated 'name'.

  Returns:
    A list of strings corresponding to the names of the output BiFPN nodes.
  """
  num_output_nodes = fpn_max_level - fpn_min_level + 1
  return [node['name'] for node in node_config[-num_output_nodes:]]


def _create_bifpn_node_config(bifpn_num_iterations,
                              bifpn_num_filters,
                              fpn_min_level,
                              fpn_max_level,
                              input_max_level,
                              bifpn_node_params=None,
                              level_scales=None):
  """Creates a config specifying a bidirectional feature pyramid network.

  Args:
    bifpn_num_iterations: the number of top-down bottom-up feature computations
      to repeat in the BiFPN.
    bifpn_num_filters: the number of filters (channels) for every feature map
      used in the BiFPN.
    fpn_min_level: the minimum pyramid level (highest feature map resolution) to
      use in the BiFPN.
    fpn_max_level: the maximum pyramid level (lowest feature map resolution) to
      use in the BiFPN.
    input_max_level: the maximum pyramid level that will be provided as input to
      the BiFPN. Accordingly, the BiFPN will compute additional pyramid levels
      from input_max_level, up to the desired fpn_max_level.
    bifpn_node_params: If not 'None', a dictionary of additional default BiFPN
      node parameters that will be applied to all BiFPN nodes.
    level_scales: a list of pyramid level scale factors. If 'None', each level's
      scale is set to 2^level by default, which corresponds to each successive
      feature map scaling by a factor of 2.

  Returns:
    A list of dictionaries used to define nodes in the BiFPN computation graph,
    as proposed by EfficientDet, Tan et al (https://arxiv.org/abs/1911.09070).
    Each node's entry has the corresponding keys:
      name: String. The name of this node in the BiFPN. The node name follows
        the format '{bifpn_iteration}_{dn|up}_lvl_{pyramid_level}', where 'dn'
        or 'up' refers to whether the node is in the top-down or bottom-up
        portion of a single BiFPN iteration.
      scale: the scale factor for this node, by default 2^level.
      inputs: A list of names of nodes which are inputs to this node.
      num_channels: The number of channels for this node.
      combine_method: String. Name of the method used to combine input
        node feature maps, 'fast_attention' by default for nodes which have more
        than one input. Otherwise, 'None' for nodes with only one input node.
      input_op: A (partial) function which is called to construct the layers
        that will be applied to this BiFPN node's inputs. This function is
        called with the arguments:
          input_op(name, input_scale, input_num_channels, output_scale,
                   output_num_channels, conv_hyperparams, is_training,
                   freeze_batchnorm)
      post_combine_op: A (partial) function which is called to construct the
        layers that will be applied to the result of the combine operation for
        this BiFPN node. This function will be called with the arguments:
          post_combine_op(name, conv_hyperparams, is_training, freeze_batchnorm)
        If 'None', then no layers will be applied after the combine operation
        for this node.
  """
  if not level_scales:
    level_scales = [2**i for i in range(fpn_min_level, fpn_max_level + 1)]

  default_node_params = {
      'num_channels': bifpn_num_filters,
      'combine_method': 'fast_attention',
      'input_op': functools.partial(
          _create_bifpn_resample_block, downsample_method='max_pooling'),
      'post_combine_op': functools.partial(
          bifpn_utils.create_conv_block,
          num_filters=bifpn_num_filters,
          kernel_size=3,
          strides=1,
          padding='SAME',
          use_separable=True,
          apply_batchnorm=True,
          apply_activation=True,
          conv_bn_act_pattern=False),
  }
  if bifpn_node_params:
    default_node_params.update(bifpn_node_params)

  bifpn_node_params = []
  # Create additional base pyramid levels not provided as input to the BiFPN.
  # Note, combine_method and post_combine_op are set to None for additional
  # base pyramid levels because they do not combine multiple input BiFPN nodes.
  for i in range(input_max_level + 1, fpn_max_level + 1):
    node_params = dict(default_node_params)
    node_params.update({
        'name': '0_up_lvl_{}'.format(i),
        'scale': level_scales[i - fpn_min_level],
        'inputs': ['0_up_lvl_{}'.format(i - 1)],
        'combine_method': None,
        'post_combine_op': None,
    })
    bifpn_node_params.append(node_params)

  for i in range(bifpn_num_iterations):
    # The first bottom-up feature pyramid (which includes the input pyramid
    # levels from the backbone network and the additional base pyramid levels)
    # is indexed at 0. So, the first top-down bottom-up pass of the BiFPN is
    # indexed from 1, and repeated for bifpn_num_iterations iterations.
    bifpn_i = i + 1

    # Create top-down nodes.
    for level_i in reversed(range(fpn_min_level, fpn_max_level)):
      inputs = []
      # BiFPN nodes in the top-down pass receive input from the corresponding
      # level from the previous BiFPN iteration's bottom-up pass, except for the
      # bottom-most (min) level node, which is computed once in the initial
      # bottom-up pass, and is afterwards only computed in each top-down pass.
      if level_i > fpn_min_level or bifpn_i == 1:
        inputs.append('{}_up_lvl_{}'.format(bifpn_i - 1, level_i))
      else:
        inputs.append('{}_dn_lvl_{}'.format(bifpn_i - 1, level_i))
      inputs.append(bifpn_node_params[-1]['name'])
      node_params = dict(default_node_params)
      node_params.update({
          'name': '{}_dn_lvl_{}'.format(bifpn_i, level_i),
          'scale': level_scales[level_i - fpn_min_level],
          'inputs': inputs
      })
      bifpn_node_params.append(node_params)

    # Create bottom-up nodes.
    for level_i in range(fpn_min_level + 1, fpn_max_level + 1):
      # BiFPN nodes in the bottom-up pass receive input from the corresponding
      # level from the preceding top-down pass, except for the top (max) level
      # which does not have a corresponding node in the top-down pass.
      inputs = ['{}_up_lvl_{}'.format(bifpn_i - 1, level_i)]
      if level_i < fpn_max_level:
        inputs.append('{}_dn_lvl_{}'.format(bifpn_i, level_i))
      inputs.append(bifpn_node_params[-1]['name'])
      node_params = dict(default_node_params)
      node_params.update({
          'name': '{}_up_lvl_{}'.format(bifpn_i, level_i),
          'scale': level_scales[level_i - fpn_min_level],
          'inputs': inputs
      })
      bifpn_node_params.append(node_params)

  return bifpn_node_params
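# Editorial example (not part of the original file): for fpn_min_level=3,
# fpn_max_level=7, input_max_level=5 and bifpn_num_iterations=1, the node names
# generated above are, in order:
#   '0_up_lvl_6', '0_up_lvl_7'                               (extra base levels)
#   '1_dn_lvl_6', '1_dn_lvl_5', '1_dn_lvl_4', '1_dn_lvl_3'   (top-down pass)
#   '1_up_lvl_4', '1_up_lvl_5', '1_up_lvl_6', '1_up_lvl_7'   (bottom-up pass)
# The last five nodes ('1_dn_lvl_3' and '1_up_lvl_4' through '1_up_lvl_7') are
# the BiFPN outputs for pyramid levels 3-7, matching the feature map keys
# checked in the tests further below.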
def _create_bifpn_resample_block(name,
                                 input_scale,
                                 input_num_channels,
                                 output_scale,
                                 output_num_channels,
                                 conv_hyperparams,
                                 is_training,
                                 freeze_batchnorm,
                                 downsample_method=None,
                                 use_native_resize_op=False,
                                 maybe_apply_1x1_conv=True,
                                 apply_1x1_pre_sampling=True,
                                 apply_1x1_post_sampling=False):
  """Creates resample block layers for input feature maps to BiFPN nodes.

  Args:
    name: String. Name used for this block of layers.
    input_scale: Scale factor of the input feature map.
    input_num_channels: Number of channels in the input feature map.
    output_scale: Scale factor of the output feature map.
    output_num_channels: Number of channels in the output feature map.
    conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
      containing hyperparameters for convolution ops.
    is_training: Indicates whether the feature generator is in training mode.
    freeze_batchnorm: Bool. Whether to freeze batch norm parameters during
      training or not. When training with a small batch size (e.g. 1), it is
      desirable to freeze batch norm update and use pretrained batch norm
      params.
    downsample_method: String. Method to use when downsampling feature maps.
    use_native_resize_op: Bool. Whether to use the native resize op when
      upsampling feature maps.
    maybe_apply_1x1_conv: Bool. If 'True', a 1x1 convolution will only be
      applied if the input_num_channels differs from the output_num_channels.
    apply_1x1_pre_sampling: Bool. Whether a 1x1 convolution will be applied to
      the input feature map before the up/down-sampling operation.
    apply_1x1_post_sampling: Bool. Whether a 1x1 convolution will be applied to
      the input feature map after the up/down-sampling operation.

  Returns:
    A list of layers which may be applied to the input feature maps in order to
    compute feature maps with the specified scale and number of channels.
  """
  # By default, 1x1 convolutions are only applied before sampling when the
  # number of input and output channels differ.
  if maybe_apply_1x1_conv and output_num_channels == input_num_channels:
    apply_1x1_pre_sampling = False
    apply_1x1_post_sampling = False

  apply_bn_for_resampling = True
  layers = []
  if apply_1x1_pre_sampling:
    layers.extend(
        bifpn_utils.create_conv_block(
            name=name + '1x1_pre_sample/',
            num_filters=output_num_channels,
            kernel_size=1,
            strides=1,
            padding='SAME',
            use_separable=False,
            apply_batchnorm=apply_bn_for_resampling,
            apply_activation=False,
            conv_hyperparams=conv_hyperparams,
            is_training=is_training,
            freeze_batchnorm=freeze_batchnorm))

  layers.extend(
      bifpn_utils.create_resample_feature_map_ops(input_scale, output_scale,
                                                  downsample_method,
                                                  use_native_resize_op,
                                                  conv_hyperparams, is_training,
                                                  freeze_batchnorm, name))

  if apply_1x1_post_sampling:
    layers.extend(
        bifpn_utils.create_conv_block(
            name=name + '1x1_post_sample/',
            num_filters=output_num_channels,
            kernel_size=1,
            strides=1,
            padding='SAME',
            use_separable=False,
            apply_batchnorm=apply_bn_for_resampling,
            apply_activation=False,
            conv_hyperparams=conv_hyperparams,
            is_training=is_training,
            freeze_batchnorm=freeze_batchnorm))

  return layers


def _create_bifpn_combine_op(num_inputs, name, combine_method):
  """Creates a combine operation for the input feature maps of a BiFPN node.

  Args:
    num_inputs: The number of inputs to this combine operation.
    name: String. The name of this combine operation.
    combine_method: String. The method used to combine input feature maps.

  Returns:
    A function which may be called with a list of num_inputs feature maps
    and which will return a single feature map.
  """
  combine_op = None
  if num_inputs < 1:
    raise ValueError('Expected at least 1 input for BiFPN combine.')
  elif num_inputs == 1:
    combine_op = lambda x: x[0]
  else:
    combine_op = bifpn_utils.BiFPNCombineLayer(
        combine_method=combine_method, name=name)
  return combine_op


class KerasBiFpnFeatureMaps(tf.keras.Model):
  """Generates Keras based BiFPN feature maps from an input feature map pyramid.

  A Keras model that generates multi-scale feature maps for detection by
  iteratively computing top-down and bottom-up feature pyramids, as in the
  EfficientDet paper by Tan et al, see arxiv.org/abs/1911.09070 for details.
  """

  def __init__(self,
               bifpn_num_iterations,
               bifpn_num_filters,
               fpn_min_level,
               fpn_max_level,
               input_max_level,
               is_training,
               conv_hyperparams,
               freeze_batchnorm,
               bifpn_node_params=None,
               name=None):
    """Constructor.

    Args:
      bifpn_num_iterations: The number of top-down bottom-up iterations.
      bifpn_num_filters: The number of filters (channels) to be used for all
        feature maps in this BiFPN.
      fpn_min_level: The minimum pyramid level (highest feature map resolution)
        to use in the BiFPN.
      fpn_max_level: The maximum pyramid level (lowest feature map resolution)
        to use in the BiFPN.
      input_max_level: The maximum pyramid level that will be provided as input
        to the BiFPN. Accordingly, the BiFPN will compute any additional pyramid
        levels from input_max_level up to the desired fpn_max_level, with each
        successive level downsampling by a scale factor of 2 by default.
      is_training: Indicates whether the feature generator is in training mode.
      conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
        containing hyperparameters for convolution ops.
      freeze_batchnorm: Bool. Whether to freeze batch norm parameters during
        training or not. When training with a small batch size (e.g. 1), it is
        desirable to freeze batch norm update and use pretrained batch norm
        params.
      bifpn_node_params: An optional dictionary that may be used to specify
        default parameters for BiFPN nodes, without the need to provide a custom
        bifpn_node_config. For example, if '{ combine_method: 'sum' }', then all
        BiFPN nodes will combine input feature maps by summation, rather than
        by the default fast attention method.
      name: A string name scope to assign to the model. If 'None', Keras
        will auto-generate one from the class name.
    """
    super(KerasBiFpnFeatureMaps, self).__init__(name=name)
    bifpn_node_config = _create_bifpn_node_config(
        bifpn_num_iterations, bifpn_num_filters, fpn_min_level, fpn_max_level,
        input_max_level, bifpn_node_params)
    bifpn_input_config = _create_bifpn_input_config(
        fpn_min_level, fpn_max_level, input_max_level)
    bifpn_output_node_names = _get_bifpn_output_node_names(
        fpn_min_level, fpn_max_level, bifpn_node_config)

    self.bifpn_node_config = bifpn_node_config
    self.bifpn_output_node_names = bifpn_output_node_names
    self.node_input_blocks = []
    self.node_combine_op = []
    self.node_post_combine_block = []

    all_node_params = bifpn_input_config
    all_node_names = [node['name'] for node in all_node_params]
    for node_config in bifpn_node_config:
      # Maybe transform and/or resample input feature maps.
      input_blocks = []
      for input_name in node_config['inputs']:
        if input_name not in all_node_names:
          raise ValueError(
              'Input feature map ({}) does not exist:'.format(input_name))
        input_index = all_node_names.index(input_name)
        input_params = all_node_params[input_index]
        input_block = node_config['input_op'](
            name='{}/input_{}/'.format(node_config['name'], input_name),
            input_scale=input_params['scale'],
            input_num_channels=input_params.get('num_channels', None),
            output_scale=node_config['scale'],
            output_num_channels=node_config['num_channels'],
            conv_hyperparams=conv_hyperparams,
            is_training=is_training,
            freeze_batchnorm=freeze_batchnorm)
        input_blocks.append((input_index, input_block))

      # Combine input feature maps.
      combine_op = _create_bifpn_combine_op(
          num_inputs=len(input_blocks),
          name=(node_config['name'] + '/combine'),
          combine_method=node_config['combine_method'])

      # Post-combine layers.
      post_combine_block = []
      if node_config['post_combine_op']:
        post_combine_block.extend(node_config['post_combine_op'](
            name=node_config['name'] + '/post_combine/',
            conv_hyperparams=conv_hyperparams,
            is_training=is_training,
            freeze_batchnorm=freeze_batchnorm))

      self.node_input_blocks.append(input_blocks)
      self.node_combine_op.append(combine_op)
      self.node_post_combine_block.append(post_combine_block)
      all_node_params.append(node_config)
      all_node_names.append(node_config['name'])

  def call(self, feature_pyramid):
    """Compute BiFPN feature maps from input feature pyramid.

    Executed when calling the `.__call__` method on input.

    Args:
      feature_pyramid: list of tuples of (tensor_name, image_feature_tensor).

    Returns:
      feature_maps: an OrderedDict mapping keys (feature map names) to
        tensors where each tensor has shape [batch, height_i, width_i, depth_i].
    """
    feature_maps = [el[1] for el in feature_pyramid]
    output_feature_maps = [None for node in self.bifpn_output_node_names]

    for index, node in enumerate(self.bifpn_node_config):
      node_scope = 'node_{:02d}'.format(index)
      with tf.name_scope(node_scope):
        # Apply layer blocks to this node's input feature maps.
        input_block_results = []
        for input_index, input_block in self.node_input_blocks[index]:
          block_result = feature_maps[input_index]
          for layer in input_block:
            block_result = layer(block_result)
          input_block_results.append(block_result)

        # Combine the resulting feature maps.
        node_result = self.node_combine_op[index](input_block_results)

        # Apply post-combine layer block if applicable.
        for layer in self.node_post_combine_block[index]:
          node_result = layer(node_result)

        feature_maps.append(node_result)

        if node['name'] in self.bifpn_output_node_names:
          index = self.bifpn_output_node_names.index(node['name'])
          output_feature_maps[index] = node_result

    return collections.OrderedDict(
        zip(self.bifpn_output_node_names, output_feature_maps))
research/object_detection/models/bidirectional_feature_pyramid_generators_tf2_test.py  0 → 100644

# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for bidirectional feature pyramid generators."""
import unittest
from absl.testing import parameterized

import tensorflow.compat.v1 as tf

from google.protobuf import text_format

from object_detection.builders import hyperparams_builder
from object_detection.models import bidirectional_feature_pyramid_generators as bifpn_generators
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
from object_detection.utils import test_utils
from object_detection.utils import tf_version


@parameterized.parameters({'bifpn_num_iterations': 2},
                          {'bifpn_num_iterations': 8})
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class BiFPNFeaturePyramidGeneratorTest(test_case.TestCase):

  def _build_conv_hyperparams(self):
    conv_hyperparams = hyperparams_pb2.Hyperparams()
    conv_hyperparams_text_proto = """
      regularizer {
        l2_regularizer {
        }
      }
      initializer {
        truncated_normal_initializer {
        }
      }
      force_use_bias: true
    """
    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
    return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)

  def test_get_expected_feature_map_shapes(self, bifpn_num_iterations):
    with test_utils.GraphContextOrNone() as g:
      image_features = [
          ('block3', tf.random_uniform([4, 16, 16, 256], dtype=tf.float32)),
          ('block4', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
          ('block5', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32))
      ]
      bifpn_generator = bifpn_generators.KerasBiFpnFeatureMaps(
          bifpn_num_iterations=bifpn_num_iterations,
          bifpn_num_filters=128,
          fpn_min_level=3,
          fpn_max_level=7,
          input_max_level=5,
          is_training=True,
          conv_hyperparams=self._build_conv_hyperparams(),
          freeze_batchnorm=False)
    def graph_fn():
      feature_maps = bifpn_generator(image_features)
      return feature_maps

    expected_feature_map_shapes = {
        '{}_dn_lvl_3'.format(bifpn_num_iterations): (4, 16, 16, 128),
        '{}_up_lvl_4'.format(bifpn_num_iterations): (4, 8, 8, 128),
        '{}_up_lvl_5'.format(bifpn_num_iterations): (4, 4, 4, 128),
        '{}_up_lvl_6'.format(bifpn_num_iterations): (4, 2, 2, 128),
        '{}_up_lvl_7'.format(bifpn_num_iterations): (4, 1, 1, 128)}
    out_feature_maps = self.execute(graph_fn, [], g)
    out_feature_map_shapes = dict(
        (key, value.shape) for key, value in out_feature_maps.items())
    self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)

  def test_get_expected_variable_names(self, bifpn_num_iterations):
    with test_utils.GraphContextOrNone() as g:
      image_features = [
          ('block3', tf.random_uniform([4, 16, 16, 256], dtype=tf.float32)),
          ('block4', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
          ('block5', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32))
      ]
      bifpn_generator = bifpn_generators.KerasBiFpnFeatureMaps(
          bifpn_num_iterations=bifpn_num_iterations,
          bifpn_num_filters=128,
          fpn_min_level=3,
          fpn_max_level=7,
          input_max_level=5,
          is_training=True,
          conv_hyperparams=self._build_conv_hyperparams(),
          freeze_batchnorm=False,
          name='bifpn')
    def graph_fn():
      return bifpn_generator(image_features)

    self.execute(graph_fn, [], g)
    expected_variables = [
        'bifpn/node_00/0_up_lvl_6/input_0_up_lvl_5/1x1_pre_sample/conv/bias',
        'bifpn/node_00/0_up_lvl_6/input_0_up_lvl_5/1x1_pre_sample/conv/kernel',
        'bifpn/node_03/1_dn_lvl_5/input_0_up_lvl_5/1x1_pre_sample/conv/bias',
        'bifpn/node_03/1_dn_lvl_5/input_0_up_lvl_5/1x1_pre_sample/conv/kernel',
        'bifpn/node_04/1_dn_lvl_4/input_0_up_lvl_4/1x1_pre_sample/conv/bias',
        'bifpn/node_04/1_dn_lvl_4/input_0_up_lvl_4/1x1_pre_sample/conv/kernel',
        'bifpn/node_05/1_dn_lvl_3/input_0_up_lvl_3/1x1_pre_sample/conv/bias',
        'bifpn/node_05/1_dn_lvl_3/input_0_up_lvl_3/1x1_pre_sample/conv/kernel',
        'bifpn/node_06/1_up_lvl_4/input_0_up_lvl_4/1x1_pre_sample/conv/bias',
        'bifpn/node_06/1_up_lvl_4/input_0_up_lvl_4/1x1_pre_sample/conv/kernel',
        'bifpn/node_07/1_up_lvl_5/input_0_up_lvl_5/1x1_pre_sample/conv/bias',
        'bifpn/node_07/1_up_lvl_5/input_0_up_lvl_5/1x1_pre_sample/conv/kernel']
    expected_node_variable_patterns = [
        ['bifpn/node_{:02}/{}_dn_lvl_6/combine/bifpn_combine_weights',
         'bifpn/node_{:02}/{}_dn_lvl_6/post_combine/separable_conv/bias',
         'bifpn/node_{:02}/{}_dn_lvl_6/post_combine/separable_conv/depthwise_kernel',
         'bifpn/node_{:02}/{}_dn_lvl_6/post_combine/separable_conv/pointwise_kernel'],
        ['bifpn/node_{:02}/{}_dn_lvl_5/combine/bifpn_combine_weights',
         'bifpn/node_{:02}/{}_dn_lvl_5/post_combine/separable_conv/bias',
         'bifpn/node_{:02}/{}_dn_lvl_5/post_combine/separable_conv/depthwise_kernel',
         'bifpn/node_{:02}/{}_dn_lvl_5/post_combine/separable_conv/pointwise_kernel'],
        ['bifpn/node_{:02}/{}_dn_lvl_4/combine/bifpn_combine_weights',
         'bifpn/node_{:02}/{}_dn_lvl_4/post_combine/separable_conv/bias',
         'bifpn/node_{:02}/{}_dn_lvl_4/post_combine/separable_conv/depthwise_kernel',
         'bifpn/node_{:02}/{}_dn_lvl_4/post_combine/separable_conv/pointwise_kernel'],
        ['bifpn/node_{:02}/{}_dn_lvl_3/combine/bifpn_combine_weights',
         'bifpn/node_{:02}/{}_dn_lvl_3/post_combine/separable_conv/bias',
         'bifpn/node_{:02}/{}_dn_lvl_3/post_combine/separable_conv/depthwise_kernel',
         'bifpn/node_{:02}/{}_dn_lvl_3/post_combine/separable_conv/pointwise_kernel'],
        ['bifpn/node_{:02}/{}_up_lvl_4/combine/bifpn_combine_weights',
         'bifpn/node_{:02}/{}_up_lvl_4/post_combine/separable_conv/bias',
         'bifpn/node_{:02}/{}_up_lvl_4/post_combine/separable_conv/depthwise_kernel',
         'bifpn/node_{:02}/{}_up_lvl_4/post_combine/separable_conv/pointwise_kernel'],
        ['bifpn/node_{:02}/{}_up_lvl_5/combine/bifpn_combine_weights',
         'bifpn/node_{:02}/{}_up_lvl_5/post_combine/separable_conv/bias',
         'bifpn/node_{:02}/{}_up_lvl_5/post_combine/separable_conv/depthwise_kernel',
         'bifpn/node_{:02}/{}_up_lvl_5/post_combine/separable_conv/pointwise_kernel'],
        ['bifpn/node_{:02}/{}_up_lvl_6/combine/bifpn_combine_weights',
         'bifpn/node_{:02}/{}_up_lvl_6/post_combine/separable_conv/bias',
         'bifpn/node_{:02}/{}_up_lvl_6/post_combine/separable_conv/depthwise_kernel',
         'bifpn/node_{:02}/{}_up_lvl_6/post_combine/separable_conv/pointwise_kernel'],
        ['bifpn/node_{:02}/{}_up_lvl_7/combine/bifpn_combine_weights',
         'bifpn/node_{:02}/{}_up_lvl_7/post_combine/separable_conv/bias',
         'bifpn/node_{:02}/{}_up_lvl_7/post_combine/separable_conv/depthwise_kernel',
         'bifpn/node_{:02}/{}_up_lvl_7/post_combine/separable_conv/pointwise_kernel']]

    node_i = 2
    for iter_i in range(1, bifpn_num_iterations + 1):
      for node_variable_patterns in expected_node_variable_patterns:
        for pattern in node_variable_patterns:
          expected_variables.append(pattern.format(node_i, iter_i))
        node_i += 1

    expected_variables = set(expected_variables)
    actual_variable_set = set(
        [var.name.split(':')[0] for var in bifpn_generator.variables])
    self.assertSetEqual(expected_variables, actual_variable_set)

# TODO(aom): Tests for create_bifpn_combine_op.

if __name__ == '__main__':
  tf.test.main()
research/object_detection/models/center_net_hourglass_feature_extractor.py  0 → 100644

# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Hourglass[1] feature extractor for CenterNet[2] meta architecture.

[1]: https://arxiv.org/abs/1603.06937
[2]: https://arxiv.org/abs/1904.07850
"""

from object_detection.meta_architectures import center_net_meta_arch
from object_detection.models.keras_models import hourglass_network


class CenterNetHourglassFeatureExtractor(
    center_net_meta_arch.CenterNetFeatureExtractor):
  """The hourglass feature extractor for CenterNet.

  This class is a thin wrapper around the HourglassFeatureExtractor class
  along with some preprocessing methods inherited from the base class.
  """

  def __init__(self, hourglass_net, channel_means=(0., 0., 0.),
               channel_stds=(1., 1., 1.), bgr_ordering=False):
    """Initializes the feature extractor.

    Args:
      hourglass_net: The underlying hourglass network to use.
      channel_means: A tuple of floats, denoting the mean of each channel
        which will be subtracted from it.
      channel_stds: A tuple of floats, denoting the standard deviation of each
        channel. Each channel will be divided by its standard deviation value.
      bgr_ordering: bool, if set will change the channel ordering to be in the
        [blue, red, green] order.
    """
    super(CenterNetHourglassFeatureExtractor, self).__init__(
        channel_means=channel_means, channel_stds=channel_stds,
        bgr_ordering=bgr_ordering)
    self._network = hourglass_net

  def call(self, inputs):
    return self._network(inputs)

  @property
  def out_stride(self):
    """The stride in the output image of the network."""
    return 4

  @property
  def num_feature_outputs(self):
    """The number of feature outputs returned by the feature extractor."""
    return self._network.num_hourglasses

  def get_model(self):
    return self._network


def hourglass_104(channel_means, channel_stds, bgr_ordering):
  """The Hourglass-104 backbone for CenterNet."""
  network = hourglass_network.hourglass_104()
  return CenterNetHourglassFeatureExtractor(
      network, channel_means=channel_means, channel_stds=channel_stds,
      bgr_ordering=bgr_ordering)
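A minimal usage sketch of the hourglass_104 builder above (argument values are illustrative, not taken from the original file):

from object_detection.models import center_net_hourglass_feature_extractor as hourglass

extractor = hourglass.hourglass_104(
    channel_means=(0., 0., 0.),
    channel_stds=(1., 1., 1.),
    bgr_ordering=False)
# out_stride is 4, and num_feature_outputs equals the number of hourglass
# stacks in the backbone (two for the standard Hourglass-104 network).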
official/r1/utils/logs/mock_lib.py → research/object_detection/models/center_net_hourglass_feature_extractor_tf2_test.py

-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

@@ -12,25 +12,33 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+"""Testing hourglass feature extractor for CenterNet."""
+import unittest
+import numpy as np
+import tensorflow.compat.v1 as tf
-"""Mock objects and related functions for testing."""
+from object_detection.models import center_net_hourglass_feature_extractor as hourglass
+from object_detection.models.keras_models import hourglass_network
+from object_detection.utils import test_case
+from object_detection.utils import tf_version
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function

+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class CenterNetHourglassFeatureExtractorTest(test_case.TestCase):
-class MockBenchmarkLogger(object):
-  """This is a mock logger that can be used in dependent tests."""

+  def test_center_net_hourglass_feature_extractor(self):
-  def __init__(self):
-    self.logged_metric = []
+    net = hourglass_network.HourglassNetwork(
+        num_stages=4, blocks_per_stage=[2, 3, 4, 5, 6],
+        channel_dims=[4, 6, 8, 10, 12, 14], num_hourglasses=2)

-  def log_metric(self, name, value, unit=None, global_step=None, extras=None):
-    self.logged_metric.append({"name": name, "value": float(value),
-                               "unit": unit, "global_step": global_step,
-                               "extras": extras})
+    model = hourglass.CenterNetHourglassFeatureExtractor(net)
+    def graph_fn():
+      return model(tf.zeros((2, 64, 64, 3), dtype=np.float32))
+    outputs = self.execute(graph_fn, [])
+    self.assertEqual(outputs[0].shape, (2, 16, 16, 6))
+    self.assertEqual(outputs[1].shape, (2, 16, 16, 6))

+if __name__ == '__main__':
+  tf.test.main()
research/object_detection/models/center_net_resnet_feature_extractor.py  0 → 100644

# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Resnetv2 based feature extractors for CenterNet[1] meta architecture.

[1]: https://arxiv.org/abs/1904.07850
"""

import tensorflow.compat.v1 as tf

from object_detection.meta_architectures.center_net_meta_arch import CenterNetFeatureExtractor


class CenterNetResnetFeatureExtractor(CenterNetFeatureExtractor):
  """Resnet v2 base feature extractor for the CenterNet model."""

  def __init__(self, resnet_type, channel_means=(0., 0., 0.),
               channel_stds=(1., 1., 1.), bgr_ordering=False):
    """Initializes the feature extractor with a specific ResNet architecture.

    Args:
      resnet_type: A string specifying which kind of ResNet to use. Currently
        only `resnet_v2_50` and `resnet_v2_101` are supported.
      channel_means: A tuple of floats, denoting the mean of each channel
        which will be subtracted from it.
      channel_stds: A tuple of floats, denoting the standard deviation of each
        channel. Each channel will be divided by its standard deviation value.
      bgr_ordering: bool, if set will change the channel ordering to be in the
        [blue, red, green] order.
    """

    super(CenterNetResnetFeatureExtractor, self).__init__(
        channel_means=channel_means, channel_stds=channel_stds,
        bgr_ordering=bgr_ordering)
    if resnet_type == 'resnet_v2_101':
      self._base_model = tf.keras.applications.ResNet101V2(weights=None)
      output_layer = 'conv5_block3_out'
    elif resnet_type == 'resnet_v2_50':
      self._base_model = tf.keras.applications.ResNet50V2(weights=None)
      output_layer = 'conv5_block3_out'
    else:
      raise ValueError('Unknown Resnet Model {}'.format(resnet_type))
    output_layer = self._base_model.get_layer(output_layer)

    self._resnet_model = tf.keras.models.Model(inputs=self._base_model.input,
                                               outputs=output_layer.output)
    resnet_output = self._resnet_model(self._base_model.input)

    for num_filters in [256, 128, 64]:
      # TODO(vighneshb) This section has a few differences from the paper
      # Figure out how much of a performance impact they have.

      # 1. We use a simple convolution instead of a deformable convolution
      conv = tf.keras.layers.Conv2D(filters=num_filters, kernel_size=3,
                                    strides=1, padding='same')
      resnet_output = conv(resnet_output)
      resnet_output = tf.keras.layers.BatchNormalization()(resnet_output)
      resnet_output = tf.keras.layers.ReLU()(resnet_output)

      # 2. We use the default initialization for the convolution layers
      # instead of initializing it to do bilinear upsampling.
      conv_transpose = tf.keras.layers.Conv2DTranspose(filters=num_filters,
                                                       kernel_size=3, strides=2,
                                                       padding='same')
      resnet_output = conv_transpose(resnet_output)
      resnet_output = tf.keras.layers.BatchNormalization()(resnet_output)
      resnet_output = tf.keras.layers.ReLU()(resnet_output)

    self._feature_extractor_model = tf.keras.models.Model(
        inputs=self._base_model.input, outputs=resnet_output)

  def preprocess(self, resized_inputs):
    """Preprocess input images for the ResNet model.

    This scales images in the range [0, 255] to the range [-1, 1]

    Args:
      resized_inputs: a [batch, height, width, channels] float32 tensor.

    Returns:
      outputs: a [batch, height, width, channels] float32 tensor.
    """
    resized_inputs = super(CenterNetResnetFeatureExtractor, self).preprocess(
        resized_inputs)
    return tf.keras.applications.resnet_v2.preprocess_input(resized_inputs)

  def load_feature_extractor_weights(self, path):
    self._base_model.load_weights(path)

  def get_base_model(self):
    """Get base resnet model for inspection and testing."""
    return self._base_model

  def call(self, inputs):
    """Returns image features extracted by the backbone.

    Args:
      inputs: An image tensor of shape [batch_size, input_height,
        input_width, 3]

    Returns:
      features_list: A list of length 1 containing a tensor of shape
        [batch_size, input_height // 4, input_width // 4, 64] containing
        the features extracted by the ResNet.
    """
    return [self._feature_extractor_model(inputs)]

  @property
  def num_feature_outputs(self):
    return 1

  @property
  def out_stride(self):
    return 4


def resnet_v2_101(channel_means, channel_stds, bgr_ordering):
  """The ResNet v2 101 feature extractor."""

  return CenterNetResnetFeatureExtractor(
      resnet_type='resnet_v2_101',
      channel_means=channel_means,
      channel_stds=channel_stds,
      bgr_ordering=bgr_ordering)


def resnet_v2_50(channel_means, channel_stds, bgr_ordering):
  """The ResNet v2 50 feature extractor."""

  return CenterNetResnetFeatureExtractor(
      resnet_type='resnet_v2_50',
      channel_means=channel_means,
      channel_stds=channel_stds,
      bgr_ordering=bgr_ordering)
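A short usage sketch mirroring the tests that follow (the input batch below is a placeholder):

import numpy as np
from object_detection.models import center_net_resnet_feature_extractor

extractor = center_net_resnet_feature_extractor.resnet_v2_50(
    channel_means=(0., 0., 0.), channel_stds=(1., 1., 1.), bgr_ordering=False)
images = np.zeros((8, 224, 224, 3), dtype=np.float32)  # placeholder batch
features = extractor(extractor.preprocess(images))[0]
# With out_stride 4, a 224x224 input yields a (8, 56, 56, 64) feature map.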
research/object_detection/models/center_net_resnet_feature_extractor_tf2_test.py  0 → 100644

# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Testing ResNet v2 models for the CenterNet meta architecture."""
import unittest
import numpy as np
import tensorflow.compat.v1 as tf

from object_detection.models import center_net_resnet_feature_extractor
from object_detection.utils import test_case
from object_detection.utils import tf_version


@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class CenterNetResnetFeatureExtractorTest(test_case.TestCase):

  def test_output_size(self):
    """Verify that shape of features returned by the backbone is correct."""

    model = center_net_resnet_feature_extractor.\
        CenterNetResnetFeatureExtractor('resnet_v2_101')
    def graph_fn():
      img = np.zeros((8, 224, 224, 3), dtype=np.float32)
      processed_img = model.preprocess(img)
      return model(processed_img)
    outputs = self.execute(graph_fn, [])
    self.assertEqual(outputs.shape, (8, 56, 56, 64))

  def test_output_size_resnet50(self):
    """Verify that shape of features returned by the backbone is correct."""

    model = center_net_resnet_feature_extractor.\
        CenterNetResnetFeatureExtractor('resnet_v2_50')
    def graph_fn():
      img = np.zeros((8, 224, 224, 3), dtype=np.float32)
      processed_img = model.preprocess(img)
      return model(processed_img)
    outputs = self.execute(graph_fn, [])
    self.assertEqual(outputs.shape, (8, 56, 56, 64))


if __name__ == '__main__':
  tf.test.main()
research/object_detection/models/center_net_resnet_v1_fpn_feature_extractor.py  0 → 100644

# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Resnetv1 FPN [1] based feature extractors for CenterNet[2] meta architecture.

[1]: https://arxiv.org/abs/1612.03144.
[2]: https://arxiv.org/abs/1904.07850.
"""

import tensorflow.compat.v1 as tf

from object_detection.meta_architectures.center_net_meta_arch import CenterNetFeatureExtractor


_RESNET_MODEL_OUTPUT_LAYERS = {
    'resnet_v1_50': ['conv2_block3_out', 'conv3_block4_out',
                     'conv4_block6_out', 'conv5_block3_out'],
    'resnet_v1_101': ['conv2_block3_out', 'conv3_block4_out',
                      'conv4_block23_out', 'conv5_block3_out'],
}


class CenterNetResnetV1FpnFeatureExtractor(CenterNetFeatureExtractor):
  """Resnet v1 FPN base feature extractor for the CenterNet model.

  This feature extractor uses residual skip connections and nearest neighbor
  upsampling to produce an output feature map of stride 4, which has precise
  localization information along with strong semantic information from the top
  of the net. This design does not exactly follow the original FPN design,
  specifically:
  - Since only one output map is necessary for heatmap prediction (stride 4
    output), the top-down feature maps can have different numbers of channels.
    Specifically, the top down feature maps have the following sizes:
    [h/4, w/4, 64], [h/8, w/8, 128], [h/16, w/16, 256], [h/32, w/32, 256].
  - No additional coarse features are used after conv5_x.
  """

  def __init__(self, resnet_type, channel_means=(0., 0., 0.),
               channel_stds=(1., 1., 1.), bgr_ordering=False):
    """Initializes the feature extractor with a specific ResNet architecture.

    Args:
      resnet_type: A string specifying which kind of ResNet to use. Currently
        only `resnet_v1_50` and `resnet_v1_101` are supported.
      channel_means: A tuple of floats, denoting the mean of each channel
        which will be subtracted from it.
      channel_stds: A tuple of floats, denoting the standard deviation of each
        channel. Each channel will be divided by its standard deviation value.
      bgr_ordering: bool, if set will change the channel ordering to be in the
        [blue, red, green] order.
    """

    super(CenterNetResnetV1FpnFeatureExtractor, self).__init__(
        channel_means=channel_means, channel_stds=channel_stds,
        bgr_ordering=bgr_ordering)
    if resnet_type == 'resnet_v1_50':
      self._base_model = tf.keras.applications.ResNet50(weights=None)
    elif resnet_type == 'resnet_v1_101':
      self._base_model = tf.keras.applications.ResNet101(weights=None)
    else:
      raise ValueError('Unknown Resnet Model {}'.format(resnet_type))
    output_layers = _RESNET_MODEL_OUTPUT_LAYERS[resnet_type]
    outputs = [self._base_model.get_layer(output_layer_name).output
               for output_layer_name in output_layers]

    self._resnet_model = tf.keras.models.Model(inputs=self._base_model.input,
                                               outputs=outputs)
    resnet_outputs = self._resnet_model(self._base_model.input)

    # Construct the top-down feature maps.
    top_layer = resnet_outputs[-1]
    residual_op = tf.keras.layers.Conv2D(filters=256, kernel_size=1,
                                         strides=1, padding='same')
    top_down = residual_op(top_layer)

    num_filters_list = [256, 128, 64]
    for i, num_filters in enumerate(num_filters_list):
      level_ind = 2 - i
      # Upsample.
      upsample_op = tf.keras.layers.UpSampling2D(2, interpolation='nearest')
      top_down = upsample_op(top_down)

      # Residual (skip-connection) from bottom-up pathway.
      residual_op = tf.keras.layers.Conv2D(filters=num_filters, kernel_size=1,
                                           strides=1, padding='same')
      residual = residual_op(resnet_outputs[level_ind])

      # Merge.
      top_down = top_down + residual
      next_num_filters = num_filters_list[i + 1] if i + 1 <= 2 else 64
      conv = tf.keras.layers.Conv2D(filters=next_num_filters,
                                    kernel_size=3, strides=1, padding='same')
      top_down = conv(top_down)
      top_down = tf.keras.layers.BatchNormalization()(top_down)
      top_down = tf.keras.layers.ReLU()(top_down)

    self._feature_extractor_model = tf.keras.models.Model(
        inputs=self._base_model.input, outputs=top_down)

  def preprocess(self, resized_inputs):
    """Preprocess input images for the ResNet model.

    This scales images in the range [0, 255] to the range [-1, 1]

    Args:
      resized_inputs: a [batch, height, width, channels] float32 tensor.

    Returns:
      outputs: a [batch, height, width, channels] float32 tensor.
    """
    resized_inputs = super(CenterNetResnetV1FpnFeatureExtractor, self).preprocess(
        resized_inputs)
    return tf.keras.applications.resnet.preprocess_input(resized_inputs)

  def load_feature_extractor_weights(self, path):
    self._base_model.load_weights(path)

  def get_base_model(self):
    """Get base resnet model for inspection and testing."""
    return self._base_model

  def call(self, inputs):
    """Returns image features extracted by the backbone.

    Args:
      inputs: An image tensor of shape [batch_size, input_height,
        input_width, 3]

    Returns:
      features_list: A list of length 1 containing a tensor of shape
        [batch_size, input_height // 4, input_width // 4, 64] containing
        the features extracted by the ResNet.
    """
    return [self._feature_extractor_model(inputs)]

  @property
  def num_feature_outputs(self):
    return 1

  @property
  def out_stride(self):
    return 4


def resnet_v1_101_fpn(channel_means, channel_stds, bgr_ordering):
  """The ResNet v1 101 FPN feature extractor."""

  return CenterNetResnetV1FpnFeatureExtractor(
      resnet_type='resnet_v1_101',
      channel_means=channel_means,
      channel_stds=channel_stds,
      bgr_ordering=bgr_ordering)


def resnet_v1_50_fpn(channel_means, channel_stds, bgr_ordering):
  """The ResNet v1 50 FPN feature extractor."""

  return CenterNetResnetV1FpnFeatureExtractor(
      resnet_type='resnet_v1_50',
      channel_means=channel_means,
      channel_stds=channel_stds,
      bgr_ordering=bgr_ordering)
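As with the plain ResNet extractor above, a quick shape check mirroring the test below (inputs and the import alias are placeholders):

import numpy as np
from object_detection.models import center_net_resnet_v1_fpn_feature_extractor as fpn_extractors

extractor = fpn_extractors.resnet_v1_50_fpn(
    channel_means=(0., 0., 0.), channel_stds=(1., 1., 1.), bgr_ordering=False)
inputs = np.zeros((8, 224, 224, 3), dtype=np.float32)   # placeholder batch
features = extractor(extractor.preprocess(inputs))[0]   # expected shape: (8, 56, 56, 64)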
research/object_detection/models/center_net_resnet_v1_fpn_feature_extractor_tf2_test.py  0 → 100644

# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Testing ResNet v1 FPN models for the CenterNet meta architecture."""
import unittest
from absl.testing import parameterized

import numpy as np
import tensorflow.compat.v1 as tf

from object_detection.models import center_net_resnet_v1_fpn_feature_extractor
from object_detection.utils import test_case
from object_detection.utils import tf_version


@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class CenterNetResnetV1FpnFeatureExtractorTest(test_case.TestCase,
                                               parameterized.TestCase):

  @parameterized.parameters(
      {'resnet_type': 'resnet_v1_50'},
      {'resnet_type': 'resnet_v1_101'},
  )
  def test_correct_output_size(self, resnet_type):
    """Verify that shape of features returned by the backbone is correct."""

    model = center_net_resnet_v1_fpn_feature_extractor.\
        CenterNetResnetV1FpnFeatureExtractor(resnet_type)
    def graph_fn():
      img = np.zeros((8, 224, 224, 3), dtype=np.float32)
      processed_img = model.preprocess(img)
      return model(processed_img)

    self.assertEqual(self.execute(graph_fn, []).shape, (8, 56, 56, 64))


if __name__ == '__main__':
  tf.test.main()
research/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor_test.py → research/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor_tf1_test.py

@@ -14,13 +14,16 @@
 # ==============================================================================
 """Tests for embedded_ssd_mobilenet_v1_feature_extractor."""
+import unittest
 import numpy as np
 import tensorflow.compat.v1 as tf

 from object_detection.models import embedded_ssd_mobilenet_v1_feature_extractor
 from object_detection.models import ssd_feature_extractor_test
+from object_detection.utils import tf_version


+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
 class EmbeddedSSDMobileNetV1FeatureExtractorTest(
     ssd_feature_extractor_test.SsdFeatureExtractorTestBase):

research/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor_test.py → research/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor_tf1_test.py

@@ -14,12 +14,14 @@
 # ==============================================================================
 """Tests for models.faster_rcnn_inception_resnet_v2_feature_extractor."""
+import unittest
 import tensorflow.compat.v1 as tf

 from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
+from object_detection.utils import tf_version


+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
 class FasterRcnnInceptionResnetV2FeatureExtractorTest(tf.test.TestCase):

   def _build_feature_extractor(self, first_stage_features_stride):