ModelZoo / ResNet50_tensorflow · Commit e293e338

Authored Dec 03, 2021 by Yeqing Li; committed by A. Unique TensorFlower, Dec 03, 2021.

    Internal change

    PiperOrigin-RevId: 413981781

Parent: aa6aed37
Changes: 59 · Showing 20 changed files with 5582 additions and 0 deletions (+5582, -0)
official/legacy/detection/evaluation/coco_evaluator.py        +617   -0
official/legacy/detection/evaluation/coco_utils.py            +374   -0
official/legacy/detection/evaluation/factory.py               +52    -0
official/legacy/detection/executor/__init__.py                +14    -0
official/legacy/detection/executor/detection_executor.py      +159   -0
official/legacy/detection/executor/distributed_executor.py    +805   -0
official/legacy/detection/main.py                             +264   -0
official/legacy/detection/modeling/__init__.py                +14    -0
official/legacy/detection/modeling/architecture/__init__.py   +14    -0
official/legacy/detection/modeling/architecture/factory.py    +217   -0
official/legacy/detection/modeling/architecture/fpn.py        +151   -0
official/legacy/detection/modeling/architecture/heads.py      +1279  -0
official/legacy/detection/modeling/architecture/identity.py   +28    -0
official/legacy/detection/modeling/architecture/nn_blocks.py  +316   -0
official/legacy/detection/modeling/architecture/nn_ops.py     +109   -0
official/legacy/detection/modeling/architecture/resnet.py     +352   -0
official/legacy/detection/modeling/architecture/spinenet.py   +503   -0
official/legacy/detection/modeling/base_model.py              +135   -0
official/legacy/detection/modeling/checkpoint_utils.py        +142   -0
official/legacy/detection/modeling/factory.py                 +37    -0
official/legacy/detection/evaluation/coco_evaluator.py (new file, 0 → 100644)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The COCO-style evaluator.
The following snippet demonstrates the use of interfaces:
evaluator = COCOEvaluator(...)
for _ in range(num_evals):
for _ in range(num_batches_per_eval):
predictions, groundtruth = predictor.predict(...) # pop a batch.
evaluator.update(predictions, groundtruths) # aggregate internal stats.
evaluator.evaluate() # finish one full eval.
See also: https://github.com/cocodataset/cocoapi/
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import atexit
import copy
import tempfile

from absl import logging
import numpy as np
from pycocotools import cocoeval
import six
import tensorflow as tf

from official.legacy.detection.evaluation import coco_utils
from official.legacy.detection.utils import class_utils
class MetricWrapper(object):
  """Metric wrapper of the COCO evaluator."""
  # This is only a wrapper for the COCO metric and works on numpy arrays, so it
  # doesn't inherit from tf.keras.layers.Layer or tf.keras.metrics.Metric.

  def __init__(self, evaluator):
    self._evaluator = evaluator

  def update_state(self, y_true, y_pred):
    """Updates internal states."""
    labels = tf.nest.map_structure(lambda x: x.numpy(), y_true)
    outputs = tf.nest.map_structure(lambda x: x.numpy(), y_pred)
    groundtruths = {}
    predictions = {}
    for key, val in outputs.items():
      if isinstance(val, tuple):
        val = np.concatenate(val)
      predictions[key] = val
    for key, val in labels.items():
      if isinstance(val, tuple):
        val = np.concatenate(val)
      groundtruths[key] = val
    self._evaluator.update(predictions, groundtruths)

  def result(self):
    return self._evaluator.evaluate()

  def reset_states(self):
    return self._evaluator.reset()
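
A minimal sketch of the wrapper contract: any object exposing update(), evaluate(), and reset() can be wrapped, so a stub evaluator (illustrative, not part of the committed file) is enough to exercise the interface:

  import tensorflow as tf

  class _StubEvaluator(object):
    """Illustrative stand-in for COCOEvaluator."""

    def update(self, predictions, groundtruths):
      pass  # A real evaluator aggregates the batch here.

    def evaluate(self):
      return {'AP': 0.0}

    def reset(self):
      pass

  metric = MetricWrapper(_StubEvaluator())
  metric.update_state(
      y_true={'classes': tf.constant([[1, 2]])},
      y_pred={'detection_scores': tf.constant([[0.9, 0.8]])})
  print(metric.result())  # {'AP': 0.0}
  metric.reset_states()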
class COCOEvaluator(object):
  """COCO evaluation metric class."""

  def __init__(self, annotation_file, include_mask, need_rescale_bboxes=True):
    """Constructs COCO evaluation class.

    The class provides the interface to metrics_fn in TPUEstimator. The
    _update_op() takes detections from each image and pushes them to
    self.detections. The _evaluate() loads a JSON file in COCO annotation
    format as the groundtruths and runs COCO evaluation.

    Args:
      annotation_file: a JSON file that stores annotations of the eval dataset.
        If `annotation_file` is None, groundtruth annotations will be loaded
        from the dataloader.
      include_mask: a boolean to indicate whether or not to include the mask
        eval.
      need_rescale_bboxes: if True, bboxes in `predictions` will be rescaled
        back to absolute values (`image_info` is needed in this case).
    """
    if annotation_file:
      if annotation_file.startswith('gs://'):
        _, local_val_json = tempfile.mkstemp(suffix='.json')
        tf.io.gfile.remove(local_val_json)

        tf.io.gfile.copy(annotation_file, local_val_json)
        atexit.register(tf.io.gfile.remove, local_val_json)
      else:
        local_val_json = annotation_file
      self._coco_gt = coco_utils.COCOWrapper(
          eval_type=('mask' if include_mask else 'box'),
          annotation_file=local_val_json)
    self._annotation_file = annotation_file
    self._include_mask = include_mask
    self._metric_names = [
        'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'ARmax1', 'ARmax10',
        'ARmax100', 'ARs', 'ARm', 'ARl'
    ]
    self._required_prediction_fields = [
        'source_id', 'num_detections', 'detection_classes', 'detection_scores',
        'detection_boxes'
    ]
    self._need_rescale_bboxes = need_rescale_bboxes
    if self._need_rescale_bboxes:
      self._required_prediction_fields.append('image_info')
    self._required_groundtruth_fields = [
        'source_id', 'height', 'width', 'classes', 'boxes'
    ]
    if self._include_mask:
      mask_metric_names = ['mask_' + x for x in self._metric_names]
      self._metric_names.extend(mask_metric_names)
      self._required_prediction_fields.extend(['detection_masks'])
      self._required_groundtruth_fields.extend(['masks'])

    self.reset()
  def reset(self):
    """Resets internal states for a fresh run."""
    self._predictions = {}
    if not self._annotation_file:
      self._groundtruths = {}
  def evaluate(self):
    """Evaluates with detections from all images with COCO API.

    Returns:
      coco_metric: float numpy array with shape [24] representing the
        coco-style evaluation metrics (box and mask).
    """
    if not self._annotation_file:
      logging.info('There is no annotation_file in COCOEvaluator.')
      gt_dataset = coco_utils.convert_groundtruths_to_coco_dataset(
          self._groundtruths)
      coco_gt = coco_utils.COCOWrapper(
          eval_type=('mask' if self._include_mask else 'box'),
          gt_dataset=gt_dataset)
    else:
      logging.info('Using annotation file: %s', self._annotation_file)
      coco_gt = self._coco_gt
    coco_predictions = coco_utils.convert_predictions_to_coco_annotations(
        self._predictions)
    coco_dt = coco_gt.loadRes(predictions=coco_predictions)
    image_ids = [ann['image_id'] for ann in coco_predictions]

    coco_eval = cocoeval.COCOeval(coco_gt, coco_dt, iouType='bbox')
    coco_eval.params.imgIds = image_ids
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    coco_metrics = coco_eval.stats

    if self._include_mask:
      mcoco_eval = cocoeval.COCOeval(coco_gt, coco_dt, iouType='segm')
      mcoco_eval.params.imgIds = image_ids
      mcoco_eval.evaluate()
      mcoco_eval.accumulate()
      mcoco_eval.summarize()
      mask_coco_metrics = mcoco_eval.stats

    if self._include_mask:
      metrics = np.hstack((coco_metrics, mask_coco_metrics))
    else:
      metrics = coco_metrics

    # Cleans up the internal variables in order for a fresh eval next time.
    self.reset()

    metrics_dict = {}
    for i, name in enumerate(self._metric_names):
      metrics_dict[name] = metrics[i].astype(np.float32)
    return metrics_dict
  def _process_predictions(self, predictions):
    image_scale = np.tile(predictions['image_info'][:, 2:3, :], (1, 1, 2))
    predictions['detection_boxes'] = (
        predictions['detection_boxes'].astype(np.float32))
    predictions['detection_boxes'] /= image_scale
    if 'detection_outer_boxes' in predictions:
      predictions['detection_outer_boxes'] = (
          predictions['detection_outer_boxes'].astype(np.float32))
      predictions['detection_outer_boxes'] /= image_scale
  def update(self, predictions, groundtruths=None):
    """Updates and aggregates detection results and groundtruth data.

    Args:
      predictions: a dictionary of numpy arrays including the fields below. See
        different parsers under `../dataloader` for more details.
        Required fields:
          - source_id: a numpy array of int or string of shape [batch_size].
          - image_info [if `need_rescale_bboxes` is True]: a numpy array of
            float of shape [batch_size, 4, 2].
          - num_detections: a numpy array of int of shape [batch_size].
          - detection_boxes: a numpy array of float of shape
            [batch_size, K, 4].
          - detection_classes: a numpy array of int of shape [batch_size, K].
          - detection_scores: a numpy array of float of shape [batch_size, K].
        Optional fields:
          - detection_masks: a numpy array of float of shape [batch_size, K,
            mask_height, mask_width].
      groundtruths: a dictionary of numpy arrays including the fields below.
        See also different parsers under `../dataloader` for more details.
        Required fields:
          - source_id: a numpy array of int or string of shape [batch_size].
          - height: a numpy array of int of shape [batch_size].
          - width: a numpy array of int of shape [batch_size].
          - num_detections: a numpy array of int of shape [batch_size].
          - boxes: a numpy array of float of shape [batch_size, K, 4].
          - classes: a numpy array of int of shape [batch_size, K].
        Optional fields:
          - is_crowds: a numpy array of int of shape [batch_size, K]. If the
            field is absent, it is assumed that this instance is not crowd.
          - areas: a numpy array of float of shape [batch_size, K]. If the
            field is absent, the area is calculated using either boxes or
            masks depending on which one is available.
          - masks: a numpy array of float of shape [batch_size, K, mask_height,
            mask_width].

    Raises:
      ValueError: if the required prediction or groundtruth fields are not
        present in the incoming `predictions` or `groundtruths`.
    """
    for k in self._required_prediction_fields:
      if k not in predictions:
        raise ValueError(
            'Missing the required key `{}` in predictions!'.format(k))
    if self._need_rescale_bboxes:
      self._process_predictions(predictions)
    for k, v in six.iteritems(predictions):
      if k not in self._predictions:
        self._predictions[k] = [v]
      else:
        self._predictions[k].append(v)

    if not self._annotation_file:
      assert groundtruths
      for k in self._required_groundtruth_fields:
        if k not in groundtruths:
          raise ValueError(
              'Missing the required key `{}` in groundtruths!'.format(k))
      for k, v in six.iteritems(groundtruths):
        if k not in self._groundtruths:
          self._groundtruths[k] = [v]
        else:
          self._groundtruths[k].append(v)
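
A hedged, self-contained sketch of one eval round with groundtruth coming from the dataloader (`annotation_file=None`). The ids, sizes, and boxes below are made-up values chosen so the single detection matches its groundtruth exactly; boxes are yxyx in absolute pixel coordinates:

  import numpy as np

  evaluator = COCOEvaluator(
      annotation_file=None, include_mask=False, need_rescale_bboxes=False)

  # One batch of size 1 with a single detection.
  predictions = {
      'source_id': np.array([1]),
      'num_detections': np.array([1]),
      'detection_boxes': np.array([[[10., 10., 50., 50.]]], dtype=np.float32),
      'detection_classes': np.array([[1]]),
      'detection_scores': np.array([[0.9]], dtype=np.float32),
  }
  groundtruths = {
      'source_id': np.array([1]),
      'height': np.array([100]),
      'width': np.array([100]),
      'num_detections': np.array([1]),
      'boxes': np.array([[[10., 10., 50., 50.]]], dtype=np.float32),
      'classes': np.array([[1]]),
  }
  evaluator.update(predictions, groundtruths)
  metrics = evaluator.evaluate()  # dict with the 12 box metrics, e.g. metrics['AP']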
class OlnXclassEvaluator(COCOEvaluator):
  """COCO evaluation metric class."""

  def __init__(self, annotation_file, include_mask, need_rescale_bboxes=True,
               use_category=True, seen_class='all'):
    """Constructs COCO evaluation class.

    The class provides the interface to metrics_fn in TPUEstimator. The
    _update_op() takes detections from each image and pushes them to
    self.detections. The _evaluate() loads a JSON file in COCO annotation
    format as the groundtruths and runs COCO evaluation.

    Args:
      annotation_file: a JSON file that stores annotations of the eval dataset.
        If `annotation_file` is None, groundtruth annotations will be loaded
        from the dataloader.
      include_mask: a boolean to indicate whether or not to include the mask
        eval.
      need_rescale_bboxes: if True, bboxes in `predictions` will be rescaled
        back to absolute values (`image_info` is needed in this case).
      use_category: if `False`, treat all objects in all classes as one
        foreground category.
      seen_class: 'all' or 'voc' or 'nonvoc'.
    """
    super(OlnXclassEvaluator, self).__init__(
        annotation_file=annotation_file,
        include_mask=include_mask,
        need_rescale_bboxes=need_rescale_bboxes)
    self._use_category = use_category
    self._seen_class = seen_class
    self._seen_class_ids = class_utils.coco_split_class_ids(seen_class)
    self._metric_names = [
        'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'ARmax10', 'ARmax20',
        'ARmax50', 'ARmax100', 'ARmax200', 'ARmax10s', 'ARmax10m', 'ARmax10l'
    ]
    if self._seen_class != 'all':
      self._metric_names.extend([
          'AP_seen', 'AP50_seen', 'AP75_seen', 'APs_seen', 'APm_seen',
          'APl_seen', 'ARmax10_seen', 'ARmax20_seen', 'ARmax50_seen',
          'ARmax100_seen', 'ARmax200_seen', 'ARmax10s_seen', 'ARmax10m_seen',
          'ARmax10l_seen', 'AP_novel', 'AP50_novel', 'AP75_novel',
          'APs_novel', 'APm_novel', 'APl_novel', 'ARmax10_novel',
          'ARmax20_novel', 'ARmax50_novel', 'ARmax100_novel', 'ARmax200_novel',
          'ARmax10s_novel', 'ARmax10m_novel', 'ARmax10l_novel',
      ])
    if self._include_mask:
      mask_metric_names = ['mask_' + x for x in self._metric_names]
      self._metric_names.extend(mask_metric_names)
      self._required_prediction_fields.extend(['detection_masks'])
      self._required_groundtruth_fields.extend(['masks'])

    self.reset()
  def evaluate(self):
    """Evaluates with detections from all images with COCO API.

    Returns:
      coco_metric: float numpy array with shape [24] representing the
        coco-style evaluation metrics (box and mask).
    """
    if not self._annotation_file:
      logging.info('There is no annotation_file in COCOEvaluator.')
      gt_dataset = coco_utils.convert_groundtruths_to_coco_dataset(
          self._groundtruths)
      coco_gt = coco_utils.COCOWrapper(
          eval_type=('mask' if self._include_mask else 'box'),
          gt_dataset=gt_dataset)
    else:
      logging.info('Using annotation file: %s', self._annotation_file)
      coco_gt = self._coco_gt
    coco_predictions = coco_utils.convert_predictions_to_coco_annotations(
        self._predictions)
    coco_dt = coco_gt.loadRes(predictions=coco_predictions)
    image_ids = [ann['image_id'] for ann in coco_predictions]
    # Class manipulation: 'all' split samples -> ignored_split = 0.
    for idx, ann in enumerate(coco_gt.dataset['annotations']):
      coco_gt.dataset['annotations'][idx]['ignored_split'] = 0

    coco_eval = cocoeval.OlnCOCOevalXclassWrapper(
        coco_gt, coco_dt, iou_type='bbox')
    coco_eval.params.maxDets = [10, 20, 50, 100, 200]
    coco_eval.params.imgIds = image_ids
    coco_eval.params.useCats = 0 if not self._use_category else 1
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    coco_metrics = coco_eval.stats

    if self._include_mask:
      mcoco_eval = cocoeval.OlnCOCOevalXclassWrapper(
          coco_gt, coco_dt, iou_type='segm')
      mcoco_eval.params.maxDets = [10, 20, 50, 100, 200]
      mcoco_eval.params.imgIds = image_ids
      mcoco_eval.params.useCats = 0 if not self._use_category else 1
      mcoco_eval.evaluate()
      mcoco_eval.accumulate()
      mcoco_eval.summarize()
      mask_coco_metrics = mcoco_eval.stats

    if self._include_mask:
      metrics = np.hstack((coco_metrics, mask_coco_metrics))
    else:
      metrics = coco_metrics

    if self._seen_class != 'all':
      # For the seen-class eval, samples of novel classes are ignored.
      coco_gt_seen = copy.deepcopy(coco_gt)
      for idx, ann in enumerate(coco_gt.dataset['annotations']):
        if ann['category_id'] in self._seen_class_ids:
          coco_gt_seen.dataset['annotations'][idx]['ignored_split'] = 0
        else:
          coco_gt_seen.dataset['annotations'][idx]['ignored_split'] = 1
      coco_eval_seen = cocoeval.OlnCOCOevalXclassWrapper(
          coco_gt_seen, coco_dt, iou_type='bbox')
      coco_eval_seen.params.maxDets = [10, 20, 50, 100, 200]
      coco_eval_seen.params.imgIds = image_ids
      coco_eval_seen.params.useCats = 0 if not self._use_category else 1
      coco_eval_seen.evaluate()
      coco_eval_seen.accumulate()
      coco_eval_seen.summarize()
      coco_metrics_seen = coco_eval_seen.stats
      if self._include_mask:
        mcoco_eval_seen = cocoeval.OlnCOCOevalXclassWrapper(
            coco_gt_seen, coco_dt, iou_type='segm')
        mcoco_eval_seen.params.maxDets = [10, 20, 50, 100, 200]
        mcoco_eval_seen.params.imgIds = image_ids
        mcoco_eval_seen.params.useCats = 0 if not self._use_category else 1
        mcoco_eval_seen.evaluate()
        mcoco_eval_seen.accumulate()
        mcoco_eval_seen.summarize()
        mask_coco_metrics_seen = mcoco_eval_seen.stats

      # For the novel-class eval, samples of seen classes are ignored.
      coco_gt_novel = copy.deepcopy(coco_gt)
      for idx, ann in enumerate(coco_gt.dataset['annotations']):
        if ann['category_id'] in self._seen_class_ids:
          coco_gt_novel.dataset['annotations'][idx]['ignored_split'] = 1
        else:
          coco_gt_novel.dataset['annotations'][idx]['ignored_split'] = 0
      coco_eval_novel = cocoeval.OlnCOCOevalXclassWrapper(
          coco_gt_novel, coco_dt, iou_type='bbox')
      coco_eval_novel.params.maxDets = [10, 20, 50, 100, 200]
      coco_eval_novel.params.imgIds = image_ids
      coco_eval_novel.params.useCats = 0 if not self._use_category else 1
      coco_eval_novel.evaluate()
      coco_eval_novel.accumulate()
      coco_eval_novel.summarize()
      coco_metrics_novel = coco_eval_novel.stats
      if self._include_mask:
        mcoco_eval_novel = cocoeval.OlnCOCOevalXclassWrapper(
            coco_gt_novel, coco_dt, iou_type='segm')
        mcoco_eval_novel.params.maxDets = [10, 20, 50, 100, 200]
        mcoco_eval_novel.params.imgIds = image_ids
        mcoco_eval_novel.params.useCats = 0 if not self._use_category else 1
        mcoco_eval_novel.evaluate()
        mcoco_eval_novel.accumulate()
        mcoco_eval_novel.summarize()
        mask_coco_metrics_novel = mcoco_eval_novel.stats

      # Combine all splits.
      if self._include_mask:
        metrics = np.hstack(
            (coco_metrics, coco_metrics_seen, coco_metrics_novel,
             mask_coco_metrics, mask_coco_metrics_seen,
             mask_coco_metrics_novel))
      else:
        metrics = np.hstack(
            (coco_metrics, coco_metrics_seen, coco_metrics_novel))

    # Cleans up the internal variables in order for a fresh eval next time.
    self.reset()

    metrics_dict = {}
    for i, name in enumerate(self._metric_names):
      metrics_dict[name] = metrics[i].astype(np.float32)
    return metrics_dict
class OlnXdataEvaluator(OlnXclassEvaluator):
  """COCO evaluation metric class."""

  def __init__(self, annotation_file, include_mask, need_rescale_bboxes=True,
               use_category=True, seen_class='all'):
    """Constructs COCO evaluation class.

    The class provides the interface to metrics_fn in TPUEstimator. The
    _update_op() takes detections from each image and pushes them to
    self.detections. The _evaluate() loads a JSON file in COCO annotation
    format as the groundtruths and runs COCO evaluation.

    Args:
      annotation_file: a JSON file that stores annotations of the eval dataset.
        If `annotation_file` is None, groundtruth annotations will be loaded
        from the dataloader.
      include_mask: a boolean to indicate whether or not to include the mask
        eval.
      need_rescale_bboxes: if True, bboxes in `predictions` will be rescaled
        back to absolute values (`image_info` is needed in this case).
      use_category: if `False`, treat all objects in all classes as one
        foreground category.
      seen_class: 'all' or 'voc' or 'nonvoc'.
    """
    super(OlnXdataEvaluator, self).__init__(
        annotation_file=annotation_file,
        include_mask=include_mask,
        need_rescale_bboxes=need_rescale_bboxes,
        use_category=False,
        seen_class='all')
  def evaluate(self):
    """Evaluates with detections from all images with COCO API.

    Returns:
      coco_metric: float numpy array with shape [24] representing the
        coco-style evaluation metrics (box and mask).
    """
    if not self._annotation_file:
      logging.info('There is no annotation_file in COCOEvaluator.')
      gt_dataset = coco_utils.convert_groundtruths_to_coco_dataset(
          self._groundtruths)
      coco_gt = coco_utils.COCOWrapper(
          eval_type=('mask' if self._include_mask else 'box'),
          gt_dataset=gt_dataset)
    else:
      logging.info('Using annotation file: %s', self._annotation_file)
      coco_gt = self._coco_gt
    coco_predictions = coco_utils.convert_predictions_to_coco_annotations(
        self._predictions)
    coco_dt = coco_gt.loadRes(predictions=coco_predictions)
    image_ids = [ann['image_id'] for ann in coco_predictions]
    # Class manipulation: 'all' split samples -> ignored_split = 0.
    for idx, _ in enumerate(coco_gt.dataset['annotations']):
      coco_gt.dataset['annotations'][idx]['ignored_split'] = 0

    coco_eval = cocoeval.OlnCOCOevalWrapper(coco_gt, coco_dt, iou_type='bbox')
    coco_eval.params.maxDets = [10, 20, 50, 100, 200]
    coco_eval.params.imgIds = image_ids
    coco_eval.params.useCats = 0 if not self._use_category else 1
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    coco_metrics = coco_eval.stats

    if self._include_mask:
      mcoco_eval = cocoeval.OlnCOCOevalWrapper(coco_gt, coco_dt,
                                               iou_type='segm')
      mcoco_eval.params.maxDets = [10, 20, 50, 100, 200]
      mcoco_eval.params.imgIds = image_ids
      mcoco_eval.params.useCats = 0 if not self._use_category else 1
      mcoco_eval.evaluate()
      mcoco_eval.accumulate()
      mcoco_eval.summarize()
      mask_coco_metrics = mcoco_eval.stats

    if self._include_mask:
      metrics = np.hstack((coco_metrics, mask_coco_metrics))
    else:
      metrics = coco_metrics

    # Cleans up the internal variables in order for a fresh eval next time.
    self.reset()

    metrics_dict = {}
    for i, name in enumerate(self._metric_names):
      metrics_dict[name] = metrics[i].astype(np.float32)
    return metrics_dict
class ShapeMaskCOCOEvaluator(COCOEvaluator):
  """COCO evaluation metric class for ShapeMask."""

  def __init__(self, mask_eval_class, **kwargs):
    """Constructs COCO evaluation class.

    The class provides the interface to metrics_fn in TPUEstimator. The
    _update_op() takes detections from each image and pushes them to
    self.detections. The _evaluate() loads a JSON file in COCO annotation
    format as the groundtruths and runs COCO evaluation.

    Args:
      mask_eval_class: the set of classes for mask evaluation.
      **kwargs: other keyword arguments passed to the parent class initializer.
    """
    super(ShapeMaskCOCOEvaluator, self).__init__(**kwargs)
    self._mask_eval_class = mask_eval_class
    self._eval_categories = class_utils.coco_split_class_ids(mask_eval_class)
    if mask_eval_class != 'all':
      self._metric_names = [
          x.replace('mask', 'novel_mask') for x in self._metric_names
      ]

  def evaluate(self):
    """Evaluates with detections from all images with COCO API.

    Returns:
      coco_metric: float numpy array with shape [24] representing the
        coco-style evaluation metrics (box and mask).
    """
    if not self._annotation_file:
      gt_dataset = coco_utils.convert_groundtruths_to_coco_dataset(
          self._groundtruths)
      coco_gt = coco_utils.COCOWrapper(
          eval_type=('mask' if self._include_mask else 'box'),
          gt_dataset=gt_dataset)
    else:
      coco_gt = self._coco_gt
    coco_predictions = coco_utils.convert_predictions_to_coco_annotations(
        self._predictions)
    coco_dt = coco_gt.loadRes(predictions=coco_predictions)
    image_ids = [ann['image_id'] for ann in coco_predictions]

    coco_eval = cocoeval.COCOeval(coco_gt, coco_dt, iouType='bbox')
    coco_eval.params.imgIds = image_ids
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    coco_metrics = coco_eval.stats

    if self._include_mask:
      mcoco_eval = cocoeval.COCOeval(coco_gt, coco_dt, iouType='segm')
      mcoco_eval.params.imgIds = image_ids
      mcoco_eval.evaluate()
      mcoco_eval.accumulate()
      mcoco_eval.summarize()
      if self._mask_eval_class == 'all':
        metrics = np.hstack((coco_metrics, mcoco_eval.stats))
      else:
        mask_coco_metrics = mcoco_eval.category_stats
        val_catg_idx = np.isin(mcoco_eval.params.catIds, self._eval_categories)
        # Gather the valid evaluation of the eval categories.
        if np.any(val_catg_idx):
          mean_val_metrics = []
          for mid in range(len(self._metric_names) // 2):
            mean_val_metrics.append(
                np.nanmean(mask_coco_metrics[mid][val_catg_idx]))
          mean_val_metrics = np.array(mean_val_metrics)
        else:
          mean_val_metrics = np.zeros(len(self._metric_names) // 2)
        metrics = np.hstack((coco_metrics, mean_val_metrics))
    else:
      metrics = coco_metrics

    # Cleans up the internal variables in order for a fresh eval next time.
    self.reset()

    metrics_dict = {}
    for i, name in enumerate(self._metric_names):
      metrics_dict[name] = metrics[i].astype(np.float32)
    return metrics_dict
official/legacy/detection/evaluation/coco_utils.py (new file, 0 → 100644)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Util functions related to pycocotools and COCO eval."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
copy
import
json
from
absl
import
logging
import
numpy
as
np
from
PIL
import
Image
from
pycocotools
import
coco
from
pycocotools
import
mask
as
mask_api
import
six
import
tensorflow
as
tf
from
official.legacy.detection.dataloader
import
tf_example_decoder
from
official.legacy.detection.utils
import
box_utils
from
official.legacy.detection.utils
import
mask_utils
class COCOWrapper(coco.COCO):
  """COCO wrapper class.

  This class wraps the COCO API object and provides the following additional
  functionalities:
    1. Support string-type image ids.
    2. Support loading the groundtruth dataset using an external annotation
       dictionary.
    3. Support loading the prediction results using an external annotation
       dictionary.
  """

  def __init__(self, eval_type='box', annotation_file=None, gt_dataset=None):
    """Instantiates a COCO-style API object.

    Args:
      eval_type: either 'box' or 'mask'.
      annotation_file: a JSON file that stores annotations of the eval dataset.
        This is required if `gt_dataset` is not provided.
      gt_dataset: the groundtruth eval dataset in COCO API format.
    """
    if ((annotation_file and gt_dataset) or
        ((not annotation_file) and (not gt_dataset))):
      raise ValueError('One and only one of `annotation_file` and '
                       '`gt_dataset` needs to be specified.')

    if eval_type not in ['box', 'mask']:
      raise ValueError('The `eval_type` can only be either `box` or `mask`.')

    coco.COCO.__init__(self, annotation_file=annotation_file)
    self._eval_type = eval_type
    if gt_dataset:
      self.dataset = gt_dataset
      self.createIndex()
  def loadRes(self, predictions):
    """Loads result file and returns a result api object.

    Args:
      predictions: a list of dictionaries, each representing an annotation in
        COCO format. The required fields are `image_id`, `category_id`,
        `score`, `bbox`, `segmentation`.

    Returns:
      res: result COCO api object.

    Raises:
      ValueError: if the set of image ids from predictions is not a subset of
        the set of image ids of the groundtruth dataset.
    """
    res = coco.COCO()
    res.dataset['images'] = copy.deepcopy(self.dataset['images'])
    res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])

    image_ids = [ann['image_id'] for ann in predictions]
    if set(image_ids) != (set(image_ids) & set(self.getImgIds())):
      raise ValueError('Results do not correspond to the current dataset!')
    for ann in predictions:
      x1, x2, y1, y2 = [ann['bbox'][0], ann['bbox'][0] + ann['bbox'][2],
                        ann['bbox'][1], ann['bbox'][1] + ann['bbox'][3]]
      if self._eval_type == 'box':
        ann['area'] = ann['bbox'][2] * ann['bbox'][3]
        ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
      elif self._eval_type == 'mask':
        ann['area'] = mask_api.area(ann['segmentation'])

    res.dataset['annotations'] = copy.deepcopy(predictions)
    res.createIndex()
    return res
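
A quick worked example of the box-to-polygon conversion above, with illustrative numbers. A COCO bbox [x, y, w, h] is expanded into the four corners of the rectangle so box-only predictions still carry a valid segmentation:

  bbox = [10., 20., 30., 40.]  # COCO format: [x, y, width, height]
  x1, x2, y1, y2 = bbox[0], bbox[0] + bbox[2], bbox[1], bbox[1] + bbox[3]
  segmentation = [[x1, y1, x1, y2, x2, y2, x2, y1]]
  area = bbox[2] * bbox[3]
  print(segmentation)  # [[10.0, 20.0, 10.0, 60.0, 40.0, 60.0, 40.0, 20.0]]
  print(area)          # 1200.0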
def convert_predictions_to_coco_annotations(predictions):
  """Converts a batch of predictions to annotations in COCO format.

  Args:
    predictions: a dictionary of lists of numpy arrays including the following
      fields. K below denotes the maximum number of instances per image.
      Required fields:
        - source_id: a list of numpy arrays of int or string of shape
          [batch_size].
        - num_detections: a list of numpy arrays of int of shape [batch_size].
        - detection_boxes: a list of numpy arrays of float of shape
          [batch_size, K, 4], where coordinates are in the original image
          space (not the scaled image space).
        - detection_classes: a list of numpy arrays of int of shape
          [batch_size, K].
        - detection_scores: a list of numpy arrays of float of shape
          [batch_size, K].
      Optional fields:
        - detection_masks: a list of numpy arrays of float of shape
          [batch_size, K, mask_height, mask_width].

  Returns:
    coco_predictions: prediction in COCO annotation format.
  """
  coco_predictions = []
  num_batches = len(predictions['source_id'])
  batch_size = predictions['source_id'][0].shape[0]
  max_num_detections = predictions['detection_classes'][0].shape[1]
  use_outer_box = 'detection_outer_boxes' in predictions
  for i in range(num_batches):
    predictions['detection_boxes'][i] = box_utils.yxyx_to_xywh(
        predictions['detection_boxes'][i])
    if use_outer_box:
      predictions['detection_outer_boxes'][i] = box_utils.yxyx_to_xywh(
          predictions['detection_outer_boxes'][i])
      mask_boxes = predictions['detection_outer_boxes']
    else:
      mask_boxes = predictions['detection_boxes']

    for j in range(batch_size):
      if 'detection_masks' in predictions:
        image_masks = mask_utils.paste_instance_masks(
            predictions['detection_masks'][i][j],
            mask_boxes[i][j],
            int(predictions['image_info'][i][j, 0, 0]),
            int(predictions['image_info'][i][j, 0, 1]))
        binary_masks = (image_masks > 0.0).astype(np.uint8)
        encoded_masks = [
            mask_api.encode(np.asfortranarray(binary_mask))
            for binary_mask in list(binary_masks)
        ]
      for k in range(max_num_detections):
        ann = {}
        ann['image_id'] = predictions['source_id'][i][j]
        ann['category_id'] = predictions['detection_classes'][i][j, k]
        ann['bbox'] = predictions['detection_boxes'][i][j, k]
        ann['score'] = predictions['detection_scores'][i][j, k]
        if 'detection_masks' in predictions:
          ann['segmentation'] = encoded_masks[k]
        coco_predictions.append(ann)

  for i, ann in enumerate(coco_predictions):
    ann['id'] = i + 1

  return coco_predictions
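
A hedged sketch of the expected input layout: two batches of size 1, one detection each. All values below are made up; note the boxes are converted to COCO xywh in place:

  import numpy as np

  preds = {
      'source_id': [np.array([1]), np.array([2])],
      'num_detections': [np.array([1]), np.array([1])],
      'detection_boxes': [
          np.array([[[5., 5., 25., 45.]]], dtype=np.float32),    # batch 1, yxyx
          np.array([[[10., 10., 30., 30.]]], dtype=np.float32),  # batch 2, yxyx
      ],
      'detection_classes': [np.array([[1]]), np.array([[2]])],
      'detection_scores': [np.array([[0.9]]), np.array([[0.7]])],
  }
  anns = convert_predictions_to_coco_annotations(preds)
  print(anns[0]['bbox'])          # [ 5.  5. 40. 20.] -- now COCO xywh
  print([a['id'] for a in anns])  # [1, 2]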
def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None):
  """Converts groundtruths to the dataset in COCO format.

  Args:
    groundtruths: a dictionary of lists of numpy arrays including the fields
      below. Note that each element in the list represents the values for a
      single example without the batch dimension. K below denotes the actual
      number of instances for each image.
      Required fields:
        - source_id: a list of numpy arrays of int or string of shape
          [batch_size].
        - height: a list of numpy arrays of int of shape [batch_size].
        - width: a list of numpy arrays of int of shape [batch_size].
        - num_detections: a list of numpy arrays of int of shape [batch_size].
        - boxes: a list of numpy arrays of float of shape [batch_size, K, 4],
          where coordinates are in the original image space (not the
          normalized coordinates).
        - classes: a list of numpy arrays of int of shape [batch_size, K].
      Optional fields:
        - is_crowds: a list of numpy arrays of int of shape [batch_size, K].
          If the field is absent, it is assumed that this instance is not
          crowd.
        - areas: a list of numpy arrays of float of shape [batch_size, K]. If
          the field is absent, the area is calculated using either boxes or
          masks depending on which one is available.
        - masks: a list of numpy arrays of string of shape [batch_size, K].
    label_map: (optional) a dictionary that maps the category id to the
      category name. If `None`, collects the category mapping from the
      `groundtruths`.

  Returns:
    coco_groundtruths: the groundtruth dataset in COCO format.
  """
  source_ids = np.concatenate(groundtruths['source_id'], axis=0)
  heights = np.concatenate(groundtruths['height'], axis=0)
  widths = np.concatenate(groundtruths['width'], axis=0)
  gt_images = [{'id': int(i), 'height': int(h), 'width': int(w)}
               for i, h, w in zip(source_ids, heights, widths)]

  gt_annotations = []
  num_batches = len(groundtruths['source_id'])
  batch_size = groundtruths['source_id'][0].shape[0]
  for i in range(num_batches):
    for j in range(batch_size):
      num_instances = groundtruths['num_detections'][i][j]
      for k in range(num_instances):
        ann = {}
        ann['image_id'] = int(groundtruths['source_id'][i][j])
        if 'is_crowds' in groundtruths:
          ann['iscrowd'] = int(groundtruths['is_crowds'][i][j, k])
        else:
          ann['iscrowd'] = 0
        ann['category_id'] = int(groundtruths['classes'][i][j, k])
        boxes = groundtruths['boxes'][i]
        ann['bbox'] = [
            float(boxes[j, k, 1]),
            float(boxes[j, k, 0]),
            float(boxes[j, k, 3] - boxes[j, k, 1]),
            float(boxes[j, k, 2] - boxes[j, k, 0])
        ]
        if 'areas' in groundtruths:
          ann['area'] = float(groundtruths['areas'][i][j, k])
        else:
          ann['area'] = float((boxes[j, k, 3] - boxes[j, k, 1]) *
                              (boxes[j, k, 2] - boxes[j, k, 0]))
        if 'masks' in groundtruths:
          mask = Image.open(six.BytesIO(groundtruths['masks'][i][j, k]))
          width, height = mask.size
          np_mask = (
              np.array(mask.getdata()).reshape(height, width).astype(np.uint8))
          np_mask[np_mask > 0] = 255
          encoded_mask = mask_api.encode(np.asfortranarray(np_mask))
          ann['segmentation'] = encoded_mask
          if 'areas' not in groundtruths:
            ann['area'] = mask_api.area(encoded_mask)
        gt_annotations.append(ann)

  for i, ann in enumerate(gt_annotations):
    ann['id'] = i + 1

  if label_map:
    gt_categories = [{'id': i, 'name': label_map[i]} for i in label_map]
  else:
    category_ids = [gt['category_id'] for gt in gt_annotations]
    gt_categories = [{'id': i} for i in set(category_ids)]

  gt_dataset = {
      'images': gt_images,
      'categories': gt_categories,
      'annotations': copy.deepcopy(gt_annotations),
  }
  return gt_dataset
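
A hedged sketch with a single image and one groundtruth box (made-up values); it shows the output structure and the yxyx-to-xywh bbox conversion:

  import numpy as np

  gts = {
      'source_id': [np.array([1])],
      'height': [np.array([100])],
      'width': [np.array([100])],
      'num_detections': [np.array([1])],
      'boxes': [np.array([[[10., 10., 50., 50.]]], np.float32)],  # yxyx, absolute
      'classes': [np.array([[1]])],
  }
  dataset = convert_groundtruths_to_coco_dataset(gts)
  print(sorted(dataset.keys()))             # ['annotations', 'categories', 'images']
  print(dataset['annotations'][0]['bbox'])  # [10.0, 10.0, 40.0, 40.0] (xywh)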
class COCOGroundtruthGenerator(object):
  """Generates the groundtruth annotations from a single example."""

  def __init__(self, file_pattern, num_examples, include_mask):
    self._file_pattern = file_pattern
    self._num_examples = num_examples
    self._include_mask = include_mask
    self._dataset_fn = tf.data.TFRecordDataset

  def _parse_single_example(self, example):
    """Parses a single serialized tf.Example proto.

    Args:
      example: a serialized tf.Example proto string.

    Returns:
      A dictionary of groundtruth with the following fields:
        source_id: a scalar tensor of int64 representing the image source_id.
        height: a scalar tensor of int64 representing the image height.
        width: a scalar tensor of int64 representing the image width.
        boxes: a float tensor of shape [K, 4], representing the groundtruth
          boxes in absolute coordinates with respect to the original image
          size.
        classes: an int64 tensor of shape [K], representing the class labels
          of each instance.
        is_crowds: a bool tensor of shape [K], indicating whether the instance
          is crowd.
        areas: a float tensor of shape [K], indicating the area of each
          instance.
        masks: a string tensor of shape [K], containing the bytes of the png
          mask of each instance.
    """
    decoder = tf_example_decoder.TfExampleDecoder(
        include_mask=self._include_mask)
    decoded_tensors = decoder.decode(example)

    image = decoded_tensors['image']
    image_size = tf.shape(image)[0:2]
    boxes = box_utils.denormalize_boxes(
        decoded_tensors['groundtruth_boxes'], image_size)
    groundtruths = {
        'source_id': tf.string_to_number(
            decoded_tensors['source_id'], out_type=tf.int64),
        'height': decoded_tensors['height'],
        'width': decoded_tensors['width'],
        'num_detections': tf.shape(decoded_tensors['groundtruth_classes'])[0],
        'boxes': boxes,
        'classes': decoded_tensors['groundtruth_classes'],
        'is_crowds': decoded_tensors['groundtruth_is_crowd'],
        'areas': decoded_tensors['groundtruth_area'],
    }
    if self._include_mask:
      groundtruths.update({
          'masks': decoded_tensors['groundtruth_instance_masks_png'],
      })
    return groundtruths
  def _build_pipeline(self):
    """Builds a data pipeline to generate groundtruth annotations."""
    dataset = tf.data.Dataset.list_files(self._file_pattern, shuffle=False)
    dataset = dataset.apply(
        tf.data.experimental.parallel_interleave(
            lambda filename: self._dataset_fn(filename).prefetch(1),
            cycle_length=32,
            sloppy=False))
    dataset = dataset.map(self._parse_single_example, num_parallel_calls=64)
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
    dataset = dataset.batch(1, drop_remainder=False)
    return dataset

  def __call__(self):
    with tf.Graph().as_default():
      dataset = self._build_pipeline()
      groundtruth = dataset.make_one_shot_iterator().get_next()

      with tf.Session() as sess:
        for _ in range(self._num_examples):
          groundtruth_result = sess.run(groundtruth)
          yield groundtruth_result
def scan_and_generator_annotation_file(file_pattern, num_samples, include_mask,
                                       annotation_file):
  """Scans the dataset and generates the COCO-style annotation JSON file."""
  groundtruth_generator = COCOGroundtruthGenerator(
      file_pattern, num_samples, include_mask)
  generate_annotation_file(groundtruth_generator, annotation_file)


def generate_annotation_file(groundtruth_generator, annotation_file):
  """Generates a COCO-style annotation JSON file given a groundtruth generator."""
  groundtruths = {}
  logging.info('Loading groundtruth annotations from dataset to memory...')
  for groundtruth in groundtruth_generator():
    for k, v in six.iteritems(groundtruth):
      if k not in groundtruths:
        groundtruths[k] = [v]
      else:
        groundtruths[k].append(v)
  gt_dataset = convert_groundtruths_to_coco_dataset(groundtruths)

  logging.info('Saving groundtruth annotations to the JSON file...')
  with tf.io.gfile.GFile(annotation_file, 'w') as f:
    f.write(json.dumps(gt_dataset))
  logging.info('Done saving the JSON file...')
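
A hedged sketch: any zero-argument callable that yields per-image groundtruth dicts can drive generate_annotation_file. The toy generator and the output path below are illustrative, not part of the committed file:

  import numpy as np

  def toy_generator():
    for sid in (1, 2):
      yield {
          'source_id': np.array([sid]),
          'height': np.array([100]),
          'width': np.array([100]),
          'num_detections': np.array([1]),
          'boxes': np.array([[[10., 10., 50., 50.]]], np.float32),
          'classes': np.array([[1]]),
      }

  generate_annotation_file(toy_generator, '/tmp/toy_annotations.json')  # path is an example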
official/legacy/detection/evaluation/factory.py (new file, 0 → 100644)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluator factory."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
official.legacy.detection.evaluation
import
coco_evaluator
def evaluator_generator(params):
  """Generator function for various evaluators."""
  if params.type == 'box':
    evaluator = coco_evaluator.COCOEvaluator(
        annotation_file=params.val_json_file, include_mask=False)
  elif params.type == 'box_and_mask':
    evaluator = coco_evaluator.COCOEvaluator(
        annotation_file=params.val_json_file, include_mask=True)
  elif params.type == 'oln_xclass_box':
    evaluator = coco_evaluator.OlnXclassEvaluator(
        annotation_file=params.val_json_file,
        include_mask=False,
        use_category=False,
        seen_class=params.seen_class)
  elif params.type == 'oln_xclass_box_and_mask':
    evaluator = coco_evaluator.OlnXclassEvaluator(
        annotation_file=params.val_json_file,
        include_mask=True,
        use_category=False,
        seen_class=params.seen_class)
  elif params.type == 'oln_xdata_box':
    evaluator = coco_evaluator.OlnXdataEvaluator(
        annotation_file=params.val_json_file,
        include_mask=False,
        use_category=False,
        seen_class='all')
  elif params.type == 'shapemask_box_and_mask':
    evaluator = coco_evaluator.ShapeMaskCOCOEvaluator(
        mask_eval_class=params.mask_eval_class,
        annotation_file=params.val_json_file,
        include_mask=True)
  else:
    raise ValueError('Evaluator %s is not supported.' % params.type)

  return coco_evaluator.MetricWrapper(evaluator)
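
A hedged usage sketch: `params` only needs attribute access to `.type` and the evaluator-specific fields, so a SimpleNamespace stands in for the real config object here. The annotation path is a placeholder and must point at a real COCO JSON file for construction to succeed:

  from types import SimpleNamespace

  params = SimpleNamespace(
      type='box', val_json_file='/path/to/instances_val2017.json')
  metric = evaluator_generator(params)  # a MetricWrapper around a COCOEvaluator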
official/legacy/detection/executor/__init__.py (new file, 0 → 100644)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
official/legacy/detection/executor/detection_executor.py (new file, 0 → 100644)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""An executor class for running model on TensorFlow 2.0."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
absl
import
logging
import
tensorflow
as
tf
from
official.legacy.detection.executor
import
distributed_executor
as
executor
from
official.vision.utils.object_detection
import
visualization_utils
class DetectionDistributedExecutor(executor.DistributedExecutor):
  """Detection-specific custom training loop executor.

  Subclasses the DistributedExecutor and adds support for numpy based metrics.
  """

  def __init__(self,
               predict_post_process_fn=None,
               trainable_variables_filter=None,
               **kwargs):
    super(DetectionDistributedExecutor, self).__init__(**kwargs)
    if predict_post_process_fn:
      assert callable(predict_post_process_fn)
    if trainable_variables_filter:
      assert callable(trainable_variables_filter)
    self._predict_post_process_fn = predict_post_process_fn
    self._trainable_variables_filter = trainable_variables_filter
    self.eval_steps = tf.Variable(
        0,
        trainable=False,
        dtype=tf.int32,
        synchronization=tf.VariableSynchronization.ON_READ,
        aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA,
        shape=[])
  def _create_replicated_step(self,
                              strategy,
                              model,
                              loss_fn,
                              optimizer,
                              metric=None):
    trainable_variables = model.trainable_variables
    if self._trainable_variables_filter:
      trainable_variables = self._trainable_variables_filter(
          trainable_variables)
    logging.info('Filter trainable variables from %d to %d',
                 len(model.trainable_variables), len(trainable_variables))
    update_state_fn = lambda labels, outputs: None
    if isinstance(metric, tf.keras.metrics.Metric):
      update_state_fn = metric.update_state
    else:
      logging.error('Detection: train metric is not an instance of '
                    'tf.keras.metrics.Metric.')

    def _replicated_step(inputs):
      """Replicated training step."""
      inputs, labels = inputs

      with tf.GradientTape() as tape:
        outputs = model(inputs, training=True)
        all_losses = loss_fn(labels, outputs)
        losses = {}
        for k, v in all_losses.items():
          losses[k] = tf.reduce_mean(v)
        per_replica_loss = losses['total_loss'] / strategy.num_replicas_in_sync
        update_state_fn(labels, outputs)

      grads = tape.gradient(per_replica_loss, trainable_variables)
      clipped_grads, _ = tf.clip_by_global_norm(grads, clip_norm=1.0)
      optimizer.apply_gradients(zip(clipped_grads, trainable_variables))
      return losses

    return _replicated_step
  def _create_test_step(self, strategy, model, metric):
    """Creates a distributed test step."""

    @tf.function
    def test_step(iterator, eval_steps):
      """Calculates evaluation metrics on distributed devices."""

      def _test_step_fn(inputs, eval_steps):
        """Replicated accuracy calculation."""
        inputs, labels = inputs
        model_outputs = model(inputs, training=False)
        if self._predict_post_process_fn:
          labels, prediction_outputs = self._predict_post_process_fn(
              labels, model_outputs)
          num_remaining_visualizations = (
              self._params.eval.num_images_to_visualize - eval_steps)
          # If there is a remaining number of visualizations that needs to be
          # done, add the next batch outputs for visualization.
          #
          # TODO(hongjunchoi): Once dynamic slicing is supported on TPU, only
          # write the correct slice of outputs to the summary file.
          if num_remaining_visualizations > 0:
            visualization_utils.visualize_images_with_bounding_boxes(
                inputs, prediction_outputs['detection_boxes'],
                self.global_train_step, self.eval_summary_writer)

        return labels, prediction_outputs

      labels, outputs = strategy.run(
          _test_step_fn, args=(
              next(iterator),
              eval_steps,
          ))
      outputs = tf.nest.map_structure(strategy.experimental_local_results,
                                      outputs)
      labels = tf.nest.map_structure(strategy.experimental_local_results,
                                     labels)
      eval_steps.assign_add(self._params.eval.batch_size)
      return labels, outputs

    return test_step
  def _run_evaluation(self, test_step, current_training_step, metric,
                      test_iterator):
    """Runs validation steps and aggregates metrics."""
    self.eval_steps.assign(0)
    if not test_iterator or not metric:
      logging.warning(
          'Both test_iterator (%s) and metrics (%s) must not be None.',
          test_iterator, metric)
      return None
    logging.info('Running evaluation after step: %s.', current_training_step)
    while True:
      try:
        labels, outputs = test_step(test_iterator, self.eval_steps)
        if metric:
          metric.update_state(labels, outputs)
      except (StopIteration, tf.errors.OutOfRangeError):
        break

    metric_result = metric.result()
    if isinstance(metric, tf.keras.metrics.Metric):
      metric_result = tf.nest.map_structure(
          lambda x: x.numpy().astype(float), metric_result)
    logging.info('Step: [%d] Validation metric = %s', current_training_step,
                 metric_result)
    return metric_result
official/legacy/detection/executor/distributed_executor.py (new file, 0 → 100644)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Custom training loop for running TensorFlow 2.0 models."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
from
typing
import
Optional
,
Dict
,
List
,
Text
,
Callable
,
Union
,
Iterator
,
Any
from
absl
import
flags
from
absl
import
logging
import
numpy
as
np
import
tensorflow
as
tf
# pylint: disable=unused-import,g-import-not-at-top,redefined-outer-name,reimported
from
official.common
import
distribute_utils
from
official.modeling.hyperparams
import
params_dict
from
official.utils
import
hyperparams_flags
from
official.utils.misc
import
keras_utils
FLAGS
=
flags
.
FLAGS
strategy_flags_dict
=
hyperparams_flags
.
strategy_flags_dict
hparam_flags_dict
=
hyperparams_flags
.
hparam_flags_dict
def _save_checkpoint(checkpoint, model_dir, checkpoint_prefix):
  """Saves the model to model_dir with the provided checkpoint prefix."""
  checkpoint_path = os.path.join(model_dir, checkpoint_prefix)
  saved_path = checkpoint.save(checkpoint_path)
  logging.info('Saving model as TF checkpoint: %s', saved_path)
def _steps_to_run(current_step, total_steps, steps_per_loop):
  """Calculates the number of steps to run on device."""
  if steps_per_loop <= 0:
    raise ValueError('steps_per_loop should be a positive integer.')
  return min(total_steps - current_step, steps_per_loop)
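
A quick illustration of the clipping behavior (values are examples):

  # With 1000 total steps, a 64-step loop at step 990 is clipped to the
  # 10 remaining steps; earlier in training the full loop length is used.
  assert _steps_to_run(current_step=990, total_steps=1000, steps_per_loop=64) == 10
  assert _steps_to_run(current_step=0, total_steps=1000, steps_per_loop=64) == 64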
def _no_metric():
  return None
def metrics_as_dict(metric):
  """Puts the input metric(s) into a dictionary.

  Args:
    metric: metric(s) to be put into the dictionary. `metric` could be an
      object, a list, or a dict of tf.keras.metrics.Metric or has the
      `required_method`.

  Returns:
    A dictionary of valid metrics.
  """
  if isinstance(metric, tf.keras.metrics.Metric):
    metrics = {metric.name: metric}
  elif isinstance(metric, list):
    metrics = {m.name: m for m in metric}
  elif isinstance(metric, dict):
    metrics = metric
  elif not metric:
    return {}
  else:
    metrics = {'metric': metric}
  return metrics
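
A quick illustration of the accepted forms (single metric, list, dict, and None):

  import tensorflow as tf

  acc = tf.keras.metrics.SparseCategoricalAccuracy(name='acc')
  loss = tf.keras.metrics.Mean(name='loss')

  print(metrics_as_dict(acc).keys())          # dict_keys(['acc'])
  print(metrics_as_dict([acc, loss]).keys())  # dict_keys(['acc', 'loss'])
  print(metrics_as_dict({'a': acc}).keys())   # dict_keys(['a'])
  print(metrics_as_dict(None))                # {}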
def metric_results(metric):
  """Collects results from the given metric(s)."""
  metrics = metrics_as_dict(metric)
  metric_result = {
      name: m.result().numpy().astype(float) for name, m in metrics.items()
  }
  return metric_result


def reset_states(metric):
  """Resets states of the given metric(s)."""
  metrics = metrics_as_dict(metric)
  for m in metrics.values():
    m.reset_states()
class SummaryWriter(object):
  """Simple SummaryWriter for writing a dictionary of metrics.

  Attributes:
    writer: The tf.SummaryWriter.
  """

  def __init__(self, model_dir: Text, name: Text):
    """Inits SummaryWriter with paths.

    Args:
      model_dir: the model folder path.
      name: the summary subfolder name.
    """
    self.writer = tf.summary.create_file_writer(os.path.join(model_dir, name))

  def __call__(self, metrics: Union[Dict[Text, float], float], step: int):
    """Writes metrics to summary with the given writer.

    Args:
      metrics: a dictionary of metric values. A dictionary is preferred.
      step: integer. The training step.
    """
    if not isinstance(metrics, dict):
      # Support a scalar metric without a name.
      logging.warning('Warning: the summary writer prefers metrics as a '
                      'dictionary.')
      metrics = {'metric': metrics}

    with self.writer.as_default():
      for k, v in metrics.items():
        tf.summary.scalar(k, v, step=step)
      self.writer.flush()
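
A hedged usage sketch; the directory and metric values are illustrative. Scalars land under `<model_dir>/<name>` and show up in TensorBoard:

  writer = SummaryWriter('/tmp/model_dir', 'eval')  # paths are examples
  writer({'loss': 0.31, 'AP': 0.42}, step=1000)
  writer(0.27, step=2000)  # a bare scalar is stored under the name 'metric'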
class DistributedExecutor(object):
  """Interface to train and eval models with tf.distribute.Strategy."""

  def __init__(self, strategy, params, model_fn, loss_fn, is_multi_host=False):
    """Constructor.

    Args:
      strategy: an instance of tf.distribute.Strategy.
      params: Model configuration needed to run distribution strategy.
      model_fn: Keras model function. Signature:
        (params: ParamsDict) -> tf.keras.models.Model.
      loss_fn: loss function. Signature:
        (y_true: Tensor, y_pred: Tensor) -> Tensor.
      is_multi_host: Set to True when using multiple hosts for training, like
        multi-worker GPU or a TPU pod (slice). Otherwise, False.
    """
    self._params = params
    self._model_fn = model_fn
    self._loss_fn = loss_fn
    self._strategy = strategy
    self._checkpoint_name = 'ctl_step_{step}.ckpt'
    self._is_multi_host = is_multi_host
    self.train_summary_writer = None
    self.eval_summary_writer = None
    self.global_train_step = None

  @property
  def checkpoint_name(self):
    """Returns the default checkpoint name."""
    return self._checkpoint_name

  @checkpoint_name.setter
  def checkpoint_name(self, name):
    """Sets the default checkpoint name."""
    self._checkpoint_name = name

  def loss_fn(self):
    return self._loss_fn()

  def model_fn(self, params):
    return self._model_fn(params)

  def _save_config(self, model_dir):
    """Saves parameters to config files if model_dir is defined."""
    logging.info('Save config to model_dir %s.', model_dir)
    if model_dir:
      if not tf.io.gfile.exists(model_dir):
        tf.io.gfile.makedirs(model_dir)
      self._params.lock()
      params_dict.save_params_dict_to_yaml(self._params,
                                           model_dir + '/params.yaml')
    else:
      logging.warning('model_dir is empty, so skip saving the config.')

  def _get_input_iterator(
      self, input_fn: Callable[..., tf.data.Dataset],
      strategy: tf.distribute.Strategy) -> Optional[Iterator[Any]]:
    """Returns a distributed dataset iterator.

    Args:
      input_fn: (params: dict) -> tf.data.Dataset.
      strategy: an instance of tf.distribute.Strategy.

    Returns:
      An iterator that yields input tensors.
    """
    if input_fn is None:
      return None
    # When training with multiple TPU workers, datasets need to be cloned
    # across workers. Since a Dataset instance cannot be cloned in eager mode,
    # we instead pass a callable that returns a dataset.
    if self._is_multi_host:
      return iter(strategy.distribute_datasets_from_function(input_fn))
    else:
      input_data = input_fn()
      return iter(strategy.experimental_distribute_dataset(input_data))
  def _create_replicated_step(self,
                              strategy,
                              model,
                              loss_fn,
                              optimizer,
                              metric=None):
    """Creates a single training step.

    Args:
      strategy: an instance of tf.distribute.Strategy.
      model: (Tensor, bool) -> Tensor. model function.
      loss_fn: (y_true: Tensor, y_pred: Tensor) -> Tensor.
      optimizer: tf.keras.optimizers.Optimizer.
      metric: tf.keras.metrics.Metric subclass.

    Returns:
      The training step callable.
    """
    metrics = metrics_as_dict(metric)

    def _replicated_step(inputs):
      """Replicated training step."""
      inputs, labels = inputs

      with tf.GradientTape() as tape:
        outputs = model(inputs, training=True)
        prediction_loss = loss_fn(labels, outputs)
        loss = tf.reduce_mean(prediction_loss)
        loss = loss / strategy.num_replicas_in_sync
        for m in metrics.values():
          m.update_state(labels, outputs)

      grads = tape.gradient(loss, model.trainable_variables)
      optimizer.apply_gradients(zip(grads, model.trainable_variables))
      return loss

    return _replicated_step
  def _create_train_step(self,
                         strategy,
                         model,
                         loss_fn,
                         optimizer,
                         metric=None):
    """Creates a distributed training step.

    Args:
      strategy: an instance of tf.distribute.Strategy.
      model: (Tensor, bool) -> Tensor. model function.
      loss_fn: (y_true: Tensor, y_pred: Tensor) -> Tensor.
      optimizer: tf.keras.optimizers.Optimizer.
      metric: tf.keras.metrics.Metric subclass.

    Returns:
      The training step callable.
    """
    replicated_step = self._create_replicated_step(strategy, model, loss_fn,
                                                   optimizer, metric)

    @tf.function
    def train_step(iterator, num_steps):
      """Performs a distributed training step.

      Args:
        iterator: an iterator that yields input tensors.
        num_steps: the number of steps in the loop.

      Returns:
        The loss tensor.
      """
      if not isinstance(num_steps, tf.Tensor):
        raise ValueError('`num_steps` should be a Tensor. A Python object '
                         'may cause retracing.')

      per_replica_losses = strategy.run(
          replicated_step, args=(next(iterator),))
      for _ in tf.range(num_steps - 1):
        per_replica_losses = strategy.run(
            replicated_step, args=(next(iterator),))

      # For reporting, we return the mean of the per-replica losses.
      losses = tf.nest.map_structure(
          lambda x: strategy.reduce(tf.distribute.ReduceOp.MEAN, x, axis=None),
          per_replica_losses)
      return losses

    return train_step
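The Tensor check above matters because `train_step` is a `tf.function`: a plain Python `int` becomes part of the trace signature, so every distinct value would trigger a retrace. A minimal sketch, independent of this class, of the pattern:

import tensorflow as tf

@tf.function
def run_n_steps(n):
  total = tf.constant(0)
  for _ in tf.range(n):  # tf.range keeps the loop inside the graph.
    total += 1
  return total

# Passing Python ints would retrace run_n_steps for every distinct value;
# passing a Tensor traces it once and reuses the concrete function.
for steps in (10, 20, 30):
  run_n_steps(tf.convert_to_tensor(steps, dtype=tf.int32))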
  def _create_test_step(self, strategy, model, metric):
    """Creates a distributed test step."""
    metrics = metrics_as_dict(metric)

    @tf.function
    def test_step(iterator):
      """Calculates evaluation metrics on distributed devices."""
      if not metric:
        logging.info('Skipping test_step because metric is None (%s)', metric)
        return None, None

      def _test_step_fn(inputs):
        """Replicated accuracy calculation."""
        inputs, labels = inputs
        model_outputs = model(inputs, training=False)
        for m in metrics.values():
          m.update_state(labels, model_outputs)
        return labels, model_outputs

      return strategy.run(_test_step_fn, args=(next(iterator),))

    return test_step
  def train(self,
            train_input_fn: Callable[[params_dict.ParamsDict],
                                     tf.data.Dataset],
            eval_input_fn: Optional[Callable[[params_dict.ParamsDict],
                                             tf.data.Dataset]] = None,
            model_dir: Optional[Text] = None,
            total_steps: int = 1,
            iterations_per_loop: int = 1,
            train_metric_fn: Optional[Callable[[], Any]] = None,
            eval_metric_fn: Optional[Callable[[], Any]] = None,
            summary_writer_fn: Callable[[Text, Text],
                                        SummaryWriter] = SummaryWriter,
            init_checkpoint: Optional[Callable[[tf.keras.Model], Any]] = None,
            custom_callbacks: Optional[List[
                tf.keras.callbacks.Callback]] = None,
            continuous_eval: bool = False,
            save_config: bool = True):
    """Runs distributed training.

    Args:
      train_input_fn: (params: dict) -> tf.data.Dataset training data input
        function.
      eval_input_fn: (Optional) same type as train_input_fn. If not None, will
        trigger evaluating metrics on eval data. If None, will not run the
        eval step.
      model_dir: the folder path for model checkpoints.
      total_steps: total training steps.
      iterations_per_loop: train steps per loop. After each loop, this job
        updates metrics like loss and saves a checkpoint.
      train_metric_fn: metric_fn for evaluation in train_step.
      eval_metric_fn: metric_fn for evaluation in test_step.
      summary_writer_fn: function to create summary writer.
      init_checkpoint: function to load checkpoint.
      custom_callbacks: A list of Keras Callback objects to run during
        training. More specifically, the `on_batch_begin()` and
        `on_batch_end()` methods are invoked during training.
      continuous_eval: If `True`, will continuously run evaluation on every
        available checkpoint. If `False`, will do the evaluation once after
        the final step.
      save_config: bool. Whether to save params to model_dir.

    Returns:
      The training loss and eval metrics.
    """
    assert train_input_fn is not None
    if train_metric_fn and not callable(train_metric_fn):
      raise ValueError('if `train_metric_fn` is specified, '
                       'train_metric_fn must be a callable.')
    if eval_metric_fn and not callable(eval_metric_fn):
      raise ValueError('if `eval_metric_fn` is specified, '
                       'eval_metric_fn must be a callable.')
    train_metric_fn = train_metric_fn or _no_metric
    eval_metric_fn = eval_metric_fn or _no_metric

    if custom_callbacks and iterations_per_loop != 1:
      logging.warning(
          'It is semantically wrong to run callbacks when '
          'iterations_per_loop is not one (%s)', iterations_per_loop)

    custom_callbacks = custom_callbacks or []

    def _run_callbacks_on_batch_begin(batch):
      """Runs custom callbacks at the start of every step."""
      if not custom_callbacks:
        return
      for callback in custom_callbacks:
        if callback:
          callback.on_batch_begin(batch)

    def _run_callbacks_on_batch_end(batch):
      """Runs custom callbacks at the end of every step."""
      if not custom_callbacks:
        return
      for callback in custom_callbacks:
        if callback:
          callback.on_batch_end(batch)

    if save_config:
      self._save_config(model_dir)

    if FLAGS.save_checkpoint_freq:
      save_freq = FLAGS.save_checkpoint_freq
    else:
      save_freq = iterations_per_loop

    params = self._params
    strategy = self._strategy
    # To reduce unnecessary send/receive input pipeline operations, we place
    # input pipeline ops on the worker task.
    train_iterator = self._get_input_iterator(train_input_fn, strategy)
    train_loss = None
    train_metric_result = None
    eval_metric_result = None
    tf.keras.backend.set_learning_phase(1)
    with strategy.scope():
      # To correctly place the model weights on accelerators,
      # model and optimizer should be created in scope.
      model = self.model_fn(params.as_dict())
      if not hasattr(model, 'optimizer'):
        raise ValueError('User should set optimizer attribute to model '
                         'inside `model_fn`.')
      optimizer = model.optimizer

      # Training loop starts here.
      checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)
      latest_checkpoint_file = tf.train.latest_checkpoint(model_dir)
      initial_step = 0
      if latest_checkpoint_file:
        logging.info('Checkpoint file %s found and restoring from '
                     'checkpoint', latest_checkpoint_file)
        checkpoint.restore(latest_checkpoint_file)
        initial_step = optimizer.iterations.numpy()
        logging.info('Loading from checkpoint file completed. Init step %d',
                     initial_step)
      elif init_checkpoint:
        logging.info('Restoring from init checkpoint function')
        init_checkpoint(model)
        logging.info('Loading from init checkpoint file completed')

      current_step = optimizer.iterations.numpy()
      checkpoint_name = self.checkpoint_name

      eval_metric = eval_metric_fn()
      train_metric = train_metric_fn()
      train_summary_writer = summary_writer_fn(model_dir, 'eval_train')
      self.train_summary_writer = train_summary_writer.writer

      test_summary_writer = summary_writer_fn(model_dir, 'eval_test')
      self.eval_summary_writer = test_summary_writer.writer

      # Use the training summary writer in TimeHistory if it's in use.
      for cb in custom_callbacks:
        if isinstance(cb, keras_utils.TimeHistory):
          cb.summary_writer = self.train_summary_writer

      # Continue training loop.
      train_step = self._create_train_step(
          strategy=strategy,
          model=model,
          loss_fn=self.loss_fn(),
          optimizer=optimizer,
          metric=train_metric)
      test_step = None
      if eval_input_fn and eval_metric:
        self.global_train_step = model.optimizer.iterations
        test_step = self._create_test_step(strategy, model, metric=eval_metric)

      # Step-0 operations.
      if current_step == 0 and not latest_checkpoint_file:
        _save_checkpoint(checkpoint, model_dir,
                         checkpoint_name.format(step=current_step))
      if test_step:
        eval_iterator = self._get_input_iterator(eval_input_fn, strategy)
        eval_metric_result = self._run_evaluation(test_step, current_step,
                                                  eval_metric, eval_iterator)
        logging.info('Step: %s evaluation metric = %s.', current_step,
                     eval_metric_result)
        test_summary_writer(
            metrics=eval_metric_result, step=optimizer.iterations)
        reset_states(eval_metric)

      logging.info('Training started')
      last_save_checkpoint_step = current_step
      while current_step < total_steps:

        num_steps = _steps_to_run(current_step, total_steps,
                                  iterations_per_loop)
        _run_callbacks_on_batch_begin(current_step)
        train_loss = train_step(
            train_iterator, tf.convert_to_tensor(num_steps, dtype=tf.int32))
        current_step += num_steps

        train_loss = tf.nest.map_structure(lambda x: x.numpy().astype(float),
                                           train_loss)
        _run_callbacks_on_batch_end(current_step - 1)
        if not isinstance(train_loss, dict):
          train_loss = {'total_loss': train_loss}
        if np.isnan(train_loss['total_loss']):
          raise ValueError('total loss is NaN.')

        if train_metric:
          train_metric_result = metric_results(train_metric)
          train_metric_result.update(train_loss)
        else:
          train_metric_result = train_loss
        if callable(optimizer.lr):
          train_metric_result.update(
              {'learning_rate': optimizer.lr(current_step).numpy()})
        else:
          train_metric_result.update({'learning_rate': optimizer.lr.numpy()})
        logging.info('Train Step: %d/%d  / loss = %s / training metric = %s',
                     current_step, total_steps, train_loss,
                     train_metric_result)

        train_summary_writer(
            metrics=train_metric_result, step=optimizer.iterations)

        # Saves model checkpoints and runs validation steps at every
        # iterations_per_loop steps.
        # To avoid repeated model saving, we do not save after the last
        # step of training.
        if save_freq > 0 and current_step < total_steps and (
            current_step - last_save_checkpoint_step) >= save_freq:
          _save_checkpoint(checkpoint, model_dir,
                           checkpoint_name.format(step=current_step))
          last_save_checkpoint_step = current_step

        if continuous_eval and current_step < total_steps and test_step:
          eval_iterator = self._get_input_iterator(eval_input_fn, strategy)
          eval_metric_result = self._run_evaluation(test_step, current_step,
                                                    eval_metric, eval_iterator)
          logging.info('Step: %s evaluation metric = %s.', current_step,
                       eval_metric_result)
          test_summary_writer(
              metrics=eval_metric_result, step=optimizer.iterations)

        # Re-initialize the evaluation metric, except at the last step.
        if eval_metric and current_step < total_steps:
          reset_states(eval_metric)
        if train_metric and current_step < total_steps:
          reset_states(train_metric)

      # Reaches the end of training and saves the last checkpoint.
      if last_save_checkpoint_step < total_steps:
        _save_checkpoint(checkpoint, model_dir,
                         checkpoint_name.format(step=current_step))

      if test_step:
        logging.info('Running final evaluation after training is complete.')
        eval_iterator = self._get_input_iterator(eval_input_fn, strategy)
        eval_metric_result = self._run_evaluation(test_step, current_step,
                                                  eval_metric, eval_iterator)
        logging.info('Final evaluation metric = %s.', eval_metric_result)
        test_summary_writer(
            metrics=eval_metric_result, step=optimizer.iterations)

      self.train_summary_writer.close()
      self.eval_summary_writer.close()

    return train_metric_result, eval_metric_result
  def _run_evaluation(self, test_step, current_training_step, metric,
                      test_iterator):
    """Runs validation steps and aggregates metrics."""
    if not test_iterator or not metric:
      logging.warning(
          'Both test_iterator (%s) and metrics (%s) must not be None.',
          test_iterator, metric)
      return None
    logging.info('Running evaluation after step: %s.', current_training_step)
    eval_step = 0
    while True:
      try:
        with tf.experimental.async_scope():
          test_step(test_iterator)
          eval_step += 1
      except (StopIteration, tf.errors.OutOfRangeError):
        tf.experimental.async_clear_error()
        break

    metric_result = metric_results(metric)
    logging.info('Total eval steps: [%d]', eval_step)
    logging.info('At training step: [%r] Validation metric = %r',
                 current_training_step, metric_result)
    return metric_result
  def evaluate_from_model_dir(
      self,
      model_dir: Text,
      eval_input_fn: Callable[[params_dict.ParamsDict], tf.data.Dataset],
      eval_metric_fn: Callable[[], Any],
      total_steps: int = -1,
      eval_timeout: Optional[int] = None,
      min_eval_interval: int = 180,
      summary_writer_fn: Callable[[Text, Text],
                                  SummaryWriter] = SummaryWriter):
    """Runs distributed evaluation on a model folder.

    Args:
      model_dir: the folder for storing model checkpoints.
      eval_input_fn: (Optional) same type as train_input_fn. If not None, will
        trigger evaluating metrics on eval data. If None, will not run the
        eval step.
      eval_metric_fn: metric_fn for evaluation in test_step.
      total_steps: total training steps. If the current step reaches
        total_steps, the evaluation loop stops.
      eval_timeout: the maximum number of seconds to wait between checkpoints.
        If left as None, the process will wait indefinitely. Used by
        tf.train.checkpoints_iterator.
      min_eval_interval: the minimum number of seconds between yielding
        checkpoints. Used by tf.train.checkpoints_iterator.
      summary_writer_fn: function to create summary writer.

    Returns:
      The eval metrics dictionary of the last checkpoint.
    """
    if not model_dir:
      raise ValueError('model_dir must be set.')

    def terminate_eval():
      logging.info('Terminating eval after %d seconds of no checkpoints',
                   eval_timeout)
      return True

    summary_writer = summary_writer_fn(model_dir, 'eval')
    self.eval_summary_writer = summary_writer.writer

    # Read checkpoints from the given model directory
    # until `eval_timeout` seconds elapse.
    for checkpoint_path in tf.train.checkpoints_iterator(
        model_dir,
        min_interval_secs=min_eval_interval,
        timeout=eval_timeout,
        timeout_fn=terminate_eval):
      eval_metric_result, current_step = self.evaluate_checkpoint(
          checkpoint_path=checkpoint_path,
          eval_input_fn=eval_input_fn,
          eval_metric_fn=eval_metric_fn,
          summary_writer=summary_writer)
      if total_steps > 0 and current_step >= total_steps:
        logging.info('Evaluation finished after training step %d',
                     current_step)
        break
    return eval_metric_result
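For reference, a stripped-down sketch of the polling loop that `tf.train.checkpoints_iterator` drives; the directory and timeout values here are hypothetical:

import tensorflow as tf

# Yields each new checkpoint path appearing in the directory, waiting at
# least 180 seconds between yields; after 3600 seconds with no new
# checkpoint, timeout_fn runs, and returning True ends the iteration.
for ckpt_path in tf.train.checkpoints_iterator(
    '/tmp/model_dir',  # hypothetical model directory
    min_interval_secs=180,
    timeout=3600,
    timeout_fn=lambda: True):
  print('New checkpoint:', ckpt_path)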
  def evaluate_checkpoint(self,
                          checkpoint_path: Text,
                          eval_input_fn: Callable[[params_dict.ParamsDict],
                                                  tf.data.Dataset],
                          eval_metric_fn: Callable[[], Any],
                          summary_writer: Optional[SummaryWriter] = None):
    """Runs distributed evaluation on one checkpoint.

    Args:
      checkpoint_path: the checkpoint to evaluate.
      eval_input_fn: (Optional) same type as train_input_fn. If not None, will
        trigger evaluating metrics on eval data. If None, will not run the
        eval step.
      eval_metric_fn: metric_fn for evaluation in test_step.
      summary_writer: function to create summary writer.

    Returns:
      The eval metrics dictionary of the last checkpoint.
    """
    if not callable(eval_metric_fn):
      raise ValueError('if `eval_metric_fn` is specified, '
                       'eval_metric_fn must be a callable.')

    old_phase = tf.keras.backend.learning_phase()
    tf.keras.backend.set_learning_phase(0)

    params = self._params
    strategy = self._strategy
    # To reduce unnecessary send/receive input pipeline operations, we place
    # input pipeline ops on the worker task.
    with strategy.scope():

      # To correctly place the model weights on accelerators,
      # model and optimizer should be created in scope.
      model = self.model_fn(params.as_dict())
      checkpoint = tf.train.Checkpoint(model=model)

      eval_metric = eval_metric_fn()
      assert eval_metric, 'eval_metric does not exist'
      test_step = self._create_test_step(strategy, model, metric=eval_metric)

      logging.info('Starting to evaluate.')
      if not checkpoint_path:
        raise ValueError('checkpoint path is empty')
      reader = tf.compat.v1.train.NewCheckpointReader(checkpoint_path)
      current_step = reader.get_tensor(
          'optimizer/iter/.ATTRIBUTES/VARIABLE_VALUE')
      logging.info('Checkpoint file %s found and restoring from '
                   'checkpoint', checkpoint_path)
      status = checkpoint.restore(checkpoint_path)
      status.expect_partial().assert_existing_objects_matched()

      self.global_train_step = model.optimizer.iterations
      eval_iterator = self._get_input_iterator(eval_input_fn, strategy)
      eval_metric_result = self._run_evaluation(test_step, current_step,
                                                eval_metric, eval_iterator)
      logging.info('Step: %s evaluation metric = %s.', current_step,
                   eval_metric_result)
      summary_writer(metrics=eval_metric_result, step=current_step)
      reset_states(eval_metric)

    tf.keras.backend.set_learning_phase(old_phase)
    return eval_metric_result, current_step
  def predict(self):
    raise NotImplementedError('Unimplemented function.')
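A minimal usage sketch of the executor, assuming a locked ParamsDict `my_params` and an input function `my_train_input_fn` that follow the signatures documented above; note that `model_fn` must attach an `optimizer` attribute to the returned Keras model, and that the `loss_fn` constructor argument is a factory returning the actual loss callable:

import tensorflow as tf

def my_model_fn(params):
  model = tf.keras.Sequential([tf.keras.layers.Dense(10)])
  # train() reads the optimizer off the model.
  model.optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
  return model

def my_loss_fn():
  # Returns a (y_true, y_pred) -> Tensor callable.
  return tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

executor = DistributedExecutor(
    strategy=tf.distribute.MirroredStrategy(),
    params=my_params,  # hypothetical ParamsDict
    model_fn=my_model_fn,
    loss_fn=my_loss_fn,
    is_multi_host=False)

train_metrics, eval_metrics = executor.train(
    train_input_fn=my_train_input_fn,  # hypothetical input function
    model_dir='/tmp/model_dir',
    total_steps=1000,
    iterations_per_loop=100)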
class ExecutorBuilder(object):
  """Builder of DistributedExecutor.

  Example 1: Builds an executor with a supported Strategy.
    builder = ExecutorBuilder(
        strategy_type='tpu',
        strategy_config={'tpu': '/bns/xxx'})
    dist_executor = builder.build_executor(
        params=params,
        model_fn=my_model_fn,
        loss_fn=my_loss_fn,
        metric_fn=my_metric_fn)

  Example 2: Builds an executor with a customized Strategy.
    builder = ExecutorBuilder()
    builder.strategy = <some customized Strategy>
    dist_executor = builder.build_executor(
        params=params,
        model_fn=my_model_fn,
        loss_fn=my_loss_fn,
        metric_fn=my_metric_fn)

  Example 3: Builds a customized executor with a customized Strategy.
    class MyDistributedExecutor(DistributedExecutor):
      # implementation ...

    builder = ExecutorBuilder()
    builder.strategy = <some customized Strategy>
    dist_executor = builder.build_executor(
        class_ctor=MyDistributedExecutor,
        params=params,
        model_fn=my_model_fn,
        loss_fn=my_loss_fn,
        metric_fn=my_metric_fn)
  """

  def __init__(self, strategy_type=None, strategy_config=None):
    """Constructor.

    Args:
      strategy_type: string. One of 'tpu', 'mirrored', or
        'multi_worker_mirrored'. If None, the user is responsible for setting
        the strategy before calling build_executor(...).
      strategy_config: necessary config for constructing the proper Strategy.
        Check strategy_flags_dict() for examples of the structure.
    """
    _ = distribute_utils.configure_cluster(strategy_config.worker_hosts,
                                           strategy_config.task_index)
    self._strategy = distribute_utils.get_distribution_strategy(
        distribution_strategy=strategy_type,
        num_gpus=strategy_config.num_gpus,
        all_reduce_alg=strategy_config.all_reduce_alg,
        num_packs=strategy_config.num_packs,
        tpu_address=strategy_config.tpu)

  @property
  def strategy(self):
    """Returns the distribution strategy."""
    return self._strategy

  @strategy.setter
  def strategy(self, new_strategy):
    """Sets the distribution strategy."""
    self._strategy = new_strategy

  def build_executor(self,
                     class_ctor=DistributedExecutor,
                     params=None,
                     model_fn=None,
                     loss_fn=None,
                     **kwargs):
    """Creates an executor according to strategy type.

    See the docstring of DistributedExecutor.__init__ for more information on
    the input arguments.

    Args:
      class_ctor: A constructor of executor (default: DistributedExecutor).
      params: ParamsDict, all the model parameters and runtime parameters.
      model_fn: Keras model function.
      loss_fn: loss function.
      **kwargs: other arguments to the executor constructor.

    Returns:
      An instance of DistributedExecutor or its subclass.
    """
    if self._strategy is None:
      raise ValueError('`strategy` should not be None. You need to specify '
                       '`strategy_type` in the builder constructor or '
                       'directly set the `strategy` property of the builder.')
    return class_ctor(
        strategy=self._strategy,
        params=params,
        model_fn=model_fn,
        loss_fn=loss_fn,
        **kwargs)
official/legacy/detection/main.py  0 → 100644
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Main function to train various object detection models."""
import functools
import pprint

from absl import app
from absl import flags
from absl import logging
import tensorflow as tf

from official.common import distribute_utils
from official.legacy.detection.configs import factory as config_factory
from official.legacy.detection.dataloader import input_reader
from official.legacy.detection.dataloader import mode_keys as ModeKeys
from official.legacy.detection.executor import distributed_executor as executor
from official.legacy.detection.executor.detection_executor import DetectionDistributedExecutor
from official.legacy.detection.modeling import factory as model_factory
from official.modeling.hyperparams import params_dict
from official.utils import hyperparams_flags
from official.utils.flags import core as flags_core
from official.utils.misc import keras_utils

hyperparams_flags.initialize_common_flags()
flags_core.define_log_steps()

flags.DEFINE_bool('enable_xla', default=False, help='Enable XLA for GPU')

flags.DEFINE_string(
    'mode', default='train',
    help='Mode to run: `train`, `eval` or `eval_once`.')

flags.DEFINE_string(
    'model', default='retinanet',
    help='Model to run: `retinanet`, `mask_rcnn` or `shapemask`.')

flags.DEFINE_string('training_file_pattern', None,
                    'Location of the train data.')

flags.DEFINE_string('eval_file_pattern', None, 'Location of the eval data.')

flags.DEFINE_string(
    'checkpoint_path', None,
    'The checkpoint path to eval. Only used in eval_once mode.')

FLAGS = flags.FLAGS


def run_executor(params,
                 mode,
                 checkpoint_path=None,
                 train_input_fn=None,
                 eval_input_fn=None,
                 callbacks=None,
                 prebuilt_strategy=None):
  """Runs the object detection model on the distribution strategy defined by the user."""

  if params.architecture.use_bfloat16:
    tf.compat.v2.keras.mixed_precision.set_global_policy('mixed_bfloat16')

  model_builder = model_factory.model_generator(params)

  if prebuilt_strategy is not None:
    strategy = prebuilt_strategy
  else:
    strategy_config = params.strategy_config
    distribute_utils.configure_cluster(strategy_config.worker_hosts,
                                       strategy_config.task_index)
    strategy = distribute_utils.get_distribution_strategy(
        distribution_strategy=params.strategy_type,
        num_gpus=strategy_config.num_gpus,
        all_reduce_alg=strategy_config.all_reduce_alg,
        num_packs=strategy_config.num_packs,
        tpu_address=strategy_config.tpu)

  # Each host is assumed to drive up to 8 replicas, so round up.
  num_workers = int(strategy.num_replicas_in_sync + 7) // 8
  is_multi_host = (int(num_workers) >= 2)

  if mode == 'train':

    def _model_fn(params):
      return model_builder.build_model(params, mode=ModeKeys.TRAIN)

    logging.info(
        'Train num_replicas_in_sync %d num_workers %d is_multi_host %s',
        strategy.num_replicas_in_sync, num_workers, is_multi_host)

    dist_executor = DetectionDistributedExecutor(
        strategy=strategy,
        params=params,
        model_fn=_model_fn,
        loss_fn=model_builder.build_loss_fn,
        is_multi_host=is_multi_host,
        predict_post_process_fn=model_builder.post_processing,
        trainable_variables_filter=model_builder
        .make_filter_trainable_variables_fn())

    if is_multi_host:
      train_input_fn = functools.partial(
          train_input_fn,
          batch_size=params.train.batch_size // strategy.num_replicas_in_sync)

    return dist_executor.train(
        train_input_fn=train_input_fn,
        model_dir=params.model_dir,
        iterations_per_loop=params.train.iterations_per_loop,
        total_steps=params.train.total_steps,
        init_checkpoint=model_builder.make_restore_checkpoint_fn(),
        custom_callbacks=callbacks,
        save_config=True)
  elif mode == 'eval' or mode == 'eval_once':

    def _model_fn(params):
      return model_builder.build_model(params, mode=ModeKeys.PREDICT_WITH_GT)

    logging.info(
        'Eval num_replicas_in_sync %d num_workers %d is_multi_host %s',
        strategy.num_replicas_in_sync, num_workers, is_multi_host)

    if is_multi_host:
      eval_input_fn = functools.partial(
          eval_input_fn,
          batch_size=params.eval.batch_size // strategy.num_replicas_in_sync)

    dist_executor = DetectionDistributedExecutor(
        strategy=strategy,
        params=params,
        model_fn=_model_fn,
        loss_fn=model_builder.build_loss_fn,
        is_multi_host=is_multi_host,
        predict_post_process_fn=model_builder.post_processing,
        trainable_variables_filter=model_builder
        .make_filter_trainable_variables_fn())

    if mode == 'eval':
      results = dist_executor.evaluate_from_model_dir(
          model_dir=params.model_dir,
          eval_input_fn=eval_input_fn,
          eval_metric_fn=model_builder.eval_metrics,
          eval_timeout=params.eval.eval_timeout,
          min_eval_interval=params.eval.min_eval_interval,
          total_steps=params.train.total_steps)
    else:
      # Run evaluation once for a single checkpoint.
      if not checkpoint_path:
        raise ValueError('checkpoint_path cannot be empty.')
      if tf.io.gfile.isdir(checkpoint_path):
        checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)
      summary_writer = executor.SummaryWriter(params.model_dir, 'eval')
      results, _ = dist_executor.evaluate_checkpoint(
          checkpoint_path=checkpoint_path,
          eval_input_fn=eval_input_fn,
          eval_metric_fn=model_builder.eval_metrics,
          summary_writer=summary_writer)
    for k, v in results.items():
      logging.info('Final eval metric %s: %f', k, v)
    return results
  else:
    raise ValueError('Mode not found: %s.' % mode)


def run(callbacks=None):
  """Runs the experiment."""
  keras_utils.set_session_config(enable_xla=FLAGS.enable_xla)

  params = config_factory.config_generator(FLAGS.model)

  params = params_dict.override_params_dict(
      params, FLAGS.config_file, is_strict=True)

  params = params_dict.override_params_dict(
      params, FLAGS.params_override, is_strict=True)
  params.override(
      {
          'strategy_type': FLAGS.strategy_type,
          'model_dir': FLAGS.model_dir,
          'strategy_config': executor.strategy_flags_dict(),
      },
      is_strict=False)

  # Make sure use_tpu and strategy_type are in sync.
  params.use_tpu = (params.strategy_type == 'tpu')

  if not params.use_tpu:
    params.override(
        {
            'architecture': {
                'use_bfloat16': False,
            },
            'norm_activation': {
                'use_sync_bn': False,
            },
        },
        is_strict=True)

  params.validate()
  params.lock()
  pp = pprint.PrettyPrinter()
  params_str = pp.pformat(params.as_dict())
  logging.info('Model Parameters: %s', params_str)

  train_input_fn = None
  eval_input_fn = None
  training_file_pattern = (
      FLAGS.training_file_pattern or params.train.train_file_pattern)
  eval_file_pattern = FLAGS.eval_file_pattern or params.eval.eval_file_pattern
  if not training_file_pattern and not eval_file_pattern:
    raise ValueError('Must provide at least one of training_file_pattern and '
                     'eval_file_pattern.')

  if training_file_pattern:
    # Use global batch size for single host.
    train_input_fn = input_reader.InputFn(
        file_pattern=training_file_pattern,
        params=params,
        mode=input_reader.ModeKeys.TRAIN,
        batch_size=params.train.batch_size)

  if eval_file_pattern:
    eval_input_fn = input_reader.InputFn(
        file_pattern=eval_file_pattern,
        params=params,
        mode=input_reader.ModeKeys.PREDICT_WITH_GT,
        batch_size=params.eval.batch_size,
        num_examples=params.eval.eval_samples)

  if callbacks is None:
    callbacks = []

  if FLAGS.log_steps:
    callbacks.append(
        keras_utils.TimeHistory(
            batch_size=params.train.batch_size,
            log_steps=FLAGS.log_steps,
        ))

  return run_executor(
      params,
      FLAGS.mode,
      checkpoint_path=FLAGS.checkpoint_path,
      train_input_fn=train_input_fn,
      eval_input_fn=eval_input_fn,
      callbacks=callbacks)


def main(argv):
  del argv  # Unused.
  run()


if __name__ == '__main__':
  tf.config.set_soft_device_placement(True)
  app.run(main)
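A small sketch (not part of this file) of the ParamsDict override flow that run() relies on: start from a ParamsDict, layer overrides on top, then lock it against further changes. The parameter names below are illustrative assumptions.

from official.modeling.hyperparams import params_dict

params = params_dict.ParamsDict({'train': {'batch_size': 64}})
params.override({'train': {'batch_size': 256}}, is_strict=True)
params.lock()            # subsequent overrides now raise an error
print(params.as_dict())  # {'train': {'batch_size': 256}}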
official/legacy/detection/modeling/__init__.py  0 → 100644
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
official/legacy/detection/modeling/architecture/__init__.py  0 → 100644
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
official/legacy/detection/modeling/architecture/factory.py  0 → 100644
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Model architecture factory."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from official.legacy.detection.modeling.architecture import fpn
from official.legacy.detection.modeling.architecture import heads
from official.legacy.detection.modeling.architecture import identity
from official.legacy.detection.modeling.architecture import nn_ops
from official.legacy.detection.modeling.architecture import resnet
from official.legacy.detection.modeling.architecture import spinenet


def norm_activation_generator(params):
  return nn_ops.norm_activation_builder(
      momentum=params.batch_norm_momentum,
      epsilon=params.batch_norm_epsilon,
      trainable=params.batch_norm_trainable,
      activation=params.activation)


def backbone_generator(params):
  """Generator function for various backbone models."""
  if params.architecture.backbone == 'resnet':
    resnet_params = params.resnet
    backbone_fn = resnet.Resnet(
        resnet_depth=resnet_params.resnet_depth,
        activation=params.norm_activation.activation,
        norm_activation=norm_activation_generator(params.norm_activation))
  elif params.architecture.backbone == 'spinenet':
    spinenet_params = params.spinenet
    backbone_fn = spinenet.SpineNetBuilder(model_id=spinenet_params.model_id)
  else:
    raise ValueError('Backbone model `{}` is not supported.'.format(
        params.architecture.backbone))

  return backbone_fn


def multilevel_features_generator(params):
  """Generator function for various FPN models."""
  if params.architecture.multilevel_features == 'fpn':
    fpn_params = params.fpn
    fpn_fn = fpn.Fpn(
        min_level=params.architecture.min_level,
        max_level=params.architecture.max_level,
        fpn_feat_dims=fpn_params.fpn_feat_dims,
        use_separable_conv=fpn_params.use_separable_conv,
        activation=params.norm_activation.activation,
        use_batch_norm=fpn_params.use_batch_norm,
        norm_activation=norm_activation_generator(params.norm_activation))
  elif params.architecture.multilevel_features == 'identity':
    fpn_fn = identity.Identity()
  else:
    raise ValueError('The multi-level feature model `{}` is not supported.'
                     .format(params.architecture.multilevel_features))
  return fpn_fn


def retinanet_head_generator(params):
  """Generator function for RetinaNet head architecture."""
  head_params = params.retinanet_head
  anchors_per_location = params.anchor.num_scales * len(
      params.anchor.aspect_ratios)
  return heads.RetinanetHead(
      params.architecture.min_level,
      params.architecture.max_level,
      params.architecture.num_classes,
      anchors_per_location,
      head_params.num_convs,
      head_params.num_filters,
      head_params.use_separable_conv,
      norm_activation=norm_activation_generator(params.norm_activation))


def rpn_head_generator(params):
  """Generator function for RPN head architecture."""
  head_params = params.rpn_head
  anchors_per_location = params.anchor.num_scales * len(
      params.anchor.aspect_ratios)
  return heads.RpnHead(
      params.architecture.min_level,
      params.architecture.max_level,
      anchors_per_location,
      head_params.num_convs,
      head_params.num_filters,
      head_params.use_separable_conv,
      params.norm_activation.activation,
      head_params.use_batch_norm,
      norm_activation=norm_activation_generator(params.norm_activation))


def oln_rpn_head_generator(params):
  """Generator function for OLN-proposal (OLN-RPN) head architecture."""
  head_params = params.rpn_head
  anchors_per_location = params.anchor.num_scales * len(
      params.anchor.aspect_ratios)
  return heads.OlnRpnHead(
      params.architecture.min_level,
      params.architecture.max_level,
      anchors_per_location,
      head_params.num_convs,
      head_params.num_filters,
      head_params.use_separable_conv,
      params.norm_activation.activation,
      head_params.use_batch_norm,
      norm_activation=norm_activation_generator(params.norm_activation))


def fast_rcnn_head_generator(params):
  """Generator function for Fast R-CNN head architecture."""
  head_params = params.frcnn_head
  return heads.FastrcnnHead(
      params.architecture.num_classes,
      head_params.num_convs,
      head_params.num_filters,
      head_params.use_separable_conv,
      head_params.num_fcs,
      head_params.fc_dims,
      params.norm_activation.activation,
      head_params.use_batch_norm,
      norm_activation=norm_activation_generator(params.norm_activation))


def oln_box_score_head_generator(params):
  """Generator function for Scoring Fast R-CNN head architecture."""
  head_params = params.frcnn_head
  return heads.OlnBoxScoreHead(
      params.architecture.num_classes,
      head_params.num_convs,
      head_params.num_filters,
      head_params.use_separable_conv,
      head_params.num_fcs,
      head_params.fc_dims,
      params.norm_activation.activation,
      head_params.use_batch_norm,
      norm_activation=norm_activation_generator(params.norm_activation))


def mask_rcnn_head_generator(params):
  """Generator function for Mask R-CNN head architecture."""
  head_params = params.mrcnn_head
  return heads.MaskrcnnHead(
      params.architecture.num_classes,
      params.architecture.mask_target_size,
      head_params.num_convs,
      head_params.num_filters,
      head_params.use_separable_conv,
      params.norm_activation.activation,
      head_params.use_batch_norm,
      norm_activation=norm_activation_generator(params.norm_activation))


def oln_mask_score_head_generator(params):
  """Generator function for Scoring Mask R-CNN head architecture."""
  head_params = params.mrcnn_head
  return heads.OlnMaskScoreHead(
      params.architecture.num_classes,
      params.architecture.mask_target_size,
      head_params.num_convs,
      head_params.num_filters,
      head_params.use_separable_conv,
      params.norm_activation.activation,
      head_params.use_batch_norm,
      norm_activation=norm_activation_generator(params.norm_activation))


def shapeprior_head_generator(params):
  """Generator function for shape prior head architecture."""
  head_params = params.shapemask_head
  return heads.ShapemaskPriorHead(
      params.architecture.num_classes,
      head_params.num_downsample_channels,
      head_params.mask_crop_size,
      head_params.use_category_for_mask,
      head_params.shape_prior_path)


def coarsemask_head_generator(params):
  """Generator function for ShapeMask coarse mask head architecture."""
  head_params = params.shapemask_head
  return heads.ShapemaskCoarsemaskHead(
      params.architecture.num_classes,
      head_params.num_downsample_channels,
      head_params.mask_crop_size,
      head_params.use_category_for_mask,
      head_params.num_convs,
      norm_activation=norm_activation_generator(params.norm_activation))


def finemask_head_generator(params):
  """Generator function for ShapeMask fine mask head architecture."""
  head_params = params.shapemask_head
  return heads.ShapemaskFinemaskHead(
      params.architecture.num_classes,
      head_params.num_downsample_channels,
      head_params.mask_crop_size,
      head_params.use_category_for_mask,
      head_params.num_convs,
      head_params.upsample_factor)
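A quick sketch of the anchors_per_location arithmetic shared by the head generators above; with the common RetinaNet defaults of 3 octave scales and 3 aspect ratios, every feature-map position carries 9 anchors:

num_scales = 3
aspect_ratios = [0.5, 1.0, 2.0]
anchors_per_location = num_scales * len(aspect_ratios)
assert anchors_per_location == 9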
official/legacy/detection/modeling/architecture/fpn.py  0 → 100644
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Feature Pyramid Networks.
Feature Pyramid Networks were proposed in:
[1] Tsung-Yi Lin, Piotr Dollar, Ross Girshick, Kaiming He, Bharath Hariharan,
    and Serge Belongie
    Feature Pyramid Networks for Object Detection. CVPR 2017.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools

import tensorflow as tf

from official.legacy.detection.modeling.architecture import nn_ops
from official.legacy.detection.ops import spatial_transform_ops


class Fpn(object):
  """Feature pyramid networks."""

  def __init__(self,
               min_level=3,
               max_level=7,
               fpn_feat_dims=256,
               use_separable_conv=False,
               activation='relu',
               use_batch_norm=True,
               norm_activation=nn_ops.norm_activation_builder(
                   activation='relu')):
    """FPN initialization function.

    Args:
      min_level: `int` minimum level in FPN output feature maps.
      max_level: `int` maximum level in FPN output feature maps.
      fpn_feat_dims: `int` number of filters in FPN layers.
      use_separable_conv: `bool`, if True use separable convolution for
        convolution in FPN layers.
      activation: the activation function.
      use_batch_norm: `bool`, indicating whether batchnorm layers are added.
      norm_activation: an operation that includes a normalization layer
        followed by an optional activation layer.
    """
    self._min_level = min_level
    self._max_level = max_level
    self._fpn_feat_dims = fpn_feat_dims
    if use_separable_conv:
      self._conv2d_op = functools.partial(
          tf.keras.layers.SeparableConv2D, depth_multiplier=1)
    else:
      self._conv2d_op = tf.keras.layers.Conv2D
    if activation == 'relu':
      self._activation_op = tf.nn.relu
    elif activation == 'swish':
      self._activation_op = tf.nn.swish
    else:
      raise ValueError('Unsupported activation `{}`.'.format(activation))
    self._use_batch_norm = use_batch_norm
    self._norm_activation = norm_activation

    self._norm_activations = {}
    self._lateral_conv2d_op = {}
    self._post_hoc_conv2d_op = {}
    self._coarse_conv2d_op = {}
    for level in range(self._min_level, self._max_level + 1):
      if self._use_batch_norm:
        self._norm_activations[level] = norm_activation(
            use_activation=False, name='p%d-bn' % level)
      self._lateral_conv2d_op[level] = self._conv2d_op(
          filters=self._fpn_feat_dims,
          kernel_size=(1, 1),
          padding='same',
          name='l%d' % level)
      self._post_hoc_conv2d_op[level] = self._conv2d_op(
          filters=self._fpn_feat_dims,
          strides=(1, 1),
          kernel_size=(3, 3),
          padding='same',
          name='post_hoc_d%d' % level)
      self._coarse_conv2d_op[level] = self._conv2d_op(
          filters=self._fpn_feat_dims,
          strides=(2, 2),
          kernel_size=(3, 3),
          padding='same',
          name='p%d' % level)

  def __call__(self, multilevel_features, is_training=None):
    """Returns the FPN features for the given multilevel features.

    Args:
      multilevel_features: a `dict` containing `int` keys for continuous
        feature levels, e.g., [2, 3, 4, 5]. The values are the corresponding
        features with shape [batch_size, height_l, width_l, num_filters].
      is_training: `bool`. If True, the model is in training mode.

    Returns:
      a `dict` containing `int` keys for continuous feature levels
      [min_level, min_level + 1, ..., max_level]. The values are the
      corresponding FPN features with shape
      [batch_size, height_l, width_l, fpn_feat_dims].
    """
    input_levels = list(multilevel_features.keys())
    if min(input_levels) > self._min_level:
      raise ValueError(
          'The minimum backbone level %d should be less than or equal to '
          'the FPN minimum level %d.' % (min(input_levels), self._min_level))
    backbone_max_level = min(max(input_levels), self._max_level)
    with tf.name_scope('fpn'):
      # Adds lateral connections.
      feats_lateral = {}
      for level in range(self._min_level, backbone_max_level + 1):
        feats_lateral[level] = self._lateral_conv2d_op[level](
            multilevel_features[level])

      # Adds the top-down path.
      feats = {backbone_max_level: feats_lateral[backbone_max_level]}
      for level in range(backbone_max_level - 1, self._min_level - 1, -1):
        feats[level] = spatial_transform_ops.nearest_upsampling(
            feats[level + 1], 2) + feats_lateral[level]

      # Adds post-hoc 3x3 convolutions.
      for level in range(self._min_level, backbone_max_level + 1):
        feats[level] = self._post_hoc_conv2d_op[level](feats[level])

      # Adds the coarser FPN levels introduced for RetinaNet.
      for level in range(backbone_max_level + 1, self._max_level + 1):
        feats_in = feats[level - 1]
        if level > backbone_max_level + 1:
          feats_in = self._activation_op(feats_in)
        feats[level] = self._coarse_conv2d_op[level](feats_in)
      if self._use_batch_norm:
        # Adds batch_norm layers.
        for level in range(self._min_level, self._max_level + 1):
          feats[level] = self._norm_activations[level](
              feats[level], is_training=is_training)
    return feats
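A self-contained sketch of the top-down merge performed in __call__, using tf.keras.layers.UpSampling2D as a stand-in for spatial_transform_ops.nearest_upsampling; the shapes below are illustrative assumptions:

import tensorflow as tf

# Stand-in feature maps for backbone levels 4 (16x16) and 3 (32x32), already
# projected to the FPN dimension by the 1x1 lateral convolutions.
p4 = tf.random.normal([1, 16, 16, 256])
p3_lateral = tf.random.normal([1, 32, 32, 256])

# Nearest-neighbor upsample the coarser level and add the lateral feature,
# mirroring `nearest_upsampling(feats[level + 1], 2) + feats_lateral[level]`.
upsample = tf.keras.layers.UpSampling2D(size=2, interpolation='nearest')
p3 = upsample(p4) + p3_lateral
print(p3.shape)  # (1, 32, 32, 256)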
official/legacy/detection/modeling/architecture/heads.py  0 → 100644
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Classes to build various prediction heads in all supported models."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools

import numpy as np
import tensorflow as tf

from official.legacy.detection.modeling.architecture import nn_ops
from official.legacy.detection.ops import spatial_transform_ops


class RpnHead(tf.keras.layers.Layer):
  """Region Proposal Network head."""

  def __init__(self,
               min_level,
               max_level,
               anchors_per_location,
               num_convs=2,
               num_filters=256,
               use_separable_conv=False,
               activation='relu',
               use_batch_norm=True,
               norm_activation=nn_ops.norm_activation_builder(
                   activation='relu')):
    """Initialize params to build the Region Proposal Network head.

    Args:
      min_level: `int` number of the minimum feature level.
      max_level: `int` number of the maximum feature level.
      anchors_per_location: `int` number of anchors per pixel location.
      num_convs: `int` number of intermediate conv layers before the
        prediction.
      num_filters: `int` number of filters of the intermediate conv layers.
      use_separable_conv: `bool`, indicating whether separable conv layers
        are used.
      activation: activation function. Supports 'relu' and 'swish'.
      use_batch_norm: `bool`, indicating whether batchnorm layers are added.
      norm_activation: an operation that includes a normalization layer
        followed by an optional activation layer.
    """
    super().__init__(autocast=False)
    self._min_level = min_level
    self._max_level = max_level
    self._anchors_per_location = anchors_per_location
    if activation == 'relu':
      self._activation_op = tf.nn.relu
    elif activation == 'swish':
      self._activation_op = tf.nn.swish
    else:
      raise ValueError('Unsupported activation `{}`.'.format(activation))
    self._use_batch_norm = use_batch_norm

    if use_separable_conv:
      self._conv2d_op = functools.partial(
          tf.keras.layers.SeparableConv2D,
          depth_multiplier=1,
          bias_initializer=tf.zeros_initializer())
    else:
      self._conv2d_op = functools.partial(
          tf.keras.layers.Conv2D,
          kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
          bias_initializer=tf.zeros_initializer())

    self._rpn_conv = self._conv2d_op(
        num_filters,
        kernel_size=(3, 3),
        strides=(1, 1),
        activation=(None if self._use_batch_norm else self._activation_op),
        padding='same',
        name='rpn')
    self._rpn_class_conv = self._conv2d_op(
        anchors_per_location,
        kernel_size=(1, 1),
        strides=(1, 1),
        padding='valid',
        name='rpn-class')
    self._rpn_box_conv = self._conv2d_op(
        4 * anchors_per_location,
        kernel_size=(1, 1),
        strides=(1, 1),
        padding='valid',
        name='rpn-box')

    self._norm_activations = {}
    if self._use_batch_norm:
      for level in range(self._min_level, self._max_level + 1):
        self._norm_activations[level] = norm_activation(
            name='rpn-l%d-bn' % level)

  def _shared_rpn_heads(self, features, anchors_per_location, level,
                        is_training):
    """Shared RPN heads."""
    features = self._rpn_conv(features)
    if self._use_batch_norm:
      # The batch normalization layers are not shared between levels.
      features = self._norm_activations[level](
          features, is_training=is_training)
    # Proposal classification scores.
    scores = self._rpn_class_conv(features)
    # Proposal bbox regression deltas.
    bboxes = self._rpn_box_conv(features)

    return scores, bboxes

  def call(self, features, is_training=None):

    scores_outputs = {}
    box_outputs = {}

    with tf.name_scope('rpn_head'):
      for level in range(self._min_level, self._max_level + 1):
        scores_output, box_output = self._shared_rpn_heads(
            features[level], self._anchors_per_location, level, is_training)
        scores_outputs[level] = scores_output
        box_outputs[level] = box_output
      return scores_outputs, box_outputs
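To make the per-level output shapes concrete, here is a sketch using plain Conv2D stand-ins for the head's 1x1 prediction layers (the feature-map size is an illustrative assumption): scores carry one channel per anchor, and boxes carry four regression-delta channels per anchor.

import tensorflow as tf

anchors_per_location = 9
features = tf.random.normal([1, 32, 32, 256])  # one pyramid level
scores = tf.keras.layers.Conv2D(anchors_per_location, 1)(features)
boxes = tf.keras.layers.Conv2D(4 * anchors_per_location, 1)(features)
print(scores.shape, boxes.shape)  # (1, 32, 32, 9) (1, 32, 32, 36)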
class OlnRpnHead(tf.keras.layers.Layer):
  """Region Proposal Network for Object Localization Network (OLN)."""

  def __init__(self,
               min_level,
               max_level,
               anchors_per_location,
               num_convs=2,
               num_filters=256,
               use_separable_conv=False,
               activation='relu',
               use_batch_norm=True,
               norm_activation=nn_ops.norm_activation_builder(
                   activation='relu')):
    """Initialize params to build the Region Proposal Network head.

    Args:
      min_level: `int` number of the minimum feature level.
      max_level: `int` number of the maximum feature level.
      anchors_per_location: `int` number of anchors per pixel location.
      num_convs: `int` number of intermediate conv layers before the
        prediction.
      num_filters: `int` number of filters of the intermediate conv layers.
      use_separable_conv: `bool`, indicating whether separable conv layers
        are used.
      activation: activation function. Supports 'relu' and 'swish'.
      use_batch_norm: `bool`, indicating whether batchnorm layers are added.
      norm_activation: an operation that includes a normalization layer
        followed by an optional activation layer.
    """
    # Initialize the Keras base layer before assigning attributes.
    super().__init__(autocast=False)
    self._min_level = min_level
    self._max_level = max_level
    self._anchors_per_location = anchors_per_location
    if activation == 'relu':
      self._activation_op = tf.nn.relu
    elif activation == 'swish':
      self._activation_op = tf.nn.swish
    else:
      raise ValueError('Unsupported activation `{}`.'.format(activation))
    self._use_batch_norm = use_batch_norm

    if use_separable_conv:
      self._conv2d_op = functools.partial(
          tf.keras.layers.SeparableConv2D,
          depth_multiplier=1,
          bias_initializer=tf.zeros_initializer())
    else:
      self._conv2d_op = functools.partial(
          tf.keras.layers.Conv2D,
          kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
          bias_initializer=tf.zeros_initializer())

    self._rpn_conv = self._conv2d_op(
        num_filters,
        kernel_size=(3, 3),
        strides=(1, 1),
        activation=(None if self._use_batch_norm else self._activation_op),
        padding='same',
        name='rpn')
    self._rpn_class_conv = self._conv2d_op(
        anchors_per_location,
        kernel_size=(1, 1),
        strides=(1, 1),
        padding='valid',
        name='rpn-class')
    self._rpn_box_conv = self._conv2d_op(
        4 * anchors_per_location,
        kernel_size=(1, 1),
        strides=(1, 1),
        padding='valid',
        name='rpn-box-lrtb')
    self._rpn_center_conv = self._conv2d_op(
        anchors_per_location,
        kernel_size=(1, 1),
        strides=(1, 1),
        padding='valid',
        name='rpn-centerness')

    self._norm_activations = {}
    if self._use_batch_norm:
      for level in range(self._min_level, self._max_level + 1):
        self._norm_activations[level] = norm_activation(
            name='rpn-l%d-bn' % level)

  def _shared_rpn_heads(self, features, anchors_per_location, level,
                        is_training):
    """Shared RPN heads."""
    features = self._rpn_conv(features)
    if self._use_batch_norm:
      # The batch normalization layers are not shared between levels.
      features = self._norm_activations[level](
          features, is_training=is_training)
    # L2-normalize the features for training stability.
    features = tf.math.l2_normalize(features, axis=-1, name='rpn-norm')
    # Proposal classification scores.
    scores = self._rpn_class_conv(features)
    # Proposal bbox regression deltas.
    bboxes = self._rpn_box_conv(features)
    # Proposal centerness scores.
    centers = self._rpn_center_conv(features)

    return scores, bboxes, centers

  def __call__(self, features, is_training=None):

    scores_outputs = {}
    box_outputs = {}
    center_outputs = {}

    with tf.name_scope('rpn_head'):
      for level in range(self._min_level, self._max_level + 1):
        scores_output, box_output, center_output = self._shared_rpn_heads(
            features[level], self._anchors_per_location, level, is_training)
        scores_outputs[level] = scores_output
        box_outputs[level] = box_output
        center_outputs[level] = center_output
      return scores_outputs, box_outputs, center_outputs
class FastrcnnHead(tf.keras.layers.Layer):
  """Fast R-CNN box head."""

  def __init__(self,
               num_classes,
               num_convs=0,
               num_filters=256,
               use_separable_conv=False,
               num_fcs=2,
               fc_dims=1024,
               activation='relu',
               use_batch_norm=True,
               norm_activation=nn_ops.norm_activation_builder(
                   activation='relu')):
    """Initialize params to build the Fast R-CNN box head.

    Args:
      num_classes: an integer for the number of classes.
      num_convs: `int` number of intermediate conv layers before the FC
        layers.
      num_filters: `int` number of filters of the intermediate conv layers.
      use_separable_conv: `bool`, indicating whether separable conv layers
        are used.
      num_fcs: `int` number of FC layers before the predictions.
      fc_dims: `int` dimension of the FC layers.
      activation: activation function. Supports 'relu' and 'swish'.
      use_batch_norm: `bool`, indicating whether batchnorm layers are added.
      norm_activation: an operation that includes a normalization layer
        followed by an optional activation layer.
    """
    super(FastrcnnHead, self).__init__(autocast=False)
    self._num_classes = num_classes
    self._num_convs = num_convs
    self._num_filters = num_filters
    if use_separable_conv:
      self._conv2d_op = functools.partial(
          tf.keras.layers.SeparableConv2D,
          depth_multiplier=1,
          bias_initializer=tf.zeros_initializer())
    else:
      self._conv2d_op = functools.partial(
          tf.keras.layers.Conv2D,
          kernel_initializer=tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          bias_initializer=tf.zeros_initializer())
    self._num_fcs = num_fcs
    self._fc_dims = fc_dims
    if activation == 'relu':
      self._activation_op = tf.nn.relu
    elif activation == 'swish':
      self._activation_op = tf.nn.swish
    else:
      raise ValueError('Unsupported activation `{}`.'.format(activation))
    self._use_batch_norm = use_batch_norm
    self._norm_activation = norm_activation

    self._conv_ops = []
    self._conv_bn_ops = []
    for i in range(self._num_convs):
      self._conv_ops.append(
          self._conv2d_op(
              self._num_filters,
              kernel_size=(3, 3),
              strides=(1, 1),
              padding='same',
              dilation_rate=(1, 1),
              activation=(None
                          if self._use_batch_norm else self._activation_op),
              name='conv_{}'.format(i)))
      if self._use_batch_norm:
        self._conv_bn_ops.append(self._norm_activation())

    self._fc_ops = []
    self._fc_bn_ops = []
    for i in range(self._num_fcs):
      self._fc_ops.append(
          tf.keras.layers.Dense(
              units=self._fc_dims,
              activation=(None
                          if self._use_batch_norm else self._activation_op),
              name='fc{}'.format(i)))
      if self._use_batch_norm:
        self._fc_bn_ops.append(self._norm_activation(fused=False))

    self._class_predict = tf.keras.layers.Dense(
        self._num_classes,
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
        bias_initializer=tf.zeros_initializer(),
        name='class-predict')
    self._box_predict = tf.keras.layers.Dense(
        self._num_classes * 4,
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.001),
        bias_initializer=tf.zeros_initializer(),
        name='box-predict')

  def call(self, roi_features, is_training=None):
    """Box and class branches for the Mask R-CNN model.

    Args:
      roi_features: an ROI feature tensor of shape [batch_size, num_rois,
        height_l, width_l, num_filters].
      is_training: `bool`. If True, the model is in training mode.

    Returns:
      class_outputs: a tensor with a shape of
        [batch_size, num_rois, num_classes], representing the class
        predictions.
      box_outputs: a tensor with a shape of
        [batch_size, num_rois, num_classes * 4], representing the box
        predictions.
    """
    with tf.name_scope('fast_rcnn_head'):
      # Reshape inputs before the FC layers.
      _, num_rois, height, width, filters = roi_features.get_shape().as_list()
      net = tf.reshape(roi_features, [-1, height, width, filters])
      for i in range(self._num_convs):
        net = self._conv_ops[i](net)
        if self._use_batch_norm:
          net = self._conv_bn_ops[i](net, is_training=is_training)

      filters = self._num_filters if self._num_convs > 0 else filters
      net = tf.reshape(net, [-1, num_rois, height * width * filters])

      for i in range(self._num_fcs):
        net = self._fc_ops[i](net)
        if self._use_batch_norm:
          net = self._fc_bn_ops[i](net, is_training=is_training)

      class_outputs = self._class_predict(net)
      box_outputs = self._box_predict(net)
      return class_outputs, box_outputs
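The reshape bookkeeping in call() is worth seeing in isolation: the batch and ROI dimensions are folded together so the conv layers see an ordinary 4-D tensor, then restored (with the spatial dims flattened) before the FC layers. A sketch with assumed shapes:

import tensorflow as tf

batch, num_rois, h, w, c = 2, 100, 7, 7, 256
roi_features = tf.random.normal([batch, num_rois, h, w, c])

net = tf.reshape(roi_features, [-1, h, w, c])       # (200, 7, 7, 256)
net = tf.keras.layers.Conv2D(256, 3, padding='same')(net)
net = tf.reshape(net, [-1, num_rois, h * w * 256])  # (2, 100, 12544)
print(net.shape)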

class OlnBoxScoreHead(tf.keras.layers.Layer):
  """Box head of Object Localization Network (OLN)."""

  def __init__(self,
               num_classes,
               num_convs=0,
               num_filters=256,
               use_separable_conv=False,
               num_fcs=2,
               fc_dims=1024,
               activation='relu',
               use_batch_norm=True,
               norm_activation=nn_ops.norm_activation_builder(
                   activation='relu')):
    """Initialize params to build the OLN box head.

    Args:
      num_classes: an integer for the number of classes.
      num_convs: `int` number that represents the number of the intermediate
        conv layers before the FC layers.
      num_filters: `int` number that represents the number of filters of the
        intermediate conv layers.
      use_separable_conv: `bool`, indicating whether separable conv layers
        are used.
      num_fcs: `int` number that represents the number of FC layers before the
        predictions.
      fc_dims: `int` number that represents the dimension of the FC layers.
      activation: activation function. Support 'relu' and 'swish'.
      use_batch_norm: `bool`, indicating whether batchnorm layers are added.
      norm_activation: an operation that includes a normalization layer
        followed by an optional activation layer.
    """
    self._num_classes = num_classes
    self._num_convs = num_convs
    self._num_filters = num_filters
    if use_separable_conv:
      self._conv2d_op = functools.partial(
          tf.keras.layers.SeparableConv2D,
          depth_multiplier=1,
          bias_initializer=tf.zeros_initializer())
    else:
      self._conv2d_op = functools.partial(
          tf.keras.layers.Conv2D,
          kernel_initializer=tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          bias_initializer=tf.zeros_initializer())
    self._num_fcs = num_fcs
    self._fc_dims = fc_dims
    if activation == 'relu':
      self._activation_op = tf.nn.relu
    elif activation == 'swish':
      self._activation_op = tf.nn.swish
    else:
      raise ValueError('Unsupported activation `{}`.'.format(activation))
    self._use_batch_norm = use_batch_norm
    self._norm_activation = norm_activation

    self._conv_ops = []
    self._conv_bn_ops = []
    for i in range(self._num_convs):
      self._conv_ops.append(
          self._conv2d_op(
              self._num_filters,
              kernel_size=(3, 3),
              strides=(1, 1),
              padding='same',
              dilation_rate=(1, 1),
              activation=(None
                          if self._use_batch_norm else self._activation_op),
              name='conv_{}'.format(i)))
      if self._use_batch_norm:
        self._conv_bn_ops.append(self._norm_activation())

    self._fc_ops = []
    self._fc_bn_ops = []
    for i in range(self._num_fcs):
      self._fc_ops.append(
          tf.keras.layers.Dense(
              units=self._fc_dims,
              activation=(None
                          if self._use_batch_norm else self._activation_op),
              name='fc{}'.format(i)))
      if self._use_batch_norm:
        self._fc_bn_ops.append(self._norm_activation(fused=False))

    self._class_predict = tf.keras.layers.Dense(
        self._num_classes,
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
        bias_initializer=tf.zeros_initializer(),
        name='class-predict')
    self._box_predict = tf.keras.layers.Dense(
        self._num_classes * 4,
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.001),
        bias_initializer=tf.zeros_initializer(),
        name='box-predict')
    self._score_predict = tf.keras.layers.Dense(
        1,
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
        bias_initializer=tf.zeros_initializer(),
        name='score-predict')

  def __call__(self, roi_features, is_training=None):
    """Box, class, and localization-score branches for the OLN model.

    Args:
      roi_features: A ROI feature tensor of shape [batch_size, num_rois,
        height_l, width_l, num_filters].
      is_training: `boolean`, True if the model is in training mode.

    Returns:
      class_outputs: a tensor with a shape of
        [batch_size, num_rois, num_classes], representing the class
        predictions.
      box_outputs: a tensor with a shape of
        [batch_size, num_rois, num_classes * 4], representing the box
        predictions.
      score_outputs: a tensor with a shape of [batch_size, num_rois, 1],
        representing the localization score predictions.
    """
    with tf.name_scope('fast_rcnn_head'):
      # Reshape inputs before the FC layers.
      _, num_rois, height, width, filters = roi_features.get_shape().as_list()

      net = tf.reshape(roi_features, [-1, height, width, filters])
      for i in range(self._num_convs):
        net = self._conv_ops[i](net)
        if self._use_batch_norm:
          net = self._conv_bn_ops[i](net, is_training=is_training)

      filters = self._num_filters if self._num_convs > 0 else filters
      net = tf.reshape(net, [-1, num_rois, height * width * filters])

      for i in range(self._num_fcs):
        net = self._fc_ops[i](net)
        if self._use_batch_norm:
          net = self._fc_bn_ops[i](net, is_training=is_training)

      class_outputs = self._class_predict(net)
      box_outputs = self._box_predict(net)
      score_outputs = self._score_predict(net)
      return class_outputs, box_outputs, score_outputs
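
# Illustrative usage (a sketch, not in the original file): compared to the
# Fast R-CNN head, this head adds a third output, e.g.
#   head = OlnBoxScoreHead(num_classes=2)
#   class_out, box_out, score_out = head(
#       tf.random.normal([2, 100, 7, 7, 256]), is_training=False)
#   # score_out: [2, 100, 1], one class-agnostic localization score per ROI.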

class MaskrcnnHead(tf.keras.layers.Layer):
  """Mask R-CNN head."""

  def __init__(self,
               num_classes,
               mask_target_size,
               num_convs=4,
               num_filters=256,
               use_separable_conv=False,
               activation='relu',
               use_batch_norm=True,
               norm_activation=nn_ops.norm_activation_builder(
                   activation='relu')):
    """Initialize params to build the Mask R-CNN head.

    Args:
      num_classes: an integer for the number of classes.
      mask_target_size: an integer that is the resolution of masks.
      num_convs: `int` number that represents the number of the intermediate
        conv layers before the prediction.
      num_filters: `int` number that represents the number of filters of the
        intermediate conv layers.
      use_separable_conv: `bool`, indicating whether separable conv layers
        are used.
      activation: activation function. Support 'relu' and 'swish'.
      use_batch_norm: `bool`, indicating whether batchnorm layers are added.
      norm_activation: an operation that includes a normalization layer
        followed by an optional activation layer.
    """
    super(MaskrcnnHead, self).__init__(autocast=False)
    self._num_classes = num_classes
    self._mask_target_size = mask_target_size

    self._num_convs = num_convs
    self._num_filters = num_filters
    if use_separable_conv:
      self._conv2d_op = functools.partial(
          tf.keras.layers.SeparableConv2D,
          depth_multiplier=1,
          bias_initializer=tf.zeros_initializer())
    else:
      self._conv2d_op = functools.partial(
          tf.keras.layers.Conv2D,
          kernel_initializer=tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          bias_initializer=tf.zeros_initializer())
    if activation == 'relu':
      self._activation_op = tf.nn.relu
    elif activation == 'swish':
      self._activation_op = tf.nn.swish
    else:
      raise ValueError('Unsupported activation `{}`.'.format(activation))
    self._use_batch_norm = use_batch_norm
    self._norm_activation = norm_activation
    self._conv2d_ops = []
    for i in range(self._num_convs):
      self._conv2d_ops.append(
          self._conv2d_op(
              self._num_filters,
              kernel_size=(3, 3),
              strides=(1, 1),
              padding='same',
              dilation_rate=(1, 1),
              activation=(None
                          if self._use_batch_norm else self._activation_op),
              name='mask-conv-l%d' % i))
    self._mask_conv_transpose = tf.keras.layers.Conv2DTranspose(
        self._num_filters,
        kernel_size=(2, 2),
        strides=(2, 2),
        padding='valid',
        activation=(None if self._use_batch_norm else self._activation_op),
        kernel_initializer=tf.keras.initializers.VarianceScaling(
            scale=2, mode='fan_out', distribution='untruncated_normal'),
        bias_initializer=tf.zeros_initializer(),
        name='conv5-mask')

    with tf.name_scope('mask_head'):
      self._mask_conv2d_op = self._conv2d_op(
          self._num_classes,
          kernel_size=(1, 1),
          strides=(1, 1),
          padding='valid',
          name='mask_fcn_logits')

  def call(self, roi_features, class_indices, is_training=None):
    """Mask branch for the Mask-RCNN model.

    Args:
      roi_features: A ROI feature tensor of shape [batch_size, num_rois,
        height_l, width_l, num_filters].
      class_indices: a Tensor of shape [batch_size, num_rois], indicating
        which class the ROI is.
      is_training: `boolean`, True if the model is in training mode.

    Returns:
      mask_outputs: a tensor with a shape of
        [batch_size, num_masks, mask_height, mask_width, num_classes],
        representing the mask predictions.
      fg_gather_indices: a tensor with a shape of [batch_size, num_masks, 2],
        representing the fg mask targets.

    Raises:
      ValueError: If boxes is not a rank-3 tensor or the last dimension of
        boxes is not 4.
    """
    with tf.name_scope('mask_head'):
      _, num_rois, height, width, filters = roi_features.get_shape().as_list()
      net = tf.reshape(roi_features, [-1, height, width, filters])

      for i in range(self._num_convs):
        net = self._conv2d_ops[i](net)
        if self._use_batch_norm:
          net = self._norm_activation()(net, is_training=is_training)

      net = self._mask_conv_transpose(net)
      if self._use_batch_norm:
        net = self._norm_activation()(net, is_training=is_training)

      mask_outputs = self._mask_conv2d_op(net)
      mask_outputs = tf.reshape(mask_outputs, [
          -1, num_rois, self._mask_target_size, self._mask_target_size,
          self._num_classes
      ])

      with tf.name_scope('masks_post_processing'):
        # TODO(pengchong): Figure out the way not to use the static inferred
        # batch size.
        batch_size, num_masks = class_indices.get_shape().as_list()
        mask_outputs = tf.transpose(a=mask_outputs, perm=[0, 1, 4, 2, 3])
        # Constructs indices for gather.
        batch_indices = tf.tile(
            tf.expand_dims(tf.range(batch_size), axis=1), [1, num_masks])
        mask_indices = tf.tile(
            tf.expand_dims(tf.range(num_masks), axis=0), [batch_size, 1])
        gather_indices = tf.stack(
            [batch_indices, mask_indices, class_indices], axis=2)
        mask_outputs = tf.gather_nd(mask_outputs, gather_indices)

    return mask_outputs
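
# Illustrative usage (a sketch, not in the original file): the transposed
# convolution above upsamples the ROI crop 2x, so 14x14 features produce
# masks at mask_target_size=28, and the gather in `call` keeps only the mask
# for each ROI's class. All concrete numbers below are assumptions.
def _example_maskrcnn_head():
  head = MaskrcnnHead(num_classes=91, mask_target_size=28)
  roi_features = tf.random.normal([2, 8, 14, 14, 256])
  class_indices = tf.random.uniform([2, 8], maxval=91, dtype=tf.int32)
  mask_outputs = head(roi_features, class_indices, is_training=False)
  # mask_outputs: [2, 8, 28, 28], one mask per ROI for its class.
  return mask_outputs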

class RetinanetHead(object):
  """RetinaNet head."""

  def __init__(self,
               min_level,
               max_level,
               num_classes,
               anchors_per_location,
               num_convs=4,
               num_filters=256,
               use_separable_conv=False,
               norm_activation=nn_ops.norm_activation_builder(
                   activation='relu')):
    """Initialize params to build RetinaNet head.

    Args:
      min_level: `int` number of minimum feature level.
      max_level: `int` number of maximum feature level.
      num_classes: `int` number of classification categories.
      anchors_per_location: `int` number of anchors per pixel location.
      num_convs: `int` number of stacked convolutions before the last
        prediction layer.
      num_filters: `int` number of filters used in the head architecture.
      use_separable_conv: `bool` to indicate whether to use separable
        convolution.
      norm_activation: an operation that includes a normalization layer
        followed by an optional activation layer.
    """
    self._min_level = min_level
    self._max_level = max_level

    self._num_classes = num_classes
    self._anchors_per_location = anchors_per_location

    self._num_convs = num_convs
    self._num_filters = num_filters
    self._use_separable_conv = use_separable_conv
    with tf.name_scope('class_net') as scope_name:
      self._class_name_scope = tf.name_scope(scope_name)
    with tf.name_scope('box_net') as scope_name:
      self._box_name_scope = tf.name_scope(scope_name)
    self._build_class_net_layers(norm_activation)
    self._build_box_net_layers(norm_activation)

  def _class_net_batch_norm_name(self, i, level):
    return 'class-%d-%d' % (i, level)

  def _box_net_batch_norm_name(self, i, level):
    return 'box-%d-%d' % (i, level)

  def _build_class_net_layers(self, norm_activation):
    """Build re-usable layers for the class prediction network."""
    if self._use_separable_conv:
      self._class_predict = tf.keras.layers.SeparableConv2D(
          self._num_classes * self._anchors_per_location,
          kernel_size=(3, 3),
          bias_initializer=tf.constant_initializer(
              -np.log((1 - 0.01) / 0.01)),
          padding='same',
          name='class-predict')
    else:
      self._class_predict = tf.keras.layers.Conv2D(
          self._num_classes * self._anchors_per_location,
          kernel_size=(3, 3),
          bias_initializer=tf.constant_initializer(
              -np.log((1 - 0.01) / 0.01)),
          kernel_initializer=tf.keras.initializers.RandomNormal(stddev=1e-5),
          padding='same',
          name='class-predict')
    self._class_conv = []
    self._class_norm_activation = {}
    for i in range(self._num_convs):
      if self._use_separable_conv:
        self._class_conv.append(
            tf.keras.layers.SeparableConv2D(
                self._num_filters,
                kernel_size=(3, 3),
                bias_initializer=tf.zeros_initializer(),
                activation=None,
                padding='same',
                name='class-' + str(i)))
      else:
        self._class_conv.append(
            tf.keras.layers.Conv2D(
                self._num_filters,
                kernel_size=(3, 3),
                bias_initializer=tf.zeros_initializer(),
                kernel_initializer=tf.keras.initializers.RandomNormal(
                    stddev=0.01),
                activation=None,
                padding='same',
                name='class-' + str(i)))
      for level in range(self._min_level, self._max_level + 1):
        name = self._class_net_batch_norm_name(i, level)
        self._class_norm_activation[name] = norm_activation(name=name)

  def _build_box_net_layers(self, norm_activation):
    """Build re-usable layers for the box prediction network."""
    if self._use_separable_conv:
      self._box_predict = tf.keras.layers.SeparableConv2D(
          4 * self._anchors_per_location,
          kernel_size=(3, 3),
          bias_initializer=tf.zeros_initializer(),
          padding='same',
          name='box-predict')
    else:
      self._box_predict = tf.keras.layers.Conv2D(
          4 * self._anchors_per_location,
          kernel_size=(3, 3),
          bias_initializer=tf.zeros_initializer(),
          kernel_initializer=tf.keras.initializers.RandomNormal(stddev=1e-5),
          padding='same',
          name='box-predict')
    self._box_conv = []
    self._box_norm_activation = {}
    for i in range(self._num_convs):
      if self._use_separable_conv:
        self._box_conv.append(
            tf.keras.layers.SeparableConv2D(
                self._num_filters,
                kernel_size=(3, 3),
                activation=None,
                bias_initializer=tf.zeros_initializer(),
                padding='same',
                name='box-' + str(i)))
      else:
        self._box_conv.append(
            tf.keras.layers.Conv2D(
                self._num_filters,
                kernel_size=(3, 3),
                activation=None,
                bias_initializer=tf.zeros_initializer(),
                kernel_initializer=tf.keras.initializers.RandomNormal(
                    stddev=0.01),
                padding='same',
                name='box-' + str(i)))
      for level in range(self._min_level, self._max_level + 1):
        name = self._box_net_batch_norm_name(i, level)
        self._box_norm_activation[name] = norm_activation(name=name)

  def __call__(self, fpn_features, is_training=None):
    """Returns outputs of RetinaNet head."""
    class_outputs = {}
    box_outputs = {}
    with tf.name_scope('retinanet_head'):
      for level in range(self._min_level, self._max_level + 1):
        features = fpn_features[level]
        class_outputs[level] = self.class_net(
            features, level, is_training=is_training)
        box_outputs[level] = self.box_net(
            features, level, is_training=is_training)
    return class_outputs, box_outputs

  def class_net(self, features, level, is_training):
    """Class prediction network for RetinaNet."""
    with self._class_name_scope:
      for i in range(self._num_convs):
        features = self._class_conv[i](features)
        # The convolution layers in the class net are shared among all levels,
        # but each level has its own batch normalization to capture the
        # statistical difference among different levels.
        name = self._class_net_batch_norm_name(i, level)
        features = self._class_norm_activation[name](
            features, is_training=is_training)

      classes = self._class_predict(features)
    return classes

  def box_net(self, features, level, is_training=None):
    """Box regression network for RetinaNet."""
    with self._box_name_scope:
      for i in range(self._num_convs):
        features = self._box_conv[i](features)
        # The convolution layers in the box net are shared among all levels,
        # but each level has its own batch normalization to capture the
        # statistical difference among different levels.
        name = self._box_net_batch_norm_name(i, level)
        features = self._box_norm_activation[name](
            features, is_training=is_training)

      boxes = self._box_predict(features)
    return boxes
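
# Illustrative usage (a sketch, not in the original file): the head consumes
# a dict of FPN features keyed by level and emits per-level class and box
# maps. The feature shapes below assume a 512x512 input image.
def _example_retinanet_head():
  head = RetinanetHead(
      min_level=3, max_level=7, num_classes=91, anchors_per_location=9)
  fpn_features = {
      level: tf.random.normal([1, 512 // 2**level, 512 // 2**level, 256])
      for level in range(3, 8)
  }
  class_outputs, box_outputs = head(fpn_features, is_training=False)
  # class_outputs[3]: [1, 64, 64, 91 * 9]; box_outputs[3]: [1, 64, 64, 4 * 9].
  return class_outputs, box_outputs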
# TODO(yeqing): Refactor this class when it is ready for var_scope reuse.
class ShapemaskPriorHead(object):
  """ShapeMask Prior head."""

  def __init__(self, num_classes, num_downsample_channels, mask_crop_size,
               use_category_for_mask, shape_prior_path):
    """Initialize params to build the ShapeMask prior head.

    Args:
      num_classes: Number of output classes.
      num_downsample_channels: number of channels in the mask branch.
      mask_crop_size: feature crop size.
      use_category_for_mask: use class information in the mask branch.
      shape_prior_path: the path to load shape priors.
    """
    self._mask_num_classes = num_classes if use_category_for_mask else 1
    self._num_downsample_channels = num_downsample_channels
    self._mask_crop_size = mask_crop_size
    self._shape_prior_path = shape_prior_path
    self._use_category_for_mask = use_category_for_mask

    self._shape_prior_fc = tf.keras.layers.Dense(
        self._num_downsample_channels, name='shape-prior-fc')

  def __call__(self, fpn_features, boxes, outer_boxes, classes, is_training):
    """Generate the detection priors from the box detections and FPN features.

    This corresponds to the Fig. 4 of the ShapeMask paper at
    https://arxiv.org/pdf/1904.03239.pdf

    Args:
      fpn_features: a dictionary of FPN features.
      boxes: a float tensor of shape [batch_size, num_instances, 4]
        representing the tight gt boxes from dataloader/detection.
      outer_boxes: a float tensor of shape [batch_size, num_instances, 4]
        representing the loose gt boxes from dataloader/detection.
      classes: an int Tensor of shape [batch_size, num_instances] of instance
        classes.
      is_training: training mode or not.

    Returns:
      instance_features: a float Tensor of shape [batch_size * num_instances,
        mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
        instance feature crop.
      detection_priors: A float Tensor of shape [batch_size * num_instances,
        mask_size, mask_size, 1].
    """
    with tf.name_scope('prior_mask'):
      batch_size, num_instances, _ = boxes.get_shape().as_list()
      outer_boxes = tf.cast(outer_boxes, tf.float32)
      boxes = tf.cast(boxes, tf.float32)
      instance_features = spatial_transform_ops.multilevel_crop_and_resize(
          fpn_features, outer_boxes, output_size=self._mask_crop_size)
      instance_features = self._shape_prior_fc(instance_features)

      shape_priors = self._get_priors()

      # Get uniform priors for each outer box.
      uniform_priors = tf.ones([
          batch_size, num_instances, self._mask_crop_size,
          self._mask_crop_size
      ])
      uniform_priors = spatial_transform_ops.crop_mask_in_target_box(
          uniform_priors, boxes, outer_boxes, self._mask_crop_size)

      # Classify shape priors using uniform priors + instance features.
      prior_distribution = self._classify_shape_priors(
          tf.cast(instance_features, tf.float32), uniform_priors, classes)

      instance_priors = tf.gather(shape_priors, classes)
      instance_priors *= tf.expand_dims(
          tf.expand_dims(tf.cast(prior_distribution, tf.float32), axis=-1),
          axis=-1)
      instance_priors = tf.reduce_sum(instance_priors, axis=2)
      detection_priors = spatial_transform_ops.crop_mask_in_target_box(
          instance_priors, boxes, outer_boxes, self._mask_crop_size)

      return instance_features, detection_priors

  def _get_priors(self):
    """Load shape priors from file."""
    # Loads class-specific or class-agnostic shape priors.
    if self._shape_prior_path:
      # Priors are loaded into shape [mask_num_classes, num_clusters, 32, 32].
      priors = np.load(tf.io.gfile.GFile(self._shape_prior_path, 'rb'))
      priors = tf.convert_to_tensor(priors, dtype=tf.float32)
      self._num_clusters = priors.get_shape().as_list()[1]
    else:
      # If the prior path does not exist, do not use priors, i.e., priors
      # equal a uniform empty 32x32 patch.
      self._num_clusters = 1
      priors = tf.zeros([
          self._mask_num_classes, self._num_clusters, self._mask_crop_size,
          self._mask_crop_size
      ])
    return priors

  def _classify_shape_priors(self, features, uniform_priors, classes):
    """Classify the uniform prior by predicting the shape modes.

    Classify the object crop features into K modes of the clusters for each
    category.

    Args:
      features: A float Tensor of shape [batch_size, num_instances, mask_size,
        mask_size, num_channels].
      uniform_priors: A float Tensor of shape [batch_size, num_instances,
        mask_size, mask_size] representing the uniform detection priors.
      classes: An int Tensor of shape [batch_size, num_instances] of detection
        class ids.

    Returns:
      prior_distribution: A float Tensor of shape
        [batch_size, num_instances, num_clusters] representing the classifier
        output probability over all possible shapes.
    """
    batch_size, num_instances, _, _, _ = features.get_shape().as_list()
    features *= tf.expand_dims(uniform_priors, axis=-1)
    # Reduce spatial dimension of features. The features have shape
    # [batch_size, num_instances, num_channels].
    features = tf.reduce_mean(features, axis=(2, 3))
    logits = tf.keras.layers.Dense(
        self._mask_num_classes * self._num_clusters,
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        name='classify-shape-prior-fc')(features)
    logits = tf.reshape(logits, [
        batch_size, num_instances, self._mask_num_classes, self._num_clusters
    ])
    if self._use_category_for_mask:
      logits = tf.gather(
          logits, tf.expand_dims(classes, axis=-1), batch_dims=2)
      logits = tf.squeeze(logits, axis=2)
    else:
      logits = logits[:, :, 0, :]

    distribution = tf.nn.softmax(logits, name='shape_prior_weights')
    return distribution
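
# Illustrative note (not in the original file): with shape_prior_path=None the
# head falls back to all-zero priors with a single cluster, so it can be
# constructed without a priors file, e.g.
#   prior_head = ShapemaskPriorHead(
#       num_classes=91, num_downsample_channels=128, mask_crop_size=32,
#       use_category_for_mask=True, shape_prior_path=None)
# Calling it still needs real FPN features and boxes, since it crops instance
# features through spatial_transform_ops.multilevel_crop_and_resize.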

class ShapemaskCoarsemaskHead(object):
  """ShapemaskCoarsemaskHead head."""

  def __init__(self,
               num_classes,
               num_downsample_channels,
               mask_crop_size,
               use_category_for_mask,
               num_convs,
               norm_activation=nn_ops.norm_activation_builder()):
    """Initialize params to build the ShapeMask coarse mask prediction head.

    Args:
      num_classes: `int` number of mask classification categories.
      num_downsample_channels: `int` number of filters at the mask head.
      mask_crop_size: feature crop size.
      use_category_for_mask: use class information in the mask branch.
      num_convs: `int` number of stacked convolutions before the last
        prediction layer.
      norm_activation: an operation that includes a normalization layer
        followed by an optional activation layer.
    """
    self._mask_num_classes = num_classes if use_category_for_mask else 1
    self._use_category_for_mask = use_category_for_mask
    self._num_downsample_channels = num_downsample_channels
    self._mask_crop_size = mask_crop_size
    self._num_convs = num_convs
    self._norm_activation = norm_activation

    self._coarse_mask_fc = tf.keras.layers.Dense(
        self._num_downsample_channels, name='coarse-mask-fc')

    self._class_conv = []
    self._class_norm_activation = []

    for i in range(self._num_convs):
      self._class_conv.append(
          tf.keras.layers.Conv2D(
              self._num_downsample_channels,
              kernel_size=(3, 3),
              bias_initializer=tf.zeros_initializer(),
              kernel_initializer=tf.keras.initializers.RandomNormal(
                  stddev=0.01),
              padding='same',
              name='coarse-mask-class-%d' % i))

      self._class_norm_activation.append(
          norm_activation(name='coarse-mask-class-%d-bn' % i))

    self._class_predict = tf.keras.layers.Conv2D(
        self._mask_num_classes,
        kernel_size=(1, 1),
        # Focal loss bias initialization to have foreground 0.01 probability.
        bias_initializer=tf.constant_initializer(-np.log((1 - 0.01) / 0.01)),
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
        padding='same',
        name='coarse-mask-class-predict')

  def __call__(self, features, detection_priors, classes, is_training):
    """Generate instance masks from FPN features and detection priors.

    This corresponds to the Fig. 5-6 of the ShapeMask paper at
    https://arxiv.org/pdf/1904.03239.pdf

    Args:
      features: a float Tensor of shape [batch_size, num_instances,
        mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
        instance feature crop.
      detection_priors: a float Tensor of shape [batch_size, num_instances,
        mask_crop_size, mask_crop_size, 1]. This is the detection prior for
        the instance.
      classes: an int Tensor of shape [batch_size, num_instances] of instance
        classes.
      is_training: a bool indicating whether in training mode.

    Returns:
      mask_outputs: instance mask prediction as a float Tensor of shape
        [batch_size, num_instances, mask_size, mask_size].
    """
    with tf.name_scope('coarse_mask'):
      # Transform detection priors to have the same dimension as features.
      detection_priors = tf.expand_dims(detection_priors, axis=-1)
      detection_priors = self._coarse_mask_fc(detection_priors)

      features += detection_priors
      mask_logits = self.decoder_net(features, is_training)
      # Gather the logits with the right input class.
      if self._use_category_for_mask:
        mask_logits = tf.transpose(mask_logits, [0, 1, 4, 2, 3])
        mask_logits = tf.gather(
            mask_logits, tf.expand_dims(classes, -1), batch_dims=2)
        mask_logits = tf.squeeze(mask_logits, axis=2)
      else:
        mask_logits = mask_logits[..., 0]

      return mask_logits

  def decoder_net(self, features, is_training=False):
    """Coarse mask decoder network architecture.

    Args:
      features: A tensor of size [batch, height_in, width_in, channels_in].
      is_training: Whether batch_norm layers are in training mode.

    Returns:
      images: A feature tensor of size [batch, output_size, output_size,
        num_channels].
    """
    (batch_size, num_instances, height, width,
     num_channels) = features.get_shape().as_list()
    features = tf.reshape(
        features, [batch_size * num_instances, height, width, num_channels])
    for i in range(self._num_convs):
      features = self._class_conv[i](features)
      features = self._class_norm_activation[i](
          features, is_training=is_training)

    mask_logits = self._class_predict(features)
    mask_logits = tf.reshape(
        mask_logits,
        [batch_size, num_instances, height, width, self._mask_num_classes])
    return mask_logits
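
# Illustrative usage (a sketch, not in the original file): the coarse head's
# decoder keeps the crop resolution, so mask logits come back at
# mask_crop_size. The inputs below are random stand-ins with assumed shapes.
def _example_coarse_mask_head():
  head = ShapemaskCoarsemaskHead(
      num_classes=91, num_downsample_channels=128, mask_crop_size=32,
      use_category_for_mask=True, num_convs=2)
  features = tf.random.normal([2, 4, 32, 32, 128])
  detection_priors = tf.random.uniform([2, 4, 32, 32])
  classes = tf.random.uniform([2, 4], maxval=91, dtype=tf.int32)
  mask_logits = head(features, detection_priors, classes, is_training=False)
  # mask_logits: [2, 4, 32, 32].
  return mask_logits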

class ShapemaskFinemaskHead(object):
  """ShapemaskFinemaskHead head."""

  def __init__(self,
               num_classes,
               num_downsample_channels,
               mask_crop_size,
               use_category_for_mask,
               num_convs,
               upsample_factor,
               norm_activation=nn_ops.norm_activation_builder()):
    """Initialize params to build the ShapeMask fine mask prediction head.

    Args:
      num_classes: `int` number of mask classification categories.
      num_downsample_channels: `int` number of filters at the mask head.
      mask_crop_size: feature crop size.
      use_category_for_mask: use class information in the mask branch.
      num_convs: `int` number of stacked convolutions before the last
        prediction layer.
      upsample_factor: `int` number of fine mask upsampling factor.
      norm_activation: an operation that includes a batch normalization layer
        followed by an optional relu layer.
    """
    self._use_category_for_mask = use_category_for_mask
    self._mask_num_classes = num_classes if use_category_for_mask else 1
    self._num_downsample_channels = num_downsample_channels
    self._mask_crop_size = mask_crop_size
    self._num_convs = num_convs
    self.up_sample_factor = upsample_factor

    self._fine_mask_fc = tf.keras.layers.Dense(
        self._num_downsample_channels, name='fine-mask-fc')

    self._upsample_conv = tf.keras.layers.Conv2DTranspose(
        self._num_downsample_channels,
        (self.up_sample_factor, self.up_sample_factor),
        (self.up_sample_factor, self.up_sample_factor),
        name='fine-mask-conv2d-tran')

    self._fine_class_conv = []
    self._fine_class_bn = []
    for i in range(self._num_convs):
      self._fine_class_conv.append(
          tf.keras.layers.Conv2D(
              self._num_downsample_channels,
              kernel_size=(3, 3),
              bias_initializer=tf.zeros_initializer(),
              kernel_initializer=tf.keras.initializers.RandomNormal(
                  stddev=0.01),
              activation=None,
              padding='same',
              name='fine-mask-class-%d' % i))
      self._fine_class_bn.append(
          norm_activation(name='fine-mask-class-%d-bn' % i))

    self._class_predict_conv = tf.keras.layers.Conv2D(
        self._mask_num_classes,
        kernel_size=(1, 1),
        # Focal loss bias initialization to have foreground 0.01 probability.
        bias_initializer=tf.constant_initializer(-np.log((1 - 0.01) / 0.01)),
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
        padding='same',
        name='fine-mask-class-predict')

  def __call__(self, features, mask_logits, classes, is_training):
    """Generate instance masks from FPN features and detection priors.

    This corresponds to the Fig. 5-6 of the ShapeMask paper at
    https://arxiv.org/pdf/1904.03239.pdf

    Args:
      features: a float Tensor of shape [batch_size, num_instances,
        mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
        instance feature crop.
      mask_logits: a float Tensor of shape [batch_size, num_instances,
        mask_crop_size, mask_crop_size] indicating predicted mask logits.
      classes: an int Tensor of shape [batch_size, num_instances] of instance
        classes.
      is_training: a bool indicating whether in training mode.

    Returns:
      mask_outputs: instance mask prediction as a float Tensor of shape
        [batch_size, num_instances, mask_size, mask_size].
    """
    # Extract the foreground mean features.
    # with tf.variable_scope('fine_mask', reuse=tf.AUTO_REUSE):
    with tf.name_scope('fine_mask'):
      mask_probs = tf.nn.sigmoid(mask_logits)
      # Compute instance embedding for hard average.
      binary_mask = tf.cast(tf.greater(mask_probs, 0.5), features.dtype)
      instance_embedding = tf.reduce_sum(
          features * tf.expand_dims(binary_mask, axis=-1), axis=(2, 3))
      instance_embedding /= tf.expand_dims(
          tf.reduce_sum(binary_mask, axis=(2, 3)) + 1e-20, axis=-1)
      # Take the difference between crop features and mean instance features.
      features -= tf.expand_dims(
          tf.expand_dims(instance_embedding, axis=2), axis=2)

      features += self._fine_mask_fc(tf.expand_dims(mask_probs, axis=-1))

      # Decoder to generate upsampled segmentation mask.
      mask_logits = self.decoder_net(features, is_training)
      if self._use_category_for_mask:
        mask_logits = tf.transpose(mask_logits, [0, 1, 4, 2, 3])
        mask_logits = tf.gather(
            mask_logits, tf.expand_dims(classes, -1), batch_dims=2)
        mask_logits = tf.squeeze(mask_logits, axis=2)
      else:
        mask_logits = mask_logits[..., 0]

    return mask_logits

  def decoder_net(self, features, is_training=False):
    """Fine mask decoder network architecture.

    Args:
      features: A tensor of size [batch, height_in, width_in, channels_in].
      is_training: Whether batch_norm layers are in training mode.

    Returns:
      images: A feature tensor of size [batch, output_size, output_size,
        num_channels], where output size is self._gt_upsample_scale times
        that of input.
    """
    (batch_size, num_instances, height, width,
     num_channels) = features.get_shape().as_list()
    features = tf.reshape(
        features, [batch_size * num_instances, height, width, num_channels])
    for i in range(self._num_convs):
      features = self._fine_class_conv[i](features)
      features = self._fine_class_bn[i](features, is_training=is_training)

    if self.up_sample_factor > 1:
      features = self._upsample_conv(features)

    # Predict per-class instance masks.
    mask_logits = self._class_predict_conv(features)

    mask_logits = tf.reshape(mask_logits, [
        batch_size, num_instances, height * self.up_sample_factor,
        width * self.up_sample_factor, self._mask_num_classes
    ])
    return mask_logits
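
# Illustrative usage (a sketch, not in the original file): the fine head
# refines coarse mask logits and upsamples by `upsample_factor`, so 32x32
# crops with upsample_factor=2 produce 64x64 logits. Shapes are assumptions.
def _example_fine_mask_head():
  head = ShapemaskFinemaskHead(
      num_classes=91, num_downsample_channels=128, mask_crop_size=32,
      use_category_for_mask=True, num_convs=2, upsample_factor=2)
  features = tf.random.normal([2, 4, 32, 32, 128])
  coarse_logits = tf.random.normal([2, 4, 32, 32])
  classes = tf.random.uniform([2, 4], maxval=91, dtype=tf.int32)
  mask_logits = head(features, coarse_logits, classes, is_training=False)
  # mask_logits: [2, 4, 64, 64].
  return mask_logits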
official/legacy/detection/modeling/architecture/identity.py
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Identity Fn that forwards the input features."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


class Identity(object):
  """Identity function that forwards the input features."""

  def __call__(self, features, is_training=False):
    """Only forwards the input features."""
    return features
official/legacy/detection/modeling/architecture/nn_blocks.py
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains common building blocks for neural networks."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from official.modeling import tf_utils


class ResidualBlock(tf.keras.layers.Layer):
  """A residual block."""

  def __init__(self,
               filters,
               strides,
               use_projection=False,
               kernel_initializer='VarianceScaling',
               kernel_regularizer=None,
               bias_regularizer=None,
               activation='relu',
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               **kwargs):
    """A residual block with BN after convolutions.

    Args:
      filters: `int` number of filters for the two convolutions in this
        block.
      strides: `int` block stride. If greater than 1, this block will
        ultimately downsample the input.
      use_projection: `bool` for whether this block should use a projection
        shortcut (versus the default identity shortcut). This is usually
        `True` for the first block of a block group, which may change the
        number of filters and the resolution.
      kernel_initializer: kernel_initializer for convolutional layers.
      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
        Default to None.
      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
        Default to None.
      activation: `str` name of the activation function.
      use_sync_bn: if True, use synchronized batch normalization.
      norm_momentum: `float` normalization momentum for the moving average.
      norm_epsilon: `float` small float added to variance to avoid dividing by
        zero.
      **kwargs: keyword arguments to be passed.
    """
    super(ResidualBlock, self).__init__(**kwargs)

    self._filters = filters
    self._strides = strides
    self._use_projection = use_projection
    self._use_sync_bn = use_sync_bn
    self._activation = activation
    self._kernel_initializer = kernel_initializer
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer

    if use_sync_bn:
      self._norm = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      self._norm = tf.keras.layers.BatchNormalization

    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._activation_fn = tf_utils.get_activation(activation)

  def build(self, input_shape):
    if self._use_projection:
      self._shortcut = tf.keras.layers.Conv2D(
          filters=self._filters,
          kernel_size=1,
          strides=self._strides,
          use_bias=False,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer)
      self._norm0 = self._norm(
          axis=self._bn_axis,
          momentum=self._norm_momentum,
          epsilon=self._norm_epsilon)

    self._conv1 = tf.keras.layers.Conv2D(
        filters=self._filters,
        kernel_size=3,
        strides=self._strides,
        padding='same',
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    self._norm1 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    self._conv2 = tf.keras.layers.Conv2D(
        filters=self._filters,
        kernel_size=3,
        strides=1,
        padding='same',
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    self._norm2 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    super(ResidualBlock, self).build(input_shape)

  def get_config(self):
    config = {
        'filters': self._filters,
        'strides': self._strides,
        'use_projection': self._use_projection,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon
    }
    base_config = super(ResidualBlock, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self, inputs):
    shortcut = inputs
    if self._use_projection:
      shortcut = self._shortcut(shortcut)
      shortcut = self._norm0(shortcut)

    x = self._conv1(inputs)
    x = self._norm1(x)
    x = self._activation_fn(x)

    x = self._conv2(x)
    x = self._norm2(x)

    return self._activation_fn(x + shortcut)
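
# Illustrative usage (a sketch, not in the original file): a projection block
# that halves the spatial resolution. The input shape is an assumption.
def _example_residual_block():
  block = ResidualBlock(filters=64, strides=2, use_projection=True)
  x = tf.random.normal([1, 32, 32, 32])
  y = block(x)
  # y: [1, 16, 16, 64]; the 1x1 projection matches channels and stride.
  return y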

class BottleneckBlock(tf.keras.layers.Layer):
  """A standard bottleneck block."""

  def __init__(self,
               filters,
               strides,
               use_projection=False,
               kernel_initializer='VarianceScaling',
               kernel_regularizer=None,
               bias_regularizer=None,
               activation='relu',
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               **kwargs):
    """A standard bottleneck block with BN after convolutions.

    Args:
      filters: `int` number of filters for the first two convolutions. Note
        that the third and final convolution will use 4 times as many filters.
      strides: `int` block stride. If greater than 1, this block will
        ultimately downsample the input.
      use_projection: `bool` for whether this block should use a projection
        shortcut (versus the default identity shortcut). This is usually
        `True` for the first block of a block group, which may change the
        number of filters and the resolution.
      kernel_initializer: kernel_initializer for convolutional layers.
      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
        Default to None.
      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
        Default to None.
      activation: `str` name of the activation function.
      use_sync_bn: if True, use synchronized batch normalization.
      norm_momentum: `float` normalization momentum for the moving average.
      norm_epsilon: `float` small float added to variance to avoid dividing by
        zero.
      **kwargs: keyword arguments to be passed.
    """
    super(BottleneckBlock, self).__init__(**kwargs)

    self._filters = filters
    self._strides = strides
    self._use_projection = use_projection
    self._use_sync_bn = use_sync_bn
    self._activation = activation
    self._kernel_initializer = kernel_initializer
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    if use_sync_bn:
      self._norm = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      self._norm = tf.keras.layers.BatchNormalization

    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._activation_fn = tf_utils.get_activation(activation)

  def build(self, input_shape):
    if self._use_projection:
      self._shortcut = tf.keras.layers.Conv2D(
          filters=self._filters * 4,
          kernel_size=1,
          strides=self._strides,
          use_bias=False,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer)
      self._norm0 = self._norm(
          axis=self._bn_axis,
          momentum=self._norm_momentum,
          epsilon=self._norm_epsilon)

    self._conv1 = tf.keras.layers.Conv2D(
        filters=self._filters,
        kernel_size=1,
        strides=1,
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    self._norm1 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    self._conv2 = tf.keras.layers.Conv2D(
        filters=self._filters,
        kernel_size=3,
        strides=self._strides,
        padding='same',
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    self._norm2 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    self._conv3 = tf.keras.layers.Conv2D(
        filters=self._filters * 4,
        kernel_size=1,
        strides=1,
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    self._norm3 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    super(BottleneckBlock, self).build(input_shape)

  def get_config(self):
    config = {
        'filters': self._filters,
        'strides': self._strides,
        'use_projection': self._use_projection,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon
    }
    base_config = super(BottleneckBlock, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self, inputs):
    shortcut = inputs
    if self._use_projection:
      shortcut = self._shortcut(shortcut)
      shortcut = self._norm0(shortcut)

    x = self._conv1(inputs)
    x = self._norm1(x)
    x = self._activation_fn(x)

    x = self._conv2(x)
    x = self._norm2(x)
    x = self._activation_fn(x)

    x = self._conv3(x)
    x = self._norm3(x)

    return self._activation_fn(x + shortcut)
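
# Illustrative usage (a sketch, not in the original file): unlike
# ResidualBlock, the bottleneck expands to 4x `filters` on output, and the
# block round-trips through `get_config` like any Keras layer.
def _example_bottleneck_block():
  block = BottleneckBlock(filters=64, strides=1, use_projection=True)
  y = block(tf.random.normal([1, 16, 16, 128]))
  # y: [1, 16, 16, 256] (= 4 * filters).
  rebuilt = BottleneckBlock.from_config(block.get_config())
  return y, rebuilt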
official/legacy/detection/modeling/architecture/nn_ops.py
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Neural network operations commonly shared by the architectures."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools

import tensorflow as tf


class NormActivation(tf.keras.layers.Layer):
  """Combined Normalization and Activation layers."""

  def __init__(self,
               momentum=0.997,
               epsilon=1e-4,
               trainable=True,
               init_zero=False,
               use_activation=True,
               activation='relu',
               fused=True,
               name=None):
    """A class to construct layers for a batch normalization followed by a ReLU.

    Args:
      momentum: momentum for the moving average.
      epsilon: small float added to variance to avoid dividing by zero.
      trainable: `bool`, if True also add variables to the graph collection
        GraphKeys.TRAINABLE_VARIABLES. If False, freeze the batch
        normalization layer.
      init_zero: `bool` if True, initializes the scale parameter of batch
        normalization with 0. If False, initializes it with 1.
      use_activation: `bool`, whether to add the optional activation layer
        after the batch normalization layer.
      activation: `string`, the type of the activation layer. Currently
        support `relu` and `swish`.
      fused: `bool` fused option in batch normalization.
      name: `str` name for the operation.
    """
    super(NormActivation, self).__init__(trainable=trainable)
    if init_zero:
      gamma_initializer = tf.keras.initializers.Zeros()
    else:
      gamma_initializer = tf.keras.initializers.Ones()
    self._normalization_op = tf.keras.layers.BatchNormalization(
        momentum=momentum,
        epsilon=epsilon,
        center=True,
        scale=True,
        trainable=trainable,
        fused=fused,
        gamma_initializer=gamma_initializer,
        name=name)
    self._use_activation = use_activation
    if activation == 'relu':
      self._activation_op = tf.nn.relu
    elif activation == 'swish':
      self._activation_op = tf.nn.swish
    else:
      raise ValueError('Unsupported activation `{}`.'.format(activation))

  def __call__(self, inputs, is_training=None):
    """Builds the normalization layer followed by an optional activation layer.

    Args:
      inputs: `Tensor` of shape `[batch, channels, ...]`.
      is_training: `boolean`, True if the model is in training mode.

    Returns:
      A normalized `Tensor` with the same `data_format`.
    """
    # We need to keep training=None by default, so that it can be inherited
    # from keras.Model.training.
    if is_training and self.trainable:
      is_training = True
    inputs = self._normalization_op(inputs, training=is_training)

    if self._use_activation:
      inputs = self._activation_op(inputs)
    return inputs

def norm_activation_builder(momentum=0.997,
                            epsilon=1e-4,
                            trainable=True,
                            activation='relu',
                            **kwargs):
  return functools.partial(
      NormActivation,
      momentum=momentum,
      epsilon=epsilon,
      trainable=trainable,
      activation=activation,
      **kwargs)
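
# Illustrative usage (a sketch, not in the original file): the builder bakes
# in BN hyperparameters once, then each call of the returned partial creates
# a fresh NormActivation layer, which is how callers instantiate one BN per
# conv or per level.
def _example_norm_activation_builder():
  builder = norm_activation_builder(momentum=0.9, activation='relu')
  bn_relu = builder(name='example-bn')  # a new NormActivation instance
  y = bn_relu(tf.random.normal([1, 8, 8, 16]), is_training=False)
  return y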
official/legacy/detection/modeling/architecture/resnet.py
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions for the post-activation form of Residual Networks.
Residual networks (ResNets) were proposed in:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Deep Residual Learning for Image Recognition. arXiv:1512.03385
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from official.legacy.detection.modeling.architecture import nn_ops

# TODO(b/140112644): Refactor the code with Keras style, i.e. build and call.
class
Resnet
(
object
):
"""Class to build ResNet family model."""
def
__init__
(
self
,
resnet_depth
,
activation
=
'relu'
,
norm_activation
=
nn_ops
.
norm_activation_builder
(
activation
=
'relu'
),
data_format
=
'channels_last'
):
"""ResNet initialization function.
Args:
resnet_depth: `int` depth of ResNet backbone model.
activation: the activation function.
norm_activation: an operation that includes a normalization layer followed
by an optional activation layer.
data_format: `str` either "channels_first" for `[batch, channels, height,
width]` or "channels_last for `[batch, height, width, channels]`.
"""
self
.
_resnet_depth
=
resnet_depth
if
activation
==
'relu'
:
self
.
_activation_op
=
tf
.
nn
.
relu
elif
activation
==
'swish'
:
self
.
_activation_op
=
tf
.
nn
.
swish
else
:
raise
ValueError
(
'Unsupported activation `{}`.'
.
format
(
activation
))
self
.
_norm_activation
=
norm_activation
self
.
_data_format
=
data_format
model_params
=
{
10
:
{
'block'
:
self
.
residual_block
,
'layers'
:
[
1
,
1
,
1
,
1
]
},
18
:
{
'block'
:
self
.
residual_block
,
'layers'
:
[
2
,
2
,
2
,
2
]
},
34
:
{
'block'
:
self
.
residual_block
,
'layers'
:
[
3
,
4
,
6
,
3
]
},
50
:
{
'block'
:
self
.
bottleneck_block
,
'layers'
:
[
3
,
4
,
6
,
3
]
},
101
:
{
'block'
:
self
.
bottleneck_block
,
'layers'
:
[
3
,
4
,
23
,
3
]
},
152
:
{
'block'
:
self
.
bottleneck_block
,
'layers'
:
[
3
,
8
,
36
,
3
]
},
200
:
{
'block'
:
self
.
bottleneck_block
,
'layers'
:
[
3
,
24
,
36
,
3
]
}
}
if
resnet_depth
not
in
model_params
:
valid_resnet_depths
=
', '
.
join
(
[
str
(
depth
)
for
depth
in
sorted
(
model_params
.
keys
())])
raise
ValueError
(
'The resnet_depth should be in [%s]. Not a valid resnet_depth:'
%
(
valid_resnet_depths
),
self
.
_resnet_depth
)
params
=
model_params
[
resnet_depth
]
self
.
_resnet_fn
=
self
.
resnet_v1_generator
(
params
[
'block'
],
params
[
'layers'
])
def
__call__
(
self
,
inputs
,
is_training
=
None
):
"""Returns the ResNet model for a given size and number of output classes.
Args:
inputs: a `Tesnor` with shape [batch_size, height, width, 3] representing
a batch of images.
is_training: `bool` if True, the model is in training mode.
Returns:
a `dict` containing `int` keys for continuous feature levels [2, 3, 4, 5].
The values are corresponding feature hierarchy in ResNet with shape
[batch_size, height_l, width_l, num_filters].
"""
with
tf
.
name_scope
(
'resnet%s'
%
self
.
_resnet_depth
):
return
self
.
_resnet_fn
(
inputs
,
is_training
)
def
fixed_padding
(
self
,
inputs
,
kernel_size
):
"""Pads the input along the spatial dimensions independently of input size.
Args:
inputs: `Tensor` of size `[batch, channels, height, width]` or `[batch,
height, width, channels]` depending on `data_format`.
kernel_size: `int` kernel size to be used for `conv2d` or max_pool2d`
operations. Should be a positive integer.
Returns:
A padded `Tensor` of the same `data_format` with size either intact
(if `kernel_size == 1`) or padded (if `kernel_size > 1`).
"""
pad_total
=
kernel_size
-
1
pad_beg
=
pad_total
//
2
pad_end
=
pad_total
-
pad_beg
if
self
.
_data_format
==
'channels_first'
:
padded_inputs
=
tf
.
pad
(
tensor
=
inputs
,
paddings
=
[[
0
,
0
],
[
0
,
0
],
[
pad_beg
,
pad_end
],
[
pad_beg
,
pad_end
]])
else
:
padded_inputs
=
tf
.
pad
(
tensor
=
inputs
,
paddings
=
[[
0
,
0
],
[
pad_beg
,
pad_end
],
[
pad_beg
,
pad_end
],
[
0
,
0
]])
return
padded_inputs
def
conv2d_fixed_padding
(
self
,
inputs
,
filters
,
kernel_size
,
strides
):
"""Strided 2-D convolution with explicit padding.
The padding is consistent and is based only on `kernel_size`, not on the
dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).
Args:
inputs: `Tensor` of size `[batch, channels, height_in, width_in]`.
filters: `int` number of filters in the convolution.
kernel_size: `int` size of the kernel to be used in the convolution.
strides: `int` strides of the convolution.
Returns:
A `Tensor` of shape `[batch, filters, height_out, width_out]`.
"""
if
strides
>
1
:
inputs
=
self
.
fixed_padding
(
inputs
,
kernel_size
)
return
tf
.
keras
.
layers
.
Conv2D
(
filters
=
filters
,
kernel_size
=
kernel_size
,
strides
=
strides
,
padding
=
(
'SAME'
if
strides
==
1
else
'VALID'
),
use_bias
=
False
,
kernel_initializer
=
tf
.
initializers
.
VarianceScaling
(),
data_format
=
self
.
_data_format
)(
inputs
=
inputs
)
def
residual_block
(
self
,
inputs
,
filters
,
strides
,
use_projection
=
False
,
is_training
=
None
):
"""Standard building block for residual networks with BN after convolutions.
Args:
inputs: `Tensor` of size `[batch, channels, height, width]`.
filters: `int` number of filters for the first two convolutions. Note that
the third and final convolution will use 4 times as many filters.
strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input.
use_projection: `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually `True`
for the first block of a block group, which may change the number of
filters and the resolution.
is_training: `bool` if True, the model is in training mode.
Returns:
The output `Tensor` of the block.
"""
shortcut
=
inputs
if
use_projection
:
# Projection shortcut in first layer to match filters and strides
shortcut
=
self
.
conv2d_fixed_padding
(
inputs
=
inputs
,
filters
=
filters
,
kernel_size
=
1
,
strides
=
strides
)
shortcut
=
self
.
_norm_activation
(
use_activation
=
False
)(
shortcut
,
is_training
=
is_training
)
inputs
=
self
.
conv2d_fixed_padding
(
inputs
=
inputs
,
filters
=
filters
,
kernel_size
=
3
,
strides
=
strides
)
inputs
=
self
.
_norm_activation
()(
inputs
,
is_training
=
is_training
)
inputs
=
self
.
conv2d_fixed_padding
(
inputs
=
inputs
,
filters
=
filters
,
kernel_size
=
3
,
strides
=
1
)
inputs
=
self
.
_norm_activation
(
use_activation
=
False
,
init_zero
=
True
)(
inputs
,
is_training
=
is_training
)
return
self
.
_activation_op
(
inputs
+
shortcut
)
def
bottleneck_block
(
self
,
inputs
,
filters
,
strides
,
use_projection
=
False
,
is_training
=
None
):
"""Bottleneck block variant for residual networks with BN after convolutions.
Args:
inputs: `Tensor` of size `[batch, channels, height, width]`.
filters: `int` number of filters for the first two convolutions. Note that
the third and final convolution will use 4 times as many filters.
strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input.
use_projection: `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually `True`
for the first block of a block group, which may change the number of
filters and the resolution.
is_training: `bool` if True, the model is in training mode.
Returns:
The output `Tensor` of the block.
"""
shortcut
=
inputs
if
use_projection
:
# Projection shortcut only in first block within a group. Bottleneck
# blocks end with 4 times the number of filters.
filters_out
=
4
*
filters
shortcut
=
self
.
conv2d_fixed_padding
(
inputs
=
inputs
,
filters
=
filters_out
,
kernel_size
=
1
,
strides
=
strides
)
shortcut
=
self
.
_norm_activation
(
use_activation
=
False
)(
shortcut
,
is_training
=
is_training
)
inputs
=
self
.
conv2d_fixed_padding
(
inputs
=
inputs
,
filters
=
filters
,
kernel_size
=
1
,
strides
=
1
)
inputs
=
self
.
_norm_activation
()(
inputs
,
is_training
=
is_training
)
inputs
=
self
.
conv2d_fixed_padding
(
inputs
=
inputs
,
filters
=
filters
,
kernel_size
=
3
,
strides
=
strides
)
inputs
=
self
.
_norm_activation
()(
inputs
,
is_training
=
is_training
)
inputs
=
self
.
conv2d_fixed_padding
(
inputs
=
inputs
,
filters
=
4
*
filters
,
kernel_size
=
1
,
strides
=
1
)
inputs
=
self
.
_norm_activation
(
use_activation
=
False
,
init_zero
=
True
)(
inputs
,
is_training
=
is_training
)
return
self
.
_activation_op
(
inputs
+
shortcut
)
def
block_group
(
self
,
inputs
,
filters
,
block_fn
,
blocks
,
strides
,
name
,
is_training
):
"""Creates one group of blocks for the ResNet model.
Args:
inputs: `Tensor` of size `[batch, channels, height, width]`.
filters: `int` number of filters for the first convolution of the layer.
block_fn: `function` for the block to use within the model
blocks: `int` number of blocks contained in the layer.
strides: `int` stride to use for the first convolution of the layer. If
greater than 1, this layer will downsample the input.
name: `str`name for the Tensor output of the block layer.
is_training: `bool` if True, the model is in training mode.
Returns:
The output `Tensor` of the block layer.
"""
# Only the first block per block_group uses projection shortcut and strides.
inputs
=
block_fn
(
inputs
,
filters
,
strides
,
use_projection
=
True
,
is_training
=
is_training
)
for
_
in
range
(
1
,
blocks
):
inputs
=
block_fn
(
inputs
,
filters
,
1
,
is_training
=
is_training
)
return
tf
.
identity
(
inputs
,
name
)
  def resnet_v1_generator(self, block_fn, layers):
    """Generator for ResNet v1 models.

    Args:
      block_fn: `function` for the block to use within the model. Either
        `residual_block` or `bottleneck_block`.
      layers: list of 4 `int`s denoting the number of blocks to include in
        each of the 4 block groups. Each group consists of blocks that take
        inputs of the same resolution.

    Returns:
      Model `function` that takes in `inputs` and `is_training` and returns
      the output `Tensor` of the ResNet model.
    """

    def model(inputs, is_training=None):
      """Creation of the model graph."""
      inputs = self.conv2d_fixed_padding(
          inputs=inputs, filters=64, kernel_size=7, strides=2)
      inputs = tf.identity(inputs, 'initial_conv')
      inputs = self._norm_activation()(inputs, is_training=is_training)

      inputs = tf.keras.layers.MaxPool2D(
          pool_size=3, strides=2, padding='SAME',
          data_format=self._data_format)(
              inputs)
      inputs = tf.identity(inputs, 'initial_max_pool')

      c2 = self.block_group(
          inputs=inputs, filters=64, block_fn=block_fn, blocks=layers[0],
          strides=1, name='block_group1', is_training=is_training)
      c3 = self.block_group(
          inputs=c2, filters=128, block_fn=block_fn, blocks=layers[1],
          strides=2, name='block_group2', is_training=is_training)
      c4 = self.block_group(
          inputs=c3, filters=256, block_fn=block_fn, blocks=layers[2],
          strides=2, name='block_group3', is_training=is_training)
      c5 = self.block_group(
          inputs=c4, filters=512, block_fn=block_fn, blocks=layers[3],
          strides=2, name='block_group4', is_training=is_training)
      return {2: c2, 3: c3, 4: c4, 5: c5}

    return model
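# --- Illustrative usage sketch (not part of the original file) ---
# The standard depth-to-layers mappings used with this generator; ResNet-50,
# for example, stacks [3, 4, 6, 3] bottleneck blocks across the four groups
# (ResNet-101 uses [3, 4, 23, 3], ResNet-152 uses [3, 8, 36, 3]). The
# `Resnet` wrapper class hosting these methods is defined earlier in this
# file.
#
#   model_fn = self.resnet_v1_generator(self.bottleneck_block, [3, 4, 6, 3])
#   features = model_fn(images, is_training=True)
#   # features is {2: c2, 3: c3, 4: c4, 5: c5}; level l has stride 2**l.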
official/legacy/detection/modeling/architecture/spinenet.py
0 → 100644
View file @
e293e338
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
# ==============================================================================
"""Implementation of SpineNet model.
X. Du, T-Y. Lin, P. Jin, G. Ghiasi, M. Tan, Y. Cui, Q. V. Le, X. Song
SpineNet: Learning Scale-Permuted Backbone for Recognition and Localization
https://arxiv.org/abs/1912.05027
"""
import math

from absl import logging
import tensorflow as tf

from official.legacy.detection.modeling.architecture import nn_blocks
from official.modeling import tf_utils

layers = tf.keras.layers

FILTER_SIZE_MAP = {
    1: 32,
    2: 64,
    3: 128,
    4: 256,
    5: 256,
    6: 256,
    7: 256,
}
# The fixed SpineNet architecture discovered by NAS.
# Each element represents a specification of a building block:
# (block_level, block_fn, (input_offset0, input_offset1), is_output).
SPINENET_BLOCK_SPECS = [
    (2, 'bottleneck', (0, 1), False),
    (4, 'residual', (0, 1), False),
    (3, 'bottleneck', (2, 3), False),
    (4, 'bottleneck', (2, 4), False),
    (6, 'residual', (3, 5), False),
    (4, 'bottleneck', (3, 5), False),
    (5, 'residual', (6, 7), False),
    (7, 'residual', (6, 8), False),
    (5, 'bottleneck', (8, 9), False),
    (5, 'bottleneck', (8, 10), False),
    (4, 'bottleneck', (5, 10), True),
    (3, 'bottleneck', (4, 10), True),
    (5, 'bottleneck', (7, 12), True),
    (7, 'bottleneck', (5, 14), True),
    (6, 'bottleneck', (12, 14), True),
]
SCALING_MAP = {
    '49S': {
        'endpoints_num_filters': 128,
        'filter_size_scale': 0.65,
        'resample_alpha': 0.5,
        'block_repeats': 1,
    },
    '49': {
        'endpoints_num_filters': 256,
        'filter_size_scale': 1.0,
        'resample_alpha': 0.5,
        'block_repeats': 1,
    },
    '96': {
        'endpoints_num_filters': 256,
        'filter_size_scale': 1.0,
        'resample_alpha': 0.5,
        'block_repeats': 2,
    },
    '143': {
        'endpoints_num_filters': 256,
        'filter_size_scale': 1.0,
        'resample_alpha': 1.0,
        'block_repeats': 3,
    },
    '190': {
        'endpoints_num_filters': 512,
        'filter_size_scale': 1.3,
        'resample_alpha': 1.0,
        'block_repeats': 4,
    },
}
class BlockSpec(object):
  """A container class that specifies the block configuration for SpineNet."""

  def __init__(self, level, block_fn, input_offsets, is_output):
    self.level = level
    self.block_fn = block_fn
    self.input_offsets = input_offsets
    self.is_output = is_output


def build_block_specs(block_specs=None):
  """Builds the list of BlockSpec objects for SpineNet."""
  if not block_specs:
    block_specs = SPINENET_BLOCK_SPECS
  logging.info('Building SpineNet block specs: %s', block_specs)
  return [BlockSpec(*b) for b in block_specs]
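# Example (illustrative, not part of the original file): the first entry of
# SPINENET_BLOCK_SPECS, (2, 'bottleneck', (0, 1), False), becomes a BlockSpec
# that builds a level-2 bottleneck block from the two stem blocks (offsets 0
# and 1 in the growing list of built blocks) and is not an output block.
#
#   spec = build_block_specs()[0]
#   assert spec.level == 2 and spec.input_offsets == (0, 1)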
class SpineNet(tf.keras.Model):
  """Class to build SpineNet models."""

  def __init__(self,
               input_specs=tf.keras.layers.InputSpec(
                   shape=[None, 640, 640, 3]),
               min_level=3,
               max_level=7,
               block_specs=build_block_specs(),
               endpoints_num_filters=256,
               resample_alpha=0.5,
               block_repeats=1,
               filter_size_scale=1.0,
               kernel_initializer='VarianceScaling',
               kernel_regularizer=None,
               bias_regularizer=None,
               activation='relu',
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               **kwargs):
    """SpineNet model."""
    self._min_level = min_level
    self._max_level = max_level
    self._block_specs = block_specs
    self._endpoints_num_filters = endpoints_num_filters
    self._resample_alpha = resample_alpha
    self._block_repeats = block_repeats
    self._filter_size_scale = filter_size_scale
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    if activation == 'relu':
      self._activation = tf.nn.relu
    elif activation == 'swish':
      self._activation = tf.nn.swish
    else:
      raise ValueError('Activation {} not implemented.'.format(activation))
    self._init_block_fn = 'bottleneck'
    self._num_init_blocks = 2

    if use_sync_bn:
      self._norm = layers.experimental.SyncBatchNormalization
    else:
      self._norm = layers.BatchNormalization

    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1

    # Build SpineNet.
    inputs = tf.keras.Input(shape=input_specs.shape[1:])
    net = self._build_stem(inputs=inputs)
    net = self._build_scale_permuted_network(
        net=net, input_width=input_specs.shape[1])
    net = self._build_endpoints(net=net)

    super(SpineNet, self).__init__(inputs=inputs, outputs=net)
  def _block_group(self,
                   inputs,
                   filters,
                   strides,
                   block_fn_cand,
                   block_repeats=1,
                   name='block_group'):
    """Creates one group of blocks for the SpineNet model."""
    block_fn_candidates = {
        'bottleneck': nn_blocks.BottleneckBlock,
        'residual': nn_blocks.ResidualBlock,
    }
    block_fn = block_fn_candidates[block_fn_cand]
    _, _, _, num_filters = inputs.get_shape().as_list()

    if block_fn_cand == 'bottleneck':
      use_projection = not (num_filters == (filters * 4) and strides == 1)
    else:
      use_projection = not (num_filters == filters and strides == 1)

    x = block_fn(
        filters=filters,
        strides=strides,
        use_projection=use_projection,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activation=self._activation,
        use_sync_bn=self._use_sync_bn,
        norm_momentum=self._norm_momentum,
        norm_epsilon=self._norm_epsilon)(
            inputs)
    for _ in range(1, block_repeats):
      x = block_fn(
          filters=filters,
          strides=1,
          use_projection=False,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer,
          activation=self._activation,
          use_sync_bn=self._use_sync_bn,
          norm_momentum=self._norm_momentum,
          norm_epsilon=self._norm_epsilon)(
              x)
    return tf.identity(x, name=name)
  def _build_stem(self, inputs):
    """Build SpineNet stem."""
    x = layers.Conv2D(
        filters=64,
        kernel_size=7,
        strides=2,
        use_bias=False,
        padding='same',
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)(
            inputs)
    x = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)(
            x)
    x = tf_utils.get_activation(self._activation)(x)
    x = layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)

    net = []
    # Build the initial level 2 blocks.
    for i in range(self._num_init_blocks):
      x = self._block_group(
          inputs=x,
          filters=int(FILTER_SIZE_MAP[2] * self._filter_size_scale),
          strides=1,
          block_fn_cand=self._init_block_fn,
          block_repeats=self._block_repeats,
          name='stem_block_{}'.format(i + 1))
      net.append(x)
    return net
  def _build_scale_permuted_network(self, net, input_width,
                                    weighted_fusion=False):
    """Build scale-permuted network."""
    net_sizes = [int(math.ceil(input_width / 2**2))] * len(net)
    net_block_fns = [self._init_block_fn] * len(net)
    num_outgoing_connections = [0] * len(net)

    endpoints = {}
    for i, block_spec in enumerate(self._block_specs):
      # Find out specs for the target block.
      target_width = int(math.ceil(input_width / 2**block_spec.level))
      target_num_filters = int(FILTER_SIZE_MAP[block_spec.level] *
                               self._filter_size_scale)
      target_block_fn = block_spec.block_fn

      # Resample then merge input0 and input1.
      parents = []
      input0 = block_spec.input_offsets[0]
      input1 = block_spec.input_offsets[1]

      x0 = self._resample_with_alpha(
          inputs=net[input0],
          input_width=net_sizes[input0],
          input_block_fn=net_block_fns[input0],
          target_width=target_width,
          target_num_filters=target_num_filters,
          target_block_fn=target_block_fn,
          alpha=self._resample_alpha)
      parents.append(x0)
      num_outgoing_connections[input0] += 1

      x1 = self._resample_with_alpha(
          inputs=net[input1],
          input_width=net_sizes[input1],
          input_block_fn=net_block_fns[input1],
          target_width=target_width,
          target_num_filters=target_num_filters,
          target_block_fn=target_block_fn,
          alpha=self._resample_alpha)
      parents.append(x1)
      num_outgoing_connections[input1] += 1

      # Merge 0 outdegree blocks to the output block.
      if block_spec.is_output:
        for j, (j_feat, j_connections) in enumerate(
            zip(net, num_outgoing_connections)):
          if j_connections == 0 and (j_feat.shape[2] == target_width and
                                     j_feat.shape[3] == x0.shape[3]):
            parents.append(j_feat)
            num_outgoing_connections[j] += 1

      # pylint: disable=g-direct-tensorflow-import
      if weighted_fusion:
        dtype = parents[0].dtype
        parent_weights = [
            tf.nn.relu(
                tf.cast(
                    tf.Variable(1.0, name='block{}_fusion{}'.format(i, j)),
                    dtype=dtype)) for j in range(len(parents))
        ]
        weights_sum = tf.add_n(parent_weights)
        parents = [
            parents[i] * parent_weights[i] / (weights_sum + 0.0001)
            for i in range(len(parents))
        ]

      # Fuse all parent nodes then build a new block.
      x = tf_utils.get_activation(self._activation)(tf.add_n(parents))
      x = self._block_group(
          inputs=x,
          filters=target_num_filters,
          strides=1,
          block_fn_cand=target_block_fn,
          block_repeats=self._block_repeats,
          name='scale_permuted_block_{}'.format(i + 1))

      net.append(x)
      net_sizes.append(target_width)
      net_block_fns.append(target_block_fn)
      num_outgoing_connections.append(0)

      # Save output feats.
      if block_spec.is_output:
        if block_spec.level in endpoints:
          raise ValueError(
              'Duplicate feats found for output level {}.'.format(
                  block_spec.level))
        if (block_spec.level < self._min_level or
            block_spec.level > self._max_level):
          raise ValueError('Output level is out of range [{}, {}]'.format(
              self._min_level, self._max_level))
        endpoints[block_spec.level] = x

    return endpoints
  def _build_endpoints(self, net):
    """Match filter size for endpoints before sharing conv layers."""
    endpoints = {}
    for level in range(self._min_level, self._max_level + 1):
      x = layers.Conv2D(
          filters=self._endpoints_num_filters,
          kernel_size=1,
          strides=1,
          use_bias=False,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer)(
              net[level])
      x = self._norm(
          axis=self._bn_axis,
          momentum=self._norm_momentum,
          epsilon=self._norm_epsilon)(
              x)
      x = tf_utils.get_activation(self._activation)(x)
      endpoints[level] = x
    return endpoints
  def _resample_with_alpha(self,
                           inputs,
                           input_width,
                           input_block_fn,
                           target_width,
                           target_num_filters,
                           target_block_fn,
                           alpha=0.5):
    """Match resolution and feature dimension."""
    _, _, _, input_num_filters = inputs.get_shape().as_list()
    if input_block_fn == 'bottleneck':
      input_num_filters /= 4
    new_num_filters = int(input_num_filters * alpha)

    x = layers.Conv2D(
        filters=new_num_filters,
        kernel_size=1,
        strides=1,
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)(
            inputs)
    x = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)(
            x)
    x = tf_utils.get_activation(self._activation)(x)

    # Spatial resampling.
    if input_width > target_width:
      x = layers.Conv2D(
          filters=new_num_filters,
          kernel_size=3,
          strides=2,
          padding='SAME',
          use_bias=False,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer)(
              x)
      x = self._norm(
          axis=self._bn_axis,
          momentum=self._norm_momentum,
          epsilon=self._norm_epsilon)(
              x)
      x = tf_utils.get_activation(self._activation)(x)
      input_width /= 2
      while input_width > target_width:
        x = layers.MaxPool2D(pool_size=3, strides=2, padding='SAME')(x)
        input_width /= 2
    elif input_width < target_width:
      scale = target_width // input_width
      x = layers.UpSampling2D(size=(scale, scale))(x)

    # Last 1x1 conv to match filter size.
    if target_block_fn == 'bottleneck':
      target_num_filters *= 4
    x = layers.Conv2D(
        filters=target_num_filters,
        kernel_size=1,
        strides=1,
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)(
            x)
    x = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)(
            x)
    return x
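  # Walkthrough (illustrative, not part of the original file): for a 640-wide
  # input, a level-3 feature map is ceil(640 / 2**3) = 80 wide and a level-5
  # target is 20 wide. Downsampling applies the stride-2 3x3 conv once
  # (80 -> 40), then stride-2 max pooling until the target width is reached
  # (40 -> 20). Going to a finer level instead uses a single UpSampling2D
  # with integer scale, e.g. 20 -> 80 via size=(4, 4).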
class SpineNetBuilder(object):
  """SpineNet builder."""

  def __init__(self,
               model_id,
               input_specs=tf.keras.layers.InputSpec(
                   shape=[None, 640, 640, 3]),
               min_level=3,
               max_level=7,
               block_specs=build_block_specs(),
               kernel_initializer='VarianceScaling',
               kernel_regularizer=None,
               bias_regularizer=None,
               activation='relu',
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001):
    if model_id not in SCALING_MAP:
      raise ValueError(
          'SpineNet {} is not a valid architecture.'.format(model_id))
    scaling_params = SCALING_MAP[model_id]
    self._input_specs = input_specs
    self._min_level = min_level
    self._max_level = max_level
    self._block_specs = block_specs
    self._endpoints_num_filters = scaling_params['endpoints_num_filters']
    self._resample_alpha = scaling_params['resample_alpha']
    self._block_repeats = scaling_params['block_repeats']
    self._filter_size_scale = scaling_params['filter_size_scale']
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._activation = activation
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon

  def __call__(self, inputs, is_training=None):
    model = SpineNet(
        input_specs=self._input_specs,
        min_level=self._min_level,
        max_level=self._max_level,
        block_specs=self._block_specs,
        endpoints_num_filters=self._endpoints_num_filters,
        resample_alpha=self._resample_alpha,
        block_repeats=self._block_repeats,
        filter_size_scale=self._filter_size_scale,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activation=self._activation,
        use_sync_bn=self._use_sync_bn,
        norm_momentum=self._norm_momentum,
        norm_epsilon=self._norm_epsilon)
    return model(inputs)
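# --- Illustrative usage sketch (not part of the original file) ---
# A model_id selects its scaling profile from SCALING_MAP; '49' keeps the
# default filter sizes, while '49S' shrinks them via filter_size_scale=0.65.
#
#   backbone = SpineNetBuilder(model_id='49')
#   images = tf.keras.Input(shape=[640, 640, 3])
#   endpoints = backbone(images)  # {3: P3, ..., 7: P7} multilevel features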
official/legacy/detection/modeling/base_model.py
0 → 100644
View file @
e293e338
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base Model definition."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc
import re

import tensorflow as tf

from official.legacy.detection.modeling import checkpoint_utils
from official.legacy.detection.modeling import learning_rates
from official.legacy.detection.modeling import optimizers


def _make_filter_trainable_variables_fn(frozen_variable_prefix):
  """Creates a function for filtering trainable variables."""

  def _filter_trainable_variables(variables):
    """Filters trainable variables.

    Args:
      variables: a list of tf.Variable to be filtered.

    Returns:
      filtered_variables: a list of tf.Variable with the frozen ones
        filtered out.
    """
    # frozen_variable_prefix: a regex string specifying the prefix pattern of
    # the frozen variables' names.
    filtered_variables = [
        v for v in variables if not frozen_variable_prefix or
        not re.match(frozen_variable_prefix, v.name)
    ]
    return filtered_variables

  return _filter_trainable_variables
class Model(object):
  """Base class for model function."""

  __metaclass__ = abc.ABCMeta

  def __init__(self, params):
    self._use_bfloat16 = params.architecture.use_bfloat16

    if params.architecture.use_bfloat16:
      tf.compat.v2.keras.mixed_precision.set_global_policy('mixed_bfloat16')

    # Optimization.
    self._optimizer_fn = optimizers.OptimizerFactory(params.train.optimizer)
    self._learning_rate = learning_rates.learning_rate_generator(
        params.train.total_steps, params.train.learning_rate)

    self._frozen_variable_prefix = params.train.frozen_variable_prefix
    self._regularization_var_regex = params.train.regularization_variable_regex
    self._l2_weight_decay = params.train.l2_weight_decay

    # Checkpoint restoration.
    self._checkpoint = params.train.checkpoint.as_dict()

    # Summary.
    self._enable_summary = params.enable_summary
    self._model_dir = params.model_dir

  @abc.abstractmethod
  def build_outputs(self, inputs, mode):
    """Build the graph of the forward path."""
    pass

  @abc.abstractmethod
  def build_model(self, params, mode):
    """Build the model object."""
    pass

  @abc.abstractmethod
  def build_loss_fn(self):
    """Build the loss function."""
    pass

  def post_processing(self, labels, outputs):
    """Post-processing function."""
    return labels, outputs

  def model_outputs(self, inputs, mode):
    """Build the model outputs."""
    return self.build_outputs(inputs, mode)

  def build_optimizer(self):
    """Returns train_op to optimize total loss."""
    # Sets up the optimizer.
    return self._optimizer_fn(self._learning_rate)

  def make_filter_trainable_variables_fn(self):
    """Creates a function for filtering trainable variables."""
    return _make_filter_trainable_variables_fn(self._frozen_variable_prefix)

  def weight_decay_loss(self, trainable_variables):
    reg_variables = [
        v for v in trainable_variables
        if self._regularization_var_regex is None or
        re.match(self._regularization_var_regex, v.name)
    ]
    return self._l2_weight_decay * tf.add_n(
        [tf.nn.l2_loss(v) for v in reg_variables])

  def make_restore_checkpoint_fn(self):
    """Returns scaffold function to restore parameters from v1 checkpoint."""
    if 'skip_checkpoint_variables' in self._checkpoint:
      skip_regex = self._checkpoint['skip_checkpoint_variables']
    else:
      skip_regex = None
    return checkpoint_utils.make_restore_checkpoint_fn(
        self._checkpoint['path'],
        prefix=self._checkpoint['prefix'],
        skip_regex=skip_regex)

  def eval_metrics(self):
    """Returns tuple of metric function and its inputs for evaluation."""
    raise NotImplementedError('Unimplemented eval_metrics')
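# --- Illustrative sketch (not part of the original file) ---
# Concrete detectors subclass Model and fill in the abstract methods; see
# retinanet_model.py and maskrcnn_model.py in this directory for the real
# implementations.
#
#   class MyDetector(Model):
#
#     def build_outputs(self, inputs, mode):
#       ...  # forward pass producing prediction tensors
#
#     def build_model(self, params, mode):
#       ...  # construct and return the tf.keras.Model
#
#     def build_loss_fn(self):
#       ...  # return a callable computing the training loss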
official/legacy/detection/modeling/checkpoint_utils.py
0 → 100644
View file @
e293e338
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Util functions for loading checkpoints.
Especially for loading Tensorflow 1.x
checkpoint to Tensorflow 2.x (keras) model.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import re

from absl import logging
import tensorflow as tf


def _build_assignment_map(keras_model,
                          prefix='',
                          skip_variables_regex=None,
                          var_to_shape_map=None):
  """Builds the variable assignment map.

  Compute an assignment mapping for loading older checkpoints into a Keras
  model. Variable names are remapped from the original TPUEstimator model to
  the new Keras name.

  Args:
    keras_model: tf.keras.Model object to provide variables to assign.
    prefix: prefix in the variable name to be removed for alignment with
      names in the checkpoint.
    skip_variables_regex: regular expression to match the names of variables
      that do not need to be assigned.
    var_to_shape_map: variable name to shape mapping from the checkpoint.

  Returns:
    The variable assignment map.
  """
  assignment_map = {}

  checkpoint_names = []
  if var_to_shape_map:
    # pylint: disable=g-long-lambda
    checkpoint_names = list(
        filter(
            lambda x: not x.endswith('Momentum') and not x.endswith(
                'global_step'), var_to_shape_map.keys()))
    # pylint: enable=g-long-lambda

  logging.info('Number of variables in the checkpoint %d',
               len(checkpoint_names))

  for var in keras_model.variables:
    var_name = var.name

    if skip_variables_regex and re.match(skip_variables_regex, var_name):
      continue
    # Trim the index of the variable.
    if ':' in var_name:
      var_name = var_name[:var_name.rindex(':')]
    if var_name.startswith(prefix):
      var_name = var_name[len(prefix):]

    if not var_to_shape_map:
      assignment_map[var_name] = var
      continue

    # Match name with variables in the checkpoint.
    # pylint: disable=cell-var-from-loop
    match_names = list(
        filter(lambda x: x.endswith(var_name), checkpoint_names))
    # pylint: enable=cell-var-from-loop
    try:
      if match_names:
        assert len(match_names) == 1, 'more than one match for {}: {}'.format(
            var_name, match_names)
        checkpoint_names.remove(match_names[0])
        assignment_map[match_names[0]] = var
      else:
        logging.info('Error not found var name: %s', var_name)
    except Exception as e:
      logging.info('Error removing the match_name: %s', match_names)
      logging.info('Exception: %s', e)
      raise

  logging.info('Found matching variable in checkpoint: %d',
               len(assignment_map))
  return assignment_map
def _get_checkpoint_map(checkpoint_path):
  reader = tf.train.load_checkpoint(checkpoint_path)
  return reader.get_variable_to_shape_map()
def make_restore_checkpoint_fn(checkpoint_path, prefix='', skip_regex=None):
  """Returns scaffold function to restore parameters from v1 checkpoint.

  Args:
    checkpoint_path: path of the checkpoint folder or file.
      Example 1: '/path/to/model_dir/'
      Example 2: '/path/to/model.ckpt-22500'
    prefix: prefix in the variable name to be removed for alignment with
      names in the checkpoint.
    skip_regex: regular expression to match the names of variables that do
      not need to be assigned.

  Returns:
    Callable[tf.keras.Model] -> void. Fn to load a v1 checkpoint into a
    Keras model.
  """

  def _restore_checkpoint_fn(keras_model):
    """Loads pretrained model through scaffold function."""
    if not checkpoint_path:
      logging.info('checkpoint_path is empty')
      return
    var_prefix = prefix
    if prefix and not prefix.endswith('/'):
      var_prefix += '/'
    var_to_shape_map = _get_checkpoint_map(checkpoint_path)
    assert var_to_shape_map, 'var_to_shape_map should not be empty'
    vars_to_load = _build_assignment_map(
        keras_model,
        prefix=var_prefix,
        skip_variables_regex=skip_regex,
        var_to_shape_map=var_to_shape_map)
    if not vars_to_load:
      raise ValueError('Variables to load is empty.')
    tf.compat.v1.train.init_from_checkpoint(checkpoint_path, vars_to_load)

  return _restore_checkpoint_fn
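# --- Illustrative usage sketch (not part of the original file) ---
# The checkpoint path and prefix below are hypothetical; the path mirrors
# Example 2 in the docstring above.
#
#   restore_fn = make_restore_checkpoint_fn(
#       '/path/to/model.ckpt-22500', prefix='resnet50/', skip_regex=None)
#   restore_fn(keras_model)  # remaps TF1 variable names onto the Keras model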
official/legacy/detection/modeling/factory.py
0 → 100644
View file @
e293e338
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory to build detection model."""
from official.legacy.detection.modeling import maskrcnn_model
from official.legacy.detection.modeling import olnmask_model
from official.legacy.detection.modeling import retinanet_model
from official.legacy.detection.modeling import shapemask_model


def model_generator(params):
  """Model function generator."""
  if params.type == 'retinanet':
    model_fn = retinanet_model.RetinanetModel(params)
  elif params.type == 'mask_rcnn':
    model_fn = maskrcnn_model.MaskrcnnModel(params)
  elif params.type == 'olnmask':
    model_fn = olnmask_model.OlnMaskModel(params)
  elif params.type == 'shapemask':
    model_fn = shapemask_model.ShapeMaskModel(params)
  else:
    raise ValueError('Model %s is not supported.' % params.type)

  return model_fn
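# --- Illustrative usage sketch (not part of the original file) ---
# `params` here stands for the config object produced by the detection config
# system (see main.py in this directory); only `params.type` is inspected.
#
#   model = model_generator(params)  # e.g. params.type == 'retinanet'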