Commit 9a264c9f authored by Yeqing Li, committed by A. Unique TensorFlower

Deprecating official/vision/detection folder.

The folder is archived under official/legacy/detection.

PiperOrigin-RevId: 419643226
parent 8fb4e6a6
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The COCO-style evaluator.
The following snippet demonstrates the use of interfaces:
  evaluator = COCOEvaluator(...)
  for _ in range(num_evals):
    for _ in range(num_batches_per_eval):
      predictions, groundtruths = predictor.predict(...)  # pop a batch.
      evaluator.update(predictions, groundtruths)  # aggregate internal stats.
    evaluator.evaluate()  # finish one full eval.
See also: https://github.com/cocodataset/cocoapi/
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import atexit
import copy
import tempfile
from absl import logging
import numpy as np
from pycocotools import cocoeval
import six
import tensorflow as tf
from official.vision.detection.evaluation import coco_utils
from official.vision.detection.utils import class_utils
class MetricWrapper(object):
# This is only a wrapper for the COCO metric and works only on numpy arrays,
# so it doesn't inherit from tf.keras.layers.Layer or tf.keras.metrics.Metric.
def __init__(self, evaluator):
self._evaluator = evaluator
def update_state(self, y_true, y_pred):
labels = tf.nest.map_structure(lambda x: x.numpy(), y_true)
outputs = tf.nest.map_structure(lambda x: x.numpy(), y_pred)
groundtruths = {}
predictions = {}
for key, val in outputs.items():
if isinstance(val, tuple):
val = np.concatenate(val)
predictions[key] = val
for key, val in labels.items():
if isinstance(val, tuple):
val = np.concatenate(val)
groundtruths[key] = val
self._evaluator.update(predictions, groundtruths)
def result(self):
return self._evaluator.evaluate()
def reset_states(self):
return self._evaluator.reset()
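# Illustrative usage (added; not part of the original file). MetricWrapper adapts the
# numpy-based evaluator below to the Keras-style update_state/result interface, e.g.:
#   metric = MetricWrapper(COCOEvaluator(annotation_file=None, include_mask=False))
#   metric.update_state(labels, outputs)  # nests of eager tf.Tensors per batch.
#   results = metric.result()             # dict of COCO metrics; evaluate() also
#                                         # resets the evaluator's internal state.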
class COCOEvaluator(object):
"""COCO evaluation metric class."""
def __init__(self, annotation_file, include_mask, need_rescale_bboxes=True):
"""Constructs COCO evaluation class.
The class provides the interface to metrics_fn in TPUEstimator. The
_update_op() takes detections from each image and pushes them to
self.detections. The _evaluate() loads a JSON file in COCO annotation format
as the groundtruths and runs COCO evaluation.
Args:
annotation_file: a JSON file that stores annotations of the eval dataset.
If `annotation_file` is None, groundtruth annotations will be loaded
from the dataloader.
include_mask: a boolean to indicate whether or not to include the mask
eval.
need_rescale_bboxes: If True, bboxes in `predictions` will be rescaled back
to absolute values (`image_info` is needed in this case).
"""
if annotation_file:
if annotation_file.startswith('gs://'):
_, local_val_json = tempfile.mkstemp(suffix='.json')
tf.io.gfile.remove(local_val_json)
tf.io.gfile.copy(annotation_file, local_val_json)
atexit.register(tf.io.gfile.remove, local_val_json)
else:
local_val_json = annotation_file
self._coco_gt = coco_utils.COCOWrapper(
eval_type=('mask' if include_mask else 'box'),
annotation_file=local_val_json)
self._annotation_file = annotation_file
self._include_mask = include_mask
self._metric_names = [
'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'ARmax1', 'ARmax10',
'ARmax100', 'ARs', 'ARm', 'ARl'
]
self._required_prediction_fields = [
'source_id', 'num_detections', 'detection_classes', 'detection_scores',
'detection_boxes'
]
self._need_rescale_bboxes = need_rescale_bboxes
if self._need_rescale_bboxes:
self._required_prediction_fields.append('image_info')
self._required_groundtruth_fields = [
'source_id', 'height', 'width', 'classes', 'boxes'
]
if self._include_mask:
mask_metric_names = ['mask_' + x for x in self._metric_names]
self._metric_names.extend(mask_metric_names)
self._required_prediction_fields.extend(['detection_masks'])
self._required_groundtruth_fields.extend(['masks'])
self.reset()
def reset(self):
"""Resets internal states for a fresh run."""
self._predictions = {}
if not self._annotation_file:
self._groundtruths = {}
def evaluate(self):
"""Evaluates with detections from all images with COCO API.
Returns:
coco_metric: a dictionary of coco-style evaluation metrics keyed by metric
name (box metrics, plus mask metrics when `include_mask` is True), each a
float numpy scalar.
"""
if not self._annotation_file:
logging.info('There is no annotation_file in COCOEvaluator.')
gt_dataset = coco_utils.convert_groundtruths_to_coco_dataset(
self._groundtruths)
coco_gt = coco_utils.COCOWrapper(
eval_type=('mask' if self._include_mask else 'box'),
gt_dataset=gt_dataset)
else:
logging.info('Using annotation file: %s', self._annotation_file)
coco_gt = self._coco_gt
coco_predictions = coco_utils.convert_predictions_to_coco_annotations(
self._predictions)
coco_dt = coco_gt.loadRes(predictions=coco_predictions)
image_ids = [ann['image_id'] for ann in coco_predictions]
coco_eval = cocoeval.COCOeval(coco_gt, coco_dt, iouType='bbox')
coco_eval.params.imgIds = image_ids
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
coco_metrics = coco_eval.stats
if self._include_mask:
mcoco_eval = cocoeval.COCOeval(coco_gt, coco_dt, iouType='segm')
mcoco_eval.params.imgIds = image_ids
mcoco_eval.evaluate()
mcoco_eval.accumulate()
mcoco_eval.summarize()
mask_coco_metrics = mcoco_eval.stats
if self._include_mask:
metrics = np.hstack((coco_metrics, mask_coco_metrics))
else:
metrics = coco_metrics
# Cleans up the internal variables in order for a fresh eval next time.
self.reset()
metrics_dict = {}
for i, name in enumerate(self._metric_names):
metrics_dict[name] = metrics[i].astype(np.float32)
return metrics_dict
def _process_predictions(self, predictions):
image_scale = np.tile(predictions['image_info'][:, 2:3, :], (1, 1, 2))
predictions['detection_boxes'] = (
predictions['detection_boxes'].astype(np.float32))
predictions['detection_boxes'] /= image_scale
if 'detection_outer_boxes' in predictions:
predictions['detection_outer_boxes'] = (
predictions['detection_outer_boxes'].astype(np.float32))
predictions['detection_outer_boxes'] /= image_scale
def update(self, predictions, groundtruths=None):
"""Update and aggregate detection results and groundtruth data.
Args:
predictions: a dictionary of numpy arrays including the fields below. See
different parsers under `../dataloader` for more details.
Required fields:
- source_id: a numpy array of int or string of shape [batch_size].
- image_info [if `need_rescale_bboxes` is True]: a numpy array of
float of shape [batch_size, 4, 2].
- num_detections: a numpy array of int of shape [batch_size].
- detection_boxes: a numpy array of float of shape [batch_size, K, 4].
- detection_classes: a numpy array of int of shape [batch_size, K].
- detection_scores: a numpy array of float of shape [batch_size, K].
Optional fields:
- detection_masks: a numpy array of float of shape [batch_size, K,
mask_height, mask_width].
groundtruths: a dictionary of numpy arrays including the fields below. See
also different parsers under `../dataloader` for more details.
Required fields:
- source_id: a numpy array of int or string of shape [batch_size].
- height: a numpy array of int of shape [batch_size].
- width: a numpy array of int of shape [batch_size].
- num_detections: a numpy array of int of shape [batch_size].
- boxes: a numpy array of float of shape [batch_size, K, 4].
- classes: a numpy array of int of shape [batch_size, K].
Optional fields:
- is_crowds: a numpy array of int of shape [batch_size, K]. If the
field is absent, it is assumed that this instance is not crowd.
- areas: a numpy array of float of shape [batch_size, K]. If the field
is absent, the area is calculated using either boxes or masks
depending on which one is available.
- masks: a numpy array of float of shape [batch_size, K, mask_height,
mask_width],
Raises:
ValueError: if the required prediction or groundtruth fields are not
present in the incoming `predictions` or `groundtruths`.
"""
for k in self._required_prediction_fields:
if k not in predictions:
raise ValueError(
'Missing the required key `{}` in predictions!'.format(k))
if self._need_rescale_bboxes:
self._process_predictions(predictions)
for k, v in six.iteritems(predictions):
if k not in self._predictions:
self._predictions[k] = [v]
else:
self._predictions[k].append(v)
if not self._annotation_file:
assert groundtruths
for k in self._required_groundtruth_fields:
if k not in groundtruths:
raise ValueError(
'Missing the required key `{}` in groundtruths!'.format(k))
for k, v in six.iteritems(groundtruths):
if k not in self._groundtruths:
self._groundtruths[k] = [v]
else:
self._groundtruths[k].append(v)
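# Illustrative sketch (added; shapes and the `eval_batches` iterable are assumed):
# one full eval pass with groundtruths coming from the dataloader, i.e. with
# annotation_file=None.
#   evaluator = COCOEvaluator(annotation_file=None, include_mask=False,
#                             need_rescale_bboxes=False)
#   for predictions, groundtruths in eval_batches:  # dicts of numpy arrays per batch.
#     evaluator.update(predictions, groundtruths)
#   metrics = evaluator.evaluate()  # e.g. metrics['AP'], metrics['AP50'], ...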
class OlnXclassEvaluator(COCOEvaluator):
"""COCO evaluation metric class."""
def __init__(self, annotation_file, include_mask, need_rescale_bboxes=True,
use_category=True, seen_class='all'):
"""Constructs COCO evaluation class.
The class provides the interface to metrics_fn in TPUEstimator. The
_update_op() takes detections from each image and pushes them to
self.detections. The _evaluate() loads a JSON file in COCO annotation format
as the groundtruths and runs COCO evaluation.
Args:
annotation_file: a JSON file that stores annotations of the eval dataset.
If `annotation_file` is None, groundtruth annotations will be loaded
from the dataloader.
include_mask: a boolean to indicate whether or not to include the mask
eval.
need_rescale_bboxes: If True, bboxes in `predictions` will be rescaled back
to absolute values (`image_info` is needed in this case).
use_category: if `False`, treat all objects in all classes as one
foreground category.
seen_class: 'all' or 'voc' or 'nonvoc'
"""
super(OlnXclassEvaluator, self).__init__(
annotation_file=annotation_file,
include_mask=include_mask,
need_rescale_bboxes=need_rescale_bboxes)
self._use_category = use_category
self._seen_class = seen_class
self._seen_class_ids = class_utils.coco_split_class_ids(seen_class)
self._metric_names = [
'AP', 'AP50', 'AP75',
'APs', 'APm', 'APl',
'ARmax10', 'ARmax20', 'ARmax50', 'ARmax100', 'ARmax200',
'ARmax10s', 'ARmax10m', 'ARmax10l'
]
if self._seen_class != 'all':
self._metric_names.extend([
'AP_seen', 'AP50_seen', 'AP75_seen',
'APs_seen', 'APm_seen', 'APl_seen',
'ARmax10_seen', 'ARmax20_seen', 'ARmax50_seen',
'ARmax100_seen', 'ARmax200_seen',
'ARmax10s_seen', 'ARmax10m_seen', 'ARmax10l_seen',
'AP_novel', 'AP50_novel', 'AP75_novel',
'APs_novel', 'APm_novel', 'APl_novel',
'ARmax10_novel', 'ARmax20_novel', 'ARmax50_novel',
'ARmax100_novel', 'ARmax200_novel',
'ARmax10s_novel', 'ARmax10m_novel', 'ARmax10l_novel',
])
if self._include_mask:
mask_metric_names = ['mask_' + x for x in self._metric_names]
self._metric_names.extend(mask_metric_names)
self._required_prediction_fields.extend(['detection_masks'])
self._required_groundtruth_fields.extend(['masks'])
self.reset()
def evaluate(self):
"""Evaluates with detections from all images with COCO API.
Returns:
coco_metric: a dictionary of coco-style evaluation metrics keyed by metric
name (box metrics, plus mask metrics when `include_mask` is True), each a
float numpy scalar.
"""
if not self._annotation_file:
logging.info('There is no annotation_file in COCOEvaluator.')
gt_dataset = coco_utils.convert_groundtruths_to_coco_dataset(
self._groundtruths)
coco_gt = coco_utils.COCOWrapper(
eval_type=('mask' if self._include_mask else 'box'),
gt_dataset=gt_dataset)
else:
logging.info('Using annotation file: %s', self._annotation_file)
coco_gt = self._coco_gt
coco_predictions = coco_utils.convert_predictions_to_coco_annotations(
self._predictions)
coco_dt = coco_gt.loadRes(predictions=coco_predictions)
image_ids = [ann['image_id'] for ann in coco_predictions]
# Class manipulation: 'all' split samples -> ignored_split = 0.
for idx, _ in enumerate(coco_gt.dataset['annotations']):
coco_gt.dataset['annotations'][idx]['ignored_split'] = 0
coco_eval = cocoeval.OlnCOCOevalXclassWrapper(
coco_gt, coco_dt, iou_type='bbox')
coco_eval.params.maxDets = [10, 20, 50, 100, 200]
coco_eval.params.imgIds = image_ids
coco_eval.params.useCats = 0 if not self._use_category else 1
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
coco_metrics = coco_eval.stats
if self._include_mask:
mcoco_eval = cocoeval.OlnCOCOevalXclassWrapper(
coco_gt, coco_dt, iou_type='segm')
mcoco_eval.params.maxDets = [10, 20, 50, 100, 200]
mcoco_eval.params.imgIds = image_ids
mcoco_eval.params.useCats = 0 if not self._use_category else 1
mcoco_eval.evaluate()
mcoco_eval.accumulate()
mcoco_eval.summarize()
mask_coco_metrics = mcoco_eval.stats
if self._include_mask:
metrics = np.hstack((coco_metrics, mask_coco_metrics))
else:
metrics = coco_metrics
if self._seen_class != 'all':
# for seen class eval, samples of novel_class are ignored.
coco_gt_seen = copy.deepcopy(coco_gt)
for idx, ann in enumerate(coco_gt.dataset['annotations']):
if ann['category_id'] in self._seen_class_ids:
coco_gt_seen.dataset['annotations'][idx]['ignored_split'] = 0
else:
coco_gt_seen.dataset['annotations'][idx]['ignored_split'] = 1
coco_eval_seen = cocoeval.OlnCOCOevalXclassWrapper(
coco_gt_seen, coco_dt, iou_type='bbox')
coco_eval_seen.params.maxDets = [10, 20, 50, 100, 200]
coco_eval_seen.params.imgIds = image_ids
coco_eval_seen.params.useCats = 0 if not self._use_category else 1
coco_eval_seen.evaluate()
coco_eval_seen.accumulate()
coco_eval_seen.summarize()
coco_metrics_seen = coco_eval_seen.stats
if self._include_mask:
mcoco_eval_seen = cocoeval.OlnCOCOevalXclassWrapper(
coco_gt_seen, coco_dt, iou_type='segm')
mcoco_eval_seen.params.maxDets = [10, 20, 50, 100, 200]
mcoco_eval_seen.params.imgIds = image_ids
mcoco_eval_seen.params.useCats = 0 if not self._use_category else 1
mcoco_eval_seen.evaluate()
mcoco_eval_seen.accumulate()
mcoco_eval_seen.summarize()
mask_coco_metrics_seen = mcoco_eval_seen.stats
# for novel class eval, samples of seen_class are ignored.
coco_gt_novel = copy.deepcopy(coco_gt)
for idx, ann in enumerate(coco_gt.dataset['annotations']):
if ann['category_id'] in self._seen_class_ids:
coco_gt_novel.dataset['annotations'][idx]['ignored_split'] = 1
else:
coco_gt_novel.dataset['annotations'][idx]['ignored_split'] = 0
coco_eval_novel = cocoeval.OlnCOCOevalXclassWrapper(
coco_gt_novel, coco_dt, iou_type='bbox')
coco_eval_novel.params.maxDets = [10, 20, 50, 100, 200]
coco_eval_novel.params.imgIds = image_ids
coco_eval_novel.params.useCats = 0 if not self._use_category else 1
coco_eval_novel.evaluate()
coco_eval_novel.accumulate()
coco_eval_novel.summarize()
coco_metrics_novel = coco_eval_novel.stats
if self._include_mask:
mcoco_eval_novel = cocoeval.OlnCOCOevalXclassWrapper(
coco_gt_novel, coco_dt, iou_type='segm')
mcoco_eval_novel.params.maxDets = [10, 20, 50, 100, 200]
mcoco_eval_novel.params.imgIds = image_ids
mcoco_eval_novel.params.useCats = 0 if not self._use_category else 1
mcoco_eval_novel.evaluate()
mcoco_eval_novel.accumulate()
mcoco_eval_novel.summarize()
mask_coco_metrics_novel = mcoco_eval_novel.stats
# Combine all splits.
if self._include_mask:
metrics = np.hstack((
coco_metrics, coco_metrics_seen, coco_metrics_novel,
mask_coco_metrics, mask_coco_metrics_seen, mask_coco_metrics_novel))
else:
metrics = np.hstack((
coco_metrics, coco_metrics_seen, coco_metrics_novel))
# Cleans up the internal variables in order for a fresh eval next time.
self.reset()
metrics_dict = {}
for i, name in enumerate(self._metric_names):
metrics_dict[name] = metrics[i].astype(np.float32)
return metrics_dict
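# Note (added): the seen/novel breakdown above is implemented by deep-copying the
# groundtruth COCO object and setting an 'ignored_split' flag on every annotation;
# OlnCOCOevalXclassWrapper is expected to skip flagged annotations during matching.
# The flagging step reduces to the following, with `seen_ids` standing for
# `self._seen_class_ids` and `coco_gt_copy` for the copied groundtruth object:
#   for ann in coco_gt_copy.dataset['annotations']:
#     ann['ignored_split'] = 0 if ann['category_id'] in seen_ids else 1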
class OlnXdataEvaluator(OlnXclassEvaluator):
"""COCO evaluation metric class."""
def __init__(self, annotation_file, include_mask, need_rescale_bboxes=True,
use_category=True, seen_class='all'):
"""Constructs COCO evaluation class.
The class provides the interface to metrics_fn in TPUEstimator. The
_update_op() takes detections from each image and pushes them to
self.detections. The _evaluate() loads a JSON file in COCO annotation format
as the groundtruths and runs COCO evaluation.
Args:
annotation_file: a JSON file that stores annotations of the eval dataset.
If `annotation_file` is None, groundtruth annotations will be loaded
from the dataloader.
include_mask: a boolean to indicate whether or not to include the mask
eval.
need_rescale_bboxes: If True, bboxes in `predictions` will be rescaled back
to absolute values (`image_info` is needed in this case).
use_category: if `False`, treat all objects in all classes as one
foreground category.
seen_class: 'all' or 'voc' or 'nonvoc'
"""
super(OlnXdataEvaluator, self).__init__(
annotation_file=annotation_file,
include_mask=include_mask,
need_rescale_bboxes=need_rescale_bboxes,
use_category=False,
seen_class='all')
def evaluate(self):
"""Evaluates with detections from all images with COCO API.
Returns:
coco_metric: a dictionary of coco-style evaluation metrics keyed by metric
name (box metrics, plus mask metrics when `include_mask` is True), each a
float numpy scalar.
"""
if not self._annotation_file:
logging.info('There is no annotation_file in COCOEvaluator.')
gt_dataset = coco_utils.convert_groundtruths_to_coco_dataset(
self._groundtruths)
coco_gt = coco_utils.COCOWrapper(
eval_type=('mask' if self._include_mask else 'box'),
gt_dataset=gt_dataset)
else:
logging.info('Using annotation file: %s', self._annotation_file)
coco_gt = self._coco_gt
coco_predictions = coco_utils.convert_predictions_to_coco_annotations(
self._predictions)
coco_dt = coco_gt.loadRes(predictions=coco_predictions)
image_ids = [ann['image_id'] for ann in coco_predictions]
# Class manipulation: 'all' split samples -> ignored_split = 0.
for idx, _ in enumerate(coco_gt.dataset['annotations']):
coco_gt.dataset['annotations'][idx]['ignored_split'] = 0
coco_eval = cocoeval.OlnCOCOevalWrapper(coco_gt, coco_dt, iou_type='bbox')
coco_eval.params.maxDets = [10, 20, 50, 100, 200]
coco_eval.params.imgIds = image_ids
coco_eval.params.useCats = 0 if not self._use_category else 1
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
coco_metrics = coco_eval.stats
if self._include_mask:
mcoco_eval = cocoeval.OlnCOCOevalWrapper(coco_gt, coco_dt,
iou_type='segm')
mcoco_eval.params.maxDets = [10, 20, 50, 100, 200]
mcoco_eval.params.imgIds = image_ids
mcoco_eval.params.useCats = 0 if not self._use_category else 1
mcoco_eval.evaluate()
mcoco_eval.accumulate()
mcoco_eval.summarize()
mask_coco_metrics = mcoco_eval.stats
if self._include_mask:
metrics = np.hstack((coco_metrics, mask_coco_metrics))
else:
metrics = coco_metrics
# Cleans up the internal variables in order for a fresh eval next time.
self.reset()
metrics_dict = {}
for i, name in enumerate(self._metric_names):
metrics_dict[name] = metrics[i].astype(np.float32)
return metrics_dict
class ShapeMaskCOCOEvaluator(COCOEvaluator):
"""COCO evaluation metric class for ShapeMask."""
def __init__(self, mask_eval_class, **kwargs):
"""Constructs COCO evaluation class.
The class provides the interface to metrics_fn in TPUEstimator. The
_update_op() takes detections from each image and pushes them to
self.detections. The _evaluate() loads a JSON file in COCO annotation format
as the groundtruths and runs COCO evaluation.
Args:
mask_eval_class: the set of classes for mask evaluation.
**kwargs: other keyword arguments passed to the parent class initializer.
"""
super(ShapeMaskCOCOEvaluator, self).__init__(**kwargs)
self._mask_eval_class = mask_eval_class
self._eval_categories = class_utils.coco_split_class_ids(mask_eval_class)
if mask_eval_class != 'all':
self._metric_names = [
x.replace('mask', 'novel_mask') for x in self._metric_names
]
def evaluate(self):
"""Evaluates with detections from all images with COCO API.
Returns:
coco_metric: a dictionary of coco-style evaluation metrics keyed by metric
name (box metrics, plus mask metrics when `include_mask` is True), each a
float numpy scalar.
"""
if not self._annotation_file:
gt_dataset = coco_utils.convert_groundtruths_to_coco_dataset(
self._groundtruths)
coco_gt = coco_utils.COCOWrapper(
eval_type=('mask' if self._include_mask else 'box'),
gt_dataset=gt_dataset)
else:
coco_gt = self._coco_gt
coco_predictions = coco_utils.convert_predictions_to_coco_annotations(
self._predictions)
coco_dt = coco_gt.loadRes(predictions=coco_predictions)
image_ids = [ann['image_id'] for ann in coco_predictions]
coco_eval = cocoeval.COCOeval(coco_gt, coco_dt, iouType='bbox')
coco_eval.params.imgIds = image_ids
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
coco_metrics = coco_eval.stats
if self._include_mask:
mcoco_eval = cocoeval.COCOeval(coco_gt, coco_dt, iouType='segm')
mcoco_eval.params.imgIds = image_ids
mcoco_eval.evaluate()
mcoco_eval.accumulate()
mcoco_eval.summarize()
if self._mask_eval_class == 'all':
metrics = np.hstack((coco_metrics, mcoco_eval.stats))
else:
mask_coco_metrics = mcoco_eval.category_stats
val_catg_idx = np.isin(mcoco_eval.params.catIds, self._eval_categories)
# Gather the valid evaluation of the eval categories.
if np.any(val_catg_idx):
mean_val_metrics = []
for mid in range(len(self._metric_names) // 2):
mean_val_metrics.append(
np.nanmean(mask_coco_metrics[mid][val_catg_idx]))
mean_val_metrics = np.array(mean_val_metrics)
else:
mean_val_metrics = np.zeros(len(self._metric_names) // 2)
metrics = np.hstack((coco_metrics, mean_val_metrics))
else:
metrics = coco_metrics
# Cleans up the internal variables in order for a fresh eval next time.
self.reset()
metrics_dict = {}
for i, name in enumerate(self._metric_names):
metrics_dict[name] = metrics[i].astype(np.float32)
return metrics_dict
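# Note (added): when `mask_eval_class` is not 'all', the mask metrics above are not
# taken from the global COCO summary; instead the per-category stats
# (`mcoco_eval.category_stats`) are averaged over the category ids returned by
# class_utils.coco_split_class_ids, and the results are reported under the
# 'novel_mask_*' metric names set in the constructor.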
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Util functions related to pycocotools and COCO eval."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
import json
from absl import logging
import numpy as np
from PIL import Image
from pycocotools import coco
from pycocotools import mask as mask_api
import six
import tensorflow as tf
from official.vision.detection.dataloader import tf_example_decoder
from official.vision.detection.utils import box_utils
from official.vision.detection.utils import mask_utils
class COCOWrapper(coco.COCO):
"""COCO wrapper class.
This class wraps the COCO API object, which provides the following additional
functionalities:
1. Support string type image id.
2. Support loading the groundtruth dataset using the external annotation
dictionary.
3. Support loading the prediction results using the external annotation
dictionary.
"""
def __init__(self, eval_type='box', annotation_file=None, gt_dataset=None):
"""Instantiates a COCO-style API object.
Args:
eval_type: either 'box' or 'mask'.
annotation_file: a JSON file that stores annotations of the eval dataset.
This is required if `gt_dataset` is not provided.
gt_dataset: the groundtruth eval dataset in COCO API format.
"""
if ((annotation_file and gt_dataset) or
((not annotation_file) and (not gt_dataset))):
raise ValueError('One and only one of `annotation_file` and `gt_dataset` '
'needs to be specified.')
if eval_type not in ['box', 'mask']:
raise ValueError('The `eval_type` can only be either `box` or `mask`.')
coco.COCO.__init__(self, annotation_file=annotation_file)
self._eval_type = eval_type
if gt_dataset:
self.dataset = gt_dataset
self.createIndex()
def loadRes(self, predictions):
"""Loads result file and return a result api object.
Args:
predictions: a list of dictionary each representing an annotation in COCO
format. The required fields are `image_id`, `category_id`, `score`,
`bbox`, `segmentation`.
Returns:
res: result COCO api object.
Raises:
ValueError: if the set of image ids from predictions is not a subset of
the set of image ids of the groundtruth dataset.
"""
res = coco.COCO()
res.dataset['images'] = copy.deepcopy(self.dataset['images'])
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
image_ids = [ann['image_id'] for ann in predictions]
if set(image_ids) != (set(image_ids) & set(self.getImgIds())):
raise ValueError('Results do not correspond to the current dataset!')
for ann in predictions:
x1, x2, y1, y2 = [ann['bbox'][0], ann['bbox'][0] + ann['bbox'][2],
ann['bbox'][1], ann['bbox'][1] + ann['bbox'][3]]
if self._eval_type == 'box':
ann['area'] = ann['bbox'][2] * ann['bbox'][3]
ann['segmentation'] = [
[x1, y1, x1, y2, x2, y2, x2, y1]]
elif self._eval_type == 'mask':
ann['area'] = mask_api.area(ann['segmentation'])
res.dataset['annotations'] = copy.deepcopy(predictions)
res.createIndex()
return res
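# Illustrative usage (added; the annotation path and box values are assumed):
# loadRes accepts an in-memory list of COCO-format detections instead of a results
# JSON file. For 'box' eval it also fills in 'area' and a rectangle 'segmentation'.
#   coco_gt = COCOWrapper(eval_type='box', annotation_file='/tmp/instances_val.json')
#   detections = [{'image_id': 42, 'category_id': 1, 'score': 0.9,
#                  'bbox': [10.0, 20.0, 50.0, 80.0]}]  # [x, y, width, height].
#   coco_dt = coco_gt.loadRes(predictions=detections)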
def convert_predictions_to_coco_annotations(predictions):
"""Converts a batch of predictions to annotations in COCO format.
Args:
predictions: a dictionary of lists of numpy arrays including the following
fields. K below denotes the maximum number of instances per image.
Required fields:
- source_id: a list of numpy arrays of int or string of shape
[batch_size].
- num_detections: a list of numpy arrays of int of shape [batch_size].
- detection_boxes: a list of numpy arrays of float of shape
[batch_size, K, 4], where coordinates are in the original image
space (not the scaled image space).
- detection_classes: a list of numpy arrays of int of shape
[batch_size, K].
- detection_scores: a list of numpy arrays of float of shape
[batch_size, K].
Optional fields:
- detection_masks: a list of numpy arrays of float of shape
[batch_size, K, mask_height, mask_width].
Returns:
coco_predictions: prediction in COCO annotation format.
"""
coco_predictions = []
num_batches = len(predictions['source_id'])
batch_size = predictions['source_id'][0].shape[0]
max_num_detections = predictions['detection_classes'][0].shape[1]
use_outer_box = 'detection_outer_boxes' in predictions
for i in range(num_batches):
predictions['detection_boxes'][i] = box_utils.yxyx_to_xywh(
predictions['detection_boxes'][i])
if use_outer_box:
predictions['detection_outer_boxes'][i] = box_utils.yxyx_to_xywh(
predictions['detection_outer_boxes'][i])
mask_boxes = predictions['detection_outer_boxes']
else:
mask_boxes = predictions['detection_boxes']
for j in range(batch_size):
if 'detection_masks' in predictions:
image_masks = mask_utils.paste_instance_masks(
predictions['detection_masks'][i][j],
mask_boxes[i][j],
int(predictions['image_info'][i][j, 0, 0]),
int(predictions['image_info'][i][j, 0, 1]))
binary_masks = (image_masks > 0.0).astype(np.uint8)
encoded_masks = [
mask_api.encode(np.asfortranarray(binary_mask))
for binary_mask in list(binary_masks)]
for k in range(max_num_detections):
ann = {}
ann['image_id'] = predictions['source_id'][i][j]
ann['category_id'] = predictions['detection_classes'][i][j, k]
ann['bbox'] = predictions['detection_boxes'][i][j, k]
ann['score'] = predictions['detection_scores'][i][j, k]
if 'detection_masks' in predictions:
ann['segmentation'] = encoded_masks[k]
coco_predictions.append(ann)
for i, ann in enumerate(coco_predictions):
ann['id'] = i + 1
return coco_predictions
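# Note (added): convert_predictions_to_coco_annotations mutates
# predictions['detection_boxes'] in place (yxyx -> xywh via box_utils.yxyx_to_xywh)
# and flattens all batches into one list of per-detection annotations, each with a
# 1-based 'id'. A minimal input sketch with assumed sizes (1 batch of 2 images,
# 3 detections each) would produce 6 annotations:
#   predictions = {
#       'source_id': [np.array([1, 2])],
#       'num_detections': [np.array([3, 3])],
#       'detection_boxes': [np.zeros((2, 3, 4))],  # yxyx, original image space.
#       'detection_classes': [np.ones((2, 3), dtype=np.int32)],
#       'detection_scores': [np.zeros((2, 3))],
#   }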
def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None):
"""Converts groundtruths to the dataset in COCO format.
Args:
groundtruths: a dictionary of lists of numpy arrays including the fields
below. Each element in a list holds the values for one batch of examples.
K below denotes the actual number of instances for each image.
Required fields:
- source_id: a list of numpy arrays of int or string of shape
[batch_size].
- height: a list of numpy arrays of int of shape [batch_size].
- width: a list of numpy arrays of int of shape [batch_size].
- num_detections: a list of numpy arrays of int of shape [batch_size].
- boxes: a list of numpy arrays of float of shape [batch_size, K, 4],
where coordinates are in the original image space (not the
normalized coordinates).
- classes: a list of numpy arrays of int of shape [batch_size, K].
Optional fields:
- is_crowds: a list of numpy arrays of int of shape [batch_size, K]. If
the field is absent, it is assumed that this instance is not crowd.
- areas: a list of numpy arrays of float of shape [batch_size, K]. If the
field is absent, the area is calculated using either boxes or
masks depending on which one is available.
- masks: a list of numpy arrays of string of shape [batch_size, K],
label_map: (optional) a dictionary that maps a category id to the category
name. If `None`, collect the category mapping from the `groundtruths`.
Returns:
coco_groundtruths: the groundtruth dataset in COCO format.
"""
source_ids = np.concatenate(groundtruths['source_id'], axis=0)
heights = np.concatenate(groundtruths['height'], axis=0)
widths = np.concatenate(groundtruths['width'], axis=0)
gt_images = [{'id': int(i), 'height': int(h), 'width': int(w)} for i, h, w
in zip(source_ids, heights, widths)]
gt_annotations = []
num_batches = len(groundtruths['source_id'])
batch_size = groundtruths['source_id'][0].shape[0]
for i in range(num_batches):
for j in range(batch_size):
num_instances = groundtruths['num_detections'][i][j]
for k in range(num_instances):
ann = {}
ann['image_id'] = int(groundtruths['source_id'][i][j])
if 'is_crowds' in groundtruths:
ann['iscrowd'] = int(groundtruths['is_crowds'][i][j, k])
else:
ann['iscrowd'] = 0
ann['category_id'] = int(groundtruths['classes'][i][j, k])
boxes = groundtruths['boxes'][i]
ann['bbox'] = [
float(boxes[j, k, 1]),
float(boxes[j, k, 0]),
float(boxes[j, k, 3] - boxes[j, k, 1]),
float(boxes[j, k, 2] - boxes[j, k, 0])]
if 'areas' in groundtruths:
ann['area'] = float(groundtruths['areas'][i][j, k])
else:
ann['area'] = float(
(boxes[j, k, 3] - boxes[j, k, 1]) *
(boxes[j, k, 2] - boxes[j, k, 0]))
if 'masks' in groundtruths:
mask = Image.open(six.BytesIO(groundtruths['masks'][i][j, k]))
width, height = mask.size
np_mask = (
np.array(mask.getdata()).reshape(height, width).astype(np.uint8))
np_mask[np_mask > 0] = 255
encoded_mask = mask_api.encode(np.asfortranarray(np_mask))
ann['segmentation'] = encoded_mask
if 'areas' not in groundtruths:
ann['area'] = mask_api.area(encoded_mask)
gt_annotations.append(ann)
for i, ann in enumerate(gt_annotations):
ann['id'] = i + 1
if label_map:
gt_categories = [{'id': i, 'name': label_map[i]} for i in label_map]
else:
category_ids = [gt['category_id'] for gt in gt_annotations]
gt_categories = [{'id': i} for i in set(category_ids)]
gt_dataset = {
'images': gt_images,
'categories': gt_categories,
'annotations': copy.deepcopy(gt_annotations),
}
return gt_dataset
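# Note (added): groundtruth boxes enter as [ymin, xmin, ymax, xmax] and are emitted
# as COCO [x, y, width, height]; when 'areas' is absent the area falls back to the
# box area, or to mask_api.area of the encoded mask when 'masks' is provided.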
class COCOGroundtruthGenerator(object):
"""Generates the groundtruth annotations from a single example."""
def __init__(self, file_pattern, num_examples, include_mask):
self._file_pattern = file_pattern
self._num_examples = num_examples
self._include_mask = include_mask
self._dataset_fn = tf.data.TFRecordDataset
def _parse_single_example(self, example):
"""Parses a single serialized tf.Example proto.
Args:
example: a serialized tf.Example proto string.
Returns:
A dictionary of groundtruth with the following fields:
source_id: a scalar tensor of int64 representing the image source_id.
height: a scalar tensor of int64 representing the image height.
width: a scalar tensor of int64 representing the image width.
boxes: a float tensor of shape [K, 4], representing the groundtruth
boxes in absolute coordinates with respect to the original image size.
classes: an int64 tensor of shape [K], representing the class labels of
each instance.
is_crowds: a bool tensor of shape [K], indicating whether the instance
is crowd.
areas: a float tensor of shape [K], indicating the area of each
instance.
masks: a string tensor of shape [K], containing the bytes of the png
mask of each instance.
"""
decoder = tf_example_decoder.TfExampleDecoder(
include_mask=self._include_mask)
decoded_tensors = decoder.decode(example)
image = decoded_tensors['image']
image_size = tf.shape(image)[0:2]
boxes = box_utils.denormalize_boxes(
decoded_tensors['groundtruth_boxes'], image_size)
groundtruths = {
'source_id': tf.string_to_number(
decoded_tensors['source_id'], out_type=tf.int64),
'height': decoded_tensors['height'],
'width': decoded_tensors['width'],
'num_detections': tf.shape(decoded_tensors['groundtruth_classes'])[0],
'boxes': boxes,
'classes': decoded_tensors['groundtruth_classes'],
'is_crowds': decoded_tensors['groundtruth_is_crowd'],
'areas': decoded_tensors['groundtruth_area'],
}
if self._include_mask:
groundtruths.update({
'masks': decoded_tensors['groundtruth_instance_masks_png'],
})
return groundtruths
def _build_pipeline(self):
"""Builds data pipeline to generate groundtruth annotations."""
dataset = tf.data.Dataset.list_files(self._file_pattern, shuffle=False)
dataset = dataset.apply(
tf.data.experimental.parallel_interleave(
lambda filename: self._dataset_fn(filename).prefetch(1),
cycle_length=32,
sloppy=False))
dataset = dataset.map(self._parse_single_example, num_parallel_calls=64)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
dataset = dataset.batch(1, drop_remainder=False)
return dataset
def __call__(self):
with tf.Graph().as_default():
dataset = self._build_pipeline()
groundtruth = dataset.make_one_shot_iterator().get_next()
with tf.Session() as sess:
for _ in range(self._num_examples):
groundtruth_result = sess.run(groundtruth)
yield groundtruth_result
def scan_and_generator_annotation_file(file_pattern,
num_samples,
include_mask,
annotation_file):
"""Scans and generate the COCO-style annotation JSON file given a dataset."""
groundtruth_generator = COCOGroundtruthGenerator(
file_pattern, num_samples, include_mask)
generate_annotation_file(groundtruth_generator, annotation_file)
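# Illustrative usage (added; paths and the sample count are assumed):
#   scan_and_generator_annotation_file(
#       file_pattern='/data/coco_val-*.tfrecord',
#       num_samples=5000,
#       include_mask=True,
#       annotation_file='/tmp/val_annotations.json')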
def generate_annotation_file(groundtruth_generator,
annotation_file):
"""Generates COCO-style annotation JSON file given a groundtruth generator."""
groundtruths = {}
logging.info('Loading groundtruth annotations from dataset to memory...')
for groundtruth in groundtruth_generator():
for k, v in six.iteritems(groundtruth):
if k not in groundtruths:
groundtruths[k] = [v]
else:
groundtruths[k].append(v)
gt_dataset = convert_groundtruths_to_coco_dataset(groundtruths)
logging.info('Saving groundtruth annotations to the JSON file...')
with tf.io.gfile.GFile(annotation_file, 'w') as f:
f.write(json.dumps(gt_dataset))
logging.info('Done saving the JSON file...')
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluator factory."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from official.vision.detection.evaluation import coco_evaluator
def evaluator_generator(params):
"""Generator function for various evaluators."""
if params.type == 'box':
evaluator = coco_evaluator.COCOEvaluator(
annotation_file=params.val_json_file, include_mask=False)
elif params.type == 'box_and_mask':
evaluator = coco_evaluator.COCOEvaluator(
annotation_file=params.val_json_file, include_mask=True)
elif params.type == 'oln_xclass_box':
evaluator = coco_evaluator.OlnXclassEvaluator(
annotation_file=params.val_json_file, include_mask=False,
use_category=False, seen_class=params.seen_class,)
elif params.type == 'oln_xclass_box_and_mask':
evaluator = coco_evaluator.OlnXclassEvaluator(
annotation_file=params.val_json_file, include_mask=True,
use_category=False, seen_class=params.seen_class,)
elif params.type == 'oln_xdata_box':
evaluator = coco_evaluator.OlnXdataEvaluator(
annotation_file=params.val_json_file, include_mask=False,
use_category=False, seen_class='all',)
elif params.type == 'shapemask_box_and_mask':
evaluator = coco_evaluator.ShapeMaskCOCOEvaluator(
mask_eval_class=params.mask_eval_class,
annotation_file=params.val_json_file, include_mask=True)
else:
raise ValueError('Evaluator %s is not supported.' % params.type)
return coco_evaluator.MetricWrapper(evaluator)
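# Illustrative sketch (added): `params` is expected to expose `type`, `val_json_file`
# and, depending on the evaluator type, `seen_class` or `mask_eval_class` as
# attributes, e.g. the eval section of the model config:
#   metric = evaluator_generator(params.eval)  # returns a coco_evaluator.MetricWrapper.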
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""An executor class for running model on TensorFlow 2.0."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from absl import logging
import tensorflow as tf
from official.vision.detection.executor import distributed_executor as executor
from official.vision.utils.object_detection import visualization_utils
class DetectionDistributedExecutor(executor.DistributedExecutor):
"""Detection specific customer training loop executor.
Subclasses the DistributedExecutor and adds support for numpy based metrics.
"""
def __init__(self,
predict_post_process_fn=None,
trainable_variables_filter=None,
**kwargs):
super(DetectionDistributedExecutor, self).__init__(**kwargs)
if predict_post_process_fn:
assert callable(predict_post_process_fn)
if trainable_variables_filter:
assert callable(trainable_variables_filter)
self._predict_post_process_fn = predict_post_process_fn
self._trainable_variables_filter = trainable_variables_filter
self.eval_steps = tf.Variable(
0,
trainable=False,
dtype=tf.int32,
synchronization=tf.VariableSynchronization.ON_READ,
aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA,
shape=[])
def _create_replicated_step(self,
strategy,
model,
loss_fn,
optimizer,
metric=None):
trainable_variables = model.trainable_variables
if self._trainable_variables_filter:
trainable_variables = self._trainable_variables_filter(
trainable_variables)
logging.info('Filter trainable variables from %d to %d',
len(model.trainable_variables), len(trainable_variables))
update_state_fn = lambda labels, outputs: None
if isinstance(metric, tf.keras.metrics.Metric):
update_state_fn = metric.update_state
else:
logging.error('Detection: train metric is not an instance of '
'tf.keras.metrics.Metric.')
def _replicated_step(inputs):
"""Replicated training step."""
inputs, labels = inputs
with tf.GradientTape() as tape:
outputs = model(inputs, training=True)
all_losses = loss_fn(labels, outputs)
losses = {}
for k, v in all_losses.items():
losses[k] = tf.reduce_mean(v)
per_replica_loss = losses['total_loss'] / strategy.num_replicas_in_sync
update_state_fn(labels, outputs)
grads = tape.gradient(per_replica_loss, trainable_variables)
clipped_grads, _ = tf.clip_by_global_norm(grads, clip_norm=1.0)
optimizer.apply_gradients(zip(clipped_grads, trainable_variables))
return losses
return _replicated_step
def _create_test_step(self, strategy, model, metric):
"""Creates a distributed test step."""
@tf.function
def test_step(iterator, eval_steps):
"""Calculates evaluation metrics on distributed devices."""
def _test_step_fn(inputs, eval_steps):
"""Replicated accuracy calculation."""
inputs, labels = inputs
model_outputs = model(inputs, training=False)
if self._predict_post_process_fn:
labels, prediction_outputs = self._predict_post_process_fn(
labels, model_outputs)
num_remaining_visualizations = (
self._params.eval.num_images_to_visualize - eval_steps)
# If there is a remaining number of visualizations that need to be
# done, add the next batch's outputs for visualization.
#
# TODO(hongjunchoi): Once dynamic slicing is supported on TPU, only
# write correct slice of outputs to summary file.
if num_remaining_visualizations > 0:
visualization_utils.visualize_images_with_bounding_boxes(
inputs, prediction_outputs['detection_boxes'],
self.global_train_step, self.eval_summary_writer)
return labels, prediction_outputs
labels, outputs = strategy.run(
_test_step_fn, args=(
next(iterator),
eval_steps,
))
outputs = tf.nest.map_structure(strategy.experimental_local_results,
outputs)
labels = tf.nest.map_structure(strategy.experimental_local_results,
labels)
eval_steps.assign_add(self._params.eval.batch_size)
return labels, outputs
return test_step
def _run_evaluation(self, test_step, current_training_step, metric,
test_iterator):
"""Runs validation steps and aggregate metrics."""
self.eval_steps.assign(0)
if not test_iterator or not metric:
logging.warning(
'Both test_iterator (%s) and metrics (%s) must not be None.',
test_iterator, metric)
return None
logging.info('Running evaluation after step: %s.', current_training_step)
while True:
try:
labels, outputs = test_step(test_iterator, self.eval_steps)
if metric:
metric.update_state(labels, outputs)
except (StopIteration, tf.errors.OutOfRangeError):
break
metric_result = metric.result()
if isinstance(metric, tf.keras.metrics.Metric):
metric_result = tf.nest.map_structure(lambda x: x.numpy().astype(float),
metric_result)
logging.info('Step: [%d] Validation metric = %s', current_training_step,
metric_result)
return metric_result
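# Note (added): `self.eval_steps` above is advanced by the eval batch size after each
# distributed test step; inside the step it is compared against
# `params.eval.num_images_to_visualize` to decide whether the current batch should
# still be rendered with bounding boxes and written to the eval summary.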
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Custom training loop for running TensorFlow 2.0 models."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from absl import flags
from absl import logging
import numpy as np
import tensorflow as tf
# pylint: disable=unused-import,g-import-not-at-top,redefined-outer-name,reimported
from typing import Optional, Dict, List, Text, Callable, Union, Iterator, Any
from official.modeling.hyperparams import params_dict
from official.utils import hyperparams_flags
from official.common import distribute_utils
from official.utils.misc import keras_utils
FLAGS = flags.FLAGS
strategy_flags_dict = hyperparams_flags.strategy_flags_dict
hparam_flags_dict = hyperparams_flags.hparam_flags_dict
def _save_checkpoint(checkpoint, model_dir, checkpoint_prefix):
"""Saves model to model_dir with provided checkpoint prefix."""
checkpoint_path = os.path.join(model_dir, checkpoint_prefix)
saved_path = checkpoint.save(checkpoint_path)
logging.info('Saving model as TF checkpoint: %s', saved_path)
def _steps_to_run(current_step, total_steps, steps_per_loop):
"""Calculates steps to run on device."""
if steps_per_loop <= 0:
raise ValueError('steps_per_loop should be a positive integer.')
return min(total_steps - current_step, steps_per_loop)
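# Example (added): with current_step=90, total_steps=100 and steps_per_loop=25 this
# returns min(100 - 90, 25) == 10, so the final host loop is shortened accordingly.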
def _no_metric():
return None
def metrics_as_dict(metric):
"""Puts input metric(s) into a list.
Args:
metric: metric(s) to be put into the list. `metric` could be an object, a
list, or a dict of tf.keras.metrics.Metric or has the `required_method`.
Returns:
A dictionary of valid metrics.
"""
if isinstance(metric, tf.keras.metrics.Metric):
metrics = {metric.name: metric}
elif isinstance(metric, list):
metrics = {m.name: m for m in metric}
elif isinstance(metric, dict):
metrics = metric
elif not metric:
return {}
else:
metrics = {'metric': metric}
return metrics
def metric_results(metric):
"""Collects results from the given metric(s)."""
metrics = metrics_as_dict(metric)
metric_result = {
name: m.result().numpy().astype(float) for name, m in metrics.items()
}
return metric_result
def reset_states(metric):
"""Resets states of the given metric(s)."""
metrics = metrics_as_dict(metric)
for m in metrics.values():
m.reset_states()
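# Illustrative sketch (added): metrics_as_dict normalizes the accepted forms into one
# dictionary so metric_results and reset_states can iterate uniformly, e.g.:
#   metrics_as_dict(tf.keras.metrics.Mean(name='loss'))  # -> {'loss': <Mean>}
#   metrics_as_dict([tf.keras.metrics.Mean(name='a'),
#                    tf.keras.metrics.Mean(name='b')])   # -> {'a': ..., 'b': ...}
#   metrics_as_dict(None)                                # -> {}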
class SummaryWriter(object):
"""Simple SummaryWriter for writing dictionary of metrics.
Attributes:
writer: The tf.SummaryWriter.
"""
def __init__(self, model_dir: Text, name: Text):
"""Inits SummaryWriter with paths.
Args:
model_dir: the model folder path.
name: the summary subfolder name.
"""
self.writer = tf.summary.create_file_writer(os.path.join(model_dir, name))
def __call__(self, metrics: Union[Dict[Text, float], float], step: int):
"""Write metrics to summary with the given writer.
Args:
metrics: a dictionary of metric values, or a single scalar (a dictionary is
preferred).
step: integer. The training step.
"""
if not isinstance(metrics, dict):
# Support scalar metric without name.
logging.warning('Summary writer prefers metrics as a dictionary.')
metrics = {'metric': metrics}
with self.writer.as_default():
for k, v in metrics.items():
tf.summary.scalar(k, v, step=step)
self.writer.flush()
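# Illustrative usage (added; the model directory is assumed):
#   writer = SummaryWriter('/tmp/model_dir', 'eval_test')
#   writer({'AP': 0.37, 'AP50': 0.58}, step=1000)  # writes one scalar per key.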
class DistributedExecutor(object):
"""Interface to train and eval models with tf.distribute.Strategy."""
def __init__(self, strategy, params, model_fn, loss_fn, is_multi_host=False):
"""Constructor.
Args:
strategy: an instance of tf.distribute.Strategy.
params: Model configuration needed to run distribution strategy.
model_fn: Keras model function. Signature:
(params: ParamsDict) -> tf.keras.models.Model.
loss_fn: loss function. Signature:
(y_true: Tensor, y_pred: Tensor) -> Tensor
is_multi_host: Set to True when using multi hosts for training, like multi
worker GPU or TPU pod (slice). Otherwise, False.
"""
self._params = params
self._model_fn = model_fn
self._loss_fn = loss_fn
self._strategy = strategy
self._checkpoint_name = 'ctl_step_{step}.ckpt'
self._is_multi_host = is_multi_host
self.train_summary_writer = None
self.eval_summary_writer = None
self.global_train_step = None
@property
def checkpoint_name(self):
"""Returns default checkpoint name."""
return self._checkpoint_name
@checkpoint_name.setter
def checkpoint_name(self, name):
"""Sets default summary writer for the current thread."""
self._checkpoint_name = name
def loss_fn(self):
return self._loss_fn()
def model_fn(self, params):
return self._model_fn(params)
def _save_config(self, model_dir):
"""Save parameters to config files if model_dir is defined."""
logging.info('Save config to model_dir %s.', model_dir)
if model_dir:
if not tf.io.gfile.exists(model_dir):
tf.io.gfile.makedirs(model_dir)
self._params.lock()
params_dict.save_params_dict_to_yaml(self._params,
model_dir + '/params.yaml')
else:
logging.warning('model_dir is empty, so skip the save config.')
def _get_input_iterator(
self, input_fn: Callable[..., tf.data.Dataset],
strategy: tf.distribute.Strategy) -> Optional[Iterator[Any]]:
"""Returns distributed dataset iterator.
Args:
input_fn: (params: dict) -> tf.data.Dataset.
strategy: an instance of tf.distribute.Strategy.
Returns:
An iterator that yields input tensors.
"""
if input_fn is None:
return None
# When training with multiple TPU workers, datasets need to be cloned
# across workers. Since a Dataset instance cannot be cloned in eager mode,
# we instead pass a callable that returns a dataset.
if self._is_multi_host:
return iter(strategy.distribute_datasets_from_function(input_fn))
else:
input_data = input_fn()
return iter(strategy.experimental_distribute_dataset(input_data))
def _create_replicated_step(self,
strategy,
model,
loss_fn,
optimizer,
metric=None):
"""Creates a single training step.
Args:
strategy: an instance of tf.distribute.Strategy.
model: (Tensor, bool) -> Tensor. model function.
loss_fn: (y_true: Tensor, y_pred: Tensor) -> Tensor.
optimizer: tf.keras.optimizers.Optimizer.
metric: tf.keras.metrics.Metric subclass.
Returns:
The training step callable.
"""
metrics = metrics_as_dict(metric)
def _replicated_step(inputs):
"""Replicated training step."""
inputs, labels = inputs
with tf.GradientTape() as tape:
outputs = model(inputs, training=True)
prediction_loss = loss_fn(labels, outputs)
loss = tf.reduce_mean(prediction_loss)
loss = loss / strategy.num_replicas_in_sync
for m in metrics.values():
m.update_state(labels, outputs)
grads = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(grads, model.trainable_variables))
return loss
return _replicated_step
def _create_train_step(self,
strategy,
model,
loss_fn,
optimizer,
metric=None):
"""Creates a distributed training step.
Args:
strategy: an instance of tf.distribute.Strategy.
model: (Tensor, bool) -> Tensor. model function.
loss_fn: (y_true: Tensor, y_pred: Tensor) -> Tensor.
optimizer: tf.keras.optimizers.Optimizer.
metric: tf.keras.metrics.Metric subclass.
Returns:
The training step callable.
"""
replicated_step = self._create_replicated_step(strategy, model, loss_fn,
optimizer, metric)
@tf.function
def train_step(iterator, num_steps):
"""Performs a distributed training step.
Args:
iterator: an iterator that yields input tensors.
num_steps: the number of steps in the loop.
Returns:
The loss tensor.
"""
if not isinstance(num_steps, tf.Tensor):
raise ValueError('num_steps should be a Tensor; a Python object may cause '
'retracing.')
per_replica_losses = strategy.run(replicated_step, args=(next(iterator),))
for _ in tf.range(num_steps - 1):
per_replica_losses = strategy.run(
replicated_step, args=(next(iterator),))
# For reporting, we return the mean of losses.
losses = tf.nest.map_structure(
lambda x: strategy.reduce(tf.distribute.ReduceOp.MEAN, x, axis=None),
per_replica_losses)
return losses
return train_step
def _create_test_step(self, strategy, model, metric):
"""Creates a distributed test step."""
metrics = metrics_as_dict(metric)
@tf.function
def test_step(iterator):
"""Calculates evaluation metrics on distributed devices."""
if not metric:
logging.info('Skip test_step because metric is None (%s)', metric)
return None, None
def _test_step_fn(inputs):
"""Replicated accuracy calculation."""
inputs, labels = inputs
model_outputs = model(inputs, training=False)
for m in metrics.values():
m.update_state(labels, model_outputs)
return labels, model_outputs
return strategy.run(_test_step_fn, args=(next(iterator),))
return test_step
def train(
self,
train_input_fn: Callable[[params_dict.ParamsDict], tf.data.Dataset],
eval_input_fn: Optional[Callable[[params_dict.ParamsDict],
tf.data.Dataset]] = None,
model_dir: Optional[Text] = None,
total_steps: int = 1,
iterations_per_loop: int = 1,
train_metric_fn: Optional[Callable[[], Any]] = None,
eval_metric_fn: Optional[Callable[[], Any]] = None,
summary_writer_fn: Callable[[Text, Text], SummaryWriter] = SummaryWriter,
init_checkpoint: Optional[Callable[[tf.keras.Model], Any]] = None,
custom_callbacks: Optional[List[tf.keras.callbacks.Callback]] = None,
continuous_eval: bool = False,
save_config: bool = True):
"""Runs distributed training.
Args:
train_input_fn: (params: dict) -> tf.data.Dataset training data input
function.
eval_input_fn: (Optional) same type as train_input_fn. If not None, will
trigger evaluating metric on eval data. If None, will not run the eval
step.
model_dir: the folder path for model checkpoints.
total_steps: total training steps.
iterations_per_loop: train steps per loop. After each loop, this job will
update metrics like loss and save checkpoint.
train_metric_fn: metric_fn for evaluation in train_step.
eval_metric_fn: metric_fn for evaluation in test_step.
summary_writer_fn: function to create summary writer.
init_checkpoint: function to load checkpoint.
custom_callbacks: A list of Keras Callbacks objects to run during
training. More specifically, the `on_batch_begin()` and `on_batch_end()`
methods are invoked during training.
continuous_eval: If `True`, will continuously run evaluation on every
available checkpoint. If `False`, will do the evaluation once after the
final step.
save_config: bool. Whether to save params to model_dir.
Returns:
The training loss and eval metrics.
"""
assert train_input_fn is not None
if train_metric_fn and not callable(train_metric_fn):
raise ValueError('if `train_metric_fn` is specified, '
'train_metric_fn must be a callable.')
if eval_metric_fn and not callable(eval_metric_fn):
raise ValueError('if `eval_metric_fn` is specified, '
'eval_metric_fn must be a callable.')
train_metric_fn = train_metric_fn or _no_metric
eval_metric_fn = eval_metric_fn or _no_metric
if custom_callbacks and iterations_per_loop != 1:
logging.warning(
'It is semantically wrong to run callbacks when '
'iterations_per_loop is not one (%s)', iterations_per_loop)
custom_callbacks = custom_callbacks or []
def _run_callbacks_on_batch_begin(batch):
"""Runs custom callbacks at the start of every step."""
if not custom_callbacks:
return
for callback in custom_callbacks:
if callback:
callback.on_batch_begin(batch)
def _run_callbacks_on_batch_end(batch):
"""Runs custom callbacks at the end of every step."""
if not custom_callbacks:
return
for callback in custom_callbacks:
if callback:
callback.on_batch_end(batch)
if save_config:
self._save_config(model_dir)
if FLAGS.save_checkpoint_freq:
save_freq = FLAGS.save_checkpoint_freq
else:
save_freq = iterations_per_loop
params = self._params
strategy = self._strategy
# To reduce unnecessary send/receive input pipeline operation, we place
# input pipeline ops in worker task.
train_iterator = self._get_input_iterator(train_input_fn, strategy)
train_loss = None
train_metric_result = None
eval_metric_result = None
tf.keras.backend.set_learning_phase(1)
with strategy.scope():
# To correctly place the model weights on accelerators,
# model and optimizer should be created in scope.
model = self.model_fn(params.as_dict())
if not hasattr(model, 'optimizer'):
raise ValueError('User should set the optimizer attribute on the model '
'inside `model_fn`.')
optimizer = model.optimizer
# Training loop starts here.
checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)
latest_checkpoint_file = tf.train.latest_checkpoint(model_dir)
initial_step = 0
if latest_checkpoint_file:
logging.info(
'Checkpoint file %s found and restoring from '
'checkpoint', latest_checkpoint_file)
checkpoint.restore(latest_checkpoint_file)
initial_step = optimizer.iterations.numpy()
logging.info('Loading from checkpoint file completed. Init step %d',
initial_step)
elif init_checkpoint:
logging.info('Restoring from init checkpoint function')
init_checkpoint(model)
logging.info('Loading from init checkpoint file completed')
current_step = optimizer.iterations.numpy()
checkpoint_name = self.checkpoint_name
eval_metric = eval_metric_fn()
train_metric = train_metric_fn()
train_summary_writer = summary_writer_fn(model_dir, 'eval_train')
self.train_summary_writer = train_summary_writer.writer
test_summary_writer = summary_writer_fn(model_dir, 'eval_test')
self.eval_summary_writer = test_summary_writer.writer
# Use training summary writer in TimeHistory if it's in use
for cb in custom_callbacks:
if isinstance(cb, keras_utils.TimeHistory):
cb.summary_writer = self.train_summary_writer
# Continue training loop.
train_step = self._create_train_step(
strategy=strategy,
model=model,
loss_fn=self.loss_fn(),
optimizer=optimizer,
metric=train_metric)
test_step = None
if eval_input_fn and eval_metric:
self.global_train_step = model.optimizer.iterations
test_step = self._create_test_step(strategy, model, metric=eval_metric)
# Step-0 operations
if current_step == 0 and not latest_checkpoint_file:
_save_checkpoint(checkpoint, model_dir,
checkpoint_name.format(step=current_step))
if test_step:
eval_iterator = self._get_input_iterator(eval_input_fn, strategy)
eval_metric_result = self._run_evaluation(test_step, current_step,
eval_metric, eval_iterator)
logging.info('Step: %s evaluation metric = %s.', current_step,
eval_metric_result)
test_summary_writer(metrics=eval_metric_result, step=optimizer.iterations)
reset_states(eval_metric)
logging.info('Training started')
last_save_checkpoint_step = current_step
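# Each pass through this loop runs up to `iterations_per_loop` training steps
# inside a single call to the compiled `train_step`, then logs results, writes
# summaries, and checkpoints / evaluates at the loop boundary.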
while current_step < total_steps:
num_steps = _steps_to_run(current_step, total_steps, iterations_per_loop)
_run_callbacks_on_batch_begin(current_step)
train_loss = train_step(train_iterator,
tf.convert_to_tensor(num_steps, dtype=tf.int32))
current_step += num_steps
train_loss = tf.nest.map_structure(lambda x: x.numpy().astype(float),
train_loss)
_run_callbacks_on_batch_end(current_step - 1)
if not isinstance(train_loss, dict):
train_loss = {'total_loss': train_loss}
if np.isnan(train_loss['total_loss']):
raise ValueError('total loss is NaN.')
if train_metric:
train_metric_result = metric_results(train_metric)
train_metric_result.update(train_loss)
else:
train_metric_result = train_loss
if callable(optimizer.lr):
train_metric_result.update(
{'learning_rate': optimizer.lr(current_step).numpy()})
else:
train_metric_result.update({'learning_rate': optimizer.lr.numpy()})
logging.info('Train Step: %d/%d / loss = %s / training metric = %s',
current_step, total_steps, train_loss, train_metric_result)
train_summary_writer(
metrics=train_metric_result, step=optimizer.iterations)
# Saves model checkpoints and runs validation steps at every
# iterations_per_loop steps.
# To avoid repeated model saving, we do not save after the last
# step of training.
if save_freq > 0 and current_step < total_steps and (
current_step - last_save_checkpoint_step) >= save_freq:
_save_checkpoint(checkpoint, model_dir,
checkpoint_name.format(step=current_step))
last_save_checkpoint_step = current_step
if continuous_eval and current_step < total_steps and test_step:
eval_iterator = self._get_input_iterator(eval_input_fn, strategy)
eval_metric_result = self._run_evaluation(test_step, current_step,
eval_metric, eval_iterator)
logging.info('Step: %s evaluation metric = %s.', current_step,
eval_metric_result)
test_summary_writer(
metrics=eval_metric_result, step=optimizer.iterations)
# Re-initialize the evaluation metric, except after the last step.
if eval_metric and current_step < total_steps:
reset_states(eval_metric)
if train_metric and current_step < total_steps:
reset_states(train_metric)
# Reaches the end of training and saves the last checkpoint.
if last_save_checkpoint_step < total_steps:
_save_checkpoint(checkpoint, model_dir,
checkpoint_name.format(step=current_step))
if test_step:
logging.info('Running final evaluation after training is complete.')
eval_iterator = self._get_input_iterator(eval_input_fn, strategy)
eval_metric_result = self._run_evaluation(test_step, current_step,
eval_metric, eval_iterator)
logging.info('Final evaluation metric = %s.', eval_metric_result)
test_summary_writer(metrics=eval_metric_result, step=optimizer.iterations)
self.train_summary_writer.close()
self.eval_summary_writer.close()
return train_metric_result, eval_metric_result
def _run_evaluation(self, test_step, current_training_step, metric,
test_iterator):
"""Runs validation steps and aggregate metrics."""
if not test_iterator or not metric:
logging.warning(
'Both test_iterator (%s) and metric (%s) must not be None.',
test_iterator, metric)
return None
logging.info('Running evaluation after step: %s.', current_training_step)
eval_step = 0
while True:
try:
with tf.experimental.async_scope():
test_step(test_iterator)
eval_step += 1
except (StopIteration, tf.errors.OutOfRangeError):
tf.experimental.async_clear_error()
break
metric_result = metric_results(metric)
logging.info('Total eval steps: [%d]', eval_step)
logging.info('At training step: [%r] Validation metric = %r',
current_training_step, metric_result)
return metric_result
def evaluate_from_model_dir(
self,
model_dir: Text,
eval_input_fn: Callable[[params_dict.ParamsDict], tf.data.Dataset],
eval_metric_fn: Callable[[], Any],
total_steps: int = -1,
eval_timeout: Optional[int] = None,
min_eval_interval: int = 180,
summary_writer_fn: Callable[[Text, Text], SummaryWriter] = SummaryWriter):
"""Runs distributed evaluation on model folder.
Args:
model_dir: the folder for storing model checkpoints.
eval_input_fn: (Optional) same type as train_input_fn. If not None, will
trigger evaluating metrics on the eval data. If None, will not run eval step.
eval_metric_fn: metric_fn for evaluation in test_step.
total_steps: total training steps. If the current step reaches the
total_steps, the evaluation loop will stop.
eval_timeout: The maximum number of seconds to wait between checkpoints.
If left as None, then the process will wait indefinitely. Used by
tf.train.checkpoints_iterator.
min_eval_interval: The minimum number of seconds between yielding
checkpoints. Used by tf.train.checkpoints_iterator.
summary_writer_fn: function to create summary writer.
Returns:
Eval metrics dictionary of the last checkpoint.
"""
if not model_dir:
raise ValueError('model_dir must be set.')
def terminate_eval():
logging.info('Terminating eval after %d seconds of no new checkpoints.',
eval_timeout)
return True
summary_writer = summary_writer_fn(model_dir, 'eval')
self.eval_summary_writer = summary_writer.writer
# Read checkpoints from the given model directory
# until `eval_timeout` seconds elapse with no new checkpoint.
for checkpoint_path in tf.train.checkpoints_iterator(
model_dir,
min_interval_secs=min_eval_interval,
timeout=eval_timeout,
timeout_fn=terminate_eval):
eval_metric_result, current_step = self.evaluate_checkpoint(
checkpoint_path=checkpoint_path,
eval_input_fn=eval_input_fn,
eval_metric_fn=eval_metric_fn,
summary_writer=summary_writer)
if total_steps > 0 and current_step >= total_steps:
logging.info('Evaluation finished after training step %d', current_step)
break
return eval_metric_result
def evaluate_checkpoint(self,
checkpoint_path: Text,
eval_input_fn: Callable[[params_dict.ParamsDict],
tf.data.Dataset],
eval_metric_fn: Callable[[], Any],
summary_writer: Optional[SummaryWriter] = None):
"""Runs distributed evaluation on the one checkpoint.
Args:
checkpoint_path: the checkpoint to evaluate.
eval_input_fn: (Optional) same type as train_input_fn. If not None, will
trigger evaluating metrics on the eval data. If None, will not run eval step.
eval_metric_fn: metric_fn for evaluation in test_step.
summary_writer: function to create summary writer.
Returns:
Eval metrics dictionary of the last checkpoint.
"""
if not callable(eval_metric_fn):
raise ValueError('if `eval_metric_fn` is specified, '
'eval_metric_fn must be a callable.')
old_phase = tf.keras.backend.learning_phase()
tf.keras.backend.set_learning_phase(0)
params = self._params
strategy = self._strategy
# To reduce unnecessary send/receive of input pipeline ops, we place the
# input pipeline ops on the worker task.
with strategy.scope():
# To correctly place the model weights on accelerators,
# model and optimizer should be created in scope.
model = self.model_fn(params.as_dict())
checkpoint = tf.train.Checkpoint(model=model)
eval_metric = eval_metric_fn()
assert eval_metric, 'eval_metric does not exist'
test_step = self._create_test_step(strategy, model, metric=eval_metric)
logging.info('Starting to evaluate.')
if not checkpoint_path:
raise ValueError('checkpoint path is empty')
reader = tf.compat.v1.train.NewCheckpointReader(checkpoint_path)
current_step = reader.get_tensor(
'optimizer/iter/.ATTRIBUTES/VARIABLE_VALUE')
logging.info('Checkpoint file %s found and restoring from '
'checkpoint', checkpoint_path)
status = checkpoint.restore(checkpoint_path)
status.expect_partial().assert_existing_objects_matched()
self.global_train_step = model.optimizer.iterations
eval_iterator = self._get_input_iterator(eval_input_fn, strategy)
eval_metric_result = self._run_evaluation(test_step, current_step,
eval_metric, eval_iterator)
logging.info('Step: %s evaluation metric = %s.', current_step,
eval_metric_result)
summary_writer(metrics=eval_metric_result, step=current_step)
reset_states(eval_metric)
tf.keras.backend.set_learning_phase(old_phase)
return eval_metric_result, current_step
def predict(self):
raise NotImplementedError('Unimplemented function.')
class ExecutorBuilder(object):
"""Builder of DistributedExecutor.
Example 1: Builds an executor with supported Strategy.
builder = ExecutorBuilder(
strategy_type='tpu',
strategy_config={'tpu': '/bns/xxx'})
dist_executor = builder.build_executor(
params=params,
model_fn=my_model_fn,
loss_fn=my_loss_fn,
metric_fn=my_metric_fn)
Example 2: Builds an executor with customized Strategy.
builder = ExecutorBuilder()
builder.strategy = <some customized Strategy>
dist_executor = builder.build_executor(
params=params,
model_fn=my_model_fn,
loss_fn=my_loss_fn,
metric_fn=my_metric_fn)
Example 3: Builds a customized executor with customized Strategy.
class MyDistributedExecutor(DistributedExecutor):
# implementation ...
builder = ExecutorBuilder()
builder.strategy = <some customized Strategy>
dist_executor = builder.build_executor(
class_ctor=MyDistributedExecutor,
params=params,
model_fn=my_model_fn,
loss_fn=my_loss_fn,
metric_fn=my_metric_fn)
"""
def __init__(self, strategy_type=None, strategy_config=None):
"""Constructor.
Args:
strategy_type: string. One of 'tpu', 'mirrored', 'multi_worker_mirrored'.
If None, the user is responsible to set the strategy before calling
build_executor(...).
strategy_config: necessary config for constructing the proper Strategy.
Check strategy_flags_dict() for examples of the structure.
"""
_ = distribute_utils.configure_cluster(strategy_config.worker_hosts,
strategy_config.task_index)
self._strategy = distribute_utils.get_distribution_strategy(
distribution_strategy=strategy_type,
num_gpus=strategy_config.num_gpus,
all_reduce_alg=strategy_config.all_reduce_alg,
num_packs=strategy_config.num_packs,
tpu_address=strategy_config.tpu)
@property
def strategy(self):
"""Returns default checkpoint name."""
return self._strategy
@strategy.setter
def strategy(self, new_strategy):
"""Sets default summary writer for the current thread."""
self._strategy = new_strategy
def build_executor(self,
class_ctor=DistributedExecutor,
params=None,
model_fn=None,
loss_fn=None,
**kwargs):
"""Creates an executor according to strategy type.
See the docstring of DistributedExecutor.__init__ for more information on
the input arguments.
Args:
class_ctor: A constructor of executor (default: DistributedExecutor).
params: ParamsDict, all the model parameters and runtime parameters.
model_fn: Keras model function.
loss_fn: loss function.
**kwargs: other arguments to the executor constructor.
Returns:
An instance of DistributedExecutor or its subclass.
"""
if self._strategy is None:
raise ValueError('`strategy` should not be None. You need to specify '
'`strategy_type` in the builder constructor or directly '
'set the `strategy` property of the builder.')
return class_ctor(
strategy=self._strategy,
params=params,
model_fn=model_fn,
loss_fn=loss_fn,
**kwargs)
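# Illustrative usage sketch: continuous evaluation over checkpoints produced
# by a separate training job. The `my_*` functions are placeholder names.
#
#   dist_executor = builder.build_executor(
#       params=params, model_fn=my_model_fn, loss_fn=my_loss_fn)
#   metrics = dist_executor.evaluate_from_model_dir(
#       model_dir=params.model_dir,
#       eval_input_fn=my_eval_input_fn,
#       eval_metric_fn=my_eval_metric_fn,
#       total_steps=params.train.total_steps,
#       eval_timeout=3600,
#       min_eval_interval=180)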
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Main function to train various object detection models."""
import functools
import pprint
from absl import app
from absl import flags
from absl import logging
import tensorflow as tf
from official.common import distribute_utils
from official.modeling.hyperparams import params_dict
from official.utils import hyperparams_flags
from official.utils.flags import core as flags_core
from official.utils.misc import keras_utils
from official.vision.detection.configs import factory as config_factory
from official.vision.detection.dataloader import input_reader
from official.vision.detection.dataloader import mode_keys as ModeKeys
from official.vision.detection.executor import distributed_executor as executor
from official.vision.detection.executor.detection_executor import DetectionDistributedExecutor
from official.vision.detection.modeling import factory as model_factory
hyperparams_flags.initialize_common_flags()
flags_core.define_log_steps()
flags.DEFINE_bool('enable_xla', default=False, help='Enable XLA for GPU')
flags.DEFINE_string(
'mode',
default='train',
help='Mode to run: `train`, `eval` or `eval_once`.')
flags.DEFINE_string(
'model', default='retinanet',
help='Model to run: `retinanet`, `mask_rcnn` or `shapemask`.')
flags.DEFINE_string('training_file_pattern', None,
'Location of the train data.')
flags.DEFINE_string('eval_file_pattern', None, 'Location of the eval data.')
flags.DEFINE_string(
'checkpoint_path', None,
'The checkpoint path to eval. Only used in eval_once mode.')
FLAGS = flags.FLAGS
def run_executor(params,
mode,
checkpoint_path=None,
train_input_fn=None,
eval_input_fn=None,
callbacks=None,
prebuilt_strategy=None):
"""Runs the object detection model on distribution strategy defined by the user."""
if params.architecture.use_bfloat16:
tf.compat.v2.keras.mixed_precision.set_global_policy('mixed_bfloat16')
model_builder = model_factory.model_generator(params)
if prebuilt_strategy is not None:
strategy = prebuilt_strategy
else:
strategy_config = params.strategy_config
distribute_utils.configure_cluster(strategy_config.worker_hosts,
strategy_config.task_index)
strategy = distribute_utils.get_distribution_strategy(
distribution_strategy=params.strategy_type,
num_gpus=strategy_config.num_gpus,
all_reduce_alg=strategy_config.all_reduce_alg,
num_packs=strategy_config.num_packs,
tpu_address=strategy_config.tpu)
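# Estimate the number of worker hosts assuming 8 replicas per host; the
# `+ 7` makes this a ceiling division.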
num_workers = int(strategy.num_replicas_in_sync + 7) // 8
is_multi_host = (int(num_workers) >= 2)
if mode == 'train':
def _model_fn(params):
return model_builder.build_model(params, mode=ModeKeys.TRAIN)
logging.info(
'Train num_replicas_in_sync %d num_workers %d is_multi_host %s',
strategy.num_replicas_in_sync, num_workers, is_multi_host)
dist_executor = DetectionDistributedExecutor(
strategy=strategy,
params=params,
model_fn=_model_fn,
loss_fn=model_builder.build_loss_fn,
is_multi_host=is_multi_host,
predict_post_process_fn=model_builder.post_processing,
trainable_variables_filter=model_builder
.make_filter_trainable_variables_fn())
if is_multi_host:
train_input_fn = functools.partial(
train_input_fn,
batch_size=params.train.batch_size // strategy.num_replicas_in_sync)
return dist_executor.train(
train_input_fn=train_input_fn,
model_dir=params.model_dir,
iterations_per_loop=params.train.iterations_per_loop,
total_steps=params.train.total_steps,
init_checkpoint=model_builder.make_restore_checkpoint_fn(),
custom_callbacks=callbacks,
save_config=True)
elif mode == 'eval' or mode == 'eval_once':
def _model_fn(params):
return model_builder.build_model(params, mode=ModeKeys.PREDICT_WITH_GT)
logging.info('Eval num_replicas_in_sync %d num_workers %d is_multi_host %s',
strategy.num_replicas_in_sync, num_workers, is_multi_host)
if is_multi_host:
eval_input_fn = functools.partial(
eval_input_fn,
batch_size=params.eval.batch_size // strategy.num_replicas_in_sync)
dist_executor = DetectionDistributedExecutor(
strategy=strategy,
params=params,
model_fn=_model_fn,
loss_fn=model_builder.build_loss_fn,
is_multi_host=is_multi_host,
predict_post_process_fn=model_builder.post_processing,
trainable_variables_filter=model_builder
.make_filter_trainable_variables_fn())
if mode == 'eval':
results = dist_executor.evaluate_from_model_dir(
model_dir=params.model_dir,
eval_input_fn=eval_input_fn,
eval_metric_fn=model_builder.eval_metrics,
eval_timeout=params.eval.eval_timeout,
min_eval_interval=params.eval.min_eval_interval,
total_steps=params.train.total_steps)
else:
# Run evaluation once for a single checkpoint.
if not checkpoint_path:
raise ValueError('checkpoint_path cannot be empty.')
if tf.io.gfile.isdir(checkpoint_path):
checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)
summary_writer = executor.SummaryWriter(params.model_dir, 'eval')
results, _ = dist_executor.evaluate_checkpoint(
checkpoint_path=checkpoint_path,
eval_input_fn=eval_input_fn,
eval_metric_fn=model_builder.eval_metrics,
summary_writer=summary_writer)
for k, v in results.items():
logging.info('Final eval metric %s: %f', k, v)
return results
else:
raise ValueError('Mode not found: %s.' % mode)
def run(callbacks=None):
keras_utils.set_session_config(enable_xla=FLAGS.enable_xla)
params = config_factory.config_generator(FLAGS.model)
params = params_dict.override_params_dict(
params, FLAGS.config_file, is_strict=True)
params = params_dict.override_params_dict(
params, FLAGS.params_override, is_strict=True)
params.override(
{
'strategy_type': FLAGS.strategy_type,
'model_dir': FLAGS.model_dir,
'strategy_config': executor.strategy_flags_dict(),
},
is_strict=False)
# Make sure use_tpu and strategy_type are in sync.
params.use_tpu = (params.strategy_type == 'tpu')
if not params.use_tpu:
params.override({
'architecture': {
'use_bfloat16': False,
},
'norm_activation': {
'use_sync_bn': False,
},
}, is_strict=True)
params.validate()
params.lock()
pp = pprint.PrettyPrinter()
params_str = pp.pformat(params.as_dict())
logging.info('Model Parameters: %s', params_str)
train_input_fn = None
eval_input_fn = None
training_file_pattern = FLAGS.training_file_pattern or params.train.train_file_pattern
eval_file_pattern = FLAGS.eval_file_pattern or params.eval.eval_file_pattern
if not training_file_pattern and not eval_file_pattern:
raise ValueError('Must provide at least one of training_file_pattern and '
'eval_file_pattern.')
if training_file_pattern:
# Use global batch size for single host.
train_input_fn = input_reader.InputFn(
file_pattern=training_file_pattern,
params=params,
mode=input_reader.ModeKeys.TRAIN,
batch_size=params.train.batch_size)
if eval_file_pattern:
eval_input_fn = input_reader.InputFn(
file_pattern=eval_file_pattern,
params=params,
mode=input_reader.ModeKeys.PREDICT_WITH_GT,
batch_size=params.eval.batch_size,
num_examples=params.eval.eval_samples)
if callbacks is None:
callbacks = []
if FLAGS.log_steps:
callbacks.append(
keras_utils.TimeHistory(
batch_size=params.train.batch_size,
log_steps=FLAGS.log_steps,
))
return run_executor(
params,
FLAGS.mode,
checkpoint_path=FLAGS.checkpoint_path,
train_input_fn=train_input_fn,
eval_input_fn=eval_input_fn,
callbacks=callbacks)
def main(argv):
del argv # Unused.
run()
if __name__ == '__main__':
tf.config.set_soft_device_placement(True)
app.run(main)
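# Illustrative invocations (script name, paths, and flag values below are
# placeholders):
#
#   python3 main.py --mode=train --model=retinanet \
#       --training_file_pattern=/path/to/train-* \
#       --eval_file_pattern=/path/to/val-* \
#       --model_dir=/tmp/retinanet \
#       --strategy_type=mirrored
#
#   python3 main.py --mode=eval_once --model=retinanet \
#       --eval_file_pattern=/path/to/val-* \
#       --model_dir=/tmp/retinanet \
#       --checkpoint_path=/tmp/retinanet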
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Model architecture factory."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from official.vision.detection.modeling.architecture import fpn
from official.vision.detection.modeling.architecture import heads
from official.vision.detection.modeling.architecture import identity
from official.vision.detection.modeling.architecture import nn_ops
from official.vision.detection.modeling.architecture import resnet
from official.vision.detection.modeling.architecture import spinenet
def norm_activation_generator(params):
return nn_ops.norm_activation_builder(
momentum=params.batch_norm_momentum,
epsilon=params.batch_norm_epsilon,
trainable=params.batch_norm_trainable,
activation=params.activation)
def backbone_generator(params):
"""Generator function for various backbone models."""
if params.architecture.backbone == 'resnet':
resnet_params = params.resnet
backbone_fn = resnet.Resnet(
resnet_depth=resnet_params.resnet_depth,
activation=params.norm_activation.activation,
norm_activation=norm_activation_generator(
params.norm_activation))
elif params.architecture.backbone == 'spinenet':
spinenet_params = params.spinenet
backbone_fn = spinenet.SpineNetBuilder(model_id=spinenet_params.model_id)
else:
raise ValueError('Backbone model `{}` is not supported.'
.format(params.architecture.backbone))
return backbone_fn
def multilevel_features_generator(params):
"""Generator function for various FPN models."""
if params.architecture.multilevel_features == 'fpn':
fpn_params = params.fpn
fpn_fn = fpn.Fpn(
min_level=params.architecture.min_level,
max_level=params.architecture.max_level,
fpn_feat_dims=fpn_params.fpn_feat_dims,
use_separable_conv=fpn_params.use_separable_conv,
activation=params.norm_activation.activation,
use_batch_norm=fpn_params.use_batch_norm,
norm_activation=norm_activation_generator(
params.norm_activation))
elif params.architecture.multilevel_features == 'identity':
fpn_fn = identity.Identity()
else:
raise ValueError('The multi-level feature model `{}` is not supported.'
.format(params.architecture.multilevel_features))
return fpn_fn
def retinanet_head_generator(params):
"""Generator function for RetinaNet head architecture."""
head_params = params.retinanet_head
anchors_per_location = params.anchor.num_scales * len(
params.anchor.aspect_ratios)
return heads.RetinanetHead(
params.architecture.min_level,
params.architecture.max_level,
params.architecture.num_classes,
anchors_per_location,
head_params.num_convs,
head_params.num_filters,
head_params.use_separable_conv,
norm_activation=norm_activation_generator(params.norm_activation))
def rpn_head_generator(params):
"""Generator function for RPN head architecture."""
head_params = params.rpn_head
anchors_per_location = params.anchor.num_scales * len(
params.anchor.aspect_ratios)
return heads.RpnHead(
params.architecture.min_level,
params.architecture.max_level,
anchors_per_location,
head_params.num_convs,
head_params.num_filters,
head_params.use_separable_conv,
params.norm_activation.activation,
head_params.use_batch_norm,
norm_activation=norm_activation_generator(params.norm_activation))
def oln_rpn_head_generator(params):
"""Generator function for OLN-proposal (OLN-RPN) head architecture."""
head_params = params.rpn_head
anchors_per_location = params.anchor.num_scales * len(
params.anchor.aspect_ratios)
return heads.OlnRpnHead(
params.architecture.min_level,
params.architecture.max_level,
anchors_per_location,
head_params.num_convs,
head_params.num_filters,
head_params.use_separable_conv,
params.norm_activation.activation,
head_params.use_batch_norm,
norm_activation=norm_activation_generator(params.norm_activation))
def fast_rcnn_head_generator(params):
"""Generator function for Fast R-CNN head architecture."""
head_params = params.frcnn_head
return heads.FastrcnnHead(
params.architecture.num_classes,
head_params.num_convs,
head_params.num_filters,
head_params.use_separable_conv,
head_params.num_fcs,
head_params.fc_dims,
params.norm_activation.activation,
head_params.use_batch_norm,
norm_activation=norm_activation_generator(params.norm_activation))
def oln_box_score_head_generator(params):
"""Generator function for Scoring Fast R-CNN head architecture."""
head_params = params.frcnn_head
return heads.OlnBoxScoreHead(
params.architecture.num_classes,
head_params.num_convs,
head_params.num_filters,
head_params.use_separable_conv,
head_params.num_fcs,
head_params.fc_dims,
params.norm_activation.activation,
head_params.use_batch_norm,
norm_activation=norm_activation_generator(params.norm_activation))
def mask_rcnn_head_generator(params):
"""Generator function for Mask R-CNN head architecture."""
head_params = params.mrcnn_head
return heads.MaskrcnnHead(
params.architecture.num_classes,
params.architecture.mask_target_size,
head_params.num_convs,
head_params.num_filters,
head_params.use_separable_conv,
params.norm_activation.activation,
head_params.use_batch_norm,
norm_activation=norm_activation_generator(params.norm_activation))
def oln_mask_score_head_generator(params):
"""Generator function for Scoring Mask R-CNN head architecture."""
head_params = params.mrcnn_head
return heads.OlnMaskScoreHead(
params.architecture.num_classes,
params.architecture.mask_target_size,
head_params.num_convs,
head_params.num_filters,
head_params.use_separable_conv,
params.norm_activation.activation,
head_params.use_batch_norm,
norm_activation=norm_activation_generator(params.norm_activation))
def shapeprior_head_generator(params):
"""Generator function for shape prior head architecture."""
head_params = params.shapemask_head
return heads.ShapemaskPriorHead(
params.architecture.num_classes,
head_params.num_downsample_channels,
head_params.mask_crop_size,
head_params.use_category_for_mask,
head_params.shape_prior_path)
def coarsemask_head_generator(params):
"""Generator function for ShapeMask coarse mask head architecture."""
head_params = params.shapemask_head
return heads.ShapemaskCoarsemaskHead(
params.architecture.num_classes,
head_params.num_downsample_channels,
head_params.mask_crop_size,
head_params.use_category_for_mask,
head_params.num_convs,
norm_activation=norm_activation_generator(params.norm_activation))
def finemask_head_generator(params):
"""Generator function for Shapemask fine mask head architecture."""
head_params = params.shapemask_head
return heads.ShapemaskFinemaskHead(
params.architecture.num_classes,
head_params.num_downsample_channels,
head_params.mask_crop_size,
head_params.use_category_for_mask,
head_params.num_convs,
head_params.upsample_factor)
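# Illustrative sketch: the generators above are driven by a fully populated
# ParamsDict; `params` below is a placeholder for such a config.
#
#   backbone_fn = backbone_generator(params)        # resnet or spinenet
#   fpn_fn = multilevel_features_generator(params)  # fpn or identity
#   head_fn = retinanet_head_generator(params)      # RetinaNet box/class head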
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Feature Pyramid Networks.
Feature Pyramid Networks were proposed in:
[1] Tsung-Yi Lin, Piotr Dollar, Ross Girshick, Kaiming He, Bharath Hariharan,
and Serge Belongie.
Feature Pyramid Networks for Object Detection. CVPR 2017.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import tensorflow as tf
from official.vision.detection.modeling.architecture import nn_ops
from official.vision.detection.ops import spatial_transform_ops
class Fpn(object):
"""Feature pyramid networks."""
def __init__(self,
min_level=3,
max_level=7,
fpn_feat_dims=256,
use_separable_conv=False,
activation='relu',
use_batch_norm=True,
norm_activation=nn_ops.norm_activation_builder(
activation='relu')):
"""FPN initialization function.
Args:
min_level: `int` minimum level in FPN output feature maps.
max_level: `int` maximum level in FPN output feature maps.
fpn_feat_dims: `int` number of filters in FPN layers.
use_separable_conv: `bool`, if True use separable convolution for
convolution in FPN layers.
activation: activation function. Support 'relu' and 'swish'.
use_batch_norm: `bool`, indicating whether batchnorm layers are added.
norm_activation: an operation that includes a normalization layer
followed by an optional activation layer.
"""
self._min_level = min_level
self._max_level = max_level
self._fpn_feat_dims = fpn_feat_dims
if use_separable_conv:
self._conv2d_op = functools.partial(
tf.keras.layers.SeparableConv2D, depth_multiplier=1)
else:
self._conv2d_op = tf.keras.layers.Conv2D
if activation == 'relu':
self._activation_op = tf.nn.relu
elif activation == 'swish':
self._activation_op = tf.nn.swish
else:
raise ValueError('Unsupported activation `{}`.'.format(activation))
self._use_batch_norm = use_batch_norm
self._norm_activation = norm_activation
self._norm_activations = {}
self._lateral_conv2d_op = {}
self._post_hoc_conv2d_op = {}
self._coarse_conv2d_op = {}
for level in range(self._min_level, self._max_level + 1):
if self._use_batch_norm:
self._norm_activations[level] = norm_activation(
use_activation=False, name='p%d-bn' % level)
self._lateral_conv2d_op[level] = self._conv2d_op(
filters=self._fpn_feat_dims,
kernel_size=(1, 1),
padding='same',
name='l%d' % level)
self._post_hoc_conv2d_op[level] = self._conv2d_op(
filters=self._fpn_feat_dims,
strides=(1, 1),
kernel_size=(3, 3),
padding='same',
name='post_hoc_d%d' % level)
self._coarse_conv2d_op[level] = self._conv2d_op(
filters=self._fpn_feat_dims,
strides=(2, 2),
kernel_size=(3, 3),
padding='same',
name='p%d' % level)
def __call__(self, multilevel_features, is_training=None):
"""Returns the FPN features for a given multilevel features.
Args:
multilevel_features: a `dict` containing `int` keys for continuous feature
levels, e.g., [2, 3, 4, 5]. The values are corresponding features with
shape [batch_size, height_l, width_l, num_filters].
is_training: `bool` if True, the model is in training mode.
Returns:
a `dict` containing `int` keys for continuous feature levels
[min_level, min_level + 1, ..., max_level]. The values are corresponding
FPN features with shape [batch_size, height_l, width_l, fpn_feat_dims].
"""
input_levels = list(multilevel_features.keys())
if min(input_levels) > self._min_level:
raise ValueError(
'The minimum backbone level %d should be ' % (min(input_levels)) +
'less than or equal to the FPN minimum level %d.' % (self._min_level))
backbone_max_level = min(max(input_levels), self._max_level)
with tf.name_scope('fpn'):
# Adds lateral connections.
feats_lateral = {}
for level in range(self._min_level, backbone_max_level + 1):
feats_lateral[level] = self._lateral_conv2d_op[level](
multilevel_features[level])
# Adds top-down path.
feats = {backbone_max_level: feats_lateral[backbone_max_level]}
for level in range(backbone_max_level - 1, self._min_level - 1, -1):
feats[level] = spatial_transform_ops.nearest_upsampling(
feats[level + 1], 2) + feats_lateral[level]
# Adds post-hoc 3x3 convolution kernel.
for level in range(self._min_level, backbone_max_level + 1):
feats[level] = self._post_hoc_conv2d_op[level](feats[level])
# Adds coarser FPN levels introduced for RetinaNet.
for level in range(backbone_max_level + 1, self._max_level + 1):
feats_in = feats[level - 1]
if level > backbone_max_level + 1:
feats_in = self._activation_op(feats_in)
feats[level] = self._coarse_conv2d_op[level](feats_in)
if self._use_batch_norm:
# Adds batch_norm layer.
for level in range(self._min_level, self._max_level + 1):
feats[level] = self._norm_activations[level](
feats[level], is_training=is_training)
return feats
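# Illustrative shape check (random placeholder features; image size 256):
#
#   fpn_fn = Fpn(min_level=3, max_level=7, fpn_feat_dims=256)
#   features = {l: tf.random.normal([1, 256 // 2**l, 256 // 2**l, 64])
#               for l in range(2, 6)}
#   outputs = fpn_fn(features, is_training=False)
#   # outputs has keys 3..7; outputs[l] has shape
#   # [1, 256 // 2**l, 256 // 2**l, 256].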
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Classes to build various prediction heads in all supported models."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import numpy as np
import tensorflow as tf
from official.vision.detection.modeling.architecture import nn_ops
from official.vision.detection.ops import spatial_transform_ops
class RpnHead(tf.keras.layers.Layer):
"""Region Proposal Network head."""
def __init__(
self,
min_level,
max_level,
anchors_per_location,
num_convs=2,
num_filters=256,
use_separable_conv=False,
activation='relu',
use_batch_norm=True,
norm_activation=nn_ops.norm_activation_builder(activation='relu')):
"""Initialize params to build Region Proposal Network head.
Args:
min_level: `int` number of minimum feature level.
max_level: `int` number of maximum feature level.
anchors_per_location: `int` number of anchors per pixel location.
num_convs: `int` number that represents the number of the intermediate
conv layers before the prediction.
num_filters: `int` number that represents the number of filters of the
intermediate conv layers.
use_separable_conv: `bool`, indicating whether separable conv layers
are used.
activation: activation function. Support 'relu' and 'swish'.
use_batch_norm: 'bool', indicating whether batchnorm layers are added.
norm_activation: an operation that includes a normalization layer followed
by an optional activation layer.
"""
super().__init__(autocast=False)
self._min_level = min_level
self._max_level = max_level
self._anchors_per_location = anchors_per_location
if activation == 'relu':
self._activation_op = tf.nn.relu
elif activation == 'swish':
self._activation_op = tf.nn.swish
else:
raise ValueError('Unsupported activation `{}`.'.format(activation))
self._use_batch_norm = use_batch_norm
if use_separable_conv:
self._conv2d_op = functools.partial(
tf.keras.layers.SeparableConv2D,
depth_multiplier=1,
bias_initializer=tf.zeros_initializer())
else:
self._conv2d_op = functools.partial(
tf.keras.layers.Conv2D,
kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
bias_initializer=tf.zeros_initializer())
self._rpn_conv = self._conv2d_op(
num_filters,
kernel_size=(3, 3),
strides=(1, 1),
activation=(None if self._use_batch_norm else self._activation_op),
padding='same',
name='rpn')
self._rpn_class_conv = self._conv2d_op(
anchors_per_location,
kernel_size=(1, 1),
strides=(1, 1),
padding='valid',
name='rpn-class')
self._rpn_box_conv = self._conv2d_op(
4 * anchors_per_location,
kernel_size=(1, 1),
strides=(1, 1),
padding='valid',
name='rpn-box')
self._norm_activations = {}
if self._use_batch_norm:
for level in range(self._min_level, self._max_level + 1):
self._norm_activations[level] = norm_activation(name='rpn-l%d-bn' %
level)
def _shared_rpn_heads(self, features, anchors_per_location, level,
is_training):
"""Shared RPN heads."""
features = self._rpn_conv(features)
if self._use_batch_norm:
# The batch normalization layers are not shared between levels.
features = self._norm_activations[level](
features, is_training=is_training)
# Proposal classification scores
scores = self._rpn_class_conv(features)
# Proposal bbox regression deltas
bboxes = self._rpn_box_conv(features)
return scores, bboxes
def call(self, features, is_training=None):
scores_outputs = {}
box_outputs = {}
with tf.name_scope('rpn_head'):
for level in range(self._min_level, self._max_level + 1):
scores_output, box_output = self._shared_rpn_heads(
features[level], self._anchors_per_location, level, is_training)
scores_outputs[level] = scores_output
box_outputs[level] = box_output
return scores_outputs, box_outputs
class OlnRpnHead(tf.keras.layers.Layer):
"""Region Proposal Network for Object Localization Network (OLN)."""
def __init__(
self,
min_level,
max_level,
anchors_per_location,
num_convs=2,
num_filters=256,
use_separable_conv=False,
activation='relu',
use_batch_norm=True,
norm_activation=nn_ops.norm_activation_builder(activation='relu')):
"""Initialize params to build Region Proposal Network head.
Args:
min_level: `int` number of minimum feature level.
max_level: `int` number of maximum feature level.
anchors_per_location: `int` number of anchors per pixel location.
num_convs: `int` number that represents the number of the intermediate
conv layers before the prediction.
num_filters: `int` number that represents the number of filters of the
intermediate conv layers.
use_separable_conv: `bool`, indicating whether separable conv layers
are used.
activation: activation function. Support 'relu' and 'swish'.
use_batch_norm: 'bool', indicating whether batchnorm layers are added.
norm_activation: an operation that includes a normalization layer followed
by an optional activation layer.
"""
super().__init__(autocast=False)
self._min_level = min_level
self._max_level = max_level
self._anchors_per_location = anchors_per_location
if activation == 'relu':
self._activation_op = tf.nn.relu
elif activation == 'swish':
self._activation_op = tf.nn.swish
else:
raise ValueError('Unsupported activation `{}`.'.format(activation))
self._use_batch_norm = use_batch_norm
if use_separable_conv:
self._conv2d_op = functools.partial(
tf.keras.layers.SeparableConv2D,
depth_multiplier=1,
bias_initializer=tf.zeros_initializer())
else:
self._conv2d_op = functools.partial(
tf.keras.layers.Conv2D,
kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
bias_initializer=tf.zeros_initializer())
self._rpn_conv = self._conv2d_op(
num_filters,
kernel_size=(3, 3),
strides=(1, 1),
activation=(None if self._use_batch_norm else self._activation_op),
padding='same',
name='rpn')
self._rpn_class_conv = self._conv2d_op(
anchors_per_location,
kernel_size=(1, 1),
strides=(1, 1),
padding='valid',
name='rpn-class')
self._rpn_box_conv = self._conv2d_op(
4 * anchors_per_location,
kernel_size=(1, 1),
strides=(1, 1),
padding='valid',
name='rpn-box-lrtb')
self._rpn_center_conv = self._conv2d_op(
anchors_per_location,
kernel_size=(1, 1),
strides=(1, 1),
padding='valid',
name='rpn-centerness')
self._norm_activations = {}
if self._use_batch_norm:
for level in range(self._min_level, self._max_level + 1):
self._norm_activations[level] = norm_activation(name='rpn-l%d-bn' %
level)
def _shared_rpn_heads(self, features, anchors_per_location, level,
is_training):
"""Shared RPN heads."""
features = self._rpn_conv(features)
if self._use_batch_norm:
# The batch normalization layers are not shared between levels.
features = self._norm_activations[level](
features, is_training=is_training)
# Feature L2 normalization for training stability
features = tf.math.l2_normalize(
features,
axis=-1,
name='rpn-norm',)
# Proposal classification scores
scores = self._rpn_class_conv(features)
# Proposal bbox regression deltas
bboxes = self._rpn_box_conv(features)
# Proposal centerness scores
centers = self._rpn_center_conv(features)
return scores, bboxes, centers
def __call__(self, features, is_training=None):
scores_outputs = {}
box_outputs = {}
center_outputs = {}
with tf.name_scope('rpn_head'):
for level in range(self._min_level, self._max_level + 1):
scores_output, box_output, center_output = self._shared_rpn_heads(
features[level], self._anchors_per_location, level, is_training)
scores_outputs[level] = scores_output
box_outputs[level] = box_output
center_outputs[level] = center_output
return scores_outputs, box_outputs, center_outputs
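# Note: unlike RpnHead, OlnRpnHead also returns per-level centerness
# predictions. For each level the shapes are [batch, h_l, w_l, anchors] for
# scores and centerness, and [batch, h_l, w_l, 4 * anchors] for boxes.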
class FastrcnnHead(tf.keras.layers.Layer):
"""Fast R-CNN box head."""
def __init__(
self,
num_classes,
num_convs=0,
num_filters=256,
use_separable_conv=False,
num_fcs=2,
fc_dims=1024,
activation='relu',
use_batch_norm=True,
norm_activation=nn_ops.norm_activation_builder(activation='relu')):
"""Initialize params to build Fast R-CNN box head.
Args:
num_classes: an integer for the number of classes.
num_convs: `int` number that represents the number of the intermediate
conv layers before the FC layers.
num_filters: `int` number that represents the number of filters of the
intermediate conv layers.
use_separable_conv: `bool`, indicating whether separable conv layers
are used.
num_fcs: `int` number that represents the number of FC layers before the
predictions.
fc_dims: `int` number that represents the number of dimension of the FC
layers.
activation: activation function. Support 'relu' and 'swish'.
use_batch_norm: 'bool', indicating whether batchnorm layers are added.
norm_activation: an operation that includes a normalization layer followed
by an optional activation layer.
"""
super(FastrcnnHead, self).__init__(autocast=False)
self._num_classes = num_classes
self._num_convs = num_convs
self._num_filters = num_filters
if use_separable_conv:
self._conv2d_op = functools.partial(
tf.keras.layers.SeparableConv2D,
depth_multiplier=1,
bias_initializer=tf.zeros_initializer())
else:
self._conv2d_op = functools.partial(
tf.keras.layers.Conv2D,
kernel_initializer=tf.keras.initializers.VarianceScaling(
scale=2, mode='fan_out', distribution='untruncated_normal'),
bias_initializer=tf.zeros_initializer())
self._num_fcs = num_fcs
self._fc_dims = fc_dims
if activation == 'relu':
self._activation_op = tf.nn.relu
elif activation == 'swish':
self._activation_op = tf.nn.swish
else:
raise ValueError('Unsupported activation `{}`.'.format(activation))
self._use_batch_norm = use_batch_norm
self._norm_activation = norm_activation
self._conv_ops = []
self._conv_bn_ops = []
for i in range(self._num_convs):
self._conv_ops.append(
self._conv2d_op(
self._num_filters,
kernel_size=(3, 3),
strides=(1, 1),
padding='same',
dilation_rate=(1, 1),
activation=(None
if self._use_batch_norm else self._activation_op),
name='conv_{}'.format(i)))
if self._use_batch_norm:
self._conv_bn_ops.append(self._norm_activation())
self._fc_ops = []
self._fc_bn_ops = []
for i in range(self._num_fcs):
self._fc_ops.append(
tf.keras.layers.Dense(
units=self._fc_dims,
activation=(None
if self._use_batch_norm else self._activation_op),
name='fc{}'.format(i)))
if self._use_batch_norm:
self._fc_bn_ops.append(self._norm_activation(fused=False))
self._class_predict = tf.keras.layers.Dense(
self._num_classes,
kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
bias_initializer=tf.zeros_initializer(),
name='class-predict')
self._box_predict = tf.keras.layers.Dense(
self._num_classes * 4,
kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.001),
bias_initializer=tf.zeros_initializer(),
name='box-predict')
def call(self, roi_features, is_training=None):
"""Box and class branches for the Mask-RCNN model.
Args:
roi_features: A ROI feature tensor of shape [batch_size, num_rois,
height_l, width_l, num_filters].
is_training: `boolean`, if True, the model is in training mode.
Returns:
class_outputs: a tensor with a shape of
[batch_size, num_rois, num_classes], representing the class predictions.
box_outputs: a tensor with a shape of
[batch_size, num_rois, num_classes * 4], representing the box
predictions.
"""
with tf.name_scope('fast_rcnn_head'):
# Reshape inputs before the FC layers.
_, num_rois, height, width, filters = roi_features.get_shape().as_list()
net = tf.reshape(roi_features, [-1, height, width, filters])
for i in range(self._num_convs):
net = self._conv_ops[i](net)
if self._use_batch_norm:
net = self._conv_bn_ops[i](net, is_training=is_training)
filters = self._num_filters if self._num_convs > 0 else filters
net = tf.reshape(net, [-1, num_rois, height * width * filters])
for i in range(self._num_fcs):
net = self._fc_ops[i](net)
if self._use_batch_norm:
net = self._fc_bn_ops[i](net, is_training=is_training)
class_outputs = self._class_predict(net)
box_outputs = self._box_predict(net)
return class_outputs, box_outputs
class OlnBoxScoreHead(tf.keras.layers.Layer):
"""Box head of Object Localization Network (OLN)."""
def __init__(
self,
num_classes,
num_convs=0,
num_filters=256,
use_separable_conv=False,
num_fcs=2,
fc_dims=1024,
activation='relu',
use_batch_norm=True,
norm_activation=nn_ops.norm_activation_builder(activation='relu')):
"""Initialize params to build OLN box head.
Args:
num_classes: an integer for the number of classes.
num_convs: `int` number that represents the number of the intermediate
conv layers before the FC layers.
num_filters: `int` number that represents the number of filters of the
intermediate conv layers.
use_separable_conv: `bool`, indicating whether separable conv layers
are used.
num_fcs: `int` number that represents the number of FC layers before the
predictions.
fc_dims: `int` number that represents the number of dimension of the FC
layers.
activation: activation function. Support 'relu' and 'swish'.
use_batch_norm: 'bool', indicating whether batchnorm layers are added.
norm_activation: an operation that includes a normalization layer followed
by an optional activation layer.
"""
super().__init__(autocast=False)
self._num_classes = num_classes
self._num_convs = num_convs
self._num_filters = num_filters
if use_separable_conv:
self._conv2d_op = functools.partial(
tf.keras.layers.SeparableConv2D,
depth_multiplier=1,
bias_initializer=tf.zeros_initializer())
else:
self._conv2d_op = functools.partial(
tf.keras.layers.Conv2D,
kernel_initializer=tf.keras.initializers.VarianceScaling(
scale=2, mode='fan_out', distribution='untruncated_normal'),
bias_initializer=tf.zeros_initializer())
self._num_fcs = num_fcs
self._fc_dims = fc_dims
if activation == 'relu':
self._activation_op = tf.nn.relu
elif activation == 'swish':
self._activation_op = tf.nn.swish
else:
raise ValueError('Unsupported activation `{}`.'.format(activation))
self._use_batch_norm = use_batch_norm
self._norm_activation = norm_activation
self._conv_ops = []
self._conv_bn_ops = []
for i in range(self._num_convs):
self._conv_ops.append(
self._conv2d_op(
self._num_filters,
kernel_size=(3, 3),
strides=(1, 1),
padding='same',
dilation_rate=(1, 1),
activation=(None
if self._use_batch_norm else self._activation_op),
name='conv_{}'.format(i)))
if self._use_batch_norm:
self._conv_bn_ops.append(self._norm_activation())
self._fc_ops = []
self._fc_bn_ops = []
for i in range(self._num_fcs):
self._fc_ops.append(
tf.keras.layers.Dense(
units=self._fc_dims,
activation=(None
if self._use_batch_norm else self._activation_op),
name='fc{}'.format(i)))
if self._use_batch_norm:
self._fc_bn_ops.append(self._norm_activation(fused=False))
self._class_predict = tf.keras.layers.Dense(
self._num_classes,
kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
bias_initializer=tf.zeros_initializer(),
name='class-predict')
self._box_predict = tf.keras.layers.Dense(
self._num_classes * 4,
kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.001),
bias_initializer=tf.zeros_initializer(),
name='box-predict')
self._score_predict = tf.keras.layers.Dense(
1,
kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
bias_initializer=tf.zeros_initializer(),
name='score-predict')
def __call__(self, roi_features, is_training=None):
"""Box and class branches for the Mask-RCNN model.
Args:
roi_features: A ROI feature tensor of shape [batch_size, num_rois,
height_l, width_l, num_filters].
is_training: `boolean`, if True, the model is in training mode.
Returns:
class_outputs: a tensor with a shape of
[batch_size, num_rois, num_classes], representing the class predictions.
box_outputs: a tensor with a shape of
[batch_size, num_rois, num_classes * 4], representing the box
predictions.
score_outputs: a tensor with a shape of [batch_size, num_rois, 1],
representing the box score predictions.
"""
with tf.name_scope('fast_rcnn_head'):
# Reshape inputs before the FC layers.
_, num_rois, height, width, filters = roi_features.get_shape().as_list()
net = tf.reshape(roi_features, [-1, height, width, filters])
for i in range(self._num_convs):
net = self._conv_ops[i](net)
if self._use_batch_norm:
net = self._conv_bn_ops[i](net, is_training=is_training)
filters = self._num_filters if self._num_convs > 0 else filters
net = tf.reshape(net, [-1, num_rois, height * width * filters])
for i in range(self._num_fcs):
net = self._fc_ops[i](net)
if self._use_batch_norm:
net = self._fc_bn_ops[i](net, is_training=is_training)
class_outputs = self._class_predict(net)
box_outputs = self._box_predict(net)
score_outputs = self._score_predict(net)
return class_outputs, box_outputs, score_outputs
class MaskrcnnHead(tf.keras.layers.Layer):
"""Mask R-CNN head."""
def __init__(
self,
num_classes,
mask_target_size,
num_convs=4,
num_filters=256,
use_separable_conv=False,
activation='relu',
use_batch_norm=True,
norm_activation=nn_ops.norm_activation_builder(activation='relu')):
"""Initialize params to build Fast R-CNN head.
Args:
num_classes: an integer for the number of classes.
mask_target_size: an integer that is the resolution of masks.
num_convs: `int` number that represents the number of the intermediate
conv layers before the prediction.
num_filters: `int` number that represents the number of filters of the
intermediate conv layers.
use_separable_conv: `bool`, indicating whether separable conv layers
are used.
activation: activation function. Support 'relu' and 'swish'.
use_batch_norm: 'bool', indicating whether batchnorm layers are added.
norm_activation: an operation that includes a normalization layer followed
by an optional activation layer.
"""
super(MaskrcnnHead, self).__init__(autocast=False)
self._num_classes = num_classes
self._mask_target_size = mask_target_size
self._num_convs = num_convs
self._num_filters = num_filters
if use_separable_conv:
self._conv2d_op = functools.partial(
tf.keras.layers.SeparableConv2D,
depth_multiplier=1,
bias_initializer=tf.zeros_initializer())
else:
self._conv2d_op = functools.partial(
tf.keras.layers.Conv2D,
kernel_initializer=tf.keras.initializers.VarianceScaling(
scale=2, mode='fan_out', distribution='untruncated_normal'),
bias_initializer=tf.zeros_initializer())
if activation == 'relu':
self._activation_op = tf.nn.relu
elif activation == 'swish':
self._activation_op = tf.nn.swish
else:
raise ValueError('Unsupported activation `{}`.'.format(activation))
self._use_batch_norm = use_batch_norm
self._norm_activation = norm_activation
self._conv2d_ops = []
for i in range(self._num_convs):
self._conv2d_ops.append(
self._conv2d_op(
self._num_filters,
kernel_size=(3, 3),
strides=(1, 1),
padding='same',
dilation_rate=(1, 1),
activation=(None
if self._use_batch_norm else self._activation_op),
name='mask-conv-l%d' % i))
self._mask_conv_transpose = tf.keras.layers.Conv2DTranspose(
self._num_filters,
kernel_size=(2, 2),
strides=(2, 2),
padding='valid',
activation=(None if self._use_batch_norm else self._activation_op),
kernel_initializer=tf.keras.initializers.VarianceScaling(
scale=2, mode='fan_out', distribution='untruncated_normal'),
bias_initializer=tf.zeros_initializer(),
name='conv5-mask')
with tf.name_scope('mask_head'):
self._mask_conv2d_op = self._conv2d_op(
self._num_classes,
kernel_size=(1, 1),
strides=(1, 1),
padding='valid',
name='mask_fcn_logits')
def call(self, roi_features, class_indices, is_training=None):
"""Mask branch for the Mask-RCNN model.
Args:
roi_features: A ROI feature tensor of shape [batch_size, num_rois,
height_l, width_l, num_filters].
class_indices: a Tensor of shape [batch_size, num_rois], indicating which
class the ROI is.
is_training: `boolean`, if True, the model is in training mode.
Returns:
mask_outputs: a tensor with a shape of
[batch_size, num_rois, mask_height, mask_width], representing the mask
predictions for the predicted class of each RoI.
"""
with tf.name_scope('mask_head'):
_, num_rois, height, width, filters = roi_features.get_shape().as_list()
net = tf.reshape(roi_features, [-1, height, width, filters])
for i in range(self._num_convs):
net = self._conv2d_ops[i](net)
if self._use_batch_norm:
net = self._norm_activation()(net, is_training=is_training)
net = self._mask_conv_transpose(net)
if self._use_batch_norm:
net = self._norm_activation()(net, is_training=is_training)
mask_outputs = self._mask_conv2d_op(net)
mask_outputs = tf.reshape(mask_outputs, [
-1, num_rois, self._mask_target_size, self._mask_target_size,
self._num_classes
])
with tf.name_scope('masks_post_processing'):
# TODO(pengchong): Figure out the way not to use the static inferred
# batch size.
batch_size, num_masks = class_indices.get_shape().as_list()
mask_outputs = tf.transpose(a=mask_outputs, perm=[0, 1, 4, 2, 3])
# Constructs indices for gather.
batch_indices = tf.tile(
tf.expand_dims(tf.range(batch_size), axis=1), [1, num_masks])
mask_indices = tf.tile(
tf.expand_dims(tf.range(num_masks), axis=0), [batch_size, 1])
gather_indices = tf.stack(
[batch_indices, mask_indices, class_indices], axis=2)
mask_outputs = tf.gather_nd(mask_outputs, gather_indices)
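# The gather above keeps, for every RoI, only the mask channel of its
# predicted class, so the returned tensor has shape
# [batch_size, num_rois, mask_height, mask_width].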
return mask_outputs
class RetinanetHead(object):
"""RetinaNet head."""
def __init__(
self,
min_level,
max_level,
num_classes,
anchors_per_location,
num_convs=4,
num_filters=256,
use_separable_conv=False,
norm_activation=nn_ops.norm_activation_builder(activation='relu')):
"""Initialize params to build RetinaNet head.
Args:
min_level: `int` number of minimum feature level.
max_level: `int` number of maximum feature level.
num_classes: `int` number of classification categories.
anchors_per_location: `int` number of anchors per pixel location.
num_convs: `int` number of stacked convolution before the last prediction
layer.
num_filters: `int` number of filters used in the head architecture.
use_separable_conv: `bool` to indicate whether to use separable
convolution.
norm_activation: an operation that includes a normalization layer followed
by an optional activation layer.
"""
self._min_level = min_level
self._max_level = max_level
self._num_classes = num_classes
self._anchors_per_location = anchors_per_location
self._num_convs = num_convs
self._num_filters = num_filters
self._use_separable_conv = use_separable_conv
with tf.name_scope('class_net') as scope_name:
self._class_name_scope = tf.name_scope(scope_name)
with tf.name_scope('box_net') as scope_name:
self._box_name_scope = tf.name_scope(scope_name)
self._build_class_net_layers(norm_activation)
self._build_box_net_layers(norm_activation)
def _class_net_batch_norm_name(self, i, level):
return 'class-%d-%d' % (i, level)
def _box_net_batch_norm_name(self, i, level):
return 'box-%d-%d' % (i, level)
def _build_class_net_layers(self, norm_activation):
"""Build re-usable layers for class prediction network."""
if self._use_separable_conv:
self._class_predict = tf.keras.layers.SeparableConv2D(
self._num_classes * self._anchors_per_location,
kernel_size=(3, 3),
bias_initializer=tf.constant_initializer(-np.log((1 - 0.01) / 0.01)),
padding='same',
name='class-predict')
else:
self._class_predict = tf.keras.layers.Conv2D(
self._num_classes * self._anchors_per_location,
kernel_size=(3, 3),
bias_initializer=tf.constant_initializer(-np.log((1 - 0.01) / 0.01)),
kernel_initializer=tf.keras.initializers.RandomNormal(stddev=1e-5),
padding='same',
name='class-predict')
self._class_conv = []
self._class_norm_activation = {}
for i in range(self._num_convs):
if self._use_separable_conv:
self._class_conv.append(
tf.keras.layers.SeparableConv2D(
self._num_filters,
kernel_size=(3, 3),
bias_initializer=tf.zeros_initializer(),
activation=None,
padding='same',
name='class-' + str(i)))
else:
self._class_conv.append(
tf.keras.layers.Conv2D(
self._num_filters,
kernel_size=(3, 3),
bias_initializer=tf.zeros_initializer(),
kernel_initializer=tf.keras.initializers.RandomNormal(
stddev=0.01),
activation=None,
padding='same',
name='class-' + str(i)))
for level in range(self._min_level, self._max_level + 1):
name = self._class_net_batch_norm_name(i, level)
self._class_norm_activation[name] = norm_activation(name=name)
def _build_box_net_layers(self, norm_activation):
"""Build re-usable layers for box prediction network."""
if self._use_separable_conv:
self._box_predict = tf.keras.layers.SeparableConv2D(
4 * self._anchors_per_location,
kernel_size=(3, 3),
bias_initializer=tf.zeros_initializer(),
padding='same',
name='box-predict')
else:
self._box_predict = tf.keras.layers.Conv2D(
4 * self._anchors_per_location,
kernel_size=(3, 3),
bias_initializer=tf.zeros_initializer(),
kernel_initializer=tf.keras.initializers.RandomNormal(stddev=1e-5),
padding='same',
name='box-predict')
self._box_conv = []
self._box_norm_activation = {}
for i in range(self._num_convs):
if self._use_separable_conv:
self._box_conv.append(
tf.keras.layers.SeparableConv2D(
self._num_filters,
kernel_size=(3, 3),
activation=None,
bias_initializer=tf.zeros_initializer(),
padding='same',
name='box-' + str(i)))
else:
self._box_conv.append(
tf.keras.layers.Conv2D(
self._num_filters,
kernel_size=(3, 3),
activation=None,
bias_initializer=tf.zeros_initializer(),
kernel_initializer=tf.keras.initializers.RandomNormal(
stddev=0.01),
padding='same',
name='box-' + str(i)))
for level in range(self._min_level, self._max_level + 1):
name = self._box_net_batch_norm_name(i, level)
self._box_norm_activation[name] = norm_activation(name=name)
def __call__(self, fpn_features, is_training=None):
"""Returns outputs of RetinaNet head."""
class_outputs = {}
box_outputs = {}
with tf.name_scope('retinanet_head'):
for level in range(self._min_level, self._max_level + 1):
features = fpn_features[level]
class_outputs[level] = self.class_net(
features, level, is_training=is_training)
box_outputs[level] = self.box_net(
features, level, is_training=is_training)
return class_outputs, box_outputs
def class_net(self, features, level, is_training):
"""Class prediction network for RetinaNet."""
with self._class_name_scope:
for i in range(self._num_convs):
features = self._class_conv[i](features)
# The convolution layers in the class net are shared among all levels,
# but each level has its own batch normalization to capture the statistical
# difference among different levels.
name = self._class_net_batch_norm_name(i, level)
features = self._class_norm_activation[name](
features, is_training=is_training)
classes = self._class_predict(features)
return classes
def box_net(self, features, level, is_training=None):
"""Box regression network for RetinaNet."""
with self._box_name_scope:
for i in range(self._num_convs):
features = self._box_conv[i](features)
# The convolution layers in the box net are shared among all levels, but
# each level has its own batch normalization to capture the statistical
# difference among different levels.
name = self._box_net_batch_norm_name(i, level)
features = self._box_norm_activation[name](
features, is_training=is_training)
boxes = self._box_predict(features)
return boxes
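# --- Illustrative usage sketch (editorial addition, not part of the original
# API): builds a RetinanetHead and applies it to a dictionary of dummy FPN
# features. The batch size, image size, number of classes and anchors below
# are assumptions chosen only to make the snippet self-contained.
example_head = RetinanetHead(
    min_level=3, max_level=7, num_classes=91, anchors_per_location=9)
example_fpn_features = {
    level: tf.zeros([2, 640 // 2**level, 640 // 2**level, 256])
    for level in range(3, 8)
}
example_class_outputs, example_box_outputs = example_head(
    example_fpn_features, is_training=False)
# example_class_outputs[l]: [2, H_l, W_l, num_classes * anchors_per_location]
# example_box_outputs[l]:   [2, H_l, W_l, 4 * anchors_per_location]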
# TODO(yeqing): Refactor this class when it is ready for var_scope reuse.
class ShapemaskPriorHead(object):
"""ShapeMask Prior head."""
def __init__(self, num_classes, num_downsample_channels, mask_crop_size,
use_category_for_mask, shape_prior_path):
"""Initialize params to build RetinaNet head.
Args:
num_classes: Number of output classes.
num_downsample_channels: number of channels in mask branch.
mask_crop_size: feature crop size.
use_category_for_mask: use class information in mask branch.
shape_prior_path: the path to load shape priors.
"""
self._mask_num_classes = num_classes if use_category_for_mask else 1
self._num_downsample_channels = num_downsample_channels
self._mask_crop_size = mask_crop_size
self._shape_prior_path = shape_prior_path
self._use_category_for_mask = use_category_for_mask
self._shape_prior_fc = tf.keras.layers.Dense(
self._num_downsample_channels, name='shape-prior-fc')
def __call__(self, fpn_features, boxes, outer_boxes, classes, is_training):
"""Generate the detection priors from the box detections and FPN features.
This corresponds to Fig. 4 of the ShapeMask paper at
https://arxiv.org/pdf/1904.03239.pdf
Args:
fpn_features: a dictionary of FPN features.
boxes: a float tensor of shape [batch_size, num_instances, 4] representing
the tight gt boxes from dataloader/detection.
outer_boxes: a float tensor of shape [batch_size, num_instances, 4]
representing the loose gt boxes from dataloader/detection.
classes: an int Tensor of shape [batch_size, num_instances] of instance
classes.
is_training: training mode or not.
Returns:
instance_features: a float Tensor of shape [batch_size * num_instances,
mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
instance feature crop.
detection_priors: A float Tensor of shape [batch_size * num_instances,
mask_size, mask_size, 1].
"""
with tf.name_scope('prior_mask'):
batch_size, num_instances, _ = boxes.get_shape().as_list()
outer_boxes = tf.cast(outer_boxes, tf.float32)
boxes = tf.cast(boxes, tf.float32)
instance_features = spatial_transform_ops.multilevel_crop_and_resize(
fpn_features, outer_boxes, output_size=self._mask_crop_size)
instance_features = self._shape_prior_fc(instance_features)
shape_priors = self._get_priors()
# Get uniform priors for each outer box.
uniform_priors = tf.ones([
batch_size, num_instances, self._mask_crop_size, self._mask_crop_size
])
uniform_priors = spatial_transform_ops.crop_mask_in_target_box(
uniform_priors, boxes, outer_boxes, self._mask_crop_size)
# Classify shape priors using uniform priors + instance features.
prior_distribution = self._classify_shape_priors(
tf.cast(instance_features, tf.float32), uniform_priors, classes)
instance_priors = tf.gather(shape_priors, classes)
instance_priors *= tf.expand_dims(
tf.expand_dims(tf.cast(prior_distribution, tf.float32), axis=-1),
axis=-1)
instance_priors = tf.reduce_sum(instance_priors, axis=2)
detection_priors = spatial_transform_ops.crop_mask_in_target_box(
instance_priors, boxes, outer_boxes, self._mask_crop_size)
return instance_features, detection_priors
def _get_priors(self):
"""Load shape priors from file."""
# loads class specific or agnostic shape priors
if self._shape_prior_path:
# Priors are loaded into shape [mask_num_classes, num_clusters, 32, 32].
priors = np.load(tf.io.gfile.GFile(self._shape_prior_path, 'rb'))
priors = tf.convert_to_tensor(priors, dtype=tf.float32)
self._num_clusters = priors.get_shape().as_list()[1]
else:
# If no prior path is provided, do not use learned priors, i.e., priors
# equal to a uniform empty 32x32 patch.
self._num_clusters = 1
priors = tf.zeros([
self._mask_num_classes, self._num_clusters, self._mask_crop_size,
self._mask_crop_size
])
return priors
def _classify_shape_priors(self, features, uniform_priors, classes):
"""Classify the uniform prior by predicting the shape modes.
Classify the object crop features into K modes of the clusters for each
category.
Args:
features: A float Tensor of shape [batch_size, num_instances, mask_size,
mask_size, num_channels].
uniform_priors: A float Tensor of shape [batch_size, num_instances,
mask_size, mask_size] representing the uniform detection priors.
classes: An int Tensor of shape [batch_size, num_instances] of detection
class ids.
Returns:
prior_distribution: A float Tensor of shape
[batch_size, num_instances, num_clusters] representing the classifier
output probability over all possible shapes.
"""
batch_size, num_instances, _, _, _ = features.get_shape().as_list()
features *= tf.expand_dims(uniform_priors, axis=-1)
# Reduce spatial dimension of features. The features have shape
# [batch_size, num_instances, num_channels].
features = tf.reduce_mean(features, axis=(2, 3))
logits = tf.keras.layers.Dense(
self._mask_num_classes * self._num_clusters,
kernel_initializer=tf.random_normal_initializer(stddev=0.01),
name='classify-shape-prior-fc')(features)
logits = tf.reshape(
logits,
[batch_size, num_instances, self._mask_num_classes, self._num_clusters])
if self._use_category_for_mask:
logits = tf.gather(logits, tf.expand_dims(classes, axis=-1), batch_dims=2)
logits = tf.squeeze(logits, axis=2)
else:
logits = logits[:, :, 0, :]
distribution = tf.nn.softmax(logits, name='shape_prior_weights')
return distribution
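# --- Illustrative sketch (editorial addition) of the prior-weighting step in
# ShapemaskPriorHead.__call__ on dummy data: the predicted distribution over
# K shape clusters weights the class-gathered priors, which are then summed
# over the cluster axis. All shapes below are assumptions.
example_instance_priors = tf.random.uniform(
    [2, 3, 4, 32, 32])  # [batch, instances, clusters, h, w]
example_distribution = tf.nn.softmax(
    tf.random.uniform([2, 3, 4]), axis=-1)  # [batch, instances, clusters]
example_weighted = (
    example_instance_priors * example_distribution[..., tf.newaxis, tf.newaxis])
example_detection_priors = tf.reduce_sum(
    example_weighted, axis=2)  # [batch, instances, 32, 32]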
class ShapemaskCoarsemaskHead(object):
"""ShapemaskCoarsemaskHead head."""
def __init__(self,
num_classes,
num_downsample_channels,
mask_crop_size,
use_category_for_mask,
num_convs,
norm_activation=nn_ops.norm_activation_builder()):
"""Initialize params to build ShapeMask coarse and fine prediction head.
Args:
num_classes: `int` number of mask classification categories.
num_downsample_channels: `int` number of filters at mask head.
mask_crop_size: feature crop size.
use_category_for_mask: use class information in mask branch.
num_convs: `int` number of stacked convolutions before the last prediction
layer.
norm_activation: an operation that includes a normalization layer followed
by an optional activation layer.
"""
self._mask_num_classes = num_classes if use_category_for_mask else 1
self._use_category_for_mask = use_category_for_mask
self._num_downsample_channels = num_downsample_channels
self._mask_crop_size = mask_crop_size
self._num_convs = num_convs
self._norm_activation = norm_activation
self._coarse_mask_fc = tf.keras.layers.Dense(
self._num_downsample_channels, name='coarse-mask-fc')
self._class_conv = []
self._class_norm_activation = []
for i in range(self._num_convs):
self._class_conv.append(
tf.keras.layers.Conv2D(
self._num_downsample_channels,
kernel_size=(3, 3),
bias_initializer=tf.zeros_initializer(),
kernel_initializer=tf.keras.initializers.RandomNormal(
stddev=0.01),
padding='same',
name='coarse-mask-class-%d' % i))
self._class_norm_activation.append(
norm_activation(name='coarse-mask-class-%d-bn' % i))
self._class_predict = tf.keras.layers.Conv2D(
self._mask_num_classes,
kernel_size=(1, 1),
# Focal loss bias initialization to have foreground 0.01 probability.
bias_initializer=tf.constant_initializer(-np.log((1 - 0.01) / 0.01)),
kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
padding='same',
name='coarse-mask-class-predict')
def __call__(self, features, detection_priors, classes, is_training):
"""Generate instance masks from FPN features and detection priors.
This corresponds to Figs. 5-6 of the ShapeMask paper at
https://arxiv.org/pdf/1904.03239.pdf
Args:
features: a float Tensor of shape [batch_size, num_instances,
mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
instance feature crop.
detection_priors: a float Tensor of shape [batch_size, num_instances,
mask_crop_size, mask_crop_size, 1]. This is the detection prior for the
instance.
classes: an int Tensor of shape [batch_size, num_instances] of instance
classes.
is_training: a bool indicating whether in training mode.
Returns:
mask_outputs: instance mask prediction as a float Tensor of shape
[batch_size, num_instances, mask_size, mask_size].
"""
with tf.name_scope('coarse_mask'):
# Transform detection priors to have the same dimension as features.
detection_priors = tf.expand_dims(detection_priors, axis=-1)
detection_priors = self._coarse_mask_fc(detection_priors)
features += detection_priors
mask_logits = self.decoder_net(features, is_training)
# Gather the logits with right input class.
if self._use_category_for_mask:
mask_logits = tf.transpose(mask_logits, [0, 1, 4, 2, 3])
mask_logits = tf.gather(
mask_logits, tf.expand_dims(classes, -1), batch_dims=2)
mask_logits = tf.squeeze(mask_logits, axis=2)
else:
mask_logits = mask_logits[..., 0]
return mask_logits
def decoder_net(self, features, is_training=False):
"""Coarse mask decoder network architecture.
Args:
features: A tensor of size [batch, height_in, width_in, channels_in].
is_training: Whether batch_norm layers are in training mode.
Returns:
images: A feature tensor of size [batch, output_size, output_size,
num_channels]
"""
(batch_size, num_instances, height, width,
num_channels) = features.get_shape().as_list()
features = tf.reshape(
features, [batch_size * num_instances, height, width, num_channels])
for i in range(self._num_convs):
features = self._class_conv[i](features)
features = self._class_norm_activation[i](
features, is_training=is_training)
mask_logits = self._class_predict(features)
mask_logits = tf.reshape(
mask_logits,
[batch_size, num_instances, height, width, self._mask_num_classes])
return mask_logits
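# --- Illustrative sketch (editorial addition) of the class-specific gather
# used in ShapemaskCoarsemaskHead.__call__ above, on dummy data: move the
# class axis next to the instance axis, pick the logit map matching each
# detected class, and drop the singleton class axis. Shapes are assumptions.
example_mask_logits = tf.random.uniform(
    [2, 3, 16, 16, 5])  # [batch, instances, h, w, classes]
example_classes = tf.constant([[0, 2, 4], [1, 1, 3]])  # [batch, instances]
example_logits = tf.transpose(example_mask_logits, [0, 1, 4, 2, 3])
example_logits = tf.gather(
    example_logits, tf.expand_dims(example_classes, -1), batch_dims=2)
example_logits = tf.squeeze(example_logits, axis=2)  # [batch, instances, h, w]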
class ShapemaskFinemaskHead(object):
"""ShapemaskFinemaskHead head."""
def __init__(self,
num_classes,
num_downsample_channels,
mask_crop_size,
use_category_for_mask,
num_convs,
upsample_factor,
norm_activation=nn_ops.norm_activation_builder()):
"""Initialize params to build ShapeMask coarse and fine prediction head.
Args:
num_classes: `int` number of mask classification categories.
num_downsample_channels: `int` number of filters at mask head.
mask_crop_size: feature crop size.
use_category_for_mask: use class information in mask branch.
num_convs: `int` number of stacked convolutions before the last prediction
layer.
upsample_factor: `int` number of fine mask upsampling factor.
norm_activation: an operation that includes a batch normalization layer
followed by an optional ReLU layer.
"""
self._use_category_for_mask = use_category_for_mask
self._mask_num_classes = num_classes if use_category_for_mask else 1
self._num_downsample_channels = num_downsample_channels
self._mask_crop_size = mask_crop_size
self._num_convs = num_convs
self.up_sample_factor = upsample_factor
self._fine_mask_fc = tf.keras.layers.Dense(
self._num_downsample_channels, name='fine-mask-fc')
self._upsample_conv = tf.keras.layers.Conv2DTranspose(
self._num_downsample_channels,
(self.up_sample_factor, self.up_sample_factor),
(self.up_sample_factor, self.up_sample_factor),
name='fine-mask-conv2d-tran')
self._fine_class_conv = []
self._fine_class_bn = []
for i in range(self._num_convs):
self._fine_class_conv.append(
tf.keras.layers.Conv2D(
self._num_downsample_channels,
kernel_size=(3, 3),
bias_initializer=tf.zeros_initializer(),
kernel_initializer=tf.keras.initializers.RandomNormal(
stddev=0.01),
activation=None,
padding='same',
name='fine-mask-class-%d' % i))
self._fine_class_bn.append(
norm_activation(name='fine-mask-class-%d-bn' % i))
self._class_predict_conv = tf.keras.layers.Conv2D(
self._mask_num_classes,
kernel_size=(1, 1),
# Focal loss bias initialization to have foreground 0.01 probability.
bias_initializer=tf.constant_initializer(-np.log((1 - 0.01) / 0.01)),
kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
padding='same',
name='fine-mask-class-predict')
def __call__(self, features, mask_logits, classes, is_training):
"""Generate instance masks from FPN features and detection priors.
This corresponds to Figs. 5-6 of the ShapeMask paper at
https://arxiv.org/pdf/1904.03239.pdf
Args:
features: a float Tensor of shape [batch_size, num_instances,
mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
instance feature crop.
mask_logits: a float Tensor of shape [batch_size, num_instances,
mask_crop_size, mask_crop_size] indicating predicted mask logits.
classes: an int Tensor of shape [batch_size, num_instances] of instance
classes.
is_training: a bool indicating whether in training mode.
Returns:
mask_outputs: instance mask prediction as a float Tensor of shape
[batch_size, num_instances, mask_size, mask_size].
"""
# Extract the foreground mean features
# with tf.variable_scope('fine_mask', reuse=tf.AUTO_REUSE):
with tf.name_scope('fine_mask'):
mask_probs = tf.nn.sigmoid(mask_logits)
# Compute instance embedding for hard average.
binary_mask = tf.cast(tf.greater(mask_probs, 0.5), features.dtype)
instance_embedding = tf.reduce_sum(
features * tf.expand_dims(binary_mask, axis=-1), axis=(2, 3))
instance_embedding /= tf.expand_dims(
tf.reduce_sum(binary_mask, axis=(2, 3)) + 1e-20, axis=-1)
# Take the difference between crop features and mean instance features.
features -= tf.expand_dims(
tf.expand_dims(instance_embedding, axis=2), axis=2)
features += self._fine_mask_fc(tf.expand_dims(mask_probs, axis=-1))
# Decoder to generate upsampled segmentation mask.
mask_logits = self.decoder_net(features, is_training)
if self._use_category_for_mask:
mask_logits = tf.transpose(mask_logits, [0, 1, 4, 2, 3])
mask_logits = tf.gather(
mask_logits, tf.expand_dims(classes, -1), batch_dims=2)
mask_logits = tf.squeeze(mask_logits, axis=2)
else:
mask_logits = mask_logits[..., 0]
return mask_logits
def decoder_net(self, features, is_training=False):
"""Fine mask decoder network architecture.
Args:
features: A tensor of size [batch, height_in, width_in, channels_in].
is_training: Whether batch_norm layers are in training mode.
Returns:
images: A feature tensor of size [batch, output_size, output_size,
num_channels], where output size is `self.up_sample_factor` times
that of the input.
"""
(batch_size, num_instances, height, width,
num_channels) = features.get_shape().as_list()
features = tf.reshape(
features, [batch_size * num_instances, height, width, num_channels])
for i in range(self._num_convs):
features = self._fine_class_conv[i](features)
features = self._fine_class_bn[i](features, is_training=is_training)
if self.up_sample_factor > 1:
features = self._upsample_conv(features)
# Predict per-class instance masks.
mask_logits = self._class_predict_conv(features)
mask_logits = tf.reshape(mask_logits, [
batch_size, num_instances, height * self.up_sample_factor,
width * self.up_sample_factor, self._mask_num_classes
])
return mask_logits
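# --- Illustrative sketch (editorial addition) of the hard-average instance
# embedding computed in ShapemaskFinemaskHead.__call__: average the crop
# features over pixels whose coarse mask probability exceeds 0.5, then
# subtract that mean from every pixel of the crop. Shapes are assumptions.
example_features = tf.random.uniform(
    [2, 3, 16, 16, 8])  # [batch, instances, h, w, channels]
example_mask_probs = tf.random.uniform([2, 3, 16, 16])
example_binary_mask = tf.cast(example_mask_probs > 0.5, example_features.dtype)
example_embedding = tf.reduce_sum(
    example_features * example_binary_mask[..., tf.newaxis], axis=(2, 3))
example_embedding /= tf.expand_dims(
    tf.reduce_sum(example_binary_mask, axis=(2, 3)) + 1e-20, axis=-1)
example_centered = (
    example_features - example_embedding[:, :, tf.newaxis, tf.newaxis, :])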
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Identity Fn that forwards the input features."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
class Identity(object):
"""Identity function that forwards the input features."""
def __call__(self, features, is_training=False):
"""Only forwards the input features."""
return features
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains common building blocks for neural networks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from official.modeling import tf_utils
class ResidualBlock(tf.keras.layers.Layer):
"""A residual block."""
def __init__(self,
filters,
strides,
use_projection=False,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001,
**kwargs):
"""A residual block with BN after convolutions.
Args:
filters: `int` number of filters for the two 3x3 convolutions in the block.
strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input.
use_projection: `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually `True`
for the first block of a block group, which may change the number of
filters and the resolution.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
Defaults to None.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
Defaults to None.
activation: `str` name of the activation function.
use_sync_bn: if True, use synchronized batch normalization.
norm_momentum: `float` normalization momentum for the moving average.
norm_epsilon: `float` small float added to variance to avoid dividing by
zero.
**kwargs: keyword arguments to be passed.
"""
super(ResidualBlock, self).__init__(**kwargs)
self._filters = filters
self._strides = strides
self._use_projection = use_projection
self._use_sync_bn = use_sync_bn
self._activation = activation
self._kernel_initializer = kernel_initializer
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
if use_sync_bn:
self._norm = tf.keras.layers.experimental.SyncBatchNormalization
else:
self._norm = tf.keras.layers.BatchNormalization
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
self._activation_fn = tf_utils.get_activation(activation)
def build(self, input_shape):
if self._use_projection:
self._shortcut = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=1,
strides=self._strides,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm0 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv1 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=3,
strides=self._strides,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm1 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv2 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=3,
strides=1,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm2 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
super(ResidualBlock, self).build(input_shape)
def get_config(self):
config = {
'filters': self._filters,
'strides': self._strides,
'use_projection': self._use_projection,
'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'activation': self._activation,
'use_sync_bn': self._use_sync_bn,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon
}
base_config = super(ResidualBlock, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def call(self, inputs):
shortcut = inputs
if self._use_projection:
shortcut = self._shortcut(shortcut)
shortcut = self._norm0(shortcut)
x = self._conv1(inputs)
x = self._norm1(x)
x = self._activation_fn(x)
x = self._conv2(x)
x = self._norm2(x)
return self._activation_fn(x + shortcut)
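# --- Illustrative usage sketch (editorial addition): a single ResidualBlock
# applied to a dummy feature map. The input shape and filter count are
# assumptions for demonstration only.
example_residual_block = ResidualBlock(filters=64, strides=1, use_projection=True)
example_residual_out = example_residual_block(tf.zeros([2, 56, 56, 32]))
# example_residual_out has shape [2, 56, 56, 64].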
class BottleneckBlock(tf.keras.layers.Layer):
"""A standard bottleneck block."""
def __init__(self,
filters,
strides,
use_projection=False,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001,
**kwargs):
"""A standard bottleneck block with BN after convolutions.
Args:
filters: `int` number of filters for the first two convolutions. Note that
the third and final convolution will use 4 times as many filters.
strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input.
use_projection: `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually `True`
for the first block of a block group, which may change the number of
filters and the resolution.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
Defaults to None.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
Defaults to None.
activation: `str` name of the activation function.
use_sync_bn: if True, use synchronized batch normalization.
norm_momentum: `float` normalization momentum for the moving average.
norm_epsilon: `float` small float added to variance to avoid dividing by
zero.
**kwargs: keyword arguments to be passed.
"""
super(BottleneckBlock, self).__init__(**kwargs)
self._filters = filters
self._strides = strides
self._use_projection = use_projection
self._use_sync_bn = use_sync_bn
self._activation = activation
self._kernel_initializer = kernel_initializer
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
if use_sync_bn:
self._norm = tf.keras.layers.experimental.SyncBatchNormalization
else:
self._norm = tf.keras.layers.BatchNormalization
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
self._activation_fn = tf_utils.get_activation(activation)
def build(self, input_shape):
if self._use_projection:
self._shortcut = tf.keras.layers.Conv2D(
filters=self._filters * 4,
kernel_size=1,
strides=self._strides,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm0 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv1 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm1 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv2 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=3,
strides=self._strides,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm2 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv3 = tf.keras.layers.Conv2D(
filters=self._filters * 4,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm3 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
super(BottleneckBlock, self).build(input_shape)
def get_config(self):
config = {
'filters': self._filters,
'strides': self._strides,
'use_projection': self._use_projection,
'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'activation': self._activation,
'use_sync_bn': self._use_sync_bn,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon
}
base_config = super(BottleneckBlock, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def call(self, inputs):
shortcut = inputs
if self._use_projection:
shortcut = self._shortcut(shortcut)
shortcut = self._norm0(shortcut)
x = self._conv1(inputs)
x = self._norm1(x)
x = self._activation_fn(x)
x = self._conv2(x)
x = self._norm2(x)
x = self._activation_fn(x)
x = self._conv3(x)
x = self._norm3(x)
return self._activation_fn(x + shortcut)
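# --- Illustrative usage sketch (editorial addition): a BottleneckBlock that
# downsamples by 2 and expands the channel count to 4 * filters. The input
# shape is an assumption for demonstration only.
example_bottleneck_block = BottleneckBlock(filters=64, strides=2, use_projection=True)
example_bottleneck_out = example_bottleneck_block(tf.zeros([2, 56, 56, 64]))
# example_bottleneck_out has shape [2, 28, 28, 256].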
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Neural network operations commonly shared by the architectures."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import tensorflow as tf
class NormActivation(tf.keras.layers.Layer):
"""Combined Normalization and Activation layers."""
def __init__(self,
momentum=0.997,
epsilon=1e-4,
trainable=True,
init_zero=False,
use_activation=True,
activation='relu',
fused=True,
name=None):
"""A class to construct layers for a batch normalization followed by a ReLU.
Args:
momentum: momentum for the moving average.
epsilon: small float added to variance to avoid dividing by zero.
trainable: `bool`, if True also add variables to the graph collection
GraphKeys.TRAINABLE_VARIABLES. If False, freeze batch normalization
layer.
init_zero: `bool` if True, initializes scale parameter of batch
normalization with 0. If False, initialize it with 1.
use_activation: `bool`, whether to add the optional activation layer after
the batch normalization layer.
activation: `str`, the type of the activation layer. Currently supports
`relu` and `swish`.
fused: `bool` fused option in batch normalization.
name: `str` name for the operation.
"""
super(NormActivation, self).__init__(trainable=trainable)
if init_zero:
gamma_initializer = tf.keras.initializers.Zeros()
else:
gamma_initializer = tf.keras.initializers.Ones()
self._normalization_op = tf.keras.layers.BatchNormalization(
momentum=momentum,
epsilon=epsilon,
center=True,
scale=True,
trainable=trainable,
fused=fused,
gamma_initializer=gamma_initializer,
name=name)
self._use_activation = use_activation
if activation == 'relu':
self._activation_op = tf.nn.relu
elif activation == 'swish':
self._activation_op = tf.nn.swish
else:
raise ValueError('Unsupported activation `{}`.'.format(activation))
def __call__(self, inputs, is_training=None):
"""Builds the normalization layer followed by an optional activation layer.
Args:
inputs: `Tensor` of shape `[batch, channels, ...]`.
is_training: `boolean`, True if the model is in training mode.
Returns:
A normalized `Tensor` with the same `data_format`.
"""
# We will need to keep training=None by default, so that it can be inherited
# from keras.Model.training
if is_training and self.trainable:
is_training = True
inputs = self._normalization_op(inputs, training=is_training)
if self._use_activation:
inputs = self._activation_op(inputs)
return inputs
def norm_activation_builder(momentum=0.997,
epsilon=1e-4,
trainable=True,
activation='relu',
**kwargs):
return functools.partial(
NormActivation,
momentum=momentum,
epsilon=epsilon,
trainable=trainable,
activation=activation,
**kwargs)
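# --- Illustrative usage sketch (editorial addition): norm_activation_builder
# returns a factory that the detection heads above later call with a per-layer
# `name`. The momentum value and feature shape below are assumptions.
example_builder = norm_activation_builder(momentum=0.99, activation='relu')
example_norm_act = example_builder(name='example-bn')  # a NormActivation layer
example_out = example_norm_act(tf.zeros([2, 8, 8, 16]), is_training=False)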
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions for the post-activation form of Residual Networks.
Residual networks (ResNets) were proposed in:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Deep Residual Learning for Image Recognition. arXiv:1512.03385
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from official.vision.detection.modeling.architecture import nn_ops
# TODO(b/140112644): Refactor the code with Keras style, i.e. build and call.
class Resnet(object):
"""Class to build ResNet family model."""
def __init__(
self,
resnet_depth,
activation='relu',
norm_activation=nn_ops.norm_activation_builder(activation='relu'),
data_format='channels_last'):
"""ResNet initialization function.
Args:
resnet_depth: `int` depth of ResNet backbone model.
activation: `str` name of the activation function. Currently supports `relu`
and `swish`.
norm_activation: an operation that includes a normalization layer followed
by an optional activation layer.
data_format: `str` either "channels_first" for `[batch, channels, height,
width]` or "channels_last" for `[batch, height, width, channels]`.
"""
self._resnet_depth = resnet_depth
if activation == 'relu':
self._activation_op = tf.nn.relu
elif activation == 'swish':
self._activation_op = tf.nn.swish
else:
raise ValueError('Unsupported activation `{}`.'.format(activation))
self._norm_activation = norm_activation
self._data_format = data_format
model_params = {
10: {
'block': self.residual_block,
'layers': [1, 1, 1, 1]
},
18: {
'block': self.residual_block,
'layers': [2, 2, 2, 2]
},
34: {
'block': self.residual_block,
'layers': [3, 4, 6, 3]
},
50: {
'block': self.bottleneck_block,
'layers': [3, 4, 6, 3]
},
101: {
'block': self.bottleneck_block,
'layers': [3, 4, 23, 3]
},
152: {
'block': self.bottleneck_block,
'layers': [3, 8, 36, 3]
},
200: {
'block': self.bottleneck_block,
'layers': [3, 24, 36, 3]
}
}
if resnet_depth not in model_params:
valid_resnet_depths = ', '.join(
[str(depth) for depth in sorted(model_params.keys())])
raise ValueError(
'resnet_depth should be one of [%s]; got an invalid value: %s.' %
(valid_resnet_depths, self._resnet_depth))
params = model_params[resnet_depth]
self._resnet_fn = self.resnet_v1_generator(params['block'],
params['layers'])
def __call__(self, inputs, is_training=None):
"""Returns the ResNet model for a given size and number of output classes.
Args:
inputs: a `Tensor` with shape [batch_size, height, width, 3] representing
a batch of images.
is_training: `bool` if True, the model is in training mode.
Returns:
a `dict` containing `int` keys for continuous feature levels [2, 3, 4, 5].
The values are corresponding feature hierarchy in ResNet with shape
[batch_size, height_l, width_l, num_filters].
"""
with tf.name_scope('resnet%s' % self._resnet_depth):
return self._resnet_fn(inputs, is_training)
def fixed_padding(self, inputs, kernel_size):
"""Pads the input along the spatial dimensions independently of input size.
Args:
inputs: `Tensor` of size `[batch, channels, height, width]` or `[batch,
height, width, channels]` depending on `data_format`.
kernel_size: `int` kernel size to be used for `conv2d` or `max_pool2d`
operations. Should be a positive integer.
Returns:
A padded `Tensor` of the same `data_format` with size either intact
(if `kernel_size == 1`) or padded (if `kernel_size > 1`).
"""
pad_total = kernel_size - 1
pad_beg = pad_total // 2
pad_end = pad_total - pad_beg
if self._data_format == 'channels_first':
padded_inputs = tf.pad(
tensor=inputs,
paddings=[[0, 0], [0, 0], [pad_beg, pad_end], [pad_beg, pad_end]])
else:
padded_inputs = tf.pad(
tensor=inputs,
paddings=[[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
return padded_inputs
def conv2d_fixed_padding(self, inputs, filters, kernel_size, strides):
"""Strided 2-D convolution with explicit padding.
The padding is consistent and is based only on `kernel_size`, not on the
dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).
Args:
inputs: `Tensor` of size `[batch, channels, height_in, width_in]`.
filters: `int` number of filters in the convolution.
kernel_size: `int` size of the kernel to be used in the convolution.
strides: `int` strides of the convolution.
Returns:
A `Tensor` of shape `[batch, filters, height_out, width_out]`.
"""
if strides > 1:
inputs = self.fixed_padding(inputs, kernel_size)
return tf.keras.layers.Conv2D(
filters=filters,
kernel_size=kernel_size,
strides=strides,
padding=('SAME' if strides == 1 else 'VALID'),
use_bias=False,
kernel_initializer=tf.initializers.VarianceScaling(),
data_format=self._data_format)(
inputs=inputs)
def residual_block(self,
inputs,
filters,
strides,
use_projection=False,
is_training=None):
"""Standard building block for residual networks with BN after convolutions.
Args:
inputs: `Tensor` of size `[batch, channels, height, width]`.
filters: `int` number of filters for the two 3x3 convolutions in the block.
strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input.
use_projection: `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually `True`
for the first block of a block group, which may change the number of
filters and the resolution.
is_training: `bool` if True, the model is in training mode.
Returns:
The output `Tensor` of the block.
"""
shortcut = inputs
if use_projection:
# Projection shortcut in first layer to match filters and strides
shortcut = self.conv2d_fixed_padding(
inputs=inputs, filters=filters, kernel_size=1, strides=strides)
shortcut = self._norm_activation(use_activation=False)(
shortcut, is_training=is_training)
inputs = self.conv2d_fixed_padding(
inputs=inputs, filters=filters, kernel_size=3, strides=strides)
inputs = self._norm_activation()(inputs, is_training=is_training)
inputs = self.conv2d_fixed_padding(
inputs=inputs, filters=filters, kernel_size=3, strides=1)
inputs = self._norm_activation(
use_activation=False, init_zero=True)(
inputs, is_training=is_training)
return self._activation_op(inputs + shortcut)
def bottleneck_block(self,
inputs,
filters,
strides,
use_projection=False,
is_training=None):
"""Bottleneck block variant for residual networks with BN after convolutions.
Args:
inputs: `Tensor` of size `[batch, channels, height, width]`.
filters: `int` number of filters for the first two convolutions. Note that
the third and final convolution will use 4 times as many filters.
strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input.
use_projection: `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually `True`
for the first block of a block group, which may change the number of
filters and the resolution.
is_training: `bool` if True, the model is in training mode.
Returns:
The output `Tensor` of the block.
"""
shortcut = inputs
if use_projection:
# Projection shortcut only in first block within a group. Bottleneck
# blocks end with 4 times the number of filters.
filters_out = 4 * filters
shortcut = self.conv2d_fixed_padding(
inputs=inputs, filters=filters_out, kernel_size=1, strides=strides)
shortcut = self._norm_activation(use_activation=False)(
shortcut, is_training=is_training)
inputs = self.conv2d_fixed_padding(
inputs=inputs, filters=filters, kernel_size=1, strides=1)
inputs = self._norm_activation()(inputs, is_training=is_training)
inputs = self.conv2d_fixed_padding(
inputs=inputs, filters=filters, kernel_size=3, strides=strides)
inputs = self._norm_activation()(inputs, is_training=is_training)
inputs = self.conv2d_fixed_padding(
inputs=inputs, filters=4 * filters, kernel_size=1, strides=1)
inputs = self._norm_activation(
use_activation=False, init_zero=True)(
inputs, is_training=is_training)
return self._activation_op(inputs + shortcut)
def block_group(self, inputs, filters, block_fn, blocks, strides, name,
is_training):
"""Creates one group of blocks for the ResNet model.
Args:
inputs: `Tensor` of size `[batch, channels, height, width]`.
filters: `int` number of filters for the first convolution of the layer.
block_fn: `function` for the block to use within the model
blocks: `int` number of blocks contained in the layer.
strides: `int` stride to use for the first convolution of the layer. If
greater than 1, this layer will downsample the input.
name: `str` name for the Tensor output of the block layer.
is_training: `bool` if True, the model is in training mode.
Returns:
The output `Tensor` of the block layer.
"""
# Only the first block per block_group uses projection shortcut and strides.
inputs = block_fn(
inputs, filters, strides, use_projection=True, is_training=is_training)
for _ in range(1, blocks):
inputs = block_fn(inputs, filters, 1, is_training=is_training)
return tf.identity(inputs, name)
def resnet_v1_generator(self, block_fn, layers):
"""Generator for ResNet v1 models.
Args:
block_fn: `function` for the block to use within the model. Either
`residual_block` or `bottleneck_block`.
layers: list of 4 `int`s denoting the number of blocks to include in each
of the 4 block groups. Each group consists of blocks that take inputs of
the same resolution.
Returns:
Model `function` that takes in `inputs` and `is_training` and returns the
output `Tensor` of the ResNet model.
"""
def model(inputs, is_training=None):
"""Creation of the model graph."""
inputs = self.conv2d_fixed_padding(
inputs=inputs, filters=64, kernel_size=7, strides=2)
inputs = tf.identity(inputs, 'initial_conv')
inputs = self._norm_activation()(inputs, is_training=is_training)
inputs = tf.keras.layers.MaxPool2D(
pool_size=3, strides=2, padding='SAME',
data_format=self._data_format)(
inputs)
inputs = tf.identity(inputs, 'initial_max_pool')
c2 = self.block_group(
inputs=inputs,
filters=64,
block_fn=block_fn,
blocks=layers[0],
strides=1,
name='block_group1',
is_training=is_training)
c3 = self.block_group(
inputs=c2,
filters=128,
block_fn=block_fn,
blocks=layers[1],
strides=2,
name='block_group2',
is_training=is_training)
c4 = self.block_group(
inputs=c3,
filters=256,
block_fn=block_fn,
blocks=layers[2],
strides=2,
name='block_group3',
is_training=is_training)
c5 = self.block_group(
inputs=c4,
filters=512,
block_fn=block_fn,
blocks=layers[3],
strides=2,
name='block_group4',
is_training=is_training)
return {2: c2, 3: c3, 4: c4, 5: c5}
return model
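# --- Illustrative usage sketch (editorial addition): building a ResNet-50
# backbone and extracting the multilevel feature dictionary. The input
# resolution and batch size are assumptions for demonstration only.
example_backbone = Resnet(resnet_depth=50)
example_features = example_backbone(tf.zeros([2, 256, 256, 3]), is_training=False)
# example_features maps levels {2, 3, 4, 5} to tensors; e.g. example_features[5]
# has shape [2, 8, 8, 2048] for a 256x256 input.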
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
# ==============================================================================
"""Implementation of SpineNet model.
X. Du, T-Y. Lin, P. Jin, G. Ghiasi, M. Tan, Y. Cui, Q. V. Le, X. Song
SpineNet: Learning Scale-Permuted Backbone for Recognition and Localization
https://arxiv.org/abs/1912.05027
"""
import math
from absl import logging
import tensorflow as tf
from official.modeling import tf_utils
from official.vision.detection.modeling.architecture import nn_blocks
layers = tf.keras.layers
FILTER_SIZE_MAP = {
1: 32,
2: 64,
3: 128,
4: 256,
5: 256,
6: 256,
7: 256,
}
# The fixed SpineNet architecture discovered by NAS.
# Each element represents a specification of a building block:
# (block_level, block_fn, (input_offset0, input_offset1), is_output).
SPINENET_BLOCK_SPECS = [
(2, 'bottleneck', (0, 1), False),
(4, 'residual', (0, 1), False),
(3, 'bottleneck', (2, 3), False),
(4, 'bottleneck', (2, 4), False),
(6, 'residual', (3, 5), False),
(4, 'bottleneck', (3, 5), False),
(5, 'residual', (6, 7), False),
(7, 'residual', (6, 8), False),
(5, 'bottleneck', (8, 9), False),
(5, 'bottleneck', (8, 10), False),
(4, 'bottleneck', (5, 10), True),
(3, 'bottleneck', (4, 10), True),
(5, 'bottleneck', (7, 12), True),
(7, 'bottleneck', (5, 14), True),
(6, 'bottleneck', (12, 14), True),
]
SCALING_MAP = {
'49S': {
'endpoints_num_filters': 128,
'filter_size_scale': 0.65,
'resample_alpha': 0.5,
'block_repeats': 1,
},
'49': {
'endpoints_num_filters': 256,
'filter_size_scale': 1.0,
'resample_alpha': 0.5,
'block_repeats': 1,
},
'96': {
'endpoints_num_filters': 256,
'filter_size_scale': 1.0,
'resample_alpha': 0.5,
'block_repeats': 2,
},
'143': {
'endpoints_num_filters': 256,
'filter_size_scale': 1.0,
'resample_alpha': 1.0,
'block_repeats': 3,
},
'190': {
'endpoints_num_filters': 512,
'filter_size_scale': 1.3,
'resample_alpha': 1.0,
'block_repeats': 4,
},
}
class BlockSpec(object):
"""A container class that specifies the block configuration for SpineNet."""
def __init__(self, level, block_fn, input_offsets, is_output):
self.level = level
self.block_fn = block_fn
self.input_offsets = input_offsets
self.is_output = is_output
def build_block_specs(block_specs=None):
"""Builds the list of BlockSpec objects for SpineNet."""
if not block_specs:
block_specs = SPINENET_BLOCK_SPECS
logging.info('Building SpineNet block specs: %s', block_specs)
return [BlockSpec(*b) for b in block_specs]
class SpineNet(tf.keras.Model):
"""Class to build SpineNet models."""
def __init__(self,
input_specs=tf.keras.layers.InputSpec(shape=[None, 640, 640, 3]),
min_level=3,
max_level=7,
block_specs=build_block_specs(),
endpoints_num_filters=256,
resample_alpha=0.5,
block_repeats=1,
filter_size_scale=1.0,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001,
**kwargs):
"""SpineNet model."""
self._min_level = min_level
self._max_level = max_level
self._block_specs = block_specs
self._endpoints_num_filters = endpoints_num_filters
self._resample_alpha = resample_alpha
self._block_repeats = block_repeats
self._filter_size_scale = filter_size_scale
self._kernel_initializer = kernel_initializer
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
self._use_sync_bn = use_sync_bn
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
if activation == 'relu':
self._activation = tf.nn.relu
elif activation == 'swish':
self._activation = tf.nn.swish
else:
raise ValueError('Activation {} not implemented.'.format(activation))
self._init_block_fn = 'bottleneck'
self._num_init_blocks = 2
if use_sync_bn:
self._norm = layers.experimental.SyncBatchNormalization
else:
self._norm = layers.BatchNormalization
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
# Build SpineNet.
inputs = tf.keras.Input(shape=input_specs.shape[1:])
net = self._build_stem(inputs=inputs)
net = self._build_scale_permuted_network(
net=net, input_width=input_specs.shape[1])
net = self._build_endpoints(net=net)
super(SpineNet, self).__init__(inputs=inputs, outputs=net)
def _block_group(self,
inputs,
filters,
strides,
block_fn_cand,
block_repeats=1,
name='block_group'):
"""Creates one group of blocks for the SpineNet model."""
block_fn_candidates = {
'bottleneck': nn_blocks.BottleneckBlock,
'residual': nn_blocks.ResidualBlock,
}
block_fn = block_fn_candidates[block_fn_cand]
_, _, _, num_filters = inputs.get_shape().as_list()
if block_fn_cand == 'bottleneck':
use_projection = not (num_filters == (filters * 4) and strides == 1)
else:
use_projection = not (num_filters == filters and strides == 1)
x = block_fn(
filters=filters,
strides=strides,
use_projection=use_projection,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
activation=self._activation,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon)(
inputs)
for _ in range(1, block_repeats):
x = block_fn(
filters=filters,
strides=1,
use_projection=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
activation=self._activation,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon)(
x)
return tf.identity(x, name=name)
def _build_stem(self, inputs):
"""Build SpineNet stem."""
x = layers.Conv2D(
filters=64,
kernel_size=7,
strides=2,
use_bias=False,
padding='same',
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
inputs)
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation)(x)
x = layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)
net = []
# Build the initial level 2 blocks.
for i in range(self._num_init_blocks):
x = self._block_group(
inputs=x,
filters=int(FILTER_SIZE_MAP[2] * self._filter_size_scale),
strides=1,
block_fn_cand=self._init_block_fn,
block_repeats=self._block_repeats,
name='stem_block_{}'.format(i + 1))
net.append(x)
return net
def _build_scale_permuted_network(self,
net,
input_width,
weighted_fusion=False):
"""Build scale-permuted network."""
net_sizes = [int(math.ceil(input_width / 2**2))] * len(net)
net_block_fns = [self._init_block_fn] * len(net)
num_outgoing_connections = [0] * len(net)
endpoints = {}
for i, block_spec in enumerate(self._block_specs):
# Find out specs for the target block.
target_width = int(math.ceil(input_width / 2**block_spec.level))
target_num_filters = int(FILTER_SIZE_MAP[block_spec.level] *
self._filter_size_scale)
target_block_fn = block_spec.block_fn
# Resample then merge input0 and input1.
parents = []
input0 = block_spec.input_offsets[0]
input1 = block_spec.input_offsets[1]
x0 = self._resample_with_alpha(
inputs=net[input0],
input_width=net_sizes[input0],
input_block_fn=net_block_fns[input0],
target_width=target_width,
target_num_filters=target_num_filters,
target_block_fn=target_block_fn,
alpha=self._resample_alpha)
parents.append(x0)
num_outgoing_connections[input0] += 1
x1 = self._resample_with_alpha(
inputs=net[input1],
input_width=net_sizes[input1],
input_block_fn=net_block_fns[input1],
target_width=target_width,
target_num_filters=target_num_filters,
target_block_fn=target_block_fn,
alpha=self._resample_alpha)
parents.append(x1)
num_outgoing_connections[input1] += 1
# Merge 0 outdegree blocks to the output block.
if block_spec.is_output:
for j, (j_feat,
j_connections) in enumerate(zip(net, num_outgoing_connections)):
if j_connections == 0 and (j_feat.shape[2] == target_width and
j_feat.shape[3] == x0.shape[3]):
parents.append(j_feat)
num_outgoing_connections[j] += 1
# pylint: disable=g-direct-tensorflow-import
if weighted_fusion:
dtype = parents[0].dtype
parent_weights = [
tf.nn.relu(tf.cast(tf.Variable(1.0, name='block{}_fusion{}'.format(
i, j)), dtype=dtype)) for j in range(len(parents))]
weights_sum = tf.add_n(parent_weights)
parents = [
parents[i] * parent_weights[i] / (weights_sum + 0.0001)
for i in range(len(parents))
]
# Fuse all parent nodes then build a new block.
x = tf_utils.get_activation(self._activation)(tf.add_n(parents))
x = self._block_group(
inputs=x,
filters=target_num_filters,
strides=1,
block_fn_cand=target_block_fn,
block_repeats=self._block_repeats,
name='scale_permuted_block_{}'.format(i + 1))
net.append(x)
net_sizes.append(target_width)
net_block_fns.append(target_block_fn)
num_outgoing_connections.append(0)
# Save output feats.
if block_spec.is_output:
if block_spec.level in endpoints:
raise ValueError('Duplicate feats found for output level {}.'.format(
block_spec.level))
if (block_spec.level < self._min_level or
block_spec.level > self._max_level):
raise ValueError('Output level is out of range [{}, {}]'.format(
self._min_level, self._max_level))
endpoints[block_spec.level] = x
return endpoints
def _build_endpoints(self, net):
"""Match filter size for endpoints before sharing conv layers."""
endpoints = {}
for level in range(self._min_level, self._max_level + 1):
x = layers.Conv2D(
filters=self._endpoints_num_filters,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
net[level])
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation)(x)
endpoints[level] = x
return endpoints
def _resample_with_alpha(self,
inputs,
input_width,
input_block_fn,
target_width,
target_num_filters,
target_block_fn,
alpha=0.5):
"""Match resolution and feature dimension."""
_, _, _, input_num_filters = inputs.get_shape().as_list()
if input_block_fn == 'bottleneck':
input_num_filters /= 4
new_num_filters = int(input_num_filters * alpha)
x = layers.Conv2D(
filters=new_num_filters,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
inputs)
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation)(x)
# Spatial resampling.
if input_width > target_width:
x = layers.Conv2D(
filters=new_num_filters,
kernel_size=3,
strides=2,
padding='SAME',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
x)
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation)(x)
input_width /= 2
while input_width > target_width:
x = layers.MaxPool2D(pool_size=3, strides=2, padding='SAME')(x)
input_width /= 2
elif input_width < target_width:
scale = target_width // input_width
x = layers.UpSampling2D(size=(scale, scale))(x)
# Last 1x1 conv to match filter size.
if target_block_fn == 'bottleneck':
target_num_filters *= 4
x = layers.Conv2D(
filters=target_num_filters,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
x)
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
return x
class SpineNetBuilder(object):
"""SpineNet builder."""
def __init__(self,
model_id,
input_specs=tf.keras.layers.InputSpec(shape=[None, 640, 640, 3]),
min_level=3,
max_level=7,
block_specs=build_block_specs(),
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001):
if model_id not in SCALING_MAP:
raise ValueError(
'SpineNet {} is not a valid architecture.'.format(model_id))
scaling_params = SCALING_MAP[model_id]
self._input_specs = input_specs
self._min_level = min_level
self._max_level = max_level
self._block_specs = block_specs
self._endpoints_num_filters = scaling_params['endpoints_num_filters']
self._resample_alpha = scaling_params['resample_alpha']
self._block_repeats = scaling_params['block_repeats']
self._filter_size_scale = scaling_params['filter_size_scale']
self._kernel_initializer = kernel_initializer
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
self._activation = activation
self._use_sync_bn = use_sync_bn
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
def __call__(self, inputs, is_training=None):
model = SpineNet(
input_specs=self._input_specs,
min_level=self._min_level,
max_level=self._max_level,
block_specs=self._block_specs,
endpoints_num_filters=self._endpoints_num_filters,
resample_alpha=self._resample_alpha,
block_repeats=self._block_repeats,
filter_size_scale=self._filter_size_scale,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
activation=self._activation,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon)
return model(inputs)
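# --- Illustrative usage sketch (editorial addition): using SpineNetBuilder to
# obtain multilevel endpoints for levels [min_level, max_level]. The '49S'
# variant, batch size, and default 640x640 input are assumptions for
# demonstration only.
example_spinenet = SpineNetBuilder(model_id='49S')
example_endpoints = example_spinenet(tf.zeros([1, 640, 640, 3]))
# example_endpoints maps each level l in [3, 7] to a tensor of shape
# [1, ceil(640 / 2**l), ceil(640 / 2**l), 128] (128 endpoint filters for '49S').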
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base Model definition."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import abc
import functools
import re
import tensorflow as tf
from official.vision.detection.modeling import checkpoint_utils
from official.vision.detection.modeling import learning_rates
from official.vision.detection.modeling import optimizers
def _make_filter_trainable_variables_fn(frozen_variable_prefix):
"""Creates a function for filtering trainable varialbes."""
def _filter_trainable_variables(variables):
"""Filters trainable varialbes.
Args:
variables: a list of tf.Variable to be filtered.
Returns:
filtered_variables: a list of tf.Variable with the frozen variables filtered out.
"""
# frozen_variable_prefix: a regex string specifying the prefix pattern of
# the frozen variables' names.
filtered_variables = [
v for v in variables if not frozen_variable_prefix or
not re.match(frozen_variable_prefix, v.name)
]
return filtered_variables
return _filter_trainable_variables
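# Illustrative sketch (not part of the original file): freeze a backbone by
# filtering out variables whose names match a prefix regex. The prefix
# 'resnet50' below is a hypothetical example; any regex over variable names
# can be used.
def _example_freeze_backbone_variables(keras_model):
  filter_fn = _make_filter_trainable_variables_fn(r'resnet50')
  return filter_fn(keras_model.trainable_variables)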
class Model(object):
"""Base class for model function."""
__metaclass__ = abc.ABCMeta
def __init__(self, params):
self._use_bfloat16 = params.architecture.use_bfloat16
if params.architecture.use_bfloat16:
tf.compat.v2.keras.mixed_precision.set_global_policy('mixed_bfloat16')
# Optimization.
self._optimizer_fn = optimizers.OptimizerFactory(params.train.optimizer)
self._learning_rate = learning_rates.learning_rate_generator(
params.train.total_steps, params.train.learning_rate)
self._frozen_variable_prefix = params.train.frozen_variable_prefix
self._regularization_var_regex = params.train.regularization_variable_regex
self._l2_weight_decay = params.train.l2_weight_decay
# Checkpoint restoration.
self._checkpoint = params.train.checkpoint.as_dict()
# Summary.
self._enable_summary = params.enable_summary
self._model_dir = params.model_dir
@abc.abstractmethod
def build_outputs(self, inputs, mode):
"""Build the graph of the forward path."""
pass
@abc.abstractmethod
def build_model(self, params, mode):
"""Build the model object."""
pass
@abc.abstractmethod
def build_loss_fn(self):
"""Build the model object."""
pass
def post_processing(self, labels, outputs):
"""Post-processing function."""
return labels, outputs
def model_outputs(self, inputs, mode):
"""Build the model outputs."""
return self.build_outputs(inputs, mode)
def build_optimizer(self):
"""Returns train_op to optimize total loss."""
# Sets up the optimizer.
return self._optimizer_fn(self._learning_rate)
def make_filter_trainable_variables_fn(self):
"""Creates a function for filtering trainable varialbes."""
return _make_filter_trainable_variables_fn(self._frozen_variable_prefix)
def weight_decay_loss(self, trainable_variables):
reg_variables = [
v for v in trainable_variables
if self._regularization_var_regex is None or
re.match(self._regularization_var_regex, v.name)
]
return self._l2_weight_decay * tf.add_n(
[tf.nn.l2_loss(v) for v in reg_variables])
def make_restore_checkpoint_fn(self):
"""Returns scaffold function to restore parameters from v1 checkpoint."""
if 'skip_checkpoint_variables' in self._checkpoint:
skip_regex = self._checkpoint['skip_checkpoint_variables']
else:
skip_regex = None
return checkpoint_utils.make_restore_checkpoint_fn(
self._checkpoint['path'],
prefix=self._checkpoint['prefix'],
skip_regex=skip_regex)
def eval_metrics(self):
"""Returns tuple of metric function and its inputs for evaluation."""
raise NotImplementedError('Unimplemented eval_metrics')
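# Illustrative sketch (not part of the original file): how the pieces of the
# base Model are typically combined when setting up training. `detector` is
# assumed to be an instance of a concrete subclass and `keras_model` the
# tf.keras.Model it built.
def _example_train_setup(detector, keras_model):
  optimizer = detector.build_optimizer()
  loss_fn = detector.build_loss_fn()
  filter_fn = detector.make_filter_trainable_variables_fn()
  trainable_variables = filter_fn(keras_model.trainable_variables)
  # The total training loss usually adds this L2 weight decay term.
  l2_loss = detector.weight_decay_loss(trainable_variables)
  return optimizer, loss_fn, trainable_variables, l2_loss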
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Util functions for loading checkpoints.
Especially for loading a TensorFlow 1.x
checkpoint into a TensorFlow 2.x (Keras) model.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import re
from absl import logging
import tensorflow as tf
def _build_assignment_map(keras_model,
prefix='',
skip_variables_regex=None,
var_to_shape_map=None):
"""Compute an assignment mapping for loading older checkpoints into a Keras
model. Variable names are remapped from the original TPUEstimator model to
the new Keras name.
Args:
keras_model: tf.keras.Model object to provide variables to assign.
prefix: prefix in the variable name to be removed for alignment with names
in the checkpoint.
skip_variables_regex: regular expression to match the names of variables
that do not need to be assigned.
var_to_shape_map: variable name to shape mapping from the checkpoint.
Returns:
The variable assignment map.
"""
assignment_map = {}
checkpoint_names = []
if var_to_shape_map:
checkpoint_names = list(
filter(
lambda x: not x.endswith('Momentum') and not x.endswith(
'global_step'), var_to_shape_map.keys()))
logging.info('Number of variables in the checkpoint %d',
len(checkpoint_names))
for var in keras_model.variables:
var_name = var.name
if skip_variables_regex and re.match(skip_variables_regex, var_name):
continue
# Trim the index of the variable.
if ':' in var_name:
var_name = var_name[:var_name.rindex(':')]
if var_name.startswith(prefix):
var_name = var_name[len(prefix):]
if not var_to_shape_map:
assignment_map[var_name] = var
continue
# Match name with variables in the checkpoint.
match_names = list(filter(lambda x: x.endswith(var_name), checkpoint_names))
try:
if match_names:
assert len(match_names) == 1, 'more than one match for {}: {}'.format(
var_name, match_names)
checkpoint_names.remove(match_names[0])
assignment_map[match_names[0]] = var
else:
logging.info('Variable name not found in checkpoint: %s', var_name)
except Exception as e:
logging.info('Error removing the match_name: %s', match_names)
logging.info('Exception: %s', e)
raise
logging.info('Found %d matching variables in the checkpoint.', len(assignment_map))
return assignment_map
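# Worked example (illustrative, not part of the original file): a Keras
# variable named 'retinanet/resnet50/conv2d/kernel:0', with prefix
# 'retinanet/', is trimmed to 'resnet50/conv2d/kernel' (the ':0' index and
# the prefix are stripped) and then matched by suffix against the checkpoint
# names, so a checkpoint entry ending in 'resnet50/conv2d/kernel' would map
# to that variable.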
def _get_checkpoint_map(checkpoint_path):
reader = tf.train.load_checkpoint(checkpoint_path)
return reader.get_variable_to_shape_map()
def make_restore_checkpoint_fn(checkpoint_path, prefix='', skip_regex=None):
"""Returns scaffold function to restore parameters from v1 checkpoint.
Args:
checkpoint_path: path of the checkpoint folder or file.
Example 1: '/path/to/model_dir/'
Example 2: '/path/to/model.ckpt-22500'
prefix: prefix in the variable name to be removed for alignment with names
in the checkpoint.
skip_regex: regular expression to match the names of variables that do not
need to be assigned.
Returns:
Callable[[tf.keras.Model], None]. Function to load a v1 checkpoint into a Keras model.
"""
def _restore_checkpoint_fn(keras_model):
"""Loads pretrained model through scaffold function."""
if not checkpoint_path:
logging.info('checkpoint_path is empty')
return
var_prefix = prefix
if prefix and not prefix.endswith('/'):
var_prefix += '/'
var_to_shape_map = _get_checkpoint_map(checkpoint_path)
assert var_to_shape_map, 'var_to_shape_map should not be empty'
vars_to_load = _build_assignment_map(
keras_model,
prefix=var_prefix,
skip_variables_regex=skip_regex,
var_to_shape_map=var_to_shape_map)
if not vars_to_load:
raise ValueError('Variables to load is empty.')
tf.compat.v1.train.init_from_checkpoint(checkpoint_path, vars_to_load)
return _restore_checkpoint_fn
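# Illustrative usage sketch (not part of the original file): restore a TF1
# backbone checkpoint into a Keras model while skipping prediction-head
# variables. The checkpoint path, prefix, and regex below are hypothetical
# placeholders.
def _example_restore_from_v1_checkpoint(keras_model):
  restore_fn = make_restore_checkpoint_fn(
      '/path/to/model.ckpt-22500',
      prefix='retinanet/',
      skip_regex=r'.*predict.*')
  restore_fn(keras_model)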
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory to build detection model."""
from official.vision.detection.modeling import maskrcnn_model
from official.vision.detection.modeling import olnmask_model
from official.vision.detection.modeling import retinanet_model
from official.vision.detection.modeling import shapemask_model
def model_generator(params):
"""Model function generator."""
if params.type == 'retinanet':
model_fn = retinanet_model.RetinanetModel(params)
elif params.type == 'mask_rcnn':
model_fn = maskrcnn_model.MaskrcnnModel(params)
elif params.type == 'olnmask':
model_fn = olnmask_model.OlnMaskModel(params)
elif params.type == 'shapemask':
model_fn = shapemask_model.ShapeMaskModel(params)
else:
raise ValueError('Model %s is not supported.' % params.type)
return model_fn
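# Illustrative usage (not part of the original file): `params` is assumed to
# be a config object whose `type` field is one of the strings handled above;
# the returned object is the corresponding model class instance, e.g.
#
#   params.type = 'retinanet'
#   model_fn = model_generator(params)  # -> retinanet_model.RetinanetModel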