Commit b92025a9 authored by anivegesana

Merge branch 'master' of https://github.com/tensorflow/models into detection_generator_pr_2

parents 1b425791 37536370
......@@ -43,11 +43,6 @@ class RankingTrainer(base_trainer.Trainer):
def train_loop_end(self) -> Dict[str, float]:
"""See base class."""
self.join()
# Checks if the model numeric status is stable and conducts the checkpoint
# recovery accordingly.
if self._recovery:
self._recovery.maybe_recover(self.train_loss.result().numpy(),
self.global_step.numpy())
logs = {}
for metric in self.train_metrics + [self.train_loss]:
logs[metric.name] = metric.result()
......
......@@ -50,6 +50,7 @@ class ASPP(hyperparams.Config):
dilation_rates: List[int] = dataclasses.field(default_factory=list)
dropout_rate: float = 0.0
num_filters: int = 256
use_depthwise_convolution: bool = False
pool_kernel_size: Optional[List[int]] = None # Use global average pooling.
......
......@@ -55,6 +55,7 @@ class Parser(hyperparams.Config):
aug_rand_hflip: bool = False
aug_scale_min: float = 1.0
aug_scale_max: float = 1.0
aug_policy: Optional[str] = None
skip_crowd_during_training: bool = True
max_num_instances: int = 100
......
......@@ -60,6 +60,7 @@ class SegmentationHead(hyperparams.Config):
level: int = 3
num_convs: int = 2
num_filters: int = 256
use_depthwise_convolution: bool = False
prediction_kernel_size: int = 1
upsample_factor: int = 1
feature_fusion: Optional[str] = None # None, deeplabv3plus, or pyramid_fusion
......
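The three hunks above add plain dataclass fields, so the new options can be set like any other hyperparameter. A minimal sketch, assuming the usual config module paths (the paths themselves are not shown in this diff):
from official.vision.beta.configs import decoders, semantic_segmentation

# `use_depthwise_convolution` defaults to False in both configs above.
aspp_cfg = decoders.ASPP(
    dilation_rates=[6, 12, 18],
    use_depthwise_convolution=True)
head_cfg = semantic_segmentation.SegmentationHead(
    level=3,
    use_depthwise_convolution=True)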
......@@ -15,7 +15,7 @@ done
cocosplit_url="dl.yf.io/fs-det/datasets/cocosplit"
wget --recursive --no-parent -q --show-progress --progress=bar:force:noscroll \
-P "${tmp_dir}" -A "trainvalno5k.json,5k.json,*10shot*.json,*30shot*.json" \
-P "${tmp_dir}" -A "trainvalno5k.json,5k.json,*1shot*.json,*3shot*.json,*5shot*.json,*10shot*.json,*30shot*.json" \
"http://${cocosplit_url}/"
mv "${tmp_dir}/${cocosplit_url}/"* "${tmp_dir}"
rm -rf "${tmp_dir}/${cocosplit_url}/"
......@@ -24,7 +24,7 @@ python process_coco_few_shot_json_files.py \
--logtostderr --workdir="${tmp_dir}"
for seed in {0..9}; do
for shots in 10 30; do
for shots in 1 3 5 10 30; do
python create_coco_tf_record.py \
--logtostderr \
--image_dir="${base_image_dir}/train2014" \
......
......@@ -53,7 +53,7 @@ CATEGORIES = ['airplane', 'apple', 'backpack', 'banana', 'baseball bat',
'traffic light', 'train', 'truck', 'tv', 'umbrella', 'vase',
'wine glass', 'zebra']
SEEDS = list(range(10))
SHOTS = [10, 30]
SHOTS = [1, 3, 5, 10, 30]
FILE_SUFFIXES = collections.defaultdict(list)
for _seed, _shots in itertools.product(SEEDS, SHOTS):
......
......@@ -131,7 +131,6 @@ def convert_predictions_to_coco_annotations(predictions):
"""
coco_predictions = []
num_batches = len(predictions['source_id'])
batch_size = predictions['source_id'][0].shape[0]
max_num_detections = predictions['detection_classes'][0].shape[1]
use_outer_box = 'detection_outer_boxes' in predictions
for i in range(num_batches):
......@@ -144,6 +143,7 @@ def convert_predictions_to_coco_annotations(predictions):
else:
mask_boxes = predictions['detection_boxes']
batch_size = predictions['source_id'][i].shape[0]
for j in range(batch_size):
if 'detection_masks' in predictions:
image_masks = mask_ops.paste_instance_masks(
......@@ -211,9 +211,9 @@ def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None):
gt_annotations = []
num_batches = len(groundtruths['source_id'])
batch_size = groundtruths['source_id'][0].shape[0]
for i in range(num_batches):
max_num_instances = groundtruths['classes'][i].shape[1]
batch_size = groundtruths['source_id'][i].shape[0]
for j in range(batch_size):
num_instances = groundtruths['num_detections'][i][j]
if num_instances > max_num_instances:
......
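(Both hunks apply the same fix: `batch_size` is now read from the current batch inside the loop instead of once from the first batch, so a smaller final batch no longer indexes out of range.)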
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of the Panoptic Quality metric.
Panoptic Quality is an instance-based metric for evaluating the task of
image parsing, aka panoptic segmentation.
Please see the paper for details:
"Panoptic Segmentation", Alexander Kirillov, Kaiming He, Ross Girshick,
Carsten Rother and Piotr Dollar. arXiv:1801.00868, 2018.
Note that this metric class is branched from
https://github.com/tensorflow/models/blob/master/research/deeplab/evaluation/panoptic_quality.py
"""
import collections
import numpy as np
_EPSILON = 1e-10
def realdiv_maybe_zero(x, y):
"""Element-wise x / y where y may contain zeros, for those returns 0 too."""
return np.where(
np.less(np.abs(y), _EPSILON), np.zeros_like(x), np.divide(x, y))
def _ids_to_counts(id_array):
"""Given a numpy array, a mapping from each unique entry to its count."""
ids, counts = np.unique(id_array, return_counts=True)
return dict(zip(ids, counts))
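For intuition, a quick sketch of the two helpers on toy inputs (values assumed for illustration):
realdiv_maybe_zero(np.array([1., 2.]), np.array([2., 0.]))  # -> [0.5, 0.0]
_ids_to_counts(np.array([[0, 0], [0, 7]]))                  # -> {0: 3, 7: 1}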
class PanopticQuality:
"""Metric class for Panoptic Quality.
"Panoptic Segmentation" by Alexander Kirillov, Kaiming He, Ross Girshick,
Carsten Rother, Piotr Dollar.
https://arxiv.org/abs/1801.00868
"""
def __init__(self, num_categories, ignored_label, max_instances_per_category,
offset):
"""Initialization for PanopticQualityMetric.
Args:
num_categories: The number of segmentation categories (or "classes" in the
dataset.
ignored_label: A category id that is ignored in evaluation, e.g. the void
label as defined in COCO panoptic segmentation dataset.
max_instances_per_category: The maximum number of instances for each
category. Used in ensuring unique instance labels.
offset: The maximum number of unique labels. This is used, by multiplying
the ground-truth labels, to generate unique ids for individual regions
of overlap between groundtruth and predicted segments.
"""
self.num_categories = num_categories
self.ignored_label = ignored_label
self.max_instances_per_category = max_instances_per_category
self.offset = offset
self.reset()
def _naively_combine_labels(self, category_mask, instance_mask):
"""Naively creates a combined label array from categories and instances."""
return (category_mask.astype(np.uint32) * self.max_instances_per_category +
instance_mask.astype(np.uint32))
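For intuition, a toy encoding with hypothetical values (max_instances_per_category=16, category 3, instance 2):
combined = 3 * 16 + 2          # category * max_instances_per_category + instance
combined // 16, combined % 16  # -> (3, 2): category and instance recovered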
def compare_and_accumulate(self, groundtruths, predictions):
"""Compares predicted segmentation with groundtruth, accumulates its metric.
It is not assumed that instance ids are unique across different categories.
See for example combine_semantic_and_instance_predictions.py in official
PanopticAPI evaluation code for issues to consider when fusing category
and instance labels.
Instance ids of the ignored category have the meaning that id 0 is "void"
and remaining ones are crowd instances.
Args:
groundtruths: A dictionary containing groundtruth labels. It should contain
the following fields.
- category_mask: A 2D numpy uint16 array of groundtruth per-pixel
category labels.
- instance_mask: A 2D numpy uint16 array of groundtruth instance labels.
predictions: A dictionary containing the model outputs. It should contain
the following fields.
- category_mask: A 2D numpy uint16 array of predicted per-pixel
category labels.
- instance_mask: A 2D numpy uint16 array of predicted instance labels.
"""
groundtruth_category_mask = groundtruths['category_mask']
groundtruth_instance_mask = groundtruths['instance_mask']
predicted_category_mask = predictions['category_mask']
predicted_instance_mask = predictions['instance_mask']
# First, combine the category and instance labels so that every unique
# value for (category, instance) is assigned a unique integer label.
pred_segment_id = self._naively_combine_labels(predicted_category_mask,
predicted_instance_mask)
gt_segment_id = self._naively_combine_labels(groundtruth_category_mask,
groundtruth_instance_mask)
# Pre-calculate areas for all groundtruth and predicted segments.
gt_segment_areas = _ids_to_counts(gt_segment_id)
pred_segment_areas = _ids_to_counts(pred_segment_id)
# We assume there is only one void segment and it has instance id = 0.
void_segment_id = self.ignored_label * self.max_instances_per_category
# There may be other ignored groundtruth segments with instance id > 0; find
# those ids using the unique segment ids extracted with the area computation
# above.
ignored_segment_ids = {
gt_segment_id for gt_segment_id in gt_segment_areas
if (gt_segment_id //
self.max_instances_per_category) == self.ignored_label
}
# Next, combine the groundtruth and predicted labels. Dividing up the pixels
# based on which groundtruth segment and which predicted segment they belong
# to, this will assign a different 32-bit integer label to each choice
# of (groundtruth segment, predicted segment), encoded as
# gt_segment_id * offset + pred_segment_id.
intersection_id_array = (
gt_segment_id.astype(np.uint64) * self.offset +
pred_segment_id.astype(np.uint64))
# For every combination of (groundtruth segment, predicted segment) with a
# non-empty intersection, this counts the number of pixels in that
# intersection.
intersection_areas = _ids_to_counts(intersection_id_array)
# Helper function that computes the area of the overlap between a predicted
# segment and the ground-truth void/ignored segment.
def prediction_void_overlap(pred_segment_id):
void_intersection_id = void_segment_id * self.offset + pred_segment_id
return intersection_areas.get(void_intersection_id, 0)
# Compute overall ignored overlap.
def prediction_ignored_overlap(pred_segment_id):
total_ignored_overlap = 0
for ignored_segment_id in ignored_segment_ids:
intersection_id = ignored_segment_id * self.offset + pred_segment_id
total_ignored_overlap += intersection_areas.get(intersection_id, 0)
return total_ignored_overlap
# Sets populated with the ids of groundtruth/predicted segments that have
# been matched with overlapping predicted/groundtruth segments,
# respectively.
gt_matched = set()
pred_matched = set()
# Calculate IoU per pair of intersecting segments of the same category.
for intersection_id, intersection_area in intersection_areas.items():
gt_segment_id = int(intersection_id // self.offset)
pred_segment_id = int(intersection_id % self.offset)
gt_category = int(gt_segment_id // self.max_instances_per_category)
pred_category = int(pred_segment_id // self.max_instances_per_category)
if gt_category != pred_category:
continue
# Union between the groundtruth and predicted segments being compared does
# not include the portion of the predicted segment that consists of
# groundtruth "void" pixels.
union = (
gt_segment_areas[gt_segment_id] +
pred_segment_areas[pred_segment_id] - intersection_area -
prediction_void_overlap(pred_segment_id))
iou = intersection_area / union
if iou > 0.5:
self.tp_per_class[gt_category] += 1
self.iou_per_class[gt_category] += iou
gt_matched.add(gt_segment_id)
pred_matched.add(pred_segment_id)
# Count false negatives for each category.
for gt_segment_id in gt_segment_areas:
if gt_segment_id in gt_matched:
continue
category = gt_segment_id // self.max_instances_per_category
# Failing to detect a void segment is not a false negative.
if category == self.ignored_label:
continue
self.fn_per_class[category] += 1
# Count false positives for each category.
for pred_segment_id in pred_segment_areas:
if pred_segment_id in pred_matched:
continue
# A false positive is not penalized if it is mostly ignored in the
# groundtruth.
if (prediction_ignored_overlap(pred_segment_id) /
pred_segment_areas[pred_segment_id]) > 0.5:
continue
category = pred_segment_id // self.max_instances_per_category
self.fp_per_class[category] += 1
def _valid_categories(self):
"""Categories with a "valid" value for the metric, have > 0 instances.
We will ignore the `ignore_label` class and other classes which have
`tp + fn + fp = 0`.
Returns:
Boolean array of shape `[num_categories]`.
"""
valid_categories = np.not_equal(
self.tp_per_class + self.fn_per_class + self.fp_per_class, 0)
if self.ignored_label >= 0 and self.ignored_label < self.num_categories:
valid_categories[self.ignored_label] = False
return valid_categories
def result_per_category(self):
"""For supported metrics, return individual per-category metric values.
Returns:
A dictionary containing all per-class metrics; each metric is a numpy
array of shape `[self.num_categories]`, where index `i` is the metric
value over only that category.
"""
sq_per_class = realdiv_maybe_zero(self.iou_per_class, self.tp_per_class)
rq_per_class = realdiv_maybe_zero(
self.tp_per_class,
self.tp_per_class + 0.5 * self.fn_per_class + 0.5 * self.fp_per_class)
return {
'sq_per_class': sq_per_class,
'rq_per_class': rq_per_class,
'pq_per_class': np.multiply(sq_per_class, rq_per_class)
}
def result(self, is_thing=None):
"""Computes and returns the detailed metric results over all comparisons.
Args:
is_thing: A boolean array of length `num_categories`. The entry
`is_thing[category_id]` is True iff that category is a "thing" category
instead of "stuff."
Returns:
A dictionary with a breakdown of metrics and/or metric factors by things,
stuff, and all categories.
"""
results = self.result_per_category()
valid_categories = self._valid_categories()
# If known, break down which categories are valid _and_ things/stuff.
category_sets = collections.OrderedDict()
category_sets['All'] = valid_categories
if is_thing is not None:
category_sets['Things'] = np.logical_and(valid_categories, is_thing)
category_sets['Stuff'] = np.logical_and(valid_categories,
np.logical_not(is_thing))
for category_set_name, in_category_set in category_sets.items():
if np.any(in_category_set):
results.update({
f'{category_set_name}_pq':
np.mean(results['pq_per_class'][in_category_set]),
f'{category_set_name}_sq':
np.mean(results['sq_per_class'][in_category_set]),
f'{category_set_name}_rq':
np.mean(results['rq_per_class'][in_category_set]),
# The number of categories in this subset.
f'{category_set_name}_num_categories':
np.sum(in_category_set.astype(np.int32)),
})
else:
results[category_set_name] = {
f'{category_set_name}_pq': 0.,
f'{category_set_name}_sq': 0.,
f'{category_set_name}_rq': 0.,
f'{category_set_name}_num_categories': 0
}
return results
def reset(self):
"""Resets the accumulation to the metric class's state at initialization."""
self.iou_per_class = np.zeros(self.num_categories, dtype=np.float64)
self.tp_per_class = np.zeros(self.num_categories, dtype=np.float64)
self.fn_per_class = np.zeros(self.num_categories, dtype=np.float64)
self.fp_per_class = np.zeros(self.num_categories, dtype=np.float64)
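A minimal standalone sketch of the accumulate/result cycle on toy masks (masks assumed, not from the test suite). Per category, SQ is the mean IoU over true positives, RQ = TP / (TP + 0.5 * FN + 0.5 * FP), and PQ = SQ * RQ:
import numpy as np

pq = PanopticQuality(num_categories=1, ignored_label=2,
                     max_instances_per_category=16, offset=16)
masks = {
    'category_mask': np.zeros([4, 4], np.uint16),  # a single category 0.
    'instance_mask': np.ones([4, 4], np.uint16),   # a single instance 1.
}
pq.compare_and_accumulate(masks, masks)  # prediction == groundtruth.
results = pq.result()
# One true positive with IoU 1.0, so results['All_pq'] == 1.0
# (and likewise 'All_sq' and 'All_rq').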
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The panoptic quality evaluator.
The following snippet demonstrates the typical use of the interface:
evaluator = PanopticQualityEvaluator(...)
for _ in range(num_evals):
for _ in range(num_batches_per_eval):
predictions, groundtruths = predictor.predict(...)  # pop a batch.
evaluator.update_state(groundtruths, predictions)
evaluator.result() # finish one full eval and reset states.
See also: https://github.com/cocodataset/cocoapi/
"""
import numpy as np
import tensorflow as tf
from official.vision.beta.evaluation import panoptic_quality
class PanopticQualityEvaluator:
"""Panoptic Quality metric class."""
def __init__(self, num_categories, ignored_label, max_instances_per_category,
offset, is_thing=None):
"""Constructs Panoptic Quality evaluation class.
The class provides the interface to the Panoptic Quality metric.
Args:
num_categories: The number of segmentation categories (or "classes") in
the dataset.
ignored_label: A category id that is ignored in evaluation, e.g. the void
label as defined in the COCO panoptic segmentation dataset.
max_instances_per_category: The maximum number of instances for each
category. Used in ensuring unique instance labels.
offset: The maximum number of unique labels. This is used, by multiplying
the ground-truth labels, to generate unique ids for individual regions
of overlap between groundtruth and predicted segments.
is_thing: A boolean array of length `num_categories`. The entry
`is_thing[category_id]` is True iff that category is a "thing" category
instead of "stuff." Default to `None`, and it means categories are not
classified into these two categories.
"""
self._pq_metric_module = panoptic_quality.PanopticQuality(
num_categories, ignored_label, max_instances_per_category, offset)
self._is_thing = is_thing
self._required_prediction_fields = ['category_mask', 'instance_mask']
self._required_groundtruth_fields = ['category_mask', 'instance_mask']
self.reset_states()
@property
def name(self):
return 'panoptic_quality'
def reset_states(self):
"""Resets internal states for a fresh run."""
self._pq_metric_module.reset()
def result(self):
"""Evaluates detection results, and reset_states."""
results = self._pq_metric_module.result(self._is_thing)
self.reset_states()
return results
def _convert_to_numpy(self, groundtruths, predictions):
"""Converts tesnors to numpy arrays."""
if groundtruths:
labels = tf.nest.map_structure(lambda x: x.numpy(), groundtruths)
numpy_groundtruths = {}
for key, val in labels.items():
if isinstance(val, tuple):
val = np.concatenate(val)
numpy_groundtruths[key] = val
else:
numpy_groundtruths = groundtruths
if predictions:
outputs = tf.nest.map_structure(lambda x: x.numpy(), predictions)
numpy_predictions = {}
for key, val in outputs.items():
if isinstance(val, tuple):
val = np.concatenate(val)
numpy_predictions[key] = val
else:
numpy_predictions = predictions
return numpy_groundtruths, numpy_predictions
def update_state(self, groundtruths, predictions):
"""Update and aggregate detection results and groundtruth data.
Args:
groundtruths: a dictionary of Tensors including the fields below. See also
different parsers under `../dataloader` for more details.
Required fields:
- category_mask: a numpy array of uint16 of shape [batch_size, H, W].
- instance_mask: a numpy array of uint16 of shape [batch_size, H, W].
predictions: a dictionary of tensors including the fields below. See
different parsers under `../dataloader` for more details.
Required fields:
- category_mask: a numpy array of uint16 of shape [batch_size, H, W].
- instance_mask: a numpy array of uint16 of shape [batch_size, H, W].
Raises:
ValueError: if the required prediction or groundtruth fields are not
present in the incoming `predictions` or `groundtruths`.
"""
groundtruths, predictions = self._convert_to_numpy(groundtruths,
predictions)
for k in self._required_prediction_fields:
if k not in predictions:
raise ValueError(
'Missing the required key `{}` in predictions!'.format(k))
for k in self._required_groundtruth_fields:
if k not in groundtruths:
raise ValueError(
'Missing the required key `{}` in groundtruths!'.format(k))
self._pq_metric_module.compare_and_accumulate(groundtruths, predictions)
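A minimal standalone sketch of the evaluator on toy tensors (inputs assumed, mirroring the snippet in the module docstring):
import tensorflow as tf

evaluator = PanopticQualityEvaluator(num_categories=1, ignored_label=2,
                                     max_instances_per_category=16, offset=16)
masks = {
    'category_mask': tf.zeros([6, 6], tf.uint16),
    'instance_mask': tf.ones([6, 6], tf.uint16),
}
evaluator.update_state(masks, masks)  # prediction == groundtruth.
results = evaluator.result()          # returns metrics and resets state.
# results['All_pq'] == 1.0 for this perfect match.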
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for panoptic_quality_evaluator."""
import numpy as np
import tensorflow as tf
from official.vision.beta.evaluation import panoptic_quality_evaluator
class PanopticQualityEvaluatorTest(tf.test.TestCase):
def test_multiple_batches(self):
category_mask = np.zeros([6, 6], np.uint16)
groundtruth_instance_mask = np.array([
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
[1, 1, 2, 2, 2, 1],
[1, 2, 2, 2, 2, 1],
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
],
dtype=np.uint16)
good_det_instance_mask = np.array([
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
[1, 2, 2, 2, 2, 1],
[1, 2, 2, 2, 1, 1],
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
],
dtype=np.uint16)
groundtruths = {
'category_mask': tf.convert_to_tensor(category_mask),
'instance_mask': tf.convert_to_tensor(groundtruth_instance_mask)
}
predictions = {
'category_mask': tf.convert_to_tensor(category_mask),
'instance_mask': tf.convert_to_tensor(good_det_instance_mask)
}
pq_evaluator = panoptic_quality_evaluator.PanopticQualityEvaluator(
num_categories=1,
ignored_label=2,
max_instances_per_category=16,
offset=16)
for _ in range(2):
pq_evaluator.update_state(groundtruths, predictions)
bad_det_instance_mask = np.array([
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 2, 2, 1],
[1, 1, 1, 2, 2, 1],
[1, 1, 1, 2, 2, 1],
[1, 1, 1, 1, 1, 1],
],
dtype=np.uint16)
predictions['instance_mask'] = tf.convert_to_tensor(bad_det_instance_mask)
for _ in range(2):
pq_evaluator.update_state(groundtruths, predictions)
results = pq_evaluator.result()
np.testing.assert_array_equal(results['pq_per_class'],
[((28 / 30 + 6 / 8) + (27 / 32)) / 2 / 2])
np.testing.assert_array_equal(results['rq_per_class'], [3 / 4])
np.testing.assert_array_equal(results['sq_per_class'],
[((28 / 30 + 6 / 8) + (27 / 32)) / 3])
self.assertAlmostEqual(results['All_pq'], 0.63177083)
self.assertAlmostEqual(results['All_rq'], 0.75)
self.assertAlmostEqual(results['All_sq'], 0.84236111)
self.assertEqual(results['All_num_categories'], 1)
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for Panoptic Quality metric.
Note that this metric test class is branched from
https://github.com/tensorflow/models/blob/master/research/deeplab/evaluation/panoptic_quality_test.py
"""
from absl.testing import absltest
import numpy as np
from official.vision.beta.evaluation import panoptic_quality
class PanopticQualityTest(absltest.TestCase):
def test_perfect_match(self):
category_mask = np.zeros([6, 6], np.uint16)
instance_mask = np.array([
[1, 1, 1, 1, 1, 1],
[1, 2, 2, 2, 2, 1],
[1, 2, 2, 2, 2, 1],
[1, 2, 2, 2, 2, 1],
[1, 2, 2, 1, 1, 1],
[1, 2, 1, 1, 1, 1],
],
dtype=np.uint16)
groundtruths = {
'category_mask': category_mask,
'instance_mask': instance_mask
}
predictions = {
'category_mask': category_mask,
'instance_mask': instance_mask
}
pq_metric = panoptic_quality.PanopticQuality(
num_categories=1,
ignored_label=2,
max_instances_per_category=16,
offset=16)
pq_metric.compare_and_accumulate(groundtruths, predictions)
np.testing.assert_array_equal(pq_metric.iou_per_class, [2.0])
np.testing.assert_array_equal(pq_metric.tp_per_class, [2])
np.testing.assert_array_equal(pq_metric.fn_per_class, [0])
np.testing.assert_array_equal(pq_metric.fp_per_class, [0])
results = pq_metric.result()
np.testing.assert_array_equal(results['pq_per_class'], [1.0])
np.testing.assert_array_equal(results['rq_per_class'], [1.0])
np.testing.assert_array_equal(results['sq_per_class'], [1.0])
self.assertAlmostEqual(results['All_pq'], 1.0)
self.assertAlmostEqual(results['All_rq'], 1.0)
self.assertAlmostEqual(results['All_sq'], 1.0)
self.assertEqual(results['All_num_categories'], 1)
def test_totally_wrong(self):
category_mask = np.array([
[0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 1, 0],
[0, 1, 1, 1, 1, 0],
[0, 1, 1, 1, 1, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
],
dtype=np.uint16)
instance_mask = np.zeros([6, 6], np.uint16)
groundtruths = {
'category_mask': category_mask,
'instance_mask': instance_mask
}
predictions = {
'category_mask': 1 - category_mask,
'instance_mask': instance_mask
}
pq_metric = panoptic_quality.PanopticQuality(
num_categories=2,
ignored_label=2,
max_instances_per_category=1,
offset=16)
pq_metric.compare_and_accumulate(groundtruths, predictions)
np.testing.assert_array_equal(pq_metric.iou_per_class, [0.0, 0.0])
np.testing.assert_array_equal(pq_metric.tp_per_class, [0, 0])
np.testing.assert_array_equal(pq_metric.fn_per_class, [1, 1])
np.testing.assert_array_equal(pq_metric.fp_per_class, [1, 1])
results = pq_metric.result()
np.testing.assert_array_equal(results['pq_per_class'], [0.0, 0.0])
np.testing.assert_array_equal(results['rq_per_class'], [0.0, 0.0])
np.testing.assert_array_equal(results['sq_per_class'], [0.0, 0.0])
self.assertAlmostEqual(results['All_pq'], 0.0)
self.assertAlmostEqual(results['All_rq'], 0.0)
self.assertAlmostEqual(results['All_sq'], 0.0)
self.assertEqual(results['All_num_categories'], 2)
def test_matches_by_iou(self):
groundtruth_instance_mask = np.array(
[
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
[1, 1, 2, 2, 2, 1],
[1, 2, 2, 2, 2, 1],
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
],
dtype=np.uint16)
good_det_instance_mask = np.array(
[
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
[1, 2, 2, 2, 2, 1],
[1, 2, 2, 2, 1, 1],
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
],
dtype=np.uint16)
groundtruths = {
'category_mask': np.zeros_like(groundtruth_instance_mask),
'instance_mask': groundtruth_instance_mask
}
predictions = {
'category_mask': np.zeros_like(good_det_instance_mask),
'instance_mask': good_det_instance_mask
}
pq_metric = panoptic_quality.PanopticQuality(
num_categories=1,
ignored_label=2,
max_instances_per_category=16,
offset=16)
pq_metric.compare_and_accumulate(groundtruths, predictions)
# iou(1, 1) = 28/30
# iou(2, 2) = 6 / 8
np.testing.assert_array_almost_equal(pq_metric.iou_per_class,
[28 / 30 + 6 / 8])
np.testing.assert_array_equal(pq_metric.tp_per_class, [2])
np.testing.assert_array_equal(pq_metric.fn_per_class, [0])
np.testing.assert_array_equal(pq_metric.fp_per_class, [0])
results = pq_metric.result()
np.testing.assert_array_equal(results['pq_per_class'],
[(28 / 30 + 6 / 8) / 2])
np.testing.assert_array_equal(results['rq_per_class'], [1.0])
np.testing.assert_array_equal(results['sq_per_class'],
[(28 / 30 + 6 / 8) / 2])
self.assertAlmostEqual(results['All_pq'], (28 / 30 + 6 / 8) / 2)
self.assertAlmostEqual(results['All_rq'], 1.0)
self.assertAlmostEqual(results['All_sq'], (28 / 30 + 6 / 8) / 2)
self.assertEqual(results['All_num_categories'], 1)
bad_det_instance_mask = np.array(
[
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 2, 2, 1],
[1, 1, 1, 2, 2, 1],
[1, 1, 1, 2, 2, 1],
[1, 1, 1, 1, 1, 1],
],
dtype=np.uint16)
predictions['instance_mask'] = bad_det_instance_mask
pq_metric.reset()
pq_metric.compare_and_accumulate(groundtruths, predictions)
# iou(1, 1) = 27/32
np.testing.assert_array_almost_equal(pq_metric.iou_per_class, [27 / 32])
np.testing.assert_array_equal(pq_metric.tp_per_class, [1])
np.testing.assert_array_equal(pq_metric.fn_per_class, [1])
np.testing.assert_array_equal(pq_metric.fp_per_class, [1])
results = pq_metric.result()
np.testing.assert_array_equal(results['pq_per_class'], [27 / 32 / 2])
np.testing.assert_array_equal(results['rq_per_class'], [0.5])
np.testing.assert_array_equal(results['sq_per_class'], [27 / 32])
self.assertAlmostEqual(results['All_pq'], 27 / 32 / 2)
self.assertAlmostEqual(results['All_rq'], 0.5)
self.assertAlmostEqual(results['All_sq'], 27 / 32)
self.assertEqual(results['All_num_categories'], 1)
def test_wrong_instances(self):
category_mask = np.array([
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
[1, 2, 2, 1, 2, 2],
[1, 2, 2, 1, 2, 2],
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
],
dtype=np.uint16)
groundtruth_instance_mask = np.zeros([6, 6], dtype=np.uint16)
predicted_instance_mask = np.array([
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 1],
[0, 0, 0, 0, 1, 1],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
],
dtype=np.uint16)
groundtruths = {
'category_mask': category_mask,
'instance_mask': groundtruth_instance_mask
}
predictions = {
'category_mask': category_mask,
'instance_mask': predicted_instance_mask
}
pq_metric = panoptic_quality.PanopticQuality(
num_categories=3,
ignored_label=0,
max_instances_per_category=10,
offset=100)
pq_metric.compare_and_accumulate(groundtruths, predictions)
np.testing.assert_array_equal(pq_metric.iou_per_class, [0.0, 1.0, 0.0])
np.testing.assert_array_equal(pq_metric.tp_per_class, [0, 1, 0])
np.testing.assert_array_equal(pq_metric.fn_per_class, [0, 0, 1])
np.testing.assert_array_equal(pq_metric.fp_per_class, [0, 0, 2])
results = pq_metric.result()
np.testing.assert_array_equal(results['pq_per_class'], [0.0, 1.0, 0.0])
np.testing.assert_array_equal(results['rq_per_class'], [0.0, 1.0, 0.0])
np.testing.assert_array_equal(results['sq_per_class'], [0.0, 1.0, 0.0])
self.assertAlmostEqual(results['All_pq'], 0.5)
self.assertAlmostEqual(results['All_rq'], 0.5)
self.assertAlmostEqual(results['All_sq'], 0.5)
self.assertEqual(results['All_num_categories'], 2)
def test_instance_order_is_arbitrary(self):
category_mask = np.array([
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
[1, 2, 2, 1, 2, 2],
[1, 2, 2, 1, 2, 2],
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
],
dtype=np.uint16)
groundtruth_instance_mask = np.array([
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 1, 1, 0, 0, 0],
[0, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
],
dtype=np.uint16)
predicted_instance_mask = np.array([
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 1],
[0, 0, 0, 0, 1, 1],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
],
dtype=np.uint16)
groundtruths = {
'category_mask': category_mask,
'instance_mask': groundtruth_instance_mask
}
predictions = {
'category_mask': category_mask,
'instance_mask': predicted_instance_mask
}
pq_metric = panoptic_quality.PanopticQuality(
num_categories=3,
ignored_label=0,
max_instances_per_category=10,
offset=100)
pq_metric.compare_and_accumulate(groundtruths, predictions)
np.testing.assert_array_equal(pq_metric.iou_per_class, [0.0, 1.0, 2.0])
np.testing.assert_array_equal(pq_metric.tp_per_class, [0, 1, 2])
np.testing.assert_array_equal(pq_metric.fn_per_class, [0, 0, 0])
np.testing.assert_array_equal(pq_metric.fp_per_class, [0, 0, 0])
results = pq_metric.result()
np.testing.assert_array_equal(results['pq_per_class'], [0.0, 1.0, 1.0])
np.testing.assert_array_equal(results['rq_per_class'], [0.0, 1.0, 1.0])
np.testing.assert_array_equal(results['sq_per_class'], [0.0, 1.0, 1.0])
self.assertAlmostEqual(results['All_pq'], 1.0)
self.assertAlmostEqual(results['All_rq'], 1.0)
self.assertAlmostEqual(results['All_sq'], 1.0)
self.assertEqual(results['All_num_categories'], 2)
if __name__ == '__main__':
absltest.main()
......@@ -342,9 +342,10 @@ Berkin Akin, Suyog Gupta, and Andrew Howard
"""
MNMultiMAX_BLOCK_SPECS = {
'spec_name': 'MobileNetMultiMAX',
'block_spec_schema': ['block_fn', 'kernel_size', 'strides', 'filters',
'activation', 'expand_ratio',
'use_normalization', 'use_bias', 'is_output'],
'block_spec_schema': [
'block_fn', 'kernel_size', 'strides', 'filters', 'activation',
'expand_ratio', 'use_normalization', 'use_bias', 'is_output'
],
'block_specs': [
('convbn', 3, 2, 32, 'relu', None, True, False, False),
('invertedbottleneck', 3, 2, 32, 'relu', 3., None, False, True),
......@@ -363,15 +364,18 @@ MNMultiMAX_BLOCK_SPECS = {
('invertedbottleneck', 5, 1, 160, 'relu', 4., None, False, True),
('convbn', 1, 1, 960, 'relu', None, True, False, False),
('gpooling', None, None, None, None, None, None, None, False),
('convbn', 1, 1, 1280, 'relu', None, False, True, False),
# Remove bias and add batch norm for the last layer to support QAT
# and achieve slightly better accuracy.
('convbn', 1, 1, 1280, 'relu', None, True, False, False),
]
}
MNMultiAVG_BLOCK_SPECS = {
'spec_name': 'MobileNetMultiAVG',
'block_spec_schema': ['block_fn', 'kernel_size', 'strides', 'filters',
'activation', 'expand_ratio',
'use_normalization', 'use_bias', 'is_output'],
'block_spec_schema': [
'block_fn', 'kernel_size', 'strides', 'filters', 'activation',
'expand_ratio', 'use_normalization', 'use_bias', 'is_output'
],
'block_specs': [
('convbn', 3, 2, 32, 'relu', None, True, False, False),
('invertedbottleneck', 3, 2, 32, 'relu', 3., None, False, False),
......@@ -392,7 +396,9 @@ MNMultiAVG_BLOCK_SPECS = {
('invertedbottleneck', 5, 1, 192, 'relu', 4., None, False, True),
('convbn', 1, 1, 960, 'relu', None, True, False, False),
('gpooling', None, None, None, None, None, None, None, False),
('convbn', 1, 1, 1280, 'relu', None, False, True, False),
# Remove bias and add batch norm for the last layer to support QAT
# and achieve slightly better accuracy.
('convbn', 1, 1, 1280, 'relu', None, True, False, False),
]
}
......
......@@ -158,10 +158,10 @@ class MobileNetTest(parameterized.TestCase, tf.test.TestCase):
('MobileNetV3Small', 0.75): 1026552,
('MobileNetV3EdgeTPU', 1.0): 2849312,
('MobileNetV3EdgeTPU', 0.75): 1737288,
('MobileNetMultiAVG', 1.0): 3700576,
('MobileNetMultiAVG', 0.75): 2345864,
('MobileNetMultiMAX', 1.0): 3170720,
('MobileNetMultiMAX', 0.75): 2041976,
('MobileNetMultiAVG', 1.0): 3704416,
('MobileNetMultiAVG', 0.75): 2349704,
('MobileNetMultiMAX', 1.0): 3174560,
('MobileNetMultiMAX', 0.75): 2045816,
}
input_size = 224
......
......@@ -32,6 +32,12 @@ layers = tf.keras.layers
# Each element in the block configuration is in the following format:
# (block_fn, num_filters, block_repeats)
RESNET_SPECS = {
10: [
('residual', 64, 1),
('residual', 128, 1),
('residual', 256, 1),
('residual', 512, 1),
],
18: [
('residual', 64, 2),
('residual', 128, 2),
......
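With the new 10-layer entry, the smaller backbone is built the same way as the existing depths. A minimal sketch, assuming the `resnet.ResNet(model_id=...)` constructor that the test below exercises:
import tensorflow as tf
from official.vision.beta.modeling.backbones import resnet

backbone = resnet.ResNet(model_id=10)  # one residual block per stage.
endpoints = backbone(tf.keras.Input(shape=(128, 128, 3)))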
......@@ -28,6 +28,7 @@ from official.vision.beta.modeling.backbones import resnet
class ResNetTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
(128, 10, 1),
(128, 18, 1),
(128, 34, 1),
(128, 50, 4),
......@@ -38,6 +39,7 @@ class ResNetTest(parameterized.TestCase, tf.test.TestCase):
endpoint_filter_scale):
"""Test creation of ResNet family models."""
resnet_params = {
10: 4915904,
18: 11190464,
34: 21306048,
50: 23561152,
......
......@@ -93,23 +93,6 @@ class ClassificationNetworkTest(parameterized.TestCase, tf.test.TestCase):
def test_mobilenet_network_creation(self, mobilenet_model_id,
filter_size_scale):
"""Test for creation of a MobileNet classifier."""
mobilenet_params = {
('MobileNetV1', 1.0): 4254889,
('MobileNetV1', 0.75): 2602745,
('MobileNetV2', 1.0): 3540265,
('MobileNetV2', 0.75): 2664345,
('MobileNetV3Large', 1.0): 5508713,
('MobileNetV3Large', 0.75): 4013897,
('MobileNetV3Small', 1.0): 2555993,
('MobileNetV3Small', 0.75): 2052577,
('MobileNetV3EdgeTPU', 1.0): 4131593,
('MobileNetV3EdgeTPU', 0.75): 3019569,
('MobileNetMultiAVG', 1.0): 4982857,
('MobileNetMultiAVG', 0.75): 3628145,
('MobileNetMultiMAX', 1.0): 4453001,
('MobileNetMultiMAX', 0.75): 3324257,
}
inputs = np.random.rand(2, 224, 224, 3)
tf.keras.backend.set_image_data_format('channels_last')
......@@ -123,8 +106,6 @@ class ClassificationNetworkTest(parameterized.TestCase, tf.test.TestCase):
num_classes=num_classes,
dropout_rate=0.2,
)
self.assertEqual(model.count_params(),
mobilenet_params[(mobilenet_model_id, filter_size_scale)])
logits = model(inputs)
self.assertAllEqual([2, num_classes], logits.numpy().shape)
......
......@@ -42,6 +42,7 @@ class ASPP(tf.keras.layers.Layer):
kernel_initializer: str = 'VarianceScaling',
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
interpolation: str = 'bilinear',
use_depthwise_convolution: bool = False,
**kwargs):
"""Initializes an Atrous Spatial Pyramid Pooling (ASPP) layer.
......@@ -64,6 +65,8 @@ class ASPP(tf.keras.layers.Layer):
interpolation: A `str` of interpolation method. It should be one of
`bilinear`, `nearest`, `bicubic`, `area`, `lanczos3`, `lanczos5`,
`gaussian`, or `mitchellcubic`.
use_depthwise_convolution: If True, depthwise separable convolutions will
be used in the Atrous Spatial Pyramid Pooling.
**kwargs: Additional keyword arguments to be passed.
"""
super(ASPP, self).__init__(**kwargs)
......@@ -80,6 +83,7 @@ class ASPP(tf.keras.layers.Layer):
'kernel_initializer': kernel_initializer,
'kernel_regularizer': kernel_regularizer,
'interpolation': interpolation,
'use_depthwise_convolution': use_depthwise_convolution,
}
def build(self, input_shape):
......@@ -100,7 +104,9 @@ class ASPP(tf.keras.layers.Layer):
dropout=self._config_dict['dropout_rate'],
kernel_initializer=self._config_dict['kernel_initializer'],
kernel_regularizer=self._config_dict['kernel_regularizer'],
interpolation=self._config_dict['interpolation'])
interpolation=self._config_dict['interpolation'],
use_depthwise_convolution=self._config_dict['use_depthwise_convolution']
)
def call(self, inputs: Mapping[str, tf.Tensor]) -> Mapping[str, tf.Tensor]:
"""Calls the Atrous Spatial Pyramid Pooling (ASPP) layer on an input.
......@@ -167,6 +173,7 @@ def build_aspp_decoder(
level=decoder_cfg.level,
dilation_rates=decoder_cfg.dilation_rates,
num_filters=decoder_cfg.num_filters,
use_depthwise_convolution=decoder_cfg.use_depthwise_convolution,
pool_kernel_size=decoder_cfg.pool_kernel_size,
dropout_rate=decoder_cfg.dropout_rate,
use_sync_bn=norm_activation_config.use_sync_bn,
......
......@@ -70,6 +70,7 @@ class ASPPTest(parameterized.TestCase, tf.test.TestCase):
kernel_regularizer=None,
interpolation='bilinear',
dropout_rate=0.2,
use_depthwise_convolution=False,
)
network = aspp.ASPP(**kwargs)
......
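The flag threads from the config through `build_aspp_decoder` into the layer, so it can also be passed when constructing the decoder directly. A minimal sketch with argument names taken from the factory call above (other values assumed):
from official.vision.beta.modeling.decoders import aspp

aspp_layer = aspp.ASPP(
    level=4,
    dilation_rates=[6, 12, 18],
    num_filters=256,
    use_depthwise_convolution=True)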
......@@ -76,7 +76,7 @@ def build_maskrcnn(
backbone_config=model_config.backbone,
norm_activation_config=norm_activation_config,
l2_regularizer=l2_regularizer)
backbone(tf.keras.Input(input_specs.shape[1:]))
backbone_features = backbone(tf.keras.Input(input_specs.shape[1:]))
decoder = decoders.factory.build_decoder(
input_specs=backbone.output_specs,
......@@ -119,6 +119,13 @@ def build_maskrcnn(
norm_epsilon=norm_activation_config.norm_epsilon,
kernel_regularizer=l2_regularizer,
name='detection_head')
# Build backbone, decoder and region proposal network:
if decoder:
decoder_features = decoder(backbone_features)
rpn_head(decoder_features)
if roi_sampler_config.cascade_iou_thresholds:
detection_head_cascade = [detection_head]
for cascade_num in range(len(roi_sampler_config.cascade_iou_thresholds)):
......@@ -326,6 +333,7 @@ def build_segmentation_model(
num_convs=head_config.num_convs,
prediction_kernel_size=head_config.prediction_kernel_size,
num_filters=head_config.num_filters,
use_depthwise_convolution=head_config.use_depthwise_convolution,
upsample_factor=head_config.upsample_factor,
feature_fusion=head_config.feature_fusion,
low_level=head_config.low_level,
......
......@@ -31,6 +31,7 @@ class SegmentationHead(tf.keras.layers.Layer):
level: Union[int, str],
num_convs: int = 2,
num_filters: int = 256,
use_depthwise_convolution: bool = False,
prediction_kernel_size: int = 1,
upsample_factor: int = 1,
feature_fusion: Optional[str] = None,
......@@ -53,6 +54,8 @@ class SegmentationHead(tf.keras.layers.Layer):
prediction layer.
num_filters: An `int` number to specify the number of filters used.
Default is 256.
use_depthwise_convolution: A bool to specify whether to use depthwise
separable convolutions.
prediction_kernel_size: An `int` number to specify the kernel size of the
prediction layer.
upsample_factor: An `int` number to specify the upsampling factor to
......@@ -84,6 +87,7 @@ class SegmentationHead(tf.keras.layers.Layer):
'level': level,
'num_convs': num_convs,
'num_filters': num_filters,
'use_depthwise_convolution': use_depthwise_convolution,
'prediction_kernel_size': prediction_kernel_size,
'upsample_factor': upsample_factor,
'feature_fusion': feature_fusion,
......@@ -104,12 +108,14 @@ class SegmentationHead(tf.keras.layers.Layer):
def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
"""Creates the variables of the segmentation head."""
use_depthwise_convolution = self._config_dict['use_depthwise_convolution']
random_initializer = tf.keras.initializers.RandomNormal(stddev=0.01)
conv_op = tf.keras.layers.Conv2D
conv_kwargs = {
'kernel_size': 3,
'kernel_size': 3 if not use_depthwise_convolution else 1,
'padding': 'same',
'use_bias': False,
'kernel_initializer': tf.keras.initializers.RandomNormal(stddev=0.01),
'kernel_initializer': random_initializer,
'kernel_regularizer': self._config_dict['kernel_regularizer'],
}
bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
......@@ -139,6 +145,16 @@ class SegmentationHead(tf.keras.layers.Layer):
self._convs = []
self._norms = []
for i in range(self._config_dict['num_convs']):
if use_depthwise_convolution:
self._convs.append(
tf.keras.layers.DepthwiseConv2D(
name='segmentation_head_depthwise_conv_{}'.format(i),
kernel_size=3,
padding='same',
use_bias=False,
depthwise_initializer=random_initializer,
depthwise_regularizer=self._config_dict['kernel_regularizer'],
depth_multiplier=1))
conv_name = 'segmentation_head_conv_{}'.format(i)
self._convs.append(
conv_op(
......
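With the flag enabled, each head convolution becomes a 3x3 depthwise convolution followed by the now-1x1 pointwise convolution built from `conv_kwargs`. A standalone sketch of the equivalent pattern in plain Keras:
import tensorflow as tf

x = tf.keras.Input(shape=(64, 64, 256))
y = tf.keras.layers.DepthwiseConv2D(
    kernel_size=3, padding='same', use_bias=False, depth_multiplier=1)(x)
y = tf.keras.layers.Conv2D(
    filters=256, kernel_size=1, padding='same', use_bias=False)(y)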