Commit 0016b0a7 authored by sunxx1

Merge branch 'dtk22.04' into 'main'

Dtk22.04

See merge request dcutoolkit/deeplearing/dlexamples_new!49
parents 17bc28d5 7a382d5d
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from absl.testing import parameterized
import keras_cv
class SmoothL1LossTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.named_parameters(
("none", "none", (20,)),
("sum", "sum", ()),
("sum_over_batch_size", "sum_over_batch_size", ()),
)
def test_proper_output_shapes(self, reduction, target_size):
loss = keras_cv.losses.SmoothL1Loss(l1_cutoff=0.5, reduction=reduction)
result = loss(
y_true=tf.random.uniform((20, 300)),
y_pred=tf.random.uniform((20, 300)),
)
self.assertEqual(result.shape, target_size)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from keras_cv.metrics.coco.mean_average_precision import COCOMeanAveragePrecision
from keras_cv.metrics.coco.recall import COCORecall
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from keras_cv.metrics.coco.mean_average_precision import COCOMeanAveragePrecision
from keras_cv.metrics.coco.recall import COCORecall
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
import tensorflow as tf
from keras_cv import bounding_box
from keras_cv.bounding_box import iou as iou_lib
from keras_cv.metrics.coco import utils
class COCOMeanAveragePrecision(tf.keras.metrics.Metric):
"""COCOMeanAveragePrecision computes an approximation of MaP.
A usage guide is available on keras.io:
[Using KerasCV COCO metrics](https://keras.io/guides/keras_cv/coco_metrics/).
Full implementation details are available in the
[KerasCV COCO metrics whitepaper](https://arxiv.org/abs/2207.12120).
Args:
class_ids: The class IDs to evaluate the metric for. To evaluate for
all classes over a set of sequentially labelled classes, pass
`range(classes)`.
bounding_box_format: Format of the incoming bounding boxes. Supported values
are "xywh", "center_xywh", "xyxy".
iou_thresholds: IoU thresholds over which to evaluate the metric. Must
be a tuple of floats, defaults to [0.5:0.05:0.95].
area_range: area range used to constrain the bounding boxes considered in
metric computation. Defaults to `None`, which makes the metric
count all bounding boxes. Must be a tuple of floats. The first
number in the tuple represents a lower bound for areas, while the
second value represents an upper bound. For example, when
`(0, 32**2)` is passed to the metric, it is only evaluated for
objects with areas less than `32*32`. If `(32**2, 1000000**2)` is
passed, the metric will only be evaluated for boxes with areas larger
than `32**2` and smaller than `1000000**2`.
max_detections: the maximum number of detections a model is allowed to
make. Must be an integer, defaults to `100`.
recall_thresholds: The list of thresholds to average over in the MaP
computation. List of floats. Defaults to [0:.01:1].
num_buckets: the number of confidence buckets that predictions are
placed into. Instead of computing MaP over each incrementally
selected set of bounding boxes, we place predictions into buckets.
This makes distributed computation easier. Increasing the number of
buckets improves the accuracy of the metric, while decreasing it
improves performance. This is a tradeoff you must weigh for your
use case. Defaults to 10,000, which is sufficiently large for most
use cases.
Usage:
COCOMeanAveragePrecision accepts two Tensors as input to its
`update_state()` method. These Tensors represent bounding boxes in the
format specified by `bounding_box_format`. Utilities to convert Tensors
between formats can be found in `keras_cv.bounding_box`.
Each image in a dataset may have a different number of bounding boxes,
both in the ground truth dataset and the prediction set. In order to
account for this, you may either pass a `tf.RaggedTensor`, or pad Tensors
with `-1`s to indicate unused boxes. A utility function to perform this
padding is available at
`keras_cv.bounding_box.pad_batch_to_shape()`.
```python
coco_map = keras_cv.metrics.COCOMeanAveragePrecision(
bounding_box_format='xyxy',
max_detections=100,
class_ids=[1]
)
y_true = np.array([[[0, 0, 10, 10, 1], [20, 20, 10, 10, 1]]]).astype(np.float32)
y_pred = np.array([[[0, 0, 10, 10, 1, 1.0], [5, 5, 10, 10, 1, 0.9]]]).astype(
np.float32
)
coco_map.update_state(y_true, y_pred)
coco_map.result()
# 0.24752477
```
"""
def __init__(
self,
class_ids,
bounding_box_format,
recall_thresholds=None,
iou_thresholds=None,
area_range=None,
max_detections=100,
num_buckets=10000,
**kwargs,
):
super().__init__(**kwargs)
# Initialize parameter values
self.bounding_box_format = bounding_box_format
self.iou_thresholds = iou_thresholds or [x / 100.0 for x in range(50, 100, 5)]
self.area_range = area_range
self.max_detections = max_detections
self.class_ids = list(class_ids)
self.recall_thresholds = recall_thresholds or [x / 100 for x in range(0, 101)]
self.num_buckets = num_buckets
self.num_iou_thresholds = len(self.iou_thresholds)
self.num_class_ids = len(self.class_ids)
if any(c < 0 for c in class_ids):
raise ValueError(
"class_ids must be non-negative. Got " f"class_ids={class_ids}"
)
self.ground_truths = self.add_weight(
"ground_truths",
shape=(self.num_class_ids,),
dtype=tf.int32,
initializer="zeros",
)
self.true_positive_buckets = self.add_weight(
"true_positive_buckets",
shape=(
self.num_class_ids,
self.num_iou_thresholds,
self.num_buckets,
),
dtype=tf.int32,
initializer="zeros",
)
self.false_positive_buckets = self.add_weight(
"false_positive_buckets",
shape=(
self.num_class_ids,
self.num_iou_thresholds,
self.num_buckets,
),
dtype=tf.int32,
initializer="zeros",
)
def reset_state(self):
self.true_positive_buckets.assign(tf.zeros_like(self.true_positive_buckets))
self.false_positive_buckets.assign(tf.zeros_like(self.false_positive_buckets))
self.ground_truths.assign(tf.zeros_like(self.ground_truths))
@tf.function()
def update_state(self, y_true, y_pred, sample_weight=None):
if sample_weight is not None:
warnings.warn(
"sample_weight is not yet supported in keras_cv COCO metrics."
)
y_true = tf.cast(y_true, self.compute_dtype)
y_pred = tf.cast(y_pred, self.compute_dtype)
if isinstance(y_true, tf.RaggedTensor):
y_true = y_true.to_tensor(default_value=-1)
if isinstance(y_pred, tf.RaggedTensor):
y_pred = y_pred.to_tensor(default_value=-1)
y_true = bounding_box.convert_format(
y_true,
source=self.bounding_box_format,
target="xyxy",
dtype=self.compute_dtype,
)
y_pred = bounding_box.convert_format(
y_pred,
source=self.bounding_box_format,
target="xyxy",
dtype=self.compute_dtype,
)
class_ids = tf.constant(self.class_ids, dtype=self.compute_dtype)
iou_thresholds = tf.constant(self.iou_thresholds, dtype=self.compute_dtype)
num_images = tf.shape(y_true)[0]
y_pred = utils.sort_bounding_boxes(y_pred, axis=bounding_box.XYXY.CONFIDENCE)
ground_truth_boxes_update = tf.zeros_like(self.ground_truths)
true_positive_buckets_update = tf.zeros_like(self.true_positive_buckets)
false_positive_buckets_update = tf.zeros_like(self.false_positive_buckets)
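# For each image: drop padding sentinels, optionally filter by area range
# and max_detections, then count true/false positives per class and IoU
# threshold, bucketed by detection confidence so updates can be summed
# across batches.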
for img in tf.range(num_images):
ground_truths = utils.filter_out_sentinels(y_true[img])
detections = utils.filter_out_sentinels(y_pred[img])
if self.area_range is not None:
ground_truths = utils.filter_boxes_by_area_range(
ground_truths, self.area_range[0], self.area_range[1]
)
detections = utils.filter_boxes_by_area_range(
detections, self.area_range[0], self.area_range[1]
)
if self.max_detections < tf.shape(detections)[0]:
detections = detections[: self.max_detections]
true_positives_update = tf.TensorArray(
tf.int32, size=self.num_class_ids * self.num_iou_thresholds
)
false_positives_update = tf.TensorArray(
tf.int32, size=self.num_class_ids * self.num_iou_thresholds
)
ground_truths_update = tf.TensorArray(tf.int32, size=self.num_class_ids)
for c_i in tf.range(self.num_class_ids):
category_id = class_ids[c_i]
ground_truths_by_category = utils.filter_boxes(
ground_truths, value=category_id, axis=bounding_box.XYXY.CLASS
)
detections_by_category = utils.filter_boxes(
detections, value=category_id, axis=bounding_box.XYXY.CLASS
)
if self.max_detections < tf.shape(detections_by_category)[0]:
detections_by_category = detections_by_category[
: self.max_detections
]
ground_truths_update = ground_truths_update.write(
c_i, tf.shape(ground_truths_by_category)[0]
)
ious = iou_lib.compute_iou(
ground_truths_by_category, detections_by_category, "xyxy"
)
for iou_i in tf.range(self.num_iou_thresholds):
iou_threshold = iou_thresholds[iou_i]
pred_matches = utils.match_boxes(ious, iou_threshold)
dt_scores = detections_by_category[:, bounding_box.XYXY.CONFIDENCE]
true_positives = pred_matches != -1
false_positives = pred_matches == -1
dt_scores_clipped = tf.clip_by_value(dt_scores, 0.0, 1.0)
# Divide by 1.01 so a confidence of exactly 1.0 falls in the last
# bucket rather than producing an out-of-range bucket index.
confidence_buckets = tf.cast(
tf.math.floor(self.num_buckets * (dt_scores_clipped / 1.01)),
tf.int32,
)
true_positives_by_bucket = tf.gather_nd(
confidence_buckets, indices=tf.where(true_positives)
)
false_positives_by_bucket = tf.gather_nd(
confidence_buckets, indices=tf.where(false_positives)
)
true_positive_counts_per_bucket = tf.math.bincount(
true_positives_by_bucket,
minlength=self.num_buckets,
maxlength=self.num_buckets,
)
false_positives_counts_per_bucket = tf.math.bincount(
false_positives_by_bucket,
minlength=self.num_buckets,
maxlength=self.num_buckets,
)
true_positives_update = true_positives_update.write(
(self.num_iou_thresholds * c_i) + iou_i,
true_positive_counts_per_bucket,
)
false_positives_update = false_positives_update.write(
(self.num_iou_thresholds * c_i) + iou_i,
false_positives_counts_per_bucket,
)
true_positives_update = tf.reshape(
true_positives_update.stack(),
(self.num_class_ids, self.num_iou_thresholds, self.num_buckets),
)
false_positives_update = tf.reshape(
false_positives_update.stack(),
(self.num_class_ids, self.num_iou_thresholds, self.num_buckets),
)
true_positive_buckets_update = (
true_positive_buckets_update + true_positives_update
)
false_positive_buckets_update = (
false_positive_buckets_update + false_positives_update
)
ground_truth_boxes_update = (
ground_truth_boxes_update + ground_truths_update.stack()
)
self.ground_truths.assign_add(ground_truth_boxes_update)
self.true_positive_buckets.assign_add(true_positive_buckets_update)
self.false_positive_buckets.assign_add(false_positive_buckets_update)
@tf.function()
def result(self):
true_positives = tf.cast(self.true_positive_buckets, self.dtype)
false_positives = tf.cast(self.false_positive_buckets, self.dtype)
ground_truths = tf.cast(self.ground_truths, self.dtype)
true_positives_sum = tf.cumsum(true_positives, axis=-1)
false_positives_sum = tf.cumsum(false_positives, axis=-1)
present_categories = tf.math.reduce_sum(tf.cast(ground_truths != 0, tf.int32))
if present_categories == 0:
return 0.0
recalls = tf.math.divide_no_nan(
true_positives_sum, ground_truths[:, None, None]
)
precisions = true_positives_sum / (false_positives_sum + true_positives_sum)
result = tf.TensorArray(
tf.float32, size=self.num_class_ids * self.num_iou_thresholds
)
zero_pad = tf.zeros(shape=(1,), dtype=tf.float32)
for i in tf.range(self.num_class_ids):
for j in tf.range(self.num_iou_thresholds):
recalls_i = recalls[i, j]
precisions_i = precisions[i, j]
# A recall threshold of 0 always selects the first bucket. This differs
# from the original implementation, which always has at least one
# bounding box in the first bucket.
#
# As such, we need to mask out the buckets that contain no bounding
# boxes. We therefore filter out the entries where precisions_i is NaN,
# since NaN implies a zero-by-zero division (no true or false positives
# in that bucket).
inds = tf.where(tf.math.logical_not(tf.math.is_nan(precisions_i)))
recalls_i = tf.gather_nd(recalls_i, inds)
precisions_i = tf.gather_nd(precisions_i, inds)
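# For each recall threshold, find the leftmost point on the recall
# curve at or above it and take that point's precision (the standard
# interpolated-AP step).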
inds = tf.searchsorted(
recalls_i, tf.constant(self.recall_thresholds), side="left"
)
# if searchsorted returns len(precisions)+1, we should return 0
precisions_i = tf.concat([precisions_i, zero_pad], axis=-1)
precision_per_recall_threshold = tf.gather(precisions_i, inds)
result_ij = tf.math.reduce_mean(precision_per_recall_threshold, axis=-1)
result = result.write(j + i * self.num_iou_thresholds, result_ij)
result = tf.reshape(
result.stack(), (self.num_class_ids, self.num_iou_thresholds)
)
result = tf.math.reduce_mean(result, axis=-1)
result = tf.math.reduce_sum(result, axis=0) / tf.cast(
present_categories, tf.float32
)
return result
def get_config(self):
config = super().get_config()
config.update(
{
"class_ids": self.class_ids,
"bounding_box_format": self.bounding_box_format,
"recall_thresholds": self.recall_thresholds,
"iou_thresholds": self.iou_thresholds,
"area_range": self.area_range,
"max_detections": self.max_detections,
"num_buckets": self.num_buckets,
}
)
return config
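# A worked example (mirroring the unit tests below) of how result() turns
# bucket counts into MaP for one class and one IoU threshold, with
# ground_truths=3, true_positive_buckets=[0, 1, 2],
# false_positive_buckets=[1, 0, 0] and recall_thresholds=[0.3, 0.5]:
#
#   cumulative TPs = [0, 1, 3], cumulative FPs = [1, 1, 1]
#   recalls    = TPs / ground_truths = [0.0, 1/3, 1.0]
#   precisions = TPs / (TPs + FPs)   = [0.0, 0.5, 0.75]
#   precision at recall >= 0.3 -> 0.5; at recall >= 0.5 -> 0.75
#   MaP = (0.5 + 0.75) / 2 = 0.625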
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for COCOMeanAveragePrecision."""
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras_cv import bounding_box
from keras_cv.metrics import COCOMeanAveragePrecision
class COCOMeanAveragePrecisionTest(tf.test.TestCase):
def test_runs_inside_model(self):
i = keras.layers.Input((None, None, 6))
model = keras.Model(i, i)
mean_average_precision = COCOMeanAveragePrecision(
bounding_box_format="xyxy",
max_detections=100,
num_buckets=4,
class_ids=[1],
area_range=(0, 64**2),
)
# These would match if they were in the area range
y_true = np.array([[[0, 0, 10, 10, 1], [5, 5, 10, 10, 1]]]).astype(np.float32)
y_pred = np.array([[[0, 0, 10, 10, 1, 1.0], [5, 5, 10, 10, 1, 0.5]]]).astype(
np.float32
)
model.compile(metrics=[mean_average_precision])
model.evaluate(y_pred, y_true)
self.assertAllEqual(mean_average_precision.result(), 1.0)
def test_first_buckets_have_no_boxes(self):
mean_average_precision = COCOMeanAveragePrecision(
bounding_box_format="xyxy",
iou_thresholds=[0.33],
class_ids=[1],
max_detections=100,
num_buckets=4,
recall_thresholds=[0.3, 0.5],
)
ground_truths = [3]
# one class
true_positives = [
[
[
# one threshold
# three buckets
0,
0,
1,
2,
]
]
]
false_positives = [
[
[
# one threshold
# three buckets
0,
0,
1,
0,
]
]
]
# so we get:
# rcs = [0, 0, 0.33, 1.0]
# prs = [NaN, NaN, 0.5 , 0.75]
# after filtering:
# rcs = [0.33, 1.0]
# prs = [0.5, 0.75]
# so for PR pairs we get:
# [0.3, 0.5]
# [0.5, 0.75]
# So mean average precision should be: (0.5 + 0.75)/2 = 0.625.
ground_truths = tf.constant(ground_truths, tf.int32)
true_positives = tf.constant(true_positives, tf.int32)
false_positives = tf.constant(false_positives, tf.int32)
mean_average_precision.ground_truths.assign(ground_truths)
mean_average_precision.true_positive_buckets.assign(true_positives)
mean_average_precision.false_positive_buckets.assign(false_positives)
self.assertEqual(mean_average_precision.result(), 0.625)
def test_result_method_with_direct_assignment_one_threshold(self):
mean_average_precision = COCOMeanAveragePrecision(
bounding_box_format="xyxy",
iou_thresholds=[0.33],
class_ids=[1],
max_detections=100,
num_buckets=3,
recall_thresholds=[0.3, 0.5],
)
ground_truths = [3]
# one class
true_positives = [
[
[
# one threshold
# three buckets
0,
1,
2,
]
]
]
false_positives = [
[
[
# one threshold
# three buckets
1,
0,
0,
]
]
]
# so we get:
# rcs = [0, 0.33, 1.0]
# prs = [0, 0.5 , 0.75]
# so for PR pairs we get:
# [0.3, 0.5]
# [0.5, 0.75]
# So mean average precision should be: (0.5 + 0.75)/2 = 0.625.
ground_truths = tf.constant(ground_truths, tf.int32)
true_positives = tf.constant(true_positives, tf.int32)
false_positives = tf.constant(false_positives, tf.int32)
mean_average_precision.ground_truths.assign(ground_truths)
mean_average_precision.true_positive_buckets.assign(true_positives)
mean_average_precision.false_positive_buckets.assign(false_positives)
self.assertEqual(mean_average_precision.result(), 0.625)
def test_result_method_with_direct_assignment_missing_class(self):
mean_average_precision = COCOMeanAveragePrecision(
bounding_box_format="xyxy",
iou_thresholds=[0.33],
class_ids=[1, 2],
max_detections=100,
num_buckets=3,
recall_thresholds=[0.3, 0.5],
)
ground_truths = [3, 0]
# one class
true_positives = [
[[0, 1, 2]],
[[0, 0, 0]],
]
false_positives = [
[[1, 0, 0]],
[[0, 0, 0]],
]
# Result should be the same as above.
ground_truths = tf.constant(ground_truths, tf.int32)
true_positives = tf.constant(true_positives, tf.int32)
false_positives = tf.constant(false_positives, tf.int32)
mean_average_precision.ground_truths.assign(ground_truths)
mean_average_precision.true_positive_buckets.assign(true_positives)
mean_average_precision.false_positive_buckets.assign(false_positives)
self.assertEqual(mean_average_precision.result(), 0.625)
def test_counting_with_missing_class_present_in_data(self):
y_true = tf.constant(
[
[
[0, 0, 100, 100, 15],
[0, 0, 100, 100, 1],
]
],
dtype=tf.float64,
)
y_pred = tf.constant(
[[[0, 50, 100, 150, 1, 1.0], [0, 50, 100, 150, 33, 1.0]]], dtype=tf.float32
)
y_true = bounding_box.pad_batch_to_shape(y_true, (1, 20, 5))
metric = COCOMeanAveragePrecision(
bounding_box_format="xyxy",
iou_thresholds=[0.15],
class_ids=[1000, 1],
max_detections=1,
)
metric.update_state(y_true, y_pred)
self.assertAllEqual(metric.ground_truths, [0, 1])
metric.update_state(y_true, y_pred)
self.assertAllEqual(metric.ground_truths, [0, 2])
def test_bounding_box_counting(self):
y_true = tf.constant([[[0, 0, 100, 100, 1]]], dtype=tf.float64)
y_pred = tf.constant([[[0, 50, 100, 150, 1, 1.0]]], dtype=tf.float32)
y_true = bounding_box.pad_batch_to_shape(y_true, (1, 20, 5))
metric = COCOMeanAveragePrecision(
bounding_box_format="xyxy",
iou_thresholds=[0.15],
class_ids=[1],
max_detections=1,
)
metric.update_state(y_true, y_pred)
self.assertEqual(metric.ground_truths, [1])
metric.update_state(y_true, y_pred)
self.assertEqual(metric.ground_truths, [2])
def test_mixed_dtypes(self):
y_true = tf.constant([[[0, 0, 100, 100, 1]]], dtype=tf.float64)
y_pred = tf.constant([[[0, 50, 100, 150, 1, 1.0]]], dtype=tf.float32)
metric = COCOMeanAveragePrecision(
bounding_box_format="xyxy",
iou_thresholds=[0.15],
class_ids=[1],
max_detections=1,
)
metric.update_state(y_true, y_pred)
self.assertEqual(metric.result(), 1.0)
def test_runs_with_confidence_over_1(self):
mean_average_precision = COCOMeanAveragePrecision(
bounding_box_format="xyxy",
iou_thresholds=[0.33],
class_ids=[1, 2],
max_detections=100,
num_buckets=3,
recall_thresholds=[0.3, 0.5],
)
y_true = tf.ragged.stack(
[
tf.constant([[0, 0, 10, 10, 1], [5, 5, 10, 10, 1]], tf.float32),
tf.constant([[0, 0, 10, 10, 1]], tf.float32),
]
)
y_pred = tf.ragged.stack(
[
tf.constant([[5, 5, 10, 10, 1, 0.9]], tf.float32),
# this box is out of the valid confidence range.
tf.constant(
[[0, 0, 10, 10, 1, 1.0], [5, 5, 10, 10, 1, 1.1]], tf.float32
),
]
)
mean_average_precision.update_state(y_true, y_pred)
self.assertEqual(mean_average_precision.result(), 2 / 3)
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"accelerator": "GPU",
"colab": {
"name": "GenerateSamples.ipynb",
"provenance": [],
"collapsed_sections": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "WUUY3nfRX7VF"
},
"source": [
"# Setup"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "n69M1y5bTDa_",
"outputId": "cfdb76d7-ff82-4022-cf3b-7a4e7695c6ee"
},
"source": [
"!pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI\n",
" Cloning https://github.com/cocodataset/cocoapi.git to /tmp/pip-req-build-v63g3kxy\n",
" Running command git clone -q https://github.com/cocodataset/cocoapi.git /tmp/pip-req-build-v63g3kxy\n",
"Requirement already satisfied: setuptools>=18.0 in /usr/local/lib/python3.7/dist-packages (from pycocotools==2.0) (57.4.0)\n",
"Requirement already satisfied: cython>=0.27.3 in /usr/local/lib/python3.7/dist-packages (from pycocotools==2.0) (0.29.24)\n",
"Requirement already satisfied: matplotlib>=2.1.0 in /usr/local/lib/python3.7/dist-packages (from pycocotools==2.0) (3.2.2)\n",
"Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=2.1.0->pycocotools==2.0) (3.0.6)\n",
"Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=2.1.0->pycocotools==2.0) (2.8.2)\n",
"Requirement already satisfied: numpy>=1.11 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=2.1.0->pycocotools==2.0) (1.19.5)\n",
"Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=2.1.0->pycocotools==2.0) (0.11.0)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=2.1.0->pycocotools==2.0) (1.3.2)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.1->matplotlib>=2.1.0->pycocotools==2.0) (1.15.0)\n",
"Building wheels for collected packages: pycocotools\n",
" Building wheel for pycocotools (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for pycocotools: filename=pycocotools-2.0-cp37-cp37m-linux_x86_64.whl size=263921 sha256=074b4c6db75f73953a705eab5e2ddcd2bc50f14a1baa288a0ffe80f6aba91f0a\n",
" Stored in directory: /tmp/pip-ephem-wheel-cache-_88ll_l6/wheels/e2/6b/1d/344ac773c7495ea0b85eb228bc66daec7400a143a92d36b7b1\n",
"Successfully built pycocotools\n",
"Installing collected packages: pycocotools\n",
" Attempting uninstall: pycocotools\n",
" Found existing installation: pycocotools 2.0.3\n",
" Uninstalling pycocotools-2.0.3:\n",
" Successfully uninstalled pycocotools-2.0.3\n",
"Successfully installed pycocotools-2.0\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "MimODgLKTHsr"
},
"source": [
"# Load Ground Truths"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "_-3H1wHxOgK1",
"outputId": "72857e3e-4a34-47fc-cf71-a3f217c0674f"
},
"source": [
"!wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip\n",
"!unzip annotations_trainval2017.zip"
],
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"--2021-12-04 00:29:29-- http://images.cocodataset.org/annotations/annotations_trainval2017.zip\n",
"Resolving images.cocodataset.org (images.cocodataset.org)... 52.217.129.209\n",
"Connecting to images.cocodataset.org (images.cocodataset.org)|52.217.129.209|:80... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 252907541 (241M) [application/zip]\n",
"Saving to: ‘annotations_trainval2017.zip’\n",
"\n",
"annotations_trainva 100%[===================>] 241.19M 34.1MB/s in 7.6s \n",
"\n",
"2021-12-04 00:29:37 (31.8 MB/s) - ‘annotations_trainval2017.zip’ saved [252907541/252907541]\n",
"\n",
"Archive: annotations_trainval2017.zip\n",
" inflating: annotations/instances_train2017.json \n",
" inflating: annotations/instances_val2017.json \n",
" inflating: annotations/captions_train2017.json \n",
" inflating: annotations/captions_val2017.json \n",
" inflating: annotations/person_keypoints_train2017.json \n",
" inflating: annotations/person_keypoints_val2017.json \n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "sZUNps8-QU28"
},
"source": [
"import json\n",
"\n",
"with open(\"./annotations/instances_val2017.json\", \"r\") as f:\n",
" ground_truths = json.load(f)"
],
"execution_count": 3,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "D4i9jPDkYj8r"
},
"source": [
"img_ids = [gt[\"image_id\"] for gt in ground_truths[\"annotations\"]][:10]"
],
"execution_count": 4,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "nsRkZ1gsW8Nw"
},
"source": [
"ground_truths[\"annotations\"] = [\n",
" gt for gt in ground_truths[\"annotations\"] if gt[\"image_id\"] in img_ids\n",
"]"
],
"execution_count": 5,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "PUf0_UjtXGXz"
},
"source": [
"with open(\"mutated_ground_truths.json\", \"w\") as f:\n",
" json.dump(ground_truths, f)"
],
"execution_count": 6,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "438lQrfYVo93"
},
"source": [
"import random\n",
"\n",
"\n",
"def mutate_bounding_box(bounding_box):\n",
" def shift(x, xw):\n",
" return xw * random.uniform(-1, 1) + x\n",
"\n",
" return [\n",
" shift(bounding_box[0], bounding_box[2] / 10),\n",
" shift(bounding_box[1], bounding_box[3] / 10),\n",
" random.uniform(0.9, 1.1) * bounding_box[2],\n",
" random.uniform(0.9, 1.1) * bounding_box[3],\n",
" ]\n",
"\n",
"\n",
"results = []\n",
"for gt in [gt for gt in ground_truths[\"annotations\"] if gt[\"image_id\"] in img_ids]:\n",
" result = {\n",
" \"image_id\": gt[\"image_id\"],\n",
" \"area\": gt[\"area\"],\n",
" \"bounding_box\": mutate_bounding_box(gt[\"bounding_box\"]),\n",
" \"id\": gt[\"id\"],\n",
" \"category_id\": gt[\"category_id\"],\n",
" \"score\": random.uniform(0, 1),\n",
" }\n",
" results.append(result)\n",
"\n",
"with open(\"coco_results.json\", \"w\") as f:\n",
" json.dump(results, f)"
],
"execution_count": 7,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "pRnBL4lCOkey",
"outputId": "4b9a49d1-9acd-4a1d-84b1-8f07f6efc7d4"
},
"source": [
"from pycocotools.coco import COCO\n",
"from pycocotools.cocoeval import COCOeval\n",
"\n",
"with open(\"coco_results.json\", \"r\") as f:\n",
" results = json.load(f)\n",
"coco = COCO(\"./mutated_ground_truths.json\")\n",
"ret = {}\n",
"\n",
"cocoDt = coco.loadRes(results)\n",
"cocoEval = COCOeval(coco, cocoDt, \"bounding_box\")\n",
"cocoEval.evaluate()\n",
"cocoEval.accumulate()\n",
"cocoEval.summarize()"
],
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"loading annotations into memory...\n",
"Done (t=0.01s)\n",
"creating index...\n",
"index created!\n",
"Loading and preparing results...\n",
"DONE (t=0.00s)\n",
"creating index...\n",
"index created!\n",
"Running per image evaluation...\n",
"Evaluate annotation type *bounding_box*\n",
"DONE (t=5.57s).\n",
"Accumulating evaluation results...\n",
"DONE (t=1.68s).\n",
" Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.661\n",
" Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 1.000\n",
" Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.793\n",
" Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.651\n",
" Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.676\n",
" Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.671\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.504\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.686\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.686\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.674\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.681\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.682\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "_8rKJCZFYwqE"
},
"source": [
"# Creating y_true, y_pred"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "b1M8QjMJYyXD",
"outputId": "69187c86-1429-4764-a726-fa387a2acdf4"
},
"source": [
"import numpy as np\n",
"from collections import defaultdict\n",
"\n",
"num_imgs = len(img_ids)\n",
"\n",
"groups = defaultdict(list)\n",
"\n",
"for annotation in ground_truths[\"annotations\"]:\n",
" img_id = annotation[\"image_id\"]\n",
" bounding_box = annotation[\"bounding_box\"]\n",
" bounding_box = [x for x in bounding_box] + [int(annotation[\"category_id\"])]\n",
" groups[img_id].append(bounding_box)\n",
"\n",
"imgs = sorted(groups.keys())\n",
"\n",
"result = []\n",
"for img in imgs:\n",
" bounding_boxes = groups[img]\n",
" bounding_boxes = np.array(bounding_boxes)\n",
" result.append(bounding_boxes)\n",
"\n",
"m = max([r.shape[0] for r in result])\n",
"\n",
"\n",
"def to_shape(a, shape):\n",
" y_, x_ = shape\n",
" y, x = a.shape\n",
" y_pad = y_ - y\n",
" x_pad = x_ - x\n",
" return np.pad(a, ((0, y_pad), (0, x_pad)), mode=\"constant\", constant_values=-1)\n",
"\n",
"\n",
"result = [to_shape(r, (m, 5)) for r in result]\n",
"y_true = np.array(result).astype(float)\n",
"\n",
"print(y_true.shape)"
],
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"(10, 25, 5)\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "zt5ENbGKbi5e",
"outputId": "1eda5c37-14ef-4d0c-92e0-47eeb624331b"
},
"source": [
"print(y_true)"
],
"execution_count": 10,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[[[ 2.7210e+02 2.0023e+02 1.5197e+02 2.7977e+02 1.8000e+01]\n",
" [ 1.8123e+02 8.6280e+01 2.7440e+01 7.3530e+01 4.4000e+01]\n",
" [ 1.7474e+02 0.0000e+00 2.6104e+02 2.2079e+02 7.0000e+01]\n",
" ...\n",
" [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]\n",
" [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]\n",
" [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]]\n",
"\n",
" [[ 1.9797e+02 1.1722e+02 1.7045e+02 2.2207e+02 1.8000e+01]\n",
" [ 5.8450e+01 8.1690e+01 1.3153e+02 1.2590e+02 6.4000e+01]\n",
" [ 9.2900e+00 1.6000e+02 4.6658e+02 3.1484e+02 3.0000e+00]\n",
" ...\n",
" [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]\n",
" [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]\n",
" [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]]\n",
"\n",
" [[ 4.7307e+02 3.9593e+02 3.8650e+01 2.8670e+01 1.8000e+01]\n",
" [ 2.0401e+02 2.3508e+02 6.0840e+01 1.7736e+02 1.0000e+00]\n",
" [ 4.3000e-01 4.9979e+02 3.3979e+02 1.0645e+02 1.5000e+01]\n",
" ...\n",
" [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]\n",
" [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]\n",
" [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]]\n",
"\n",
" ...\n",
"\n",
" [[ 1.1271e+02 1.5482e+02 3.6729e+02 4.7935e+02 1.8000e+01]\n",
" [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]\n",
" [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]\n",
" ...\n",
" [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]\n",
" [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]\n",
" [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]]\n",
"\n",
" [[ 4.2758e+02 7.7870e+01 1.8888e+02 2.8591e+02 1.8000e+01]\n",
" [ 4.0399e+02 3.6290e+01 5.9210e+01 2.7506e+02 1.0000e+00]\n",
" [ 2.6073e+02 9.6000e-01 1.5376e+02 3.3522e+02 1.0000e+00]\n",
" ...\n",
" [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]\n",
" [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]\n",
" [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]]\n",
"\n",
" [[ 2.0061e+02 8.9650e+01 4.0022e+02 2.5102e+02 1.8000e+01]\n",
" [ 9.4780e+01 9.5000e-01 2.4066e+02 2.1101e+02 1.0000e+00]\n",
" [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]\n",
" ...\n",
" [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]\n",
" [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]\n",
" [-1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00 -1.0000e+00]]]\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "R3o_ecJab2QC",
"outputId": "7d2d4633-d5db-481f-d52c-f2bd3cec5b2c"
},
"source": [
"import numpy as np\n",
"from collections import defaultdict\n",
"\n",
"num_imgs = len(img_ids)\n",
"\n",
"groups = defaultdict(list)\n",
"\n",
"for annotation in results:\n",
" img_id = annotation[\"image_id\"]\n",
" bounding_box = annotation[\"bounding_box\"]\n",
" bounding_box = (\n",
" [x for x in bounding_box]\n",
" + [int(annotation[\"category_id\"])]\n",
" + [annotation[\"score\"]]\n",
" )\n",
" groups[img_id].append(bounding_box)\n",
"\n",
"imgs = sorted(groups.keys())\n",
"\n",
"result = []\n",
"for img in imgs:\n",
" bounding_boxes = groups[img]\n",
" bounding_boxes = np.array(bounding_boxes)\n",
" result.append(bounding_boxes)\n",
"\n",
"m = max([r.shape[0] for r in result])\n",
"\n",
"\n",
"def to_shape(a, shape):\n",
" y_, x_ = shape\n",
" y, x = a.shape\n",
" y_pad = y_ - y\n",
" x_pad = x_ - x\n",
" return np.pad(a, ((0, y_pad), (0, x_pad)), mode=\"constant\", constant_values=-1)\n",
"\n",
"\n",
"result = [to_shape(r, (m, 6)) for r in result]\n",
"y_pred = np.array(result)\n",
"\n",
"print(y_pred.shape)"
],
"execution_count": 11,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"(10, 25, 6)\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "8JPT8ufgb93o",
"outputId": "beb2ad3d-ce72-4c6f-e54f-0ccc3f1e091a"
},
"source": [
"print(y_pred)"
],
"execution_count": 12,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[[[ 2.83170164e+02 1.98574781e+02 1.43679031e+02 2.79637175e+02\n",
" 1.80000000e+01 8.60476676e-01]\n",
" [ 1.78782770e+02 8.57070874e+01 2.57205260e+01 7.17351988e+01\n",
" 4.40000000e+01 8.67949953e-01]\n",
" [ 1.60398593e+02 -1.26983180e+01 2.77554052e+02 2.16501984e+02\n",
" 7.00000000e+01 9.40993483e-02]\n",
" ...\n",
" [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000000e+00\n",
" -1.00000000e+00 -1.00000000e+00]\n",
" [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000000e+00\n",
" -1.00000000e+00 -1.00000000e+00]\n",
" [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000000e+00\n",
" -1.00000000e+00 -1.00000000e+00]]\n",
"\n",
" [[ 2.05261670e+02 1.06865086e+02 1.71921347e+02 2.08470932e+02\n",
" 1.80000000e+01 2.33589777e-02]\n",
" [ 5.23691118e+01 7.91802126e+01 1.21694503e+02 1.36141918e+02\n",
" 6.40000000e+01 8.40724888e-01]\n",
" [-2.03978956e+01 1.42724445e+02 4.20246262e+02 2.87332323e+02\n",
" 3.00000000e+00 1.98790481e-01]\n",
" ...\n",
" [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000000e+00\n",
" -1.00000000e+00 -1.00000000e+00]\n",
" [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000000e+00\n",
" -1.00000000e+00 -1.00000000e+00]\n",
" [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000000e+00\n",
" -1.00000000e+00 -1.00000000e+00]]\n",
"\n",
" [[ 4.73816425e+02 3.96076460e+02 3.96202463e+01 3.02539300e+01\n",
" 1.80000000e+01 9.75400366e-01]\n",
" [ 1.99351076e+02 2.23320345e+02 6.62929402e+01 1.76467400e+02\n",
" 1.00000000e+00 4.36806421e-01]\n",
" [ 1.44654720e+00 5.03645965e+02 3.35607352e+02 1.08498500e+02\n",
" 1.50000000e+01 5.83539872e-01]\n",
" ...\n",
" [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000000e+00\n",
" -1.00000000e+00 -1.00000000e+00]\n",
" [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000000e+00\n",
" -1.00000000e+00 -1.00000000e+00]\n",
" [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000000e+00\n",
" -1.00000000e+00 -1.00000000e+00]]\n",
"\n",
" ...\n",
"\n",
" [[ 1.39483163e+02 1.92950396e+02 3.60178555e+02 4.85868966e+02\n",
" 1.80000000e+01 6.31667579e-01]\n",
" [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000000e+00\n",
" -1.00000000e+00 -1.00000000e+00]\n",
" [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000000e+00\n",
" -1.00000000e+00 -1.00000000e+00]\n",
" ...\n",
" [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000000e+00\n",
" -1.00000000e+00 -1.00000000e+00]\n",
" [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000000e+00\n",
" -1.00000000e+00 -1.00000000e+00]\n",
" [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000000e+00\n",
" -1.00000000e+00 -1.00000000e+00]]\n",
"\n",
" [[ 4.38017342e+02 5.64162895e+01 1.85317987e+02 2.68642244e+02\n",
" 1.80000000e+01 5.80375785e-01]\n",
" [ 4.03716170e+02 5.38656333e+01 6.14525589e+01 2.69501723e+02\n",
" 1.00000000e+00 3.36547041e-01]\n",
" [ 2.66513278e+02 -1.92190604e+01 1.67066764e+02 3.01708478e+02\n",
" 1.00000000e+00 9.62374629e-01]\n",
" ...\n",
" [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000000e+00\n",
" -1.00000000e+00 -1.00000000e+00]\n",
" [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000000e+00\n",
" -1.00000000e+00 -1.00000000e+00]\n",
" [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000000e+00\n",
" -1.00000000e+00 -1.00000000e+00]]\n",
"\n",
" [[ 1.72899652e+02 9.76785280e+01 3.80835130e+02 2.66132825e+02\n",
" 1.80000000e+01 9.64015636e-02]\n",
" [ 9.34153287e+01 7.85192516e+00 2.61483759e+02 2.12719816e+02\n",
" 1.00000000e+00 5.90918860e-01]\n",
" [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000000e+00\n",
" -1.00000000e+00 -1.00000000e+00]\n",
" ...\n",
" [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000000e+00\n",
" -1.00000000e+00 -1.00000000e+00]\n",
" [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000000e+00\n",
" -1.00000000e+00 -1.00000000e+00]\n",
" [-1.00000000e+00 -1.00000000e+00 -1.00000000e+00 -1.00000000e+00\n",
" -1.00000000e+00 -1.00000000e+00]]]\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "O9Sk_LOl2XsR"
},
"source": [
"outfile = \"sample_boxes.npz\"\n",
"np.savez(outfile, y_true, y_pred)"
],
"execution_count": 13,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 16
},
"id": "3FzE5Dzy2ppg",
"outputId": "0e5573b0-a9ee-4515-95fd-2af0b40c8624"
},
"source": [
"from google.colab import files\n",
"\n",
"files.download(outfile)"
],
"execution_count": 15,
"outputs": [
{
"output_type": "display_data",
"data": {
"application/javascript": [
"\n",
" async function download(id, filename, size) {\n",
" if (!google.colab.kernel.accessAllowed) {\n",
" return;\n",
" }\n",
" const div = document.createElement('div');\n",
" const label = document.createElement('label');\n",
" label.textContent = `Downloading \"${filename}\": `;\n",
" div.appendChild(label);\n",
" const progress = document.createElement('progress');\n",
" progress.max = size;\n",
" div.appendChild(progress);\n",
" document.body.appendChild(div);\n",
"\n",
" const buffers = [];\n",
" let downloaded = 0;\n",
"\n",
" const channel = await google.colab.kernel.comms.open(id);\n",
" // Send a message to notify the kernel that we're ready.\n",
" channel.send({})\n",
"\n",
" for await (const message of channel.messages) {\n",
" // Send a message to notify the kernel that we're ready.\n",
" channel.send({})\n",
" if (message.buffers) {\n",
" for (const buffer of message.buffers) {\n",
" buffers.push(buffer);\n",
" downloaded += buffer.byteLength;\n",
" progress.value = downloaded;\n",
" }\n",
" }\n",
" }\n",
" const blob = new Blob(buffers, {type: 'application/binary'});\n",
" const a = document.createElement('a');\n",
" a.href = window.URL.createObjectURL(blob);\n",
" a.download = filename;\n",
" div.appendChild(a);\n",
" a.click();\n",
" div.remove();\n",
" }\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"application/javascript": [
"download(\"download_664f2b3d-1603-4f33-9434-db321fbf4134\", \"sample_boxes.npz\", 22506)"
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {}
}
]
}
]
}
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import tensorflow as tf
from keras_cv import bounding_box
from keras_cv.metrics.coco import COCOMeanAveragePrecision
SAMPLE_FILE = os.path.dirname(os.path.abspath(__file__)) + "/sample_boxes.npz"
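# The bucketed computation only approximates cocoeval.py, so the comparisons
# below use an absolute tolerance rather than exact equality.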
delta = 0.04
class MeanAveragePrecisionTest(tf.test.TestCase):
"""Numerical testing for COCOMeanAveragePrecision.
Unit tests that test Keras COCO metric results against the known values of
cocoeval.py. The bounding boxes in sample_boxes.npz were given to
cocoeval.py, which computed the following values:
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.617
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 1.000
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.707
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.604
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.626
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.610
"""
def test_mean_average_precision_correctness_default(self):
y_true, y_pred, categories = load_samples(SAMPLE_FILE)
# Area range all
mean_average_precision = COCOMeanAveragePrecision(
bounding_box_format="xyxy",
class_ids=categories + [1000],
max_detections=100,
num_buckets=1000,
)
mean_average_precision.update_state(y_true, y_pred)
result = mean_average_precision.result().numpy()
self.assertAlmostEqual(result, 0.617, delta=delta)
def test_mean_average_precision_correctness_iou_05(self):
y_true, y_pred, categories = load_samples(SAMPLE_FILE)
mean_average_precision = COCOMeanAveragePrecision(
bounding_box_format="xyxy",
class_ids=categories + [1000],
iou_thresholds=[0.5],
max_detections=100,
)
mean_average_precision.update_state(y_true, y_pred)
result = mean_average_precision.result().numpy()
self.assertAlmostEqual(result, 1.0, delta=delta)
def test_mean_average_precision_correctness_iou_75(self):
y_true, y_pred, categories = load_samples(SAMPLE_FILE)
mean_average_precision = COCOMeanAveragePrecision(
bounding_box_format="xyxy",
class_ids=categories + [1000],
iou_thresholds=[0.75],
max_detections=100,
)
mean_average_precision.update_state(y_true, y_pred)
result = mean_average_precision.result().numpy()
self.assertAlmostEqual(result, 0.707, delta=delta)
def test_mean_average_precision_correctness_medium(self):
y_true, y_pred, categories = load_samples(SAMPLE_FILE)
mean_average_precision = COCOMeanAveragePrecision(
bounding_box_format="xyxy",
class_ids=categories + [1000],
max_detections=100,
area_range=(32**2, 96**2),
)
mean_average_precision.update_state(y_true, y_pred)
result = mean_average_precision.result().numpy()
self.assertAlmostEqual(result, 0.626, delta=delta)
def test_mean_average_precision_correctness_large(self):
y_true, y_pred, categories = load_samples(SAMPLE_FILE)
mean_average_precision = COCOMeanAveragePrecision(
bounding_box_format="xyxy",
class_ids=categories + [1000],
max_detections=100,
area_range=(96**2, 1e5**2),
)
mean_average_precision.update_state(y_true, y_pred)
result = mean_average_precision.result().numpy()
self.assertAlmostEqual(result, 0.610, delta=delta)
def test_mean_average_precision_correctness_small(self):
y_true, y_pred, categories = load_samples(SAMPLE_FILE)
mean_average_precision = COCOMeanAveragePrecision(
bounding_box_format="xyxy",
class_ids=categories + [1000],
max_detections=100,
area_range=(0, 32**2),
)
mean_average_precision.update_state(y_true, y_pred)
result = mean_average_precision.result().numpy()
self.assertAlmostEqual(result, 0.604, delta=delta)
def load_samples(fname):
npzfile = np.load(fname)
y_true = npzfile["arr_0"].astype(np.float32)
y_pred = npzfile["arr_1"].astype(np.float32)
y_true = bounding_box.convert_format(y_true, source="xyWH", target="xyxy")
y_pred = bounding_box.convert_format(y_pred, source="xyWH", target="xyxy")
categories = set(int(x) for x in y_true[:, :, 4].numpy().flatten())
categories = [x for x in categories if x != -1]
return y_true, y_pred, categories
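# sample_boxes.npz (generated by the GenerateSamples notebook above) holds
# y_true with shape (10, 25, 5) as [x, y, width, height, class] and y_pred
# with shape (10, 25, 6) as [x, y, width, height, class, confidence], both
# padded with -1 for unused boxes.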
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests to ensure that COCOrecall computes the correct values.."""
import os
import numpy as np
import tensorflow as tf
from keras_cv import bounding_box
from keras_cv.metrics import COCORecall
SAMPLE_FILE = os.path.dirname(os.path.abspath(__file__)) + "/sample_boxes.npz"
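# As with the MaP tests, results are compared to cocoeval.py within an
# absolute tolerance.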
delta = 0.04
class RecallCorrectnessTest(tf.test.TestCase):
"""Unit tests that test Keras COCO metric results against the known good ones of
cocoeval.py. The bounding boxes in sample_boxes.npz were given to cocoeval.py
which output the following values:
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.478
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.645
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.648
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.628
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.653
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.641
"""
def test_recall_correctness_maxdets_1(self):
y_true, y_pred, categories = load_samples(SAMPLE_FILE)
# Area range all
recall = COCORecall(
bounding_box_format="xyxy",
class_ids=categories + [1000],
max_detections=1,
)
recall.update_state(y_true, y_pred)
result = recall.result().numpy()
self.assertAlmostEqual(result, 0.478, delta=delta)
def test_recall_correctness_maxdets_10(self):
y_true, y_pred, categories = load_samples(SAMPLE_FILE)
# Area range all
recall = COCORecall(
bounding_box_format="xyxy",
class_ids=categories + [1000],
max_detections=10,
)
recall.update_state(y_true, y_pred)
result = recall.result().numpy()
self.assertAlmostEqual(result, 0.645, delta=delta)
def test_recall_correctness_maxdets_100(self):
y_true, y_pred, categories = load_samples(SAMPLE_FILE)
# Area range all
recall = COCORecall(
bounding_box_format="xyxy",
class_ids=categories + [1000],
max_detections=100,
)
recall.update_state(y_true, y_pred)
result = recall.result().numpy()
self.assertAlmostEqual(result, 0.648, delta=delta)
def test_recall_correctness_small_objects(self):
y_true, y_pred, categories = load_samples(SAMPLE_FILE)
recall = COCORecall(
bounding_box_format="xyxy",
class_ids=categories + [1000],
max_detections=100,
area_range=(0, 32**2),
)
recall.update_state(y_true, y_pred)
result = recall.result().numpy()
self.assertAlmostEqual(result, 0.628, delta=delta)
def test_recall_correctness_medium_objects(self):
y_true, y_pred, categories = load_samples(SAMPLE_FILE)
recall = COCORecall(
bounding_box_format="xyxy",
class_ids=categories + [1000],
max_detections=100,
area_range=(32**2, 96**2),
)
recall.update_state(y_true, y_pred)
result = recall.result().numpy()
self.assertAlmostEqual(result, 0.653, delta=delta)
def test_recall_correctness_large_objects(self):
y_true, y_pred, categories = load_samples(SAMPLE_FILE)
recall = COCORecall(
bounding_box_format="xyxy",
class_ids=categories + [1000],
max_detections=100,
area_range=(96**2, 1e5**2),
)
recall.update_state(y_true, y_pred)
result = recall.result().numpy()
self.assertAlmostEqual(result, 0.641, delta=delta)
def load_samples(fname):
npzfile = np.load(fname)
y_true = npzfile["arr_0"].astype(np.float32)
y_pred = npzfile["arr_1"].astype(np.float32)
y_true = bounding_box.convert_format(y_true, source="xywh", target="xyxy")
y_pred = bounding_box.convert_format(y_pred, source="xywh", target="xyxy")
categories = set(int(x) for x in y_true[:, :, 4].numpy().flatten())
categories = [x for x in categories if x != -1]
return y_true, y_pred, categories
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.initializers as initializers
from keras_cv import bounding_box
from keras_cv.bounding_box import iou as iou_lib
from keras_cv.metrics.coco import utils
class COCORecall(keras.metrics.Metric):
"""COCORecall computes the COCO recall metric.
A usage guide is available on keras.io:
[Using KerasCV COCO metrics](https://keras.io/guides/keras_cv/coco_metrics/).
Full implementation details are available in the
[KerasCV COCO metrics whitepaper](https://arxiv.org/abs/2207.12120).
Args:
class_ids: The class IDs to evaluate the metric for. To evaluate for
all classes over a set of sequentially labelled classes, pass
`range(classes)`.
bounding_box_format: Format of the incoming bounding boxes. Supported values
are "xywh", "center_xywh", "xyxy".
iou_thresholds: IoU thresholds over which to evaluate the recall. Must
be a tuple of floats, defaults to [0.5:0.05:0.95].
area_range: area range used to constrain the bounding boxes considered in
metric computation. Defaults to `None`, which makes the metric
count all bounding boxes. Must be a tuple of floats. The first
number in the tuple represents a lower bound for areas, while the
second value represents an upper bound. For example, when
`(0, 32**2)` is passed to the metric, recall is only evaluated for
objects with areas less than `32*32`. If `(32**2, 1000000**2)` is
passed, the metric will only be evaluated for boxes with areas larger
than `32**2` and smaller than `1000000**2`.
max_detections: the maximum number of detections a model is allowed to
make. Must be an integer, defaults to `100`.
Usage:
COCORecall accepts two Tensors as input to its `update_state` method.
These Tensors represent bounding boxes in the format specified by
`bounding_box_format`. Utilities to convert Tensors between formats can
be found in `keras_cv.bounding_box`.
Each image in a dataset may have a different number of bounding boxes,
both in the ground truth dataset and the prediction set. In order to
account for this, you may either pass a `tf.RaggedTensor`, or pad Tensors
with `-1`s to indicate unused boxes. A utility function to perform this
padding is available at
`keras_cv.bounding_box.pad_batch_to_shape`.
```python
coco_recall = keras_cv.metrics.COCORecall(
bounding_box_format='xyxy',
max_detections=100,
class_ids=[1]
)
y_true = np.array([[[0, 0, 10, 10, 1], [20, 20, 10, 10, 1]]]).astype(np.float32)
y_pred = np.array([[[0, 0, 10, 10, 1, 1.0], [5, 5, 10, 10, 1, 0.9]]]).astype(
np.float32
)
coco_recall.update_state(y_true, y_pred)
coco_recall.result()
# 0.5
```
"""
def __init__(
self,
class_ids,
bounding_box_format,
iou_thresholds=None,
area_range=None,
max_detections=100,
**kwargs,
):
super().__init__(**kwargs)
# Initialize parameter values
self.bounding_box_format = bounding_box_format
iou_thresholds = iou_thresholds or [x / 100.0 for x in range(50, 100, 5)]
self.iou_thresholds = iou_thresholds
self.class_ids = list(class_ids)
self.area_range = area_range
self.max_detections = max_detections
# Initialize result counters
num_thresholds = len(iou_thresholds)
num_categories = len(class_ids)
if any(c < 0 for c in class_ids):
raise ValueError(
"class_ids must be non-negative. Got " f"class_ids={class_ids}"
)
self.true_positives = self.add_weight(
name="true_positives",
shape=(num_thresholds, num_categories),
dtype=tf.int32,
initializer=initializers.Zeros(),
)
self.ground_truth_boxes = self.add_weight(
name="ground_truth_boxes",
shape=(num_categories,),
dtype=tf.int32,
initializer=initializers.Zeros(),
)
def reset_state(self):
self.true_positives.assign(tf.zeros_like(self.true_positives))
self.ground_truth_boxes.assign(tf.zeros_like(self.ground_truth_boxes))
@tf.function
def update_state(self, y_true, y_pred, sample_weight=None):
"""
Args:
y_true: a bounding box Tensor in the format specified by
`bounding_box_format`.
y_pred: a bounding box Tensor with confidence scores, in the format
specified by `bounding_box_format`.
sample_weight: Currently unsupported.
"""
if sample_weight is not None:
warnings.warn(
"sample_weight is not yet supported in keras_cv COCO metrics."
)
y_true = tf.cast(y_true, self.compute_dtype)
y_pred = tf.cast(y_pred, self.compute_dtype)
# TODO(lukewood): Add first party RaggedTensor support. Currently
# this could cause an OOM error if users are not expecting to convert
# these tensors to dense tensors.
if isinstance(y_true, tf.RaggedTensor):
y_true = y_true.to_tensor(default_value=-1)
if isinstance(y_pred, tf.RaggedTensor):
y_pred = y_pred.to_tensor(default_value=-1)
y_true = bounding_box.convert_format(
y_true,
source=self.bounding_box_format,
target="xyxy",
dtype=self.compute_dtype,
)
y_pred = bounding_box.convert_format(
y_pred,
source=self.bounding_box_format,
target="xyxy",
dtype=self.compute_dtype,
)
y_pred = utils.sort_bounding_boxes(y_pred, axis=bounding_box.XYXY.CONFIDENCE)
num_images = tf.shape(y_true)[0]
iou_thresholds = tf.constant(self.iou_thresholds, dtype=tf.float32)
class_ids = tf.constant(self.class_ids, dtype=tf.float32)
num_thresholds = tf.shape(iou_thresholds)[0]
num_categories = tf.shape(class_ids)[0]
true_positives_update = tf.zeros_like(self.true_positives)
ground_truth_boxes_update = tf.zeros_like(self.ground_truth_boxes)
for img in tf.range(num_images):
y_true_for_image = utils.filter_out_sentinels(y_true[img])
y_pred_for_image = utils.filter_out_sentinels(y_pred[img])
if self.area_range is not None:
y_true_for_image = utils.filter_boxes_by_area_range(
y_true_for_image, self.area_range[0], self.area_range[1]
)
y_pred_for_image = utils.filter_boxes_by_area_range(
y_pred_for_image, self.area_range[0], self.area_range[1]
)
for k_i in tf.range(num_categories):
category = class_ids[k_i]
category_filtered_y_pred = utils.filter_boxes(
y_pred_for_image, value=category, axis=bounding_box.XYXY.CLASS
)
detections = category_filtered_y_pred
if self.max_detections < tf.shape(category_filtered_y_pred)[0]:
detections = category_filtered_y_pred[: self.max_detections]
ground_truths = utils.filter_boxes(
y_true_for_image, value=category, axis=bounding_box.XYXY.CLASS
)
                ious = iou_lib.compute_iou(ground_truths, detections, "xyxy")
for t_i in tf.range(num_thresholds):
threshold = iou_thresholds[t_i]
pred_matches = utils.match_boxes(ious, threshold)
indices = [t_i, k_i]
true_positives = tf.cast(pred_matches != -1, tf.int32)
true_positives_sum = tf.math.reduce_sum(true_positives, axis=-1)
true_positives_update = tf.tensor_scatter_nd_add(
true_positives_update, [indices], [true_positives_sum]
)
ground_truth_boxes_update = tf.tensor_scatter_nd_add(
ground_truth_boxes_update,
[[k_i]],
[tf.cast(tf.shape(ground_truths)[0], tf.int32)],
)
self.true_positives.assign_add(true_positives_update)
self.ground_truth_boxes.assign_add(ground_truth_boxes_update)
@tf.function
def result(self):
present_values = self.ground_truth_boxes != 0
n_present_categories = tf.math.reduce_sum(
tf.cast(present_values, tf.int32), axis=-1
)
n_present_categories = tf.cast(n_present_categories, self.dtype)
if n_present_categories == 0.0:
return 0.0
true_positives = tf.cast(self.true_positives, self.dtype)
ground_truth_boxes = tf.cast(self.ground_truth_boxes, self.dtype)
recalls = tf.math.divide_no_nan(true_positives, ground_truth_boxes[None, :])
recalls_per_threshold = (
tf.math.reduce_sum(recalls, axis=-1) / n_present_categories
)
return tf.math.reduce_mean(recalls_per_threshold)
def get_config(self):
config = super().get_config()
config.update(
{
"class_ids": self.class_ids,
"bounding_box_format": self.bounding_box_format,
"iou_thresholds": self.iou_thresholds,
"area_range": self.area_range,
"max_detections": self.max_detections,
}
)
return config
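# A minimal usage sketch of the aggregation performed in `result()` above
# (hypothetical values; assumes keras_cv is installed). Directly assigning
# state, as the direct-assignment tests below also do, shows that the result
# is the per-category recall TP / GT averaged over categories that have
# ground truth boxes, then averaged over IoU thresholds.
import tensorflow as tf
from keras_cv.metrics import COCORecall

recall_sketch = COCORecall(
    bounding_box_format="xyxy", iou_thresholds=[0.5], class_ids=[1, 2]
)
# 3 of 4 class-1 ground truth boxes were matched; class 2 has no ground
# truth boxes, so it is excluded from the category mean.
recall_sketch.true_positives.assign(tf.constant([[3, 0]], dtype=tf.int32))
recall_sketch.ground_truth_boxes.assign(tf.constant([4, 0], dtype=tf.int32))
print(recall_sketch.result().numpy())  # (3 / 4) / 1 = 0.75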
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for COCORecall."""
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras_cv.metrics import COCORecall
class COCORecallTest(tf.test.TestCase):
def test_runs_inside_model(self):
i = keras.layers.Input((None, None, 6))
model = keras.Model(i, i)
recall = COCORecall(
max_detections=100,
bounding_box_format="xyxy",
class_ids=[1],
area_range=(0, 64**2),
)
        # These boxes match, and they fall inside the area range
y_true = np.array([[[0, 0, 10, 10, 1], [5, 5, 10, 10, 1]]]).astype(np.float32)
y_pred = np.array([[[0, 0, 10, 10, 1, 1.0], [5, 5, 10, 10, 1, 0.9]]]).astype(
np.float32
)
model.compile(metrics=[recall])
model.evaluate(y_pred, y_true)
self.assertAllEqual(recall.result(), 1.0)
def test_ragged_tensor_support(self):
recall = COCORecall(
max_detections=100,
bounding_box_format="xyxy",
class_ids=[1],
area_range=(0, 64**2),
)
        # Two of the three ground truth boxes are matched by the predictions
y_true = tf.ragged.stack(
[
tf.constant([[0, 0, 10, 10, 1], [5, 5, 10, 10, 1]], tf.float32),
tf.constant([[0, 0, 10, 10, 1]], tf.float32),
]
)
y_pred = tf.ragged.stack(
[
tf.constant([[5, 5, 10, 10, 1, 0.9]], tf.float32),
tf.constant(
[[0, 0, 10, 10, 1, 1.0], [5, 5, 10, 10, 1, 0.9]], tf.float32
),
]
)
recall.update_state(y_true, y_pred)
self.assertAlmostEqual(recall.result(), 2 / 3)
def test_merge_state(self):
y_true = tf.constant([[[0, 0, 100, 100, 1]]], dtype=tf.float32)
y_pred = tf.constant([[[0, 50, 100, 150, 1, 1.0]]], dtype=tf.float32)
y_pred_match = tf.constant([[[0, 0, 100, 100, 1, 1.0]]], dtype=tf.float32)
m1 = COCORecall(
bounding_box_format="xyxy",
iou_thresholds=[0.95],
class_ids=[1],
area_range=(0, 100000**2),
max_detections=1,
)
m2 = COCORecall(
bounding_box_format="xyxy",
iou_thresholds=[0.95],
class_ids=[1],
area_range=(0, 100000**2),
max_detections=1,
)
m1.update_state(y_true, y_pred)
m1.update_state(y_true, y_pred_match)
m2.update_state(y_true, y_pred)
metric_result = COCORecall(
bounding_box_format="xyxy",
iou_thresholds=[0.95],
class_ids=[1],
area_range=(0, 100000**2),
max_detections=1,
)
metric_result.merge_state([m1, m2])
self.assertEqual([[1]], metric_result.true_positives)
self.assertEqual([3], metric_result.ground_truth_boxes)
self.assertEqual(1 / 3, metric_result.result())
def test_recall_area_range_filtering(self):
recall = COCORecall(
bounding_box_format="xyxy",
max_detections=100,
class_ids=[1],
area_range=(32**2, 64**2),
)
# These would match if they were in the area range
y_true = np.array([[[0, 0, 10, 10, 1], [5, 5, 10, 10, 1]]]).astype(np.float32)
y_pred = np.array([[[0, 0, 10, 10, 1, 1.0], [5, 5, 10, 10, 1, 0.9]]]).astype(
np.float32
)
recall.update_state(y_true, y_pred)
self.assertAllEqual(recall.result(), 0.0)
def test_missing_categories(self):
recall = COCORecall(
bounding_box_format="xyxy",
max_detections=100,
class_ids=[1, 2, 3],
area_range=(0, 1e9**2),
)
t = len(recall.iou_thresholds)
k = len(recall.class_ids)
true_positives = np.ones((t, k))
true_positives[:, 1] = np.zeros((t,))
true_positives = tf.constant(true_positives, dtype=tf.int32)
ground_truth_boxes = np.ones((k,)) * 2
ground_truth_boxes[1] = 0
ground_truth_boxes = tf.constant(ground_truth_boxes, dtype=tf.int32)
recall.true_positives.assign(true_positives)
recall.ground_truth_boxes.assign(ground_truth_boxes)
self.assertEqual(recall.result(), 0.5)
def test_recall_direct_assignment(self):
recall = COCORecall(
bounding_box_format="xyxy",
max_detections=100,
class_ids=[1],
area_range=(0, 1e9**2),
)
t = len(recall.iou_thresholds)
k = len(recall.class_ids)
true_positives = tf.ones((t, k), dtype=tf.int32)
ground_truth_boxes = tf.ones((k,), dtype=tf.int32) * 2
recall.true_positives.assign(true_positives)
recall.ground_truth_boxes.assign(ground_truth_boxes)
self.assertEqual(recall.result(), 0.5)
def test_max_detections_one_third(self):
recall = COCORecall(
bounding_box_format="xyxy",
max_detections=1,
class_ids=[1],
area_range=(0, 1e9**2),
)
y_true = np.array(
[[[0, 0, 100, 100, 1], [100, 100, 200, 200, 1], [300, 300, 400, 400, 1]]]
).astype(np.float32)
y_pred = np.concatenate([y_true, np.ones((1, 3, 1))], axis=-1).astype(
np.float32
)
# with max_dets=1, only 1 of the three boxes can be found
recall.update_state(y_true, y_pred)
self.assertAlmostEqual(recall.result().numpy(), 1 / 3)
def test_max_detections(self):
recall = COCORecall(
bounding_box_format="xyxy",
max_detections=3,
class_ids=[1],
area_range=(0, 1e9**2),
)
y_true = np.array(
[[[0, 0, 100, 100, 1], [100, 100, 200, 200, 1], [300, 300, 400, 400, 1]]]
).astype(np.float32)
y_pred = np.concatenate([y_true, np.ones((1, 3, 1))], axis=-1).astype(
np.float32
)
        # with max_dets=3, all three boxes can be found
recall.update_state(y_true, y_pred)
self.assertAlmostEqual(recall.result().numpy(), 1.0)
def test_recall_direct_assignment_one_third(self):
recall = COCORecall(
bounding_box_format="xyxy",
max_detections=100,
class_ids=[1],
area_range=(0, 1e9**2),
)
t = len(recall.iou_thresholds)
k = len(recall.class_ids)
true_positives = tf.ones((t, k), dtype=tf.int32)
ground_truth_boxes = tf.ones((k,), dtype=tf.int32) * 3
recall.true_positives.assign(true_positives)
recall.ground_truth_boxes.assign(ground_truth_boxes)
self.assertAlmostEqual(recall.result().numpy(), 1 / 3)
def test_area_range_bounding_box_counting(self):
y_true = tf.constant(
[[[0, 0, 100, 100, 1], [0, 0, 100, 100, 1]]], dtype=tf.float32
)
y_pred = tf.constant([[[0, 50, 100, 150, 1, 1.0]]], dtype=tf.float32)
# note the low iou threshold
metric = COCORecall(
bounding_box_format="xyxy",
iou_thresholds=[0.15],
class_ids=[1],
area_range=(0, 10000**2),
max_detections=1,
)
metric.update_state(y_true, y_pred)
        self.assertEqual([2], metric.ground_truth_boxes)
self.assertEqual([[1]], metric.true_positives)
def test_true_positive_counting_one_good_one_bad(self):
y_true = tf.constant(
[[[0, 0, 100, 100, 1], [0, 0, 100, 100, 1]]], dtype=tf.float32
)
y_pred = tf.constant([[[0, 50, 100, 150, 1, 1.0]]], dtype=tf.float32)
# note the low iou threshold
metric = COCORecall(
bounding_box_format="xyxy",
iou_thresholds=[0.15],
class_ids=[1],
area_range=(0, 10000**2),
max_detections=1,
)
metric.update_state(y_true, y_pred)
self.assertEqual([2], metric.ground_truth_boxes)
self.assertEqual([[1]], metric.true_positives)
def test_true_positive_counting_one_true_two_pred(self):
y_true = tf.constant(
[[[0, 0, 100, 100, 1]]],
dtype=tf.float32,
)
y_pred = tf.constant(
[[[0, 50, 100, 150, 1, 0.90], [0, 0, 100, 100, 1, 1.0]]],
dtype=tf.float32,
)
# note the low iou threshold
metric = COCORecall(
bounding_box_format="xyxy",
iou_thresholds=[0.15],
class_ids=[1],
area_range=(0, 10000**2),
max_detections=1,
)
metric.update_state(y_true, y_pred)
self.assertEqual([[1]], metric.true_positives)
y_true = tf.constant([[[0, 0, 100, 100, 1]]], dtype=tf.float32)
y_pred = tf.constant([[[0, 50, 100, 150, 1, 1.0]]], dtype=tf.float32)
metric.update_state(y_true, y_pred)
self.assertEqual([[2]], metric.true_positives)
def test_mixed_dtypes(self):
y_true = tf.constant([[[0, 0, 100, 100, 1]]], dtype=tf.float64)
y_pred = tf.constant([[[0, 50, 100, 150, 1, 1.0]]], dtype=tf.float32)
metric = COCORecall(
bounding_box_format="xyxy",
iou_thresholds=[0.15],
class_ids=[1],
area_range=(0, 10000**2),
max_detections=1,
)
metric.update_state(y_true, y_pred)
self.assertEqual(metric.result(), 1.0)
def test_matches_single_box(self):
y_true = tf.constant([[[0, 0, 100, 100, 1]]], dtype=tf.float32)
y_pred = tf.constant([[[0, 50, 100, 150, 1, 1.0]]], dtype=tf.float32)
# note the low iou threshold
metric = COCORecall(
bounding_box_format="xyxy",
iou_thresholds=[0.15],
class_ids=[1],
area_range=(0, 10000**2),
max_detections=1,
)
metric.update_state(y_true, y_pred)
self.assertEqual([[1]], metric.true_positives)
def test_matches_single_false_positive(self):
y_true = tf.constant([[[0, 0, 100, 100, 1]]], dtype=tf.float32)
y_pred = tf.constant([[[0, 50, 100, 150, 1, 1.0]]], dtype=tf.float32)
metric = COCORecall(
bounding_box_format="xyxy",
iou_thresholds=[0.95],
class_ids=[1],
area_range=(0, 100000**2),
max_detections=1,
)
metric.update_state(y_true, y_pred)
self.assertEqual([[0]], metric.true_positives)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains shared utilities for Keras COCO metrics."""
import tensorflow as tf
from keras_cv import bounding_box
def filter_boxes_by_area_range(boxes, min_area, max_area):
areas = bounding_box_area(boxes)
inds = tf.where(tf.math.logical_and(areas >= min_area, areas < max_area))
return tf.gather_nd(boxes, inds)
def bounding_box_area(boxes):
"""box_areas returns the area of the provided bounding boxes.
Args:
boxes: Tensor of bounding boxes of shape `[..., 4+]` in corners format.
Returns:
areas: Tensor of areas of shape `[...]`.
"""
w = boxes[..., bounding_box.XYXY.RIGHT] - boxes[..., bounding_box.XYXY.LEFT]
h = boxes[..., bounding_box.XYXY.BOTTOM] - boxes[..., bounding_box.XYXY.TOP]
return tf.math.multiply(w, h)
def filter_boxes(boxes, value, axis=4):
"""filter_boxes is used to select only boxes matching a given class.
The most common use case for this is to filter to accept only a specific
bounding_box.CLASS.
Args:
boxes: Tensor of bounding boxes in format `[images, bounding_boxes, 6]`
value: Value the specified axis must match
axis: Integer identifying the axis on which to sort, default 4
Returns:
boxes: A new Tensor of bounding boxes, where boxes[axis]==value
"""
return tf.gather_nd(boxes, tf.where(boxes[:, axis] == value))
def to_sentinel_padded_bounding_box_tensor(box_sets):
"""pad_with_sentinels returns a Tensor of bounding_boxes padded with -1s
to ensure that each bounding_box set has identical dimensions. This is to
be used before passing bounding_box predictions, or bounding_box ground truths to
the keras COCO metrics.
Args:
box_sets: List of Tensors representing bounding boxes, or a list of lists of
Tensors.
Returns:
boxes: A new Tensor where each value missing is populated with -1.
"""
return tf.ragged.stack(box_sets).to_tensor(default_value=-1)
def filter_out_sentinels(boxes):
"""filter_out_sentinels to filter out boxes that were padded on to the prediction
or ground truth bounding_box tensor to ensure dimensions match.
Args:
boxes: Tensor of bounding boxes in format `[bounding_boxes, 6]`, usually from a
single image.
Returns:
boxes: A new Tensor of bounding boxes, where boxes[axis]!=-1.
"""
return tf.gather_nd(boxes, tf.where(boxes[:, bounding_box.XYXY.CLASS] != -1))
def sort_bounding_boxes(boxes, axis=5):
"""sort_bounding_boxes is used to sort a list of bounding boxes by a given axis.
The most common use case for this is to sort by bounding_box.XYXY.CONFIDENCE, as
this is a part of computing both COCORecall and COCOMeanAveragePrecision.
Args:
boxes: Tensor of bounding boxes in format `[images, bounding_boxes, 6]`
axis: Integer identifying the axis on which to sort, default 5
Returns:
        boxes: A new Tensor of bounding boxes, sorted in descending order on
            an image-wise basis.
"""
num_images = tf.shape(boxes)[0]
boxes_sorted_list = tf.TensorArray(tf.float32, size=num_images, dynamic_size=False)
for img in tf.range(num_images):
preds_for_img = boxes[img, :, :]
prediction_scores = preds_for_img[:, axis]
_, idx = tf.math.top_k(prediction_scores, tf.shape(preds_for_img)[0])
boxes_sorted_list = boxes_sorted_list.write(
img, tf.gather(preds_for_img, idx, axis=0)
)
return boxes_sorted_list.stack()
def match_boxes(ious, threshold):
"""matches bounding boxes from y_true to boxes in y_pred.
Args:
ious: lookup table from [y_true, y_pred] => IoU.
threshold: minimum IoU for a pair to be considered a match.
Returns:
a mapping from [y_pred] => matching y_true index. Dimension
of result tensor is equal to the number of boxes in y_pred.
"""
num_true = tf.shape(ious)[0]
num_pred = tf.shape(ious)[1]
gt_matches = tf.TensorArray(
tf.int32,
size=num_true,
dynamic_size=False,
infer_shape=False,
element_shape=(),
)
pred_matches = tf.TensorArray(
tf.int32,
size=num_pred,
dynamic_size=False,
infer_shape=False,
element_shape=(),
)
for i in tf.range(num_true):
gt_matches = gt_matches.write(i, -1)
for i in tf.range(num_pred):
pred_matches = pred_matches.write(i, -1)
for detection_idx in tf.range(num_pred):
match_index = -1
iou = tf.math.minimum(threshold, 1 - 1e-10)
for gt_idx in tf.range(num_true):
if gt_matches.gather([gt_idx]) > -1:
continue
# TODO(lukewood): update clause to account for gtIg
# if m > -1 and gtIg[m] == 0 and gtIg[gind] == 1:
if ious[gt_idx, detection_idx] < iou:
continue
iou = ious[gt_idx, detection_idx]
match_index = gt_idx
# Write back the match indices
pred_matches = pred_matches.write(detection_idx, match_index)
if match_index == -1:
continue
gt_matches = gt_matches.write(match_index, detection_idx)
return pred_matches.stack()
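# Illustrative sketches for the utilities above (hand-built values; assumes
# eager execution).
import tensorflow as tf
from keras_cv.metrics.coco import utils

# Sentinel padding: two images with different box counts become one dense
# tensor padded with -1s, and the real boxes can be recovered per image.
padded = utils.to_sentinel_padded_bounding_box_tensor(
    [tf.zeros((2, 6)), tf.zeros((1, 6))]
)
print(padded.shape)  # (2, 2, 6)
print(utils.filter_out_sentinels(padded[1]).shape)  # (1, 6)
# Greedy matching: each prediction takes the unmatched ground truth with the
# highest IoU at or above the threshold; unmatched predictions map to -1.
ious = tf.constant([[0.8, 0.2], [0.1, 0.6]], dtype=tf.float32)
print(utils.match_boxes(ious, 0.5).numpy())  # [0 1]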
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for util functions."""
import tensorflow as tf
from keras_cv import bounding_box
from keras_cv.bounding_box import iou as iou_lib
from keras_cv.metrics.coco import utils
class UtilTest(tf.test.TestCase):
def test_filter_bounding_boxes_empty(self):
# set of bounding_boxes
y_pred = tf.stack([_dummy_bounding_box(category=1)])
result = utils.filter_boxes(y_pred, 2, axis=bounding_box.XYXY.CLASS)
self.assertEqual(result.shape[0], 0)
def test_bounding_box_area(self):
boxes = tf.constant([[0, 0, 100, 100]], dtype=tf.float32)
areas = utils.bounding_box_area(boxes)
self.assertAllClose(areas, tf.constant((10000.0,)))
def test_filter_bounding_boxes(self):
# set of bounding_boxes
y_pred = tf.stack(
[_dummy_bounding_box(category=1), _dummy_bounding_box(category=2)]
)
result = utils.filter_boxes(y_pred, 2, axis=bounding_box.XYXY.CLASS)
self.assertAllClose(result, tf.stack([_dummy_bounding_box(category=2)]))
def test_to_sentinel_padded_bounding_box_tensor(self):
box_set1 = tf.stack([_dummy_bounding_box(), _dummy_bounding_box()])
box_set2 = tf.stack([_dummy_bounding_box()])
boxes = [box_set1, box_set2]
bounding_box_tensor = utils.to_sentinel_padded_bounding_box_tensor(boxes)
        self.assertAllClose(bounding_box_tensor[1, 1], -tf.ones((6,)))
def test_filter_out_sentinels(self):
# set of bounding_boxes
y_pred = tf.stack(
[_dummy_bounding_box(category=1), _dummy_bounding_box(category=-1)]
)
result = utils.filter_out_sentinels(y_pred)
self.assertAllClose(result, tf.stack([_dummy_bounding_box(category=1)]))
def test_end_to_end_sentinel_filtering(self):
box_set1 = tf.stack([_dummy_bounding_box(), _dummy_bounding_box()])
box_set2 = tf.stack([_dummy_bounding_box()])
boxes = [box_set1, box_set2]
bounding_box_tensor = utils.to_sentinel_padded_bounding_box_tensor(boxes)
self.assertAllClose(
utils.filter_out_sentinels(bounding_box_tensor[0]), box_set1
)
self.assertAllClose(
utils.filter_out_sentinels(bounding_box_tensor[1]), box_set2
)
def test_match_boxes(self):
y_pred = tf.stack(
[
_dummy_bounding_box(0.1),
_dummy_bounding_box(0.9),
_dummy_bounding_box(0.4),
]
)
y_true = tf.stack(
[
_dummy_bounding_box(0.1),
_dummy_bounding_box(0.9),
_dummy_bounding_box(0.4),
_dummy_bounding_box(0.2),
]
)
ious = iou_lib.compute_iou(y_true, y_pred, "yxyx")
self.assertEqual(utils.match_boxes(ious, 0.5).shape, [3])
def test_sort_bounding_boxes_unsorted_list(self):
y_pred = tf.expand_dims(
tf.stack(
[
_dummy_bounding_box(0.1),
_dummy_bounding_box(0.9),
_dummy_bounding_box(0.4),
_dummy_bounding_box(0.2),
]
),
axis=0,
)
want = tf.expand_dims(
tf.stack(
[
_dummy_bounding_box(0.9),
_dummy_bounding_box(0.4),
_dummy_bounding_box(0.2),
_dummy_bounding_box(0.1),
]
),
axis=0,
)
y_sorted = utils.sort_bounding_boxes(y_pred, bounding_box.XYXY.CONFIDENCE)
self.assertAllClose(y_sorted, want)
def test_sort_bounding_boxes_empty_list(self):
y_pred = tf.stack([])
y_sorted = utils.sort_bounding_boxes(y_pred)
self.assertAllClose(y_pred, y_sorted)
def _dummy_bounding_box(confidence=0.0, category=0):
"""returns a bounding_box dummy with all 0 values, except for confidence."""
return tf.constant([0, 0, 0, 0, category, confidence])
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from absl.testing import parameterized
from keras_cv import metrics
class SerializationTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.named_parameters(
(
"COCORecall",
metrics.COCORecall,
{"class_ids": [0, 1, 2], "bounding_box_format": "xyxy"},
),
(
"COCOMeanAveragePrecision",
metrics.COCOMeanAveragePrecision,
{"class_ids": [0, 1, 2], "bounding_box_format": "xyxy"},
),
)
def test_layer_serialization(self, metric_cls, init_args):
metric = metric_cls(**init_args)
metric_config = metric.get_config()
reconstructed_metric = metric_cls.from_config(metric_config)
self.assertEqual(metric.get_config(), reconstructed_metric.get_config())
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from keras_cv.models.convnext import ConvNeXtBase
from keras_cv.models.convnext import ConvNeXtLarge
from keras_cv.models.convnext import ConvNeXtSmall
from keras_cv.models.convnext import ConvNeXtTiny
from keras_cv.models.convnext import ConvNeXtXLarge
from keras_cv.models.csp_darknet import CSPDarkNet
from keras_cv.models.darknet import DarkNet21
from keras_cv.models.darknet import DarkNet53
from keras_cv.models.densenet import DenseNet121
from keras_cv.models.densenet import DenseNet169
from keras_cv.models.densenet import DenseNet201
from keras_cv.models.efficientnet_v1 import EfficientNetB0
from keras_cv.models.efficientnet_v1 import EfficientNetB1
from keras_cv.models.efficientnet_v1 import EfficientNetB2
from keras_cv.models.efficientnet_v1 import EfficientNetB3
from keras_cv.models.efficientnet_v1 import EfficientNetB4
from keras_cv.models.efficientnet_v1 import EfficientNetB5
from keras_cv.models.efficientnet_v1 import EfficientNetB6
from keras_cv.models.efficientnet_v1 import EfficientNetB7
from keras_cv.models.efficientnet_v2 import EfficientNetV2B0
from keras_cv.models.efficientnet_v2 import EfficientNetV2B1
from keras_cv.models.efficientnet_v2 import EfficientNetV2B2
from keras_cv.models.efficientnet_v2 import EfficientNetV2B3
from keras_cv.models.efficientnet_v2 import EfficientNetV2L
from keras_cv.models.efficientnet_v2 import EfficientNetV2M
from keras_cv.models.efficientnet_v2 import EfficientNetV2S
from keras_cv.models.generative.stable_diffusion import StableDiffusion
from keras_cv.models.mlp_mixer import MLPMixerB16
from keras_cv.models.mlp_mixer import MLPMixerB32
from keras_cv.models.mlp_mixer import MLPMixerL16
from keras_cv.models.mobilenet_v3 import MobileNetV3Large
from keras_cv.models.mobilenet_v3 import MobileNetV3Small
from keras_cv.models.object_detection.faster_rcnn import FasterRCNN
from keras_cv.models.object_detection.retina_net.retina_net import RetinaNet
from keras_cv.models.resnet_v1 import ResNet18
from keras_cv.models.resnet_v1 import ResNet34
from keras_cv.models.resnet_v1 import ResNet50
from keras_cv.models.resnet_v1 import ResNet101
from keras_cv.models.resnet_v1 import ResNet152
from keras_cv.models.resnet_v2 import ResNet50V2
from keras_cv.models.resnet_v2 import ResNet101V2
from keras_cv.models.resnet_v2 import ResNet152V2
from keras_cv.models.vgg19 import VGG19
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from keras_cv.models.__internal__.darknet_utils import CrossStagePartial
from keras_cv.models.__internal__.darknet_utils import DarknetConvBlock
from keras_cv.models.__internal__.darknet_utils import DarknetConvBlockDepthwise
from keras_cv.models.__internal__.darknet_utils import Focus
from keras_cv.models.__internal__.darknet_utils import ResidualBlocks
from keras_cv.models.__internal__.darknet_utils import SpatialPyramidPoolingBottleneck
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""DarkNet model utils for KerasCV.
Reference:
- [YoloV3 Paper](https://arxiv.org/abs/1804.02767)
- [YoloV3 implementation](https://github.com/ultralytics/yolov3)
"""
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend
from tensorflow.keras import layers
def DarknetConvBlock(
filters, kernel_size, strides, use_bias=False, activation="silu", name=None
):
"""The basic conv block used in Darknet. Applies Conv2D followed by a BatchNorm.
Args:
filters: Integer, the dimensionality of the output space (i.e. the number of
output filters in the convolution).
        kernel_size: An integer or tuple/list of 2 integers, specifying the height
            and width of the 2D convolution window. Can be a single integer to
            specify the same value for both dimensions.
        strides: An integer or tuple/list of 2 integers, specifying the strides of
            the convolution along the height and width. Can be a single integer to
            specify the same value for both dimensions.
use_bias: Boolean, whether the layer uses a bias vector.
activation: the activation applied after the BatchNorm layer. One of "silu",
"relu" or "leaky_relu". Defaults to "silu".
name: the prefix for the layer names used in the block.
"""
if name is None:
name = f"conv_block{backend.get_uid('conv_block')}"
model_layers = [
layers.Conv2D(
filters,
kernel_size,
strides,
padding="same",
use_bias=use_bias,
),
layers.BatchNormalization(),
]
if activation == "silu":
model_layers.append(layers.Lambda(lambda x: keras.activations.swish(x)))
elif activation == "relu":
model_layers.append(layers.ReLU())
elif activation == "leaky_relu":
model_layers.append(layers.LeakyReLU(0.1))
    return keras.Sequential(model_layers, name=name)
def ResidualBlocks(filters, num_blocks, name=None):
"""A residual block used in DarkNet models, repeated `num_blocks` times.
    Args:
        filters: Integer, the dimensionality of the output space (i.e. the
            number of output filters used in the blocks).
        num_blocks: number of times the residual connections are repeated.
        name: the prefix for the layer names used in the block.
    Returns:
        a function that takes an input Tensor and applies the residual blocks.
"""
if name is None:
name = f"residual_block{backend.get_uid('residual_block')}"
def apply(x):
x = DarknetConvBlock(
filters,
kernel_size=3,
strides=2,
activation="leaky_relu",
name=f"{name}_conv1",
)(x)
for i in range(1, num_blocks + 1):
residual = x
x = DarknetConvBlock(
filters // 2,
kernel_size=1,
strides=1,
activation="leaky_relu",
name=f"{name}_conv{2*i}",
)(x)
x = DarknetConvBlock(
filters,
kernel_size=3,
strides=1,
activation="leaky_relu",
name=f"{name}_conv{2*i + 1}",
)(x)
if i == num_blocks:
x = layers.Add(name=f"{name}_out")([residual, x])
else:
x = layers.Add(name=f"{name}_add_{i}")([residual, x])
return x
return apply
def SpatialPyramidPoolingBottleneck(
filters, hidden_filters=None, kernel_sizes=(5, 9, 13), activation="silu", name=None
):
"""Spatial pyramid pooling layer used in YOLOv3-SPP
Args:
        filters: Integer, the dimensionality of the output space (i.e. the
            number of output filters used in the blocks).
hidden_filters: Integer, the dimensionality of the intermediate bottleneck space
(i.e. the number of output filters in the bottleneck convolution). If None,
it will be equal to filters. Defaults to None.
kernel_sizes: A list or tuple representing all the pool sizes used for the
pooling layers. Defaults to (5, 9, 13).
activation: Activation for the conv layers. Defaults to "silu".
name: the prefix for the layer names used in the block.
Returns:
        a function that takes an input Tensor and applies the spatial pyramid pooling bottleneck.
"""
if name is None:
name = f"spp{backend.get_uid('spp')}"
if hidden_filters is None:
hidden_filters = filters
def apply(x):
x = DarknetConvBlock(
hidden_filters,
kernel_size=1,
strides=1,
activation=activation,
name=f"{name}_conv1",
)(x)
x = [x]
for kernel_size in kernel_sizes:
x.append(
layers.MaxPooling2D(
kernel_size,
strides=1,
padding="same",
name=f"{name}_maxpool_{kernel_size}",
)(x[0])
)
x = layers.Concatenate(name=f"{name}_concat")(x)
x = DarknetConvBlock(
filters,
kernel_size=1,
strides=1,
activation=activation,
name=f"{name}_conv2",
)(x)
return x
return apply
def DarknetConvBlockDepthwise(
filters, kernel_size, strides, activation="silu", name=None
):
"""The depthwise conv block used in CSPDarknet.
Args:
filters: Integer, the dimensionality of the output space (i.e. the number of
output filters in the final convolution).
        kernel_size: An integer or tuple/list of 2 integers, specifying the height
            and width of the 2D convolution window. Can be a single integer to
            specify the same value for both dimensions.
        strides: An integer or tuple/list of 2 integers, specifying the strides of
            the convolution along the height and width. Can be a single integer to
            specify the same value for both dimensions.
activation: the activation applied after the final layer. One of "silu",
"relu" or "leaky_relu". Defaults to "silu".
name: the prefix for the layer names used in the block.
"""
if name is None:
name = f"conv_block{backend.get_uid('conv_block')}"
model_layers = [
layers.DepthwiseConv2D(kernel_size, strides, padding="same", use_bias=False),
layers.BatchNormalization(),
]
if activation == "silu":
model_layers.append(layers.Lambda(lambda x: keras.activations.swish(x)))
elif activation == "relu":
model_layers.append(layers.ReLU())
elif activation == "leaky_relu":
model_layers.append(layers.LeakyReLU(0.1))
model_layers.append(
DarknetConvBlock(filters, kernel_size=1, strides=1, activation=activation)
)
return keras.Sequential(model_layers, name=name)
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class CrossStagePartial(layers.Layer):
"""A block used in Cross Stage Partial Darknet.
Args:
filters: Integer, the dimensionality of the output space (i.e. the number of
output filters in the final convolution).
num_bottlenecks: an integer representing the number of blocks added in the
layer bottleneck.
residual: a boolean representing whether the value tensor before the
bottleneck should be added to the output of the bottleneck as a residual.
Defaults to True.
use_depthwise: a boolean value used to decide whether a depthwise conv block
            should be used over a regular darknet block. Defaults to False.
activation: the activation applied after the final layer. One of "silu",
"relu" or "leaky_relu". Defaults to "silu".
"""
def __init__(
self,
filters,
num_bottlenecks,
residual=True,
use_depthwise=False,
activation="silu",
**kwargs,
):
super().__init__(**kwargs)
self.filters = filters
self.num_bottlenecks = num_bottlenecks
self.residual = residual
self.use_depthwise = use_depthwise
self.activation = activation
hidden_channels = filters // 2
ConvBlock = DarknetConvBlockDepthwise if use_depthwise else DarknetConvBlock
self.darknet_conv1 = DarknetConvBlock(
hidden_channels,
kernel_size=1,
strides=1,
activation=activation,
)
self.darknet_conv2 = DarknetConvBlock(
hidden_channels,
kernel_size=1,
strides=1,
activation=activation,
)
        # repeat bottlenecks num_bottlenecks times
self.bottleneck_convs = []
for _ in range(num_bottlenecks):
self.bottleneck_convs.append(
DarknetConvBlock(
hidden_channels,
kernel_size=1,
strides=1,
activation=activation,
)
)
self.bottleneck_convs.append(
ConvBlock(
hidden_channels,
kernel_size=3,
strides=1,
activation=activation,
)
)
self.add = layers.Add()
self.concatenate = layers.Concatenate()
self.darknet_conv3 = DarknetConvBlock(
filters, kernel_size=1, strides=1, activation=activation
)
def call(self, x):
x1 = self.darknet_conv1(x)
x2 = self.darknet_conv2(x)
for i in range(self.num_bottlenecks):
residual = x1
x1 = self.bottleneck_convs[2 * i](x1)
x1 = self.bottleneck_convs[2 * i + 1](x1)
if self.residual:
x1 = self.add([residual, x1])
x1 = self.concatenate([x1, x2])
x = self.darknet_conv3(x1)
return x
def get_config(self):
config = {
"filters": self.filters,
"num_bottlenecks": self.num_bottlenecks,
"residual": self.residual,
"use_depthwise": self.use_depthwise,
"activation": self.activation,
}
base_config = super().get_config()
return dict(list(base_config.items()) + list(config.items()))
def Focus(name=None):
"""A block used in CSPDarknet to focus information into channels of the image.
    If the dimensions of a batch input are (batch_size, width, height, channels),
    this layer converts the image into size (batch_size, width/2, height/2,
    4*channels).
See [the original discussion on YoloV5 Focus Layer](https://github.com/ultralytics/yolov5/discussions/3181).
Args:
name: the name for the lambda layer used in the block.
Returns:
a function that takes an input Tensor representing a Focus layer.
"""
def apply(x):
return layers.Lambda(
lambda x: tf.concat(
[
x[..., ::2, ::2, :],
x[..., 1::2, ::2, :],
x[..., ::2, 1::2, :],
x[..., 1::2, 1::2, :],
],
axis=-1,
),
name=name,
)(x)
return apply
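# Quick shape sketches for the blocks above (arbitrary filter counts; assumes
# eager execution).
import tensorflow as tf
from keras_cv.models.__internal__.darknet_utils import DarknetConvBlock
from keras_cv.models.__internal__.darknet_utils import Focus

images = tf.zeros((1, 64, 64, 3))
# Conv2D with padding="same" and strides=2 halves the spatial dimensions.
print(DarknetConvBlock(filters=32, kernel_size=3, strides=2)(images).shape)
# (1, 32, 32, 32)
# Focus folds each 2x2 spatial neighborhood into the channel dimension.
print(Focus()(images).shape)  # (1, 32, 32, 12)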
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""ConvNeXt models for Keras.
References:
- [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545)
(CVPR 2022)
"""
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend
from tensorflow.keras import layers
from keras_cv.layers.regularization import StochasticDepth
from keras_cv.models import utils
MODEL_CONFIGS = {
"tiny": {
"depths": [3, 3, 9, 3],
"projection_dims": [96, 192, 384, 768],
"default_size": 224,
},
"small": {
"depths": [3, 3, 27, 3],
"projection_dims": [96, 192, 384, 768],
"default_size": 224,
},
"base": {
"depths": [3, 3, 27, 3],
"projection_dims": [128, 256, 512, 1024],
"default_size": 224,
},
"large": {
"depths": [3, 3, 27, 3],
"projection_dims": [192, 384, 768, 1536],
"default_size": 224,
},
"xlarge": {
"depths": [3, 3, 27, 3],
"projection_dims": [256, 512, 1024, 2048],
"default_size": 224,
},
}
BASE_DOCSTRING = """Instantiates the {name} architecture.
- [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545)
(CVPR 2022)
This function returns a Keras {name} model.
Args:
        include_rescaling: whether or not to rescale the inputs. If set to True,
            inputs will be passed through a `Rescaling(1/255.0)` layer.
include_top: whether to include the fully-connected layer at the top of the
network. If provided, classes must be provided.
        depths: an iterable containing the depth of each individual stage.
        projection_dims: An iterable containing the number of output channels
            of each individual stage.
drop_path_rate: stochastic depth probability, if 0.0, then stochastic
depth won't be used.
layer_scale_init_value: layer scale coefficient, if 0.0, layer scaling
won't be used.
weights: one of `None` (random initialization), or a pretrained weight
file path.
input_shape: optional shape tuple, defaults to `(None, None, 3)`.
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
to use as image input for the model.
pooling: optional pooling mode for feature extraction
when `include_top` is `False`.
- `None` means that the output of the model will be the 4D tensor output
of the last convolutional block.
- `avg` means that global average pooling will be applied to the output
of the last convolutional block, and thus the output of the model will
be a 2D tensor.
- `max` means that global max pooling will be applied.
classes: optional number of classes to classify images into, only to be
specified if `include_top` is True.
classifier_activation: A `str` or callable. The activation function to use
on the "top" layer. Ignored unless `include_top=True`. Set
`classifier_activation=None` to return the logits of the "top" layer.
Defaults to `"softmax"`.
name: (Optional) name to pass to the model. Defaults to "{name}".
Returns:
A `keras.Model` instance.
"""
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class LayerScale(layers.Layer):
"""Layer scale module.
References:
- https://arxiv.org/abs/2103.17239
Args:
init_values (float): Initial value for layer scale. Should be within
[0, 1].
projection_dim (int): Projection dimensionality.
Returns:
Tensor multiplied to the scale.
"""
def __init__(self, init_values, projection_dim, **kwargs):
super().__init__(**kwargs)
self.init_values = init_values
self.projection_dim = projection_dim
def build(self, input_shape):
self.gamma = tf.Variable(self.init_values * tf.ones((self.projection_dim,)))
def call(self, x):
return x * self.gamma
def get_config(self):
config = super().get_config()
config.update(
{
"init_values": self.init_values,
"projection_dim": self.projection_dim,
}
)
return config
def ConvNeXtBlock(
projection_dim, drop_path_rate=0.0, layer_scale_init_value=1e-6, name=None
):
"""ConvNeXt block.
References:
- https://arxiv.org/abs/2201.03545
- https://github.com/facebookresearch/ConvNeXt/blob/main/models/convnext.py
Notes:
In the original ConvNeXt implementation (linked above), the authors use
`Dense` layers for pointwise convolutions for increased efficiency.
        This implementation follows suit.
Args:
projection_dim (int): Number of filters for convolution layers. In the
ConvNeXt paper, this is referred to as projection dimension.
drop_path_rate (float): Probability of dropping paths. Should be within
[0, 1].
layer_scale_init_value (float): Layer scale value. Should be a small float
number.
        name: name to pass to the keras layer.
Returns:
A function representing a ConvNeXtBlock block.
"""
if name is None:
name = "prestem" + str(backend.get_uid("prestem"))
def apply(inputs):
x = inputs
x = layers.Conv2D(
filters=projection_dim,
kernel_size=7,
padding="same",
groups=projection_dim,
name=name + "_depthwise_conv",
)(x)
x = layers.LayerNormalization(epsilon=1e-6, name=name + "_layernorm")(x)
x = layers.Dense(4 * projection_dim, name=name + "_pointwise_conv_1")(x)
x = layers.Activation("gelu", name=name + "_gelu")(x)
x = layers.Dense(projection_dim, name=name + "_pointwise_conv_2")(x)
if layer_scale_init_value is not None:
x = LayerScale(
layer_scale_init_value,
projection_dim,
name=name + "_layer_scale",
)(x)
if drop_path_rate:
layer = StochasticDepth(drop_path_rate, name=name + "_stochastic_depth")
return layer([inputs, x])
else:
layer = layers.Activation("linear", name=name + "_identity")
return inputs + layer(x)
return apply
def Head(num_classes, activation="softmax", name=None):
"""Implementation of classification head of ConvNeXt.
Args:
num_classes: number of classes for Dense layer
activation: activation function for Dense layer
name: name prefix
Returns:
Classification head function.
"""
if name is None:
name = str(backend.get_uid("head"))
def apply(x):
x = layers.GlobalAveragePooling2D(name=name + "_head_gap")(x)
x = layers.LayerNormalization(epsilon=1e-6, name=name + "_head_layernorm")(x)
x = layers.Dense(num_classes, activation=activation, name=name + "_head_dense")(
x
)
return x
return apply
def ConvNeXt(
include_rescaling,
include_top,
depths,
projection_dims,
drop_path_rate=0.0,
layer_scale_init_value=1e-6,
weights=None,
input_shape=(None, None, 3),
input_tensor=None,
pooling=None,
classes=None,
classifier_activation="softmax",
name="convnext",
):
"""Instantiates ConvNeXt architecture given specific configuration.
Args:
include_rescaling: whether or not to Rescale the inputs. If set to True,
inputs will be passed through a `Rescaling(1/255.0)` layer.
include_top: Boolean denoting whether to include classification head to
the model.
        depths: An iterable containing the depth of each individual stage.
        projection_dims: An iterable containing the number of output channels
            of each individual stage.
drop_path_rate: Stochastic depth probability. If 0.0, then stochastic
depth won't be used.
layer_scale_init_value: Layer scale coefficient. If 0.0, layer scaling
won't be used.
weights: One of `None` (random initialization), or a pretrained weight
file path.
input_shape: optional shape tuple, defaults to `(None, None, 3)`.
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`).
pooling: optional pooling mode for feature extraction when `include_top`
is `False`.
- `None` means that the output of the model will be the 4D tensor output
of the last convolutional layer.
- `avg` means that global average pooling will be applied to the output
of the last convolutional layer, and thus the output of the model will
be a 2D tensor.
- `max` means that global max pooling will be applied.
classes: optional number of classes to classify images into, only to be
specified if `include_top` is True.
classifier_activation: A `str` or callable. The activation function to use
on the "top" layer. Ignored unless `include_top=True`. Set
`classifier_activation=None` to return the logits of the "top" layer.
name: An optional name for the model.
Returns:
A `keras.Model` instance.
Raises:
ValueError: in case of invalid argument for `weights`,
or invalid input shape.
ValueError: if `classifier_activation` is not `softmax`, or `None`
when using a pretrained top layer.
ValueError: if `include_top` is True but `classes` is not specified.
"""
if weights and not tf.io.gfile.exists(weights):
raise ValueError(
"The `weights` argument should be either "
"`None` or the path to the weights file to be loaded. "
f"Weights file not found at location: {weights}"
)
if include_top and not classes:
raise ValueError(
"If `include_top` is True, "
"you should specify `classes`. "
f"Received: classes={classes}"
)
if include_top and pooling:
raise ValueError(
f"`pooling` must be `None` when `include_top=True`."
f"Received pooling={pooling} and include_top={include_top}. "
)
inputs = utils.parse_model_inputs(input_shape, input_tensor)
x = inputs
if include_rescaling:
x = layers.Rescaling(1 / 255.0)(x)
# Stem block.
stem = keras.Sequential(
[
layers.Conv2D(
projection_dims[0],
kernel_size=4,
strides=4,
name=name + "_stem_conv",
),
layers.LayerNormalization(epsilon=1e-6, name=name + "_stem_layernorm"),
],
name=name + "_stem",
)
# Downsampling blocks.
downsample_layers = []
downsample_layers.append(stem)
num_downsample_layers = 3
for i in range(num_downsample_layers):
downsample_layer = keras.Sequential(
[
layers.LayerNormalization(
epsilon=1e-6,
name=name + "_downsampling_layernorm_" + str(i),
),
layers.Conv2D(
projection_dims[i + 1],
kernel_size=2,
strides=2,
name=name + "_downsampling_conv_" + str(i),
),
],
name=name + "_downsampling_block_" + str(i),
)
downsample_layers.append(downsample_layer)
# Stochastic depth schedule.
# This is referred from the original ConvNeXt codebase:
# https://github.com/facebookresearch/ConvNeXt/blob/main/models/convnext.py#L86
depth_drop_rates = [float(x) for x in tf.linspace(0.0, drop_path_rate, sum(depths))]
# First apply downsampling blocks and then apply ConvNeXt stages.
cur = 0
num_convnext_blocks = 4
for i in range(num_convnext_blocks):
x = downsample_layers[i](x)
for j in range(depths[i]):
x = ConvNeXtBlock(
projection_dim=projection_dims[i],
drop_path_rate=depth_drop_rates[cur + j],
layer_scale_init_value=layer_scale_init_value,
name=name + f"_stage_{i}_block_{j}",
)(x)
cur += depths[i]
if include_top:
x = Head(
num_classes=classes,
activation=classifier_activation,
name=name,
)(x)
else:
if pooling == "avg":
x = layers.GlobalAveragePooling2D()(x)
elif pooling == "max":
x = layers.GlobalMaxPooling2D()(x)
x = layers.LayerNormalization(epsilon=1e-6)(x)
model = keras.Model(inputs=inputs, outputs=x, name=name)
if weights is not None:
model.load_weights(weights)
return model
def ConvNeXtTiny(
include_rescaling,
include_top,
drop_path_rate,
layer_scale_init_value,
weights=None,
input_shape=(None, None, 3),
input_tensor=None,
pooling=None,
classes=None,
classifier_activation="softmax",
name="convnext_tiny",
):
return ConvNeXt(
include_rescaling=include_rescaling,
include_top=include_top,
depths=MODEL_CONFIGS["tiny"]["depths"],
projection_dims=MODEL_CONFIGS["tiny"]["projection_dims"],
drop_path_rate=drop_path_rate,
layer_scale_init_value=layer_scale_init_value,
weights=weights,
input_tensor=input_tensor,
input_shape=input_shape,
pooling=pooling,
classes=classes,
classifier_activation=classifier_activation,
name=name,
)
def ConvNeXtSmall(
include_rescaling,
include_top,
drop_path_rate,
layer_scale_init_value,
weights=None,
input_shape=(None, None, 3),
input_tensor=None,
pooling=None,
classes=None,
classifier_activation="softmax",
name="convnext_small",
):
return ConvNeXt(
include_rescaling=include_rescaling,
include_top=include_top,
depths=MODEL_CONFIGS["small"]["depths"],
projection_dims=MODEL_CONFIGS["small"]["projection_dims"],
drop_path_rate=drop_path_rate,
layer_scale_init_value=layer_scale_init_value,
weights=weights,
input_tensor=input_tensor,
input_shape=input_shape,
pooling=pooling,
classes=classes,
classifier_activation=classifier_activation,
name=name,
)
def ConvNeXtBase(
include_rescaling,
include_top,
drop_path_rate,
layer_scale_init_value,
weights=None,
input_shape=(None, None, 3),
input_tensor=None,
pooling=None,
classes=None,
classifier_activation="softmax",
name="convnext_base",
):
return ConvNeXt(
include_rescaling=include_rescaling,
include_top=include_top,
depths=MODEL_CONFIGS["base"]["depths"],
projection_dims=MODEL_CONFIGS["base"]["projection_dims"],
drop_path_rate=drop_path_rate,
layer_scale_init_value=layer_scale_init_value,
weights=weights,
input_tensor=input_tensor,
input_shape=input_shape,
pooling=pooling,
classes=classes,
classifier_activation=classifier_activation,
name=name,
)
def ConvNeXtLarge(
include_rescaling,
include_top,
drop_path_rate,
layer_scale_init_value,
weights=None,
input_shape=(None, None, 3),
input_tensor=None,
pooling=None,
classes=None,
classifier_activation="softmax",
name="convnext_large",
):
return ConvNeXt(
include_rescaling=include_rescaling,
include_top=include_top,
depths=MODEL_CONFIGS["large"]["depths"],
projection_dims=MODEL_CONFIGS["large"]["projection_dims"],
drop_path_rate=drop_path_rate,
layer_scale_init_value=layer_scale_init_value,
weights=weights,
input_tensor=input_tensor,
input_shape=input_shape,
pooling=pooling,
classes=classes,
classifier_activation=classifier_activation,
name=name,
)
def ConvNeXtXLarge(
include_rescaling,
include_top,
drop_path_rate,
layer_scale_init_value,
weights=None,
input_shape=(None, None, 3),
input_tensor=None,
pooling=None,
classes=None,
classifier_activation="softmax",
name="convnext_xlarge",
):
return ConvNeXt(
include_rescaling=include_rescaling,
include_top=include_top,
depths=MODEL_CONFIGS["xlarge"]["depths"],
projection_dims=MODEL_CONFIGS["xlarge"]["projection_dims"],
drop_path_rate=drop_path_rate,
layer_scale_init_value=layer_scale_init_value,
weights=weights,
input_tensor=input_tensor,
input_shape=input_shape,
pooling=pooling,
classes=classes,
classifier_activation=classifier_activation,
name=name,
)
ConvNeXtTiny.__doc__ = BASE_DOCSTRING.format(name="ConvNeXtTiny")
ConvNeXtSmall.__doc__ = BASE_DOCSTRING.format(name="ConvNeXtSmall")
ConvNeXtBase.__doc__ = BASE_DOCSTRING.format(name="ConvNeXtBase")
ConvNeXtLarge.__doc__ = BASE_DOCSTRING.format(name="ConvNeXtLarge")
ConvNeXtXLarge.__doc__ = BASE_DOCSTRING.format(name="ConvNeXtXLarge")
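# Construction sketch for one of the aliases above (illustrative arguments;
# assumes no pretrained weights and that keras_cv is installed).
import tensorflow as tf
from keras_cv.models import ConvNeXtTiny

model = ConvNeXtTiny(
    include_rescaling=True,
    include_top=True,
    drop_path_rate=0.1,
    layer_scale_init_value=1e-6,
    input_shape=(224, 224, 3),
    classes=10,
)
# The classification head produces one probability per class.
print(model(tf.zeros((1, 224, 224, 3))).shape)  # (1, 10)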
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from absl.testing import parameterized
from keras_cv.models import convnext
from .models_test import ModelsTest
MODEL_LIST = [
(
convnext.ConvNeXtTiny,
768,
{"drop_path_rate": 0.1, "layer_scale_init_value": 1e-6},
),
(
convnext.ConvNeXtSmall,
768,
{
"drop_path_rate": 0.1,
"layer_scale_init_value": 1e-6,
},
),
(
convnext.ConvNeXtBase,
1024,
{"drop_path_rate": 0.1, "layer_scale_init_value": 1e-6},
),
(
convnext.ConvNeXtLarge,
1536,
{"drop_path_rate": 0.1, "layer_scale_init_value": 1e-6},
),
(
convnext.ConvNeXtXLarge,
2048,
{"drop_path_rate": 0.1, "layer_scale_init_value": 1e-6},
),
]
class ConvNeXtTest(ModelsTest, tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(*MODEL_LIST)
def test_application_base(self, app, _, args):
super()._test_application_base(app, _, args)
@parameterized.parameters(*MODEL_LIST)
def test_application_with_rescaling(self, app, last_dim, args):
super()._test_application_with_rescaling(app, last_dim, args)
@parameterized.parameters(*MODEL_LIST)
def test_application_pooling(self, app, last_dim, args):
super()._test_application_pooling(app, last_dim, args)
@parameterized.parameters(*MODEL_LIST)
def test_application_variable_input_channels(self, app, last_dim, args):
super()._test_application_variable_input_channels(app, last_dim, args)
@parameterized.parameters(*MODEL_LIST)
def test_model_can_be_used_as_backbone(self, app, last_dim, args):
super()._test_model_can_be_used_as_backbone(app, last_dim, args)
if __name__ == "__main__":
tf.test.main()