Commit 0016b0a7 authored by sunxx1's avatar sunxx1
Browse files

Merge branch 'dtk22.04' into 'main'

Dtk22.04

See merge request dcutoolkit/deeplearing/dlexamples_new!49
parents 17bc28d5 7a382d5d
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv import bounding_box
class BoundingBoxUtilTestCase(tf.test.TestCase):
    """Unit tests for the `keras_cv.bounding_box` utility helpers.

    Covers `clip_to_image`, sentinel padding/filtering, and `add_class_id`.
    Boxes use the format (x1, y1, x2, y2, class_id); -1 is the sentinel value.
    """

    def test_clip_to_image(self):
        """Clipped boxes must lie entirely within the image extent."""
        # Test xyxy format unbatched
        height = 256
        width = 256
        bboxes = tf.convert_to_tensor(
            [[200, 200, 400, 400, 0], [100, 100, 300, 300, 0]]
        )
        image = tf.ones(shape=(height, width, 3))
        bboxes_out = bounding_box.clip_to_image(
            bboxes, bounding_box_format="xyxy", images=image
        )
        self.assertAllGreaterEqual(bboxes_out, 0)
        x1, y1, x2, y2, rest = tf.split(bboxes_out, [1, 1, 1, 1, -1], axis=1)
        self.assertAllLessEqual([x1, x2], width)
        self.assertAllLessEqual([y1, y2], height)
        # Test relative format batched
        image = tf.ones(shape=(1, height, width, 3))
        bboxes = tf.convert_to_tensor(
            [[[0.2, -1, 1.2, 0.3, 0], [0.4, 1.5, 0.2, 0.3, 0]]]
        )
        bboxes_out = bounding_box.clip_to_image(
            bboxes, bounding_box_format="rel_xyxy", images=image
        )
        self.assertAllLessEqual(bboxes_out, 1)

    def test_clip_to_image_filters_fully_out_bounding_boxes(self):
        """Boxes entirely outside the image become -1 sentinel rows."""
        # Test xyxy format unbatched
        height = 256
        width = 256
        bounding_boxes = tf.convert_to_tensor(
            [[257, 257, 400, 400, 0], [100, 100, 300, 300, 0]]
        )
        image = tf.ones(shape=(height, width, 3))
        bounding_boxes = bounding_box.clip_to_image(
            bounding_boxes, bounding_box_format="xyxy", images=image
        )
        self.assertAllEqual(
            bounding_boxes,
            tf.convert_to_tensor([[-1, -1, -1, -1, -1], [100, 100, 256, 256, 0]]),
        )

    def test_clip_to_image_filters_fully_out_bounding_boxes_negative_area(self):
        """Boxes with negative area (x2 < x1) become -1 sentinel rows."""
        # NOTE(review): this body and the one in `test_clip_to_image_filters_nans`
        # were swapped so each test name matches the input it exercises.
        height = 256
        width = 256
        bounding_boxes = tf.convert_to_tensor(
            [[257, 257, 100, 100, 0], [100, 100, 300, 300, 0]]
        )
        image = tf.ones(shape=(height, width, 3))
        bounding_boxes = bounding_box.clip_to_image(
            bounding_boxes, bounding_box_format="xyxy", images=image
        )
        self.assertAllEqual(
            bounding_boxes,
            tf.convert_to_tensor([[-1, -1, -1, -1, -1], [100, 100, 256, 256, 0]]),
        )

    def test_clip_to_image_filters_nans(self):
        """Boxes containing NaN coordinates become -1 sentinel rows."""
        # Test xyxy format unbatched
        height = 256
        width = 256
        bounding_boxes = tf.convert_to_tensor(
            [[0, float("NaN"), 100, 100, 0], [100, 100, 300, 300, 0]]
        )
        image = tf.ones(shape=(height, width, 3))
        bounding_boxes = bounding_box.clip_to_image(
            bounding_boxes, bounding_box_format="xyxy", images=image
        )
        self.assertAllEqual(
            bounding_boxes,
            tf.convert_to_tensor([[-1, -1, -1, -1, -1], [100, 100, 256, 256, 0]]),
        )

    def test_pad_with_sentinels(self):
        """Ragged boxes are padded to a dense tensor with -1 sentinel rows."""
        bounding_boxes = tf.ragged.constant(
            [[[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]], [[1, 2, 3, 4, 5]]]
        )
        padded_bounding_boxes = bounding_box.pad_with_sentinels(bounding_boxes)
        expected_output = tf.constant(
            [
                [[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]],
                [[1, 2, 3, 4, 5], [-1, -1, -1, -1, -1]],
            ]
        )
        self.assertAllEqual(padded_bounding_boxes, expected_output)

    def test_filter_sentinels(self):
        """Rows whose class id is -1 are dropped from ragged inputs."""
        bounding_boxes = tf.ragged.constant(
            [[[1, 2, 3, 4, 5], [1, 2, 3, 4, 5], [1, 2, 3, 4, -1]], [[1, 2, 3, 4, 5]]]
        )
        filtered_bounding_boxes = bounding_box.filter_sentinels(bounding_boxes)
        expected_output = tf.ragged.constant(
            [[[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]], [[1, 2, 3, 4, 5]]], ragged_rank=1
        )
        self.assertAllEqual(filtered_bounding_boxes, expected_output)

    def test_filter_sentinels_unbatched(self):
        """Sentinel filtering also works on a single (unbatched) box set."""
        bounding_boxes = tf.convert_to_tensor(
            [[1, 2, 3, 4, 5], [1, 2, 3, 4, 5], [1, 2, 3, 4, -1]]
        )
        filtered_bounding_boxes = bounding_box.filter_sentinels(bounding_boxes)
        expected_output = tf.convert_to_tensor(
            [[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]],
        )
        # NOTE(review): removed a stray debug print() left in this test.
        self.assertAllEqual(filtered_bounding_boxes, expected_output)

    def test_filter_sentinels_tensor(self):
        """Dense input with sentinels filters into a ragged result."""
        bounding_boxes = tf.convert_to_tensor(
            [
                [[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]],
                [[1, 2, 3, 4, 5], [1, 2, 3, 4, -1]],
            ]
        )
        filtered_bounding_boxes = bounding_box.filter_sentinels(bounding_boxes)
        expected_output = tf.ragged.constant(
            [[[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]], [[1, 2, 3, 4, 5]]], ragged_rank=1
        )
        self.assertAllEqual(filtered_bounding_boxes, expected_output)

    def test_pad_with_class_id_ragged(self):
        """`add_class_id` appends a zero class id to 4-value ragged boxes."""
        bounding_boxes = tf.ragged.constant(
            [[[1, 2, 3, 4], [1, 2, 3, 4]], [[1, 2, 3, 4]]]
        )
        padded_bounding_boxes = bounding_box.add_class_id(bounding_boxes)
        expected_output = tf.ragged.constant(
            [[[1, 2, 3, 4, 0], [1, 2, 3, 4, 0]], [[1, 2, 3, 4, 0]]]
        )
        self.assertAllEqual(padded_bounding_boxes, expected_output)

    def test_pad_with_class_id_unbatched(self):
        """`add_class_id` also handles rank-2 (unbatched) inputs."""
        bounding_boxes = tf.convert_to_tensor([[1, 2, 3, 4], [1, 2, 3, 4]])
        padded_bounding_boxes = bounding_box.add_class_id(bounding_boxes)
        expected_output = tf.convert_to_tensor([[1, 2, 3, 4, 0], [1, 2, 3, 4, 0]])
        self.assertAllEqual(padded_bounding_boxes, expected_output)

    def test_pad_with_class_id_exists(self):
        """Boxes that already carry a class id (5 values) are rejected."""
        bounding_boxes = tf.ragged.constant(
            [[[1, 2, 3, 4, 0], [1, 2, 3, 4, 0]], [[1, 2, 3, 4, 0]]]
        )
        with self.assertRaisesRegex(
            ValueError,
            "The number of values along the final axis of `bounding_boxes` is "
            "expected to be 4. But got 5.",
        ):
            bounding_box.add_class_id(bounding_boxes)

    def test_pad_with_class_id_wrong_rank(self):
        """Rank-4 inputs are rejected with a descriptive error."""
        bounding_boxes = tf.ragged.constant(
            [[[[1, 2, 3, 4], [1, 2, 3, 4]], [[1, 2, 3, 4]]]]
        )
        with self.assertRaisesRegex(
            ValueError,
            f"`bounding_boxes` should be of rank 2 or 3. However "
            f"add_class_id received `bounding_boxes` of rank={4}",
        ):
            bounding_box.add_class_id(bounding_boxes)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from keras_cv.core.factor_sampler.constant_factor_sampler import ConstantFactorSampler
from keras_cv.core.factor_sampler.factor_sampler import FactorSampler
from keras_cv.core.factor_sampler.normal_factor_sampler import NormalFactorSampler
from keras_cv.core.factor_sampler.uniform_factor_sampler import UniformFactorSampler
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.core.factor_sampler.factor_sampler import FactorSampler
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class ConstantFactorSampler(FactorSampler):
    """A `FactorSampler` that returns the same factor on every call.

    Use this when an augmentation layer should always apply exactly the same
    strength instead of a randomly sampled one.

    Args:
        value: the value to return from `__call__()`.

    Usage:
    ```python
    constant_factor = keras_cv.ConstantFactorSampler(0.5)
    random_sharpness = keras_cv.layers.RandomSharpness(factor=constant_factor)
    # random_sharpness will now always use a factor of 0.5
    ```
    """

    def __init__(self, value):
        self.value = value

    def __call__(self, shape=(), dtype="float32"):
        # Broadcast the constant across the requested shape.
        return self.value * tf.ones(shape=shape, dtype=dtype)

    def get_config(self):
        return {"value": self.value}
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
import keras_cv
class ConstantFactorSamplerTest(tf.test.TestCase):
    """Tests for `keras_cv.ConstantFactorSampler`."""

    def test_sample(self):
        # The sampler must always return the value it was built with.
        sampler = keras_cv.ConstantFactorSampler(0.3)
        self.assertEqual(sampler(), 0.3)

    def test_config(self):
        # `get_config()` must round-trip the constructor argument.
        sampler = keras_cv.ConstantFactorSampler(0.3)
        self.assertEqual(sampler.get_config()["value"], 0.3)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class FactorSampler:
    """FactorSampler represents a strength factor for use in an augmentation layer.

    FactorSampler should be subclassed and implement a `__call__()` method that returns
    a tf.float32, or a float. This method will be used by preprocessing layers to
    determine the strength of their augmentation. The specific range of values
    supported may vary by layer, but for most layers is the range [0, 1].
    """

    def __call__(self, shape=(), dtype="float32"):
        # Default `shape=()` matches the concrete samplers
        # (Constant/Normal/UniformFactorSampler), which all sample scalars by
        # default; the base class previously used `shape=None` inconsistently.
        raise NotImplementedError(
            "FactorSampler subclasses must implement a `__call__()` method."
        )

    def get_config(self):
        """Returns an empty config; subclasses override with their parameters."""
        return {}
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.core.factor_sampler.factor_sampler import FactorSampler
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class NormalFactorSampler(FactorSampler):
    """NormalFactorSampler samples factors from a normal distribution.

    This is useful when a user wants an augmentation layer to vary its strength
    randomly around a typical value, while keeping the sampled factor clipped
    to a fixed range.

    Args:
        mean: mean value for the distribution.
        stddev: standard deviation of the distribution.
        min_value: values below min_value are clipped to min_value.
        max_value: values above max_value are clipped to max_value.
        seed: (Optional) seed forwarded to `tf.random.normal` for reproducible
            sampling.

    Usage:
    ```python
    factor = keras_cv.core.NormalFactorSampler(
        mean=0.5,
        stddev=0.1,
        min_value=0,
        max_value=1,
    )
    random_sharpness = keras_cv.layers.RandomSharpness(factor=factor)
    # random_sharpness will now sample normally around 0.5, clipped to the
    # range [0, 1].
    ```
    """

    def __init__(self, mean, stddev, min_value, max_value, seed=None):
        self.mean = mean
        self.stddev = stddev
        self.min_value = min_value
        self.max_value = max_value
        self.seed = seed

    def __call__(self, shape=(), dtype="float32"):
        # Sample from N(mean, stddev), then clip into [min_value, max_value].
        return tf.clip_by_value(
            tf.random.normal(
                shape=shape,
                mean=self.mean,
                stddev=self.stddev,
                seed=self.seed,
                dtype=dtype,
            ),
            self.min_value,
            self.max_value,
        )

    def get_config(self):
        """Returns the sampler's configuration for serialization."""
        return {
            "mean": self.mean,
            "stddev": self.stddev,
            "min_value": self.min_value,
            "max_value": self.max_value,
            "seed": self.seed,
        }
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv import core
class NormalFactorTest(tf.test.TestCase):
    """Tests for the normally-distributed factor sampler."""

    def test_sample(self):
        # Sampled values must respect the [min_value, max_value] clipping.
        sampler = core.NormalFactor(mean=0.5, stddev=0.2, min_value=0, max_value=1)
        self.assertTrue(0 <= sampler() <= 1)

    def test_config(self):
        # `get_config()` must round-trip the distribution parameters.
        sampler = core.NormalFactor(mean=0.5, stddev=0.2, min_value=0, max_value=1)
        config = sampler.get_config()
        self.assertEqual(config["mean"], 0.5)
        self.assertEqual(config["stddev"], 0.2)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.core.factor_sampler.factor_sampler import FactorSampler
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class UniformFactorSampler(FactorSampler):
    """UniformFactorSampler samples factors uniformly from a range.

    This is useful when a user wants an augmentation layer to apply a randomly
    chosen strength, drawn uniformly from `[lower, upper)`, on each invocation.

    Args:
        lower: the lower bound of values returned from `__call__()`.
        upper: the upper bound of values returned from `__call__()`.
        seed: A shape int or Tensor, the seed to the random number generator. Must have
            dtype int32 or int64. (When using XLA, only int32 is allowed.)

    Usage:
    ```python
    uniform_factor = keras_cv.UniformFactorSampler(0, 0.5)
    random_sharpness = keras_cv.layers.RandomSharpness(factor=uniform_factor)
    # random_sharpness will now sample factors between 0, and 0.5
    ```
    """

    def __init__(self, lower, upper, seed=None):
        self.lower = lower
        self.upper = upper
        self.seed = seed

    def __call__(self, shape=(), dtype="float32"):
        # Uniform sample in [lower, upper) with the configured seed.
        return tf.random.uniform(
            shape, seed=self.seed, minval=self.lower, maxval=self.upper, dtype=dtype
        )

    def get_config(self):
        """Returns the sampler's configuration for serialization."""
        return {
            "lower": self.lower,
            "upper": self.upper,
            "seed": self.seed,
        }
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
import keras_cv
class UniformFactorSamplerTest(tf.test.TestCase):
    """Tests for `keras_cv.UniformFactorSampler`."""

    def test_sample(self):
        # Sampled values must fall inside the configured range.
        sampler = keras_cv.UniformFactorSampler(0.3, 0.6)
        self.assertTrue(0.3 <= sampler() <= 0.6)

    def test_config(self):
        # `get_config()` must round-trip the range bounds.
        sampler = keras_cv.UniformFactorSampler(0.3, 0.6)
        config = sampler.get_config()
        self.assertEqual(config["lower"], 0.3)
        self.assertEqual(config["upper"], 0.6)
# Build rules for the KerasCV custom TensorFlow ops.
licenses(["notice"])  # Apache 2.0

package(default_visibility = ["//visibility:public"])

config_setting(
    name = "windows",
    constraint_values = ["@bazel_tools//platforms:windows"],
)

# Compiler flags shared by every C++ target in this package. MSVC (the
# ":windows" branch) needs TensorFlow/Eigen defines and warning suppressions;
# all other platforms build with pthreads and C++17. Deduplicated from the
# previously copy-pasted per-target selects.
COPTS = select({
    ":windows": [
        "/DEIGEN_STRONG_INLINE=inline",
        "-DTENSORFLOW_MONOLITHIC_BUILD",
        "/DPLATFORM_WINDOWS",
        "/DEIGEN_HAS_C99_MATH",
        "/DTENSORFLOW_USE_EIGEN_THREADPOOL",
        "/DEIGEN_AVOID_STL_ARRAY",
        "/Iexternal/gemmlowp",
        "/wd4018",
        "/wd4577",
        "/DNOGDI",
        "/UTF_COMPILE_LIBRARY",
    ],
    "//conditions:default": [
        "-pthread",
        "-std=c++17",
    ],
})

cc_library(
    name = "box_util",
    srcs = ["box_util.cc"],
    hdrs = ["box_util.h"],
    copts = COPTS,
    deps = [
        "@local_config_tf//:libtensorflow_framework",
        "@local_config_tf//:tf_header_lib",
    ],
)

cc_binary(
    # Double quotes for consistency with the rest of this file.
    name = "_keras_cv_custom_ops.so",
    srcs = [
        "kernels/pairwise_iou_kernel.cc",
        "ops/pairwise_iou_op.cc",
    ],
    copts = COPTS,
    features = select({
        ":windows": ["windows_export_all_symbols"],
        "//conditions:default": [],
    }),
    linkshared = 1,
    deps = [
        ":box_util",
        "@local_config_tf//:libtensorflow_framework",
        "@local_config_tf//:tf_header_lib",
    ],
)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
/* Copyright 2022 The KerasCV Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "keras_cv/custom_ops/box_util.h"
#include <algorithm>
#include <cmath>
namespace tensorflow {
namespace kerascv {
namespace box {
// Numerical tolerance used when comparing line values and areas to zero.
// constexpr for consistency with the sibling constants below.
constexpr double kEPS = 1e-8;

// Min,max box dimensions (length, width, height). Boxes with dimensions that
// exceed these values will have box intersections of 0.
constexpr double kMinBoxDim = 1e-3;
constexpr double kMaxBoxDim = 1e6;
// A line with the representation a*x + b*y + c = 0.
struct Line {
double a = 0;
double b = 0;
double c = 0;
Line(const Vertex& v1, const Vertex& v2)
: a(v2.y - v1.y), b(v1.x - v2.x), c(v2.x * v1.y - v2.y * v1.x) {}
// Computes the line value for a vertex v as a * v.x + b * v.y + c
double LineValue(const Vertex& v) const { return a * v.x + b * v.y + c; }
// Computes the intersection point with the other line.
Vertex IntersectionPoint(const Line& other) const {
const double w = a * other.b - b * other.a;
CHECK_GT(std::fabs(w), kEPS) << "No intersection between the two lines.";
return Vertex((b * other.c - c * other.b) / w,
(c * other.a - a * other.c) / w);
}
};
// Returns the four corner vertices of a 2D box centered at (cx, cy) with
// width w, height h, rotated by `heading` radians, in winding order.
std::vector<Vertex> ComputeBoxVertices(const double cx, const double cy,
                                       const double w, const double h,
                                       const double heading) {
  // Evaluate cos/sin once and form the half-extent offsets.
  const double cos_h = std::cos(heading);
  const double sin_h = std::sin(heading);
  const double half_w_cos = (w / 2.) * cos_h;
  const double half_w_sin = (w / 2.) * sin_h;
  const double half_h_cos = (h / 2.) * cos_h;
  const double half_h_sin = (h / 2.) * sin_h;
  return {Vertex(cx - half_w_cos + half_h_sin, cy - half_w_sin - half_h_cos),
          Vertex(cx + half_w_cos + half_h_sin, cy + half_w_sin - half_h_cos),
          Vertex(cx + half_w_cos - half_h_sin, cy + half_w_sin + half_h_cos),
          Vertex(cx - half_w_cos - half_h_sin, cy - half_w_sin + half_h_cos)};
}
// Computes the intersection points between two rotated boxes, by following:
//
// 1. Initializes the current intersection points with the vertices of one box,
//    and the other box is taken as the cutting box;
//
// 2. For each cutting line in the cutting box (four cutting lines in total):
//      For each point in the current intersection points:
//        If the point is inside of the cutting line:
//          Adds it to the new intersection points;
//        if current point and its next point are in the opposite side of the
//        cutting line:
//          Computes the line of current points and its next point as tmp_line;
//          Computes the intersection point between the cutting line and
//          tmp_line;
//          Adds the intersection point to the new intersection points;
//    After checking each cutting line, sets current intersection points as
//    new intersection points;
//
// 3. Returns the final intersection points.
std::vector<Vertex> ComputeIntersectionPoints(
    const std::vector<Vertex>& rbox_1, const std::vector<Vertex>& rbox_2) {
  // Start with rbox_1's vertices and successively clip against each edge of
  // rbox_2 (Sutherland-Hodgman style polygon clipping).
  std::vector<Vertex> intersection = rbox_1;
  const int vertices_len = rbox_2.size();
  // Use the cached count in the loop condition (the original compared the
  // signed index against the unsigned size() on every iteration).
  for (int i = 0; i < vertices_len; ++i) {
    const int len = intersection.size();
    // Fewer than three vertices cannot enclose any area; stop clipping.
    if (len <= 2) {
      break;
    }
    const Vertex& p = rbox_2[i];
    const Vertex& q = rbox_2[(i + 1) % vertices_len];
    Line cutting_line(p, q);
    // Computes the signed line value of every current vertex.
    std::vector<double> line_values;
    line_values.reserve(len);
    for (int j = 0; j < len; ++j) {
      line_values.push_back(cutting_line.LineValue(intersection[j]));
    }
    // Updates current intersection points.
    std::vector<Vertex> new_intersection;
    for (int j = 0; j < len; ++j) {
      const double s_val = line_values[j];
      const Vertex& s = intersection[j];
      // Keeps the current vertex if it is inside of (or on) the cutting line.
      if (s_val <= 0 || std::fabs(s_val) <= kEPS) {
        new_intersection.push_back(s);
      }
      const double t_val = line_values[(j + 1) % len];
      // Skips the checking of intersection point if the next vertex is on the
      // line.
      if (std::fabs(t_val) <= kEPS) {
        continue;
      }
      // Adds the crossing point when the edge s->next straddles the line.
      if ((s_val > 0 && t_val < 0) || (s_val < 0 && t_val > 0)) {
        Line s_t_line(s, intersection[(j + 1) % len]);
        new_intersection.push_back(cutting_line.IntersectionPoint(s_t_line));
      }
    }
    intersection = new_intersection;
  }
  return intersection;
}
// Computes the area of a convex polygon,
double ComputePolygonArea(const std::vector<Vertex>& convex_polygon) {
const int len = convex_polygon.size();
if (len <= 2) {
return 0;
}
double area = 0;
for (int i = 0; i < len; ++i) {
const Vertex& p = convex_polygon[i];
const Vertex& q = convex_polygon[(i + 1) % len];
area += p.x * q.y - p.y * q.x;
}
return std::fabs(0.5 * area);
}
RotatedBox2D::RotatedBox2D(const double cx, const double cy, const double w,
                           const double h, const double heading)
    : cx_(cx), cy_(cy), w_(w), h_(h), heading_(heading) {
  // Build cheap axis-aligned "loose" bounds without evaluating cos/sin: a
  // circle centered on the box whose radius is at least the half-diagonal
  // encloses the box for every heading. The half-diagonal is bounded by
  // max(w, h)/2 * sqrt(2) (worst case: a square); 1.5 is used as a slightly
  // larger, cheaper stand-in for sqrt(2).
  const double radius_bound = std::max(w_, h_) / 2. * 1.5;
  loose_min_x_ = cx_ - radius_bound;
  loose_max_x_ = cx_ + radius_bound;
  loose_min_y_ = cy_ - radius_bound;
  loose_max_y_ = cy_ + radius_bound;
  // Flag degenerate or absurdly large boxes so the intersection routines can
  // bail out early (their results would be numerically untrustworthy).
  extreme_box_dim_ = (w_ <= kMinBoxDim || h_ <= kMinBoxDim ||
                      w_ >= kMaxBoxDim || h_ >= kMaxBoxDim);
}
// Lazily computes and caches the box area; area_ < 0 marks "not computed".
double RotatedBox2D::Area() const {
  if (area_ >= 0) {
    return area_;  // Cached from a previous call.
  }
  const double raw_area = ComputePolygonArea(box_vertices());
  // Snap numerically-tiny areas to exactly zero before caching.
  area_ = (std::fabs(raw_area) <= kEPS) ? 0 : raw_area;
  return area_;
}
// Returns the corner vertices of this box, computing them on first access.
// `box_vertices_` is declared mutable in the header so this const accessor
// can populate the cache.
const std::vector<Vertex>& RotatedBox2D::box_vertices() const {
  if (box_vertices_.empty()) {
    box_vertices_ = ComputeBoxVertices(cx_, cy_, w_, h_, heading_);
  }
  return box_vertices_;
}
// A box is usable only when its dimensions were not flagged as extreme
// (too small / too large) at construction time.
bool RotatedBox2D::NonZeroAndValid() const { return !extreme_box_dim_; }
// Conservative pre-test: returns false only when the boxes certainly do not
// intersect; a true result still requires the exact (expensive) check.
bool RotatedBox2D::MaybeIntersects(const RotatedBox2D& other) const {
  // Reject ill-formed boxes outright: extreme dimensions make the exact
  // intersection numerically unreliable (catastrophic cancellation).
  if (extreme_box_dim_ || other.extreme_box_dim_) {
    return false;
  }
  // The boxes can only overlap if their loose axis-aligned bounds overlap.
  const bool x_separated = (loose_min_x_ > other.loose_max_x_) ||
                           (loose_max_x_ < other.loose_min_x_);
  const bool y_separated = (loose_min_y_ > other.loose_max_y_) ||
                           (loose_max_y_ < other.loose_min_y_);
  return !(x_separated || y_separated);
}
double RotatedBox2D::Intersection(const RotatedBox2D& other) const {
// Do a fast intersection check - if the boxes are not near each other
// then we can return early. If they are close enough to maybe overlap,
// we do the full check.
if (!MaybeIntersects(other)) {
return 0.0;
}
// Computes the intersection polygon.
const std::vector<Vertex> intersection_polygon =
ComputeIntersectionPoints(box_vertices(), other.box_vertices());
// Computes the intersection area.
const double intersection_area = ComputePolygonArea(intersection_polygon);
return std::fabs(intersection_area) <= kEPS ? 0 : intersection_area;
}
double RotatedBox2D::IoU(const RotatedBox2D& other) const {
// Computes the intersection area.
const double intersection_area = Intersection(other);
if (intersection_area == 0) {
return 0;
}
// Computes the union area.
const double union_area = Area() + other.Area() - intersection_area;
if (std::fabs(union_area) <= kEPS) {
return 0;
}
return intersection_area / union_area;
}
// Converts a [N, 7] float tensor of rows (center_x, center_y, center_z,
// dimension_x, dimension_y, dimension_z, heading) into N Upright3DBox
// objects. Rows with a non-positive x or y dimension get an empty 2D
// footprint so that later IoU/Overlap computations return 0 for them.
std::vector<Upright3DBox> ParseBoxesFromTensor(const Tensor& boxes_tensor) {
  const int num_boxes = boxes_tensor.dim_size(0);
  const auto t_boxes_tensor = boxes_tensor.matrix<float>();
  std::vector<Upright3DBox> bboxes3d;
  bboxes3d.reserve(num_boxes);
  for (int i = 0; i < num_boxes; ++i) {
    const double center_x = t_boxes_tensor(i, 0);
    const double center_y = t_boxes_tensor(i, 1);
    const double center_z = t_boxes_tensor(i, 2);
    const double dimension_x = t_boxes_tensor(i, 3);
    const double dimension_y = t_boxes_tensor(i, 4);
    const double dimension_z = t_boxes_tensor(i, 5);
    const double heading = t_boxes_tensor(i, 6);
    const double z_min = center_z - dimension_z / 2;
    const double z_max = center_z + dimension_z / 2;
    if (dimension_x <= 0 || dimension_y <= 0) {
      // Degenerate footprint: store an empty 2D box. (Previously a rotated
      // box was constructed unconditionally and discarded on this branch.)
      bboxes3d.emplace_back(RotatedBox2D(), z_min, z_max);
    } else {
      bboxes3d.emplace_back(
          RotatedBox2D(center_x, center_y, dimension_x, dimension_y, heading),
          z_min, z_max);
    }
  }
  return bboxes3d;
}
// A 3D box is valid only when its z-range has positive height and its 2D
// footprint is itself valid.
bool Upright3DBox::NonZeroAndValid() const {
  // An inverted (z_min above z_max) or zero-height z-range invalidates the
  // box; otherwise defer to the footprint's validity check. The comparison
  // is kept as `z_min - z_max >= 0.` to preserve the original behavior
  // exactly (including for non-finite inputs).
  return (z_min - z_max >= 0.) ? false : rbox.NonZeroAndValid();
}
// Volumetric intersection-over-union with `other`; 0 when either box is
// invalid or the boxes are disjoint along any axis.
double Upright3DBox::IoU(const Upright3DBox& other) const {
  // Invalid or degenerate boxes never overlap.
  if (!NonZeroAndValid() || !other.NonZeroAndValid()) {
    return 0;
  }
  // Height of the shared z-range; zero means vertically disjoint.
  const double z_inter =
      std::max(.0, std::min(z_max, other.z_max) - std::max(z_min, other.z_min));
  if (z_inter == 0) {
    return 0;
  }
  // Shared footprint area; zero means horizontally disjoint.
  const double base_inter = rbox.Intersection(other.rbox);
  if (base_inter == 0) {
    return 0;
  }
  const double volume_1 = rbox.Area() * (z_max - z_min);
  const double volume_2 = other.rbox.Area() * (other.z_max - other.z_min);
  const double volume_inter = base_inter * z_inter;
  const double volume_union = volume_1 + volume_2 - volume_inter;
  return volume_inter > 0 ? volume_inter / volume_union : 0;
}
// Intersection volume with `other`, normalized by the volume of *this* box;
// 0 when either box is invalid or the boxes are disjoint along any axis.
double Upright3DBox::Overlap(const Upright3DBox& other) const {
  // Invalid or degenerate boxes never overlap.
  if (!NonZeroAndValid() || !other.NonZeroAndValid()) {
    return 0;
  }
  // Height of the shared z-range; zero means vertically disjoint.
  const double z_inter =
      std::max(.0, std::min(z_max, other.z_max) - std::max(z_min, other.z_min));
  if (z_inter == 0) {
    return 0;
  }
  // Shared footprint area; zero means horizontally disjoint.
  const double base_inter = rbox.Intersection(other.rbox);
  if (base_inter == 0) {
    return 0;
  }
  const double volume_this = rbox.Area() * (z_max - z_min);
  const double volume_inter = base_inter * z_inter;
  return volume_inter > 0 ? volume_inter / volume_this : 0;
}
} // namespace box
} // namespace kerascv
} // namespace tensorflow
/* Copyright 2022 The KerasCV Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef THIRD_PARTY_PY_KERAS_CV_OPS_BOX_UTIL_H_
#define THIRD_PARTY_PY_KERAS_CV_OPS_BOX_UTIL_H_
#include <string>
#include <vector>
#include "tensorflow/core/framework/tensor.h"
namespace tensorflow {
namespace kerascv {
namespace box {
// A vertex with (x, y) coordinate.
//
// This is an internal implementation detail of RotatedBox2D.
struct Vertex {
  // Creates an empty Vertex at the origin.
  Vertex() = default;
  // Creates a vertex at the given 2D coordinates.
  Vertex(const double x, const double y) : x(x), y(y) {}
  double x = 0;
  double y = 0;
};
// A rotated 2D bounding box represented as (cx, cy, w, h, heading). cx, cy are
// the box center coordinates; w, h are the box width and height; heading is
// the rotation angle in radian relative to the 'positive x' direction.
class RotatedBox2D {
 public:
  // Creates an empty rotated 2D box (all parameters zero).
  RotatedBox2D() : RotatedBox2D(0, 0, 0, 0, 0) {}
  RotatedBox2D(const double cx, const double cy, const double w, const double h,
               const double heading);
  // Returns the area of the box. Computed lazily and cached in `area_`.
  double Area() const;
  // Returns the intersection area between this box and the given box.
  double Intersection(const RotatedBox2D& other) const;
  // Returns the IoU between this box and the given box.
  double IoU(const RotatedBox2D& other) const;
  // Returns true if the box is valid (width and height are not extremely
  // large or small).
  bool NonZeroAndValid() const;

 private:
  // Computes / caches box_vertices_ calculation.
  const std::vector<Vertex>& box_vertices() const;
  // Returns true if this box and 'other' might intersect.
  //
  // If this returns false, the two boxes definitely do not intersect. If this
  // returns true, it is still possible that the two boxes do not intersect, and
  // the more expensive intersection code will be called.
  bool MaybeIntersects(const RotatedBox2D& other) const;
  double cx_ = 0;
  double cy_ = 0;
  double w_ = 0;
  double h_ = 0;
  double heading_ = 0;
  // Loose axis-aligned boundaries used by the fast intersection pre-test.
  double loose_min_x_ = -1;
  double loose_max_x_ = -1;
  double loose_min_y_ = -1;
  double loose_max_y_ = -1;
  // True if the dimensions of the box are very small or very large in any
  // dimension.
  bool extreme_box_dim_ = false;
  // The following fields are computed on demand. They are logically
  // const.
  // Cached area. Access via Area() public API; -1 means "not yet computed".
  mutable double area_ = -1;
  // Stores the vertices of the box. Access via box_vertices().
  mutable std::vector<Vertex> box_vertices_;
};
// A 3D box of 7-DOFs: only allows rotation around the z-axis. Represented as
// a rotated 2D footprint (rbox) plus a vertical [z_min, z_max] extent.
struct Upright3DBox {
  RotatedBox2D rbox = RotatedBox2D();
  double z_min = 0;
  double z_max = 0;

  // Creates an empty rotated 3D box.
  Upright3DBox() = default;

  // Creates a 3D box from the raw input data with size 7. The data format is
  // (center_x, center_y, center_z, dimension_x, dimension_y, dimension_z,
  // heading), i.e. raw[0..1] and raw[3..4] plus raw[6] define the 2D
  // footprint, while raw[2] +/- raw[5]/2 defines the vertical extent.
  // Precondition: raw must contain at least 7 elements (not checked here).
  Upright3DBox(const std::vector<double>& raw)
      : rbox(raw[0], raw[1], raw[3], raw[4], raw[6]),
        z_min(raw[2] - raw[5] / 2.0),
        z_max(raw[2] + raw[5] / 2.0) {}

  // Creates a 3D box from an existing 2D footprint and a vertical extent.
  Upright3DBox(const RotatedBox2D& rb, const double z_min, const double z_max)
      : rbox(rb), z_min(z_min), z_max(z_max) {}

  // Computes intersection over union (of the volume).
  double IoU(const Upright3DBox& other) const;

  // Computes overlap: intersection of this box and the given box normalized
  // over the volume of this box.
  double Overlap(const Upright3DBox& other) const;

  // Returns true if the box is valid (width and height are not extremely
  // large or small, and zmin < zmax).
  bool NonZeroAndValid() const;
};
// Converts a [N, 7] tensor to a vector of N Upright3DBox objects.
std::vector<Upright3DBox> ParseBoxesFromTensor(const Tensor& boxes_tensor);
} // namespace box
} // namespace kerascv
} // namespace tensorflow
#endif // THIRD_PARTY_PY_KERAS_CV_OPS_BOX_UTIL_H_
/* Copyright 2022 The KerasCV Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <vector>
#include "keras_cv/custom_ops/box_util.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/lib/core/errors.h"
namespace tensorflow {
namespace kerascv {
namespace {
class PairwiseIoUOp : public OpKernel {
public:
explicit PairwiseIoUOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
void Compute(OpKernelContext* ctx) override {
const Tensor& a = ctx->input(0);
const Tensor& b = ctx->input(1);
OP_REQUIRES(ctx, TensorShapeUtils::IsMatrix(a.shape()),
errors::InvalidArgument("In[0] must be a matrix, but get ",
a.shape().DebugString()));
OP_REQUIRES(ctx, TensorShapeUtils::IsMatrix(b.shape()),
errors::InvalidArgument("In[0] must be a matrix, but get ",
b.shape().DebugString()));
OP_REQUIRES(ctx, 7 == a.dim_size(1),
errors::InvalidArgument("Matrix size-incompatible: In[0]: ",
a.shape().DebugString()));
OP_REQUIRES(ctx, 7 == b.dim_size(1),
errors::InvalidArgument("Matrix size-incompatible: In[1]: ",
b.shape().DebugString()));
const int n_a = a.dim_size(0);
const int n_b = b.dim_size(0);
Tensor* iou_a_b = nullptr;
OP_REQUIRES_OK(
ctx, ctx->allocate_output("iou", TensorShape({n_a, n_b}), &iou_a_b));
auto t_iou_a_b = iou_a_b->matrix<float>();
std::vector<box::Upright3DBox> box_a = box::ParseBoxesFromTensor(a);
std::vector<box::Upright3DBox> box_b = box::ParseBoxesFromTensor(b);
for (int i_a = 0; i_a < n_a; ++i_a) {
for (int i_b = 0; i_b < n_b; ++i_b) {
t_iou_a_b(i_a, i_b) = box_a[i_a].IoU(box_b[i_b]);
}
}
}
};
REGISTER_KERNEL_BUILDER(Name("PairwiseIou3D").Device(DEVICE_CPU),
PairwiseIoUOp);
} // namespace
} // namespace kerascv
} // namespace tensorflow
/* Copyright 2022 The KerasCV Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
using namespace tensorflow;

// Registers the PairwiseIou3D op: given two float tensors of 7-DOF 3D boxes,
// it produces the [num_boxes_a, num_boxes_b] matrix of pairwise IoUs.
REGISTER_OP("PairwiseIou3D")
    .Input("boxes_a: float")
    .Input("boxes_b: float")
    .Output("iou: float")
    // Static shape inference: output is [dim0(boxes_a), dim0(boxes_b)].
    .SetShapeFn([](tensorflow::shape_inference::InferenceContext* c) {
      c->set_output(
          0, c->MakeShape({c->Dim(c->input(0), 0), c->Dim(c->input(1), 0)}));
      return tensorflow::Status();
    })
    .Doc(R"doc(
Calculate pairwise IoUs between two set of 3D bboxes. Every bbox is represented
as [center_x, center_y, center_z, dim_x, dim_y, dim_z, heading].
boxes_a: A tensor of shape [num_boxes_a, 7]
boxes_b: A tensor of shape [num_boxes_b, 7]
)doc");
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from keras_cv.datasets import pascal_voc
### The ImageNet Dataset in keras_cv
In order to load ImageNet with KerasCV, you'll need to download the [original ImageNet dataset](https://image-net.org) and parse the images into TFRecords.
TensorFlow provides a [script](https://github.com/tensorflow/tpu/blob/master/tools/datasets/imagenet_to_gcs.py) to perform this parsing and to upload the images to Google Cloud Storage (or, optionally, to local storage).
Please reference that script's instructions on producing ImageNet TFRecords, and then use the KerasCV loader to load records from wherever you choose to store them.
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from keras_cv.datasets.imagenet.load import load
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from tensorflow.keras import layers
def parse_imagenet_example(img_size, crop_to_aspect_ratio):
    """Build a parser mapping a serialized TFRecord example to (image, label).

    Args:
        img_size: optional (width, height) to resize decoded images to. When
            falsy, images are returned at their original decoded size.
        crop_to_aspect_ratio: forwarded to `layers.Resizing`; controls whether
            resizing crops to preserve the aspect ratio.

    Returns:
        A function suitable for `tf.data.Dataset.map` that yields an
        (image, one_hot_label) pair per serialized example.
    """
    resize_layer = (
        layers.Resizing(
            width=img_size[0],
            height=img_size[1],
            crop_to_aspect_ratio=crop_to_aspect_ratio,
        )
        if img_size
        else None
    )

    def apply(example):
        # Feature spec for the standard ImageNet TFRecord layout.
        image_feature = "image/encoded"
        label_feature = "image/class/label"
        feature_spec = {
            image_feature: tf.io.FixedLenFeature((), tf.string, ""),
            label_feature: tf.io.FixedLenFeature([], tf.int64, -1),
        }
        record = tf.io.parse_single_example(example, feature_spec)

        # Decode the JPEG bytes and optionally resize.
        raw_bytes = tf.reshape(record[image_feature], shape=[])
        image = tf.io.decode_jpeg(raw_bytes, channels=3)
        if resize_layer:
            image = resize_layer(image)

        # Labels in the records are 1-based; shift to 0-based, then one-hot
        # over the 1000 ImageNet classes.
        label = (
            tf.cast(tf.reshape(record[label_feature], shape=()), dtype=tf.int32) - 1
        )
        label = tf.one_hot(label, 1000)
        return image, label

    return apply
def load(
    split,
    tfrecord_path,
    batch_size=None,
    shuffle=True,
    shuffle_buffer=None,
    reshuffle_each_iteration=False,
    img_size=None,
    crop_to_aspect_ratio=True,
):
    """Loads the ImageNet dataset from TFRecords.

    Usage:
    ```python
    dataset, ds_info = keras_cv.datasets.imagenet.load(
        split="train", tfrecord_path="gs://my-bucket/imagenet-tfrecords"
    )
    ```

    Args:
        split: the split to load. Should be one of "train" or "validation."
        tfrecord_path: the path to your preprocessed ImageNet TFRecords.
            See keras_cv/datasets/imagenet/README.md for preprocessing
            instructions.
        batch_size: how many instances to include in batches after loading.
            Should only be specified if img_size is specified (so that images
            can be resized to the same size before batching).
        shuffle: whether or not to shuffle the dataset. Defaults to True.
        shuffle_buffer: the size of the buffer to use in shuffling. When
            omitted while `batch_size` is given, defaults to 8 * batch_size.
        reshuffle_each_iteration: whether to reshuffle the dataset on every
            epoch. Defaults to False.
        img_size: the size to resize the images to. Defaults to None,
            indicating that images should not be resized.
        crop_to_aspect_ratio: whether resizing should crop to preserve the
            aspect ratio. Only used when `img_size` is given. Defaults to
            True.

    Returns:
        A tf.data.Dataset of (image, label) tuples, where image is a Tensor
        of shape [H, W, 3] ([batch, H, W, 3] once batched) and label is a
        one-hot Tensor of shape [1000].

    Raises:
        ValueError: if `batch_size` is given without `img_size`, or if
            `shuffle=True` but neither `batch_size` nor `shuffle_buffer` is
            provided.
    """
    if batch_size is not None and img_size is None:
        raise ValueError("Batching can only be performed if images are resized.")

    # The standard imagenet_to_gcs preprocessing shards the train split into
    # 1024 files and the validation split into 128.
    num_splits = 1024 if split == "train" else 128
    filenames = [
        f"{tfrecord_path}/{split}-{i:05d}-of-{num_splits:05d}"
        for i in range(0, num_splits)
    ]

    dataset = tf.data.TFRecordDataset(
        filenames=filenames, num_parallel_reads=tf.data.AUTOTUNE
    )

    dataset = dataset.map(
        parse_imagenet_example(img_size, crop_to_aspect_ratio),
        num_parallel_calls=tf.data.AUTOTUNE,
    )

    if shuffle:
        if not batch_size and not shuffle_buffer:
            raise ValueError(
                "If `shuffle=True`, either a `batch_size` or `shuffle_buffer` must be "
                "provided to `keras_cv.datasets.imagenet.load().`"
            )
        shuffle_buffer = shuffle_buffer or 8 * batch_size
        dataset = dataset.shuffle(
            shuffle_buffer, reshuffle_each_iteration=reshuffle_each_iteration
        )

    if batch_size is not None:
        dataset = dataset.batch(batch_size)

    return dataset.prefetch(tf.data.AUTOTUNE)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment