Commit 66d00a87 authored by yongzhe2160's avatar yongzhe2160 Committed by Menglong Zhu
Browse files

Merged commit includes the following changes: (#7220)

* Merged commit includes the following changes:
257930561  by yongzhe:

    Mobile LSTD TfLite Client.

--
257928126  by yongzhe:

    Mobile SSD TfLite client.

--
257921181  by menglong:

    Fix discrepancy between pre_bottleneck = {true, false}

--
257561213  by yongzhe:

    File utils.

--
257449226  by yongzhe:

    Mobile SSD Client.

--
257264654  by yongzhe:

    SSD utils.

--
257235648  by yongzhe:

    Proto bazel build rules.

--
256437262  by Menglong Zhu:

    Fix check for FusedBatchNorm op to only verify it as a prefix.

--
256283755  by yongzhe:

    Bazel build and copybara changes.

--
251947295  by yinxiao:

    Add missing interleaved option in checkpoint restore.

--
251513479  by yongzhe:

    Conversion utils.

--
248783193  by yongzhe:

    Branch protos needed for the lstd client.

--
248200507  by menglong:

    Fix proto namespace in example config

--

PiperOrigin-RevId: 257930561

* Delete BUILD
parent 395f6d2d
# Build rules for the mobile LSTD/SSD TFLite client utility libraries.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
# Image conversion helpers: padding detection/removal for raw pixel buffers.
cc_library(
name = "conversion_utils",
srcs = ["conversion_utils.cc"],
hdrs = ["conversion_utils.h"],
deps = [
"@com_google_absl//absl/base:core_headers",
"@com_google_glog//:glog",
],
)
cc_test(
name = "conversion_utils_test",
srcs = ["conversion_utils_test.cc"],
deps = [
":conversion_utils",
"@com_google_googletest//:gtest_main",
],
)
# SSD post-processing: box decoding, non-max suppression, anchor generation.
cc_library(
name = "ssd_utils",
srcs = ["ssd_utils.cc"],
hdrs = ["ssd_utils.h"],
deps = [
"//protos:anchor_generation_options_cc_proto",
"//protos:box_encodings_cc_proto",
"//protos:detections_cc_proto",
"@com_google_absl//absl/strings",
"@com_google_glog//:glog",
],
)
# File reading and labelmap-proto loading helpers.
cc_library(
name = "file_utils",
srcs = ["file_utils.cc"],
hdrs = ["file_utils.h"],
deps = [
"//protos:labelmap_cc_proto",
"@com_google_absl//absl/strings",
"@com_google_glog//:glog",
],
)
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "utils/conversion_utils.h"
#include <glog/logging.h>
namespace lstm_object_detection {
namespace tflite {
// Returns true when the pixel data described by the arguments carries bytes
// that must be stripped before it is fed to the model input layer: either an
// alpha channel (bytes_per_pixel == 4) or per-row padding. CHECK-fails on
// non-positive dimensions, an unsupported depth, or a too-small stride.
bool HasPadding(int width, int height, int bytes_per_pixel, int bytes_per_row) {
  CHECK_LT(0, width);
  CHECK_LT(0, height);
  CHECK(bytes_per_pixel == 1 || bytes_per_pixel == 3 || bytes_per_pixel == 4);
  const int packed_row_bytes = width * bytes_per_pixel;
  CHECK_LE(packed_row_bytes, bytes_per_row);
  // RGBA always needs processing (alpha is dropped); otherwise padding exists
  // exactly when the stride is wider than the packed pixels.
  return bytes_per_pixel == 4 || packed_row_bytes < bytes_per_row;
}
// Copies 'image_data' into a tightly packed buffer, dropping any row padding
// and, for RGBA input, the alpha byte of every pixel. The result holds
// width * height pixels (1 or 3 bytes each) in row-major order.
std::vector<uint8_t> RemovePadding(const uint8_t* image_data, int width,
                                   int height, int bytes_per_pixel,
                                   int bytes_per_row) {
  CHECK_LT(0, width);
  CHECK_LT(0, height);
  CHECK(bytes_per_pixel == 1 || bytes_per_pixel == 3 || bytes_per_pixel == 4);
  CHECK_LE(width * bytes_per_pixel, bytes_per_row);
  // RGBA collapses to RGB; grayscale stays single-channel.
  const int out_depth = (bytes_per_pixel == 1) ? 1 : 3;
  std::vector<uint8_t> packed(width * height * out_depth);
  int out = 0;
  for (int y = 0; y < height; ++y) {
    const uint8_t* row = image_data + y * bytes_per_row;
    for (int x = 0; x < width; ++x) {
      const uint8_t* pixel = row + x * bytes_per_pixel;
      for (int d = 0; d < out_depth; ++d) {
        packed[out++] = pixel[d];
      }
    }
  }
  return packed;
}
} // namespace tflite
} // namespace lstm_object_detection
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Lightweight utilities related to conversion of input images.
#ifndef TENSORFLOW_MODELS_LSTM_OBJECT_DETECTION_TFLITE_UTILS_CONVERSION_UTILS_H_
#define TENSORFLOW_MODELS_LSTM_OBJECT_DETECTION_TFLITE_UTILS_CONVERSION_UTILS_H_
#include <vector>
#include <cstdint>
namespace lstm_object_detection {
namespace tflite {
// Finds out whether a call to 'RemovePadding()' is needed to process the given
// pixel data constellation in order to make it suitable for model input layer.
// All integers must be positive, 'bytes_per_row' must be sufficiently large,
// and for 'bytes_per_pixel' only values 1, 3, 4 may be passed and implies a
// grayscale, RGB, or RGBA image. Returns true iff excessive bytes exist in the
// associated pixel data (RGBA input always qualifies, since its alpha bytes
// must be stripped). Violated preconditions are fatal (CHECK).
bool HasPadding(int width, int height, int bytes_per_pixel, int bytes_per_row);
// Removes padding at the pixel and row level of pixel data which is stored in
// the usual row major order ("interleaved"). Produces pixel data which is
// suitable for model input layer. If 'HasPadding()' is false then this
// function will return an identical copy of 'image'. For restrictions on the
// integer parameters see comment on 'HasPadding()'. The output is packed:
// 1 byte per pixel for grayscale input, 3 bytes per pixel otherwise.
std::vector<uint8_t> RemovePadding(const uint8_t* image, int width, int height,
                                   int bytes_per_pixel, int bytes_per_row);
}  // namespace tflite
}  // namespace lstm_object_detection
#endif  // TENSORFLOW_MODELS_LSTM_OBJECT_DETECTION_TFLITE_UTILS_CONVERSION_UTILS_H_
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "utils/conversion_utils.h"
#include <vector>
#include <glog/logging.h>
#include <gmock/gmock.h>
#include "gtest/gtest.h"
using testing::ContainerEq;
namespace lstm_object_detection {
namespace tflite {
namespace {
// HasPadding CHECK-fails on a zero width or height.
TEST(ConversionUtilsTests, HasPaddingNonPositiveDimensions) {
  EXPECT_DEATH(HasPadding(/* width= */ 0, /* height= */ 4,
                          /* bytes_per_pixel= */ 4, /* bytes_per_row= */ 12),
               "");
  EXPECT_DEATH(HasPadding(/* width= */ 3, /* height= */ 0,
                          /* bytes_per_pixel= */ 4, /* bytes_per_row= */ 12),
               "");
}
// Only depths 1, 3, and 4 bytes/pixel are accepted; anything else is fatal.
TEST(ConversionUtilsTests, HasPaddingIllegalDepth) {
  for (int bytes_per_pixel : {-1, 0, 2, 5, 6}) {
    EXPECT_DEATH(HasPadding(/* width= */ 3, /* height= */ 4, bytes_per_pixel,
                            /* bytes_per_row= */ 12),
                 "");
  }
}
// RGBA input always reports padding; a stride below 4 * width is fatal.
TEST(ConversionUtilsTests, HasPaddingWithRGBAImage) {
  constexpr int kWidth = 3;
  constexpr int kHeight = 4;
  constexpr int kBytesPerPixel = 4;
  EXPECT_DEATH(
      HasPadding(kWidth, kHeight, kBytesPerPixel, /* bytes_per_row= */ 11), "");
  EXPECT_TRUE(
      HasPadding(kWidth, kHeight, kBytesPerPixel, /* bytes_per_row= */ 12));
  EXPECT_TRUE(
      HasPadding(kWidth, kHeight, kBytesPerPixel, /* bytes_per_row= */ 13));
}
// RGB input has padding exactly when the stride exceeds 3 * width.
TEST(ConversionUtilsTests, HasPaddingWithRGBImage) {
  constexpr int kWidth = 3;
  constexpr int kHeight = 4;
  constexpr int kBytesPerPixel = 3;
  EXPECT_DEATH(
      HasPadding(kWidth, kHeight, kBytesPerPixel, /* bytes_per_row= */ 8), "");
  EXPECT_FALSE(
      HasPadding(kWidth, kHeight, kBytesPerPixel, /* bytes_per_row= */ 9));
  EXPECT_TRUE(
      HasPadding(kWidth, kHeight, kBytesPerPixel, /* bytes_per_row= */ 10));
}
// Grayscale input has padding exactly when the stride exceeds the width.
TEST(ConversionUtilsTests, HasPaddingWithGrayscaleImage) {
  constexpr int kWidth = 3;
  constexpr int kHeight = 4;
  constexpr int kBytesPerPixel = 1;
  EXPECT_DEATH(
      HasPadding(kWidth, kHeight, kBytesPerPixel, /* bytes_per_row= */ 2), "");
  EXPECT_FALSE(
      HasPadding(kWidth, kHeight, kBytesPerPixel, /* bytes_per_row= */ 3));
  EXPECT_TRUE(
      HasPadding(kWidth, kHeight, kBytesPerPixel, /* bytes_per_row= */ 4));
}
// 4-byte pixels: every alpha byte is stripped, leaving packed RGB.
TEST(ConversionUtilsTests, RemovePaddingWithRGBAImage) {
  constexpr int kWidth = 4;
  constexpr int kHeight = 2;
  constexpr int kBytesPerPixel = 4;
  constexpr int kStride = kBytesPerPixel * kWidth * sizeof(uint8_t);
  const std::vector<uint8_t> kImageData{
      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
      21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36};
  ASSERT_EQ(kHeight * kStride, kImageData.size());
  const std::vector<uint8_t> kExpected = {
      1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15,
      21, 22, 23, 25, 26, 27, 29, 30, 31, 33, 34, 35,
  };
  const std::vector<uint8_t> actual = RemovePadding(
      kImageData.data(), kWidth, kHeight, kBytesPerPixel, kStride);
  EXPECT_EQ(3 * kWidth * kHeight, actual.size());
  EXPECT_THAT(actual, ContainerEq(kExpected));
}
// 3-byte pixels with no padding: the output equals the input byte-for-byte.
TEST(ConversionUtilsTests, RemovePaddingWithRGBImage) {
  constexpr int kWidth = 4;
  constexpr int kHeight = 2;
  constexpr int kBytesPerPixel = 3;
  constexpr int kBytesPerRow = kBytesPerPixel * kWidth * sizeof(uint8_t);
  const std::vector<uint8_t> kImageData{1, 2, 3, 5, 6, 7, 9, 10,
                                        11, 13, 14, 15, 21, 22, 23, 25,
                                        26, 27, 29, 30, 31, 33, 34, 35};
  ASSERT_EQ(kHeight * kBytesPerRow, kImageData.size());
  const std::vector<uint8_t> actual = RemovePadding(
      kImageData.data(), kWidth, kHeight, kBytesPerPixel, kBytesPerRow);
  EXPECT_EQ(3 * kWidth * kHeight, actual.size());
  EXPECT_THAT(actual, ContainerEq(kImageData));
}
// Grayscale with no padding: the output equals the input byte-for-byte.
TEST(ConversionUtilsTests, RemovePaddingWithGrayscaleImage) {
  constexpr int kWidth = 8;
  constexpr int kHeight = 2;
  constexpr int kBytesPerPixel = 1;
  constexpr int kBytesPerRow = kBytesPerPixel * kWidth * sizeof(uint8_t);
  const std::vector<uint8_t> kImageData{
      1, 2, 3, 4, 5, 6, 7, 8, 21, 22, 23, 24, 25, 26, 27, 28,
  };
  ASSERT_EQ(kHeight * kBytesPerRow, kImageData.size());
  const std::vector<uint8_t> actual = RemovePadding(
      kImageData.data(), kWidth, kHeight, kBytesPerPixel, kBytesPerRow);
  EXPECT_EQ(kWidth * kHeight, actual.size());
  EXPECT_THAT(actual, ContainerEq(kImageData));
}
// Two trailing padding bytes per row must be dropped from the output.
TEST(ConversionUtilsTests, RemovePaddingWithPadding) {
  constexpr int kWidth = 8;
  constexpr int kHeight = 2;
  constexpr int kBytesPerPixel = 1;
  // Pad each row with two bytes.
  constexpr int kBytesPerRow = kBytesPerPixel * (kWidth + 2) * sizeof(uint8_t);
  const std::vector<uint8_t> kImageData{1, 2, 3, 4, 5, 6, 7, 8, 21, 22,
                                        23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
  ASSERT_EQ(kHeight * kBytesPerRow, kImageData.size());
  const std::vector<uint8_t> kExpected = {
      1, 2, 3, 4, 5, 6, 7, 8, 23, 24, 25, 26, 27, 28, 29, 30,
  };
  const std::vector<uint8_t> actual = RemovePadding(
      kImageData.data(), kWidth, kHeight, kBytesPerPixel, kBytesPerRow);
  EXPECT_EQ(kWidth * kHeight, actual.size());
  EXPECT_THAT(actual, ContainerEq(kExpected));
}
} // namespace
} // namespace tflite
} // namespace lstm_object_detection
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "utils/file_utils.h"
#include <fstream>
#include <glog/logging.h>
namespace lstm_object_detection {
namespace tflite {
// Reads the entire contents of 'filename' in binary mode and returns them as
// a string. CHECK-fails if the file cannot be opened or fully read.
std::string ReadFileToString(absl::string_view filename) {
  // absl::string_view::data() is NOT guaranteed to be NUL-terminated, so
  // materialize a std::string before handing the name to the stream. Open at
  // the end (std::ios::ate) so tellg() yields the file size.
  std::ifstream file(std::string(filename), std::ios::binary | std::ios::ate);
  CHECK(file.is_open());
  const std::streamsize filesize = file.tellg();
  CHECK_GE(filesize, 0);
  std::string result;
  result.resize(filesize);
  file.seekg(0);
  CHECK(file.read(&result[0], filesize));
  // The ifstream destructor closes the file.
  return result;
}
// Populates 'labelmap' from 'labelmap_bytes' when non-empty, otherwise from
// the binary proto file 'labelmap_file'. Returns false only when both inputs
// are empty; a malformed proto (or unreadable file) CHECK-fails.
bool LoadLabelMapFromFileOrBytes(const std::string& labelmap_file,
                                 const std::string& labelmap_bytes,
                                 protos::StringIntLabelMapProto* labelmap) {
  if (!labelmap_bytes.empty()) {
    // Bytes take precedence over the file path.
    CHECK(labelmap->ParseFromString(labelmap_bytes));
    return true;
  }
  if (labelmap_file.empty()) {
    LOG(ERROR) << "labelmap file empty.";
    return false;
  }
  CHECK(labelmap->ParseFromString(ReadFileToString(labelmap_file)));
  return true;
}
} // namespace tflite
} // namespace lstm_object_detection
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_MODELS_LSTM_OBJECT_DETECTION_TFLITE_UTILS_FILE_UTILS_H_
#define TENSORFLOW_MODELS_LSTM_OBJECT_DETECTION_TFLITE_UTILS_FILE_UTILS_H_
#include <string>
#include "absl/strings/string_view.h"
#include "protos/labelmap.pb.h"
namespace lstm_object_detection {
namespace tflite {
// Reads the whole file 'filename' in binary mode and returns its contents.
// Fatal (CHECK) if the file cannot be opened or read.
std::string ReadFileToString(absl::string_view filename);
// Load labelmap from a binary proto file or bytes string.
// labelmap_bytes takes precedence over labelmap_file.
// Returns false when both inputs are empty; a malformed proto is fatal.
bool LoadLabelMapFromFileOrBytes(const std::string& labelmap_file,
                                 const std::string& labelmap_bytes,
                                 protos::StringIntLabelMapProto* labelmap);
}  // namespace tflite
}  // namespace lstm_object_detection
#endif  // TENSORFLOW_MODELS_LSTM_OBJECT_DETECTION_TFLITE_UTILS_FILE_UTILS_H_
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "utils/ssd_utils.h"
#include <math.h>
#include <cmath>
#include <glog/logging.h>
#include "absl/strings/str_cat.h"
namespace lstm_object_detection {
namespace tflite {
namespace {
using protos::AnchorGenerationOptions;
using protos::BoxCornerEncoding;
using protos::BoxCornerOffsetCoder;
using protos::CenterSizeEncoding;
using protos::CenterSizeOffsetCoder;
using protos::DetectionResults;
// Fills 'indices' with 0..values.size()-1, ordered so that the referenced
// values are non-increasing.
void DecreasingArgSort(const std::vector<float>& values,
                       std::vector<int>* indices) {
  const int count = static_cast<int>(values.size());
  indices->resize(count);
  for (int pos = 0; pos < count; ++pos) {
    (*indices)[pos] = pos;
  }
  std::sort(indices->begin(), indices->end(),
            [&values](int lhs, int rhs) { return values[lhs] > values[rhs]; });
}
// Writes 0..num_values-1 into 'indices', then partially sorts so that the
// first 'num_to_sort' entries reference 'values' in non-increasing order.
void DecreasingPartialArgSort(const float* values, int num_values,
                              int num_to_sort, int* indices) {
  for (int pos = 0; pos < num_values; ++pos) indices[pos] = pos;
  std::partial_sort(indices, indices + num_to_sort, indices + num_values,
                    [&values](int lhs, int rhs) {
                      return values[lhs] > values[rhs];
                    });
}
// The row index offset is 1 if background class is included and 0 otherwise.
// Derived from the score matrix shape: score_size must be an exact multiple
// of num_boxes (CHECK-enforced), and the per-box column count minus
// num_classes gives the offset.
int GetLabelOffset(const int num_boxes,
                   const int num_classes,
                   const int score_size) {
  const int columns_per_box = score_size / num_boxes;
  const int label_offset = columns_per_box - num_classes;
  CHECK_EQ(score_size, (num_classes + label_offset) * num_boxes);
  return label_offset;
}
// Appends every value >= 'threshold' (and its original position) to the two
// output vectors; entries below the threshold are skipped.
void ApplyThreshold(const std::vector<float>& values,
                    const float threshold,
                    std::vector<float>* keep_values,
                    std::vector<int>* keep_indices) {
  const int count = static_cast<int>(values.size());
  for (int pos = 0; pos < count; ++pos) {
    const float value = values[pos];
    if (value < threshold) continue;
    keep_values->emplace_back(value);
    keep_indices->emplace_back(pos);
  }
}
// CHECK-fails unless all four coordinate arrays are the same length and every
// box satisfies ymax >= ymin and xmax >= xmin.
void ValidateBoxes(const BoxCornerEncoding& boxes) {
  const int box_count = boxes.ymin_size();
  CHECK_EQ(box_count, boxes.ymax_size());
  CHECK_EQ(box_count, boxes.xmin_size());
  CHECK_EQ(box_count, boxes.xmax_size());
  for (int box = 0; box < box_count; ++box) {
    CHECK_GE(boxes.ymax(box), boxes.ymin(box));
    CHECK_GE(boxes.xmax(box), boxes.xmin(box));
  }
}
} // namespace
// Decodes box-corner-offset predictions into absolute corner coordinates.
// Each predicted corner value is a residual, scaled by coder.stddev(), added
// to the matching corner of the anchor (anchors are center/size encoded, so
// corners are reconstructed as center +/- half the size). ymax/xmax are
// clamped so every output box has non-negative extent. Keypoints are not
// supported by this coder (CHECK-enforced).
void DecodeBoxCornerBoxes(const BoxCornerEncoding& predictions,
                          const CenterSizeEncoding& anchors,
                          const BoxCornerOffsetCoder& coder,
                          BoxCornerEncoding* decoded_boxes) {
  const int num_boxes = predictions.ymin_size();
  CHECK_EQ(num_boxes, anchors.y_size());
  CHECK_EQ(predictions.keypoint_y_size(), 0)
      << "BoxCornerOffsetCoder doesn't work with keypoints.";
  float ymin, xmin, ymax, xmax;
  for (int i = 0; i < num_boxes; ++i) {
    ymin = predictions.ymin(i) * coder.stddev() +
        (anchors.y(i) - anchors.h(i) / 2);
    xmin = predictions.xmin(i) * coder.stddev() +
        (anchors.x(i) - anchors.w(i) / 2);
    ymax = predictions.ymax(i) * coder.stddev() +
        (anchors.y(i) + anchors.h(i) / 2);
    xmax = predictions.xmax(i) * coder.stddev() +
        (anchors.x(i) + anchors.w(i) / 2);
    decoded_boxes->add_ymin(ymin);
    decoded_boxes->add_xmin(xmin);
    // Guard against inverted boxes produced by extreme offsets.
    decoded_boxes->add_ymax(std::max(ymax, ymin));
    decoded_boxes->add_xmax(std::max(xmax, xmin));
  }
}
// Decodes center-size predictions into corner-encoded boxes.
// Center offsets are scaled by the coder's x/y scales and the anchor size;
// width/height are decoded with exp() relative to the anchor size. Keypoints
// (if present) are decoded like box centers using the keypoint scales; the
// number of keypoints per box is inferred from the input sizes.
void DecodeCenterSizeBoxes(const CenterSizeEncoding& predictions,
                           const CenterSizeEncoding& anchors,
                           const CenterSizeOffsetCoder& coder,
                           BoxCornerEncoding* decoded_boxes) {
  CHECK_EQ(predictions.y_size(), anchors.y_size());
  const int num_boxes = predictions.y_size();
  const int num_keypoints = predictions.keypoint_y_size() / num_boxes;
  float ycenter, xcenter, h, w, ymin, xmin, ymax, xmax;
  for (int i = 0; i < num_boxes; ++i) {
    ycenter = predictions.y(i) / coder.y_scale() * anchors.h(i) + anchors.y(i);
    xcenter = predictions.x(i) / coder.x_scale() * anchors.w(i) + anchors.x(i);
    h = std::exp(predictions.h(i) / coder.h_scale()) * anchors.h(i);
    w = std::exp(predictions.w(i) / coder.w_scale()) * anchors.w(i);
    // Convert center/size to corners.
    ymin = ycenter - h / 2.;
    xmin = xcenter - w / 2.;
    ymax = ycenter + h / 2.;
    xmax = xcenter + w / 2.;
    decoded_boxes->add_ymin(ymin);
    decoded_boxes->add_xmin(xmin);
    decoded_boxes->add_ymax(ymax);
    decoded_boxes->add_xmax(xmax);
    // keypoints
    for (int j = 0; j < num_keypoints; ++j) {
      float keypoint_y = predictions.keypoint_y(num_keypoints * i + j) /
          coder.keypoint_y_scale() * anchors.h(i) + anchors.y(i);
      float keypoint_x = predictions.keypoint_x(num_keypoints * i + j) /
          coder.keypoint_x_scale() * anchors.w(i) + anchors.x(i);
      decoded_boxes->add_keypoint_y(keypoint_y);
      decoded_boxes->add_keypoint_x(keypoint_x);
    }
  }
}
// Computes intersection-over-union between boxes i and j of 'boxes'.
// Returns 0.0 when either box has non-positive area, so degenerate boxes
// never suppress (or get suppressed by) anything.
float ComputeIOU(const BoxCornerEncoding& boxes, const int i, const int j) {
  const float area_i =
      (boxes.ymax(i) - boxes.ymin(i)) * (boxes.xmax(i) - boxes.xmin(i));
  const float area_j =
      (boxes.ymax(j) - boxes.ymin(j)) * (boxes.xmax(j) - boxes.xmin(j));
  if (area_i <= 0 || area_j <= 0) return 0.0;
  // Intersection rectangle; empty when the max() terms below clamp to 0.
  const float intersection_ymin = std::max<float>(boxes.ymin(i), boxes.ymin(j));
  const float intersection_xmin = std::max<float>(boxes.xmin(i), boxes.xmin(j));
  const float intersection_ymax = std::min<float>(boxes.ymax(i), boxes.ymax(j));
  const float intersection_xmax = std::min<float>(boxes.xmax(i), boxes.xmax(j));
  const float intersection_area =
      std::max<float>(intersection_ymax - intersection_ymin, 0.0) *
      std::max<float>(intersection_xmax - intersection_xmin, 0.0);
  return intersection_area / (area_i + area_j - intersection_area);
}
// Runs greedy NMS independently for each class over a row-major score matrix
// of shape [num_boxes, num_classes + label_offset] and appends one detection
// per selected (box, class) pair to 'detections'. 'detections' is cleared
// first. Keypoints attached to a selected box are copied through.
void NonMaxSuppressionMultiClass(const BoxCornerEncoding& boxes,
                                 const std::vector<float>& scores,
                                 const int num_classes,
                                 const int max_detection_per_class,
                                 const float score_threshold,
                                 const float iou_threshold,
                                 DetectionResults* detections) {
  const int num_boxes = boxes.ymin_size();
  const int num_keypoints = boxes.keypoint_y_size() / num_boxes;
  // The row index offset is 1 if the background class is included.
  const int label_offset =
      GetLabelOffset(num_boxes, num_classes, scores.size());
  detections->Clear();
  std::vector<int> selected;
  std::vector<float> class_scores;
  class_scores.resize(num_boxes);
  // For each class, perform non-max suppression.
  for (int col = 0; col < num_classes; col++) {
    // Gather this class's score column across all boxes.
    for (int row = 0; row < num_boxes; row++) {
      class_scores[row] =
          scores[row * (num_classes + label_offset) + col + label_offset];
    }
    NonMaxSuppression(boxes, class_scores, max_detection_per_class,
                      score_threshold, iou_threshold, &selected);
    for (const auto& selected_index : selected) {
      auto* new_detection = detections->add_detection();
      auto* new_detection_box = new_detection->mutable_box();
      new_detection_box->add_ymin(boxes.ymin(selected_index));
      new_detection_box->add_xmin(boxes.xmin(selected_index));
      new_detection_box->add_ymax(boxes.ymax(selected_index));
      new_detection_box->add_xmax(boxes.xmax(selected_index));
      new_detection->add_score(class_scores[selected_index]);
      new_detection->add_class_index(col);
      for (int i = 0; i < num_keypoints; ++i) {
        new_detection_box->add_keypoint_y(boxes.keypoint_y(
            selected_index * num_keypoints + i));
        new_detection_box->add_keypoint_x(boxes.keypoint_x(
            selected_index * num_keypoints + i));
      }
    }
  }
}
// Faster approximation of multi-class NMS: suppression runs once, on each
// box's best class score, instead of once per class. For every surviving box
// the top min(max_category, num_classes) class scores/indices are emitted on
// a single detection. 'detections' is cleared first.
void NonMaxSuppressionMultiClassFast(
    const BoxCornerEncoding& boxes, const std::vector<float>& scores,
    const int num_classes, const int max_detection, const int max_category,
    const float score_threshold, const float iou_threshold,
    DetectionResults* detections) {
  const int num_boxes = boxes.ymin_size();
  const int num_keypoints = boxes.keypoint_y_size() / num_boxes;
  const int label_offset =
      GetLabelOffset(num_boxes, num_classes, scores.size());
  int num_category = std::min(max_category, num_classes);
  detections->Clear();
  std::vector<float> max_scores;
  max_scores.resize(num_boxes);
  std::vector<int> sorted_class_indices;
  sorted_class_indices.resize(num_boxes * num_classes);
  // For each box, rank its classes by score and remember the best score,
  // which drives the single suppression pass below.
  for (int row = 0; row < num_boxes; row++) {
    const float* box_scores =
        scores.data() + row * (num_classes + label_offset) + label_offset;
    int* class_indices = sorted_class_indices.data() + row * num_classes;
    DecreasingPartialArgSort(box_scores, num_classes, num_category,
                             class_indices);
    max_scores[row] = box_scores[class_indices[0]];
  }
  // Perform non-max suppression on max scores
  std::vector<int> selected;
  NonMaxSuppression(boxes, max_scores, max_detection, score_threshold,
                    iou_threshold, &selected);
  for (const auto& selected_index : selected) {
    auto* new_detection = detections->add_detection();
    auto* new_detection_box = new_detection->mutable_box();
    new_detection_box->add_ymin(boxes.ymin(selected_index));
    new_detection_box->add_xmin(boxes.xmin(selected_index));
    new_detection_box->add_ymax(boxes.ymax(selected_index));
    new_detection_box->add_xmax(boxes.xmax(selected_index));
    const float* box_scores = scores.data() +
        selected_index * (num_classes + label_offset) +
        label_offset;
    const int* class_indices =
        sorted_class_indices.data() + selected_index * num_classes;
    // Emit the top categories for this box.
    for (int i = 0; i < num_category; ++i) {
      new_detection->add_score(box_scores[class_indices[i]]);
      new_detection->add_class_index(class_indices[i]);
    }
    for (int i = 0; i < num_keypoints; ++i) {
      new_detection_box->add_keypoint_y(boxes.keypoint_y(
          selected_index * num_keypoints + i));
      new_detection_box->add_keypoint_x(boxes.keypoint_x(
          selected_index * num_keypoints + i));
    }
  }
}
// Multi-class NMS limited to 'restricted_class_indices' (class-column indices
// of the score matrix). The matrix is sliced to those columns,
// NonMaxSuppressionMultiClassFast runs on the slice, and the class indices in
// the result are remapped back to the caller's original values.
void NonMaxSuppressionMultiClassRestrict(
    std::vector<int> restricted_class_indices, const BoxCornerEncoding& boxes,
    const std::vector<float>& scores, const int num_classes,
    const int max_detection, const int max_category,
    const float score_threshold, const float iou_threshold,
    DetectionResults* detections) {
  int num_boxes = boxes.ymin_size();
  const int label_offset =
      GetLabelOffset(num_boxes, num_classes, scores.size());
  // Slice the score matrix along columns to extract the scores of the
  // restricted classes.
  int restricted_num_classes = restricted_class_indices.size();
  std::vector<float> restricted_scores;
  restricted_scores.reserve(num_boxes * restricted_num_classes);
  for (int i = 0; i < num_boxes; ++i) {
    for (int index : restricted_class_indices) {
      CHECK(index >= 0 && index < num_classes + label_offset);
      restricted_scores.push_back(
          scores[i * (num_classes + label_offset) + index + label_offset]);
    }
  }
  // Apply non-maxima suppression to the sliced score matrix.
  NonMaxSuppressionMultiClassFast(
      boxes, restricted_scores, restricted_num_classes, max_detection,
      max_category, score_threshold, iou_threshold, detections);
  // Resulting indices are based on score matrix column index: remap to the
  // original class indices.
  for (auto& detection : *detections->mutable_detection()) {
    for (int i = 0; i < detection.class_index_size(); ++i) {
      detection.set_class_index(
          i, restricted_class_indices[detection.class_index(i)]);
    }
  }
}
// Greedy non-max suppression over a single score vector.
// Boxes scoring below 'score_threshold' are discarded up front; the rest are
// visited in decreasing score order, and each selected box suppresses every
// lower-scored box whose IOU with it exceeds 'iou_threshold'. At most
// 'max_detection' indices (into the original 'boxes'/'scores') are appended
// to 'selected', which is cleared first.
void NonMaxSuppression(const BoxCornerEncoding& boxes,
                       const std::vector<float>& scores,
                       const int max_detection, const float score_threshold,
                       const float iou_threshold, std::vector<int>* selected) {
  CHECK_EQ(boxes.ymin_size(), scores.size())
      << "The number of bounding boxes and scores does not match.";
  CHECK_GT(max_detection, 0) << "Maximum detections should be positive.";
  CHECK_GT(iou_threshold, 0.0) << "iou_threshold should be positive.";
  CHECK_LT(iou_threshold, 1.0) << "iou_threshold should be less than 1.";
  ValidateBoxes(boxes);
  // threshold scores
  std::vector<int> keep_indices;
  std::vector<float> keep_scores;
  ApplyThreshold(scores, score_threshold, &keep_scores, &keep_indices);
  std::vector<int> sorted_indices;
  DecreasingArgSort(keep_scores, &sorted_indices);
  const int num_boxes = keep_scores.size();
  const int output_size = std::min(num_boxes, max_detection);
  // active[i] == true means the i-th surviving box (in sorted order) is
  // still a candidate for selection.
  std::vector<bool> active(num_boxes, true);
  selected->clear();
  int num_active = active.size();
  for (int i = 0; i < num_boxes; ++i) {
    if (num_active == 0 || selected->size() >= output_size) break;
    if (active[i]) {
      selected->push_back(keep_indices[sorted_indices[i]]);
      active[i] = false;
      num_active--;
    } else {
      continue;
    }
    // Suppress every remaining candidate that overlaps the just-selected box
    // more than the IOU threshold allows.
    for (int j = i + 1; j < num_boxes; ++j) {
      if (active[j]) {
        float iou = ComputeIOU(boxes, keep_indices[sorted_indices[i]],
                               keep_indices[sorted_indices[j]]);
        if (iou > iou_threshold) {
          active[j] = false;
          num_active--;
        }
      }
    }
  }
}
// Converts pixel-space detection boxes (and their keypoints) into normalized
// coordinates by dividing x values by 'width' and y values by 'height'.
void NormalizeDetectionBoxes(const int width, const int height,
                             DetectionResults* boxes) {
  for (auto& detection : *boxes->mutable_detection()) {
    auto* box = detection.mutable_box();
    box->set_xmin(0, box->xmin(0) / width);
    box->set_xmax(0, box->xmax(0) / width);
    box->set_ymin(0, box->ymin(0) / height);
    box->set_ymax(0, box->ymax(0) / height);
    const int keypoint_count = box->keypoint_y_size();
    for (int k = 0; k < keypoint_count; ++k) {
      box->set_keypoint_x(k, box->keypoint_x(k) / width);
      box->set_keypoint_y(k, box->keypoint_y(k) / height);
    }
  }
}
// Converts normalized detection boxes (and keypoints) back to pixel
// coordinates by scaling with (width - 1) / (height - 1).
// NOTE(review): this scales by (dim - 1) while NormalizeDetectionBoxes
// divides by dim, so the two functions are not exact inverses — presumably a
// last-pixel-index convention; confirm with callers.
void DenormalizeDetectionBoxes(const int width, const int height,
                               DetectionResults* boxes) {
  for (auto& det : *boxes->mutable_detection()) {
    auto* box = det.mutable_box();
    box->set_ymin(0, box->ymin(0) * (height - 1));
    box->set_ymax(0, box->ymax(0) * (height - 1));
    box->set_xmin(0, box->xmin(0) * (width - 1));
    box->set_xmax(0, box->xmax(0) * (width - 1));
    const int num_keypoints = box->keypoint_y_size();
    for (int i = 0; i < num_keypoints; ++i) {
      box->set_keypoint_y(i, box->keypoint_y(i) * (height - 1));
      box->set_keypoint_x(i, box->keypoint_x(i) * (width - 1));
    }
  }
}
// Clips every detection box to the unit square: mins raised to >= 0, maxes
// lowered to <= 1. Keypoints are left untouched.
void ClampBoxCoordinates(DetectionResults* boxes) {
  for (auto& det : *boxes->mutable_detection()) {
    auto* box = det.mutable_box();
    box->set_xmin(0, std::max(0.f, box->xmin(0)));
    box->set_ymin(0, std::max(0.f, box->ymin(0)));
    box->set_xmax(0, std::min(1.f, box->xmax(0)));
    box->set_ymax(0, std::min(1.f, box->ymax(0)));
  }
}
// Generates SSD anchors from an AnchorGenerationOptions proto.
// When both image_width and image_height are set, anchor offsets are also
// taken from the options; otherwise offsets default to half the stride and
// the base anchor size doubles as the input size. Setting exactly one of
// image_width/image_height is fatal (CHECK).
bool GenerateSsdAnchors(const AnchorGenerationOptions& options,
                        CenterSizeEncoding* anchors) {
  const int base_anchor_width = options.base_anchor_width();
  const int base_anchor_height = options.base_anchor_height();
  const float min_anchor_scale = options.min_anchor_scale();
  const float max_anchor_scale = options.max_anchor_scale();
  // Copy the repeated proto fields into plain vectors for the core overload.
  const float* aspect_ratios_ptr = options.anchor_aspect_ratios().data();
  const int num_aspect_ratios = options.anchor_aspect_ratios_size();
  const std::vector<float> anchor_aspect_ratios(
      aspect_ratios_ptr, aspect_ratios_ptr + num_aspect_ratios);
  const int* strides_ptr = options.anchor_strides().data();
  const int num_strides = options.anchor_strides_size();
  const std::vector<int> anchor_strides(strides_ptr, strides_ptr + num_strides);
  // Must set both image width and height or neither
  CHECK_EQ(options.has_image_width(), options.has_image_height());
  if (options.has_image_width() && options.has_image_height()) {
    const int* offsets_ptr = options.anchor_offsets().data();
    const int num_offsets = options.anchor_offsets_size();
    const std::vector<int> anchor_offsets(offsets_ptr,
                                          offsets_ptr + num_offsets);
    return GenerateSsdAnchors(
        options.image_width(), options.image_height(), base_anchor_width,
        base_anchor_height, min_anchor_scale, max_anchor_scale,
        anchor_aspect_ratios, anchor_strides, anchor_offsets, anchors);
  }
  return GenerateSsdAnchors(base_anchor_width, base_anchor_height,
                            min_anchor_scale, max_anchor_scale,
                            anchor_aspect_ratios, anchor_strides, anchors);
}
// Convenience overload: the base anchor spans the full input image and each
// layer's offset defaults to half its stride, rounded up.
bool GenerateSsdAnchors(int input_width, int input_height, float min_scale,
                        float max_scale,
                        const std::vector<float>& aspect_ratios,
                        const std::vector<int>& anchor_strides,
                        CenterSizeEncoding* anchors) {
  std::vector<int> anchor_offsets;
  anchor_offsets.reserve(anchor_strides.size());
  for (const int stride : anchor_strides) {
    anchor_offsets.push_back((stride + 1) / 2);
  }
  return GenerateSsdAnchors(input_width, input_height, input_width,
                            input_height, min_scale, max_scale, aspect_ratios,
                            anchor_strides, anchor_offsets, anchors);
}
// Core SSD anchor generator. For each layer i (one stride/offset pair each):
//   - a linearly interpolated scale in [min_scale, max_scale] is assigned,
//   - anchor centers are laid out on a grid of step anchor_strides[i],
//   - layer 0 emits three fixed anchors per cell (scale 0.1 at aspect ratio
//     1.0, plus the layer scale at aspect ratios 2.0 and 0.5),
//   - every other layer emits one anchor per aspect ratio plus an
//     "interpolated" anchor at sqrt(scale_i * scale_{i+1}).
// All output centers/sizes are normalized by the input dimensions. Returns
// false when strides and offsets differ in length.
bool GenerateSsdAnchors(int input_width, int input_height,
                        int base_anchor_width, int base_anchor_height,
                        float min_scale, float max_scale,
                        const std::vector<float>& aspect_ratios,
                        const std::vector<int>& anchor_strides,
                        const std::vector<int>& anchor_offsets,
                        CenterSizeEncoding* anchors) {
  constexpr float kSqrt2 = 1.414213562f;
  int num_layers = anchor_strides.size();
  if (num_layers != anchor_offsets.size()) {
    LOG(ERROR) << absl::StrCat("The size of anchor strides (",
                               anchor_strides.size(),
                               ") and anchor "
                               "offsets (",
                               anchor_offsets.size(), ") must be the same.");
    return false;
  }
  std::vector<float> scales(num_layers);
  // Populate scales.
  // NOTE(review): when num_layers == 1 this divides by zero (NaN scale);
  // callers appear to always pass multiple layers — confirm.
  for (int i = 0; i < num_layers; ++i) {
    scales[i] = min_scale + (max_scale - min_scale) * i / (num_layers - 1);
  }
  // Populate square roots of aspect ratios.
  int num_aspect_ratios = aspect_ratios.size();
  std::vector<float> sqrt_aspect_ratios(num_aspect_ratios);
  for (int i = 0; i < num_aspect_ratios; ++i) {
    sqrt_aspect_ratios[i] = std::sqrt(aspect_ratios[i]);
  }
  // Generate anchors.
  float normalized_width = static_cast<float>(base_anchor_width) / input_width;
  float normalized_height =
      static_cast<float>(base_anchor_height) / input_height;
  anchors->Clear();
  for (int i = 0; i < num_layers; ++i) {
    float scale = scales[i];
    float next_scale;
    if (i == num_layers - 1) {
      // Last layer interpolates toward scale 1.0.
      next_scale = 1.0;
    } else {
      next_scale = scales[i + 1];
    }
    float interpolated_scale = std::sqrt(scale * next_scale);
    float normalized_scale_width = scale * normalized_width;
    float normalized_scale_height = scale * normalized_height;
    // Grid dimensions round up so the whole image is covered.
    int anchor_map_height =
        (input_height + anchor_strides[i] - 1) / anchor_strides[i];
    int anchor_map_width =
        (input_width + anchor_strides[i] - 1) / anchor_strides[i];
    for (int anchor_idx_y = 0; anchor_idx_y < anchor_map_height;
         ++anchor_idx_y) {
      float y = static_cast<float>(
          anchor_offsets[i] + anchor_strides[i] * anchor_idx_y) / input_height;
      for (int anchor_idx_x = 0; anchor_idx_x < anchor_map_width;
           ++anchor_idx_x) {
        float x = static_cast<float>(
            anchor_offsets[i] + anchor_strides[i] * anchor_idx_x) / input_width;
        if (i == 0) {
          // Scale: 0.1, Aspect Ratio: 1.0
          anchors->add_x(x);
          anchors->add_y(y);
          anchors->add_w(0.1 * normalized_width);
          anchors->add_h(0.1 * normalized_height);
          // Scale: scale, Aspect Ratio: 2.0
          anchors->add_x(x);
          anchors->add_y(y);
          anchors->add_w(normalized_scale_width * kSqrt2);
          anchors->add_h(normalized_scale_height / kSqrt2);
          // Scale: scale, Aspect Ratio: 0.5
          anchors->add_x(x);
          anchors->add_y(y);
          anchors->add_w(normalized_scale_width / kSqrt2);
          anchors->add_h(normalized_scale_height * kSqrt2);
          continue;
        }
        for (int j = 0; j < num_aspect_ratios; ++j) {
          // Scale: scale, Aspect Ratio: aspect_ratio
          anchors->add_x(x);
          anchors->add_y(y);
          anchors->add_w(normalized_scale_width * sqrt_aspect_ratios[j]);
          anchors->add_h(normalized_scale_height / sqrt_aspect_ratios[j]);
        }
        // Interpolated anchors
        anchors->add_x(x);
        anchors->add_y(y);
        anchors->add_w(interpolated_scale * normalized_width);
        anchors->add_h(interpolated_scale * normalized_height);
      }
    }
  }
  return true;
}
} // namespace tflite
} // namespace lstm_object_detection
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_MODELS_LSTM_OBJECT_DETECTION_TFLITE_UTILS_SSD_UTILS_H_
#define TENSORFLOW_MODELS_LSTM_OBJECT_DETECTION_TFLITE_UTILS_SSD_UTILS_H_
#include "protos/anchor_generation_options.pb.h"
#include "protos/box_encodings.pb.h"
#include "protos/detections.pb.h"
namespace lstm_object_detection {
namespace tflite {
// Decodes bounding boxes using CenterSizeOffsetCoder given network
// predictions and anchor encodings, writing the result into `decoded_boxes`.
void DecodeCenterSizeBoxes(const protos::CenterSizeEncoding& predictions,
                           const protos::CenterSizeEncoding& anchors,
                           const protos::CenterSizeOffsetCoder& coder,
                           protos::BoxCornerEncoding* decoded_boxes);
// Decodes bounding boxes using BoxCornerOffsetCoder given network
// predictions and anchor encodings, writing the result into `decoded_boxes`.
void DecodeBoxCornerBoxes(const protos::BoxCornerEncoding& predictions,
                          const protos::CenterSizeEncoding& anchors,
                          const protos::BoxCornerOffsetCoder& coder,
                          protos::BoxCornerEncoding* decoded_boxes);
// Computes the intersection-over-union (IOU) overlap between the boxes at
// indices `i` and `j` of `boxes`.
float ComputeIOU(const protos::BoxCornerEncoding& boxes, const int i,
                 const int j);
// Performs Non-max suppression (multi-class) on a list of bounding boxes
// and prediction scores: per-class suppression with at most
// `max_detection_per_class` results per class, keeping only detections with
// score above `score_threshold` and suppressing overlaps above
// `iou_threshold`.
void NonMaxSuppressionMultiClass(const protos::BoxCornerEncoding& boxes,
                                 const std::vector<float>& scores,
                                 const int num_classes,
                                 const int max_detection_per_class,
                                 const float score_threshold,
                                 const float iou_threshold,
                                 protos::DetectionResults* detections);
// A fast (but not exact) version of non-max suppression (multi-class).
// Instead of computing per class non-max suppression, anchor-wise class
// maximum is computed on a list of bounding boxes and scores. This means
// that different classes can suppress each other.
void NonMaxSuppressionMultiClassFast(
    const protos::BoxCornerEncoding& boxes, const std::vector<float>& scores,
    const int num_classes, const int max_detection, const int max_category,
    const float score_threshold, const float iou_threshold,
    protos::DetectionResults* detections);
// Similar to NonMaxSuppressionMultiClassFast, but restricts the results to
// the provided list of class indices. This effectively filters out any class
// whose index is not in `restricted_class_indices`.
void NonMaxSuppressionMultiClassRestrict(
    std::vector<int> restricted_class_indices,
    const protos::BoxCornerEncoding& boxes, const std::vector<float>& scores,
    const int num_classes, const int max_detection, const int max_category,
    const float score_threshold, const float iou_threshold,
    protos::DetectionResults* detections);
// Performs Non-max suppression (single class) on a list of bounding boxes
// and scores, storing the indices of the kept boxes in `selected_indices`.
// The function implements a modified version of:
// third_party/tensorflow/core/kernels/non_max_suppression_op.cc
void NonMaxSuppression(const protos::BoxCornerEncoding& boxes,
                       const std::vector<float>& scores,
                       const int max_detection, const float score_threshold,
                       const float iou_threshold,
                       std::vector<int>* selected_indices);
// Normalizes output bounding boxes such that the coordinates are in [0, 1],
// dividing by the given absolute `width` and `height`.
void NormalizeDetectionBoxes(const int width, const int height,
                             protos::DetectionResults* boxes);
// Denormalizes output bounding boxes so that the coordinates are scaled to
// the absolute width and height.
void DenormalizeDetectionBoxes(const int width, const int height,
                               protos::DetectionResults* boxes);
// Clamps detection box coordinates to be between [0, 1].
void ClampBoxCoordinates(protos::DetectionResults* boxes);
// Generates SSD anchors for the given input and anchor parameters. These
// methods generate the anchors described in https://arxiv.org/abs/1512.02325
// and is similar to the anchor generation logic in
// //third_party/tensorflow_models/
// object_detection/anchor_generators/multiple_grid_anchor_generator.py.
// All three overloads return true on success and false (with an error log)
// when the anchor parameters are inconsistent, e.g. when anchor_strides and
// anchor_offsets differ in length.
// This overload derives per-layer anchor offsets from the strides.
bool GenerateSsdAnchors(int input_width, int input_height, float min_scale,
                        float max_scale,
                        const std::vector<float>& aspect_ratios,
                        const std::vector<int>& anchor_strides,
                        protos::CenterSizeEncoding* anchors);
// This overload additionally takes an explicit base anchor size (in input
// pixels) and explicit per-layer offsets.
bool GenerateSsdAnchors(int input_width, int input_height,
                        int base_anchor_width, int base_anchor_height,
                        float min_scale, float max_scale,
                        const std::vector<float>& aspect_ratios,
                        const std::vector<int>& anchor_strides,
                        const std::vector<int>& anchor_offsets,
                        protos::CenterSizeEncoding* anchors);
// This overload reads the anchor parameters from an AnchorGenerationOptions
// proto.
bool GenerateSsdAnchors(const protos::AnchorGenerationOptions& options,
                        protos::CenterSizeEncoding* anchors);
} // namespace tflite
} // namespace lstm_object_detection
#endif // TENSORFLOW_MODELS_LSTM_OBJECT_DETECTION_TFLITE_UTILS_SSD_UTILS_H_
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment