Commit 66d00a87 authored by yongzhe2160, committed by Menglong Zhu
Browse files

Merged commit includes the following changes: (#7220)

* Merged commit includes the following changes:
257930561  by yongzhe:

    Mobile LSTD TfLite Client.

--
257928126  by yongzhe:

    Mobile SSD Tflite client.

--
257921181  by menglong:

    Fix discrepancy between pre_bottleneck = {true, false}

--
257561213  by yongzhe:

    File utils.

--
257449226  by yongzhe:

    Mobile SSD Client.

--
257264654  by yongzhe:

    SSD utils.

--
257235648  by yongzhe:

    Proto bazel build rules.

--
256437262  by Menglong Zhu:

    Fix check for FusedBatchNorm op to only verify it as a prefix.

--
256283755  by yongzhe:

    Bazel build and copybara changes.

--
251947295  by yinxiao:

    Add missing interleaved option in checkpoint restore.

--
251513479  by yongzhe:

    Conversion utils.

--
248783193  by yongzhe:

    Branch protos needed for the lstd client.

--
248200507  by menglong:

    Fix proto namespace in example config

--

PiperOrigin-RevId: 257930561

* Delete BUILD
parent 395f6d2d
# Package-wide defaults: every target declared in this BUILD file is visible
# to all other packages unless it overrides 'visibility' itself.
package(
default_visibility = ["//visibility:public"],
)
# License type declaration for the targets in this package.
licenses(["notice"])
# Library with the image padding helpers declared in conversion_utils.h
# (HasPadding / RemovePadding). glog supplies the CHECK macros used by the
# implementation.
# NOTE(review): the conversion_utils sources shown in this change include no
# absl header, so the absl/base:core_headers dep may be unnecessary -- confirm.
cc_library(
name = "conversion_utils",
srcs = ["conversion_utils.cc"],
hdrs = ["conversion_utils.h"],
deps = [
"@com_google_absl//absl/base:core_headers",
"@com_google_glog//:glog",
],
)
# Unit tests for :conversion_utils; gtest_main provides the test runner's
# main().
# NOTE(review): conversion_utils_test.cc includes <glog/logging.h> and
# <gmock/gmock.h> directly, but glog is only reachable here through
# :conversion_utils -- confirm whether it should be listed explicitly.
cc_test(
name = "conversion_utils_test",
srcs = ["conversion_utils_test.cc"],
deps = [
":conversion_utils",
"@com_google_googletest//:gtest_main",
],
)
# Library with the SSD post-processing helpers declared in ssd_utils.h (box
# decoding, non-max suppression, anchor generation). The three proto deps
# correspond to the generated headers that ssd_utils.h includes.
cc_library(
name = "ssd_utils",
srcs = ["ssd_utils.cc"],
hdrs = ["ssd_utils.h"],
deps = [
"//protos:anchor_generation_options_cc_proto",
"//protos:box_encodings_cc_proto",
"//protos:detections_cc_proto",
"@com_google_absl//absl/strings",
"@com_google_glog//:glog",
],
)
# Library with the file helpers declared in file_utils.h (ReadFileToString,
# LoadLabelMapFromFileOrBytes). absl/strings provides absl::string_view and
# labelmap_cc_proto provides protos/labelmap.pb.h, both included by the header.
cc_library(
name = "file_utils",
srcs = ["file_utils.cc"],
hdrs = ["file_utils.h"],
deps = [
"//protos:labelmap_cc_proto",
"@com_google_absl//absl/strings",
"@com_google_glog//:glog",
],
)
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "utils/conversion_utils.h"
#include <glog/logging.h>
namespace lstm_object_detection {
namespace tflite {
// Reports whether pixel data with the given layout contains bytes that
// RemovePadding() would strip.
//
// Aborts via CHECK unless 'width' and 'height' are positive,
// 'bytes_per_pixel' is 1, 3 or 4, and 'bytes_per_row' is large enough to hold
// 'width' pixels.
//
// Returns true for every 4-byte-per-pixel layout, because the fourth byte of
// each pixel is dropped by RemovePadding(); otherwise returns true exactly
// when each row carries extra bytes after its last pixel.
bool HasPadding(int width, int height, int bytes_per_pixel, int bytes_per_row) {
  CHECK_LT(0, width);
  CHECK_LT(0, height);
  CHECK(bytes_per_pixel == 1 || bytes_per_pixel == 3 || bytes_per_pixel == 4);
  // Widen before multiplying: 'width * bytes_per_pixel' evaluated in int can
  // overflow (undefined behavior) for very wide rows, which would let an
  // invalid 'bytes_per_row' slip past the check below.
  const int64_t min_bytes_per_row =
      static_cast<int64_t>(width) * bytes_per_pixel;
  const int64_t actual_bytes_per_row = bytes_per_row;
  CHECK_LE(min_bytes_per_row, actual_bytes_per_row);
  if (bytes_per_pixel == 4) {
    return true;
  }
  return min_bytes_per_row < actual_bytes_per_row;
}
// Copies interleaved, row-major pixel data into a tightly packed buffer.
//
// Each source row starts 'bytes_per_row' bytes after the previous one and
// holds 'width' pixels of 'bytes_per_pixel' bytes. The output keeps 1 byte
// per pixel when 'bytes_per_pixel' is 1 and 3 bytes per pixel otherwise, so
// the fourth byte of 4-byte pixels and any trailing bytes in a row are
// skipped.
//
// Aborts via CHECK if 'image_data' is null, 'width' or 'height' is not
// positive, 'bytes_per_pixel' is not 1, 3 or 4, or 'bytes_per_row' is too
// small to hold 'width' pixels.
std::vector<uint8_t> RemovePadding(const uint8_t* image_data, int width,
                                   int height, int bytes_per_pixel,
                                   int bytes_per_row) {
  CHECK(image_data != nullptr);
  CHECK_LT(0, width);
  CHECK_LT(0, height);
  CHECK(bytes_per_pixel == 1 || bytes_per_pixel == 3 || bytes_per_pixel == 4);
  // Compare in 64 bits so the product cannot overflow int.
  const int64_t min_bytes_per_row =
      static_cast<int64_t>(width) * bytes_per_pixel;
  const int64_t actual_bytes_per_row = bytes_per_row;
  CHECK_LE(min_bytes_per_row, actual_bytes_per_row);

  using SizeType = std::vector<uint8_t>::size_type;
  const int unpadded_bytes_per_pixel = (bytes_per_pixel == 1 ? 1 : 3);
  const int pixel_padding = (bytes_per_pixel == 4 ? 1 : 0);

  // The output size and the write index use the vector's size type: as int,
  // 'width * height * 3' overflows for large images.
  const SizeType unpadded_size =
      static_cast<SizeType>(width) * static_cast<SizeType>(height) *
      static_cast<SizeType>(unpadded_bytes_per_pixel);
  std::vector<uint8_t> unpadded_image_data(unpadded_size);

  const uint8_t* row_ptr = image_data;
  SizeType index = 0;
  for (int y = 0; y < height; ++y) {
    const uint8_t* ptr = row_ptr;
    for (int x = 0; x < width; ++x) {
      for (int d = 0; d < unpadded_bytes_per_pixel; ++d) {
        unpadded_image_data[index++] = *ptr++;
      }
      // Skip the dropped fourth byte of a 4-byte pixel, if any.
      ptr += pixel_padding;
    }
    // Jump to the next row; this also skips any trailing row bytes.
    row_ptr += bytes_per_row;
  }
  return unpadded_image_data;
}
} // namespace tflite
} // namespace lstm_object_detection
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Lightweight utilities related to conversion of input images.
#ifndef TENSORFLOW_MODELS_LSTM_OBJECT_DETECTION_TFLITE_UTILS_CONVERSION_UTILS_H_
#define TENSORFLOW_MODELS_LSTM_OBJECT_DETECTION_TFLITE_UTILS_CONVERSION_UTILS_H_
#include <vector>
#include <cstdint>
namespace lstm_object_detection {
namespace tflite {
// Finds out whether a call to 'RemovePadding()' is needed to process the given
// pixel data layout in order to make it suitable for the model input layer.
// 'width' and 'height' must be positive, 'bytes_per_row' must be at least
// 'width * bytes_per_pixel', and 'bytes_per_pixel' must be 1, 3 or 4, which
// implies a grayscale, RGB, or RGBA image respectively. Violating any of
// these preconditions aborts the process via CHECK.
// Returns true iff excess bytes exist in the associated pixel data: always
// for 4 bytes per pixel, otherwise only when 'bytes_per_row' is larger than
// 'width * bytes_per_pixel'.
bool HasPadding(int width, int height, int bytes_per_pixel, int bytes_per_row);
// Removes padding at the pixel and row level of pixel data which is stored in
// the usual row major order ("interleaved"). Produces pixel data which is
// suitable for the model input layer: 1 byte per pixel when 'bytes_per_pixel'
// is 1 and 3 bytes per pixel otherwise (the fourth byte of every 4-byte pixel
// is dropped), with no bytes between rows. If 'HasPadding()' is false then
// this function will return an identical copy of 'image'. For restrictions on
// the integer parameters see the comment on 'HasPadding()'.
std::vector<uint8_t> RemovePadding(const uint8_t* image, int width, int height,
int bytes_per_pixel, int bytes_per_row);
} // namespace tflite
} // namespace lstm_object_detection
#endif // TENSORFLOW_MODELS_LSTM_OBJECT_DETECTION_TFLITE_UTILS_CONVERSION_UTILS_H_
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "utils/conversion_utils.h"
#include <vector>
#include <glog/logging.h>
#include <gmock/gmock.h>
#include "gtest/gtest.h"
using testing::ContainerEq;
namespace lstm_object_detection {
namespace tflite {
namespace {
// A zero width or a zero height must trip the positivity CHECKs in
// HasPadding() and terminate the process.
TEST(ConversionUtilsTests, HasPaddingNonPositiveDimensions) {
  constexpr int kBytesPerPixel = 4;
  constexpr int kBytesPerRow = 12;

  // Zero width, valid height.
  EXPECT_DEATH(HasPadding(/* width= */ 0, /* height= */ 4, kBytesPerPixel,
                          kBytesPerRow),
               "");
  // Valid width, zero height.
  EXPECT_DEATH(HasPadding(/* width= */ 3, /* height= */ 0, kBytesPerPixel,
                          kBytesPerRow),
               "");
}
// Any pixel depth other than 1, 3 or 4 bytes must be rejected by a CHECK.
TEST(ConversionUtilsTests, HasPaddingIllegalDepth) {
  const std::vector<int> kIllegalDepths = {-1, 0, 2, 5, 6};
  for (const int depth : kIllegalDepths) {
    EXPECT_DEATH(HasPadding(/* width= */ 3, /* height= */ 4, depth,
                            /* bytes_per_row= */ 12),
                 "");
  }
}
// Four-byte pixels: a row that is too short dies, and every valid row length
// reports padding because the fourth byte of each pixel is itself padding.
TEST(ConversionUtilsTests, HasPaddingWithRGBAImage) {
  constexpr int kWidth = 3;
  constexpr int kHeight = 4;
  constexpr int kBytesPerPixel = 4;
  // Smallest legal row length for this layout (12 bytes).
  constexpr int kTightRow = kWidth * kBytesPerPixel;

  EXPECT_DEATH(HasPadding(kWidth, kHeight, kBytesPerPixel, kTightRow - 1), "");
  EXPECT_TRUE(HasPadding(kWidth, kHeight, kBytesPerPixel, kTightRow));
  EXPECT_TRUE(HasPadding(kWidth, kHeight, kBytesPerPixel, kTightRow + 1));
}
// Three-byte pixels: a row that is too short dies, a tight row has no
// padding, and one extra byte per row counts as padding.
TEST(ConversionUtilsTests, HasPaddingWithRGBImage) {
  constexpr int kWidth = 3;
  constexpr int kHeight = 4;
  constexpr int kBytesPerPixel = 3;
  // Smallest legal row length for this layout (9 bytes).
  constexpr int kTightRow = kWidth * kBytesPerPixel;

  EXPECT_DEATH(HasPadding(kWidth, kHeight, kBytesPerPixel, kTightRow - 1), "");
  EXPECT_FALSE(HasPadding(kWidth, kHeight, kBytesPerPixel, kTightRow));
  EXPECT_TRUE(HasPadding(kWidth, kHeight, kBytesPerPixel, kTightRow + 1));
}
// One-byte pixels: a row that is too short dies, a tight row has no padding,
// and one extra byte per row counts as padding.
TEST(ConversionUtilsTests, HasPaddingWithGrayscaleImage) {
  constexpr int kWidth = 3;
  constexpr int kHeight = 4;
  constexpr int kBytesPerPixel = 1;
  // Smallest legal row length for this layout (3 bytes).
  constexpr int kTightRow = kWidth * kBytesPerPixel;

  EXPECT_DEATH(HasPadding(kWidth, kHeight, kBytesPerPixel, kTightRow - 1), "");
  EXPECT_FALSE(HasPadding(kWidth, kHeight, kBytesPerPixel, kTightRow));
  EXPECT_TRUE(HasPadding(kWidth, kHeight, kBytesPerPixel, kTightRow + 1));
}
// RGBA input with tight rows: every fourth byte must be dropped, leaving
// three bytes per pixel.
TEST(ConversionUtilsTests, RemovePaddingWithRGBAImage) {
  constexpr int kWidth = 4;
  constexpr int kHeight = 2;
  constexpr int kBytesPerPixel = 4;
  constexpr int kStride = kBytesPerPixel * kWidth;
  const std::vector<uint8_t> kImageData{
      1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
      21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36};
  // Cast the int side so the comparison with size() is unsigned on both
  // sides (avoids a signed/unsigned comparison inside the gtest macro).
  ASSERT_EQ(static_cast<size_t>(kHeight * kStride), kImageData.size());

  const std::vector<uint8_t> actual = RemovePadding(
      kImageData.data(), kWidth, kHeight, kBytesPerPixel, kStride);

  const std::vector<uint8_t> kExpected = {
      1,  2,  3,  5,  6,  7,  9,  10, 11, 13, 14, 15,
      21, 22, 23, 25, 26, 27, 29, 30, 31, 33, 34, 35,
  };
  EXPECT_EQ(static_cast<size_t>(3 * kWidth * kHeight), actual.size());
  EXPECT_THAT(actual, ContainerEq(kExpected));
}
// RGB input with tight rows has nothing to strip, so the output must equal
// the input byte for byte.
TEST(ConversionUtilsTests, RemovePaddingWithRGBImage) {
  constexpr int kWidth = 4;
  constexpr int kHeight = 2;
  constexpr int kBytesPerPixel = 3;
  constexpr int kBytesPerRow = kBytesPerPixel * kWidth;
  const std::vector<uint8_t> kImageData{1,  2,  3,  5,  6,  7,  9,  10,
                                        11, 13, 14, 15, 21, 22, 23, 25,
                                        26, 27, 29, 30, 31, 33, 34, 35};
  // Cast the int side so the comparison with size() is unsigned on both
  // sides (avoids a signed/unsigned comparison inside the gtest macro).
  ASSERT_EQ(static_cast<size_t>(kHeight * kBytesPerRow), kImageData.size());

  const std::vector<uint8_t> actual = RemovePadding(
      kImageData.data(), kWidth, kHeight, kBytesPerPixel, kBytesPerRow);

  EXPECT_EQ(static_cast<size_t>(3 * kWidth * kHeight), actual.size());
  EXPECT_THAT(actual, ContainerEq(kImageData));
}
// Grayscale input with tight rows has nothing to strip, so the output must
// equal the input byte for byte.
TEST(ConversionUtilsTests, RemovePaddingWithGrayscaleImage) {
  constexpr int kWidth = 8;
  constexpr int kHeight = 2;
  constexpr int kBytesPerPixel = 1;
  constexpr int kBytesPerRow = kBytesPerPixel * kWidth;
  const std::vector<uint8_t> kImageData{
      1, 2, 3, 4, 5, 6, 7, 8, 21, 22, 23, 24, 25, 26, 27, 28,
  };
  // Cast the int side so the comparison with size() is unsigned on both
  // sides (avoids a signed/unsigned comparison inside the gtest macro).
  ASSERT_EQ(static_cast<size_t>(kHeight * kBytesPerRow), kImageData.size());

  const std::vector<uint8_t> actual = RemovePadding(
      kImageData.data(), kWidth, kHeight, kBytesPerPixel, kBytesPerRow);

  EXPECT_EQ(static_cast<size_t>(kWidth * kHeight), actual.size());
  EXPECT_THAT(actual, ContainerEq(kImageData));
}
// Grayscale input whose rows carry two trailing padding bytes: those bytes
// must be skipped while every pixel byte is kept.
TEST(ConversionUtilsTests, RemovePaddingWithPadding) {
  constexpr int kWidth = 8;
  constexpr int kHeight = 2;
  constexpr int kBytesPerPixel = 1;
  // Pad each row with two bytes.
  constexpr int kBytesPerRow = kBytesPerPixel * (kWidth + 2);
  const std::vector<uint8_t> kImageData{1,  2,  3,  4,  5,  6,  7,  8,  21, 22,
                                        23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
  // Cast the int side so the comparison with size() is unsigned on both
  // sides (avoids a signed/unsigned comparison inside the gtest macro).
  ASSERT_EQ(static_cast<size_t>(kHeight * kBytesPerRow), kImageData.size());

  const std::vector<uint8_t> actual = RemovePadding(
      kImageData.data(), kWidth, kHeight, kBytesPerPixel, kBytesPerRow);

  const std::vector<uint8_t> kExpected = {
      1, 2, 3, 4, 5, 6, 7, 8, 23, 24, 25, 26, 27, 28, 29, 30,
  };
  EXPECT_EQ(static_cast<size_t>(kWidth * kHeight), actual.size());
  EXPECT_THAT(actual, ContainerEq(kExpected));
}
} // namespace
} // namespace tflite
} // namespace lstm_object_detection
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "utils/file_utils.h"
#include <fstream>
#include <glog/logging.h>
namespace lstm_object_detection {
namespace tflite {
// Reads the entire file named 'filename' in binary mode and returns its
// contents.
//
// Aborts via CHECK if the file cannot be opened, if its size cannot be
// determined, or if fewer bytes than the reported size can be read.
std::string ReadFileToString(absl::string_view filename) {
  // A string_view is not guaranteed to be NUL-terminated, so passing data()
  // straight to the stream could read past the end of the name. Make an
  // owning copy instead.
  const std::string path(filename);
  // std::ios::ate opens the stream positioned at the end, so the initial
  // read position equals the file size.
  std::ifstream file(path, std::ios::binary | std::ios::ate);
  CHECK(file.is_open());
  // Keep the full-width offset type: narrowing to int would truncate sizes
  // above 2 GiB, and tellg() reports failure as -1.
  const std::streamoff filesize = file.tellg();
  CHECK(filesize >= 0);
  std::string result;
  result.resize(static_cast<std::string::size_type>(filesize));
  file.seekg(0);
  CHECK(file.read(&result[0], static_cast<std::streamsize>(filesize)));
  // The stream closes itself when 'file' goes out of scope.
  return result;
}
// Fills 'labelmap' from serialized proto bytes, preferring the in-memory
// 'labelmap_bytes' and falling back to the contents of 'labelmap_file'.
//
// Returns false, after logging an error, when both inputs are empty. Aborts
// via CHECK if the chosen bytes do not parse. Returns true otherwise.
bool LoadLabelMapFromFileOrBytes(const std::string& labelmap_file,
                                 const std::string& labelmap_bytes,
                                 protos::StringIntLabelMapProto* labelmap) {
  // In-memory bytes win over the file path.
  if (!labelmap_bytes.empty()) {
    CHECK(labelmap->ParseFromString(labelmap_bytes));
    return true;
  }

  // No bytes supplied: a file path is required.
  if (labelmap_file.empty()) {
    LOG(ERROR) << "labelmap file empty.";
    return false;
  }

  const std::string proto_bytes = ReadFileToString(labelmap_file);
  CHECK(labelmap->ParseFromString(proto_bytes));
  return true;
}
} // namespace tflite
} // namespace lstm_object_detection
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_MODELS_LSTM_OBJECT_DETECTION_TFLITE_UTILS_FILE_UTILS_H_
#define TENSORFLOW_MODELS_LSTM_OBJECT_DETECTION_TFLITE_UTILS_FILE_UTILS_H_
#include <string>
#include "absl/strings/string_view.h"
#include "protos/labelmap.pb.h"
namespace lstm_object_detection {
namespace tflite {
// Reads the whole file named 'filename' in binary mode and returns its
// contents as a string. Aborts via CHECK if the file cannot be opened or
// read.
std::string ReadFileToString(absl::string_view filename);
// Loads a labelmap from a binary proto file or from a bytes string.
// 'labelmap_bytes' takes precedence over 'labelmap_file': the file is only
// read when 'labelmap_bytes' is empty.
// Returns false (after logging an error) when both 'labelmap_bytes' and
// 'labelmap_file' are empty, and true once 'labelmap' has been parsed.
// Aborts via CHECK if the selected bytes cannot be parsed.
bool LoadLabelMapFromFileOrBytes(const std::string& labelmap_file,
const std::string& labelmap_bytes,
protos::StringIntLabelMapProto* labelmap);
} // namespace tflite
} // namespace lstm_object_detection
#endif // TENSORFLOW_MODELS_LSTM_OBJECT_DETECTION_TFLITE_UTILS_FILE_UTILS_H_
This diff is collapsed.
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_MODELS_LSTM_OBJECT_DETECTION_TFLITE_UTILS_SSD_UTILS_H_
#define TENSORFLOW_MODELS_LSTM_OBJECT_DETECTION_TFLITE_UTILS_SSD_UTILS_H_
#include <vector>
#include "protos/anchor_generation_options.pb.h"
#include "protos/box_encodings.pb.h"
#include "protos/detections.pb.h"
namespace lstm_object_detection {
namespace tflite {
// Decodes bounding boxes using CenterSizeOffsetCoder given network
// predictions and anchor encodings. The decoded boxes are written to
// 'decoded_boxes' as a protos::BoxCornerEncoding.
// NOTE(review): presumably 'predictions' and 'anchors' must describe the same
// number of boxes -- confirm against ssd_utils.cc.
void DecodeCenterSizeBoxes(const protos::CenterSizeEncoding& predictions,
const protos::CenterSizeEncoding& anchors,
const protos::CenterSizeOffsetCoder& coder,
protos::BoxCornerEncoding* decoded_boxes);
// Decodes bounding boxes using BoxCornerOffsetCoder given network
// predictions and anchor encodings. Unlike DecodeCenterSizeBoxes(), the
// predictions are passed as a protos::BoxCornerEncoding; the anchors are
// still a protos::CenterSizeEncoding. The decoded boxes are written to
// 'decoded_boxes'.
void DecodeBoxCornerBoxes(const protos::BoxCornerEncoding& predictions,
const protos::CenterSizeEncoding& anchors,
const protos::BoxCornerOffsetCoder& coder,
protos::BoxCornerEncoding* decoded_boxes);
// Computes IOU (intersection over union) overlap between two bounding boxes.
// NOTE(review): 'i' and 'j' are presumably the indices of the two boxes
// within 'boxes' -- confirm against ssd_utils.cc.
float ComputeIOU(const protos::BoxCornerEncoding& boxes, const int i,
const int j);
// Performs Non-max suppression (multi-class) on a list of bounding boxes
// and prediction scores. Results are written to 'detections'.
// NOTE(review): the layout of 'scores' relative to 'boxes' and 'num_classes'
// is not visible from this header -- confirm against ssd_utils.cc.
void NonMaxSuppressionMultiClass(const protos::BoxCornerEncoding& boxes,
const std::vector<float>& scores,
const int num_classes,
const int max_detection_per_class,
const float score_threshold,
const float iou_threshold,
protos::DetectionResults* detections);
// A fast (but not exact) version of non-max suppression (multi-class).
// Instead of computing per class non-max suppression, anchor-wise class
// maximum is computed on a list of bounding boxes and scores. This means
// that different classes can suppress each other. Results are written to
// 'detections'.
// NOTE(review): the exact meaning of 'max_category' is not visible from this
// header -- confirm against ssd_utils.cc.
void NonMaxSuppressionMultiClassFast(
const protos::BoxCornerEncoding& boxes, const std::vector<float>& scores,
const int num_classes, const int max_detection, const int max_category,
const float score_threshold, const float iou_threshold,
protos::DetectionResults* detections);
// Similar to NonMaxSuppressionMultiClassFast, but restricts the results to
// the provided list of class indices. This effectively filters out any class
// whose index is not in this whitelist.
// NOTE(review): 'restricted_class_indices' is taken by value, so the vector
// is copied on every call unless the caller moves it in -- confirm that this
// is intended.
void NonMaxSuppressionMultiClassRestrict(
std::vector<int> restricted_class_indices,
const protos::BoxCornerEncoding& boxes, const std::vector<float>& scores,
const int num_classes, const int max_detection, const int max_category,
const float score_threshold, const float iou_threshold,
protos::DetectionResults* detections);
// Performs Non-max suppression (single class) on a list of bounding boxes
// and scores. The function implements a modified version of:
// third_party/tensorflow/core/kernels/non_max_suppression_op.cc
// The outcome is reported through 'selected_indices'.
// NOTE(review): presumably these are indices into 'boxes' of the boxes that
// were kept -- confirm against ssd_utils.cc.
void NonMaxSuppression(const protos::BoxCornerEncoding& boxes,
const std::vector<float>& scores,
const int max_detection, const float score_threshold,
const float iou_threshold,
std::vector<int>* selected_indices);
// Normalizes output bounding boxes such that the coordinates are in [0, 1].
// 'boxes' is modified in place.
// NOTE(review): presumably the coordinates are divided by 'width' and
// 'height' -- confirm against ssd_utils.cc.
void NormalizeDetectionBoxes(const int width, const int height,
protos::DetectionResults* boxes);
// Denormalizes output bounding boxes so that the coordinates are scaled to
// the absolute width and height. 'boxes' is modified in place.
void DenormalizeDetectionBoxes(const int width, const int height,
protos::DetectionResults* boxes);
// Clamps detection box coordinates to the range [0, 1]. 'boxes' is modified
// in place.
void ClampBoxCoordinates(protos::DetectionResults* boxes);
// Generates SSD anchors for the given input and anchor parameters. These
// methods generate the anchors described in https://arxiv.org/abs/1512.02325
// and are similar to the anchor generation logic in
// //third_party/tensorflow_models/
// object_detection/anchor_generators/multiple_grid_anchor_generator.py.
// The generated anchors are written to 'anchors'.
// NOTE(review): the meaning of the bool return value (presumably success or
// failure) is not visible from this header -- confirm against ssd_utils.cc.
//
// Overload taking the input size, scale range, aspect ratios and strides.
bool GenerateSsdAnchors(int input_width, int input_height, float min_scale,
float max_scale,
const std::vector<float>& aspect_ratios,
const std::vector<int>& anchor_strides,
protos::CenterSizeEncoding* anchors);
// Overload that additionally takes the base anchor size and per-layer anchor
// offsets.
bool GenerateSsdAnchors(int input_width, int input_height,
int base_anchor_width, int base_anchor_height,
float min_scale, float max_scale,
const std::vector<float>& aspect_ratios,
const std::vector<int>& anchor_strides,
const std::vector<int>& anchor_offsets,
protos::CenterSizeEncoding* anchors);
// Overload that reads all parameters from an AnchorGenerationOptions proto.
bool GenerateSsdAnchors(const protos::AnchorGenerationOptions& options,
protos::CenterSizeEncoding* anchors);
} // namespace tflite
} // namespace lstm_object_detection
#endif // TENSORFLOW_MODELS_LSTM_OBJECT_DETECTION_TFLITE_UTILS_SSD_UTILS_H_
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment