Unverified Commit f8889760 authored by MissPenguin's avatar MissPenguin Committed by GitHub
Browse files

Merge pull request #2426 from WenmuZhou/android_demo

add Android demo
parents 9c6ff0a7 a0c93dbd
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ocr_crnn_process.h"
#include <cmath>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iostream>
#include <vector>
// Character-set selector. When it equals "ch" the resize helpers below derive
// the width budget from the requested aspect ratio instead of
// REC_IMAGE_SHAPE[2].
const std::string CHARACTER_TYPE = "ch";
// Not referenced in the visible part of this file.
// NOTE(review): presumably the recognition dictionary size - confirm.
const int MAX_DICT_LENGTH = 6624;
// Recognition input shape; the functions below read it as
// {channels, height, width}.
const std::vector<int> REC_IMAGE_SHAPE = {3, 32, 320};
// Resizes `img` to the recognition height, converts it to float scaled by
// 1/255, applies (x - 0.5) * 2 to every channel and zero-pads the right edge
// up to the width budget.
static cv::Mat crnn_resize_norm_img(cv::Mat img, float wh_ratio) {
  const int target_h = REC_IMAGE_SHAPE[1];
  // For the "ch" character type the width budget follows the requested ratio.
  const int target_w =
      (CHARACTER_TYPE == "ch") ? int(32 * wh_ratio) : REC_IMAGE_SHAPE[2];

  // Width that keeps the source aspect ratio at the target height, capped at
  // the width budget.
  const float aspect = float(img.cols) / float(img.rows);
  const float scaled_w = ceilf(target_h * aspect);
  const int resize_w = (scaled_w > target_w) ? target_w : int(scaled_w);

  cv::Mat resize_img;
  cv::resize(img, resize_img, cv::Size(resize_w, target_h), 0.f, 0.f,
             cv::INTER_CUBIC);
  resize_img.convertTo(resize_img, CV_32FC3, 1 / 255.f);

  for (int row = 0; row < resize_img.rows; ++row) {
    for (int col = 0; col < resize_img.cols; ++col) {
      cv::Vec3f &px = resize_img.at<cv::Vec3f>(row, col);
      for (int ch = 0; ch < 3; ++ch) {
        px[ch] = (px[ch] - 0.5) * 2;
      }
    }
  }

  cv::Mat padded;
  cv::copyMakeBorder(resize_img, padded, 0, 0, 0, int(target_w - resize_w),
                     cv::BORDER_CONSTANT, {0, 0, 0});
  return padded;
}
// Resizes `img` to the recognition height while keeping its aspect ratio,
// capping the resulting width at the width budget (32 * wh_ratio for the "ch"
// character type, REC_IMAGE_SHAPE[2] otherwise).
cv::Mat crnn_resize_img(const cv::Mat &img, float wh_ratio) {
  const int target_h = REC_IMAGE_SHAPE[1];
  int width_cap = REC_IMAGE_SHAPE[2];
  if (CHARACTER_TYPE == "ch") {
    width_cap = int(32 * wh_ratio);
  }

  const float aspect = float(img.cols) / float(img.rows);
  const float scaled_w = ceilf(target_h * aspect);
  const int resize_w = (scaled_w > width_cap) ? width_cap : int(scaled_w);

  cv::Mat resized;
  cv::resize(img, resized, cv::Size(resize_w, target_h));
  return resized;
}
// Cuts the quadrilateral `box` out of `srcimage` and rectifies it with a
// perspective warp.
//
// `box` must hold at least four {x, y} points; they are used in the order
// given as top-left, top-right, bottom-right, bottom-left of the output. The
// bounding rectangle of the four points must lie inside `srcimage`.
// If the rectified patch is at least 1.5 times taller than wide it is
// transposed and flipped around the x-axis before being returned.
cv::Mat get_rotate_crop_image(const cv::Mat &srcimage,
                              const std::vector<std::vector<int>> &box) {
  const int xs[4] = {box[0][0], box[1][0], box[2][0], box[3][0]};
  const int ys[4] = {box[0][1], box[1][1], box[2][1], box[3][1]};
  const int left = *std::min_element(xs, xs + 4);
  const int right = *std::max_element(xs, xs + 4);
  const int top = *std::min_element(ys, ys + 4);
  const int bottom = *std::max_element(ys, ys + 4);

  cv::Mat img_crop;
  srcimage(cv::Rect(left, top, right - left, bottom - top)).copyTo(img_crop);

  // Output size: length of the top edge by length of the left edge,
  // truncated to whole pixels.
  const double top_dx = xs[0] - xs[1];
  const double top_dy = ys[0] - ys[1];
  const double left_dx = xs[0] - xs[3];
  const double left_dy = ys[0] - ys[3];
  const int img_crop_width = int(sqrt(top_dx * top_dx + top_dy * top_dy));
  const int img_crop_height = int(sqrt(left_dx * left_dx + left_dy * left_dy));

  // Source corners relative to the crop, and the axis-aligned target corners.
  cv::Point2f pointsf[4];
  for (int i = 0; i < 4; ++i) {
    pointsf[i] = cv::Point2f(float(xs[i] - left), float(ys[i] - top));
  }
  const cv::Point2f pts_std[4] = {
      cv::Point2f(0.f, 0.f), cv::Point2f(float(img_crop_width), 0.f),
      cv::Point2f(float(img_crop_width), float(img_crop_height)),
      cv::Point2f(0.f, float(img_crop_height))};

  const cv::Mat M = cv::getPerspectiveTransform(pointsf, pts_std);
  cv::Mat dst_img;
  // The fifth argument is the interpolation flag and the sixth the border
  // mode. The previous call passed BORDER_REPLICATE as the flag, which
  // silently selected linear interpolation with the default constant border.
  cv::warpPerspective(img_crop, dst_img, M,
                      cv::Size(img_crop_width, img_crop_height),
                      cv::INTER_LINEAR, cv::BORDER_REPLICATE);

  if (float(dst_img.rows) >= float(dst_img.cols) * 1.5) {
    cv::transpose(dst_img, dst_img);
    cv::flip(dst_img, dst_img, 0);
  }
  return dst_img;
}
//
// Created by fujiayi on 2020/7/3.
//
#pragma once
#include "common.h"
#include <opencv2/opencv.hpp>
#include <vector>
// Recognition input shape; defined in the accompanying implementation file.
extern const std::vector<int> REC_IMAGE_SHAPE;
// Crops the quadrilateral `box` (four {x, y} points) out of `srcimage` and
// rectifies it with a perspective warp.
cv::Mat get_rotate_crop_image(const cv::Mat &srcimage,
const std::vector<std::vector<int>> &box);
// Resizes `img` to the recognition input height, keeping its aspect ratio.
cv::Mat crnn_resize_img(const cv::Mat &img, float wh_ratio);
// Returns the offset from `first` of the largest element in [first, last).
// When several elements compare equal to the maximum the first one wins; an
// empty range yields 0.
template <class ForwardIterator>
inline size_t argmax(ForwardIterator first, ForwardIterator last) {
  const ForwardIterator largest = std::max_element(first, last);
  return std::distance(first, largest);
}
\ No newline at end of file
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ocr_clipper.hpp"
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include <iostream>
#include <math.h>
#include <vector>
// Computes the offset distance for a quadrilateral:
//   distance = area * unclip_ratio / perimeter
// `box` is read as four rows of {x, y}; the result is written to `distance`.
static void getcontourarea(float **box, float unclip_ratio, float &distance) {
  const int corner_count = 4;
  float twice_signed_area = 0.0f; // shoelace sum
  float perimeter = 0.0f;
  for (int cur = 0; cur < corner_count; ++cur) {
    const int nxt = (cur + 1) % corner_count;
    twice_signed_area +=
        box[cur][0] * box[nxt][1] - box[cur][1] * box[nxt][0];
    const float dx = box[cur][0] - box[nxt][0];
    const float dy = box[cur][1] - box[nxt][1];
    perimeter += sqrtf(dx * dx + dy * dy);
  }
  const float area = fabs(float(twice_signed_area / 2.0));
  distance = area * unclip_ratio / perimeter;
}
// Expands the quadrilateral `box` (four rows of {x, y}) outwards with a
// Clipper polygon offset and returns the minimum-area rotated rectangle
// around every vertex of the offset result.
static cv::RotatedRect unclip(float **box) {
  const float unclip_ratio = 2.0;
  float distance = 1.0;
  getcontourarea(box, unclip_ratio, distance);

  ClipperLib::Path p;
  for (int i = 0; i < 4; ++i) {
    p << ClipperLib::IntPoint(int(box[i][0]), int(box[i][1]));
  }
  ClipperLib::ClipperOffset offset;
  offset.AddPath(p, ClipperLib::jtRound, ClipperLib::etClosedPolygon);
  ClipperLib::Paths soln;
  offset.Execute(soln, distance);

  // Each path is walked with its own length. The previous loop bounded every
  // path by the length of the last one, which could read past the end of a
  // shorter path or skip the tail of a longer one.
  std::vector<cv::Point2f> points;
  for (const ClipperLib::Path &path : soln) {
    for (const ClipperLib::IntPoint &vertex : path) {
      points.emplace_back(vertex.X, vertex.Y);
    }
  }
  return cv::minAreaRect(points);
}
// Copies a float matrix into a freshly allocated table of mat.rows row
// pointers, each pointing at mat.cols floats. Both levels are allocated with
// new[]; nothing in this function releases them, so the caller owns the
// result.
static float **Mat2Vec(cv::Mat mat) {
  float **table = new float *[mat.rows];
  for (int r = 0; r < mat.rows; ++r) {
    table[r] = new float[mat.cols];
  }
  for (int r = 0; r < mat.rows; ++r) {
    float *row = table[r];
    for (int c = 0; c < mat.cols; ++c) {
      row[c] = mat.at<float>(r, c);
    }
  }
  return table;
}
// In-place quicksort of the row pointers s[l..r] by ascending first element
// (s[k][0]). Only the pointers are reordered; the rows are untouched.
static void quickSort(float **s, int l, int r) {
  if (l >= r) {
    return;
  }
  float *const pivot_row = s[l];
  const float pivot_key = pivot_row[0];
  int lo = l;
  int hi = r;
  while (lo < hi) {
    // Walk down from the right until an entry smaller than the pivot is met.
    for (; lo < hi && s[hi][0] >= pivot_key; --hi) {
    }
    if (lo < hi) {
      std::swap(s[lo], s[hi]);
      ++lo;
    }
    // Walk up from the left until an entry not smaller than the pivot is met.
    for (; lo < hi && s[lo][0] < pivot_key; ++lo) {
    }
    if (lo < hi) {
      std::swap(s[hi], s[lo]);
      --hi;
    }
  }
  s[lo] = pivot_row;
  quickSort(s, l, lo - 1);
  quickSort(s, lo + 1, r);
}
// In-place quicksort of box[l..r] by ascending coordinate `axis`
// (box[k][axis]).
static void quickSort_vector(std::vector<std::vector<int>> &box, int l, int r,
                             int axis) {
  if (l >= r) {
    return;
  }
  const int pivot_key = box[l][axis];
  const std::vector<int> pivot(box[l]);
  int lo = l;
  int hi = r;
  while (lo < hi) {
    // Scan from the right for an entry smaller than the pivot.
    while (lo < hi && box[hi][axis] >= pivot_key) {
      --hi;
    }
    if (lo < hi) {
      std::swap(box[lo], box[hi]);
      ++lo;
    }
    // Scan from the left for an entry not smaller than the pivot.
    while (lo < hi && box[lo][axis] < pivot_key) {
      ++lo;
    }
    if (lo < hi) {
      std::swap(box[hi], box[lo]);
      --hi;
    }
  }
  box[lo] = pivot;
  quickSort_vector(box, l, lo - 1, axis);
  quickSort_vector(box, lo + 1, r, axis);
}
// Reorders four {x, y} points as: upper-left, upper-right, lower-right,
// lower-left. The points are sorted by x; the two smallest-x points form the
// left pair and the other two the right pair, and within each pair the point
// with the smaller y comes first.
static std::vector<std::vector<int>>
order_points_clockwise(std::vector<std::vector<int>> pts) {
  quickSort_vector(pts, 0, int(pts.size() - 1), 0);

  const bool left_swapped = pts[0][1] > pts[1][1];
  const bool right_swapped = pts[2][1] > pts[3][1];
  const std::vector<int> &left_upper = left_swapped ? pts[1] : pts[0];
  const std::vector<int> &left_lower = left_swapped ? pts[0] : pts[1];
  const std::vector<int> &right_upper = right_swapped ? pts[3] : pts[2];
  const std::vector<int> &right_lower = right_swapped ? pts[2] : pts[3];

  return std::vector<std::vector<int>>{left_upper, right_upper, right_lower,
                                       left_lower};
}
// Returns the four corners of `box` as a table allocated by Mat2Vec and
// writes the shorter side of `box` to `ssid`.
//
// The corners are sorted by x. Of the two smallest-x corners the one with the
// smaller (or equal) y goes to index 0 and the other to index 3; of the two
// largest-x corners the one with the smaller (or equal) y goes to index 1 and
// the other to index 2.
static float **get_mini_boxes(cv::RotatedRect box, float &ssid) {
  ssid = box.size.width >= box.size.height ? box.size.height : box.size.width;

  cv::Mat corners;
  cv::boxPoints(box, corners);
  float **pts = Mat2Vec(corners);
  quickSort(pts, 0, 3);

  float *const left_a = pts[0];
  float *const left_b = pts[1];
  float *const right_a = pts[2];
  float *const right_b = pts[3];
  const bool left_b_on_top = left_b[1] <= left_a[1];
  const bool right_b_on_top = right_b[1] <= right_a[1];

  pts[0] = left_b_on_top ? left_b : left_a;
  pts[1] = right_b_on_top ? right_b : right_a;
  pts[2] = right_b_on_top ? right_a : right_b;
  pts[3] = left_b_on_top ? left_a : left_b;
  return pts;
}
// Limits `x` to the range [min, max]. The upper bound is tested first, so
// `max` is returned when both bounds are violated.
template <class T> T clamp(T x, T min, T max) {
  return (x > max) ? max : ((x < min) ? min : x);
}
// Float version of clamp(): limits `x` to [min, max], testing the upper bound
// first.
static float clampf(float x, float min, float max) {
  return (x > max) ? max : ((x < min) ? min : x);
}
float box_score_fast(float **box_array, cv::Mat pred) {
auto array = box_array;
int width = pred.cols;
int height = pred.rows;
float box_x[4] = {array[0][0], array[1][0], array[2][0], array[3][0]};
float box_y[4] = {array[0][1], array[1][1], array[2][1], array[3][1]};
int xmin = clamp(int(std::floorf(*(std::min_element(box_x, box_x + 4)))), 0,
width - 1);
int xmax = clamp(int(std::ceilf(*(std::max_element(box_x, box_x + 4)))), 0,
width - 1);
int ymin = clamp(int(std::floorf(*(std::min_element(box_y, box_y + 4)))), 0,
height - 1);
int ymax = clamp(int(std::ceilf(*(std::max_element(box_y, box_y + 4)))), 0,
height - 1);
cv::Mat mask;
mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1);
cv::Point root_point[4];
root_point[0] = cv::Point(int(array[0][0]) - xmin, int(array[0][1]) - ymin);
root_point[1] = cv::Point(int(array[1][0]) - xmin, int(array[1][1]) - ymin);
root_point[2] = cv::Point(int(array[2][0]) - xmin, int(array[2][1]) - ymin);
root_point[3] = cv::Point(int(array[3][0]) - xmin, int(array[3][1]) - ymin);
const cv::Point *ppt[1] = {root_point};
int npt[] = {4};
cv::fillPoly(mask, ppt, npt, 1, cv::Scalar(1));
cv::Mat croppedImg;
pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1))
.copyTo(croppedImg);
auto score = cv::mean(croppedImg, mask)[0];
return score;
}
namespace {
// Releases a point table produced by get_mini_boxes() when it goes out of
// scope. The table is expected to have four rows: get_mini_boxes() sorts and
// indexes rows 0..3 and only permutes them.
struct MiniBoxOwner {
  explicit MiniBoxOwner(float **rows) : rows_(rows) {}
  ~MiniBoxOwner() {
    if (rows_ == nullptr) {
      return;
    }
    for (int i = 0; i < 4; ++i) {
      delete[] rows_[i];
    }
    delete[] rows_;
  }
  MiniBoxOwner(const MiniBoxOwner &) = delete;
  MiniBoxOwner &operator=(const MiniBoxOwner &) = delete;
  float **get() const { return rows_; }

private:
  float **rows_;
};
} // namespace

// Extracts text boxes from a binarised detection map.
//
// @param pred   per-pixel score map, used to score each candidate
// @param bitmap binary map whose contours are the candidates
// @return one entry per accepted box, each holding four {x, y} integer points
//
// A contour is dropped when the shorter side of its minimum-area rectangle is
// below 3, when its mean score is below 0.5, or when the shorter side of the
// expanded rectangle is below 5. At most 1000 contours are examined.
std::vector<std::vector<std::vector<int>>>
boxes_from_bitmap(const cv::Mat &pred, const cv::Mat &bitmap) {
  const int min_size = 3;
  const int max_candidates = 1000;
  const float box_thresh = 0.5;
  const int width = bitmap.cols;
  const int height = bitmap.rows;
  const int dest_width = pred.cols;
  const int dest_height = pred.rows;

  std::vector<std::vector<cv::Point>> contours;
  std::vector<cv::Vec4i> hierarchy;
  cv::findContours(bitmap, contours, hierarchy, cv::RETR_LIST,
                   cv::CHAIN_APPROX_SIMPLE);
  const int num_contours = contours.size() >= size_t(max_candidates)
                               ? max_candidates
                               : int(contours.size());

  std::vector<std::vector<std::vector<int>>> boxes;
  for (int idx = 0; idx < num_contours; idx++) {
    float ssid = 0.f;
    // The owners free the point tables on every exit path of the iteration;
    // previously both tables were leaked for every contour.
    const MiniBoxOwner mini_box(
        get_mini_boxes(cv::minAreaRect(contours[idx]), ssid));
    if (ssid < min_size) {
      continue;
    }
    const float score = box_score_fast(mini_box.get(), pred);
    if (score < box_thresh) {
      continue;
    }

    const cv::RotatedRect clipbox = unclip(mini_box.get());
    const MiniBoxOwner clip_box(get_mini_boxes(clipbox, ssid));
    if (ssid < min_size + 2) {
      continue;
    }

    // Rescale from bitmap coordinates to `pred` coordinates and clamp.
    std::vector<std::vector<int>> int_box;
    for (int k = 0; k < 4; k++) {
      const float *corner = clip_box.get()[k];
      const int x = int(clampf(
          roundf(corner[0] / float(width) * float(dest_width)), 0,
          float(dest_width)));
      const int y = int(clampf(
          roundf(corner[1] / float(height) * float(dest_height)), 0,
          float(dest_height)));
      int_box.push_back(std::vector<int>{x, y});
    }
    boxes.emplace_back(std::move(int_box));
  }
  return boxes;
}
// Returns the larger of the two integers.
int _max(int a, int b) {
  if (b > a) {
    return b;
  }
  return a;
}
// Returns the smaller of the two integers.
int _min(int a, int b) {
  if (a < b) {
    return a;
  }
  return b;
}
// Maps detection boxes back to source-image coordinates and drops tiny ones.
//
// @param o_boxes boxes in detection-map coordinates, each with four {x, y}
//                points
// @param ratio_h detection height divided by source height
// @param ratio_w detection width divided by source width
// @param srcimg  source image; only its size is read
// @return the boxes, points reordered by order_points_clockwise(), divided by
//         the ratios and clamped to the image, whose top edge and left edge
//         are both longer than 10 pixels
std::vector<std::vector<std::vector<int>>>
filter_tag_det_res(const std::vector<std::vector<std::vector<int>>> &o_boxes,
                   float ratio_h, float ratio_w, const cv::Mat &srcimg) {
  const int oriimg_h = srcimg.rows;
  const int oriimg_w = srcimg.cols;

  std::vector<std::vector<std::vector<int>>> root_points;
  for (const std::vector<std::vector<int>> &raw_box : o_boxes) {
    std::vector<std::vector<int>> box = order_points_clockwise(raw_box);
    // Every point of this box is rescaled; the previous loop took its bound
    // from the first box instead of the current one.
    for (std::vector<int> &pt : box) {
      pt[0] = int(pt[0] / ratio_w);
      pt[1] = int(pt[1] / ratio_h);
      pt[0] = _min(_max(pt[0], 0), oriimg_w - 1);
      pt[1] = _min(_max(pt[1], 0), oriimg_h - 1);
    }

    const double top_dx = box[0][0] - box[1][0];
    const double top_dy = box[0][1] - box[1][1];
    const double left_dx = box[0][0] - box[3][0];
    const double left_dy = box[0][1] - box[3][1];
    const int rect_width = int(sqrt(top_dx * top_dx + top_dy * top_dy));
    const int rect_height = int(sqrt(left_dx * left_dx + left_dy * left_dy));
    if (rect_width <= 10 || rect_height <= 10) {
      continue;
    }
    root_points.push_back(std::move(box));
  }
  return root_points;
}
\ No newline at end of file
//
// Created by fujiayi on 2020/7/2.
//
#pragma once
#include <opencv2/opencv.hpp>
#include <vector>
// Extracts candidate text boxes from the binary map `bitmap`, scoring each
// candidate against `pred`. Each returned box holds four {x, y} points.
std::vector<std::vector<std::vector<int>>>
boxes_from_bitmap(const cv::Mat &pred, const cv::Mat &bitmap);
// Divides box coordinates by ratio_w / ratio_h, clamps them to `srcimg` and
// drops boxes that are too small.
std::vector<std::vector<std::vector<int>>>
filter_tag_det_res(const std::vector<std::vector<std::vector<int>>> &o_boxes,
float ratio_h, float ratio_w, const cv::Mat &srcimg);
\ No newline at end of file
//
// Created by fujiayi on 2020/7/1.
//
#include "ocr_ppredictor.h"
#include "common.h"
#include "ocr_cls_process.h"
#include "ocr_crnn_process.h"
#include "ocr_db_post_process.h"
#include "preprocess.h"
namespace ppredictor {
// Stores a copy of `config`; the predictors themselves are created later by
// init() or init_from_file().
OCR_PPredictor::OCR_PPredictor(const OCR_Config &config) : _config(config) {}
// Creates the detection, recognition and classification predictors from
// in-memory model buffers.
//
// @return RETURN_OK when all three predictors initialised; otherwise the
//         first non-OK status, in which case the remaining predictors are not
//         created
int OCR_PPredictor::init(const std::string &det_model_content,
                         const std::string &rec_model_content,
                         const std::string &cls_model_content) {
  // Each status is checked; previously the results of init_nb() were dropped.
  _det_predictor.reset(
      new PPredictor{_config.thread_num, NET_OCR, _config.mode});
  int status = _det_predictor->init_nb(det_model_content);
  if (status != RETURN_OK) {
    return status;
  }

  _rec_predictor.reset(
      new PPredictor{_config.thread_num, NET_OCR_INTERNAL, _config.mode});
  status = _rec_predictor->init_nb(rec_model_content);
  if (status != RETURN_OK) {
    return status;
  }

  _cls_predictor.reset(
      new PPredictor{_config.thread_num, NET_OCR_INTERNAL, _config.mode});
  status = _cls_predictor->init_nb(cls_model_content);
  if (status != RETURN_OK) {
    return status;
  }
  return RETURN_OK;
}
// Creates the detection, recognition and classification predictors from
// model files.
//
// @return RETURN_OK when all three predictors initialised; otherwise the
//         first non-OK status, in which case the remaining predictors are not
//         created
int OCR_PPredictor::init_from_file(const std::string &det_model_path,
                                   const std::string &rec_model_path,
                                   const std::string &cls_model_path) {
  // Each status is checked; previously the results of init_from_file() were
  // dropped.
  _det_predictor.reset(
      new PPredictor{_config.thread_num, NET_OCR, _config.mode});
  int status = _det_predictor->init_from_file(det_model_path);
  if (status != RETURN_OK) {
    return status;
  }

  _rec_predictor.reset(
      new PPredictor{_config.thread_num, NET_OCR_INTERNAL, _config.mode});
  status = _rec_predictor->init_from_file(rec_model_path);
  if (status != RETURN_OK) {
    return status;
  }

  _cls_predictor.reset(
      new PPredictor{_config.thread_num, NET_OCR_INTERNAL, _config.mode});
  status = _cls_predictor->init_from_file(cls_model_path);
  if (status != RETURN_OK) {
    return status;
  }
  return RETURN_OK;
}
/**
 * Debug helper: draws every filtered box as a closed green outline on a copy
 * of the source image and writes the result to a hard-coded path.
 * @param filter_boxes polygons to draw; each inner vector is one {x, y} point
 * @param boxes unfiltered boxes; not drawn, kept so the signature is unchanged
 * @param srcimg image the polygons are drawn over (left unmodified)
 */
static void
visual_img(const std::vector<std::vector<std::vector<int>>> &filter_boxes,
           const std::vector<std::vector<std::vector<int>>> &boxes,
           const cv::Mat &srcimg) {
  (void)boxes;
  cv::Mat img_vis;
  srcimg.copyTo(img_vis);

  // One polygon per filtered box, sized by that box. The previous version
  // used a variable-length array sized by filter_boxes but indexed by
  // boxes.size(), and bounded every polygon by the first box's point count.
  for (const std::vector<std::vector<int>> &box : filter_boxes) {
    std::vector<cv::Point> polygon;
    polygon.reserve(box.size());
    for (const std::vector<int> &pt : box) {
      polygon.emplace_back(pt[0], pt[1]);
    }
    if (polygon.empty()) {
      continue;
    }
    const cv::Point *ppt[1] = {polygon.data()};
    int npt[] = {int(polygon.size())};
    cv::polylines(img_vis, ppt, npt, 1, true, CV_RGB(0, 255, 0), 2, 8, 0);
  }
  // For debugging only; replace the path with one that exists on your device.
  cv::imwrite("/sdcard/1/vis.png", img_vis);
}
std::vector<OCRPredictResult>
OCR_PPredictor::infer_ocr(const std::vector<int64_t> &dims,
const float *input_data, int input_len, int net_flag,
cv::Mat &origin) {
PredictorInput input = _det_predictor->get_first_input();
input.set_dims(dims);
input.set_data(input_data, input_len);
std::vector<PredictorOutput> results = _det_predictor->infer();
PredictorOutput &res = results.at(0);
std::vector<std::vector<std::vector<int>>> filtered_box = calc_filtered_boxes(
res.get_float_data(), res.get_size(), (int)dims[2], (int)dims[3], origin);
LOGI("Filter_box size %ld", filtered_box.size());
return infer_rec(filtered_box, origin);
}
// Recognises the text inside every box, visiting the boxes from last to
// first. Each crop is rectified, passed through infer_cls(), resized,
// normalised and fed to the recognition predictor; the output is decoded by
// taking the arg-max per step, skipping index 0 and repeats of the previous
// step's index. Boxes that decode to nothing are omitted from the result.
std::vector<OCRPredictResult> OCR_PPredictor::infer_rec(
    const std::vector<std::vector<std::vector<int>>> &boxes,
    const cv::Mat &origin_img) {
  const std::vector<float> mean = {0.5f, 0.5f, 0.5f};
  const std::vector<float> scale = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
  std::vector<int64_t> dims = {1, 3, 0, 0};
  std::vector<OCRPredictResult> ocr_results;
  PredictorInput input = _rec_predictor->get_first_input();

  for (size_t remaining = boxes.size(); remaining > 0; --remaining) {
    const std::vector<std::vector<int>> &box = boxes[remaining - 1];

    // Prepare the network input for this box.
    cv::Mat crop_img = infer_cls(get_rotate_crop_image(origin_img, box));
    const float wh_ratio = float(crop_img.cols) / float(crop_img.rows);
    cv::Mat input_image = crnn_resize_img(crop_img, wh_ratio);
    input_image.convertTo(input_image, CV_32FC3, 1 / 255.0f);
    dims[2] = input_image.rows;
    dims[3] = input_image.cols;
    input.set_dims(dims);
    neon_mean_scale(reinterpret_cast<const float *>(input_image.data),
                    input.get_mutable_float_data(),
                    input_image.rows * input_image.cols, mean, scale);

    std::vector<PredictorOutput> results = _rec_predictor->infer();
    const float *predict_batch = results.at(0).get_float_data();
    const std::vector<int64_t> predict_shape = results.at(0).get_shape();
    const int64_t steps = predict_shape[1];
    const int64_t classes = predict_shape[2];

    // ctc decode
    OCRPredictResult res;
    int last_index = 0;
    float score_sum = 0.f;
    int kept = 0;
    for (int n = 0; n < steps; n++) {
      const float *step_begin = predict_batch + n * classes;
      const float *step_end = step_begin + classes;
      const int argmax_idx = int(argmax(step_begin, step_end));
      const bool repeats_previous = n > 0 && argmax_idx == last_index;
      if (argmax_idx > 0 && !repeats_previous) {
        score_sum += step_begin[argmax_idx];
        kept += 1;
        res.word_index.push_back(argmax_idx);
      }
      last_index = argmax_idx;
    }
    if (res.word_index.empty()) {
      continue;
    }
    res.score = score_sum / kept;
    res.points = box;
    ocr_results.emplace_back(std::move(res));
  }
  LOGI("ocr_results finished %lu", ocr_results.size());
  return ocr_results;
}
// Runs the classification predictor on `img` and returns a copy of `img`,
// rotated when the best-scoring label is odd and its score exceeds `thresh`.
cv::Mat OCR_PPredictor::infer_cls(const cv::Mat &img, float thresh) {
  const std::vector<float> mean = {0.5f, 0.5f, 0.5f};
  const std::vector<float> scale = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};

  PredictorInput input = _cls_predictor->get_first_input();
  cv::Mat input_image = cls_resize_img(img);
  input_image.convertTo(input_image, CV_32FC3, 1 / 255.0f);
  const std::vector<int64_t> dims = {1, 3, input_image.rows, input_image.cols};
  input.set_dims(dims);
  neon_mean_scale(reinterpret_cast<const float *>(input_image.data),
                  input.get_mutable_float_data(),
                  input_image.rows * input_image.cols, mean, scale);

  std::vector<PredictorOutput> results = _cls_predictor->infer();
  const PredictorOutput &cls_out = results.at(0);
  const float *scores = cls_out.get_float_data();

  // Pick the label with the highest score; only scores above 0 can win.
  float score = 0;
  int label = 0;
  for (int64_t i = 0; i < cls_out.get_size(); i++) {
    LOGI("output scores [%f]", scores[i]);
    if (scores[i] > score) {
      score = scores[i];
      label = i;
    }
  }

  cv::Mat srcimg;
  img.copyTo(srcimg);
  const bool needs_rotation = (label % 2 == 1) && (score > thresh);
  if (needs_rotation) {
    // NOTE(review): rotate code 1 is presumably cv::ROTATE_180 - confirm.
    cv::rotate(srcimg, srcimg, 1);
  }
  return srcimg;
}
// Turns the raw detection output into boxes in source-image coordinates.
//
// @param pred          detection scores, row-major
// @param pred_size     number of floats available at `pred`
// @param output_height height of the detection map
// @param output_width  width of the detection map
// @param origin        source image; only its size is used for rescaling
std::vector<std::vector<std::vector<int>>>
OCR_PPredictor::calc_filtered_boxes(const float *pred, int pred_size,
                                    int output_height, int output_width,
                                    const cv::Mat &origin) {
  const double threshold = 0.3;
  const double maxvalue = 1;

  cv::Mat pred_map = cv::Mat::zeros(output_height, output_width, CV_32F);
  // Never copy more floats than the map can hold.
  const size_t capacity = pred_map.total();
  const size_t available = pred_size > 0 ? size_t(pred_size) : 0;
  const size_t copy_count = available < capacity ? available : capacity;
  if (pred != nullptr && copy_count > 0) {
    memcpy(pred_map.data, pred, copy_count * sizeof(float));
  }

  // Threshold the float scores first, then convert to 8-bit. The previous
  // order rounded the scores to 0/1 before thresholding, so the 0.3 threshold
  // effectively became about 0.5.
  cv::Mat thresholded;
  cv::threshold(pred_map, thresholded, threshold, maxvalue, cv::THRESH_BINARY);
  cv::Mat bit_map;
  thresholded.convertTo(bit_map, CV_8UC1);

  const std::vector<std::vector<std::vector<int>>> boxes =
      boxes_from_bitmap(pred_map, bit_map);
  const float ratio_h = output_height * 1.0f / origin.rows;
  const float ratio_w = output_width * 1.0f / origin.cols;
  return filter_tag_det_res(boxes, ratio_h, ratio_w, origin);
}
// Collects every second int of the output, starting at lod[0][0] and stopping
// before lod[0][1] * 2.
std::vector<int>
OCR_PPredictor::postprocess_rec_word_index(const PredictorOutput &res) {
  const int *rec_idx = res.get_int_data();
  const std::vector<std::vector<uint64_t>> rec_idx_lod = res.get_lod();
  const int first = int(rec_idx_lod[0][0]);
  const int limit = int(rec_idx_lod[0][1] * 2);

  std::vector<int> pred_idx;
  int pos = first;
  while (pos < limit) {
    pred_idx.emplace_back(rec_idx[pos]);
    pos += 2;
  }
  return pred_idx;
}
// Averages the per-row maximum of the output over rows lod[0][0] up to (but
// excluding) lod[0][1] - 1, skipping rows whose arg-max is the last class.
// Returns 0 and logs an error when no row contributes.
float OCR_PPredictor::postprocess_rec_score(const PredictorOutput &res) {
  const float *predict_batch = res.get_float_data();
  const std::vector<int64_t> predict_shape = res.get_shape();
  const std::vector<std::vector<uint64_t>> predict_lod = res.get_lod();

  // Work in signed 64-bit so that lod[0][1] == 0 yields an empty range rather
  // than wrapping around, and treat a missing lod or shape as an empty range.
  int64_t classes = 0;
  int64_t first_row = 0;
  int64_t end_row = 0;
  if (predict_shape.size() >= 2 && !predict_lod.empty() &&
      predict_lod[0].size() >= 2) {
    classes = predict_shape[1];
    first_row = static_cast<int64_t>(predict_lod[0][0]);
    end_row = static_cast<int64_t>(predict_lod[0][1]) - 1;
  }

  float score = 0.f;
  int count = 0;
  for (int64_t n = first_row; n < end_row; n++) {
    const float *row = predict_batch + n * classes;
    const int64_t argmax_idx = int64_t(argmax(row, row + classes));
    // The last class index is excluded from the average.
    if (argmax_idx < classes - 1) {
      score += row[argmax_idx];
      count += 1;
    }
  }
  if (count == 0) {
    LOGE("calc score count 0");
  } else {
    score /= count;
  }
  LOGI("calc score: %f", score);
  return score;
}
// Always reports NET_OCR for this predictor.
NET_TYPE OCR_PPredictor::get_net_flag() const { return NET_OCR; }
}
\ No newline at end of file
//
// Created by fujiayi on 2020/7/1.
//
#pragma once
#include "ppredictor.h"
#include <opencv2/opencv.hpp>
#include <paddle_api.h>
#include <string>
namespace ppredictor {
/**
 * Settings passed to every PPredictor created by OCR_PPredictor.
 */
struct OCR_Config {
int thread_num = 4; // Thread count handed to each predictor
paddle::lite_api::PowerMode mode =
paddle::lite_api::LITE_POWER_HIGH; // Paddle Lite power mode
};
/**
 * Recognition result for one detected text polygon.
 */
struct OCRPredictResult {
  std::vector<int> word_index;          // decoded character indices
  std::vector<std::vector<int>> points; // polygon corners as {x, y}
  float score = 0.f; // mean score of the kept characters; 0 until it is set
};
/**
 * OCR pipeline built on three predictors:
 * 1. detection (det): finds the polygons that contain text
 * 2. classification (cls): run on every crop before recognition
 * 3. recognition (rec): reads the text inside each cropped polygon
 */
class OCR_PPredictor : public PPredictor_Interface {
public:
OCR_PPredictor(const OCR_Config &config);
virtual ~OCR_PPredictor() {}
/**
 * Initialize the predictors of the three models from in-memory buffers.
 * @param det_model_content detection model buffer
 * @param rec_model_content recognition model buffer
 * @param cls_model_content classification model buffer
 * @return status code
 */
int init(const std::string &det_model_content,
const std::string &rec_model_content,
const std::string &cls_model_content);
/**
 * Initialize the predictors of the three models from files.
 * @param det_model_path detection model path
 * @param rec_model_path recognition model path
 * @param cls_model_path classification model path
 * @return status code
 */
int init_from_file(const std::string &det_model_path,
const std::string &rec_model_path,
const std::string &cls_model_path);
/**
 * Run the whole pipeline and return the OCR results.
 * @param dims dimensions of the detection input tensor
 * @param input_data detection input data
 * @param input_len number of floats in input_data
 * @param net_flag network flag
 * @param origin original image the boxes are cropped from
 * @return one result per recognised box
 */
virtual std::vector<OCRPredictResult>
infer_ocr(const std::vector<int64_t> &dims, const float *input_data,
int input_len, int net_flag, cv::Mat &origin);
virtual NET_TYPE get_net_flag() const;
private:
/**
 * Compute polygons from the output map of the detection model.
 * @param pred detection output data
 * @param pred_size number of floats in pred
 * @param output_height height of the detection output map
 * @param output_width width of the detection output map
 * @param origin original image
 * @return filtered polygons
 */
std::vector<std::vector<std::vector<int>>>
calc_filtered_boxes(const float *pred, int pred_size, int output_height,
int output_width, const cv::Mat &origin);
/**
 * Run the recognition model on every box.
 *
 * @param boxes polygons to recognise
 * @param origin original image
 * @return one result per recognised box
 */
std::vector<OCRPredictResult>
infer_rec(const std::vector<std::vector<std::vector<int>>> &boxes,
const cv::Mat &origin);
/**
 * Run the classification model on one cropped image.
 *
 * @param origin cropped image
 * @param thresh score threshold
 * @return image produced by the classification step
 */
cv::Mat infer_cls(const cv::Mat &origin, float thresh = 0.9);
/**
 * Postprocess the output of the second model to extract the text indices.
 * @param res recognition output
 * @return character indices
 */
std::vector<int> postprocess_rec_word_index(const PredictorOutput &res);
/**
 * Calculate the confidence of the second model's text result.
 * @param res recognition output
 * @return confidence score
 */
float postprocess_rec_score(const PredictorOutput &res);
std::unique_ptr<PPredictor> _det_predictor;
std::unique_ptr<PPredictor> _rec_predictor;
std::unique_ptr<PPredictor> _cls_predictor;
OCR_Config _config;
};
}
#include "ppredictor.h"
#include "common.h"
namespace ppredictor {
// Records the thread count, power mode and network flag; the underlying
// Paddle predictor is created later by _init().
// The initialisers are listed in member declaration order.
PPredictor::PPredictor(int thread_num, int net_flag,
                       paddle::lite_api::PowerMode mode)
    : _thread_num(thread_num), _mode(mode), _net_flag(net_flag) {}
int PPredictor::init_nb(const std::string &model_content) {
paddle::lite_api::MobileConfig config;
config.set_model_from_buffer(model_content);
return _init(config);
}
int PPredictor::init_from_file(const std::string &model_content) {
paddle::lite_api::MobileConfig config;
config.set_model_from_file(model_content);
return _init(config);
}
// Applies the stored thread count and power mode to `config`, then creates
// the Paddle predictor from it. Always returns RETURN_OK.
// NOTE(review): the result of CreatePaddlePredictor is not checked here.
template <typename ConfigT> int PPredictor::_init(ConfigT &config) {
config.set_threads(_thread_num);
config.set_power_mode(_mode);
_predictor = paddle::lite_api::CreatePaddlePredictor(config);
LOGI("paddle instance created");
return RETURN_OK;
}
// Wraps input tensor `index` of the predictor and records that an input has
// been requested, which infer() requires before it will run.
PredictorInput PPredictor::get_input(int index) {
  auto tensor = _predictor->GetInput(index);
  PredictorInput wrapped{std::move(tensor), index, _net_flag};
  _is_input_get = true;
  return wrapped;
}
std::vector<PredictorInput> PPredictor::get_inputs(int num) {
std::vector<PredictorInput> results;
for (int i = 0; i < num; i++) {
results.emplace_back(get_input(i));
}
return results;
}
// Shorthand for get_input(0).
PredictorInput PPredictor::get_first_input() { return get_input(0); }
std::vector<PredictorOutput> PPredictor::infer() {
LOGI("infer Run start %d", _net_flag);
std::vector<PredictorOutput> results;
if (!_is_input_get) {
return results;
}
_predictor->Run();
LOGI("infer Run end");
for (int i = 0; i < _predictor->GetOutputNames().size(); i++) {
std::unique_ptr<const paddle::lite_api::Tensor> output_tensor =
_predictor->GetOutput(i);
LOGI("output tensor[%d] size %ld", i, product(output_tensor->shape()));
PredictorOutput result{std::move(output_tensor), i, _net_flag};
results.emplace_back(std::move(result));
}
return results;
}
// Returns the network flag given at construction, cast to NET_TYPE.
NET_TYPE PPredictor::get_net_flag() const { return (NET_TYPE)_net_flag; }
}
\ No newline at end of file
#pragma once
#include "paddle_api.h"
#include "predictor_input.h"
#include "predictor_output.h"
namespace ppredictor {
/**
 * Paddle Lite predictor common interface.
 */
class PPredictor_Interface {
public:
virtual ~PPredictor_Interface() {}
// Reports which kind of network the predictor serves.
virtual NET_TYPE get_net_flag() const = 0;
};
/**
 * Common predictor: wraps one Paddle Lite predictor instance.
 */
class PPredictor : public PPredictor_Interface {
public:
PPredictor(
int thread_num, int net_flag = 0,
paddle::lite_api::PowerMode mode = paddle::lite_api::LITE_POWER_HIGH);
virtual ~PPredictor() {}
/**
 * Initialize from a Paddle Lite model buffer.
 * @param model_content model buffer
 * @return status code
 */
virtual int init_nb(const std::string &model_content);
/**
 * Initialize from a model file.
 * @param model_content value passed to the file-based model loader
 * @return status code
 */
virtual int init_from_file(const std::string &model_content);
// Runs the predictor and returns its outputs.
std::vector<PredictorOutput> infer();
// Gives access to the underlying Paddle Lite predictor.
std::shared_ptr<paddle::lite_api::PaddlePredictor> get_predictor() {
return _predictor;
}
virtual std::vector<PredictorInput> get_inputs(int num);
virtual PredictorInput get_input(int index);
virtual PredictorInput get_first_input();
virtual NET_TYPE get_net_flag() const;
protected:
// Applies the stored settings to `config` and creates the predictor.
template <typename ConfigT> int _init(ConfigT &config);
private:
int _thread_num;
paddle::lite_api::PowerMode _mode;
std::shared_ptr<paddle::lite_api::PaddlePredictor> _predictor;
// Set once an input wrapper has been handed out.
bool _is_input_get = false;
int _net_flag;
};
}
#include "predictor_input.h"
namespace ppredictor {
// Resizes the wrapped tensor to `dims`. For the second input (index 1) of
// net flag 101 (yolov3) the tensor is instead resized to {1, 2} and filled
// with dims[2] and dims[3] as ints.
void PredictorInput::set_dims(std::vector<int64_t> dims) {
  // yolov3
  const bool is_size_input = (_net_flag == 101) && (_index == 1);
  if (!is_size_input) {
    _tensor->Resize(dims);
  } else {
    _tensor->Resize({1, 2});
    int *size_slots = _tensor->mutable_data<int>();
    size_slots[0] = (int)dims.at(2);
    size_slots[1] = (int)dims.at(3);
  }
  _is_dims_set = true;
}
// Returns the tensor's writable float buffer. Logs an error when set_dims()
// has not been called first, but still returns the buffer.
float *PredictorInput::get_mutable_float_data() {
if (!_is_dims_set) {
LOGE("PredictorInput::set_dims is not called");
}
return _tensor->mutable_data<float>();
}
void PredictorInput::set_data(const float *input_data, int input_float_len) {
float *input_raw_data = get_mutable_float_data();
memcpy(input_raw_data, input_data, input_float_len * sizeof(float));
}
}
\ No newline at end of file
#pragma once
#include "common.h"
#include <paddle_api.h>
#include <vector>
namespace ppredictor {
// Wrapper around one input tensor of a Paddle Lite predictor.
class PredictorInput {
public:
// Takes ownership of `tensor`; `index` is the input slot and `net_flag` the
// network kind it belongs to.
PredictorInput(std::unique_ptr<paddle::lite_api::Tensor> &&tensor, int index,
int net_flag)
: _tensor(std::move(tensor)), _index(index), _net_flag(net_flag) {}
// Sets the tensor dimensions; must be called before writing data.
void set_dims(std::vector<int64_t> dims);
// Returns the tensor's writable float buffer.
float *get_mutable_float_data();
// Copies `input_float_len` floats from `input_data` into the tensor.
void set_data(const float *input_data, int input_float_len);
private:
std::unique_ptr<paddle::lite_api::Tensor> _tensor;
bool _is_dims_set = false;
int _index;
int _net_flag;
};
}
#include "predictor_output.h"
namespace ppredictor {
// Returns the tensor's data viewed as floats.
const float *PredictorOutput::get_float_data() const {
return _tensor->data<float>();
}
// Returns the tensor's data viewed as ints.
const int *PredictorOutput::get_int_data() const {
return _tensor->data<int>();
}
// Returns a copy of the tensor's lod.
const std::vector<std::vector<uint64_t>> PredictorOutput::get_lod() const {
return _tensor->lod();
}
// Returns the element count reported for this output: shape[2] * shape[3]
// for NET_OCR outputs, product(shape) for everything else.
int64_t PredictorOutput::get_size() const {
  if (_net_flag != NET_OCR) {
    return product(_tensor->shape());
  }
  const auto dims = _tensor->shape();
  return dims.at(2) * dims.at(3);
}
// Returns a copy of the tensor's shape.
const std::vector<int64_t> PredictorOutput::get_shape() const {
return _tensor->shape();
}
}
\ No newline at end of file
#pragma once
#include "common.h"
#include <paddle_api.h>
#include <vector>
namespace ppredictor {
// Wrapper around one output tensor of a Paddle Lite predictor.
class PredictorOutput {
public:
// NOTE(review): the default constructor leaves _index and _net_flag
// uninitialised and _tensor empty; the getters dereference _tensor.
PredictorOutput() {}
// Takes ownership of `tensor`; `index` is the output slot and `net_flag` the
// network kind it belongs to.
PredictorOutput(std::unique_ptr<const paddle::lite_api::Tensor> &&tensor,
int index, int net_flag)
: _tensor(std::move(tensor)), _index(index), _net_flag(net_flag) {}
const float *get_float_data() const;
const int *get_int_data() const;
int64_t get_size() const;
const std::vector<std::vector<uint64_t>> get_lod() const;
const std::vector<int64_t> get_shape() const;
std::vector<float> data; // float results; use data_int for int results
std::vector<int> data_int; // int results, for layers that return ints
std::vector<int64_t> shape; // Paddle Lite output shape
std::vector<std::vector<uint64_t>> lod; // Paddle Lite output lod
private:
std::unique_ptr<const paddle::lite_api::Tensor> _tensor;
int _index;
int _net_flag;
};
}
#include "preprocess.h"
#include <android/bitmap.h>
// Converts an Android RGBA_8888 bitmap into a BGR cv::Mat that owns its data.
// Returns an empty Mat (after logging) when the bitmap info cannot be read,
// the format is not RGBA_8888, or the pixels cannot be locked.
cv::Mat bitmap_to_cv_mat(JNIEnv *env, jobject bitmap) {
  AndroidBitmapInfo info;
  int result = AndroidBitmap_getInfo(env, bitmap, &info);
  if (result != ANDROID_BITMAP_RESULT_SUCCESS) {
    LOGE("AndroidBitmap_getInfo failed, result: %d", result);
    return cv::Mat{};
  }
  if (info.format != ANDROID_BITMAP_FORMAT_RGBA_8888) {
    LOGE("Bitmap format is not RGBA_8888 !");
    return cv::Mat{};
  }

  void *pixels = nullptr;
  result = AndroidBitmap_lockPixels(env, bitmap, &pixels);
  if (result != ANDROID_BITMAP_RESULT_SUCCESS || pixels == nullptr) {
    LOGE("AndroidBitmap_lockPixels failed, result: %d", result);
    return cv::Mat{};
  }

  // Wrap the locked pixels without copying, using the bitmap's own row stride
  // so that rows with padding are read correctly. The conversion writes into
  // a separate buffer, so `mat` stays valid after the pixels are unlocked.
  const cv::Mat rgba(static_cast<int>(info.height),
                     static_cast<int>(info.width), CV_8UC4, pixels,
                     static_cast<size_t>(info.stride));
  cv::Mat mat;
  cv::cvtColor(rgba, mat, cv::COLOR_RGBA2BGR);
  AndroidBitmap_unlockPixels(env, bitmap);
  return mat;
}
// Returns `img` resized to `height` rows by `width` columns. When `img`
// already has that size it is returned as is (sharing the same pixel data).
cv::Mat resize_img(const cv::Mat &img, int height, int width) {
  if (img.rows == height && img.cols == width) {
    return img;
  }
  cv::Mat new_img;
  // cv::Size takes (width, height); the arguments used to be swapped, which
  // produced a transposed size for non-square targets.
  cv::resize(img, new_img, cv::Size(width, height));
  return new_img;
}
// Normalises an interleaved 3-channel float image and writes it out as three
// consecutive planes: for channel c, out = (in - mean[c]) * scale[c].
//
// din   - interleaved input, 3 floats per pixel
// dout  - output buffer; channel c starts at dout + c * size
// size  - number of pixels
// mean  - per-channel mean, must have 3 entries
// scale - per-channel scale, must have 3 entries
//
// Logs an error and returns without writing when mean or scale does not have
// exactly 3 entries.
void neon_mean_scale(const float *din, float *dout, int size,
const std::vector<float> &mean,
const std::vector<float> &scale) {
if (mean.size() != 3 || scale.size() != 3) {
LOGE("[ERROR] mean or scale size must equal to 3");
return;
}
// Broadcast each channel's mean and scale across a 4-lane vector.
float32x4_t vmean0 = vdupq_n_f32(mean[0]);
float32x4_t vmean1 = vdupq_n_f32(mean[1]);
float32x4_t vmean2 = vdupq_n_f32(mean[2]);
float32x4_t vscale0 = vdupq_n_f32(scale[0]);
float32x4_t vscale1 = vdupq_n_f32(scale[1]);
float32x4_t vscale2 = vdupq_n_f32(scale[2]);
// One write cursor per output plane.
float *dout_c0 = dout;
float *dout_c1 = dout + size;
float *dout_c2 = dout + size * 2;
int i = 0;
// Vector path: four pixels (12 input floats) per iteration.
for (; i < size - 3; i += 4) {
// De-interleaving load: val[0], val[1], val[2] each hold one channel of the
// four pixels.
float32x4x3_t vin3 = vld3q_f32(din);
float32x4_t vsub0 = vsubq_f32(vin3.val[0], vmean0);
float32x4_t vsub1 = vsubq_f32(vin3.val[1], vmean1);
float32x4_t vsub2 = vsubq_f32(vin3.val[2], vmean2);
float32x4_t vs0 = vmulq_f32(vsub0, vscale0);
float32x4_t vs1 = vmulq_f32(vsub1, vscale1);
float32x4_t vs2 = vmulq_f32(vsub2, vscale2);
vst1q_f32(dout_c0, vs0);
vst1q_f32(dout_c1, vs1);
vst1q_f32(dout_c2, vs2);
din += 12;
dout_c0 += 4;
dout_c1 += 4;
dout_c2 += 4;
}
// Scalar tail for the remaining (size % 4) pixels.
for (; i < size; i++) {
*(dout_c0++) = (*(din++) - mean[0]) * scale[0];
*(dout_c1++) = (*(din++) - mean[1]) * scale[1];
*(dout_c2++) = (*(din++) - mean[2]) * scale[2];
}
}
\ No newline at end of file
#pragma once
#include "common.h"
#include <jni.h>
#include <opencv2/opencv.hpp>
// Converts an Android bitmap (must be RGBA_8888) into a BGR cv::Mat.
// Returns an empty cv::Mat when the bitmap info cannot be read or the
// format is not RGBA_8888.
cv::Mat bitmap_to_cv_mat(JNIEnv *env, jobject bitmap);
// Returns img unchanged when it already has `height` rows and `width`
// columns; otherwise returns a new image produced by cv::resize.
cv::Mat resize_img(const cv::Mat &img, int height, int width);
// Reads `size` pixels of 3 interleaved float channels from din and writes
// (value - mean[c]) * scale[c] into three consecutive planes of `size`
// floats each starting at dout. mean and scale must each hold exactly 3
// entries; otherwise an error is logged and dout is left untouched.
void neon_mean_scale(const float *din, float *dout, int size,
const std::vector<float> &mean,
const std::vector<float> &scale);
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.baidu.paddle.lite.demo.ocr;
import android.content.res.Configuration;
import android.os.Bundle;
import android.preference.PreferenceActivity;
import android.view.MenuInflater;
import android.view.View;
import android.view.ViewGroup;
import androidx.annotation.LayoutRes;
import androidx.annotation.Nullable;
import androidx.appcompat.app.ActionBar;
import androidx.appcompat.app.AppCompatDelegate;
import androidx.appcompat.widget.Toolbar;
/**
* A {@link PreferenceActivity} which implements and proxies the necessary calls
* to be used with AppCompat.
* <p>
* This technique can be used with an {@link android.app.Activity} class, not just
* {@link PreferenceActivity}.
*/
public abstract class AppCompatPreferenceActivity extends PreferenceActivity {
// Created on first use by getDelegate(); every override below forwards to it.
private AppCompatDelegate mDelegate;
// The delegate's view factory is installed and its onCreate is called
// before the superclass onCreate runs.
@Override
protected void onCreate(Bundle savedInstanceState) {
getDelegate().installViewFactory();
getDelegate().onCreate(savedInstanceState);
super.onCreate(savedInstanceState);
}
// Runs the superclass callback first, then notifies the delegate.
@Override
protected void onPostCreate(Bundle savedInstanceState) {
super.onPostCreate(savedInstanceState);
getDelegate().onPostCreate(savedInstanceState);
}
// Returns the ActionBar reported by the delegate.
public ActionBar getSupportActionBar() {
return getDelegate().getSupportActionBar();
}
// Passes the given toolbar (may be null) to the delegate as the action bar.
public void setSupportActionBar(@Nullable Toolbar toolbar) {
getDelegate().setSupportActionBar(toolbar);
}
// Menu inflation is served by the delegate instead of the superclass.
@Override
public MenuInflater getMenuInflater() {
return getDelegate().getMenuInflater();
}
// The three setContentView overloads and addContentView are routed to the
// delegate only; the superclass implementations are not called.
@Override
public void setContentView(@LayoutRes int layoutResID) {
getDelegate().setContentView(layoutResID);
}
@Override
public void setContentView(View view) {
getDelegate().setContentView(view);
}
@Override
public void setContentView(View view, ViewGroup.LayoutParams params) {
getDelegate().setContentView(view, params);
}
@Override
public void addContentView(View view, ViewGroup.LayoutParams params) {
getDelegate().addContentView(view, params);
}
// The remaining lifecycle callbacks call the superclass first and then
// forward the same event to the delegate.
@Override
protected void onPostResume() {
super.onPostResume();
getDelegate().onPostResume();
}
// Forwards only the title to the delegate; the color argument is not passed on.
@Override
protected void onTitleChanged(CharSequence title, int color) {
super.onTitleChanged(title, color);
getDelegate().setTitle(title);
}
@Override
public void onConfigurationChanged(Configuration newConfig) {
super.onConfigurationChanged(newConfig);
getDelegate().onConfigurationChanged(newConfig);
}
@Override
protected void onStop() {
super.onStop();
getDelegate().onStop();
}
@Override
protected void onDestroy() {
super.onDestroy();
getDelegate().onDestroy();
}
// Asks the delegate to invalidate the options menu.
public void invalidateOptionsMenu() {
getDelegate().invalidateOptionsMenu();
}
// Lazily creates the delegate with AppCompatDelegate.create(this, null) and
// returns the same instance on every later call.
private AppCompatDelegate getDelegate() {
if (mDelegate == null) {
mDelegate = AppCompatDelegate.create(this, null);
}
return mDelegate;
}
}
package com.baidu.paddle.lite.demo.ocr;
import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.os.Build;
import android.os.Bundle;
import android.os.Handler;
import android.os.HandlerThread;
import android.os.Message;
import android.util.Log;
import android.view.View;
import android.widget.Button;
import android.widget.ImageView;
import android.widget.TextView;
import android.widget.Toast;
import androidx.appcompat.app.AppCompatActivity;
import java.io.IOException;
import java.io.InputStream;
/**
 * Minimal demo activity: loads the OCR model on a worker thread when created
 * and, on each button click, runs the model on a bundled asset image and
 * shows the recognized text and the annotated image.
 */
public class MiniActivity extends AppCompatActivity {
    public static final int REQUEST_LOAD_MODEL = 0;
    public static final int REQUEST_RUN_MODEL = 1;
    public static final int REQUEST_UNLOAD_MODEL = 2;
    public static final int RESPONSE_LOAD_MODEL_SUCCESSED = 0;
    public static final int RESPONSE_LOAD_MODEL_FAILED = 1;
    public static final int RESPONSE_RUN_MODEL_SUCCESSED = 2;
    public static final int RESPONSE_RUN_MODEL_FAILED = 3;

    private static final String TAG = "MiniActivity";

    protected Handler receiver = null; // Receive messages from worker thread
    protected Handler sender = null; // Send command to worker thread
    protected HandlerThread worker = null; // Worker thread to load&run model
    protected volatile Predictor predictor = null;

    private String assetModelDirPath = "models/ocr_v2_for_cpu";
    private String assetlabelFilePath = "labels/ppocr_keys_v1.txt";

    private Button button;
    private ImageView imageView; // image result
    private TextView textView; // text result

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_mini);

        Log.i(TAG, "SHOW in Logcat");

        // Prepare the worker thread for model loading and inference
        worker = new HandlerThread("Predictor Worker");
        worker.start();
        sender = new Handler(worker.getLooper()) {
            @Override
            public void handleMessage(Message msg) {
                switch (msg.what) {
                    case REQUEST_LOAD_MODEL:
                        // Load model and reload test image
                        if (!onLoadModel()) {
                            runOnUiThread(new Runnable() {
                                @Override
                                public void run() {
                                    Toast.makeText(MiniActivity.this, "Load model failed!", Toast.LENGTH_SHORT).show();
                                }
                            });
                        }
                        break;
                    case REQUEST_RUN_MODEL:
                        // Run model if model is loaded
                        final boolean isSuccessed = onRunModel();
                        runOnUiThread(new Runnable() {
                            @Override
                            public void run() {
                                if (isSuccessed) {
                                    onRunModelSuccessed();
                                } else {
                                    Toast.makeText(MiniActivity.this, "Run model failed!", Toast.LENGTH_SHORT).show();
                                }
                            }
                        });
                        break;
                }
            }
        };
        sender.sendEmptyMessage(REQUEST_LOAD_MODEL); // corresponding to REQUEST_LOAD_MODEL, to call onLoadModel()

        imageView = findViewById(R.id.imageView);
        textView = findViewById(R.id.sample_text);
        button = findViewById(R.id.button);
        button.setOnClickListener(new View.OnClickListener() {
            @Override
            public void onClick(View v) {
                sender.sendEmptyMessage(REQUEST_RUN_MODEL);
            }
        });
    }

    @Override
    protected void onDestroy() {
        onUnloadModel();
        // quitSafely() only exists from API 18 on.
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.JELLY_BEAN_MR2) {
            worker.quitSafely();
        } else {
            worker.quit();
        }
        super.onDestroy();
    }

    /**
     * Creates the predictor on first use and initializes it with the bundled
     * model directory and label file. Invoked on the worker thread in
     * response to REQUEST_LOAD_MODEL.
     *
     * @return true when the predictor reports a successful initialization
     */
    private boolean onLoadModel() {
        if (predictor == null) {
            predictor = new Predictor();
        }
        return predictor.init(this, assetModelDirPath, assetlabelFilePath);
    }

    /**
     * Decodes the bundled test image and runs the model on it. Invoked on the
     * worker thread in response to REQUEST_RUN_MODEL.
     *
     * @return true when the model ran successfully; false when the model is
     * not loaded, the image cannot be opened or decoded, or inference fails
     */
    private boolean onRunModel() {
        // Read the volatile field once and verify the model before touching it.
        final Predictor current = predictor;
        if (current == null || !current.isLoaded()) {
            return false;
        }
        InputStream imageStream = null;
        try {
            String assetImagePath = "images/0.jpg";
            imageStream = getAssets().open(assetImagePath);
            Bitmap image = BitmapFactory.decodeStream(imageStream);
            if (image == null) {
                Log.e(TAG, "Failed to decode image " + assetImagePath);
                return false;
            }
            // Input is Bitmap
            current.setInputImage(image);
            return current.runModel();
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        } finally {
            // Always release the asset stream, also on failure.
            if (imageStream != null) {
                try {
                    imageStream.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
    }

    /** Shows the text and image produced by the last successful run. */
    private void onRunModelSuccessed() {
        Log.i(TAG, "onRunModelSuccessed");
        textView.setText(predictor.outputResult);
        imageView.setImageBitmap(predictor.outputImage);
    }

    /** Releases the model if a predictor was created. */
    private void onUnloadModel() {
        if (predictor != null) {
            predictor.releaseModel();
        }
    }
}
package com.baidu.paddle.lite.demo.ocr;
import android.graphics.Bitmap;
import android.util.Log;
import java.util.ArrayList;
import java.util.concurrent.atomic.AtomicBoolean;
/**
 * Java side of the native OCR predictor: loads the JNI library, owns the
 * native handle and decodes the flat float array returned by the native
 * forward call into {@link OcrResultModel} objects.
 */
public class OCRPredictorNative {

    private static final AtomicBoolean isSOLoaded = new AtomicBoolean();

    /**
     * Loads the "Native" shared library once per process.
     *
     * @throws RuntimeException when the library cannot be loaded
     */
    public static void loadLibrary() throws RuntimeException {
        if (!isSOLoaded.get() && isSOLoaded.compareAndSet(false, true)) {
            try {
                System.loadLibrary("Native");
            } catch (Throwable e) {
                // Clear the flag so a later call retries (and fails loudly)
                // instead of silently skipping the load.
                isSOLoaded.set(false);
                RuntimeException exception = new RuntimeException(
                        "Load libNative.so failed, please check it exists in apk file.", e);
                throw exception;
            }
        }
    }

    private Config config;

    // Opaque handle returned by the native init(); 0 means "no native object".
    private long nativePointer = 0;

    /**
     * Loads the native library and creates the native predictor from the
     * given configuration.
     */
    public OCRPredictorNative(Config config) {
        this.config = config;
        loadLibrary();
        nativePointer = init(config.detModelFilename, config.recModelFilename, config.clsModelFilename,
                config.cpuThreadNum, config.cpuPower);
        Log.i("OCRPredictorNative", "load success " + nativePointer);
    }

    /**
     * Runs the native forward pass and decodes its output.
     *
     * @param inputData     pre-processed pixel data handed to the native side
     * @param width         image width used for the dims array
     * @param height        image height used for the dims array
     * @param channels      channel count used for the dims array
     * @param originalImage original bitmap handed to the native side
     * @return the decoded results; empty when the native call returns nothing
     */
    public ArrayList<OcrResultModel> runImage(float[] inputData, int width, int height, int channels, Bitmap originalImage) {
        Log.i("OCRPredictorNative", "begin to run image " + inputData.length + " " + width + " " + height);
        float[] dims = new float[]{1, channels, height, width};
        float[] rawResults = forward(nativePointer, inputData, dims, originalImage);
        ArrayList<OcrResultModel> results = postprocess(rawResults);
        return results;
    }

    /** Settings passed to the native init call. */
    public static class Config {
        public int cpuThreadNum;
        public String cpuPower;
        public String detModelFilename;
        public String recModelFilename;
        public String clsModelFilename;
    }

    /** Releases the native predictor if one exists; safe to call repeatedly. */
    public void destory() {
        // Compare against 0 rather than "> 0": a native address stored in a
        // signed long may be negative, and must still be released.
        if (nativePointer != 0) {
            release(nativePointer);
            nativePointer = 0;
        }
    }

    protected native long init(String detModelPath, String recModelPath, String clsModelPath, int threadNum, String cpuMode);

    protected native float[] forward(long pointer, float[] buf, float[] ddims, Bitmap originalImage);

    protected native void release(long pointer);

    /**
     * Splits the flat native output into records. Each record is laid out as
     * [pointNum, wordNum, confidence, pointNum * (x, y), wordNum * index].
     * Decoding stops at the first record that does not fit into the array.
     */
    private ArrayList<OcrResultModel> postprocess(float[] raw) {
        ArrayList<OcrResultModel> results = new ArrayList<OcrResultModel>();
        if (raw == null) {
            return results;
        }
        int begin = 0;
        while (begin + 1 < raw.length) {
            int point_num = Math.round(raw[begin]);
            int word_num = Math.round(raw[begin + 1]);
            int recordLength = 2 + 1 + point_num * 2 + word_num;
            if (point_num < 0 || word_num < 0 || recordLength > raw.length - begin) {
                Log.e("OCRPredictorNative", "Malformed result record at offset " + begin);
                break;
            }
            OcrResultModel model = parse(raw, begin + 2, point_num, word_num);
            begin += recordLength;
            results.add(model);
        }
        return results;
    }

    /**
     * Decodes one record body starting at {@code begin}: the confidence,
     * then {@code pointNum} (x, y) pairs, then {@code wordNum} word indices.
     */
    private OcrResultModel parse(float[] raw, int begin, int pointNum, int wordNum) {
        int current = begin;
        OcrResultModel model = new OcrResultModel();
        model.setConfidence(raw[current]);
        current++;
        for (int i = 0; i < pointNum; i++) {
            model.addPoints(Math.round(raw[current + i * 2]), Math.round(raw[current + i * 2 + 1]));
        }
        current += (pointNum * 2);
        for (int i = 0; i < wordNum; i++) {
            int index = Math.round(raw[current + i]);
            model.addWordIndex(index);
        }
        Log.i("OCRPredictorNative", "word finished " + wordNum);
        return model;
    }
}
package com.baidu.paddle.lite.demo.ocr;
import android.graphics.Point;
import java.util.ArrayList;
import java.util.List;
/**
 * One OCR result: the points added for it, the word indices added for it,
 * the label text and the confidence value.
 */
public class OcrResultModel {
    // Both lists start empty and are exposed directly by their getters.
    private final List<Point> points = new ArrayList<>();
    private final List<Integer> wordIndex = new ArrayList<>();
    private String label;
    private float confidence;

    /** Creates a result with no points, no word indices and no label. */
    public OcrResultModel() {
    }

    /** Appends the point (x, y) to the point list. */
    public void addPoints(int x, int y) {
        points.add(new Point(x, y));
    }

    /** Appends one word index to the index list. */
    public void addWordIndex(int index) {
        wordIndex.add(index);
    }

    /** @return the internal point list (not a copy) */
    public List<Point> getPoints() {
        return this.points;
    }

    /** @return the internal word-index list (not a copy) */
    public List<Integer> getWordIndex() {
        return this.wordIndex;
    }

    /** @return the label, or null if none has been set */
    public String getLabel() {
        return this.label;
    }

    public void setLabel(String label) {
        this.label = label;
    }

    public float getConfidence() {
        return this.confidence;
    }

    public void setConfidence(float confidence) {
        this.confidence = confidence;
    }
}
package com.baidu.paddle.lite.demo.ocr;
import android.content.Context;
import android.graphics.Bitmap;
import android.graphics.Canvas;
import android.graphics.Color;
import android.graphics.Paint;
import android.graphics.Path;
import android.graphics.Point;
import android.util.Log;
import java.io.File;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Vector;
import static android.graphics.Color.*;
/**
 * Loads the OCR models and the label dictionary, prepares the input bitmap,
 * runs the native predictor and turns its results into a text summary and
 * an annotated bitmap.
 */
public class Predictor {
    private static final String TAG = Predictor.class.getSimpleName();
    public boolean isLoaded = false;
    public int warmupIterNum = 1;
    public int inferIterNum = 1;
    public int cpuThreadNum = 4;
    public String cpuPowerMode = "LITE_POWER_HIGH";
    public String modelPath = "";
    public String modelName = "";
    protected OCRPredictorNative paddlePredictor = null;
    protected float inferenceTime = 0;
    // Only for object detection
    protected Vector<String> wordLabels = new Vector<String>();
    protected String inputColorFormat = "BGR";
    protected long[] inputShape = new long[]{1, 3, 960};
    protected float[] inputMean = new float[]{0.485f, 0.456f, 0.406f};
    protected float[] inputStd = new float[]{1.0f / 0.229f, 1.0f / 0.224f, 1.0f / 0.225f};
    protected float scoreThreshold = 0.1f;
    protected Bitmap inputImage = null;
    protected Bitmap outputImage = null;
    protected volatile String outputResult = "";
    protected float preprocessTime = 0;
    protected float postprocessTime = 0;

    public Predictor() {
    }

    /**
     * Loads the model and the label file using the current cpuThreadNum and
     * cpuPowerMode fields.
     *
     * @return true when both the model and the labels were loaded
     */
    public boolean init(Context appCtx, String modelPath, String labelPath) {
        isLoaded = loadModel(appCtx, modelPath, cpuThreadNum, cpuPowerMode);
        if (!isLoaded) {
            return false;
        }
        isLoaded = loadLabel(appCtx, labelPath);
        return isLoaded;
    }

    /**
     * Validates the input settings, loads the model and labels with the given
     * thread count and power mode, and stores the input settings on success.
     *
     * @return false when a setting is rejected or loading fails
     */
    public boolean init(Context appCtx, String modelPath, String labelPath, int cpuThreadNum, String cpuPowerMode,
                        String inputColorFormat,
                        long[] inputShape, float[] inputMean,
                        float[] inputStd, float scoreThreshold) {
        if (inputShape.length != 3) {
            Log.e(TAG, "Size of input shape should be: 3");
            return false;
        }
        if (inputMean.length != inputShape[1]) {
            Log.e(TAG, "Size of input mean should be: " + Long.toString(inputShape[1]));
            return false;
        }
        if (inputStd.length != inputShape[1]) {
            Log.e(TAG, "Size of input std should be: " + Long.toString(inputShape[1]));
            return false;
        }
        if (inputShape[0] != 1) {
            Log.e(TAG, "Only one batch is supported in the image classification demo, you can use any batch size in " +
                    "your Apps!");
            return false;
        }
        if (inputShape[1] != 1 && inputShape[1] != 3) {
            Log.e(TAG, "Only one/three channels are supported in the image classification demo, you can use any " +
                    "channel size in your Apps!");
            return false;
        }
        if (!inputColorFormat.equalsIgnoreCase("BGR")) {
            Log.e(TAG, "Only BGR color format is supported.");
            return false;
        }
        // Apply the requested CPU settings before loading; the three-argument
        // init() reads them from the fields.
        this.cpuThreadNum = cpuThreadNum;
        this.cpuPowerMode = cpuPowerMode;
        boolean loaded = init(appCtx, modelPath, labelPath);
        if (!loaded) {
            return false;
        }
        this.inputColorFormat = inputColorFormat;
        this.inputShape = inputShape;
        this.inputMean = inputMean;
        this.inputStd = inputStd;
        this.scoreThreshold = scoreThreshold;
        return true;
    }

    /**
     * Releases any previous model and creates the native predictor from the
     * three model files found under the model directory.
     */
    protected boolean loadModel(Context appCtx, String modelPath, int cpuThreadNum, String cpuPowerMode) {
        // Release model if exists
        releaseModel();

        // Load model
        if (modelPath.isEmpty()) {
            return false;
        }
        String realPath = modelPath;
        if (!modelPath.substring(0, 1).equals("/")) {
            // A path starting with '/' is used as-is; any other path is
            // treated as an asset directory and copied to the cache first.
            realPath = appCtx.getCacheDir() + "/" + modelPath;
            Utils.copyDirectoryFromAssets(appCtx, modelPath, realPath);
        }
        if (realPath.isEmpty()) {
            return false;
        }

        OCRPredictorNative.Config config = new OCRPredictorNative.Config();
        config.cpuThreadNum = cpuThreadNum;
        config.detModelFilename = realPath + File.separator + "ch_ppocr_mobile_v2.0_det_opt.nb";
        config.recModelFilename = realPath + File.separator + "ch_ppocr_mobile_v2.0_rec_opt.nb";
        config.clsModelFilename = realPath + File.separator + "ch_ppocr_mobile_v2.0_cls_opt.nb";
        Log.e("Predictor", "model path" + config.detModelFilename + " ; " + config.recModelFilename + ";" + config.clsModelFilename);
        config.cpuPower = cpuPowerMode;
        paddlePredictor = new OCRPredictorNative(config);

        this.cpuThreadNum = cpuThreadNum;
        this.cpuPowerMode = cpuPowerMode;
        this.modelPath = realPath;
        this.modelName = realPath.substring(realPath.lastIndexOf("/") + 1);
        return true;
    }

    /** Destroys the native predictor (if any) and resets the model fields. */
    public void releaseModel() {
        if (paddlePredictor != null) {
            paddlePredictor.destory();
            paddlePredictor = null;
        }
        isLoaded = false;
        cpuThreadNum = 1;
        cpuPowerMode = "LITE_POWER_HIGH";
        modelPath = "";
        modelName = "";
    }

    /**
     * Reads the label asset (one label per line, decoded as UTF-8) into
     * wordLabels, after a fixed first entry at index 0.
     *
     * @return false when the asset cannot be read
     */
    protected boolean loadLabel(Context appCtx, String labelPath) {
        wordLabels.clear();
        wordLabels.add("black");
        // Load word labels from file
        InputStream assetsInputStream = null;
        try {
            assetsInputStream = appCtx.getAssets().open(labelPath);
            // Read until end of stream: available() is only an estimate and a
            // single read() call may return fewer bytes than requested.
            java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream();
            byte[] chunk = new byte[4096];
            int count;
            while ((count = assetsInputStream.read(chunk)) != -1) {
                buffer.write(chunk, 0, count);
            }
            String words = buffer.toString("UTF-8");
            String[] contents = words.split("\n");
            for (String content : contents) {
                wordLabels.add(content);
            }
            Log.i(TAG, "Word label size: " + wordLabels.size());
        } catch (Exception e) {
            // Pass the throwable itself: getMessage() may be null.
            Log.e(TAG, "Failed to load word labels from " + labelPath, e);
            return false;
        } finally {
            if (assetsInputStream != null) {
                try {
                    assetsInputStream.close();
                } catch (Exception ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
        return true;
    }

    /**
     * Pre-processes the current input image, runs the native predictor and
     * stores the text summary and annotated image.
     *
     * @return false when there is no input image, the model is not loaded,
     * or the color format / channel count is unsupported
     */
    public boolean runModel() {
        if (inputImage == null || !isLoaded()) {
            return false;
        }

        // Pre-process image, and feed input tensor with pre-processed data
        Bitmap scaleImage = Utils.resizeWithStep(inputImage, Long.valueOf(inputShape[2]).intValue(), 32);

        Date start = new Date();
        int channels = (int) inputShape[1];
        int width = scaleImage.getWidth();
        int height = scaleImage.getHeight();
        float[] inputData = new float[channels * width * height];
        if (channels == 3) {
            int[] channelIdx = null;
            if (inputColorFormat.equalsIgnoreCase("RGB")) {
                channelIdx = new int[]{0, 1, 2};
            } else if (inputColorFormat.equalsIgnoreCase("BGR")) {
                channelIdx = new int[]{2, 1, 0};
            } else {
                Log.i(TAG, "Unknown color format " + inputColorFormat + ", only RGB and BGR color format is " +
                        "supported!");
                return false;
            }
            // Offsets of the second and third channel plane in inputData.
            int[] channelStride = new int[]{width * height, width * height * 2};
            for (int y = 0; y < height; y++) {
                for (int x = 0; x < width; x++) {
                    int color = scaleImage.getPixel(x, y);
                    float[] rgb = new float[]{(float) red(color) / 255.0f, (float) green(color) / 255.0f,
                            (float) blue(color) / 255.0f};
                    inputData[y * width + x] = (rgb[channelIdx[0]] - inputMean[0]) / inputStd[0];
                    inputData[y * width + x + channelStride[0]] = (rgb[channelIdx[1]] - inputMean[1]) / inputStd[1];
                    inputData[y * width + x + channelStride[1]] = (rgb[channelIdx[2]] - inputMean[2]) / inputStd[2];
                }
            }
        } else if (channels == 1) {
            for (int y = 0; y < height; y++) {
                for (int x = 0; x < width; x++) {
                    // Sample the scaled image: width and height describe
                    // scaleImage, not the original inputImage.
                    int color = scaleImage.getPixel(x, y);
                    float gray = (float) (red(color) + green(color) + blue(color)) / 3.0f / 255.0f;
                    inputData[y * width + x] = (gray - inputMean[0]) / inputStd[0];
                }
            }
        } else {
            Log.i(TAG, "Unsupported channel size " + Integer.toString(channels) + ", only channel 1 and 3 is " +
                    "supported!");
            return false;
        }
        float[] pixels = inputData;
        Log.i(TAG, "pixels " + pixels[0] + " " + pixels[1] + " " + pixels[2] + " " + pixels[3]
                + " " + pixels[pixels.length / 2] + " " + pixels[pixels.length / 2 + 1] + " " + pixels[pixels.length - 2] + " " + pixels[pixels.length - 1]);
        Date end = new Date();
        preprocessTime = (float) (end.getTime() - start.getTime());

        // Warm up
        for (int i = 0; i < warmupIterNum; i++) {
            paddlePredictor.runImage(inputData, width, height, channels, inputImage);
        }
        warmupIterNum = 0; // do not need warm
        // Run inference
        start = new Date();
        ArrayList<OcrResultModel> results = paddlePredictor.runImage(inputData, width, height, channels, inputImage);
        end = new Date();
        inferenceTime = (end.getTime() - start.getTime()) / (float) inferIterNum;

        results = postprocess(results);
        Log.i(TAG, "[stat] Preprocess Time: " + preprocessTime
                + " ; Inference Time: " + inferenceTime + " ;Box Size " + results.size());
        drawResults(results);

        return true;
    }

    public boolean isLoaded() {
        return paddlePredictor != null && isLoaded;
    }

    public String modelPath() {
        return modelPath;
    }

    public String modelName() {
        return modelName;
    }

    public int cpuThreadNum() {
        return cpuThreadNum;
    }

    public String cpuPowerMode() {
        return cpuPowerMode;
    }

    public float inferenceTime() {
        return inferenceTime;
    }

    public Bitmap inputImage() {
        return inputImage;
    }

    public Bitmap outputImage() {
        return outputImage;
    }

    public String outputResult() {
        return outputResult;
    }

    public float preprocessTime() {
        return preprocessTime;
    }

    public float postprocessTime() {
        return postprocessTime;
    }

    /** Stores a mutable ARGB_8888 copy of the image; a null image is ignored. */
    public void setInputImage(Bitmap image) {
        if (image == null) {
            return;
        }
        this.inputImage = image.copy(Bitmap.Config.ARGB_8888, true);
    }

    /**
     * Builds the label of every result by concatenating the dictionary entry
     * of each word index; indices outside the dictionary are logged and
     * rendered as "×".
     */
    private ArrayList<OcrResultModel> postprocess(ArrayList<OcrResultModel> results) {
        for (OcrResultModel r : results) {
            StringBuffer word = new StringBuffer();
            for (int index : r.getWordIndex()) {
                if (index >= 0 && index < wordLabels.size()) {
                    word.append(wordLabels.get(index));
                } else {
                    Log.e(TAG, "Word index is not in label list:" + index);
                    word.append("×");
                }
            }
            r.setLabel(word.toString());
        }
        return results;
    }

    /**
     * Logs every result, stores the numbered label list in outputResult and
     * draws each result's outline onto the input image, which also becomes
     * the output image.
     */
    private void drawResults(ArrayList<OcrResultModel> results) {
        StringBuffer outputResultSb = new StringBuffer("");
        for (int i = 0; i < results.size(); i++) {
            OcrResultModel result = results.get(i);
            StringBuilder sb = new StringBuilder("");
            sb.append(result.getLabel());
            sb.append(" ").append(result.getConfidence());
            sb.append("; Points: ");
            for (Point p : result.getPoints()) {
                sb.append("(").append(p.x).append(",").append(p.y).append(") ");
            }
            Log.i(TAG, sb.toString()); // show LOG in Logcat panel
            outputResultSb.append(i + 1).append(": ").append(result.getLabel()).append("\n");
        }
        outputResult = outputResultSb.toString();
        outputImage = inputImage;
        Canvas canvas = new Canvas(outputImage);
        Paint paintFillAlpha = new Paint();
        paintFillAlpha.setStyle(Paint.Style.FILL);
        paintFillAlpha.setColor(Color.parseColor("#3B85F5"));
        paintFillAlpha.setAlpha(50);

        Paint paint = new Paint();
        paint.setColor(Color.parseColor("#3B85F5"));
        paint.setStrokeWidth(5);
        paint.setStyle(Paint.Style.STROKE);

        for (OcrResultModel result : results) {
            List<Point> points = result.getPoints();
            if (points.isEmpty()) {
                // Nothing to outline; points.get(0) below would throw.
                continue;
            }
            Path path = new Path();
            path.moveTo(points.get(0).x, points.get(0).y);
            for (int i = points.size() - 1; i >= 0; i--) {
                Point p = points.get(i);
                path.lineTo(p.x, p.y);
            }
            canvas.drawPath(path, paint);
            canvas.drawPath(path, paintFillAlpha);
        }
    }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment