Unverified Commit 26219d5f authored by shaohua.zhang's avatar shaohua.zhang Committed by GitHub
Browse files

Merge pull request #1 from PaddlePaddle/develop

update-2020-7-17
parents 0e8a3417 311c5997
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <include/ocr_rec.h>
namespace PaddleOCR {
void CRNNRecognizer::Run(std::vector<std::vector<std::vector<int>>> boxes,
cv::Mat &img) {
cv::Mat srcimg;
img.copyTo(srcimg);
cv::Mat crop_img;
cv::Mat resize_img;
std::cout << "The predicted text is :" << std::endl;
int index = 0;
for (int i = boxes.size() - 1; i >= 0; i--) {
crop_img = GetRotateCropImage(srcimg, boxes[i]);
float wh_ratio = float(crop_img.cols) / float(crop_img.rows);
this->resize_op_.Run(crop_img, resize_img, wh_ratio);
this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
this->is_scale_);
std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f);
this->permute_op_.Run(&resize_img, input.data());
auto input_names = this->predictor_->GetInputNames();
auto input_t = this->predictor_->GetInputTensor(input_names[0]);
input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
input_t->copy_from_cpu(input.data());
this->predictor_->ZeroCopyRun();
std::vector<int64_t> rec_idx;
auto output_names = this->predictor_->GetOutputNames();
auto output_t = this->predictor_->GetOutputTensor(output_names[0]);
auto rec_idx_lod = output_t->lod();
auto shape_out = output_t->shape();
int out_num = std::accumulate(shape_out.begin(), shape_out.end(), 1,
std::multiplies<int>());
rec_idx.resize(out_num);
output_t->copy_to_cpu(rec_idx.data());
std::vector<int> pred_idx;
for (int n = int(rec_idx_lod[0][0]); n < int(rec_idx_lod[0][1]); n++) {
pred_idx.push_back(int(rec_idx[n]));
}
if (pred_idx.size() < 1e-3)
continue;
index += 1;
std::cout << index << "\t";
for (int n = 0; n < pred_idx.size(); n++) {
std::cout << label_list_[pred_idx[n]];
}
std::vector<float> predict_batch;
auto output_t_1 = this->predictor_->GetOutputTensor(output_names[1]);
auto predict_lod = output_t_1->lod();
auto predict_shape = output_t_1->shape();
int out_num_1 = std::accumulate(predict_shape.begin(), predict_shape.end(),
1, std::multiplies<int>());
predict_batch.resize(out_num_1);
output_t_1->copy_to_cpu(predict_batch.data());
int argmax_idx;
int blank = predict_shape[1];
float score = 0.f;
int count = 0;
float max_value = 0.0f;
for (int n = predict_lod[0][0]; n < predict_lod[0][1] - 1; n++) {
argmax_idx =
int(Utility::argmax(&predict_batch[n * predict_shape[1]],
&predict_batch[(n + 1) * predict_shape[1]]));
max_value =
float(*std::max_element(&predict_batch[n * predict_shape[1]],
&predict_batch[(n + 1) * predict_shape[1]]));
if (blank - 1 - argmax_idx > 1e-5) {
score += max_value;
count += 1;
}
}
score /= count;
std::cout << "\tscore: " << score << std::endl;
}
}
void CRNNRecognizer::LoadModel(const std::string &model_dir) {
AnalysisConfig config;
config.SetModel(model_dir + "/model", model_dir + "/params");
if (this->use_gpu_) {
config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
} else {
config.DisableGpu();
if (this->use_mkldnn_) {
config.EnableMKLDNN();
}
config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
}
// false for zero copy tensor
config.SwitchUseFeedFetchOps(false);
// true for multiple input
config.SwitchSpecifyInputNames(true);
config.SwitchIrOptim(true);
config.EnableMemoryOptim();
config.DisableGlogInfo();
this->predictor_ = CreatePaddlePredictor(config);
}
cv::Mat CRNNRecognizer::GetRotateCropImage(const cv::Mat &srcimage,
std::vector<std::vector<int>> box) {
cv::Mat image;
srcimage.copyTo(image);
std::vector<std::vector<int>> points = box;
int x_collect[4] = {box[0][0], box[1][0], box[2][0], box[3][0]};
int y_collect[4] = {box[0][1], box[1][1], box[2][1], box[3][1]};
int left = int(*std::min_element(x_collect, x_collect + 4));
int right = int(*std::max_element(x_collect, x_collect + 4));
int top = int(*std::min_element(y_collect, y_collect + 4));
int bottom = int(*std::max_element(y_collect, y_collect + 4));
cv::Mat img_crop;
image(cv::Rect(left, top, right - left, bottom - top)).copyTo(img_crop);
for (int i = 0; i < points.size(); i++) {
points[i][0] -= left;
points[i][1] -= top;
}
int img_crop_width = int(sqrt(pow(points[0][0] - points[1][0], 2) +
pow(points[0][1] - points[1][1], 2)));
int img_crop_height = int(sqrt(pow(points[0][0] - points[3][0], 2) +
pow(points[0][1] - points[3][1], 2)));
cv::Point2f pts_std[4];
pts_std[0] = cv::Point2f(0., 0.);
pts_std[1] = cv::Point2f(img_crop_width, 0.);
pts_std[2] = cv::Point2f(img_crop_width, img_crop_height);
pts_std[3] = cv::Point2f(0.f, img_crop_height);
cv::Point2f pointsf[4];
pointsf[0] = cv::Point2f(points[0][0], points[0][1]);
pointsf[1] = cv::Point2f(points[1][0], points[1][1]);
pointsf[2] = cv::Point2f(points[2][0], points[2][1]);
pointsf[3] = cv::Point2f(points[3][0], points[3][1]);
cv::Mat M = cv::getPerspectiveTransform(pointsf, pts_std);
cv::Mat dst_img;
cv::warpPerspective(img_crop, dst_img, M,
cv::Size(img_crop_width, img_crop_height),
cv::BORDER_REPLICATE);
if (float(dst_img.rows) >= float(dst_img.cols) * 1.5) {
cv::Mat srcCopy = cv::Mat(dst_img.rows, dst_img.cols, dst_img.depth());
cv::transpose(dst_img, srcCopy);
cv::flip(srcCopy, srcCopy, 0);
return srcCopy;
} else {
return dst_img;
}
}
} // namespace PaddleOCR
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <include/postprocess_op.h>
namespace PaddleOCR {
void PostProcessor::GetContourArea(const std::vector<std::vector<float>> &box,
float unclip_ratio, float &distance) {
int pts_num = 4;
float area = 0.0f;
float dist = 0.0f;
for (int i = 0; i < pts_num; i++) {
area += box[i][0] * box[(i + 1) % pts_num][1] -
box[i][1] * box[(i + 1) % pts_num][0];
dist += sqrtf((box[i][0] - box[(i + 1) % pts_num][0]) *
(box[i][0] - box[(i + 1) % pts_num][0]) +
(box[i][1] - box[(i + 1) % pts_num][1]) *
(box[i][1] - box[(i + 1) % pts_num][1]));
}
area = fabs(float(area / 2.0));
distance = area * unclip_ratio / dist;
}
cv::RotatedRect PostProcessor::UnClip(std::vector<std::vector<float>> box,
const float &unclip_ratio) {
float distance = 1.0;
GetContourArea(box, unclip_ratio, distance);
ClipperLib::ClipperOffset offset;
ClipperLib::Path p;
p << ClipperLib::IntPoint(int(box[0][0]), int(box[0][1]))
<< ClipperLib::IntPoint(int(box[1][0]), int(box[1][1]))
<< ClipperLib::IntPoint(int(box[2][0]), int(box[2][1]))
<< ClipperLib::IntPoint(int(box[3][0]), int(box[3][1]));
offset.AddPath(p, ClipperLib::jtRound, ClipperLib::etClosedPolygon);
ClipperLib::Paths soln;
offset.Execute(soln, distance);
std::vector<cv::Point2f> points;
for (int j = 0; j < soln.size(); j++) {
for (int i = 0; i < soln[soln.size() - 1].size(); i++) {
points.emplace_back(soln[j][i].X, soln[j][i].Y);
}
}
cv::RotatedRect res;
if (points.size() <= 0) {
res = cv::RotatedRect(cv::Point2f(0, 0), cv::Size2f(1, 1), 0);
} else {
res = cv::minAreaRect(points);
}
return res;
}
float **PostProcessor::Mat2Vec(cv::Mat mat) {
auto **array = new float *[mat.rows];
for (int i = 0; i < mat.rows; ++i)
array[i] = new float[mat.cols];
for (int i = 0; i < mat.rows; ++i) {
for (int j = 0; j < mat.cols; ++j) {
array[i][j] = mat.at<float>(i, j);
}
}
return array;
}
std::vector<std::vector<int>>
PostProcessor::OrderPointsClockwise(std::vector<std::vector<int>> pts) {
std::vector<std::vector<int>> box = pts;
std::sort(box.begin(), box.end(), XsortInt);
std::vector<std::vector<int>> leftmost = {box[0], box[1]};
std::vector<std::vector<int>> rightmost = {box[2], box[3]};
if (leftmost[0][1] > leftmost[1][1])
std::swap(leftmost[0], leftmost[1]);
if (rightmost[0][1] > rightmost[1][1])
std::swap(rightmost[0], rightmost[1]);
std::vector<std::vector<int>> rect = {leftmost[0], rightmost[0], rightmost[1],
leftmost[1]};
return rect;
}
std::vector<std::vector<float>> PostProcessor::Mat2Vector(cv::Mat mat) {
std::vector<std::vector<float>> img_vec;
std::vector<float> tmp;
for (int i = 0; i < mat.rows; ++i) {
tmp.clear();
for (int j = 0; j < mat.cols; ++j) {
tmp.push_back(mat.at<float>(i, j));
}
img_vec.push_back(tmp);
}
return img_vec;
}
bool PostProcessor::XsortFp32(std::vector<float> a, std::vector<float> b) {
if (a[0] != b[0])
return a[0] < b[0];
return false;
}
bool PostProcessor::XsortInt(std::vector<int> a, std::vector<int> b) {
if (a[0] != b[0])
return a[0] < b[0];
return false;
}
std::vector<std::vector<float>> PostProcessor::GetMiniBoxes(cv::RotatedRect box,
float &ssid) {
ssid = std::max(box.size.width, box.size.height);
cv::Mat points;
cv::boxPoints(box, points);
auto array = Mat2Vector(points);
std::sort(array.begin(), array.end(), XsortFp32);
std::vector<float> idx1 = array[0], idx2 = array[1], idx3 = array[2],
idx4 = array[3];
if (array[3][1] <= array[2][1]) {
idx2 = array[3];
idx3 = array[2];
} else {
idx2 = array[2];
idx3 = array[3];
}
if (array[1][1] <= array[0][1]) {
idx1 = array[1];
idx4 = array[0];
} else {
idx1 = array[0];
idx4 = array[1];
}
array[0] = idx1;
array[1] = idx2;
array[2] = idx3;
array[3] = idx4;
return array;
}
float PostProcessor::BoxScoreFast(std::vector<std::vector<float>> box_array,
cv::Mat pred) {
auto array = box_array;
int width = pred.cols;
int height = pred.rows;
float box_x[4] = {array[0][0], array[1][0], array[2][0], array[3][0]};
float box_y[4] = {array[0][1], array[1][1], array[2][1], array[3][1]};
int xmin = clamp(int(std::floor(*(std::min_element(box_x, box_x + 4)))), 0,
width - 1);
int xmax = clamp(int(std::ceil(*(std::max_element(box_x, box_x + 4)))), 0,
width - 1);
int ymin = clamp(int(std::floor(*(std::min_element(box_y, box_y + 4)))), 0,
height - 1);
int ymax = clamp(int(std::ceil(*(std::max_element(box_y, box_y + 4)))), 0,
height - 1);
cv::Mat mask;
mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1);
cv::Point root_point[4];
root_point[0] = cv::Point(int(array[0][0]) - xmin, int(array[0][1]) - ymin);
root_point[1] = cv::Point(int(array[1][0]) - xmin, int(array[1][1]) - ymin);
root_point[2] = cv::Point(int(array[2][0]) - xmin, int(array[2][1]) - ymin);
root_point[3] = cv::Point(int(array[3][0]) - xmin, int(array[3][1]) - ymin);
const cv::Point *ppt[1] = {root_point};
int npt[] = {4};
cv::fillPoly(mask, ppt, npt, 1, cv::Scalar(1));
cv::Mat croppedImg;
pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1))
.copyTo(croppedImg);
auto score = cv::mean(croppedImg, mask)[0];
return score;
}
std::vector<std::vector<std::vector<int>>>
PostProcessor::BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap,
const float &box_thresh,
const float &det_db_unclip_ratio) {
const int min_size = 3;
const int max_candidates = 1000;
int width = bitmap.cols;
int height = bitmap.rows;
std::vector<std::vector<cv::Point>> contours;
std::vector<cv::Vec4i> hierarchy;
cv::findContours(bitmap, contours, hierarchy, cv::RETR_LIST,
cv::CHAIN_APPROX_SIMPLE);
int num_contours =
contours.size() >= max_candidates ? max_candidates : contours.size();
std::vector<std::vector<std::vector<int>>> boxes;
for (int _i = 0; _i < num_contours; _i++) {
if (contours[_i].size() <= 2) {
continue;
}
float ssid;
cv::RotatedRect box = cv::minAreaRect(contours[_i]);
auto array = GetMiniBoxes(box, ssid);
auto box_for_unclip = array;
// end get_mini_box
if (ssid < min_size) {
continue;
}
float score;
score = BoxScoreFast(array, pred);
if (score < box_thresh)
continue;
// start for unclip
cv::RotatedRect points = UnClip(box_for_unclip, det_db_unclip_ratio);
if (points.size.height < 1.001 && points.size.width < 1.001) {
continue;
}
// end for unclip
cv::RotatedRect clipbox = points;
auto cliparray = GetMiniBoxes(clipbox, ssid);
if (ssid < min_size + 2)
continue;
int dest_width = pred.cols;
int dest_height = pred.rows;
std::vector<std::vector<int>> intcliparray;
for (int num_pt = 0; num_pt < 4; num_pt++) {
std::vector<int> a{int(clampf(roundf(cliparray[num_pt][0] / float(width) *
float(dest_width)),
0, float(dest_width))),
int(clampf(roundf(cliparray[num_pt][1] /
float(height) * float(dest_height)),
0, float(dest_height)))};
intcliparray.push_back(a);
}
boxes.push_back(intcliparray);
} // end for
return boxes;
}
std::vector<std::vector<std::vector<int>>>
PostProcessor::FilterTagDetRes(std::vector<std::vector<std::vector<int>>> boxes,
float ratio_h, float ratio_w, cv::Mat srcimg) {
int oriimg_h = srcimg.rows;
int oriimg_w = srcimg.cols;
std::vector<std::vector<std::vector<int>>> root_points;
for (int n = 0; n < boxes.size(); n++) {
boxes[n] = OrderPointsClockwise(boxes[n]);
for (int m = 0; m < boxes[0].size(); m++) {
boxes[n][m][0] /= ratio_w;
boxes[n][m][1] /= ratio_h;
boxes[n][m][0] = int(_min(_max(boxes[n][m][0], 0), oriimg_w - 1));
boxes[n][m][1] = int(_min(_max(boxes[n][m][1], 0), oriimg_h - 1));
}
}
for (int n = 0; n < boxes.size(); n++) {
int rect_width, rect_height;
rect_width = int(sqrt(pow(boxes[n][0][0] - boxes[n][1][0], 2) +
pow(boxes[n][0][1] - boxes[n][1][1], 2)));
rect_height = int(sqrt(pow(boxes[n][0][0] - boxes[n][3][0], 2) +
pow(boxes[n][0][1] - boxes[n][3][1], 2)));
if (rect_width <= 10 || rect_height <= 10)
continue;
root_points.push_back(boxes[n]);
}
return root_points;
}
} // namespace PaddleOCR
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h"
#include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/preprocess_op.h>
namespace PaddleOCR {
void Permute::Run(const cv::Mat *im, float *data) {
int rh = im->rows;
int rw = im->cols;
int rc = im->channels();
for (int i = 0; i < rc; ++i) {
cv::extractChannel(*im, cv::Mat(rh, rw, CV_32FC1, data + i * rh * rw), i);
}
}
void Normalize::Run(cv::Mat *im, const std::vector<float> &mean,
const std::vector<float> &scale, const bool is_scale) {
double e = 1.0;
if (is_scale) {
e /= 255.0;
}
(*im).convertTo(*im, CV_32FC3, e);
for (int h = 0; h < im->rows; h++) {
for (int w = 0; w < im->cols; w++) {
im->at<cv::Vec3f>(h, w)[0] =
(im->at<cv::Vec3f>(h, w)[0] - mean[0]) * scale[0];
im->at<cv::Vec3f>(h, w)[1] =
(im->at<cv::Vec3f>(h, w)[1] - mean[1]) * scale[1];
im->at<cv::Vec3f>(h, w)[2] =
(im->at<cv::Vec3f>(h, w)[2] - mean[2]) * scale[2];
}
}
}
void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
int max_size_len, float &ratio_h, float &ratio_w) {
int w = img.cols;
int h = img.rows;
float ratio = 1.f;
int max_wh = w >= h ? w : h;
if (max_wh > max_size_len) {
if (h > w) {
ratio = float(max_size_len) / float(h);
} else {
ratio = float(max_size_len) / float(w);
}
}
int resize_h = int(float(h) * ratio);
int resize_w = int(float(w) * ratio);
if (resize_h % 32 == 0)
resize_h = resize_h;
else if (resize_h / 32 < 1 + 1e-5)
resize_h = 32;
else
resize_h = (resize_h / 32 - 1) * 32;
if (resize_w % 32 == 0)
resize_w = resize_w;
else if (resize_w / 32 < 1)
resize_w = 32;
else
resize_w = (resize_w / 32 - 1) * 32;
cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
ratio_h = float(resize_h) / float(h);
ratio_w = float(resize_w) / float(w);
}
void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
const std::vector<int> &rec_image_shape) {
int imgC, imgH, imgW;
imgC = rec_image_shape[0];
imgH = rec_image_shape[1];
imgW = rec_image_shape[2];
imgW = int(32 * wh_ratio);
float ratio = float(img.cols) / float(img.rows);
int resize_w, resize_h;
if (ceilf(imgH * ratio) > imgW)
resize_w = imgW;
else
resize_w = int(ceilf(imgH * ratio));
cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
cv::INTER_LINEAR);
}
} // namespace PaddleOCR
\ No newline at end of file
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <ostream>
#include <vector>
#include <include/utility.h>
namespace PaddleOCR {
std::vector<std::string> Utility::ReadDict(const std::string &path) {
std::ifstream in(path);
std::string line;
std::vector<std::string> m_vec;
if (in) {
while (getline(in, line)) {
m_vec.push_back(line);
}
} else {
std::cout << "no such label file: " << path << ", exit the program..."
<< std::endl;
exit(1);
}
return m_vec;
}
void Utility::VisualizeBboxes(
const cv::Mat &srcimg,
const std::vector<std::vector<std::vector<int>>> &boxes) {
cv::Point rook_points[boxes.size()][4];
for (int n = 0; n < boxes.size(); n++) {
for (int m = 0; m < boxes[0].size(); m++) {
rook_points[n][m] = cv::Point(int(boxes[n][m][0]), int(boxes[n][m][1]));
}
}
cv::Mat img_vis;
srcimg.copyTo(img_vis);
for (int n = 0; n < boxes.size(); n++) {
const cv::Point *ppt[1] = {rook_points[n]};
int npt[] = {4};
cv::polylines(img_vis, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0);
}
cv::imwrite("./ocr_vis.png", img_vis);
std::cout << "The detection visualized image saved in ./ocr_vis.png.pn"
<< std::endl;
}
} // namespace PaddleOCR
\ No newline at end of file
OPENCV_DIR=your_opencv_dir
LIB_DIR=your_paddle_inference_dir
CUDA_LIB_DIR=your_cuda_lib_dir
CUDNN_LIB_DIR=/your_cudnn_lib_dir
BUILD_DIR=build
rm -rf ${BUILD_DIR}
mkdir ${BUILD_DIR}
cd ${BUILD_DIR}
cmake .. \
-DPADDLE_LIB=${LIB_DIR} \
-DWITH_MKL=ON \
-DDEMO_NAME=ocr_system \
-DWITH_GPU=OFF \
-DWITH_STATIC_LIB=OFF \
-DUSE_TENSORRT=OFF \
-DOPENCV_DIR=${OPENCV_DIR} \
-DCUDNN_LIB=${CUDNN_LIB_DIR} \
-DCUDA_LIB=${CUDA_LIB_DIR} \
make -j
# model load config
use_gpu 0
gpu_id 0
gpu_mem 4000
cpu_math_library_num_threads 10
use_mkldnn 0
# det config
max_side_len 960
det_db_thresh 0.3
det_db_box_thresh 0.5
det_db_unclip_ratio 2.0
det_model_dir ./inference/det_db
# rec config
rec_model_dir ./inference/rec_crnn
char_list_file ../../ppocr/utils/ppocr_keys_v1.txt
img_path ../../doc/imgs/11.jpg
# show the detection results
visualize 0
./build/ocr_system ./tools/config.txt ../../doc/imgs/12.jpg
{
"modules_info": {
"ocr_det": {
"init_args": {
"version": "1.0.0",
"use_gpu": true
},
"predict_args": {
}
}
},
"port": 8866,
"use_multiprocess": false,
"workers": 2
}
# -*- coding:utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import ast
import copy
import math
import os
import time
from paddle.fluid.core import AnalysisConfig, create_paddle_predictor, PaddleTensor
from paddlehub.common.logger import logger
from paddlehub.module.module import moduleinfo, runnable, serving
from PIL import Image
import cv2
import numpy as np
import paddle.fluid as fluid
import paddlehub as hub
from tools.infer.utility import base64_to_cv2
from tools.infer.predict_det import TextDetector
@moduleinfo(
name="ocr_det",
version="1.0.0",
summary="ocr detection service",
author="paddle-dev",
author_email="paddle-dev@baidu.com",
type="cv/text_recognition")
class OCRDet(hub.Module):
def _initialize(self, use_gpu=False, enable_mkldnn=False):
"""
initialize with the necessary elements
"""
from ocr_det.params import read_params
cfg = read_params()
cfg.use_gpu = use_gpu
if use_gpu:
try:
_places = os.environ["CUDA_VISIBLE_DEVICES"]
int(_places[0])
print("use gpu: ", use_gpu)
print("CUDA_VISIBLE_DEVICES: ", _places)
cfg.gpu_mem = 8000
except:
raise RuntimeError(
"Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES via export CUDA_VISIBLE_DEVICES=cuda_device_id."
)
cfg.ir_optim = True
cfg.enable_mkldnn = enable_mkldnn
self.text_detector = TextDetector(cfg)
def read_images(self, paths=[]):
images = []
for img_path in paths:
assert os.path.isfile(
img_path), "The {} isn't a valid file.".format(img_path)
img = cv2.imread(img_path)
if img is None:
logger.info("error in loading image:{}".format(img_path))
continue
images.append(img)
return images
def predict(self,
images=[],
paths=[]):
"""
Get the text box in the predicted images.
Args:
images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths
paths (list[str]): The paths of images. If paths not images
Returns:
res (list): The result of text detection box and save path of images.
"""
if images != [] and isinstance(images, list) and paths == []:
predicted_data = images
elif images == [] and isinstance(paths, list) and paths != []:
predicted_data = self.read_images(paths)
else:
raise TypeError("The input data is inconsistent with expectations.")
assert predicted_data != [], "There is not any image to be predicted. Please check the input data."
all_results = []
for img in predicted_data:
if img is None:
logger.info("error in loading image")
all_results.append([])
continue
dt_boxes, elapse = self.text_detector(img)
logger.info("Predict time : {}".format(elapse))
rec_res_final = []
for dno in range(len(dt_boxes)):
rec_res_final.append(
{
'text_region': dt_boxes[dno].astype(np.int).tolist()
}
)
all_results.append(rec_res_final)
return all_results
@serving
def serving_method(self, images, **kwargs):
"""
Run as a service.
"""
images_decode = [base64_to_cv2(image) for image in images]
results = self.predict(images_decode, **kwargs)
return results
if __name__ == '__main__':
ocr = OCRDet()
image_path = [
'./doc/imgs/11.jpg',
'./doc/imgs/12.jpg',
]
res = ocr.predict(paths=image_path)
print(res)
\ No newline at end of file
# -*- coding:utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
class Config(object):
pass
def read_params():
cfg = Config()
#params for text detector
cfg.det_algorithm = "DB"
cfg.det_model_dir = "./inference/ch_det_mv3_db/"
cfg.det_max_side_len = 960
#DB parmas
cfg.det_db_thresh =0.3
cfg.det_db_box_thresh =0.5
cfg.det_db_unclip_ratio =2.0
# #EAST parmas
# cfg.det_east_score_thresh = 0.8
# cfg.det_east_cover_thresh = 0.1
# cfg.det_east_nms_thresh = 0.2
# #params for text recognizer
# cfg.rec_algorithm = "CRNN"
# cfg.rec_model_dir = "./inference/ch_det_mv3_crnn/"
# cfg.rec_image_shape = "3, 32, 320"
# cfg.rec_char_type = 'ch'
# cfg.rec_batch_num = 30
# cfg.rec_char_dict_path = "./ppocr/utils/ppocr_keys_v1.txt"
# cfg.use_space_char = True
return cfg
\ No newline at end of file
{
"modules_info": {
"ocr_rec": {
"init_args": {
"version": "1.0.0",
"use_gpu": true
},
"predict_args": {
}
}
},
"port": 8867,
"use_multiprocess": false,
"workers": 2
}
# -*- coding:utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import ast
import copy
import math
import os
import time
from paddle.fluid.core import AnalysisConfig, create_paddle_predictor, PaddleTensor
from paddlehub.common.logger import logger
from paddlehub.module.module import moduleinfo, runnable, serving
from PIL import Image
import cv2
import numpy as np
import paddle.fluid as fluid
import paddlehub as hub
from tools.infer.utility import base64_to_cv2
from tools.infer.predict_rec import TextRecognizer
@moduleinfo(
name="ocr_rec",
version="1.0.0",
summary="ocr recognition service",
author="paddle-dev",
author_email="paddle-dev@baidu.com",
type="cv/text_recognition")
class OCRRec(hub.Module):
def _initialize(self, use_gpu=False, enable_mkldnn=False):
"""
initialize with the necessary elements
"""
from ocr_rec.params import read_params
cfg = read_params()
cfg.use_gpu = use_gpu
if use_gpu:
try:
_places = os.environ["CUDA_VISIBLE_DEVICES"]
int(_places[0])
print("use gpu: ", use_gpu)
print("CUDA_VISIBLE_DEVICES: ", _places)
cfg.gpu_mem = 8000
except:
raise RuntimeError(
"Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES via export CUDA_VISIBLE_DEVICES=cuda_device_id."
)
cfg.ir_optim = True
cfg.enable_mkldnn = enable_mkldnn
self.text_recognizer = TextRecognizer(cfg)
def read_images(self, paths=[]):
images = []
for img_path in paths:
assert os.path.isfile(
img_path), "The {} isn't a valid file.".format(img_path)
img = cv2.imread(img_path)
if img is None:
logger.info("error in loading image:{}".format(img_path))
continue
images.append(img)
return images
def predict(self,
images=[],
paths=[]):
"""
Get the text box in the predicted images.
Args:
images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths
paths (list[str]): The paths of images. If paths not images
Returns:
res (list): The result of text detection box and save path of images.
"""
if images != [] and isinstance(images, list) and paths == []:
predicted_data = images
elif images == [] and isinstance(paths, list) and paths != []:
predicted_data = self.read_images(paths)
else:
raise TypeError("The input data is inconsistent with expectations.")
assert predicted_data != [], "There is not any image to be predicted. Please check the input data."
img_list = []
for img in predicted_data:
if img is None:
continue
img_list.append(img)
rec_res_final = []
try:
rec_res, predict_time = self.text_recognizer(img_list)
for dno in range(len(rec_res)):
text, score = rec_res[dno]
rec_res_final.append(
{
'text': text,
'confidence': float(score),
}
)
except Exception as e:
print(e)
return [[]]
return [rec_res_final]
@serving
def serving_method(self, images, **kwargs):
"""
Run as a service.
"""
images_decode = [base64_to_cv2(image) for image in images]
results = self.predict(images_decode, **kwargs)
return results
if __name__ == '__main__':
ocr = OCRRec()
image_path = [
'./doc/imgs_words/ch/word_1.jpg',
'./doc/imgs_words/ch/word_2.jpg',
'./doc/imgs_words/ch/word_3.jpg',
]
res = ocr.predict(paths=image_path)
print(res)
\ No newline at end of file
# -*- coding:utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
class Config(object):
pass
def read_params():
cfg = Config()
# #params for text detector
# cfg.det_algorithm = "DB"
# cfg.det_model_dir = "./inference/ch_det_mv3_db/"
# cfg.det_max_side_len = 960
# #DB parmas
# cfg.det_db_thresh =0.3
# cfg.det_db_box_thresh =0.5
# cfg.det_db_unclip_ratio =2.0
# #EAST parmas
# cfg.det_east_score_thresh = 0.8
# cfg.det_east_cover_thresh = 0.1
# cfg.det_east_nms_thresh = 0.2
#params for text recognizer
cfg.rec_algorithm = "CRNN"
cfg.rec_model_dir = "./inference/ch_rec_mv3_crnn/"
cfg.rec_image_shape = "3, 32, 320"
cfg.rec_char_type = 'ch'
cfg.rec_batch_num = 30
cfg.rec_char_dict_path = "./ppocr/utils/ppocr_keys_v1.txt"
cfg.use_space_char = True
return cfg
\ No newline at end of file
{
"modules_info": {
"ocr_system": {
"init_args": {
"version": "1.0.0",
"use_gpu": true
},
"predict_args": {
}
}
},
"port": 8868,
"use_multiprocess": false,
"workers": 2
}
# -*- coding:utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import ast
import copy
import math
import os
import time
from paddle.fluid.core import AnalysisConfig, create_paddle_predictor, PaddleTensor
from paddlehub.common.logger import logger
from paddlehub.module.module import moduleinfo, runnable, serving
from PIL import Image
import cv2
import numpy as np
import paddle.fluid as fluid
import paddlehub as hub
from tools.infer.utility import base64_to_cv2
from tools.infer.predict_system import TextSystem
@moduleinfo(
name="ocr_system",
version="1.0.0",
summary="ocr system service",
author="paddle-dev",
author_email="paddle-dev@baidu.com",
type="cv/text_recognition")
class OCRSystem(hub.Module):
def _initialize(self, use_gpu=False, enable_mkldnn=False):
"""
initialize with the necessary elements
"""
from ocr_system.params import read_params
cfg = read_params()
cfg.use_gpu = use_gpu
if use_gpu:
try:
_places = os.environ["CUDA_VISIBLE_DEVICES"]
int(_places[0])
print("use gpu: ", use_gpu)
print("CUDA_VISIBLE_DEVICES: ", _places)
cfg.gpu_mem = 8000
except:
raise RuntimeError(
"Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES via export CUDA_VISIBLE_DEVICES=cuda_device_id."
)
cfg.ir_optim = True
cfg.enable_mkldnn = enable_mkldnn
self.text_sys = TextSystem(cfg)
def read_images(self, paths=[]):
images = []
for img_path in paths:
assert os.path.isfile(
img_path), "The {} isn't a valid file.".format(img_path)
img = cv2.imread(img_path)
if img is None:
logger.info("error in loading image:{}".format(img_path))
continue
images.append(img)
return images
def predict(self,
images=[],
paths=[]):
"""
Get the chinese texts in the predicted images.
Args:
images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths
paths (list[str]): The paths of images. If paths not images
Returns:
res (list): The result of chinese texts and save path of images.
"""
if images != [] and isinstance(images, list) and paths == []:
predicted_data = images
elif images == [] and isinstance(paths, list) and paths != []:
predicted_data = self.read_images(paths)
else:
raise TypeError("The input data is inconsistent with expectations.")
assert predicted_data != [], "There is not any image to be predicted. Please check the input data."
all_results = []
for img in predicted_data:
if img is None:
logger.info("error in loading image")
all_results.append([])
continue
starttime = time.time()
dt_boxes, rec_res = self.text_sys(img)
elapse = time.time() - starttime
logger.info("Predict time: {}".format(elapse))
dt_num = len(dt_boxes)
rec_res_final = []
for dno in range(dt_num):
text, score = rec_res[dno]
rec_res_final.append(
{
'text': text,
'confidence': float(score),
'text_region': dt_boxes[dno].astype(np.int).tolist()
}
)
all_results.append(rec_res_final)
return all_results
@serving
def serving_method(self, images, **kwargs):
"""
Run as a service.
"""
images_decode = [base64_to_cv2(image) for image in images]
results = self.predict(images_decode, **kwargs)
return results
if __name__ == '__main__':
ocr = OCRSystem()
image_path = [
'./doc/imgs/11.jpg',
'./doc/imgs/12.jpg',
]
res = ocr.predict(paths=image_path)
print(res)
\ No newline at end of file
# -*- coding:utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
class Config(object):
pass
def read_params():
cfg = Config()
#params for text detector
cfg.det_algorithm = "DB"
cfg.det_model_dir = "./inference/ch_det_mv3_db/"
cfg.det_max_side_len = 960
#DB parmas
cfg.det_db_thresh =0.3
cfg.det_db_box_thresh =0.5
cfg.det_db_unclip_ratio =2.0
#EAST parmas
cfg.det_east_score_thresh = 0.8
cfg.det_east_cover_thresh = 0.1
cfg.det_east_nms_thresh = 0.2
#params for text recognizer
cfg.rec_algorithm = "CRNN"
cfg.rec_model_dir = "./inference/ch_rec_mv3_crnn/"
cfg.rec_image_shape = "3, 32, 320"
cfg.rec_char_type = 'ch'
cfg.rec_batch_num = 30
cfg.rec_char_dict_path = "./ppocr/utils/ppocr_keys_v1.txt"
cfg.use_space_char = True
return cfg
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment