Commit b9098935 by Leif
Merge remote-tracking branch 'upstream/dygraph' into dy3
parents 47752ddf 0e32093f
//
// timer.h
// face_demo
//
// Created by Li,Xiaoyang(SYS) on 2019/8/20.
// Copyright © 2019 Li,Xiaoyang(SYS). All rights reserved.
//
#ifndef timer_h
#define timer_h
#include <chrono>
#include <list>
class Timer final {
public:
Timer() {}
~Timer() {}
void clear() {
ms_time.clear();
}
void start() {
tstart = std::chrono::system_clock::now();
}
void end() {
tend = std::chrono::system_clock::now();
auto ts = std::chrono::duration_cast<std::chrono::microseconds>(tend - tstart);
// ts is in microseconds; convert to milliseconds.
float elapse_ms = static_cast<float>(ts.count()) / 1000.f;
ms_time.push_back(elapse_ms);
}
float get_average_ms() {
if (ms_time.size() == 0) {
return 0.f;
}
float sum = 0.f;
for (auto i : ms_time){
sum += i;
}
return sum / ms_time.size();
}
float get_sum_ms(){
if (ms_time.size() == 0) {
return 0.f;
}
float sum = 0.f;
for (auto i : ms_time){
sum += i;
}
return sum;
}
// Return the given percentile (tile in [0, 100]) of the recorded times.
float get_tile_time(float tile) {
if (tile < 0 || tile > 100) {
return -1.f;
}
int total_items = (int)ms_time.size();
if (total_items <= 0) {
return -2.f;
}
ms_time.sort();
int pos = (int)(tile * total_items / 100);
if (pos >= total_items) {
pos = total_items - 1;
}
auto it = ms_time.begin();
for (int i = 0; i < pos; ++i) {
++it;
}
return *it;
}
const std::list<float>& get_time_stat() const {
return ms_time;
}
private:
std::chrono::time_point<std::chrono::system_clock> tstart;
std::chrono::time_point<std::chrono::system_clock> tend;
std::list<float> ms_time;
};
#endif /* timer_h */
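// Usage sketch (illustrative only, not part of the header): time a repeated
// operation and report the average and 99th-percentile latency in
// milliseconds; do_work() stands for an arbitrary workload.
//
//   Timer timer;
//   for (int i = 0; i < runs; ++i) {
//     timer.start();
//     do_work();
//     timer.end();
//   }
//   float avg_ms = timer.get_average_ms();
//   float p99_ms = timer.get_tile_time(99);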
ARM_ABI = arm8
export ARM_ABI
include ../Makefile.def
LITE_ROOT=../../../
THIRD_PARTY_DIR=${LITE_ROOT}/third_party
OPENCV_VERSION=opencv4.1.0
OPENCV_LIBS = ../../../third_party/${OPENCV_VERSION}/arm64-v8a/libs/libopencv_imgcodecs.a \
../../../third_party/${OPENCV_VERSION}/arm64-v8a/libs/libopencv_imgproc.a \
../../../third_party/${OPENCV_VERSION}/arm64-v8a/libs/libopencv_core.a \
../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/libtegra_hal.a \
../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/liblibjpeg-turbo.a \
../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/liblibwebp.a \
../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/liblibpng.a \
../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/liblibjasper.a \
../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/liblibtiff.a \
../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/libIlmImf.a \
../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/libtbb.a \
../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/libcpufeatures.a
OPENCV_INCLUDE = -I../../../third_party/${OPENCV_VERSION}/arm64-v8a/include
CXX_INCLUDES = $(INCLUDES) ${OPENCV_INCLUDE} -I$(LITE_ROOT)/cxx/include
CXX_LIBS = ${OPENCV_LIBS} -L$(LITE_ROOT)/cxx/lib/ -lpaddle_light_api_shared $(SYSTEM_LIBS)
###############################################################
# How to use one of the static libraries: #
# `libpaddle_api_full_bundled.a` #
# `libpaddle_api_light_bundled.a` #
###############################################################
# Note: the shared library is used by default. #
###############################################################
# 1. Comment out the line above that links `libpaddle_light_api_shared.so`
# 2. Uncomment the line below that links `libpaddle_api_light_bundled.a`
#CXX_LIBS = $(LITE_ROOT)/cxx/lib/libpaddle_api_light_bundled.a $(SYSTEM_LIBS)
ocr_db_crnn: fetch_opencv ocr_db_crnn.o crnn_process.o db_post_process.o clipper.o
$(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) ocr_db_crnn.o crnn_process.o db_post_process.o clipper.o -o ocr_db_crnn $(CXX_LIBS) $(LDFLAGS)
ocr_db_crnn.o: ocr_db_crnn.cc
$(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o ocr_db_crnn.o -c ocr_db_crnn.cc
crnn_process.o: fetch_opencv crnn_process.cc
$(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o crnn_process.o -c crnn_process.cc
db_post_process.o: fetch_clipper fetch_opencv db_post_process.cc
$(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o db_post_process.o -c db_post_process.cc
clipper.o: fetch_clipper
$(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o clipper.o -c clipper.cpp
fetch_clipper:
@test -e clipper.hpp || \
( echo "Fetch clipper " && \
wget -c https://paddle-inference-dist.cdn.bcebos.com/PaddleLite/Clipper/clipper.hpp)
@ test -e clipper.cpp || \
wget -c https://paddle-inference-dist.cdn.bcebos.com/PaddleLite/Clipper/clipper.cpp
fetch_opencv:
@ test -d ${THIRD_PARTY_DIR} || mkdir ${THIRD_PARTY_DIR}
@ test -e ${THIRD_PARTY_DIR}/${OPENCV_VERSION}.tar.gz || \
(echo "fetch opencv libs" && \
wget -P ${THIRD_PARTY_DIR} https://paddle-inference-dist.bj.bcebos.com/${OPENCV_VERSION}.tar.gz)
@ test -d ${THIRD_PARTY_DIR}/${OPENCV_VERSION} || \
tar -zxvf ${THIRD_PARTY_DIR}/${OPENCV_VERSION}.tar.gz -C ${THIRD_PARTY_DIR}
.PHONY: clean
clean:
rm -f ocr_db_crnn.o clipper.o db_post_process.o crnn_process.o
rm -f ocr_db_crnn
max_side_len 960
det_db_thresh 0.3
det_db_box_thresh 0.5
det_db_unclip_ratio 2.0
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "crnn_process.h" //NOLINT
#include <algorithm>
#include <memory>
#include <string>
const std::vector<int> rec_image_shape{3, 32, 320};
cv::Mat CrnnResizeImg(cv::Mat img, float wh_ratio) {
// The target height is fixed by rec_image_shape; the width scales with the
// aspect ratio of the cropped box.
int imgH = rec_image_shape[1];
int imgW = static_cast<int>(32 * wh_ratio);
float ratio = static_cast<float>(img.cols) / static_cast<float>(img.rows);
int resize_w, resize_h;
if (ceilf(imgH * ratio) > imgW)
resize_w = imgW;
else
resize_w = static_cast<int>(ceilf(imgH * ratio));
cv::Mat resize_img;
cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
cv::INTER_LINEAR);
return resize_img;
}
std::vector<std::string> ReadDict(std::string path) {
std::ifstream in(path);
std::string line;
std::vector<std::string> m_vec;
if (in) {
while (getline(in, line)) {
m_vec.push_back(line);
}
} else {
std::cerr << "Cannot open dictionary file: " << path << std::endl;
}
return m_vec;
}
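// Crop the axis-aligned bounding rectangle of the quadrilateral `box` from
// the source image, warp the quad to an upright rectangle via a perspective
// transform, and rotate the result 90 degrees when it is much taller than it
// is wide (likely vertical text).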
cv::Mat GetRotateCropImage(cv::Mat srcimage,
std::vector<std::vector<int>> box) {
cv::Mat image;
srcimage.copyTo(image);
std::vector<std::vector<int>> points = box;
int x_collect[4] = {box[0][0], box[1][0], box[2][0], box[3][0]};
int y_collect[4] = {box[0][1], box[1][1], box[2][1], box[3][1]};
int left = int(*std::min_element(x_collect, x_collect + 4));
int right = int(*std::max_element(x_collect, x_collect + 4));
int top = int(*std::min_element(y_collect, y_collect + 4));
int bottom = int(*std::max_element(y_collect, y_collect + 4));
cv::Mat img_crop;
image(cv::Rect(left, top, right - left, bottom - top)).copyTo(img_crop);
for (int i = 0; i < points.size(); i++) {
points[i][0] -= left;
points[i][1] -= top;
}
int img_crop_width =
static_cast<int>(sqrt(pow(points[0][0] - points[1][0], 2) +
pow(points[0][1] - points[1][1], 2)));
int img_crop_height =
static_cast<int>(sqrt(pow(points[0][0] - points[3][0], 2) +
pow(points[0][1] - points[3][1], 2)));
cv::Point2f pts_std[4];
pts_std[0] = cv::Point2f(0., 0.);
pts_std[1] = cv::Point2f(img_crop_width, 0.);
pts_std[2] = cv::Point2f(img_crop_width, img_crop_height);
pts_std[3] = cv::Point2f(0.f, img_crop_height);
cv::Point2f pointsf[4];
pointsf[0] = cv::Point2f(points[0][0], points[0][1]);
pointsf[1] = cv::Point2f(points[1][0], points[1][1]);
pointsf[2] = cv::Point2f(points[2][0], points[2][1]);
pointsf[3] = cv::Point2f(points[3][0], points[3][1]);
cv::Mat M = cv::getPerspectiveTransform(pointsf, pts_std);
cv::Mat dst_img;
cv::warpPerspective(img_crop, dst_img, M,
cv::Size(img_crop_width, img_crop_height),
cv::BORDER_REPLICATE);
const float ratio = 1.5;
if (static_cast<float>(dst_img.rows) >=
static_cast<float>(dst_img.cols) * ratio) {
cv::Mat srcCopy = cv::Mat(dst_img.rows, dst_img.cols, dst_img.depth());
cv::transpose(dst_img, srcCopy);
cv::flip(srcCopy, srcCopy, 0);
return srcCopy;
} else {
return dst_img;
}
}
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "math.h" //NOLINT
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
cv::Mat CrnnResizeImg(cv::Mat img, float wh_ratio);
std::vector<std::string> ReadDict(std::string path);
cv::Mat GetRotateCropImage(cv::Mat srcimage, std::vector<std::vector<int>> box);
template <class ForwardIterator>
inline size_t Argmax(ForwardIterator first, ForwardIterator last) {
return std::distance(first, std::max_element(first, last));
}
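// Usage sketch (illustrative only): Argmax returns the offset of the largest
// element in a range, e.g. picking the most probable character index from one
// time step of the CRNN output (`probs` is a hypothetical buffer).
//
//   std::vector<float> probs = {0.1f, 0.7f, 0.2f};
//   size_t best = Argmax(probs.begin(), probs.end());  // best == 1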
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "db_post_process.h" // NOLINT
#include <algorithm>
#include <utility>
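// GetContourArea computes the polygon area (shoelace formula) and perimeter
// of the 4-point `box`, then derives the offset distance used by Unclip:
//   distance = area * unclip_ratio / perimeter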
void GetContourArea(std::vector<std::vector<float>> box, float unclip_ratio,
float &distance) {
int pts_num = 4;
float area = 0.0f;
float dist = 0.0f;
for (int i = 0; i < pts_num; i++) {
area += box[i][0] * box[(i + 1) % pts_num][1] -
box[i][1] * box[(i + 1) % pts_num][0];
dist += sqrtf((box[i][0] - box[(i + 1) % pts_num][0]) *
(box[i][0] - box[(i + 1) % pts_num][0]) +
(box[i][1] - box[(i + 1) % pts_num][1]) *
(box[i][1] - box[(i + 1) % pts_num][1]));
}
area = fabs(float(area / 2.0));
distance = area * unclip_ratio / dist;
}
cv::RotatedRect Unclip(std::vector<std::vector<float>> box,
float unclip_ratio) {
float distance = 1.0;
GetContourArea(box, unclip_ratio, distance);
ClipperLib::ClipperOffset offset;
ClipperLib::Path p;
p << ClipperLib::IntPoint(static_cast<int>(box[0][0]),
static_cast<int>(box[0][1]))
<< ClipperLib::IntPoint(static_cast<int>(box[1][0]),
static_cast<int>(box[1][1]))
<< ClipperLib::IntPoint(static_cast<int>(box[2][0]),
static_cast<int>(box[2][1]))
<< ClipperLib::IntPoint(static_cast<int>(box[3][0]),
static_cast<int>(box[3][1]));
offset.AddPath(p, ClipperLib::jtRound, ClipperLib::etClosedPolygon);
ClipperLib::Paths soln;
offset.Execute(soln, distance);
std::vector<cv::Point2f> points;
for (int j = 0; j < soln.size(); j++) {
for (int i = 0; i < soln[j].size(); i++) {
points.emplace_back(soln[j][i].X, soln[j][i].Y);
}
}
cv::RotatedRect res = cv::minAreaRect(points);
return res;
}
std::vector<std::vector<float>> Mat2Vector(cv::Mat mat) {
std::vector<std::vector<float>> img_vec;
std::vector<float> tmp;
for (int i = 0; i < mat.rows; ++i) {
tmp.clear();
for (int j = 0; j < mat.cols; ++j) {
tmp.push_back(mat.at<float>(i, j));
}
img_vec.push_back(tmp);
}
return img_vec;
}
bool XsortFp32(std::vector<float> a, std::vector<float> b) {
if (a[0] != b[0])
return a[0] < b[0];
return false;
}
bool XsortInt(std::vector<int> a, std::vector<int> b) {
if (a[0] != b[0])
return a[0] < b[0];
return false;
}
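// Order the four points clockwise starting from the top-left corner: sort by
// x, split into the two leftmost and two rightmost points, then order each
// pair by y.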
std::vector<std::vector<int>>
OrderPointsClockwise(std::vector<std::vector<int>> pts) {
std::vector<std::vector<int>> box = pts;
std::sort(box.begin(), box.end(), XsortInt);
std::vector<std::vector<int>> leftmost = {box[0], box[1]};
std::vector<std::vector<int>> rightmost = {box[2], box[3]};
if (leftmost[0][1] > leftmost[1][1])
std::swap(leftmost[0], leftmost[1]);
if (rightmost[0][1] > rightmost[1][1])
std::swap(rightmost[0], rightmost[1]);
std::vector<std::vector<int>> rect = {leftmost[0], rightmost[0], rightmost[1],
leftmost[1]};
return rect;
}
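// Return the four corners of the rotated rect ordered top-left, top-right,
// bottom-right, bottom-left; `ssid` is set to the rect's longer side.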
std::vector<std::vector<float>> GetMiniBoxes(cv::RotatedRect box, float &ssid) {
ssid = std::max(box.size.width, box.size.height);
cv::Mat points;
cv::boxPoints(box, points);
auto array = Mat2Vector(points);
std::sort(array.begin(), array.end(), XsortFp32);
std::vector<float> idx1 = array[0], idx2 = array[1], idx3 = array[2],
idx4 = array[3];
if (array[3][1] <= array[2][1]) {
idx2 = array[3];
idx3 = array[2];
} else {
idx2 = array[2];
idx3 = array[3];
}
if (array[1][1] <= array[0][1]) {
idx1 = array[1];
idx4 = array[0];
} else {
idx1 = array[0];
idx4 = array[1];
}
array[0] = idx1;
array[1] = idx2;
array[2] = idx3;
array[3] = idx4;
return array;
}
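// Score a candidate box as the mean of the prediction map `pred` inside the
// box polygon, evaluated over the box's clipped bounding rectangle with a
// filled mask.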
float BoxScoreFast(std::vector<std::vector<float>> box_array, cv::Mat pred) {
auto array = box_array;
int width = pred.cols;
int height = pred.rows;
float box_x[4] = {array[0][0], array[1][0], array[2][0], array[3][0]};
float box_y[4] = {array[0][1], array[1][1], array[2][1], array[3][1]};
int xmin = clamp(
static_cast<int>(floorf(*(std::min_element(box_x, box_x + 4)))), 0,
width - 1);
int xmax =
clamp(static_cast<int>(ceilf(*(std::max_element(box_x, box_x + 4)))),
0, width - 1);
int ymin = clamp(
static_cast<int>(floorf(*(std::min_element(box_y, box_y + 4)))), 0,
height - 1);
int ymax =
clamp(static_cast<int>(ceilf(*(std::max_element(box_y, box_y + 4)))),
0, height - 1);
cv::Mat mask;
mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1);
cv::Point root_point[4];
root_point[0] = cv::Point(static_cast<int>(array[0][0]) - xmin,
static_cast<int>(array[0][1]) - ymin);
root_point[1] = cv::Point(static_cast<int>(array[1][0]) - xmin,
static_cast<int>(array[1][1]) - ymin);
root_point[2] = cv::Point(static_cast<int>(array[2][0]) - xmin,
static_cast<int>(array[2][1]) - ymin);
root_point[3] = cv::Point(static_cast<int>(array[3][0]) - xmin,
static_cast<int>(array[3][1]) - ymin);
const cv::Point *ppt[1] = {root_point};
int npt[] = {4};
cv::fillPoly(mask, ppt, npt, 1, cv::Scalar(1));
cv::Mat croppedImg;
pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1))
.copyTo(croppedImg);
auto score = cv::mean(croppedImg, mask)[0];
return score;
}
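// Extract text boxes from the binarized `bitmap`: find contours, take each
// contour's minimum-area rectangle, keep boxes whose mean score on `pred`
// exceeds det_db_box_thresh, expand them with Unclip, and map the points
// back to the prediction-map coordinate system.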
std::vector<std::vector<std::vector<int>>>
BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap,
std::map<std::string, double> Config) {
const int min_size = 3;
const int max_candidates = 1000;
const float box_thresh = static_cast<float>(Config["det_db_box_thresh"]);
const float unclip_ratio = static_cast<float>(Config["det_db_unclip_ratio"]);
int width = bitmap.cols;
int height = bitmap.rows;
std::vector<std::vector<cv::Point>> contours;
std::vector<cv::Vec4i> hierarchy;
cv::findContours(bitmap, contours, hierarchy, cv::RETR_LIST,
cv::CHAIN_APPROX_SIMPLE);
int num_contours =
contours.size() >= max_candidates ? max_candidates : contours.size();
std::vector<std::vector<std::vector<int>>> boxes;
for (int i = 0; i < num_contours; i++) {
float ssid;
if (contours[i].size() <= 2)
continue;
cv::RotatedRect box = cv::minAreaRect(contours[i]);
auto array = GetMiniBoxes(box, ssid);
auto box_for_unclip = array;
// end get_mini_box
if (ssid < min_size) {
continue;
}
float score;
score = BoxScoreFast(array, pred);
// end box_score_fast
if (score < box_thresh)
continue;
// start for unclip
cv::RotatedRect points = Unclip(box_for_unclip, unclip_ratio);
if (points.size.height < 1.001 && points.size.width < 1.001)
continue;
// end for unclip
cv::RotatedRect clipbox = points;
auto cliparray = GetMiniBoxes(clipbox, ssid);
if (ssid < min_size + 2)
continue;
int dest_width = pred.cols;
int dest_height = pred.rows;
std::vector<std::vector<int>> intcliparray;
for (int num_pt = 0; num_pt < 4; num_pt++) {
std::vector<int> a{
static_cast<int>(clamp(
roundf(cliparray[num_pt][0] / float(width) * float(dest_width)),
float(0), float(dest_width))),
static_cast<int>(clamp(
roundf(cliparray[num_pt][1] / float(height) * float(dest_height)),
float(0), float(dest_height)))};
intcliparray.push_back(a);
}
boxes.push_back(intcliparray);
} // end for
return boxes;
}
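// Rescale the detected boxes from the resized image back to the original
// image using ratio_h/ratio_w, clamp them to the image borders, and drop
// boxes narrower or shorter than 10 pixels.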
std::vector<std::vector<std::vector<int>>>
FilterTagDetRes(std::vector<std::vector<std::vector<int>>> boxes, float ratio_h,
float ratio_w, cv::Mat srcimg) {
int oriimg_h = srcimg.rows;
int oriimg_w = srcimg.cols;
std::vector<std::vector<std::vector<int>>> root_points;
for (int n = 0; n < static_cast<int>(boxes.size()); n++) {
boxes[n] = OrderPointsClockwise(boxes[n]);
for (int m = 0; m < static_cast<int>(boxes[0].size()); m++) {
boxes[n][m][0] /= ratio_w;
boxes[n][m][1] /= ratio_h;
boxes[n][m][0] =
static_cast<int>(std::min(std::max(boxes[n][m][0], 0), oriimg_w - 1));
boxes[n][m][1] =
static_cast<int>(std::min(std::max(boxes[n][m][1], 0), oriimg_h - 1));
}
}
for (int n = 0; n < boxes.size(); n++) {
int rect_width, rect_height;
rect_width =
static_cast<int>(sqrt(pow(boxes[n][0][0] - boxes[n][1][0], 2) +
pow(boxes[n][0][1] - boxes[n][1][1], 2)));
rect_height =
static_cast<int>(sqrt(pow(boxes[n][0][0] - boxes[n][3][0], 2) +
pow(boxes[n][0][1] - boxes[n][3][1], 2)));
if (rect_width <= 10 || rect_height <= 10)
continue;
root_points.push_back(boxes[n]);
}
return root_points;
}
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <math.h>
#include <iostream>
#include <map>
#include <vector>
#include "clipper.hpp"
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
template <class T> T clamp(T x, T min, T max) {
if (x > max)
return max;
if (x < min)
return min;
return x;
}
std::vector<std::vector<float>> Mat2Vector(cv::Mat mat);
void GetContourArea(std::vector<std::vector<float>> box, float unclip_ratio,
float &distance);
cv::RotatedRect Unclip(std::vector<std::vector<float>> box, float unclip_ratio);
bool XsortFp32(std::vector<float> a, std::vector<float> b);
bool XsortInt(std::vector<int> a, std::vector<int> b);
std::vector<std::vector<int>>
OrderPointsClockwise(std::vector<std::vector<int>> pts);
std::vector<std::vector<float>> GetMiniBoxes(cv::RotatedRect box, float &ssid);
float BoxScoreFast(std::vector<std::vector<float>> box_array, cv::Mat pred);
std::vector<std::vector<std::vector<int>>>
BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap,
std::map<std::string, double> Config);
std::vector<std::vector<std::vector<int>>>
FilterTagDetRes(std::vector<std::vector<std::vector<int>>> boxes, float ratio_h,
float ratio_w, cv::Mat srcimg);
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle_api.h" // NOLINT
#include <chrono>
#include "crnn_process.h"
#include "db_post_process.h"
using namespace paddle::lite_api; // NOLINT
using namespace std;
// fill tensor with mean and scale and trans layout: nhwc -> nchw, neon speed up
void NeonMeanScale(const float *din, float *dout, int size,
const std::vector<float> mean,
const std::vector<float> scale) {
if (mean.size() != 3 || scale.size() != 3) {
std::cerr << "[ERROR] mean or scale size must equal to 3\n";
exit(1);
}
float32x4_t vmean0 = vdupq_n_f32(mean[0]);
float32x4_t vmean1 = vdupq_n_f32(mean[1]);
float32x4_t vmean2 = vdupq_n_f32(mean[2]);
float32x4_t vscale0 = vdupq_n_f32(scale[0]);
float32x4_t vscale1 = vdupq_n_f32(scale[1]);
float32x4_t vscale2 = vdupq_n_f32(scale[2]);
float *dout_c0 = dout;
float *dout_c1 = dout + size;
float *dout_c2 = dout + size * 2;
int i = 0;
for (; i < size - 3; i += 4) {
float32x4x3_t vin3 = vld3q_f32(din);
float32x4_t vsub0 = vsubq_f32(vin3.val[0], vmean0);
float32x4_t vsub1 = vsubq_f32(vin3.val[1], vmean1);
float32x4_t vsub2 = vsubq_f32(vin3.val[2], vmean2);
float32x4_t vs0 = vmulq_f32(vsub0, vscale0);
float32x4_t vs1 = vmulq_f32(vsub1, vscale1);
float32x4_t vs2 = vmulq_f32(vsub2, vscale2);
vst1q_f32(dout_c0, vs0);
vst1q_f32(dout_c1, vs1);
vst1q_f32(dout_c2, vs2);
din += 12;
dout_c0 += 4;
dout_c1 += 4;
dout_c2 += 4;
}
for (; i < size; i++) {
*(dout_c0++) = (*(din++) - mean[0]) * scale[0];
*(dout_c1++) = (*(din++) - mean[1]) * scale[1];
*(dout_c2++) = (*(din++) - mean[2]) * scale[2];
}
}
// resize image to a size multiple of 32 which is required by the network
cv::Mat DetResizeImg(const cv::Mat img, int max_size_len,
std::vector<float> &ratio_hw) {
int w = img.cols;
int h = img.rows;
float ratio = 1.f;
int max_wh = w >= h ? w : h;
if (max_wh > max_size_len) {
if (h > w) {
ratio = static_cast<float>(max_size_len) / static_cast<float>(h);
} else {
ratio = static_cast<float>(max_size_len) / static_cast<float>(w);
}
}
int resize_h = static_cast<int>(float(h) * ratio);
int resize_w = static_cast<int>(float(w) * ratio);
if (resize_h % 32 == 0)
resize_h = resize_h;
else if (resize_h / 32 < 1 + 1e-5)
resize_h = 32;
else
resize_h = (resize_h / 32 - 1) * 32;
if (resize_w % 32 == 0)
resize_w = resize_w;
else if (resize_w / 32 < 1 + 1e-5)
resize_w = 32;
else
resize_w = (resize_w / 32 - 1) * 32;
cv::Mat resize_img;
cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
ratio_hw.push_back(static_cast<float>(resize_h) / static_cast<float>(h));
ratio_hw.push_back(static_cast<float>(resize_w) / static_cast<float>(w));
return resize_img;
}
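// Run the CRNN recognizer on every detected box: crop and deskew the region,
// resize it to height 32, normalize, run the predictor, decode the output
// indices through the character dictionary, and average the per-character
// confidences into a text score.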
void RunRecModel(std::vector<std::vector<std::vector<int>>> boxes, cv::Mat img,
std::shared_ptr<PaddlePredictor> predictor_crnn,
std::vector<std::string> &rec_text,
std::vector<float> &rec_text_score,
std::vector<std::string> charactor_dict) {
std::vector<float> mean = {0.5f, 0.5f, 0.5f};
std::vector<float> scale = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
cv::Mat srcimg;
img.copyTo(srcimg);
cv::Mat crop_img;
cv::Mat resize_img;
int index = 0;
for (int i = boxes.size() - 1; i >= 0; i--) {
crop_img = GetRotateCropImage(srcimg, boxes[i]);
float wh_ratio =
static_cast<float>(crop_img.cols) / static_cast<float>(crop_img.rows);
resize_img = CrnnResizeImg(crop_img, wh_ratio);
resize_img.convertTo(resize_img, CV_32FC3, 1 / 255.f);
const float *dimg = reinterpret_cast<const float *>(resize_img.data);
std::unique_ptr<Tensor> input_tensor0(
std::move(predictor_crnn->GetInput(0)));
input_tensor0->Resize({1, 3, resize_img.rows, resize_img.cols});
auto *data0 = input_tensor0->mutable_data<float>();
NeonMeanScale(dimg, data0, resize_img.rows * resize_img.cols, mean, scale);
//// Run CRNN predictor
predictor_crnn->Run();
// Get output and run postprocess
std::unique_ptr<const Tensor> output_tensor0(
std::move(predictor_crnn->GetOutput(0)));
auto *rec_idx = output_tensor0->data<int64_t>();
auto rec_idx_lod = output_tensor0->lod();
auto shape_out = output_tensor0->shape();
std::vector<int> pred_idx;
for (int n = static_cast<int>(rec_idx_lod[0][0]);
n < static_cast<int>(rec_idx_lod[0][1]); n += 1) {
pred_idx.push_back(static_cast<int>(rec_idx[n]));
}
if (pred_idx.empty())
continue;
index += 1;
std::string pred_txt = "";
for (int n = 0; n < pred_idx.size(); n++) {
pred_txt += charactor_dict[pred_idx[n]];
}
rec_text.push_back(pred_txt);
////get score
std::unique_ptr<const Tensor> output_tensor1(
std::move(predictor_crnn->GetOutput(1)));
auto *predict_batch = output_tensor1->data<float>();
auto predict_shape = output_tensor1->shape();
auto predict_lod = output_tensor1->lod();
int blank = predict_shape[1];
float score = 0.f;
int count = 0;
for (int n = predict_lod[0][0]; n < predict_lod[0][1] - 1; n++) {
int argmax_idx =
static_cast<int>(Argmax(&predict_batch[n * predict_shape[1]],
&predict_batch[(n + 1) * predict_shape[1]]));
float max_value =
float(*std::max_element(&predict_batch[n * predict_shape[1]],
&predict_batch[(n + 1) * predict_shape[1]]));
if (blank - 1 - argmax_idx > 1e-5) {
score += max_value;
count += 1;
}
}
if (count > 0) {
score /= count;
}
rec_text_score.push_back(score);
}
}
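// Run the DB detector: resize the image to a multiple of 32, normalize, run
// the predictor, binarize the output score map with det_db_thresh, and turn
// the bitmap into filtered text boxes.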
std::vector<std::vector<std::vector<int>>>
RunDetModel(std::shared_ptr<PaddlePredictor> predictor, cv::Mat img,
std::map<std::string, double> Config) {
// Read img
int max_side_len = int(Config["max_side_len"]);
cv::Mat srcimg;
img.copyTo(srcimg);
std::vector<float> ratio_hw;
img = DetResizeImg(img, max_side_len, ratio_hw);
cv::Mat img_fp;
img.convertTo(img_fp, CV_32FC3, 1.0 / 255.f);
// Prepare input data from image
std::unique_ptr<Tensor> input_tensor0(std::move(predictor->GetInput(0)));
input_tensor0->Resize({1, 3, img_fp.rows, img_fp.cols});
auto *data0 = input_tensor0->mutable_data<float>();
std::vector<float> mean = {0.485f, 0.456f, 0.406f};
std::vector<float> scale = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
const float *dimg = reinterpret_cast<const float *>(img_fp.data);
NeonMeanScale(dimg, data0, img_fp.rows * img_fp.cols, mean, scale);
// Run predictor
predictor->Run();
// Get output and post process
std::unique_ptr<const Tensor> output_tensor(
std::move(predictor->GetOutput(0)));
auto *outptr = output_tensor->data<float>();
auto shape_out = output_tensor->shape();
// Save output (vectors instead of variable-length arrays, which are not
// standard C++)
std::vector<float> pred(shape_out[2] * shape_out[3]);
std::vector<unsigned char> cbuf(shape_out[2] * shape_out[3]);
for (int i = 0; i < int(shape_out[2] * shape_out[3]); i++) {
pred[i] = static_cast<float>(outptr[i]);
cbuf[i] = static_cast<unsigned char>((outptr[i]) * 255);
}
cv::Mat cbuf_map(shape_out[2], shape_out[3], CV_8UC1, cbuf.data());
cv::Mat pred_map(shape_out[2], shape_out[3], CV_32F, pred.data());
const double threshold = double(Config["det_db_thresh"]) * 255;
const double maxvalue = 255;
cv::Mat bit_map;
cv::threshold(cbuf_map, bit_map, threshold, maxvalue, cv::THRESH_BINARY);
auto boxes = BoxesFromBitmap(pred_map, bit_map, Config);
std::vector<std::vector<std::vector<int>>> filter_boxes =
FilterTagDetRes(boxes, ratio_hw[0], ratio_hw[1], srcimg);
return filter_boxes;
}
std::shared_ptr<PaddlePredictor> loadModel(std::string model_file) {
MobileConfig config;
config.set_model_from_file(model_file);
std::shared_ptr<PaddlePredictor> predictor =
CreatePaddlePredictor<MobileConfig>(config);
return predictor;
}
cv::Mat Visualization(cv::Mat srcimg,
std::vector<std::vector<std::vector<int>>> boxes) {
std::vector<std::vector<cv::Point>> rook_points(boxes.size(),
std::vector<cv::Point>(4));
for (int n = 0; n < boxes.size(); n++) {
for (int m = 0; m < boxes[0].size(); m++) {
rook_points[n][m] = cv::Point(static_cast<int>(boxes[n][m][0]),
static_cast<int>(boxes[n][m][1]));
}
}
cv::Mat img_vis;
srcimg.copyTo(img_vis);
for (int n = 0; n < boxes.size(); n++) {
const cv::Point *ppt[1] = {rook_points[n].data()};
int npt[] = {4};
cv::polylines(img_vis, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0);
}
cv::imwrite("./vis.jpg", img_vis);
std::cout << "The detection visualized image saved in ./vis.jpg" << std::endl;
return img_vis;
}
std::vector<std::string> split(const std::string &str,
const std::string &delim) {
std::vector<std::string> res;
if ("" == str)
return res;
char *strs = new char[str.length() + 1];
std::strcpy(strs, str.c_str());
char *d = new char[delim.length() + 1];
std::strcpy(d, delim.c_str());
char *p = std::strtok(strs, d);
while (p) {
string s = p;
res.push_back(s);
p = std::strtok(NULL, d);
}
return res;
}
std::map<std::string, double> LoadConfigTxt(std::string config_path) {
auto config = ReadDict(config_path);
std::map<std::string, double> dict;
for (int i = 0; i < config.size(); i++) {
std::vector<std::string> res = split(config[i], " ");
dict[res[0]] = stod(res[1]);
}
return dict;
}
int main(int argc, char **argv) {
if (argc < 5) {
std::cerr << "[ERROR] usage: " << argv[0]
<< " det_model_file rec_model_file image_path dict_path\n";
exit(1);
}
std::string det_model_file = argv[1];
std::string rec_model_file = argv[2];
std::string img_path = argv[3];
std::string dict_path = argv[4];
//// load config from txt file
auto Config = LoadConfigTxt("./config.txt");
auto start = std::chrono::system_clock::now();
auto det_predictor = loadModel(det_model_file);
auto rec_predictor = loadModel(rec_model_file);
auto charactor_dict = ReadDict(dict_path);
charactor_dict.push_back(" ");
cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR);
auto boxes = RunDetModel(det_predictor, srcimg, Config);
std::vector<std::string> rec_text;
std::vector<float> rec_text_score;
RunRecModel(boxes, srcimg, rec_predictor, rec_text, rec_text_score,
charactor_dict);
auto end = std::chrono::system_clock::now();
auto duration =
std::chrono::duration_cast<std::chrono::microseconds>(end - start);
//// visualization
auto img_vis = Visualization(srcimg, boxes);
//// print recognized text
for (int i = 0; i < rec_text.size(); i++) {
std::cout << i << "\t" << rec_text[i] << "\t" << rec_text_score[i]
<< std::endl;
}
std::cout << "Total elapsed time: "
<< double(duration.count()) *
std::chrono::microseconds::period::num /
std::chrono::microseconds::period::den
<< " s" << std::endl;
return 0;
}
#!/bin/bash
mkdir -p $1/demo/cxx/ocr/debug/
cp ../../ppocr/utils/ppocr_keys_v1.txt $1/demo/cxx/ocr/debug/
cp -r ./* $1/demo/cxx/ocr/
cp ./config.txt $1/demo/cxx/ocr/debug/
cp ../../doc/imgs/11.jpg $1/demo/cxx/ocr/debug/
echo "Prepare Done"
# On-device deployment
This tutorial walks through deploying the PaddleOCR ultra-lightweight Chinese detection and recognition models on mobile devices with [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite).
Paddle Lite is PaddlePaddle's lightweight inference engine. It provides efficient inference for mobile and IoT devices, integrates a wide range of cross-platform hardware, and offers a lightweight solution for on-device deployment.
## 1. Environment preparation
### Prerequisites
- A computer (to compile Paddle Lite)
- An Android phone (armv7 or armv8)
### 1.1 Prepare the cross-compilation environment
The cross-compilation environment is used to build Paddle Lite and the PaddleOCR C++ demo.
Several development environments are supported; see the corresponding documentation for each:
1. [Docker](https://paddle-lite.readthedocs.io/zh/latest/user_guides/source_compile.html#docker)
2. [Linux](https://paddle-lite.readthedocs.io/zh/latest/user_guides/source_compile.html#android)
3. [macOS](https://paddle-lite.readthedocs.io/zh/latest/user_guides/source_compile.html#id13)
4. [Windows](https://paddle-lite.readthedocs.io/zh/latest/demo_guides/x86.html#id4)
### 1.2 Prepare the prediction library
There are two ways to obtain the prediction library:
- 1. Download it directly from the links below:
|Platform|Download link|
|-|-|
|Android|[arm7](https://paddlelite-data.bj.bcebos.com/Release/2.6.1/Android/inference_lite_lib.android.armv7.gcc.c++_static.with_extra.CV_ON.tar.gz) / [arm8](https://paddlelite-data.bj.bcebos.com/Release/2.6.1/Android/inference_lite_lib.android.armv8.gcc.c++_static.with_extra.CV_ON.tar.gz)|
|IOS|[arm7](https://paddlelite-data.bj.bcebos.com/Release/2.6.1/iOS/inference_lite_lib.ios.armv7.with_extra.CV_ON.tar.gz) / [arm8](https://paddlelite-data.bj.bcebos.com/Release/2.6.1/iOS/inference_lite_lib.ios64.armv8.with_extra.CV_ON.tar.gz)|
|x86(Linux)|[prediction library](https://paddlelite-data.bj.bcebos.com/Release/2.6.1/X86/Linux/inference_lite_lib.x86.linux.tar.gz)|
Note: if you download the prediction library from the Paddle-Lite [release documentation](https://paddle-lite.readthedocs.io/zh/latest/user_guides/release_lib.html#android-toolchain-gcc),
make sure to pick a `with_extra=ON,with_cv=ON` download link.
- 2. Compile Paddle-Lite from source to obtain the prediction library:
```
git clone https://github.com/PaddlePaddle/Paddle-Lite.git
cd Paddle-Lite
git checkout develop
./lite/tools/build_android.sh --arch=armv8 --with_cv=ON --with_extra=ON
```
Note: when compiling Paddle-Lite, both `--with_cv=ON` and `--with_extra=ON` must be enabled; `--arch` selects the `arm` version, armv8 here.
See [this link](https://paddle-lite.readthedocs.io/zh/latest/user_guides/Compile/Android.html#id2) for more build options.
Downloading and extracting the prebuilt library directly yields the folder `inference_lite_lib.android.armv8/`; building Paddle-Lite from source places it under
`Paddle-Lite/build.lite.android.armv8.gcc/inference_lite_lib.android.armv8/`.
The layout of the prediction library is as follows:
```
inference_lite_lib.android.armv8/
|-- cxx                                       C++ prediction library and headers
|   |-- include                               C++ headers
|   |   |-- paddle_api.h
|   |   |-- paddle_image_preprocess.h
|   |   |-- paddle_lite_factory_helper.h
|   |   |-- paddle_place.h
|   |   |-- paddle_use_kernels.h
|   |   |-- paddle_use_ops.h
|   |   `-- paddle_use_passes.h
|   `-- lib                                   C++ prediction libraries
|       |-- libpaddle_api_light_bundled.a     C++ static library
|       `-- libpaddle_light_api_shared.so     C++ shared library
|-- java                                      Java prediction library
|   |-- jar
|   |   `-- PaddlePredictor.jar
|   |-- so
|   |   `-- libpaddle_lite_jni.so
|   `-- src
|-- demo                                      C++ and Java demos
|   |-- cxx                                   C++ demo
|   `-- java                                  Java demo
```
## 2 Getting started
### 2.1 Model optimization
Paddle-Lite provides a variety of strategies for automatically optimizing a model, including quantization, subgraph fusion, hybrid scheduling, and kernel selection.
The `opt` tool of Paddle-Lite applies these optimizations to an inference model automatically; the optimized model is smaller and runs faster.
The table below provides optimized ultra-lightweight Chinese models:
|Description|Detection model|Recognition model|Paddle-Lite branch|
|-|-|-|-|
|Ultra-lightweight Chinese OCR, opt-optimized|[Download](https://paddleocr.bj.bcebos.com/deploy/lite/ch_det_mv3_db_opt.nb)|[Download](https://paddleocr.bj.bcebos.com/deploy/lite/ch_rec_mv3_crnn_opt.nb)|develop|
If you deploy with the models from the table above, you can skip the steps below and go straight to [Section 2.2](#2.2与手机联调).
If the model you want to deploy is not in the table, follow the steps below to obtain an optimized model.
Model optimization requires Paddle-Lite's `opt` executable, which is obtained by compiling the Paddle-Lite source:
```
# if Paddle-Lite was already cloned while preparing the environment, there is no need to clone it again
git clone https://github.com/PaddlePaddle/Paddle-Lite.git
cd Paddle-Lite
git checkout develop
# start the build
./lite/tools/build.sh build_optimize_tool
```
After compilation, the opt binary is located under `build.opt/lite/api/`. Its options and usage can be viewed as follows:
```
cd build.opt/lite/api/
./opt
```
|Option|Description|
|-|-|
|--model_dir|Path of the PaddlePaddle model (non-combined format) to optimize|
|--model_file|Path of the network structure file of the PaddlePaddle model (combined format) to optimize|
|--param_file|Path of the weights file of the PaddlePaddle model (combined format) to optimize|
|--optimize_out_type|Output model type; currently protobuf and naive_buffer are supported, where naive_buffer is a lighter serialization/deserialization implementation. Set this to naive_buffer for on-device inference. Default: protobuf|
|--optimize_out|Output path of the optimized model|
|--valid_targets|Backends the model may run on; default arm. Currently x86, arm, opencl, npu, and xpu are supported, and several backends may be given (space separated); the Model Optimize Tool then picks the best one automatically. For Huawei NPU support (the DaVinci-architecture NPU in Kirin 810/990 SoCs), set this to "npu, arm"|
|--record_tailoring_info|Set to true when tailoring the library files by model, to record the kernel and OP information of the optimized model. Default: false|
`--model_dir` is for models in non-combined format. PaddleOCR inference models use the combined format, i.e. the model structure and the model parameters are each stored in a single file.
The following takes the PaddleOCR ultra-lightweight Chinese models as an example of converting an inference model into a Paddle-Lite optimized model with the compiled opt binary.
```
# download and extract the PaddleOCR ultra-lightweight Chinese inference models
wget https://paddleocr.bj.bcebos.com/ch_models/ch_det_mv3_db_infer.tar && tar xf ch_det_mv3_db_infer.tar
wget https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_infer.tar && tar xf ch_rec_mv3_crnn_infer.tar
# convert the detection model
./opt --model_file=./ch_det_mv3_db/model --param_file=./ch_det_mv3_db/params --optimize_out_type=naive_buffer --optimize_out=./ch_det_mv3_db_opt --valid_targets=arm
# convert the recognition model
./opt --model_file=./ch_rec_mv3_crnn/model --param_file=./ch_rec_mv3_crnn/params --optimize_out_type=naive_buffer --optimize_out=./ch_rec_mv3_crnn_opt --valid_targets=arm
```
After a successful conversion, two new files, `ch_det_mv3_db_opt.nb` and `ch_rec_mv3_crnn_opt.nb`, appear in the current directory; these are the converted model files.
Note: deployment with paddle-lite requires models optimized by the opt tool; opt takes a paddle inference model as input.
<a name="2.2与手机联调"></a>
### 2.2 Run and debug on the phone
Some preparation is needed first:
1. Prepare an arm8 Android phone. If the prediction library and opt binary were built for armv7, use an arm7 phone instead and set `ARM_ABI = arm7` in the Makefile.
2. Enable the USB debugging option on the phone, select file-transfer mode, and connect the phone to the computer.
3. Install the adb tool on the computer for debugging:
3.1. Install ADB on macOS:
```
brew cask install android-platform-tools
```
3.2. Install ADB on Linux
```
sudo apt update
sudo apt install -y wget adb
```
3.3. Install ADB on Windows
On Windows, download the adb package from Google's Android platform and install it: [link](https://developer.android.com/studio)
Open a terminal with the phone connected to the computer, and run
```
adb devices
```
If a device is listed in the output, the installation succeeded:
```
List of devices attached
744be294 device
```
4. Prepare the optimized models, the prediction library files, a test image, and the dictionary file.
```
git clone https://github.com/PaddlePaddle/PaddleOCR.git
cd PaddleOCR/deploy/lite/
# run prepare.sh to copy the prediction library files, test image, and dictionary file into demo/cxx/ocr of the prediction library
sh prepare.sh /{lite prediction library path}/inference_lite_lib.android.armv8
# enter the OCR demo working directory
cd /{lite prediction library path}/inference_lite_lib.android.armv8/
cd demo/cxx/ocr/
# copy the C++ prediction shared library (.so) into the debug folder
cp ../../../cxx/lib/libpaddle_light_api_shared.so ./debug/
```
Prepare a test image, e.g. `PaddleOCR/doc/imgs/11.jpg`, and copy it into the `demo/cxx/ocr/debug/` folder.
Place the model files optimized by the lite opt tool, `ch_det_mv3_db_opt.nb` and `ch_rec_mv3_crnn_opt.nb`, in the `demo/cxx/ocr/debug/` folder as well.
After these steps, the ocr folder has the following layout:
```
demo/cxx/ocr/
|-- debug/
|   |--ch_det_mv3_db_opt.nb            optimized detection model
|   |--ch_rec_mv3_crnn_opt.nb          optimized recognition model
|   |--11.jpg                          test image
|   |--ppocr_keys_v1.txt               dictionary file
|   |--libpaddle_light_api_shared.so   C++ prediction library
|   |--config.txt                      DB-CRNN hyperparameters
|-- config.txt                         DB-CRNN hyperparameters
|-- crnn_process.cc                    pre-/post-processing for the CRNN recognition model
|-- crnn_process.h
|-- db_post_process.cc                 post-processing for the DB detection model
|-- db_post_process.h
|-- Makefile                           build file
|-- ocr_db_crnn.cc                     C++ prediction source file
```
5. Start debugging
With the steps above done, use adb to push the files to the phone and run:
```
# build; this produces the executable ocr_db_crnn
# usage of ocr_db_crnn:
# ./ocr_db_crnn <detection model> <recognition model> <test image path> <dictionary file>
make -j
# move the built executable into the debug folder
mv ocr_db_crnn ./debug/
# push the debug folder to the phone
adb push debug /data/local/tmp/
adb shell
cd /data/local/tmp/debug
export LD_LIBRARY_PATH=/data/local/tmp/debug:$LD_LIBRARY_PATH
./ocr_db_crnn ch_det_mv3_db_opt.nb ch_rec_mv3_crnn_opt.nb ./11.jpg ppocr_keys_v1.txt
```
If you modify the code, rebuild and push it to the phone again.
The output looks like this:
<div align="center">
<img src="../imgs/demo.png" width="600">
</div>
# Tutorial of PaddleOCR Mobile deployment
This tutorial introduces how to use Paddle Lite to deploy the PaddleOCR ultra-lightweight Chinese and English detection and recognition models on mobile phones.
Paddle Lite is a lightweight inference engine for PaddlePaddle.
It provides efficient inference for mobile phones and IoT devices,
and integrates a wide range of cross-platform hardware to provide
a lightweight deployment solution for on-device inference.
## 1. Preparation
- Computer (for Compiling Paddle Lite)
- Mobile phone (arm7 or arm8)
## 2. Build PaddleLite library
[build for Docker](https://paddle-lite.readthedocs.io/zh/latest/user_guides/source_compile.html#docker)
[build for Linux](https://paddle-lite.readthedocs.io/zh/latest/user_guides/source_compile.html#android)
[build for MAC OS](https://paddle-lite.readthedocs.io/zh/latest/user_guides/source_compile.html#id13)
[build for windows](https://paddle-lite.readthedocs.io/zh/latest/demo_guides/x86.html#id4)
## 3. Download prebuilt library for Android and iOS
|Platform|Prebuilt library download link|
|-|-|
|Android|[arm7](https://paddlelite-data.bj.bcebos.com/Release/2.6.1/Android/inference_lite_lib.android.armv7.gcc.c++_static.with_extra.CV_ON.tar.gz) / [arm8](https://paddlelite-data.bj.bcebos.com/Release/2.6.1/Android/inference_lite_lib.android.armv8.gcc.c++_static.with_extra.CV_ON.tar.gz)|
|IOS|[arm7](https://paddlelite-data.bj.bcebos.com/Release/2.6.1/iOS/inference_lite_lib.ios.armv7.with_extra.CV_ON.tar.gz) / [arm8](https://paddlelite-data.bj.bcebos.com/Release/2.6.1/iOS/inference_lite_lib.ios64.armv8.with_extra.CV_ON.tar.gz)|
|x86(Linux)|[prediction library](https://paddlelite-data.bj.bcebos.com/Release/2.6.1/X86/Linux/inference_lite_lib.x86.linux.tar.gz)|
The structure of the prediction library is as follows:
```
inference_lite_lib.android.armv8/
|-- cxx                                       C++ prebuilt library
|   |-- include                               C++ headers
|   |   |-- paddle_api.h
|   |   |-- paddle_image_preprocess.h
|   |   |-- paddle_lite_factory_helper.h
|   |   |-- paddle_place.h
|   |   |-- paddle_use_kernels.h
|   |   |-- paddle_use_ops.h
|   |   `-- paddle_use_passes.h
|   `-- lib
|       |-- libpaddle_api_light_bundled.a     C++ static library
|       `-- libpaddle_light_api_shared.so     C++ dynamic library
|-- java                                      Java predict library
|   |-- jar
|   |   `-- PaddlePredictor.jar
|   |-- so
|   |   `-- libpaddle_lite_jni.so
|   `-- src
|-- demo                                      C++ and Java demos
|   |-- cxx
|   `-- java
```
## 4. Inference Model Optimization
Paddle Lite provides a variety of strategies to automatically optimize the original training model, including quantization, subgraph fusion, hybrid scheduling, and kernel selection. To make the optimization process convenient and easy to use, Paddle Lite provides the `opt` tool, which completes the optimization steps automatically and outputs a lightweight, optimized model.
If you deploy with the 8.6M PaddleOCR model, you can download the optimized model directly.
|Introduction|Detection model|Recognition model|Paddle Lite branch |
|-|-|-|-|
|lightweight Chinese OCR optimized model|[Download](https://paddleocr.bj.bcebos.com/deploy/lite/ch_det_mv3_db_opt.nb)|[Download](https://paddleocr.bj.bcebos.com/deploy/lite/ch_rec_mv3_crnn_opt.nb)|develop|
If the model to be deployed is not in the above table, you need to follow the steps below to obtain the optimized model.
```
git clone https://github.com/PaddlePaddle/Paddle-Lite.git
cd Paddle-Lite
git checkout develop
./lite/tools/build.sh build_optimize_tool
```
The `opt` tool is obtained by compiling Paddle Lite.
After the compilation completes, the opt binary is located under `build.opt/lite/api/`.
`opt` converts an inference model saved by paddle.io.save_inference_model into a model that the Paddle Lite API can load.
The usage of opt is as follows:
```
wget https://paddleocr.bj.bcebos.com/ch_models/ch_det_mv3_db_infer.tar && tar xf ch_det_mv3_db_infer.tar
wget https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_infer.tar && tar xf ch_rec_mv3_crnn_infer.tar
./opt --model_file=./ch_det_mv3_db/model --param_file=./ch_det_mv3_db/params --optimize_out_type=naive_buffer --optimize_out=./ch_det_mv3_db_opt --valid_targets=arm
./opt --model_file=./ch_rec_mv3_crnn/model --param_file=./ch_rec_mv3_crnn/params --optimize_out_type=naive_buffer --optimize_out=./ch_rec_mv3_crnn_opt --valid_targets=arm
```
When the commands above complete, two new files, `ch_det_mv3_db_opt.nb` and
`ch_rec_mv3_crnn_opt.nb`, appear in the current directory; these are the converted model files.
## 5. Run optimized model on Phone
1. Prepare an Android phone with arm8. If the compiled prediction library and opt file target armv7, you need an arm7 phone instead, and you must set `ARM_ABI = arm7` in the Makefile.
2. Make sure the phone is connected to the computer, open the USB debugging option of the phone, and select the file transfer mode.
3. Install the adb tool on the computer.
3.1 Install ADB for MAC
```
brew cask install android-platform-tools
```
3.2 Install ADB for Linux
```
sudo apt update
sudo apt install -y wget adb
```
3.3 Install ADB for windows
[Download Link](https://developer.android.com/studio)
Verify whether adb is installed successfully
```
$ adb devices
List of devices attached
744be294 device
```
If there is `device` output, it means the installation was successful.
4. Prepare optimized models, prediction library files, test images and dictionary files used.
```
git clone https://github.com/PaddlePaddle/PaddleOCR.git
cd PaddleOCR/deploy/lite/
# run prepare.sh
sh prepare.sh /{lite prediction library path}/inference_lite_lib.android.armv8
# enter the OCR demo working directory
cd /{lite prediction library path}/inference_lite_lib.android.armv8/
cd demo/cxx/ocr/
# copy paddle-lite C++ .so file to debug/ directory
cp ../../../cxx/lib/libpaddle_light_api_shared.so ./debug/
```
Prepare the test image, taking `PaddleOCR/doc/imgs/11.jpg` as an example, copy the image file to the `demo/cxx/ocr/debug/` folder.
Prepare the model files optimized by the lite opt tool, `ch_det_mv3_db_opt.nb, ch_rec_mv3_crnn_opt.nb`,
and place them under the `demo/cxx/ocr/debug/` folder.
The structure of the OCR demo is as follows after the above command is executed:
```
demo/cxx/ocr/
|-- debug/
| |--ch_det_mv3_db_opt.nb Detection model
| |--ch_rec_mv3_crnn_opt.nb Recognition model
| |--11.jpg Image for OCR
| |--ppocr_keys_v1.txt Dictionary file
| |--libpaddle_light_api_shared.so C++ .so file
| |--config.txt Config file
|-- config.txt
|-- crnn_process.cc
|-- crnn_process.h
|-- db_post_process.cc
|-- db_post_process.h
|-- Makefile
|-- ocr_db_crnn.cc
```
5. Run Model on phone
```
cd inference_lite_lib.android.armv8/demo/cxx/ocr/
make -j
mv ocr_db_crnn ./debug/
adb push debug /data/local/tmp/
adb shell
cd /data/local/tmp/debug
export LD_LIBRARY_PATH=/data/local/tmp/debug:$LD_LIBRARY_PATH
# run model
./ocr_db_crnn ch_det_mv3_db_opt.nb ch_rec_mv3_crnn_opt.nb ./11.jpg ppocr_keys_v1.txt
```
The outputs are as follows:
<div align="center">
<img src="../imgs/demo.png" width="600">
</div>
@@ -10,14 +10,14 @@
## Configuration file parameters

Take `rec_chinese_lite_train_v2.0.yml` as an example.

### Global

| Field | Purpose | Default | Note |
| :----------------------: | :---------------------: | :--------------: | :--------------------: |
| use_gpu | whether to run on GPU | true | \ |
| epoch_num | maximum number of training epochs | 500 | \ |
| log_smooth_window | length of the log queue; the median of the queue is printed each time | 20 | \ |
| print_batch_step | logging interval, in batch steps | 10 | \ |
| save_model_dir | model save path | output/{algorithm name} | \ |
| save_epoch_step | model save interval, in epochs | 3 | \ |
@@ -42,6 +42,7 @@
| name | optimizer class name | Adam | currently supports `Momentum`, `Adam`, `RMSProp`; see [ppocr/optimizer/optimizer.py](../../ppocr/optimizer/optimizer.py) |
| beta1 | exponential decay rate of the first-moment estimates | 0.9 | \ |
| beta2 | exponential decay rate of the second-moment estimates | 0.999 | \ |
| clip_norm | maximum allowed L2 norm for gradient clipping | | \ |
| **lr** | learning-rate decay policy | - | \ |
| name | learning-rate decay class name | Cosine | currently supports `Linear`, `Cosine`, `Step`, `Piecewise`; see [ppocr/optimizer/learning_rate.py](../../ppocr/optimizer/learning_rate.py) |
| learning_rate | base learning rate | 0.001 | \ |
@@ -119,4 +120,4 @@
| shuffle | whether to shuffle the dataset every epoch | True | \ |
| batch_size_per_card | per-card batch size during training | 256 | \ |
| drop_last | whether to drop the last incomplete mini-batch when the dataset size is not divisible by batch_size | True | \ |
| num_workers | number of subprocesses used for data loading; 0 means loading in the main process | 8 | \ |
@@ -186,7 +186,7 @@ python3 tools/infer/predict_det.py --det_algorithm="EAST" --image_dir="./doc/img
```

The visualized text detection results are saved to the `./inference_results` folder by default, with filenames prefixed 'det_res'. Example:

(coming soon)

**Note**: this codebase ships both Python and C++ versions of the Locality-Aware NMS used in EAST post-processing, and the C++ version is noticeably faster. Because of build-compatibility issues of the C++ NMS, it is only used under Python 3.5; in all other environments the Python NMS is used.

@@ -205,7 +205,7 @@ python3 tools/infer/predict_det.py --det_algorithm="SAST" --image_dir="./doc/img
```

The visualized text detection results are saved to the `./inference_results` folder by default, with filenames prefixed 'det_res'. Example:

(coming soon)

#### (2). Curved text detection model (Total-Text)

First convert the model saved during SAST text detection training into an inference model. Taking the Resnet50_vd-backbone model trained on the Total-Text English dataset as an example ([model download link (coming soon)](link)), run:

@@ -221,7 +221,7 @@ python3 tools/infer/predict_det.py --det_algorithm="SAST" --image_dir="./doc/img
```

The visualized text detection results are saved to the `./inference_results` folder by default, with filenames prefixed 'det_res'. Example:

(coming soon)

**Note**: this codebase ships both Python and C++ versions of the Locality-Aware NMS used in SAST post-processing, and the C++ version is noticeably faster. Because of build-compatibility issues of the C++ NMS, it is only used under Python 3.5; in all other environments the Python NMS is used.

@@ -245,15 +245,16 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg"

After the command runs, the prediction result (recognized text and score) of the image above is printed to the screen:

```bash
Predicts of ./doc/imgs_words/ch/word_4.jpg:('实力活力', 0.98458153)
```

<a name="基于CTC损失的识别模型推理"></a>
### 2. Inference of CTC-loss-based recognition models

We take CRNN as the example of CTC-loss-based recognition model inference. Rosetta is used in a similar way, without setting the recognition algorithm parameter rec_algorithm.

First convert the model saved during CRNN text recognition training into an inference model. Taking the Resnet34_vd-backbone model trained on the MJSynth and SynthText English synthetic datasets as an example ([model download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar)), run:

@@ -261,7 +262,7 @@ python3 tools/export_model.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o G
```

For CRNN text recognition inference, run:

```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/rec_crnn/" --rec_image_shape="3, 32, 100" --rec_char_type="en"

@@ -281,7 +282,9 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png

After the command runs, the recognition result of the image above is:

```bash
Predicts of ./doc/imgs_words_en/word_336.png:('super', 0.9999073)
```

**Note**: since the model above follows the [DTRB](https://arxiv.org/abs/1904.01906) text recognition training and evaluation pipeline, it differs from the ultra-lightweight Chinese recognition model training in two respects:

@@ -295,10 +298,10 @@ dict_character = list(self.character_str)
```

### 4. Inference with a custom text recognition dictionary

If the text dictionary was modified for training, specify the dictionary to use via `--rec_char_dict_path` when predicting with the inference model, and set `rec_char_type=ch`:

```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_type="ch" --rec_char_dict_path="your text dict path"
```

<a name="多语言模型的推理"></a>

@@ -313,9 +316,7 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" -

After the command runs, the prediction result of the image above is:

``` text
Predicts of ./doc/imgs_words/korean/1.jpg:('바탕으로', 0.9948904)
```

<a name="方向分类模型推理"></a>

@@ -378,4 +379,4 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --d

After the command runs, the recognition result image is:

(coming soon)
## OCR模型列表(V1.1,9月22日更新) ## OCR模型列表(V2.0,2020年12月12日更新)
- [一、文本检测模型](#文本检测模型) - [一、文本检测模型](#文本检测模型)
- [二、文本识别模型](#文本识别模型) - [二、文本识别模型](#文本识别模型)
...@@ -10,19 +10,20 @@ ...@@ -10,19 +10,20 @@
PaddleOCR提供的可下载模型包括`推理模型``训练模型``预训练模型``slim模型`,模型区别说明如下: PaddleOCR提供的可下载模型包括`推理模型``训练模型``预训练模型``slim模型`,模型区别说明如下:
|模型类型|模型格式|简介| |模型类型|模型格式|简介|
|-|-|-| |--- | --- | --- |
|推理模型|model、params|用于python预测引擎推理,[详情](./inference.md)| |推理模型|inference.pdmodel、inference.pdiparams|用于python预测引擎推理,[详情](./inference.md)|
|训练模型、预训练模型|\*.pdmodel\*.pdopt、\*.pdparams|训练过程中保存的checkpoints模型,保存的是模型的参数,多用于模型指标评估和恢复训练| |训练模型、预训练模型|\*.pdparams\*.pdopt、\*.states |训练过程中保存的模型的参数、优化器状态和训练中间信息,多用于模型指标评估和恢复训练|
|slim模型|\*.nb|用于lite部署| |slim模型|\*.nb|用于lite部署|
<a name="文本检测模型"></a> <a name="文本检测模型"></a>
### 一、文本检测模型 ### 一、文本检测模型
|模型名称|模型简介|配置文件|推理模型大小|下载地址| |模型名称|模型简介|配置文件|推理模型大小|下载地址|
|-|-|-|-|-| | --- | --- | --- | --- | --- |
|ch_ppocr_mobile_slim_v1.1_det|slim裁剪版超轻量模型,支持中英文、多语种文本检测|[det_mv3_db_v1.1.yml](../../configs/det/det_mv3_db_v1.1.yml)|1.4M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile-slim/det/ch_ppocr_mobile_v1.1_det_prune_infer.tar) / [slim模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/lite/ch_ppocr_mobile_v1.1_det_prune_opt.nb)| |ch_ppocr_mobile_slim_v2.0_det|slim裁剪版超轻量模型,支持中英文、多语种文本检测|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)| |[推理模型 (coming soon)](link) / [slim模型 (coming soon)](link)|
|ch_ppocr_mobile_v1.1_det|原始超轻量模型,支持中英文、多语种文本检测|[det_mv3_db_v1.1.yml](../../configs/det/det_mv3_db_v1.1.yml)|2.6M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/det/ch_ppocr_mobile_v1.1_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/det/ch_ppocr_mobile_v1.1_det_train.tar)| |ch_ppocr_mobile_v2.0_det|原始超轻量模型,支持中英文、多语种文本检测|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)|3M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|
|ch_ppocr_server_v1.1_det|通用模型,支持中英文、多语种文本检测,比超轻量模型更大,但效果更好|[det_r18_vd_db_v1.1.yml](../../configs/det/det_r18_vd_db_v1.1.yml)|47.2M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/server/det/ch_ppocr_server_v1.1_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/20-09-22/server/det/ch_ppocr_server_v1.1_det_train.tar)| |ch_ppocr_server_v2.0_det|通用模型,支持中英文、多语种文本检测,比超轻量模型更大,但效果更好|[ch_det_res18_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_res18_db_v2.0.yml)|47M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar)|
<a name="文本识别模型"></a> <a name="文本识别模型"></a>
...@@ -30,42 +31,44 @@ PaddleOCR提供的可下载模型包括`推理模型`、`训练模型`、`预训 ...@@ -30,42 +31,44 @@ PaddleOCR提供的可下载模型包括`推理模型`、`训练模型`、`预训
<a name="中文识别模型"></a> <a name="中文识别模型"></a>
#### 1. 中文识别模型 #### 1. 中文识别模型
|模型名称|模型简介|配置文件|推理模型大小|下载地址| |模型名称|模型简介|配置文件|推理模型大小|下载地址|
|-|-|-|-|-| | --- | --- | --- | --- | --- |
|ch_ppocr_mobile_slim_v1.1_rec|slim裁剪量化版超轻量模型,支持中英文、数字识别|[rec_chinese_lite_train_v1.1.yml](../../configs/rec/ch_ppocr_v1.1/rec_chinese_lite_train_v1.1.yml)|1.6M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile-slim/rec/ch_ppocr_mobile_v1.1_rec_quant_infer.tar) / [slim模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/lite/ch_ppocr_mobile_v1.1_rec_quant_opt.nb) | |ch_ppocr_mobile_slim_v2.0_rec|slim裁剪量化版超轻量模型,支持中英文、数字识别|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)| |[推理模型 (coming soon)](link) / [slim模型 (coming soon)](link) |
|ch_ppocr_mobile_v1.1_rec|原始超轻量模型,支持中英文、数字识别|[rec_chinese_lite_train_v1.1.yml](../../configs/rec/ch_ppocr_v1.1/rec_chinese_lite_train_v1.1.yml)|4.6M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/rec/ch_ppocr_mobile_v1.1_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/rec/ch_ppocr_mobile_v1.1_rec_train.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/rec/ch_ppocr_mobile_v1.1_rec_pre.tar) | |ch_ppocr_mobile_v2.0_rec|原始超轻量模型,支持中英文、数字识别|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)|3.71M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_pre.tar) |
|ch_ppocr_server_v1.1_rec|通用模型,支持中英文、数字识别|[rec_chinese_common_train_v1.1.yml](../../configs/rec/ch_ppocr_v1.1/rec_chinese_common_train_v1.1.yml)|105M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/server/rec/ch_ppocr_server_v1.1_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/20-09-22/server/rec/ch_ppocr_server_v1.1_rec_train.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/20-09-22/server/rec/ch_ppocr_server_v1.1_rec_pre.tar) | |ch_ppocr_server_v2.0_rec|通用模型,支持中英文、数字识别|[rec_chinese_common_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml)|94.8M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_pre.tar) |
**Note:** The `trained model` is finetuned from the `pre-trained model` on real data plus synthesized vertical-text data, and performs better in real application scenarios. The `pre-trained model` is trained directly on the full set of real and synthesized data, and is better suited as a starting point for finetuning on your own dataset.
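For the finetuning workflow described in the note, a rough sketch of fetching the pre-trained weights is shown below; pointing the training config at the extracted weights is left to the configuration docs, and the `Global.pretrained_model` key mentioned in the comment is an assumption:

```bash
# Download and unpack the pre-trained recognition weights for finetuning
# on your own dataset.
wget -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_pre.tar
cd pretrain_models && tar -xf ch_ppocr_mobile_v2.0_rec_pre.tar && cd ..
# Then point Global.pretrained_model in your yml at the extracted weights
# (key name is an assumption; check the configuration documentation).
```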
<a name="英文识别模型"></a> <a name="英文识别模型"></a>
#### 2. 英文识别模型 #### 2. 英文识别模型
|模型名称|模型简介|配置文件|推理模型大小|下载地址| |模型名称|模型简介|配置文件|推理模型大小|下载地址|
|-|-|-|-|-| | --- | --- | --- | --- | --- |
|en_ppocr_mobile_slim_v1.1_rec|slim裁剪量化版超轻量模型,支持英文、数字识别|[rec_en_lite_train.yml](../../configs/rec/multi_languages/rec_en_lite_train.yml)|0.9M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile-slim/en/en_ppocr_mobile_v1.1_rec_quant_infer.tar) / [slim模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile-slim/en/en_ppocr_mobile_v1.1_rec_quant_opt.nb) | |en_number_mobile_slim_v2.0_rec|slim裁剪量化版超轻量模型,支持英文、数字识别|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)| |[推理模型 (coming soon )](link) / [slim模型 (coming soon)](link) |
|en_ppocr_mobile_v1.1_rec|原始超轻量模型,支持英文、数字识别|[rec_en_lite_train.yml](../../configs/rec/multi_languages/rec_en_lite_train.yml)|2.0M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/en/en_ppocr_mobile_v1.1_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/en/en_ppocr_mobile_v1.1_rec_train.tar) | |en_number_mobile_v2.0_rec|原始超轻量模型,支持英文、数字识别|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)|2.56M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_train.tar) |
<a name="多语言识别模型"></a> <a name="多语言识别模型"></a>
#### 3. 多语言识别模型(更多语言持续更新中...) #### 3. 多语言识别模型(更多语言持续更新中...)
|模型名称|模型简介|配置文件|推理模型大小|下载地址| |模型名称|模型简介|配置文件|推理模型大小|下载地址|
|-|-|-|-|-| | --- | --- | --- | --- | --- |
| french_ppocr_mobile_v1.1_rec |法文识别|[rec_french_lite_train.yml](../../configs/rec/multi_languages/rec_french_lite_train.yml)|2.1M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/fr/french_ppocr_mobile_v1.1_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/fr/french_ppocr_mobile_v1.1_rec_train.tar) | | french_mobile_v2.0_rec |法文识别|[rec_french_lite_train.yml](../../configs/rec/multi_language/rec_french_lite_train.yml)|2.65M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_train.tar) |
| german_ppocr_mobile_v1.1_rec |德文识别|[rec_ger_lite_train.yml](../../configs/rec/multi_languages/rec_ger_lite_train.yml)|2.1M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/ge/german_ppocr_mobile_v1.1_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/ge/german_ppocr_mobile_v1.1_rec_train.tar) | | german_mobile_v2.0_rec |德文识别|[rec_german_lite_train.yml](../../configs/rec/multi_language/rec_german_lite_train.yml)|2.65M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_train.tar) |
| korean_ppocr_mobile_v1.1_rec |韩文识别|[rec_korean_lite_train.yml](../../configs/rec/multi_languages/rec_korean_lite_train.yml)|3.4M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/kr/korean_ppocr_mobile_v1.1_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/kr/korean_ppocr_mobile_v1.1_rec_train.tar) | | korean_mobile_v2.0_rec |韩文识别|[rec_korean_lite_train.yml](../../configs/rec/multi_language/rec_korean_lite_train.yml)|3.9M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_train.tar) |
| japan_ppocr_mobile_v1.1_rec |日文识别|[rec_japan_lite_train.yml](../../configs/rec/multi_languages/rec_japan_lite_train.yml)|3.7M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/jp/japan_ppocr_mobile_v1.1_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/jp/japan_ppocr_mobile_v1.1_rec_train.tar) | | japan_mobile_v2.0_rec |日文识别|[rec_japan_lite_train.yml](../../configs/rec/multi_language/rec_japan_lite_train.yml)|4.23M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_train.tar) |
<a name="文本方向分类模型"></a> <a name="文本方向分类模型"></a>
### 三、文本方向分类模型 ### 三、文本方向分类模型
|模型名称|模型简介|配置文件|推理模型大小|下载地址| |模型名称|模型简介|配置文件|推理模型大小|下载地址|
|-|-|-|-|-| | --- | --- | --- | --- | --- |
|ch_ppocr_mobile_v1.1_cls_quant|slim量化版模型|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)|0.5M|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_quant_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_quant_train.tar) / [slim模型](https://paddleocr.bj.bcebos.com/20-09-22/mobile/lite/ch_ppocr_mobile_v1.1_cls_quant_opt.nb) | |ch_ppocr_mobile_slim_v2.0_cls|slim量化版模型|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)| |[推理模型 (coming soon)](link) / [训练模型](link) / [slim模型](link) |
|ch_ppocr_mobile_v1.1_cls|原始模型|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)|850kb|[推理模型](https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_train.tar) | |ch_ppocr_mobile_v2.0_cls|原始模型|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)|1.38M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |
## OCR Model List (V1.1, updated on 2020.9.22)

[1.1 series model address](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/models_list.md)
| Model description | Model name | Recommended scenario | Detection model | Angle classifier | Recognition model |
| ------------ | --------------- | ----------------|---- | ---------- | -------- |
| Chinese and English ultra-lightweight OCR model (8.1M) | ch_ppocr_mobile_v2.0_xx | Mobile & server |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_pre.tar) |
| Chinese and English general OCR model (143M) | ch_ppocr_server_v2.0_xx | Server |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_pre.tar) |

* If wget is not installed in your Windows environment, you can copy the link into a browser to download the model, then extract it and place it in the corresponding directory.
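On Linux or macOS (or on Windows with wget installed), downloading and placing a model can be done in one go; a minimal sketch, assuming the `./inference/` directory layout used by the prediction scripts later in this document:

```bash
# Download a detection model and place it under ./inference/, the directory
# layout assumed by the prediction scripts.
mkdir -p inference && cd inference
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar
tar -xf ch_ppocr_mobile_v2.0_det_infer.tar
cd ..
```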
If you don't have a dataset locally, you can download the [icdar2015](http://rrc.cvc.uab.es/?ch=4&com=downloads) data from the official website for quick verification, or refer to [DTRB](https://github.com/clovaai/deep-text-recognition-benchmark#download-lmdb-dataset-for-traininig-and-evaluation-from-here) to download the lmdb-format datasets required for the benchmark.
<a name="自定义数据集"></a> <a name="自定义数据集"></a>
* 使用自己数据集 * 使用自己数据集
```
wget -P ./train_data/ic15_data https://paddleocr.bj.bcebos.com/dataset/rec_gt_test.txt
```
PaddleOCR also provides a data-format conversion script that converts the official labels into the supported format. The conversion tool is `ppocr/utils/gen_label.py`; here the training set is taken as an example:
```
# Convert the label file downloaded from the official website into rec_gt_label.txt
```
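The exact invocation is elided above; a hypothetical sketch follows, in which every flag name is an assumption to be checked against the script's help output:

```bash
# Hypothetical invocation of the conversion tool -- flag names are
# assumptions; verify them with: python3 ppocr/utils/gen_label.py --help
python3 ppocr/utils/gen_label.py \
    --mode="rec" \
    --input_path="./train_data/ic15_data/gt.txt" \
    --output_label="./train_data/ic15_data/rec_gt_label.txt"
```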
In word_dict.txt each line holds a single character, mapping characters to integer indices; the word "and" will be mapped to [2 5 1].

`ppocr/utils/ppocr_keys_v1.txt` is a Chinese dictionary containing 6623 characters.
`ppocr/utils/ic15_dict.txt` is an English dictionary containing 36 characters.
`ppocr/utils/dict/french_dict.txt` is a French dictionary containing 118 characters.
`ppocr/utils/dict/german_dict.txt` is a German dictionary containing 131 characters.
`ppocr/utils/dict/en_dict.txt` is an English dictionary containing 63 characters.

You can use them as needed.
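To see the character-to-index mapping a dictionary file implies, a small sketch (whether indices start at 0 or are shifted by special tokens such as the CTC blank depends on the model, so the offset below is an assumption):

```bash
# Print each character of the built-in English dictionary together with its
# line position, i.e. the integer index used for that character.
awk '{printf "%d -> %s\n", NR-1, $0}' ppocr/utils/ic15_dict.txt | head
```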
PaddleOCR provides training, evaluation and prediction scripts. This section takes the CRNN recognition model as an example:
```
cd PaddleOCR/
# Download the pre-trained model of MobileNetV3
wget -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_bilstm_ctc_v2.0_train.tar
# Decompress the model parameters
cd pretrain_models
tar -xf rec_mv3_none_bilstm_ctc_v2.0_train.tar && rm -rf rec_mv3_none_bilstm_ctc_v2.0_train.tar
```

Start training:
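The training command itself is elided here; assuming the standard `tools/train.py` entry point, it looks roughly like the following:

```bash
# Single-machine training with the icdar15 recognition config (a sketch;
# substitute the -c path for the config you actually use).
python3 tools/train.py -c configs/rec/rec_icdar15_train.yml
```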
| Configuration file | Algorithm | backbone | trans | seq | pred |
| :-----------------: | :-------: | :-------: | :---: | :---: | :---: |
| rec_mv3_tps_bilstm_attn.yml | RARE | Mobilenet_v3 large 0.5 | tps | BiLSTM | attention |
| rec_r34_vd_none_bilstm_ctc.yml | CRNN | Resnet34_vd | None | BiLSTM | ctc |
| rec_r34_vd_none_none_ctc.yml | Rosetta | Resnet34_vd | None | None | ctc |
| rec_r34_vd_tps_bilstm_ctc.yml | STARNet | Resnet34_vd | tps | BiLSTM | ctc |
For training on Chinese data, [rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml) is recommended. If you want to try other algorithms on a Chinese dataset, please modify the configuration file according to the following instructions:
| Parameter | Description | Default |
|-------|----------|----------|
| cls_batch_num | Number of images forwarded in one batch during classification | 30 |
| enable_mkldnn | Whether to enable MKL-DNN | FALSE |
| use_zero_copy_run | Whether to run the forward pass via zero_copy_run | FALSE |
| lang | Model language; currently supports Chinese and English (ch), English (en), French (french), German (german), Korean (korean) and Japanese (japan) | ch |
| det | Whether to run detection in the forward pass | TRUE |
| rec | Whether to run recognition in the forward pass | TRUE |
| cls | Whether to run classification in the forward pass (in command-line mode, use_angle_cls controls whether classification is enabled) | FALSE |
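As a usage sketch, the parameters above map onto the command-line entry point of the whl package roughly as follows (the image path is a placeholder):

```bash
# Run detection + angle classification + recognition on one image with the
# English model; use_angle_cls enables the angle classifier.
paddleocr --image_dir ./imgs/11.jpg --use_angle_cls true --lang en
```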
The following list can be viewed through `--help`
## INTRODUCTION TO GLOBAL PARAMETERS OF CONFIGURATION FILE

Take rec_chinese_lite_train_v2.0.yml as an example
### Global

| Parameter | Use | Defaults | Note |
| :----------------------: | :---------------------: | :--------------: | :--------------------: |
| use_gpu | Set using GPU or not | true | \ |
| epoch_num | Maximum training epoch number | 500 | \ |
| log_smooth_window | Log queue length; the median value of the queue is printed each time | 20 | \ |
| print_batch_step | Set print log interval | 10 | \ |
| save_model_dir | Set model save path | output/{algorithm_name} | \ |
| save_epoch_step | Set model save interval | 3 | \ |
### Optimizer

| Parameter | Use | Defaults | Note |
| :----------------------: | :---------------------: | :--------------: | :--------------------: |
| name | Optimizer class name | Adam | Currently supports `Momentum`, `Adam`, `RMSProp`, see [ppocr/optimizer/optimizer.py](../../ppocr/optimizer/optimizer.py) |
| beta1 | Set the exponential decay rate for the 1st moment estimates | 0.9 | \ |
| beta2 | Set the exponential decay rate for the 2nd moment estimates | 0.999 | \ |
| clip_norm | The maximum norm value | - | \ |
| **lr** | Set the learning rate decay method | - | \ |
| name | Learning rate decay class name | Cosine | Currently supports `Linear`, `Cosine`, `Step`, `Piecewise`, see [ppocr/optimizer/learning_rate.py](../../ppocr/optimizer/learning_rate.py) |
| learning_rate | Set the base learning rate | 0.001 | \ |
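Assuming the training script accepts `-o Key=Value` overrides (an assumption to verify with `--help`), these optimizer options can be changed for a single run without editing the yml file:

```bash
# Sketch: override the base learning rate from the command line
# (assumes tools/train.py supports -o overrides of config keys).
python3 tools/train.py -c configs/rec/rec_icdar15_train.yml \
    -o Optimizer.lr.learning_rate=0.0005
```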
| Parameter | Use | Defaults | Note |
| :----------------------: | :---------------------: | :--------------: | :--------------------: |
| shuffle | Whether to shuffle the dataset order each epoch | True | \ |
| batch_size_per_card | Single-card batch size during training | 256 | \ |
| drop_last | Whether to discard the last incomplete mini-batch when the dataset size is not divisible by the batch size | True | \ |
| num_workers | The number of sub-processes used to load data; if 0, no sub-process is started and data is loaded in the main process | 8 | \ |
The visualized text detection results are saved to the `./inference_results` folder by default, and the name of the result file is prefixed with 'det_res'. Examples of results are as follows:

(coming soon)

**Note**: EAST post-processing (locality-aware NMS) has two versions: Python and C++. The C++ version is noticeably faster than the Python version. Due to a compilation issue with the C++ NMS, the C++ version is used only in Python 3.5 environments; the Python version is used in all other cases.
The visualized text detection results are saved to the `./inference_results` folder by default, and the name of the result file is prefixed with 'det_res'. Examples of results are as follows:

(coming soon)
#### (2). Curved text detection model (Total-Text)

First, convert the model saved in the SAST text detection training process into an inference model. Taking the model based on the Resnet50_vd backbone network and trained on the Total-Text English dataset as an example ([model download link (coming soon)](https://paddleocr.bj.bcebos.com/SAST/sast_r50_vd_total_text.tar)), you can use the following command to convert:
The visualized text detection results are saved to the `./inference_results` folder by default, and the name of the result file is prefixed with 'det_res'. Examples of results are as follows:

(coming soon)

**Note**: SAST post-processing (locality-aware NMS) has two versions: Python and C++. The C++ version is noticeably faster than the Python version. Due to a compilation issue with the C++ NMS, the C++ version is used only in Python 3.5 environments; the Python version is used in all other cases.
After executing the command, the prediction results (recognized text and score) of the above image will be printed on the screen.

```bash
Predicts of ./doc/imgs_words/ch/word_4.jpg:('实力活力', 0.98458153)
```
<a name="CTC-BASED_RECOGNITION"></a> <a name="CTC-BASED_RECOGNITION"></a>
### 2. CTC-BASED TEXT RECOGNITION MODEL INFERENCE ### 2. CTC-BASED TEXT RECOGNITION MODEL INFERENCE
<a name="ATTENTION-BASED_RECOGNITION"></a> <a name="ATTENTION-BASED_RECOGNITION"></a>
### 3. ATTENTION-BASED TEXT RECOGNITION MODEL INFERENCE ### 3. ATTENTION-BASED TEXT RECOGNITION MODEL INFERENCE
![](../imgs_words_en/word_336.png)
The recognition model based on Attention loss is different from ctc, and additional recognition algorithm parameters need to be set --rec_algorithm="RARE" The recognition model based on Attention loss is different from ctc, and additional recognition algorithm parameters need to be set --rec_algorithm="RARE"
After executing the command, the recognition result of the above image is as follows: After executing the command, the recognition result of the above image is as follows:
```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/rare/" --rec_image_shape="3, 32, 100" --rec_char_type="en" --rec_algorithm="RARE"
```

![](../imgs_words_en/word_336.png)
After executing the command, the recognition result of the above image is as follows:
```bash
Predicts of ./doc/imgs_words_en/word_336.png:('super', 0.9999073)
```
**Note**: Since the above model refers to the [DTRB](https://arxiv.org/abs/1904.01906) text recognition training and evaluation process, it differs from the training of the lightweight Chinese recognition model in two aspects:

- The image resolution used in training is different: the image resolution used in training the above model is [3, 32, 100], while during our Chinese model training, in order to ensure the recognition effect of long text, the image resolution used in training is [3, 32, 320]. The default shape parameter of the inference stage is the image resolution used in the training phase, that is [3, 32, 320]. Therefore, when running inference of the above English model here, you need to set the shape of the recognition image through the parameter `rec_image_shape`.
<a name="USING_CUSTOM_CHARACTERS"></a> <a name="USING_CUSTOM_CHARACTERS"></a>
### 4. TEXT RECOGNITION MODEL INFERENCE USING CUSTOM CHARACTERS DICTIONARY ### 4. TEXT RECOGNITION MODEL INFERENCE USING CUSTOM CHARACTERS DICTIONARY
If the chars dictionary is modified during training, you need to specify the new dictionary path by setting the parameter `rec_char_dict_path` when using your inference model to predict. If the text dictionary is modified during training, when using the inference model to predict, you need to specify the dictionary path used by `--rec_char_dict_path`, and set `rec_char_type=ch`
```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_type="ch" --rec_char_dict_path="your text dict path"
```
<a name="MULTILINGUAL_MODEL_INFERENCE"></a> <a name="MULTILINGUAL_MODEL_INFERENCE"></a>
After executing the command, the prediction result of the above figure is:

``` text
Predicts of ./doc/imgs_words/korean/1.jpg:('바탕으로', 0.9948904)
```
<a name="ANGLE_CLASSIFICATION_MODEL_INFERENCE"></a> <a name="ANGLE_CLASSIFICATION_MODEL_INFERENCE"></a>
After executing the command, the recognition result image is as follows:

(coming soon)
## OCR model list (V1.1, updated on 2020.12.12)

- [1. Text Detection Model](#Detection)
- [2. Text Recognition Model](#Recognition)
The downloadable models provided by PaddleOCR include the `inference model`, `trained model`, `pre-trained model` and `slim model`. The differences between the models are as follows:

|model type|model format|description|
|--- | --- | --- |
|inference model|inference.pdmodel, inference.pdiparams|Used for inference based on the Python prediction engine. [Details](./inference_en.md)|
|trained model, pre-trained model|\*.pdparams, \*.pdopt, \*.states|The checkpoint saved during training, which stores the model parameters, optimizer states and intermediate training information; mostly used for model evaluation and resuming training.|
|slim model|\*.nb|Generally used for Lite deployment|
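To check which of these formats an archive actually contains before extracting it, a small sketch using one of the models from the tables below:

```bash
# List the members of a trained-model archive: *.pdparams / *.pdopt / *.states
# indicate a checkpoint, while inference.pdmodel + inference.pdiparams
# indicate an inference model.
wget -q https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar
tar -tf ch_ppocr_mobile_v2.0_rec_train.tar
```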
<a name="Detection"></a> <a name="Detection"></a>
### 1. Text Detection Model ### 1. Text Detection Model
|model name|description|config|model size|download|
|-|-|-|-|-|
|ch_ppocr_mobile_slim_v1.1_det|Slim pruned lightweight model, supporting Chinese, English, multilingual text detection|[det_mv3_db_v1.1.yml](../../configs/det/det_mv3_db_v1.1.yml)|1.4M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/mobile-slim/det/ch_ppocr_mobile_v1.1_det_prune_infer.tar) / [slim model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/lite/ch_ppocr_mobile_v1.1_det_prune_opt.nb)|
|ch_ppocr_mobile_v1.1_det|Original lightweight model, supporting Chinese, English, multilingual text detection|[det_mv3_db_v1.1.yml](../../configs/det/det_mv3_db_v1.1.yml)|2.6M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/det/ch_ppocr_mobile_v1.1_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/det/ch_ppocr_mobile_v1.1_det_train.tar)|
|ch_ppocr_server_v1.1_det|General model, which is larger than the lightweight model, but achieved better performance|[det_r18_vd_db_v1.1.yml](../../configs/det/det_r18_vd_db_v1.1.yml)|47.2M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/server/det/ch_ppocr_server_v1.1_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/20-09-22/server/det/ch_ppocr_server_v1.1_det_train.tar)|
|model name|description|config|model size|download|
| --- | --- | --- | --- | --- |
|ch_ppocr_mobile_slim_v2.0_det|Slim pruned lightweight model, supporting Chinese, English, multilingual text detection|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)| |[inference model (coming soon)](link) / [slim model (coming soon)](link)|
|ch_ppocr_mobile_v2.0_det|Original lightweight model, supporting Chinese, English, multilingual text detection|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)|3M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|
|ch_ppocr_server_v2.0_det|General model, which is larger than the lightweight model, but achieved better performance|[ch_det_res18_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_res18_db_v2.0.yml)|47M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar)|
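A usage sketch for a downloaded detection model, run through the `predict_det.py` tool referenced earlier in this document (the image path is taken from the earlier examples):

```bash
# Unpack the lightweight detection model and run text detection on an image;
# results are written to ./inference_results by default.
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar
tar -xf ch_ppocr_mobile_v2.0_det_infer.tar
python3 tools/infer/predict_det.py --image_dir="./doc/imgs_en/img_10.jpg" \
    --det_model_dir="./ch_ppocr_mobile_v2.0_det_infer/"
```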
<a name="Recognition"></a> <a name="Recognition"></a>
### 2. Text Recognition Model ### 2. Text Recognition Model
<a name="Chinese"></a> <a name="Chinese"></a>
#### Chinese Recognition Model #### Chinese Recognition Model
|model name|description|config|model size|download| |model name|description|config|model size|download|
|-|-|-|-|-| | --- | --- | --- | --- | --- |
|ch_ppocr_mobile_slim_v1.1_rec|Slim pruned and quantized lightweight model, supporting Chinese, English and number recognition|[rec_chinese_lite_train_v1.1.yml](../../configs/rec/ch_ppocr_v1.1/rec_chinese_lite_train_v1.1.yml)|1.6M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/mobile-slim/rec/ch_ppocr_mobile_v1.1_rec_quant_infer.tar) / [slim model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/lite/ch_ppocr_mobile_v1.1_rec_quant_opt.nb) | |ch_ppocr_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting Chinese, English and number recognition|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)| |[inference model (coming soon)](link) / [slim model (coming soon)](link) |
|ch_ppocr_mobile_v1.1_rec|Original lightweight model, supporting Chinese, English and number recognition|[rec_chinese_lite_train_v1.1.yml](../../configs/rec/ch_ppocr_v1.1/rec_chinese_lite_train_v1.1.yml)|4.6M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/rec/ch_ppocr_mobile_v1.1_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/rec/ch_ppocr_mobile_v1.1_rec_train.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/rec/ch_ppocr_mobile_v1.1_rec_pre.tar) | |ch_ppocr_mobile_v2.0_rec|Original lightweight model, supporting Chinese, English and number recognition|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)|3.71M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_pre.tar) |
|ch_ppocr_server_v1.1_rec|General model, supporting Chinese, English and number recognition|[rec_chinese_common_train_v1.1.yml](../../configs/rec/ch_ppocr_v1.1/rec_chinese_common_train_v1.1.yml)|105M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/server/rec/ch_ppocr_server_v1.1_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/20-09-22/server/rec/ch_ppocr_server_v1.1_rec_train.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/20-09-22/server/rec/ch_ppocr_server_v1.1_rec_pre.tar) | |ch_ppocr_server_v2.0_rec|General model, supporting Chinese, English and number recognition|[rec_chinese_common_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml)|94.8M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_pre.tar) |
**Note:** The `trained model` is finetuned on the `pre-trained model` with real data and synthesized vertical text data, which achieves better performance in real scenes. The `pre-trained model` is directly trained on the full amount of real and synthesized data, which is more suitable for finetuning on your own dataset.
<a name="English"></a> <a name="English"></a>
#### English Recognition Model #### English Recognition Model
|model name|description|config|model size|download| |model name|description|config|model size|download|
|-|-|-|-|-| | --- | --- | --- | --- | --- |
|en_ppocr_mobile_slim_v1.1_rec|Slim pruned and quantized lightweight model, supporting English and number recognition|[rec_en_lite_train.yml](../../configs/rec/multi_languages/rec_en_lite_train.yml)|0.9M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/mobile-slim/en/en_ppocr_mobile_v1.1_rec_quant_infer.tar) / [slim model](https://paddleocr.bj.bcebos.com/20-09-22/mobile-slim/en/en_ppocr_mobile_v1.1_rec_quant_opt.nb) | |en_number_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting English and number recognition|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)| |[inference model (coming soon )](link) / [slim model (coming soon)](link) |
|en_ppocr_mobile_v1.1_rec|Original lightweight model, supporting English and number recognition|[rec_en_lite_train.yml](../../configs/rec/multi_languages/rec_en_lite_train.yml)|2.0M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/en/en_ppocr_mobile_v1.1_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/en/en_ppocr_mobile_v1.1_rec_train.tar) | |en_number_mobile_v2.0_rec|Original lightweight model, supporting English and number recognition|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)|2.56M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_train.tar) |
<a name="Multilingual"></a> <a name="Multilingual"></a>
#### Multilingual Recognition Model(Updating...) #### Multilingual Recognition Model(Updating...)
|model name|description|config|model size|download|
|-|-|-|-|-|
| french_ppocr_mobile_v1.1_rec |Lightweight model for French recognition|[rec_french_lite_train.yml](../../configs/rec/multi_languages/rec_french_lite_train.yml)|2.1M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/fr/french_ppocr_mobile_v1.1_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/fr/french_ppocr_mobile_v1.1_rec_train.tar) |
| german_ppocr_mobile_v1.1_rec |German model for French recognition|[rec_ger_lite_train.yml](../../configs/rec/multi_languages/rec_ger_lite_train.yml)|2.1M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/ge/german_ppocr_mobile_v1.1_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/ge/german_ppocr_mobile_v1.1_rec_train.tar) |
| korean_ppocr_mobile_v1.1_rec |Lightweight model for Korean recognition|[rec_korean_lite_train.yml](../../configs/rec/multi_languages/rec_korean_lite_train.yml)|3.4M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/kr/korean_ppocr_mobile_v1.1_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/kr/korean_ppocr_mobile_v1.1_rec_train.tar) |
| japan_ppocr_mobile_v1.1_rec |Lightweight model for Japanese recognition|[rec_japan_lite_train.yml](../../configs/rec/multi_languages/rec_japan_lite_train.yml)|3.7M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/jp/japan_ppocr_mobile_v1.1_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/jp/japan_ppocr_mobile_v1.1_rec_train.tar) |
|model name|description|config|model size|download|
| --- | --- | --- | --- | --- |
| french_mobile_v2.0_rec |Lightweight model for French recognition|[rec_french_lite_train.yml](../../configs/rec/multi_language/rec_french_lite_train.yml)|2.65M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_train.tar) |
| german_mobile_v2.0_rec |Lightweight model for German recognition|[rec_german_lite_train.yml](../../configs/rec/multi_language/rec_german_lite_train.yml)|2.65M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_train.tar) |
| korean_mobile_v2.0_rec |Lightweight model for Korean recognition|[rec_korean_lite_train.yml](../../configs/rec/multi_language/rec_korean_lite_train.yml)|3.9M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_train.tar) |
| japan_mobile_v2.0_rec |Lightweight model for Japanese recognition|[rec_japan_lite_train.yml](../../configs/rec/multi_language/rec_japan_lite_train.yml)|4.23M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_train.tar) |
<a name="Angle"></a> <a name="Angle"></a>
### 3. Text Angle Classification Model ### 3. Text Angle Classification Model
|model name|description|config|model size|download| |model name|description|config|model size|download|
|-|-|-|-|-| | --- | --- | --- | --- | --- |
|ch_ppocr_mobile_v1.1_cls_quant|Slim quantized model|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)|0.5M|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_quant_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_quant_train.tar) / [slim model](https://paddleocr.bj.bcebos.com/20-09-22/mobile/lite/ch_ppocr_mobile_v1.1_cls_quant_opt.nb) | |ch_ppocr_mobile_slim_v2.0_cls|Slim quantized model|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)| |[inference model (coming soon)](link) / [trained model](link) / [slim model](link) |
|ch_ppocr_mobile_v1.1_cls|Original model|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)|850kb|[inference model](https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_train.tar) | |ch_ppocr_mobile_v2.0_cls|Original model|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)|1.38M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |
## OCR model list (V1.1, updated on 2020.9.22)

[1.1 series model address](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/models_list.md)