Commit 3b86a3cf authored by benjaminwan's avatar benjaminwan
Browse files

version: 1.1.0

parent 22437c6f
#ifdef __cplusplus
#ifndef __OCR_LITE_C_API_H__
#define __OCR_LITE_C_API_H__
extern "C"
{
/*
 * _QM_OCR_API: import/export decoration for the C API entry points.
 *  - Windows, building the library (__CLIB__ defined): dllexport
 *  - Windows, consuming the library:                   dllimport
 *  - every other platform:                             expands to nothing
 * _WIN32 is predefined by Windows compilers; WIN32 is only defined by some
 * build systems/headers, so both spellings are accepted.
 */
#if defined(_WIN32) || defined(WIN32)
#ifdef __CLIB__
#define _QM_OCR_API __declspec(dllexport)
#else
#define _QM_OCR_API __declspec(dllimport)
#endif
#else
#define _QM_OCR_API
#endif
/* Opaque handle to an OCR engine instance (see OcrInit / OcrDestroy). */
typedef void *OCR_HANDLE;
/* Boolean result type of the C API; holds TRUE or FALSE. */
typedef char OCR_BOOL;
/*
 * Fallback definitions. Each one is guarded so that a platform header that
 * already provides the macro (e.g. <windows.h>) is not redefined.
 */
#ifndef NULL
#define NULL 0
#endif
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
/*
 * Detection parameters passed to OcrDetect().
 * NOTE(review): field meanings below are inferred from the names and from the
 * command-line help text of the demo program; confirm against the implementation.
 */
typedef struct __ocr_param {
int padding; // presumably border padding added around the image before detection
int maxSideLen; // presumably the long-side length the image is resized to
float boxScoreThresh; // presumably minimum score for a text box to be kept
float boxThresh; // presumably threshold applied to the detection map
float unClipRatio; // presumably expansion ratio applied to detected boxes
int doAngle; // 1 enables the step, any other value presumably disables it
int mostAngle; // 1 means true, any other value presumably false
} OCR_PARAM;
/*
C API entry points. The implementations are not part of this header, so the
notes below are assumptions to be confirmed against the library source.

OcrInit:      takes the det/cls/rec model paths, the keys file path and a
              thread count (nThreads); presumably returns NULL on failure.
              nThreads is typically set to the number of CPU threads to use.
OcrDetect:    runs OCR on imgPath/imgName using the settings in pParam.
OcrGetLen:    presumably the buffer length needed by OcrGetResult.
OcrGetResult: copies the result text into szBuf, which holds nLen bytes.
OcrDestroy:   releases a handle obtained from OcrInit.
*/
_QM_OCR_API OCR_HANDLE
OcrInit(const char *szDetModel, const char *szClsModel, const char *szRecModel, const char *szKeyPath, int nThreads);
_QM_OCR_API OCR_BOOL
OcrDetect(OCR_HANDLE handle, const char *imgPath, const char *imgName, OCR_PARAM *pParam);
_QM_OCR_API int OcrGetLen(OCR_HANDLE handle);
_QM_OCR_API OCR_BOOL OcrGetResult(OCR_HANDLE handle, char *szBuf, int nLen);
_QM_OCR_API void OcrDestroy(OCR_HANDLE handle);
};
#endif //__OCR_LITE_C_API_H__
#endif //__cplusplus
#ifdef __JNI__
#ifndef __OCR_RESULT_UTILS_H__
#define __OCR_RESULT_UTILS_H__
#include <jni.h>
#include "OcrStruct.h"
// Helper that exposes a native OcrResult to Java through JNI.
// Only compiled when __JNI__ is defined. The member functions are defined
// elsewhere; the per-member notes are inferred from names and signatures only.
class OcrResultUtils {
public:
// Takes the JNI environment of the calling thread and the native result.
OcrResultUtils(JNIEnv *env, OcrResult &ocrResult);
~OcrResultUtils();
// Returns a Java object; presumably the one stored in jOcrResult.
jobject getJObject();
private:
JNIEnv *jniEnv; // NOTE(review): raw, non-owning pointer; JNIEnv is only valid on its own thread
jobject jOcrResult;
// Helpers that presumably build the Java-side list, point and array objects.
jclass newJListClass();
jmethodID getListConstructor(jclass clazz);
jobject getTextBlock(TextBlock &textBlock);
jobject getTextBlocks(std::vector<TextBlock> &textBlocks);
jobject newJPoint(cv::Point &point);
jobject newJBoxPoint(std::vector<cv::Point> &boxPoint);
jfloatArray newJScoreArray(std::vector<float> &scores);
};
#endif //__OCR_RESULT_UTILS_H__
#endif
#ifndef __OCR_STRUCT_H__
#define __OCR_STRUCT_H__
#include "opencv2/core.hpp"
#include <vector>
// Source/destination sizes and the ratios between them, as returned by getScaleParam().
// NOTE(review): whether a ratio is dst/src or src/dst is not visible here; confirm.
struct ScaleParam {
int srcWidth;
int srcHeight;
int dstWidth;
int dstHeight;
float ratioWidth;
float ratioHeight;
};
// A text region: its outline points and an associated score.
// Presumably produced by the detection stage; confirm against the producer.
struct TextBox {
std::vector<cv::Point> boxPoint;
float score;
};
// Result of the angle classifier for one cropped text image.
// Kept as a plain aggregate: callers brace-initialise it positionally
// (e.g. Angle{-1, 0.f}), so member order must not change.
struct Angle {
int index; // index of the highest-scoring class; -1 when angle detection is skipped
float score; // score of that class
double time; // elapsed time of the classification, in the units of getCurrentTime()
};
// Recognised text for one line together with per-character scores.
// NOTE(review): std::string is used here but this header only includes
// opencv2/core.hpp and <vector>; it relies on a transitive <string> include.
struct TextLine {
std::string text;
std::vector<float> charScores;
double time; // presumably the recognition time for this line
};
// Everything known about one text region. The fields mirror those of
// TextBox (boxPoint/boxScore), Angle (angle*) and TextLine (text/charScores/crnnTime);
// presumably filled from those three results, confirm against the producer.
struct TextBlock {
std::vector<cv::Point> boxPoint;
float boxScore;
int angleIndex;
float angleScore;
double angleTime;
std::string text;
std::vector<float> charScores;
double crnnTime;
double blockTime; // presumably total time spent on this block
};
// Result of one OCR run over an image.
// NOTE(review): field meanings are inferred from names; confirm against the producer.
struct OcrResult {
double dbNetTime; // presumably time spent in the detection network
std::vector<TextBlock> textBlocks; // one entry per text region
cv::Mat boxImg; // presumably the image with the text boxes drawn on it
double detectTime; // presumably total time of the run
std::string strRes; // presumably the concatenated recognised text
};
#endif //__OCR_STRUCT_H__
#ifndef __OCR_UTILS_H__
#define __OCR_UTILS_H__
#include <opencv2/core.hpp>
#include "OcrStruct.h"
#include <onnxruntime/core/session/onnxruntime_cxx_api.h>
#include <numeric>
#include <sys/stat.h>
// Stand-in for std::make_unique: constructs a T from the forwarded
// arguments and hands ownership to a std::unique_ptr<T>.
template<typename T, typename... Ts>
static std::unique_ptr<T> makeUnique(Ts &&... params) {
    std::unique_ptr<T> owner(new T(std::forward<Ts>(params)...));
    return owner;
}
// Arithmetic mean of the elements of `input`, accumulated in double.
// Returns 0 for an empty vector (the same convention getStdev uses for
// degenerate input) instead of dividing by zero and producing NaN.
template<typename T>
static double getMean(std::vector<T> &input) {
    if (input.empty()) {
        return 0;
    }
    const double sum = std::accumulate(input.begin(), input.end(), 0.0);
    return sum / static_cast<double>(input.size());
}
// Sample standard deviation (divisor n - 1) of `input` around the supplied
// `mean`. Vectors with fewer than two elements yield 0.
template<typename T>
static double getStdev(std::vector<T> &input, double mean) {
    const auto count = input.size();
    if (count <= 1) return 0;
    double squaredDiffSum = 0.0;
    // each element is converted to double before the difference is taken
    for (const double value : input) {
        squaredDiffSum += (value - mean) * (value - mean);
    }
    return sqrt(squaredDiffSum / (count - 1));
}
// Limits x to the range [min, max]. The upper bound is tested first, so when
// min > max and x exceeds max the result is max.
template<class T>
inline T clamp(T x, T min, T max) {
    return (x > max) ? max : ((x < min) ? min : x);
}
double getCurrentTime();
// True when stat() succeeds on `name`. Any failure (missing path, permission
// error, ...) is reported as "does not exist".
inline bool isFileExists(const std::string &name) {
    struct stat info;
    const int rc = stat(name.c_str(), &info);
    return rc == 0;
}
// ---- Declarations only; definitions live in the matching source file. ----
// NOTE(review): the group descriptions below are inferred from names and
// signatures and should be confirmed against the implementations.

// Converts a narrow string to a wide string (used for model paths on Windows).
std::wstring strToWstr(std::string str);
// Compute resize parameters for `src`, from a scale factor or a target size.
ScaleParam getScaleParam(cv::Mat &src, const float scale);
ScaleParam getScaleParam(cv::Mat &src, const int targetSize);
std::vector<cv::Point2f> getBox(const cv::RotatedRect &rect);
// Drawing helpers that operate on boxImg.
int getThickness(cv::Mat &boxImg);
void drawTextBox(cv::Mat &boxImg, cv::RotatedRect &rect, int thickness);
void drawTextBox(cv::Mat &boxImg, const std::vector<cv::Point> &box, int thickness);
void drawTextBoxes(cv::Mat &boxImg, std::vector<TextBox> &textBoxes, int thickness);
// Image rotation / cropping / resizing helpers.
cv::Mat matRotateClockWise180(cv::Mat src);
cv::Mat matRotateClockWise90(cv::Mat src);
cv::Mat getRotateCropImage(const cv::Mat &src, std::vector<cv::Point> box);
cv::Mat adjustTargetImg(cv::Mat &src, int dstWidth, int dstHeight);
// Box post-processing helpers. maxSideLen is taken by non-const reference,
// so it is presumably an output.
std::vector<cv::Point2f> getMinBoxes(const cv::RotatedRect &boxRect, float &maxSideLen);
float boxScoreFast(const std::vector<cv::Point2f> &boxes, const cv::Mat &pred);
cv::RotatedRect unClip(std::vector<cv::Point2f> box, float unClipRatio);
// Converts an image into the float input values handed to the networks.
std::vector<float> substractMeanNormalize(cv::Mat &src, const float *meanVals, const float *normVals);
std::vector<int> getAngleIndexes(std::vector<Angle> &angles);
// The returned name pointers are released with free() by the net destructors.
std::vector<char *> getInputNames(Ort::Session *session);
std::vector<char *> getOutputNames(Ort::Session *session);
void saveImg(cv::Mat &img, const char *imgPath);
// File-path builders for the source image, result files and debug images.
std::string getSrcImgFilePath(const char *path, const char *imgName);
std::string getResultTxtFilePath(const char *path, const char *imgName);
std::string getResultImgFilePath(const char *path, const char *imgName);
std::string getDebugImgFilePath(const char *path, const char *imgName, int i, const char *tag);
#endif //__OCR_UTILS_H__
/*******************************************************************************
* *
* Author : Angus Johnson *
* Version : 6.4.2 *
* Date : 27 February 2017 *
* Website : http://www.angusj.com *
* Copyright : Angus Johnson 2010-2017 *
* *
* License: *
* Use, modification & distribution is subject to Boost Software License Ver 1. *
* http://www.boost.org/LICENSE_1_0.txt *
* *
* Attributions: *
* The code in this library is an extension of Bala Vatti's clipping algorithm: *
* "A generic solution to polygon clipping" *
* Communications of the ACM, Vol 35, Issue 7 (July 1992) pp 56-63. *
* http://portal.acm.org/citation.cfm?id=129906 *
* *
* Computer graphics and geometric modeling: implementation and algorithms *
* By Max K. Agoston *
* Springer; 1 edition (January 4, 2005) *
* http://books.google.com/books?q=vatti+clipping+agoston *
* *
* See also: *
* "Polygon Offsetting by Computing Winding Numbers" *
* Paper no. DETC2005-85513 pp. 565-575 *
* ASME 2005 International Design Engineering Technical Conferences *
* and Computers and Information in Engineering Conference (IDETC/CIE2005) *
* September 24-28, 2005 , Long Beach, California, USA *
* http://www.me.berkeley.edu/~mcmains/pubs/DAC05OffsetPolygon.pdf *
* *
*******************************************************************************/
#ifndef clipper_hpp
#define clipper_hpp
#define CLIPPER_VERSION "6.4.2"
//use_int32: When enabled 32bit ints are used instead of 64bit ints. This
//improves performance but coordinate values are limited to the range +/- 46340
//#define use_int32
//use_xyz: adds a Z member to IntPoint. Adds a minor cost to performance.
//#define use_xyz
//use_lines: Enables line clipping. Adds a very minor cost to performance.
#define use_lines
//use_deprecated: Enables temporary support for the obsolete functions
//#define use_deprecated
#include <vector>
#include <list>
#include <set>
#include <stdexcept>
#include <cstring>
#include <cstdlib>
#include <ostream>
#include <functional>
#include <queue>
namespace ClipperLib {
enum ClipType { ctIntersection, ctUnion, ctDifference, ctXor };
enum PolyType { ptSubject, ptClip };
//By far the most widely used winding rules for polygon filling are
//EvenOdd & NonZero (GDI, GDI+, XLib, OpenGL, Cairo, AGG, Quartz, SVG, Gr32)
//Other rules include Positive, Negative and ABS_GTR_EQ_TWO (only in OpenGL)
//see http://glprogramming.com/red/chapter11.html
enum PolyFillType { pftEvenOdd, pftNonZero, pftPositive, pftNegative };
#ifdef use_int32
typedef int cInt;
static cInt const loRange = 0x7FFF;
static cInt const hiRange = 0x7FFF;
#else
typedef signed long long cInt;
static cInt const loRange = 0x3FFFFFFF;
static cInt const hiRange = 0x3FFFFFFFFFFFFFFFLL;
typedef signed long long long64; //used by Int128 class
typedef unsigned long long ulong64;
#endif
// Integer coordinate pair (plus Z when use_xyz is defined).
// Equality compares X and Y only; Z never takes part in comparisons.
struct IntPoint {
  cInt X;
  cInt Y;
#ifdef use_xyz
  cInt Z;
  IntPoint(cInt x = 0, cInt y = 0, cInt z = 0): X(x), Y(y), Z(z) {}
#else
  IntPoint(cInt x = 0, cInt y = 0): X(x), Y(y) {}
#endif
  friend inline bool operator== (const IntPoint& lhs, const IntPoint& rhs)
  {
    return lhs.X == rhs.X && lhs.Y == rhs.Y;
  }
  friend inline bool operator!= (const IntPoint& lhs, const IntPoint& rhs)
  {
    return !(lhs == rhs);
  }
};
//------------------------------------------------------------------------------
typedef std::vector< IntPoint > Path;
typedef std::vector< Path > Paths;
inline Path& operator <<(Path& poly, const IntPoint& p) {poly.push_back(p); return poly;}
inline Paths& operator <<(Paths& polys, const Path& p) {polys.push_back(p); return polys;}
std::ostream& operator <<(std::ostream &s, const IntPoint &p);
std::ostream& operator <<(std::ostream &s, const Path &p);
std::ostream& operator <<(std::ostream &s, const Paths &p);
// Floating-point coordinate pair; implicitly constructible from an IntPoint.
struct DoublePoint
{
  double X;
  double Y;
  DoublePoint(double x = 0, double y = 0) : X(x), Y(y) {}
  DoublePoint(IntPoint ip)
    : X(static_cast<double>(ip.X)), Y(static_cast<double>(ip.Y)) {}
};
//------------------------------------------------------------------------------
#ifdef use_xyz
typedef void (*ZFillCallback)(IntPoint& e1bot, IntPoint& e1top, IntPoint& e2bot, IntPoint& e2top, IntPoint& pt);
#endif
enum InitOptions {ioReverseSolution = 1, ioStrictlySimple = 2, ioPreserveCollinear = 4};
enum JoinType {jtSquare, jtRound, jtMiter};
enum EndType {etClosedPolygon, etClosedLine, etOpenButt, etOpenSquare, etOpenRound};
class PolyNode;
typedef std::vector< PolyNode* > PolyNodes;
// One node of a PolyTree: a contour plus its child nodes.
// Member functions are defined in the library source, not in this header.
class PolyNode
{
public:
PolyNode();
virtual ~PolyNode(){};
Path Contour; // the path held by this node
PolyNodes Childs; // child nodes (raw pointers)
PolyNode* Parent; // parent node
PolyNode* GetNext() const;
bool IsHole() const;
bool IsOpen() const;
int ChildCount() const;
private:
//PolyNode& operator =(PolyNode& other);
unsigned Index; //node index in Parent.Childs
bool m_IsOpen;
JoinType m_jointype;
EndType m_endtype;
PolyNode* GetNextSiblingUp() const;
void AddChild(PolyNode& child);
friend class Clipper; //to access Index
friend class ClipperOffset;
};
// Root of a PolyNode hierarchy. The destructor calls Clear(); AllNodes is the
// flat node list that Clipper fills in (presumably the owning list; confirm).
class PolyTree: public PolyNode
{
public:
~PolyTree(){ Clear(); };
PolyNode* GetFirst() const;
void Clear();
int Total() const;
private:
//PolyTree& operator =(PolyTree& other);
PolyNodes AllNodes;
friend class Clipper; //to access AllNodes
};
bool Orientation(const Path &poly);
double Area(const Path &poly);
int PointInPolygon(const IntPoint &pt, const Path &path);
void SimplifyPolygon(const Path &in_poly, Paths &out_polys, PolyFillType fillType = pftEvenOdd);
void SimplifyPolygons(const Paths &in_polys, Paths &out_polys, PolyFillType fillType = pftEvenOdd);
void SimplifyPolygons(Paths &polys, PolyFillType fillType = pftEvenOdd);
void CleanPolygon(const Path& in_poly, Path& out_poly, double distance = 1.415);
void CleanPolygon(Path& poly, double distance = 1.415);
void CleanPolygons(const Paths& in_polys, Paths& out_polys, double distance = 1.415);
void CleanPolygons(Paths& polys, double distance = 1.415);
void MinkowskiSum(const Path& pattern, const Path& path, Paths& solution, bool pathIsClosed);
void MinkowskiSum(const Path& pattern, const Paths& paths, Paths& solution, bool pathIsClosed);
void MinkowskiDiff(const Path& poly1, const Path& poly2, Paths& solution);
void PolyTreeToPaths(const PolyTree& polytree, Paths& paths);
void ClosedPathsFromPolyTree(const PolyTree& polytree, Paths& paths);
void OpenPathsFromPolyTree(PolyTree& polytree, Paths& paths);
void ReversePath(Path& p);
void ReversePaths(Paths& p);
struct IntRect { cInt left; cInt top; cInt right; cInt bottom; };
//enums that are used internally ...
enum EdgeSide { esLeft = 1, esRight = 2};
//forward declarations (for stuff used internally) ...
struct TEdge;
struct IntersectNode;
struct LocalMinimum;
struct OutPt;
struct OutRec;
struct Join;
typedef std::vector < OutRec* > PolyOutList;
typedef std::vector < TEdge* > EdgeList;
typedef std::vector < Join* > JoinList;
typedef std::vector < IntersectNode* > IntersectList;
//------------------------------------------------------------------------------
//ClipperBase is the ancestor to the Clipper class. It should not be
//instantiated directly. This class simply abstracts the conversion of sets of
//polygon coordinates into edge objects that are stored in a LocalMinima list.
// Base of Clipper: stores the input paths as edges and local minima.
// Not meant to be instantiated directly.
class ClipperBase
{
public:
ClipperBase();
virtual ~ClipperBase();
// Add input paths as subject or clip polygons; Closed selects polygon vs. open line.
virtual bool AddPath(const Path &pg, PolyType PolyTyp, bool Closed);
bool AddPaths(const Paths &ppg, PolyType PolyTyp, bool Closed);
virtual void Clear();
IntRect GetBounds();
// Getter/setter pair for the PreserveCollinear option.
bool PreserveCollinear() {return m_PreserveCollinear;};
void PreserveCollinear(bool value) {m_PreserveCollinear = value;};
protected:
// Internal machinery shared with Clipper.
void DisposeLocalMinimaList();
TEdge* AddBoundsToLML(TEdge *e, bool IsClosed);
virtual void Reset();
TEdge* ProcessBound(TEdge* E, bool IsClockwise);
void InsertScanbeam(const cInt Y);
bool PopScanbeam(cInt &Y);
bool LocalMinimaPending();
bool PopLocalMinima(cInt Y, const LocalMinimum *&locMin);
OutRec* CreateOutRec();
void DisposeAllOutRecs();
void DisposeOutRec(PolyOutList::size_type index);
void SwapPositionsInAEL(TEdge *edge1, TEdge *edge2);
void DeleteFromAEL(TEdge *e);
void UpdateEdgeIntoAEL(TEdge *&e);
typedef std::vector<LocalMinimum> MinimaList;
MinimaList::iterator m_CurrentLM;
MinimaList m_MinimaList;
bool m_UseFullRange;
EdgeList m_edges;
bool m_PreserveCollinear;
bool m_HasOpenPaths;
PolyOutList m_PolyOuts;
TEdge *m_ActiveEdges;
// Max-heap of scanline Y values (std::priority_queue defaults to std::less).
typedef std::priority_queue<cInt> ScanbeamList;
ScanbeamList m_Scanbeam;
};
//------------------------------------------------------------------------------
// Polygon clipping engine: performs the boolean operation selected by
// ClipType on the paths added through ClipperBase::AddPath/AddPaths.
class Clipper : public virtual ClipperBase
{
public:
// initOptions is a bit mask; presumably a combination of InitOptions values.
Clipper(int initOptions = 0);
// Execute overloads: the result goes either into a flat Paths list or into a
// PolyTree, with one fill rule for both inputs or separate subject/clip rules.
bool Execute(ClipType clipType,
Paths &solution,
PolyFillType fillType = pftEvenOdd);
bool Execute(ClipType clipType,
Paths &solution,
PolyFillType subjFillType,
PolyFillType clipFillType);
bool Execute(ClipType clipType,
PolyTree &polytree,
PolyFillType fillType = pftEvenOdd);
bool Execute(ClipType clipType,
PolyTree &polytree,
PolyFillType subjFillType,
PolyFillType clipFillType);
// Getter/setter pairs for the ReverseSolution and StrictlySimple options.
bool ReverseSolution() { return m_ReverseOutput; };
void ReverseSolution(bool value) {m_ReverseOutput = value;};
bool StrictlySimple() {return m_StrictSimple;};
void StrictlySimple(bool value) {m_StrictSimple = value;};
//set the callback function for z value filling on intersections (otherwise Z is 0)
#ifdef use_xyz
void ZFillFunction(ZFillCallback zFillFunc);
#endif
protected:
virtual bool ExecuteInternal();
private:
// Working state of one Execute() run.
JoinList m_Joins;
JoinList m_GhostJoins;
IntersectList m_IntersectList;
ClipType m_ClipType;
typedef std::list<cInt> MaximaList;
MaximaList m_Maxima;
TEdge *m_SortedEdges;
bool m_ExecuteLocked;
PolyFillType m_ClipFillType;
PolyFillType m_SubjFillType;
bool m_ReverseOutput;
bool m_UsingPolyTree;
bool m_StrictSimple;
#ifdef use_xyz
ZFillCallback m_ZFill; //custom callback
#endif
// Internal helpers of the clipping algorithm; defined in the library source.
void SetWindingCount(TEdge& edge);
bool IsEvenOddFillType(const TEdge& edge) const;
bool IsEvenOddAltFillType(const TEdge& edge) const;
void InsertLocalMinimaIntoAEL(const cInt botY);
void InsertEdgeIntoAEL(TEdge *edge, TEdge* startEdge);
void AddEdgeToSEL(TEdge *edge);
bool PopEdgeFromSEL(TEdge *&edge);
void CopyAELToSEL();
void DeleteFromSEL(TEdge *e);
void SwapPositionsInSEL(TEdge *edge1, TEdge *edge2);
bool IsContributing(const TEdge& edge) const;
bool IsTopHorz(const cInt XPos);
void DoMaxima(TEdge *e);
void ProcessHorizontals();
void ProcessHorizontal(TEdge *horzEdge);
void AddLocalMaxPoly(TEdge *e1, TEdge *e2, const IntPoint &pt);
OutPt* AddLocalMinPoly(TEdge *e1, TEdge *e2, const IntPoint &pt);
OutRec* GetOutRec(int idx);
void AppendPolygon(TEdge *e1, TEdge *e2);
void IntersectEdges(TEdge *e1, TEdge *e2, IntPoint &pt);
OutPt* AddOutPt(TEdge *e, const IntPoint &pt);
OutPt* GetLastOutPt(TEdge *e);
bool ProcessIntersections(const cInt topY);
void BuildIntersectList(const cInt topY);
void ProcessIntersectList();
void ProcessEdgesAtTopOfScanbeam(const cInt topY);
void BuildResult(Paths& polys);
void BuildResult2(PolyTree& polytree);
void SetHoleState(TEdge *e, OutRec *outrec);
void DisposeIntersectNodes();
bool FixupIntersectionOrder();
void FixupOutPolygon(OutRec &outrec);
void FixupOutPolyline(OutRec &outrec);
bool IsHole(TEdge *e);
bool FindOwnerFromSplitRecs(OutRec &outRec, OutRec *&currOrfl);
void FixHoleLinkage(OutRec &outrec);
void AddJoin(OutPt *op1, OutPt *op2, const IntPoint offPt);
void ClearJoins();
void ClearGhostJoins();
void AddGhostJoin(OutPt *op, const IntPoint offPt);
bool JoinPoints(Join *j, OutRec* outRec1, OutRec* outRec2);
void JoinCommonEdges();
void DoSimplePolygons();
void FixupFirstLefts1(OutRec* OldOutRec, OutRec* NewOutRec);
void FixupFirstLefts2(OutRec* InnerOutRec, OutRec* OuterOutRec);
void FixupFirstLefts3(OutRec* OldOutRec, OutRec* NewOutRec);
#ifdef use_xyz
void SetZ(IntPoint& pt, TEdge& e1, TEdge& e2);
#endif
};
//------------------------------------------------------------------------------
// Polygon offsetting: add paths with a join and end type, then call Execute()
// with the offset distance `delta`.
class ClipperOffset
{
public:
// NOTE(review): roundPrecision presumably initialises ArcTolerance; confirm in the source.
ClipperOffset(double miterLimit = 2.0, double roundPrecision = 0.25);
~ClipperOffset();
void AddPath(const Path& path, JoinType joinType, EndType endType);
void AddPaths(const Paths& paths, JoinType joinType, EndType endType);
// Writes the offset result into `solution` (flat list or tree form).
void Execute(Paths& solution, double delta);
void Execute(PolyTree& solution, double delta);
void Clear();
// Public tuning parameters.
double MiterLimit;
double ArcTolerance;
private:
// Working state used while offsetting.
Paths m_destPolys;
Path m_srcPoly;
Path m_destPoly;
std::vector<DoublePoint> m_normals;
double m_delta, m_sinA, m_sin, m_cos;
double m_miterLim, m_StepsPerRad;
IntPoint m_lowest;
PolyNode m_polyNodes;
void FixOrientations();
void DoOffset(double delta);
void OffsetPoint(int j, int& k, JoinType jointype);
void DoSquare(int j, int k);
void DoMiter(int j, int k, double r);
void DoRound(int j, int k);
};
//------------------------------------------------------------------------------
// Exception type thrown by the library; carries a text description that is
// returned by what().
class clipperException : public std::exception
{
  public:
    // A null description is stored as an empty string: constructing a
    // std::string from a null pointer is undefined behaviour.
    clipperException(const char* description)
      : m_descr(description ? description : "") {}
    // noexcept replaces the dynamic exception specification throw(), which is
    // deprecated since C++11 and no longer accepted from C++20 on.
    ~clipperException() noexcept override {}
    const char* what() const noexcept override {return m_descr.c_str();}
  private:
    std::string m_descr;
};
//------------------------------------------------------------------------------
} //ClipperLib namespace
#endif //clipper_hpp
/*
* getopt - POSIX like getopt for Windows console Application
*
* win-c - Windows Console Library
* Copyright (c) 2015 Koji Takami
* Released under the MIT license
* https://github.com/takamin/win-c/blob/master/LICENSE
*/
#ifndef _GETOPT_H_
#define _GETOPT_H_
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
/* POSIX-like short option parser; results are reported through the globals below. */
int getopt(int argc, char *const argv[],
const char *optstring);
/* Parser state shared with the caller (same names as the POSIX globals). */
extern char *optarg;
extern int optind, opterr, optopt;
/* Values for option::has_arg. */
#define no_argument 0
#define required_argument 1
#define optional_argument 2
/* Describes one long option; an array of these is passed to getopt_long(). */
struct option {
const char *name; /* long option name, without the leading dashes */
int has_arg; /* one of no_argument / required_argument / optional_argument */
int *flag; /* presumably as in POSIX/GNU getopt_long: NULL makes getopt_long return val */
int val; /* value associated with this option */
};
/* Long option parser; longopts is an array of struct option. */
int getopt_long(int argc, char *const argv[],
const char *optstring,
const struct option *longopts, int *longindex);
/****************************************************************************
int getopt_long_only(int argc, char* const argv[],
const char* optstring,
const struct option* longopts, int* longindex);
****************************************************************************/
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // _GETOPT_H_
#ifndef __MAIN_H__
#define __MAIN_H__
#include "getopt.h"
// Long command-line options and the short option character each one maps to.
// The table ends with an all-zero sentinel entry.
static const struct option long_options[] = {
{"models", required_argument, NULL, 'd'},
{"det", required_argument, NULL, '1'},
{"cls", required_argument, NULL, '2'},
{"rec", required_argument, NULL, '3'},
{"keys", required_argument, NULL, '4'},
{"image", required_argument, NULL, 'i'},
{"numThread", required_argument, NULL, 't'},
{"padding", required_argument, NULL, 'p'},
{"maxSideLen", required_argument, NULL, 's'},
{"boxScoreThresh", required_argument, NULL, 'b'},
{"boxThresh", required_argument, NULL, 'o'},
{"unClipRatio", required_argument, NULL, 'u'},
{"doAngle", required_argument, NULL, 'a'},
{"mostAngle", required_argument, NULL, 'A'},
{"version", no_argument, NULL, 'v'},
{"help", no_argument, NULL, 'h'},
// NOTE(review): --loopCount is accepted here but is not listed in the help texts below.
{"loopCount", required_argument, NULL, 'l'},
{NULL, no_argument, NULL, 0}
};
// One-line usage summary: required options in (), optional ones in [].
// The -a entry names --doAngle, matching long_options and optionalMsg
// (it previously advertised a non-existent --noAngle option).
const char *usageMsg = "(-d --models) (-1 --det) (-2 --cls) (-3 --rec) (-4 --keys) (-i --image)\n"
                       "[-t --numThread] [-p --padding] [-s --maxSideLen]\n"
                       "[-b --boxScoreThresh] [-o --boxThresh] [-u --unClipRatio]\n"
                       "[-a --doAngle] [-A --mostAngle]\n\n";
// Help text for the required options.
const char *requiredMsg =
    "-d --models: models directory.\n"
    "-1 --det: model file name of det.\n"
    "-2 --cls: model file name of cls.\n"
    "-3 --rec: model file name of rec.\n"
    "-4 --keys: keys file name.\n"
    "-i --image: path of target image.\n\n";

// Help text for the optional options and their defaults.
const char *optionalMsg =
    "-t --numThread: value of numThread(int), default: 4\n"
    "-p --padding: value of padding(int), default: 0\n"
    "-s --maxSideLen: Long side of picture for resize(int), default: 1024\n"
    "-b --boxScoreThresh: value of boxScoreThresh(float), default: 0.5\n"
    "-o --boxThresh: value of boxThresh(float), default: 0.3\n"
    "-u --unClipRatio: value of unClipRatio(float), default: 2.0\n"
    "-a --doAngle: Enable(1)/Disable(0) Angle Net, default: Enable\n"
    "-A --mostAngle: Enable(1)/Disable(0) Most Possible AngleIndex, default: Enable\n\n";

// Help text for the informational options.
const char *otherMsg =
    "-v --version: show version\n"
    "-h --help: print this help\n\n";

// printf-style example command lines; %s is the program name.
const char *example1Msg = "Example1: %s --models models --det det.onnx --cls cls.onnx --rec rec.onnx --keys keys.txt --image 1.jpg\n";
const char *example2Msg = "Example2: %s -d models -1 det.onnx -2 cls.onnx -3 rec.onnx -4 keys.txt -i 1.jpg -t 4 -p 50 -s 0 -b 0.6 -o 0.3 -u 2.0 -a 1 -A 1\n";
#endif //__MAIN_H__
#ifndef __OCR_VERSION_H__
#define __OCR_VERSION_H__
#define VERSION "1.1.0"
#endif //__OCR_VERSION_H__
This diff is collapsed.
# Select the bundled OnnxRuntime directory for the host platform and store it
# in OnnxRuntime_DIR. Paths are relative to the directory of this file.
# APPLE is tested before UNIX because CMake sets UNIX on macOS as well.
if (APPLE)
message("配置macOS OnnxRuntime 路径: ${CMAKE_CURRENT_LIST_DIR}/macos")
set(OnnxRuntime_DIR "${CMAKE_CURRENT_LIST_DIR}/macos")
elseif (WIN32)
# CMAKE_CL_64 is set when a 64-bit Microsoft compiler is in use
if (CMAKE_CL_64)
message("配置WINDOWS OnnxRuntime x64 路径: ${CMAKE_CURRENT_LIST_DIR}/windows-x64")
set(OnnxRuntime_DIR "${CMAKE_CURRENT_LIST_DIR}/windows-x64")
else ()
message("配置WINDOWS OnnxRuntime x86 路径: ${CMAKE_CURRENT_LIST_DIR}/windows-x86")
set(OnnxRuntime_DIR "${CMAKE_CURRENT_LIST_DIR}/windows-x86")
endif ()
elseif (UNIX)
message("配置Linux OnnxRuntime 路径: ${CMAKE_CURRENT_LIST_DIR}/linux")
set(OnnxRuntime_DIR "${CMAKE_CURRENT_LIST_DIR}/linux")
endif ()
# Select the bundled OpenCV directory for the host platform and store it in
# OpenCV_DIR. On macOS/Linux this points at lib/cmake/opencv4 inside the
# platform folder; on Windows it points at the platform folder itself.
if (WIN32)
# CMAKE_CL_64 is set when a 64-bit Microsoft compiler is in use
if (CMAKE_CL_64)
message("配置WINDOWS OpenCV x64 路径: ${CMAKE_CURRENT_LIST_DIR}/windows-x64")
set(OpenCV_DIR "${CMAKE_CURRENT_LIST_DIR}/windows-x64")
else ()
message("配置WINDOWS OpenCV x86 路径: ${CMAKE_CURRENT_LIST_DIR}/windows-x86")
set(OpenCV_DIR "${CMAKE_CURRENT_LIST_DIR}/windows-x86")
endif ()
elseif (APPLE)
message("配置macOS OpenCV 路径: ${CMAKE_CURRENT_LIST_DIR}/macos/lib/cmake/opencv4")
set(OpenCV_DIR "${CMAKE_CURRENT_LIST_DIR}/macos/lib/cmake/opencv4")
elseif (UNIX)
message("配置Linux OpenCV 路径: ${CMAKE_CURRENT_LIST_DIR}/linux/lib/cmake/opencv4")
set(OpenCV_DIR "${CMAKE_CURRENT_LIST_DIR}/linux/lib/cmake/opencv4")
endif ()
chcp 65001
:: Benchmark launcher: asks for the build flavour and a loop count, then runs
:: benchmark.exe on TARGET_IMG. Loops back to the prompt after every run.
:: Set Param
@ECHO OFF
@SETLOCAL
echo "Setting the Number of Threads=%NUMBER_OF_PROCESSORS% Using an OpenMP Environment Variable"
set OMP_NUM_THREADS=%NUMBER_OF_PROCESSORS%

:MainExec
echo "请输入测试选项并回车: 1)CPU-x64, 2)CPU-x86"
:: Clear the previous answer so that pressing Enter does not reuse it, and
:: quote both sides of the comparison so an empty answer is not a syntax error.
set "flag="
set /p flag=
:: On invalid input ask again instead of continuing with EXE_PATH unset.
if "%flag%" == "1" (
    call :PrepareCpuX64
) else if "%flag%" == "2" (
    call :PrepareCpuX86
) else (
    echo 输入错误!Input Error!
    GOTO:MainExec
)

echo "请输入循环次数:"
set /p LOOP_COUNT=

SET TARGET_IMG=images/1.jpg
if not exist %TARGET_IMG% (
echo "找不到待识别的目标图片:%TARGET_IMG%,请打开本文件并编辑TARGET_IMG"
PAUSE
exit
)

:: Prefer the install\bin sub-directory when the build was installed.
if exist %EXE_PATH%\install\bin (
SET EXE_PATH=%EXE_PATH%\install\bin
)

%EXE_PATH%\benchmark.exe --version
%EXE_PATH%\benchmark.exe --models models ^
--det ch_PP-OCRv3_det_infer.onnx ^
--cls ch_ppocr_mobile_v2.0_cls_infer.onnx ^
--rec ch_PP-OCRv3_rec_infer.onnx ^
--keys ppocr_keys_v1.txt ^
--image %TARGET_IMG% ^
--numThread %NUMBER_OF_PROCESSORS% ^
--padding 50 ^
--maxSideLen 1024 ^
--boxScoreThresh 0.5 ^
--boxThresh 0.3 ^
--unClipRatio 1.5 ^
--doAngle 1 ^
--mostAngle 1 ^
--loopCount %LOOP_COUNT%

:: (the stray popd was removed: this script never calls pushd)
echo.
GOTO:MainExec

:PrepareCpuX64
set EXE_PATH=win-BIN-x64
set GPU_INDEX=-1
GOTO:EOF

:PrepareCpuX86
set EXE_PATH=win-BIN-Win32
set GPU_INDEX=-1
GOTO:EOF

@ENDLOCAL
#!/usr/bin/env bash
# Benchmark launcher for macOS/Linux: detects the CPU count, asks for a loop
# count and runs ./<OS>-BIN/benchmark on TARGET_IMG.

# Derives the binary directory from the OS name reported by uname.
function PrepareVar(){
    EXE_PATH=${sysOS}-BIN
}

sysOS=$(uname -s)
NUM_THREADS=1
if [ "$sysOS" == "Darwin" ];then
    #echo "I'm MacOS"
    NUM_THREADS=$(sysctl -n hw.ncpu)
elif [ "$sysOS" == "Linux" ];then
    #echo "I'm Linux"
    NUM_THREADS=$(grep -c ^processor /proc/cpuinfo)
else
    echo "Other OS: $sysOS"
fi

echo "Setting the Number of Threads=$NUM_THREADS Using an OpenMP Environment Variable"
# `export` is required: in bash, `set NAME=value` only replaces the positional
# parameters and never reaches the environment of the child process.
export OMP_NUM_THREADS=$NUM_THREADS

PrepareVar

echo "请输入循环次数"
read -r -p "" LOOP_COUNT

TARGET_IMG=images/1.jpg
if [ ! -f "$TARGET_IMG" ]; then
    echo "找不到待识别的目标图片:${TARGET_IMG},请打开本文件并编辑TARGET_IMG"
    # non-zero status so callers can tell the run did not happen
    exit 1
fi

"./${EXE_PATH}/benchmark" --version
"./${EXE_PATH}/benchmark" --models models \
--det ch_PP-OCRv3_det_infer.onnx \
--cls ch_ppocr_mobile_v2.0_cls_infer.onnx \
--rec ch_PP-OCRv3_rec_infer.onnx \
--keys ppocr_keys_v1.txt \
--image "$TARGET_IMG" \
--numThread "$NUM_THREADS" \
--padding 50 \
--maxSideLen 1024 \
--boxScoreThresh 0.5 \
--boxThresh 0.3 \
--unClipRatio 1.5 \
--doAngle 1 \
--mostAngle 1 \
--loopCount "$LOOP_COUNT"
\ No newline at end of file
chcp 65001
:: Test launcher: asks for the build flavour, then runs RapidOcrOnnx.exe on
:: TARGET_IMG. Loops back to the prompt after every run.
:: Set Param
@ECHO OFF
@SETLOCAL
echo "Setting the Number of Threads=%NUMBER_OF_PROCESSORS% Using an OpenMP Environment Variable"
set OMP_NUM_THREADS=%NUMBER_OF_PROCESSORS%

:MainExec
echo "请输入测试选项并回车: 1)CPU-x64, 2)CPU-x86"
:: Clear the previous answer so that pressing Enter does not reuse it, and
:: quote both sides of the comparison so an empty answer is not a syntax error.
set "flag="
set /p flag=
:: On invalid input ask again instead of continuing with EXE_PATH unset.
if "%flag%" == "1" (
    call :PrepareCpuX64
) else if "%flag%" == "2" (
    call :PrepareCpuX86
) else (
    echo 输入错误!Input Error!
    GOTO:MainExec
)

SET TARGET_IMG=images/1.jpg
if not exist %TARGET_IMG% (
echo "找不到待识别的目标图片:%TARGET_IMG%,请打开本文件并编辑TARGET_IMG"
PAUSE
exit
)

:: Prefer the install\bin sub-directory when the build was installed.
if exist %EXE_PATH%\install\bin (
SET EXE_PATH=%EXE_PATH%\install\bin
)

%EXE_PATH%\RapidOcrOnnx.exe --version
%EXE_PATH%\RapidOcrOnnx.exe --models models ^
--det ch_PP-OCRv3_det_infer.onnx ^
--cls ch_ppocr_mobile_v2.0_cls_infer.onnx ^
--rec ch_PP-OCRv3_rec_infer.onnx ^
--keys ppocr_keys_v1.txt ^
--image %TARGET_IMG% ^
--numThread %NUMBER_OF_PROCESSORS% ^
--padding 50 ^
--maxSideLen 1024 ^
--boxScoreThresh 0.5 ^
--boxThresh 0.3 ^
--unClipRatio 1.5 ^
--doAngle 1 ^
--mostAngle 1

echo.
GOTO:MainExec

:PrepareCpuX64
set EXE_PATH=win-BIN-x64
GOTO:EOF

:PrepareCpuX86
set EXE_PATH=win-BIN-Win32
GOTO:EOF

@ENDLOCAL
#!/usr/bin/env bash
# Test launcher for macOS/Linux: detects the CPU count and runs
# ./<OS>-BIN/RapidOcrOnnx on TARGET_IMG.

# Derives the binary directory from the OS name reported by uname.
function PrepareVar(){
    EXE_PATH=${sysOS}-BIN
}

sysOS=$(uname -s)
NUM_THREADS=1
if [ "$sysOS" == "Darwin" ];then
    #echo "I'm MacOS"
    NUM_THREADS=$(sysctl -n hw.ncpu)
elif [ "$sysOS" == "Linux" ];then
    #echo "I'm Linux"
    NUM_THREADS=$(grep -c ^processor /proc/cpuinfo)
else
    echo "Other OS: $sysOS"
fi

echo "Setting the Number of Threads=$NUM_THREADS Using an OpenMP Environment Variable"
# `export` is required: in bash, `set NAME=value` only replaces the positional
# parameters and never reaches the environment of the child process.
export OMP_NUM_THREADS=$NUM_THREADS

PrepareVar

TARGET_IMG=images/1.jpg
if [ ! -f "$TARGET_IMG" ]; then
    echo "找不到待识别的目标图片:${TARGET_IMG},请打开本文件并编辑TARGET_IMG"
    # non-zero status so callers can tell the run did not happen
    exit 1
fi

##### run test on MacOS or Linux
"./${EXE_PATH}/RapidOcrOnnx" --version
"./${EXE_PATH}/RapidOcrOnnx" --models models \
--det ch_PP-OCRv3_det_infer.onnx \
--cls ch_ppocr_mobile_v2.0_cls_infer.onnx \
--rec ch_PP-OCRv3_rec_infer.onnx \
--keys ppocr_keys_v1.txt \
--image "$TARGET_IMG" \
--numThread "$NUM_THREADS" \
--padding 50 \
--maxSideLen 1024 \
--boxScoreThresh 0.5 \
--boxThresh 0.3 \
--unClipRatio 1.5 \
--doAngle 1 \
--mostAngle 1
\ No newline at end of file
#include "AngleNet.h"
#include "OcrUtils.h"
#include <numeric>
AngleNet::AngleNet() {}

// Releases the ORT session first, then the input/output name strings.
// The names are released with free(), so they must come from a
// malloc-compatible allocation.
AngleNet::~AngleNet() {
    delete session;
    for (size_t i = 0; i < inputNames.size(); ++i) {
        free(inputNames[i]);
    }
    for (size_t i = 0; i < outputNames.size(); ++i) {
        free(outputNames[i]);
    }
}
// Stores the thread count and configures the session options with it.
// Must take effect before the session is created, since initModel() passes
// sessionOptions to the Ort::Session constructor.
void AngleNet::setNumThread(int numOfThread) {
numThread = numOfThread;
//===session options===
// Sets the number of threads used to parallelize the execution within nodes
// A value of 0 means ORT will pick a default
// NOTE(review): the intra-op setting is intentionally left disabled here;
// only the inter-op thread count below is applied.
//sessionOptions.SetIntraOpNumThreads(numThread);
//set OMP_NUM_THREADS=16
// Sets the number of threads used to parallelize the execution of the graph (across nodes)
// If sequential execution is enabled this value is ignored
// A value of 0 means ORT will pick a default
sessionOptions.SetInterOpNumThreads(numThread);
// Sets graph optimization level
// ORT_DISABLE_ALL -> To disable all optimizations
// ORT_ENABLE_BASIC -> To enable basic optimizations (Such as redundant node removals)
// ORT_ENABLE_EXTENDED -> To enable extended optimizations (Includes level 1 + more complex optimizations like node fusions)
// ORT_ENABLE_ALL -> To enable all possible optimizations
sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
}
// Creates the ORT session for the angle classifier model at `pathStr` and
// caches its input/output names. On Windows the path is converted to a wide
// string before it is handed to the session.
void AngleNet::initModel(const std::string &pathStr) {
#ifdef _WIN32
    const std::wstring widePath = strToWstr(pathStr);
    const wchar_t *modelPath = widePath.c_str();
#else
    const char *modelPath = pathStr.c_str();
#endif
    session = new Ort::Session(env, modelPath, sessionOptions);
    inputNames = getInputNames(session);
    outputNames = getOutputNames(session);
}
// Picks the highest-scoring class from the classifier output.
// Returns {index of the maximum, its score}; Angle::time is left at zero.
// Ties resolve to the lowest index. Empty input yields {0, 0}.
// The maximum is seeded from the first element rather than from 0, so
// outputs that are all zero or negative still report their real best score.
Angle scoreToAngle(const std::vector<float> &outputData) {
    if (outputData.empty()) {
        return {0, 0.f};
    }
    size_t best = 0;
    for (size_t i = 1; i < outputData.size(); ++i) {
        if (outputData[i] > outputData[best]) {
            best = i;
        }
    }
    return {static_cast<int>(best), outputData[best]};
}
// Runs the angle classifier on one image and returns the best class.
// `src` is normalised into a float buffer and wrapped, without copying, in a
// tensor of shape {1, channels, rows, cols}.
Angle AngleNet::getAngle(cv::Mat &src) {
    std::vector<float> inputTensorValues = substractMeanNormalize(src, meanValues, normValues);
    std::array<int64_t, 4> inputShape{1, src.channels(), src.rows, src.cols};
    auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    // The tensor borrows inputTensorValues; the vector must outlive Run().
    Ort::Value inputTensor = Ort::Value::CreateTensor<float>(memoryInfo, inputTensorValues.data(),
                                                             inputTensorValues.size(), inputShape.data(),
                                                             inputShape.size());
    assert(inputTensor.IsTensor());
    auto outputTensor = session->Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensor,
                                     inputNames.size(), outputNames.data(), outputNames.size());
    assert(outputTensor.size() == 1 && outputTensor.front().IsTensor());
    std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape();
    // The initial value fixes the accumulator type: a plain `1` would fold
    // every int64_t dimension through int and could truncate the product.
    int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(),
                                          static_cast<int64_t>(1), std::multiplies<int64_t>());
    float *floatArray = outputTensor.front().GetTensorMutableData<float>();
    std::vector<float> outputData(floatArray, floatArray + outputCount);
    return scoreToAngle(outputData);
}
// Classifies the angle of every image in `partImgs`.
//  - doAngle == false: every entry is {index -1, score 0} and nothing is run.
//  - doAngle == true:  each image is resized to dstWidth x dstHeight and
//    classified; the elapsed time is stored in Angle::time.
//  - mostAngle == true (with doAngle): every index is then overwritten with the
//    majority value: 0 when the sum of indexes is below half the count, else 1.
// `path` and `imgName` are only used to name debug images when
// isOutputAngleImg is set.
std::vector<Angle> AngleNet::getAngles(std::vector<cv::Mat> &partImgs, const char *path,
                                       const char *imgName, bool doAngle, bool mostAngle) {
    const int count = partImgs.size();
    std::vector<Angle> angles(count);

    if (!doAngle) {
        for (int idx = 0; idx < count; ++idx) {
            angles[idx] = Angle{-1, 0.f};
        }
        return angles;
    }

    for (int idx = 0; idx < count; ++idx) {
        const double tick = getCurrentTime();
        cv::Mat resized;
        cv::resize(partImgs[idx], resized, cv::Size(dstWidth, dstHeight));
        Angle detected = getAngle(resized);
        const double tock = getCurrentTime();
        detected.time = tock - tick;
        angles[idx] = detected;
        // Optional debug output of the resized classifier input
        if (isOutputAngleImg) {
            std::string debugFile = getDebugImgFilePath(path, imgName, idx, "-angle-");
            saveImg(resized, debugFile.c_str());
        }
    }

    if (mostAngle) {
        // Majority vote over the indexes returned by getAngleIndexes()
        auto indexes = getAngleIndexes(angles);
        const double indexSum = std::accumulate(indexes.begin(), indexes.end(), 0.0);
        const double half = angles.size() / 2.0f;
        const int majority = (indexSum < half) ? 0 : 1;
        //printf("Set All Angle to mostAngleIndex(%d)\n", majority);
        for (auto &entry : angles) {
            entry.index = majority;
        }
    }
    return angles;
}
\ No newline at end of file
#include "CrnnNet.h"
#include "OcrUtils.h"
#include <fstream>
#include <numeric>
CrnnNet::CrnnNet() {}

// Releases the ORT session first, then the input/output name strings.
// The names are released with free(), so they must come from a
// malloc-compatible allocation.
CrnnNet::~CrnnNet() {
    delete session;
    for (size_t i = 0; i < inputNames.size(); ++i) {
        free(inputNames[i]);
    }
    for (size_t i = 0; i < outputNames.size(); ++i) {
        free(outputNames[i]);
    }
}
// Stores the thread count and configures the session options with it.
// Must take effect before the session is created, since initModel() passes
// sessionOptions to the Ort::Session constructor.
void CrnnNet::setNumThread(int numOfThread) {
numThread = numOfThread;
//===session options===
// Sets the number of threads used to parallelize the execution within nodes
// A value of 0 means ORT will pick a default
// NOTE(review): the intra-op setting is intentionally left disabled here;
// only the inter-op thread count below is applied.
//sessionOptions.SetIntraOpNumThreads(numThread);
//set OMP_NUM_THREADS=16
// Sets the number of threads used to parallelize the execution of the graph (across nodes)
// If sequential execution is enabled this value is ignored
// A value of 0 means ORT will pick a default
sessionOptions.SetInterOpNumThreads(numThread);
// Sets graph optimization level
// ORT_DISABLE_ALL -> To disable all optimizations
// ORT_ENABLE_BASIC -> To enable basic optimizations (Such as redundant node removals)
// ORT_ENABLE_EXTENDED -> To enable extended optimizations (Includes level 1 + more complex optimizations like node fusions)
// ORT_ENABLE_ALL -> To enable all possible optimizations
sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
}
// Creates the recognition session from the model at pathStr and loads the character
// table from keysPath (one entry per line, appended to `keys` in file order).
// If the keys file cannot be opened a message is printed and `keys` is left unchanged.
void CrnnNet::initModel(const std::string &pathStr, const std::string &keysPath) {
#ifdef _WIN32
    // On Windows the session is created from a wide-character path.
    std::wstring crnnPath = strToWstr(pathStr);
    session = new Ort::Session(env, crnnPath.c_str(), sessionOptions);
#else
    session = new Ort::Session(env, pathStr.c_str(), sessionOptions);
#endif
    inputNames = getInputNames(session);
    outputNames = getOutputNames(session);

    // Load keys.
    std::ifstream in(keysPath.c_str());
    if (!in) {
        printf("The keys.txt file was not found\n");
        return;
    }
    std::string line;
    while (getline(in, line)) {
        // getline() strips '\n' only. A keys file saved with CRLF line endings and
        // read on a non-Windows build would otherwise leave '\r' glued to every key
        // and therefore to every recognized character.
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        keys.push_back(line);
    }
}
// Returns the zero-based offset of the first largest element in [first, last).
// For an empty range the result is 0.
template<class ForwardIterator>
inline static size_t argmax(ForwardIterator first, ForwardIterator last) {
    const ForwardIterator largest = std::max_element(first, last);
    return std::distance(first, largest);
}
// Greedy decoding of the recognition output.
//   outputData : row-major score matrix, h rows of w scores each
//   h          : number of rows (time steps)
//   w          : number of scores per row
// For every row the index of the highest score is taken. Index 0 and indices
// >= keys.size() are skipped, as is an index equal to the previous row's index.
// Every kept index i contributes keys[i - 1] to the text and its score to the
// per-character score list.
TextLine CrnnNet::scoreToTextLine(const std::vector<float> &outputData, int h, int w) {
    std::string strRes;
    std::vector<float> scores;
    if (h <= 0 || w <= 0) {
        return {strRes, scores};
    }

    const size_t rowLen = static_cast<size_t>(w);
    // Never walk past the end of the buffer, even if the reported shape is larger.
    const size_t rows = (std::min)(static_cast<size_t>(h), outputData.size() / rowLen);
    const int keySize = keys.size();
    int lastIndex = 0;

    for (size_t i = 0; i < rows; i++) {
        // Pointer arithmetic on data() instead of &outputData[(i + 1) * w]:
        // subscripting one past the end is out of contract for operator[] and
        // trips checked-iterator builds.
        const float *rowBegin = outputData.data() + i * rowLen;
        const int maxIndex = int(argmax(rowBegin, rowBegin + rowLen));
        const float maxValue = rowBegin[maxIndex]; // one scan yields both index and value

        if (maxIndex > 0 && maxIndex < keySize && (!(i > 0 && maxIndex == lastIndex))) {
            scores.emplace_back(maxValue);
            strRes.append(keys[maxIndex - 1]);
        }
        lastIndex = maxIndex;
    }
    return {strRes, scores};
}
// Recognizes the text in one cropped image.
// The image is resized to height dstHeight (width scaled by the same factor),
// normalized with meanValues / normValues, run through the session, and the output
// is decoded by scoreToTextLine() using output dimensions 1 and 2 as rows / columns.
// An empty input image yields an empty TextLine.
TextLine CrnnNet::getTextLine(const cv::Mat &src) {
    if (src.empty()) {
        // src.rows == 0 would make the scale computation divide by zero.
        return TextLine{};
    }
    const float scale = static_cast<float>(dstHeight) / static_cast<float>(src.rows);
    // Very thin crops can round down to width 0, which cv::resize rejects.
    const int dstWidth = (std::max)(static_cast<int>(static_cast<float>(src.cols) * scale), 1);

    cv::Mat srcResize;
    cv::resize(src, srcResize, cv::Size(dstWidth, dstHeight));
    std::vector<float> inputTensorValues = substractMeanNormalize(srcResize, meanValues, normValues);
    std::array<int64_t, 4> inputShape{1, srcResize.channels(), srcResize.rows, srcResize.cols};

    auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    Ort::Value inputTensor = Ort::Value::CreateTensor<float>(memoryInfo, inputTensorValues.data(),
                                                             inputTensorValues.size(), inputShape.data(),
                                                             inputShape.size());
    assert(inputTensor.IsTensor());

    auto outputTensor = session->Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensor,
                                     inputNames.size(), outputNames.data(), outputNames.size());
    assert(outputTensor.size() == 1 && outputTensor.front().IsTensor());

    std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape();
    // The accumulator takes the type of the initial value, so seed it with int64_t;
    // a plain `1` would truncate the running product to int on every step.
    const int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(), int64_t{1},
                                                std::multiplies<int64_t>());
    float *floatArray = outputTensor.front().GetTensorMutableData<float>();
    std::vector<float> outputData(floatArray, floatArray + outputCount);
    return scoreToTextLine(outputData, outputShape[1], outputShape[2]);
}
// Runs getTextLine() on every image in partImg and returns the results in the same
// order. Each result's `time` field is set to the elapsed time of its own
// getTextLine() call. When isOutputDebugImg is set, every input image is first
// saved under a name built from path / imgName / index.
std::vector<TextLine> CrnnNet::getTextLines(std::vector<cv::Mat> &partImg, const char *path, const char *imgName) {
    const int count = partImg.size();
    std::vector<TextLine> textLines(count);
    for (int idx = 0; idx < count; ++idx) {
        cv::Mat &part = partImg[idx];

        // Optional debug dump of the recognizer input.
        if (isOutputDebugImg) {
            std::string dumpFile = getDebugImgFilePath(path, imgName, idx, "-debug-");
            saveImg(part, dumpFile.c_str());
        }

        const double tBegin = getCurrentTime();
        TextLine recognized = getTextLine(part);
        const double tEnd = getCurrentTime();
        recognized.time = tEnd - tBegin;
        textLines[idx] = recognized;
    }
    return textLines;
}
\ No newline at end of file
#include "DbNet.h"
#include "OcrUtils.h"
// Nothing to set up here; the model is loaded later by initModel().
DbNet::DbNet() = default;
// Destroys the inference session, then releases every name string stored in
// inputNames and outputNames with free().
DbNet::~DbNet() {
    delete session;
    for (auto it = inputNames.begin(); it != inputNames.end(); ++it) {
        free(*it);
    }
    for (auto it = outputNames.begin(); it != outputNames.end(); ++it) {
        free(*it);
    }
}
// Remembers the requested thread count and applies it to the session options
// that initModel() later uses to create the session.
void DbNet::setNumThread(int numOfThread) {
    numThread = numOfThread;

    // Intra-op threading (parallelism inside a node) is intentionally left at its
    // default; the original author's note suggests OMP_NUM_THREADS for that instead:
    //sessionOptions.SetIntraOpNumThreads(numThread);

    // Graph optimization level. Available levels:
    //   ORT_DISABLE_ALL     - no optimizations
    //   ORT_ENABLE_BASIC    - basic optimizations (e.g. redundant node removal)
    //   ORT_ENABLE_EXTENDED - basic + more complex ones such as node fusions
    //   ORT_ENABLE_ALL      - every available optimization
    sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);

    // Inter-op threading (parallelism across graph nodes). Ignored when execution is
    // sequential; 0 lets ORT pick a default.
    sessionOptions.SetInterOpNumThreads(numThread);
}
// Creates the detection session from the model file at pathStr and caches the
// model's input / output names.
void DbNet::initModel(const std::string &pathStr) {
#ifdef _WIN32
    // On Windows the session is created from a wide-character path.
    const std::wstring widePath = strToWstr(pathStr);
    session = new Ort::Session(env, widePath.c_str(), sessionOptions);
#else
    session = new Ort::Session(env, pathStr.c_str(), sessionOptions);
#endif
    inputNames = getInputNames(session);
    outputNames = getOutputNames(session);
}
// Turns the binarized detection map into text boxes in source-image coordinates.
//   predMat        : score map handed to boxScoreFast() to score each candidate
//   dilateMat      : binary map whose contours are the box candidates
//   s              : scaling info; points are divided by ratioWidth / ratioHeight and
//                    clamped to [0, srcWidth - 1] x [0, srcHeight - 1]
//   boxScoreThresh : candidates scoring below this are dropped
//   unClipRatio    : expansion ratio handed to unClip()
// At most 1000 contours are examined. The accepted boxes are returned in reverse
// order of discovery.
std::vector<TextBox> findRsBoxes(const cv::Mat &predMat, const cv::Mat &dilateMat, ScaleParam &s,
                                 const float boxScoreThresh, const float unClipRatio) {
    const int longSideThresh = 3; // long-side threshold for the minimum-area box
    const int maxCandidates = 1000;

    std::vector<std::vector<cv::Point>> contours;
    std::vector<cv::Vec4i> hierarchy;
    cv::findContours(dilateMat, contours, hierarchy, cv::RETR_LIST,
                     cv::CHAIN_APPROX_SIMPLE);

    int candidateCount;
    if (contours.size() >= maxCandidates) {
        candidateCount = maxCandidates;
    } else {
        candidateCount = contours.size();
    }

    std::vector<TextBox> rsBoxes;
    for (int c = 0; c < candidateCount; c++) {
        const std::vector<cv::Point> &contour = contours[c];
        if (contour.size() <= 2) {
            continue;
        }

        cv::RotatedRect rect = cv::minAreaRect(contour);
        float longSide;
        std::vector<cv::Point2f> minBoxes = getMinBoxes(rect, longSide);
        if (longSide < longSideThresh) {
            continue;
        }

        float boxScore = boxScoreFast(minBoxes, predMat);
        if (boxScore < boxScoreThresh) {
            continue;
        }

        // Expand the box, then discard results that collapsed to (almost) nothing.
        cv::RotatedRect clipRect = unClip(minBoxes, unClipRatio);
        if (clipRect.size.height < 1.001 && clipRect.size.width < 1.001) {
            continue;
        }

        std::vector<cv::Point2f> clipMinBoxes = getMinBoxes(clipRect, longSide);
        if (longSide < longSideThresh + 2) {
            continue;
        }

        // Map back to source-image pixels and clamp to the image bounds.
        std::vector<cv::Point> intClipMinBoxes;
        for (const cv::Point2f &corner : clipMinBoxes) {
            float x = corner.x / s.ratioWidth;
            float y = corner.y / s.ratioHeight;
            int ptX = (std::min)((std::max)(int(x), 0), s.srcWidth - 1);
            int ptY = (std::min)((std::max)(int(y), 0), s.srcHeight - 1);
            intClipMinBoxes.emplace_back(ptX, ptY);
        }
        rsBoxes.push_back(TextBox{intClipMinBoxes, boxScore});
    }
    std::reverse(rsBoxes.begin(), rsBoxes.end());
    return rsBoxes;
}
// Detects text boxes in src.
// src is resized to s.dstWidth x s.dstHeight, normalized with meanValues / normValues
// and run through the session. The first H*W floats of the output (H, W = output
// dimensions 2 and 3) are used both as the score map and, scaled to 0..255, as the
// input of a binary threshold at boxThresh * 255 followed by a 2x2 dilation.
// The candidate boxes are then extracted by findRsBoxes().
// Returns an empty list if the output does not contain at least H*W values.
std::vector<TextBox>
DbNet::getTextBoxes(cv::Mat &src, ScaleParam &s, float boxScoreThresh, float boxThresh, float unClipRatio) {
    cv::Mat srcResize;
    cv::resize(src, srcResize, cv::Size(s.dstWidth, s.dstHeight));
    std::vector<float> inputTensorValues = substractMeanNormalize(srcResize, meanValues, normValues);
    std::array<int64_t, 4> inputShape{1, srcResize.channels(), srcResize.rows, srcResize.cols};

    auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    Ort::Value inputTensor = Ort::Value::CreateTensor<float>(memoryInfo, inputTensorValues.data(),
                                                             inputTensorValues.size(), inputShape.data(),
                                                             inputShape.size());
    assert(inputTensor.IsTensor());

    auto outputTensor = session->Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensor,
                                     inputNames.size(), outputNames.data(), outputNames.size());
    assert(outputTensor.size() == 1 && outputTensor.front().IsTensor());

    std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape();
    // The accumulator takes the type of the initial value, so seed it with int64_t;
    // a plain `1` would truncate the running product to int on every step.
    const int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(), int64_t{1},
                                                std::multiplies<int64_t>());
    const float *floatArray = outputTensor.front().GetTensorMutableData<float>();

    //-----Data preparation-----
    const int outHeight = static_cast<int>(outputShape[2]);
    const int outWidth = static_cast<int>(outputShape[3]);
    const int area = outHeight * outWidth;
    if (area <= 0 || outputCount < area) {
        // Nothing usable in the output; avoid reading past the tensor buffer.
        return {};
    }
    // Copy the score map once, straight from the tensor buffer.
    std::vector<float> predData(floatArray, floatArray + area);
    std::vector<unsigned char> cbufData(area);
    for (int i = 0; i < area; i++) {
        cbufData[i] = (unsigned char) ((predData[i]) * 255);
    }
    cv::Mat predMat(outHeight, outWidth, CV_32F, predData.data());
    cv::Mat cBufMat(outHeight, outWidth, CV_8UC1, cbufData.data());

    //-----boxThresh-----
    const double maxValue = 255;
    const double threshold = boxThresh * 255;
    cv::Mat thresholdMat;
    cv::threshold(cBufMat, thresholdMat, threshold, maxValue, cv::THRESH_BINARY);

    //-----dilate-----
    cv::Mat dilateMat;
    cv::Mat dilateElement = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2));
    cv::dilate(thresholdMat, dilateMat, dilateElement);

    return findRsBoxes(predMat, dilateMat, s, boxScoreThresh, unClipRatio);
}
\ No newline at end of file
#include "OcrLite.h"
#include "OcrUtils.h"
#include <stdarg.h> //windows&linux
// Nothing to set up here; configuration happens through setNumThread(),
// initLogger(), enableResultTxt() and initModels().
OcrLite::OcrLite() = default;
// Closes the result text file if one was opened by enableResultTxt().
OcrLite::~OcrLite() {
    // enableResultTxt() sets the flag even when fopen() fails, so the handle must be
    // checked as well: fclose(NULL) is undefined behavior.
    if (isOutputResultTxt && resultTxt != nullptr) {
        fclose(resultTxt);
        resultTxt = nullptr;
    }
}
// Forwards the same thread count to the detection, angle and recognition networks.
void OcrLite::setNumThread(int numOfThread) {
    const int threads = numOfThread;
    dbNet.setNumThread(threads);
    angleNet.setNumThread(threads);
    crnnNet.setNumThread(threads);
}
// Selects the optional outputs:
//   isConsole   - Logger() also prints to stdout
//   isPartImg   - getPartImages() saves every cropped text image
//   isResultImg - detect() saves the image with the drawn text boxes
void OcrLite::initLogger(bool isConsole, bool isPartImg, bool isResultImg) {
    isOutputResultImg = isResultImg;
    isOutputPartImg = isPartImg;
    isOutputConsole = isConsole;
}
void OcrLite::enableResultTxt(const char *path, const char *imgName) {
isOutputResultTxt = true;
std::string resultTxtPath = getResultTxtFilePath(path, imgName);
printf("resultTxtPath(%s)\n", resultTxtPath.c_str());
resultTxt = fopen(resultTxtPath.c_str(), "w");
}
// Loads the three models in order: detection (detPath), angle classifier (clsPath),
// recognition (recPath, with its character table keysPath). Progress is reported
// through Logger(). Always returns true.
bool OcrLite::initModels(const std::string &detPath, const std::string &clsPath,
                         const std::string &recPath, const std::string &keysPath) {
    Logger("=====Init Models=====\n");

    // 1) text detection
    Logger("--- Init DbNet ---\n");
    dbNet.initModel(detPath);

    // 2) text direction classification
    Logger("--- Init AngleNet ---\n");
    angleNet.initModel(clsPath);

    // 3) text recognition
    Logger("--- Init CrnnNet ---\n");
    crnnNet.initModel(recPath, keysPath);

    Logger("Init Models Success!\n");
    return true;
}
// printf-style logging. The formatted message goes to stdout when isOutputConsole is
// set and to the result text file when isOutputResultTxt is set and the file is open.
// Does nothing when neither sink is active.
void OcrLite::Logger(const char *format, ...) {
    const bool toFile = isOutputResultTxt && resultTxt != nullptr;
    if (!(isOutputConsole || toFile)) return;

    va_list args;
    va_start(args, format);

    // First pass: measure. The message length is unbounded (recognized text is logged
    // with %s), so a fixed 8192-byte buffer filled by vsprintf could overflow.
    va_list measureArgs;
    va_copy(measureArgs, args);
    const int needed = vsnprintf(nullptr, 0, format, measureArgs);
    va_end(measureArgs);
    if (needed < 0) { // encoding error reported by vsnprintf
        va_end(args);
        return;
    }

    // Second pass: format into a buffer of exactly the required size (+1 for NUL).
    std::vector<char> buffer(static_cast<size_t>(needed) + 1);
    vsnprintf(buffer.data(), buffer.size(), format, args);
    va_end(args);

    if (isOutputConsole) printf("%s", buffer.data());
    if (toFile) fprintf(resultTxt, "%s", buffer.data());
}
// Returns src surrounded by a border of `padding` pixels on every side, filled with
// the scalar (255, 255, 255). For padding <= 0 src itself is returned.
cv::Mat makePadding(cv::Mat &src, const int padding) {
    if (padding > 0) {
        cv::Mat padded;
        const cv::Scalar fill = {255, 255, 255};
        // NOTE(review): BORDER_ISOLATED is passed without a border type; this presumably
        // behaves as a constant-color border using `fill` - confirm against OpenCV docs.
        cv::copyMakeBorder(src, padded, padding, padding, padding, padding, cv::BORDER_ISOLATED, fill);
        return padded;
    }
    return src;
}
// Runs the full OCR pipeline on the image file identified by path + imgName.
//   padding    : border (in pixels) added around the image before detection;
//                negative values are treated as 0
//   maxSideLen : target length of the longer side; <= 0 or larger than the image
//                means "use the image's own longer side"
//   remaining parameters are forwarded unchanged to the core detect() overload.
// Returns a value-initialized OcrResult if the image cannot be read.
OcrResult OcrLite::detect(const char *path, const char *imgName,
                          const int padding, const int maxSideLen,
                          float boxScoreThresh, float boxThresh, float unClipRatio, bool doAngle, bool mostAngle) {
    std::string imgFile = getSrcImgFilePath(path, imgName);
    cv::Mat originSrc = cv::imread(imgFile, cv::IMREAD_COLOR);//default : BGR
    if (originSrc.empty()) {
        // imread() reports failure through an empty Mat; continuing would feed an
        // empty image into the padding / resize steps.
        Logger("Failed to read image(%s)\n", imgFile.c_str());
        return OcrResult{};
    }

    // makePadding() ignores non-positive padding, so the padding rect and the resize
    // target must ignore it too; otherwise box coordinates are shifted later on.
    const int safePadding = (std::max)(padding, 0);

    const int originMaxSide = (std::max)(originSrc.cols, originSrc.rows);
    int resize;
    if (maxSideLen <= 0 || maxSideLen > originMaxSide) {
        resize = originMaxSide;
    } else {
        resize = maxSideLen;
    }
    resize += 2 * safePadding;

    // Location of the original image inside the padded one.
    cv::Rect paddingRect(safePadding, safePadding, originSrc.cols, originSrc.rows);
    cv::Mat paddingSrc = makePadding(originSrc, safePadding);
    ScaleParam scale = getScaleParam(paddingSrc, resize);
    return detect(path, imgName, paddingSrc, paddingRect, scale,
                  boxScoreThresh, boxThresh, unClipRatio, doAngle, mostAngle);
}
// Runs the full OCR pipeline on an image that is already in memory.
//   padding    : border (in pixels) added around the image before detection;
//                negative values are treated as 0
//   maxSideLen : target length of the longer side; <= 0 or larger than the image
//                means "use the image's own longer side"
//   remaining parameters are forwarded unchanged to the core detect() overload,
//   which receives null path / imgName.
// Returns a value-initialized OcrResult if mat is empty.
// NOTE(review): with null path / imgName the debug / result image outputs enabled
// via initLogger() presumably cannot build a file name - confirm before enabling
// them together with this overload.
OcrResult OcrLite::detect(const cv::Mat& mat, int padding, int maxSideLen, float boxScoreThresh, float boxThresh, float unClipRatio, bool doAngle, bool mostAngle)
{
    if (mat.empty()) {
        Logger("Input image is empty\n");
        return OcrResult{};
    }

    // makePadding() ignores non-positive padding, so the padding rect and the resize
    // target must ignore it too; otherwise box coordinates are shifted later on.
    const int safePadding = (std::max)(padding, 0);

    cv::Mat originSrc = mat; // header copy only; needed because makePadding takes a non-const ref
    const int originMaxSide = (std::max)(originSrc.cols, originSrc.rows);
    int resize;
    if (maxSideLen <= 0 || maxSideLen > originMaxSide) {
        resize = originMaxSide;
    }
    else {
        resize = maxSideLen;
    }
    resize += 2 * safePadding;

    // Location of the original image inside the padded one.
    cv::Rect paddingRect(safePadding, safePadding, originSrc.cols, originSrc.rows);
    cv::Mat paddingSrc = makePadding(originSrc, safePadding);
    ScaleParam scale = getScaleParam(paddingSrc, resize);
    return detect(nullptr, nullptr, paddingSrc, paddingRect, scale,
                  boxScoreThresh, boxThresh, unClipRatio, doAngle, mostAngle);
}
// Crops one image per text box out of src (via getRotateCropImage) and returns the
// crops in box order. When isOutputPartImg is set, each crop is also saved under a
// name built from path / imgName / box index.
std::vector<cv::Mat> OcrLite::getPartImages(cv::Mat &src, std::vector<TextBox> &textBoxes,
                                            const char *path, const char *imgName) {
    std::vector<cv::Mat> partImages;
    int boxIdx = 0;
    for (TextBox &box : textBoxes) {
        cv::Mat cropped = getRotateCropImage(src, box.boxPoint);
        partImages.emplace_back(cropped);

        // Optional debug dump of the crop.
        if (isOutputPartImg) {
            std::string dumpFile = getDebugImgFilePath(path, imgName, boxIdx, "-part-");
            saveImg(cropped, dumpFile.c_str());
        }
        ++boxIdx;
    }
    return partImages;
}
// Core OCR pipeline on an already padded image.
//   src        : padded input image
//   originRect : position of the un-padded image inside src; its x is used as the
//                padding offset that is subtracted from every box coordinate
//   scale      : resize parameters for the detector
// Steps: detect boxes -> draw them on a copy of src -> crop one image per box ->
// classify orientation -> rotate crops whose angle index is 1 by 180 degrees ->
// recognize text -> assemble TextBlocks. Every step is reported through Logger().
// The returned OcrResult holds the detector time, the text blocks, the box overlay
// image (cropped back to originRect when both x and y are > 0), the total time and
// all recognized lines joined with '\n'.
OcrResult OcrLite::detect(const char *path, const char *imgName,
                          cv::Mat &src, cv::Rect &originRect, ScaleParam &scale,
                          float boxScoreThresh, float boxThresh, float unClipRatio, bool doAngle, bool mostAngle) {
    // Boxes are drawn on this copy; crops are taken from src.
    cv::Mat textBoxPaddingImg = src.clone();
    const int thickness = getThickness(src);

    Logger("=====Start detect=====\n");
    Logger("ScaleParam(sw:%d,sh:%d,dw:%d,dh:%d,%f,%f)\n", scale.srcWidth, scale.srcHeight,
           scale.dstWidth, scale.dstHeight,
           scale.ratioWidth, scale.ratioHeight);

    // ---- text detection ----
    Logger("---------- step: dbNet getTextBoxes ----------\n");
    const double startTime = getCurrentTime();
    std::vector<TextBox> textBoxes = dbNet.getTextBoxes(src, scale, boxScoreThresh, boxThresh, unClipRatio);
    const double endDbNetTime = getCurrentTime();
    const double dbNetTime = endDbNetTime - startTime;
    Logger("dbNetTime(%fms)\n", dbNetTime);

    const int boxCount = static_cast<int>(textBoxes.size());
    for (int b = 0; b < boxCount; ++b) {
        const TextBox &box = textBoxes[b];
        const auto &pts = box.boxPoint;
        Logger("TextBox[%d](+padding)[score(%f),[x: %d, y: %d], [x: %d, y: %d], [x: %d, y: %d], [x: %d, y: %d]]\n", b,
               box.score,
               pts[0].x, pts[0].y,
               pts[1].x, pts[1].y,
               pts[2].x, pts[2].y,
               pts[3].x, pts[3].y);
    }

    Logger("---------- step: drawTextBoxes ----------\n");
    drawTextBoxes(textBoxPaddingImg, textBoxes, thickness);

    // ---- crop one image per box ----
    std::vector<cv::Mat> partImages = getPartImages(src, textBoxes, path, imgName);

    // ---- orientation classification ----
    Logger("---------- step: angleNet getAngles ----------\n");
    std::vector<Angle> angles = angleNet.getAngles(partImages, path, imgName, doAngle, mostAngle);
    const int angleCount = static_cast<int>(angles.size());
    for (int a = 0; a < angleCount; ++a) {
        const Angle &angle = angles[a];
        Logger("angle[%d][index(%d), score(%f), time(%fms)]\n", a, angle.index, angle.score, angle.time);
    }

    // Crops classified with index 1 are turned by 180 degrees before recognition.
    const int partCount = static_cast<int>(partImages.size());
    for (int p = 0; p < partCount; ++p) {
        if (angles[p].index == 1) {
            partImages.at(p) = matRotateClockWise180(partImages[p]);
        }
    }

    // ---- text recognition ----
    Logger("---------- step: crnnNet getTextLine ----------\n");
    std::vector<TextLine> textLines = crnnNet.getTextLines(partImages, path, imgName);
    const int lineCount = static_cast<int>(textLines.size());
    for (int t = 0; t < lineCount; ++t) {
        const TextLine &line = textLines[t];
        Logger("textLine[%d](%s)\n", t, line.text.c_str());

        // Per-character scores joined with " ,".
        std::ostringstream txtScores;
        const int scoreCount = static_cast<int>(line.charScores.size());
        for (int s = 0; s < scoreCount; ++s) {
            if (s != 0) {
                txtScores << " ,";
            }
            txtScores << line.charScores[s];
        }
        Logger("textScores[%d]{%s}\n", t, txtScores.str().c_str());
        Logger("crnnTime[%d](%fms)\n", t, line.time);
    }

    // ---- assemble results; box coordinates are shifted back by the padding ----
    const int padding = originRect.x;//padding conversion
    std::vector<TextBlock> textBlocks;
    for (int t = 0; t < lineCount; ++t) {
        const TextBox &box = textBoxes[t];
        const Angle &angle = angles[t];
        const TextLine &line = textLines[t];

        std::vector<cv::Point> boxPoint(4);
        for (int corner = 0; corner < 4; ++corner) {
            boxPoint[corner] = cv::Point(box.boxPoint[corner].x - padding,
                                         box.boxPoint[corner].y - padding);
        }
        TextBlock textBlock{boxPoint, box.score, angle.index, angle.score,
                            angle.time, line.text, line.charScores, line.time,
                            angle.time + line.time};
        textBlocks.emplace_back(textBlock);
    }

    const double endTime = getCurrentTime();
    const double fullTime = endTime - startTime;
    Logger("=====End detect=====\n");
    Logger("FullDetectTime(%fms)\n", fullTime);

    // Crop the overlay back to the original image area when padding was applied.
    cv::Mat textBoxImg;
    const bool hasPadding = originRect.x > 0 && originRect.y > 0;
    if (hasPadding) {
        textBoxPaddingImg(originRect).copyTo(textBoxImg);
    } else {
        textBoxImg = textBoxPaddingImg;
    }

    // Optionally save the overlay image.
    if (isOutputResultImg) {
        std::string resultImgFile = getResultImgFilePath(path, imgName);
        imwrite(resultImgFile, textBoxImg);
    }

    // One recognized line per text block, each terminated by '\n'.
    std::string strRes;
    for (const TextBlock &block : textBlocks) {
        strRes.append(block.text);
        strRes.append("\n");
    }

    return OcrResult{dbNetTime, textBlocks, textBoxImg, fullTime, strRes};
}
\ No newline at end of file
#ifdef __CLIB__
#include "OcrLiteCApi.h"
#include "OcrLite.h"
extern "C"
{
// State behind an OCR_HANDLE: the OCR engine plus the text of the most recent
// successful OcrDetect() call.
struct OCR_OBJ {
    OcrLite OcrObj;     // engine instance
    std::string strRes; // result text served by OcrGetLen() / OcrGetResult()
};
// Creates an OCR engine and loads its models.
//   szDetModel / szClsModel / szRecModel : model file paths (detector, angle
//                                          classifier, recognizer)
//   szKeyPath                            : character table for the recognizer
//   nThreads                             : thread count forwarded to every network
// Returns an opaque handle to be released with OcrDestroy(), or nullptr when any
// path argument is null.
// NOTE(review): exceptions thrown while loading the models are not caught here and
// would cross the C boundary - confirm how callers expect load failures to surface.
_QM_OCR_API OCR_HANDLE
OcrInit(const char *szDetModel, const char *szClsModel, const char *szRecModel, const char *szKeyPath, int nThreads) {
    // initModels() takes std::string parameters; constructing a std::string from a
    // null pointer is undefined behavior, so reject null arguments up front.
    if (!szDetModel || !szClsModel || !szRecModel || !szKeyPath) {
        return nullptr;
    }
    // Plain `new` reports failure by throwing, never by returning null, so no null
    // check is needed on the result.
    OCR_OBJ *pOcrObj = new OCR_OBJ;
    pOcrObj->OcrObj.setNumThread(nThreads);
    pOcrObj->OcrObj.initModels(szDetModel, szClsModel, szRecModel, szKeyPath);
    return pOcrObj;
}
// Runs OCR on the image imgPath + imgName and stores the recognized text in the
// handle for OcrGetLen() / OcrGetResult().
//   pParam : tuning parameters; may be null. A field equal to 0 selects its default:
//            padding 50, maxSideLen 1024, boxScoreThresh 0.6, boxThresh 0.3,
//            unClipRatio 2.0, doAngle 1, mostAngle 1.
//            Because 0 means "default", doAngle / mostAngle cannot be switched off
//            and padding 0 cannot be requested through this function.
// Returns TRUE when text was recognized; FALSE for a null handle or path, when no
// text was found, or when the pipeline threw. On FALSE the stored result is empty.
_QM_OCR_API OCR_BOOL
OcrDetect(OCR_HANDLE handle, const char *imgPath, const char *imgName, OCR_PARAM *pParam) {
    OCR_OBJ *pOcrObj = static_cast<OCR_OBJ *>(handle);
    if (!pOcrObj || !imgPath || !imgName)
        return FALSE;

    // Drop the previous result so a failed run cannot hand out stale text.
    pOcrObj->strRes.clear();

    // A null pParam behaves like an all-zero struct, i.e. all defaults.
    OCR_PARAM Param = {};
    if (pParam)
        Param = *pParam;

    if (Param.padding == 0)
        Param.padding = 50;
    if (Param.maxSideLen == 0)
        Param.maxSideLen = 1024;
    if (Param.boxScoreThresh == 0)
        Param.boxScoreThresh = 0.6f;
    if (Param.boxThresh == 0)
        Param.boxThresh = 0.3f;
    if (Param.unClipRatio == 0)
        Param.unClipRatio = 2.0f;
    if (Param.doAngle == 0)
        Param.doAngle = 1;
    if (Param.mostAngle == 0)
        Param.mostAngle = 1;

    try {
        OcrResult result = pOcrObj->OcrObj.detect(imgPath, imgName, Param.padding, Param.maxSideLen,
                                                  Param.boxScoreThresh, Param.boxThresh, Param.unClipRatio,
                                                  Param.doAngle != 0, Param.mostAngle != 0);
        if (result.strRes.empty())
            return FALSE;
        pOcrObj->strRes = result.strRes;
        return TRUE;
    } catch (...) {
        // C++ exceptions must not propagate through a C interface.
        return FALSE;
    }
}
// Returns the buffer size to pass to OcrGetResult(): the stored result's length
// plus one. Returns 0 for a null handle.
_QM_OCR_API int OcrGetLen(OCR_HANDLE handle) {
    const OCR_OBJ *obj = static_cast<const OCR_OBJ *>(handle);
    if (obj == nullptr) {
        return 0;
    }
    return obj->strRes.size() + 1;
}
// Copies the stored result text into szBuf as a NUL-terminated string.
//   szBuf : destination buffer
//   nLen  : size of szBuf in bytes; must be larger than the stored result's length
//           (OcrGetLen() returns a sufficient size)
// The last character of the stored result is not copied, matching the previous
// behavior of this function; OcrLite::detect() ends its result text with '\n', so
// that is the character dropped.
// Returns TRUE when text was copied. Returns FALSE for a null handle or buffer, a
// non-positive nLen, a buffer that is too small, or an empty result (in which case
// szBuf is set to the empty string).
_QM_OCR_API OCR_BOOL OcrGetResult(OCR_HANDLE handle, char *szBuf, int nLen) {
    OCR_OBJ *pOcrObj = static_cast<OCR_OBJ *>(handle);
    // nLen is checked while still signed: converted to size_t, a negative value
    // would look like a huge buffer.
    if (!pOcrObj || !szBuf || nLen <= 0)
        return FALSE;

    const std::string &res = pOcrObj->strRes;
    if (res.empty()) {
        // size() - 1 would wrap around here and write outside the buffer.
        szBuf[0] = 0;
        return FALSE;
    }
    if (static_cast<size_t>(nLen) <= res.size())
        return FALSE;

    const size_t copyLen = res.size() - 1;
    strncpy(szBuf, res.c_str(), copyLen);
    szBuf[copyLen] = 0;
    // Report success as a real boolean; returning size() through OCR_BOOL (a char)
    // would truncate it and read as FALSE for lengths that are multiples of 256.
    return TRUE;
}
// Releases a handle created by OcrInit(). A null handle is ignored.
_QM_OCR_API void OcrDestroy(OCR_HANDLE handle) {
    // delete on a null pointer is a no-op, so no explicit check is required.
    delete static_cast<OCR_OBJ *>(handle);
}
};
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment