#pragma once
// NOTE(review): the seven `#include` directives below have no target, and several
// declarations further down use `std::vector`, `std::unordered_map` and
// `std::shared_ptr` without template arguments. This looks like angle-bracket
// content that was lost when the file was extracted -- restore it from the
// original header before compiling. Nothing in this file shows what was removed.
#include
#include
#include
#include
#include
#include
#include
#include "CommonDefinition.h"
#include "Filesystem.h"
#include "SimpleLog.h"
#include "clipper.h"

namespace ppocr{

/** A rectangle (`t_rect`) paired with a `score`. */
struct _TEXT_BOX { cv::Rect t_rect; float score; };
using T_BOX = struct _TEXT_BOX;

/**
 * @brief Text recognizer declaration: holds a migraphx::program plus its
 *        input/output descriptors and exposes forward(), which turns an image
 *        into a string.
 */
class CTCDecode {
private:
    // inference image
    float* data;
    std::unordered_map device_data;
    migraphx::program net;
    int batch_size;
    int net_input_width;
    int net_input_height;
    int net_input_channel;
    bool offload_copy;
    std::string precision_mode;
    std::unordered_map dev_argument;
    void* input_buffer_device;
    void* output_buffer_device;
    void* output_buffer_host;
    migraphx::shape input_shape;
    migraphx::shape output_shape;
    std::string input_name;
    std::string output_name;
    // postprocess: n_channel -> model output channel, feature_size -> feature size of one channel
    int n_channel;
    int feature_size;
    std::vector k_words;
public:
    /**
     * @brief Constructor.
     * @param rec_model_path       path of the recognition model
     * @param precision_mode       precision mode string (default "fp32")
     * @param image_width          default 480
     * @param image_height         default 48
     * @param channel              default 3
     * @param batch_size           default 1
     * @param offload_copy         default true
     * @param character_dict_path  path of the character dictionary (default "./ppocr_keys_v5.txt")
     */
    CTCDecode(std::string rec_model_path, std::string precision_mode="fp32", int image_width=480, int image_height=48, int channel=3, int batch_size = 1, bool offload_copy = true, std::string character_dict_path="./ppocr_keys_v5.txt");
    ~CTCDecode();
    /**
     * @brief Text recognition; supports predicting at most 90 characters,
     *        drawn from 18385 characters.
     */
    std::string forward(cv::Mat& img);
private:
    /**
     * @brief Pre-processing
     *        pixel = (src_img*scale-0.5)/0.5;
     *        scale = 1.0/255
     * @param img   text image
     * @param data  pre-processing output
     * @param img_w model input width
     * @param img_h model input height
     * @return success: true, failure: false
     */
    bool preproc(cv::Mat img,float* data,int img_w=480,int img_h=48);
    /**
     * @brief Post-processing of the model prediction: takes the most probable
     *        character of each row and assembles a sentence of at most 90
     *        characters; model output shape=[1,90,18385]
     * @param feature model output
     * @return success: text, failure: ""
     */
    std::string postprocess(float* feature);
    /**
     * @brief Decoding: associates the model prediction output with the character set
     * @param probs     maximum probabilities predicted by the model
     * @param indexs    indices of the maximum probabilities predicted by the model
     * @param mean_prob mean probability of the predicted sentence
     * @return success: text, failure: ""
     */
    std::string decode(std::vector& probs,std::vector& indexs,float& mean_prob);
};

/**
 * @brief Text detector declaration: holds a migraphx::program plus its
 *        input/output descriptors and thresholds, and exposes forward(), which
 *        fills `text_roi_boxes` for an image.
 */
class OcrDet {
private:
    std::string precision_mode;
    bool offload_copy;
    migraphx::program net;
    migraphx::shape input_shape;
    migraphx::shape output_shape;
    std::string input_name;
    std::string output_name;
    int det_batch_size;
    int data_size ;
    float segm_thres;
    // NOTE(review): member is spelled `box_thres`, constructor parameter `box_thresh`.
    float box_thres;
    int net_input_width;
    int net_input_height;
    int net_input_channel;
    float* data;
    // Allocate device buffer and host buffer, if offload_copy is false
    std::unordered_map dev_argument;
    void* input_buffer_device;
    void* output_buffer_device;
    void* output_buffer_host;
    // postprocess
    int n_channel;
    int feature_size; // single channel feature map size.
    int output_width;
    int output_height;
    int max_candidates; // maximum number of candidate contours.
public:
    /**
     * @brief Constructor.
     * @param det_model_path path of the detection model
     * @param precision_mode precision mode string (default "float32")
     * @param offload_copy   default true
     * @param segm_thres     default 0.3
     * @param box_thresh     default 0.7
     *
     * NOTE(review): the default here is "float32" while CTCDecode and
     * ppOcrEngine default to "fp32" -- confirm the implementation accepts both
     * spellings.
     */
    OcrDet(std::string det_model_path, std::string precision_mode="float32", bool offload_copy = true, float segm_thres = 0.3, float box_thresh = 0.7);
    ~OcrDet();
    /** Runs detection on `img` and writes the resulting boxes to `text_roi_boxes`. */
    bool forward(cv::Mat& img,std::vector>>& text_roi_boxes);
private:
    /**
     * @brief Pre-processing
     *        pixel = (scale*src_img*mean/std);
     *        scale = 1.0/255
     *        mean = [0.485, 0.456, 0.406]
     *        std = [0.229, 0.224, 0.225]
     *        NOTE(review): the formula is reproduced as originally written; it
     *        multiplies by mean -- confirm against the implementation.
     * @param img  text image
     * @param data pre-processing output
     * @return success: scaling ratios of the w and h dimensions
     */
    cv::Size preproc(cv::Mat img,float* data);
    /**
     * @brief Post-processing: obtains the text regions from the binary map
     *        predicted by the model
     * @param feature model prediction tensor (text detection here uses dbnet)
     * @param boxes   text region coordinates
     * @return success: 0, failure: -1
     */
    int postprocess(float* feature, std::vector>> &boxes);
    int boxes_from_bitmap(cv::Mat& bit_map,std::vector& box);
    std::vector>>boxes_from_bitmap( const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh, const float &det_db_unclip_ratio, const bool &use_polygon_score);
    std::vector> Mat2Vector(cv::Mat mat);
    /**
     * @brief Computes the mean score of a polygon region
     * @param contour set of contour points of the text region
     * @param pred    binary map predicted by the model
     * @return score
     */
    float polygon_score_acc(std::vector contour,cv::Mat pred);
    /**
     * @brief Expands the predicted region inwards or outwards by the ratio
     *        unclip_ratio, in order to find a better fitting text region
     * @param box          text region coordinates
     * @param unclip_ratio expansion ratio
     * @return the processed text region
     */
    cv::RotatedRect unClip(std::vector> box, const float &unclip_ratio);
    /**
     * @brief Computes the offset distance
     *        distance = area * unclip_ratio / dist;
     *        area = sum(x_i*y_{i+1} - x_{i+1}*y_i)
     *        dist = sqrtf(dx * dx + dy * dy)
     *
     * @param box          text region coordinates
     * @param unclip_ratio scaling ratio
     * @param distance     offset distance
     * @return NONE
     */
    void get_contour_area(const std::vector> &box, float unclip_ratio, float &distance) ;
    /**
     * @brief Filters invalid text regions. First maps boxes back to the
     *        original image, then filters out the invalid regions
     * @param boxes   text region coordinates
     * @param ratio_h scaling ratio in the vertical direction
     * @param ratio_w scaling ratio in the horizontal direction
     * @param srcimg  original image
     *
     * @return valid text region coordinates
     */
    std::vector>> filter_det_res(std::vector>> boxes, float ratio_h, float ratio_w, cv::Mat srcimg);
    /**
     * @brief Sorts the text region in top-to-bottom, left-to-right order
     * @param pts text region coordinates
     *
     * @return valid text region coordinates
     */
    std::vector> order_points_clockwise(std::vector> pts);
    /**
     * @brief Gets the coordinates of the minimum rectangle
     * @param box  coordinates of the minimum bounding rectangle of the text region
     * @param ssid largest side of box
     * @return valid text region coordinates
     */
    std::vector> get_mini_boxes(cv::RotatedRect box,float &ssid) ;
    /**
     * @brief Computes the mean score of the t_rect region on the bitmap
     * @param box_array text region predicted by the model
     * @param pred      binary map predicted by the model
     * @return score
     */
    float box_score_fast(std::vector> box_array,cv::Mat pred) ;
    void visualize_boxes(const cv::Mat &srcimg, const std::vector>> &boxes) ;
    bool text_recognition(const cv::Mat &srcimg, const std::vector>> &boxes);
};

/**
 * @brief Engine declaration that holds one `text_detector` and one
 *        `text_recognizer` and exposes a single forward() over a source image.
 */
class ppOcrEngine {
private:
    std::shared_ptr text_detector;
    std::shared_ptr text_recognizer;
public:
    /**
     * @brief Constructor.
     * @param det_model_path      path of the detection model
     * @param rec_model_path      path of the recognition model
     * @param character_dict_path path of the character dictionary
     * @param segm_thres          default 0.3
     * @param box_thresh          default 0.7
     * @param offload_copy        default true
     * @param precision_mode      precision mode string (default "fp32")
     */
    ppOcrEngine(const std::string &det_model_path, const std::string &rec_model_path, const std::string &character_dict_path, const float segm_thres=0.3, const float box_thresh=0.7, bool offload_copy =true, std::string precision_mode = "fp32") ;
    ~ppOcrEngine();
    /** Runs the engine on `srcimg` and returns a vector of results. */
    std::vector forward(cv::Mat &srcimg);
};
}