"tools/vscode:/vscode.git/clone" did not exist on "e1458ec8e36190766c42dab4912a8e97b6ee5097"
Commit 417a4ca0 authored by liuhy's avatar liuhy
Browse files

1、新增warm up功能 2、新增图片叠加OCR字符功能

parent 369751c2
...@@ -10,6 +10,7 @@ set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} -std=c++17) ...@@ -10,6 +10,7 @@ set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} -std=c++17)
set(CMAKE_BUILD_TYPE release) set(CMAKE_BUILD_TYPE release)
set(INCLUDE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/Src/ set(INCLUDE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/Src/
/usr/include/freetype2
$ENV{DTKROOT}/include/ $ENV{DTKROOT}/include/
${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility ${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility
${CMAKE_CURRENT_SOURCE_DIR}/3rdParty/opencv/include) ${CMAKE_CURRENT_SOURCE_DIR}/3rdParty/opencv/include)
...@@ -17,6 +18,7 @@ include_directories(${INCLUDE_PATH}) ...@@ -17,6 +18,7 @@ include_directories(${INCLUDE_PATH})
# 添加依赖库路径 # 添加依赖库路径
set(LIBRARY_PATH ${CMAKE_CURRENT_SOURCE_DIR}/3rdParty/opencv/lib set(LIBRARY_PATH ${CMAKE_CURRENT_SOURCE_DIR}/3rdParty/opencv/lib
/usr/lib/x86_64-linux-gnu
$ENV{DTKROOT}/lib/) $ENV{DTKROOT}/lib/)
link_directories(${LIBRARY_PATH}) link_directories(${LIBRARY_PATH})
...@@ -24,6 +26,7 @@ link_directories(${LIBRARY_PATH}) ...@@ -24,6 +26,7 @@ link_directories(${LIBRARY_PATH})
set(LIBRARY opencv_core set(LIBRARY opencv_core
opencv_imgproc opencv_imgproc
opencv_imgcodecs opencv_imgcodecs
freetype
opencv_dnn opencv_dnn
migraphx migraphx
migraphx_gpu migraphx_gpu
...@@ -36,6 +39,7 @@ link_libraries(${LIBRARY}) ...@@ -36,6 +39,7 @@ link_libraries(${LIBRARY})
set(SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/Src/main.cpp set(SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/Src/main.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/Filesystem.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/Filesystem.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/clipper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/clipper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/cv_put_Text.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Src/ocr_engine.cpp) ${CMAKE_CURRENT_SOURCE_DIR}/Src/ocr_engine.cpp)
# 添加可执行目标 # 添加可执行目标
......
Doc/Images/CRNN.png

112 KB | W: | H:

Doc/Images/CRNN.png

96.4 KB | W: | H:

Doc/Images/CRNN.png
Doc/Images/CRNN.png
Doc/Images/CRNN.png
Doc/Images/CRNN.png
  • 2-up
  • Swipe
  • Onion skin
Doc/Images/DBNet.png

597 KB | W: | H:

Doc/Images/DBNet.png

311 KB | W: | H:

Doc/Images/DBNet.png
Doc/Images/DBNet.png
Doc/Images/DBNet.png
Doc/Images/DBNet.png
  • 2-up
  • Swipe
  • Onion skin
...@@ -4,11 +4,15 @@ PP-OCRv5 是PP-OCR新一代文字识别解决方案,该方案聚焦于多场 ...@@ -4,11 +4,15 @@ PP-OCRv5 是PP-OCR新一代文字识别解决方案,该方案聚焦于多场
## 模型简介 ## 模型简介
### 文本检测 ### 文本检测
文本检测使用了dbnet(论文地址:https://arxiv.org/pdf/1911.08947),网络结构:![alt text](Images/DBNet.png),模型输出概率图,并用Vatti Clipping算法对字符区域多边形简化处理,sample中借助Clipping 库。 sample中使用动态shape(N,3,H,C),最大输入shape是[1,3,640,640],模型地址:Resource/Models/ppocrv5_server_det_infer.onnx 文本检测使用了dbnet( 论文地址:https://arxiv.org/pdf/1911.08947 ),网络结构:
![alt text](Images/DBNet.png)
模型输出概率图,并用Vatti Clipping算法对字符区域多边形简化处理,sample中借助Clipping 库。 sample模型输入shape为[1,3,640,640],模型路径:Resource/Models/ppocrv5_server_det_infer.onnx
### 文本识别 ### 文本识别
文本识别使用了CRNN+CTCDecode(https://arxiv.org/pdf/2009.09941),网络结构:![alt text](Images/CRNN.png),sample中使用了动态shape (N,3,48,W),最大输入shape是[1,3,48,720],模型地址:Resource/Models/ppocrv5_server_rec_infer.onnx 文本识别使用了CRNN+CTCDecode( https://arxiv.org/pdf/2009.09941 ),网络结构:
![(Images/CRNN.png)](Images/CRNN.png)
sample中模型输入shape为[1,3,48,720],模型路径:Resource/Models/ppocrv5_server_rec_infer.onnx
## 预处理 ## 预处理
### 检测模型预处理 ### 检测模型预处理
检测模型输入数据预处理: 检测模型输入数据预处理:
...@@ -110,7 +114,7 @@ class ppOcrEngine { ...@@ -110,7 +114,7 @@ class ppOcrEngine {
const float segm_thres=0.3, const float segm_thres=0.3,
const float box_thresh=0.7, const float box_thresh=0.7,
bool offload_copy =true, bool offload_copy =true,
std::string precision_mode = "fp32") ; std::string precision_mode = "fp16") ;
/** /**
* @brief OCR engine初始化 * @brief OCR engine初始化
* @param det_model_path 字符检测模型路径 * @param det_model_path 字符检测模型路径
...@@ -119,7 +123,7 @@ class ppOcrEngine { ...@@ -119,7 +123,7 @@ class ppOcrEngine {
* @param segm_thres 像素分割阈值 * @param segm_thres 像素分割阈值
* @param box_thresh 字符区域box阈值 * @param box_thresh 字符区域box阈值
* @param offload_copy 内存拷贝存模式, 支持两种数据拷贝方式:*offload_copy=true、offload_copy=false。当offload_copy为true时,不需*要进行内存拷贝,如果为false,需要先预分配输入输出的设备内存,并在推理* *前,将预处理数据拷贝到设备内存,推理后将模型输出从设备内存中拷贝出来 * @param offload_copy 内存拷贝存模式, 支持两种数据拷贝方式:*offload_copy=true、offload_copy=false。当offload_copy为true时,不需*要进行内存拷贝,如果为false,需要先预分配输入输出的设备内存,并在推理* *前,将预处理数据拷贝到设备内存,推理后将模型输出从设备内存中拷贝出来
* @param precision_mode 精度模式,支持:fp32、fp16 * @param precision_mode 精度模式,支持:fp32、fp16,默认支持fp16
* *
* @return NONE * @return NONE
*/ */
...@@ -130,36 +134,11 @@ class ppOcrEngine { ...@@ -130,36 +134,11 @@ class ppOcrEngine {
class CTCDecode class CTCDecode
{ {
private: private:
//inference image ...
float* data;
std::unordered_map<std::string, migraphx::argument> device_data;
migraphx::program net;
int batch_size;
int net_input_width;
int net_input_height;
int net_input_channel;
bool offload_copy;
std::string precision_mode;
std::unordered_map<std::string, migraphx::argument> dev_argument;
void* input_buffer_device;
void* output_buffer_device;
void* output_buffer_host;
migraphx::shape input_shape;
migraphx::shape output_shape;
std::string input_name;
std::string output_name;
//postprocess: n_channel->model output channel,feature_size--> feature size one channel
int n_channel;
int feature_size;
std::vector<std::string> k_words;
public: public:
CTCDecode(std::string rec_model_path, CTCDecode(std::string rec_model_path,
std::string precision_mode="fp32", std::string precision_mode="fp16",
int image_width=480, int image_width=480,
int image_height=48, int image_height=48,
int channel=3, int channel=3,
...@@ -169,73 +148,21 @@ class ppOcrEngine { ...@@ -169,73 +148,21 @@ class ppOcrEngine {
~CTCDecode(); ~CTCDecode();
/** /**
* @brief 字符识别编码,可支持,最长可支持预测90个字符,18385个字符 * @brief 字符识别、编码API 字符识别编码,可支持,最长可支持预测90个字符,18385个字符
* @param img 输入图片
* @return 编码后的字符串
*/ */
std::string forward(cv::Mat& img); std::string forward(cv::Mat& img);
private: private:
/** ...
* @brief 预处理
* pixel = (src_img*scale-0.5)/0.5;
* scale = 1.0/255
* @param img 字符图片
* @param data 预处理输出
* @param img_w 模型输入宽
* @param img_h 模型输入高
* @return 成功:true,失败:false
*/
bool preproc(cv::Mat img,float* data,int img_w=480,int img_h=48);
/**
* @brief 模型预测后处理,获取每行中概率最大的字符,组成一句长度最大为90个字符的句子,模型预测输出shape=[1,90,18385]
* @param feature model output
* @return 成功:text,失败:""
*/
std::string postprocess(float* feature);
/**
* @brief 解码,将模型预测输出与字符集关联起来
* @param probs 模型预测的最大概率
* @param indexs 模型预测的最大概率的索引值
* @param mean_prob 预测句子的平均概率
* @return 成功:text,失败:""
*/
std::string decode(std::vector<float>& probs,std::vector<int>& indexs,float& mean_prob);
}; };
class OcrDet class OcrDet
{ {
private: private:
std::string precision_mode; ...
bool offload_copy;
migraphx::program net;
migraphx::shape input_shape;
migraphx::shape output_shape;
std::string input_name;
std::string output_name;
int det_batch_size;
int data_size ;
float segm_thres;
float box_thres;
int net_input_width;
int net_input_height;
int net_input_channel;
float* data;
//Allocate device buffer and host buffer,if offload_copy is false
std::unordered_map<std::string, migraphx::argument> dev_argument;
void* input_buffer_device;
void* output_buffer_device;
void* output_buffer_host;
//postprocess
int n_channel;
int feature_size; //single channel feature map size.
int output_width;
int output_height;
int max_candidates;//maximun number of candidates contours.
public: public:
OcrDet(std::string det_model_path, OcrDet(std::string det_model_path,
...@@ -244,113 +171,19 @@ class ppOcrEngine { ...@@ -244,113 +171,19 @@ class ppOcrEngine {
float segm_thres = 0.3, float segm_thres = 0.3,
float box_thresh = 0.7); float box_thresh = 0.7);
~OcrDet(); ~OcrDet();
bool forward(cv::Mat& img,std::vector<std::vector<std::vector<int>>>& text_roi_boxes);
private:
/**
* @brief 预处理
* pixel = (scale*src_img*mean/std);
* scale = 1.0/255
* mean = [0.485, 0.456, 0.406]
* std = [0.229, 0.224, 0.225]
* @param img 字符图片
* @param data 预处理输出
* @return 成功:w,h维度的缩放比例
*/
cv::Size preproc(cv::Mat img,float* data);
/**
* @brief 后处理,通过模型预测的二值图获取文本区域
* @param feature 模型预测tensor(这里字符检测使用了dbnet)
* @param boxes 字符区域坐标
* @return 成功:0,失败:-1
*/
int postprocess(float* feature, std::vector<std::vector<std::vector<int>>> &boxes);
int boxes_from_bitmap(cv::Mat& bit_map,std::vector<T_BOX>& box);
std::vector<std::vector<std::vector<int>>>boxes_from_bitmap(
const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh,
const float &det_db_unclip_ratio, const bool &use_polygon_score);
std::vector<std::vector<float>> Mat2Vector(cv::Mat mat);
/**
* @brief 统计多边形区域的平均得分
* @param contour 字符区域的轮廓点集合
* @param pred 模型预测二值图
* @return score
*/
float polygon_score_acc(std::vector<cv::Point> contour,cv::Mat pred);
/**
* @brief 对模型预测的区域进行向内或向外扩散,扩散比例是unclip_ratio ,目的是找到更加合适的字符区域
* @param box 字符区域坐标
* @param pred 模型预测二值图
* @return 处理后的字符区域
*/
cv::RotatedRect unClip(std::vector<std::vector<float>> box,
const float &unclip_ratio);
/**
* @brief 计算偏移距离
* distance = area * unclip_ratio / dist;
* area = ∑(x_i*y_{i+1} - x_{i+1}*y_i)
* dist = sqrtf(dx * dx + dy * dy)
*
* @param box 字符区域坐标
* @param unclip_ratio 缩放比例
* @param distance 偏移距离
* @return NONE
*/
void get_contour_area(const std::vector<std::vector<float>> &box,
float unclip_ratio, float &distance) ;
/**
* @brief 无效字符区域过滤。首先将boxes映射回原始图像,然后过滤无效区域
* @param boxes 字符区域坐标
* @param ratio_h 垂直方向缩放比例
* @param ratio_w 水平方向缩放比例
* @param srcimg 原始图像
*
* @return 字符区域有效坐标
*/
std::vector<std::vector<std::vector<int>>> filter_det_res(std::vector<std::vector<std::vector<int>>> boxes,
float ratio_h, float ratio_w, cv::Mat srcimg);
/** /**
* @brief 对字符区域按照从上到下,从左到右的顺序排序 * @brief 字符检测模型推理API
* @param pts 字符区域坐标 * @param img 原始图片
* * @param text_roi_boxes 字符区域坐标,格式:[[[tl.x, tl.y], [tr.x, tr.y],[], [br.x, br.y], [bl.x, bl.y]]]]
* @return 字符区域有效坐标 * | | | |
*/ * 左上坐标 右上坐标 右下坐标 左下坐标
std::vector<std::vector<int>> order_points_clockwise(std::vector<std::vector<int>> pts); * @return 成功返回true,失败返回false
/**
* @brief 获取最小矩形坐标
* @param box 字符区域最小外接矩形的坐标
* @param ssid box的最大边
* @return 字符区域有效坐标
*/ */
std::vector<std::vector<float>> get_mini_boxes(cv::RotatedRect box,float &ssid) ; bool forward(cv::Mat& img,std::vector<std::vector<std::vector<int>>>& text_roi_boxes);
/**
* @brief 计算bitmap上的t_rect区域的平均分数
* @param box_array 模型预测的字符区域
* @param pred 模型预测二值图
* @return score
*/
float box_score_fast(std::vector<std::vector<float>> box_array,cv::Mat pred) ;
void visualize_boxes(const cv::Mat &srcimg,
const std::vector<std::vector<std::vector<int>>> &boxes) ;
bool text_recognition(const cv::Mat &srcimg, private:
const std::vector<std::vector<std::vector<int>>> &boxes); ...
}; };
...@@ -358,119 +191,84 @@ class ppOcrEngine { ...@@ -358,119 +191,84 @@ class ppOcrEngine {
## 推理 ## 推理
### 字符检测模型推理 - 字符检测
- 字符识别、解码
- 字符框可视化
- OCR结果可视化
```c++ ```c++
bool OcrDet::forward(cv::Mat& img,std::vector<std::vector<std::vector<int>>>& text_roi_boxes) std::vector<std::string> ppOcrEngine::forward(cv::Mat &srcimg){
{ std::vector<std::vector<std::vector<int>>> text_roi_boxes;
std::vector<std::vector<std::vector<int>>> boxes;
//输入数据预处理 std::vector<std::string> text_vec;
cv::Size ratio = preproc(img,data); auto start = std::chrono::high_resolution_clock::now();
/* //字符区域检测
支持两种数据拷贝方式:offload_copy=true、offload_copy=false。当offload_copy为true时,不需要进行内存拷贝,如果为false,需要先预分配输入输出的设备内存,并在推理前,将预处理数据拷贝到设备内存,推理后将模型输出从设备内存中拷贝出来,在做后处理。 text_detector->forward(srcimg,text_roi_boxes);
*/ if(text_roi_boxes.size() == 0)
if( this->offload_copy ==false )
{ {
hipMemcpy(input_buffer_device, std::cout<<"Not found text roi !\n";
(void*)data, return std::vector<std::string>();
this->input_shape.bytes(),
hipMemcpyHostToDevice);
std::vector<migraphx::argument> results = net.eval(dev_argument);
hipMemcpy(output_buffer_host,
(void*)output_buffer_device,
output_shape.bytes(),
hipMemcpyDeviceToHost);
postprocess((float *)output_buffer_host,boxes);
std::cout<<"copy mode ..."<<std::endl;
}else{
std::unordered_map<std::string, migraphx::argument> inputData;
inputData[input_name] = migraphx::argument{input_shape, (float *)data};
std::vector<migraphx::argument> results = net.eval(inputData);
migraphx::argument result = results[0] ; //get output data
postprocess((float *)result.data(),boxes);
std::cout<<"offload copy mode ..."<<std::endl;
} }
//计算等比缩放比例 std::vector<cv::Point> points;
float ratio_w = float(net_input_width) / float(img.cols); //字符识别+编码
float ratio_h = float(net_input_height) / float(img.rows); for (int n = 0; n < text_roi_boxes.size(); n++) {
//过滤无效框
text_roi_boxes = filter_det_res(boxes, ratio_h, ratio_w, img); cv::Rect rect;
//可视化检测结果 cv::Mat text_roi_mat;
visualize_boxes(img,text_roi_boxes); rect.x = text_roi_boxes[n][0][0];
// TextRecognition(img,boxes); rect.y = text_roi_boxes[n][0][1];
return true; rect.width = text_roi_boxes[n][2][0] - text_roi_boxes[n][0][0];
} rect.height = text_roi_boxes[n][2][1] - text_roi_boxes[n][0][1];
if(rect.width <3 || rect.height<3)
{
``` continue;
### 字符识别推理 }
```c++ text_roi_mat = srcimg(rect).clone();
std::string CTCDecode::forward(cv::Mat& img)
{
//预处理
preproc(img,data,net_input_width,net_input_height);
/*
支持两种数据拷贝方式:offload_copy=true、offload_copy=false。当offload_copy为true时,不需要进行内存拷贝,如果为false,需要先预分配输入输出的设备内存,并在推理前,将预处理数据拷贝到设备内存,推理后将模型输出从设备内存中拷贝出来,在做后处理。
*/
if( this->offload_copy ==false ) std::string text = text_recognizer->forward(text_roi_mat);
{ text_vec.push_back(text);
hipMemcpy(input_buffer_device, points.push_back(cv::Point(rect.x,rect.y));
(void*)data, }
this->input_shape.bytes(), auto end = std::chrono::high_resolution_clock::now();
hipMemcpyHostToDevice); auto duration_ms = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
std::cout<<"[Time info] elapsed: "<< duration_ms.count() <<" ms\n";
//字符框可视化
visualize_boxes(srcimg,text_roi_boxes);
//OCR可视化
cv::Mat res_img = visualize_text(text_vec,points, srcimg);
...
}
std::vector<migraphx::argument> results = net.eval(dev_argument);
hipMemcpy(output_buffer_host,
(void*)output_buffer_device,
output_shape.bytes(),
hipMemcpyDeviceToHost);
//模型后处理,获取字符的最大概率和索引,并根据索引在字符库中查找对应的字符,然后合成一个句子
std::string text = postprocess((float *)output_buffer_device);
return text;
}else{
std::unordered_map<std::string, migraphx::argument> inputData;
inputData[input_name] = migraphx::argument{input_shape, (float *)data};
std::vector<migraphx::argument> results = net.eval(inputData);
migraphx::argument result = results[0] ;
std::string text = postprocess((float *)result.data());
// std::cout<<"ctc: offload copy mode ..."<<std::endl;
return text;
}
}
``` ```
# Ocrv5 API调用说明 # Ocrv5 API调用说明
API调用步骤如下: API调用步骤如下:
- 类实例化 - 类实例化
- 读取测试图片
- 识别接口调用 - 识别接口调用
例: 例:
```c++ ```c++
int main(int argc, char** argv) int main(int argc, char** argv){
{
std::string det_model_onnx = "../Resource/Models/ppocrv5_server_det_infer.onnx"; std::string det_model_onnx = "../Resource/Models/ppocrv5_server_det_infer.onnx";
std::string rec_model_onnx = "../Resource/Models/ppocrv5_server_rec_infer.onnx"; std::string rec_model_onnx = "../Resource/Models/ppocrv5_server_rec_infer.onnx";
std::string img_path = "../Resource/Images/20250703205038.png"; std::string img_path = "../Resource/Images/demo.png";
std::string character_dict_path = "../Resource/ppocr_keys_v5.txt"; std::string character_dict_path = "../Resource/ppocr_keys_v5.txt";
std::string front = "../Resource/fonts/SimHei.ttf";
float segm_thres=0.3; float segm_thres=0.3;
float box_thresh=0.3; float box_thresh=0.3;
ppOcrEngine ocr_engine(det_model_onnx, ppOcrEngine ocr_engine(det_model_onnx,
rec_model_onnx, rec_model_onnx,
character_dict_path, character_dict_path,
front,
segm_thres, segm_thres,
box_thresh, box_thresh,
true, true,
"fp32"); "fp16");
cv::Mat img=cv::imread(img_path); cv::Mat img=cv::imread(img_path);
ocr_engine.forward(img); ocr_engine.forward(img);
return 0; return 0;
} }
``` ```
sample支持两种精度推理(fp32和fp16),默认是fp32),精度和内存拷贝方式分别通过ocr_engine的构造函数传入参数来设置。 sample支持两种精度推理(fp32和fp16),默认是fp16),精度和内存拷贝方式分别通过ocr_engine的构造函数传入参数来设置。
\ No newline at end of file \ No newline at end of file
...@@ -4,10 +4,14 @@ PP-OCRv5 是PP-OCR新一代文字识别解决方案,该方案聚焦于多场 ...@@ -4,10 +4,14 @@ PP-OCRv5 是PP-OCR新一代文字识别解决方案,该方案聚焦于多场
## 模型简介 ## 模型简介
### 文本检测 ### 文本检测
文本检测使用了dbnet(论文地址:https://arxiv.org/pdf/1911.08947),网络结构:![alt text](Images/DBNet.png),模型输出概率图,并用Vatti Clipping算法对字符区域多边形简化处理。 sample中使用动态shape(N,3,H,C),最大输入shape是[1,3,640,640],模型地址:Resource/Models/ppocrv5_server_det_infer.onnx 文本检测使用了dbnet( 论文地址:https://arxiv.org/pdf/1911.08947 ),网络结构:
![alt text](Images/DBNet.png)
模型输出概率图,并用Vatti Clipping算法对字符区域多边形简化处理,sample中借助Clipping 库。 sample中模型输入shape为[1,3,640,640],模型路径:Resource/Models/ppocrv5_server_det_infer.onnx
### 文本识别 ### 文本识别
文本识别使用了CRNN+CTCDecode(https://arxiv.org/pdf/2009.09941),网络结构:![alt text](Images/CRNN.png),sample中使用了动态shape (N,3,48,W),最大输入shape是[1,3,48,720],模型地址:Resource/Models/ppocrv5_server_rec_infer.onnx 文本识别使用了CRNN+CTCDecode( https://arxiv.org/pdf/2009.09941 ),网络结构:
![(Images/CRNN.png)](Images/CRNN.png)
sample中模型输入shape为[1,3,48,720],模型路径:Resource/Models/ppocrv5_server_rec_infer.onnx
## 预处理 ## 预处理
### 检测模型预处理 ### 检测模型预处理
...@@ -80,28 +84,20 @@ def preprocess(self, img, max_wh_ratio): ...@@ -80,28 +84,20 @@ def preprocess(self, img, max_wh_ratio):
imgH, imgW = self.rec_input_size imgH, imgW = self.rec_input_size
max_h,max_w = self.rec_input_size max_h,max_w = self.rec_input_size
h, w = img.shape[:2] h, w = img.shape[:2]
# re_size = (max_w,max_h)
#保留H的原始维度 #保留H的原始维度
if h <= max_h: if h <= max_h:
ratio = max_h / h ratio = max_h / h
w = int(w*ratio) w = int(w*ratio)
if w <= max_w: if w <= max_w:
re_size =(w,max_h) re_size =(w,max_h)
else: else:
re_size = (max_w,max_h) re_size = (max_w,max_h)
else: else:
ratio = max_h/h ratio = max_h/h
w,h = int(w*ratio),max_h w,h = int(w*ratio),max_h
if w <= max_w: if w <= max_w:
re_size = (w,h) re_size = (w,h)
else: else:
re_size = (max_w,h) re_size = (max_w,h)
...@@ -112,12 +108,9 @@ def preprocess(self, img, max_wh_ratio): ...@@ -112,12 +108,9 @@ def preprocess(self, img, max_wh_ratio):
resized_image = resized_image.transpose((2, 0, 1)) / 255 resized_image = resized_image.transpose((2, 0, 1)) / 255
resized_image -= 0.5 resized_image -= 0.5
resized_image /= 0.5 resized_image /= 0.5
#填充,沿着右、下填充 #填充,沿着右、下填充
padding_im = np.zeros((3, imgH, imgW), dtype=np.float32) padding_im = np.zeros((3, imgH, imgW), dtype=np.float32)
padding_im[:, :, 0:re_size[0]] = resized_image padding_im[:, :, 0:re_size[0]] = resized_image
return padding_im return padding_im
``` ```
## 类介绍 ## 类介绍
...@@ -154,7 +147,7 @@ class PPOcrV5(): ...@@ -154,7 +147,7 @@ class PPOcrV5():
**kwargs :设置字符检测模型后处理相关参数 **kwargs :设置字符检测模型后处理相关参数
Returns: Returns:
return_type: NONE。 return_type: 无返回值
Examples: Examples:
det_onnx_path = "PATH/TO/det_onnx_model.onnx" det_onnx_path = "PATH/TO/det_onnx_model.onnx"
...@@ -198,7 +191,7 @@ class TextDetector(object): ...@@ -198,7 +191,7 @@ class TextDetector(object):
**kwargs :设置字符检测模型后处理相关参数 **kwargs :设置字符检测模型后处理相关参数
Returns: Returns:
return_type: NONE return_type: 无返回值
Examples: Examples:
self.db_detector = TextDetector( self.db_detector = TextDetector(
...@@ -216,7 +209,6 @@ class TextDetector(object): ...@@ -216,7 +209,6 @@ class TextDetector(object):
""" """
class TextRecgnizer(object): class TextRecgnizer(object):
"""Support SVTR_LCNet """
def __init__( def __init__(
self, self,
rec_model_path, rec_model_path,
...@@ -240,7 +232,7 @@ class TextRecgnizer(object): ...@@ -240,7 +232,7 @@ class TextRecgnizer(object):
**kwargs :设置字符识别模型后处理相关参数 **kwargs :设置字符识别模型后处理相关参数
Returns: Returns:
return_type: NONE return_type: 无返回值
Examples: Examples:
self.text_extractor = TextRecgnizer(rec_model_path=rec_model_path, self.text_extractor = TextRecgnizer(rec_model_path=rec_model_path,
...@@ -252,18 +244,15 @@ class TextRecgnizer(object): ...@@ -252,18 +244,15 @@ class TextRecgnizer(object):
class BaseRecLabelDecode(object): class BaseRecLabelDecode(object):
def __init__(self, character_dict_path=None, def __init__(self, character_dict_path=None,
use_space_char=False) use_space_char=False)
"""Convert between text-label and text-index """
字符识别(crnn+ctc)。 字符识别(crnn+ctc)。
Args: Args:
character_dict_path :字符集文件路径。 character_dict_path :字符集文件路径。
use_space_char :字符集中是否包含空格。 use_space_char :字符集中是否包含空格。
Returns: Returns:
return_type: NONE。 return_type: 无返回值。
Examples:
Examples:
""" """
class CTCLabelDecode(BaseRecLabelDecode): class CTCLabelDecode(BaseRecLabelDecode):
...@@ -277,140 +266,28 @@ class TextRecgnizer(object): ...@@ -277,140 +266,28 @@ class TextRecgnizer(object):
character_dict_path :字符集文件路径。 character_dict_path :字符集文件路径。
use_space_char :字符集中是否包含空格。 use_space_char :字符集中是否包含空格。
Returns: Returns:
return_type: NONE return_type: 无返回值
Examples: Examples:
""" """
``` ```
## 推理 ## 推理
### 字符检测模型推理
```python
def __call__(self, src_img):
data = self.preprocess(src_img)
"""支持两种数据拷贝方式:offload_copy=true、offload_copy=false。当offload_copy为true时,不需要进行内存拷贝,如果为false,需要先预分配输入输出的设备内存,并在推理前,将预处理数据拷贝到设备内存,推理后将模型输出从设备内存中拷贝出来,在做后处理。"""
if self.offload_copy==False:
self.d_mem[self.det_input_name] =migraphx.to_gpu(migraphx.argument(data["image"]))
results = self.db_model.run(self.d_mem)
else:
results = self.db_model.run({self.det_input_name:data["image"]})
if self.offload_copy==False :
#从gpu拷贝推理结果到cpu
result=migraphx.from_gpu(results[0])
print("offload copy model")
result = np.array(result)
else:
result = results[0]
shape_list = np.expand_dims(data["shape"], axis=0)
pred = np.array(result)
pred = pred[:, 0, :, :]
#获取大于阈值的概率
segmentation = pred > self.thresh
boxes_batch = []
for batch_index in range(pred.shape[0]):
src_h, src_w, ratio_h, ratio_w = shape_list[batch_index]
if self.dilation_kernel is not None:
mask = cv2.dilate(
np.array(segmentation[batch_index]).astype(np.uint8),
self.dilation_kernel,
)
else:
mask = segmentation[batch_index]
#根据预测的bitmap获取文本区域
if self.box_type == "poly":
boxes, scores = self.polygons_from_bitmap(
pred[batch_index], mask, ratio_w,ratio_h, src_w, src_h
)
elif self.box_type == "quad":
boxes, scores = self.boxes_from_bitmap(
pred[batch_index], mask, ratio_w,ratio_h, src_w, src_h
)
else:
raise ValueError("box_type can only be one of ['quad', 'poly']")
boxes_batch.append(boxes)
#文本区域按照从上到下,从左到右的顺序排序
det_box_batch = self.sorted_boxes(boxes_batch)
#文本区域按坐标映射到原始图像
dt_boxes,det_rects = self.box_standardization(det_box_batch,shape_list)
return dt_boxes,det_rects
```
### 字符识别推理
```python ```python
def __call__(self, batch_img_list): def __call__(self, src_img):
if len(batch_img_list) == 0: import time
return [] start = time.time()
width_list = [] #字符检测
#遍历图片列表(字符roi存放在图片列表中),为了支持多batch推理,这里还会将batch_size张图片进行拼接np.concatenate(batch_norm_imgs) dt_boxs,dt_rects = self.db_detector(src_img)
for b in range(len(batch_img_list)): res_img = self.vis_boxes(dt_boxs,src_img)
for img in batch_img_list[b]: #字符区域图片裁剪
width_list.append(img.shape[1] / float(img.shape[0])) batch_img_list = self.detection_roi_crop(src_img,dt_rects)
#字符特征提取
indices = np.argsort(np.array(width_list)) batch_outputs_pre ,batch_max_wh_ratio_pre = self.text_extractor(batch_img_list)
#字符编码
input_batch = self.rec_batch_num batch_text_list, batch_label_list = self.ctc_decoder(batch_outputs_pre,return_word_box=False,wh_ratio_list = batch_max_wh_ratio_pre)
batch_outputs_pre = []
batch_max_wh_ratio_pre = []
for b in range(len(batch_img_list)):
im_count = len(batch_img_list[b])
batch_outputs = []
batch_max_wh_ratio = []
for beg_img_no in range(0, im_count, input_batch):
end_img_no = min(im_count, beg_img_no + input_batch)
# for ino in range(beg_img_no, end_img_no):
# h, w = batch_img_list[b][indices[ino]].shape[0:2]
# wh_ratio = w * 1.0 / h
# max_wh_ratio = max(max_wh_ratio, wh_ratio)
batch_norm_imgs = []
max_wh_ratio = list()
# N batch
for ino in range(beg_img_no, end_img_no):
#单张图片预处理
norm_img = self.preprocess(batch_img_list[b][indices[ino]], max_wh_ratio)
norm_img = norm_img[np.newaxis, :].astype(np.float32)
batch_norm_imgs.append(norm_img)
batch_max_wh_ratio.append(max_wh_ratio)
#batch_size张图片进行拼接
if self.rec_batch_num >1:
norm_img_batch = np.concatenate(batch_norm_imgs)
norm_img_batch = norm_img_batch.copy()
else:
norm_img_batch = np.array([batch_norm_imgs.copy()])
if self.offload_copy==False:
print("offload copy model")
self.d_mem[self.rec_input_name] =migraphx.to_gpu(migraphx.argument(norm_img_batch))
results = self.rec_model.run(self.d_mem)
output = np.array(results[0])
else:
results = self.rec_model.run({self.rec_input_name:norm_img_batch})
output = results[0]
# batch_outputs.append(np.array(output))
#将所有batch的输出结果append到batch_outputs中方便后处理
[batch_outputs.append(out) for out in np.array(output)]
batch_outputs_pre.append(np.array(batch_outputs))
batch_max_wh_ratio_pre.append(batch_max_wh_ratio)
return batch_outputs_pre ,batch_max_wh_ratio_pre
``` ```
# Ocrv5 API调用说明 # Ocrv5 API调用说明
API调用步骤如下: API调用步骤如下:
...@@ -425,8 +302,8 @@ if __name__ == '__main__': ...@@ -425,8 +302,8 @@ if __name__ == '__main__':
rec_onnx_path = "../Resource/Models/ppocrv5_server_rec_infer.onnx" rec_onnx_path = "../Resource/Models/ppocrv5_server_rec_infer.onnx"
image_path = "../Resource/Images/lite_demo.png" image_path = "../Resource/Images/lite_demo.png"
img = cv2.imread(image_path) img = cv2.imread(image_path)
ppocrv5 = PPOcrV5(det_onnx_path,rec_onnx_path,offload_copy=True,precision_mode="fp32") ppocrv5 = PPOcrV5(det_onnx_path,rec_onnx_path,offload_copy=True,precision_mode="fp16")
res_img = ppocrv5(img) res_img = ppocrv5(img)
cv2.imwrite("res.jpg",res_img) cv2.imwrite("res.jpg",res_img)
``` ```
sample支持两种精度推理(fp32和fp16),默认是fp32),精度和内存拷贝方式分别通过precision_mode和offload_copy参数控制。 sample支持两种精度推理(fp32和fp16),默认是fp16),精度和内存拷贝方式分别通过precision_mode和offload_copy参数控制。
\ No newline at end of file \ No newline at end of file
This diff is collapsed.
...@@ -81,6 +81,7 @@ pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple ...@@ -81,6 +81,7 @@ pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
``` ```
python inference.py python inference.py
``` ```
结果图片保存在当前目录下:res.jpg
offload_copy和precision_mode设置可参考[Tutorial_Python.md](Doc/Tutorial_Python.md),在main中示例。 offload_copy和precision_mode设置可参考[Tutorial_Python.md](Doc/Tutorial_Python.md),在main中示例。
### C++版本推理 ### C++版本推理
...@@ -104,7 +105,6 @@ cd <path_to_ppocrv5_migraphx> ...@@ -104,7 +105,6 @@ cd <path_to_ppocrv5_migraphx>
sh ./3rdParty/InstallOpenCVDependences.sh sh ./3rdParty/InstallOpenCVDependences.sh
``` ```
#### 安装OpenCV并构建工程 #### 安装OpenCV并构建工程
``` ```
...@@ -116,30 +116,9 @@ rbuild build -d depend ...@@ -116,30 +116,9 @@ rbuild build -d depend
- 进入到opencv-3.4.11_mini目录下创建build目录,cd build - 进入到opencv-3.4.11_mini目录下创建build目录,cd build
- 执行以下命令: - 执行以下命令:
``` ```
cmake -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=./opencv_dep -D INSTALL_C_EXAMPLES=ON -D INSTALL_PYTHON_EXAMPLES=ON -D OPENCV_GENERATE_PKGCONFIG=ON -D BUILD_EXAMPLES=ON -D OPENCV_EXTRA_MODULES_PATH=../modules/ .. cmake -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=./opencv_dep -D INSTALL_C_EXAMPLES=ON -D INSTALL_PYTHON_EXAMPLES=ON -D OPENCV_GENERATE_PKGCONFIG=ON -D BUILD_EXAMPLES=ON -D OPENCV_EXTRA_MODULES_PATH=../modules/ ..
``` ```
- 执行make -j8 && make install,编译的头文件和库目录存放在opencv_dep,将opencv_dep目录拷贝到3rdParty,并命名为opencv - 执行make -j8 && make install,编译的头文件和库目录存放在opencv_dep,将opencv_dep目录拷贝到3rdParty,并命名为opencv
#### 设置环境变量
将依赖库依赖加入环境变量LD_LIBRARY_PATH,在~/.bashrc中添加如下语句:
当操作系统是ubuntu系统时:
```
export LD_LIBRARY_PATH=<path_to_ppocrv5_migraphx>/depend/lib/:$LD_LIBRARY_PATH
```
当操作系统是centos系统时:
```
export LD_LIBRARY_PATH=<path_to_ppocrv5_migraphx>/depend/lib64/:$LD_LIBRARY_PATH
```
然后执行:
```
source ~/.bashrc
```
#### 运行示例 #### 运行示例
...@@ -155,58 +134,112 @@ cmake .. && make ...@@ -155,58 +134,112 @@ cmake .. && make
#运行 #运行
./ppOcrV5cd ./ppOcrV5cd
``` ```
结果图片保存在当前目录下:res.jpg
## result ## result
### Python版本 ### Python版本
输出结果中,每个值分别对应每个label的实际概率 输出结果中展示了识别到的字符,每个字符后面跟着一个置信度,置信度值越大,识别结果越准确
``` ```
产品信息/参数, 0.954 '0', 0.991
发足够的滋养, 1.000 纯臻营养护发素, 1.000
纯臻宫乔护发素, 0.883 '0'.'9''9''3''6''0''4', 0.999
花费了'0'.'4''5''7''3''3''5'秒, 0.993 '1', 0.998
【净含量】:'2''2''0'ml, 0.993 产品信息/参数, 0.934
'0'.'9''9''2''7''2''8', 0.999
'2', 0.999
('4''5'元/每公斤,'1''0''0'公斤起订), 0.970
'0'.'9''7''4''1''7', 0.999
'3', 0.999
每瓶'2''2'元,'1''0''0''0'瓶起订), 0.998 每瓶'2''2'元,'1''0''0''0'瓶起订), 0.998
【品名】:纯臻营养护发素, 0.998 '0'.'9''9''3''9''7''6', 0.999
【品牌】:代加工方式/'0'EMODM, 0.968 '4', 0.998
糖、椰油酰胺丙基甜菜碱、泛醒, 0.997 【品牌】:代加工方式/'0'EMODM, 0.959
【适用人群】:适合所有肤质, 0.998 '0'.'9''8''5''1''3''3', 0.998
【产品编号】:YM'-'X'-''3''0''1''1''0'.'9''6''8''9''9', 0.993 '5', 0.998
('4''5'元/每公斤,'1''0''0'公斤起订), 0.972 【品名】:纯臻营养护发素, 0.997
【主要成分】:鲸蜡硬脂醇、燕麦B'-'葡聚, 0.966 '0'.'9''9''5''0''0''7', 0.999
【主要功能】:可紧致头发磷层,从而达到, 0.994 '6', 0.995
即时持久改善头发光泽的效果,给干燥的头, 0.997 【产品编号】:YM'-'X'-''3''0''1''1''0'.'9''6''8''9''9', 0.973
The detectionvisualizedimagsavedin./vis.jpg, 0.940 '7', 0.999
[Time info] elapsed:3.5736 【净含量】:'2''2''0'ml, 0.994
'0'.'9''9''6''5''7''7', 0.999
'8', 0.998
【适用人群】:适合所有肤质, 0.997
'0'.'9''9''5''8''4''2', 0.999
'9', 0.997
【主要成分】:鲸蜡硬脂醇、燕麦B'-'葡聚, 0.976
'0'.'9''6''1''9''2''8', 0.999
'1''0', 1.000
糖、椰油酰胺丙基甜菜碱、泛醒, 0.996
'0'.'9''2''5''8''9''8', 0.999
'1''1', 0.999
(成品包材), 0.998
'0'.'9''7''2''5''7''3', 0.999
'1''2', 1.000
【主要功能】:可紧致头发磷层,从而达到, 0.992
'0'.'9''9''4''4''4''8', 0.999
'1''3', 0.999
即时持久改善头发光泽的效果,给干燥的头, 0.989
'0'.'9''9''0''1''9''8', 0.999
'1''4', 0.999
发足够的滋养, 0.999
'0'.'9''9''7''6''6''8', 0.999
花费了'0'.'4''5''7''3''3''5'秒, 0.993
[Time info] elapsed:578.6152 ms
``` ```
### C++版本 ### C++版本
``` ```
ocr res :[生成一幅画,负向提示词为:画中不要出现人物。正负提示词结合会] ocr res :花费了'0'.'4''5''7''3''3''5'秒 0.984009
ocr res :[Text_encode_'2'.副文本编码器,补充描述性细节(如材质、光照、] ocr res :'0'.'9''9''7' 0.773633
ocr res :[图片的准确性,过滤掉不需要的元素,例如正向提示词为:提示模型] ocr res :发足够的滋养 0.96818
ocr res :[编码器特征融合提升模型的理解能力。] ocr res :'1' 0.697754
ocr res :[正负 prompt 设置:正向 prompt 和负向 prompt 结合可以提升生成] ocr res :'0''0'.'9''9''0''1''9' 0.656647
ocr res :[语义表示捕获提示词的基础含义和全局语境(如对象、动作),与副] ocr res :即时持久改善头发光泽的效果,给干燥的头 0.996608
ocr res :[的图像不会发生变化,随机种子可以增加生成图像的多样性。] ocr res : 0
ocr res :[Text_encode.主文本编码器,将prompt序列转换为一个综合的] ocr res :【主要功能】:可紧致头发磷层,从而达到 0.993421
ocr res :[响初始噪声和生成结果的确定性,固定种子后,同一个prompt生成] ocr res :'0'.'9''9''4''4' 0.677327
ocr res :[声转化为目标图像。] ocr res : 0
ocr res :[随机数设置:随机数种子是控制生成过程随机性的关键参数,直接影] ocr res :'0'.'9''7''2' 0.637158
ocr res :[Scheduler:调度器,控制图像生成,决定了如何逐步将随机噪] ocr res :(成品包材) 0.901937
ocr res :[程和图像生成过程中有着至关重要的作用。] ocr res :'1' 0.32251
ocr res :[在stable'-'dffusion'-'xl'-'base'-''1'.'0'模型中主要包含一下子组件:] ocr res :糖、椰油酰胺丙基甜菜碱、泛醒 0.993478
ocr res :[Pipeline的配置参数控制图像生成的质量和速度,在扩散模型预测过] ocr res :'0'.'9''2''5' 0.586279
ocr res :[具配置文件中的定义手动加载各个子组件。] ocr res :'1''0' 0.547241
ocr res :[这里使用了扩散模型加载器统一加载了所有的子组件,也可以更] ocr res :【主要成分】:鲸蜡硬脂醇、燕麦B'-'葡聚 0.975303
ocr res :[·'2'.'3'pipeline 配置] ocr res :'0'.'9''1''9' 0.568408
Time taken by task: 3475 ms ocr res : 0
ocr res :'0'.'9''9''5''2' 0.613647
ocr res :【适用人群】:适合所有肤质 0.996882
ocr res :'8' 0.378906
ocr res :'0'.'9''9' 0.595581
ocr res :【净含量】:'2''2''0'ml 0.835671
ocr res :'7' 0.356689
ocr res :【产品编号】:YM'-'X'-''3''0''1''1''0'.'9''6''8''9''9' 0.993695
ocr res :'6' 0.214355
ocr res :'0'.'9''9''5' 0.478052
ocr res :【品名】:纯臻营养护发素 0.996175
ocr res :'5' 0.594727
ocr res : 0
ocr res :'0'.'9''8''5' 0.55166
ocr res :【品牌】:代加工方式/'0'EMODM 0.917768
ocr res :每瓶'2''2'元,'1''0''0''0'瓶起订) 0.974644
ocr res :'0'.'9''9''3''9''7''6' 0.736755
ocr res :'3' 0.486572
ocr res :('4''5'元/每公斤,'1''0''0'公斤起订) 0.940028
ocr res :'0'.'9'm'7' 0.534668
ocr res :'2' 0.961426
ocr res : 0
ocr res :'0'.'9''9''2' 0.524121
ocr res :产品信息/参数 0.913853
ocr res :纯臻营养护发素'0'.'9''9''3''6''0''4' 0.964128
ocr res :'0' 0.380127
ocr res :The detection visualized imagesavedin./vis.jpg 0.94302
[Time info] elapsed: 389 ms
``` ```
### 精度 ### 精度
......
#include "cv_put_Text.hpp"
// Loads the FreeType library and the font face used by putText().
//   font_path: path to a TrueType font file (e.g. SimHei.ttf for CJK glyphs).
// On any failure the process exits, so a fully constructed PutText always
// holds valid `ft` and `face` handles and the destructor can release them.
PutText::PutText(const char* font_path) {
    // Initialize the FreeType library.
    if (FT_Init_FreeType(&ft)) {
        std::cerr << "Error: Could not init FreeType !" << std::endl;
        // Bug fix: the original `return`ed here, leaving `ft` and `face`
        // uninitialized; the destructor would then call FT_Done_* on
        // indeterminate handles (undefined behavior). Exit instead, which
        // is consistent with the font-loading failure path below.
        exit(-1);
    }
    // Load face index 0 from the font file.
    if (FT_New_Face(ft, font_path, 0, &face)) {
        // Bug fix: error string said "front" instead of "font".
        std::cerr << "Error: Load font failed!" << std::endl;
        exit(-1);
    }
}
PutText::~PutText() {
    // Release FreeType resources. Guard each handle: the constructor may
    // have returned early without creating the face (or the library).
    if (face) {
        FT_Done_Face(face);
    }
    if (ft) {
        FT_Done_FreeType(ft);
    }
}
void PutText::putText(cv::Mat& img, const std::string& text, int x, int y, int fontSize, cv::Scalar color) {
    // Overlay `text` (UTF-8, Chinese supported) onto `img` starting at
    // pixel (x, y), alpha-blending each glyph's 8-bit coverage bitmap
    // into the BGR image using `color`.
    if (img.empty()) {
        std::cerr << "Empty image!";
        return;
    }
    if (!face) {
        // Font face never loaded (constructor failed) — nothing to draw with.
        std::cerr << "Error: No font face loaded!" << std::endl;
        return;
    }
    // Set the glyph size in pixels.
    FT_Set_Pixel_Sizes(face, 0, fontSize);
    int pen_x = x;
    int pen_y = y + fontSize; // shift the baseline so (x, y) is the text's top edge
    const size_t len = text.size();
    // Walk the string one UTF-8 code point at a time.
    for (size_t i = 0; i < len; ) {
        // Decode a 1..3 byte UTF-8 sequence; malformed or truncated
        // sequences (including 4-byte ones) are skipped byte-by-byte.
        const unsigned char lead = static_cast<unsigned char>(text[i]);
        unsigned long unicode = 0;
        if (lead < 0x80) {
            unicode = lead;
            i += 1;
        } else if ((lead & 0xE0) == 0xC0 && i + 1 < len) {
            unicode = ((lead & 0x1FUL) << 6)
                    | (static_cast<unsigned char>(text[i + 1]) & 0x3F);
            i += 2;
        } else if ((lead & 0xF0) == 0xE0 && i + 2 < len) {
            unicode = ((lead & 0x0FUL) << 12)
                    | ((static_cast<unsigned char>(text[i + 1]) & 0x3FUL) << 6)
                    | (static_cast<unsigned char>(text[i + 2]) & 0x3F);
            i += 3;
        } else {
            i++; // invalid or truncated UTF-8 — skip the lead byte, don't overread
            continue;
        }
        // Render the glyph for this code point.
        if (FT_Load_Char(face, unicode, FT_LOAD_RENDER)) {
            std::cerr << "Error: Could not load glyph" << std::endl;
            continue;
        }
        // Blend the coverage bitmap into the image. Rows are addressed with
        // `pitch` (bytes per row), which may differ from `width` when the
        // renderer pads rows — indexing with `width` garbles glyphs.
        const FT_Bitmap& bitmap = face->glyph->bitmap;
        for (unsigned int row = 0; row < bitmap.rows; ++row) {
            for (unsigned int col = 0; col < bitmap.width; ++col) {
                const unsigned char intensity =
                    bitmap.buffer[static_cast<int>(row) * bitmap.pitch + static_cast<int>(col)];
                if (intensity == 0) {
                    continue;
                }
                const int py = pen_y - face->glyph->bitmap_top + static_cast<int>(row);
                const int px = pen_x + face->glyph->bitmap_left + static_cast<int>(col);
                // Clip glyphs that fall partially outside the image —
                // unchecked img.at<> here is out-of-bounds UB.
                if (py < 0 || py >= img.rows || px < 0 || px >= img.cols) {
                    continue;
                }
                const float alpha = intensity / 255.0f;
                cv::Vec3b& pixel = img.at<cv::Vec3b>(py, px);
                pixel[0] = cv::saturate_cast<uchar>(color[0] * alpha + pixel[0] * (1.0f - alpha));
                pixel[1] = cv::saturate_cast<uchar>(color[1] * alpha + pixel[1] * (1.0f - alpha));
                pixel[2] = cv::saturate_cast<uchar>(color[2] * alpha + pixel[2] * (1.0f - alpha));
            }
        }
        // Advance the pen; advance.x is in 1/64-pixel units.
        pen_x += face->glyph->advance.x >> 6;
    }
}
\ No newline at end of file
#pragma once
#include <ft2build.h>
#include FT_FREETYPE_H
#include <opencv2/opencv.hpp>
class PutText {
private:
    // Null-initialized so the destructor is safe even when the
    // constructor fails before creating these handles.
    FT_Library ft{nullptr};   // FreeType library handle
    FT_Face face{nullptr};    // loaded font face (null if loading failed)
public:
    /**
     * @brief Construct the renderer and load a TrueType font.
     * @param font_path path to a .ttf file (e.g. SimHei.ttf for Chinese)
     */
    PutText(const char* font_path);
    // NOTE(review): owns FreeType handles but is copyable — copying would
    // double-free in ~PutText; consider deleting copy operations.
    ~PutText();
    /**
     * @brief Draw text (UTF-8, Chinese supported) onto an image.
     * @param img image to draw on (modified in place)
     * @param text UTF-8 encoded string to overlay
     * @param x horizontal pixel coordinate where the text starts
     * @param y vertical pixel coordinate of the text's top edge
     * @param fontSize glyph height in pixels
     * @param color text color (BGR order), default green
     *
     * @return none
     */
    void putText(cv::Mat& img, const std::string& text, int x, int y, int fontSize=2, cv::Scalar color=cv::Scalar(0, 255, 0));
};
#include "ocr_engine.hpp" #include "ocr_engine.hpp"
using namespace ppocr; using namespace ppocr;
int main(int argc, char** argv) int main(int argc, char** argv){
{
std::string det_model_onnx = "../Resource/Models/ppocrv5_server_det_infer.onnx"; std::string det_model_onnx = "../Resource/Models/ppocrv5_server_det_infer.onnx";
std::string rec_model_onnx = "../Resource/Models/ppocrv5_server_rec_infer.onnx"; std::string rec_model_onnx = "../Resource/Models/ppocrv5_server_rec_infer.onnx";
std::string img_path = "../Resource/Images/20250703205038.png"; std::string img_path = "../Resource/Images/demo.png";
std::string character_dict_path = "../Resource/ppocr_keys_v5.txt"; std::string character_dict_path = "../Resource/ppocr_keys_v5.txt";
std::string front = "../Resource/fonts/SimHei.ttf";
float segm_thres=0.3; float segm_thres=0.3;
float box_thresh=0.3; float box_thresh=0.3;
ppOcrEngine ocr_engine(det_model_onnx, ppOcrEngine ocr_engine(det_model_onnx,
rec_model_onnx, rec_model_onnx,
character_dict_path, character_dict_path,
front,
segm_thres, segm_thres,
box_thresh, box_thresh,
true, true,
"fp32"); "fp16");
cv::Mat img=cv::imread(img_path); cv::Mat img=cv::imread(img_path);
ocr_engine.forward(img); ocr_engine.forward(img);
return 0; return 0;
} }
\ No newline at end of file
This diff is collapsed.
...@@ -10,15 +10,9 @@ ...@@ -10,15 +10,9 @@
#include "Filesystem.h" #include "Filesystem.h"
#include "SimpleLog.h" #include "SimpleLog.h"
#include "clipper.h" #include "clipper.h"
#include "cv_put_Text.hpp"
namespace ppocr{ namespace ppocr{
struct _TEXT_BOX
{
cv::Rect t_rect;
float score;
};
using T_BOX = struct _TEXT_BOX;
class CTCDecode class CTCDecode
{ {
private: private:
...@@ -38,7 +32,6 @@ namespace ppocr{ ...@@ -38,7 +32,6 @@ namespace ppocr{
void* output_buffer_device; void* output_buffer_device;
void* output_buffer_host; void* output_buffer_host;
migraphx::shape input_shape; migraphx::shape input_shape;
migraphx::shape output_shape; migraphx::shape output_shape;
std::string input_name; std::string input_name;
...@@ -51,7 +44,7 @@ namespace ppocr{ ...@@ -51,7 +44,7 @@ namespace ppocr{
public: public:
CTCDecode(std::string rec_model_path, CTCDecode(std::string rec_model_path,
std::string precision_mode="fp32", std::string precision_mode="fp16",
int image_width=480, int image_width=480,
int image_height=48, int image_height=48,
int channel=3, int channel=3,
...@@ -61,7 +54,9 @@ namespace ppocr{ ...@@ -61,7 +54,9 @@ namespace ppocr{
~CTCDecode(); ~CTCDecode();
/** /**
* @brief 字符识别编码,可支持,最长可支持预测90个字符,18385个字符 * @brief 字符识别、编码API 字符识别编码,可支持,最长可支持预测90个字符,18385个字符
* @param img 输入图片
* @return 编码后的字符串
*/ */
std::string forward(cv::Mat& img); std::string forward(cv::Mat& img);
...@@ -93,7 +88,6 @@ namespace ppocr{ ...@@ -93,7 +88,6 @@ namespace ppocr{
* @return 成功:text,失败:"" * @return 成功:text,失败:""
*/ */
std::string decode(std::vector<float>& probs,std::vector<int>& indexs,float& mean_prob); std::string decode(std::vector<float>& probs,std::vector<int>& indexs,float& mean_prob);
}; };
class OcrDet class OcrDet
...@@ -117,25 +111,33 @@ namespace ppocr{ ...@@ -117,25 +111,33 @@ namespace ppocr{
float* data; float* data;
//Allocate device buffer and host buffer,if offload_copy is false //当offload_copy为true时,分配设备内存
std::unordered_map<std::string, migraphx::argument> dev_argument; std::unordered_map<std::string, migraphx::argument> dev_argument;
void* input_buffer_device; void* input_buffer_device;
void* output_buffer_device; void* output_buffer_device;
void* output_buffer_host; void* output_buffer_host;
//postprocess //后处理
int n_channel; int n_channel;
int feature_size; //single channel feature map size. int feature_size; //单个通道的特征大小,例如模型输出[1,3,32,32],feature_size= 32x32.
int output_width; int output_width;
int output_height; int output_height;
int max_candidates;//maximun number of candidates contours. int max_candidates;//最大检测的候选区域.
public: public:
OcrDet(std::string det_model_path, OcrDet(std::string det_model_path,
std::string precision_mode="float32", std::string precision_mode="fp16",
bool offload_copy = true, bool offload_copy = true,
float segm_thres = 0.3, float segm_thres = 0.3,
float box_thresh = 0.7); float box_thresh = 0.7);
~OcrDet(); ~OcrDet();
/**
* @brief 字符检测模型推理API
* @param img 原始图片
* @param text_roi_boxes 字符区域坐标,格式:[[[tl.x, tl.y], [tr.x, tr.y],[], [br.x, br.y], [bl.x, bl.y]]]]
* | | | |
* 左上坐标 右上坐标 右下坐标 左下坐标
* @return 成功返回true,失败返回false
*/
bool forward(cv::Mat& img,std::vector<std::vector<std::vector<int>>>& text_roi_boxes); bool forward(cv::Mat& img,std::vector<std::vector<std::vector<int>>>& text_roi_boxes);
private: private:
...@@ -159,17 +161,18 @@ namespace ppocr{ ...@@ -159,17 +161,18 @@ namespace ppocr{
*/ */
int postprocess(float* feature, std::vector<std::vector<std::vector<int>>> &boxes); int postprocess(float* feature, std::vector<std::vector<std::vector<int>>> &boxes);
/**
* @brief 后处理,文本区域提取
* @param pred 二值图(这里字符检测使用了dbnet分割字符区域,二值图对应了文本区域)
int boxes_from_bitmap(cv::Mat& bit_map,std::vector<T_BOX>& box); * @param bitmap 二值图(pred做形态学运算输出bitmap,结合pred结算平均边框得分)
* @return 成功:0,失败:-1
*/
std::vector<std::vector<std::vector<int>>>boxes_from_bitmap( std::vector<std::vector<std::vector<int>>>boxes_from_bitmap(
const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh, const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh,
const float &det_db_unclip_ratio, const bool &use_polygon_score); const float &det_db_unclip_ratio, const bool &use_polygon_score);
std::vector<std::vector<float>> Mat2Vector(cv::Mat mat); std::vector<std::vector<float>> Mat2Vector(cv::Mat mat);
/** /**
* @brief 统计多边形区域的平均得分 * @brief 统计多边形区域的平均得分
* @param contour 字符区域的轮廓点集合 * @param contour 字符区域的轮廓点集合
...@@ -238,9 +241,6 @@ namespace ppocr{ ...@@ -238,9 +241,6 @@ namespace ppocr{
*/ */
float box_score_fast(std::vector<std::vector<float>> box_array,cv::Mat pred) ; float box_score_fast(std::vector<std::vector<float>> box_array,cv::Mat pred) ;
void visualize_boxes(const cv::Mat &srcimg,
const std::vector<std::vector<std::vector<int>>> &boxes) ;
bool text_recognition(const cv::Mat &srcimg, bool text_recognition(const cv::Mat &srcimg,
const std::vector<std::vector<std::vector<int>>> &boxes); const std::vector<std::vector<std::vector<int>>> &boxes);
...@@ -250,16 +250,21 @@ namespace ppocr{ ...@@ -250,16 +250,21 @@ namespace ppocr{
private: private:
std::shared_ptr<OcrDet> text_detector; std::shared_ptr<OcrDet> text_detector;
std::shared_ptr<CTCDecode> text_recognizer; std::shared_ptr<CTCDecode> text_recognizer;
std::shared_ptr<PutText> ft2 ;
public: public:
ppOcrEngine(const std::string &det_model_path, ppOcrEngine(const std::string &det_model_path,
const std::string &rec_model_path, const std::string &rec_model_path,
const std::string &character_dict_path, const std::string &character_dict_path,
const std::string front,
const float segm_thres=0.3, const float segm_thres=0.3,
const float box_thresh=0.7, const float box_thresh=0.7,
bool offload_copy =true, bool offload_copy =true,
std::string precision_mode = "fp32") ; std::string precision_mode = "fp16") ;
~ppOcrEngine(); ~ppOcrEngine();
std::vector<std::string> forward(cv::Mat &srcimg); std::vector<std::string> forward(cv::Mat &srcimg);
cv::Mat visualize_text(std::vector<std::string> texts,std::vector<cv::Point> points, cv::Mat &img);
void visualize_boxes(cv::Mat &srcimg,
const std::vector<std::vector<std::vector<int>>> &boxes) ;
}; };
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment