Commit 417a4ca0 authored by liuhy's avatar liuhy
Browse files

1、新增warm up功能 2、新增图片叠加OCR字符功能

parent 369751c2
...@@ -10,6 +10,7 @@ set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} -std=c++17) ...@@ -10,6 +10,7 @@ set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} -std=c++17)
set(CMAKE_BUILD_TYPE release) set(CMAKE_BUILD_TYPE release)
set(INCLUDE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/Src/ set(INCLUDE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/Src/
/usr/include/freetype2
$ENV{DTKROOT}/include/ $ENV{DTKROOT}/include/
${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility ${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility
${CMAKE_CURRENT_SOURCE_DIR}/3rdParty/opencv/include) ${CMAKE_CURRENT_SOURCE_DIR}/3rdParty/opencv/include)
...@@ -17,6 +18,7 @@ include_directories(${INCLUDE_PATH}) ...@@ -17,6 +18,7 @@ include_directories(${INCLUDE_PATH})
# 添加依赖库路径 # 添加依赖库路径
set(LIBRARY_PATH ${CMAKE_CURRENT_SOURCE_DIR}/3rdParty/opencv/lib set(LIBRARY_PATH ${CMAKE_CURRENT_SOURCE_DIR}/3rdParty/opencv/lib
/usr/lib/x86_64-linux-gnu
$ENV{DTKROOT}/lib/) $ENV{DTKROOT}/lib/)
link_directories(${LIBRARY_PATH}) link_directories(${LIBRARY_PATH})
...@@ -24,6 +26,7 @@ link_directories(${LIBRARY_PATH}) ...@@ -24,6 +26,7 @@ link_directories(${LIBRARY_PATH})
set(LIBRARY opencv_core set(LIBRARY opencv_core
opencv_imgproc opencv_imgproc
opencv_imgcodecs opencv_imgcodecs
freetype
opencv_dnn opencv_dnn
migraphx migraphx
migraphx_gpu migraphx_gpu
...@@ -36,6 +39,7 @@ link_libraries(${LIBRARY}) ...@@ -36,6 +39,7 @@ link_libraries(${LIBRARY})
set(SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/Src/main.cpp set(SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/Src/main.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/Filesystem.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/Filesystem.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/clipper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/clipper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/cv_put_Text.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Src/ocr_engine.cpp) ${CMAKE_CURRENT_SOURCE_DIR}/Src/ocr_engine.cpp)
# 添加可执行目标 # 添加可执行目标
......
Doc/Images/CRNN.png

112 KB | W: | H:

Doc/Images/CRNN.png

96.4 KB | W: | H:

Doc/Images/CRNN.png
Doc/Images/CRNN.png
Doc/Images/CRNN.png
Doc/Images/CRNN.png
  • 2-up
  • Swipe
  • Onion skin
Doc/Images/DBNet.png

597 KB | W: | H:

Doc/Images/DBNet.png

311 KB | W: | H:

Doc/Images/DBNet.png
Doc/Images/DBNet.png
Doc/Images/DBNet.png
Doc/Images/DBNet.png
  • 2-up
  • Swipe
  • Onion skin
...@@ -4,10 +4,14 @@ PP-OCRv5 是PP-OCR新一代文字识别解决方案,该方案聚焦于多场 ...@@ -4,10 +4,14 @@ PP-OCRv5 是PP-OCR新一代文字识别解决方案,该方案聚焦于多场
## 模型简介 ## 模型简介
### 文本检测 ### 文本检测
文本检测使用了dbnet(论文地址:https://arxiv.org/pdf/1911.08947),网络结构:![alt text](Images/DBNet.png),模型输出概率图,并用Vatti Clipping算法对字符区域多边形简化处理,sample中借助Clipping 库。 sample中使用动态shape(N,3,H,C),最大输入shape是[1,3,640,640],模型地址:Resource/Models/ppocrv5_server_det_infer.onnx 文本检测使用了dbnet( 论文地址:https://arxiv.org/pdf/1911.08947 ),网络结构:
![alt text](Images/DBNet.png)
模型输出概率图,并用Vatti Clipping算法对字符区域多边形简化处理,sample中借助Clipping 库。 sample模型输入shape为[1,3,640,640],模型路径:Resource/Models/ppocrv5_server_det_infer.onnx
### 文本识别 ### 文本识别
文本识别使用了CRNN+CTCDecode(https://arxiv.org/pdf/2009.09941),网络结构:![alt text](Images/CRNN.png),sample中使用了动态shape (N,3,48,W),最大输入shape是[1,3,48,720],模型地址:Resource/Models/ppocrv5_server_rec_infer.onnx 文本识别使用了CRNN+CTCDecode( https://arxiv.org/pdf/2009.09941 ),网络结构:
![(Images/CRNN.png)](Images/CRNN.png)
sample中模型输入shape为[1,3,48,720],模型路径:Resource/Models/ppocrv5_server_rec_infer.onnx
## 预处理 ## 预处理
### 检测模型预处理 ### 检测模型预处理
...@@ -110,7 +114,7 @@ class ppOcrEngine { ...@@ -110,7 +114,7 @@ class ppOcrEngine {
const float segm_thres=0.3, const float segm_thres=0.3,
const float box_thresh=0.7, const float box_thresh=0.7,
bool offload_copy =true, bool offload_copy =true,
std::string precision_mode = "fp32") ; std::string precision_mode = "fp16") ;
/** /**
* @brief OCR engine初始化 * @brief OCR engine初始化
* @param det_model_path 字符检测模型路径 * @param det_model_path 字符检测模型路径
...@@ -119,7 +123,7 @@ class ppOcrEngine { ...@@ -119,7 +123,7 @@ class ppOcrEngine {
* @param segm_thres 像素分割阈值 * @param segm_thres 像素分割阈值
* @param box_thresh 字符区域box阈值 * @param box_thresh 字符区域box阈值
* @param offload_copy 内存拷贝存模式, 支持两种数据拷贝方式:*offload_copy=true、offload_copy=false。当offload_copy为true时,不需*要进行内存拷贝,如果为false,需要先预分配输入输出的设备内存,并在推理* *前,将预处理数据拷贝到设备内存,推理后将模型输出从设备内存中拷贝出来 * @param offload_copy 内存拷贝存模式, 支持两种数据拷贝方式:*offload_copy=true、offload_copy=false。当offload_copy为true时,不需*要进行内存拷贝,如果为false,需要先预分配输入输出的设备内存,并在推理* *前,将预处理数据拷贝到设备内存,推理后将模型输出从设备内存中拷贝出来
* @param precision_mode 精度模式,支持:fp32、fp16 * @param precision_mode 精度模式,支持:fp32、fp16,默认支持fp16
* *
* @return NONE * @return NONE
*/ */
...@@ -130,36 +134,11 @@ class ppOcrEngine { ...@@ -130,36 +134,11 @@ class ppOcrEngine {
class CTCDecode class CTCDecode
{ {
private: private:
//inference image ...
float* data;
std::unordered_map<std::string, migraphx::argument> device_data;
migraphx::program net;
int batch_size;
int net_input_width;
int net_input_height;
int net_input_channel;
bool offload_copy;
std::string precision_mode;
std::unordered_map<std::string, migraphx::argument> dev_argument;
void* input_buffer_device;
void* output_buffer_device;
void* output_buffer_host;
migraphx::shape input_shape;
migraphx::shape output_shape;
std::string input_name;
std::string output_name;
//postprocess: n_channel->model output channel,feature_size--> feature size one channel
int n_channel;
int feature_size;
std::vector<std::string> k_words;
public: public:
CTCDecode(std::string rec_model_path, CTCDecode(std::string rec_model_path,
std::string precision_mode="fp32", std::string precision_mode="fp16",
int image_width=480, int image_width=480,
int image_height=48, int image_height=48,
int channel=3, int channel=3,
...@@ -169,73 +148,21 @@ class ppOcrEngine { ...@@ -169,73 +148,21 @@ class ppOcrEngine {
~CTCDecode(); ~CTCDecode();
/** /**
* @brief 字符识别编码,可支持,最长可支持预测90个字符,18385个字符 * @brief 字符识别、编码API 字符识别编码,可支持,最长可支持预测90个字符,18385个字符
* @param img 输入图片
* @return 编码后的字符串
*/ */
std::string forward(cv::Mat& img); std::string forward(cv::Mat& img);
private: private:
/** ...
* @brief 预处理
* pixel = (src_img*scale-0.5)/0.5;
* scale = 1.0/255
* @param img 字符图片
* @param data 预处理输出
* @param img_w 模型输入宽
* @param img_h 模型输入高
* @return 成功:true,失败:false
*/
bool preproc(cv::Mat img,float* data,int img_w=480,int img_h=48);
/**
* @brief 模型预测后处理,获取每行中概率最大的字符,组成一句长度最大为90个字符的句子,模型预测输出shape=[1,90,18385]
* @param feature model output
* @return 成功:text,失败:""
*/
std::string postprocess(float* feature);
/**
* @brief 解码,将模型预测输出与字符集关联起来
* @param probs 模型预测的最大概率
* @param indexs 模型预测的最大概率的索引值
* @param mean_prob 预测句子的平均概率
* @return 成功:text,失败:""
*/
std::string decode(std::vector<float>& probs,std::vector<int>& indexs,float& mean_prob);
}; };
class OcrDet class OcrDet
{ {
private: private:
std::string precision_mode; ...
bool offload_copy;
migraphx::program net;
migraphx::shape input_shape;
migraphx::shape output_shape;
std::string input_name;
std::string output_name;
int det_batch_size;
int data_size ;
float segm_thres;
float box_thres;
int net_input_width;
int net_input_height;
int net_input_channel;
float* data;
//Allocate device buffer and host buffer,if offload_copy is false
std::unordered_map<std::string, migraphx::argument> dev_argument;
void* input_buffer_device;
void* output_buffer_device;
void* output_buffer_host;
//postprocess
int n_channel;
int feature_size; //single channel feature map size.
int output_width;
int output_height;
int max_candidates;//maximun number of candidates contours.
public: public:
OcrDet(std::string det_model_path, OcrDet(std::string det_model_path,
...@@ -244,113 +171,19 @@ class ppOcrEngine { ...@@ -244,113 +171,19 @@ class ppOcrEngine {
float segm_thres = 0.3, float segm_thres = 0.3,
float box_thresh = 0.7); float box_thresh = 0.7);
~OcrDet(); ~OcrDet();
bool forward(cv::Mat& img,std::vector<std::vector<std::vector<int>>>& text_roi_boxes);
private:
/**
* @brief 预处理
* pixel = (scale*src_img*mean/std);
* scale = 1.0/255
* mean = [0.485, 0.456, 0.406]
* std = [0.229, 0.224, 0.225]
* @param img 字符图片
* @param data 预处理输出
* @return 成功:w,h维度的缩放比例
*/
cv::Size preproc(cv::Mat img,float* data);
/**
* @brief 后处理,通过模型预测的二值图获取文本区域
* @param feature 模型预测tensor(这里字符检测使用了dbnet)
* @param boxes 字符区域坐标
* @return 成功:0,失败:-1
*/
int postprocess(float* feature, std::vector<std::vector<std::vector<int>>> &boxes);
int boxes_from_bitmap(cv::Mat& bit_map,std::vector<T_BOX>& box);
std::vector<std::vector<std::vector<int>>>boxes_from_bitmap(
const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh,
const float &det_db_unclip_ratio, const bool &use_polygon_score);
std::vector<std::vector<float>> Mat2Vector(cv::Mat mat);
/**
* @brief 统计多边形区域的平均得分
* @param contour 字符区域的轮廓点集合
* @param pred 模型预测二值图
* @return score
*/
float polygon_score_acc(std::vector<cv::Point> contour,cv::Mat pred);
/**
* @brief 对模型预测的区域进行向内或向外扩散,扩散比例是unclip_ratio ,目的是找到更加合适的字符区域
* @param box 字符区域坐标
* @param pred 模型预测二值图
* @return 处理后的字符区域
*/
cv::RotatedRect unClip(std::vector<std::vector<float>> box,
const float &unclip_ratio);
/** /**
* @brief 计算偏移距离 * @brief 字符检测模型推理API
* distance = area * unclip_ratio / dist; * @param img 原始图片
* area = ∑(x_i*y_{i+1} - x_{i+1}*y_i) * @param text_roi_boxes 字符区域坐标,格式:[[[tl.x, tl.y], [tr.x, tr.y],[], [br.x, br.y], [bl.x, bl.y]]]]
* dist = sqrtf(dx * dx + dy * dy) * | | | |
* * 左上坐标 右上坐标 右下坐标 左下坐标
* @param box 字符区域坐标 * @return 成功返回true,失败返回false
* @param unclip_ratio 缩放比例
* @param distance 偏移距离
* @return NONE
*/ */
void get_contour_area(const std::vector<std::vector<float>> &box, bool forward(cv::Mat& img,std::vector<std::vector<std::vector<int>>>& text_roi_boxes);
float unclip_ratio, float &distance) ;
/**
* @brief 无效字符区域过滤。首先将boxes映射回原始图像,然后过滤无效区域
* @param boxes 字符区域坐标
* @param ratio_h 垂直方向缩放比例
* @param ratio_w 水平方向缩放比例
* @param srcimg 原始图像
*
* @return 字符区域有效坐标
*/
std::vector<std::vector<std::vector<int>>> filter_det_res(std::vector<std::vector<std::vector<int>>> boxes,
float ratio_h, float ratio_w, cv::Mat srcimg);
/**
* @brief 对字符区域按照从上到下,从左到右的顺序排序
* @param pts 字符区域坐标
*
* @return 字符区域有效坐标
*/
std::vector<std::vector<int>> order_points_clockwise(std::vector<std::vector<int>> pts);
/**
* @brief 获取最小矩形坐标
* @param box 字符区域最小外接矩形的坐标
* @param ssid box的最大边
* @return 字符区域有效坐标
*/
std::vector<std::vector<float>> get_mini_boxes(cv::RotatedRect box,float &ssid) ;
/**
* @brief 计算bitmap上的t_rect区域的平均分数
* @param box_array 模型预测的字符区域
* @param pred 模型预测二值图
* @return score
*/
float box_score_fast(std::vector<std::vector<float>> box_array,cv::Mat pred) ;
void visualize_boxes(const cv::Mat &srcimg,
const std::vector<std::vector<std::vector<int>>> &boxes) ;
bool text_recognition(const cv::Mat &srcimg, private:
const std::vector<std::vector<std::vector<int>>> &boxes); ...
}; };
...@@ -358,119 +191,84 @@ class ppOcrEngine { ...@@ -358,119 +191,84 @@ class ppOcrEngine {
## 推理 ## 推理
### 字符检测模型推理 - 字符检测
- 字符识别、解码
- 字符框可视化
- OCR结果可视化
```c++ ```c++
bool OcrDet::forward(cv::Mat& img,std::vector<std::vector<std::vector<int>>>& text_roi_boxes) std::vector<std::string> ppOcrEngine::forward(cv::Mat &srcimg){
std::vector<std::vector<std::vector<int>>> text_roi_boxes;
std::vector<std::string> text_vec;
auto start = std::chrono::high_resolution_clock::now();
//字符区域检测
text_detector->forward(srcimg,text_roi_boxes);
if(text_roi_boxes.size() == 0)
{ {
std::vector<std::vector<std::vector<int>>> boxes; std::cout<<"Not found text roi !\n";
//输入数据预处理 return std::vector<std::string>();
cv::Size ratio = preproc(img,data);
/*
支持两种数据拷贝方式:offload_copy=true、offload_copy=false。当offload_copy为true时,不需要进行内存拷贝,如果为false,需要先预分配输入输出的设备内存,并在推理前,将预处理数据拷贝到设备内存,推理后将模型输出从设备内存中拷贝出来,在做后处理。
*/
if( this->offload_copy ==false )
{
hipMemcpy(input_buffer_device,
(void*)data,
this->input_shape.bytes(),
hipMemcpyHostToDevice);
std::vector<migraphx::argument> results = net.eval(dev_argument);
hipMemcpy(output_buffer_host,
(void*)output_buffer_device,
output_shape.bytes(),
hipMemcpyDeviceToHost);
postprocess((float *)output_buffer_host,boxes);
std::cout<<"copy mode ..."<<std::endl;
}else{
std::unordered_map<std::string, migraphx::argument> inputData;
inputData[input_name] = migraphx::argument{input_shape, (float *)data};
std::vector<migraphx::argument> results = net.eval(inputData);
migraphx::argument result = results[0] ; //get output data
postprocess((float *)result.data(),boxes);
std::cout<<"offload copy mode ..."<<std::endl;
}
//计算等比缩放比例
float ratio_w = float(net_input_width) / float(img.cols);
float ratio_h = float(net_input_height) / float(img.rows);
//过滤无效框
text_roi_boxes = filter_det_res(boxes, ratio_h, ratio_w, img);
//可视化检测结果
visualize_boxes(img,text_roi_boxes);
// TextRecognition(img,boxes);
return true;
} }
std::vector<cv::Point> points;
``` //字符识别+编码
### 字符识别推理 for (int n = 0; n < text_roi_boxes.size(); n++) {
```c++
std::string CTCDecode::forward(cv::Mat& img) cv::Rect rect;
cv::Mat text_roi_mat;
rect.x = text_roi_boxes[n][0][0];
rect.y = text_roi_boxes[n][0][1];
rect.width = text_roi_boxes[n][2][0] - text_roi_boxes[n][0][0];
rect.height = text_roi_boxes[n][2][1] - text_roi_boxes[n][0][1];
if(rect.width <3 || rect.height<3)
{ {
//预处理 continue;
preproc(img,data,net_input_width,net_input_height);
/*
支持两种数据拷贝方式:offload_copy=true、offload_copy=false。当offload_copy为true时,不需要进行内存拷贝,如果为false,需要先预分配输入输出的设备内存,并在推理前,将预处理数据拷贝到设备内存,推理后将模型输出从设备内存中拷贝出来,在做后处理。
*/
if( this->offload_copy ==false )
{
hipMemcpy(input_buffer_device,
(void*)data,
this->input_shape.bytes(),
hipMemcpyHostToDevice);
std::vector<migraphx::argument> results = net.eval(dev_argument);
hipMemcpy(output_buffer_host,
(void*)output_buffer_device,
output_shape.bytes(),
hipMemcpyDeviceToHost);
//模型后处理,获取字符的最大概率和索引,并根据索引在字符库中查找对应的字符,然后合成一个句子
std::string text = postprocess((float *)output_buffer_device);
return text;
}else{
std::unordered_map<std::string, migraphx::argument> inputData;
inputData[input_name] = migraphx::argument{input_shape, (float *)data};
std::vector<migraphx::argument> results = net.eval(inputData);
migraphx::argument result = results[0] ;
std::string text = postprocess((float *)result.data());
// std::cout<<"ctc: offload copy mode ..."<<std::endl;
return text;
} }
text_roi_mat = srcimg(rect).clone();
std::string text = text_recognizer->forward(text_roi_mat);
text_vec.push_back(text);
points.push_back(cv::Point(rect.x,rect.y));
} }
auto end = std::chrono::high_resolution_clock::now();
auto duration_ms = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
std::cout<<"[Time info] elapsed: "<< duration_ms.count() <<" ms\n";
//字符框可视化
visualize_boxes(srcimg,text_roi_boxes);
//OCR可视化
cv::Mat res_img = visualize_text(text_vec,points, srcimg);
...
}
``` ```
# Ocrv5 API调用说明 # Ocrv5 API调用说明
API调用步骤如下: API调用步骤如下:
- 类实例化 - 类实例化
- 读取测试图片
- 识别接口调用 - 识别接口调用
例: 例:
```c++ ```c++
int main(int argc, char** argv) int main(int argc, char** argv){
{
std::string det_model_onnx = "../Resource/Models/ppocrv5_server_det_infer.onnx"; std::string det_model_onnx = "../Resource/Models/ppocrv5_server_det_infer.onnx";
std::string rec_model_onnx = "../Resource/Models/ppocrv5_server_rec_infer.onnx"; std::string rec_model_onnx = "../Resource/Models/ppocrv5_server_rec_infer.onnx";
std::string img_path = "../Resource/Images/20250703205038.png"; std::string img_path = "../Resource/Images/demo.png";
std::string character_dict_path = "../Resource/ppocr_keys_v5.txt"; std::string character_dict_path = "../Resource/ppocr_keys_v5.txt";
std::string front = "../Resource/fonts/SimHei.ttf";
float segm_thres=0.3; float segm_thres=0.3;
float box_thresh=0.3; float box_thresh=0.3;
ppOcrEngine ocr_engine(det_model_onnx, ppOcrEngine ocr_engine(det_model_onnx,
rec_model_onnx, rec_model_onnx,
character_dict_path, character_dict_path,
front,
segm_thres, segm_thres,
box_thresh, box_thresh,
true, true,
"fp32"); "fp16");
cv::Mat img=cv::imread(img_path); cv::Mat img=cv::imread(img_path);
ocr_engine.forward(img); ocr_engine.forward(img);
return 0; return 0;
} }
``` ```
sample支持两种精度推理(fp32和fp16,默认是fp32),精度和内存拷贝方式分别通过ocr_engine的构造函数传入参数来设置。 sample支持两种精度推理(fp32和fp16,默认是fp16),精度和内存拷贝方式分别通过ocr_engine的构造函数传入参数来设置。
\ No newline at end of file \ No newline at end of file
...@@ -4,10 +4,14 @@ PP-OCRv5 是PP-OCR新一代文字识别解决方案,该方案聚焦于多场 ...@@ -4,10 +4,14 @@ PP-OCRv5 是PP-OCR新一代文字识别解决方案,该方案聚焦于多场
## 模型简介 ## 模型简介
### 文本检测 ### 文本检测
文本检测使用了dbnet(论文地址:https://arxiv.org/pdf/1911.08947),网络结构:![alt text](Images/DBNet.png),模型输出概率图,并用Vatti Clipping算法对字符区域多边形简化处理。 sample中使用动态shape(N,3,H,C),最大输入shape是[1,3,640,640],模型地址:Resource/Models/ppocrv5_server_det_infer.onnx 文本检测使用了dbnet( 论文地址:https://arxiv.org/pdf/1911.08947 ),网络结构:
![alt text](Images/DBNet.png)
模型输出概率图,并用Vatti Clipping算法对字符区域多边形简化处理,sample中借助Clipping 库。 sample中模型输入shape为[1,3,640,640],模型路径:Resource/Models/ppocrv5_server_det_infer.onnx
### 文本识别 ### 文本识别
文本识别使用了CRNN+CTCDecode(https://arxiv.org/pdf/2009.09941),网络结构:![alt text](Images/CRNN.png),sample中使用了动态shape (N,3,48,W),最大输入shape是[1,3,48,720],模型地址:Resource/Models/ppocrv5_server_rec_infer.onnx 文本识别使用了CRNN+CTCDecode( https://arxiv.org/pdf/2009.09941 ),网络结构:
![(Images/CRNN.png)](Images/CRNN.png)
sample中模型输入shape为[1,3,48,720],模型路径:Resource/Models/ppocrv5_server_rec_infer.onnx
## 预处理 ## 预处理
### 检测模型预处理 ### 检测模型预处理
...@@ -80,28 +84,20 @@ def preprocess(self, img, max_wh_ratio): ...@@ -80,28 +84,20 @@ def preprocess(self, img, max_wh_ratio):
imgH, imgW = self.rec_input_size imgH, imgW = self.rec_input_size
max_h,max_w = self.rec_input_size max_h,max_w = self.rec_input_size
h, w = img.shape[:2] h, w = img.shape[:2]
# re_size = (max_w,max_h)
#保留H的原始维度 #保留H的原始维度
if h <= max_h: if h <= max_h:
ratio = max_h / h ratio = max_h / h
w = int(w*ratio) w = int(w*ratio)
if w <= max_w: if w <= max_w:
re_size =(w,max_h) re_size =(w,max_h)
else: else:
re_size = (max_w,max_h) re_size = (max_w,max_h)
else: else:
ratio = max_h/h ratio = max_h/h
w,h = int(w*ratio),max_h w,h = int(w*ratio),max_h
if w <= max_w: if w <= max_w:
re_size = (w,h) re_size = (w,h)
else: else:
re_size = (max_w,h) re_size = (max_w,h)
...@@ -112,12 +108,9 @@ def preprocess(self, img, max_wh_ratio): ...@@ -112,12 +108,9 @@ def preprocess(self, img, max_wh_ratio):
resized_image = resized_image.transpose((2, 0, 1)) / 255 resized_image = resized_image.transpose((2, 0, 1)) / 255
resized_image -= 0.5 resized_image -= 0.5
resized_image /= 0.5 resized_image /= 0.5
#填充,沿着右、下填充 #填充,沿着右、下填充
padding_im = np.zeros((3, imgH, imgW), dtype=np.float32) padding_im = np.zeros((3, imgH, imgW), dtype=np.float32)
padding_im[:, :, 0:re_size[0]] = resized_image padding_im[:, :, 0:re_size[0]] = resized_image
return padding_im return padding_im
``` ```
## 类介绍 ## 类介绍
...@@ -154,7 +147,7 @@ class PPOcrV5(): ...@@ -154,7 +147,7 @@ class PPOcrV5():
**kwargs :设置字符检测模型后处理相关参数 **kwargs :设置字符检测模型后处理相关参数
Returns: Returns:
return_type: NONE。 return_type: 无返回值
Examples: Examples:
det_onnx_path = "PATH/TO/det_onnx_model.onnx" det_onnx_path = "PATH/TO/det_onnx_model.onnx"
...@@ -198,7 +191,7 @@ class TextDetector(object): ...@@ -198,7 +191,7 @@ class TextDetector(object):
**kwargs :设置字符检测模型后处理相关参数 **kwargs :设置字符检测模型后处理相关参数
Returns: Returns:
return_type: NONE return_type: 无返回值
Examples: Examples:
self.db_detector = TextDetector( self.db_detector = TextDetector(
...@@ -216,7 +209,6 @@ class TextDetector(object): ...@@ -216,7 +209,6 @@ class TextDetector(object):
""" """
class TextRecgnizer(object): class TextRecgnizer(object):
"""Support SVTR_LCNet """
def __init__( def __init__(
self, self,
rec_model_path, rec_model_path,
...@@ -240,7 +232,7 @@ class TextRecgnizer(object): ...@@ -240,7 +232,7 @@ class TextRecgnizer(object):
**kwargs :设置字符识别模型后处理相关参数 **kwargs :设置字符识别模型后处理相关参数
Returns: Returns:
return_type: NONE return_type: 无返回值
Examples: Examples:
self.text_extractor = TextRecgnizer(rec_model_path=rec_model_path, self.text_extractor = TextRecgnizer(rec_model_path=rec_model_path,
...@@ -252,18 +244,15 @@ class TextRecgnizer(object): ...@@ -252,18 +244,15 @@ class TextRecgnizer(object):
class BaseRecLabelDecode(object): class BaseRecLabelDecode(object):
def __init__(self, character_dict_path=None, def __init__(self, character_dict_path=None,
use_space_char=False) use_space_char=False)
"""Convert between text-label and text-index """
字符识别(crnn+ctc)。 字符识别(crnn+ctc)。
Args: Args:
character_dict_path :字符集文件路径。 character_dict_path :字符集文件路径。
use_space_char :字符集中是否包含空格。 use_space_char :字符集中是否包含空格。
Returns: Returns:
return_type: NONE。 return_type: 无返回值。
Examples: Examples:
""" """
class CTCLabelDecode(BaseRecLabelDecode): class CTCLabelDecode(BaseRecLabelDecode):
...@@ -277,139 +266,27 @@ class TextRecgnizer(object): ...@@ -277,139 +266,27 @@ class TextRecgnizer(object):
character_dict_path :字符集文件路径。 character_dict_path :字符集文件路径。
use_space_char :字符集中是否包含空格。 use_space_char :字符集中是否包含空格。
Returns: Returns:
return_type: NONE return_type: 无返回值
Examples: Examples:
""" """
``` ```
## 推理 ## 推理
### 字符检测模型推理
```python
def __call__(self, src_img):
data = self.preprocess(src_img)
"""支持两种数据拷贝方式:offload_copy=true、offload_copy=false。当offload_copy为true时,不需要进行内存拷贝,如果为false,需要先预分配输入输出的设备内存,并在推理前,将预处理数据拷贝到设备内存,推理后将模型输出从设备内存中拷贝出来,在做后处理。"""
if self.offload_copy==False:
self.d_mem[self.det_input_name] =migraphx.to_gpu(migraphx.argument(data["image"]))
results = self.db_model.run(self.d_mem)
else:
results = self.db_model.run({self.det_input_name:data["image"]})
if self.offload_copy==False :
#从gpu拷贝推理结果到cpu
result=migraphx.from_gpu(results[0])
print("offload copy model")
result = np.array(result)
else:
result = results[0]
shape_list = np.expand_dims(data["shape"], axis=0)
pred = np.array(result)
pred = pred[:, 0, :, :]
#获取大于阈值的概率
segmentation = pred > self.thresh
boxes_batch = []
for batch_index in range(pred.shape[0]):
src_h, src_w, ratio_h, ratio_w = shape_list[batch_index]
if self.dilation_kernel is not None:
mask = cv2.dilate(
np.array(segmentation[batch_index]).astype(np.uint8),
self.dilation_kernel,
)
else:
mask = segmentation[batch_index]
#根据预测的bitmap获取文本区域
if self.box_type == "poly":
boxes, scores = self.polygons_from_bitmap(
pred[batch_index], mask, ratio_w,ratio_h, src_w, src_h
)
elif self.box_type == "quad":
boxes, scores = self.boxes_from_bitmap(
pred[batch_index], mask, ratio_w,ratio_h, src_w, src_h
)
else:
raise ValueError("box_type can only be one of ['quad', 'poly']")
boxes_batch.append(boxes)
#文本区域按照从上到下,从左到右的顺序排序
det_box_batch = self.sorted_boxes(boxes_batch)
#文本区域按坐标映射到原始图像
dt_boxes,det_rects = self.box_standardization(det_box_batch,shape_list)
return dt_boxes,det_rects
```
### 字符识别推理
```python ```python
def __call__(self, batch_img_list): def __call__(self, src_img):
if len(batch_img_list) == 0: import time
return [] start = time.time()
width_list = [] #字符检测
#遍历图片列表(字符roi存放在图片列表中),为了支持多batch推理,这里还会将batch_size张图片进行拼接np.concatenate(batch_norm_imgs) dt_boxs,dt_rects = self.db_detector(src_img)
for b in range(len(batch_img_list)): res_img = self.vis_boxes(dt_boxs,src_img)
for img in batch_img_list[b]: #字符区域图片裁剪
width_list.append(img.shape[1] / float(img.shape[0])) batch_img_list = self.detection_roi_crop(src_img,dt_rects)
#字符特征提取
indices = np.argsort(np.array(width_list)) batch_outputs_pre ,batch_max_wh_ratio_pre = self.text_extractor(batch_img_list)
#字符编码
input_batch = self.rec_batch_num batch_text_list, batch_label_list = self.ctc_decoder(batch_outputs_pre,return_word_box=False,wh_ratio_list = batch_max_wh_ratio_pre)
batch_outputs_pre = []
batch_max_wh_ratio_pre = []
for b in range(len(batch_img_list)):
im_count = len(batch_img_list[b])
batch_outputs = []
batch_max_wh_ratio = []
for beg_img_no in range(0, im_count, input_batch):
end_img_no = min(im_count, beg_img_no + input_batch)
# for ino in range(beg_img_no, end_img_no):
# h, w = batch_img_list[b][indices[ino]].shape[0:2]
# wh_ratio = w * 1.0 / h
# max_wh_ratio = max(max_wh_ratio, wh_ratio)
batch_norm_imgs = []
max_wh_ratio = list()
# N batch
for ino in range(beg_img_no, end_img_no):
#单张图片预处理
norm_img = self.preprocess(batch_img_list[b][indices[ino]], max_wh_ratio)
norm_img = norm_img[np.newaxis, :].astype(np.float32)
batch_norm_imgs.append(norm_img)
batch_max_wh_ratio.append(max_wh_ratio)
#batch_size张图片进行拼接
if self.rec_batch_num >1:
norm_img_batch = np.concatenate(batch_norm_imgs)
norm_img_batch = norm_img_batch.copy()
else:
norm_img_batch = np.array([batch_norm_imgs.copy()])
if self.offload_copy==False:
print("offload copy model")
self.d_mem[self.rec_input_name] =migraphx.to_gpu(migraphx.argument(norm_img_batch))
results = self.rec_model.run(self.d_mem)
output = np.array(results[0])
else:
results = self.rec_model.run({self.rec_input_name:norm_img_batch})
output = results[0]
# batch_outputs.append(np.array(output))
#将所有batch的输出结果append到batch_outputs中方便后处理
[batch_outputs.append(out) for out in np.array(output)]
batch_outputs_pre.append(np.array(batch_outputs))
batch_max_wh_ratio_pre.append(batch_max_wh_ratio)
return batch_outputs_pre ,batch_max_wh_ratio_pre
``` ```
# Ocrv5 API调用说明 # Ocrv5 API调用说明
...@@ -425,8 +302,8 @@ if __name__ == '__main__': ...@@ -425,8 +302,8 @@ if __name__ == '__main__':
rec_onnx_path = "../Resource/Models/ppocrv5_server_rec_infer.onnx" rec_onnx_path = "../Resource/Models/ppocrv5_server_rec_infer.onnx"
image_path = "../Resource/Images/lite_demo.png" image_path = "../Resource/Images/lite_demo.png"
img = cv2.imread(image_path) img = cv2.imread(image_path)
ppocrv5 = PPOcrV5(det_onnx_path,rec_onnx_path,offload_copy=True,precision_mode="fp32") ppocrv5 = PPOcrV5(det_onnx_path,rec_onnx_path,offload_copy=True,precision_mode="fp16")
res_img = ppocrv5(img) res_img = ppocrv5(img)
cv2.imwrite("res.jpg",res_img) cv2.imwrite("res.jpg",res_img)
``` ```
sample支持两种精度推理(fp32和fp16,默认是fp32),精度和内存拷贝方式分别通过precision_mode和offload_copy参数控制。 sample支持两种精度推理(fp32和fp16,默认是fp16),精度和内存拷贝方式分别通过precision_mode和offload_copy参数控制。
\ No newline at end of file \ No newline at end of file
This diff is collapsed.
...@@ -81,6 +81,7 @@ pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple ...@@ -81,6 +81,7 @@ pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
``` ```
python inference.py python inference.py
``` ```
结果图片保存在当前目录下:res.jpg
offload_copy和precision_mode设置可参考[Tutorial_Python.md](Doc/Tutorial_Python.md),在main中示例。 offload_copy和precision_mode设置可参考[Tutorial_Python.md](Doc/Tutorial_Python.md),在main中示例。
### C++版本推理 ### C++版本推理
...@@ -104,7 +105,6 @@ cd <path_to_ppocrv5_migraphx> ...@@ -104,7 +105,6 @@ cd <path_to_ppocrv5_migraphx>
sh ./3rdParty/InstallOpenCVDependences.sh sh ./3rdParty/InstallOpenCVDependences.sh
``` ```
#### 安装OpenCV并构建工程 #### 安装OpenCV并构建工程
``` ```
...@@ -119,27 +119,6 @@ rbuild build -d depend ...@@ -119,27 +119,6 @@ rbuild build -d depend
cmake -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=./opencv_dep -D INSTALL_C_EXAMPLES=ON -D INSTALL_PYTHON_EXAMPLES=ON -D OPENCV_GENERATE_PKGCONFIG=ON -D BUILD_EXAMPLES=ON -D OPENCV_EXTRA_MODULES_PATH=../modules/ .. cmake -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=./opencv_dep -D INSTALL_C_EXAMPLES=ON -D INSTALL_PYTHON_EXAMPLES=ON -D OPENCV_GENERATE_PKGCONFIG=ON -D BUILD_EXAMPLES=ON -D OPENCV_EXTRA_MODULES_PATH=../modules/ ..
``` ```
- 执行make -j8 && make install,编译的头文件和库目录存放在opencv_dep,将opencv_dep目录拷贝到3rdParty,并命名为opencv - 执行make -j8 && make install,编译的头文件和库目录存放在opencv_dep,将opencv_dep目录拷贝到3rdParty,并命名为opencv
#### 设置环境变量
将依赖库依赖加入环境变量LD_LIBRARY_PATH,在~/.bashrc中添加如下语句:
当操作系统是ubuntu系统时:
```
export LD_LIBRARY_PATH=<path_to_ppocrv5_migraphx>/depend/lib/:$LD_LIBRARY_PATH
```
当操作系统是centos系统时:
```
export LD_LIBRARY_PATH=<path_to_ppocrv5_migraphx>/depend/lib64/:$LD_LIBRARY_PATH
```
然后执行:
```
source ~/.bashrc
```
#### 运行示例 #### 运行示例
...@@ -155,58 +134,112 @@ cmake .. && make ...@@ -155,58 +134,112 @@ cmake .. && make
#运行 #运行
./ppOcrV5cd ./ppOcrV5cd
``` ```
结果图片保存在当前目录下:res.jpg
## result ## result
### Python版本 ### Python版本
输出结果中,每个值分别对应每个label的实际概率 输出结果中展示了识别到的字符,每个字符后面跟着一个置信度,置信度值越大,识别结果越准确
``` ```
产品信息/参数, 0.954 '0', 0.991
发足够的滋养, 1.000 纯臻营养护发素, 1.000
纯臻宫乔护发素, 0.883 '0'.'9''9''3''6''0''4', 0.999
花费了'0'.'4''5''7''3''3''5'秒, 0.993 '1', 0.998
【净含量】:'2''2''0'ml, 0.993 产品信息/参数, 0.934
'0'.'9''9''2''7''2''8', 0.999
'2', 0.999
('4''5'元/每公斤,'1''0''0'公斤起订), 0.970
'0'.'9''7''4''1''7', 0.999
'3', 0.999
每瓶'2''2'元,'1''0''0''0'瓶起订), 0.998 每瓶'2''2'元,'1''0''0''0'瓶起订), 0.998
【品名】:纯臻营养护发素, 0.998 '0'.'9''9''3''9''7''6', 0.999
【品牌】:代加工方式/'0'EMODM, 0.968 '4', 0.998
糖、椰油酰胺丙基甜菜碱、泛醒, 0.997 【品牌】:代加工方式/'0'EMODM, 0.959
【适用人群】:适合所有肤质, 0.998 '0'.'9''8''5''1''3''3', 0.998
【产品编号】:YM'-'X'-''3''0''1''1''0'.'9''6''8''9''9', 0.993 '5', 0.998
('4''5'元/每公斤,'1''0''0'公斤起订), 0.972 【品名】:纯臻营养护发素, 0.997
【主要成分】:鲸蜡硬脂醇、燕麦B'-'葡聚, 0.966 '0'.'9''9''5''0''0''7', 0.999
【主要功能】:可紧致头发磷层,从而达到, 0.994 '6', 0.995
即时持久改善头发光泽的效果,给干燥的头, 0.997 【产品编号】:YM'-'X'-''3''0''1''1''0'.'9''6''8''9''9', 0.973
The detectionvisualizedimagsavedin./vis.jpg, 0.940 '7', 0.999
[Time info] elapsed:3.5736 【净含量】:'2''2''0'ml, 0.994
'0'.'9''9''6''5''7''7', 0.999
'8', 0.998
【适用人群】:适合所有肤质, 0.997
'0'.'9''9''5''8''4''2', 0.999
'9', 0.997
【主要成分】:鲸蜡硬脂醇、燕麦B'-'葡聚, 0.976
'0'.'9''6''1''9''2''8', 0.999
'1''0', 1.000
糖、椰油酰胺丙基甜菜碱、泛醒, 0.996
'0'.'9''2''5''8''9''8', 0.999
'1''1', 0.999
(成品包材), 0.998
'0'.'9''7''2''5''7''3', 0.999
'1''2', 1.000
【主要功能】:可紧致头发磷层,从而达到, 0.992
'0'.'9''9''4''4''4''8', 0.999
'1''3', 0.999
即时持久改善头发光泽的效果,给干燥的头, 0.989
'0'.'9''9''0''1''9''8', 0.999
'1''4', 0.999
发足够的滋养, 0.999
'0'.'9''9''7''6''6''8', 0.999
花费了'0'.'4''5''7''3''3''5'秒, 0.993
[Time info] elapsed:578.6152 ms
``` ```
### C++版本 ### C++版本
``` ```
ocr res :[生成一幅画,负向提示词为:画中不要出现人物。正负提示词结合会] ocr res :花费了'0'.'4''5''7''3''3''5'秒 0.984009
ocr res :[Text_encode_'2'.副文本编码器,补充描述性细节(如材质、光照、] ocr res :'0'.'9''9''7' 0.773633
ocr res :[图片的准确性,过滤掉不需要的元素,例如正向提示词为:提示模型] ocr res :发足够的滋养 0.96818
ocr res :[编码器特征融合提升模型的理解能力。] ocr res :'1' 0.697754
ocr res :[正负 prompt 设置:正向 prompt 和负向 prompt 结合可以提升生成] ocr res :'0''0'.'9''9''0''1''9' 0.656647
ocr res :[语义表示捕获提示词的基础含义和全局语境(如对象、动作),与副] ocr res :即时持久改善头发光泽的效果,给干燥的头 0.996608
ocr res :[的图像不会发生变化,随机种子可以增加生成图像的多样性。] ocr res : 0
ocr res :[Text_encode.主文本编码器,将prompt序列转换为一个综合的] ocr res :【主要功能】:可紧致头发磷层,从而达到 0.993421
ocr res :[响初始噪声和生成结果的确定性,固定种子后,同一个prompt生成] ocr res :'0'.'9''9''4''4' 0.677327
ocr res :[声转化为目标图像。] ocr res : 0
ocr res :[随机数设置:随机数种子是控制生成过程随机性的关键参数,直接影] ocr res :'0'.'9''7''2' 0.637158
ocr res :[Scheduler:调度器,控制图像生成,决定了如何逐步将随机噪] ocr res :(成品包材) 0.901937
ocr res :[程和图像生成过程中有着至关重要的作用。] ocr res :'1' 0.32251
ocr res :[在stable'-'dffusion'-'xl'-'base'-''1'.'0'模型中主要包含一下子组件:] ocr res :糖、椰油酰胺丙基甜菜碱、泛醒 0.993478
ocr res :[Pipeline的配置参数控制图像生成的质量和速度,在扩散模型预测过] ocr res :'0'.'9''2''5' 0.586279
ocr res :[具配置文件中的定义手动加载各个子组件。] ocr res :'1''0' 0.547241
ocr res :[这里使用了扩散模型加载器统一加载了所有的子组件,也可以更] ocr res :【主要成分】:鲸蜡硬脂醇、燕麦B'-'葡聚 0.975303
ocr res :[·'2'.'3'pipeline 配置] ocr res :'0'.'9''1''9' 0.568408
Time taken by task: 3475 ms ocr res : 0
ocr res :'0'.'9''9''5''2' 0.613647
ocr res :【适用人群】:适合所有肤质 0.996882
ocr res :'8' 0.378906
ocr res :'0'.'9''9' 0.595581
ocr res :【净含量】:'2''2''0'ml 0.835671
ocr res :'7' 0.356689
ocr res :【产品编号】:YM'-'X'-''3''0''1''1''0'.'9''6''8''9''9' 0.993695
ocr res :'6' 0.214355
ocr res :'0'.'9''9''5' 0.478052
ocr res :【品名】:纯臻营养护发素 0.996175
ocr res :'5' 0.594727
ocr res : 0
ocr res :'0'.'9''8''5' 0.55166
ocr res :【品牌】:代加工方式/'0'EMODM 0.917768
ocr res :每瓶'2''2'元,'1''0''0''0'瓶起订) 0.974644
ocr res :'0'.'9''9''3''9''7''6' 0.736755
ocr res :'3' 0.486572
ocr res :('4''5'元/每公斤,'1''0''0'公斤起订) 0.940028
ocr res :'0'.'9'm'7' 0.534668
ocr res :'2' 0.961426
ocr res : 0
ocr res :'0'.'9''9''2' 0.524121
ocr res :产品信息/参数 0.913853
ocr res :纯臻营养护发素'0'.'9''9''3''6''0''4' 0.964128
ocr res :'0' 0.380127
ocr res :The detection visualized imagesavedin./vis.jpg 0.94302
[Time info] elapsed: 389 ms
``` ```
### 精度 ### 精度
......
#include "cv_put_Text.hpp"
/**
 * @brief Construct a text renderer backed by FreeType.
 * @param font_path Path to a TrueType font file (e.g. SimHei.ttf for CJK glyphs).
 *
 * On library-init failure the object is left with null handles so the
 * destructor is safe to run; on font-load failure the process exits.
 */
PutText::PutText(const char* font_path) {
    // Null-initialize both handles up front: if initialization fails below,
    // the destructor must not call FT_Done_* on indeterminate pointers.
    ft = nullptr;
    face = nullptr;
    // 初始化 FreeType
    if (FT_Init_FreeType(&ft)) {
        std::cerr << "Error: Could not init FreeType !" << std::endl;
        ft = nullptr;
        return;
    }
    // 加载字体文件( 这里使用 SimHei.ttf 字体文件)
    if (FT_New_Face(ft, font_path, 0, &face)) {
        std::cerr << "Error: Load font failed!" << std::endl;
        exit(-1);
    }
}
/**
 * @brief Release FreeType resources.
 *
 * Handles are guarded against null so the destructor is safe even when
 * construction failed before the face/library was created.
 */
PutText::~PutText() {
    if (face) {
        FT_Done_Face(face);
    }
    if (ft) {
        FT_Done_FreeType(ft);
    }
}
void PutText::putText(cv::Mat& img, const std::string& text, int x, int y, int fontSize, cv::Scalar color) {
    // Overlay UTF-8 text (including CJK) onto a 3-channel BGR image using
    // FreeType-rendered glyphs. (x, y) is the top-left corner of the text
    // line; fontSize is the glyph height in pixels.
    if(img.empty())
    {
        std::cerr << "Empty image!";
        return ;
    }
    // Select the glyph size in pixels (width 0 = derive from height).
    FT_Set_Pixel_Sizes(face, 0, fontSize);
    int start_point_x = x;
    int start_point_y = y + fontSize; // baseline adjustment: y is the top of the line
    const size_t len = text.size();
    // Walk the string one UTF-8 code point at a time.
    for (size_t i = 0; i < len; ) {
        // Decode the next UTF-8 sequence (1- to 4-byte forms). Length checks
        // guard against reading past the end of a truncated sequence.
        unsigned long unicode = 0;
        unsigned char lead = static_cast<unsigned char>(text[i]);
        if ((lead & 0x80) == 0) {
            unicode = lead;
            i += 1;
        } else if ((lead & 0xE0) == 0xC0 && i + 1 < len) {
            unicode = ((lead & 0x1F) << 6) | (text[i + 1] & 0x3F);
            i += 2;
        } else if ((lead & 0xF0) == 0xE0 && i + 2 < len) {
            unicode = ((lead & 0x0F) << 12) | ((text[i + 1] & 0x3F) << 6) | (text[i + 2] & 0x3F);
            i += 3;
        } else if ((lead & 0xF8) == 0xF0 && i + 3 < len) {
            // 4-byte sequence: code points beyond the BMP (e.g. emoji).
            unsigned long cp = ((unsigned long)(lead & 0x07) << 18) |
                               ((unsigned long)(text[i + 1] & 0x3F) << 12) |
                               ((unsigned long)(text[i + 2] & 0x3F) << 6) |
                               (unsigned long)(text[i + 3] & 0x3F);
            unicode = cp;
            i += 4;
        } else {
            i++; // invalid or truncated UTF-8: skip this byte
            continue;
        }
        // Render the glyph for this code point.
        if (FT_Load_Char(face, unicode, FT_LOAD_RENDER)) {
            std::cerr << "Error: Could not load glyph" << std::endl;
            continue;
        }
        // Alpha-blend the 8-bit glyph bitmap into the image.
        FT_Bitmap& bitmap = face->glyph->bitmap;
        for (unsigned int row = 0; row < bitmap.rows; ++row) {
            for (unsigned int col = 0; col < bitmap.width; ++col) {
                // Index rows by `pitch` (bytes per bitmap row), which may
                // differ from `width` depending on alignment.
                unsigned char intensity = bitmap.buffer[row * bitmap.pitch + col];
                if (intensity == 0)
                    continue;
                int px = start_point_x + face->glyph->bitmap_left + static_cast<int>(col);
                int py = start_point_y - face->glyph->bitmap_top + static_cast<int>(row);
                // Clip glyph pixels that fall outside the image to avoid
                // out-of-bounds writes when text is drawn near the borders.
                if (px < 0 || px >= img.cols || py < 0 || py >= img.rows)
                    continue;
                float alpha = intensity / 255.0f;
                cv::Vec3b& pixel = img.at<cv::Vec3b>(py, px);
                pixel[0] = cv::saturate_cast<uchar>(color[0] * alpha + pixel[0] * (1.0f - alpha));
                pixel[1] = cv::saturate_cast<uchar>(color[1] * alpha + pixel[1] * (1.0f - alpha));
                pixel[2] = cv::saturate_cast<uchar>(color[2] * alpha + pixel[2] * (1.0f - alpha));
            }
        }
        // Advance the pen by the glyph's horizontal advance (26.6 fixed point).
        start_point_x += face->glyph->advance.x >> 6;
    }
}
\ No newline at end of file
#pragma once
#include <ft2build.h>
#include FT_FREETYPE_H
#include <opencv2/opencv.hpp>
/**
 * @brief Renders UTF-8 text (including Chinese) onto cv::Mat images using a
 *        font loaded through FreeType.
 */
class PutText {
private:
    FT_Library ft{nullptr};   // FreeType library handle; nullptr until init succeeds
    FT_Face face{nullptr};    // loaded font face; nullptr until the font is loaded
public:
    /**
     * @brief Initialize FreeType and load the font at @p font_path.
     * @param font_path path to a TrueType font file (e.g. SimHei.ttf)
     */
    PutText(const char* font_path);
    ~PutText();
    /**
     * @brief Draw text onto an image (supports Chinese / UTF-8).
     * @param img image to draw on, modified in place (3-channel BGR)
     * @param text UTF-8 encoded string to overlay
     * @param x x coordinate of the text's top-left corner, in pixels
     * @param y y coordinate of the text's top-left corner, in pixels
     * @param fontSize glyph height in pixels
     * @param color text color in BGR order, default green
     *
     * @return none
     */
    void putText(cv::Mat& img, const std::string& text, int x, int y, int fontSize=2, cv::Scalar color=cv::Scalar(0, 255, 0));
};
...@@ -2,24 +2,28 @@ ...@@ -2,24 +2,28 @@
using namespace ppocr; using namespace ppocr;
int main(int argc, char** argv) int main(int argc, char** argv){
{
std::string det_model_onnx = "../Resource/Models/ppocrv5_server_det_infer.onnx"; std::string det_model_onnx = "../Resource/Models/ppocrv5_server_det_infer.onnx";
std::string rec_model_onnx = "../Resource/Models/ppocrv5_server_rec_infer.onnx"; std::string rec_model_onnx = "../Resource/Models/ppocrv5_server_rec_infer.onnx";
std::string img_path = "../Resource/Images/20250703205038.png"; std::string img_path = "../Resource/Images/demo.png";
std::string character_dict_path = "../Resource/ppocr_keys_v5.txt"; std::string character_dict_path = "../Resource/ppocr_keys_v5.txt";
std::string front = "../Resource/fonts/SimHei.ttf";
float segm_thres=0.3; float segm_thres=0.3;
float box_thresh=0.3; float box_thresh=0.3;
ppOcrEngine ocr_engine(det_model_onnx, ppOcrEngine ocr_engine(det_model_onnx,
rec_model_onnx, rec_model_onnx,
character_dict_path, character_dict_path,
front,
segm_thres, segm_thres,
box_thresh, box_thresh,
true, true,
"fp32"); "fp16");
cv::Mat img=cv::imread(img_path); cv::Mat img=cv::imread(img_path);
ocr_engine.forward(img); ocr_engine.forward(img);
return 0; return 0;
} }
\ No newline at end of file
...@@ -53,15 +53,12 @@ bool XsortFp32(std::vector<float> a, std::vector<float> b) { ...@@ -53,15 +53,12 @@ bool XsortFp32(std::vector<float> a, std::vector<float> b) {
return a[0] < b[0]; return a[0] < b[0];
return false; return false;
} }
namespace ppocr namespace ppocr{
{
OcrDet::OcrDet(const std::string det_model_path, OcrDet::OcrDet(const std::string det_model_path,
std::string precision_mode, std::string precision_mode,
bool offload_copy, bool offload_copy,
float segm_thres, float segm_thres,
float box_thresh ) float box_thresh ){
{
if(!Exists(det_model_path)) if(!Exists(det_model_path))
{ {
LOG_ERROR(stdout, "onnx file not exists!\n"); LOG_ERROR(stdout, "onnx file not exists!\n");
...@@ -119,6 +116,9 @@ namespace ppocr ...@@ -119,6 +116,9 @@ namespace ppocr
options.offload_copy = offload_copy; options.offload_copy = offload_copy;
migraphx::target gpuTarget = migraphx::gpu::target{}; migraphx::target gpuTarget = migraphx::gpu::target{};
net.compile(gpuTarget, options); net.compile(gpuTarget, options);
float *warm_data = (float*)malloc(this->input_shape.bytes());
memset(warm_data, 1.0, this->input_shape.bytes());
if( this->offload_copy ==false ) if( this->offload_copy ==false )
{ {
hipMalloc(&input_buffer_device, this->input_shape.bytes()); hipMalloc(&input_buffer_device, this->input_shape.bytes());
...@@ -127,14 +127,23 @@ namespace ppocr ...@@ -127,14 +127,23 @@ namespace ppocr
dev_argument[input_name] = migraphx::argument{input_shape, input_buffer_device}; dev_argument[input_name] = migraphx::argument{input_shape, input_buffer_device};
dev_argument[output_name] = migraphx::argument{output_shape, output_buffer_device}; dev_argument[output_name] = migraphx::argument{output_shape, output_buffer_device};
}
//decode hipMemcpy(input_buffer_device,
// ocr = std::make_shared<CTCDecode>(res_mpath,100,32,3,keys_path); (void*)warm_data,
this->input_shape.bytes(),
hipMemcpyHostToDevice);
//warm up
std::vector<migraphx::argument> results = net.eval(dev_argument);
}else{
std::unordered_map<std::string, migraphx::argument> inputData;
inputData[input_name] = migraphx::argument{input_shape, (float *)warm_data};
//warm up
std::vector<migraphx::argument> results = net.eval(inputData);
}
free(warm_data);
} }
OcrDet::~OcrDet() OcrDet::~OcrDet(){
{
if(data) if(data)
{ {
free(data); free(data);
...@@ -142,6 +151,7 @@ namespace ppocr ...@@ -142,6 +151,7 @@ namespace ppocr
} }
if( offload_copy == false ) if( offload_copy == false )
{ {
//内存释放
if(input_buffer_device) if(input_buffer_device)
{ {
hipFree(input_buffer_device); hipFree(input_buffer_device);
...@@ -158,8 +168,7 @@ namespace ppocr ...@@ -158,8 +168,7 @@ namespace ppocr
} }
} }
cv::Size OcrDet::preproc(cv::Mat img,float* data) cv::Size OcrDet::preproc(cv::Mat img,float* data){
{
float scale = 1.0/255.0; float scale = 1.0/255.0;
std::vector<float> s_mean={0.485, 0.456, 0.406}; std::vector<float> s_mean={0.485, 0.456, 0.406};
std::vector<float> s_stdv={0.229, 0.224, 0.225}; std::vector<float> s_stdv={0.229, 0.224, 0.225};
...@@ -189,8 +198,7 @@ namespace ppocr ...@@ -189,8 +198,7 @@ namespace ppocr
return scale_r ; return scale_r ;
} }
std::vector<std::vector<float>> OcrDet::get_mini_boxes(cv::RotatedRect box,float &ssid) std::vector<std::vector<float>> OcrDet::get_mini_boxes(cv::RotatedRect box,float &ssid) {
{
ssid = max(box.size.width, box.size.height); ssid = max(box.size.width, box.size.height);
cv::Mat points; cv::Mat points;
cv::boxPoints(box, points); cv::boxPoints(box, points);
...@@ -252,7 +260,6 @@ namespace ppocr ...@@ -252,7 +260,6 @@ namespace ppocr
auto array = get_mini_boxes(box, ssid); auto array = get_mini_boxes(box, ssid);
auto box_for_unclip = array; auto box_for_unclip = array;
// end get_mini_box
if (ssid < min_size) { if (ssid < min_size) {
continue; continue;
...@@ -260,7 +267,7 @@ namespace ppocr ...@@ -260,7 +267,7 @@ namespace ppocr
float score; float score;
if (use_polygon_score) if (use_polygon_score)
/* compute using polygon*/ //多边形区域的平均得分作为box的分数
score = polygon_score_acc(contours[_i], pred); score = polygon_score_acc(contours[_i], pred);
else else
score = box_score_fast(array, pred); score = box_score_fast(array, pred);
...@@ -268,12 +275,11 @@ namespace ppocr ...@@ -268,12 +275,11 @@ namespace ppocr
if (score < box_thresh) if (score < box_thresh)
continue; continue;
// start for unclip //简化边界得到准确的边界
cv::RotatedRect points = unClip(box_for_unclip, det_db_unclip_ratio); cv::RotatedRect points = unClip(box_for_unclip, det_db_unclip_ratio);
if (points.size.height < 1.001 && points.size.width < 1.001) { if (points.size.height < 1.001 && points.size.width < 1.001) {
continue; continue;
} }
// end for unclip
cv::RotatedRect clipbox = points; cv::RotatedRect clipbox = points;
auto cliparray = get_mini_boxes(clipbox, ssid); auto cliparray = get_mini_boxes(clipbox, ssid);
...@@ -296,12 +302,11 @@ namespace ppocr ...@@ -296,12 +302,11 @@ namespace ppocr
} }
boxes.push_back(intcliparray); boxes.push_back(intcliparray);
} // end for }
return boxes; return boxes;
} }
std::vector<std::vector<float>> OcrDet::Mat2Vector(cv::Mat mat) std::vector<std::vector<float>> OcrDet::Mat2Vector(cv::Mat mat){
{
std::vector<std::vector<float>> img_vec; std::vector<std::vector<float>> img_vec;
std::vector<float> tmp; std::vector<float> tmp;
...@@ -316,8 +321,7 @@ namespace ppocr ...@@ -316,8 +321,7 @@ namespace ppocr
} }
float OcrDet::polygon_score_acc(std::vector<cv::Point> contour, float OcrDet::polygon_score_acc(std::vector<cv::Point> contour,
cv::Mat pred) cv::Mat pred){
{
int width = pred.cols; int width = pred.cols;
int height = pred.rows; int height = pred.rows;
std::vector<float> box_x; std::vector<float> box_x;
...@@ -364,8 +368,7 @@ namespace ppocr ...@@ -364,8 +368,7 @@ namespace ppocr
} }
float OcrDet::box_score_fast(std::vector<std::vector<float>> box_array, float OcrDet::box_score_fast(std::vector<std::vector<float>> box_array,
cv::Mat pred) cv::Mat pred) {
{
auto array = box_array; auto array = box_array;
int width = pred.cols; int width = pred.cols;
int height = pred.rows; int height = pred.rows;
...@@ -402,8 +405,7 @@ namespace ppocr ...@@ -402,8 +405,7 @@ namespace ppocr
return score; return score;
} }
cv::RotatedRect OcrDet::unClip(std::vector<std::vector<float>> box, cv::RotatedRect OcrDet::unClip(std::vector<std::vector<float>> box,
const float &unclip_ratio) const float &unclip_ratio){
{
float distance = 1.0; float distance = 1.0;
get_contour_area(box, unclip_ratio, distance); get_contour_area(box, unclip_ratio, distance);
ClipperLib::ClipperOffset offset; ClipperLib::ClipperOffset offset;
...@@ -433,8 +435,7 @@ namespace ppocr ...@@ -433,8 +435,7 @@ namespace ppocr
} }
void OcrDet::get_contour_area(const std::vector<std::vector<float>> &box, void OcrDet::get_contour_area(const std::vector<std::vector<float>> &box,
float unclip_ratio, float &distance) float unclip_ratio, float &distance) {
{
int pts_num = 4; int pts_num = 4;
float area = 0.0f; float area = 0.0f;
float dist = 0.0f; float dist = 0.0f;
...@@ -452,8 +453,7 @@ namespace ppocr ...@@ -452,8 +453,7 @@ namespace ppocr
std::vector<std::vector<std::vector<int>>> std::vector<std::vector<std::vector<int>>>
OcrDet::filter_det_res(std::vector<std::vector<std::vector<int>>> boxes, OcrDet::filter_det_res(std::vector<std::vector<std::vector<int>>> boxes,
float ratio_h, float ratio_w, cv::Mat srcimg) float ratio_h, float ratio_w, cv::Mat srcimg){
{
int oriimg_h = srcimg.rows; int oriimg_h = srcimg.rows;
int oriimg_w = srcimg.cols; int oriimg_w = srcimg.cols;
...@@ -482,8 +482,7 @@ namespace ppocr ...@@ -482,8 +482,7 @@ namespace ppocr
return root_points; return root_points;
} }
std::vector<std::vector<int>> OcrDet::order_points_clockwise(std::vector<std::vector<int>> pts) std::vector<std::vector<int>> OcrDet::order_points_clockwise(std::vector<std::vector<int>> pts){
{
std::vector<std::vector<int>> box = pts; std::vector<std::vector<int>> box = pts;
std::sort(box.begin(), box.end(), XsortInt); std::sort(box.begin(), box.end(), XsortInt);
std::vector<std::vector<int>> leftmost = {box[0], box[1]}; std::vector<std::vector<int>> leftmost = {box[0], box[1]};
...@@ -500,31 +499,8 @@ namespace ppocr ...@@ -500,31 +499,8 @@ namespace ppocr
return rect; return rect;
} }
void OcrDet::visualize_boxes(const cv::Mat &srcimg,
const std::vector<std::vector<std::vector<int>>> &boxes)
{
cv::Mat img_vis;
srcimg.copyTo(img_vis);
for (int n = 0; n < boxes.size(); n++) {
cv::Point rook_points[4];
// std::cout<<"size :"<<boxes[n].size()<<'\n';
for (int m = 0; m < boxes[n].size(); m++) {
rook_points[m] = cv::Point(int(boxes[n][m][0]), int(boxes[n][m][1]));
}
const cv::Point *ppt[1] = {rook_points};
int npt[] = {4};
cv::polylines(img_vis, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0);
}
cv::imwrite("./ocr_debug.png", img_vis);
std::cout << "image saved in ./ocr_result.png"
<< std::endl;
}
bool OcrDet::text_recognition(const cv::Mat &srcimg, bool OcrDet::text_recognition(const cv::Mat &srcimg,
const std::vector<std::vector<std::vector<int>>> &boxes) const std::vector<std::vector<std::vector<int>>> &boxes){
{
if(boxes.size() == 0) if(boxes.size() == 0)
{ {
std::cout<<"Not found text roi !\n"; std::cout<<"Not found text roi !\n";
...@@ -540,15 +516,11 @@ namespace ppocr ...@@ -540,15 +516,11 @@ namespace ppocr
rect.width = boxes[n][2][0] - boxes[n][0][0]; rect.width = boxes[n][2][0] - boxes[n][0][0];
rect.height = boxes[n][2][1] - boxes[n][0][1]; rect.height = boxes[n][2][1] - boxes[n][0][1];
text_mat = srcimg(rect).clone(); text_mat = srcimg(rect).clone();
// ocr->forward(text_mat);
// cv::rectangle(srcimg,rect,cv::Scalar(0,255,0),2);
} }
// cv::imwrite("region_debug.jpg",srcimg);
return true; return true;
} }
int OcrDet::postprocess(float* feature, std::vector<std::vector<std::vector<int>>> &boxes) int OcrDet::postprocess(float* feature, std::vector<std::vector<std::vector<int>>> &boxes){
{
int batch_s = 1; int batch_s = 1;
float conf_thres = 0.6; float conf_thres = 0.6;
cv::Mat thres_mat = cv::Mat(cv::Size(output_height,output_width), CV_8UC1); cv::Mat thres_mat = cv::Mat(cv::Size(output_height,output_width), CV_8UC1);
...@@ -574,8 +546,7 @@ namespace ppocr ...@@ -574,8 +546,7 @@ namespace ppocr
return 0; return 0;
} }
bool OcrDet::forward(cv::Mat& img,std::vector<std::vector<std::vector<int>>>& text_roi_boxes) bool OcrDet::forward(cv::Mat& img,std::vector<std::vector<std::vector<int>>>& text_roi_boxes){
{
std::vector<std::vector<std::vector<int>>> boxes; std::vector<std::vector<std::vector<int>>> boxes;
cv::Size ratio = preproc(img,data); cv::Size ratio = preproc(img,data);
...@@ -608,8 +579,7 @@ namespace ppocr ...@@ -608,8 +579,7 @@ namespace ppocr
float ratio_h = float(net_input_height) / float(img.rows); float ratio_h = float(net_input_height) / float(img.rows);
text_roi_boxes = filter_det_res(boxes, ratio_h, ratio_w, img); text_roi_boxes = filter_det_res(boxes, ratio_h, ratio_w, img);
visualize_boxes(img,text_roi_boxes); // visualize_boxes(img,text_roi_boxes);
// TextRecognition(img,boxes);
return true; return true;
} }
...@@ -620,9 +590,7 @@ namespace ppocr ...@@ -620,9 +590,7 @@ namespace ppocr
int channel, int channel,
int batch_size, int batch_size,
bool offload_copy, bool offload_copy,
std::string character_dict_path) std::string character_dict_path){
{
if(!Exists(rec_model_path)) if(!Exists(rec_model_path))
{ {
LOG_ERROR(stdout, "onnx file not exists!\n"); LOG_ERROR(stdout, "onnx file not exists!\n");
...@@ -634,7 +602,6 @@ namespace ppocr ...@@ -634,7 +602,6 @@ namespace ppocr
this->net_input_channel=channel; this->net_input_channel=channel;
this->precision_mode = precision_mode; this->precision_mode = precision_mode;
migraphx::onnx_options onnx_options; migraphx::onnx_options onnx_options;
onnx_options.map_input_dims["x"] = {1, 3, 48, 720}; onnx_options.map_input_dims["x"] = {1, 3, 48, 720};
...@@ -663,8 +630,6 @@ namespace ppocr ...@@ -663,8 +630,6 @@ namespace ppocr
this->feature_size = output_shape.lens()[2]; this->feature_size = output_shape.lens()[2];
n_channel = this->output_shape.lens()[1]; n_channel = this->output_shape.lens()[1];
std::cout<<"["<<this->output_shape.lens()[0]<<
","<<this->output_shape.lens()[1]<<","<<this->output_shape.lens()[2]<<"]\n";
this->offload_copy = offload_copy; this->offload_copy = offload_copy;
migraphx::compile_options options; migraphx::compile_options options;
...@@ -673,23 +638,37 @@ namespace ppocr ...@@ -673,23 +638,37 @@ namespace ppocr
migraphx::target gpuTarget = migraphx::gpu::target{}; migraphx::target gpuTarget = migraphx::gpu::target{};
net.compile(gpuTarget, options); net.compile(gpuTarget, options);
float *warm_data = (float*)malloc(this->input_shape.bytes());
memset(warm_data, 1.0, this->input_shape.bytes());
if( this->offload_copy ==false ) if( this->offload_copy ==false )
{ {
LOG_INFO(stdout, "Set copy mode ...\n");
hipMalloc(&input_buffer_device, this->input_shape.bytes()); hipMalloc(&input_buffer_device, this->input_shape.bytes());
hipMalloc(&output_buffer_device, this->output_shape.bytes()); hipMalloc(&output_buffer_device, this->output_shape.bytes());
output_buffer_host = (void*)malloc(this->output_shape.bytes()); output_buffer_host = (void*)malloc(this->output_shape.bytes());
dev_argument[input_name] = migraphx::argument{input_shape, input_buffer_device}; dev_argument[input_name] = migraphx::argument{input_shape, input_buffer_device};
dev_argument[output_name] = migraphx::argument{output_shape, output_buffer_device}; dev_argument[output_name] = migraphx::argument{output_shape, output_buffer_device};
}
hipMemcpy(input_buffer_device,
(void*)warm_data,
this->input_shape.bytes(),
hipMemcpyHostToDevice);
//warm up
std::vector<migraphx::argument> results = net.eval(dev_argument);
}else{
std::unordered_map<std::string, migraphx::argument> inputData;
inputData[input_name] = migraphx::argument{input_shape, (float *)warm_data};
//warm up
std::vector<migraphx::argument> results = net.eval(inputData);
}
free(warm_data);
std::ifstream infile; std::ifstream infile;
infile.open(character_dict_path,std::ios::in); infile.open(character_dict_path,std::ios::in);
assert(infile.is_open()); assert(infile.is_open());
std::string k_work=""; std::string k_work="";
k_words.clear(); k_words.clear();
//读取字典文件
while (std::getline(infile,k_work)) while (std::getline(infile,k_work))
{ {
k_words.push_back(k_work); k_words.push_back(k_work);
...@@ -697,8 +676,7 @@ namespace ppocr ...@@ -697,8 +676,7 @@ namespace ppocr
system("chcp 65001"); system("chcp 65001");
} }
CTCDecode::~CTCDecode() CTCDecode::~CTCDecode(){
{
if(data) if(data)
{ {
free(data); free(data);
...@@ -723,8 +701,7 @@ namespace ppocr ...@@ -723,8 +701,7 @@ namespace ppocr
} }
} }
bool CTCDecode::preproc(cv::Mat img,float* data,int img_w,int img_h) bool CTCDecode::preproc(cv::Mat img,float* data,int img_w,int img_h){
{
if (img.empty()) if (img.empty())
{ {
std::cout<<"WARNING image is empty!\n"; std::cout<<"WARNING image is empty!\n";
...@@ -754,25 +731,21 @@ namespace ppocr ...@@ -754,25 +731,21 @@ namespace ppocr
data[i*img_w+j] = (template_mat.at<cv::Vec3b>(i, j)[2]*scale-0.5)/0.5; data[i*img_w+j] = (template_mat.at<cv::Vec3b>(i, j)[2]*scale-0.5)/0.5;
data[i*img_w+j+img_h*img_w] = (template_mat.at<cv::Vec3b>(i, j)[1]*scale-0.5)/0.5; data[i*img_w+j+img_h*img_w] = (template_mat.at<cv::Vec3b>(i, j)[1]*scale-0.5)/0.5;
data[i*img_w+j+2*img_h*img_w] =( template_mat.at<cv::Vec3b>(i, j)[0]*scale-0.5)/0.5; data[i*img_w+j+2*img_h*img_w] =( template_mat.at<cv::Vec3b>(i, j)[0]*scale-0.5)/0.5;
} }
} }
} }
return true ; return true ;
} }
std::string CTCDecode::decode(std::vector<float>& probs,std::vector<int>& indexs,float& mean_prob) std::string CTCDecode::decode(std::vector<float>& probs,std::vector<int>& indexs,float& mean_prob){
{
int ignored_tokens=0; int ignored_tokens=0;
std::string text=""; std::string text="";
std::vector<float> n_probs; std::vector<float> n_probs;
std::vector<int> n_indexs; std::vector<int> n_indexs;
int eff_text_num=0; int eff_text_num=0;
for (int i=0;i<n_channel;i++) for (int i=0;i<n_channel;i++)
{ {
// std::cout<<"s :"<<i<<":"<<indexs[i]<<"-"<<probs[i]<<std::endl;
if(indexs[i]==ignored_tokens) if(indexs[i]==ignored_tokens)
{ {
continue; continue;
...@@ -784,7 +757,6 @@ namespace ppocr ...@@ -784,7 +757,6 @@ namespace ppocr
mean_prob+=probs[i]; mean_prob+=probs[i];
text+=k_words[indexs[i]-1]; text+=k_words[indexs[i]-1];
eff_text_num++; eff_text_num++;
} }
...@@ -801,38 +773,26 @@ namespace ppocr ...@@ -801,38 +773,26 @@ namespace ppocr
} }
std::string CTCDecode::postprocess(float* feature) std::string CTCDecode::postprocess(float* feature)
{ {
//shape 25*6625
std::vector<float> probs; std::vector<float> probs;
std::vector<int> indexs; std::vector<int> indexs;
float prob=0.; float prob=0.;
// std::cout<<"n_channel:"<<n_channel<<", feature_size:"<<feature_size<<std::endl;
for (int i=0;i<n_channel;i++) for (int i=0;i<n_channel;i++)
{ {
float* c_feat = feature+i*feature_size; float* c_feat = feature+i*feature_size;
int max_index = argmax<float*>(c_feat,c_feat+feature_size); int max_index = argmax<float*>(c_feat,c_feat+feature_size);
float max_pro = c_feat[max_index]; float max_pro = c_feat[max_index];
// std::cout<<"step:"<<i<<" max_pro:"<<max_pro<<", max_index:"<<max_index<<std::endl;
probs.push_back(max_pro); probs.push_back(max_pro);
indexs.push_back(max_index); indexs.push_back(max_index);
} }
std::string text = decode(probs,indexs,prob); std::string text = decode(probs,indexs,prob);
std::cout<<"ocr res :["<<text<<"]\n"; std::cout<<"ocr res :"<<text<<" "<<prob<<"\n";
return text; return text;
} }
std::string CTCDecode::forward(cv::Mat& img) std::string CTCDecode::forward(cv::Mat& img){
{
preproc(img,data,net_input_width,net_input_height); preproc(img,data,net_input_width,net_input_height);
// std::unordered_map<std::string, migraphx::argument> inputData;
// inputData[input_name] = migraphx::argument{input_shape, data};
// std::vector<migraphx::argument> results = net.eval(inputData);
// migraphx::argument result = results[0];
if( this->offload_copy ==false ) if( this->offload_copy ==false )
{ {
hipMemcpy(input_buffer_device, hipMemcpy(input_buffer_device,
...@@ -846,8 +806,6 @@ namespace ppocr ...@@ -846,8 +806,6 @@ namespace ppocr
(void*)output_buffer_device, (void*)output_buffer_device,
output_shape.bytes(), output_shape.bytes(),
hipMemcpyDeviceToHost); hipMemcpyDeviceToHost);
// std::cout<<"ctc: copy mode ..."<<std::endl;
std::string text = postprocess((float *)output_buffer_device); std::string text = postprocess((float *)output_buffer_device);
return text; return text;
}else{ }else{
...@@ -856,44 +814,63 @@ namespace ppocr ...@@ -856,44 +814,63 @@ namespace ppocr
std::vector<migraphx::argument> results = net.eval(inputData); std::vector<migraphx::argument> results = net.eval(inputData);
migraphx::argument result = results[0] ; migraphx::argument result = results[0] ;
std::string text = postprocess((float *)result.data()); std::string text = postprocess((float *)result.data());
// std::cout<<"ctc: offload copy mode ..."<<std::endl;
return text; return text;
} }
//get output data (first node)
// migraphx::shape outputShape = result.get_shape();
// int numberOfOutput = outputShape.elements();
// std::vector<std::size_t> outputSize = outputShape.lens();
// std::cout<<"output size:"<<outputSize.size()<<std::endl;
// for(int i = 0; i < outputSize.size(); i++)
// {
// std::cout << outputSize[i] << " ";
// }
} }
ppOcrEngine::ppOcrEngine(const std::string &det_model_path, ppOcrEngine::ppOcrEngine(const std::string &det_model_path,
const std::string &rec_model_path, const std::string &rec_model_path,
const std::string &character_dict_path, const std::string &character_dict_path,
const std::string front,
float segm_thres, float segm_thres,
float box_thresh, float box_thresh,
bool offload_copy, bool offload_copy,
std::string precision_mode ){ std::string precision_mode
){
text_detector = std::make_shared<OcrDet>(det_model_path,precision_mode,offload_copy,segm_thres,box_thresh); text_detector = std::make_shared<OcrDet>(det_model_path,precision_mode,offload_copy,segm_thres,box_thresh);
text_recognizer = std::make_shared<CTCDecode>(rec_model_path,precision_mode,720,48,3,1,offload_copy,character_dict_path); text_recognizer = std::make_shared<CTCDecode>(rec_model_path,precision_mode,720,48,3,1,offload_copy,character_dict_path);
ft2 = std::make_shared<PutText>(front.c_str());
} }
ppOcrEngine::~ppOcrEngine() ppOcrEngine::~ppOcrEngine(){
{
; ;
} }
std::vector<std::string> ppOcrEngine::forward(cv::Mat &srcimg) void ppOcrEngine::visualize_boxes(cv::Mat &srcimg,
const std::vector<std::vector<std::vector<int>>> &boxes) {
std::vector<std::vector<cv::Point>> contours;
for (const auto& box : boxes) {
std::vector<cv::Point> pts;
for (const auto& point : box) {
pts.emplace_back(point[0], point[1]);
}
contours.push_back(pts);
}
cv::polylines(
srcimg,
contours,
true, // 是否闭合
cv::Scalar(0, 255, 0), // 默认绿色
2, // 线宽
cv::LINE_8 // 8连通线
);
}
cv::Mat ppOcrEngine::visualize_text(std::vector<std::string> texts,std::vector<cv::Point> points, cv::Mat &img)
{ {
assert(texts.size()==points.size()),"error texts size != points size";
cv::Mat draw_img = cv::Mat(img.size(), CV_8UC3,cv::Scalar(255,255,255));
int width = img.cols*2;
int height = img.rows;
cv::Mat templete_img = cv::Mat(width,height, CV_8UC3,cv::Scalar(255,255,255));
for(int i = 0 ; i < texts.size(); i++)
{
ft2->putText(draw_img,texts[i],points[i].x,points[i].y,15);
}
cv::hconcat(img, draw_img, templete_img);
return templete_img;
}
std::vector<std::string> ppOcrEngine::forward(cv::Mat &srcimg){
std::vector<std::vector<std::vector<int>>> text_roi_boxes; std::vector<std::vector<std::vector<int>>> text_roi_boxes;
std::vector<std::string> text_vec; std::vector<std::string> text_vec;
...@@ -904,7 +881,8 @@ namespace ppocr ...@@ -904,7 +881,8 @@ namespace ppocr
std::cout<<"Not found text roi !\n"; std::cout<<"Not found text roi !\n";
return std::vector<std::string>(); return std::vector<std::string>();
} }
std::cout<<"text_roi_boxes.size(): "<<text_roi_boxes.size()<<"\n";
std::vector<cv::Point> points;
for (int n = 0; n < text_roi_boxes.size(); n++) { for (int n = 0; n < text_roi_boxes.size(); n++) {
cv::Rect rect; cv::Rect rect;
...@@ -920,10 +898,14 @@ namespace ppocr ...@@ -920,10 +898,14 @@ namespace ppocr
text_roi_mat = srcimg(rect).clone(); text_roi_mat = srcimg(rect).clone();
std::string text = text_recognizer->forward(text_roi_mat); std::string text = text_recognizer->forward(text_roi_mat);
text_vec.push_back(text); text_vec.push_back(text);
points.push_back(cv::Point(rect.x,rect.y));
} }
auto end = std::chrono::high_resolution_clock::now(); auto end = std::chrono::high_resolution_clock::now();
auto duration_ms = std::chrono::duration_cast<std::chrono::milliseconds>(end - start); auto duration_ms = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
std::cout<<"Time taken by task: "<< duration_ms.count() <<" ms\n"; std::cout<<"[Time info] elapsed: "<< duration_ms.count() <<" ms\n";
visualize_boxes(srcimg,text_roi_boxes);
cv::Mat res_img = visualize_text(text_vec,points, srcimg);
cv::imwrite("res.jpg",res_img);
return text_vec; return text_vec;
} }
......
...@@ -10,15 +10,9 @@ ...@@ -10,15 +10,9 @@
#include "Filesystem.h" #include "Filesystem.h"
#include "SimpleLog.h" #include "SimpleLog.h"
#include "clipper.h" #include "clipper.h"
#include "cv_put_Text.hpp"
namespace ppocr{ namespace ppocr{
struct _TEXT_BOX
{
cv::Rect t_rect;
float score;
};
using T_BOX = struct _TEXT_BOX;
class CTCDecode class CTCDecode
{ {
private: private:
...@@ -38,7 +32,6 @@ namespace ppocr{ ...@@ -38,7 +32,6 @@ namespace ppocr{
void* output_buffer_device; void* output_buffer_device;
void* output_buffer_host; void* output_buffer_host;
migraphx::shape input_shape; migraphx::shape input_shape;
migraphx::shape output_shape; migraphx::shape output_shape;
std::string input_name; std::string input_name;
...@@ -51,7 +44,7 @@ namespace ppocr{ ...@@ -51,7 +44,7 @@ namespace ppocr{
public: public:
CTCDecode(std::string rec_model_path, CTCDecode(std::string rec_model_path,
std::string precision_mode="fp32", std::string precision_mode="fp16",
int image_width=480, int image_width=480,
int image_height=48, int image_height=48,
int channel=3, int channel=3,
...@@ -61,7 +54,9 @@ namespace ppocr{ ...@@ -61,7 +54,9 @@ namespace ppocr{
~CTCDecode(); ~CTCDecode();
/** /**
* @brief 字符识别编码,可支持,最长可支持预测90个字符,18385个字符 * @brief 字符识别、编码API 字符识别编码,可支持,最长可支持预测90个字符,18385个字符
* @param img 输入图片
* @return 编码后的字符串
*/ */
std::string forward(cv::Mat& img); std::string forward(cv::Mat& img);
...@@ -93,7 +88,6 @@ namespace ppocr{ ...@@ -93,7 +88,6 @@ namespace ppocr{
* @return 成功:text,失败:"" * @return 成功:text,失败:""
*/ */
std::string decode(std::vector<float>& probs,std::vector<int>& indexs,float& mean_prob); std::string decode(std::vector<float>& probs,std::vector<int>& indexs,float& mean_prob);
}; };
class OcrDet class OcrDet
...@@ -117,25 +111,33 @@ namespace ppocr{ ...@@ -117,25 +111,33 @@ namespace ppocr{
float* data; float* data;
//Allocate device buffer and host buffer,if offload_copy is false //当offload_copy为true时,分配设备内存
std::unordered_map<std::string, migraphx::argument> dev_argument; std::unordered_map<std::string, migraphx::argument> dev_argument;
void* input_buffer_device; void* input_buffer_device;
void* output_buffer_device; void* output_buffer_device;
void* output_buffer_host; void* output_buffer_host;
//postprocess //后处理
int n_channel; int n_channel;
int feature_size; //single channel feature map size. int feature_size; //单个通道的特征大小,例如模型输出[1,3,32,32],feature_size= 32x32.
int output_width; int output_width;
int output_height; int output_height;
int max_candidates;//maximun number of candidates contours. int max_candidates;//最大检测的候选区域.
public: public:
OcrDet(std::string det_model_path, OcrDet(std::string det_model_path,
std::string precision_mode="float32", std::string precision_mode="fp16",
bool offload_copy = true, bool offload_copy = true,
float segm_thres = 0.3, float segm_thres = 0.3,
float box_thresh = 0.7); float box_thresh = 0.7);
~OcrDet(); ~OcrDet();
/**
* @brief 字符检测模型推理API
* @param img 原始图片
* @param text_roi_boxes 字符区域坐标,格式:[[[tl.x, tl.y], [tr.x, tr.y],[], [br.x, br.y], [bl.x, bl.y]]]]
* | | | |
* 左上坐标 右上坐标 右下坐标 左下坐标
* @return 成功返回true,失败返回false
*/
bool forward(cv::Mat& img,std::vector<std::vector<std::vector<int>>>& text_roi_boxes); bool forward(cv::Mat& img,std::vector<std::vector<std::vector<int>>>& text_roi_boxes);
private: private:
...@@ -159,11 +161,12 @@ namespace ppocr{ ...@@ -159,11 +161,12 @@ namespace ppocr{
*/ */
int postprocess(float* feature, std::vector<std::vector<std::vector<int>>> &boxes); int postprocess(float* feature, std::vector<std::vector<std::vector<int>>> &boxes);
/**
* @brief 后处理,文本区域提取
* @param pred 二值图(这里字符检测使用了dbnet分割字符区域,二值图对应了文本区域)
int boxes_from_bitmap(cv::Mat& bit_map,std::vector<T_BOX>& box); * @param bitmap 二值图(pred做形态学运算输出bitmap,结合pred结算平均边框得分)
* @return 成功:0,失败:-1
*/
std::vector<std::vector<std::vector<int>>>boxes_from_bitmap( std::vector<std::vector<std::vector<int>>>boxes_from_bitmap(
const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh, const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh,
const float &det_db_unclip_ratio, const bool &use_polygon_score); const float &det_db_unclip_ratio, const bool &use_polygon_score);
...@@ -238,9 +241,6 @@ namespace ppocr{ ...@@ -238,9 +241,6 @@ namespace ppocr{
*/ */
float box_score_fast(std::vector<std::vector<float>> box_array,cv::Mat pred) ; float box_score_fast(std::vector<std::vector<float>> box_array,cv::Mat pred) ;
void visualize_boxes(const cv::Mat &srcimg,
const std::vector<std::vector<std::vector<int>>> &boxes) ;
bool text_recognition(const cv::Mat &srcimg, bool text_recognition(const cv::Mat &srcimg,
const std::vector<std::vector<std::vector<int>>> &boxes); const std::vector<std::vector<std::vector<int>>> &boxes);
...@@ -250,16 +250,21 @@ namespace ppocr{ ...@@ -250,16 +250,21 @@ namespace ppocr{
private: private:
std::shared_ptr<OcrDet> text_detector; std::shared_ptr<OcrDet> text_detector;
std::shared_ptr<CTCDecode> text_recognizer; std::shared_ptr<CTCDecode> text_recognizer;
std::shared_ptr<PutText> ft2 ;
public: public:
ppOcrEngine(const std::string &det_model_path, ppOcrEngine(const std::string &det_model_path,
const std::string &rec_model_path, const std::string &rec_model_path,
const std::string &character_dict_path, const std::string &character_dict_path,
const std::string front,
const float segm_thres=0.3, const float segm_thres=0.3,
const float box_thresh=0.7, const float box_thresh=0.7,
bool offload_copy =true, bool offload_copy =true,
std::string precision_mode = "fp32") ; std::string precision_mode = "fp16") ;
~ppOcrEngine(); ~ppOcrEngine();
std::vector<std::string> forward(cv::Mat &srcimg); std::vector<std::string> forward(cv::Mat &srcimg);
cv::Mat visualize_text(std::vector<std::string> texts,std::vector<cv::Point> points, cv::Mat &img);
void visualize_boxes(cv::Mat &srcimg,
const std::vector<std::vector<std::vector<int>>> &boxes) ;
}; };
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment