"...magic-animate_pytorch.git" did not exist on "45d3f4ffb1c941eed6c439d40c55b3f952efed43"
Commit 41a1b292 authored by Leif's avatar Leif
Browse files

Merge remote-tracking branch 'origin/dygraph' into dygraph

parents 9471054e 3d30899b
...@@ -143,7 +143,7 @@ python PPOCRLabel.py

### 3.1 Shortcut keys

| Shortcut keys            | Description                                      |
|--------------------------|--------------------------------------------------|
| Ctrl + Shift + R         | Re-recognize all the labels of the current image |
| W                        | Create a rect box                                |
| Q                        | Create a four-points box                         |

...@@ -151,7 +151,7 @@ python PPOCRLabel.py

| Ctrl + R                 | Re-recognize the selected box                    |
| Ctrl + C                 | Copy and paste the selected box                  |
| Ctrl + Left Mouse Button | Multi-select the label box                       |
| Ctrl + X                 | Delete the selected box                          |
| Ctrl + V                 | Check image                                      |
| Ctrl + Shift + d         | Delete image                                     |
| D                        | Next image                                       |
......
...@@ -131,16 +131,16 @@ python PPOCRLabel.py --lang ch

### 3.1 Shortcut keys

| Shortcut keys            | Description                                      |
|--------------------------|--------------------------------------------------|
| Ctrl + Shift + R         | Re-recognize all the labels of the current image |
| W                        | Create a rect box                                |
| Q                        | Create a four-points box                         |
| Ctrl + E                 | Edit the label of the selected box               |
| Ctrl + R                 | Re-recognize the selected box                    |
| Ctrl + C                 | Copy and paste the selected box                  |
| Ctrl + Left Mouse Button | Multi-select the label box                       |
| Ctrl + X                 | Delete the selected box                          |
| Ctrl + V                 | Confirm the labels of the current image          |
| Ctrl + Shift + d         | Delete the current image                         |
| D                        | Next image                                       |
......
...@@ -6,6 +6,8 @@ except ImportError:
    from PyQt4.QtGui import *
    from PyQt4.QtCore import *

import time
import datetime
import json
import cv2
import numpy as np
...@@ -80,8 +82,9 @@ class AutoDialog(QDialog):
        self.parent = parent
        self.ocr = ocr
        self.mImgList = mImgList
        self.lender = lenbar
        self.pb = QProgressBar()
        self.pb.setRange(0, self.lender)
        self.pb.setValue(0)
        layout = QVBoxLayout()
...@@ -108,10 +111,16 @@ class AutoDialog(QDialog):
        self.thread_1.progressBarValue.connect(self.handleProgressBarSingal)
        self.thread_1.listValue.connect(self.handleListWidgetSingal)
        self.thread_1.endsignal.connect(self.handleEndsignalSignal)
        self.time_start = time.time()  # save the start time

    def handleProgressBarSingal(self, i):
        self.pb.setValue(i)

        # calculate the time left for auto labeling
        avg_time = (time.time() - self.time_start) / i  # use the average time per image to smooth out fluctuations
        time_left = str(datetime.timedelta(seconds=avg_time * (self.lender - i))).split(".")[0]  # remove microseconds
        self.setWindowTitle("PPOCRLabel -- " + f"Time Left: {time_left}")  # show the ETA in the window title

    def handleListWidgetSingal(self, i):
        self.listWidget.addItem(i)
        titem = self.listWidget.item(self.listWidget.count() - 1)
......
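The ETA logic added above is simple enough to test outside Qt: average time per finished item so far, extrapolated over the items remaining. A minimal Qt-free sketch (the function name and the simulated timing are illustrative, not part of this diff):

```python
# Minimal sketch of the same ETA formula as in handleProgressBarSingal above.
import time
import datetime

def eta_string(start_time, done, total):
    avg_time = (time.time() - start_time) / done         # seconds per finished item so far
    seconds_left = avg_time * (total - done)             # extrapolate over the remaining items
    return str(datetime.timedelta(seconds=seconds_left)).split(".")[0]  # drop microseconds

start = time.time()
time.sleep(1.0)                                          # pretend one image took ~1 s
print(eta_string(start, done=1, total=100))              # prints roughly "0:01:39"
```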
...@@ -87,6 +87,10 @@ class Canvas(QWidget):
        # initialisation for panning
        self.pan_initial_pos = QPoint()

        # locked shapes related
        self.lockedShapes = []
        self.isInTheSameImage = False

    def setDrawingColor(self, qColor):
        self.drawingLineColor = qColor
        self.drawingRectColor = qColor
......
...@@ -30,6 +30,7 @@ DEFAULT_SELECT_LINE_COLOR = QColor(255, 255, 255)
DEFAULT_SELECT_FILL_COLOR = QColor(0, 128, 255, 155)
DEFAULT_VERTEX_FILL_COLOR = QColor(0, 255, 0, 255)
DEFAULT_HVERTEX_FILL_COLOR = QColor(255, 0, 0)
DEFAULT_LOCK_COLOR = QColor(255, 0, 255)
MIN_Y_LABEL = 10

...@@ -57,7 +58,7 @@ class Shape(object):
        self.selected = False
        self.difficult = difficult
        self.paintLabel = paintLabel
        self.locked = False
        self._highlightIndex = None
        self._highlightMode = self.NEAR_VERTEX
        self._highlightSettings = {
......
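How the painting code consumes the new `DEFAULT_LOCK_COLOR` constant and `locked` flag is collapsed in this diff. The following is a hypothetical sketch of that idea, not taken from the PaddleOCR sources:

```python
# Hypothetical sketch: picking an outline color for a shape, with locked
# boxes drawn in the new magenta lock color. Only DEFAULT_LOCK_COLOR and the
# `locked` attribute come from this diff; the helper itself is illustrative.
from PyQt5.QtGui import QColor

DEFAULT_LINE_COLOR = QColor(0, 255, 0, 128)
DEFAULT_SELECT_LINE_COLOR = QColor(255, 255, 255)
DEFAULT_LOCK_COLOR = QColor(255, 0, 255)

def outline_color(shape):
    if getattr(shape, "locked", False):       # locked boxes stand out
        return DEFAULT_LOCK_COLOR
    if getattr(shape, "selected", False):     # then selection highlight
        return DEFAULT_SELECT_LINE_COLOR
    return DEFAULT_LINE_COLOR                 # default outline
```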
...@@ -104,4 +104,6 @@ singleRe=Re-recognition RectBox

labelDialogOption=Pop-up Label Input Dialog
undo=Undo
undoLastPoint=Undo Last Point
autoSaveMode=Auto Export Label Mode
lockBox=Lock selected box/Unlock all boxes
lockBoxDetail=Lock the selected box if no box is locked; otherwise unlock all locked boxes
...@@ -104,4 +104,6 @@ singleRe=Re-recognize this block

labelDialogOption=Pop-up Label Input Dialog
undo=Undo
undoLastPoint=Undo Last Point
autoSaveMode=Auto Export Label Results
lockBox=Lock box/Unlock box
lockBoxDetail=Lock the selected box if no box is currently locked; if locked boxes exist, unlock all of them
...@@ -33,17 +33,17 @@ PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools

- [more](./doc/doc_en/update_en.md)

## Features

- PP-OCR - A series of high-quality pre-trained models, comparable to commercial products
    - Ultra lightweight PP-OCRv2 series models: detection (3.1M) + direction classifier (1.4M) + recognition (8.5M) = 13.0M
    - Ultra lightweight PP-OCR mobile series models: detection (3.0M) + direction classifier (1.4M) + recognition (5.0M) = 9.4M
    - General PP-OCR server series models: detection (47.1M) + direction classifier (1.4M) + recognition (94.9M) = 143.4M
    - Support Chinese, English, and digit recognition, vertical text recognition, and long text recognition
    - Support multi-lingual recognition: about 80 languages such as Korean, Japanese, German, and French
- PP-Structure: a document structure analysis system
    - Support layout analysis and table recognition (with export to Excel)
    - Support key information extraction
    - Support DocVQA
- Rich OCR toolkit
    - Semi-automatic data annotation tool, i.e., PPOCRLabel: supports fast and efficient data annotation
    - Data synthesis tool, i.e., Style-Text: easily synthesize a large number of images similar to the target scene image
- Support user-defined training, and provide rich predictive inference deployment solutions
...@@ -62,7 +62,7 @@ The above pictures are the visualizations of the general ppocr_server model. For

<a name="Community"></a>
## Community

- Scan the QR code below with your WeChat to join the official technical discussion group. We look forward to your participation.

<div align="center">
<img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/dygraph/doc/joinus.PNG" width="200" height="200" />
...@@ -120,8 +120,8 @@ For a new language request, please refer to [Guideline for new language_requests

- [PP-Structure: Information Extraction](./ppstructure/README.md)
    - [Layout Parser](./ppstructure/layout/README.md)
    - [Table Recognition](./ppstructure/table/README.md)
    - [DocVQA](./ppstructure/vqa/README.md)
    - [Key Information Extraction](./ppstructure/docs/kie.md)
- Academic Circles
    - [Two-stage Algorithm](./doc/doc_en/algorithm_overview_en.md)
    - [PGNet Algorithm](./doc/doc_en/pgnet_en.md)
......
...@@ -99,8 +99,8 @@ PaddleOCR aims to build a rich, leading, and practical OCR toolbox

- [PP-Structure: Information Extraction](./ppstructure/README_ch.md)
    - [Layout Analysis](./ppstructure/layout/README_ch.md)
    - [Table Recognition](./ppstructure/table/README_ch.md)
    - [DocVQA](./ppstructure/vqa/README_ch.md)
    - [Key Information Extraction](./ppstructure/docs/kie.md)
- OCR Academic Circles
    - [Two-stage Algorithms: Introduction and Download](./doc/doc_ch/algorithm_overview.md)
    - [End-to-end PGNet Algorithm](./doc/doc_ch/pgnet.md)
......
Global:
  use_gpu: True
  epoch_num: &epoch_num 200
  log_smooth_window: 10
  print_batch_step: 10
  save_model_dir: ./output/re_layoutxlm/
  save_epoch_step: 2000
  # evaluation is run every 19 iterations after the 0th iteration
  eval_batch_step: [ 0, 19 ]
  cal_metric_during_train: False
  save_inference_dir:
  use_visualdl: False
  seed: 2022
  infer_img: doc/vqa/input/zh_val_21.jpg
  save_res_path: ./output/re/

Architecture:
  model_type: vqa
  algorithm: &algorithm "LayoutXLM"
  Transform:
  Backbone:
    name: LayoutXLMForRe
    pretrained: True
    checkpoints:

Loss:
  name: LossFromOutput
  key: loss
  reduction: mean

Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  clip_norm: 10
  lr:
    learning_rate: 0.00005
  regularizer:
    name: L2
    factor: 0.00000

PostProcess:
  name: VQAReTokenLayoutLMPostProcess

Metric:
  name: VQAReTokenMetric
  main_indicator: hmean

Train:
  dataset:
    name: SimpleDataSet
    data_dir: train_data/XFUND/zh_train/image
    label_file_list:
      - train_data/XFUND/zh_train/xfun_normalize_train.json
    ratio_list: [ 1.0 ]
    transforms:
      - DecodeImage: # load image
          img_mode: RGB
          channel_first: False
      - VQATokenLabelEncode: # Class handling label
          contains_re: True
          algorithm: *algorithm
          class_path: &class_path ppstructure/vqa/labels/labels_ser.txt
      - VQATokenPad:
          max_seq_len: &max_seq_len 512
          return_attention_mask: True
      - VQAReTokenRelation:
      - VQAReTokenChunk:
          max_seq_len: *max_seq_len
      - Resize:
          size: [224,224]
      - NormalizeImage:
          scale: 1
          mean: [ 123.675, 116.28, 103.53 ]
          std: [ 58.395, 57.12, 57.375 ]
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
          keep_keys: [ 'input_ids', 'bbox', 'image', 'attention_mask', 'token_type_ids', 'entities', 'relations' ] # dataloader will return list in this order
  loader:
    shuffle: True
    drop_last: False
    batch_size_per_card: 8
    num_workers: 4
    collate_fn: ListCollator

Eval:
  dataset:
    name: SimpleDataSet
    data_dir: train_data/XFUND/zh_val/image
    label_file_list:
      - train_data/XFUND/zh_val/xfun_normalize_val.json
    transforms:
      - DecodeImage: # load image
          img_mode: RGB
          channel_first: False
      - VQATokenLabelEncode: # Class handling label
          contains_re: True
          algorithm: *algorithm
          class_path: *class_path
      - VQATokenPad:
          max_seq_len: *max_seq_len
          return_attention_mask: True
      - VQAReTokenRelation:
      - VQAReTokenChunk:
          max_seq_len: *max_seq_len
      - Resize:
          size: [224,224]
      - NormalizeImage:
          scale: 1
          mean: [ 123.675, 116.28, 103.53 ]
          std: [ 58.395, 57.12, 57.375 ]
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
          keep_keys: [ 'input_ids', 'bbox', 'image', 'attention_mask', 'token_type_ids', 'entities', 'relations' ] # dataloader will return list in this order
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 8
    num_workers: 4
    collate_fn: ListCollator
Global:
  use_gpu: True
  epoch_num: &epoch_num 200
  log_smooth_window: 10
  print_batch_step: 10
  save_model_dir: ./output/ser_layoutlm/
  save_epoch_step: 2000
  # evaluation is run every 19 iterations after the 0th iteration
  eval_batch_step: [ 0, 19 ]
  cal_metric_during_train: False
  save_inference_dir:
  use_visualdl: False
  seed: 2022
  infer_img: doc/vqa/input/zh_val_0.jpg
  save_res_path: ./output/ser/

Architecture:
  model_type: vqa
  algorithm: &algorithm "LayoutLM"
  Transform:
  Backbone:
    name: LayoutLMForSer
    pretrained: True
    checkpoints:
    num_classes: &num_classes 7

Loss:
  name: VQASerTokenLayoutLMLoss
  num_classes: *num_classes

Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  lr:
    name: Linear
    learning_rate: 0.00005
    epochs: *epoch_num
    warmup_epoch: 2
  regularizer:
    name: L2
    factor: 0.00000

PostProcess:
  name: VQASerTokenLayoutLMPostProcess
  class_path: &class_path ppstructure/vqa/labels/labels_ser.txt

Metric:
  name: VQASerTokenMetric
  main_indicator: hmean

Train:
  dataset:
    name: SimpleDataSet
    data_dir: train_data/XFUND/zh_train/image
    label_file_list:
      - train_data/XFUND/zh_train/xfun_normalize_train.json
    transforms:
      - DecodeImage: # load image
          img_mode: RGB
          channel_first: False
      - VQATokenLabelEncode: # Class handling label
          contains_re: False
          algorithm: *algorithm
          class_path: *class_path
      - VQATokenPad:
          max_seq_len: &max_seq_len 512
          return_attention_mask: True
      - VQASerTokenChunk:
          max_seq_len: *max_seq_len
      - Resize:
          size: [224,224]
      - NormalizeImage:
          scale: 1
          mean: [ 123.675, 116.28, 103.53 ]
          std: [ 58.395, 57.12, 57.375 ]
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
          keep_keys: [ 'input_ids', 'labels', 'bbox', 'image', 'attention_mask', 'token_type_ids' ] # dataloader will return list in this order
  loader:
    shuffle: True
    drop_last: False
    batch_size_per_card: 8
    num_workers: 4

Eval:
  dataset:
    name: SimpleDataSet
    data_dir: train_data/XFUND/zh_val/image
    label_file_list:
      - train_data/XFUND/zh_val/xfun_normalize_val.json
    transforms:
      - DecodeImage: # load image
          img_mode: RGB
          channel_first: False
      - VQATokenLabelEncode: # Class handling label
          contains_re: False
          algorithm: *algorithm
          class_path: *class_path
      - VQATokenPad:
          max_seq_len: *max_seq_len
          return_attention_mask: True
      - VQASerTokenChunk:
          max_seq_len: *max_seq_len
      - Resize:
          size: [224,224]
      - NormalizeImage:
          scale: 1
          mean: [ 123.675, 116.28, 103.53 ]
          std: [ 58.395, 57.12, 57.375 ]
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
          keep_keys: [ 'input_ids', 'labels', 'bbox', 'image', 'attention_mask', 'token_type_ids' ] # dataloader will return list in this order
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 8
    num_workers: 4
Global:
  use_gpu: True
  epoch_num: &epoch_num 200
  log_smooth_window: 10
  print_batch_step: 10
  save_model_dir: ./output/ser_layoutxlm/
  save_epoch_step: 2000
  # evaluation is run every 19 iterations after the 0th iteration
  eval_batch_step: [ 0, 19 ]
  cal_metric_during_train: False
  save_inference_dir:
  use_visualdl: False
  seed: 2022
  infer_img: doc/vqa/input/zh_val_42.jpg
  save_res_path: ./output/ser

Architecture:
  model_type: vqa
  algorithm: &algorithm "LayoutXLM"
  Transform:
  Backbone:
    name: LayoutXLMForSer
    pretrained: True
    checkpoints:
    num_classes: &num_classes 7

Loss:
  name: VQASerTokenLayoutLMLoss
  num_classes: *num_classes

Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  lr:
    name: Linear
    learning_rate: 0.00005
    epochs: *epoch_num
    warmup_epoch: 2
  regularizer:
    name: L2
    factor: 0.00000

PostProcess:
  name: VQASerTokenLayoutLMPostProcess
  class_path: &class_path ppstructure/vqa/labels/labels_ser.txt

Metric:
  name: VQASerTokenMetric
  main_indicator: hmean

Train:
  dataset:
    name: SimpleDataSet
    data_dir: train_data/XFUND/zh_train/image
    label_file_list:
      - train_data/XFUND/zh_train/xfun_normalize_train.json
    ratio_list: [ 1.0 ]
    transforms:
      - DecodeImage: # load image
          img_mode: RGB
          channel_first: False
      - VQATokenLabelEncode: # Class handling label
          contains_re: False
          algorithm: *algorithm
          class_path: *class_path
      - VQATokenPad:
          max_seq_len: &max_seq_len 512
          return_attention_mask: True
      - VQASerTokenChunk:
          max_seq_len: *max_seq_len
      - Resize:
          size: [224,224]
      - NormalizeImage:
          scale: 1
          mean: [ 123.675, 116.28, 103.53 ]
          std: [ 58.395, 57.12, 57.375 ]
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
          keep_keys: [ 'input_ids', 'labels', 'bbox', 'image', 'attention_mask', 'token_type_ids' ] # dataloader will return list in this order
  loader:
    shuffle: True
    drop_last: False
    batch_size_per_card: 8
    num_workers: 4

Eval:
  dataset:
    name: SimpleDataSet
    data_dir: train_data/XFUND/zh_val/image
    label_file_list:
      - train_data/XFUND/zh_val/xfun_normalize_val.json
    transforms:
      - DecodeImage: # load image
          img_mode: RGB
          channel_first: False
      - VQATokenLabelEncode: # Class handling label
          contains_re: False
          algorithm: *algorithm
          class_path: *class_path
      - VQATokenPad:
          max_seq_len: *max_seq_len
          return_attention_mask: True
      - VQASerTokenChunk:
          max_seq_len: *max_seq_len
      - Resize:
          size: [224,224]
      - NormalizeImage:
          scale: 1
          mean: [ 123.675, 116.28, 103.53 ]
          std: [ 58.395, 57.12, 57.375 ]
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
          keep_keys: [ 'input_ids', 'labels', 'bbox', 'image', 'attention_mask', 'token_type_ids' ] # dataloader will return list in this order
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 8
    num_workers: 4
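All three configs lean heavily on YAML anchors (`&name`) and aliases (`*name`) to define a value once, such as `epoch_num` or `max_seq_len`, and reuse it elsewhere. A minimal sketch of how they resolve on load (the config path below is illustrative):

```python
# Minimal sketch: YAML anchors (&) and aliases (*) resolve to the same value
# when parsed. Requires PyYAML; the file path is illustrative.
import yaml

with open("configs/vqa/ser/layoutxlm.yml") as f:
    cfg = yaml.safe_load(f)

# `epoch_num: &epoch_num 200` in Global is reused as `epochs: *epoch_num`
# under Optimizer.lr, so both resolve to the same integer.
assert cfg["Optimizer"]["lr"]["epochs"] == cfg["Global"]["epoch_num"] == 200
```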
...@@ -160,6 +160,7 @@ public class Predictor {
                for (String content : contents) {
                    wordLabels.add(content);
                }
                wordLabels.add(" ");
                Log.i(TAG, "Word label size: " + wordLabels.size());
            } catch (Exception e) {
                Log.e(TAG, e.getMessage());
......
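The appended blank entry presumably gives the recognizer an index that decodes to the space character, mirroring the `use_space_char` behavior of the Python pipeline. A hedged Python sketch of the same idea (the helper name, path handling, and flag default are illustrative):

```python
# Sketch: building a character dictionary for the recognizer, with a trailing
# space entry so the last index decodes to " ". Mirrors the effect of
# wordLabels.add(" ") in the Java code above; this helper is illustrative.
def load_char_dict(dict_path, use_space_char=True):
    with open(dict_path, "rb") as f:
        chars = [line.decode("utf-8").strip("\n").strip("\r\n") for line in f]
    if use_space_char:
        chars.append(" ")  # same effect as the Java wordLabels.add(" ")
    return chars
```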
# Server-side C++ Inference

This chapter introduces the C++ deployment steps of the PaddleOCR model. For the corresponding Python predictive deployment, refer to this [document](../../doc/doc_ch/inference.md).

C++ outperforms Python in terms of performance, so C++ deployment is mostly used in CPU and GPU deployment scenarios. This section introduces how to configure the C++ environment and deploy PaddleOCR in a Linux (CPU/GPU) environment. For Windows deployment, please refer to the [Windows compilation guidelines](./docs/windows_vs2019_build.md).

## 1. Prepare the Environment

...@@ -15,7 +14,7 @@ PaddleOCR model deployment.
### 1.1 Compile OpenCV

* First, you need to download the compiled source code package for the Linux environment from the OpenCV official website. Taking OpenCV 3.4.7 as an example, the download commands are as follows.

```bash
cd deploy/cpp_infer
wget https://paddleocr.bj.bcebos.com/libs/opencv/opencv-3.4.7.tar.gz
tar -xf opencv-3.4.7.tar.gz
```

Finally, you will see the folder `opencv-3.4.7/` in the current directory.
* To compile OpenCV, set the OpenCV source path (`root_path`) and installation path (`install_path`) yourself. Enter the OpenCV source directory and compile as follows.

```shell
make -j
make install
```

In the above commands, `root_path` is the path of the downloaded OpenCV source code and `install_path` is the OpenCV installation path. After `make install` completes, the OpenCV header and library files are generated in this folder for the later compilation of the OCR source code.

The final file structure under the OpenCV installation path is as follows.
```
opencv3/
...
```
#### 1.2.1 Direct download and installation

You can review and select the appropriate version of the inference library on the [Paddle inference library official website](https://paddle-inference.readthedocs.io/en/latest/user_guides/download_lib.html).

* After downloading, use the following command to extract the files.

```
tar -xf paddle_inference.tgz
```

Finally, you will see the folder `paddle_inference/` in the current path.
#### 1.2.2 Compile the inference library from source

* If you want the latest Paddle inference library features, you can download the latest code from the Paddle GitHub repository and compile the inference library from source. It is recommended to use a Paddle version greater than or equal to 2.0.1.
* You can refer to the [Paddle inference library documentation](https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html) to get the Paddle source code from GitHub and then compile it to generate the latest inference library. The method of using git to access the code is as follows.

```shell
git clone https://github.com/PaddlePaddle/Paddle.git
git checkout develop
```
* Enter the Paddle directory and run the following commands to compile the Paddle inference library.

```shell
rm -rf build
...
```

After compilation, the following files are generated:

```
build/paddle_inference_install_dir/
...
|-- version.txt
```

Here, `paddle` is the Paddle library required later for C++ prediction, and `version.txt` contains the version information of the current inference library.
## 2. Compile and Run the Demo

### 2.1 Export the inference model

* You can refer to [Model inference](../../doc/doc_ch/inference.md) to export the inference model. After the model is exported, assuming it is placed in the `inference` directory, the directory structure is as follows.

```
inference/
...
```

```shell
CUDA_LIB_DIR=your_cuda_lib_dir
CUDNN_LIB_DIR=your_cudnn_lib_dir
```

`OPENCV_DIR` is the OpenCV installation path; `LIB_DIR` is the downloaded (`paddle_inference` folder) or the generated Paddle inference library path (`build/paddle_inference_install_dir` folder); `CUDA_LIB_DIR` is the CUDA library file path, which in docker is `/usr/local/cuda/lib64`; `CUDNN_LIB_DIR` is the cuDNN library file path, which in docker is `/usr/lib/x86_64-linux-gnu/`.
* After the compilation is completed, an executable file named `ppocr` will be generated in the `build` folder.

### Run the demo

Execute the built executable file:

```shell
./build/ppocr <mode> [--param1] [--param2] [...]
```

Here, `mode` is a required parameter; the valid values are:

| mode value | Model used        |
|------------|-------------------|
| det        | Detection only    |
| rec        | Recognition only  |
| system     | End-to-end system |

Specifically:

##### 1. Run the det demo:

```shell
...
    --image_dir=../../doc/imgs/12.jpg
```
More parameters are as follows:

- Common parameters

| parameter | data type | default | meaning |
| --- | --- | --- | --- |
| cpu_math_library_num_threads | int | 10 | Number of threads for CPU inference; when there are enough machine cores, the larger the value, the faster the inference |
| use_mkldnn | bool | true | Whether to use the mkldnn library |

- Detection related parameters

| parameter | data type | default | meaning |
| --- | --- | --- | --- |
| use_polygon_score | bool | false | Whether to use a polygon box to calculate the bbox score; false means a rectangle box is used. A rectangular box is faster to compute, while a polygonal box is more accurate for curved text areas |
| visualize | bool | true | Whether to visualize the results; when set to true, the prediction result is saved in the image file `./ocr_vis.png` |

- Classifier related parameters

| parameter | data type | default | meaning |
| --- | --- | --- | --- |
| cls_model_dir | string | - | Address of the direction classifier inference model |
| cls_thresh | float | 0.9 | Score threshold of the direction classifier |

- Recognition related parameters

| parameter | data type | default | meaning |
| --- | --- | --- | --- |

...@@ -265,4 +272,4 @@ The detection results will be shown on the screen, which is as follows.

### 2.3 Notes

* The Paddle 2.0.0 inference model library is recommended for this tutorial.
English | [简体中文](README_cn.md)

## Introduction

Many users hope to package the PaddleOCR service into a Docker image so that it can be quickly released and used in Docker or K8s environments.

This page provides some standardized code to achieve this goal. You can quickly publish the PaddleOCR project as a callable RESTful API service through the following steps. (At present, deployment based on the HubServing mode is implemented first; the author plans to add deployment of the PaddleServing mode in the future.)
## 1. Prerequisites

...@@ -14,7 +14,7 @@ c. NVIDIA Container Toolkit (GPU; Docker 19.03+ can skip this)

d. cuDNN 7.6+ (GPU)

## 2. Build Image

a. Go to the Dockerfile directory (note: the CPU and GPU versions are distinguished; the following takes CPU as an example, and the GPU version needs the keyword replaced)

```
cd deploy/docker/hubserving/cpu
```
...@@ -42,13 +42,13 @@ docker logs -f paddle_ocr

```
docker logs -f paddle_ocr
```

## 4. Test

a. Calculate the Base64 encoding of the picture to be recognized (for testing, you can use a free online tool such as https://freeonlinetools24.com/base64-image/)

b. Post a service request (a sample request is in sample_request.txt)

```
curl -H "Content-Type:application/json" -X POST --data "{\"images\": [\"Input image Base64 encode(need to delete the code 'data:image/jpg;base64,')\"]}" http://localhost:8868/predict/ocr_system
```

c. Get the response (if the call is successful, the following result will be returned)

```
{"msg":"","results":[[{"confidence":0.8403433561325073,"text":"约定","text_region":[[345,377],[641,390],[634,540],[339,528]]},{"confidence":0.8131805658340454,"text":"最终相遇","text_region":[[356,532],[624,530],[624,596],[356,598]]}]],"status":"0"}
```
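Equivalently, the request can be scripted in Python. A minimal sketch (the image file name is illustrative; the endpoint and payload shape match the curl example above):

```python
# Minimal Python equivalent of the curl request above.
import base64
import json
import requests

# Read and Base64-encode the image; no 'data:image/jpg;base64,' prefix.
with open("test.jpg", "rb") as f:
    b64 = base64.b64encode(f.read()).decode("utf-8")

resp = requests.post(
    "http://localhost:8868/predict/ocr_system",
    headers={"Content-Type": "application/json"},
    data=json.dumps({"images": [b64]}),
)
print(resp.json())  # {"msg": "", "results": [...], "status": "0"} on success
```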
# Tutorial of PaddleOCR Mobile Deployment

This tutorial introduces how to use [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite) to deploy PaddleOCR ultra-lightweight Chinese and English detection models on mobile phones.

Paddle Lite is a lightweight inference engine for PaddlePaddle. It provides efficient inference capabilities for mobile phones and IoT devices, and extensively integrates cross-platform hardware to provide lightweight deployment solutions for on-device deployment issues.

## 1. Preparation
......
...@@ -22,6 +22,7 @@ PaddleOCR provides two service deployment methods:

- [Environment preparation](#环境准备)
- [Model conversion](#模型转换)
- [Paddle Serving pipeline deployment](#部署)
- [Windows users](#Windows用户)
- [FAQ](#FAQ)

<a name="环境准备"></a>

...@@ -187,9 +188,10 @@ python3 -m paddle_serving_client.convert --dirname ./ch_PP-OCRv2_rec_infer/ \

```
2021-05-13 03:42:36,979 chl2(In: ['rec'], Out: ['@DAGExecutor']) size[0/0]
```

<a name="Windows用户"></a>
## Windows Users

Windows users cannot use the startup method above and need to use the Web Service instead. For details, see the [guide to Paddle Serving on Windows](https://github.com/PaddlePaddle/Serving/blob/develop/doc/Windows_Tutorial_CN.md).

**On Windows, only the CPU mode of version 0.5.0 can be used.**

......
```shell
python3 setup.py install
```

### 2. Download Pre-trained Model

Model pruning requires loading a pre-trained model.
PaddleOCR also provides a series of [models](../../../doc/doc_en/models_list_en.md). Developers can choose from them or use their own model according to their needs.

### 3. Pruning sensitivity analysis

After the pre-trained model is loaded, sensitivity analysis is performed on each network layer of the model to understand its redundancy, and the result is saved to a sensitivity file named sen.pickle. After that, the user can load the sensitivity file via the [methods provided by PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/paddleslim/prune/sensitive.py#L221) and determine the pruning ratio of each network layer automatically. For the specific details of sensitivity analysis, see: [Sensitivity analysis](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/docs/zh_cn/tutorials/image_classification_sensitivity_analysis_tutorial.md)

The data format of the sensitivity file is:

```
sen.pickle(Dict){
    'layer_weight_name_0': sens_of_each_ratio(Dict){'pruning_ratio_0': acc_loss, 'pruning_ratio_1': acc_loss}
    ...
    'conv10_expand_weights': {0.1: 0.006509952684312718, 0.2: 0.01827734339798862, 0.3: 0.014528405644659832, 0.6: 0.06536008804270439, 0.8: 0.11798612250664964, 0.7: 0.12391408417493704, 0.4: 0.030615754498018757, 0.5: 0.047105205602406594}
    'conv10_linear_weights': {0.1: 0.05113190831455035, 0.2: 0.07705573833558801, 0.3: 0.12096721757739311, 0.6: 0.5135061352930738, 0.8: 0.7908166677143281, 0.7: 0.7272187676899062, 0.4: 0.1819252083008504, 0.5: 0.3728054727792405}
}
```

Loading the sensitivity file returns a dict whose keys are the parameter names of each layer; the value of each key is the pruning-sensitivity information of the corresponding layer. For example, pruning 10% of the filters of the layer corresponding to conv10_expand_weights would lead to a 0.65% degradation of model performance. The details can be seen at: [Sensitivity analysis](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/docs/zh_cn/algo/algo.md#2-%E5%8D%B7%E7%A7%AF%E6%A0%B8%E5%89%AA%E8%A3%81%E5%8E%9F%E7%90%86)

Enter the PaddleOCR root directory and perform sensitivity analysis on the model with the following command:
......
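The analysis command itself is collapsed in this diff. Once sen.pickle exists, it can be inspected with plain pickle; a minimal sketch following the data format documented above (the 5% accuracy-loss budget is illustrative):

```python
# Sketch: inspecting a sensitivity file produced by the analysis above.
# The file name follows the doc; the 0.05 loss budget is illustrative.
import pickle

with open("sen.pickle", "rb") as f:
    sens = pickle.load(f)  # {layer_name: {pruning_ratio: acc_loss, ...}, ...}

# For each layer, find the largest pruning ratio whose accuracy loss
# stays under the budget.
for layer, ratio_to_loss in sens.items():
    ok = [ratio for ratio, acc_loss in ratio_to_loss.items() if acc_loss < 0.05]
    print(layer, "-> prune up to", max(ok) if ok else "nothing")
```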