wangsen / paddle_dbnet · Commits

Commit 4b214948, authored Mar 01, 2022 by zhiminzhang0830

Merge branch 'dygraph' of https://github.com/PaddlePaddle/PaddleOCR into new_branch

Parents: 917606b4, 6e607a0f
Changes: 144. Showing 20 changed files with 607 additions and 117 deletions (+607, -117).
configs/rec/rec_efficientb3_fpn_pren.yml                         +92   -0
configs/vqa/re/layoutlmv2.yml                                    +123  -0
configs/vqa/re/layoutxlm.yml                                     +4    -3
configs/vqa/ser/layoutlmv2.yml                                   +121  -0
deploy/android_demo/README.md                                    +34   -8
deploy/android_demo/app/build.gradle                             +4    -9
deploy/android_demo/app/src/main/AndroidManifest.xml             +0    -1
deploy/android_demo/app/src/main/assets/images/det_0.jpg         +0    -0
deploy/android_demo/app/src/main/assets/images/det_180.jpg       +0    -0
deploy/android_demo/app/src/main/assets/images/det_270.jpg       +0    -0
deploy/android_demo/app/src/main/assets/images/det_90.jpg        +0    -0
deploy/android_demo/app/src/main/assets/images/rec_0.jpg         +0    -0
deploy/android_demo/app/src/main/assets/images/rec_0_180.jpg     +0    -0
deploy/android_demo/app/src/main/assets/images/rec_1.jpg         +0    -0
deploy/android_demo/app/src/main/assets/images/rec_1_180.jpg     +0    -0
deploy/android_demo/app/src/main/cpp/native.cpp                  +17   -12
deploy/android_demo/app/src/main/cpp/ocr_ppredictor.cpp          +157  -68
deploy/android_demo/app/src/main/cpp/ocr_ppredictor.h            +17   -9
deploy/android_demo/app/src/main/cpp/ppredictor.cpp              +36   -6
deploy/android_demo/app/src/main/cpp/ppredictor.h                +2    -1
configs/rec/rec_efficientb3_fpn_pren.yml  (new file, mode 0 → 100644)

Global:
  use_gpu: True
  epoch_num: 8
  log_smooth_window: 20
  print_batch_step: 5
  save_model_dir: ./output/rec/pren_new
  save_epoch_step: 3
  # evaluation is run every 2000 iterations after the 4000th iteration
  eval_batch_step: [4000, 2000]
  cal_metric_during_train: True
  pretrained_model:
  checkpoints:
  save_inference_dir:
  use_visualdl: False
  infer_img: doc/imgs_words/ch/word_1.jpg
  # for data or label process
  character_dict_path:
  max_text_length: &max_text_length 25
  infer_mode: False
  use_space_char: False
  save_res_path: ./output/rec/predicts_pren.txt

Optimizer:
  name: Adadelta
  lr:
    name: Piecewise
    decay_epochs: [2, 5, 7]
    values: [0.5, 0.1, 0.01, 0.001]

Architecture:
  model_type: rec
  algorithm: PREN
  in_channels: 3
  Backbone:
    name: EfficientNetb3_PREN
  Neck:
    name: PRENFPN
    n_r: 5
    d_model: 384
    max_len: *max_text_length
    dropout: 0.1
  Head:
    name: PRENHead

Loss:
  name: PRENLoss

PostProcess:
  name: PRENLabelDecode

Metric:
  name: RecMetric
  main_indicator: acc

Train:
  dataset:
    name: LMDBDataSet
    data_dir: ./train_data/data_lmdb_release/training/
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: False
      - PRENLabelEncode:
      - RecAug:
      - PRENResizeImg:
          image_shape: [64, 256]  # h,w
      - KeepKeys:
          keep_keys: ['image', 'label']
  loader:
    shuffle: True
    batch_size_per_card: 128
    drop_last: True
    num_workers: 8

Eval:
  dataset:
    name: LMDBDataSet
    data_dir: ./train_data/data_lmdb_release/validation/
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: False
      - PRENLabelEncode:
      - PRENResizeImg:
          image_shape: [64, 256]  # h,w
      - KeepKeys:
          keep_keys: ['image', 'label']
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 64
    num_workers: 8
configs/vqa/re/layoutlmv2.yml  (new file, mode 0 → 100644)

Global:
  use_gpu: True
  epoch_num: &epoch_num 200
  log_smooth_window: 10
  print_batch_step: 10
  save_model_dir: ./output/re_layoutlmv2/
  save_epoch_step: 2000
  # evaluation is run every 10 iterations after the 0th iteration
  eval_batch_step: [0, 19]
  cal_metric_during_train: False
  save_inference_dir:
  use_visualdl: False
  seed: 2048
  infer_img: doc/vqa/input/zh_val_21.jpg
  save_res_path: ./output/re/

Architecture:
  model_type: vqa
  algorithm: &algorithm "LayoutLMv2"
  Transform:
  Backbone:
    name: LayoutLMv2ForRe
    pretrained: True
    checkpoints:

Loss:
  name: LossFromOutput
  key: loss
  reduction: mean

Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  clip_norm: 10
  lr:
    learning_rate: 0.00005
    warmup_epoch: 10
  regularizer:
    name: L2
    factor: 0.00000

PostProcess:
  name: VQAReTokenLayoutLMPostProcess

Metric:
  name: VQAReTokenMetric
  main_indicator: hmean

Train:
  dataset:
    name: SimpleDataSet
    data_dir: train_data/XFUND/zh_train/image
    label_file_list:
      - train_data/XFUND/zh_train/xfun_normalize_train.json
    ratio_list: [1.0]
    transforms:
      - DecodeImage:  # load image
          img_mode: RGB
          channel_first: False
      - VQATokenLabelEncode:  # Class handling label
          contains_re: True
          algorithm: *algorithm
          class_path: &class_path ppstructure/vqa/labels/labels_ser.txt
      - VQATokenPad:
          max_seq_len: &max_seq_len 512
          return_attention_mask: True
      - VQAReTokenRelation:
      - VQAReTokenChunk:
          max_seq_len: *max_seq_len
      - Resize:
          size: [224, 224]
      - NormalizeImage:
          scale: 1./255.
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
          keep_keys: ['input_ids', 'bbox', 'image', 'attention_mask', 'token_type_ids', 'entities', 'relations']  # dataloader will return list in this order
  loader:
    shuffle: True
    drop_last: False
    batch_size_per_card: 8
    num_workers: 8
    collate_fn: ListCollator

Eval:
  dataset:
    name: SimpleDataSet
    data_dir: train_data/XFUND/zh_val/image
    label_file_list:
      - train_data/XFUND/zh_val/xfun_normalize_val.json
    transforms:
      - DecodeImage:  # load image
          img_mode: RGB
          channel_first: False
      - VQATokenLabelEncode:  # Class handling label
          contains_re: True
          algorithm: *algorithm
          class_path: *class_path
      - VQATokenPad:
          max_seq_len: *max_seq_len
          return_attention_mask: True
      - VQAReTokenRelation:
      - VQAReTokenChunk:
          max_seq_len: *max_seq_len
      - Resize:
          size: [224, 224]
      - NormalizeImage:
          scale: 1./255.
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
          keep_keys: ['input_ids', 'bbox', 'image', 'attention_mask', 'token_type_ids', 'entities', 'relations']  # dataloader will return list in this order
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 8
    num_workers: 8
    collate_fn: ListCollator
configs/vqa/re/layoutxlm.yml

@@ -35,6 +35,7 @@ Optimizer:
   clip_norm: 10
   lr:
     learning_rate: 0.00005
+    warmup_epoch: 10
   regularizer:
     name: L2
     factor: 0.00000
@@ -81,7 +82,7 @@ Train:
     shuffle: True
     drop_last: False
     batch_size_per_card: 8
-    num_workers: 4
+    num_workers: 8
     collate_fn: ListCollator

 Eval:
@@ -118,5 +119,5 @@ Eval:
     shuffle: False
     drop_last: False
     batch_size_per_card: 8
-    num_workers: 4
+    num_workers: 8
     collate_fn: ListCollator
configs/vqa/ser/layoutlmv2.yml  (new file, mode 0 → 100644)

Global:
  use_gpu: True
  epoch_num: &epoch_num 200
  log_smooth_window: 10
  print_batch_step: 10
  save_model_dir: ./output/ser_layoutlmv2/
  save_epoch_step: 2000
  # evaluation is run every 10 iterations after the 0th iteration
  eval_batch_step: [0, 19]
  cal_metric_during_train: False
  save_inference_dir:
  use_visualdl: False
  seed: 2022
  infer_img: doc/vqa/input/zh_val_0.jpg
  save_res_path: ./output/ser/

Architecture:
  model_type: vqa
  algorithm: &algorithm "LayoutLMv2"
  Transform:
  Backbone:
    name: LayoutLMv2ForSer
    pretrained: True
    checkpoints:
    num_classes: &num_classes 7

Loss:
  name: VQASerTokenLayoutLMLoss
  num_classes: *num_classes

Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  lr:
    name: Linear
    learning_rate: 0.00005
    epochs: *epoch_num
    warmup_epoch: 2
  regularizer:
    name: L2
    factor: 0.00000

PostProcess:
  name: VQASerTokenLayoutLMPostProcess
  class_path: &class_path ppstructure/vqa/labels/labels_ser.txt

Metric:
  name: VQASerTokenMetric
  main_indicator: hmean

Train:
  dataset:
    name: SimpleDataSet
    data_dir: train_data/XFUND/zh_train/image
    label_file_list:
      - train_data/XFUND/zh_train/xfun_normalize_train.json
    transforms:
      - DecodeImage:  # load image
          img_mode: RGB
          channel_first: False
      - VQATokenLabelEncode:  # Class handling label
          contains_re: False
          algorithm: *algorithm
          class_path: *class_path
      - VQATokenPad:
          max_seq_len: &max_seq_len 512
          return_attention_mask: True
      - VQASerTokenChunk:
          max_seq_len: *max_seq_len
      - Resize:
          size: [224, 224]
      - NormalizeImage:
          scale: 1
          mean: [123.675, 116.28, 103.53]
          std: [58.395, 57.12, 57.375]
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
          keep_keys: ['input_ids', 'labels', 'bbox', 'image', 'attention_mask', 'token_type_ids']  # dataloader will return list in this order
  loader:
    shuffle: True
    drop_last: False
    batch_size_per_card: 8
    num_workers: 4

Eval:
  dataset:
    name: SimpleDataSet
    data_dir: train_data/XFUND/zh_val/image
    label_file_list:
      - train_data/XFUND/zh_val/xfun_normalize_val.json
    transforms:
      - DecodeImage:  # load image
          img_mode: RGB
          channel_first: False
      - VQATokenLabelEncode:  # Class handling label
          contains_re: False
          algorithm: *algorithm
          class_path: *class_path
      - VQATokenPad:
          max_seq_len: *max_seq_len
          return_attention_mask: True
      - VQASerTokenChunk:
          max_seq_len: *max_seq_len
      - Resize:
          size: [224, 224]
      - NormalizeImage:
          scale: 1
          mean: [123.675, 116.28, 103.53]
          std: [58.395, 57.12, 57.375]
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
          keep_keys: ['input_ids', 'labels', 'bbox', 'image', 'attention_mask', 'token_type_ids']  # dataloader will return list in this order
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 8
    num_workers: 4
deploy/android_demo/README.md  (content translated from Chinese)

-# How to test quickly
-### 1. Install the latest version of Android Studio
+- [Android Demo](#android-demo)
+  - [1. Introduction](#1-简介)
+  - [2. Recent updates](#2-近期更新)
+  - [3. Quick start](#3-快速使用)
+    - [3.1 Install the latest version of Android Studio](#31-安装最新版本的android-studio)
+    - [3.2 Install NDK 20 or above](#32-安装-ndk-20-以上版本)
+    - [3.3 Import the project](#33-导入项目)
+  - [4 More support](#4-更多支持)
+
+# Android Demo
+
+## 1. Introduction
+This is the Android demo of PaddleOCR. It currently supports running the text detection, text direction classifier and text recognition models, and is developed with [PaddleLite v2.10](https://github.com/PaddlePaddle/Paddle-Lite/tree/release/v2.10).
+
+## 2. Recent updates
+* 2022.02.27
+    * The prediction library was upgraded to PaddleLite v2.10
+    * Six run modes are now supported (see the sketch after this section):
+        * detection + classification + recognition
+        * detection + recognition
+        * classification + recognition
+        * detection
+        * recognition
+        * classification
+
+## 3. Quick start
+### 3.1 Install the latest version of Android Studio
 It can be downloaded from https://developer.android.com/studio. This demo was written with Android Studio 4.0.

-### 2. Install NDK 20 or above
+### 3.2 Install NDK 20 or above
 The demo was tested with NDK 20b; any NDK version from 20 on builds successfully.

 If you are a beginner, you can install and test the NDK build environment as follows:
 Click File -> New -> New Project and create a "Native C++" project.

-### 3. Import the project
+### 3.3 Import the project
 Click File -> New -> Import Project..., then follow Android Studio's wizard to import it.

-# Get more support
-Go to [Paddle-Lite](https://github.com/PaddlePaddle/Paddle-Lite) for more development support.
+## 4 More support
+Go to [EasyEdge, the edge-model generation platform](https://ai.baidu.com/easyedge/app/open_source_demo?referrerUrl=paddlelite) for more development support:
+
+- Demo APP: can be installed by scanning a QR code with a phone, a quick way to try text recognition on a mobile device
+- SDK: the models are packaged into SDKs adapted to different chips and operating systems, with complete interfaces for secondary development
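The six run modes listed in the README reach the native layer as the three integer flags this commit adds to OCR_PPredictor::infer_ocr. A minimal sketch, assuming the headers under deploy/android_demo/app/src/main/cpp and an already initialised predictor; the helper name run_mode_demo and the max_size_len value are illustrative and not part of the commit:

    #include <vector>
    #include <opencv2/core.hpp>
    #include "ocr_ppredictor.h"

    // Hypothetical helper: run one of the six README modes on an initialised predictor.
    std::vector<ppredictor::OCRPredictResult>
    run_mode_demo(ppredictor::OCR_PPredictor &predictor, cv::Mat &image,
                  bool det, bool cls, bool rec) {
      const int max_size_len = 960;  // assumed detection resize limit, not from the commit
      // det+cls+rec -> (1,1,1); det+rec -> (1,0,1); cls+rec -> (0,1,1);
      // det only -> (1,0,0); rec only -> (0,0,1); cls only -> (0,1,0)
      return predictor.infer_ocr(image, max_size_len, det ? 1 : 0, cls ? 1 : 0,
                                 rec ? 1 : 0);
    }

For example, run_mode_demo(predictor, img, true, false, true) corresponds to the "detection + recognition" mode.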
deploy/android_demo/app/build.gradle

@@ -8,8 +8,8 @@ android {
         applicationId "com.baidu.paddle.lite.demo.ocr"
         minSdkVersion 23
         targetSdkVersion 29
-        versionCode 1
-        versionName "1.0"
+        versionCode 2
+        versionName "2.0"
         testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner"
         externalNativeBuild {
             cmake {
@@ -17,11 +17,6 @@ android {
                 arguments '-DANDROID_PLATFORM=android-23', '-DANDROID_STL=c++_shared', "-DANDROID_ARM_NEON=TRUE"
             }
         }
-        ndk {
-            // abiFilters "arm64-v8a", "armeabi-v7a"
-            abiFilters "arm64-v8a", "armeabi-v7a"
-            ldLibs "jnigraphics"
-        }
     }
     buildTypes {
         release {
@@ -48,7 +43,7 @@ dependencies {
 def archives = [
     [
-        'src': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/paddle_lite_libs_v2_9_0.tar.gz',
+        'src': 'https://paddleocr.bj.bcebos.com/libs/paddle_lite_libs_v2_10.tar.gz',
         'dest': 'PaddleLite'
     ],
     [
@@ -56,7 +51,7 @@ def archives = [
         'dest': 'OpenCV'
     ],
     [
-        'src': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ocr_v2_for_cpu.tar.gz',
+        'src': 'https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2.tar.gz',
         'dest': 'src/main/assets/models'
     ],
     [
deploy/android_demo/app/src/main/AndroidManifest.xml

@@ -14,7 +14,6 @@
         android:roundIcon="@mipmap/ic_launcher_round"
         android:supportsRtl="true"
         android:theme="@style/AppTheme">
-        <!-- to test MiniActivity, change this to com.baidu.paddle.lite.demo.ocr.MiniActivity -->
         <activity android:name="com.baidu.paddle.lite.demo.ocr.MainActivity">
             <intent-filter>
                 <action android:name="android.intent.action.MAIN" />
deploy/android_demo/app/src/main/assets/images/0.jpg → deploy/android_demo/app/src/main/assets/images/det_0.jpg  (file moved)
deploy/android_demo/app/src/main/assets/images/180.jpg → deploy/android_demo/app/src/main/assets/images/det_180.jpg  (file moved)
deploy/android_demo/app/src/main/assets/images/270.jpg → deploy/android_demo/app/src/main/assets/images/det_270.jpg  (file moved)
deploy/android_demo/app/src/main/assets/images/90.jpg → deploy/android_demo/app/src/main/assets/images/det_90.jpg  (file moved)
deploy/android_demo/app/src/main/assets/images/rec_0.jpg  (new file, mode 0 → 100644, 6.22 KB)
deploy/android_demo/app/src/main/assets/images/rec_0_180.jpg  (new file, mode 0 → 100644, 6.61 KB)
deploy/android_demo/app/src/main/assets/images/rec_1.jpg  (new file, mode 0 → 100644, 7.75 KB)
deploy/android_demo/app/src/main/assets/images/rec_1_180.jpg  (new file, mode 0 → 100644, 8.06 KB)
deploy/android_demo/app/src/main/cpp/native.cpp  (comments translated from Chinese)

@@ -13,7 +13,7 @@ static paddle::lite_api::PowerMode str_to_cpu_mode(const std::string &cpu_mode);
 extern "C" JNIEXPORT jlong JNICALL
 Java_com_baidu_paddle_lite_demo_ocr_OCRPredictorNative_init(
     JNIEnv *env, jobject thiz, jstring j_det_model_path,
-    jstring j_rec_model_path, jstring j_cls_model_path, jint j_thread_num,
-    jstring j_cpu_mode) {
+    jstring j_rec_model_path, jstring j_cls_model_path, jint j_use_opencl,
+    jint j_thread_num, jstring j_cpu_mode) {
   std::string det_model_path = jstring_to_cpp_string(env, j_det_model_path);
   std::string rec_model_path = jstring_to_cpp_string(env, j_rec_model_path);
@@ -21,6 +21,7 @@ Java_com_baidu_paddle_lite_demo_ocr_OCRPredictorNative_init(
   int thread_num = j_thread_num;
   std::string cpu_mode = jstring_to_cpp_string(env, j_cpu_mode);
   ppredictor::OCR_Config conf;
+  conf.use_opencl = j_use_opencl;
   conf.thread_num = thread_num;
   conf.mode = str_to_cpu_mode(cpu_mode);
   ppredictor::OCR_PPredictor *orc_predictor =
@@ -57,32 +58,31 @@ str_to_cpu_mode(const std::string &cpu_mode) {
 extern "C" JNIEXPORT jfloatArray JNICALL
 Java_com_baidu_paddle_lite_demo_ocr_OCRPredictorNative_forward(
-    JNIEnv *env, jobject thiz, jlong java_pointer, jfloatArray buf,
-    jfloatArray ddims, jobject original_image) {
+    JNIEnv *env, jobject thiz, jlong java_pointer, jobject original_image,
+    jint j_max_size_len, jint j_run_det, jint j_run_cls, jint j_run_rec) {
   LOGI("begin to run native forward");
   if (java_pointer == 0) {
     LOGE("JAVA pointer is NULL");
     return cpp_array_to_jfloatarray(env, nullptr, 0);
   }
   cv::Mat origin = bitmap_to_cv_mat(env, original_image);
   if (origin.size == 0) {
     LOGE("origin bitmap cannot convert to CV Mat");
     return cpp_array_to_jfloatarray(env, nullptr, 0);
   }
+  int max_size_len = j_max_size_len;
+  int run_det = j_run_det;
+  int run_cls = j_run_cls;
+  int run_rec = j_run_rec;
   ppredictor::OCR_PPredictor *ppredictor =
       (ppredictor::OCR_PPredictor *)java_pointer;
-  std::vector<float> dims_float_arr = jfloatarray_to_float_vector(env, ddims);
   std::vector<int64_t> dims_arr;
-  dims_arr.resize(dims_float_arr.size());
-  std::copy(dims_float_arr.cbegin(), dims_float_arr.cend(), dims_arr.begin());
-
-  // the values here are fairly large, so jfloatarray_to_float_vector is not used
-  int64_t buf_len = (int64_t)env->GetArrayLength(buf);
-  jfloat *buf_data = env->GetFloatArrayElements(buf, JNI_FALSE);
-  float *data = (jfloat *)buf_data;
   std::vector<ppredictor::OCRPredictResult> results =
-      ppredictor->infer_ocr(dims_arr, data, buf_len, NET_OCR, origin);
+      ppredictor->infer_ocr(origin, max_size_len, run_det, run_cls, run_rec);
   LOGI("infer_ocr finished with boxes %ld", results.size());
   // serialize std::vector<ppredictor::OCRPredictResult> into a float array,
   // pass it to the Java layer and deserialize it there
   std::vector<float> float_arr;
@@ -90,13 +90,18 @@ Java_com_baidu_paddle_lite_demo_ocr_OCRPredictorNative_forward(
     float_arr.push_back(r.points.size());
     float_arr.push_back(r.word_index.size());
     float_arr.push_back(r.score);
+    // add det point
     for (const std::vector<int> &point : r.points) {
       float_arr.push_back(point.at(0));
       float_arr.push_back(point.at(1));
     }
+    // add rec word idx
     for (int index : r.word_index) {
       float_arr.push_back(index);
     }
+    // add cls result
+    float_arr.push_back(r.cls_label);
+    float_arr.push_back(r.cls_score);
   }
   return cpp_array_to_jfloatarray(env, float_arr.data(), float_arr.size());
 }
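The last hunk above extends the float-array serialization that forward() returns to Java: for each OCRPredictResult it appends the number of detection points, the number of recognized word indices, the score, the point coordinates, the word indices, and now also cls_label and cls_score. A minimal decoding sketch, assuming the buffer is a plain concatenation of such records; any leading record count or other framing is outside the shown hunk and is not modeled here:

    #include <cstddef>
    #include <utility>
    #include <vector>

    struct DecodedResult {
      std::vector<std::pair<int, int>> points;   // det box corners (x, y)
      std::vector<int> word_index;               // rec character indices
      float score = 0.f;
      int cls_label = -1;                        // new in this commit
      float cls_score = 0.f;                     // new in this commit
    };

    // Reads one record starting at offset `pos` and advances `pos` past it.
    DecodedResult decode_one(const std::vector<float> &buf, std::size_t &pos) {
      DecodedResult r;
      const int num_points = static_cast<int>(buf[pos++]);
      const int num_words = static_cast<int>(buf[pos++]);
      r.score = buf[pos++];
      for (int i = 0; i < num_points; ++i) {
        const int x = static_cast<int>(buf[pos++]);
        const int y = static_cast<int>(buf[pos++]);
        r.points.emplace_back(x, y);
      }
      for (int i = 0; i < num_words; ++i)
        r.word_index.push_back(static_cast<int>(buf[pos++]));
      r.cls_label = static_cast<int>(buf[pos++]);
      r.cls_score = buf[pos++];
      return r;
    }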
deploy/android_demo/app/src/main/cpp/ocr_ppredictor.cpp

@@ -17,15 +17,15 @@ int OCR_PPredictor::init(const std::string &det_model_content,
                          const std::string &rec_model_content,
                          const std::string &cls_model_content) {
   _det_predictor = std::unique_ptr<PPredictor>(
-      new PPredictor{_config.thread_num, NET_OCR, _config.mode});
+      new PPredictor{_config.use_opencl, _config.thread_num, NET_OCR, _config.mode});
   _det_predictor->init_nb(det_model_content);

   _rec_predictor = std::unique_ptr<PPredictor>(
-      new PPredictor{_config.thread_num, NET_OCR_INTERNAL, _config.mode});
+      new PPredictor{_config.use_opencl, _config.thread_num, NET_OCR_INTERNAL, _config.mode});
   _rec_predictor->init_nb(rec_model_content);

   _cls_predictor = std::unique_ptr<PPredictor>(
-      new PPredictor{_config.thread_num, NET_OCR_INTERNAL, _config.mode});
+      new PPredictor{_config.use_opencl, _config.thread_num, NET_OCR_INTERNAL, _config.mode});
   _cls_predictor->init_nb(cls_model_content);
   return RETURN_OK;
 }
@@ -34,15 +34,16 @@ int OCR_PPredictor::init_from_file(const std::string &det_model_path,
                                    const std::string &rec_model_path,
                                    const std::string &cls_model_path) {
   _det_predictor = std::unique_ptr<PPredictor>(
-      new PPredictor{_config.thread_num, NET_OCR, _config.mode});
+      new PPredictor{_config.use_opencl, _config.thread_num, NET_OCR, _config.mode});
   _det_predictor->init_from_file(det_model_path);

   _rec_predictor = std::unique_ptr<PPredictor>(
-      new PPredictor{_config.thread_num, NET_OCR_INTERNAL, _config.mode});
+      new PPredictor{_config.use_opencl, _config.thread_num, NET_OCR_INTERNAL, _config.mode});
   _rec_predictor->init_from_file(rec_model_path);

   _cls_predictor = std::unique_ptr<PPredictor>(
-      new PPredictor{_config.thread_num, NET_OCR_INTERNAL, _config.mode});
+      new PPredictor{_config.use_opencl, _config.thread_num, NET_OCR_INTERNAL, _config.mode});
   _cls_predictor->init_from_file(cls_model_path);
   return RETURN_OK;
 }
@@ -77,33 +78,126 @@ visual_img(const std::vector<std::vector<std::vector<int>>> &filter_boxes,
 }

 std::vector<OCRPredictResult>
-OCR_PPredictor::infer_ocr(const std::vector<int64_t> &dims,
-                          const float *input_data, int input_len, int net_flag,
-                          cv::Mat &origin) {
+OCR_PPredictor::infer_ocr(cv::Mat &origin, int max_size_len, int run_det,
+                          int run_cls, int run_rec) {
+  LOGI("ocr cpp start *****************");
+  LOGI("ocr cpp det: %d, cls: %d, rec: %d", run_det, run_cls, run_rec);
+  std::vector<OCRPredictResult> ocr_results;
+  if (run_det) {
+    infer_det(origin, max_size_len, ocr_results);
+  }
+  if (run_rec) {
+    if (ocr_results.size() == 0) {
+      OCRPredictResult res;
+      ocr_results.emplace_back(std::move(res));
+    }
+    for (int i = 0; i < ocr_results.size(); i++) {
+      infer_rec(origin, run_cls, ocr_results[i]);
+    }
+  } else if (run_cls) {
+    ClsPredictResult cls_res = infer_cls(origin);
+    OCRPredictResult res;
+    res.cls_score = cls_res.cls_score;
+    res.cls_label = cls_res.cls_label;
+    ocr_results.push_back(res);
+  }
+  LOGI("ocr cpp end *****************");
+  return ocr_results;
+}
+
+cv::Mat DetResizeImg(const cv::Mat img, int max_size_len,
+                     std::vector<float> &ratio_hw) {
+  int w = img.cols;
+  int h = img.rows;
+
+  float ratio = 1.f;
+  int max_wh = w >= h ? w : h;
+  if (max_wh > max_size_len) {
+    if (h > w) {
+      ratio = static_cast<float>(max_size_len) / static_cast<float>(h);
+    } else {
+      ratio = static_cast<float>(max_size_len) / static_cast<float>(w);
+    }
+  }
+
+  int resize_h = static_cast<int>(float(h) * ratio);
+  int resize_w = static_cast<int>(float(w) * ratio);
+  if (resize_h % 32 == 0)
+    resize_h = resize_h;
+  else if (resize_h / 32 < 1 + 1e-5)
+    resize_h = 32;
+  else
+    resize_h = (resize_h / 32 - 1) * 32;
+
+  if (resize_w % 32 == 0)
+    resize_w = resize_w;
+  else if (resize_w / 32 < 1 + 1e-5)
+    resize_w = 32;
+  else
+    resize_w = (resize_w / 32 - 1) * 32;
+
+  cv::Mat resize_img;
+  cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
+
+  ratio_hw.push_back(static_cast<float>(resize_h) / static_cast<float>(h));
+  ratio_hw.push_back(static_cast<float>(resize_w) / static_cast<float>(w));
+  return resize_img;
+}
+
+void OCR_PPredictor::infer_det(cv::Mat &origin, int max_size_len,
+                               std::vector<OCRPredictResult> &ocr_results) {
+  std::vector<float> mean = {0.485f, 0.456f, 0.406f};
+  std::vector<float> scale = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
+
   PredictorInput input = _det_predictor->get_first_input();
-  input.set_dims(dims);
-  input.set_data(input_data, input_len);
+
+  std::vector<float> ratio_hw;
+  cv::Mat input_image = DetResizeImg(origin, max_size_len, ratio_hw);
+  input_image.convertTo(input_image, CV_32FC3, 1 / 255.0f);
+  const float *dimg = reinterpret_cast<const float *>(input_image.data);
+  int input_size = input_image.rows * input_image.cols;
+
+  input.set_dims({1, 3, input_image.rows, input_image.cols});
+
+  neon_mean_scale(dimg, input.get_mutable_float_data(), input_size, mean, scale);
+  LOGI("ocr cpp det shape %d,%d", input_image.rows, input_image.cols);
   std::vector<PredictorOutput> results = _det_predictor->infer();
   PredictorOutput &res = results.at(0);
   std::vector<std::vector<std::vector<int>>> filtered_box = calc_filtered_boxes(
-      res.get_float_data(), res.get_size(), (int)dims[2], (int)dims[3], origin);
-  LOGI("Filter_box size %ld", filtered_box.size());
-  return infer_rec(filtered_box, origin);
+      res.get_float_data(), res.get_size(), input_image.rows, input_image.cols,
+      origin);
+  LOGI("ocr cpp det Filter_box size %ld", filtered_box.size());
+
+  for (int i = 0; i < filtered_box.size(); i++) {
+    LOGI("ocr cpp box %d,%d,%d,%d,%d,%d,%d,%d", filtered_box[i][0][0],
+         filtered_box[i][0][1], filtered_box[i][1][0], filtered_box[i][1][1],
+         filtered_box[i][2][0], filtered_box[i][2][1], filtered_box[i][3][0],
+         filtered_box[i][3][1]);
+    OCRPredictResult res;
+    res.points = filtered_box[i];
+    ocr_results.push_back(res);
+  }
 }

-std::vector<OCRPredictResult> OCR_PPredictor::infer_rec(
-    const std::vector<std::vector<std::vector<int>>> &boxes,
-    const cv::Mat &origin_img) {
+void OCR_PPredictor::infer_rec(const cv::Mat &origin_img, int run_cls,
+                               OCRPredictResult &ocr_result) {
   std::vector<float> mean = {0.5f, 0.5f, 0.5f};
   std::vector<float> scale = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
   std::vector<int64_t> dims = {1, 3, 0, 0};
-  std::vector<OCRPredictResult> ocr_results;

   PredictorInput input = _rec_predictor->get_first_input();
-  for (auto bp = boxes.crbegin(); bp != boxes.crend(); ++bp) {
-    const std::vector<std::vector<int>> &box = *bp;
-    cv::Mat crop_img = get_rotate_crop_image(origin_img, box);
-    crop_img = infer_cls(crop_img);
+
+  const std::vector<std::vector<int>> &box = ocr_result.points;
+  cv::Mat crop_img;
+  if (box.size() > 0) {
+    crop_img = get_rotate_crop_image(origin_img, box);
+  } else {
+    crop_img = origin_img;
+  }
+
+  if (run_cls) {
+    ClsPredictResult cls_res = infer_cls(crop_img);
+    crop_img = cls_res.img;
+    ocr_result.cls_score = cls_res.cls_score;
+    ocr_result.cls_label = cls_res.cls_label;
+  }

   float wh_ratio = float(crop_img.cols) / float(crop_img.rows);
   cv::Mat input_image = crnn_resize_img(crop_img, wh_ratio);
@@ -122,8 +216,6 @@ std::vector<OCRPredictResult> OCR_PPredictor::infer_rec(
   const float *predict_batch = results.at(0).get_float_data();
   const std::vector<int64_t> predict_shape = results.at(0).get_shape();

-  OCRPredictResult res;
-
   // ctc decode
   int argmax_idx;
   int last_index = 0;
@@ -140,27 +232,19 @@ std::vector<OCRPredictResult> OCR_PPredictor::infer_rec(
     if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) {
       score += max_value;
       count += 1;
-      res.word_index.push_back(argmax_idx);
+      ocr_result.word_index.push_back(argmax_idx);
     }
     last_index = argmax_idx;
   }
   score /= count;
-  if (res.word_index.empty()) {
-    continue;
-  }
-  res.score = score;
-  res.points = box;
-  ocr_results.emplace_back(std::move(res));
-  }
-  LOGI("ocr_results finished %lu", ocr_results.size());
-  return ocr_results;
+  ocr_result.score = score;
+  LOGI("ocr cpp rec word size %ld", count);
 }

-cv::Mat OCR_PPredictor::infer_cls(const cv::Mat &img, float thresh) {
+ClsPredictResult OCR_PPredictor::infer_cls(const cv::Mat &img, float thresh) {
   std::vector<float> mean = {0.5f, 0.5f, 0.5f};
   std::vector<float> scale = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
   std::vector<int64_t> dims = {1, 3, 0, 0};
-  std::vector<OCRPredictResult> ocr_results;

   PredictorInput input = _cls_predictor->get_first_input();
@@ -182,7 +266,7 @@ cv::Mat OCR_PPredictor::infer_cls(const cv::Mat &img, float thresh) {
   float score = 0;
   int label = 0;
   for (int64_t i = 0; i < results.at(0).get_size(); i++) {
-    LOGI("output scores [%f]", scores[i]);
+    LOGI("ocr cpp cls output scores [%f]", scores[i]);
     if (scores[i] > score) {
       score = scores[i];
       label = i;
@@ -193,7 +277,12 @@ cv::Mat OCR_PPredictor::infer_cls(const cv::Mat &img, float thresh) {
   if (label % 2 == 1 && score > thresh) {
     cv::rotate(srcimg, srcimg, 1);
   }
-  return srcimg;
+  ClsPredictResult res;
+  res.cls_label = label;
+  res.cls_score = score;
+  res.img = srcimg;
+  LOGI("ocr cpp cls word cls %ld, %f", label, score);
+  return res;
 }

 std::vector<std::vector<std::vector<int>>>
deploy/android_demo/app/src/main/cpp/ocr_ppredictor.h

@@ -15,6 +15,7 @@ namespace ppredictor {
  * Config
  */
 struct OCR_Config {
+  int use_opencl = 0;
   int thread_num = 4; // Thread num
   paddle::lite_api::PowerMode mode =
       paddle::lite_api::LITE_POWER_HIGH; // PaddleLite Mode
@@ -27,8 +28,15 @@ struct OCRPredictResult {
   std::vector<int> word_index;
   std::vector<std::vector<int>> points;
   float score;
+  float cls_score;
+  int cls_label = -1;
+};
+
+struct ClsPredictResult {
+  float cls_score;
+  int cls_label = -1;
+  cv::Mat img;
 };

 /**
  * OCR there are 2 models
  * 1. First model(det),select polygones to show where are the texts
@@ -62,8 +70,7 @@ public:
   * @return
   */
  virtual std::vector<OCRPredictResult>
-  infer_ocr(const std::vector<int64_t> &dims, const float *input_data,
-            int input_len, int net_flag, cv::Mat &origin);
+  infer_ocr(cv::Mat &origin, int max_size_len, int run_det, int run_cls, int run_rec);

  virtual NET_TYPE get_net_flag() const;
@@ -80,16 +87,17 @@ private:
   calc_filtered_boxes(const float *pred, int pred_size, int output_height,
                       int output_width, const cv::Mat &origin);

+  void infer_det(cv::Mat &origin, int max_side_len, std::vector<OCRPredictResult> &ocr_results);
   /**
-   * infer for second model
+   * infer for rec model
    *
    * @param boxes
    * @param origin
   * @return
   */
-  std::vector<OCRPredictResult>
-  infer_rec(const std::vector<std::vector<std::vector<int>>> &boxes,
-            const cv::Mat &origin);
+  void infer_rec(const cv::Mat &origin, int run_cls, OCRPredictResult &ocr_result);

  /**
   * infer for cls model
@@ -98,7 +106,7 @@ private:
   * @param origin
   * @return
   */
-  cv::Mat infer_cls(const cv::Mat &origin, float thresh = 0.9);
+  ClsPredictResult infer_cls(const cv::Mat &origin, float thresh = 0.9);

  /**
   * Postprocess or sencod model to extract text
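The new ClsPredictResult struct and the cls_score / cls_label fields added to OCRPredictResult are what carry the direction-classifier output back through infer_ocr to the JNI layer. A small consumer sketch, assuming only the fields shown in this header; the reading of the label values (an odd label indicating a 180° rotated crop, as the thresholded cv::rotate in infer_cls suggests) is an inference, not something stated in the diff:

    #include <cstdio>
    #include <vector>
    #include "ocr_ppredictor.h"

    void log_cls_results(const std::vector<ppredictor::OCRPredictResult> &results) {
      for (const auto &r : results) {
        if (r.cls_label < 0)
          continue;  // classifier was not run for this result (default label is -1)
        std::printf("cls label=%d score=%.3f, %zu recognized indices\n",
                    r.cls_label, r.cls_score, r.word_index.size());
      }
    }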
deploy/android_demo/app/src/main/cpp/ppredictor.cpp

@@ -2,9 +2,9 @@
 #include "common.h"

 namespace ppredictor {
-PPredictor::PPredictor(int thread_num, int net_flag,
+PPredictor::PPredictor(int use_opencl, int thread_num, int net_flag,
                        paddle::lite_api::PowerMode mode)
-    : _thread_num(thread_num), _net_flag(net_flag), _mode(mode) {}
+    : _use_opencl(use_opencl), _thread_num(thread_num), _net_flag(net_flag), _mode(mode) {}

 int PPredictor::init_nb(const std::string &model_content) {
   paddle::lite_api::MobileConfig config;
@@ -19,10 +19,40 @@ int PPredictor::init_from_file(const std::string &model_content) {
 }

 template <typename ConfigT> int PPredictor::_init(ConfigT &config) {
+  bool is_opencl_backend_valid =
+      paddle::lite_api::IsOpenCLBackendValid(/*check_fp16_valid = false*/);
+  if (is_opencl_backend_valid) {
+    if (_use_opencl != 0) {
+      // Make sure you have write permission of the binary path.
+      // We strongly recommend each model has a unique binary name.
+      const std::string bin_path = "/data/local/tmp/";
+      const std::string bin_name = "lite_opencl_kernel.bin";
+      config.set_opencl_binary_path_name(bin_path, bin_name);
+
+      // opencl tune option
+      // CL_TUNE_NONE: 0
+      // CL_TUNE_RAPID: 1
+      // CL_TUNE_NORMAL: 2
+      // CL_TUNE_EXHAUSTIVE: 3
+      const std::string tuned_path = "/data/local/tmp/";
+      const std::string tuned_name = "lite_opencl_tuned.bin";
+      config.set_opencl_tune(paddle::lite_api::CL_TUNE_NORMAL, tuned_path, tuned_name);
+
+      // opencl precision option
+      // CL_PRECISION_AUTO: 0, first fp16 if valid, default
+      // CL_PRECISION_FP32: 1, force fp32
+      // CL_PRECISION_FP16: 2, force fp16
+      config.set_opencl_precision(paddle::lite_api::CL_PRECISION_FP32);
+      LOGI("ocr cpp device: running on gpu.");
+    }
+  } else {
+    LOGI("ocr cpp device: running on cpu.");
+    // you can give backup cpu nb model instead
+    // config.set_model_from_file(cpu_nb_model_dir);
+  }
   config.set_threads(_thread_num);
   config.set_power_mode(_mode);
   _predictor = paddle::lite_api::CreatePaddlePredictor(config);
-  LOGI("paddle instance created");
+  LOGI("ocr cpp paddle instance created");
   return RETURN_OK;
 }
@@ -43,18 +73,18 @@ std::vector<PredictorInput> PPredictor::get_inputs(int num) {
 PredictorInput PPredictor::get_first_input() { return get_input(0); }

 std::vector<PredictorOutput> PPredictor::infer() {
-  LOGI("infer Run start %d", _net_flag);
+  LOGI("ocr cpp infer Run start %d", _net_flag);
   std::vector<PredictorOutput> results;
   if (!_is_input_get) {
     return results;
   }
   _predictor->Run();
-  LOGI("infer Run end");
+  LOGI("ocr cpp infer Run end");

   for (int i = 0; i < _predictor->GetOutputNames().size(); i++) {
     std::unique_ptr<const paddle::lite_api::Tensor> output_tensor =
         _predictor->GetOutput(i);
-    LOGI("output tensor[%d] size %ld", i, product(output_tensor->shape()));
+    LOGI("ocr cpp output tensor[%d] size %ld", i, product(output_tensor->shape()));
     PredictorOutput result{std::move(output_tensor), i, _net_flag};
     results.emplace_back(std::move(result));
   }
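The new use_opencl flag threads from OCR_Config through the PPredictor constructor into _init(), where the OpenCL kernel and tuning binaries and FP32 precision are configured, falling back to CPU when paddle::lite_api::IsOpenCLBackendValid() reports the backend is unavailable. A minimal construction sketch using the new signature; the model path, thread count and power mode are placeholders, not values from the commit:

    #include <string>
    #include "ppredictor.h"

    int create_gpu_predictor(const std::string &model_nb_path) {
      const int use_opencl = 1;  // request the GPU (OpenCL) backend
      const int thread_num = 4;  // used when execution falls back to CPU
      ppredictor::PPredictor predictor{use_opencl, thread_num, /*net_flag=*/0,
                                       paddle::lite_api::LITE_POWER_HIGH};
      // _init() checks IsOpenCLBackendValid() and quietly runs on CPU otherwise.
      return predictor.init_from_file(model_nb_path);
    }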
deploy/android_demo/app/src/main/cpp/ppredictor.h

@@ -22,7 +22,7 @@ public:
 class PPredictor : public PPredictor_Interface {
 public:
   PPredictor(
-      int thread_num, int net_flag = 0,
+      int use_opencl, int thread_num, int net_flag = 0,
       paddle::lite_api::PowerMode mode = paddle::lite_api::LITE_POWER_HIGH);

   virtual ~PPredictor() {}
@@ -54,6 +54,7 @@ protected:
   template <typename ConfigT> int _init(ConfigT &config);

 private:
+  int _use_opencl;
   int _thread_num;
   paddle::lite_api::PowerMode _mode;
   std::shared_ptr<paddle::lite_api::PaddlePredictor> _predictor;