wangsen / paddle_dbnet · Commits

Commit 721c76b4, authored Dec 16, 2021 by LDOUBLEV

    fix conflict

Parents: 98162be4, b77f9ec0
Changes: 289

Showing 20 changed files with 645 additions and 154 deletions (+645 −154)
deploy/cpp_infer/src/preprocess_op.cpp      +14  −2
deploy/cpp_infer/src/utility.cpp            +13  −0
deploy/cpp_infer/tools/build_opencv.sh      +28  −0
deploy/lite/ocr_db_crnn.cc                  +304 −42
deploy/paddle2onnx/readme.md                +76  −0
deploy/pdserving/README.md                  +1   −1
deploy/pdserving/README_CN.md               +2   −2
deploy/pdserving/ocr_cpp_client.py          +56  −0
deploy/pdserving/pipeline_http_client.py    +7   −2
deploy/pdserving/pipeline_rpc_client.py     +9   −4
deploy/slim/prune/export_prune_model.py     +2   −2
deploy/slim/prune/sensitivity_anal.py       +2   −2
deploy/slim/quantization/export_model.py    +2   −2
deploy/slim/quantization/quant.py           +2   −2
deploy/slim/quantization/quant_kl.py        +1   −1
doc/banner.png                              +0   −0
doc/doc_ch/algorithm_overview.md            +59  −43
doc/doc_ch/angle_class.md                   +1   −3
doc/doc_ch/config.md                        +15  −17
doc/doc_ch/detection.md                     +51  −29
deploy/cpp_infer/src/preprocess_op.cpp (+14 −2)

```cpp
@@ -40,6 +40,17 @@ void Permute::Run(const cv::Mat *im, float *data) {
  }
}

void PermuteBatch::Run(const std::vector<cv::Mat> imgs, float *data) {
  for (int j = 0; j < imgs.size(); j++) {
    int rh = imgs[j].rows;
    int rw = imgs[j].cols;
    int rc = imgs[j].channels();
    for (int i = 0; i < rc; ++i) {
      cv::extractChannel(
          imgs[j], cv::Mat(rh, rw, CV_32FC1, data + (j * rc + i) * rh * rw), i);
    }
  }
}

void Normalize::Run(cv::Mat *im, const std::vector<float> &mean,
                    const std::vector<float> &scale, const bool is_scale) {
  double e = 1.0;
  ...

@@ -90,16 +101,17 @@ void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
  imgC = rec_image_shape[0];
  imgH = rec_image_shape[1];
  imgW = rec_image_shape[2];

  imgW = int(32 * wh_ratio);

  float ratio = float(img.cols) / float(img.rows);
  int resize_w, resize_h;

  if (ceilf(imgH * ratio) > imgW)
    resize_w = imgW;
  else
    resize_w = int(ceilf(imgH * ratio));
  cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
             cv::INTER_LINEAR);
  cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, ...
```
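The new `PermuteBatch::Run` packs a batch of float images into one planar NCHW buffer: image `j`, channel `i` starts at offset `(j * rc + i) * rh * rw`. Below is a minimal calling sketch; it is only an illustration (not code from this commit) and assumes the images were already resized to a common shape and converted to `CV_32FC3` by the existing resize and `Normalize` steps, and that the class is exposed through an `include/preprocess_op.h` header.

```cpp
#include <opencv2/opencv.hpp>
#include <vector>

#include "include/preprocess_op.h"  // assumed header path for PermuteBatch

// Hypothetical helper: allocate the NCHW buffer that PermuteBatch::Run expects
// and let it copy every channel of every image into place.
std::vector<float> PackBatchNCHW(const std::vector<cv::Mat> &imgs) {
  int rh = imgs[0].rows, rw = imgs[0].cols, rc = imgs[0].channels();
  // N * C * H * W floats; PermuteBatch writes image j, channel i at
  // data + (j * rc + i) * rh * rw, so this layout matches exactly.
  std::vector<float> data(imgs.size() * rc * rh * rw, 0.0f);
  PaddleOCR::PermuteBatch permute_op;
  permute_op.Run(imgs, data.data());
  return data;
}
```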
deploy/cpp_infer/src/utility.cpp (+13 −0)

```cpp
@@ -147,4 +147,17 @@ cv::Mat Utility::GetRotateCropImage(const cv::Mat &srcimage,
  }
}

std::vector<int> Utility::argsort(const std::vector<float> &array) {
  const int array_len(array.size());
  std::vector<int> array_index(array_len, 0);
  for (int i = 0; i < array_len; ++i)
    array_index[i] = i;

  std::sort(
      array_index.begin(), array_index.end(),
      [&array](int pos1, int pos2) { return (array[pos1] < array[pos2]); });

  return array_index;
}

} // namespace PaddleOCR
\ No newline at end of file
```
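`Utility::argsort` returns the indices that would sort a vector of floats in ascending order, which lets callers reorder boxes or scores without copying them. A small usage sketch follows; it assumes the method is declared as a static member of `Utility` (as the class-qualified definition suggests) and that the header lives at `include/utility.h`.

```cpp
#include <iostream>
#include <vector>

#include "include/utility.h"  // assumed header path for PaddleOCR::Utility

int main() {
  std::vector<float> scores = {0.72f, 0.15f, 0.93f, 0.40f};
  // Indices of `scores` in ascending order of value: 1, 3, 0, 2.
  std::vector<int> order = PaddleOCR::Utility::argsort(scores);
  for (int idx : order)
    std::cout << idx << " -> " << scores[idx] << "\n";
  return 0;
}
```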
deploy/cpp_infer/tools/build_opencv.sh (new file, +28)

```bash
root_path="/paddle/PaddleOCR/deploy/cpp_infer/opencv-3.4.7"
install_path=${root_path}/opencv3
build_dir=${root_path}/build

rm -rf ${build_dir}
mkdir ${build_dir}
cd ${build_dir}

cmake .. \
    -DCMAKE_INSTALL_PREFIX=${install_path} \
    -DCMAKE_BUILD_TYPE=Release \
    -DBUILD_SHARED_LIBS=OFF \
    -DWITH_IPP=OFF \
    -DBUILD_IPP_IW=OFF \
    -DWITH_LAPACK=OFF \
    -DWITH_EIGEN=OFF \
    -DCMAKE_INSTALL_LIBDIR=lib64 \
    -DWITH_ZLIB=ON \
    -DBUILD_ZLIB=ON \
    -DWITH_JPEG=ON \
    -DBUILD_JPEG=ON \
    -DWITH_PNG=ON \
    -DBUILD_PNG=ON \
    -DWITH_TIFF=ON \
    -DBUILD_TIFF=ON

make -j
make install
```
deploy/lite/ocr_db_crnn.cc (+304 −42)

```cpp
@@ -12,12 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <chrono>
#include "paddle_api.h" // NOLINT
#include "paddle_place.h"

#include "cls_process.h"
#include "crnn_process.h"
#include "db_post_process.h"
#include "AutoLog/auto_log/lite_autolog.h"

using namespace paddle::lite_api; // NOLINT
using namespace std;

@@ -27,7 +29,7 @@ void NeonMeanScale(const float *din, float *dout, int size,
                   const std::vector<float> mean,
                   const std::vector<float> scale) {
  if (mean.size() != 3 || scale.size() != 3) {
    std::cerr << "[ERROR] mean or scale size must equal to 3" << std::endl;
    exit(1);
  }
  float32x4_t vmean0 = vdupq_n_f32(mean[0]);

@@ -159,7 +161,8 @@ void RunRecModel(std::vector<std::vector<std::vector<int>>> boxes, cv::Mat img,
                 std::vector<float> &rec_text_score,
                 std::vector<std::string> charactor_dict,
                 std::shared_ptr<PaddlePredictor> predictor_cls,
                 int use_direction_classify,
                 std::vector<double> *times) {
  std::vector<float> mean = {0.5f, 0.5f, 0.5f};
  std::vector<float> scale = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};

@@ -169,7 +172,10 @@ void RunRecModel(std::vector<std::vector<std::vector<int>>> boxes, cv::Mat img,
  cv::Mat resize_img;

  int index = 0;

  std::vector<double> time_info = {0, 0, 0};
  for (int i = boxes.size() - 1; i >= 0; i--) {
    auto preprocess_start = std::chrono::steady_clock::now();
    crop_img = GetRotateCropImage(srcimg, boxes[i]);
    if (use_direction_classify >= 1) {
      crop_img = RunClsModel(crop_img, predictor_cls);

@@ -188,7 +194,9 @@ void RunRecModel(std::vector<std::vector<std::vector<int>>> boxes, cv::Mat img,
    auto *data0 = input_tensor0->mutable_data<float>();
    NeonMeanScale(dimg, data0, resize_img.rows * resize_img.cols, mean, scale);
    auto preprocess_end = std::chrono::steady_clock::now();

    //// Run CRNN predictor
    auto inference_start = std::chrono::steady_clock::now();
    predictor_crnn->Run();

    // Get output and run postprocess

@@ -196,8 +204,10 @@ void RunRecModel(std::vector<std::vector<std::vector<int>>> boxes, cv::Mat img,
        std::move(predictor_crnn->GetOutput(0)));
    auto *predict_batch = output_tensor0->data<float>();
    auto predict_shape = output_tensor0->shape();
    auto inference_end = std::chrono::steady_clock::now();

    // ctc decode
    auto postprocess_start = std::chrono::steady_clock::now();
    std::string str_res;
    int argmax_idx;
    int last_index = 0;

@@ -221,19 +231,33 @@ void RunRecModel(std::vector<std::vector<std::vector<int>>> boxes, cv::Mat img,
    score /= count;
    rec_text.push_back(str_res);
    rec_text_score.push_back(score);

    auto postprocess_end = std::chrono::steady_clock::now();
    std::chrono::duration<float> preprocess_diff = preprocess_end - preprocess_start;
    time_info[0] += double(preprocess_diff.count() * 1000);
    std::chrono::duration<float> inference_diff = inference_end - inference_start;
    time_info[1] += double(inference_diff.count() * 1000);
    std::chrono::duration<float> postprocess_diff = postprocess_end - postprocess_start;
    time_info[2] += double(postprocess_diff.count() * 1000);
  }
  times->push_back(time_info[0]);
  times->push_back(time_info[1]);
  times->push_back(time_info[2]);
}

std::vector<std::vector<std::vector<int>>>
RunDetModel(std::shared_ptr<PaddlePredictor> predictor, cv::Mat img,
            std::map<std::string, double> Config, std::vector<double> *times) {
  // Read img
  int max_side_len = int(Config["max_side_len"]);
  int det_db_use_dilate = int(Config["det_db_use_dilate"]);

  cv::Mat srcimg;
  img.copyTo(srcimg);

  auto preprocess_start = std::chrono::steady_clock::now();
  std::vector<float> ratio_hw;
  img = DetResizeImg(img, max_side_len, ratio_hw);
  cv::Mat img_fp;

@@ -248,8 +272,10 @@ RunDetModel(std::shared_ptr<PaddlePredictor> predictor, cv::Mat img,
  std::vector<float> scale = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
  const float *dimg = reinterpret_cast<const float *>(img_fp.data);
  NeonMeanScale(dimg, data0, img_fp.rows * img_fp.cols, mean, scale);
  auto preprocess_end = std::chrono::steady_clock::now();

  // Run predictor
  auto inference_start = std::chrono::steady_clock::now();
  predictor->Run();

  // Get output and post process

@@ -257,8 +283,10 @@ RunDetModel(std::shared_ptr<PaddlePredictor> predictor, cv::Mat img,
      std::move(predictor->GetOutput(0)));
  auto *outptr = output_tensor->data<float>();
  auto shape_out = output_tensor->shape();
  auto inference_end = std::chrono::steady_clock::now();

  // Save output
  auto postprocess_start = std::chrono::steady_clock::now();
  float pred[shape_out[2] * shape_out[3]];
  unsigned char cbuf[shape_out[2] * shape_out[3]];

@@ -287,14 +315,23 @@ RunDetModel(std::shared_ptr<PaddlePredictor> predictor, cv::Mat img,
  std::vector<std::vector<std::vector<int>>> filter_boxes =
      FilterTagDetRes(boxes, ratio_hw[0], ratio_hw[1], srcimg);
  auto postprocess_end = std::chrono::steady_clock::now();

  std::chrono::duration<float> preprocess_diff = preprocess_end - preprocess_start;
  times->push_back(double(preprocess_diff.count() * 1000));
  std::chrono::duration<float> inference_diff = inference_end - inference_start;
  times->push_back(double(inference_diff.count() * 1000));
  std::chrono::duration<float> postprocess_diff = postprocess_end - postprocess_start;
  times->push_back(double(postprocess_diff.count() * 1000));

  return filter_boxes;
}

std::shared_ptr<PaddlePredictor> loadModel(std::string model_file, int num_threads) {
  MobileConfig config;
  config.set_model_from_file(model_file);

  config.set_threads(num_threads);

  std::shared_ptr<PaddlePredictor> predictor =
      CreatePaddlePredictor<MobileConfig>(config);
  return predictor;
  ...
```
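Several of the hunks above follow the same instrumentation pattern: take `std::chrono::steady_clock` timestamps around the preprocess, inference and postprocess stages, convert each difference to milliseconds, and accumulate the three totals into a `times` vector for later reporting. The stand-alone sketch below shows that pattern in isolation; the helper name and the simulated stages are illustrative only, not part of this commit.

```cpp
#include <chrono>
#include <iostream>
#include <thread>
#include <vector>

// Run one stage and add its duration (in milliseconds) to an accumulator,
// mirroring the preprocess/inference/postprocess bookkeeping added above.
template <typename Fn>
void TimeStage(Fn &&stage, double &total_ms) {
  auto start = std::chrono::steady_clock::now();
  stage();
  auto end = std::chrono::steady_clock::now();
  std::chrono::duration<float> diff = end - start;
  total_ms += double(diff.count() * 1000);
}

int main() {
  std::vector<double> time_info = {0, 0, 0};  // {preprocess, inference, postprocess}
  TimeStage([] { std::this_thread::sleep_for(std::chrono::milliseconds(5)); }, time_info[0]);
  TimeStage([] { std::this_thread::sleep_for(std::chrono::milliseconds(20)); }, time_info[1]);
  TimeStage([] { std::this_thread::sleep_for(std::chrono::milliseconds(2)); }, time_info[2]);
  std::cout << "preprocess: " << time_info[0] << " ms, inference: " << time_info[1]
            << " ms, postprocess: " << time_info[2] << " ms\n";
  return 0;
}
```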
```cpp
@@ -354,60 +391,285 @@ std::map<std::string, double> LoadConfigTxt(std::string config_path) {
  return dict;
}

void check_params(int argc, char **argv) {
  if (argc <= 1 ||
      (strcmp(argv[1], "det") != 0 && strcmp(argv[1], "rec") != 0 &&
       strcmp(argv[1], "system") != 0)) {
    std::cerr << "Please choose one mode of [det, rec, system] !" << std::endl;
    exit(1);
  }
  if (strcmp(argv[1], "det") == 0) {
    if (argc < 9) {
      std::cerr << "[ERROR] usage:" << argv[0]
                << " det det_model runtime_device num_threads batchsize img_dir det_config lite_benchmark_value"
                << std::endl;
      exit(1);
    }
  }
  if (strcmp(argv[1], "rec") == 0) {
    if (argc < 9) {
      std::cerr << "[ERROR] usage:" << argv[0]
                << " rec rec_model runtime_device num_threads batchsize img_dir key_txt lite_benchmark_value"
                << std::endl;
      exit(1);
    }
  }
  if (strcmp(argv[1], "system") == 0) {
    if (argc < 12) {
      std::cerr << "[ERROR] usage:" << argv[0]
                << " system det_model rec_model clas_model runtime_device num_threads batchsize img_dir det_config key_txt lite_benchmark_value"
                << std::endl;
      exit(1);
    }
  }
}

void system(char **argv) {
  std::string det_model_file = argv[2];
  std::string rec_model_file = argv[3];
  std::string cls_model_file = argv[4];
  std::string runtime_device = argv[5];
  std::string precision = argv[6];
  std::string num_threads = argv[7];
  std::string batchsize = argv[8];
  std::string img_dir = argv[9];
  std::string det_config_path = argv[10];
  std::string dict_path = argv[11];

  if (strcmp(argv[6], "FP32") != 0 && strcmp(argv[6], "INT8") != 0) {
    std::cerr << "Only support FP32 or INT8." << std::endl;
    exit(1);
  }

  std::vector<cv::String> cv_all_img_names;
  cv::glob(img_dir, cv_all_img_names);

  //// load config from txt file
  auto Config = LoadConfigTxt(det_config_path);
  int use_direction_classify = int(Config["use_direction_classify"]);

  auto charactor_dict = ReadDict(dict_path);
  charactor_dict.insert(charactor_dict.begin(), "#"); // blank char for ctc
  charactor_dict.push_back(" ");

  auto det_predictor = loadModel(det_model_file, std::stoi(num_threads));
  auto rec_predictor = loadModel(rec_model_file, std::stoi(num_threads));
  auto cls_predictor = loadModel(cls_model_file, std::stoi(num_threads));

  std::vector<double> det_time_info = {0, 0, 0};
  std::vector<double> rec_time_info = {0, 0, 0};

  for (int i = 0; i < cv_all_img_names.size(); ++i) {
    std::cout << "The predict img: " << cv_all_img_names[i] << std::endl;
    cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);

    if (!srcimg.data) {
      std::cerr << "[ERROR] image read failed! image path: "
                << cv_all_img_names[i] << std::endl;
      exit(1);
    }

    std::vector<double> det_times;
    auto boxes = RunDetModel(det_predictor, srcimg, Config, &det_times);

    std::vector<std::string> rec_text;
    std::vector<float> rec_text_score;
    std::vector<double> rec_times;
    RunRecModel(boxes, srcimg, rec_predictor, rec_text, rec_text_score,
                charactor_dict, cls_predictor, use_direction_classify,
                &rec_times);

    //// visualization
    auto img_vis = Visualization(srcimg, boxes);

    //// print recognized text
    for (int i = 0; i < rec_text.size(); i++) {
      std::cout << i << "\t" << rec_text[i] << "\t" << rec_text_score[i]
                << std::endl;
    }

    det_time_info[0] += det_times[0];
    det_time_info[1] += det_times[1];
    det_time_info[2] += det_times[2];
    rec_time_info[0] += rec_times[0];
    rec_time_info[1] += rec_times[1];
    rec_time_info[2] += rec_times[2];
  }
  if (strcmp(argv[12], "True") == 0) {
    AutoLogger autolog_det(det_model_file, runtime_device, std::stoi(num_threads),
                           std::stoi(batchsize), "dynamic", precision,
                           det_time_info, cv_all_img_names.size());
    AutoLogger autolog_rec(rec_model_file, runtime_device, std::stoi(num_threads),
                           std::stoi(batchsize), "dynamic", precision,
                           rec_time_info, cv_all_img_names.size());
    autolog_det.report();
    std::cout << std::endl;
    autolog_rec.report();
  }
}

void det(int argc, char **argv) {
  std::string det_model_file = argv[2];
  std::string runtime_device = argv[3];
  std::string precision = argv[4];
  std::string num_threads = argv[5];
  std::string batchsize = argv[6];
  std::string img_dir = argv[7];
  std::string det_config_path = argv[8];

  if (strcmp(argv[4], "FP32") != 0 && strcmp(argv[4], "INT8") != 0) {
    std::cerr << "Only support FP32 or INT8." << std::endl;
    exit(1);
  }

  std::vector<cv::String> cv_all_img_names;
  cv::glob(img_dir, cv_all_img_names);

  //// load config from txt file
  auto Config = LoadConfigTxt(det_config_path);

  auto det_predictor = loadModel(det_model_file, std::stoi(num_threads));

  std::vector<double> time_info = {0, 0, 0};
  for (int i = 0; i < cv_all_img_names.size(); ++i) {
    std::cout << "The predict img: " << cv_all_img_names[i] << std::endl;
    cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);

    if (!srcimg.data) {
      std::cerr << "[ERROR] image read failed! image path: "
                << cv_all_img_names[i] << std::endl;
      exit(1);
    }

    std::vector<double> times;
    auto boxes = RunDetModel(det_predictor, srcimg, Config, &times);

    //// visualization
    auto img_vis = Visualization(srcimg, boxes);
    std::cout << boxes.size() << " bboxes have detected:" << std::endl;

    for (int i = 0; i < boxes.size(); i++) {
      std::cout << "The " << i << " box:" << std::endl;
      for (int j = 0; j < 4; j++) {
        for (int k = 0; k < 2; k++) {
          std::cout << boxes[i][j][k] << "\t";
        }
      }
      std::cout << std::endl;
    }
    time_info[0] += times[0];
    time_info[1] += times[1];
    time_info[2] += times[2];
  }
  if (strcmp(argv[9], "True") == 0) {
    AutoLogger autolog(det_model_file, runtime_device, std::stoi(num_threads),
                       std::stoi(batchsize), "dynamic", precision, time_info,
                       cv_all_img_names.size());
    autolog.report();
  }
}

void rec(int argc, char **argv) {
  std::string rec_model_file = argv[2];
  std::string runtime_device = argv[3];
  std::string precision = argv[4];
  std::string num_threads = argv[5];
  std::string batchsize = argv[6];
  std::string img_dir = argv[7];
  std::string dict_path = argv[8];

  if (strcmp(argv[4], "FP32") != 0 && strcmp(argv[4], "INT8") != 0) {
    std::cerr << "Only support FP32 or INT8." << std::endl;
    exit(1);
  }

  std::vector<cv::String> cv_all_img_names;
  cv::glob(img_dir, cv_all_img_names);

  auto charactor_dict = ReadDict(dict_path);
  charactor_dict.insert(charactor_dict.begin(), "#"); // blank char for ctc
  charactor_dict.push_back(" ");

  auto rec_predictor = loadModel(rec_model_file, std::stoi(num_threads));

  std::shared_ptr<PaddlePredictor> cls_predictor;

  std::vector<double> time_info = {0, 0, 0};
  for (int i = 0; i < cv_all_img_names.size(); ++i) {
    std::cout << "The predict img: " << cv_all_img_names[i] << std::endl;
    cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);

    if (!srcimg.data) {
      std::cerr << "[ERROR] image read failed! image path: "
                << cv_all_img_names[i] << std::endl;
      exit(1);
    }

    int width = srcimg.cols;
    int height = srcimg.rows;
    std::vector<int> upper_left = {0, 0};
    std::vector<int> upper_right = {width, 0};
    std::vector<int> lower_right = {width, height};
    std::vector<int> lower_left = {0, height};
    std::vector<std::vector<int>> box = {upper_left, upper_right, lower_right, lower_left};
    std::vector<std::vector<std::vector<int>>> boxes = {box};

    std::vector<std::string> rec_text;
    std::vector<float> rec_text_score;
    std::vector<double> times;
    RunRecModel(boxes, srcimg, rec_predictor, rec_text, rec_text_score,
                charactor_dict, cls_predictor, 0, &times);

    //// print recognized text
    for (int i = 0; i < rec_text.size(); i++) {
      std::cout << i << "\t" << rec_text[i] << "\t" << rec_text_score[i]
                << std::endl;
    }
    time_info[0] += times[0];
    time_info[1] += times[1];
    time_info[2] += times[2];
  }
  // TODO: support autolog
  if (strcmp(argv[9], "True") == 0) {
    AutoLogger autolog(rec_model_file, runtime_device, std::stoi(num_threads),
                       std::stoi(batchsize), "dynamic", precision, time_info,
                       cv_all_img_names.size());
    autolog.report();
  }
}

int main(int argc, char **argv) {
  check_params(argc, argv);
  std::cout << "mode: " << argv[1] << endl;

  if (strcmp(argv[1], "system") == 0) {
    system(argv);
  }
  if (strcmp(argv[1], "det") == 0) {
    det(argc, argv);
  }
  if (strcmp(argv[1], "rec") == 0) {
    rec(argc, argv);
  }
  return 0;
}
\ No newline at end of file
```
deploy/paddle2onnx/readme.md (new file, +76)

# paddle2onnx model conversion and prediction

This section describes how to convert a PaddleOCR model to an ONNX model and run prediction with the ONNX engine.

## 1. Environment preparation

You need to prepare both the Paddle2ONNX model-conversion environment and the ONNX prediction environment.

### Paddle2ONNX

Paddle2ONNX converts models in PaddlePaddle format to ONNX format. Operator export is currently stable for ONNX Opset 9–11, and some Paddle operators support conversion to lower ONNX opsets. For more details, see [Paddle2ONNX](https://github.com/PaddlePaddle/Paddle2ONNX/blob/develop/README_zh.md).

- Install Paddle2ONNX
```
python3.7 -m pip install paddle2onnx
```

- Install ONNX
```
# Version 1.4.0 is recommended; change the version number to match your environment
python3.7 -m pip install onnxruntime==1.4.0
```

## 2. Model conversion

- Download a Paddle model

There are two ways to obtain a Paddle static-graph model: download a prediction model provided by PaddleOCR from the [model_list](../../doc/doc_ch/models_list.md), or convert your trained weights into an inference model following the [model export guide](../../doc/doc_ch/inference.md#训练模型转inference模型).

Taking the ppocr detection model as an example:

```
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar
cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && cd ..
```

- Model conversion

Convert the Paddle static-graph model to ONNX format with Paddle2ONNX:

```
paddle2onnx --model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ \
--model_filename=inference.pdmodel \
--params_filename=inference.pdiparams \
--save_file=./inference/det_mobile_onnx/model.onnx \
--opset_version=10 \
--enable_onnx_checker=True
```

After the command finishes, the ONNX model is saved under `./inference/det_mobile_onnx/`.

* Note: the following models cannot be converted to ONNX yet: NRTR, SAR, RARE, SRN.

## 3. ONNX prediction

Taking the detection model as an example, run prediction with the ONNX model as follows:

```
python3.7 ../../tools/infer/predict_det.py --use_gpu=False --use_onnx=True \
--det_model_dir=./inference/det_mobile_onnx/model.onnx \
--image_dir=../../doc/imgs/1.jpg
```

The predicted box coordinates are printed in the terminal, and the visualized result is saved under `./inference_results/`.

```
root INFO: 1.jpg [[[291, 295], [334, 292], [348, 844], [305, 847]], [[344, 296], [379, 294], [387, 669], [353, 671]]]
The predict time of ../../doc/imgs/1.jpg: 0.06162881851196289
The visualized image saved in ./inference_results/det_res_1.jpg
```

* Note: ONNX does not yet support variable-length prediction, so the input must be resized to a fixed shape; the results may therefore differ slightly from prediction with Paddle directly.
deploy/pdserving/README.md (+1 −1)

```diff
@@ -114,7 +114,7 @@ The recognition model is the same.
 git clone https://github.com/PaddlePaddle/PaddleOCR

 # Enter the working directory
-cd PaddleOCR/deploy/pdserver/
+cd PaddleOCR/deploy/pdserving/

 The pdserver directory contains the code to start the pipeline service and send prediction requests, including:
```

deploy/pdserving/README_CN.md (+2 −2)

```diff
@@ -112,7 +112,7 @@ python3 -m paddle_serving_client.convert --dirname ./ch_ppocr_mobile_v2.0_rec_in
 git clone https://github.com/PaddlePaddle/PaddleOCR

 # Enter the working directory
-cd PaddleOCR/deploy/pdserver/
+cd PaddleOCR/deploy/pdserving/

 The pdserver directory contains the code to start the pipeline service and send prediction requests, including:

@@ -206,7 +206,7 @@ pip3 install paddle-serving-app==0.3.1
 1. Start the server program
 cd win
 python3 ocr_web_server.py gpu  # GPU mode
 or
 python3 ocr_web_server.py cpu  # CPU mode
```

deploy/pdserving/ocr_cpp_client.py (new file, +56)

```python
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing

from paddle_serving_client import Client
import sys
import numpy as np
import base64
import os
import cv2
from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from ocr_reader import OCRReader

client = Client()
# TODO:load_client need to load more than one client model.
# this need to figure out some details.
client.load_client_config(sys.argv[1:])
client.connect(["127.0.0.1:9293"])

import paddle
test_img_dir = "test_img/"

ocr_reader = OCRReader(char_dict_path="../../ppocr/utils/ppocr_keys_v1.txt")


def cv2_to_base64(image):
    return base64.b64encode(image).decode('utf8')  #data.tostring()).decode('utf8')


for img_file in os.listdir(test_img_dir):
    with open(os.path.join(test_img_dir, img_file), 'rb') as file:
        image_data = file.read()
    image = cv2_to_base64(image_data)
    res_list = []
    #print(image)
    fetch_map = client.predict(
        feed={"x": image}, fetch=["save_infer_model/scale_0.tmp_1"], batch=True)
    print("fetrch map:", fetch_map)
    one_batch_res = ocr_reader.postprocess(fetch_map, with_score=True)
    for res in one_batch_res:
        res_list.append(res[0])
    res = {"res": str(res_list)}
    print(res)
```

deploy/pdserving/pipeline_http_client.py (+7 −2)

```diff
@@ -18,13 +18,19 @@ import json
 import base64
 import os
+import argparse
+
+parser = argparse.ArgumentParser(description="args for paddleserving")
+parser.add_argument("--image_dir", type=str, default="../../doc/imgs/")
+args = parser.parse_args()


 def cv2_to_base64(image):
     return base64.b64encode(image).decode('utf8')


 url = "http://127.0.0.1:9998/ocr/prediction"
-test_img_dir = "../../doc/imgs/"
+test_img_dir = args.image_dir

 for idx, img_file in enumerate(os.listdir(test_img_dir)):
     with open(os.path.join(test_img_dir, img_file), 'rb') as file:
         image_data1 = file.read()

@@ -36,5 +42,4 @@ for idx, img_file in enumerate(os.listdir(test_img_dir)):
     r = requests.post(url=url, data=json.dumps(data))
     print(r.json())

-test_img_dir = "../../doc/imgs/"
 print("==> total number of test imgs: ", len(os.listdir(test_img_dir)))
```

deploy/pdserving/pipeline_rpc_client.py (+9 −4)

```diff
@@ -30,12 +30,17 @@ def cv2_to_base64(image):
     return base64.b64encode(image).decode('utf8')

-test_img_dir = "imgs/"
+import argparse
+parser = argparse.ArgumentParser(description="args for paddleserving")
+parser.add_argument("--image_dir", type=str, default="../../doc/imgs/")
+args = parser.parse_args()
+test_img_dir = args.image_dir
+
 for img_file in os.listdir(test_img_dir):
     with open(os.path.join(test_img_dir, img_file), 'rb') as file:
         image_data = file.read()
     image = cv2_to_base64(image_data)

 for i in range(1):
     ret = client.predict(feed_dict={"image": image}, fetch=["res"])
     print(ret)
```
deploy/slim/prune/export_prune_model.py (+2 −2)

```diff
@@ -30,7 +30,7 @@ from ppocr.modeling.architectures import build_model
 from ppocr.postprocess import build_post_process
 from ppocr.metrics import build_metric
-from ppocr.utils.save_load import init_model
+from ppocr.utils.save_load import load_model
 import tools.program as program

@@ -89,7 +89,7 @@ def main(config, device, logger, vdl_writer):
     logger.info(f"FLOPs after pruning: {flops}")

     # load pretrain model
-    pre_best_model_dict = init_model(config, model, logger, None)
+    load_model(config, model)

     metric = program.eval(model, valid_dataloader, post_process_class,
                           eval_class)
     logger.info(f"metric['hmean']: {metric['hmean']}")
```

deploy/slim/prune/sensitivity_anal.py (+2 −2)

```diff
@@ -32,7 +32,7 @@ from ppocr.losses import build_loss
 from ppocr.optimizer import build_optimizer
 from ppocr.postprocess import build_post_process
 from ppocr.metrics import build_metric
-from ppocr.utils.save_load import init_model
+from ppocr.utils.save_load import load_model
 import tools.program as program

 dist.get_world_size()

@@ -94,7 +94,7 @@ def main(config, device, logger, vdl_writer):
     # build metric
     eval_class = build_metric(config['Metric'])
     # load pretrain model
-    pre_best_model_dict = init_model(config, model, logger, optimizer)
+    pre_best_model_dict = load_model(config, model, optimizer)

     logger.info('train dataloader has {} iters, valid dataloader has {} iters'.
                 format(len(train_dataloader), len(valid_dataloader)))
```

deploy/slim/quantization/export_model.py (+2 −2)

```diff
@@ -28,7 +28,7 @@ from paddle.jit import to_static
 from ppocr.modeling.architectures import build_model
 from ppocr.postprocess import build_post_process
-from ppocr.utils.save_load import init_model
+from ppocr.utils.save_load import load_model
 from ppocr.utils.logging import get_logger
 from tools.program import load_config, merge_config, ArgsParser
 from ppocr.metrics import build_metric

@@ -101,7 +101,7 @@ def main():
     quanter = QAT(config=quant_config)
     quanter.quantize(model)

-    init_model(config, model)
+    load_model(config, model)
     model.eval()

     # build metric
```

deploy/slim/quantization/quant.py (+2 −2)

```diff
@@ -37,7 +37,7 @@ from ppocr.losses import build_loss
 from ppocr.optimizer import build_optimizer
 from ppocr.postprocess import build_post_process
 from ppocr.metrics import build_metric
-from ppocr.utils.save_load import init_model
+from ppocr.utils.save_load import load_model
 import tools.program as program
 from paddleslim.dygraph.quant import QAT

@@ -137,7 +137,7 @@ def main(config, device, logger, vdl_writer):
     # build metric
     eval_class = build_metric(config['Metric'])
     # load pretrain model
-    pre_best_model_dict = init_model(config, model, logger, optimizer)
+    pre_best_model_dict = load_model(config, model, optimizer)

     logger.info('train dataloader has {} iters, valid dataloader has {} iters'.
                 format(len(train_dataloader), len(valid_dataloader)))
```

deploy/slim/quantization/quant_kl.py (+1 −1)

```diff
@@ -37,7 +37,7 @@ from ppocr.losses import build_loss
 from ppocr.optimizer import build_optimizer
 from ppocr.postprocess import build_post_process
 from ppocr.metrics import build_metric
-from ppocr.utils.save_load import init_model
+from ppocr.utils.save_load import load_model
 import tools.program as program
 import paddleslim
 from paddleslim.dygraph.quant import QAT
```
doc/banner.png
0 → 100644
View file @
721c76b4
138 KB
doc/doc_ch/algorithm_overview.md
View file @
721c76b4
<a
name=
"算法介绍"
></a>
# 两阶段算法
## 算法介绍
-
[
两阶段算法
](
#-----
)
*
[
1. 算法介绍
](
#1
)
+
[
1.1 文本检测算法
](
#11
)
+
[
1.2 文本识别算法
](
#12
)
*
[
2. 模型训练
](
#2
)
*
[
3. 模型推理
](
#3
)
<a
name=
"1"
></a>
## 1. 算法介绍
本文给出了PaddleOCR已支持的文本检测算法和文本识别算法列表,以及每个算法在
**英文公开数据集**
上的模型和指标,主要用于算法简介和算法性能对比,更多包括中文在内的其他数据集上的模型请参考
[
PP-OCR v2.0 系列模型下载
](
./models_list.md
)
。
本文给出了PaddleOCR已支持的文本检测算法和文本识别算法列表,以及每个算法在
**英文公开数据集**
上的模型和指标,主要用于算法简介和算法性能对比,更多包括中文在内的其他数据集上的模型请参考
[
PP-OCR v2.0 系列模型下载
](
./models_list.md
)
。
-
[
1.文本检测算法
](
#文本检测算法
)
<a
name=
"11"
></a>
-
[
2.文本识别算法
](
#文本识别算法
)
<a
name=
"文本检测算法"
></a>
### 1.1 文本检测算法
### 1.文本检测算法
PaddleOCR开源的文本检测算法列表:
PaddleOCR开源的文本检测算法列表:
-
[
x] DB([paper
](
https://arxiv.org/abs/1911.08947
)
)(ppocr推荐)
-
[
x] DB([paper
](
https://arxiv.org/abs/1911.08947
)
)
[2]
(ppocr推荐)
-
[
x] EAST([paper
](
https://arxiv.org/abs/1704.03155
)
)
-
[
x] EAST([paper
](
https://arxiv.org/abs/1704.03155
)
)
[1]
-
[
x] SAST([paper
](
https://arxiv.org/abs/1908.05498
)
)
-
[
x] SAST([paper
](
https://arxiv.org/abs/1908.05498
)
)
[4]
-
[
x] PSENet([paper
](
https://arxiv.org/abs/1903.12473v2
)
)
-
[
x] PSENet([paper
](
https://arxiv.org/abs/1903.12473v2
)
)
在ICDAR2015文本检测公开数据集上,算法效果如下:
在ICDAR2015文本检测公开数据集上,算法效果如下:
|模型|骨干网络|precision|recall|Hmean|下载链接|
|模型|骨干网络|precision|recall|Hmean|下载链接|
| --- | --- | --- | --- | --- | --- |
| --- | --- | --- | --- | --- | --- |
|EAST|ResNet50_vd|85.80%|86.71%|86.25%|
[
下载链接
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar
)
|
|EAST|ResNet50_vd|85.80%|86.71%|86.25%|
[
训练模型
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar
)
|
|EAST|MobileNetV3|79.42%|80.64%|80.03%|
[
下载链接
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar
)
|
|EAST|MobileNetV3|79.42%|80.64%|80.03%|
[
训练模型
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar
)
|
|DB|ResNet50_vd|86.41%|78.72%|82.38%|
[
下载链接
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar
)
|
|DB|ResNet50_vd|86.41%|78.72%|82.38%|
[
训练模型
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar
)
|
|DB|MobileNetV3|77.29%|73.08%|75.12%|
[
下载链接
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar
)
|
|DB|MobileNetV3|77.29%|73.08%|75.12%|
[
训练模型
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar
)
|
|SAST|ResNet50_vd|91.39%|83.77%|87.42%|
[
下载链接
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar
)
|
|SAST|ResNet50_vd|91.39%|83.77%|87.42%|
[
训练模型
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar
)
|
|PSE|ResNet50_vd|85.81%|79.53%|82.55%|
[
下载链接
](
https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/det_r50_vd_pse_v2.0_train.tar
)
|
|PSE|ResNet50_vd|85.81%|79.53%|82.55%|
[
训练模型
](
https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/det_r50_vd_pse_v2.0_train.tar
)
|
|PSE|MobileNetV3|82.20%|70.48%|75.89%|
[
下载链接
](
https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/det_mv3_pse_v2.0_train.tar
)
|
|PSE|MobileNetV3|82.20%|70.48%|75.89%|
[
训练模型
](
https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/det_mv3_pse_v2.0_train.tar
)
|
在Total-text文本检测公开数据集上,算法效果如下:
在Total-text文本检测公开数据集上,算法效果如下:
|模型|骨干网络|precision|recall|Hmean|下载链接|
|模型|骨干网络|precision|recall|Hmean|下载链接|
| --- | --- | --- | --- | --- | --- |
| --- | --- | --- | --- | --- | --- |
|SAST|ResNet50_vd|89.63%|78.44%|83.66%|
[
下载链接
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_totaltext_v2.0_train.tar
)
|
|SAST|ResNet50_vd|89.63%|78.44%|83.66%|
[
训练模型
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_totaltext_v2.0_train.tar
)
|
**说明:**
SAST模型训练额外加入了icdar2013、icdar2017、COCO-Text、ArT等公开数据集进行调优。PaddleOCR用到的经过整理格式的英文公开数据集下载:
**说明:**
SAST模型训练额外加入了icdar2013、icdar2017、COCO-Text、ArT等公开数据集进行调优。PaddleOCR用到的经过整理格式的英文公开数据集下载:
*
[
百度云地址
](
https://pan.baidu.com/s/12cPnZcVuV1zn5DOd4mqjVw
)
(
提取码:
2bpi)
*
[
百度云地址
](
https://pan.baidu.com/s/12cPnZcVuV1zn5DOd4mqjVw
)
(
提取码:
2bpi)
*
[
Google Drive下载地址
](
https://drive.google.com/drive/folders/1ll2-XEVyCQLpJjawLDiRlvo_i4BqHCJe?usp=sharing
)
*
[
Google Drive下载地址
](
https://drive.google.com/drive/folders/1ll2-XEVyCQLpJjawLDiRlvo_i4BqHCJe?usp=sharing
)
PaddleOCR文本检测算法的训练和使用请参考文档教程中
[
模型训练/评估中的文本检测部分
](
./detection.md
)
。
<a
name=
"12"
></a>
<a
name=
"文本识别算法"
></a>
### 1.2 文本识别算法
### 2.文本识别算法
PaddleOCR基于动态图开源的文本识别算法列表:
PaddleOCR基于动态图开源的文本识别算法列表:
-
[
x] CRNN([paper
](
https://arxiv.org/abs/1507.05717
)
)(ppocr推荐)
-
[
x] CRNN([paper
](
https://arxiv.org/abs/1507.05717
)
)
[7]
(ppocr推荐)
-
[
x] Rosetta([paper
](
https://arxiv.org/abs/1910.05085
)
)
-
[
x] Rosetta([paper
](
https://arxiv.org/abs/1910.05085
)
)
[10]
-
[
x] STAR-Net([paper
](
http://www.bmva.org/bmvc/2016/papers/paper043/index.html
)
)
-
[
x] STAR-Net([paper
](
http://www.bmva.org/bmvc/2016/papers/paper043/index.html
)
)
[11]
-
[
x] RARE([paper
](
https://arxiv.org/abs/1603.03915v1
)
)
-
[
x] RARE([paper
](
https://arxiv.org/abs/1603.03915v1
)
)
[12]
-
[
x] SRN([paper
](
https://arxiv.org/abs/2003.12294
)
)
-
[
x] SRN([paper
](
https://arxiv.org/abs/2003.12294
)
)
[5]
-
[
x] NRTR([paper
](
https://arxiv.org/abs/1806.00926v2
)
)
-
[
x] NRTR([paper
](
https://arxiv.org/abs/1806.00926v2
)
)
[13]
-
[
x] SAR([paper
](
https://arxiv.org/abs/1811.00751v2
)
)
-
[
x] SAR([paper
](
https://arxiv.org/abs/1811.00751v2
)
)
-
[
x]
SEED([paper
](
https://arxiv.org/pdf/2005.10977.pdf
)
)
-
[
x] SEED([paper
](
https://arxiv.org/pdf/2005.10977.pdf
)
)
参考
[
DTRB
](
https://arxiv.org/abs/1904.01906
)
文字识别训练和评估流程,使用MJSynth和SynthText两个文字识别数据集训练,在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估,算法效果如下:
参考
[
DTRB
]
[
3
]
(https://arxiv.org/abs/1904.01906)文字识别训练和评估流程,使用MJSynth和SynthText两个文字识别数据集训练,在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估,算法效果如下:
|模型|骨干网络|Avg Accuracy|模型存储命名|下载链接|
|模型|骨干网络|Avg Accuracy|模型存储命名|下载链接|
|---|---|---|---|---|
|---|---|---|---|---|
|Rosetta|Resnet34_vd|80.9%|rec_r34_vd_none_none_ctc|
[
下载链接
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_none_ctc_v2.0_train.tar
)
|
|Rosetta|Resnet34_vd|80.9%|rec_r34_vd_none_none_ctc|
[
训练模型
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_none_ctc_v2.0_train.tar
)
|
|Rosetta|MobileNetV3|78.05%|rec_mv3_none_none_ctc|
[
下载链接
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_none_ctc_v2.0_train.tar
)
|
|Rosetta|MobileNetV3|78.05%|rec_mv3_none_none_ctc|
[
训练模型
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_none_ctc_v2.0_train.tar
)
|
|CRNN|Resnet34_vd|82.76%|rec_r34_vd_none_bilstm_ctc|
[
下载链接
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar
)
|
|CRNN|Resnet34_vd|82.76%|rec_r34_vd_none_bilstm_ctc|
[
训练模型
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar
)
|
|CRNN|MobileNetV3|79.97%|rec_mv3_none_bilstm_ctc|
[
下载链接
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_bilstm_ctc_v2.0_train.tar
)
|
|CRNN|MobileNetV3|79.97%|rec_mv3_none_bilstm_ctc|
[
训练模型
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_bilstm_ctc_v2.0_train.tar
)
|
|StarNet|Resnet34_vd|84.44%|rec_r34_vd_tps_bilstm_ctc|
[
下载链接
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_ctc_v2.0_train.tar
)
|
|StarNet|Resnet34_vd|84.44%|rec_r34_vd_tps_bilstm_ctc|
[
训练模型
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_ctc_v2.0_train.tar
)
|
|StarNet|MobileNetV3|81.42%|rec_mv3_tps_bilstm_ctc|
[
下载链接
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_ctc_v2.0_train.tar
)
|
|StarNet|MobileNetV3|81.42%|rec_mv3_tps_bilstm_ctc|
[
训练模型
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_ctc_v2.0_train.tar
)
|
|RARE|MobileNetV3|82.5%|rec_mv3_tps_bilstm_att |
[
下载链接
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_att_v2.0_train.tar
)
|
|RARE|MobileNetV3|82.5%|rec_mv3_tps_bilstm_att |
[
训练模型
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_att_v2.0_train.tar
)
|
|RARE|Resnet34_vd|83.6%|rec_r34_vd_tps_bilstm_att |
[
下载链接
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_att_v2.0_train.tar
)
|
|RARE|Resnet34_vd|83.6%|rec_r34_vd_tps_bilstm_att |
[
训练模型
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_att_v2.0_train.tar
)
|
|SRN|Resnet50_vd_fpn| 88.52% | rec_r50fpn_vd_none_srn |
[
下载链接
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r50_vd_srn_train.tar
)
|
|SRN|Resnet50_vd_fpn| 88.52% | rec_r50fpn_vd_none_srn |
[
训练模型
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r50_vd_srn_train.tar
)
|
|NRTR|NRTR_MTB| 84.3% | rec_mtb_nrtr |
[
下载链接
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mtb_nrtr_train.tar
)
|
|NRTR|NRTR_MTB| 84.3% | rec_mtb_nrtr |
[
训练模型
](
https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mtb_nrtr_train.tar
)
|
|SAR|Resnet31| 87.2% | rec_r31_sar |
[
下载链接
](
https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_r31_sar_train.tar
)
|
|SAR|Resnet31| 87.2% | rec_r31_sar |
[
训练模型
](
https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_r31_sar_train.tar
)
|
|SEED| Aster_Resnet | 85.2% | rec_resnet_stn_bilstm_att |
[
下载链接
](
https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_resnet_stn_bilstm_att.tar
)
|
|SEED|Aster_Resnet| 85.2% | rec_resnet_stn_bilstm_att |
[
训练模型
](
https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_resnet_stn_bilstm_att.tar
)
|
PaddleOCR文本识别算法的训练和使用请参考文档教程中
[
模型训练/评估中的文本识别部分
](
./recognition.md
)
。
<a
name=
"2"
></a>
## 2. 模型训练
PaddleOCR文本检测算法的训练和使用请参考文档教程中
[
模型训练/评估中的文本检测部分
](
./detection.md
)
。文本识别算法的训练和使用请参考文档教程中
[
模型训练/评估中的文本识别部分
](
./recognition.md
)
。
<a
name=
"3"
></a>
## 3. 模型推理
上述模型中除PP-OCR系列模型以外,其余模型仅支持基于Python引擎的推理,具体内容可参考
[
基于Python预测引擎推理
](
./inference.md
)
doc/doc_ch/angle_class.md
View file @
721c76b4
...
@@ -11,7 +11,7 @@
...
@@ -11,7 +11,7 @@
## 1. 方法介绍
## 1. 方法介绍
文本方向分类器主要用于图片非0度的场景下,在这种场景下需要对图片里检测到的文本行进行一个转正的操作。在PaddleOCR系统内,
文本方向分类器主要用于图片非0度的场景下,在这种场景下需要对图片里检测到的文本行进行一个转正的操作。在PaddleOCR系统内,
文字检测之后得到的文本行图片经过仿射变换之后送入识别模型,此时只需要对文字进行一个0和180度的角度分类,因此PaddleOCR内置的
文字检测之后得到的文本行图片经过仿射变换之后送入识别模型,此时只需要对文字进行一个0和180度的角度分类,因此PaddleOCR内置的
文
字角度
分类器
**只支持了0和180度的分类**
。如果想支持更多角度,可以自己修改算法进行支持。
文
本方向
分类器
**只支持了0和180度的分类**
。如果想支持更多角度,可以自己修改算法进行支持。
0和180度数据样本例子:
0和180度数据样本例子:
...
@@ -72,8 +72,6 @@ train/cls/train/word_002.jpg 180
...
@@ -72,8 +72,6 @@ train/cls/train/word_002.jpg 180
<a
name=
"启动训练"
></a>
<a
name=
"启动训练"
></a>
## 3. 启动训练
## 3. 启动训练
### 启动训练
将准备好的txt文件和图片文件夹路径分别写入配置文件的
`Train/Eval.dataset.label_file_list`
和
`Train/Eval.dataset.data_dir`
字段下,
`Train/Eval.dataset.data_dir`
字段下的路径和文件里记载的图片名构成了图片的绝对路径。
将准备好的txt文件和图片文件夹路径分别写入配置文件的
`Train/Eval.dataset.label_file_list`
和
`Train/Eval.dataset.data_dir`
字段下,
`Train/Eval.dataset.data_dir`
字段下的路径和文件里记载的图片名构成了图片的绝对路径。
PaddleOCR提供了训练脚本、评估脚本和预测脚本。
PaddleOCR提供了训练脚本、评估脚本和预测脚本。
...
...
doc/doc_ch/config.md
View file @
721c76b4
...
@@ -36,11 +36,10 @@
...
@@ -36,11 +36,10 @@
| pretrained_model | 设置加载预训练模型路径 | ./pretrain_models/CRNN/best_accuracy |
\
|
| pretrained_model | 设置加载预训练模型路径 | ./pretrain_models/CRNN/best_accuracy |
\
|
| checkpoints | 加载模型参数路径 | None | 用于中断后加载参数继续训练 |
| checkpoints | 加载模型参数路径 | None | 用于中断后加载参数继续训练 |
| use_visualdl | 设置是否启用visualdl进行可视化log展示 | False |
[
教程地址
](
https://www.paddlepaddle.org.cn/paddle/visualdl
)
|
| use_visualdl | 设置是否启用visualdl进行可视化log展示 | False |
[
教程地址
](
https://www.paddlepaddle.org.cn/paddle/visualdl
)
|
| infer_img | 设置预测图像路径或文件夹路径 | ./infer_img |
\|
| infer_img | 设置预测图像路径或文件夹路径 | ./infer_img |
\|
|
| character_dict_path | 设置字典路径 | ./ppocr/utils/ppocr_keys_v1.txt |
\
|
| character_dict_path | 设置字典路径 | ./ppocr/utils/ppocr_keys_v1.txt |
如果为空,则默认使用小写字母+数字作为字典
|
| max_text_length | 设置文本最大长度 | 25 |
\
|
| max_text_length | 设置文本最大长度 | 25 |
\
|
| character_type | 设置字符类型 | ch | en/ch, en时将使用默认dict,ch时使用自定义dict|
| use_space_char | 设置是否识别空格 | True |
\|
|
| use_space_char | 设置是否识别空格 | True | 仅在 character_type=ch 时支持空格 |
| label_list | 设置方向分类器支持的角度 | ['0','180'] | 仅在方向分类器中生效 |
| label_list | 设置方向分类器支持的角度 | ['0','180'] | 仅在方向分类器中生效 |
| save_res_path | 设置检测模型的结果保存地址 | ./output/det_db/predicts_db.txt | 仅在检测模型中生效 |
| save_res_path | 设置检测模型的结果保存地址 | ./output/det_db/predicts_db.txt | 仅在检测模型中生效 |
...
@@ -191,7 +190,6 @@ PaddleOCR目前已支持80种(除中文外)语种识别,`configs/rec/multi
...
@@ -191,7 +190,6 @@ PaddleOCR目前已支持80种(除中文外)语种识别,`configs/rec/multi
use_gpu: True
use_gpu: True
epoch_num: 500
epoch_num: 500
...
...
character_type: it # 需要识别的语种
character_dict_path: {path/of/dict} # 字典文件所在路径
character_dict_path: {path/of/dict} # 字典文件所在路径
Train:
Train:
...
@@ -212,17 +210,17 @@ PaddleOCR目前已支持80种(除中文外)语种识别,`configs/rec/multi
...
@@ -212,17 +210,17 @@ PaddleOCR目前已支持80种(除中文外)语种识别,`configs/rec/multi
目前PaddleOCR支持的多语言算法有:
目前PaddleOCR支持的多语言算法有:
| 配置文件 | 算法名称 | backbone | trans | seq | pred | language |
character_type |
| 配置文件 | 算法名称 | backbone | trans | seq | pred | language |
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: |
:-----: |
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: |
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 中文繁体 |
chinese_cht|
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 中文繁体 |
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语(区分大小写) |
EN |
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语(区分大小写) |
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 |
french |
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 |
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 |
german |
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 |
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 |
japan |
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 |
| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 韩语 |
korean |
| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 韩语 |
| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 拉丁字母 |
latin |
| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 拉丁字母 |
| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 阿拉伯字母 |
ar |
| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 阿拉伯字母 |
| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 斯拉夫字母 |
cyrillic |
| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 斯拉夫字母 |
| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 梵文字母 |
devanagari |
| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 梵文字母 |
更多支持语种请参考:
[
多语言模型
](
https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99
)
更多支持语种请参考:
[
多语言模型
](
https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99
)
doc/doc_ch/detection.md
View file @
721c76b4
# 目录
# 文字检测
-
[
1. 文字检测
](
#1-----
)
*
[
1.1 数据准备
](
#11-----
)
*
[
1.2 下载预训练模型
](
#12--------
)
*
[
1.3 启动训练
](
#13-----
)
*
[
1.4 断点训练
](
#14-----
)
*
[
1.5 更换Backbone 训练
](
#15---backbone---
)
*
[
1.6 指标评估
](
#16-----
)
*
[
1.7 测试检测效果
](
#17-------
)
*
[
1.8 转inference模型测试
](
#18--inference----
)
-
[
2. FAQ
](
#2-faq
)
<a
name=
"1-----"
></a>
# 1. 文字检测
本节以icdar2015数据集为例,介绍PaddleOCR中检测模型训练、评估、测试的使用方式。
本节以icdar2015数据集为例,介绍PaddleOCR中检测模型训练、评估、测试的使用方式。
- [1. Data and Model Preparation](#1--------)
  * [1.1 Data Preparation](#11-----)
  * [1.2 Download the Pretrained Model](#12--------)
- [2. Training](#2-----)
  * [2.1 Launch Training](#21-----)
  * [2.2 Resume Training from a Checkpoint](#22-----)
  * [2.3 Training with a Different Backbone](#23---backbone---)
- [3. Model Evaluation and Prediction](#3--------)
  * [3.1 Metric Evaluation](#31-----)
  * [3.2 Test Detection Results](#32-------)
- [4. Model Export and Inference](#4--------)
- [5. FAQ](#5-faq)
<a name="1--------"></a>
# 1. Data and Model Preparation

<a name="11-----"></a>
## 1.1 Data Preparation
...
@@ -83,8 +85,11 @@ wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dyg
wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_ssld_pretrained.pdparams
```
<a name="2-----"></a>
# 2. Training

<a name="21-----"></a>
## 2.1 Launch Training

*If you installed the CPU version of PaddlePaddle, set the `use_gpu` field in the configuration file to false.*
...
@@ -96,6 +101,10 @@ python3 tools/train.py -c configs/det/det_mv3_db.yml \
# Single-machine multi-GPU training; set the GPU IDs to use with --gpus
python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/det/det_mv3_db.yml \
    -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained

# Multi-machine multi-GPU training; set the machine IP addresses with --ips and the GPU IDs with --gpus
python3 -m paddle.distributed.launch --ips="xx.xx.xx.xx,xx.xx.xx.xx" --gpus '0,1,2,3' tools/train.py -c configs/det/det_mv3_db.yml \
    -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained
```

In the commands above, -c selects configs/det/det_mv3_db.yml as the training configuration file.
...
@@ -106,8 +115,17 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/
python3 tools/train.py -c configs/det/det_mv3_db.yml -o Optimizer.base_lr=0.0001
```
**Note:** For multi-machine multi-GPU training, replace the ips value in the command above with the addresses of your machines, which must be able to ping each other; the command also has to be launched separately on every machine. You can look up a machine's IP address with `ifconfig`.

If you want to speed up training further, you can use [automatic mixed precision training](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/01_paddle2.0_introduction/basic_concept/amp_cn.html). Taking single-machine single-GPU training as an example, the command is:
```shell
python3 tools/train.py -c configs/det/det_mv3_db.yml \
     -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained \
     Global.use_amp=True Global.scale_loss=1024.0 Global.use_dynamic_loss_scaling=True
```

<a name="22-----"></a>
## 2.2 Resume Training from a Checkpoint
If training is interrupted and you want to resume from the interrupted run, specify the model to load via Global.checkpoints:
```shell
...
@@ -116,8 +134,8 @@ python3 tools/train.py -c configs/det/det_mv3_db.yml -o Global.checkpoints=./you
**Note:** `Global.checkpoints` takes precedence over `Global.pretrained_model`; if both are specified, the model given by `Global.checkpoints` is loaded first, and if that path is invalid, the model given by `Global.pretrained_model` is loaded instead.
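As a concrete illustration of the command elided in this hunk, a minimal sketch assuming checkpoints are written under `./output/det_db/` with the prefix `latest`; both the output directory and the checkpoint prefix are illustrative placeholders:

```shell
# Resume DB training from a saved checkpoint; "latest" stands in for whatever
# checkpoint prefix your training run actually produced.
python3 tools/train.py -c configs/det/det_mv3_db.yml \
    -o Global.checkpoints=./output/det_db/latest
```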
<a name="23---backbone---"></a>
## 2.3 Training with a Different Backbone

PaddleOCR divides the network into four parts, located under [ppocr/modeling](../../ppocr/modeling). Data entering the network passes through these four parts in sequence (transforms->backbones->necks->heads).
...
@@ -164,8 +182,11 @@ args1: args1
**Note:** To swap out other modules of the network, refer to this [document](./add_new_algorithm.md).
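Purely as an illustration (the YAML changes themselves are elided in this hunk), a sketch of switching the backbone from the command line, assuming the `-o` override syntax shown earlier also applies to `Architecture.*` keys and that a ResNet detection backbone with a `layers` argument is available in your version; verify the key names and the pretrained-model path against your own config before relying on this:

```shell
# Hypothetical sketch: train DB with a ResNet18_vd backbone instead of MobileNetV3.
# Architecture key names and the pretrained path are assumptions taken from the
# det_r*_vd-style configs -- check them against your configs/det/*.yml files.
python3 tools/train.py -c configs/det/det_mv3_db.yml \
    -o Architecture.Backbone.name=ResNet Architecture.Backbone.layers=18 \
       Global.pretrained_model=./pretrain_models/ResNet18_vd_pretrained
```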
<a name="3--------"></a>
# 3. Model Evaluation and Prediction

<a name="31-----"></a>
## 3.1 Metric Evaluation

PaddleOCR computes three detection-related OCR metrics: Precision, Recall, and Hmean (F-score).
...
@@ -177,8 +198,8 @@ python3 tools/eval.py -c configs/det/det_mv3_db.yml -o Global.checkpoints="{pat
* Note: `box_thresh` and `unclip_ratio` are parameters required by DB post-processing; they do not need to be set when evaluating an EAST model.
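A minimal sketch of the evaluation command referenced in the hunk header above, assuming the trained checkpoints live under `./output/det_db/` with the prefix `best_accuracy` (an illustrative placeholder):

```shell
# Evaluate a trained DB detection model; Precision, Recall, and Hmean are reported.
python3 tools/eval.py -c configs/det/det_mv3_db.yml \
    -o Global.checkpoints=./output/det_db/best_accuracy
```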
<a name="32-------"></a>
## 3.2 Test Detection Results

Test the detection result on a single image:
```shell
...
@@ -195,8 +216,8 @@ python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.pretrained_model="./output/det_db/best_accuracy"
```
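The single-image command itself is elided in this hunk (only the whole-folder variant survives as a changed line), so here is a sketch of what it typically looks like; the image path is an illustrative placeholder:

```shell
# Test detection on one image instead of a whole folder; the image path is illustrative.
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml \
    -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy"
```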
<a name="4--------"></a>
# 4. Model Export and Inference

An inference model (a model saved with `paddle.jit.save`) is a frozen model whose structure and parameters are serialized to files after training; it is mostly used for prediction and deployment.
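The export step itself is elided in this hunk; a minimal sketch, assuming the standard `tools/export_model.py` entry point and reusing the output paths that appear elsewhere in this document (both paths are illustrative):

```shell
# Export the trained DB model to an inference model (structure + parameters frozen).
python3 tools/export_model.py -c configs/det/det_mv3_db.yml \
    -o Global.pretrained_model="./output/det_db/best_accuracy" \
       Global.save_inference_dir="./output/det_db_inference/"
```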
...
@@ -218,10 +239,11 @@ python3 tools/infer/predict_det.py --det_algorithm="DB" --det_model_dir="./outpu
python3 tools/infer/predict_det.py --det_algorithm="EAST" --det_model_dir="./output/det_db_inference/" --image_dir="./doc/imgs/" --use_gpu=True
```
<a name="5-faq"></a>
# 5. FAQ

Q1: Why do predictions differ after converting a trained model to an inference model?

**A**: This kind of issue is common and is usually caused by inconsistent preprocessing or post-processing parameters between prediction with the trained model and prediction with the inference model. Taking a model trained with the det_mv3_db.yml config as an example, differences between trained-model and inference-model predictions can be resolved as follows:
- Check whether the [trained-model preprocessing](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/configs/det/det_mv3_db.yml#L116) matches the [inference-model preprocessing](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/tools/infer/predict_det.py#L42). The input image size affects accuracy during evaluation; to stay consistent with the paper, the icdar15 training config resizes images to [736, 1280], whereas the inference model uses a single set of defaults and, for speed, limits the longest image side to 960 when resizing (see the sketch after this list). The preprocessing functions for both the trained model and the inference model live in [ppocr/data/imaug/operators.py](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/ppocr/data/imaug/operators.py#L147).
- Check whether the [trained-model post-processing](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/configs/det/det_mv3_db.yml#L51) matches the [inference post-processing parameters](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/tools/infer/utility.py#L50).
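The sketch referenced above: one way to bring the inference-side resize closer to the evaluation setting, assuming the `--det_limit_side_len` and `--det_limit_type` flags defined in tools/infer/utility.py; the values shown are illustrative and should be matched against your own training/evaluation config:

```shell
# Raise the inference resize limit so preprocessing is closer to the [736, 1280]
# evaluation setting instead of the default "longest side <= 960".
python3 tools/infer/predict_det.py --det_algorithm="DB" \
    --det_model_dir="./output/det_db_inference/" --image_dir="./doc/imgs_en/" \
    --det_limit_type="min" --det_limit_side_len=736
```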