"vscode:/vscode.git/clone" did not exist on "7a382d5dc4e6d894639575d58ec44a7b7b1320e3"
Commit d73ed79c authored by Leif

Merge remote-tracking branch 'Evezerest/dygraph' into dygraph

parents af77d08c 2945abd7
@@ -6,27 +6,35 @@ function _set_params(){
run_mode=${1:-"sp"}            # 单卡sp|多卡mp
batch_size=${2:-"64"}
fp_item=${3:-"fp32"}           # fp32|fp16
max_epoch=${4:-"10"}           # 可选,如果需要修改代码提前中断
model_item=${5:-"model_item"}
run_log_path=${TRAIN_LOG_DIR:-$(pwd)}  # TRAIN_LOG_DIR 后续QA设置该参数
# 日志解析所需参数
base_batch_size=${batch_size}
mission_name="OCR"
direction_id="0"
ips_unit="images/sec"
skip_steps=2 # 解析日志,有些模型前几个step耗时长,需要跳过 (必填)
keyword="ips:" # 解析日志,筛选出数据所在行的关键字 (必填)
index="1"
model_name=${model_item}_bs${batch_size}_${fp_item} # model_item 用于yml文件名匹配,model_name 用于数据入库前端展示
# 以下不用修改
device=${CUDA_VISIBLE_DEVICES//,/ }
arr=(${device})
num_gpu_devices=${#arr[*]}
log_file=${run_log_path}/${model_item}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices}
}
function _train(){
echo "Train on ${num_gpu_devices} GPUs"
echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size"
train_cmd="-c configs/det/${model_item}.yml -o Train.loader.batch_size_per_card=${batch_size} Global.epoch_num=${max_epoch} Global.eval_batch_step=[0,20000] Global.print_batch_step=2"
case ${run_mode} in
sp)
train_cmd="python tools/train.py "${train_cmd}""
;;
mp)
train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --gpus=$CUDA_VISIBLE_DEVICES tools/train.py ${train_cmd}"
;;
*) echo "choose run_mode(sp or mp)"; exit 1;
esac
@@ -46,17 +54,7 @@ function _train(){
fi
}
source ${BENCHMARK_ROOT}/scripts/run_model.sh   # 在该脚本中会对符合benchmark规范的log使用analysis.py 脚本进行性能数据解析;该脚本在连调时可从benchmark repo中下载 https://github.com/PaddlePaddle/benchmark/blob/master/scripts/run_model.sh;如果不联调只想要产出训练log可以注掉本行,提交时需打开
_set_params $@
#_train        # 如果只想产出训练log,不解析,可取消注释
_run           # 该函数在run_model.sh中,执行时会调用_train; 如果不联调只想要产出训练log可以注掉本行,提交时需打开
\ No newline at end of file
#!/bin/bash
# 提供可稳定复现性能的脚本,默认在标准docker环境内py37执行: paddlepaddle/paddle:latest-gpu-cuda10.1-cudnn7 paddle=2.1.2 py=37
# 执行目录: ./PaddleOCR
# 1 安装该模型需要的依赖 (如需开启优化策略请注明)
log_path=${LOG_PATH_INDEX_DIR:-$(pwd)}
python -m pip install -r requirements.txt
# 2 拷贝该模型需要数据、预训练模型
wget -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar && cd train_data && tar xf icdar2015.tar && cd ../
wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_pretrained.pdparams
wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet18_vd_pretrained.pdparams
wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_ssld_pretrained.pdparams
# 3 批量运行(如不方便批量,1,2需放到单个模型中)
model_mode_list=(det_res18_db_v2.0 det_r50_vd_east det_r50_vd_pse)
fp_item_list=(fp32)
for model_mode in ${model_mode_list[@]}; do
for fp_item in ${fp_item_list[@]}; do
if [ ${model_mode} == "det_r50_vd_east" ]; then
bs_list=(16)
else
bs_list=(8 16)
fi
for bs_item in ${bs_list[@]}; do
echo "index is speed, 1gpus, begin, ${model_name}"
run_mode=sp
log_name=ocr_${model_mode}_bs${bs_item}_${fp_item}_${run_mode}
CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 1 ${model_mode} | tee ${log_path}/${log_name}_speed_1gpus 2>&1  # (5min)
sleep 60
echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_name}"
run_mode=mp
log_name=ocr_${model_mode}_bs${bs_item}_${fp_item}_${run_mode}
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 2 ${model_mode} | tee ${log_path}/${log_name}_speed_8gpus8p 2>&1
sleep 60
done
done
......
@@ -62,8 +62,7 @@ Loss:
weight: 0.05
num_classes: 6625
feat_dim: 96
center_file_path:
# you can also try to add ace loss on your own dataset
# - ACELoss:
#   weight: 0.1
......
@@ -34,10 +34,10 @@ PaddleOCR模型部署。
* 首先需要从opencv官网上下载在Linux环境下源码编译的包,以opencv3.4.7为例,下载命令如下。
```bash
cd deploy/cpp_infer
wget https://paddleocr.bj.bcebos.com/libs/opencv/opencv-3.4.7.tar.gz
tar -xf opencv-3.4.7.tar.gz
```
最终可以在当前目录下看到`opencv-3.4.7/`的文件夹。
@@ -45,12 +45,13 @@ tar -xf 3.4.7.tar.gz
* 编译opencv,设置opencv源码路径(`root_path`)以及安装路径(`install_path`)。进入opencv源码路径下,按照下面的方式进行编译。
```shell
root_path="your_opencv_root_path"
install_path=${root_path}/opencv3
build_dir=${root_path}/build
rm -rf ${build_dir}
mkdir ${build_dir}
cd ${build_dir}
cmake .. \
    -DCMAKE_INSTALL_PREFIX=${install_path} \
@@ -74,6 +75,11 @@ make -j
make install
```
也可以直接修改`tools/build_opencv.sh`的内容,然后直接运行下面的命令进行编译。
```shell
sh tools/build_opencv.sh
```
其中`root_path`为下载的opencv源码路径,`install_path`为opencv的安装路径,`make install`完成之后,会在该文件夹下生成opencv头文件和库文件,用于后面的OCR代码编译。
@@ -233,12 +239,12 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
--image_dir=../../doc/imgs/12.jpg
```
更多支持的可调节参数解释如下:
- 通用参数
|参数名称|类型|默认参数|意义|
| :---: | :---: | :---: | :---: |
|use_gpu|bool|false|是否使用GPU|
|gpu_id|int|0|GPU id,使用GPU时有效|
|gpu_mem|int|4000|申请的GPU内存|
@@ -248,7 +254,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
- 检测模型相关
|参数名称|类型|默认参数|意义|
| :---: | :---: | :---: | :---: |
|det_model_dir|string|-|检测模型inference model地址|
|max_side_len|int|960|输入图像长宽大于960时,等比例缩放图像,使得图像最长边为960|
|det_db_thresh|float|0.3|用于过滤DB预测的二值化图像,设置为0.-0.3对结果影响不明显|
@@ -260,7 +266,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
- 方向分类器相关
|参数名称|类型|默认参数|意义|
| :---: | :---: | :---: | :---: |
|use_angle_cls|bool|false|是否使用方向分类器|
|cls_model_dir|string|-|方向分类器inference model地址|
|cls_thresh|float|0.9|方向分类器的得分阈值|
@@ -268,7 +274,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
- 识别模型相关
|参数名称|类型|默认参数|意义|
| :---: | :---: | :---: | :---: |
|rec_model_dir|string|-|识别模型inference model地址|
|char_list_file|string|../../ppocr/utils/ppocr_keys_v1.txt|字典文件|
......
@@ -17,10 +17,10 @@ PaddleOCR model deployment.
* First, download the opencv source package for compilation under Linux. Taking opencv 3.4.7 as an example, the download command is as follows.
```bash
cd deploy/cpp_infer
wget https://paddleocr.bj.bcebos.com/libs/opencv/opencv-3.4.7.tar.gz
tar -xf opencv-3.4.7.tar.gz
```
Finally, you can see the folder of `opencv-3.4.7/` in the current directory.
......
root_path="/paddle/PaddleOCR/deploy/cpp_infer/opencv-3.4.7"
install_path=${root_path}/opencv3
build_dir=${root_path}/build
rm -rf ${build_dir}
mkdir ${build_dir}
cd ${build_dir}
cmake .. \
-DCMAKE_INSTALL_PREFIX=${install_path} \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_SHARED_LIBS=OFF \
-DWITH_IPP=OFF \
-DBUILD_IPP_IW=OFF \
-DWITH_LAPACK=OFF \
-DWITH_EIGEN=OFF \
-DCMAKE_INSTALL_LIBDIR=lib64 \
-DWITH_ZLIB=ON \
-DBUILD_ZLIB=ON \
-DWITH_JPEG=ON \
-DBUILD_JPEG=ON \
-DWITH_PNG=ON \
-DBUILD_PNG=ON \
-DWITH_TIFF=ON \
-DBUILD_TIFF=ON
make -j
make install
@@ -172,7 +172,10 @@ void RunRecModel(std::vector<std::vector<std::vector<int>>> boxes, cv::Mat img,
cv::Mat resize_img;
int index = 0;
std::vector<double> time_info = {0, 0, 0};
for (int i = boxes.size() - 1; i >= 0; i--) {
auto preprocess_start = std::chrono::steady_clock::now();
crop_img = GetRotateCropImage(srcimg, boxes[i]);
if (use_direction_classify >= 1) {
crop_img = RunClsModel(crop_img, predictor_cls);
@@ -191,7 +194,9 @@ void RunRecModel(std::vector<std::vector<std::vector<int>>> boxes, cv::Mat img,
auto *data0 = input_tensor0->mutable_data<float>();
NeonMeanScale(dimg, data0, resize_img.rows * resize_img.cols, mean, scale);
auto preprocess_end = std::chrono::steady_clock::now();
//// Run CRNN predictor
auto inference_start = std::chrono::steady_clock::now();
predictor_crnn->Run();
// Get output and run postprocess
@@ -199,8 +204,10 @@ void RunRecModel(std::vector<std::vector<std::vector<int>>> boxes, cv::Mat img,
    std::move(predictor_crnn->GetOutput(0)));
auto *predict_batch = output_tensor0->data<float>();
auto predict_shape = output_tensor0->shape();
auto inference_end = std::chrono::steady_clock::now();
// ctc decode
auto postprocess_start = std::chrono::steady_clock::now();
std::string str_res;
int argmax_idx;
int last_index = 0;
@@ -224,7 +231,20 @@ void RunRecModel(std::vector<std::vector<std::vector<int>>> boxes, cv::Mat img,
score /= count;
rec_text.push_back(str_res);
rec_text_score.push_back(score);
auto postprocess_end = std::chrono::steady_clock::now();
std::chrono::duration<float> preprocess_diff = preprocess_end - preprocess_start;
time_info[0] += double(preprocess_diff.count() * 1000);
std::chrono::duration<float> inference_diff = inference_end - inference_start;
time_info[1] += double(inference_diff.count() * 1000);
std::chrono::duration<float> postprocess_diff = postprocess_end - postprocess_start;
time_info[2] += double(postprocess_diff.count() * 1000);
}
times->push_back(time_info[0]);
times->push_back(time_info[1]);
times->push_back(time_info[2]);
}
std::vector<std::vector<std::vector<int>>>
@@ -312,7 +332,6 @@ std::shared_ptr<PaddlePredictor> loadModel(std::string model_file, int num_threa
config.set_model_from_file(model_file);
config.set_threads(num_threads);
std::shared_ptr<PaddlePredictor> predictor =
    CreatePaddlePredictor<MobileConfig>(config);
return predictor;
@@ -434,6 +453,9 @@ void system(char **argv){
auto rec_predictor = loadModel(rec_model_file, std::stoi(num_threads));
auto cls_predictor = loadModel(cls_model_file, std::stoi(num_threads));
std::vector<double> det_time_info = {0, 0, 0};
std::vector<double> rec_time_info = {0, 0, 0};
for (int i = 0; i < cv_all_img_names.size(); ++i) {
std::cout << "The predict img: " << cv_all_img_names[i] << std::endl;
cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
@@ -460,7 +482,37 @@ void system(char **argv){
for (int i = 0; i < rec_text.size(); i++) {
std::cout << i << "\t" << rec_text[i] << "\t" << rec_text_score[i]
          << std::endl;
}
det_time_info[0] += det_times[0];
det_time_info[1] += det_times[1];
det_time_info[2] += det_times[2];
rec_time_info[0] += rec_times[0];
rec_time_info[1] += rec_times[1];
rec_time_info[2] += rec_times[2];
}
if (strcmp(argv[12], "True") == 0) {
AutoLogger autolog_det(det_model_file,
runtime_device,
std::stoi(num_threads),
std::stoi(batchsize),
"dynamic",
precision,
det_time_info,
cv_all_img_names.size());
AutoLogger autolog_rec(rec_model_file,
runtime_device,
std::stoi(num_threads),
std::stoi(batchsize),
"dynamic",
precision,
rec_time_info,
cv_all_img_names.size());
autolog_det.report();
std::cout << std::endl;
autolog_rec.report();
}
}
@@ -503,15 +555,15 @@ void det(int argc, char **argv) {
auto img_vis = Visualization(srcimg, boxes);
std::cout << boxes.size() << " bboxes have detected:" << std::endl;
for (int i=0; i<boxes.size(); i++){
  std::cout << "The " << i << " box:" << std::endl;
  for (int j=0; j<4; j++){
    for (int k=0; k<2; k++){
      std::cout << boxes[i][j][k] << "\t";
    }
  }
  std::cout << std::endl;
}
time_info[0] += times[0];
time_info[1] += times[1];
time_info[2] += times[2];
@@ -585,6 +637,9 @@ void rec(int argc, char **argv) {
std::cout << i << "\t" << rec_text[i] << "\t" << rec_text_score[i]
          << std::endl;
}
time_info[0] += times[0];
time_info[1] += times[1];
time_info[2] += times[2];
}
// TODO: support autolog
if (strcmp(argv[9], "True") == 0) {
......
@@ -52,12 +52,17 @@ def main(config, device, logger, vdl_writer):
        config['Architecture']["Head"]['out_channels'] = char_num
    model = build_model(config['Architecture'])
    if config['Architecture']['model_type'] == 'det':
        input_shape = [1, 3, 640, 640]
    elif config['Architecture']['model_type'] == 'rec':
        input_shape = [1, 3, 32, 320]
    flops = paddle.flops(model, input_shape)
    logger.info("FLOPs before pruning: {}".format(flops))

    from paddleslim.dygraph import FPGMFilterPruner
    model.train()
    pruner = FPGMFilterPruner(model, input_shape)

    # build metric
    eval_class = build_metric(config['Metric'])
@@ -65,8 +70,13 @@ def main(config, device, logger, vdl_writer):
    def eval_fn():
        metric = program.eval(model, valid_dataloader, post_process_class,
                              eval_class)
        if config['Architecture']['model_type'] == 'det':
            main_indicator = 'hmean'
        else:
            main_indicator = 'acc'
        logger.info("metric[{}]: {}".format(main_indicator, metric[
            main_indicator]))
        return metric[main_indicator]

    params_sensitive = pruner.sensitive(
        eval_func=eval_fn,
@@ -81,18 +91,22 @@ def main(config, device, logger, vdl_writer):
    # calculate pruned params's ratio
    params_sensitive = pruner._get_ratios_by_loss(params_sensitive, loss=0.02)
    for key in params_sensitive.keys():
        logger.info("{}, {}".format(key, params_sensitive[key]))

    plan = pruner.prune_vars(params_sensitive, [0])

    flops = paddle.flops(model, input_shape)
    logger.info("FLOPs after pruning: {}".format(flops))

    # load pretrain model
    load_model(config, model)
    metric = program.eval(model, valid_dataloader, post_process_class,
                          eval_class)
    if config['Architecture']['model_type'] == 'det':
        main_indicator = 'hmean'
    else:
        main_indicator = 'acc'
    logger.info("metric[{}]: {}".format(main_indicator, metric[main_indicator]))

    # start export model
    from paddle.jit import to_static
......
@@ -73,13 +73,18 @@ def main(config, device, logger, vdl_writer):
        char_num = len(getattr(post_process_class, 'character'))
        config['Architecture']["Head"]['out_channels'] = char_num
    model = build_model(config['Architecture'])
    if config['Architecture']['model_type'] == 'det':
        input_shape = [1, 3, 640, 640]
    elif config['Architecture']['model_type'] == 'rec':
        input_shape = [1, 3, 32, 320]
    flops = paddle.flops(model, input_shape)
    logger.info("FLOPs before pruning: {}".format(flops))

    from paddleslim.dygraph import FPGMFilterPruner
    model.train()
    pruner = FPGMFilterPruner(model, input_shape)

    # build loss
    loss_class = build_loss(config['Loss'])
@@ -107,8 +112,14 @@ def main(config, device, logger, vdl_writer):
    def eval_fn():
        metric = program.eval(model, valid_dataloader, post_process_class,
                              eval_class, False)
        if config['Architecture']['model_type'] == 'det':
            main_indicator = 'hmean'
        else:
            main_indicator = 'acc'
        logger.info("metric[{}]: {}".format(main_indicator, metric[
            main_indicator]))
        return metric[main_indicator]

    run_sensitive_analysis = False
    """
@@ -149,7 +160,7 @@ def main(config, device, logger, vdl_writer):
    plan = pruner.prune_vars(params_sensitive, [0])

    flops = paddle.flops(model, input_shape)
    logger.info("FLOPs after pruning: {}".format(flops))

    # start train
......
@@ -247,3 +247,7 @@ Q1: 训练模型转inference 模型之后预测效果不一致?
**A**:此类问题出现较多,问题多是trained model预测时候的预处理、后处理参数和inference model预测的时候的预处理、后处理参数不一致导致的。以det_mv3_db.yml配置文件训练的模型为例,训练模型、inference模型预测结果不一致问题解决方式如下:
- 检查[trained model预处理](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/configs/det/det_mv3_db.yml#L116),和[inference model的预测预处理](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/tools/infer/predict_det.py#L42)函数是否一致。算法在评估的时候,输入图像大小会影响精度,为了和论文保持一致,训练icdar15配置文件中将图像resize到[736, 1280],但是在inference model预测的时候只有一套默认参数,会考虑到预测速度问题,默认限制图像最长边为960做resize的。训练模型预处理和inference模型的预处理函数位于[ppocr/data/imaug/operators.py](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/ppocr/data/imaug/operators.py#L147)
- 检查[trained model后处理](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/configs/det/det_mv3_db.yml#L51),和[inference 后处理参数](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/tools/infer/utility.py#L50)是否一致。
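例如,推理侧可以通过调整resize相关参数,使其尽量接近训练/评估时的预处理设置。下面的命令仅为示意,模型与图片路径均为假设值,参数含义可参考推理文档中的"参数解释"一节:

```bash
# 示意:将推理时的最长边限制从默认的960调大,更接近训练/评估时的resize设置
# (./inference/det_db/ 为假设的检测inference模型路径)
python3 tools/infer/predict_det.py \
    --det_model_dir="./inference/det_db/" \
    --image_dir="./doc/imgs/12.jpg" \
    --det_limit_type="max" \
    --det_limit_side_len=1280
```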
Q1: 训练EAST模型提示找不到lanms库?
**A**:执行pip3 install lanms-nova 即可。
@@ -34,6 +34,8 @@ inference 模型(`paddle.jit.save`保存的模型)
- [1. 超轻量中文OCR模型推理](#超轻量中文OCR模型推理)
- [2. 其他模型推理](#其他模型推理)
- [六、参数解释](#参数解释)
<a name="训练模型转inference模型"></a>
## 一、训练模型转inference模型
@@ -394,3 +396,127 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --d
执行命令后,识别结果图像如下:
![](../imgs_results/img_10_east_starnet.jpg)
<a name="参数解释"></a>
## 六、参数解释
更多关于预测过程的参数解释如下所示。
* 全局信息
| 参数名称 | 类型 | 默认值 | 含义 |
| :--: | :--: | :--: | :--: |
| image_dir | str | 无,必须显式指定 | 图像或者文件夹路径 |
| vis_font_path | str | "./doc/fonts/simfang.ttf" | 用于可视化的字体路径 |
| drop_score | float | 0.5 | 识别得分小于该值的结果会被丢弃,不会作为返回结果 |
| use_pdserving | bool | False | 是否使用Paddle Serving进行预测 |
| warmup | bool | False | 是否开启warmup,在统计预测耗时的时候,可以使用这种方法 |
| draw_img_save_dir | str | "./inference_results" | 系统串联预测OCR结果的保存文件夹 |
| save_crop_res | bool | False | 是否保存OCR的识别文本图像 |
| crop_res_save_dir | str | "./output" | 保存OCR识别出来的文本图像路径 |
| use_mp | bool | False | 是否开启多进程预测 |
| total_process_num | int | 6 | 开启的进程数,`use_mp`为`True`时生效 |
| process_id | int | 0 | 当前进程的id号,无需自己修改 |
| benchmark | bool | False | 是否开启benchmark,对预测速度、显存占用等进行统计 |
| save_log_path | str | "./log_output/" | 开启`benchmark`时,日志结果的保存文件夹 |
| show_log | bool | True | 是否显示预测中的日志信息 |
| use_onnx | bool | False | 是否开启onnx预测 |
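
下面给出一个使用上述全局参数的串联预测示例命令,仅作演示:其中的模型路径为假设值,实际使用时请替换为本地导出的inference模型路径。

```bash
# 示例:检测+识别串联预测,丢弃得分低于0.5的识别结果,并保存可视化结果
# (./inference/det_db/ 与 ./inference/rec_crnn/ 为假设的模型路径)
python3 tools/infer/predict_system.py \
    --image_dir="./doc/imgs/12.jpg" \
    --det_model_dir="./inference/det_db/" \
    --rec_model_dir="./inference/rec_crnn/" \
    --drop_score=0.5 \
    --draw_img_save_dir="./inference_results"
```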
* 预测引擎相关
| 参数名称 | 类型 | 默认值 | 含义 |
| :--: | :--: | :--: | :--: |
| use_gpu | bool | True | 是否使用GPU进行预测 |
| ir_optim | bool | True | 是否对计算图进行分析与优化,开启后可以加速预测过程 |
| use_tensorrt | bool | False | 是否开启tensorrt |
| min_subgraph_size | int | 15 | tensorrt中最小子图size,当子图的size大于该值时,才会尝试对该子图使用trt engine计算 |
| precision | str | fp32 | 预测的精度,支持`fp32`, `fp16`, `int8` 3种输入 |
| enable_mkldnn | bool | True | 是否开启mkldnn |
| cpu_threads | int | 10 | 开启mkldnn时,cpu预测的线程数 |
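
若要结合上表参数开启GPU与TensorRT半精度预测,可以参考如下示例命令(模型路径为假设值,且需要预测库编译时带有TensorRT支持):

```bash
# 示例:开启TensorRT并使用fp16精度进行预测
python3 tools/infer/predict_system.py \
    --image_dir="./doc/imgs/12.jpg" \
    --det_model_dir="./inference/det_db/" \
    --rec_model_dir="./inference/rec_crnn/" \
    --use_gpu=True \
    --use_tensorrt=True \
    --precision=fp16
```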
* 文本检测模型相关
| 参数名称 | 类型 | 默认值 | 含义 |
| :--: | :--: | :--: | :--: |
| det_algorithm | str | "DB" | 文本检测算法名称,目前支持`DB`, `EAST`, `SAST`, `PSE` |
| det_model_dir | str | xx | 检测inference模型路径 |
| det_limit_side_len | int | 960 | 检测的图像边长限制 |
| det_limit_type | str | "max" | 检测的边长限制类型,目前支持`min`, `max`,`min`表示保证图像最短边不小于`det_limit_side_len`,`max`表示保证图像最长边不大于`det_limit_side_len` |
其中,DB算法相关参数如下
| 参数名称 | 类型 | 默认值 | 含义 |
| :--: | :--: | :--: | :--: |
| det_db_thresh | float | 0.3 | DB输出的概率图中,得分大于该阈值的像素点才会被认为是文字像素点 |
| det_db_box_thresh | float | 0.6 | 检测结果边框内,所有像素点的平均得分大于该阈值时,该结果会被认为是文字区域 |
| det_db_unclip_ratio | float | 1.5 | `Vatti clipping`算法的扩张系数,使用该方法对文字区域进行扩张 |
| max_batch_size | int | 10 | 预测的batch size |
| use_dilation | bool | False | 是否对分割结果进行膨胀以获取更优检测效果 |
| det_db_score_mode | str | "fast" | DB的检测结果得分计算方法,支持`fast``slow``fast`是根据polygon的外接矩形边框内的所有像素计算平均得分,`slow`是根据原始polygon内的所有像素计算平均得分,计算速度相对较慢一些,但是更加准确一些。 |
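
以DB算法为例,下面的示例命令演示了如何在单独的检测预测中调节上述后处理参数(模型路径为假设值):

```bash
# 示例:单独运行DB文本检测,并调大unclip比例以获得更宽松的检测框
python3 tools/infer/predict_det.py \
    --det_algorithm="DB" \
    --det_model_dir="./inference/det_db/" \
    --image_dir="./doc/imgs/12.jpg" \
    --det_db_thresh=0.3 \
    --det_db_box_thresh=0.6 \
    --det_db_unclip_ratio=1.8 \
    --det_db_score_mode="slow"
```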
EAST算法相关参数如下
| 参数名称 | 类型 | 默认值 | 含义 |
| :--: | :--: | :--: | :--: |
| det_east_score_thresh | float | 0.8 | EAST后处理中score map的阈值 |
| det_east_cover_thresh | float | 0.1 | EAST后处理中文本框的平均得分阈值 |
| det_east_nms_thresh | float | 0.2 | EAST后处理中nms的阈值 |
SAST算法相关参数如下
| 参数名称 | 类型 | 默认值 | 含义 |
| :--: | :--: | :--: | :--: |
| det_sast_score_thresh | float | 0.5 | SAST后处理中的得分阈值 |
| det_sast_nms_thresh | float | 0.5 | SAST后处理中nms的阈值 |
| det_sast_polygon | bool | False | 是否多边形检测,弯曲文本场景(如Total-Text)设置为True |
PSE算法相关参数如下
| 参数名称 | 类型 | 默认值 | 含义 |
| :--: | :--: | :--: | :--: |
| det_pse_thresh | float | 0.0 | 对输出图做二值化的阈值 |
| det_pse_box_thresh | float | 0.85 | 对box进行过滤的阈值,低于此阈值的丢弃 |
| det_pse_min_area | float | 16 | box的最小面积,低于此阈值的丢弃 |
| det_pse_box_type | str | "box" | 返回框的类型,box:四点坐标,poly: 弯曲文本的所有点坐标 |
| det_pse_scale | int | 1 | 输入图像相对于进后处理的图的比例,如`640*640`的图像,网络输出为`160*160`,scale为2的情况下,进后处理的图片shape为`320*320`。这个值调大可以加快后处理速度,但是会带来精度的下降 |
* 文本识别模型相关
| 参数名称 | 类型 | 默认值 | 含义 |
| :--: | :--: | :--: | :--: |
| rec_algorithm | str | "CRNN" | 文本识别算法名称,目前支持`CRNN`, `SRN`, `RARE`, `NRTR`, `SAR` |
| rec_model_dir | str | 无,如果使用识别模型,该项是必填项 | 识别inference模型路径 |
| rec_image_shape | list | [3, 32, 320] | 识别时的图像尺寸 |
| rec_batch_num | int | 6 | 识别的batch size |
| max_text_length | int | 25 | 识别结果最大长度,在`SRN`中有效 |
| rec_char_dict_path | str | "./ppocr/utils/ppocr_keys_v1.txt" | 识别的字符字典文件 |
| use_space_char | bool | True | 是否包含空格,如果为`True`,则会在最后字符字典中补充`空格`字符 |
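
下面是一个单独调用识别模型的示例命令,用于说明上述参数的典型用法(模型与图片路径均为假设值,仅供参考):

```bash
# 示例:单独运行文本识别,指定字典文件并开启空格识别
python3 tools/infer/predict_rec.py \
    --rec_model_dir="./inference/rec_crnn/" \
    --image_dir="./doc/imgs_words/ch/word_1.jpg" \
    --rec_image_shape="3, 32, 320" \
    --rec_char_dict_path="./ppocr/utils/ppocr_keys_v1.txt" \
    --use_space_char=True
```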
* 端到端文本检测与识别模型相关
| 参数名称 | 类型 | 默认值 | 含义 |
| :--: | :--: | :--: | :--: |
| e2e_algorithm | str | "PGNet" | 端到端算法名称,目前支持`PGNet` |
| e2e_model_dir | str | 无,如果使用端到端模型,该项是必填项 | 端到端模型inference模型路径 |
| e2e_limit_side_len | int | 768 | 端到端的输入图像边长限制 |
| e2e_limit_type | str | "max" | 端到端的边长限制类型,目前支持`min`, `max`,`min`表示保证图像最短边不小于`e2e_limit_side_len`,`max`表示保证图像最长边不大于`e2e_limit_side_len` |
| e2e_pgnet_score_thresh | float | 0.5 | 端到端得分阈值,小于该阈值的结果会被丢弃 |
| e2e_char_dict_path | str | "./ppocr/utils/ic15_dict.txt" | 识别的字典文件路径 |
| e2e_pgnet_valid_set | str | "totaltext" | 验证集名称,目前支持`totaltext`, `partvgg`,不同数据集对应的后处理方式不同,与训练过程保持一致即可 |
| e2e_pgnet_mode | str | "fast" | PGNet的检测结果得分计算方法,支持`fast``slow``fast`是根据polygon的外接矩形边框内的所有像素计算平均得分,`slow`是根据原始polygon内的所有像素计算平均得分,计算速度相对较慢一些,但是更加准确一些。 |
* 方向分类器模型相关
| 参数名称 | 类型 | 默认值 | 含义 |
| :--: | :--: | :--: | :--: |
| use_angle_cls | bool | False | 是否使用方向分类器 |
| cls_model_dir | str | 无,如果需要使用,则必须显式指定路径 | 方向分类器inference模型路径 |
| cls_image_shape | list | [3, 48, 192] | 预测尺度 |
| label_list | list | ['0', '180'] | class id对应的角度值 |
| cls_batch_num | int | 6 | 方向分类器预测的batch size |
| cls_thresh | float | 0.9 | 预测阈值,模型预测结果为180度,且得分大于该阈值时,认为最终预测结果为180度,需要翻转 |
@@ -33,8 +33,8 @@ PaddleOCR提供的可下载模型包括`推理模型`、`训练模型`、`预训
|模型名称|模型简介|配置文件|推理模型大小|下载地址|
| --- | --- | --- | --- | --- |
|ch_PP-OCRv2_det_slim|【最新】slim量化+蒸馏版超轻量模型,支持中英文、多语种文本检测|[ch_PP-OCRv2_det_cml.yml](../../configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml)| 3M |[推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar)|
|ch_PP-OCRv2_det|【最新】原始超轻量模型,支持中英文、多语种文本检测|[ch_PP-OCRv2_det_cml.yml](../../configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml)|3M|[推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar)|
|ch_ppocr_mobile_slim_v2.0_det|slim裁剪版超轻量模型,支持中英文、多语种文本检测|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)| 2.6M |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_det_prune_infer.tar)|
|ch_ppocr_mobile_v2.0_det|原始超轻量模型,支持中英文、多语种文本检测|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)|3M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|
|ch_ppocr_server_v2.0_det|通用模型,支持中英文、多语种文本检测,比超轻量模型更大,但效果更好|[ch_det_res18_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_res18_db_v2.0.yml)|47M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar)|
......
@@ -66,13 +66,13 @@ wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/e2e_server_pgnetA_infer.
### 单张图像或者图像集合预测
```bash
# 预测image_dir指定的单张图像
python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e_server_pgnetA_infer/" --e2e_pgnet_valid_set="totaltext"
# 预测image_dir指定的图像集合
python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/" --e2e_model_dir="./inference/e2e_server_pgnetA_infer/" --e2e_pgnet_valid_set="totaltext"
# 如果想使用CPU进行预测,需设置use_gpu参数为False
python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e_server_pgnetA_infer/" --e2e_pgnet_valid_set="totaltext" --use_gpu=False
```
### 可视化结果
可视化文本检测结果默认保存到./inference_results文件夹里面,结果文件的名称前缀为'e2e_res'。结果示例如下:
@@ -167,9 +167,9 @@ python3 tools/infer_e2e.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.infer_img=
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/en_server_pgnetA.tar && tar xf en_server_pgnetA.tar
python3 tools/export_model.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.pretrained_model=./en_server_pgnetA/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/e2e
```
**PGNet四边形文本端到端模型推理,需要设置参数`--e2e_algorithm="PGNet"`和`--e2e_pgnet_valid_set="partvgg"`**,可以执行如下命令:
```
python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img_10.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_valid_set="partvgg"
```
可视化文本检测结果默认保存到`./inference_results`文件夹里面,结果文件的名称前缀为'e2e_res'。结果示例如下:
@@ -178,9 +178,9 @@ python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/im
#### (2). 弯曲文本检测模型(Total-Text)
对于弯曲文本样例
**PGNet端到端模型推理,需要设置参数`--e2e_algorithm="PGNet"`,同时,还需要增加参数`--e2e_pgnet_valid_set="totaltext"`**,可以执行如下命令:
```
python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_valid_set="totaltext"
```
可视化文本端到端结果默认保存到`./inference_results`文件夹里面,结果文件的名称前缀为'e2e_res'。结果示例如下:
......
@@ -29,8 +29,8 @@ Relationship of the above models is as follows.
|model name|description|config|model size|download|
| --- | --- | --- | --- | --- |
|ch_PP-OCRv2_det_slim|[New] slim quantization with distillation lightweight model, supporting Chinese, English, multilingual text detection|[ch_PP-OCRv2_det_cml.yml](../../configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml)| 3M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar)|
|ch_PP-OCRv2_det|[New] Original lightweight model, supporting Chinese, English, multilingual text detection|[ch_PP-OCRv2_det_cml.yml](../../configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml)|3M|[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar)|
|ch_ppocr_mobile_slim_v2.0_det|Slim pruned lightweight model, supporting Chinese, English, multilingual text detection|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)|2.6M |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_det_prune_infer.tar)|
|ch_ppocr_mobile_v2.0_det|Original lightweight model, supporting Chinese, English, multilingual text detection|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)|3M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|
|ch_ppocr_server_v2.0_det|General model, which is larger than the lightweight model, but achieves better performance|[ch_det_res18_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_res18_db_v2.0.yml)|47M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar)|
......
@@ -59,13 +59,13 @@ After decompression, there should be the following file structure:
### Single image or image set prediction
```bash
# Predict a single image specified by image_dir
python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e_server_pgnetA_infer/" --e2e_pgnet_valid_set="totaltext"
# Predict the collection of images specified by image_dir
python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/" --e2e_model_dir="./inference/e2e_server_pgnetA_infer/" --e2e_pgnet_valid_set="totaltext"
# If you want to use CPU for prediction, set the use_gpu parameter to false
python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e_server_pgnetA_infer/" --use_gpu=False --e2e_pgnet_valid_set="totaltext"
```
### Visualization results
The visualized end-to-end results are saved to the `./inference_results` folder by default, and the name of the result file is prefixed with 'e2e_res'. Examples of results are as follows:
...@@ -166,9 +166,9 @@ First, convert the model saved in the PGNet end-to-end training process into an ...@@ -166,9 +166,9 @@ First, convert the model saved in the PGNet end-to-end training process into an
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/en_server_pgnetA.tar && tar xf en_server_pgnetA.tar wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/en_server_pgnetA.tar && tar xf en_server_pgnetA.tar
python3 tools/export_model.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.pretrained_model=./en_server_pgnetA/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/e2e python3 tools/export_model.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.pretrained_model=./en_server_pgnetA/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/e2e
``` ```
**For PGNet quadrangle end-to-end model inference, you need to set the parameter `--e2e_algorithm="PGNet"`**, run the following command: **For PGNet quadrangle end-to-end model inference, you need to set the parameter `--e2e_algorithm="PGNet"` and `--e2e_pgnet_valid_set="partvgg"`**, run the following command:
``` ```
python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img_10.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_polygon=False python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img_10.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_valid_set="partvgg"
``` ```
The visualized text detection results are saved to the `./inference_results` folder by default, and the name of the result file is prefixed with 'e2e_res'. Examples of results are as follows: The visualized text detection results are saved to the `./inference_results` folder by default, and the name of the result file is prefixed with 'e2e_res'. Examples of results are as follows:
...@@ -176,9 +176,9 @@ The visualized text detection results are saved to the `./inference_results` fol ...@@ -176,9 +176,9 @@ The visualized text detection results are saved to the `./inference_results` fol
#### (2). Curved text detection model (Total-Text) #### (2). Curved text detection model (Total-Text)
For the curved text example, we use the same model as the quadrilateral For the curved text example, we use the same model as the quadrilateral
**For PGNet end-to-end curved text detection model inference, you need to set the parameter `--e2e_algorithm="PGNet"` and `--e2e_pgnet_polygon=True`**, run the following command: **For PGNet end-to-end curved text detection model inference, you need to set the parameter `--e2e_algorithm="PGNet"` and `--e2e_pgnet_valid_set="totaltext"`**, run the following command:
``` ```
python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_polygon=True python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_valid_set="totaltext"
``` ```
The visualized text detection results are saved to the `./inference_results` folder by default, and the name of the result file is prefixed with 'e2e_res'. Examples of results are as follows: The visualized text detection results are saved to the `./inference_results` folder by default, and the name of the result file is prefixed with 'e2e_res'. Examples of results are as follows:
......
@@ -42,7 +42,7 @@ __all__ = [
]
SUPPORT_DET_MODEL = ['DB']
VERSION = '2.3.0.2'
SUPPORT_REC_MODEL = ['CRNN']
BASE_DIR = os.path.expanduser("~/.paddleocr/")
......