Commit 253b8453 authored by Leif
Browse files

Merge remote-tracking branch 'origin/dygraph' into dygraph

parents 7cad4817 bc999986
include LICENSE.txt include LICENSE
include README.md include README.md
recursive-include ppocr/utils *.txt utility.py logging.py recursive-include ppocr/utils *.txt utility.py logging.py network.py
recursive-include ppocr/data/ *.py recursive-include ppocr/data/ *.py
recursive-include ppocr/postprocess *.py recursive-include ppocr/postprocess *.py
recursive-include tools/infer *.py recursive-include tools/infer *.py
......
Global:
debug: false
use_gpu: true
epoch_num: 800
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec_chinese_lite_distillation_v2.1
save_epoch_step: 3
eval_batch_step: [0, 2000]
cal_metric_during_train: true
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: false
infer_img: doc/imgs_words/ch/word_1.jpg
character_dict_path: ppocr/utils/ppocr_keys_v1.txt
character_type: ch
max_text_length: 25
infer_mode: false
use_space_char: false
distributed: true
save_res_path: ./output/rec/predicts_chinese_lite_distillation_v2.1.txt
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
name: Cosine
learning_rate: 0.0005
warmup_epoch: 5
regularizer:
name: L2
factor: 1.0e-05
Architecture:
name: DistillationModel
algorithm: Distillation
Models:
Student:
pretrained:
freeze_params: false
return_all_feats: true
model_type: rec
algorithm: CRNN
Transform:
Backbone:
name: MobileNetV3
scale: 0.5
model_name: small
small_stride: [1, 2, 2, 2]
Neck:
name: SequenceEncoder
encoder_type: rnn
hidden_size: 64
Head:
name: CTCHead
mid_channels: 96
fc_decay: 0.00001
Teacher:
pretrained:
freeze_params: false
return_all_feats: true
model_type: rec
algorithm: CRNN
Transform:
Backbone:
name: MobileNetV3
scale: 0.5
model_name: small
small_stride: [1, 2, 2, 2]
Neck:
name: SequenceEncoder
encoder_type: rnn
hidden_size: 64
Head:
name: CTCHead
mid_channels: 96
fc_decay: 0.00001
Loss:
name: CombinedLoss
loss_config_list:
- DistillationCTCLoss:
weight: 1.0
model_name_list: ["Student", "Teacher"]
key: head_out
- DistillationDMLLoss:
weight: 1.0
act: "softmax"
model_name_pairs:
- ["Student", "Teacher"]
key: head_out
- DistillationDistanceLoss:
weight: 1.0
mode: "l2"
model_name_pairs:
- ["Student", "Teacher"]
key: backbone_out
PostProcess:
name: DistillationCTCLabelDecode
model_name: ["Student", "Teacher"]
key: head_out
Metric:
name: DistillationMetric
base_metric_name: RecMetric
main_indicator: acc
key: "Student"
Train:
dataset:
name: SimpleDataSet
data_dir: ./train_data/
label_file_list:
- ./train_data/train_list.txt
transforms:
- DecodeImage:
img_mode: BGR
channel_first: false
- RecAug:
- CTCLabelEncode:
- RecResizeImg:
image_shape: [3, 32, 320]
- KeepKeys:
keep_keys:
- image
- label
- length
loader:
shuffle: true
batch_size_per_card: 128
drop_last: true
num_sections: 1
num_workers: 8
Eval:
dataset:
name: SimpleDataSet
data_dir: ./train_data
label_file_list:
- ./train_data/val_list.txt
transforms:
- DecodeImage:
img_mode: BGR
channel_first: false
- CTCLabelEncode:
- RecResizeImg:
image_shape: [3, 32, 320]
- KeepKeys:
keep_keys:
- image
- label
- length
loader:
shuffle: false
drop_last: false
batch_size_per_card: 128
num_workers: 8
Global:
use_gpu: true
epoch_num: 50
log_smooth_window: 20
print_batch_step: 5
save_model_dir: ./output/table_mv3/
save_epoch_step: 5
# evaluation is run every 400 iterations after the 0th iteration
eval_batch_step: [0, 400]
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img: doc/imgs_words/ch/word_1.jpg
# for data or label process
character_dict_path: ppocr/utils/dict/table_structure_dict.txt
character_type: en
max_text_length: 100
max_elem_length: 500
max_cell_num: 500
infer_mode: False
process_total_num: 0
process_cut_num: 0
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
clip_norm: 5.0
lr:
learning_rate: 0.001
regularizer:
name: 'L2'
factor: 0.00000
Architecture:
model_type: table
algorithm: TableAttn
Backbone:
name: MobileNetV3
scale: 1.0
model_name: small
disable_se: True
Head:
name: TableAttentionHead
hidden_size: 256
l2_decay: 0.00001
loc_type: 2
Loss:
name: TableAttentionLoss
structure_weight: 100.0
loc_weight: 10000.0
PostProcess:
name: TableLabelDecode
Metric:
name: TableMetric
main_indicator: acc
Train:
dataset:
name: PubTabDataSet
data_dir: train_data/table/pubtabnet/train/
label_file_path: train_data/table/pubtabnet/PubTabNet_2.0.0_train.jsonl
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- ResizeTableImage:
max_len: 488
- TableLabelEncode:
- NormalizeImage:
scale: 1./255.
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: 'hwc'
- PaddingTableImage:
- ToCHWImage:
- KeepKeys:
keep_keys: ['image', 'structure', 'bbox_list', 'sp_tokens', 'bbox_list_mask']
loader:
shuffle: True
batch_size_per_card: 32
drop_last: True
num_workers: 1
Eval:
dataset:
name: PubTabDataSet
data_dir: train_data/table/pubtabnet/val/
label_file_path: train_data/table/pubtabnet/PubTabNet_2.0.0_val.jsonl
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- ResizeTableImage:
max_len: 488
- TableLabelEncode:
- NormalizeImage:
scale: 1./255.
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: 'hwc'
- PaddingTableImage:
- ToCHWImage:
- KeepKeys:
keep_keys: ['image', 'structure', 'bbox_list', 'sp_tokens', 'bbox_list_mask']
loader:
shuffle: False
drop_last: False
batch_size_per_card: 16
num_workers: 1
...@@ -465,9 +465,13 @@ public class MainActivity extends AppCompatActivity { ...@@ -465,9 +465,13 @@ public class MainActivity extends AppCompatActivity {
} }
public void btn_load_model_click(View view) { public void btn_load_model_click(View view) {
if (predictor.isLoaded()){
tvStatus.setText("STATUS: model has been loaded");
}else{
tvStatus.setText("STATUS: load model ......"); tvStatus.setText("STATUS: load model ......");
loadModel(); loadModel();
} }
}
public void btn_run_model_click(View view) { public void btn_run_model_click(View view) {
Bitmap image =((BitmapDrawable)ivInputImage.getDrawable()).getBitmap(); Bitmap image =((BitmapDrawable)ivInputImage.getDrawable()).getBitmap();
......
...@@ -194,26 +194,25 @@ public class Predictor { ...@@ -194,26 +194,25 @@ public class Predictor {
"supported!"); "supported!");
return false; return false;
} }
int[] channelStride = new int[]{width * height, width * height * 2}; int[] channelStride = new int[]{width * height, width * height * 2};
int p = scaleImage.getPixel(scaleImage.getWidth() - 1, scaleImage.getHeight() - 1); int[] pixels=new int[width*height];
for (int y = 0; y < height; y++) { scaleImage.getPixels(pixels,0,scaleImage.getWidth(),0,0,scaleImage.getWidth(),scaleImage.getHeight());
for (int x = 0; x < width; x++) { for (int i = 0; i < pixels.length; i++) {
int color = scaleImage.getPixel(x, y); int color = pixels[i];
float[] rgb = new float[]{(float) red(color) / 255.0f, (float) green(color) / 255.0f, float[] rgb = new float[]{(float) red(color) / 255.0f, (float) green(color) / 255.0f,
(float) blue(color) / 255.0f}; (float) blue(color) / 255.0f};
inputData[y * width + x] = (rgb[channelIdx[0]] - inputMean[0]) / inputStd[0]; inputData[i] = (rgb[channelIdx[0]] - inputMean[0]) / inputStd[0];
inputData[y * width + x + channelStride[0]] = (rgb[channelIdx[1]] - inputMean[1]) / inputStd[1]; inputData[i + channelStride[0]] = (rgb[channelIdx[1]] - inputMean[1]) / inputStd[1];
inputData[y * width + x + channelStride[1]] = (rgb[channelIdx[2]] - inputMean[2]) / inputStd[2]; inputData[i+ channelStride[1]] = (rgb[channelIdx[2]] - inputMean[2]) / inputStd[2];
}
} }
} else if (channels == 1) { } else if (channels == 1) {
for (int y = 0; y < height; y++) { int[] pixels=new int[width*height];
for (int x = 0; x < width; x++) { scaleImage.getPixels(pixels,0,scaleImage.getWidth(),0,0,scaleImage.getWidth(),scaleImage.getHeight());
int color = inputImage.getPixel(x, y); for (int i = 0; i < pixels.length; i++) {
int color = pixels[i];
float gray = (float) (red(color) + green(color) + blue(color)) / 3.0f / 255.0f; float gray = (float) (red(color) + green(color) + blue(color)) / 3.0f / 255.0f;
inputData[y * width + x] = (gray - inputMean[0]) / inputStd[0]; inputData[i] = (gray - inputMean[0]) / inputStd[0];
}
} }
} else { } else {
Log.i(TAG, "Unsupported channel size " + Integer.toString(channels) + ", only channel 1 and 3 is " + Log.i(TAG, "Unsupported channel size " + Integer.toString(channels) + ", only channel 1 and 3 is " +
......
...@@ -44,6 +44,9 @@ public: ...@@ -44,6 +44,9 @@ public:
inline static size_t argmax(ForwardIterator first, ForwardIterator last) { inline static size_t argmax(ForwardIterator first, ForwardIterator last) {
return std::distance(first, std::max_element(first, last)); return std::distance(first, std::max_element(first, last));
} }
static void GetAllFiles(const char *dir_name,
std::vector<std::string> &all_inputs);
}; };
} // namespace PaddleOCR } // namespace PaddleOCR
\ No newline at end of file
...@@ -77,7 +77,7 @@ opencv3/ ...@@ -77,7 +77,7 @@ opencv3/
#### 1.2.1 直接下载安装 #### 1.2.1 直接下载安装
* [Paddle预测库官网](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)上提供了不同cuda版本的Linux预测库,可以在官网查看并选择合适的预测库版本(*建议选择paddle版本>=2.0.1版本的预测库* )。 * [Paddle预测库官网](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0/guides/05_inference_deployment/inference/build_and_install_lib_cn.html) 上提供了不同cuda版本的Linux预测库,可以在官网查看并选择合适的预测库版本(*建议选择paddle版本>=2.0.1版本的预测库* )。
* 下载之后使用下面的方法解压。 * 下载之后使用下面的方法解压。
...@@ -89,10 +89,11 @@ tar -xf paddle_inference.tgz ...@@ -89,10 +89,11 @@ tar -xf paddle_inference.tgz
#### 1.2.2 预测库源码编译 #### 1.2.2 预测库源码编译
* 如果希望获取最新预测库特性,可以从Paddle github上克隆最新代码,源码编译预测库。 * 如果希望获取最新预测库特性,可以从Paddle github上克隆最新代码,源码编译预测库。
* 可以参考[Paddle预测库官网](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/build_and_install_lib_cn.html)的说明,从github上获取Paddle代码,然后进行编译,生成最新的预测库。使用git获取代码方法如下。 * 可以参考[Paddle预测库安装编译说明](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0/guides/05_inference_deployment/inference/build_and_install_lib_cn.html#congyuanmabianyi) 的说明,从github上获取Paddle代码,然后进行编译,生成最新的预测库。使用git获取代码方法如下。
```shell ```shell
git clone https://github.com/PaddlePaddle/Paddle.git git clone https://github.com/PaddlePaddle/Paddle.git
git checkout release/2.1
``` ```
* 进入Paddle目录后,编译方法如下。 * 进入Paddle目录后,编译方法如下。
...@@ -115,7 +116,7 @@ make -j ...@@ -115,7 +116,7 @@ make -j
make inference_lib_dist make inference_lib_dist
``` ```
更多编译参数选项可以参考Paddle C++预测库官网:[https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/build_and_install_lib_cn.html](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/build_and_install_lib_cn.html) 更多编译参数选项介绍可以参考[文档说明](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0/guides/05_inference_deployment/inference/build_and_install_lib_cn.html#congyuanmabianyi)
* 编译完成之后,可以在`build/paddle_inference_install_dir/`文件下看到生成了以下文件及文件夹。 * 编译完成之后,可以在`build/paddle_inference_install_dir/`文件下看到生成了以下文件及文件夹。
...@@ -140,11 +141,11 @@ build/paddle_inference_install_dir/ ...@@ -140,11 +141,11 @@ build/paddle_inference_install_dir/
``` ```
inference/ inference/
|-- det_db |-- det_db
| |--inference.pdparams | |--inference.pdiparams
| |--inference.pdimodel | |--inference.pdmodel
|-- rec_rcnn |-- rec_rcnn
| |--inference.pdparams | |--inference.pdiparams
| |--inference.pdparams | |--inference.pdmodel
``` ```
......
...@@ -78,8 +78,7 @@ opencv3/ ...@@ -78,8 +78,7 @@ opencv3/
#### 1.2.1 Direct download and installation #### 1.2.1 Direct download and installation
* Different cuda versions of the Linux inference library (based on GCC 4.8.2) are provided on the [Paddle inference library official website](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0/guides/05_inference_deployment/inference/build_and_install_lib_cn.html). You can view and select the appropriate version of the inference library on the official website.
[Paddle inference library official website](https://www.paddlepaddle.org.cn/documentation/docs/en/develop/guides/05_inference_deployment/inference/build_and_install_lib_en.html). You can view and select the appropriate version of the inference library on the official website.
* After downloading, use the following method to uncompress. * After downloading, use the following method to uncompress.
...@@ -97,9 +96,10 @@ Finally you can see the following files in the folder of `paddle_inference/`. ...@@ -97,9 +96,10 @@ Finally you can see the following files in the folder of `paddle_inference/`.
```shell ```shell
git clone https://github.com/PaddlePaddle/Paddle.git git clone https://github.com/PaddlePaddle/Paddle.git
git checkout release/2.1
``` ```
* After entering the Paddle directory, the compilation method is as follows. * After entering the Paddle directory, the commands to compile the paddle inference library are as follows.
```shell ```shell
rm -rf build rm -rf build
...@@ -119,7 +119,7 @@ make -j ...@@ -119,7 +119,7 @@ make -j
make inference_lib_dist make inference_lib_dist
``` ```
For more compilation parameter options, please refer to the official website of the Paddle C++ inference library:[https://www.paddlepaddle.org.cn/documentation/docs/en/develop/guides/05_inference_deployment/inference/build_and_install_lib_en.html](https://www.paddlepaddle.org.cn/documentation/docs/en/develop/guides/05_inference_deployment/inference/build_and_install_lib_en.html). For more compilation parameter options, please refer to the [document](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0/guides/05_inference_deployment/inference/build_and_install_lib_cn.html#congyuanmabianyi).
* After the compilation process, you can see the following files in the folder of `build/paddle_inference_install_dir/`. * After the compilation process, you can see the following files in the folder of `build/paddle_inference_install_dir/`.
...@@ -144,11 +144,11 @@ Among them, `paddle` is the Paddle library required for C++ prediction later, an ...@@ -144,11 +144,11 @@ Among them, `paddle` is the Paddle library required for C++ prediction later, an
``` ```
inference/ inference/
|-- det_db |-- det_db
| |--inference.pdparams | |--inference.pdiparams
| |--inference.pdimodel | |--inference.pdmodel
|-- rec_rcnn |-- rec_rcnn
| |--inference.pdparams | |--inference.pdiparams
| |--inference.pdparams | |--inference.pdmodel
``` ```
......
...@@ -27,9 +27,12 @@ ...@@ -27,9 +27,12 @@
#include <fstream> #include <fstream>
#include <numeric> #include <numeric>
#include <glog/logging.h>
#include <include/config.h> #include <include/config.h>
#include <include/ocr_det.h> #include <include/ocr_det.h>
#include <include/ocr_rec.h> #include <include/ocr_rec.h>
#include <include/utility.h>
#include <sys/stat.h>
using namespace std; using namespace std;
using namespace cv; using namespace cv;
...@@ -47,13 +50,8 @@ int main(int argc, char **argv) { ...@@ -47,13 +50,8 @@ int main(int argc, char **argv) {
config.PrintConfigInfo(); config.PrintConfigInfo();
std::string img_path(argv[2]); std::string img_path(argv[2]);
std::vector<std::string> all_img_names;
cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR); Utility::GetAllFiles((char *)img_path.c_str(), all_img_names);
if (!srcimg.data) {
std::cerr << "[ERROR] image read failed! image path: " << img_path << "\n";
exit(1);
}
DBDetector det(config.det_model_dir, config.use_gpu, config.gpu_id, DBDetector det(config.det_model_dir, config.use_gpu, config.gpu_id,
config.gpu_mem, config.cpu_math_library_num_threads, config.gpu_mem, config.cpu_math_library_num_threads,
...@@ -76,7 +74,18 @@ int main(int argc, char **argv) { ...@@ -76,7 +74,18 @@ int main(int argc, char **argv) {
config.use_tensorrt, config.use_fp16); config.use_tensorrt, config.use_fp16);
auto start = std::chrono::system_clock::now(); auto start = std::chrono::system_clock::now();
for (auto img_dir : all_img_names) {
LOG(INFO) << "The predict img: " << img_dir;
cv::Mat srcimg = cv::imread(img_dir, cv::IMREAD_COLOR);
if (!srcimg.data) {
std::cerr << "[ERROR] image read failed! image path: " << img_path
<< "\n";
exit(1);
}
std::vector<std::vector<std::vector<int>>> boxes; std::vector<std::vector<std::vector<int>>> boxes;
det.Run(srcimg, boxes); det.Run(srcimg, boxes);
rec.Run(boxes, srcimg, cls); rec.Run(boxes, srcimg, cls);
...@@ -88,6 +97,7 @@ int main(int argc, char **argv) { ...@@ -88,6 +97,7 @@ int main(int argc, char **argv) {
std::chrono::microseconds::period::num / std::chrono::microseconds::period::num /
std::chrono::microseconds::period::den std::chrono::microseconds::period::den
<< "s" << std::endl; << "s" << std::endl;
}
return 0; return 0;
} }
...@@ -30,6 +30,42 @@ void DBDetector::LoadModel(const std::string &model_dir) { ...@@ -30,6 +30,42 @@ void DBDetector::LoadModel(const std::string &model_dir) {
this->use_fp16_ ? paddle_infer::Config::Precision::kHalf this->use_fp16_ ? paddle_infer::Config::Precision::kHalf
: paddle_infer::Config::Precision::kFloat32, : paddle_infer::Config::Precision::kFloat32,
false, false); false, false);
std::map<std::string, std::vector<int>> min_input_shape = {
{"x", {1, 3, 50, 50}},
{"conv2d_92.tmp_0", {1, 96, 20, 20}},
{"conv2d_91.tmp_0", {1, 96, 10, 10}},
{"nearest_interp_v2_1.tmp_0", {1, 96, 10, 10}},
{"nearest_interp_v2_2.tmp_0", {1, 96, 20, 20}},
{"nearest_interp_v2_3.tmp_0", {1, 24, 20, 20}},
{"nearest_interp_v2_4.tmp_0", {1, 24, 20, 20}},
{"nearest_interp_v2_5.tmp_0", {1, 24, 20, 20}},
{"elementwise_add_7", {1, 56, 2, 2}},
{"nearest_interp_v2_0.tmp_0", {1, 96, 2, 2}}};
std::map<std::string, std::vector<int>> max_input_shape = {
{"x", {1, 3, this->max_side_len_, this->max_side_len_}},
{"conv2d_92.tmp_0", {1, 96, 400, 400}},
{"conv2d_91.tmp_0", {1, 96, 200, 200}},
{"nearest_interp_v2_1.tmp_0", {1, 96, 200, 200}},
{"nearest_interp_v2_2.tmp_0", {1, 96, 400, 400}},
{"nearest_interp_v2_3.tmp_0", {1, 24, 400, 400}},
{"nearest_interp_v2_4.tmp_0", {1, 24, 400, 400}},
{"nearest_interp_v2_5.tmp_0", {1, 24, 400, 400}},
{"elementwise_add_7", {1, 56, 400, 400}},
{"nearest_interp_v2_0.tmp_0", {1, 96, 400, 400}}};
std::map<std::string, std::vector<int>> opt_input_shape = {
{"x", {1, 3, 640, 640}},
{"conv2d_92.tmp_0", {1, 96, 160, 160}},
{"conv2d_91.tmp_0", {1, 96, 80, 80}},
{"nearest_interp_v2_1.tmp_0", {1, 96, 80, 80}},
{"nearest_interp_v2_2.tmp_0", {1, 96, 160, 160}},
{"nearest_interp_v2_3.tmp_0", {1, 24, 160, 160}},
{"nearest_interp_v2_4.tmp_0", {1, 24, 160, 160}},
{"nearest_interp_v2_5.tmp_0", {1, 24, 160, 160}},
{"elementwise_add_7", {1, 56, 40, 40}},
{"nearest_interp_v2_0.tmp_0", {1, 96, 40, 40}}};
config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
opt_input_shape);
} }
} else { } else {
config.DisableGpu(); config.DisableGpu();
...@@ -48,7 +84,7 @@ void DBDetector::LoadModel(const std::string &model_dir) { ...@@ -48,7 +84,7 @@ void DBDetector::LoadModel(const std::string &model_dir) {
config.SwitchIrOptim(true); config.SwitchIrOptim(true);
config.EnableMemoryOptim(); config.EnableMemoryOptim();
config.DisableGlogInfo(); // config.DisableGlogInfo();
this->predictor_ = CreatePredictor(config); this->predictor_ = CreatePredictor(config);
} }
......
...@@ -106,6 +106,15 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) { ...@@ -106,6 +106,15 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
this->use_fp16_ ? paddle_infer::Config::Precision::kHalf this->use_fp16_ ? paddle_infer::Config::Precision::kHalf
: paddle_infer::Config::Precision::kFloat32, : paddle_infer::Config::Precision::kFloat32,
false, false); false, false);
std::map<std::string, std::vector<int>> min_input_shape = {
{"x", {1, 3, 32, 10}}};
std::map<std::string, std::vector<int>> max_input_shape = {
{"x", {1, 3, 32, 2000}}};
std::map<std::string, std::vector<int>> opt_input_shape = {
{"x", {1, 3, 32, 320}}};
config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
opt_input_shape);
} }
} else { } else {
config.DisableGpu(); config.DisableGpu();
......
...@@ -47,16 +47,13 @@ void Normalize::Run(cv::Mat *im, const std::vector<float> &mean, ...@@ -47,16 +47,13 @@ void Normalize::Run(cv::Mat *im, const std::vector<float> &mean,
e /= 255.0; e /= 255.0;
} }
(*im).convertTo(*im, CV_32FC3, e); (*im).convertTo(*im, CV_32FC3, e);
for (int h = 0; h < im->rows; h++) { std::vector<cv::Mat> bgr_channels(3);
for (int w = 0; w < im->cols; w++) { cv::split(*im, bgr_channels);
im->at<cv::Vec3f>(h, w)[0] = for (auto i = 0; i < bgr_channels.size(); i++) {
(im->at<cv::Vec3f>(h, w)[0] - mean[0]) * scale[0]; bgr_channels[i].convertTo(bgr_channels[i], CV_32FC1, 1.0 * scale[i],
im->at<cv::Vec3f>(h, w)[1] = (0.0 - mean[i]) * scale[i]);
(im->at<cv::Vec3f>(h, w)[1] - mean[1]) * scale[1];
im->at<cv::Vec3f>(h, w)[2] =
(im->at<cv::Vec3f>(h, w)[2] - mean[2]) * scale[2];
}
} }
cv::merge(bgr_channels, *im);
} }
void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img, void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
...@@ -81,15 +78,9 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img, ...@@ -81,15 +78,9 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
resize_h = max(int(round(float(resize_h) / 32) * 32), 32); resize_h = max(int(round(float(resize_h) / 32) * 32), 32);
resize_w = max(int(round(float(resize_w) / 32) * 32), 32); resize_w = max(int(round(float(resize_w) / 32) * 32), 32);
if (!use_tensorrt) {
cv::resize(img, resize_img, cv::Size(resize_w, resize_h)); cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
ratio_h = float(resize_h) / float(h); ratio_h = float(resize_h) / float(h);
ratio_w = float(resize_w) / float(w); ratio_w = float(resize_w) / float(w);
} else {
cv::resize(img, resize_img, cv::Size(640, 640));
ratio_h = float(640) / float(h);
ratio_w = float(640) / float(w);
}
} }
void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio, void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
...@@ -108,23 +99,12 @@ void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio, ...@@ -108,23 +99,12 @@ void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
resize_w = imgW; resize_w = imgW;
else else
resize_w = int(ceilf(imgH * ratio)); resize_w = int(ceilf(imgH * ratio));
if (!use_tensorrt) {
cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f, cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
cv::INTER_LINEAR); cv::INTER_LINEAR);
cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0,
int(imgW - resize_img.cols), cv::BORDER_CONSTANT, int(imgW - resize_img.cols), cv::BORDER_CONSTANT,
{127, 127, 127}); {127, 127, 127});
} else {
int k = int(img.cols * 32 / img.rows);
if (k >= 100) {
cv::resize(img, resize_img, cv::Size(100, 32), 0.f, 0.f,
cv::INTER_LINEAR);
} else {
cv::resize(img, resize_img, cv::Size(k, 32), 0.f, 0.f, cv::INTER_LINEAR);
cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, int(100 - k),
cv::BORDER_CONSTANT, {127, 127, 127});
}
}
} }
void ClsResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, void ClsResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img,
...@@ -142,16 +122,12 @@ void ClsResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, ...@@ -142,16 +122,12 @@ void ClsResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img,
else else
resize_w = int(ceilf(imgH * ratio)); resize_w = int(ceilf(imgH * ratio));
if (!use_tensorrt) {
cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f, cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
cv::INTER_LINEAR); cv::INTER_LINEAR);
if (resize_w < imgW) { if (resize_w < imgW) {
cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, imgW - resize_w, cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, imgW - resize_w,
cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0)); cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
} }
} else {
cv::resize(img, resize_img, cv::Size(100, 32), 0.f, 0.f, cv::INTER_LINEAR);
}
} }
} // namespace PaddleOCR } // namespace PaddleOCR
...@@ -12,12 +12,14 @@ ...@@ -12,12 +12,14 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include <dirent.h>
#include <include/utility.h>
#include <iostream> #include <iostream>
#include <ostream> #include <ostream>
#include <sys/stat.h>
#include <sys/types.h>
#include <vector> #include <vector>
#include <include/utility.h>
namespace PaddleOCR { namespace PaddleOCR {
std::vector<std::string> Utility::ReadDict(const std::string &path) { std::vector<std::string> Utility::ReadDict(const std::string &path) {
...@@ -57,4 +59,37 @@ void Utility::VisualizeBboxes( ...@@ -57,4 +59,37 @@ void Utility::VisualizeBboxes(
<< std::endl; << std::endl;
} }
// Collect the input paths designated by `dir_name` into `all_inputs`.
//
// - If `dir_name` is a directory, every entry in it except "." and ".." is
//   appended as "<dir_name>/<entry>". Entries are not filtered by type and
//   sub-directories are not descended into.
// - Otherwise (e.g. a regular file, or a path that cannot be stat'ed)
//   `dir_name` itself is appended unchanged, so the caller can treat it as a
//   single input and report any read failure itself.
// - If `dir_name` is NULL, or the directory cannot be opened, a message is
//   printed and nothing is appended.
//
// Elements already present in `all_inputs` are kept; new paths are appended
// in whatever order readdir() yields them.
void Utility::GetAllFiles(const char *dir_name,
                          std::vector<std::string> &all_inputs) {
  if (NULL == dir_name) {
    std::cout << " dir_name is null ! " << std::endl;
    return;
  }

  struct stat s;
  // The lstat() result must be checked before st_mode is read: on failure
  // `s` is left unspecified. The short-circuit keeps S_ISDIR from touching it.
  // NOTE(review): lstat() does not follow symbolic links, so a symlink that
  // points at a directory is handled as a single path - confirm intended.
  if (lstat(dir_name, &s) != 0 || !S_ISDIR(s.st_mode)) {
    std::cout << "dir_name is not a valid directory !" << std::endl;
    all_inputs.push_back(dir_name);
    return;
  }

  DIR *dir = opendir(dir_name);
  if (NULL == dir) {
    std::cout << "Can not open dir " << dir_name << std::endl;
    return;
  }
  std::cout << "Successfully opened the dir !" << std::endl;

  const std::string prefix = dir_name + std::string("/");
  struct dirent *entry; // return value of readdir()
  while ((entry = readdir(dir)) != NULL) {
    const std::string entry_name(entry->d_name);
    // Skip the self and parent pseudo-entries.
    if (entry_name == "." || entry_name == "..") {
      continue;
    }
    all_inputs.push_back(prefix + entry_name);
  }

  // Release the directory stream; without this every call leaks a handle.
  closedir(dir);
}
} // namespace PaddleOCR } // namespace PaddleOCR
\ No newline at end of file
...@@ -12,9 +12,10 @@ cmake .. \ ...@@ -12,9 +12,10 @@ cmake .. \
-DWITH_MKL=ON \ -DWITH_MKL=ON \
-DWITH_GPU=OFF \ -DWITH_GPU=OFF \
-DWITH_STATIC_LIB=OFF \ -DWITH_STATIC_LIB=OFF \
-DUSE_TENSORRT=OFF \ -DWITH_TENSORRT=OFF \
-DOPENCV_DIR=${OPENCV_DIR} \ -DOPENCV_DIR=${OPENCV_DIR} \
-DCUDNN_LIB=${CUDNN_LIB_DIR} \ -DCUDNN_LIB=${CUDNN_LIB_DIR} \
-DCUDA_LIB=${CUDA_LIB_DIR} \ -DCUDA_LIB=${CUDA_LIB_DIR} \
-DTENSORRT_DIR=${TENSORRT_DIR} \
make -j make -j
...@@ -23,7 +23,7 @@ rec_model_dir ./inference/ch_ppocr_mobile_v2.0_rec_infer/ ...@@ -23,7 +23,7 @@ rec_model_dir ./inference/ch_ppocr_mobile_v2.0_rec_infer/
char_list_file ../../ppocr/utils/ppocr_keys_v1.txt char_list_file ../../ppocr/utils/ppocr_keys_v1.txt
# show the detection results # show the detection results
visualize 1 visualize 0
# use_tensorrt # use_tensorrt
use_tensorrt 0 use_tensorrt 0
......
...@@ -29,7 +29,7 @@ deploy/hubserving/ocr_system/ ...@@ -29,7 +29,7 @@ deploy/hubserving/ocr_system/
### 1. 准备环境 ### 1. 准备环境
```shell ```shell
# 安装paddlehub # 安装paddlehub
pip3 install paddlehub --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple pip3 install paddlehub==1.8.3 --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple
``` ```
### 2. 下载推理模型 ### 2. 下载推理模型
......
...@@ -30,7 +30,7 @@ The following steps take the 2-stage series service as an example. If only the d ...@@ -30,7 +30,7 @@ The following steps take the 2-stage series service as an example. If only the d
### 1. Prepare the environment ### 1. Prepare the environment
```shell ```shell
# Install paddlehub # Install paddlehub
pip3 install paddlehub --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple pip3 install paddlehub==1.8.3 --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple
``` ```
### 2. Download inference model ### 2. Download inference model
......
...@@ -111,9 +111,9 @@ ...@@ -111,9 +111,9 @@
| 字段 | 用途 | 默认值 | 备注 | | 字段 | 用途 | 默认值 | 备注 |
| :---------------------: | :---------------------: | :--------------: | :--------------------: | | :---------------------: | :---------------------: | :--------------: | :--------------------: |
| **dataset** | 每次迭代返回一个样本 | - | - | | **dataset** | 每次迭代返回一个样本 | - | - |
| name | dataset类名 | SimpleDataSet | 目前支持`SimpleDataSet``LMDBDateSet` | | name | dataset类名 | SimpleDataSet | 目前支持`SimpleDataSet``LMDBDataSet` |
| data_dir | 数据集图片存放路径 | ./train_data | \ | | data_dir | 数据集图片存放路径 | ./train_data | \ |
| label_file_list | 数据标签路径 | ["./train_data/train_list.txt"] | dataset为LMDBDateSet时不需要此参数 | | label_file_list | 数据标签路径 | ["./train_data/train_list.txt"] | dataset为LMDBDataSet时不需要此参数 |
| ratio_list | 数据集的比例 | [1.0] | 若label_file_list中有两个train_list,且ratio_list为[0.4,0.6],则从train_list1中采样40%,从train_list2中采样60%组合整个dataset | | ratio_list | 数据集的比例 | [1.0] | 若label_file_list中有两个train_list,且ratio_list为[0.4,0.6],则从train_list1中采样40%,从train_list2中采样60%组合整个dataset |
| transforms | 对图片和标签进行变换的方法列表 | [DecodeImage,CTCLabelEncode,RecResizeImg,KeepKeys] | 见[ppocr/data/imaug](../../ppocr/data/imaug) | | transforms | 对图片和标签进行变换的方法列表 | [DecodeImage,CTCLabelEncode,RecResizeImg,KeepKeys] | 见[ppocr/data/imaug](../../ppocr/data/imaug) |
| **loader** | dataloader相关 | - | | | **loader** | dataloader相关 | - | |
......
...@@ -243,7 +243,7 @@ Optimizer: ...@@ -243,7 +243,7 @@ Optimizer:
Train: Train:
dataset: dataset:
# 数据集格式,支持LMDBDateSet以及SimpleDataSet # 数据集格式,支持LMDBDataSet以及SimpleDataSet
name: SimpleDataSet name: SimpleDataSet
# 数据集路径 # 数据集路径
data_dir: ./train_data/ data_dir: ./train_data/
...@@ -263,7 +263,7 @@ Train: ...@@ -263,7 +263,7 @@ Train:
Eval: Eval:
dataset: dataset:
# 数据集格式,支持LMDBDateSet以及SimpleDataSet # 数据集格式,支持LMDBDataSet以及SimpleDataSet
name: SimpleDataSet name: SimpleDataSet
# 数据集路径 # 数据集路径
data_dir: ./train_data data_dir: ./train_data
...@@ -393,7 +393,7 @@ Global: ...@@ -393,7 +393,7 @@ Global:
Train: Train:
dataset: dataset:
# 数据集格式,支持LMDBDateSet以及SimpleDataSet # 数据集格式,支持LMDBDataSet以及SimpleDataSet
name: SimpleDataSet name: SimpleDataSet
# 数据集路径 # 数据集路径
data_dir: ./train_data/ data_dir: ./train_data/
...@@ -403,7 +403,7 @@ Train: ...@@ -403,7 +403,7 @@ Train:
Eval: Eval:
dataset: dataset:
# 数据集格式,支持LMDBDateSet以及SimpleDataSet # 数据集格式,支持LMDBDataSet以及SimpleDataSet
name: SimpleDataSet name: SimpleDataSet
# 数据集路径 # 数据集路径
data_dir: ./train_data data_dir: ./train_data
......
...@@ -59,7 +59,7 @@ im_show.save('result.jpg') ...@@ -59,7 +59,7 @@ im_show.save('result.jpg')
from paddleocr import PaddleOCR, draw_ocr from paddleocr import PaddleOCR, draw_ocr
ocr = PaddleOCR() # need to run only once to download and load model into memory ocr = PaddleOCR() # need to run only once to download and load model into memory
img_path = 'PaddleOCR/doc/imgs/11.jpg' img_path = 'PaddleOCR/doc/imgs/11.jpg'
result = ocr.ocr(img_path) result = ocr.ocr(img_path,cls=False)
for line in result: for line in result:
print(line) print(line)
...@@ -355,3 +355,4 @@ im_show.save('result.jpg') ...@@ -355,3 +355,4 @@ im_show.save('result.jpg')
| det | 前向时使用启动检测 | TRUE | | det | 前向时使用启动检测 | TRUE |
| rec | 前向时是否启动识别 | TRUE | | rec | 前向时是否启动识别 | TRUE |
| cls | 前向时是否启动分类 (命令行模式下使用use_angle_cls控制前向是否启动分类) | FALSE | | cls | 前向时是否启动分类 (命令行模式下使用use_angle_cls控制前向是否启动分类) | FALSE |
| show_log | 是否打印det和rec等信息 | FALSE |
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment