Commit a739abab authored by WenmuZhou's avatar WenmuZhou
Browse files

merge dygraph

parents 982926db 75aa6f2f
......@@ -19,7 +19,29 @@
### 2.1 训练
TBD
#### 数据准备
训练数据使用公开数据集[PubTabNet](https://arxiv.org/abs/1911.10683),可以从[官网](https://github.com/ibm-aur-nlp/PubTabNet)下载。PubTabNet数据集包含约50万张表格数据的图像,以及图像对应的html格式的注释。
#### 启动训练
*如果您安装的是cpu版本,请将配置文件中的 `use_gpu` 字段修改为false*
```shell
# 单机单卡训练
python3 tools/train.py -c configs/table/table_mv3.yml
# 单机多卡训练,通过 --gpus 参数设置使用的GPU ID
python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/table/table_mv3.yml
```
上述指令中,通过-c 选择训练使用configs/table/table_mv3.yml配置文件。有关配置文件的详细解释,请参考[链接](./config.md)
#### 断点训练
如果训练程序中断,如果希望加载训练中断的模型从而恢复训练,可以通过指定Global.checkpoints指定要加载的模型路径:
```shell
python3 tools/train.py -c configs/table/table_mv3.yml -o Global.checkpoints=./your/trained/model
```
**注意**`Global.checkpoints`的优先级高于`Global.pretrain_weights`的优先级,即同时指定两个参数时,优先加载`Global.checkpoints`指定的模型,如果`Global.checkpoints`指定的模型路径有误,会加载`Global.pretrain_weights`指定的模型。
### 2.2 评估
先cd到PaddleOCR/ppstructure目录下
......
......@@ -27,7 +27,7 @@ from ppocr.data import build_dataloader
from ppocr.modeling.architectures import build_model
from ppocr.postprocess import build_post_process
from ppocr.metrics import build_metric
from ppocr.utils.save_load import init_model
from ppocr.utils.save_load import init_model, load_pretrained_params
from ppocr.utils.utility import print_dict
import tools.program as program
......@@ -55,7 +55,10 @@ def main():
model = build_model(config['Architecture'])
use_srn = config['Architecture']['algorithm'] == "SRN"
model_type = config['Architecture']['model_type']
if "model_type" in config['Architecture'].keys():
model_type = config['Architecture']['model_type']
else:
model_type = None
best_model_dict = init_model(config, model)
if len(best_model_dict):
......@@ -68,7 +71,7 @@ def main():
# start eval
metric = program.eval(model, valid_dataloader, post_process_class,
eval_class, model_type, use_srn)
eval_class, model_type, use_srn)
logger.info('metric eval ***************')
for k, v in metric.items():
logger.info('{}:{}'.format(k, v))
......
......@@ -112,7 +112,6 @@ class TextClassifier(object):
if '180' in label and score > self.cls_thresh:
img_list[indices[beg_img_no + rno]] = cv2.rotate(
img_list[indices[beg_img_no + rno]], 1)
elapse = time.time() - starttime
return img_list, cls_res, elapse
......@@ -146,7 +145,6 @@ def main(args):
cls_res[ino]))
logger.info(
"The predict time about text angle classify module is as follows: ")
text_classifier.cls_times.info(average=False)
if __name__ == "__main__":
......
......@@ -175,7 +175,7 @@ class TextDetector(object):
st = time.time()
if args.benchmark:
if self.args.benchmark:
self.autolog.times.start()
data = transform(data, self.preprocess_op)
......@@ -186,7 +186,7 @@ class TextDetector(object):
shape_list = np.expand_dims(shape_list, axis=0)
img = img.copy()
if args.benchmark:
if self.args.benchmark:
self.autolog.times.stamp()
self.input_tensor.copy_from_cpu(img)
......@@ -195,7 +195,7 @@ class TextDetector(object):
for output_tensor in self.output_tensors:
output = output_tensor.copy_to_cpu()
outputs.append(output)
if args.benchmark:
if self.args.benchmark:
self.autolog.times.stamp()
preds = {}
......@@ -220,7 +220,7 @@ class TextDetector(object):
else:
dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape)
if args.benchmark:
if self.args.benchmark:
self.autolog.times.end(stamp=True)
et = time.time()
return dt_boxes, et - st
......
......@@ -64,6 +64,24 @@ class TextRecognizer(object):
self.postprocess_op = build_post_process(postprocess_params)
self.predictor, self.input_tensor, self.output_tensors, self.config = \
utility.create_predictor(args, 'rec', logger)
self.benchmark = args.benchmark
if args.benchmark:
import auto_log
pid = os.getpid()
self.autolog = auto_log.AutoLogger(
model_name="rec",
model_precision=args.precision,
batch_size=args.rec_batch_num,
data_shape="dynamic",
save_path=args.save_log_path,
inference_config=self.config,
pids=pid,
process_name=None,
gpu_ids=0 if args.use_gpu else None,
time_keys=[
'preprocess_time', 'inference_time', 'postprocess_time'
],
warmup=10)
def resize_norm_img(self, img, max_wh_ratio):
imgC, imgH, imgW = self.rec_image_shape
......@@ -168,6 +186,8 @@ class TextRecognizer(object):
rec_res = [['', 0.0]] * img_num
batch_num = self.rec_batch_num
st = time.time()
if self.benchmark:
self.autolog.times.start()
for beg_img_no in range(0, img_num, batch_num):
end_img_no = min(img_num, beg_img_no + batch_num)
norm_img_batch = []
......@@ -196,6 +216,8 @@ class TextRecognizer(object):
norm_img_batch.append(norm_img[0])
norm_img_batch = np.concatenate(norm_img_batch)
norm_img_batch = norm_img_batch.copy()
if self.benchmark:
self.autolog.times.stamp()
if self.rec_algorithm == "SRN":
encoder_word_pos_list = np.concatenate(encoder_word_pos_list)
......@@ -222,6 +244,8 @@ class TextRecognizer(object):
for output_tensor in self.output_tensors:
output = output_tensor.copy_to_cpu()
outputs.append(output)
if self.benchmark:
self.autolog.times.stamp()
preds = {"predict": outputs[2]}
else:
self.input_tensor.copy_from_cpu(norm_img_batch)
......@@ -231,11 +255,14 @@ class TextRecognizer(object):
for output_tensor in self.output_tensors:
output = output_tensor.copy_to_cpu()
outputs.append(output)
if self.benchmark:
self.autolog.times.stamp()
preds = outputs[0]
rec_result = self.postprocess_op(preds)
for rno in range(len(rec_result)):
rec_res[indices[beg_img_no + rno]] = rec_result[rno]
if self.benchmark:
self.autolog.times.end(stamp=True)
return rec_res, time.time() - st
......@@ -251,9 +278,6 @@ def main(args):
for i in range(10):
res = text_recognizer([img])
cpu_mem, gpu_mem, gpu_util = 0, 0, 0
count = 0
for image_file in image_file_list:
img, flag = check_and_read_gif(image_file)
if not flag:
......@@ -273,6 +297,8 @@ def main(args):
for ino in range(len(img_list)):
logger.info("Predicts of {}:{}".format(valid_image_file_list[ino],
rec_res[ino]))
if args.benchmark:
text_recognizer.autolog.report()
if __name__ == "__main__":
......
......@@ -174,8 +174,6 @@ def main(args):
logger.info("The predict total time is {}".format(time.time() - _st))
logger.info("\nThe predict total time is {}".format(total_time))
img_num = text_sys.text_detector.det_times.img_num
if __name__ == "__main__":
args = utility.parse_args()
......
......@@ -34,7 +34,7 @@ def init_args():
parser.add_argument("--use_gpu", type=str2bool, default=True)
parser.add_argument("--ir_optim", type=str2bool, default=True)
parser.add_argument("--use_tensorrt", type=str2bool, default=False)
parser.add_argument("--min_subgraph_size", type=int, default=3)
parser.add_argument("--min_subgraph_size", type=int, default=10)
parser.add_argument("--precision", type=str, default="fp32")
parser.add_argument("--gpu_mem", type=int, default=500)
......@@ -161,7 +161,7 @@ def create_predictor(args, mode, logger):
config.enable_use_gpu(args.gpu_mem, 0)
if args.use_tensorrt:
config.enable_tensorrt_engine(
precision_mode=inference.PrecisionType.Float32,
precision_mode=precision,
max_batch_size=args.max_batch_size,
min_subgraph_size=args.min_subgraph_size)
# skip the minmum trt subgraph
......
......@@ -186,7 +186,10 @@ def train(config,
model.train()
use_srn = config['Architecture']['algorithm'] == "SRN"
model_type = config['Architecture']['model_type']
try:
model_type = config['Architecture']['model_type']
except:
model_type = None
if 'start_epoch' in best_model_dict:
start_epoch = best_model_dict['start_epoch']
......
......@@ -98,7 +98,6 @@ def main(config, device, logger, vdl_writer):
eval_class = build_metric(config['Metric'])
# load pretrain model
pre_best_model_dict = load_dygraph_params(config, model, logger, optimizer)
logger.info('train dataloader has {} iters'.format(len(train_dataloader)))
if valid_dataloader is not None:
logger.info('valid dataloader has {} iters'.format(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment