wangsen / paddle_dbnet · Commits

Commit a739abab
Authored Jul 27, 2021 by WenmuZhou

    merge dygraph

Parents: 982926db, 75aa6f2f
Changes: 49
Showing 9 changed files with 69 additions and 20 deletions (+69 −20)
Changed files:
- test1/table/README_ch.md        +23  −1
- tools/eval.py                    +6  −3
- tools/infer/predict_cls.py       +0  −2
- tools/infer/predict_det.py       +4  −4
- tools/infer/predict_rec.py      +30  −4
- tools/infer/predict_system.py    +0  −2
- tools/infer/utility.py           +2  −2
- tools/program.py                 +4  −1
- tools/train.py                   +0  −1
test1/table/README_ch.md

@@ -19,7 +19,29 @@

### 2.1 Training

The `TBD` placeholder is replaced with the following content:

#### Data preparation

The training data uses the public [PubTabNet](https://arxiv.org/abs/1911.10683) dataset, which can be downloaded from the [official site](https://github.com/ibm-aur-nlp/PubTabNet). PubTabNet contains about 500,000 images of tables, together with an HTML annotation for each image.

#### Start training

*If you installed the CPU version, change the `use_gpu` field in the configuration file to false.*

```shell
# single-machine, single-GPU training
python3 tools/train.py -c configs/table/table_mv3.yml

# single-machine, multi-GPU training; set the GPU IDs to use via the --gpus argument
python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/table/table_mv3.yml
```

In the commands above, -c selects configs/table/table_mv3.yml as the training configuration file. For a detailed explanation of the configuration file, please refer to [this link](./config.md).

#### Resuming training

If the training program is interrupted and you want to load the interrupted model to resume training, specify the path of the model to load via Global.checkpoints:

```shell
python3 tools/train.py -c configs/table/table_mv3.yml -o Global.checkpoints=./your/trained/model
```

**Note**: `Global.checkpoints` has a higher priority than `Global.pretrain_weights`; when both parameters are specified, the model given by `Global.checkpoints` is loaded first, and if the path given by `Global.checkpoints` is wrong, the model given by `Global.pretrain_weights` is loaded instead. (A minimal sketch of this fallback logic follows this file's changes.)

### 2.2 Evaluation

First cd into the PaddleOCR/ppstructure directory
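The note above describes a load-priority fallback between `Global.checkpoints` and `Global.pretrain_weights`. A minimal sketch of that rule, using a hypothetical helper (this is not PaddleOCR's actual loader API):

```python
import os


def resolve_weights(checkpoints, pretrain_weights):
    """Hypothetical helper mirroring the documented priority rule.

    `Global.checkpoints` wins; if its path is missing or invalid, fall back
    to `Global.pretrain_weights` (which may itself be unset).
    """
    # Paddle checkpoints are typically saved as <path>.pdparams
    if checkpoints and os.path.exists(checkpoints + ".pdparams"):
        return checkpoints       # resume from the interrupted training state
    return pretrain_weights      # otherwise load the pretrained weights
```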
tools/eval.py

@@ -27,7 +27,7 @@ from ppocr.data import build_dataloader
 from ppocr.modeling.architectures import build_model
 from ppocr.postprocess import build_post_process
 from ppocr.metrics import build_metric
-from ppocr.utils.save_load import init_model
+from ppocr.utils.save_load import init_model, load_pretrained_params
 from ppocr.utils.utility import print_dict
 import tools.program as program

@@ -55,7 +55,10 @@ def main():
     model = build_model(config['Architecture'])
     use_srn = config['Architecture']['algorithm'] == "SRN"
-    model_type = config['Architecture']['model_type']
+    if "model_type" in config['Architecture'].keys():
+        model_type = config['Architecture']['model_type']
+    else:
+        model_type = None
     best_model_dict = init_model(config, model)
     if len(best_model_dict):

@@ -68,7 +71,7 @@ def main():
     # start eval
     metric = program.eval(model, valid_dataloader, post_process_class,
                           eval_class, model_type, use_srn)
     logger.info('metric eval ***************')
     for k, v in metric.items():
         logger.info('{}:{}'.format(k, v))
tools/infer/predict_cls.py

@@ -112,7 +112,6 @@ class TextClassifier(object):
             if '180' in label and score > self.cls_thresh:
                 img_list[indices[beg_img_no + rno]] = cv2.rotate(
                     img_list[indices[beg_img_no + rno]], 1)
-        elapse = time.time() - starttime
         return img_list, cls_res, elapse

@@ -146,7 +145,6 @@ def main(args):
                                                  cls_res[ino]))
     logger.info(
         "The predict time about text angle classify module is as follows: ")
-    text_classifier.cls_times.info(average=False)

 if __name__ == "__main__":
tools/infer/predict_det.py

@@ -175,7 +175,7 @@ class TextDetector(object):
         st = time.time()
-        if args.benchmark:
+        if self.args.benchmark:
             self.autolog.times.start()
         data = transform(data, self.preprocess_op)

@@ -186,7 +186,7 @@ class TextDetector(object):
         shape_list = np.expand_dims(shape_list, axis=0)
         img = img.copy()
-        if args.benchmark:
+        if self.args.benchmark:
             self.autolog.times.stamp()
         self.input_tensor.copy_from_cpu(img)

@@ -195,7 +195,7 @@ class TextDetector(object):
         for output_tensor in self.output_tensors:
             output = output_tensor.copy_to_cpu()
             outputs.append(output)
-        if args.benchmark:
+        if self.args.benchmark:
             self.autolog.times.stamp()
         preds = {}

@@ -220,7 +220,7 @@ class TextDetector(object):
         else:
             dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape)
-        if args.benchmark:
+        if self.args.benchmark:
             self.autolog.times.end(stamp=True)
         et = time.time()
         return dt_boxes, et - st
tools/infer/predict_rec.py

@@ -64,6 +64,24 @@ class TextRecognizer(object):
         self.postprocess_op = build_post_process(postprocess_params)
         self.predictor, self.input_tensor, self.output_tensors, self.config = \
             utility.create_predictor(args, 'rec', logger)
+        self.benchmark = args.benchmark
+        if args.benchmark:
+            import auto_log
+            pid = os.getpid()
+            self.autolog = auto_log.AutoLogger(
+                model_name="rec",
+                model_precision=args.precision,
+                batch_size=args.rec_batch_num,
+                data_shape="dynamic",
+                save_path=args.save_log_path,
+                inference_config=self.config,
+                pids=pid,
+                process_name=None,
+                gpu_ids=0 if args.use_gpu else None,
+                time_keys=['preprocess_time', 'inference_time', 'postprocess_time'],
+                warmup=10)

     def resize_norm_img(self, img, max_wh_ratio):
         imgC, imgH, imgW = self.rec_image_shape

@@ -168,6 +186,8 @@ class TextRecognizer(object):
         rec_res = [['', 0.0]] * img_num
         batch_num = self.rec_batch_num
         st = time.time()
+        if self.benchmark:
+            self.autolog.times.start()
         for beg_img_no in range(0, img_num, batch_num):
             end_img_no = min(img_num, beg_img_no + batch_num)
             norm_img_batch = []

@@ -196,6 +216,8 @@ class TextRecognizer(object):
                 norm_img_batch.append(norm_img[0])
             norm_img_batch = np.concatenate(norm_img_batch)
             norm_img_batch = norm_img_batch.copy()
+            if self.benchmark:
+                self.autolog.times.stamp()
             if self.rec_algorithm == "SRN":
                 encoder_word_pos_list = np.concatenate(encoder_word_pos_list)

@@ -222,6 +244,8 @@ class TextRecognizer(object):
                 for output_tensor in self.output_tensors:
                     output = output_tensor.copy_to_cpu()
                     outputs.append(output)
+                if self.benchmark:
+                    self.autolog.times.stamp()
                 preds = {"predict": outputs[2]}
             else:
                 self.input_tensor.copy_from_cpu(norm_img_batch)

@@ -231,11 +255,14 @@ class TextRecognizer(object):
                 for output_tensor in self.output_tensors:
                     output = output_tensor.copy_to_cpu()
                     outputs.append(output)
+                if self.benchmark:
+                    self.autolog.times.stamp()
                 preds = outputs[0]
             rec_result = self.postprocess_op(preds)
             for rno in range(len(rec_result)):
                 rec_res[indices[beg_img_no + rno]] = rec_result[rno]
+        if self.benchmark:
+            self.autolog.times.end(stamp=True)
         return rec_res, time.time() - st

@@ -251,9 +278,6 @@ def main(args):
         for i in range(10):
             res = text_recognizer([img])
-    cpu_mem, gpu_mem, gpu_util = 0, 0, 0
-    count = 0
     for image_file in image_file_list:
         img, flag = check_and_read_gif(image_file)
         if not flag:

@@ -273,6 +297,8 @@ def main(args):
     for ino in range(len(img_list)):
         logger.info("Predicts of {}:{}".format(valid_image_file_list[ino],
                                                rec_res[ino]))
+    if args.benchmark:
+        text_recognizer.autolog.report()

 if __name__ == "__main__":
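The predict_rec.py changes above add optional benchmarking via PaddlePaddle's auto_log package: the timer is started before preprocessing, stamped at each stage boundary, ended after postprocessing, and reported once in main(). A condensed sketch of that pattern, with the constructor arguments mirroring the diff but the concrete values being illustrative placeholders only:

```python
import os

import auto_log

# Constructor arguments follow the ones added in the diff; the values here
# are placeholders, not a recommended configuration.
autolog = auto_log.AutoLogger(
    model_name="rec",
    model_precision="fp32",
    batch_size=6,
    data_shape="dynamic",
    save_path="./output/auto_log.log",   # hypothetical log path
    inference_config=None,               # the paddle.inference Config, if available
    pids=os.getpid(),
    process_name=None,
    gpu_ids=None,
    time_keys=['preprocess_time', 'inference_time', 'postprocess_time'],
    warmup=10)

autolog.times.start()          # before preprocessing a batch
# ... preprocess ...
autolog.times.stamp()          # preprocessing finished
# ... run the predictor ...
autolog.times.stamp()          # inference finished
# ... postprocess ...
autolog.times.end(stamp=True)  # postprocessing finished
autolog.report()               # print the collected timing summary
```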
tools/infer/predict_system.py

@@ -174,8 +174,6 @@ def main(args):
             logger.info("The predict total time is {}".format(time.time() - _st))
     logger.info("\nThe predict total time is {}".format(total_time))
-    img_num = text_sys.text_detector.det_times.img_num

 if __name__ == "__main__":
     args = utility.parse_args()
tools/infer/utility.py

@@ -34,7 +34,7 @@ def init_args():
     parser.add_argument("--use_gpu", type=str2bool, default=True)
     parser.add_argument("--ir_optim", type=str2bool, default=True)
     parser.add_argument("--use_tensorrt", type=str2bool, default=False)
-    parser.add_argument("--min_subgraph_size", type=int, default=3)
+    parser.add_argument("--min_subgraph_size", type=int, default=10)
     parser.add_argument("--precision", type=str, default="fp32")
     parser.add_argument("--gpu_mem", type=int, default=500)

@@ -161,7 +161,7 @@ def create_predictor(args, mode, logger):
         config.enable_use_gpu(args.gpu_mem, 0)
         if args.use_tensorrt:
             config.enable_tensorrt_engine(
-                precision_mode=inference.PrecisionType.Float32,
+                precision_mode=precision,
                 max_batch_size=args.max_batch_size,
                 min_subgraph_size=args.min_subgraph_size)
             # skip the minmum trt subgraph
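The second hunk replaces the hard-coded `inference.PrecisionType.Float32` with a `precision` variable, so the TensorRT engine honours the `--precision` argument ("fp32" by default) shown in the first hunk. A minimal sketch of how that string could be mapped to a Paddle precision enum before the `enable_tensorrt_engine` call (illustrative helper, not the repository's exact code):

```python
from paddle import inference


def resolve_precision(precision_str):
    # Map the --precision string to a paddle.inference PrecisionType;
    # unrecognised values fall back to FP32. Illustrative helper only.
    mapping = {
        "fp32": inference.PrecisionType.Float32,
        "fp16": inference.PrecisionType.Half,
        "int8": inference.PrecisionType.Int8,
    }
    return mapping.get(precision_str, inference.PrecisionType.Float32)


# Sketch of use inside create_predictor:
#   precision = resolve_precision(args.precision)
#   config.enable_tensorrt_engine(precision_mode=precision, ...)
```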
tools/program.py

@@ -186,7 +186,10 @@ def train(config,
     model.train()

     use_srn = config['Architecture']['algorithm'] == "SRN"
-    model_type = config['Architecture']['model_type']
+    try:
+        model_type = config['Architecture']['model_type']
+    except:
+        model_type = None

     if 'start_epoch' in best_model_dict:
         start_epoch = best_model_dict['start_epoch']
tools/train.py

@@ -98,7 +98,6 @@ def main(config, device, logger, vdl_writer):
     eval_class = build_metric(config['Metric'])
     # load pretrain model
     pre_best_model_dict = load_dygraph_params(config, model, logger, optimizer)

     logger.info('train dataloader has {} iters'.format(len(train_dataloader)))
     if valid_dataloader is not None:
         logger.info('valid dataloader has {} iters'.format(