Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
paddle_dbnet
Commits
ce22e2ff
Commit
ce22e2ff
authored
Apr 09, 2021
by
WenmuZhou
Browse files
Merge branch 'android_demo' of
https://github.com/WenmuZhou/PaddleOCR
into android_demo
parents
3ff9cc93
3ff94920
Changes
109
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
382 additions
and
38 deletions
+382
-38
tools/infer/predict_det.py
tools/infer/predict_det.py
+3
-6
tools/infer/predict_e2e.py
tools/infer/predict_e2e.py
+158
-0
tools/infer/predict_rec.py
tools/infer/predict_rec.py
+33
-17
tools/infer/utility.py
tools/infer/utility.py
+37
-2
tools/infer_det.py
tools/infer_det.py
+1
-1
tools/infer_e2e.py
tools/infer_e2e.py
+122
-0
tools/program.py
tools/program.py
+19
-8
tools/train.py
tools/train.py
+8
-3
train.sh
train.sh
+1
-1
No files found.
tools/infer/predict_det.py
View file @
ce22e2ff
...
...
@@ -39,10 +39,7 @@ class TextDetector(object):
self
.
args
=
args
self
.
det_algorithm
=
args
.
det_algorithm
pre_process_list
=
[{
'DetResizeForTest'
:
{
'limit_side_len'
:
args
.
det_limit_side_len
,
'limit_type'
:
args
.
det_limit_type
}
'DetResizeForTest'
:
None
},
{
'NormalizeImage'
:
{
'std'
:
[
0.229
,
0.224
,
0.225
],
...
...
@@ -64,7 +61,7 @@ class TextDetector(object):
postprocess_params
[
"box_thresh"
]
=
args
.
det_db_box_thresh
postprocess_params
[
"max_candidates"
]
=
1000
postprocess_params
[
"unclip_ratio"
]
=
args
.
det_db_unclip_ratio
postprocess_params
[
"use_dilation"
]
=
True
postprocess_params
[
"use_dilation"
]
=
args
.
use_dilation
elif
self
.
det_algorithm
==
"EAST"
:
postprocess_params
[
'name'
]
=
'EASTPostProcess'
postprocess_params
[
"score_thresh"
]
=
args
.
det_east_score_thresh
...
...
@@ -183,7 +180,7 @@ class TextDetector(object):
preds
[
'maps'
]
=
outputs
[
0
]
else
:
raise
NotImplementedError
self
.
predictor
.
try_shrink_memory
()
post_result
=
self
.
postprocess_op
(
preds
,
shape_list
)
dt_boxes
=
post_result
[
0
][
'points'
]
if
self
.
det_algorithm
==
"SAST"
and
self
.
det_sast_polygon
:
...
...
tools/infer/predict_e2e.py
0 → 100755
View file @
ce22e2ff
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
sys
__dir__
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
sys
.
path
.
append
(
__dir__
)
sys
.
path
.
append
(
os
.
path
.
abspath
(
os
.
path
.
join
(
__dir__
,
'../..'
)))
os
.
environ
[
"FLAGS_allocator_strategy"
]
=
'auto_growth'
import
cv2
import
numpy
as
np
import
time
import
sys
import
tools.infer.utility
as
utility
from
ppocr.utils.logging
import
get_logger
from
ppocr.utils.utility
import
get_image_file_list
,
check_and_read_gif
from
ppocr.data
import
create_operators
,
transform
from
ppocr.postprocess
import
build_post_process
logger
=
get_logger
()
class TextE2E(object):
    """End-to-end text spotter (detection + recognition in a single model).

    Wraps a Paddle inference predictor created by utility.create_predictor
    together with the PGNet pre/post-processing pipeline.  Only the "PGNet"
    e2e_algorithm is supported; any other value logs a message and exits.
    """

    def __init__(self, args):
        # args is the parsed namespace from tools.infer.utility.parse_args().
        self.args = args
        self.e2e_algorithm = args.e2e_algorithm
        # Default preprocessing chain; the first (resize) op is replaced
        # below once the algorithm-specific parameters are known.
        pre_process_list = [{
            'E2EResizeForTest': {}
        }, {
            'NormalizeImage': {
                'std': [0.229, 0.224, 0.225],
                'mean': [0.485, 0.456, 0.406],
                'scale': '1./255.',
                'order': 'hwc'
            }
        }, {
            'ToCHWImage': None
        }, {
            'KeepKeys': {
                'keep_keys': ['image', 'shape']
            }
        }]
        postprocess_params = {}
        if self.e2e_algorithm == "PGNet":
            pre_process_list[0] = {
                'E2EResizeForTest': {
                    'max_side_len': args.e2e_limit_side_len,
                    'valid_set': 'totaltext'
                }
            }
            postprocess_params['name'] = 'PGPostProcess'
            postprocess_params["score_thresh"] = args.e2e_pgnet_score_thresh
            postprocess_params["character_dict_path"] = args.e2e_char_dict_path
            postprocess_params["valid_set"] = args.e2e_pgnet_valid_set
            self.e2e_pgnet_polygon = args.e2e_pgnet_polygon
        else:
            logger.info("unknown e2e_algorithm:{}".format(self.e2e_algorithm))
            sys.exit(0)
        self.preprocess_op = create_operators(pre_process_list)
        self.postprocess_op = build_post_process(postprocess_params)
        self.predictor, self.input_tensor, self.output_tensors = utility.create_predictor(
            args, 'e2e', logger)

        # paddle.jit.load(args.det_model_dir)
        # self.predictor.eval()

    def clip_det_res(self, points, img_height, img_width):
        """Clamp each (x, y) in *points* into [0, width-1] x [0, height-1].

        Mutates *points* in place and also returns it.
        """
        for pno in range(points.shape[0]):
            points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1))
            points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1))
        return points

    def filter_tag_det_res_only_clip(self, dt_boxes, image_shape):
        """Clip every detected polygon to the image bounds; return as np.array."""
        img_height, img_width = image_shape[0:2]
        dt_boxes_new = []
        for box in dt_boxes:
            box = self.clip_det_res(box, img_height, img_width)
            dt_boxes_new.append(box)
        dt_boxes = np.array(dt_boxes_new)
        return dt_boxes

    def __call__(self, img):
        """Run end-to-end inference on one image.

        Returns (dt_boxes, strs, elapse): the clipped text polygons, their
        transcriptions, and the wall-clock inference time in seconds.
        Returns (None, 0) when preprocessing rejects the image.
        """
        ori_im = img.copy()
        data = {'image': img}
        data = transform(data, self.preprocess_op)
        img, shape_list = data
        if img is None:
            return None, 0
        # Add the batch dimension expected by the predictor.
        img = np.expand_dims(img, axis=0)
        shape_list = np.expand_dims(shape_list, axis=0)
        img = img.copy()
        starttime = time.time()
        # Feed the batch to the Paddle predictor and collect every output.
        self.input_tensor.copy_from_cpu(img)
        self.predictor.run()
        outputs = []
        for output_tensor in self.output_tensors:
            output = output_tensor.copy_to_cpu()
            outputs.append(output)
        preds = {}
        if self.e2e_algorithm == 'PGNet':
            # Map the raw outputs to the keys PGPostProcess expects.
            preds['f_border'] = outputs[0]
            preds['f_char'] = outputs[1]
            preds['f_direction'] = outputs[2]
            preds['f_score'] = outputs[3]
        else:
            raise NotImplementedError
        post_result = self.postprocess_op(preds, shape_list)
        points, strs = post_result['points'], post_result['strs']
        dt_boxes = self.filter_tag_det_res_only_clip(points, ori_im.shape)
        elapse = time.time() - starttime
        return dt_boxes, strs, elapse
if __name__ == "__main__":
    # CLI entry: run the PGNet end-to-end predictor over every image in
    # --image_dir and save visualized results under ./inference_results.
    args = utility.parse_args()
    image_file_list = get_image_file_list(args.image_dir)
    text_detector = TextE2E(args)
    count = 0
    total_time = 0
    draw_img_save = "./inference_results"
    if not os.path.exists(draw_img_save):
        os.makedirs(draw_img_save)
    for image_file in image_file_list:
        # GIFs need special handling; everything else goes through imread.
        img, flag = check_and_read_gif(image_file)
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
        points, strs, elapse = text_detector(img)
        # The first prediction is treated as warm-up and excluded from the
        # accumulated timing.
        if count > 0:
            total_time += elapse
        count += 1
        logger.info("Predict time of {}: {}".format(image_file, elapse))
        src_im = utility.draw_e2e_res(points, strs, image_file)
        img_name_pure = os.path.split(image_file)[-1]
        img_path = os.path.join(draw_img_save,
                                "e2e_res_{}".format(img_name_pure))
        cv2.imwrite(img_path, src_im)
        logger.info("The visualized image saved in {}".format(img_path))
    if count > 1:
        # Average over all timed (non-warm-up) predictions.
        logger.info("Avg Time: {}".format(total_time / (count - 1)))
tools/infer/predict_rec.py
View file @
ce22e2ff
...
...
@@ -54,6 +54,13 @@ class TextRecognizer(object):
"character_dict_path"
:
args
.
rec_char_dict_path
,
"use_space_char"
:
args
.
use_space_char
}
elif
self
.
rec_algorithm
==
"RARE"
:
postprocess_params
=
{
'name'
:
'AttnLabelDecode'
,
"character_type"
:
args
.
rec_char_type
,
"character_dict_path"
:
args
.
rec_char_dict_path
,
"use_space_char"
:
args
.
use_space_char
}
self
.
postprocess_op
=
build_post_process
(
postprocess_params
)
self
.
predictor
,
self
.
input_tensor
,
self
.
output_tensors
=
\
utility
.
create_predictor
(
args
,
'rec'
,
logger
)
...
...
@@ -230,7 +237,7 @@ class TextRecognizer(object):
output
=
output_tensor
.
copy_to_cpu
()
outputs
.
append
(
output
)
preds
=
outputs
[
0
]
self
.
predictor
.
try_shrink_memory
()
rec_result
=
self
.
postprocess_op
(
preds
)
for
rno
in
range
(
len
(
rec_result
)):
rec_res
[
indices
[
beg_img_no
+
rno
]]
=
rec_result
[
rno
]
...
...
@@ -241,9 +248,11 @@ class TextRecognizer(object):
def
main
(
args
):
image_file_list
=
get_image_file_list
(
args
.
image_dir
)
text_recognizer
=
TextRecognizer
(
args
)
total_run_time
=
0.0
total_images_num
=
0
valid_image_file_list
=
[]
img_list
=
[]
for
image_file
in
image_file_list
:
for
idx
,
image_file
in
enumerate
(
image_file_list
)
:
img
,
flag
=
check_and_read_gif
(
image_file
)
if
not
flag
:
img
=
cv2
.
imread
(
image_file
)
...
...
@@ -252,22 +261,29 @@ def main(args):
continue
valid_image_file_list
.
append
(
image_file
)
img_list
.
append
(
img
)
try
:
rec_res
,
predict_time
=
text_recognizer
(
img_list
)
except
:
logger
.
info
(
traceback
.
format_exc
())
logger
.
info
(
"ERROR!!!!
\n
"
"Please read the FAQ:https://github.com/PaddlePaddle/PaddleOCR#faq
\n
"
"If your model has tps module: "
"TPS does not support variable shape.
\n
"
"Please set --rec_image_shape='3,32,100' and --rec_char_type='en' "
)
exit
()
for
ino
in
range
(
len
(
img_list
)):
logger
.
info
(
"Predicts of {}:{}"
.
format
(
valid_image_file_list
[
ino
],
rec_res
[
ino
]))
if
len
(
img_list
)
>=
args
.
rec_batch_num
or
idx
==
len
(
image_file_list
)
-
1
:
try
:
rec_res
,
predict_time
=
text_recognizer
(
img_list
)
total_run_time
+=
predict_time
except
:
logger
.
info
(
traceback
.
format_exc
())
logger
.
info
(
"ERROR!!!!
\n
"
"Please read the FAQ:https://github.com/PaddlePaddle/PaddleOCR#faq
\n
"
"If your model has tps module: "
"TPS does not support variable shape.
\n
"
"Please set --rec_image_shape='3,32,100' and --rec_char_type='en' "
)
exit
()
for
ino
in
range
(
len
(
img_list
)):
logger
.
info
(
"Predicts of {}:{}"
.
format
(
valid_image_file_list
[
ino
],
rec_res
[
ino
]))
total_images_num
+=
len
(
valid_image_file_list
)
valid_image_file_list
=
[]
img_list
=
[]
logger
.
info
(
"Total predict time for {} images, cost: {:.3f}"
.
format
(
len
(
img_list
),
predict
_time
))
total_images_num
,
total_run
_time
))
if
__name__
==
"__main__"
:
...
...
tools/infer/utility.py
View file @
ce22e2ff
...
...
@@ -47,6 +47,7 @@ def parse_args():
parser
.
add_argument
(
"--det_db_box_thresh"
,
type
=
float
,
default
=
0.5
)
parser
.
add_argument
(
"--det_db_unclip_ratio"
,
type
=
float
,
default
=
1.6
)
parser
.
add_argument
(
"--max_batch_size"
,
type
=
int
,
default
=
10
)
parser
.
add_argument
(
"--use_dilation"
,
type
=
bool
,
default
=
False
)
# EAST parmas
parser
.
add_argument
(
"--det_east_score_thresh"
,
type
=
float
,
default
=
0.8
)
parser
.
add_argument
(
"--det_east_cover_thresh"
,
type
=
float
,
default
=
0.1
)
...
...
@@ -73,6 +74,19 @@ def parse_args():
"--vis_font_path"
,
type
=
str
,
default
=
"./doc/fonts/simfang.ttf"
)
parser
.
add_argument
(
"--drop_score"
,
type
=
float
,
default
=
0.5
)
# params for e2e
parser
.
add_argument
(
"--e2e_algorithm"
,
type
=
str
,
default
=
'PGNet'
)
parser
.
add_argument
(
"--e2e_model_dir"
,
type
=
str
)
parser
.
add_argument
(
"--e2e_limit_side_len"
,
type
=
float
,
default
=
768
)
parser
.
add_argument
(
"--e2e_limit_type"
,
type
=
str
,
default
=
'max'
)
# PGNet parmas
parser
.
add_argument
(
"--e2e_pgnet_score_thresh"
,
type
=
float
,
default
=
0.5
)
parser
.
add_argument
(
"--e2e_char_dict_path"
,
type
=
str
,
default
=
"./ppocr/utils/ic15_dict.txt"
)
parser
.
add_argument
(
"--e2e_pgnet_valid_set"
,
type
=
str
,
default
=
'totaltext'
)
parser
.
add_argument
(
"--e2e_pgnet_polygon"
,
type
=
bool
,
default
=
True
)
# params for text classifier
parser
.
add_argument
(
"--use_angle_cls"
,
type
=
str2bool
,
default
=
False
)
parser
.
add_argument
(
"--cls_model_dir"
,
type
=
str
)
...
...
@@ -92,8 +106,10 @@ def create_predictor(args, mode, logger):
model_dir
=
args
.
det_model_dir
elif
mode
==
'cls'
:
model_dir
=
args
.
cls_model_dir
el
se
:
el
if
mode
==
'rec'
:
model_dir
=
args
.
rec_model_dir
else
:
model_dir
=
args
.
e2e_model_dir
if
model_dir
is
None
:
logger
.
info
(
"not find {} model file path {}"
.
format
(
mode
,
model_dir
))
...
...
@@ -123,9 +139,12 @@ def create_predictor(args, mode, logger):
# cache 10 different shapes for mkldnn to avoid memory leak
config
.
set_mkldnn_cache_capacity
(
10
)
config
.
enable_mkldnn
()
# TODO LDOUBLEV: fix mkldnn bug when bach_size > 1
#config.set_mkldnn_op({'conv2d', 'depthwise_conv2d', 'pool2d', 'batch_norm'})
args
.
rec_batch_num
=
1
# config.enable_memory_optim()
# enable memory optim
config
.
enable_memory_optim
()
config
.
disable_glog_info
()
config
.
delete_pass
(
"conv_transpose_eltwiseadd_bn_fuse_pass"
)
...
...
@@ -144,6 +163,22 @@ def create_predictor(args, mode, logger):
return
predictor
,
input_tensor
,
output_tensors
def
draw_e2e_res
(
dt_boxes
,
strs
,
img_path
):
src_im
=
cv2
.
imread
(
img_path
)
for
box
,
str
in
zip
(
dt_boxes
,
strs
):
box
=
box
.
astype
(
np
.
int32
).
reshape
((
-
1
,
1
,
2
))
cv2
.
polylines
(
src_im
,
[
box
],
True
,
color
=
(
255
,
255
,
0
),
thickness
=
2
)
cv2
.
putText
(
src_im
,
str
,
org
=
(
int
(
box
[
0
,
0
,
0
]),
int
(
box
[
0
,
0
,
1
])),
fontFace
=
cv2
.
FONT_HERSHEY_COMPLEX
,
fontScale
=
0.7
,
color
=
(
0
,
255
,
0
),
thickness
=
1
)
return
src_im
def
draw_text_det_res
(
dt_boxes
,
img_path
):
src_im
=
cv2
.
imread
(
img_path
)
for
box
in
dt_boxes
:
...
...
tools/infer_det.py
View file @
ce22e2ff
...
...
@@ -97,7 +97,7 @@ def main():
preds
=
model
(
images
)
post_result
=
post_process_class
(
preds
,
shape_list
)
boxes
=
post_result
[
0
][
'points'
]
# write resul
e
# write resul
t
dt_boxes_json
=
[]
for
box
in
boxes
:
tmp_json
=
{
"transcription"
:
""
}
...
...
tools/infer_e2e.py
0 → 100755
View file @
ce22e2ff
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
numpy
as
np
import
os
import
sys
__dir__
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
sys
.
path
.
append
(
__dir__
)
sys
.
path
.
append
(
os
.
path
.
abspath
(
os
.
path
.
join
(
__dir__
,
'..'
)))
os
.
environ
[
"FLAGS_allocator_strategy"
]
=
'auto_growth'
import
cv2
import
json
import
paddle
from
ppocr.data
import
create_operators
,
transform
from
ppocr.modeling.architectures
import
build_model
from
ppocr.postprocess
import
build_post_process
from
ppocr.utils.save_load
import
init_model
from
ppocr.utils.utility
import
get_image_file_list
import
tools.program
as
program
def draw_e2e_res(dt_boxes, strs, config, img, img_name):
    """Draw end-to-end spotting results on *img* and save the visualization.

    Args:
        dt_boxes: iterable of polygon point arrays, one per detected text
            region; each must be castable to an int32 array of shape (-1, 1, 2).
        strs: recognized transcription for each box, parallel to dt_boxes.
        config: loaded config dict; config['Global']['save_res_path'] decides
            where the "e2e_results" output directory is created.
        img: image (numpy array) to draw on; modified in place.
        img_name: source image path; its basename names the saved file.

    Nothing is drawn or saved when dt_boxes is empty.
    """
    if len(dt_boxes) > 0:
        src_im = img
        # Loop variable renamed from `str` to `text`: the original shadowed
        # the builtin str type inside the loop body.
        for box, text in zip(dt_boxes, strs):
            box = box.astype(np.int32).reshape((-1, 1, 2))
            cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
            cv2.putText(
                src_im,
                text,
                org=(int(box[0, 0, 0]), int(box[0, 0, 1])),
                fontFace=cv2.FONT_HERSHEY_COMPLEX,
                fontScale=0.7,
                color=(0, 255, 0),
                thickness=1)
        save_det_path = os.path.dirname(config['Global'][
            'save_res_path']) + "/e2e_results/"
        if not os.path.exists(save_det_path):
            os.makedirs(save_det_path)
        save_path = os.path.join(save_det_path, os.path.basename(img_name))
        cv2.imwrite(save_path, src_im)
        logger.info("The e2e Image saved in {}".format(save_path))
def main():
    """Run end-to-end (PGNet) inference over config['Global']['infer_img'].

    Builds the model from the loaded module-level `config`, pushes each image
    through the Eval transforms (label ops removed), writes one
    "<image_path>\\t<json boxes>" line per image to Global.save_res_path, and
    saves a visualization via draw_e2e_res.
    """
    global_config = config['Global']

    # build model
    model = build_model(config['Architecture'])
    init_model(config, model, logger)

    # build post process
    post_process_class = build_post_process(config['PostProcess'],
                                            global_config)

    # create data ops: reuse the Eval transforms, but drop label decoding
    # (inference has no labels) and keep only image + shape.
    transforms = []
    for op in config['Eval']['dataset']['transforms']:
        op_name = list(op)[0]
        if 'Label' in op_name:
            continue
        elif op_name == 'KeepKeys':
            op[op_name]['keep_keys'] = ['image', 'shape']
        transforms.append(op)
    ops = create_operators(transforms, global_config)

    save_res_path = config['Global']['save_res_path']
    if not os.path.exists(os.path.dirname(save_res_path)):
        os.makedirs(os.path.dirname(save_res_path))

    model.eval()
    with open(save_res_path, "wb") as fout:
        for file in get_image_file_list(config['Global']['infer_img']):
            logger.info("infer_img: {}".format(file))
            with open(file, 'rb') as f:
                img = f.read()
                data = {'image': img}
            batch = transform(data, ops)
            images = np.expand_dims(batch[0], axis=0)
            shape_list = np.expand_dims(batch[1], axis=0)
            images = paddle.to_tensor(images)
            preds = model(images)
            post_result = post_process_class(preds, shape_list)
            points, strs = post_result['points'], post_result['strs']
            # write result (comment typo "resule" fixed; loop variable renamed
            # from `str` so the builtin is not shadowed)
            dt_boxes_json = []
            for poly, text in zip(points, strs):
                tmp_json = {"transcription": text}
                tmp_json['points'] = poly.tolist()
                dt_boxes_json.append(tmp_json)
            otstr = file + "\t" + json.dumps(dt_boxes_json) + "\n"
            fout.write(otstr.encode())
            src_img = cv2.imread(file)
            draw_e2e_res(points, strs, config, src_img, file)
    logger.info("success!")
if __name__ == '__main__':
    # program.preprocess() loads the global config, selects the device and
    # creates the module-level logger used by main() and draw_e2e_res.
    config, device, logger, vdl_writer = program.preprocess()
    main()
tools/program.py
View file @
ce22e2ff
...
...
@@ -182,6 +182,8 @@ def train(config,
model_average
=
False
model
.
train
()
use_srn
=
config
[
'Architecture'
][
'algorithm'
]
==
"SRN"
if
'start_epoch'
in
best_model_dict
:
start_epoch
=
best_model_dict
[
'start_epoch'
]
else
:
...
...
@@ -200,7 +202,7 @@ def train(config,
break
lr
=
optimizer
.
get_lr
()
images
=
batch
[
0
]
if
config
[
'Architecture'
][
'algorithm'
]
==
"SRN"
:
if
use_srn
:
others
=
batch
[
-
4
:]
preds
=
model
(
images
,
others
)
model_average
=
True
...
...
@@ -235,8 +237,9 @@ def train(config,
vdl_writer
.
add_scalar
(
'TRAIN/{}'
.
format
(
k
),
v
,
global_step
)
vdl_writer
.
add_scalar
(
'TRAIN/lr'
,
lr
,
global_step
)
if
dist
.
get_rank
(
)
==
0
and
global_step
>
0
and
global_step
%
print_batch_step
==
0
:
if
dist
.
get_rank
()
==
0
and
(
(
global_step
>
0
and
global_step
%
print_batch_step
==
0
)
or
(
idx
>=
len
(
train_dataloader
)
-
1
)):
logs
=
train_stats
.
log
()
strs
=
'epoch: [{}/{}], iter: {}, {}, reader_cost: {:.5f} s, batch_cost: {:.5f} s, samples: {}, ips: {:.5f}'
.
format
(
epoch
,
epoch_num
,
global_step
,
logs
,
train_reader_cost
/
...
...
@@ -256,8 +259,12 @@ def train(config,
min_average_window
=
10000
,
max_average_window
=
15625
)
Model_Average
.
apply
()
cur_metric
=
eval
(
model
,
valid_dataloader
,
post_process_class
,
eval_class
)
cur_metric
=
eval
(
model
,
valid_dataloader
,
post_process_class
,
eval_class
,
use_srn
=
use_srn
)
cur_metric_str
=
'cur metric, {}'
.
format
(
', '
.
join
(
[
'{}: {}'
.
format
(
k
,
v
)
for
k
,
v
in
cur_metric
.
items
()]))
logger
.
info
(
cur_metric_str
)
...
...
@@ -321,7 +328,8 @@ def train(config,
return
def
eval
(
model
,
valid_dataloader
,
post_process_class
,
eval_class
):
def
eval
(
model
,
valid_dataloader
,
post_process_class
,
eval_class
,
use_srn
=
False
):
model
.
eval
()
with
paddle
.
no_grad
():
total_frame
=
0.0
...
...
@@ -332,7 +340,8 @@ def eval(model, valid_dataloader, post_process_class, eval_class):
break
images
=
batch
[
0
]
start
=
time
.
time
()
if
"SRN"
in
str
(
model
.
head
):
if
use_srn
:
others
=
batch
[
-
4
:]
preds
=
model
(
images
,
others
)
else
:
...
...
@@ -366,7 +375,8 @@ def preprocess(is_train=False):
alg
=
config
[
'Architecture'
][
'algorithm'
]
assert
alg
in
[
'EAST'
,
'DB'
,
'SAST'
,
'Rosetta'
,
'CRNN'
,
'STARNet'
,
'RARE'
,
'SRN'
,
'CLS'
'EAST'
,
'DB'
,
'SAST'
,
'Rosetta'
,
'CRNN'
,
'STARNet'
,
'RARE'
,
'SRN'
,
'CLS'
,
'PGNet'
]
device
=
'gpu:{}'
.
format
(
dist
.
ParallelEnv
().
dev_id
)
if
use_gpu
else
'cpu'
...
...
@@ -386,6 +396,7 @@ def preprocess(is_train=False):
logger
=
get_logger
(
name
=
'root'
,
log_file
=
log_file
)
if
config
[
'Global'
][
'use_visualdl'
]:
from
visualdl
import
LogWriter
save_model_dir
=
config
[
'Global'
][
'save_model_dir'
]
vdl_writer_path
=
'{}/vdl/'
.
format
(
save_model_dir
)
os
.
makedirs
(
vdl_writer_path
,
exist_ok
=
True
)
vdl_writer
=
LogWriter
(
logdir
=
vdl_writer_path
)
...
...
tools/train.py
View file @
ce22e2ff
...
...
@@ -52,7 +52,10 @@ def main(config, device, logger, vdl_writer):
train_dataloader
=
build_dataloader
(
config
,
'Train'
,
device
,
logger
)
if
len
(
train_dataloader
)
==
0
:
logger
.
error
(
'No Images in train dataset, please check annotation file and path in the configuration file'
"No Images in train dataset, please ensure
\n
"
+
"
\t
1. The images num in the train label_file_list should be larger than or equal with batch size.
\n
"
+
"
\t
2. The annotation file and path in the configuration file are provided normally."
)
return
...
...
@@ -89,8 +92,10 @@ def main(config, device, logger, vdl_writer):
# load pretrain model
pre_best_model_dict
=
init_model
(
config
,
model
,
logger
,
optimizer
)
logger
.
info
(
'train dataloader has {} iters, valid dataloader has {} iters'
.
format
(
len
(
train_dataloader
),
len
(
valid_dataloader
)))
logger
.
info
(
'train dataloader has {} iters'
.
format
(
len
(
train_dataloader
)))
if
valid_dataloader
is
not
None
:
logger
.
info
(
'valid dataloader has {} iters'
.
format
(
len
(
valid_dataloader
)))
# start train
program
.
train
(
config
,
train_dataloader
,
valid_dataloader
,
device
,
model
,
loss_class
,
optimizer
,
lr_scheduler
,
post_process_class
,
...
...
train.sh
View file @
ce22e2ff
# recommended paddle.__version__ == 2.0.0
python3
-m
paddle.distributed.launch
--gpus
'0,1,2,3,4,5,6,7'
tools/train.py
-c
configs/rec/rec_mv3_none_bilstm_ctc.yml
python3
-m
paddle.distributed.launch
--log_dir
=
./debug/
--gpus
'0,1,2,3,4,5,6,7'
tools/train.py
-c
configs/rec/rec_mv3_none_bilstm_ctc.yml
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment