Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
paddle_dbnet
Commits
acd479ea
Unverified
Commit
acd479ea
authored
Jan 29, 2021
by
xiaoting
Committed by
GitHub
Jan 29, 2021
Browse files
Merge pull request #1597 from tink2123/dygraph_for_srn
【Do not merge】Add srn model
parents
fad40158
6ebbbfe4
Changes
26
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
304 additions
and
37 deletions
+304
-37
ppocr/postprocess/__init__.py
ppocr/postprocess/__init__.py
+3
-2
ppocr/postprocess/rec_postprocess.py
ppocr/postprocess/rec_postprocess.py
+84
-1
tools/export_model.py
tools/export_model.py
+40
-16
tools/infer/predict_rec.py
tools/infer/predict_rec.py
+136
-13
tools/infer_rec.py
tools/infer_rec.py
+23
-2
tools/program.py
tools/program.py
+18
-3
No files found.
ppocr/postprocess/__init__.py
View file @
acd479ea
...
@@ -26,11 +26,12 @@ def build_post_process(config, global_config=None):
...
@@ -26,11 +26,12 @@ def build_post_process(config, global_config=None):
from
.db_postprocess
import
DBPostProcess
from
.db_postprocess
import
DBPostProcess
from
.east_postprocess
import
EASTPostProcess
from
.east_postprocess
import
EASTPostProcess
from
.sast_postprocess
import
SASTPostProcess
from
.sast_postprocess
import
SASTPostProcess
from
.rec_postprocess
import
CTCLabelDecode
,
AttnLabelDecode
from
.rec_postprocess
import
CTCLabelDecode
,
AttnLabelDecode
,
SRNLabelDecode
from
.cls_postprocess
import
ClsPostProcess
from
.cls_postprocess
import
ClsPostProcess
support_dict
=
[
support_dict
=
[
'DBPostProcess'
,
'EASTPostProcess'
,
'SASTPostProcess'
,
'CTCLabelDecode'
,
'AttnLabelDecode'
,
'ClsPostProcess'
'DBPostProcess'
,
'EASTPostProcess'
,
'SASTPostProcess'
,
'CTCLabelDecode'
,
'AttnLabelDecode'
,
'ClsPostProcess'
,
'SRNLabelDecode'
]
]
config
=
copy
.
deepcopy
(
config
)
config
=
copy
.
deepcopy
(
config
)
...
...
ppocr/postprocess/rec_postprocess.py
View file @
acd479ea
...
@@ -33,6 +33,9 @@ class BaseRecLabelDecode(object):
...
@@ -33,6 +33,9 @@ class BaseRecLabelDecode(object):
assert
character_type
in
support_character_type
,
"Only {} are supported now but get {}"
.
format
(
assert
character_type
in
support_character_type
,
"Only {} are supported now but get {}"
.
format
(
support_character_type
,
character_type
)
support_character_type
,
character_type
)
self
.
beg_str
=
"sos"
self
.
end_str
=
"eos"
if
character_type
==
"en"
:
if
character_type
==
"en"
:
self
.
character_str
=
"0123456789abcdefghijklmnopqrstuvwxyz"
self
.
character_str
=
"0123456789abcdefghijklmnopqrstuvwxyz"
dict_character
=
list
(
self
.
character_str
)
dict_character
=
list
(
self
.
character_str
)
...
@@ -109,7 +112,6 @@ class CTCLabelDecode(BaseRecLabelDecode):
...
@@ -109,7 +112,6 @@ class CTCLabelDecode(BaseRecLabelDecode):
def
__call__
(
self
,
preds
,
label
=
None
,
*
args
,
**
kwargs
):
def
__call__
(
self
,
preds
,
label
=
None
,
*
args
,
**
kwargs
):
if
isinstance
(
preds
,
paddle
.
Tensor
):
if
isinstance
(
preds
,
paddle
.
Tensor
):
preds
=
preds
.
numpy
()
preds
=
preds
.
numpy
()
preds_idx
=
preds
.
argmax
(
axis
=
2
)
preds_idx
=
preds
.
argmax
(
axis
=
2
)
preds_prob
=
preds
.
max
(
axis
=
2
)
preds_prob
=
preds
.
max
(
axis
=
2
)
text
=
self
.
decode
(
preds_idx
,
preds_prob
,
is_remove_duplicate
=
True
)
text
=
self
.
decode
(
preds_idx
,
preds_prob
,
is_remove_duplicate
=
True
)
...
@@ -158,3 +160,84 @@ class AttnLabelDecode(BaseRecLabelDecode):
...
@@ -158,3 +160,84 @@ class AttnLabelDecode(BaseRecLabelDecode):
assert
False
,
"unsupport type %s in get_beg_end_flag_idx"
\
assert
False
,
"unsupport type %s in get_beg_end_flag_idx"
\
%
beg_or_end
%
beg_or_end
return
idx
return
idx
class SRNLabelDecode(BaseRecLabelDecode):
    """Convert between text-label and text-index for the SRN recognizer.

    The prediction is read from ``preds['predict']``, flattened to one row
    per decoding step, arg-maxed, and regrouped into ``max_text_length``
    steps per sample before being mapped back to characters.
    """

    def __init__(self,
                 character_dict_path=None,
                 character_type='en',
                 use_space_char=False,
                 **kwargs):
        """Build the decoder.

        Args:
            character_dict_path: forwarded to ``BaseRecLabelDecode``.
            character_type: forwarded to ``BaseRecLabelDecode``.
            use_space_char: forwarded to ``BaseRecLabelDecode``.
            **kwargs: optional ``max_text_length`` (default 25) giving the
                number of decoding steps per sample; other keys are ignored.
        """
        super(SRNLabelDecode, self).__init__(character_dict_path,
                                             character_type, use_space_char)
        # 25 is the value that used to be hard-coded in __call__.
        self.max_text_length = kwargs.get('max_text_length', 25)

    def __call__(self, preds, label=None, *args, **kwargs):
        """Decode a batch of SRN predictions (and optionally labels).

        Args:
            preds: mapping whose ``'predict'`` entry holds the class scores,
                either as a ``paddle.Tensor`` or as a numpy array.
            label: optional batch of label indices; decoded with ``decode``
                when given.

        Returns:
            The decoded predictions as a list of ``(text, confidence)``
            tuples, or ``(predictions, labels)`` when ``label`` is given.
        """
        pred = preds['predict']
        # +2 for the begin/end tokens appended by add_special_char
        # (presumably applied to the dictionary by the base class).
        char_num = len(self.character_str) + 2
        if isinstance(pred, paddle.Tensor):
            pred = pred.numpy()
        pred = np.reshape(pred, [-1, char_num])

        preds_idx = np.argmax(pred, axis=1)
        preds_prob = np.max(pred, axis=1)

        # Regroup the flat per-step results into one row per sample.
        preds_idx = np.reshape(preds_idx, [-1, self.max_text_length])
        preds_prob = np.reshape(preds_prob, [-1, self.max_text_length])

        # Decode once; duplicates are kept because each step is a distinct
        # character position.
        text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False)
        if label is None:
            return text
        label = self.decode(label)
        return text, label

    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
        """Convert text-index into text-label.

        Args:
            text_index: batch of index sequences.
            text_prob: optional batch of per-step confidences aligned with
                ``text_index``; a confidence of 1 is used when omitted.
            is_remove_duplicate: drop an index equal to its predecessor
                (only meaningful for predictions).

        Returns:
            A list with one ``(text, mean_confidence)`` tuple per sample.
        """
        result_list = []
        ignored_tokens = self.get_ignored_tokens()

        for batch_idx in range(len(text_index)):
            indices = text_index[batch_idx]
            char_list = []
            conf_list = []
            for idx, char_idx in enumerate(indices):
                if char_idx in ignored_tokens:
                    continue
                # only for predict
                if is_remove_duplicate and idx > 0 \
                        and indices[idx - 1] == char_idx:
                    continue
                char_list.append(self.character[int(char_idx)])
                if text_prob is not None:
                    conf_list.append(text_prob[batch_idx][idx])
                else:
                    conf_list.append(1)

            if not conf_list:
                # Nothing survived filtering: report zero confidence rather
                # than the NaN (and RuntimeWarning) np.mean([]) would give.
                conf_list = [0]
            result_list.append((''.join(char_list), np.mean(conf_list)))
        return result_list

    def add_special_char(self, dict_character):
        """Return ``dict_character`` with the begin and end tokens appended."""
        return dict_character + [self.beg_str, self.end_str]

    def get_ignored_tokens(self):
        """Return the indices of the begin and end tokens, skipped by decode."""
        beg_idx = self.get_beg_end_flag_idx("beg")
        end_idx = self.get_beg_end_flag_idx("end")
        return [beg_idx, end_idx]

    def get_beg_end_flag_idx(self, beg_or_end):
        """Look up the dictionary index of the begin or end token.

        Args:
            beg_or_end: ``"beg"`` or ``"end"``.

        Returns:
            The index from ``self.dict`` wrapped in ``np.array``.

        Raises:
            AssertionError: if ``beg_or_end`` is neither ``"beg"`` nor
                ``"end"``.
        """
        if beg_or_end == "beg":
            return np.array(self.dict[self.beg_str])
        if beg_or_end == "end":
            return np.array(self.dict[self.end_str])
        # Raised explicitly so the check survives ``python -O``.
        raise AssertionError("unsupport type %s in get_beg_end_flag_idx" %
                             beg_or_end)
tools/export_model.py
View file @
acd479ea
...
@@ -31,6 +31,14 @@ from ppocr.utils.logging import get_logger
...
@@ -31,6 +31,14 @@ from ppocr.utils.logging import get_logger
from
tools.program
import
load_config
,
merge_config
,
ArgsParser
from
tools.program
import
load_config
,
merge_config
,
ArgsParser
def parse_args(argv=None):
    """Parse the command-line options of the export script.

    Args:
        argv: optional list of argument strings. When ``None`` (the
            default) argparse reads ``sys.argv[1:]``, as before.

    Returns:
        The ``argparse.Namespace`` with ``config`` (``None`` when not given)
        and ``output_path`` (default ``'./output/infer/'``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--config", help="configuration file to use")
    parser.add_argument(
        "-o", "--output_path", type=str, default='./output/infer/')
    return parser.parse_args(argv)
def
main
():
def
main
():
FLAGS
=
ArgsParser
().
parse_args
()
FLAGS
=
ArgsParser
().
parse_args
()
config
=
load_config
(
FLAGS
.
config
)
config
=
load_config
(
FLAGS
.
config
)
...
@@ -52,6 +60,22 @@ def main():
...
@@ -52,6 +60,22 @@ def main():
save_path
=
'{}/inference'
.
format
(
config
[
'Global'
][
'save_inference_dir'
])
save_path
=
'{}/inference'
.
format
(
config
[
'Global'
][
'save_inference_dir'
])
if
config
[
'Architecture'
][
'algorithm'
]
==
"SRN"
:
other_shape
=
[
paddle
.
static
.
InputSpec
(
shape
=
[
None
,
1
,
64
,
256
],
dtype
=
'float32'
),
[
paddle
.
static
.
InputSpec
(
shape
=
[
None
,
256
,
1
],
dtype
=
"int64"
),
paddle
.
static
.
InputSpec
(
shape
=
[
None
,
25
,
1
],
dtype
=
"int64"
),
paddle
.
static
.
InputSpec
(
shape
=
[
None
,
8
,
25
,
25
],
dtype
=
"int64"
),
paddle
.
static
.
InputSpec
(
shape
=
[
None
,
8
,
25
,
25
],
dtype
=
"int64"
)
]
]
model
=
to_static
(
model
,
input_spec
=
other_shape
)
else
:
infer_shape
=
[
3
,
-
1
,
-
1
]
infer_shape
=
[
3
,
-
1
,
-
1
]
if
config
[
'Architecture'
][
'model_type'
]
==
"rec"
:
if
config
[
'Architecture'
][
'model_type'
]
==
"rec"
:
infer_shape
=
[
3
,
32
,
-
1
]
# for rec model, H must be 32
infer_shape
=
[
3
,
32
,
-
1
]
# for rec model, H must be 32
...
@@ -62,13 +86,13 @@ def main():
...
@@ -62,13 +86,13 @@ def main():
'When there is tps in the network, variable length input is not supported, and the input size needs to be the same as during training'
'When there is tps in the network, variable length input is not supported, and the input size needs to be the same as during training'
)
)
infer_shape
[
-
1
]
=
100
infer_shape
[
-
1
]
=
100
model
=
to_static
(
model
=
to_static
(
model
,
model
,
input_spec
=
[
input_spec
=
[
paddle
.
static
.
InputSpec
(
paddle
.
static
.
InputSpec
(
shape
=
[
None
]
+
infer_shape
,
dtype
=
'float32'
)
shape
=
[
None
]
+
infer_shape
,
dtype
=
'float32'
)
])
])
paddle
.
jit
.
save
(
model
,
save_path
)
paddle
.
jit
.
save
(
model
,
save_path
)
logger
.
info
(
'inference model is saved to {}'
.
format
(
save_path
))
logger
.
info
(
'inference model is saved to {}'
.
format
(
save_path
))
...
...
tools/infer/predict_rec.py
View file @
acd479ea
...
@@ -25,6 +25,7 @@ import numpy as np
...
@@ -25,6 +25,7 @@ import numpy as np
import
math
import
math
import
time
import
time
import
traceback
import
traceback
import
paddle
import
tools.infer.utility
as
utility
import
tools.infer.utility
as
utility
from
ppocr.postprocess
import
build_post_process
from
ppocr.postprocess
import
build_post_process
...
@@ -46,6 +47,13 @@ class TextRecognizer(object):
...
@@ -46,6 +47,13 @@ class TextRecognizer(object):
"character_dict_path"
:
args
.
rec_char_dict_path
,
"character_dict_path"
:
args
.
rec_char_dict_path
,
"use_space_char"
:
args
.
use_space_char
"use_space_char"
:
args
.
use_space_char
}
}
if
self
.
rec_algorithm
==
"SRN"
:
postprocess_params
=
{
'name'
:
'SRNLabelDecode'
,
"character_type"
:
args
.
rec_char_type
,
"character_dict_path"
:
args
.
rec_char_dict_path
,
"use_space_char"
:
args
.
use_space_char
}
self
.
postprocess_op
=
build_post_process
(
postprocess_params
)
self
.
postprocess_op
=
build_post_process
(
postprocess_params
)
self
.
predictor
,
self
.
input_tensor
,
self
.
output_tensors
=
\
self
.
predictor
,
self
.
input_tensor
,
self
.
output_tensors
=
\
utility
.
create_predictor
(
args
,
'rec'
,
logger
)
utility
.
create_predictor
(
args
,
'rec'
,
logger
)
...
@@ -70,6 +78,78 @@ class TextRecognizer(object):
...
@@ -70,6 +78,78 @@ class TextRecognizer(object):
padding_im
[:,
:,
0
:
resized_w
]
=
resized_image
padding_im
[:,
:,
0
:
resized_w
]
=
resized_image
return
padding_im
return
padding_im
def resize_norm_img_srn(self, img, image_shape):
    """Resize a BGR crop for SRN and left-align it on a black canvas.

    The crop is resized to height ``imgH`` and to a width of 1x, 2x or 3x
    ``imgH`` depending on its aspect ratio (full ``imgW`` for anything
    wider), converted to grayscale, and pasted at the left edge of a zero
    canvas. No scaling or mean subtraction is applied here.

    Args:
        img: image array accepted by ``cv2.cvtColor(..., COLOR_BGR2GRAY)``.
        image_shape: ``(imgC, imgH, imgW)``; only ``imgH`` and ``imgW`` are
            used.

    Returns:
        A ``float32`` array of shape ``(1, imgH, imgW)``.
    """
    imgC, imgH, imgW = image_shape
    im_hei = img.shape[0]
    im_wid = img.shape[1]

    # Snap the width to the smallest multiple of imgH that fits the crop's
    # aspect ratio; anything wider than 3:1 is stretched to the full canvas.
    target_w = imgW
    for ratio in (1, 2, 3):
        if im_wid <= im_hei * ratio:
            # Clamp so the paste below can never overrun the canvas when
            # imgH * ratio is larger than imgW.
            target_w = min(imgH * ratio, imgW)
            break

    img_new = cv2.resize(img, (target_w, imgH))
    img_gray = cv2.cvtColor(np.asarray(img_new), cv2.COLOR_BGR2GRAY)

    img_black = np.zeros((imgH, imgW))
    img_black[:, 0:img_gray.shape[1]] = img_gray

    # (H, W) -> (1, H, W): add the single channel axis in front.
    return img_black[np.newaxis, :, :].astype(np.float32)
def srn_other_inputs(self, image_shape, num_heads, max_text_length):
    """Build the auxiliary position and attention-bias inputs for SRN.

    Args:
        image_shape: ``(imgC, imgH, imgW)``; only ``imgH`` and ``imgW`` are
            used.
        num_heads: number of copies of each bias mask along axis 1.
        max_text_length: number of decoding positions.

    Returns:
        ``[encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1,
        gsrm_slf_attn_bias2]`` where

        * ``encoder_word_pos``: int64, shape ``(1, feature_dim, 1)``,
          values ``0..feature_dim-1`` with
          ``feature_dim = int((imgH / 8) * (imgW / 8))``;
        * ``gsrm_word_pos``: int64, shape ``(1, max_text_length, 1)``;
        * ``gsrm_slf_attn_bias1``: float32, shape
          ``(1, num_heads, max_text_length, max_text_length)``, ``-1e9``
          strictly above the diagonal and zero elsewhere;
        * ``gsrm_slf_attn_bias2``: same shape and dtype, ``-1e9`` strictly
          below the diagonal and zero elsewhere.
    """
    imgC, imgH, imgW = image_shape
    # One position per cell of an (imgH / 8) x (imgW / 8) grid.
    feature_dim = int((imgH / 8) * (imgW / 8))

    encoder_word_pos = np.arange(
        feature_dim, dtype='int64').reshape((1, feature_dim, 1))
    gsrm_word_pos = np.arange(
        max_text_length, dtype='int64').reshape((1, max_text_length, 1))

    ones = np.ones((1, max_text_length, max_text_length), dtype='float32')
    mask_shape = [-1, 1, max_text_length, max_text_length]
    head_tiling = [1, num_heads, 1, 1]
    # A float32 scalar keeps the product float32; multiplying by the Python
    # list ``[-1e9]`` would promote the whole mask to float64.
    neg_fill = np.float32(-1e9)

    gsrm_slf_attn_bias1 = np.tile(
        np.triu(ones, 1).reshape(mask_shape), head_tiling) * neg_fill
    gsrm_slf_attn_bias2 = np.tile(
        np.tril(ones, -1).reshape(mask_shape), head_tiling) * neg_fill

    return [
        encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1,
        gsrm_slf_attn_bias2
    ]
def process_image_srn(self, img, image_shape, num_heads, max_text_length):
    """Prepare one image and its auxiliary inputs for the SRN predictor.

    Args:
        img: source image, passed to ``resize_norm_img_srn``.
        image_shape: shape tuple passed to both helpers.
        num_heads: passed to ``srn_other_inputs``.
        max_text_length: passed to ``srn_other_inputs``.

    Returns:
        A 5-tuple ``(norm_img, encoder_word_pos, gsrm_word_pos,
        gsrm_slf_attn_bias1, gsrm_slf_attn_bias2)``: the resized image with
        a new leading axis, the two position arrays cast to int64 and the
        two bias arrays cast to float32.
    """
    # Prepend an axis so single images can later be concatenated.
    batched_img = self.resize_norm_img_srn(img, image_shape)[np.newaxis, :]

    extras = self.srn_other_inputs(image_shape, num_heads, max_text_length)
    word_pos_enc, word_pos_gsrm, bias_upper, bias_lower = extras

    # Force the dtypes regardless of what the helper produced.
    return (batched_img,
            word_pos_enc.astype(np.int64),
            word_pos_gsrm.astype(np.int64),
            bias_upper.astype(np.float32),
            bias_lower.astype(np.float32))
def
__call__
(
self
,
img_list
):
def
__call__
(
self
,
img_list
):
img_num
=
len
(
img_list
)
img_num
=
len
(
img_list
)
# Calculate the aspect ratio of all text bars
# Calculate the aspect ratio of all text bars
...
@@ -93,21 +173,64 @@ class TextRecognizer(object):
...
@@ -93,21 +173,64 @@ class TextRecognizer(object):
wh_ratio
=
w
*
1.0
/
h
wh_ratio
=
w
*
1.0
/
h
max_wh_ratio
=
max
(
max_wh_ratio
,
wh_ratio
)
max_wh_ratio
=
max
(
max_wh_ratio
,
wh_ratio
)
for
ino
in
range
(
beg_img_no
,
end_img_no
):
for
ino
in
range
(
beg_img_no
,
end_img_no
):
# norm_img = self.resize_norm_img(img_list[ino], max_wh_ratio)
if
self
.
rec_algorithm
!=
"SRN"
:
norm_img
=
self
.
resize_norm_img
(
img_list
[
indices
[
ino
]],
norm_img
=
self
.
resize_norm_img
(
img_list
[
indices
[
ino
]],
max_wh_ratio
)
max_wh_ratio
)
norm_img
=
norm_img
[
np
.
newaxis
,
:]
norm_img
=
norm_img
[
np
.
newaxis
,
:]
norm_img_batch
.
append
(
norm_img
)
norm_img_batch
.
append
(
norm_img
)
else
:
norm_img
=
self
.
process_image_srn
(
img_list
[
indices
[
ino
]],
self
.
rec_image_shape
,
8
,
25
)
encoder_word_pos_list
=
[]
gsrm_word_pos_list
=
[]
gsrm_slf_attn_bias1_list
=
[]
gsrm_slf_attn_bias2_list
=
[]
encoder_word_pos_list
.
append
(
norm_img
[
1
])
gsrm_word_pos_list
.
append
(
norm_img
[
2
])
gsrm_slf_attn_bias1_list
.
append
(
norm_img
[
3
])
gsrm_slf_attn_bias2_list
.
append
(
norm_img
[
4
])
norm_img_batch
.
append
(
norm_img
[
0
])
norm_img_batch
=
np
.
concatenate
(
norm_img_batch
)
norm_img_batch
=
np
.
concatenate
(
norm_img_batch
)
norm_img_batch
=
norm_img_batch
.
copy
()
norm_img_batch
=
norm_img_batch
.
copy
()
if
self
.
rec_algorithm
==
"SRN"
:
starttime
=
time
.
time
()
encoder_word_pos_list
=
np
.
concatenate
(
encoder_word_pos_list
)
gsrm_word_pos_list
=
np
.
concatenate
(
gsrm_word_pos_list
)
gsrm_slf_attn_bias1_list
=
np
.
concatenate
(
gsrm_slf_attn_bias1_list
)
gsrm_slf_attn_bias2_list
=
np
.
concatenate
(
gsrm_slf_attn_bias2_list
)
inputs
=
[
norm_img_batch
,
encoder_word_pos_list
,
gsrm_word_pos_list
,
gsrm_slf_attn_bias1_list
,
gsrm_slf_attn_bias2_list
,
]
input_names
=
self
.
predictor
.
get_input_names
()
for
i
in
range
(
len
(
input_names
)):
input_tensor
=
self
.
predictor
.
get_input_handle
(
input_names
[
i
])
input_tensor
.
copy_from_cpu
(
inputs
[
i
])
self
.
predictor
.
run
()
outputs
=
[]
for
output_tensor
in
self
.
output_tensors
:
output
=
output_tensor
.
copy_to_cpu
()
outputs
.
append
(
output
)
preds
=
{
"predict"
:
outputs
[
2
]}
else
:
starttime
=
time
.
time
()
starttime
=
time
.
time
()
self
.
input_tensor
.
copy_from_cpu
(
norm_img_batch
)
self
.
input_tensor
.
copy_from_cpu
(
norm_img_batch
)
self
.
predictor
.
run
()
self
.
predictor
.
run
()
outputs
=
[]
outputs
=
[]
for
output_tensor
in
self
.
output_tensors
:
for
output_tensor
in
self
.
output_tensors
:
output
=
output_tensor
.
copy_to_cpu
()
output
=
output_tensor
.
copy_to_cpu
()
outputs
.
append
(
output
)
outputs
.
append
(
output
)
preds
=
outputs
[
0
]
preds
=
outputs
[
0
]
rec_result
=
self
.
postprocess_op
(
preds
)
rec_result
=
self
.
postprocess_op
(
preds
)
for
rno
in
range
(
len
(
rec_result
)):
for
rno
in
range
(
len
(
rec_result
)):
rec_res
[
indices
[
beg_img_no
+
rno
]]
=
rec_result
[
rno
]
rec_res
[
indices
[
beg_img_no
+
rno
]]
=
rec_result
[
rno
]
...
...
tools/infer_rec.py
View file @
acd479ea
...
@@ -62,6 +62,12 @@ def main():
...
@@ -62,6 +62,12 @@ def main():
elif
op_name
in
[
'RecResizeImg'
]:
elif
op_name
in
[
'RecResizeImg'
]:
op
[
op_name
][
'infer_mode'
]
=
True
op
[
op_name
][
'infer_mode'
]
=
True
elif
op_name
==
'KeepKeys'
:
elif
op_name
==
'KeepKeys'
:
if
config
[
'Architecture'
][
'algorithm'
]
==
"SRN"
:
op
[
op_name
][
'keep_keys'
]
=
[
'image'
,
'encoder_word_pos'
,
'gsrm_word_pos'
,
'gsrm_slf_attn_bias1'
,
'gsrm_slf_attn_bias2'
]
else
:
op
[
op_name
][
'keep_keys'
]
=
[
'image'
]
op
[
op_name
][
'keep_keys'
]
=
[
'image'
]
transforms
.
append
(
op
)
transforms
.
append
(
op
)
global_config
[
'infer_mode'
]
=
True
global_config
[
'infer_mode'
]
=
True
...
@@ -74,9 +80,24 @@ def main():
...
@@ -74,9 +80,24 @@ def main():
img
=
f
.
read
()
img
=
f
.
read
()
data
=
{
'image'
:
img
}
data
=
{
'image'
:
img
}
batch
=
transform
(
data
,
ops
)
batch
=
transform
(
data
,
ops
)
if
config
[
'Architecture'
][
'algorithm'
]
==
"SRN"
:
encoder_word_pos_list
=
np
.
expand_dims
(
batch
[
1
],
axis
=
0
)
gsrm_word_pos_list
=
np
.
expand_dims
(
batch
[
2
],
axis
=
0
)
gsrm_slf_attn_bias1_list
=
np
.
expand_dims
(
batch
[
3
],
axis
=
0
)
gsrm_slf_attn_bias2_list
=
np
.
expand_dims
(
batch
[
4
],
axis
=
0
)
others
=
[
paddle
.
to_tensor
(
encoder_word_pos_list
),
paddle
.
to_tensor
(
gsrm_word_pos_list
),
paddle
.
to_tensor
(
gsrm_slf_attn_bias1_list
),
paddle
.
to_tensor
(
gsrm_slf_attn_bias2_list
)
]
images
=
np
.
expand_dims
(
batch
[
0
],
axis
=
0
)
images
=
np
.
expand_dims
(
batch
[
0
],
axis
=
0
)
images
=
paddle
.
to_tensor
(
images
)
images
=
paddle
.
to_tensor
(
images
)
if
config
[
'Architecture'
][
'algorithm'
]
==
"SRN"
:
preds
=
model
(
images
,
others
)
else
:
preds
=
model
(
images
)
preds
=
model
(
images
)
post_result
=
post_process_class
(
preds
)
post_result
=
post_process_class
(
preds
)
for
rec_reuslt
in
post_result
:
for
rec_reuslt
in
post_result
:
...
...
tools/program.py
View file @
acd479ea
...
@@ -174,6 +174,7 @@ def train(config,
...
@@ -174,6 +174,7 @@ def train(config,
best_model_dict
=
{
main_indicator
:
0
}
best_model_dict
=
{
main_indicator
:
0
}
best_model_dict
.
update
(
pre_best_model_dict
)
best_model_dict
.
update
(
pre_best_model_dict
)
train_stats
=
TrainingStats
(
log_smooth_window
,
[
'lr'
])
train_stats
=
TrainingStats
(
log_smooth_window
,
[
'lr'
])
model_average
=
False
model
.
train
()
model
.
train
()
if
'start_epoch'
in
best_model_dict
:
if
'start_epoch'
in
best_model_dict
:
...
@@ -194,6 +195,11 @@ def train(config,
...
@@ -194,6 +195,11 @@ def train(config,
break
break
lr
=
optimizer
.
get_lr
()
lr
=
optimizer
.
get_lr
()
images
=
batch
[
0
]
images
=
batch
[
0
]
if
config
[
'Architecture'
][
'algorithm'
]
==
"SRN"
:
others
=
batch
[
-
4
:]
preds
=
model
(
images
,
others
)
model_average
=
True
else
:
preds
=
model
(
images
)
preds
=
model
(
images
)
loss
=
loss_class
(
preds
,
batch
)
loss
=
loss_class
(
preds
,
batch
)
avg_loss
=
loss
[
'loss'
]
avg_loss
=
loss
[
'loss'
]
...
@@ -238,7 +244,14 @@ def train(config,
...
@@ -238,7 +244,14 @@ def train(config,
# eval
# eval
if
global_step
>
start_eval_step
and
\
if
global_step
>
start_eval_step
and
\
(
global_step
-
start_eval_step
)
%
eval_batch_step
==
0
and
dist
.
get_rank
()
==
0
:
(
global_step
-
start_eval_step
)
%
eval_batch_step
==
0
and
dist
.
get_rank
()
==
0
:
cur_metric
=
eval
(
model
,
valid_dataloader
,
post_process_class
,
if
model_average
:
Model_Average
=
paddle
.
incubate
.
optimizer
.
ModelAverage
(
0.15
,
parameters
=
model
.
parameters
(),
min_average_window
=
10000
,
max_average_window
=
15625
)
Model_Average
.
apply
()
cur_metirc
=
eval
(
model
,
valid_dataloader
,
post_process_class
,
eval_class
)
eval_class
)
cur_metric_str
=
'cur metric, {}'
.
format
(
', '
.
join
(
cur_metric_str
=
'cur metric, {}'
.
format
(
', '
.
join
(
[
'{}: {}'
.
format
(
k
,
v
)
for
k
,
v
in
cur_metric
.
items
()]))
[
'{}: {}'
.
format
(
k
,
v
)
for
k
,
v
in
cur_metric
.
items
()]))
...
@@ -273,6 +286,7 @@ def train(config,
...
@@ -273,6 +286,7 @@ def train(config,
best_model_dict
[
main_indicator
],
best_model_dict
[
main_indicator
],
global_step
)
global_step
)
global_step
+=
1
global_step
+=
1
optimizer
.
clear_grad
()
batch_start
=
time
.
time
()
batch_start
=
time
.
time
()
if
dist
.
get_rank
()
==
0
:
if
dist
.
get_rank
()
==
0
:
save_model
(
save_model
(
...
@@ -312,8 +326,9 @@ def eval(model, valid_dataloader, post_process_class, eval_class):
...
@@ -312,8 +326,9 @@ def eval(model, valid_dataloader, post_process_class, eval_class):
if
idx
>=
len
(
valid_dataloader
):
if
idx
>=
len
(
valid_dataloader
):
break
break
images
=
batch
[
0
]
images
=
batch
[
0
]
others
=
batch
[
-
4
:]
start
=
time
.
time
()
start
=
time
.
time
()
preds
=
model
(
images
)
preds
=
model
(
images
,
others
)
batch
=
[
item
.
numpy
()
for
item
in
batch
]
batch
=
[
item
.
numpy
()
for
item
in
batch
]
# Obtain usable results from post-processing methods
# Obtain usable results from post-processing methods
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment