Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
paddle_dbnet
Commits
4f8b5113
Commit
4f8b5113
authored
Dec 15, 2021
by
Leif
Browse files
Merge remote-tracking branch 'origin/dygraph' into dygraph
parents
d73ed79c
370f0fef
Changes
50
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
789 additions
and
163 deletions
+789
-163
ppstructure/vqa/infer_ser_e2e.py
ppstructure/vqa/infer_ser_e2e.py
+62
-52
ppstructure/vqa/infer_ser_re_e2e.py
ppstructure/vqa/infer_ser_re_e2e.py
+131
-0
ppstructure/vqa/metric.py
ppstructure/vqa/metric.py
+175
-0
ppstructure/vqa/train_re.py
ppstructure/vqa/train_re.py
+261
-0
ppstructure/vqa/train_ser.py
ppstructure/vqa/train_ser.py
+17
-32
ppstructure/vqa/utils.py
ppstructure/vqa/utils.py
+111
-47
test_tipc/configs/ch_PP-OCRv2_det_PACT/train_infer_python.txt
..._tipc/configs/ch_PP-OCRv2_det_PACT/train_infer_python.txt
+2
-2
test_tipc/configs/ch_PP-OCRv2_rec/train_infer_python.txt
test_tipc/configs/ch_PP-OCRv2_rec/train_infer_python.txt
+3
-3
test_tipc/configs/ch_PP-OCRv2_rec_PACT/train_infer_python.txt
..._tipc/configs/ch_PP-OCRv2_rec_PACT/train_infer_python.txt
+8
-8
test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt
...c/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt
+1
-1
test_tipc/configs/ch_ppocr_mobile_v2.0_det_PACT/train_infer_python.txt
...figs/ch_ppocr_mobile_v2.0_det_PACT/train_infer_python.txt
+3
-3
test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/train_infer_python.txt
...figs/ch_ppocr_mobile_v2.0_rec_PACT/train_infer_python.txt
+1
-1
test_tipc/configs/ch_ppocr_server_v2.0_det/train_infer_python.txt
...c/configs/ch_ppocr_server_v2.0_det/train_infer_python.txt
+6
-6
test_tipc/configs/det_r50_vd_pse_v2.0/train_infer_python.txt
test_tipc/configs/det_r50_vd_pse_v2.0/train_infer_python.txt
+1
-1
test_tipc/configs/det_r50_vd_sast_icdar15_v2.0/det_r50_vd_sast_icdar2015.yml
...et_r50_vd_sast_icdar15_v2.0/det_r50_vd_sast_icdar2015.yml
+1
-1
test_tipc/configs/det_r50_vd_sast_icdar15_v2.0/train_infer_python.txt
...nfigs/det_r50_vd_sast_icdar15_v2.0/train_infer_python.txt
+1
-1
test_tipc/configs/det_r50_vd_sast_totaltext_v2.0/train_infer_python.txt
...igs/det_r50_vd_sast_totaltext_v2.0/train_infer_python.txt
+1
-1
test_tipc/configs/en_server_pgnetA/train_infer_python.txt
test_tipc/configs/en_server_pgnetA/train_infer_python.txt
+2
-2
test_tipc/configs/rec_mtb_nrtr/train_infer_python.txt
test_tipc/configs/rec_mtb_nrtr/train_infer_python.txt
+1
-1
test_tipc/configs/rec_mv3_tps_bilstm_att_v2.0/train_infer_python.txt
...onfigs/rec_mv3_tps_bilstm_att_v2.0/train_infer_python.txt
+1
-1
No files found.
ppstructure/vqa/infer_ser_e2e.py
View file @
4f8b5113
...
...
@@ -23,8 +23,10 @@ from PIL import Image
import
paddle
from
paddlenlp.transformers
import
LayoutXLMModel
,
LayoutXLMTokenizer
,
LayoutXLMForTokenClassification
from
paddleocr
import
PaddleOCR
# relative reference
from
utils
import
parse_args
,
get_image_file_list
,
draw_ser_results
,
get_bio_label_maps
,
build_ocr_engine
from
utils
import
parse_args
,
get_image_file_list
,
draw_ser_results
,
get_bio_label_maps
from
utils
import
pad_sentences
,
split_page
,
preprocess
,
postprocess
,
merge_preds_list_with_ocr_info
...
...
@@ -48,48 +50,45 @@ def parse_ocr_info_for_ser(ocr_result):
return
ocr_info
@
paddle
.
no_grad
()
def
infer
(
args
):
os
.
makedirs
(
args
.
output_dir
,
exist_ok
=
True
)
class
SerPredictor
(
object
):
def
__init__
(
self
,
args
):
self
.
max_seq_length
=
args
.
max_seq_length
# init token and model
tokenizer
=
LayoutXLMTokenizer
.
from_pretrained
(
args
.
model_name_or_path
)
model
=
LayoutXLMForTokenClassification
.
from_pretrained
(
# init ser token and model
self
.
tokenizer
=
LayoutXLMTokenizer
.
from_pretrained
(
args
.
model_name_or_path
)
model
.
eval
()
label2id_map
,
id2label_map
=
get_bio_label_maps
(
args
.
label_map_path
)
label2id_map_for_draw
=
dict
()
self
.
model
=
LayoutXLMForTokenClassification
.
from_pretrained
(
args
.
model_name_or_path
)
self
.
model
.
eval
()
# init ocr_engine
self
.
ocr_engine
=
PaddleOCR
(
rec_model_dir
=
args
.
ocr_rec_model_dir
,
det_model_dir
=
args
.
ocr_det_model_dir
,
use_angle_cls
=
False
,
show_log
=
False
)
# init dict
label2id_map
,
self
.
id2label_map
=
get_bio_label_maps
(
args
.
label_map_path
)
self
.
label2id_map_for_draw
=
dict
()
for
key
in
label2id_map
:
if
key
.
startswith
(
"I-"
):
label2id_map_for_draw
[
key
]
=
label2id_map
[
"B"
+
key
[
1
:]]
self
.
label2id_map_for_draw
[
key
]
=
label2id_map
[
"B"
+
key
[
1
:]]
else
:
label2id_map_for_draw
[
key
]
=
label2id_map
[
key
]
self
.
label2id_map_for_draw
[
key
]
=
label2id_map
[
key
]
# get infer img list
infer_imgs
=
get_image_file_list
(
args
.
infer_imgs
)
ocr_engine
=
build_ocr_engine
(
args
.
ocr_rec_model_dir
,
args
.
ocr_det_model_dir
)
# loop for infer
with
open
(
os
.
path
.
join
(
args
.
output_dir
,
"infer_results.txt"
),
"w"
)
as
fout
:
for
idx
,
img_path
in
enumerate
(
infer_imgs
):
print
(
"process: [{}/{}]"
.
format
(
idx
,
len
(
infer_imgs
),
img_path
))
img
=
cv2
.
imread
(
img_path
)
ocr_result
=
ocr_engine
.
ocr
(
img_path
,
cls
=
False
)
def
__call__
(
self
,
img
):
ocr_result
=
self
.
ocr_engine
.
ocr
(
img
,
cls
=
False
)
ocr_info
=
parse_ocr_info_for_ser
(
ocr_result
)
inputs
=
preprocess
(
tokenizer
=
tokenizer
,
tokenizer
=
self
.
tokenizer
,
ori_img
=
img
,
ocr_info
=
ocr_info
,
max_seq_len
=
args
.
max_seq_length
)
max_seq_len
=
self
.
max_seq_length
)
outputs
=
model
(
outputs
=
self
.
model
(
input_ids
=
inputs
[
"input_ids"
],
bbox
=
inputs
[
"bbox"
],
image
=
inputs
[
"image"
],
...
...
@@ -97,25 +96,36 @@ def infer(args):
attention_mask
=
inputs
[
"attention_mask"
])
preds
=
outputs
[
0
]
preds
=
postprocess
(
inputs
[
"attention_mask"
],
preds
,
id2label_map
)
preds
=
postprocess
(
inputs
[
"attention_mask"
],
preds
,
self
.
id2label_map
)
ocr_info
=
merge_preds_list_with_ocr_info
(
ocr_info
,
inputs
[
"segment_offset_id"
],
preds
,
label2id_map_for_draw
)
self
.
label2id_map_for_draw
)
return
ocr_info
,
inputs
if
__name__
==
"__main__"
:
args
=
parse_args
()
os
.
makedirs
(
args
.
output_dir
,
exist_ok
=
True
)
# get infer img list
infer_imgs
=
get_image_file_list
(
args
.
infer_imgs
)
# loop for infer
ser_engine
=
SerPredictor
(
args
)
with
open
(
os
.
path
.
join
(
args
.
output_dir
,
"infer_results.txt"
),
"w"
)
as
fout
:
for
idx
,
img_path
in
enumerate
(
infer_imgs
):
print
(
"process: [{}/{}], {}"
.
format
(
idx
,
len
(
infer_imgs
),
img_path
))
img
=
cv2
.
imread
(
img_path
)
result
,
_
=
ser_engine
(
img
)
fout
.
write
(
img_path
+
"
\t
"
+
json
.
dumps
(
{
"
ocr_info"
:
ocr_info
,
"
ser_resule"
:
result
,
},
ensure_ascii
=
False
)
+
"
\n
"
)
img_res
=
draw_ser_results
(
img
,
ocr_info
)
img_res
=
draw_ser_results
(
img
,
result
)
cv2
.
imwrite
(
os
.
path
.
join
(
args
.
output_dir
,
os
.
path
.
splitext
(
os
.
path
.
basename
(
img_path
))[
0
]
+
"_ser.jpg"
),
img_res
)
return
if
__name__
==
"__main__"
:
args
=
parse_args
()
infer
(
args
)
ppstructure/vqa/infer_ser_re_e2e.py
0 → 100644
View file @
4f8b5113
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
sys
import
json
import
cv2
import
numpy
as
np
from
copy
import
deepcopy
from
PIL
import
Image
import
paddle
from
paddlenlp.transformers
import
LayoutXLMModel
,
LayoutXLMTokenizer
,
LayoutXLMForRelationExtraction
# relative reference
from
utils
import
parse_args
,
get_image_file_list
,
draw_re_results
from
infer_ser_e2e
import
SerPredictor
def
make_input
(
ser_input
,
ser_result
,
max_seq_len
=
512
):
entities_labels
=
{
'HEADER'
:
0
,
'QUESTION'
:
1
,
'ANSWER'
:
2
}
entities
=
ser_input
[
'entities'
][
0
]
assert
len
(
entities
)
==
len
(
ser_result
)
# entities
start
=
[]
end
=
[]
label
=
[]
entity_idx_dict
=
{}
for
i
,
(
res
,
entity
)
in
enumerate
(
zip
(
ser_result
,
entities
)):
if
res
[
'pred'
]
==
'O'
:
continue
entity_idx_dict
[
len
(
start
)]
=
i
start
.
append
(
entity
[
'start'
])
end
.
append
(
entity
[
'end'
])
label
.
append
(
entities_labels
[
res
[
'pred'
]])
entities
=
dict
(
start
=
start
,
end
=
end
,
label
=
label
)
# relations
head
=
[]
tail
=
[]
for
i
in
range
(
len
(
entities
[
"label"
])):
for
j
in
range
(
len
(
entities
[
"label"
])):
if
entities
[
"label"
][
i
]
==
1
and
entities
[
"label"
][
j
]
==
2
:
head
.
append
(
i
)
tail
.
append
(
j
)
relations
=
dict
(
head
=
head
,
tail
=
tail
)
batch_size
=
ser_input
[
"input_ids"
].
shape
[
0
]
entities_batch
=
[]
relations_batch
=
[]
for
b
in
range
(
batch_size
):
entities_batch
.
append
(
entities
)
relations_batch
.
append
(
relations
)
ser_input
[
'entities'
]
=
entities_batch
ser_input
[
'relations'
]
=
relations_batch
ser_input
.
pop
(
'segment_offset_id'
)
return
ser_input
,
entity_idx_dict
class
SerReSystem
(
object
):
def
__init__
(
self
,
args
):
self
.
ser_engine
=
SerPredictor
(
args
)
self
.
tokenizer
=
LayoutXLMTokenizer
.
from_pretrained
(
args
.
re_model_name_or_path
)
self
.
model
=
LayoutXLMForRelationExtraction
.
from_pretrained
(
args
.
re_model_name_or_path
)
self
.
model
.
eval
()
def
__call__
(
self
,
img
):
ser_result
,
ser_inputs
=
self
.
ser_engine
(
img
)
re_input
,
entity_idx_dict
=
make_input
(
ser_inputs
,
ser_result
)
re_result
=
self
.
model
(
**
re_input
)
pred_relations
=
re_result
[
'pred_relations'
][
0
]
# 进行 relations 到 ocr信息的转换
result
=
[]
used_tail_id
=
[]
for
relation
in
pred_relations
:
if
relation
[
'tail_id'
]
in
used_tail_id
:
continue
used_tail_id
.
append
(
relation
[
'tail_id'
])
ocr_info_head
=
ser_result
[
entity_idx_dict
[
relation
[
'head_id'
]]]
ocr_info_tail
=
ser_result
[
entity_idx_dict
[
relation
[
'tail_id'
]]]
result
.
append
((
ocr_info_head
,
ocr_info_tail
))
return
result
if
__name__
==
"__main__"
:
args
=
parse_args
()
os
.
makedirs
(
args
.
output_dir
,
exist_ok
=
True
)
# get infer img list
infer_imgs
=
get_image_file_list
(
args
.
infer_imgs
)
# loop for infer
ser_re_engine
=
SerReSystem
(
args
)
with
open
(
os
.
path
.
join
(
args
.
output_dir
,
"infer_results.txt"
),
"w"
)
as
fout
:
for
idx
,
img_path
in
enumerate
(
infer_imgs
):
print
(
"process: [{}/{}], {}"
.
format
(
idx
,
len
(
infer_imgs
),
img_path
))
img
=
cv2
.
imread
(
img_path
)
result
=
ser_re_engine
(
img
)
fout
.
write
(
img_path
+
"
\t
"
+
json
.
dumps
(
{
"result"
:
result
,
},
ensure_ascii
=
False
)
+
"
\n
"
)
img_res
=
draw_re_results
(
img
,
result
)
cv2
.
imwrite
(
os
.
path
.
join
(
args
.
output_dir
,
os
.
path
.
splitext
(
os
.
path
.
basename
(
img_path
))[
0
]
+
"_re.jpg"
),
img_res
)
ppstructure/vqa/metric.py
0 → 100644
View file @
4f8b5113
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
re
import
numpy
as
np
import
logging
logger
=
logging
.
getLogger
(
__name__
)
PREFIX_CHECKPOINT_DIR
=
"checkpoint"
_re_checkpoint
=
re
.
compile
(
r
"^"
+
PREFIX_CHECKPOINT_DIR
+
r
"\-(\d+)$"
)
def
get_last_checkpoint
(
folder
):
content
=
os
.
listdir
(
folder
)
checkpoints
=
[
path
for
path
in
content
if
_re_checkpoint
.
search
(
path
)
is
not
None
and
os
.
path
.
isdir
(
os
.
path
.
join
(
folder
,
path
))
]
if
len
(
checkpoints
)
==
0
:
return
return
os
.
path
.
join
(
folder
,
max
(
checkpoints
,
key
=
lambda
x
:
int
(
_re_checkpoint
.
search
(
x
).
groups
()[
0
])))
def
re_score
(
pred_relations
,
gt_relations
,
mode
=
"strict"
):
"""Evaluate RE predictions
Args:
pred_relations (list) : list of list of predicted relations (several relations in each sentence)
gt_relations (list) : list of list of ground truth relations
rel = { "head": (start_idx (inclusive), end_idx (exclusive)),
"tail": (start_idx (inclusive), end_idx (exclusive)),
"head_type": ent_type,
"tail_type": ent_type,
"type": rel_type}
vocab (Vocab) : dataset vocabulary
mode (str) : in 'strict' or 'boundaries'"""
assert
mode
in
[
"strict"
,
"boundaries"
]
relation_types
=
[
v
for
v
in
[
0
,
1
]
if
not
v
==
0
]
scores
=
{
rel
:
{
"tp"
:
0
,
"fp"
:
0
,
"fn"
:
0
}
for
rel
in
relation_types
+
[
"ALL"
]
}
# Count GT relations and Predicted relations
n_sents
=
len
(
gt_relations
)
n_rels
=
sum
([
len
([
rel
for
rel
in
sent
])
for
sent
in
gt_relations
])
n_found
=
sum
([
len
([
rel
for
rel
in
sent
])
for
sent
in
pred_relations
])
# Count TP, FP and FN per type
for
pred_sent
,
gt_sent
in
zip
(
pred_relations
,
gt_relations
):
for
rel_type
in
relation_types
:
# strict mode takes argument types into account
if
mode
==
"strict"
:
pred_rels
=
{(
rel
[
"head"
],
rel
[
"head_type"
],
rel
[
"tail"
],
rel
[
"tail_type"
])
for
rel
in
pred_sent
if
rel
[
"type"
]
==
rel_type
}
gt_rels
=
{(
rel
[
"head"
],
rel
[
"head_type"
],
rel
[
"tail"
],
rel
[
"tail_type"
])
for
rel
in
gt_sent
if
rel
[
"type"
]
==
rel_type
}
# boundaries mode only takes argument spans into account
elif
mode
==
"boundaries"
:
pred_rels
=
{(
rel
[
"head"
],
rel
[
"tail"
])
for
rel
in
pred_sent
if
rel
[
"type"
]
==
rel_type
}
gt_rels
=
{(
rel
[
"head"
],
rel
[
"tail"
])
for
rel
in
gt_sent
if
rel
[
"type"
]
==
rel_type
}
scores
[
rel_type
][
"tp"
]
+=
len
(
pred_rels
&
gt_rels
)
scores
[
rel_type
][
"fp"
]
+=
len
(
pred_rels
-
gt_rels
)
scores
[
rel_type
][
"fn"
]
+=
len
(
gt_rels
-
pred_rels
)
# Compute per entity Precision / Recall / F1
for
rel_type
in
scores
.
keys
():
if
scores
[
rel_type
][
"tp"
]:
scores
[
rel_type
][
"p"
]
=
scores
[
rel_type
][
"tp"
]
/
(
scores
[
rel_type
][
"fp"
]
+
scores
[
rel_type
][
"tp"
])
scores
[
rel_type
][
"r"
]
=
scores
[
rel_type
][
"tp"
]
/
(
scores
[
rel_type
][
"fn"
]
+
scores
[
rel_type
][
"tp"
])
else
:
scores
[
rel_type
][
"p"
],
scores
[
rel_type
][
"r"
]
=
0
,
0
if
not
scores
[
rel_type
][
"p"
]
+
scores
[
rel_type
][
"r"
]
==
0
:
scores
[
rel_type
][
"f1"
]
=
(
2
*
scores
[
rel_type
][
"p"
]
*
scores
[
rel_type
][
"r"
]
/
(
scores
[
rel_type
][
"p"
]
+
scores
[
rel_type
][
"r"
]))
else
:
scores
[
rel_type
][
"f1"
]
=
0
# Compute micro F1 Scores
tp
=
sum
([
scores
[
rel_type
][
"tp"
]
for
rel_type
in
relation_types
])
fp
=
sum
([
scores
[
rel_type
][
"fp"
]
for
rel_type
in
relation_types
])
fn
=
sum
([
scores
[
rel_type
][
"fn"
]
for
rel_type
in
relation_types
])
if
tp
:
precision
=
tp
/
(
tp
+
fp
)
recall
=
tp
/
(
tp
+
fn
)
f1
=
2
*
precision
*
recall
/
(
precision
+
recall
)
else
:
precision
,
recall
,
f1
=
0
,
0
,
0
scores
[
"ALL"
][
"p"
]
=
precision
scores
[
"ALL"
][
"r"
]
=
recall
scores
[
"ALL"
][
"f1"
]
=
f1
scores
[
"ALL"
][
"tp"
]
=
tp
scores
[
"ALL"
][
"fp"
]
=
fp
scores
[
"ALL"
][
"fn"
]
=
fn
# Compute Macro F1 Scores
scores
[
"ALL"
][
"Macro_f1"
]
=
np
.
mean
(
[
scores
[
ent_type
][
"f1"
]
for
ent_type
in
relation_types
])
scores
[
"ALL"
][
"Macro_p"
]
=
np
.
mean
(
[
scores
[
ent_type
][
"p"
]
for
ent_type
in
relation_types
])
scores
[
"ALL"
][
"Macro_r"
]
=
np
.
mean
(
[
scores
[
ent_type
][
"r"
]
for
ent_type
in
relation_types
])
# logger.info(f"RE Evaluation in *** {mode.upper()} *** mode")
# logger.info(
# "processed {} sentences with {} relations; found: {} relations; correct: {}.".format(
# n_sents, n_rels, n_found, tp
# )
# )
# logger.info(
# "\tALL\t TP: {};\tFP: {};\tFN: {}".format(scores["ALL"]["tp"], scores["ALL"]["fp"], scores["ALL"]["fn"])
# )
# logger.info("\t\t(m avg): precision: {:.2f};\trecall: {:.2f};\tf1: {:.2f} (micro)".format(precision, recall, f1))
# logger.info(
# "\t\t(M avg): precision: {:.2f};\trecall: {:.2f};\tf1: {:.2f} (Macro)\n".format(
# scores["ALL"]["Macro_p"], scores["ALL"]["Macro_r"], scores["ALL"]["Macro_f1"]
# )
# )
# for rel_type in relation_types:
# logger.info(
# "\t{}: \tTP: {};\tFP: {};\tFN: {};\tprecision: {:.2f};\trecall: {:.2f};\tf1: {:.2f};\t{}".format(
# rel_type,
# scores[rel_type]["tp"],
# scores[rel_type]["fp"],
# scores[rel_type]["fn"],
# scores[rel_type]["p"],
# scores[rel_type]["r"],
# scores[rel_type]["f1"],
# scores[rel_type]["tp"] + scores[rel_type]["fp"],
# )
# )
return
scores
ppstructure/vqa/train_re.py
0 → 100644
View file @
4f8b5113
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
sys
__dir__
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
sys
.
path
.
append
(
__dir__
)
sys
.
path
.
append
(
os
.
path
.
abspath
(
os
.
path
.
join
(
__dir__
,
'../..'
)))
import
random
import
numpy
as
np
import
paddle
from
paddlenlp.transformers
import
LayoutXLMTokenizer
,
LayoutXLMModel
,
LayoutXLMForRelationExtraction
from
xfun
import
XFUNDataset
from
utils
import
parse_args
,
get_bio_label_maps
,
print_arguments
from
data_collator
import
DataCollator
from
metric
import
re_score
from
ppocr.utils.logging
import
get_logger
def
set_seed
(
seed
):
random
.
seed
(
seed
)
np
.
random
.
seed
(
seed
)
paddle
.
seed
(
seed
)
def
cal_metric
(
re_preds
,
re_labels
,
entities
):
gt_relations
=
[]
for
b
in
range
(
len
(
re_labels
)):
rel_sent
=
[]
for
head
,
tail
in
zip
(
re_labels
[
b
][
"head"
],
re_labels
[
b
][
"tail"
]):
rel
=
{}
rel
[
"head_id"
]
=
head
rel
[
"head"
]
=
(
entities
[
b
][
"start"
][
rel
[
"head_id"
]],
entities
[
b
][
"end"
][
rel
[
"head_id"
]])
rel
[
"head_type"
]
=
entities
[
b
][
"label"
][
rel
[
"head_id"
]]
rel
[
"tail_id"
]
=
tail
rel
[
"tail"
]
=
(
entities
[
b
][
"start"
][
rel
[
"tail_id"
]],
entities
[
b
][
"end"
][
rel
[
"tail_id"
]])
rel
[
"tail_type"
]
=
entities
[
b
][
"label"
][
rel
[
"tail_id"
]]
rel
[
"type"
]
=
1
rel_sent
.
append
(
rel
)
gt_relations
.
append
(
rel_sent
)
re_metrics
=
re_score
(
re_preds
,
gt_relations
,
mode
=
"boundaries"
)
return
re_metrics
def
evaluate
(
model
,
eval_dataloader
,
logger
,
prefix
=
""
):
# Eval!
logger
.
info
(
"***** Running evaluation {} *****"
.
format
(
prefix
))
logger
.
info
(
" Num examples = {}"
.
format
(
len
(
eval_dataloader
.
dataset
)))
re_preds
=
[]
re_labels
=
[]
entities
=
[]
eval_loss
=
0.0
model
.
eval
()
for
idx
,
batch
in
enumerate
(
eval_dataloader
):
with
paddle
.
no_grad
():
outputs
=
model
(
**
batch
)
loss
=
outputs
[
'loss'
].
mean
().
item
()
if
paddle
.
distributed
.
get_rank
()
==
0
:
logger
.
info
(
"[Eval] process: {}/{}, loss: {:.5f}"
.
format
(
idx
,
len
(
eval_dataloader
),
loss
))
eval_loss
+=
loss
re_preds
.
extend
(
outputs
[
'pred_relations'
])
re_labels
.
extend
(
batch
[
'relations'
])
entities
.
extend
(
batch
[
'entities'
])
re_metrics
=
cal_metric
(
re_preds
,
re_labels
,
entities
)
re_metrics
=
{
"precision"
:
re_metrics
[
"ALL"
][
"p"
],
"recall"
:
re_metrics
[
"ALL"
][
"r"
],
"f1"
:
re_metrics
[
"ALL"
][
"f1"
],
}
model
.
train
()
return
re_metrics
def
train
(
args
):
logger
=
get_logger
(
log_file
=
os
.
path
.
join
(
args
.
output_dir
,
"train.log"
))
print_arguments
(
args
,
logger
)
# Added here for reproducibility (even between python 2 and 3)
set_seed
(
args
.
seed
)
label2id_map
,
id2label_map
=
get_bio_label_maps
(
args
.
label_map_path
)
pad_token_label_id
=
paddle
.
nn
.
CrossEntropyLoss
().
ignore_index
# dist mode
if
paddle
.
distributed
.
get_world_size
()
>
1
:
paddle
.
distributed
.
init_parallel_env
()
tokenizer
=
LayoutXLMTokenizer
.
from_pretrained
(
args
.
model_name_or_path
)
model
=
LayoutXLMModel
.
from_pretrained
(
args
.
model_name_or_path
)
model
=
LayoutXLMForRelationExtraction
(
model
,
dropout
=
None
)
# dist mode
if
paddle
.
distributed
.
get_world_size
()
>
1
:
model
=
paddle
.
distributed
.
DataParallel
(
model
)
train_dataset
=
XFUNDataset
(
tokenizer
,
data_dir
=
args
.
train_data_dir
,
label_path
=
args
.
train_label_path
,
label2id_map
=
label2id_map
,
img_size
=
(
224
,
224
),
max_seq_len
=
args
.
max_seq_length
,
pad_token_label_id
=
pad_token_label_id
,
contains_re
=
True
,
add_special_ids
=
False
,
return_attention_mask
=
True
,
load_mode
=
'all'
)
eval_dataset
=
XFUNDataset
(
tokenizer
,
data_dir
=
args
.
eval_data_dir
,
label_path
=
args
.
eval_label_path
,
label2id_map
=
label2id_map
,
img_size
=
(
224
,
224
),
max_seq_len
=
args
.
max_seq_length
,
pad_token_label_id
=
pad_token_label_id
,
contains_re
=
True
,
add_special_ids
=
False
,
return_attention_mask
=
True
,
load_mode
=
'all'
)
train_sampler
=
paddle
.
io
.
DistributedBatchSampler
(
train_dataset
,
batch_size
=
args
.
per_gpu_train_batch_size
,
shuffle
=
True
)
args
.
train_batch_size
=
args
.
per_gpu_train_batch_size
*
\
max
(
1
,
paddle
.
distributed
.
get_world_size
())
train_dataloader
=
paddle
.
io
.
DataLoader
(
train_dataset
,
batch_sampler
=
train_sampler
,
num_workers
=
8
,
use_shared_memory
=
True
,
collate_fn
=
DataCollator
())
eval_dataloader
=
paddle
.
io
.
DataLoader
(
eval_dataset
,
batch_size
=
args
.
per_gpu_eval_batch_size
,
num_workers
=
8
,
shuffle
=
False
,
collate_fn
=
DataCollator
())
t_total
=
len
(
train_dataloader
)
*
args
.
num_train_epochs
# build linear decay with warmup lr sch
lr_scheduler
=
paddle
.
optimizer
.
lr
.
PolynomialDecay
(
learning_rate
=
args
.
learning_rate
,
decay_steps
=
t_total
,
end_lr
=
0.0
,
power
=
1.0
)
if
args
.
warmup_steps
>
0
:
lr_scheduler
=
paddle
.
optimizer
.
lr
.
LinearWarmup
(
lr_scheduler
,
args
.
warmup_steps
,
start_lr
=
0
,
end_lr
=
args
.
learning_rate
,
)
grad_clip
=
paddle
.
nn
.
ClipGradByNorm
(
clip_norm
=
10
)
optimizer
=
paddle
.
optimizer
.
Adam
(
learning_rate
=
args
.
learning_rate
,
parameters
=
model
.
parameters
(),
epsilon
=
args
.
adam_epsilon
,
grad_clip
=
grad_clip
,
weight_decay
=
args
.
weight_decay
)
# Train!
logger
.
info
(
"***** Running training *****"
)
logger
.
info
(
" Num examples = {}"
.
format
(
len
(
train_dataset
)))
logger
.
info
(
" Num Epochs = {}"
.
format
(
args
.
num_train_epochs
))
logger
.
info
(
" Instantaneous batch size per GPU = {}"
.
format
(
args
.
per_gpu_train_batch_size
))
logger
.
info
(
" Total train batch size (w. parallel, distributed & accumulation) = {}"
.
format
(
args
.
train_batch_size
*
paddle
.
distributed
.
get_world_size
()))
logger
.
info
(
" Total optimization steps = {}"
.
format
(
t_total
))
global_step
=
0
model
.
clear_gradients
()
train_dataloader_len
=
len
(
train_dataloader
)
best_metirc
=
{
'f1'
:
0
}
model
.
train
()
for
epoch
in
range
(
int
(
args
.
num_train_epochs
)):
for
step
,
batch
in
enumerate
(
train_dataloader
):
outputs
=
model
(
**
batch
)
# model outputs are always tuple in ppnlp (see doc)
loss
=
outputs
[
'loss'
]
loss
=
loss
.
mean
()
logger
.
info
(
"epoch: [{}/{}], iter: [{}/{}], global_step:{}, train loss: {}, lr: {}"
.
format
(
epoch
,
args
.
num_train_epochs
,
step
,
train_dataloader_len
,
global_step
,
np
.
mean
(
loss
.
numpy
()),
optimizer
.
get_lr
()))
loss
.
backward
()
optimizer
.
step
()
optimizer
.
clear_grad
()
# lr_scheduler.step() # Update learning rate schedule
global_step
+=
1
if
(
paddle
.
distributed
.
get_rank
()
==
0
and
args
.
eval_steps
>
0
and
global_step
%
args
.
eval_steps
==
0
):
# Log metrics
if
(
paddle
.
distributed
.
get_rank
()
==
0
and
args
.
evaluate_during_training
):
# Only evaluate when single GPU otherwise metrics may not average well
results
=
evaluate
(
model
,
eval_dataloader
,
logger
)
if
results
[
'f1'
]
>
best_metirc
[
'f1'
]:
best_metirc
=
results
output_dir
=
os
.
path
.
join
(
args
.
output_dir
,
"checkpoint-best"
)
os
.
makedirs
(
output_dir
,
exist_ok
=
True
)
model
.
save_pretrained
(
output_dir
)
tokenizer
.
save_pretrained
(
output_dir
)
paddle
.
save
(
args
,
os
.
path
.
join
(
output_dir
,
"training_args.bin"
))
logger
.
info
(
"Saving model checkpoint to {}"
.
format
(
output_dir
))
logger
.
info
(
"eval results: {}"
.
format
(
results
))
logger
.
info
(
"best_metirc: {}"
.
format
(
best_metirc
))
if
(
paddle
.
distributed
.
get_rank
()
==
0
and
args
.
save_steps
>
0
and
global_step
%
args
.
save_steps
==
0
):
# Save model checkpoint
output_dir
=
os
.
path
.
join
(
args
.
output_dir
,
"checkpoint-latest"
)
os
.
makedirs
(
output_dir
,
exist_ok
=
True
)
if
paddle
.
distributed
.
get_rank
()
==
0
:
model
.
save_pretrained
(
output_dir
)
tokenizer
.
save_pretrained
(
output_dir
)
paddle
.
save
(
args
,
os
.
path
.
join
(
output_dir
,
"training_args.bin"
))
logger
.
info
(
"Saving model checkpoint to {}"
.
format
(
output_dir
))
logger
.
info
(
"best_metirc: {}"
.
format
(
best_metirc
))
if
__name__
==
"__main__"
:
args
=
parse_args
()
os
.
makedirs
(
args
.
output_dir
,
exist_ok
=
True
)
train
(
args
)
ppstructure/vqa/train_ser.py
View file @
4f8b5113
...
...
@@ -12,8 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
sys
import
os
import
sys
__dir__
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
sys
.
path
.
append
(
__dir__
)
sys
.
path
.
append
(
os
.
path
.
abspath
(
os
.
path
.
join
(
__dir__
,
'../..'
)))
import
random
import
copy
import
logging
...
...
@@ -26,8 +31,9 @@ from paddlenlp.transformers import LayoutXLMModel, LayoutXLMTokenizer, LayoutXLM
from
xfun
import
XFUNDataset
from
utils
import
parse_args
from
utils
import
get_bio_label_maps
from
utils
import
print_arguments
logger
=
logging
.
get
L
ogger
(
__name__
)
from
ppocr.utils.logging
import
get
_l
ogger
def
set_seed
(
args
):
...
...
@@ -38,17 +44,8 @@ def set_seed(args):
def
train
(
args
):
os
.
makedirs
(
args
.
output_dir
,
exist_ok
=
True
)
logging
.
basicConfig
(
filename
=
os
.
path
.
join
(
args
.
output_dir
,
"train.log"
)
if
paddle
.
distributed
.
get_rank
()
==
0
else
None
,
format
=
"%(asctime)s - %(levelname)s - %(name)s - %(message)s"
,
datefmt
=
"%m/%d/%Y %H:%M:%S"
,
level
=
logging
.
INFO
if
paddle
.
distributed
.
get_rank
()
==
0
else
logging
.
WARN
,
)
ch
=
logging
.
StreamHandler
()
ch
.
setLevel
(
logging
.
DEBUG
)
logger
.
addHandler
(
ch
)
logger
=
get_logger
(
log_file
=
os
.
path
.
join
(
args
.
output_dir
,
"train.log"
))
print_arguments
(
args
,
logger
)
label2id_map
,
id2label_map
=
get_bio_label_maps
(
args
.
label_map_path
)
pad_token_label_id
=
paddle
.
nn
.
CrossEntropyLoss
().
ignore_index
...
...
@@ -136,10 +133,10 @@ def train(args):
loss
=
outputs
[
0
]
loss
=
loss
.
mean
()
logger
.
info
(
"
[
epoch
{}/{}]
[
iter: {}/{}]
lr: {:.5f
}, train loss: {
:.5f},
"
.
"epoch
: [
{}/{}]
,
iter:
[
{}/{}]
, global_step:{
}, train loss: {
}, lr: {}
"
.
format
(
epoch_id
,
args
.
num_train_epochs
,
step
,
len
(
train_dataloader
),
lr_scheduler
.
get_lr
()
,
loss
.
numpy
()[
0
]
))
len
(
train_dataloader
),
global_step
,
loss
.
numpy
()[
0
],
lr_scheduler
.
get_lr
()))
loss
.
backward
()
tr_loss
+=
loss
.
item
()
...
...
@@ -154,13 +151,9 @@ def train(args):
# Only evaluate when single GPU otherwise metrics may not average well
if
paddle
.
distributed
.
get_rank
(
)
==
0
and
args
.
evaluate_during_training
:
results
,
_
=
evaluate
(
args
,
model
,
tokenizer
,
label2id_map
,
id2label_map
,
pad_token_label_id
,
)
results
,
_
=
evaluate
(
args
,
model
,
tokenizer
,
label2id_map
,
id2label_map
,
pad_token_label_id
,
logger
)
if
best_metrics
is
None
or
results
[
"f1"
]
>=
best_metrics
[
"f1"
]:
...
...
@@ -204,6 +197,7 @@ def evaluate(args,
label2id_map
,
id2label_map
,
pad_token_label_id
,
logger
,
prefix
=
""
):
eval_dataset
=
XFUNDataset
(
tokenizer
,
...
...
@@ -299,15 +293,6 @@ def evaluate(args,
return
results
,
preds_list
def
print_arguments
(
args
):
"""print arguments"""
print
(
'----------- Configuration Arguments -----------'
)
for
arg
,
value
in
sorted
(
vars
(
args
).
items
()):
print
(
'%s: %s'
%
(
arg
,
value
))
print
(
'------------------------------------------------'
)
if
__name__
==
"__main__"
:
args
=
parse_args
()
print_arguments
(
args
)
train
(
args
)
ppstructure/vqa/utils.py
View file @
4f8b5113
...
...
@@ -24,8 +24,6 @@ import paddle
from
PIL
import
Image
,
ImageDraw
,
ImageFont
from
paddleocr
import
PaddleOCR
def
get_bio_label_maps
(
label_map_path
):
with
open
(
label_map_path
,
"r"
)
as
fin
:
...
...
@@ -66,9 +64,9 @@ def get_image_file_list(img_file):
def
draw_ser_results
(
image
,
ocr_results
,
font_path
=
"../doc/fonts/simfang.ttf"
,
font_path
=
"../
../
doc/fonts/simfang.ttf"
,
font_size
=
18
):
np
.
random
.
seed
(
0
)
np
.
random
.
seed
(
2021
)
color
=
(
np
.
random
.
permutation
(
range
(
255
)),
np
.
random
.
permutation
(
range
(
255
)),
np
.
random
.
permutation
(
range
(
255
)))
...
...
@@ -82,38 +80,64 @@ def draw_ser_results(image,
draw
=
ImageDraw
.
Draw
(
img_new
)
font
=
ImageFont
.
truetype
(
font_path
,
font_size
,
encoding
=
"utf-8"
)
for
ocr_info
in
ocr_results
:
if
ocr_info
[
"pred_id"
]
not
in
color_map
:
continue
color
=
color_map
[
ocr_info
[
"pred_id"
]]
text
=
"{}: {}"
.
format
(
ocr_info
[
"pred"
],
ocr_info
[
"text"
])
draw_box_txt
(
ocr_info
[
"bbox"
],
text
,
draw
,
font
,
font_size
,
color
)
img_new
=
Image
.
blend
(
image
,
img_new
,
0.5
)
return
np
.
array
(
img_new
)
def
draw_box_txt
(
bbox
,
text
,
draw
,
font
,
font_size
,
color
):
# draw ocr results outline
bbox
=
ocr_info
[
"bbox"
]
bbox
=
((
bbox
[
0
],
bbox
[
1
]),
(
bbox
[
2
],
bbox
[
3
]))
draw
.
rectangle
(
bbox
,
fill
=
color
)
# draw ocr results
text
=
"{}: {}"
.
format
(
ocr_info
[
"pred"
],
ocr_info
[
"text"
])
start_y
=
max
(
0
,
bbox
[
0
][
1
]
-
font_size
)
tw
=
font
.
getsize
(
text
)[
0
]
draw
.
rectangle
(
[(
bbox
[
0
][
0
]
+
1
,
start_y
),
(
bbox
[
0
][
0
]
+
tw
+
1
,
start_y
+
font_size
)],
[(
bbox
[
0
][
0
]
+
1
,
start_y
),
(
bbox
[
0
][
0
]
+
tw
+
1
,
start_y
+
font_size
)],
fill
=
(
0
,
0
,
255
))
draw
.
text
(
(
bbox
[
0
][
0
]
+
1
,
start_y
),
text
,
fill
=
(
255
,
255
,
255
),
font
=
font
)
draw
.
text
((
bbox
[
0
][
0
]
+
1
,
start_y
),
text
,
fill
=
(
255
,
255
,
255
),
font
=
font
)
img_new
=
Image
.
blend
(
image
,
img_new
,
0.5
)
return
np
.
array
(
img_new
)
def draw_re_results(image,
                    result,
                    font_path="../../doc/fonts/simfang.ttf",
                    font_size=18):
    """Visualize relation-extraction (RE) results on an image.

    Each element of *result* is a ``(head, tail)`` pair of OCR-info dicts with
    ``"bbox"`` (flat ``[x1, y1, x2, y2]``) and ``"text"`` keys. Heads are drawn
    in blue, tails in red, and a green line links their box centers.

    Args:
        image: ``np.ndarray`` or PIL ``Image``.
        result: iterable of ``(ocr_info_head, ocr_info_tail)`` pairs.
        font_path: TrueType font used for the labels.
        font_size: label text size in pixels.

    Returns:
        ``np.ndarray`` — the original image blended 50/50 with the annotations.
    """
    np.random.seed(0)
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    img_new = image.copy()
    draw = ImageDraw.Draw(img_new)

    font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
    color_head = (0, 0, 255)  # blue: relation head
    color_tail = (255, 0, 0)  # red: relation tail
    color_line = (0, 255, 0)  # green: head -> tail link

    for ocr_info_head, ocr_info_tail in result:
        draw_box_txt(ocr_info_head["bbox"], ocr_info_head["text"], draw, font,
                     font_size, color_head)
        draw_box_txt(ocr_info_tail["bbox"], ocr_info_tail["text"], draw, font,
                     font_size, color_tail)

        # Connect the two box centers with a line to show the relation.
        center_head = (
            (ocr_info_head['bbox'][0] + ocr_info_head['bbox'][2]) // 2,
            (ocr_info_head['bbox'][1] + ocr_info_head['bbox'][3]) // 2)
        center_tail = (
            (ocr_info_tail['bbox'][0] + ocr_info_tail['bbox'][2]) // 2,
            (ocr_info_tail['bbox'][1] + ocr_info_tail['bbox'][3]) // 2)
        draw.line([center_head, center_tail], fill=color_line, width=5)

    # Blend the annotation layer over the original for translucency.
    img_new = Image.blend(image, img_new, 0.5)
    return np.array(img_new)


def build_ocr_engine(rec_model_dir, det_model_dir):
    """Build a PaddleOCR engine with explicit detection/recognition models.

    Angle classification is disabled since VQA document images are assumed
    upright.

    Args:
        rec_model_dir: path to the recognition inference model directory.
        det_model_dir: path to the detection inference model directory.

    Returns:
        a configured ``PaddleOCR`` instance.
    """
    ocr_engine = PaddleOCR(
        rec_model_dir=rec_model_dir,
        det_model_dir=det_model_dir,
        use_angle_cls=False)
    return ocr_engine
# pad sentences
...
...
@@ -162,6 +186,9 @@ def split_page(encoded_inputs, max_seq_len=512):
truncate is often used in training process
"""
for
key
in
encoded_inputs
:
if
key
==
'entities'
:
encoded_inputs
[
key
]
=
[
encoded_inputs
[
key
]]
continue
encoded_inputs
[
key
]
=
paddle
.
to_tensor
(
encoded_inputs
[
key
])
if
encoded_inputs
[
key
].
ndim
<=
1
:
# for input_ids, att_mask and so on
encoded_inputs
[
key
]
=
encoded_inputs
[
key
].
reshape
([
-
1
,
max_seq_len
])
...
...
@@ -184,14 +211,14 @@ def preprocess(
height
=
ori_img
.
shape
[
0
]
width
=
ori_img
.
shape
[
1
]
img
=
cv2
.
resize
(
ori_img
,
(
224
,
224
)).
transpose
([
2
,
0
,
1
]).
astype
(
np
.
float32
)
img
=
cv2
.
resize
(
ori_img
,
img_size
).
transpose
([
2
,
0
,
1
]).
astype
(
np
.
float32
)
segment_offset_id
=
[]
words_list
=
[]
bbox_list
=
[]
input_ids_list
=
[]
token_type_ids_list
=
[]
entities
=
[]
for
info
in
ocr_info
:
# x1, y1, x2, y2
...
...
@@ -211,6 +238,13 @@ def preprocess(
encode_res
[
"token_type_ids"
]
=
encode_res
[
"token_type_ids"
][
1
:
-
1
]
encode_res
[
"attention_mask"
]
=
encode_res
[
"attention_mask"
][
1
:
-
1
]
# for re
entities
.
append
({
"start"
:
len
(
input_ids_list
),
"end"
:
len
(
input_ids_list
)
+
len
(
encode_res
[
"input_ids"
]),
"label"
:
"O"
,
})
input_ids_list
.
extend
(
encode_res
[
"input_ids"
])
token_type_ids_list
.
extend
(
encode_res
[
"token_type_ids"
])
bbox_list
.
extend
([
bbox
]
*
len
(
encode_res
[
"input_ids"
]))
...
...
@@ -222,6 +256,7 @@ def preprocess(
"token_type_ids"
:
token_type_ids_list
,
"bbox"
:
bbox_list
,
"attention_mask"
:
[
1
]
*
len
(
input_ids_list
),
"entities"
:
entities
}
encoded_inputs
=
pad_sentences
(
...
...
@@ -294,35 +329,64 @@ def merge_preds_list_with_ocr_info(ocr_info, segment_offset_id, preds_list,
return
ocr_info
def print_arguments(args, logger=None):
    """Print all parsed arguments, one per line, sorted by name.

    Args:
        args: an ``argparse.Namespace`` (or any object with a ``__dict__``).
        logger: optional ``logging.Logger``; falls back to ``print`` when
            None so the function also works before logging is configured.
    """
    print_func = logger.info if logger is not None else print
    print_func('----------- Configuration Arguments -----------')
    for arg, value in sorted(vars(args).items()):
        print_func('%s: %s' % (arg, value))
    print_func('------------------------------------------------')
def parse_args():
    """Build and run the command-line parser for VQA SER/RE train & infer.

    NOTE(review): the merged diff registered many of these options twice
    (e.g. --model_name_or_path, --train_data_dir, --seed); argparse raises
    ArgumentError on a duplicate registration, so each option is added
    exactly once here, keeping the newer superset of flags.

    Returns:
        argparse.Namespace with all options below.
    """
    parser = argparse.ArgumentParser()
    # Required parameters
    # yapf: disable
    parser.add_argument("--model_name_or_path", default=None, type=str, required=True,)
    parser.add_argument("--re_model_name_or_path", default=None, type=str, required=False,)
    parser.add_argument("--train_data_dir", default=None, type=str, required=False,)
    parser.add_argument("--train_label_path", default=None, type=str, required=False,)
    parser.add_argument("--eval_data_dir", default=None, type=str, required=False,)
    parser.add_argument("--eval_label_path", default=None, type=str, required=False,)
    parser.add_argument("--output_dir", default=None, type=str, required=True,)
    parser.add_argument("--max_seq_length", default=512, type=int,)
    parser.add_argument("--evaluate_during_training", action="store_true",)
    parser.add_argument("--per_gpu_train_batch_size", default=8, type=int, help="Batch size per GPU/CPU for training.",)
    parser.add_argument("--per_gpu_eval_batch_size", default=8, type=int, help="Batch size per GPU/CPU for eval.",)
    parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.",)
    parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay if we apply some.",)
    parser.add_argument("--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer.",)
    parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.",)
    parser.add_argument("--num_train_epochs", default=3, type=int, help="Total number of training epochs to perform.",)
    parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.",)
    parser.add_argument("--eval_steps", type=int, default=10, help="eval every X updates steps.",)
    parser.add_argument("--save_steps", type=int, default=50, help="Save checkpoint every X updates steps.",)
    parser.add_argument("--seed", type=int, default=2048, help="random seed for initialization",)
    parser.add_argument("--ocr_rec_model_dir", default=None, type=str, )
    parser.add_argument("--ocr_det_model_dir", default=None, type=str, )
    parser.add_argument("--label_map_path", default="./labels/labels_ser.txt", type=str, required=False, )
    parser.add_argument("--infer_imgs", default=None, type=str, required=False)
    parser.add_argument("--ocr_json_path", default=None, type=str, required=False, help="ocr prediction results")
    # yapf: enable
    args = parser.parse_args()
    return args
test_tipc/configs/ch_PP-OCRv2_det_PACT/train_infer_python.txt
View file @
4f8b5113
===========================train_params===========================
model_name:PPOCRv2_
ocr_
det
model_name:PPOCRv2_det
python:python3.7
gpu_list:0|0,1
Global.use_gpu:True|True
...
...
@@ -26,7 +26,7 @@ null:null
##
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.
pretrained_model
:
Global.
checkpoints
:
norm_export:null
quant_export:deploy/slim/quantization/export_model.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o
fpgm_export:
...
...
test_tipc/configs/ch_PP-OCRv2_rec/train_infer_python.txt
View file @
4f8b5113
...
...
@@ -6,7 +6,7 @@ Global.use_gpu:True|True
Global.auto_cast:fp32
Global.epoch_num:lite_train_lite_infer=3|whole_train_whole_infer=300
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:lite_train_lite_infer=1
28
|whole_train_whole_infer=128
Train.loader.batch_size_per_card:lite_train_lite_infer=1
6
|whole_train_whole_infer=128
Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./inference/rec_inference
...
...
@@ -34,7 +34,7 @@ distill_export:null
export1:null
export2:null
inference_dir:Student
infer_model:./inference/ch_PP-OCRv2_rec_infer
/
infer_model:./inference/ch_PP-OCRv2_rec_infer
infer_export:null
infer_quant:False
inference:tools/infer/predict_rec.py
...
...
@@ -45,7 +45,7 @@ inference:tools/infer/predict_rec.py
--use_tensorrt:False|True
--precision:fp32|fp16|int8
--rec_model_dir:
--image_dir:/inference/rec_inference
--image_dir:
.
/inference/rec_inference
null:null
--benchmark:True
null:null
...
...
test_tipc/configs/ch_PP-OCRv2_rec_PACT/train_infer_python.txt
View file @
4f8b5113
...
...
@@ -6,15 +6,15 @@ Global.use_gpu:True|True
Global.auto_cast:fp32
Global.epoch_num:lite_train_lite_infer=3|whole_train_whole_infer=300
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:lite_train_lite_infer=1
28
|whole_train_whole_infer=128
Train.loader.batch_size_per_card:lite_train_lite_infer=1
6
|whole_train_whole_infer=128
Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./inference/rec_inference
null:null
##
trainer:pact_train
norm_train:
deploy/slim/quantization/quant.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o
pact_train:
null
norm_train:
null
pact_train:
deploy/slim/quantization/quant.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o
fpgm_train:null
distill_train:null
null:null
...
...
@@ -27,14 +27,14 @@ null:null
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.pretrained_model:
norm_export:
deploy/slim/quantization/export_model.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o
quant_export:
fpgm_export:
norm_export:
null
quant_export:
deploy/slim/quantization/export_model.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o
fpgm_export:
null
distill_export:null
export1:null
export2:null
inference_dir:Student
infer_model:./inference/ch_PP-OCRv2_rec_infer
/
infer_model:./inference/ch_PP-OCRv2_rec_infer
infer_export:null
infer_quant:True
inference:tools/infer/predict_rec.py
...
...
@@ -45,7 +45,7 @@ inference:tools/infer/predict_rec.py
--use_tensorrt:False|True
--precision:fp32|fp16|int8
--rec_model_dir:
--image_dir:/inference/rec_inference
--image_dir:
.
/inference/rec_inference
null:null
--benchmark:True
null:null
...
...
test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt
View file @
4f8b5113
...
...
@@ -4,7 +4,7 @@ python:python3.7
gpu_list:0|0,1
Global.use_gpu:True|True
Global.auto_cast:null
Global.epoch_num:lite_train_lite_infer=
5
|whole_train_whole_infer=300
Global.epoch_num:lite_train_lite_infer=
100
|whole_train_whole_infer=300
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4
Global.pretrained_model:null
...
...
test_tipc/configs/ch_ppocr_mobile_v2.0_det_PACT/train_infer_python.txt
View file @
4f8b5113
...
...
@@ -4,7 +4,7 @@ python:python3.7
gpu_list:0|0,1
Global.use_gpu:True|True
Global.auto_cast:null
Global.epoch_num:lite_train_lite_infer=
5
|whole_train_whole_infer=300
Global.epoch_num:lite_train_lite_infer=
20
|whole_train_whole_infer=300
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4
Global.pretrained_model:null
...
...
@@ -26,7 +26,7 @@ null:null
##
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.
pretrained_model
:
Global.
checkpoints
:
norm_export:null
quant_export:deploy/slim/quantization/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o
fpgm_export:null
...
...
test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/train_infer_python.txt
View file @
4f8b5113
...
...
@@ -28,7 +28,7 @@ null:null
Global.save_inference_dir:./output/
Global.checkpoints:
norm_export:null
quant_export:deploy/slim/quantization/export_model.py -ctest_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/rec_chinese_lite_train_v2.0.yml -o
quant_export:deploy/slim/quantization/export_model.py -c
test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/rec_chinese_lite_train_v2.0.yml -o
fpgm_export:null
distill_export:null
export1:null
...
...
test_tipc/configs/ch_ppocr_server_v2.0_det/train_infer_python.txt
View file @
4f8b5113
...
...
@@ -12,22 +12,22 @@ train_model_name:latest
train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
null:null
##
trainer:norm_train
|pact_train|fpgm_export
norm_train:tools/train.py -c test_tipc/configs/ppocr_
det_
server/det_r50_vd_db.yml -o
quant_
export:deploy/slim/quantization/export_model.py -c test_tipc/configs/ppocr_det_server/det_r50_vd_db.yml -o
fpgm_
export:deploy/slim/prune/export_prune_model.py -c test_tipc/configs/ppocr_det_server/det_r50_vd_db.yml -o
trainer:norm_train
norm_train:tools/train.py -c test_tipc/configs/
ch_
ppocr_server
_v2.0_det
/det_r50_vd_db.yml -o
quant_
train:null
fpgm_
train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:tools/eval.py -c test_tipc/configs/ppocr_
det_
server/det_r50_vd_db.yml -o
eval:tools/eval.py -c test_tipc/configs/
ch_
ppocr_server
_v2.0_det
/det_r50_vd_db.yml -o
null:null
##
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.pretrained_model:
norm_export:tools/export_model.py -c test_tipc/configs/ppocr_
det_
server/det_r50_vd_db.yml -o
norm_export:tools/export_model.py -c test_tipc/configs/
ch_
ppocr_server
_v2.0_det
/det_r50_vd_db.yml -o
quant_export:null
fpgm_export:null
distill_export:null
...
...
test_tipc/configs/det_r50_vd_pse_v2.0/train_infer_python.txt
View file @
4f8b5113
...
...
@@ -35,7 +35,7 @@ export1:null
export2:null
##
train_model:./inference/det_r50_vd_pse/best_accuracy
infer_export:tools/export_model.py -c test_tipc/
c
configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml -o
infer_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml -o
infer_quant:False
inference:tools/infer/predict_det.py
--use_gpu:True|False
...
...
test_tipc/configs/det_r50_vd_sast_icdar15_v2.0/det_r50_vd_sast_icdar2015.yml
View file @
4f8b5113
...
...
@@ -62,7 +62,7 @@ Train:
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/train_icdar2015_label.txt
ratio_list
:
[
0.1
,
0.45
,
0.3
,
0.15
]
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
...
...
test_tipc/configs/det_r50_vd_sast_icdar15_v2.0/train_infer_python.txt
View file @
4f8b5113
...
...
@@ -48,4 +48,4 @@ inference:tools/infer/predict_det.py
--image_dir:./inference/ch_det_data_50/all-sum-510/
null:null
--benchmark:True
null:null
--det_algorithm:SAST
test_tipc/configs/det_r50_vd_sast_totaltext_v2.0/train_infer_python.txt
View file @
4f8b5113
...
...
@@ -48,4 +48,4 @@ inference:tools/infer/predict_det.py
--image_dir:./inference/ch_det_data_50/all-sum-510/
null:null
--benchmark:True
null:null
--det_algorithm:SAST
test_tipc/configs/en_server_pgnetA/train_infer_python.txt
View file @
4f8b5113
...
...
@@ -4,7 +4,7 @@ python:python3.7
gpu_list:0|0,1
Global.use_gpu:True|True
Global.auto_cast:null
Global.epoch_num:lite_train_lite_infer=
1
|whole_train_whole_infer=500
Global.epoch_num:lite_train_lite_infer=
5
|whole_train_whole_infer=500
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=14
Global.pretrained_model:null
...
...
@@ -42,7 +42,7 @@ inference:tools/infer/predict_e2e.py
--enable_mkldnn:True|False
--cpu_threads:1|6
--rec_batch_num:1
--use_tensorrt:False
|True
--use_tensorrt:False
--precision:fp32|fp16|int8
--e2e_model_dir:
--image_dir:./inference/ch_det_data_50/all-sum-510/
...
...
test_tipc/configs/rec_mtb_nrtr/train_infer_python.txt
View file @
4f8b5113
...
...
@@ -6,7 +6,7 @@ Global.use_gpu:True|True
Global.auto_cast:null
Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=300
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:lite_train_lite_infer=1
28
|whole_train_whole_infer=
128
Train.loader.batch_size_per_card:lite_train_lite_infer=1
6
|whole_train_whole_infer=
64
Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./inference/rec_inference
...
...
test_tipc/configs/rec_mv3_tps_bilstm_att_v2.0/train_infer_python.txt
View file @
4f8b5113
...
...
@@ -6,7 +6,7 @@ Global.use_gpu:True|True
Global.auto_cast:null
Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=300
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:lite_train_lite_infer=1
28
|whole_train_whole_infer=
128
Train.loader.batch_size_per_card:lite_train_lite_infer=1
6
|whole_train_whole_infer=
64
Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./inference/rec_inference
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment