Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
paddle_dbnet
Commits
d8571bdb
Commit
d8571bdb
authored
Jan 05, 2022
by
WenmuZhou
Browse files
move imgs to doc
parent
0d7ee968
Changes
17
Hide whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
16 additions
and
16 deletions
+16
-16
configs/vqa/re/layoutxlm.yml
configs/vqa/re/layoutxlm.yml
+3
-3
configs/vqa/ser/layoutlm.yml
configs/vqa/ser/layoutlm.yml
+2
-2
configs/vqa/ser/layoutxlm.yml
configs/vqa/ser/layoutxlm.yml
+2
-2
doc/vqa/input/zh_val_0.jpg
doc/vqa/input/zh_val_0.jpg
+0
-0
doc/vqa/input/zh_val_21.jpg
doc/vqa/input/zh_val_21.jpg
+0
-0
doc/vqa/input/zh_val_40.jpg
doc/vqa/input/zh_val_40.jpg
+0
-0
doc/vqa/input/zh_val_42.jpg
doc/vqa/input/zh_val_42.jpg
+0
-0
doc/vqa/result_re/zh_val_21_re.jpg
doc/vqa/result_re/zh_val_21_re.jpg
+0
-0
doc/vqa/result_re/zh_val_40_re.jpg
doc/vqa/result_re/zh_val_40_re.jpg
+0
-0
doc/vqa/result_ser/zh_val_0_ser.jpg
doc/vqa/result_ser/zh_val_0_ser.jpg
+0
-0
doc/vqa/result_ser/zh_val_42_ser.jpg
doc/vqa/result_ser/zh_val_42_ser.jpg
+0
-0
ppocr/data/imaug/label_ops.py
ppocr/data/imaug/label_ops.py
+1
-1
ppocr/data/simple_dataset.py
ppocr/data/simple_dataset.py
+1
-1
ppocr/losses/vqa_token_layoutlm_loss.py
ppocr/losses/vqa_token_layoutlm_loss.py
+1
-1
ppocr/postprocess/vqa_token_re_layoutlm_postprocess.py
ppocr/postprocess/vqa_token_re_layoutlm_postprocess.py
+1
-1
ppstructure/vqa/README.md
ppstructure/vqa/README.md
+4
-4
tools/infer_vqa_token_ser_re.py
tools/infer_vqa_token_ser_re.py
+1
-1
No files found.
configs/vqa/re/layoutxlm.yml
View file @
d8571bdb
...
@@ -6,12 +6,12 @@ Global:
...
@@ -6,12 +6,12 @@ Global:
save_model_dir
:
./output/re_layoutxlm/
save_model_dir
:
./output/re_layoutxlm/
save_epoch_step
:
2000
save_epoch_step
:
2000
# evaluation is run every 10 iterations after the 0th iteration
# evaluation is run every 10 iterations after the 0th iteration
eval_batch_step
:
[
0
,
38
]
eval_batch_step
:
[
0
,
19
]
cal_metric_during_train
:
False
cal_metric_during_train
:
False
pretrained_model
:
&pretrained_model
layoutxlm-base-uncased
pretrained_model
:
&pretrained_model
layoutxlm-base-uncased
# This field can only be changed by modifying the configuration file
save_inference_dir
:
save_inference_dir
:
use_visualdl
:
False
use_visualdl
:
False
infer_img
:
ppstructure/vqa/images
/input/zh_val_21.jpg
infer_img
:
doc/vqa
/input/zh_val_21.jpg
save_res_path
:
./output/re/
save_res_path
:
./output/re/
Architecture
:
Architecture
:
...
...
configs/vqa/ser/layoutlm.yml
View file @
d8571bdb
...
@@ -8,10 +8,10 @@ Global:
...
@@ -8,10 +8,10 @@ Global:
# evaluation is run every 10 iterations after the 0th iteration
# evaluation is run every 10 iterations after the 0th iteration
eval_batch_step
:
[
0
,
19
]
eval_batch_step
:
[
0
,
19
]
cal_metric_during_train
:
False
cal_metric_during_train
:
False
pretrained_model
:
&pretrained_model
layoutlm-base-uncased
pretrained_model
:
&pretrained_model
layoutlm-base-uncased
# This field can only be changed by modifying the configuration file
save_inference_dir
:
save_inference_dir
:
use_visualdl
:
False
use_visualdl
:
False
infer_img
:
ppstructure/vqa/images
/input/zh_val_0.jpg
infer_img
:
doc/vqa
/input/zh_val_0.jpg
save_res_path
:
./output/ser/predicts_layoutlm.txt
save_res_path
:
./output/ser/predicts_layoutlm.txt
Architecture
:
Architecture
:
...
...
configs/vqa/ser/layoutxlm.yml
View file @
d8571bdb
...
@@ -8,10 +8,10 @@ Global:
...
@@ -8,10 +8,10 @@ Global:
# evaluation is run every 10 iterations after the 0th iteration
# evaluation is run every 10 iterations after the 0th iteration
eval_batch_step
:
[
0
,
19
]
eval_batch_step
:
[
0
,
19
]
cal_metric_during_train
:
False
cal_metric_during_train
:
False
pretrained_model
:
&pretrained_model
layoutxlm-base-uncased
pretrained_model
:
&pretrained_model
layoutxlm-base-uncased
# This field can only be changed by modifying the configuration file
save_inference_dir
:
save_inference_dir
:
use_visualdl
:
False
use_visualdl
:
False
infer_img
:
ppstructure/vqa/images
/input/zh_val_42.jpg
infer_img
:
doc/vqa
/input/zh_val_42.jpg
save_res_path
:
./output/ser
save_res_path
:
./output/ser
Architecture
:
Architecture
:
...
...
ppstructure/vqa/images
/input/zh_val_0.jpg
→
doc/vqa
/input/zh_val_0.jpg
View file @
d8571bdb
File moved
ppstructure/vqa/images
/input/zh_val_21.jpg
→
doc/vqa
/input/zh_val_21.jpg
View file @
d8571bdb
File moved
ppstructure/vqa/images
/input/zh_val_40.jpg
→
doc/vqa
/input/zh_val_40.jpg
View file @
d8571bdb
File moved
ppstructure/vqa/images
/input/zh_val_42.jpg
→
doc/vqa
/input/zh_val_42.jpg
View file @
d8571bdb
File moved
ppstructure/vqa/images
/result_re/zh_val_21_re.jpg
→
doc/vqa
/result_re/zh_val_21_re.jpg
View file @
d8571bdb
File moved
ppstructure/vqa/images
/result_re/zh_val_40_re.jpg
→
doc/vqa
/result_re/zh_val_40_re.jpg
View file @
d8571bdb
File moved
ppstructure/vqa/images
/result_ser/zh_val_0_ser.jpg
→
doc/vqa
/result_ser/zh_val_0_ser.jpg
View file @
d8571bdb
File moved
ppstructure/vqa/images
/result_ser/zh_val_42_ser.jpg
→
doc/vqa
/result_ser/zh_val_42_ser.jpg
View file @
d8571bdb
File moved
ppocr/data/imaug/label_ops.py
View file @
d8571bdb
...
@@ -787,7 +787,7 @@ class SARLabelEncode(BaseRecLabelEncode):
...
@@ -787,7 +787,7 @@ class SARLabelEncode(BaseRecLabelEncode):
class
VQATokenLabelEncode
(
object
):
class
VQATokenLabelEncode
(
object
):
"""
"""
基于NLP的标签编码
Label encode for NLP VQA methods
"""
"""
def
__init__
(
self
,
def
__init__
(
self
,
...
...
ppocr/data/simple_dataset.py
View file @
d8571bdb
...
@@ -122,7 +122,7 @@ class SimpleDataSet(Dataset):
...
@@ -122,7 +122,7 @@ class SimpleDataSet(Dataset):
self
.
logger
.
error
(
self
.
logger
.
error
(
"When parsing line {}, error happened with msg: {}"
.
format
(
"When parsing line {}, error happened with msg: {}"
.
format
(
data_line
,
traceback
.
format_exc
()))
data_line
,
traceback
.
format_exc
()))
#
outs = None
outs
=
None
if
outs
is
None
:
if
outs
is
None
:
# during evaluation, we should fix the idx to get same results for many times of evaluation.
# during evaluation, we should fix the idx to get same results for many times of evaluation.
rnd_idx
=
np
.
random
.
randint
(
self
.
__len__
(
rnd_idx
=
np
.
random
.
randint
(
self
.
__len__
(
...
...
ppocr/losses/vqa_token_layoutlm_loss.py
View file @
d8571bdb
# copyright (c) 201
9
PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 20
2
1 PaddlePaddle Authors. All Rights Reserve.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# you may not use this file except in compliance with the License.
...
...
ppocr/postprocess/vqa_token_re_layoutlm_postprocess.py
View file @
d8571bdb
...
@@ -34,7 +34,7 @@ class VQAReTokenLayoutLMPostProcess(object):
...
@@ -34,7 +34,7 @@ class VQAReTokenLayoutLMPostProcess(object):
entity_idx_dict_batch
=
kwargs
[
'entity_idx_dict_batch'
]
entity_idx_dict_batch
=
kwargs
[
'entity_idx_dict_batch'
]
pred_relations
=
preds
[
'pred_relations'
]
pred_relations
=
preds
[
'pred_relations'
]
#
进行
relations
到
ocr
信息的转换
#
merge
relations
and
ocr
info
results
=
[]
results
=
[]
for
pred_relation
,
ser_result
,
entity_idx_dict
in
zip
(
for
pred_relation
,
ser_result
,
entity_idx_dict
in
zip
(
pred_relations
,
ser_results
,
entity_idx_dict_batch
):
pred_relations
,
ser_results
,
entity_idx_dict_batch
):
...
...
ppstructure/vqa/README.md
View file @
d8571bdb
...
@@ -34,7 +34,7 @@ PP-Structure 里的 DOC-VQA算法基于PaddleNLP自然语言处理算法库进
...
@@ -34,7 +34,7 @@ PP-Structure 里的 DOC-VQA算法基于PaddleNLP自然语言处理算法库进
### 2.1 SER
### 2.1 SER

| !
[](
.
/images
/result_ser/zh_val_42_ser.jpg
)

| !
[](
.
./../doc/vqa
/result_ser/zh_val_42_ser.jpg
)
---|---
---|---
图中不同颜色的框表示不同的类别,对于XFUN数据集,有
`QUESTION`
,
`ANSWER`
,
`HEADER`
3种类别
图中不同颜色的框表示不同的类别,对于XFUN数据集,有
`QUESTION`
,
`ANSWER`
,
`HEADER`
3种类别
...
@@ -48,7 +48,7 @@ PP-Structure 里的 DOC-VQA算法基于PaddleNLP自然语言处理算法库进
...
@@ -48,7 +48,7 @@ PP-Structure 里的 DOC-VQA算法基于PaddleNLP自然语言处理算法库进
### 2.2 RE
### 2.2 RE

| !
[](
.
/images
/result_re/zh_val_40_re.jpg
)

| !
[](
.
./../doc/vqa
/result_re/zh_val_40_re.jpg
)
---|---
---|---
...
@@ -164,7 +164,7 @@ CUDA_VISIBLE_DEVICES=0 python3 tools/eval.py -c configs/vqa/ser/layoutxlm.yml -o
...
@@ -164,7 +164,7 @@ CUDA_VISIBLE_DEVICES=0 python3 tools/eval.py -c configs/vqa/ser/layoutxlm.yml -o
使用如下命令即可完成
`OCR引擎 + SER`
的串联预测
使用如下命令即可完成
`OCR引擎 + SER`
的串联预测
```
shell
```
shell
CUDA_VISIBLE_DEVICES
=
0 python3 tools/infer_vqa_token_ser.py
-c
configs/vqa/ser/layoutxlm.yml
-o
Architecture.Backbone.checkpoints
=
PP-Layout_v1.0_ser_pretrained/ Global.infer_img
=
ppstructure/vqa/images
/input/zh_val_42.jpg
CUDA_VISIBLE_DEVICES
=
0 python3 tools/infer_vqa_token_ser.py
-c
configs/vqa/ser/layoutxlm.yml
-o
Architecture.Backbone.checkpoints
=
PP-Layout_v1.0_ser_pretrained/ Global.infer_img
=
doc/vqa
/input/zh_val_42.jpg
```
```
最终会在
`config.Global.save_res_path`
字段所配置的目录下保存预测结果可视化图像以及预测结果文本文件,预测结果文本文件名为
`infer_results.txt`
。
最终会在
`config.Global.save_res_path`
字段所配置的目录下保存预测结果可视化图像以及预测结果文本文件,预测结果文本文件名为
`infer_results.txt`
。
...
@@ -219,7 +219,7 @@ CUDA_VISIBLE_DEVICES=0 python3 tools/eval.py -c configs/vqa/re/layoutxlm.yml -o
...
@@ -219,7 +219,7 @@ CUDA_VISIBLE_DEVICES=0 python3 tools/eval.py -c configs/vqa/re/layoutxlm.yml -o
使用如下命令即可完成
`OCR引擎 + SER + RE`
的串联预测
使用如下命令即可完成
`OCR引擎 + SER + RE`
的串联预测
```
shell
```
shell
export
CUDA_VISIBLE_DEVICES
=
0
export
CUDA_VISIBLE_DEVICES
=
0
python3 tools/infer_vqa_token_ser_re.py
-c
configs/vqa/re/layoutxlm.yml
-o
Architecture.Backbone.checkpoints
=
PP-Layout_v1.0_re_pretrained/ Global.infer_img
=
ppstructure/vqa/images
/input/zh_val_21.jpg
-c_ser
configs/vqa/ser/layoutxlm.yml
-o_ser
Architecture.Backbone.checkpoints
=
PP-Layout_v1.0_ser_pretrained/
python3 tools/infer_vqa_token_ser_re.py
-c
configs/vqa/re/layoutxlm.yml
-o
Architecture.Backbone.checkpoints
=
PP-Layout_v1.0_re_pretrained/ Global.infer_img
=
doc/vqa
/input/zh_val_21.jpg
-c_ser
configs/vqa/ser/layoutxlm.yml
-o_ser
Architecture.Backbone.checkpoints
=
PP-Layout_v1.0_ser_pretrained/
```
```
最终会在
`config.Global.save_res_path`
字段所配置的目录下保存预测结果可视化图像以及预测结果文本文件,预测结果文本文件名为
`infer_results.txt`
。
最终会在
`config.Global.save_res_path`
字段所配置的目录下保存预测结果可视化图像以及预测结果文本文件,预测结果文本文件名为
`infer_results.txt`
。
...
...
tools/infer_vqa_token_ser_re.py
View file @
d8571bdb
...
@@ -104,7 +104,7 @@ def make_input(ser_inputs, ser_results):
...
@@ -104,7 +104,7 @@ def make_input(ser_inputs, ser_results):
ser_inputs
[
8
]
=
entities_batch
ser_inputs
[
8
]
=
entities_batch
ser_inputs
.
append
(
relations_batch
)
ser_inputs
.
append
(
relations_batch
)
# remove ocr_info segment_offset_id and label in ser input
ser_inputs
.
pop
(
7
)
ser_inputs
.
pop
(
7
)
ser_inputs
.
pop
(
6
)
ser_inputs
.
pop
(
6
)
ser_inputs
.
pop
(
1
)
ser_inputs
.
pop
(
1
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment