Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
paddle_dbnet
Commits
8c80a251
Unverified
Commit
8c80a251
authored
Dec 20, 2021
by
zhoujun
Committed by
GitHub
Dec 20, 2021
Browse files
Merge pull request #4977 from WenmuZhou/fix_vqa
fix win train bug
parents
01499821
a0a0a363
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
45 additions
and
17 deletions
+45
-17
ppstructure/vqa/README.md
ppstructure/vqa/README.md
+5
-0
ppstructure/vqa/eval_re.py
ppstructure/vqa/eval_re.py
+1
-1
ppstructure/vqa/eval_ser.py
ppstructure/vqa/eval_ser.py
+1
-1
ppstructure/vqa/train_re.py
ppstructure/vqa/train_re.py
+2
-2
ppstructure/vqa/train_ser.py
ppstructure/vqa/train_ser.py
+2
-2
ppstructure/vqa/utils.py
ppstructure/vqa/utils.py
+1
-0
ppstructure/vqa/xfun.py
ppstructure/vqa/xfun.py
+33
-11
No files found.
ppstructure/vqa/README.md
View file @
8c80a251
...
...
@@ -165,6 +165,7 @@ python3.7 train_ser.py \
--learning_rate
5e-5
\
--warmup_steps
50
\
--evaluate_during_training
\
--num_workers
8
\
--seed
2048
\
--resume
```
...
...
@@ -177,6 +178,7 @@ python3 eval_ser.py \
--eval_data_dir
"XFUND/zh_val/image"
\
--eval_label_path
"XFUND/zh_val/xfun_normalize_val.json"
\
--per_gpu_eval_batch_size
8
\
--num_workers
8
\
--output_dir
"output/ser/"
\
--seed
2048
```
...
...
@@ -234,6 +236,7 @@ python3 train_re.py \
--warmup_steps
50
\
--per_gpu_train_batch_size
8
\
--per_gpu_eval_batch_size
8
\
--num_workers
8
\
--evaluate_during_training
\
--seed
2048
...
...
@@ -257,6 +260,7 @@ python3 train_re.py \
--warmup_steps
50
\
--per_gpu_train_batch_size
8
\
--per_gpu_eval_batch_size
8
\
--num_workers
8
\
--evaluate_during_training
\
--seed
2048
\
--resume
...
...
@@ -276,6 +280,7 @@ python3 eval_re.py \
--label_map_path
'labels/labels_ser.txt'
\
--output_dir
"output/re_test/"
\
--per_gpu_eval_batch_size
8
\
--num_workers
8
\
--seed
2048
```
最终会打印出
`precision`
,
`recall`
,
`f1`
等指标
...
...
ppstructure/vqa/eval_re.py
View file @
8c80a251
...
...
@@ -112,7 +112,7 @@ def eval(args):
eval_dataloader
=
paddle
.
io
.
DataLoader
(
eval_dataset
,
batch_size
=
args
.
per_gpu_eval_batch_size
,
num_workers
=
8
,
num_workers
=
args
.
num_workers
,
shuffle
=
False
,
collate_fn
=
DataCollator
())
...
...
ppstructure/vqa/eval_ser.py
View file @
8c80a251
...
...
@@ -61,7 +61,7 @@ def eval(args):
eval_dataloader
=
paddle
.
io
.
DataLoader
(
eval_dataset
,
batch_size
=
args
.
per_gpu_eval_batch_size
,
num_workers
=
0
,
num_workers
=
args
.
num_workers
,
use_shared_memory
=
True
,
collate_fn
=
None
,
)
...
...
ppstructure/vqa/train_re.py
View file @
8c80a251
...
...
@@ -97,14 +97,14 @@ def train(args):
train_dataloader
=
paddle
.
io
.
DataLoader
(
train_dataset
,
batch_sampler
=
train_sampler
,
num_workers
=
8
,
num_workers
=
args
.
num_workers
,
use_shared_memory
=
True
,
collate_fn
=
DataCollator
())
eval_dataloader
=
paddle
.
io
.
DataLoader
(
eval_dataset
,
batch_size
=
args
.
per_gpu_eval_batch_size
,
num_workers
=
8
,
num_workers
=
args
.
num_workers
,
shuffle
=
False
,
collate_fn
=
DataCollator
())
...
...
ppstructure/vqa/train_ser.py
View file @
8c80a251
...
...
@@ -94,14 +94,14 @@ def train(args):
train_dataloader
=
paddle
.
io
.
DataLoader
(
train_dataset
,
batch_sampler
=
train_sampler
,
num_workers
=
0
,
num_workers
=
args
.
num_workers
,
use_shared_memory
=
True
,
collate_fn
=
None
,
)
eval_dataloader
=
paddle
.
io
.
DataLoader
(
eval_dataset
,
batch_size
=
args
.
per_gpu_eval_batch_size
,
num_workers
=
0
,
num_workers
=
args
.
num_workers
,
use_shared_memory
=
True
,
collate_fn
=
None
,
)
...
...
ppstructure/vqa/utils.py
View file @
8c80a251
...
...
@@ -363,6 +363,7 @@ def parse_args():
parser
.
add_argument
(
"--output_dir"
,
default
=
None
,
type
=
str
,
required
=
True
,)
parser
.
add_argument
(
"--max_seq_length"
,
default
=
512
,
type
=
int
,)
parser
.
add_argument
(
"--evaluate_during_training"
,
action
=
"store_true"
,)
parser
.
add_argument
(
"--num_workers"
,
default
=
8
,
type
=
int
,)
parser
.
add_argument
(
"--per_gpu_train_batch_size"
,
default
=
8
,
type
=
int
,
help
=
"Batch size per GPU/CPU for training."
,)
parser
.
add_argument
(
"--per_gpu_eval_batch_size"
,
default
=
8
,
...
...
ppstructure/vqa/xfun.py
View file @
8c80a251
...
...
@@ -79,14 +79,36 @@ class XFUNDataset(Dataset):
self
.
entities_labels
=
{
'HEADER'
:
0
,
'QUESTION'
:
1
,
'ANSWER'
:
2
}
self
.
return_keys
=
{
'bbox'
:
'np'
,
'input_ids'
:
'np'
,
'labels'
:
'np'
,
'attention_mask'
:
'np'
,
'image'
:
'np'
,
'token_type_ids'
:
'np'
,
'entities'
:
'dict'
,
'relations'
:
'dict'
,
'bbox'
:
{
'type'
:
'np'
,
'dtype'
:
'int64'
},
'input_ids'
:
{
'type'
:
'np'
,
'dtype'
:
'int64'
},
'labels'
:
{
'type'
:
'np'
,
'dtype'
:
'int64'
},
'attention_mask'
:
{
'type'
:
'np'
,
'dtype'
:
'int64'
},
'image'
:
{
'type'
:
'np'
,
'dtype'
:
'float32'
},
'token_type_ids'
:
{
'type'
:
'np'
,
'dtype'
:
'int64'
},
'entities'
:
{
'type'
:
'dict'
},
'relations'
:
{
'type'
:
'dict'
}
}
if
load_mode
==
"all"
:
...
...
@@ -103,7 +125,7 @@ class XFUNDataset(Dataset):
return_special_tokens_mask
=
False
):
# Padding
needs_to_be_padded
=
pad_to_max_seq_len
and
\
max_seq_len
and
len
(
encoded_inputs
[
"input_ids"
])
<
max_seq_len
max_seq_len
and
len
(
encoded_inputs
[
"input_ids"
])
<
max_seq_len
if
needs_to_be_padded
:
difference
=
max_seq_len
-
len
(
encoded_inputs
[
"input_ids"
])
...
...
@@ -412,8 +434,8 @@ class XFUNDataset(Dataset):
return_data
=
{}
for
k
,
v
in
data
.
items
():
if
k
in
self
.
return_keys
:
if
self
.
return_keys
[
k
]
==
'np'
:
v
=
np
.
array
(
v
)
if
self
.
return_keys
[
k
]
[
'type'
]
==
'np'
:
v
=
np
.
array
(
v
,
dtype
=
self
.
return_keys
[
k
][
'dtype'
]
)
return_data
[
k
]
=
v
return
return_data
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment