Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
paddle_dbnet
Commits
41a1b292
Commit
41a1b292
authored
Jan 20, 2022
by
Leif
Browse files
Merge remote-tracking branch 'origin/dygraph' into dygraph
parents
9471054e
3d30899b
Changes
162
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1408 additions
and
1353 deletions
+1408
-1353
ppstructure/vqa/train_re.py
ppstructure/vqa/train_re.py
+0
-229
ppstructure/vqa/train_ser.py
ppstructure/vqa/train_ser.py
+0
-248
ppstructure/vqa/vqa_utils.py
ppstructure/vqa/vqa_utils.py
+0
-400
ppstructure/vqa/xfun.py
ppstructure/vqa/xfun.py
+0
-464
requirements.txt
requirements.txt
+0
-1
test_tipc/prepare.sh
test_tipc/prepare.sh
+3
-2
test_tipc/readme.md
test_tipc/readme.md
+9
-9
test_tipc/supplementary/__init__.py
test_tipc/supplementary/__init__.py
+1
-0
test_tipc/supplementary/config.py
test_tipc/supplementary/config.py
+137
-0
test_tipc/supplementary/custom_op/custom_relu_op.cc
test_tipc/supplementary/custom_op/custom_relu_op.cc
+109
-0
test_tipc/supplementary/custom_op/custom_relu_op.cu
test_tipc/supplementary/custom_op/custom_relu_op.cu
+76
-0
test_tipc/supplementary/custom_op/test.py
test_tipc/supplementary/custom_op/test.py
+76
-0
test_tipc/supplementary/data.py
test_tipc/supplementary/data.py
+140
-0
test_tipc/supplementary/data_loader.py
test_tipc/supplementary/data_loader.py
+66
-0
test_tipc/supplementary/load_cifar.py
test_tipc/supplementary/load_cifar.py
+40
-0
test_tipc/supplementary/loss.py
test_tipc/supplementary/loss.py
+128
-0
test_tipc/supplementary/metric.py
test_tipc/supplementary/metric.py
+56
-0
test_tipc/supplementary/mv3.py
test_tipc/supplementary/mv3.py
+487
-0
test_tipc/supplementary/mv3_distill.yml
test_tipc/supplementary/mv3_distill.yml
+31
-0
test_tipc/supplementary/mv3_large_x0_5.yml
test_tipc/supplementary/mv3_large_x0_5.yml
+49
-0
No files found.
ppstructure/vqa/train_re.py
deleted
100644 → 0
View file @
9471054e
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
sys
__dir__
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
sys
.
path
.
append
(
__dir__
)
sys
.
path
.
append
(
os
.
path
.
abspath
(
os
.
path
.
join
(
__dir__
,
'../..'
)))
import
random
import
time
import
numpy
as
np
import
paddle
from
paddlenlp.transformers
import
LayoutXLMTokenizer
,
LayoutXLMModel
,
LayoutXLMForRelationExtraction
from
xfun
import
XFUNDataset
from
vqa_utils
import
parse_args
,
get_bio_label_maps
,
print_arguments
,
set_seed
from
data_collator
import
DataCollator
from
eval_re
import
evaluate
from
ppocr.utils.logging
import
get_logger
def train(args):
    """Fine-tune LayoutXLM for relation extraction (RE) on an XFUND-style dataset.

    Builds the tokenizer and model (fresh backbone + RE head, or a resumed RE
    checkpoint), switches to data-parallel mode when more than one worker is
    present, then runs the epoch/step loop with periodic evaluation and
    checkpointing under ``args.output_dir`` ("best_model" / "latest_model").
    """
    logger = get_logger(log_file=os.path.join(args.output_dir, "train.log"))
    rank = paddle.distributed.get_rank()
    # more than one worker -> distributed (data-parallel) mode
    distributed = paddle.distributed.get_world_size() > 1

    print_arguments(args, logger)

    # Added here for reproducibility (even between python 2 and 3)
    set_seed(args.seed)

    label2id_map, id2label_map = get_bio_label_maps(args.label_map_path)
    # reuse CrossEntropyLoss' default ignore_index as the padding label id
    pad_token_label_id = paddle.nn.CrossEntropyLoss().ignore_index

    # dist mode
    if distributed:
        paddle.distributed.init_parallel_env()

    tokenizer = LayoutXLMTokenizer.from_pretrained(args.model_name_or_path)
    if not args.resume:
        # start from a pretrained backbone and a freshly initialized RE head
        model = LayoutXLMModel.from_pretrained(args.model_name_or_path)
        model = LayoutXLMForRelationExtraction(model, dropout=None)
        logger.info('train from scratch')
    else:
        # resume a complete RE checkpoint (backbone + head)
        logger.info('resume from {}'.format(args.model_name_or_path))
        model = LayoutXLMForRelationExtraction.from_pretrained(
            args.model_name_or_path)

    # dist mode
    if distributed:
        model = paddle.DataParallel(model)

    train_dataset = XFUNDataset(
        tokenizer,
        data_dir=args.train_data_dir,
        label_path=args.train_label_path,
        label2id_map=label2id_map,
        img_size=(224, 224),
        max_seq_len=args.max_seq_length,
        pad_token_label_id=pad_token_label_id,
        contains_re=True,
        add_special_ids=False,
        return_attention_mask=True,
        load_mode='all')

    eval_dataset = XFUNDataset(
        tokenizer,
        data_dir=args.eval_data_dir,
        label_path=args.eval_label_path,
        label2id_map=label2id_map,
        img_size=(224, 224),
        max_seq_len=args.max_seq_length,
        pad_token_label_id=pad_token_label_id,
        contains_re=True,
        add_special_ids=False,
        return_attention_mask=True,
        load_mode='all')

    train_sampler = paddle.io.DistributedBatchSampler(
        train_dataset, batch_size=args.per_gpu_train_batch_size, shuffle=True)

    train_dataloader = paddle.io.DataLoader(
        train_dataset,
        batch_sampler=train_sampler,
        num_workers=args.num_workers,
        use_shared_memory=True,
        collate_fn=DataCollator())

    eval_dataloader = paddle.io.DataLoader(
        eval_dataset,
        batch_size=args.per_gpu_eval_batch_size,
        num_workers=args.num_workers,
        shuffle=False,
        collate_fn=DataCollator())

    # total number of optimizer steps over the whole run
    t_total = len(train_dataloader) * args.num_train_epochs

    # build linear decay with warmup lr sch
    lr_scheduler = paddle.optimizer.lr.PolynomialDecay(
        learning_rate=args.learning_rate,
        decay_steps=t_total,
        end_lr=0.0,
        power=1.0)
    if args.warmup_steps > 0:
        lr_scheduler = paddle.optimizer.lr.LinearWarmup(
            lr_scheduler,
            args.warmup_steps,
            start_lr=0,
            end_lr=args.learning_rate, )

    grad_clip = paddle.nn.ClipGradByNorm(clip_norm=10)
    # NOTE(review): the optimizer is constructed with the constant
    # args.learning_rate rather than lr_scheduler, and lr_scheduler.step()
    # below is commented out, so the warmup/decay schedule built above is
    # effectively unused here — confirm this is intended.
    optimizer = paddle.optimizer.Adam(
        learning_rate=args.learning_rate,
        parameters=model.parameters(),
        epsilon=args.adam_epsilon,
        grad_clip=grad_clip,
        weight_decay=args.weight_decay)

    # Train!
    logger.info("***** Running training *****")
    logger.info("  Num examples = {}".format(len(train_dataset)))
    logger.info("  Num Epochs = {}".format(args.num_train_epochs))
    logger.info("  Instantaneous batch size per GPU = {}".format(
        args.per_gpu_train_batch_size))
    logger.info(
        "  Total train batch size (w. parallel, distributed & accumulation) = {}".
        format(args.per_gpu_train_batch_size *
               paddle.distributed.get_world_size()))
    logger.info("  Total optimization steps = {}".format(t_total))

    global_step = 0
    model.clear_gradients()
    train_dataloader_len = len(train_dataloader)
    # best F1 seen so far; updated after each evaluation
    best_metirc = {'f1': 0}
    model.train()

    # profiling accumulators, reset after every logged step
    train_reader_cost = 0.0
    train_run_cost = 0.0
    total_samples = 0
    reader_start = time.time()

    print_step = 1

    for epoch in range(int(args.num_train_epochs)):
        for step, batch in enumerate(train_dataloader):
            train_reader_cost += time.time() - reader_start

            train_start = time.time()
            outputs = model(**batch)
            train_run_cost += time.time() - train_start
            # model outputs are always tuple in ppnlp (see doc)
            loss = outputs['loss']
            loss = loss.mean()

            loss.backward()
            optimizer.step()
            optimizer.clear_grad()
            # lr_scheduler.step()  # Update learning rate schedule

            global_step += 1
            total_samples += batch['image'].shape[0]

            if rank == 0 and step % print_step == 0:
                logger.info(
                    "epoch: [{}/{}], iter: [{}/{}], global_step:{}, train loss: {:.6f}, lr: {:.6f}, avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} images/sec".
                    format(epoch, args.num_train_epochs, step,
                           train_dataloader_len, global_step,
                           np.mean(loss.numpy()),
                           optimizer.get_lr(), train_reader_cost / print_step,
                           (train_reader_cost + train_run_cost) / print_step,
                           total_samples / print_step, total_samples / (
                               train_reader_cost + train_run_cost)))
                train_reader_cost = 0.0
                train_run_cost = 0.0
                total_samples = 0

            if rank == 0 and args.eval_steps > 0 and global_step % args.eval_steps == 0 and args.evaluate_during_training:
                # Log metrics
                # Only evaluate when single GPU otherwise metrics may not average well
                results = evaluate(model, eval_dataloader, logger)
                # checkpoint whenever the eval F1 ties or beats the best so far
                if results['f1'] >= best_metirc['f1']:
                    best_metirc = results
                    output_dir = os.path.join(args.output_dir, "best_model")
                    os.makedirs(output_dir, exist_ok=True)
                    if distributed:
                        # unwrap DataParallel before saving
                        model._layers.save_pretrained(output_dir)
                    else:
                        model.save_pretrained(output_dir)
                    tokenizer.save_pretrained(output_dir)
                    paddle.save(args,
                                os.path.join(output_dir, "training_args.bin"))
                    logger.info("Saving model checkpoint to {}".format(
                        output_dir))
                logger.info("eval results: {}".format(results))
                logger.info("best_metirc: {}".format(best_metirc))
            reader_start = time.time()

        if rank == 0:
            # Save model checkpoint (overwritten each epoch)
            output_dir = os.path.join(args.output_dir, "latest_model")
            os.makedirs(output_dir, exist_ok=True)
            if distributed:
                model._layers.save_pretrained(output_dir)
            else:
                model.save_pretrained(output_dir)
            tokenizer.save_pretrained(output_dir)
            paddle.save(args, os.path.join(output_dir, "training_args.bin"))
            logger.info("Saving model checkpoint to {}".format(output_dir))
    logger.info("best_metirc: {}".format(best_metirc))
if __name__ == "__main__":
    cli_args = parse_args()
    # the output directory must exist before train() opens its log file there
    os.makedirs(cli_args.output_dir, exist_ok=True)
    train(cli_args)
ppstructure/vqa/train_ser.py
deleted
100644 → 0
View file @
9471054e
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
sys
__dir__
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
sys
.
path
.
append
(
__dir__
)
sys
.
path
.
append
(
os
.
path
.
abspath
(
os
.
path
.
join
(
__dir__
,
'../..'
)))
import
random
import
time
import
copy
import
logging
import
argparse
import
paddle
import
numpy
as
np
from
seqeval.metrics
import
classification_report
,
f1_score
,
precision_score
,
recall_score
from
paddlenlp.transformers
import
LayoutXLMModel
,
LayoutXLMTokenizer
,
LayoutXLMForTokenClassification
from
paddlenlp.transformers
import
LayoutLMModel
,
LayoutLMTokenizer
,
LayoutLMForTokenClassification
from
xfun
import
XFUNDataset
from
vqa_utils
import
parse_args
,
get_bio_label_maps
,
print_arguments
,
set_seed
from
eval_ser
import
evaluate
from
losses
import
SERLoss
from
ppocr.utils.logging
import
get_logger
# Registry mapping the --ser_model_type CLI value to the
# (tokenizer class, backbone class, token-classification head class) triple.
MODELS = {
    'LayoutXLM':
    (LayoutXLMTokenizer, LayoutXLMModel, LayoutXLMForTokenClassification),
    'LayoutLM':
    (LayoutLMTokenizer, LayoutLMModel, LayoutLMForTokenClassification)
}
def train(args):
    """Fine-tune a SER (semantic entity recognition) model on XFUND-style data.

    The model family (LayoutXLM or LayoutLM) is selected via
    ``args.ser_model_type`` through the module-level ``MODELS`` registry.
    Runs the epoch/step loop with periodic evaluation and checkpointing and
    returns ``(global_step, mean training loss)``.
    """
    os.makedirs(args.output_dir, exist_ok=True)
    rank = paddle.distributed.get_rank()
    # more than one worker -> distributed (data-parallel) mode
    distributed = paddle.distributed.get_world_size() > 1

    logger = get_logger(log_file=os.path.join(args.output_dir, "train.log"))
    print_arguments(args, logger)

    label2id_map, id2label_map = get_bio_label_maps(args.label_map_path)
    loss_class = SERLoss(len(label2id_map))

    # padding positions carry this label id and are ignored by the loss
    pad_token_label_id = loss_class.ignore_index

    # dist mode
    if distributed:
        paddle.distributed.init_parallel_env()

    tokenizer_class, base_model_class, model_class = MODELS[
        args.ser_model_type]
    tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path)
    if not args.resume:
        # start from a pretrained backbone and a fresh classification head
        base_model = base_model_class.from_pretrained(args.model_name_or_path)
        model = model_class(
            base_model, num_classes=len(label2id_map), dropout=None)
        logger.info('train from scratch')
    else:
        # resume a complete SER checkpoint (backbone + head)
        logger.info('resume from {}'.format(args.model_name_or_path))
        model = model_class.from_pretrained(args.model_name_or_path)

    # dist mode
    if distributed:
        model = paddle.DataParallel(model)

    train_dataset = XFUNDataset(
        tokenizer,
        data_dir=args.train_data_dir,
        label_path=args.train_label_path,
        label2id_map=label2id_map,
        img_size=(224, 224),
        pad_token_label_id=pad_token_label_id,
        contains_re=False,
        add_special_ids=False,
        return_attention_mask=True,
        load_mode='all')

    eval_dataset = XFUNDataset(
        tokenizer,
        data_dir=args.eval_data_dir,
        label_path=args.eval_label_path,
        label2id_map=label2id_map,
        img_size=(224, 224),
        pad_token_label_id=pad_token_label_id,
        contains_re=False,
        add_special_ids=False,
        return_attention_mask=True,
        load_mode='all')

    train_sampler = paddle.io.DistributedBatchSampler(
        train_dataset, batch_size=args.per_gpu_train_batch_size, shuffle=True)

    train_dataloader = paddle.io.DataLoader(
        train_dataset,
        batch_sampler=train_sampler,
        num_workers=args.num_workers,
        use_shared_memory=True,
        collate_fn=None, )

    eval_dataloader = paddle.io.DataLoader(
        eval_dataset,
        batch_size=args.per_gpu_eval_batch_size,
        num_workers=args.num_workers,
        use_shared_memory=True,
        collate_fn=None, )

    # total number of optimizer steps over the whole run
    t_total = len(train_dataloader) * args.num_train_epochs

    # build linear decay with warmup lr sch
    lr_scheduler = paddle.optimizer.lr.PolynomialDecay(
        learning_rate=args.learning_rate,
        decay_steps=t_total,
        end_lr=0.0,
        power=1.0)
    if args.warmup_steps > 0:
        lr_scheduler = paddle.optimizer.lr.LinearWarmup(
            lr_scheduler,
            args.warmup_steps,
            start_lr=0,
            end_lr=args.learning_rate, )

    optimizer = paddle.optimizer.AdamW(
        learning_rate=lr_scheduler,
        parameters=model.parameters(),
        epsilon=args.adam_epsilon,
        weight_decay=args.weight_decay)

    # Train!
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_dataset))
    logger.info("  Num Epochs = %d", args.num_train_epochs)
    logger.info("  Instantaneous batch size per GPU = %d",
                args.per_gpu_train_batch_size)
    logger.info(
        "  Total train batch size (w. parallel, distributed) = %d",
        args.per_gpu_train_batch_size * paddle.distributed.get_world_size(), )
    logger.info("  Total optimization steps = %d", t_total)

    global_step = 0
    tr_loss = 0.0
    set_seed(args.seed)
    best_metrics = None

    # profiling accumulators, reset after every logged step
    train_reader_cost = 0.0
    train_run_cost = 0.0
    total_samples = 0
    reader_start = time.time()

    print_step = 1
    model.train()

    for epoch_id in range(args.num_train_epochs):
        for step, batch in enumerate(train_dataloader):
            train_reader_cost += time.time() - reader_start

            # LayoutLM takes no visual input; drop the image field if present
            if args.ser_model_type == 'LayoutLM':
                if 'image' in batch:
                    batch.pop('image')
            labels = batch.pop('labels')

            train_start = time.time()
            outputs = model(**batch)
            train_run_cost += time.time() - train_start
            # LayoutXLM returns a tuple whose first element is the logits
            if args.ser_model_type == 'LayoutXLM':
                outputs = outputs[0]
            loss = loss_class(labels, outputs, batch['attention_mask'])

            # model outputs are always tuple in ppnlp (see doc)
            loss = loss.mean()
            loss.backward()
            tr_loss += loss.item()
            optimizer.step()
            lr_scheduler.step()  # Update learning rate schedule
            optimizer.clear_grad()
            global_step += 1
            total_samples += batch['input_ids'].shape[0]

            if rank == 0 and step % print_step == 0:
                logger.info(
                    "epoch: [{}/{}], iter: [{}/{}], global_step:{}, train loss: {:.6f}, lr: {:.6f}, avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} images/sec".
                    format(epoch_id, args.num_train_epochs, step,
                           len(train_dataloader), global_step,
                           loss.numpy()[0],
                           lr_scheduler.get_lr(), train_reader_cost /
                           print_step, (train_reader_cost + train_run_cost) /
                           print_step, total_samples / print_step,
                           total_samples / (
                               train_reader_cost + train_run_cost)))
                train_reader_cost = 0.0
                train_run_cost = 0.0
                total_samples = 0

            if rank == 0 and args.eval_steps > 0 and global_step % args.eval_steps == 0 and args.evaluate_during_training:
                # Log metrics
                # Only evaluate when single GPU otherwise metrics may not average well
                results, _ = evaluate(args, model, tokenizer, loss_class,
                                      eval_dataloader, label2id_map,
                                      id2label_map, pad_token_label_id,
                                      logger)

                # checkpoint whenever the eval F1 ties or beats the best so far
                if best_metrics is None or results["f1"] >= best_metrics[
                        "f1"]:
                    best_metrics = copy.deepcopy(results)
                    output_dir = os.path.join(args.output_dir, "best_model")
                    os.makedirs(output_dir, exist_ok=True)
                    if distributed:
                        # unwrap DataParallel before saving
                        model._layers.save_pretrained(output_dir)
                    else:
                        model.save_pretrained(output_dir)
                    tokenizer.save_pretrained(output_dir)
                    paddle.save(args,
                                os.path.join(output_dir, "training_args.bin"))
                    logger.info("Saving model checkpoint to {}".format(
                        output_dir))

                logger.info("[epoch {}/{}][iter: {}/{}] results: {}".format(
                    epoch_id, args.num_train_epochs, step,
                    len(train_dataloader), results))
                if best_metrics is not None:
                    logger.info("best metrics: {}".format(best_metrics))
            reader_start = time.time()
        if rank == 0:
            # Save model checkpoint (overwritten each epoch)
            output_dir = os.path.join(args.output_dir, "latest_model")
            os.makedirs(output_dir, exist_ok=True)
            if distributed:
                model._layers.save_pretrained(output_dir)
            else:
                model.save_pretrained(output_dir)
            tokenizer.save_pretrained(output_dir)
            paddle.save(args, os.path.join(output_dir, "training_args.bin"))
            logger.info("Saving model checkpoint to {}".format(output_dir))
    return global_step, tr_loss / global_step
if __name__ == "__main__":
    # parse CLI options and launch SER training
    cli_args = parse_args()
    train(cli_args)
ppstructure/vqa/vqa_utils.py
deleted
100644 → 0
View file @
9471054e
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
argparse
import
cv2
import
random
import
numpy
as
np
import
imghdr
from
copy
import
deepcopy
import
paddle
from
PIL
import
Image
,
ImageDraw
,
ImageFont
def set_seed(seed):
    """Seed Python's, NumPy's and Paddle's RNGs for reproducible runs."""
    for seeder in (random.seed, np.random.seed, paddle.seed):
        seeder(seed)
def get_bio_label_maps(label_map_path):
    """Build BIO label <-> id mappings from a label list file.

    Every non-"O" line of the file expands into a "B-" and an "I-" tag;
    an "O" tag is prepended when the file does not already contain one.
    Returns ``(label2id_map, id2label_map)``.
    """
    with open(label_map_path, "r", encoding='utf-8') as fin:
        raw_names = [ln.strip() for ln in fin.readlines()]
    if "O" not in raw_names:
        raw_names.insert(0, "O")

    bio_labels = []
    for name in raw_names:
        if name == "O":
            bio_labels.append("O")
        else:
            # each entity type contributes a beginning and an inside tag
            bio_labels.extend(("B-" + name, "I-" + name))

    label2id_map = dict((lab, i) for i, lab in enumerate(bio_labels))
    id2label_map = dict((i, lab) for i, lab in enumerate(bio_labels))
    return label2id_map, id2label_map
def get_image_file_list(img_file):
    """Collect image paths from a single file or one directory level.

    Image-ness is decided by content sniffing (imghdr), not file extension.
    Raises Exception when the path is missing or yields no images.
    """
    if img_file is None or not os.path.exists(img_file):
        raise Exception("not found any img file in {}".format(img_file))

    valid_formats = {
        'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif', 'tiff', 'gif', 'GIF'
    }

    def _is_image(path):
        # isfile() first: imghdr.what() would fail on a directory
        return os.path.isfile(path) and imghdr.what(path) in valid_formats

    found = []
    if _is_image(img_file):
        found.append(img_file)
    elif os.path.isdir(img_file):
        found.extend(
            os.path.join(img_file, name)
            for name in os.listdir(img_file)
            if _is_image(os.path.join(img_file, name)))

    if not found:
        raise Exception("not found any img file in {}".format(img_file))
    return sorted(found)
def draw_ser_results(image,
                     ocr_results,
                     font_path="../../doc/fonts/simfang.ttf",
                     font_size=18):
    """Render SER predictions onto a copy of *image* and return it as ndarray.

    Each entry of *ocr_results* must provide "pred_id", "pred", "text" and
    "bbox"; boxes are filled with a per-class color and blended 50/50 with
    the original image. Entries whose pred_id is 0 or >= 255 are skipped.
    """
    # fixed seed so the per-class colors are stable across calls
    np.random.seed(2021)
    color = (np.random.permutation(range(255)),
             np.random.permutation(range(255)),
             np.random.permutation(range(255)))
    # pred_id -> RGB; id 0 is deliberately excluded (background / "O")
    color_map = {
        idx: (color[0][idx], color[1][idx], color[2][idx])
        for idx in range(1, 255)
    }
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    img_new = image.copy()
    draw = ImageDraw.Draw(img_new)

    font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
    for ocr_info in ocr_results:
        if ocr_info["pred_id"] not in color_map:
            continue
        color = color_map[ocr_info["pred_id"]]
        text = "{}: {}".format(ocr_info["pred"], ocr_info["text"])

        draw_box_txt(ocr_info["bbox"], text, draw, font, font_size, color)

    # 50/50 blend keeps the underlying document visible under the overlays
    img_new = Image.blend(image, img_new, 0.5)
    return np.array(img_new)
def draw_box_txt(bbox, text, draw, font, font_size, color):
    """Fill one OCR box with *color* and write *text* just above it.

    *bbox* is a flat [x1, y1, x2, y2] box; *draw* is a PIL ImageDraw.
    """
    # draw ocr results outline
    bbox = ((bbox[0], bbox[1]), (bbox[2], bbox[3]))
    draw.rectangle(bbox, fill=color)

    # draw ocr results
    # clamp the caption so it never runs off the top of the image
    start_y = max(0, bbox[0][1] - font_size)
    tw = font.getsize(text)[0]
    # blue background strip sized to the rendered text width
    draw.rectangle(
        [(bbox[0][0] + 1, start_y), (bbox[0][0] + tw + 1,
                                     start_y + font_size)],
        fill=(0, 0, 255))
    draw.text(
        (bbox[0][0] + 1, start_y), text, fill=(255, 255, 255), font=font)
def draw_re_results(image,
                    result,
                    font_path="../../doc/fonts/simfang.ttf",
                    font_size=18):
    """Render relation-extraction pairs onto a copy of *image*.

    *result* is an iterable of (head, tail) OCR-info dicts, each with "bbox"
    and "text". Heads are drawn blue, tails red, and a green line connects
    the two box centers. Returns the blended image as an ndarray.
    """
    np.random.seed(0)
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    img_new = image.copy()
    draw = ImageDraw.Draw(img_new)

    font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
    color_head = (0, 0, 255)
    color_tail = (255, 0, 0)
    color_line = (0, 255, 0)

    for ocr_info_head, ocr_info_tail in result:
        draw_box_txt(ocr_info_head["bbox"], ocr_info_head["text"], draw, font,
                     font_size, color_head)
        draw_box_txt(ocr_info_tail["bbox"], ocr_info_tail["text"], draw, font,
                     font_size, color_tail)

        # midpoints of the two boxes, used as the relation line endpoints
        center_head = (
            (ocr_info_head['bbox'][0] + ocr_info_head['bbox'][2]) // 2,
            (ocr_info_head['bbox'][1] + ocr_info_head['bbox'][3]) // 2)
        center_tail = (
            (ocr_info_tail['bbox'][0] + ocr_info_tail['bbox'][2]) // 2,
            (ocr_info_tail['bbox'][1] + ocr_info_tail['bbox'][3]) // 2)

        draw.line([center_head, center_tail], fill=color_line, width=5)

    # 50/50 blend keeps the underlying document visible under the overlays
    img_new = Image.blend(image, img_new, 0.5)
    return np.array(img_new)
# pad sentences
def pad_sentences(tokenizer,
                  encoded_inputs,
                  max_seq_len=512,
                  pad_to_max_seq_len=True,
                  return_attention_mask=True,
                  return_token_type_ids=True,
                  return_overflowing_tokens=False,
                  return_special_tokens_mask=False):
    """Pad the token-level fields of *encoded_inputs* to a page-aligned length.

    The target length is the smallest multiple of ``max_seq_len`` (at least
    one page) that can hold ``input_ids``, so that ``split_page`` can later
    reshape the result into pages of ``max_seq_len`` tokens. Padding tokens
    get attention_mask 0 and an all-zero bbox. Only right-side padding is
    implemented; for any other ``tokenizer.padding_side`` the inputs are
    returned unpadded.

    Args:
        tokenizer: provides ``padding_side``, ``pad_token_id`` and
            ``pad_token_type_id``.
        encoded_inputs: dict with "input_ids", "token_type_ids" and "bbox"
            (plus optionally "special_tokens_mask") as plain lists.
        max_seq_len: page size in tokens.
    Returns:
        The same dict, with its fields padded in place.
    """
    # Round the target length UP to the next multiple of max_seq_len.
    # NOTE(fix): the previous `len // max_seq_len + 1` always added a whole
    # extra page, so an exactly-full sequence was padded with an entire page
    # of pad tokens (with an all-zero attention mask).
    num_tokens = len(encoded_inputs["input_ids"])
    max_seq_len = max(max_seq_len, -(-num_tokens // max_seq_len) * max_seq_len)

    needs_to_be_padded = pad_to_max_seq_len and \
        max_seq_len and len(encoded_inputs["input_ids"]) < max_seq_len

    if needs_to_be_padded:
        difference = max_seq_len - len(encoded_inputs["input_ids"])
        # only right padding is implemented; other sides fall through unpadded
        if tokenizer.padding_side == 'right':
            if return_attention_mask:
                encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[
                    "input_ids"]) + [0] * difference
            if return_token_type_ids:
                encoded_inputs["token_type_ids"] = (
                    encoded_inputs["token_type_ids"] +
                    [tokenizer.pad_token_type_id] * difference)
            if return_special_tokens_mask:
                encoded_inputs["special_tokens_mask"] = encoded_inputs[
                    "special_tokens_mask"] + [1] * difference
            encoded_inputs["input_ids"] = encoded_inputs[
                "input_ids"] + [tokenizer.pad_token_id] * difference
            # padded positions get a degenerate all-zero box
            encoded_inputs["bbox"] = encoded_inputs[
                "bbox"] + [[0, 0, 0, 0]] * difference
    else:
        if return_attention_mask:
            encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[
                "input_ids"])

    return encoded_inputs
def split_page(encoded_inputs, max_seq_len=512):
    """Reshape flat, page-aligned fields into per-page tensors.

    truncate is often used in training process; at inference time an
    over-long sample is instead split into a fake batch of pages of
    ``max_seq_len`` tokens each. Assumes every list field's length is an
    exact multiple of ``max_seq_len`` (guaranteed by ``pad_sentences``).
    "entities" stays a Python object and is wrapped in a single-element list.
    """
    for key in encoded_inputs:
        if key == 'entities':
            encoded_inputs[key] = [encoded_inputs[key]]
            continue
        encoded_inputs[key] = paddle.to_tensor(encoded_inputs[key])
        if encoded_inputs[key].ndim <= 1:  # for input_ids, att_mask and so on
            encoded_inputs[key] = encoded_inputs[key].reshape(
                [-1, max_seq_len])
        else:  # for bbox
            encoded_inputs[key] = encoded_inputs[key].reshape(
                [-1, max_seq_len, 4])
    return encoded_inputs
def preprocess(
        tokenizer,
        ori_img,
        ocr_info,
        img_size=(224, 224),
        pad_token_label_id=-100,
        max_seq_len=512,
        add_special_ids=False,
        return_attention_mask=True, ):
    """Turn one image plus its OCR results into model-ready inputs.

    Tokenizes every OCR segment, normalizes its box to the 0-1000 LayoutXLM
    coordinate space, pads to page-aligned length, splits into pages (a fake
    batch), and attaches the resized image replicated once per page.

    Args:
        tokenizer: a LayoutXLM-style tokenizer.
        ori_img: image array; assumes HWC channel order — TODO confirm
            against the caller's image loading.
        ocr_info: list of dicts with "text" and pixel-space "bbox"
            [x1, y1, x2, y2]; deep-copied, so the caller's list is untouched.
    Returns:
        dict of per-page tensors plus "entities" and "segment_offset_id".
    """
    ocr_info = deepcopy(ocr_info)
    height = ori_img.shape[0]
    width = ori_img.shape[1]

    # resize to the visual backbone's input size and go HWC -> CHW float32
    img = cv2.resize(ori_img,
                     img_size).transpose([2, 0, 1]).astype(np.float32)

    segment_offset_id = []
    words_list = []
    bbox_list = []
    input_ids_list = []
    token_type_ids_list = []
    entities = []

    for info in ocr_info:
        # x1, y1, x2, y2
        # normalize pixel coordinates to the 0-1000 grid LayoutXLM expects
        bbox = info["bbox"]
        bbox[0] = int(bbox[0] * 1000.0 / width)
        bbox[2] = int(bbox[2] * 1000.0 / width)
        bbox[1] = int(bbox[1] * 1000.0 / height)
        bbox[3] = int(bbox[3] * 1000.0 / height)

        text = info["text"]
        encode_res = tokenizer.encode(
            text, pad_to_max_seq_len=False, return_attention_mask=True)

        if not add_special_ids:
            # TODO: use tok.all_special_ids to remove
            # strip the leading/trailing special tokens added by encode()
            encode_res["input_ids"] = encode_res["input_ids"][1:-1]
            encode_res["token_type_ids"] = encode_res["token_type_ids"][1:-1]
            encode_res["attention_mask"] = encode_res["attention_mask"][1:-1]

        # for re
        # record the token span of this segment, labeled "O" by default
        entities.append({
            "start": len(input_ids_list),
            "end": len(input_ids_list) + len(encode_res["input_ids"]),
            "label": "O",
        })

        input_ids_list.extend(encode_res["input_ids"])
        token_type_ids_list.extend(encode_res["token_type_ids"])
        # every token of the segment shares the segment's box
        bbox_list.extend([bbox] * len(encode_res["input_ids"]))
        words_list.append(text)
        # cumulative token count marks where this segment ends
        segment_offset_id.append(len(input_ids_list))

    encoded_inputs = {
        "input_ids": input_ids_list,
        "token_type_ids": token_type_ids_list,
        "bbox": bbox_list,
        "attention_mask": [1] * len(input_ids_list),
        "entities": entities
    }

    encoded_inputs = pad_sentences(
        tokenizer,
        encoded_inputs,
        max_seq_len=max_seq_len,
        return_attention_mask=return_attention_mask)

    encoded_inputs = split_page(encoded_inputs)

    # one "page" per max_seq_len window; treated downstream as a batch
    fake_bs = encoded_inputs["input_ids"].shape[0]

    # replicate the single image so every page carries the same visual input
    encoded_inputs["image"] = paddle.to_tensor(img).unsqueeze(0).expand(
        [fake_bs] + list(img.shape))

    encoded_inputs["segment_offset_id"] = segment_offset_id
    return encoded_inputs
def postprocess(attention_mask, preds, id2label_map):
    """Turn per-token class scores into label strings, dropping padded slots.

    *preds* has shape (batch, seq_len, num_classes); positions whose
    attention_mask is not 1 are skipped. Returns one label list per sample.
    """
    if isinstance(preds, paddle.Tensor):
        preds = preds.numpy()
    pred_ids = np.argmax(preds, axis=2)

    batch_size = pred_ids.shape[0]
    seq_len = pred_ids.shape[1]
    # keep batch info: one label list per sample
    preds_list = [[] for _ in range(batch_size)]
    for row in range(batch_size):
        for col in range(seq_len):
            if attention_mask[row][col] == 1:
                preds_list[row].append(id2label_map[pred_ids[row][col]])
    return preds_list
def merge_preds_list_with_ocr_info(ocr_info, segment_offset_id, preds_list,
                                   label2id_map_for_draw):
    """Vote one label per OCR segment from its token-level predictions.

    Flattens *preds_list* (label strings across pages), slices it by the
    cumulative *segment_offset_id* boundaries, majority-votes the label ids
    inside each slice, and writes "pred_id" / "pred" into the matching
    *ocr_info* entry. Empty slices default to id 0.
    """
    # must ensure the preds_list is generated from the same image
    flat_preds = [label for page in preds_list for label in page]

    # invert the draw map; strip the BIO prefix so B-X and I-X collapse to X
    id2label_map = dict()
    for name, label_id in label2id_map_for_draw.items():
        if name.startswith(("B-", "I-")):
            id2label_map[label_id] = name[2:]
        else:
            id2label_map[label_id] = name

    prev_offset = 0
    for idx, end_id in enumerate(segment_offset_id):
        token_ids = [
            label2id_map_for_draw[lab]
            for lab in flat_preds[prev_offset:end_id]
        ]
        if not token_ids:
            pred_id = 0
        else:
            # majority vote over the segment's token label ids
            pred_id = np.argmax(np.bincount(token_ids))
        ocr_info[idx]["pred_id"] = int(pred_id)
        ocr_info[idx]["pred"] = id2label_map[int(pred_id)]
        prev_offset = end_id
    return ocr_info
def print_arguments(args, logger=None):
    """Print arguments.

    Logs every attribute of ``args`` in sorted order, framed by separator
    lines. Uses ``logger.info`` when a logger is supplied, otherwise falls
    back to ``print``.

    NOTE(fix): the original placed the '''print arguments''' string after the
    first statement, where it is a no-op expression rather than a docstring;
    it is now a real docstring.
    """
    print_func = logger.info if logger is not None else print
    print_func('-----------  Configuration Arguments -----------')
    for arg, value in sorted(vars(args).items()):
        print_func('%s: %s' % (arg, value))
    print_func('------------------------------------------------')
def parse_args():
    """Build and parse the command-line options shared by the VQA scripts.

    Only --model_name_or_path and --output_dir are mandatory; everything
    else has a default so SER and RE scripts can share one parser.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument

    # Required parameters
    # yapf: disable
    add("--model_name_or_path", default=None, type=str, required=True)
    add("--ser_model_type", default='LayoutXLM', type=str)
    add("--re_model_name_or_path", default=None, type=str, required=False)

    # dataset locations
    add("--train_data_dir", default=None, type=str, required=False)
    add("--train_label_path", default=None, type=str, required=False)
    add("--eval_data_dir", default=None, type=str, required=False)
    add("--eval_label_path", default=None, type=str, required=False)
    add("--output_dir", default=None, type=str, required=True)

    # training hyper-parameters
    add("--max_seq_length", default=512, type=int)
    add("--evaluate_during_training", action="store_true")
    add("--num_workers", default=8, type=int)
    add("--per_gpu_train_batch_size", default=8, type=int,
        help="Batch size per GPU/CPU for training.")
    add("--per_gpu_eval_batch_size", default=8, type=int,
        help="Batch size per GPU/CPU for eval.")
    add("--learning_rate", default=5e-5, type=float,
        help="The initial learning rate for Adam.")
    add("--weight_decay", default=0.0, type=float,
        help="Weight decay if we apply some.")
    add("--adam_epsilon", default=1e-8, type=float,
        help="Epsilon for Adam optimizer.")
    add("--max_grad_norm", default=1.0, type=float,
        help="Max gradient norm.")
    add("--num_train_epochs", default=3, type=int,
        help="Total number of training epochs to perform.")
    add("--warmup_steps", default=0, type=int,
        help="Linear warmup over warmup_steps.")
    add("--eval_steps", type=int, default=10,
        help="eval every X updates steps.")
    add("--seed", type=int, default=2048,
        help="random seed for initialization")

    # inference-time options
    add("--rec_model_dir", default=None, type=str)
    add("--det_model_dir", default=None, type=str)
    add("--label_map_path", default="./labels/labels_ser.txt",
        type=str, required=False)
    add("--infer_imgs", default=None, type=str, required=False)
    add("--resume", action='store_true')
    add("--ocr_json_path", default=None, type=str, required=False,
        help="ocr prediction results")
    # yapf: enable

    return parser.parse_args()
ppstructure/vqa/xfun.py
deleted
100644 → 0
View file @
9471054e
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
json
import
os
import
cv2
import
numpy
as
np
import
paddle
import
copy
from
paddle.io
import
Dataset
__all__
=
[
"XFUNDataset"
]
class XFUNDataset(Dataset):
    """Dataset for the XFUND visual question answering benchmark.

    Parses a label file where each line is ``<image_name>\t<json_info>``,
    tokenizes every OCR text region with a LayoutXLM-style tokenizer, and
    yields padded/truncated model inputs. Supports two tasks:
    SER (semantic entity recognition, ``contains_re=False``) and
    RE (relation extraction, ``contains_re=True``).

    Example:
        print("=====begin to build dataset=====")
        from paddlenlp.transformers import LayoutXLMTokenizer
        tokenizer = LayoutXLMTokenizer.from_pretrained("/paddle/models/transformers/layoutxlm-base-paddle/")
        tok_res = tokenizer.tokenize("Maribyrnong")
        # res = tokenizer.convert_ids_to_tokens(val_data["input_ids"][0])
        dataset = XfunDatasetForSer(
            tokenizer,
            data_dir="./zh.val/",
            label_path="zh.val/xfun_normalize_val.json",
            img_size=(224,224))
        print(len(dataset))
        data = dataset[0]
        print(data.keys())
        print("input_ids: ", data["input_ids"])
        print("labels: ", data["labels"])
        print("token_type_ids: ", data["token_type_ids"])
        print("words_list: ", data["words_list"])
        print("image shape: ", data["image"].shape)
    """

    def __init__(self,
                 tokenizer,
                 data_dir,
                 label_path,
                 contains_re=False,
                 label2id_map=None,
                 img_size=(224, 224),
                 pad_token_label_id=None,
                 add_special_ids=False,
                 return_attention_mask=True,
                 load_mode='all',
                 max_seq_len=512):
        super().__init__()
        self.tokenizer = tokenizer
        self.data_dir = data_dir
        self.label_path = label_path
        self.contains_re = contains_re
        self.label2id_map = label2id_map
        self.img_size = img_size
        self.pad_token_label_id = pad_token_label_id
        self.add_special_ids = add_special_ids
        self.return_attention_mask = return_attention_mask
        self.load_mode = load_mode
        self.max_seq_len = max_seq_len

        # default the padding label to the loss ignore_index so padded
        # positions never contribute to the SER loss
        if self.pad_token_label_id is None:
            self.pad_token_label_id = paddle.nn.CrossEntropyLoss().ignore_index

        self.all_lines = self.read_all_lines()

        self.entities_labels = {'HEADER': 0, 'QUESTION': 1, 'ANSWER': 2}
        # keys kept by __getitem__ and how each is converted before return
        self.return_keys = {
            'bbox': {'type': 'np', 'dtype': 'int64'},
            'input_ids': {'type': 'np', 'dtype': 'int64'},
            'labels': {'type': 'np', 'dtype': 'int64'},
            'attention_mask': {'type': 'np', 'dtype': 'int64'},
            'image': {'type': 'np', 'dtype': 'float32'},
            'token_type_ids': {'type': 'np', 'dtype': 'int64'},
            'entities': {'type': 'dict'},
            'relations': {'type': 'dict'}
        }

        # "all": eagerly parse every sample up front; otherwise samples are
        # parsed lazily per __getitem__ call
        if load_mode == "all":
            self.encoded_inputs_all = self._parse_label_file_all()

    def pad_sentences(self,
                      encoded_inputs,
                      max_seq_len=512,
                      pad_to_max_seq_len=True,
                      return_attention_mask=True,
                      return_token_type_ids=True,
                      truncation_strategy="longest_first",
                      return_overflowing_tokens=False,
                      return_special_tokens_mask=False):
        """Pad every per-token field of *encoded_inputs* up to *max_seq_len*.

        Padding side follows ``self.tokenizer.padding_side``. When no
        padding is needed only the attention mask is (re)built.
        """
        # Padding
        needs_to_be_padded = pad_to_max_seq_len and \
            max_seq_len and len(encoded_inputs["input_ids"]) < max_seq_len

        if needs_to_be_padded:
            difference = max_seq_len - len(encoded_inputs["input_ids"])
            if self.tokenizer.padding_side == 'right':
                if return_attention_mask:
                    # 1 for real tokens, 0 for padding
                    encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[
                        "input_ids"]) + [0] * difference
                if return_token_type_ids:
                    encoded_inputs["token_type_ids"] = (
                        encoded_inputs["token_type_ids"] +
                        [self.tokenizer.pad_token_type_id] * difference)
                if return_special_tokens_mask:
                    encoded_inputs["special_tokens_mask"] = encoded_inputs[
                        "special_tokens_mask"] + [1] * difference
                encoded_inputs["input_ids"] = encoded_inputs[
                    "input_ids"] + [self.tokenizer.pad_token_id] * difference
                encoded_inputs["labels"] = encoded_inputs[
                    "labels"] + [self.pad_token_label_id] * difference
                # padded boxes are all-zero quads
                encoded_inputs["bbox"] = encoded_inputs[
                    "bbox"] + [[0, 0, 0, 0]] * difference
            elif self.tokenizer.padding_side == 'left':
                if return_attention_mask:
                    encoded_inputs["attention_mask"] = [0] * difference + [
                        1
                    ] * len(encoded_inputs["input_ids"])
                if return_token_type_ids:
                    encoded_inputs["token_type_ids"] = (
                        [self.tokenizer.pad_token_type_id] * difference +
                        encoded_inputs["token_type_ids"])
                if return_special_tokens_mask:
                    encoded_inputs["special_tokens_mask"] = [
                        1
                    ] * difference + encoded_inputs["special_tokens_mask"]
                encoded_inputs["input_ids"] = [
                    self.tokenizer.pad_token_id
                ] * difference + encoded_inputs["input_ids"]
                encoded_inputs["labels"] = [
                    self.pad_token_label_id
                ] * difference + encoded_inputs["labels"]
                encoded_inputs["bbox"] = [
                    [0, 0, 0, 0]
                ] * difference + encoded_inputs["bbox"]
        else:
            if return_attention_mask:
                encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[
                    "input_ids"])

        return encoded_inputs

    def truncate_inputs(self, encoded_inputs, max_seq_len=512):
        """Clip every list field to at most *max_seq_len* items."""
        for key in encoded_inputs:
            if key == "sample_id":
                continue
            length = min(len(encoded_inputs[key]), max_seq_len)
            encoded_inputs[key] = encoded_inputs[key][:length]
        return encoded_inputs

    def read_all_lines(self, ):
        """Read the whole label file into a list of raw lines."""
        with open(self.label_path, "r", encoding='utf-8') as fin:
            lines = fin.readlines()
        return lines

    def _parse_label_file_all(self):
        """
        parse all samples
        """
        encoded_inputs_all = []
        for line in self.all_lines:
            # each line can expand into several chunks (see _chunk_*)
            encoded_inputs_all.extend(self._parse_label_file(line))
        return encoded_inputs_all

    def _parse_label_file(self, line):
        """
        parse single sample
        """
        image_name, info_str = line.split("\t")
        image_path = os.path.join(self.data_dir, image_name)

        def add_imgge_path(x):
            # attach the source image path to every chunk of this sample
            x['image_path'] = image_path
            return x

        encoded_inputs = self._read_encoded_inputs_sample(info_str)
        if self.contains_re:
            encoded_inputs = self._chunk_re(encoded_inputs)
        else:
            encoded_inputs = self._chunk_ser(encoded_inputs)
        encoded_inputs = list(map(add_imgge_path, encoded_inputs))
        return encoded_inputs

    def _read_encoded_inputs_sample(self, info_str):
        """
        parse label info
        """
        # read text info
        info_dict = json.loads(info_str)
        height = info_dict["height"]
        width = info_dict["width"]

        words_list = []
        bbox_list = []
        input_ids_list = []
        token_type_ids_list = []
        gt_label_list = []

        if self.contains_re:
            # for re
            entities = []
            relations = []
            id2label = {}
            entity_id_to_index_map = {}
            empty_entity = set()
        for info in info_dict["ocr_info"]:
            if self.contains_re:
                # for re
                if len(info["text"]) == 0:
                    # entities with no text cannot take part in relations
                    empty_entity.add(info["id"])
                    continue
                id2label[info["id"]] = info["label"]
                relations.extend([tuple(sorted(l)) for l in info["linking"]])

            # x1, y1, x2, y2
            bbox = info["bbox"]
            label = info["label"]
            # normalize box coordinates to the 0-1000 grid LayoutXLM expects
            bbox[0] = int(bbox[0] * 1000.0 / width)
            bbox[2] = int(bbox[2] * 1000.0 / width)
            bbox[1] = int(bbox[1] * 1000.0 / height)
            bbox[3] = int(bbox[3] * 1000.0 / height)

            text = info["text"]
            encode_res = self.tokenizer.encode(
                text, pad_to_max_seq_len=False, return_attention_mask=True)

            gt_label = []
            if not self.add_special_ids:
                # TODO: use tok.all_special_ids to remove
                encode_res["input_ids"] = encode_res["input_ids"][1:-1]
                encode_res["token_type_ids"] = encode_res["token_type_ids"][1:
                                                                            -1]
                encode_res["attention_mask"] = encode_res["attention_mask"][1:
                                                                            -1]
            if label.lower() == "other":
                # "other" regions are all background (label 0)
                gt_label.extend([0] * len(encode_res["input_ids"]))
            else:
                # BIO tagging: first sub-token gets B-, the rest get I-
                gt_label.append(self.label2id_map[("b-" + label).upper()])
                gt_label.extend([self.label2id_map[("i-" + label).upper()]] *
                                (len(encode_res["input_ids"]) - 1))
            if self.contains_re:
                if gt_label[0] != self.label2id_map["O"]:
                    entity_id_to_index_map[info["id"]] = len(entities)
                    entities.append({
                        "start": len(input_ids_list),
                        "end":
                        len(input_ids_list) + len(encode_res["input_ids"]),
                        "label": label.upper(),
                    })
            input_ids_list.extend(encode_res["input_ids"])
            token_type_ids_list.extend(encode_res["token_type_ids"])
            bbox_list.extend([bbox] * len(encode_res["input_ids"]))
            gt_label_list.extend(gt_label)
            words_list.append(text)

        encoded_inputs = {
            "input_ids": input_ids_list,
            "labels": gt_label_list,
            "token_type_ids": token_type_ids_list,
            "bbox": bbox_list,
            "attention_mask": [1] * len(input_ids_list),
            # "words_list": words_list,
        }
        encoded_inputs = self.pad_sentences(
            encoded_inputs,
            max_seq_len=self.max_seq_len,
            return_attention_mask=self.return_attention_mask)
        encoded_inputs = self.truncate_inputs(encoded_inputs)

        if self.contains_re:
            relations = self._relations(entities, relations, id2label,
                                        empty_entity, entity_id_to_index_map)
            encoded_inputs['relations'] = relations
            encoded_inputs['entities'] = entities
        return encoded_inputs

    def _chunk_ser(self, encoded_inputs):
        """Split one long SER sample into fixed-size 512-token chunks."""
        encoded_inputs_all = []
        seq_len = len(encoded_inputs['input_ids'])
        chunk_size = 512
        for chunk_id, index in enumerate(range(0, seq_len, chunk_size)):
            chunk_beg = index
            chunk_end = min(index + chunk_size, seq_len)
            encoded_inputs_example = {}
            for key in encoded_inputs:
                encoded_inputs_example[key] = encoded_inputs[key][chunk_beg:
                                                                  chunk_end]

            encoded_inputs_all.append(encoded_inputs_example)
        return encoded_inputs_all

    def _chunk_re(self, encoded_inputs):
        """Split one RE sample into 512-token chunks, remapping entity and
        relation indices so they are local to each chunk. Entities or
        relations that straddle a chunk boundary are dropped."""
        # prepare data
        entities = encoded_inputs.pop('entities')
        relations = encoded_inputs.pop('relations')
        encoded_inputs_all = []
        chunk_size = 512
        for chunk_id, index in enumerate(
                range(0, len(encoded_inputs["input_ids"]), chunk_size)):
            item = {}
            for k in encoded_inputs:
                item[k] = encoded_inputs[k][index:index + chunk_size]

            # select entity in current chunk
            entities_in_this_span = []
            global_to_local_map = {}  #
            for entity_id, entity in enumerate(entities):
                if (index <= entity["start"] < index + chunk_size and
                        index <= entity["end"] < index + chunk_size):
                    # NOTE(review): mutates the shared entity dict in place —
                    # safe only because each entity falls in a single chunk
                    entity["start"] = entity["start"] - index
                    entity["end"] = entity["end"] - index
                    global_to_local_map[entity_id] = len(entities_in_this_span)
                    entities_in_this_span.append(entity)

            # select relations in current chunk
            relations_in_this_span = []
            for relation in relations:
                if (index <= relation["start_index"] < index + chunk_size and
                        index <= relation["end_index"] < index + chunk_size):
                    relations_in_this_span.append({
                        "head": global_to_local_map[relation["head"]],
                        "tail": global_to_local_map[relation["tail"]],
                        "start_index": relation["start_index"] - index,
                        "end_index": relation["end_index"] - index,
                    })
            item.update({
                "entities": reformat(entities_in_this_span),
                "relations": reformat(relations_in_this_span),
            })
            # map textual entity labels (HEADER/QUESTION/ANSWER) to ids
            item['entities']['label'] = [
                self.entities_labels[x] for x in item['entities']['label']
            ]
            encoded_inputs_all.append(item)
        return encoded_inputs_all

    def _relations(self, entities, relations, id2label, empty_entity,
                   entity_id_to_index_map):
        """
        build relations
        """
        # deduplicate and drop pairs that touch an empty-text entity
        relations = list(set(relations))
        relations = [
            rel for rel in relations
            if rel[0] not in empty_entity and rel[1] not in empty_entity
        ]
        kv_relations = []
        for rel in relations:
            pair = [id2label[rel[0]], id2label[rel[1]]]
            # keep only question->answer links, normalized to head=question
            if pair == ["question", "answer"]:
                kv_relations.append({
                    "head": entity_id_to_index_map[rel[0]],
                    "tail": entity_id_to_index_map[rel[1]]
                })
            elif pair == ["answer", "question"]:
                kv_relations.append({
                    "head": entity_id_to_index_map[rel[1]],
                    "tail": entity_id_to_index_map[rel[0]]
                })
            else:
                continue
        relations = sorted(
            [{
                "head": rel["head"],
                "tail": rel["tail"],
                "start_index": get_relation_span(rel, entities)[0],
                "end_index": get_relation_span(rel, entities)[1],
            } for rel in kv_relations],
            key=lambda x: x["head"], )
        return relations

    def load_img(self, image_path):
        """Load an image, resize to ``self.img_size`` and return a normalized
        CHW float array (ImageNet mean/std)."""
        # read img
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        resize_h, resize_w = self.img_size
        im_shape = img.shape[0:2]
        im_scale_y = resize_h / im_shape[0]
        im_scale_x = resize_w / im_shape[1]
        img_new = cv2.resize(
            img,
            None,
            None,
            fx=im_scale_x,
            fy=im_scale_y,
            interpolation=2)
        mean = np.array([0.485, 0.456, 0.406])[np.newaxis, np.newaxis, :]
        std = np.array([0.229, 0.224, 0.225])[np.newaxis, np.newaxis, :]
        img_new = img_new / 255.0
        img_new -= mean
        img_new /= std
        img = img_new.transpose((2, 0, 1))
        return img

    def __getitem__(self, idx):
        if self.load_mode == "all":
            # deepcopy so callers cannot mutate the cached parse results
            data = copy.deepcopy(self.encoded_inputs_all[idx])
        else:
            # lazy mode: only the first chunk of the sample is used
            data = self._parse_label_file(self.all_lines[idx])[0]

        image_path = data.pop('image_path')
        data["image"] = self.load_img(image_path)

        return_data = {}
        for k, v in data.items():
            if k in self.return_keys:
                if self.return_keys[k]['type'] == 'np':
                    v = np.array(v, dtype=self.return_keys[k]['dtype'])
                return_data[k] = v
        return return_data

    def __len__(self, ):
        if self.load_mode == "all":
            return len(self.encoded_inputs_all)
        else:
            return len(self.all_lines)
def get_relation_span(rel, entities):
    """Return the (min_start, max_end) token span covering both endpoints
    of a relation.

    Args:
        rel: dict with "head" and "tail" entity indices.
        entities: list of dicts each carrying "start" and "end" offsets.
    """
    positions = []
    for endpoint in (rel["head"], rel["tail"]):
        entity = entities[endpoint]
        positions.extend((entity["start"], entity["end"]))
    return min(positions), max(positions)
def reformat(data):
    """Transpose a list of dicts into a dict of lists.

    Keys are the union of all item keys; each value list preserves item
    order. Items missing a key simply contribute nothing for it.

    Args:
        data: iterable of dicts.

    Returns:
        dict mapping each key to the list of values seen for it.
    """
    new_data = {}
    for item in data:
        for k, v in item.items():
            # setdefault replaces the manual "if k not in new_data" dance
            new_data.setdefault(k, []).append(v)
    return new_data
requirements.txt
View file @
41a1b292
...
...
@@ -13,4 +13,3 @@ lxml
premailer
openpyxl
fasttext
==0.9.1
test_tipc/prepare.sh
View file @
41a1b292
...
...
@@ -239,8 +239,7 @@ fi
if
[
${
MODE
}
=
"klquant_whole_infer"
]
;
then
wget
-nc
-P
./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015_lite.tar
--no-check-certificate
cd
./train_data/
&&
tar
xf icdar2015_lite.tar
ln
-s
./icdar2015_lite ./icdar2015
&&
cd
../
cd
./train_data/
&&
tar
xf icdar2015_lite.tar
&&
rm
-rf
./icdar2015
&&
ln
-s
./icdar2015_lite ./icdar2015
&&
cd
../
if
[
${
model_name
}
=
"ch_ppocr_mobile_v2.0_det_KL"
]
;
then
wget
-nc
-P
./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar
--no-check-certificate
wget
-nc
-P
./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
--no-check-certificate
...
...
@@ -249,6 +248,8 @@ if [ ${MODE} = "klquant_whole_infer" ]; then
if
[
${
model_name
}
=
"PPOCRv2_ocr_rec_kl"
]
;
then
wget
-nc
-P
./inference https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar
--no-check-certificate
wget
-nc
-P
./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/rec_inference.tar
--no-check-certificate
wget
-nc
-P
./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ic15_data.tar
--no-check-certificate
cd
./train_data/
&&
tar
xf ic15_data.tar
&&
cd
../
cd
./inference
&&
tar
xf rec_inference.tar
&&
tar
xf ch_PP-OCRv2_rec_infer.tar
&&
cd
../
fi
if
[
${
model_name
}
=
"PPOCRv2_ocr_det_kl"
]
;
then
...
...
test_tipc/readme.md
View file @
41a1b292
...
...
@@ -68,14 +68,14 @@ test_tipc/
├── model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt
# 测试Linux上c++预测的配置文件
├── model_linux_gpu_normal_normal_infer_python_jetson.txt
# 测试Jetson上python预测的配置文件
├── train_linux_gpu_fleet_amp_infer_python_linux_gpu_cpu.txt
# 测试Linux上多机多卡、混合精度训练和python预测的配置文件
├── ...
├── ...
├── ch_ppocr_server_v2.0_det
# ch_ppocr_server_v2.0_det模型的测试配置文件目录
├── ...
├── ...
├── ch_ppocr_mobile_v2.0_rec
# ch_ppocr_mobile_v2.0_rec模型的测试配置文件目录
├── ...
├── ...
├── ch_ppocr_server_v2.0_det
# ch_ppocr_server_v2.0_det模型的测试配置文件目录
├── ...
├── ...
├── ...
├── ...
├── results/
# 预先保存的预测结果,用于和实际预测结果进行精读比对
├── python_ppocr_det_mobile_results_fp32.txt
# 预存的mobile版ppocr检测模型python预测fp32精度的结果
├── python_ppocr_det_mobile_results_fp16.txt
# 预存的mobile版ppocr检测模型python预测fp16精度的结果
...
...
@@ -119,7 +119,7 @@ bash test_tipc/test_train_inference_python.sh configs/[model_name]/[params_file_
bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt
'lite_train_lite_infer'
# 运行测试
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt
'lite_train_lite_infer'
```
```
关于本示例命令的更多信息可查看
[
基础训练预测使用文档
](
https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/test_tipc/docs/test_train_inference_python.md#22-%E5%8A%9F%E8%83%BD%E6%B5%8B%E8%AF%95
)
。
### 配置文件命名规范
...
...
@@ -136,9 +136,9 @@ bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobil
<a
name=
"more"
></a>
## 4. 开始测试
各功能测试中涉及混合精度、裁剪、量化等训练相关,及mkldnn、Tensorrt等多种预测相关参数配置,请点击下方相应链接了解更多细节和使用教程:
-
[
test_train_inference_python 使用
](
docs/test_train_inference_python.md
)
:测试基于Python的模型训练、评估、推理等基本功能,包括裁剪、量化、蒸馏。
各功能测试中涉及混合精度、裁剪、量化等训练相关,及mkldnn、Tensorrt等多种预测相关参数配置,请点击下方相应链接了解更多细节和使用教程:
-
[
test_train_inference_python 使用
](
docs/test_train_inference_python.md
)
:测试基于Python的模型训练、评估、推理等基本功能,包括裁剪、量化、蒸馏。
-
[
test_inference_cpp 使用
](
docs/test_inference_cpp.md
)
:测试基于C++的模型推理。
-
[
test_serving 使用
](
docs/test_serving.md
)
:测试基于Paddle Serving的服务化部署功能。
-
[
test_lite_arm_
cpu_
cpp 使用
](
docs/test_lite_arm_
cpu_
cpp.md
)
:测试基于Paddle-Lite的ARM CPU端c++预测部署功能。
-
[
test_lite_arm_cpp 使用
](
docs/test_lite_arm_cpp.md
)
:测试基于Paddle-Lite的ARM CPU端c++预测部署功能。
-
[
test_paddle2onnx 使用
](
docs/test_paddle2onnx.md
)
:测试Paddle2ONNX的模型转化功能,并验证正确性。
test_tipc/supplementary/__init__.py
0 → 100644
View file @
41a1b292
test_tipc/supplementary/config.py
0 → 100644
View file @
41a1b292
import
numpy
as
np
import
os
import
sys
import
platform
import
yaml
import
time
import
shutil
import
paddle
import
paddle.distributed
as
dist
from
tqdm
import
tqdm
from
argparse
import
ArgumentParser
,
RawDescriptionHelpFormatter
from
utils
import
get_logger
,
print_dict
class ArgsParser(ArgumentParser):
    """Argument parser for training/eval scripts.

    Requires ``-c/--config`` (YAML config path) and supports ``-o/--opt``
    key=value overrides plus ``-p/--profiler_options``.
    """

    def __init__(self):
        super(ArgsParser, self).__init__(
            formatter_class=RawDescriptionHelpFormatter)
        self.add_argument("-c", "--config", help="configuration file to use")
        self.add_argument(
            "-o", "--opt", nargs='+', help="set configuration options")
        self.add_argument(
            '-p',
            '--profiler_options',
            type=str,
            default=None,
            help='The option of profiler, which should be in format \"key1=value1;key2=value2;key3=value3\".'
        )

    def parse_args(self, argv=None):
        """Parse *argv* (or sys.argv) and expand ``-o`` overrides into a dict."""
        args = super(ArgsParser, self).parse_args(argv)
        assert args.config is not None, \
            "Please specify --config=configure_file_path."
        args.opt = self._parse_opt(args.opt)
        return args

    def _parse_opt(self, opts):
        """Turn ["k=v", ...] override strings into {k: parsed_v}.

        Values are parsed with the YAML loader so "3", "true", "[1,2]"
        become int/bool/list.
        """
        config = {}
        if not opts:
            return config
        for s in opts:
            s = s.strip()
            # bug fix: split only on the FIRST '=' so values that themselves
            # contain '=' (e.g. paths or nested overrides) don't crash with
            # "too many values to unpack"
            k, v = s.split('=', 1)
            config[k] = yaml.load(v, Loader=yaml.Loader)
        return config
class AttrDict(dict):
    """Single level attribute dict, NOT recursive"""

    def __init__(self, **kwargs):
        super(AttrDict, self).__init__()
        super(AttrDict, self).update(kwargs)

    def __getattr__(self, key):
        # fall back to item access so d.key mirrors d["key"]
        try:
            return self[key]
        except KeyError:
            raise AttributeError("object has no attribute '{}'".format(key))
# module-level config singleton that merge_config/load_config mutate in place
global_config = AttrDict()

# baseline values merged in before any user config is loaded
default_config = {'Global': {'debug': False, }}
def load_config(file_path):
    """
    Load config from yml/yaml file.
    Args:
        file_path (str): Path of the config file to be loaded.
    Returns: global config
    """
    merge_config(default_config)
    _, ext = os.path.splitext(file_path)
    assert ext in ['.yml', '.yaml'], "only support yaml files for now"
    # fix: use a context manager so the file handle is always closed
    # (the original left the open() handle dangling)
    with open(file_path, 'rb') as f:
        merge_config(yaml.load(f, Loader=yaml.Loader))
    return global_config
def merge_config(config):
    """
    Merge config into global config.

    Top-level keys without a dot either dict-update an existing section or
    overwrite/insert the value. Dotted keys like "Global.debug" drill into
    the existing nested structure and set the leaf.

    Args:
        config (dict): Config to be merged.
    Returns: global config
    """
    for key, value in config.items():
        if "." not in key:
            if isinstance(value, dict) and key in global_config:
                # merge into the existing section instead of replacing it
                global_config[key].update(value)
            else:
                global_config[key] = value
        else:
            sub_keys = key.split('.')
            assert (
                sub_keys[0] in global_config
            ), "the sub_keys can only be one of global_config: {}, but get: {}, please check your running command".format(
                global_config.keys(), sub_keys[0])
            cur = global_config[sub_keys[0]]
            for idx, sub_key in enumerate(sub_keys[1:]):
                # the second-to-last path element holds the leaf to assign
                if idx == len(sub_keys) - 2:
                    cur[sub_key] = value
                else:
                    cur = cur[sub_key]
def preprocess(is_train=False):
    """Parse CLI args, load and merge the YAML config, and build a logger.

    When *is_train* is True the merged config is also written to
    ``<save_model_dir>/config.yml`` and logging goes to ``train.log`` there.

    Returns:
        (config, logger) tuple.
    """
    FLAGS = ArgsParser().parse_args()
    profiler_options = FLAGS.profiler_options
    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    profile_dic = {"profiler_options": FLAGS.profiler_options}
    merge_config(profile_dic)

    if is_train:
        # save_config
        save_model_dir = config['save_model_dir']
        os.makedirs(save_model_dir, exist_ok=True)
        with open(os.path.join(save_model_dir, 'config.yml'), 'w') as f:
            yaml.dump(
                dict(config), f, default_flow_style=False, sort_keys=False)
        log_file = '{}/train.log'.format(save_model_dir)
    else:
        log_file = None
    logger = get_logger(name='root', log_file=log_file)

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['use_gpu']

    print_dict(config, logger)

    return config, logger
if __name__ == "__main__":
    # smoke-test the config pipeline without training-specific side effects
    config, logger = preprocess(is_train=False)
    # print(config)
test_tipc/supplementary/custom_op/custom_relu_op.cc
0 → 100644
View file @
41a1b292
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// reference from : https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/python/custom-operator/custom_relu_op.cc
#include <iostream>
#include <vector>
#include "paddle/extension.h"
// Element-wise ReLU on CPU: out[i] = max(0, x[i]) for all x_numel elements.
template <typename data_t>
void relu_cpu_forward_kernel(const data_t* x_data,
                             data_t* out_data,
                             int64_t x_numel) {
  for (int i = 0; i < x_numel; ++i) {
    out_data[i] = std::max(static_cast<data_t>(0.), x_data[i]);
  }
}
// ReLU backward on CPU: pass the upstream gradient through where the
// forward output was positive, zero elsewhere.
template <typename data_t>
void relu_cpu_backward_kernel(const data_t* grad_out_data,
                              const data_t* out_data,
                              data_t* grad_x_data,
                              int64_t out_numel) {
  for (int i = 0; i < out_numel; ++i) {
    grad_x_data[i] =
        grad_out_data[i] * (out_data[i] > static_cast<data_t>(0) ? 1. : 0.);
  }
}
// CPU forward entry: allocates the output tensor and dispatches the typed
// kernel over all floating-point dtypes.
std::vector<paddle::Tensor> relu_cpu_forward(const paddle::Tensor& x) {
  auto out = paddle::Tensor(paddle::PlaceType::kCPU);
  out.reshape(x.shape());

  PD_DISPATCH_FLOATING_TYPES(
      x.type(), "relu_cpu_forward", ([&] {
        relu_cpu_forward_kernel<data_t>(
            x.data<data_t>(), out.mutable_data<data_t>(x.place()), x.size());
      }));

  return {out};
}
// CPU backward entry: grad_x has x's shape; dispatch over floating dtypes.
std::vector<paddle::Tensor> relu_cpu_backward(const paddle::Tensor& x,
                                              const paddle::Tensor& out,
                                              const paddle::Tensor& grad_out) {
  auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU);
  grad_x.reshape(x.shape());

  PD_DISPATCH_FLOATING_TYPES(out.type(), "relu_cpu_backward", ([&] {
                               relu_cpu_backward_kernel<data_t>(
                                   grad_out.data<data_t>(),
                                   out.data<data_t>(),
                                   grad_x.mutable_data<data_t>(x.place()),
                                   out.size());
                             }));

  return {grad_x};
}
// Forward declarations; definitions live in custom_relu_op.cu.
std::vector<paddle::Tensor> relu_cuda_forward(const paddle::Tensor& x);
std::vector<paddle::Tensor> relu_cuda_backward(const paddle::Tensor& x,
                                               const paddle::Tensor& out,
                                               const paddle::Tensor& grad_out);
// Device-dispatching forward: routes to the CPU or GPU implementation
// based on the input tensor's placement.
std::vector<paddle::Tensor> ReluForward(const paddle::Tensor& x) {
  // TODO(chenweihang): Check Input
  if (x.place() == paddle::PlaceType::kCPU) {
    return relu_cpu_forward(x);
  } else if (x.place() == paddle::PlaceType::kGPU) {
    return relu_cuda_forward(x);
  } else {
    throw std::runtime_error("Not implemented.");
  }
}
// Device-dispatching backward counterpart of ReluForward.
std::vector<paddle::Tensor> ReluBackward(const paddle::Tensor& x,
                                         const paddle::Tensor& out,
                                         const paddle::Tensor& grad_out) {
  // TODO(chenweihang): Check Input
  if (x.place() == paddle::PlaceType::kCPU) {
    return relu_cpu_backward(x, out, grad_out);
  } else if (x.place() == paddle::PlaceType::kGPU) {
    return relu_cuda_backward(x, out, grad_out);
  } else {
    throw std::runtime_error("Not implemented.");
  }
}
// Register the custom op "custom_relu" and its gradient with Paddle.
PD_BUILD_OP(custom_relu)
    .Inputs({"X"})
    .Outputs({"Out"})
    .SetKernelFn(PD_KERNEL(ReluForward));

PD_BUILD_GRAD_OP(custom_relu)
    .Inputs({"X", "Out", paddle::Grad("Out")})
    .Outputs({paddle::Grad("X")})
    .SetKernelFn(PD_KERNEL(ReluBackward));
\ No newline at end of file
test_tipc/supplementary/custom_op/custom_relu_op.cu
0 → 100644
View file @
41a1b292
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// reference https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/python/custom-operator/custom_relu_op.cu
#include "paddle/extension.h"
// CUDA forward kernel: each thread strides across the flat array
// (grid-stride loop) computing y[i] = max(x[i], 0).
template <typename data_t>
__global__ void relu_cuda_forward_kernel(const data_t* x,
                                         data_t* y,
                                         const int num) {
  int gid = blockIdx.x * blockDim.x + threadIdx.x;
  for (int i = gid; i < num; i += blockDim.x * gridDim.x) {
    y[i] = max(x[i], static_cast<data_t>(0.));
  }
}
// CUDA backward kernel: dx[i] = dy[i] where the forward output was
// positive, else 0 (grid-stride loop).
template <typename data_t>
__global__ void relu_cuda_backward_kernel(const data_t* dy,
                                          const data_t* y,
                                          data_t* dx,
                                          const int num) {
  int gid = blockIdx.x * blockDim.x + threadIdx.x;
  for (int i = gid; i < num; i += blockDim.x * gridDim.x) {
    dx[i] = dy[i] * (y[i] > 0 ? 1. : 0.);
  }
}
// GPU forward launcher: 512 threads/block, enough blocks to cover numel,
// launched on the input tensor's stream.
std::vector<paddle::Tensor> relu_cuda_forward(const paddle::Tensor& x) {
  auto out = paddle::Tensor(paddle::PlaceType::kGPU);
  out.reshape(x.shape());

  int numel = x.size();
  int block = 512;
  int grid = (numel + block - 1) / block;
  PD_DISPATCH_FLOATING_TYPES(
      x.type(), "relu_cuda_forward_kernel", ([&] {
        relu_cuda_forward_kernel<data_t><<<grid, block, 0, x.stream()>>>(
            x.data<data_t>(), out.mutable_data<data_t>(x.place()), numel);
      }));

  return {out};
}
// GPU backward launcher; same launch configuration as the forward pass.
std::vector<paddle::Tensor> relu_cuda_backward(const paddle::Tensor& x,
                                               const paddle::Tensor& out,
                                               const paddle::Tensor& grad_out) {
  auto grad_x = paddle::Tensor(paddle::PlaceType::kGPU);
  grad_x.reshape(x.shape());

  int numel = out.size();
  int block = 512;
  int grid = (numel + block - 1) / block;
  PD_DISPATCH_FLOATING_TYPES(
      out.type(), "relu_cuda_backward_kernel", ([&] {
        relu_cuda_backward_kernel<data_t><<<grid, block, 0, x.stream()>>>(
            grad_out.data<data_t>(),
            out.data<data_t>(),
            grad_x.mutable_data<data_t>(x.place()),
            numel);
      }));

  return {grad_x};
}
test_tipc/supplementary/custom_op/test.py
0 → 100644
View file @
41a1b292
import
paddle
import
paddle.nn
as
nn
from
paddle.vision.transforms
import
Compose
,
Normalize
from
paddle.utils.cpp_extension
import
load
from
paddle.inference
import
Config
from
paddle.inference
import
create_predictor
import
numpy
as
np
# training hyperparameters for the custom-op smoke test
EPOCH_NUM = 4
BATCH_SIZE = 64

# jit compile custom op
custom_ops = load(
    name="custom_jit_ops",
    sources=["custom_relu_op.cc", "custom_relu_op.cu"])
class LeNet(nn.Layer):
    """Classic LeNet-5 classifier for MNIST (1x28x28 in, 10 logits out),
    using the JIT-compiled ``custom_relu`` op as its activation."""

    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2D(
            in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2)
        self.max_pool1 = nn.MaxPool2D(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2D(
            in_channels=6, out_channels=16, kernel_size=5, stride=1)
        self.max_pool2 = nn.MaxPool2D(kernel_size=2, stride=2)
        # 16 channels x 5x5 spatial after the two conv/pool stages
        self.linear1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.linear2 = nn.Linear(in_features=120, out_features=84)
        self.linear3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        x = self.conv1(x)
        x = custom_ops.custom_relu(x)
        x = self.max_pool1(x)
        x = custom_ops.custom_relu(x)
        x = self.conv2(x)
        x = self.max_pool2(x)
        # flatten everything but the batch dimension for the dense head
        x = paddle.flatten(x, start_axis=1, stop_axis=-1)
        x = self.linear1(x)
        x = custom_ops.custom_relu(x)
        x = self.linear2(x)
        x = custom_ops.custom_relu(x)
        x = self.linear3(x)
        return x
# set device
paddle.set_device("gpu")

# model
net = LeNet()
loss_fn = nn.CrossEntropyLoss()
opt = paddle.optimizer.Adam(
    learning_rate=0.001, parameters=net.parameters())

# data loader: scale MNIST pixels from [0, 255] to [-1, 1]
transform = Compose([Normalize(mean=[127.5], std=[127.5], data_format='CHW')])
train_dataset = paddle.vision.datasets.MNIST(mode='train', transform=transform)
train_loader = paddle.io.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True,
    num_workers=2)

# train: standard forward/backward/step loop, logging every 300 batches
for epoch_id in range(EPOCH_NUM):
    for batch_id, (image, label) in enumerate(train_loader()):
        out = net(image)
        loss = loss_fn(out, label)
        loss.backward()

        if batch_id % 300 == 0:
            print("Epoch {} batch {}: loss = {}".format(
                epoch_id, batch_id, np.mean(loss.numpy())))

        opt.step()
        opt.clear_grad()
test_tipc/supplementary/data.py
0 → 100644
View file @
41a1b292
import
numpy
as
np
import
paddle
import
os
import
cv2
import
glob
def transform(data, ops=None):
    """Run *data* through each operator in *ops* sequentially.

    An operator returning ``None`` signals "drop this sample": the pipeline
    stops immediately and ``None`` is propagated to the caller.
    """
    for operator in (ops or []):
        data = operator(data)
        if data is None:
            return None
    return data
def create_operators(op_param_list, global_config=None):
    """
    create operators based on the config

    Each entry is a single-key dict: {OperatorClassName: kwargs-or-None}.
    Args:
        params(list): a dict list, used to create some operators
    """
    assert isinstance(op_param_list, list), ('operator config should be a list')
    ops = []
    for operator in op_param_list:
        assert isinstance(operator,
                          dict) and len(operator) == 1, "yaml format error"
        op_name = list(operator)[0]
        param = {} if operator[op_name] is None else operator[op_name]
        if global_config is not None:
            param.update(global_config)
        # SECURITY NOTE: eval() instantiates whatever class name appears in
        # the YAML config — only run with trusted configuration files.
        op = eval(op_name)(**param)
        ops.append(op)
    return ops
class DecodeImage(object):
    """Decode raw image bytes from ``data['image']`` into an ndarray.

    Args:
        img_mode: 'RGB' (default, BGR->RGB flip), 'GRAY' (expand to 3
            channels) or anything else to keep the cv2 BGR decode as-is.
        channel_first: if True, transpose the result from HWC to CHW.
    """

    def __init__(self, img_mode='RGB', channel_first=False, **kwargs):
        self.img_mode = img_mode
        self.channel_first = channel_first

    def __call__(self, data):
        img = data['image']
        # bug fix: the original branched on six.PY2, but `six` is never
        # imported in this module, so that line raised NameError. This
        # codebase is Python-3 only (it uses f-strings), so only the
        # bytes check is needed.
        assert type(img) is bytes and len(
            img) > 0, "invalid input 'img' in DecodeImage"
        img = np.frombuffer(img, dtype='uint8')
        img = cv2.imdecode(img, 1)
        if img is None:
            # undecodable payload: signal "drop sample" to the pipeline
            return None
        if self.img_mode == 'GRAY':
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        elif self.img_mode == 'RGB':
            assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape)
            # cv2 decodes BGR; reverse the channel axis to get RGB
            img = img[:, :, ::-1]

        if self.channel_first:
            img = img.transpose((2, 0, 1))

        data['image'] = img
        # keep an untouched copy of the decoded image alongside
        data['src_image'] = img
        return data
class NormalizeImage(object):
    """ normalize image such as substract mean, divide std
    """

    def __init__(self, scale=None, mean=None, std=None, order='chw', **kwargs):
        if isinstance(scale, str):
            # allow expressions like "1./255." from YAML configs
            # (trusted-config eval, same caveat as create_operators)
            scale = eval(scale)
        self.scale = np.float32(scale if scale is not None else 1.0 / 255.0)
        # defaults are the ImageNet channel statistics
        mean = mean if mean is not None else [0.485, 0.456, 0.406]
        std = std if std is not None else [0.229, 0.224, 0.225]

        # broadcastable shape depends on channel ordering
        shape = (3, 1, 1) if order == 'chw' else (1, 1, 3)
        self.mean = np.array(mean).reshape(shape).astype('float32')
        self.std = np.array(std).reshape(shape).astype('float32')

    def __call__(self, data):
        img = data['image']
        from PIL import Image
        if isinstance(img, Image.Image):
            img = np.array(img)
        assert isinstance(img,
                          np.ndarray), "invalid input 'img' in NormalizeImage"
        data['image'] = (
            img.astype('float32') * self.scale - self.mean) / self.std
        return data
class ToCHWImage(object):
    """ convert hwc image to chw image

    Transposes both data['image'] and data['src_image'] from HWC to CHW,
    converting PIL images to numpy arrays first.
    """

    def __init__(self, **kwargs):
        pass

    def __call__(self, data):
        from PIL import Image
        img = data['image']
        if isinstance(img, Image.Image):
            img = np.array(img)
        data['image'] = img.transpose((2, 0, 1))

        src_img = data['src_image']
        # bug fix: the original tested `img` instead of `src_img` here and
        # then stored `img.transpose(...)` under 'src_image', silently
        # overwriting the source image with the main image
        if isinstance(src_img, Image.Image):
            src_img = np.array(src_img)
        data['src_image'] = src_img.transpose((2, 0, 1))
        return data
class SimpleDataset(nn.Dataset):
    """Minimal dataset that collects *.png files from the configured dir.

    NOTE(review): the base class is `nn.Dataset`; paddle datasets usually
    derive from `paddle.io.Dataset` -- confirm `nn` exposes `Dataset`.
    """

    def __init__(self, config, mode, logger, seed=None):
        self.logger = logger
        self.mode = mode.lower()

        data_dir = config['Train']['data_dir']

        # bug fix: the image list was computed but never stored on self
        self.imgs_list = self.get_image_list(data_dir)

        # bug fix: `cfg` was an undefined name (NameError at runtime);
        # the operator config comes from `config`
        self.ops = create_operators(config['transforms'], None)

    def get_image_list(self, img_dir):
        """Return every png path in img_dir; raise if none are found."""
        imgs = glob.glob(os.path.join(img_dir, "*.png"))
        if len(imgs) == 0:
            raise ValueError(f"not any images founded in {img_dir}")
        return imgs

    def __getitem__(self, idx):
        # placeholder: sample loading is not implemented yet
        return None
test_tipc/supplementary/data_loader.py
0 → 100644
View file @
41a1b292
import
numpy
as
np
from
paddle.vision.datasets
import
Cifar100
from
paddle.vision.transforms
import
Normalize
from
paddle.fluid.dataloader.collate
import
default_collate_fn
import
signal
import
os
from
paddle.io
import
Dataset
,
DataLoader
,
DistributedBatchSampler
def term_mp(sig_num, frame):
    """Signal handler that kills the whole process group.

    Registered on SIGINT/SIGTERM so that stopping the main process also
    terminates any dataloader worker processes.
    """
    proc_id = os.getpid()
    group_id = os.getpgid(os.getpid())
    print("main proc {} exit, kill process group "
          "{}".format(proc_id, group_id))
    os.killpg(group_id, signal.SIGKILL)
def build_dataloader(mode, batch_size=4, seed=None, num_workers=0,
                     device='gpu:0'):
    """Create a Cifar100 DataLoader for training or evaluation.

    Args:
        mode: 'train', or one of 'test'/'valid'/'eval' for evaluation.
        batch_size: samples per batch; the incomplete last batch is dropped.
        seed: accepted for interface compatibility but unused.
        num_workers: number of dataloader worker processes.
        device: place the batches are created on.

    Returns:
        A paddle DataLoader over the normalized Cifar100 dataset.

    Raises:
        ValueError: if mode is not a recognised value.
    """
    normalize = Normalize(
        mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], data_format='HWC')

    lowered = mode.lower()
    if lowered == "train":
        dataset = Cifar100(mode=mode, transform=normalize)
    elif lowered in ["test", 'valid', 'eval']:
        dataset = Cifar100(mode="test", transform=normalize)
    else:
        raise ValueError(f"{mode} should be one of ['train', 'test']")

    # define batch sampler; note: shuffle is False even for training
    batch_sampler = DistributedBatchSampler(
        dataset=dataset, batch_size=batch_size, shuffle=False, drop_last=True)

    data_loader = DataLoader(
        dataset=dataset,
        batch_sampler=batch_sampler,
        places=device,
        num_workers=num_workers,
        return_list=True,
        use_shared_memory=False)

    # support exit using ctrl+c
    signal.signal(signal.SIGINT, term_mp)
    signal.signal(signal.SIGTERM, term_mp)

    return data_loader
# cifar100 = Cifar100(mode='train', transform=normalize)
# data = cifar100[0]
# image, label = data
# reader = build_dataloader('train')
# for idx, data in enumerate(reader):
# print(idx, data[0].shape, data[1].shape)
# if idx >= 10:
# break
test_tipc/supplementary/load_cifar.py
0 → 100644
View file @
41a1b292
import
pickle
as
p
import
numpy
as
np
from
PIL
import
Image
def load_CIFAR_batch(filename):
    """ load single batch of cifar

    Args:
        filename: path to a pickled CIFAR batch file (Python 2 pickle,
            hence encoding='bytes').

    Returns:
        (X, Y): image array of shape (N, 3, 32, 32) and label array (N,).
    """
    with open(filename, 'rb') as f:
        datadict = p.load(f, encoding='bytes')
        # pull the raw pixel block and the fine-grained labels from the dict
        X = datadict[b'data']
        Y = datadict[b'fine_labels']
        # bug fix: the original tried reshape(10000, ...) under a bare
        # `except:` and fell back to 50000, swallowing unrelated errors;
        # let numpy infer the batch size instead (handles both, plus any
        # other batch size, and still raises on malformed data)
        X = X.reshape(-1, 3, 32, 32)
        Y = np.array(Y)
        print(Y.shape)
        return X, Y
# Script entry: unpack one pickled CIFAR-100 batch into individual png
# files plus an "img<i> <label>" index file under
# ./cifar-100-python/<mode>_imgs/.
# NOTE(review): the output directory is not created here -- it must
# already exist, otherwise open()/save() will fail. Confirm.
if __name__ == "__main__":
    mode = "train"
    imgX, imgY = load_CIFAR_batch(f"./cifar-100-python/{mode}")
    # append one "img<i> <label>" line per sample to the label index file
    with open(f'./cifar-100-python/{mode}_imgs/img_label.txt', 'a+') as f:
        for i in range(imgY.shape[0]):
            f.write('img' + str(i) + ' ' + str(imgY[i]) + '\n')
    # rebuild each (3, 32, 32) sample from its three channel planes and
    # save it as an RGB png
    for i in range(imgX.shape[0]):
        imgs = imgX[i]
        img0 = imgs[0]
        img1 = imgs[1]
        img2 = imgs[2]
        i0 = Image.fromarray(img0)
        i1 = Image.fromarray(img1)
        i2 = Image.fromarray(img2)
        img = Image.merge("RGB", (i0, i1, i2))
        name = "img" + str(i) + ".png"
        img.save(f"./cifar-100-python/{mode}_imgs/" + name, "png")
    print("save successfully!")
test_tipc/supplementary/loss.py
0 → 100644
View file @
41a1b292
import
paddle
import
paddle.nn.functional
as
F
class Loss(object):
    """Cross-entropy classification loss with optional label smoothing.

    Args:
        class_dim: number of classes; must be greater than 1.
        epsilon: label-smoothing factor; smoothing is enabled only when
            epsilon lies in [0.0, 1.0], otherwise it is disabled.
    """

    def __init__(self, class_dim=1000, epsilon=None):
        assert class_dim > 1, "class_dim=%d is not larger than 1" % (class_dim)
        self._class_dim = class_dim
        smoothing_on = epsilon is not None and 0.0 <= epsilon <= 1.0
        self._epsilon = epsilon if smoothing_on else None
        self._label_smoothing = smoothing_on

    def _labelsmoothing(self, target):
        """Turn hard labels into smoothed soft targets."""
        if target.shape[-1] != self._class_dim:
            # hard integer labels: expand to one-hot first
            one_hot_target = F.one_hot(target, self._class_dim)
        else:
            one_hot_target = target
        soft_target = F.label_smooth(one_hot_target, epsilon=self._epsilon)
        return paddle.reshape(soft_target, shape=[-1, self._class_dim])

    def _crossentropy(self, input, target, use_pure_fp16=False):
        """Cross entropy; sum-reduced under pure fp16, mean otherwise."""
        if self._label_smoothing:
            target = self._labelsmoothing(target)
            # soft targets need an explicit -log_softmax formulation
            input = -F.log_softmax(input, axis=-1)
            cost = paddle.sum(target * input, axis=-1)
        else:
            cost = F.cross_entropy(input=input, label=target)
        return paddle.sum(cost) if use_pure_fp16 else paddle.mean(cost)

    def __call__(self, input, target):
        return self._crossentropy(input, target)
def build_loss(config, epsilon=None):
    """Build a plain classification Loss from a config dict."""
    return Loss(class_dim=config['class_dim'], epsilon=epsilon)
class LossDistill(Loss):
    """Per-model cross-entropy loss for distillation training.

    Args:
        model_name_list: names of at least two model outputs; the loss is
            computed separately for each named entry of `input`.
        class_dim: number of classes; must be greater than 1.
        epsilon: optional label-smoothing factor in [0.0, 1.0].
    """

    def __init__(self, model_name_list, class_dim=1000, epsilon=None):
        # consistency fix: the original duplicated Loss.__init__ verbatim;
        # delegate to the base class instead
        super(LossDistill, self).__init__(class_dim=class_dim, epsilon=epsilon)

        self.model_name_list = model_name_list
        # the original message was just "error"; say what went wrong
        assert len(self.model_name_list) > 1, \
            "model_name_list must contain at least two model names"

    def __call__(self, input, target):
        """Return a dict mapping each model name to its cross entropy."""
        losses = {}
        for k in self.model_name_list:
            inp = input[k]
            losses[k] = self._crossentropy(inp, target)
        return losses
class KLJSLoss(object):
    """KL or JS divergence between two prediction tensors."""

    def __init__(self, mode='kl'):
        # mode selects the divergence; matched case-insensitively in __call__
        assert mode in ['kl', 'js', 'KL', 'JS'], \
            "mode can only be one of ['kl', 'js', 'KL', 'JS']"
        self.mode = mode

    def __call__(self, p1, p2, reduction="mean"):
        # logits -> probability distributions
        p1 = F.softmax(p1, axis=-1)
        p2 = F.softmax(p2, axis=-1)

        # KL(p2 || p1); the 1e-5 terms guard against log(0) and div-by-zero
        loss = paddle.multiply(p2, paddle.log((p2 + 1e-5) / (p1 + 1e-5) + 1e-5))

        if self.mode.lower() == "js":
            # JS is the symmetrised average of both KL directions
            loss += paddle.multiply(
                p1, paddle.log((p1 + 1e-5) / (p2 + 1e-5) + 1e-5))
            loss *= 0.5
        if reduction == "mean":
            loss = paddle.mean(loss)
        elif reduction == "none" or reduction is None:
            return loss
        else:
            # any other reduction value falls back to sum
            loss = paddle.sum(loss)
        return loss
class DMLLoss(object):
    """Deep-mutual-learning loss over pairs of model outputs."""

    def __init__(self, model_name_pairs, mode='js'):
        self.model_name_pairs = self._check_model_name_pairs(model_name_pairs)
        self.kljs_loss = KLJSLoss(mode=mode)

    def _check_model_name_pairs(self, model_name_pairs):
        """Normalise the pair spec to a list of [name_a, name_b] lists."""
        if not isinstance(model_name_pairs, list):
            return []
        is_nested = (isinstance(model_name_pairs[0], list) and
                     isinstance(model_name_pairs[0][0], str))
        # a single flat pair gets wrapped into a one-element list of pairs
        return model_name_pairs if is_nested else [model_name_pairs]

    def __call__(self, predicts, target=None):
        """Return {"a_b": divergence} for every configured (a, b) pair."""
        loss_dict = dict()
        for pairs in self.model_name_pairs:
            first = predicts[pairs[0]]
            second = predicts[pairs[1]]
            loss_dict[pairs[0] + "_" + pairs[1]] = self.kljs_loss(first,
                                                                  second)
        return loss_dict
# def build_distill_loss(config, epsilon=None):
# class_dim = config['class_dim']
# loss = LossDistill(model_name_list=['student', 'student1'], )
# return loss_func
test_tipc/supplementary/metric.py
0 → 100644
View file @
41a1b292
import
paddle
import
paddle.nn.functional
as
F
from
collections
import
OrderedDict
def create_metric(out,
                  label,
                  architecture=None,
                  topk=5,
                  classes_num=1000,
                  use_distillation=False,
                  mode="train"):
    """
    Create measures of model accuracy, such as top1 and top5

    Args:
        out(variable): model output variable
        label(variable): ground-truth label tensor
        architecture: unused (the GoogLeNet handling below is commented out)
        topk(int): usually top5; clamped to classes_num
        classes_num(int): num of classes
        use_distillation(bool): unused (handling is commented out)
        mode(str): mode, train/valid; non-train mode enables multi-card
            metric averaging

    Returns:
        fetchs(dict): dict of measures keyed 'top1' and 'top<k>'
    """
    # if architecture["name"] == "GoogLeNet":
    #     assert len(out) == 3, "GoogLeNet should have 3 outputs"
    #     out = out[0]
    # else:
    #     # just need student label to get metrics
    #     if use_distillation:
    #         out = out[1]
    softmax_out = F.softmax(out)

    fetchs = OrderedDict()
    # set top1 to fetchs
    top1 = paddle.metric.accuracy(softmax_out, label=label, k=1)
    # set topk to fetchs; k cannot exceed the number of classes
    k = min(topk, classes_num)
    topk = paddle.metric.accuracy(softmax_out, label=label, k=k)

    # multi cards' eval: average the per-card accuracies across the world
    if mode != "train" and paddle.distributed.get_world_size() > 1:
        top1 = paddle.distributed.all_reduce(
            top1, op=paddle.distributed.ReduceOp.
            SUM) / paddle.distributed.get_world_size()
        topk = paddle.distributed.all_reduce(
            topk, op=paddle.distributed.ReduceOp.
            SUM) / paddle.distributed.get_world_size()

    fetchs['top1'] = top1
    topk_name = 'top{}'.format(k)
    fetchs[topk_name] = topk
    return fetchs
test_tipc/supplementary/mv3.py
0 → 100644
View file @
41a1b292
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
numpy
as
np
import
paddle
from
paddle
import
ParamAttr
import
paddle.nn
as
nn
import
paddle.nn.functional
as
F
from
paddle.nn.functional
import
hardswish
,
hardsigmoid
from
paddle.nn
import
Conv2D
,
BatchNorm
,
Linear
,
Dropout
from
paddle.nn
import
AdaptiveAvgPool2D
,
MaxPool2D
,
AvgPool2D
from
paddle.regularizer
import
L2Decay
import
math
from
paddle.utils.cpp_extension
import
load
# jit compile custom op at import time
# NOTE(review): the source paths are relative to the current working
# directory, so this module must be run from test_tipc/supplementary/
# for the JIT build to find them -- confirm.
custom_ops = load(
    name="custom_jit_ops",
    sources=["./custom_op/custom_relu_op.cc", "./custom_op/custom_relu_op.cu"])
def make_divisible(v, divisor=8, min_value=None):
    """Round v to the nearest multiple of divisor, at least min_value.

    The result never drops more than 10% below v; when rounding down
    would do so, one extra divisor step is added (standard MobileNet
    channel-rounding rule).
    """
    floor = divisor if min_value is None else min_value
    rounded = max(floor, int(v + divisor / 2) // divisor * divisor)
    return rounded + divisor if rounded < 0.9 * v else rounded
class MobileNetV3(nn.Layer):
    """MobileNetV3 backbone + classifier head.

    Args:
        scale: width multiplier applied to every channel count.
        model_name: "large" or "small" variant.
        dropout_prob: dropout rate before the final Linear layer.
        class_dim: number of output classes.
        use_custom_relu: forwarded to every ConvBNLayer; selects the
            JIT-compiled custom relu op instead of F.relu.
    """

    def __init__(self,
                 scale=1.0,
                 model_name="small",
                 dropout_prob=0.2,
                 class_dim=1000,
                 use_custom_relu=False):
        super(MobileNetV3, self).__init__()
        self.use_custom_relu = use_custom_relu

        inplanes = 16
        if model_name == "large":
            # each row: [k, exp, c, se, nl, s] = kernel size, expanded
            # width, output channels, use-SE flag, activation name, stride
            # (consumed by the ResidualUnit loop below)
            self.cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, False, "relu", 1],
                [3, 64, 24, False, "relu", 2],
                [3, 72, 24, False, "relu", 1],
                [5, 72, 40, True, "relu", 2],
                [5, 120, 40, True, "relu", 1],
                [5, 120, 40, True, "relu", 1],
                [3, 240, 80, False, "hardswish", 2],
                [3, 200, 80, False, "hardswish", 1],
                [3, 184, 80, False, "hardswish", 1],
                [3, 184, 80, False, "hardswish", 1],
                [3, 480, 112, True, "hardswish", 1],
                [3, 672, 112, True, "hardswish", 1],
                [5, 672, 160, True, "hardswish", 2],
                [5, 960, 160, True, "hardswish", 1],
                [5, 960, 160, True, "hardswish", 1],
            ]
            self.cls_ch_squeeze = 960
            self.cls_ch_expand = 1280
        elif model_name == "small":
            # k, exp, c, se, nl, s (same column meaning as above)
            self.cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, True, "relu", 2],
                [3, 72, 24, False, "relu", 2],
                [3, 88, 24, False, "relu", 1],
                [5, 96, 40, True, "hardswish", 2],
                [5, 240, 40, True, "hardswish", 1],
                [5, 240, 40, True, "hardswish", 1],
                [5, 120, 48, True, "hardswish", 1],
                [5, 144, 48, True, "hardswish", 1],
                [5, 288, 96, True, "hardswish", 2],
                [5, 576, 96, True, "hardswish", 1],
                [5, 576, 96, True, "hardswish", 1],
            ]
            self.cls_ch_squeeze = 576
            self.cls_ch_expand = 1280
        else:
            raise NotImplementedError(
                "mode[{}_model] is not implemented!".format(model_name))

        # stem: 3x3 stride-2 conv
        self.conv1 = ConvBNLayer(
            in_c=3,
            out_c=make_divisible(inplanes * scale),
            filter_size=3,
            stride=2,
            padding=1,
            num_groups=1,
            if_act=True,
            act="hardswish",
            name="conv1",
            use_custom_relu=self.use_custom_relu)

        # inverted-residual blocks, registered as sublayers conv2..convN
        self.block_list = []
        i = 0
        inplanes = make_divisible(inplanes * scale)
        for (k, exp, c, se, nl, s) in self.cfg:
            block = self.add_sublayer(
                "conv" + str(i + 2),
                ResidualUnit(
                    in_c=inplanes,
                    mid_c=make_divisible(scale * exp),
                    out_c=make_divisible(scale * c),
                    filter_size=k,
                    stride=s,
                    use_se=se,
                    act=nl,
                    name="conv" + str(i + 2),
                    use_custom_relu=self.use_custom_relu))
            self.block_list.append(block)
            inplanes = make_divisible(scale * c)
            i += 1

        # 1x1 squeeze conv before pooling
        self.last_second_conv = ConvBNLayer(
            in_c=inplanes,
            out_c=make_divisible(scale * self.cls_ch_squeeze),
            filter_size=1,
            stride=1,
            padding=0,
            num_groups=1,
            if_act=True,
            act="hardswish",
            name="conv_last",
            use_custom_relu=self.use_custom_relu)

        self.pool = AdaptiveAvgPool2D(1)

        # 1x1 expansion conv after pooling (no BN, no bias)
        self.last_conv = Conv2D(
            in_channels=make_divisible(scale * self.cls_ch_squeeze),
            out_channels=self.cls_ch_expand,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(),
            bias_attr=False)

        self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer")

        # final classifier
        self.out = Linear(
            self.cls_ch_expand,
            class_dim,
            weight_attr=ParamAttr(),
            bias_attr=ParamAttr())

    def forward(self, inputs, label=None):
        # label is accepted but unused here
        x = self.conv1(inputs)
        for block in self.block_list:
            x = block(x)
        x = self.last_second_conv(x)
        x = self.pool(x)
        x = self.last_conv(x)
        x = hardswish(x)
        x = self.dropout(x)
        x = paddle.flatten(x, start_axis=1, stop_axis=-1)
        x = self.out(x)
        return x
class ConvBNLayer(nn.Layer):
    """Conv2D + BatchNorm with an optional relu/hardswish activation.

    When act == "relu" and use_custom_relu is True, the JIT-compiled
    custom_ops.custom_relu is used instead of F.relu.
    """

    def __init__(self,
                 in_c,
                 out_c,
                 filter_size,
                 stride,
                 padding,
                 num_groups=1,
                 if_act=True,
                 act=None,
                 use_cudnn=True,
                 name="",
                 use_custom_relu=False):
        # use_cudnn and name are currently accepted but unused below
        super(ConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.conv = Conv2D(
            in_channels=in_c,
            out_channels=out_c,
            kernel_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            weight_attr=ParamAttr(),
            bias_attr=False)
        # BN scale/offset are excluded from weight decay
        self.bn = BatchNorm(
            num_channels=out_c,
            act=None,
            param_attr=ParamAttr(regularizer=L2Decay(0.0)),
            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
        # moving_mean_name=name + "_bn_mean",
        # moving_variance_name=name + "_bn_variance")

        self.use_custom_relu = use_custom_relu

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        if self.if_act:
            if self.act == "relu":
                if self.use_custom_relu:
                    x = custom_ops.custom_relu(x)
                else:
                    x = F.relu(x)
            elif self.act == "hardswish":
                x = hardswish(x)
            else:
                # NOTE(review): an unsupported activation terminates the
                # whole process via exit(); raising would be friendlier
                print("The activation function is selected incorrectly.")
                exit()
        return x
class ResidualUnit(nn.Layer):
    """MobileNetV3 inverted-residual block: expand -> depthwise -> project.

    A residual shortcut is added only when stride == 1 and the input and
    output channel counts match; an SE module is inserted when use_se.
    """

    def __init__(self,
                 in_c,
                 mid_c,
                 out_c,
                 filter_size,
                 stride,
                 use_se,
                 act=None,
                 name='',
                 use_custom_relu=False):
        super(ResidualUnit, self).__init__()
        # shortcut only valid when shapes are preserved
        self.if_shortcut = stride == 1 and in_c == out_c
        self.if_se = use_se

        self.use_custom_relu = use_custom_relu

        # 1x1 expansion conv
        self.expand_conv = ConvBNLayer(
            in_c=in_c,
            out_c=mid_c,
            filter_size=1,
            stride=1,
            padding=0,
            if_act=True,
            act=act,
            name=name + "_expand",
            use_custom_relu=self.use_custom_relu)
        # depthwise conv (groups == channels)
        self.bottleneck_conv = ConvBNLayer(
            in_c=mid_c,
            out_c=mid_c,
            filter_size=filter_size,
            stride=stride,
            padding=int((filter_size - 1) // 2),
            num_groups=mid_c,
            if_act=True,
            act=act,
            name=name + "_depthwise",
            use_custom_relu=self.use_custom_relu)
        if self.if_se:
            self.mid_se = SEModule(mid_c, name=name + "_se")
        # 1x1 linear projection (no activation)
        self.linear_conv = ConvBNLayer(
            in_c=mid_c,
            out_c=out_c,
            filter_size=1,
            stride=1,
            padding=0,
            if_act=False,
            act=None,
            name=name + "_linear",
            use_custom_relu=self.use_custom_relu)

    def forward(self, inputs):
        x = self.expand_conv(inputs)
        x = self.bottleneck_conv(x)
        if self.if_se:
            x = self.mid_se(x)
        x = self.linear_conv(x)
        if self.if_shortcut:
            x = paddle.add(inputs, x)
        return x
class SEModule(nn.Layer):
    """Squeeze-and-excitation block: global pool -> bottleneck -> gate.

    The input is rescaled channel-wise by a hardsigmoid gate computed
    from globally pooled features.
    """

    def __init__(self, channel, reduction=4, name=""):
        # name is accepted but unused
        super(SEModule, self).__init__()
        self.avg_pool = AdaptiveAvgPool2D(1)
        # squeeze: channel -> channel // reduction
        self.conv1 = Conv2D(
            in_channels=channel,
            out_channels=channel // reduction,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(),
            bias_attr=ParamAttr())
        # excite: channel // reduction -> channel
        self.conv2 = Conv2D(
            in_channels=channel // reduction,
            out_channels=channel,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(),
            bias_attr=ParamAttr())

    def forward(self, inputs):
        outputs = self.avg_pool(inputs)
        outputs = self.conv1(outputs)
        outputs = F.relu(outputs)
        outputs = self.conv2(outputs)
        # gate in (0, 1) via hardsigmoid
        outputs = hardsigmoid(outputs, slope=0.2, offset=0.5)
        return paddle.multiply(x=inputs, y=outputs)
def MobileNetV3_small_x0_35(**args):
    """MobileNetV3 small backbone at width multiplier 0.35."""
    return MobileNetV3(model_name="small", scale=0.35, **args)
def MobileNetV3_small_x0_5(**args):
    """MobileNetV3 small backbone at width multiplier 0.5."""
    return MobileNetV3(model_name="small", scale=0.5, **args)
def MobileNetV3_small_x0_75(**args):
    """MobileNetV3 small backbone at width multiplier 0.75."""
    return MobileNetV3(model_name="small", scale=0.75, **args)
def MobileNetV3_small_x1_0(**args):
    """MobileNetV3 small backbone at width multiplier 1.0."""
    return MobileNetV3(model_name="small", scale=1.0, **args)
def MobileNetV3_small_x1_25(**args):
    """MobileNetV3 small backbone at width multiplier 1.25."""
    return MobileNetV3(model_name="small", scale=1.25, **args)
def MobileNetV3_large_x0_35(**args):
    """MobileNetV3 large backbone at width multiplier 0.35."""
    return MobileNetV3(model_name="large", scale=0.35, **args)
def MobileNetV3_large_x0_5(**args):
    """MobileNetV3 large backbone at width multiplier 0.5."""
    return MobileNetV3(model_name="large", scale=0.5, **args)
def MobileNetV3_large_x0_75(**args):
    """MobileNetV3 large backbone at width multiplier 0.75."""
    return MobileNetV3(model_name="large", scale=0.75, **args)
def MobileNetV3_large_x1_0(**args):
    """MobileNetV3 large backbone at width multiplier 1.0."""
    return MobileNetV3(model_name="large", scale=1.0, **args)
def MobileNetV3_large_x1_25(**args):
    """MobileNetV3 large backbone at width multiplier 1.25.

    Bug fix: the original function ended with a bare `return`, so the
    constructed model was discarded and callers received None (every
    sibling factory in this file returns the model).
    """
    model = MobileNetV3(model_name="large", scale=1.25, **args)
    return model
class DistillMV3(nn.Layer):
    """Two parallel MobileNetV3 students for distillation-style training.

    forward() returns both students' logits keyed 'student'/'student1'.
    dropout_prob and args are accepted but unused here.
    """

    def __init__(self,
                 scale=1.0,
                 model_name="small",
                 dropout_prob=0.2,
                 class_dim=1000,
                 args=None,
                 use_custom_relu=False):
        super(DistillMV3, self).__init__()

        self.student = MobileNetV3(
            model_name=model_name,
            scale=scale,
            class_dim=class_dim,
            use_custom_relu=use_custom_relu)

        self.student1 = MobileNetV3(
            model_name=model_name,
            scale=scale,
            class_dim=class_dim,
            use_custom_relu=use_custom_relu)

    def forward(self, inputs, label=None):
        # run both students on the same batch and return both outputs
        predicts = dict()
        predicts['student'] = self.student(inputs, label)
        predicts['student1'] = self.student1(inputs, label)
        return predicts
def distillmv3_large_x0_5(**args):
    """Distillation pair of MobileNetV3-large x0.5 students."""
    return DistillMV3(model_name="large", scale=0.5, **args)
class SiameseMV3(nn.Layer):
    """Siamese MobileNetV3: two backbones whose feature maps are summed.

    Both sub-networks process the same input; their backbone outputs are
    added and the head of `net` (conv/pool/classifier) produces the
    logits. dropout_prob and args are accepted but unused.
    """

    def __init__(self,
                 scale=1.0,
                 model_name="small",
                 dropout_prob=0.2,
                 class_dim=1000,
                 args=None,
                 use_custom_relu=False):
        super(SiameseMV3, self).__init__()

        self.net = MobileNetV3(
            model_name=model_name,
            scale=scale,
            class_dim=class_dim,
            use_custom_relu=use_custom_relu)
        self.net1 = MobileNetV3(
            model_name=model_name,
            scale=scale,
            class_dim=class_dim,
            use_custom_relu=use_custom_relu)

    def forward(self, inputs, label=None):
        # branch 1: backbone of net
        x = self.net.conv1(inputs)
        for block in self.net.block_list:
            x = block(x)
        # branch 2: backbone of net1 on the same input
        x1 = self.net1.conv1(inputs)
        for block in self.net1.block_list:
            x1 = block(x1)
        # fuse the two branches by element-wise addition
        x = x + x1
        # classification head reuses net's layers only
        x = self.net.last_second_conv(x)
        x = self.net.pool(x)
        x = self.net.last_conv(x)
        x = hardswish(x)
        x = self.net.dropout(x)
        x = paddle.flatten(x, start_axis=1, stop_axis=-1)
        x = self.net.out(x)
        return x
def siamese_mv3(class_dim, use_custom_relu):
    """Siamese MobileNetV3-large x0.5 classifier."""
    return SiameseMV3(
        scale=0.5,
        model_name="large",
        class_dim=class_dim,
        use_custom_relu=use_custom_relu)
def build_model(config):
    """Build the model selected by config['model_type'].

    Supported types: 'cls', 'cls_distill', 'cls_distill_multiopt'.

    Args:
        config: dict with 'model_type' and a 'MODEL' sub-dict providing
            'class_dim', 'use_custom_relu' and optionally 'siamese'.

    Returns:
        The constructed paddle model.

    Raises:
        ValueError: if config['model_type'] is not a supported type.
    """
    model_type = config['model_type']

    if model_type == "cls":
        class_dim = config['MODEL']['class_dim']
        use_custom_relu = config['MODEL']['use_custom_relu']
        if config['MODEL'].get('siamese') is True:
            model = siamese_mv3(
                class_dim=class_dim, use_custom_relu=use_custom_relu)
        else:
            model = MobileNetV3_large_x0_5(
                class_dim=class_dim, use_custom_relu=use_custom_relu)
    elif model_type == "cls_distill":
        class_dim = config['MODEL']['class_dim']
        use_custom_relu = config['MODEL']['use_custom_relu']
        model = distillmv3_large_x0_5(
            class_dim=class_dim, use_custom_relu=use_custom_relu)
    elif model_type == "cls_distill_multiopt":
        class_dim = config['MODEL']['class_dim']
        use_custom_relu = config['MODEL']['use_custom_relu']
        # NOTE(review): class_dim is hard-coded to 100 here, ignoring the
        # configured value -- kept as-is, confirm this is intentional
        model = distillmv3_large_x0_5(
            class_dim=100, use_custom_relu=use_custom_relu)
    else:
        # bug fix: the original message listed no valid options ("['']")
        raise ValueError(
            "model_type should be one of "
            "['cls', 'cls_distill', 'cls_distill_multiopt'], "
            "but got {}".format(model_type))
    return model
test_tipc/supplementary/mv3_distill.yml
0 → 100644
View file @
41a1b292
class_dim
:
100
total_images
:
50000
epochs
:
1000
topk
:
5
save_model_dir
:
./output/
use_gpu
:
True
model_type
:
cls_distill
LEARNING_RATE
:
function
:
'
Cosine'
params
:
lr
:
0.001
warmup_epoch
:
5
OPTIMIZER
:
function
:
'
Momentum'
params
:
momentum
:
0.9
regularizer
:
function
:
'
L2'
factor
:
0.00002
TRAIN
:
batch_size
:
1280
num_workers
:
4
VALID
:
batch_size
:
64
num_workers
:
4
test_tipc/supplementary/mv3_large_x0_5.yml
0 → 100644
View file @
41a1b292
class_dim
:
100
total_images
:
50000
epoch
:
1000
topk
:
5
save_model_dir
:
./output/
use_gpu
:
True
model_type
:
cls
use_custom_relu
:
false
pretrained_model
:
checkpoints
:
save_model_dir
:
./output/cls/
# slim
quant_train
:
false
prune_train
:
false
MODEL
:
class_dim
:
100
use_custom_relu
:
False
siamese
:
False
AMP
:
use_amp
:
False
scale_loss
:
1024.0
use_dynamic_loss_scale
:
True
LEARNING_RATE
:
function
:
'
Cosine'
params
:
lr
:
0.001
warmup_epoch
:
5
OPTIMIZER
:
function
:
'
Momentum'
params
:
momentum
:
0.9
regularizer
:
function
:
'
L2'
factor
:
0.00002
TRAIN
:
batch_size
:
1280
num_workers
:
4
VALID
:
batch_size
:
64
num_workers
:
4
Prev
1
…
3
4
5
6
7
8
9
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment