wangsen / paddle_dbnet
"composable_kernel/include/utility/math.hpp" did not exist on "79d9b1084b8f65fe6c261483276b791aeb918627"
Commit 11f6ff38, authored Jan 13, 2022 by LDOUBLEV

    add supplementary

Parent: b9c0627d

Showing 8 changed files with 811 additions and 0 deletions.
Files changed (additions / deletions):
- test_tipc/supplementary/test_tipc/test_train_python.sh (+117 / -0)
- test_tipc/supplementary/test_tipc/tipc_train.png (+0 / -0)
- test_tipc/supplementary/test_tipc/train_infer_python.txt (+17 / -0)
- test_tipc/supplementary/test_tipc/train_infer_python_FPGM.txt (+17 / -0)
- test_tipc/supplementary/test_tipc/train_infer_python_PACT.txt (+17 / -0)
- test_tipc/supplementary/train.py (+474 / -0)
- test_tipc/supplementary/train.sh (+5 / -0)
- test_tipc/supplementary/utils.py (+164 / -0)
test_tipc/supplementary/test_tipc/test_train_python.sh (new file, mode 100644)
```bash
#!/bin/bash
source test_tipc/common_func.sh

FILENAME=$1
# MODE must be one of ['lite_train_lite_infer', 'whole_train_whole_infer']
MODE=$2

dataline=$(awk 'NR==1, NR==51{print}' $FILENAME)

# parse params
IFS=$'\n'
lines=(${dataline})

model_name=$(func_parser_value "${lines[1]}")
python=$(func_parser_value "${lines[2]}")
gpu_list=$(func_parser_value "${lines[3]}")
train_use_gpu_key=$(func_parser_key "${lines[4]}")
train_use_gpu_value=$(func_parser_value "${lines[4]}")
autocast_list=$(func_parser_value "${lines[5]}")
autocast_key=$(func_parser_key "${lines[5]}")
epoch_key=$(func_parser_key "${lines[6]}")
epoch_num=$(func_parser_params "${lines[6]}" "${MODE}")
save_model_key=$(func_parser_key "${lines[7]}")
train_batch_key=$(func_parser_key "${lines[8]}")
train_batch_value=$(func_parser_params "${lines[8]}" "${MODE}")
pretrain_model_key=$(func_parser_key "${lines[9]}")
pretrain_model_value=$(func_parser_value "${lines[9]}")
checkpoints_key=$(func_parser_key "${lines[10]}")
checkpoints_value=$(func_parser_value "${lines[10]}")
use_custom_key=$(func_parser_key "${lines[11]}")
use_custom_list=$(func_parser_value "${lines[11]}")
model_type_key=$(func_parser_key "${lines[12]}")
model_type_list=$(func_parser_value "${lines[12]}")
use_share_conv_key=$(func_parser_key "${lines[13]}")
use_share_conv_list=$(func_parser_value "${lines[13]}")
run_train_py=$(func_parser_value "${lines[14]}")

LOG_PATH="./test_tipc/extra_output"
mkdir -p ${LOG_PATH}
status_log="${LOG_PATH}/results_python.log"

if [ ${MODE} = "lite_train_lite_infer" ] || [ ${MODE} = "whole_train_whole_infer" ]; then
    IFS="|"
    export Count=0
    USE_GPU_KEY=(${train_use_gpu_value})
    # select cpu / gpu / distributed training
    for gpu in ${gpu_list[*]}; do
        train_use_gpu=${USE_GPU_KEY[Count]}
        Count=$(($Count + 1))
        ips=""
        if [ ${gpu} = "-1" ]; then
            env=""
        elif [ ${#gpu} -le 1 ]; then
            env="export CUDA_VISIBLE_DEVICES=${gpu}"
            eval ${env}
        elif [ ${#gpu} -le 15 ]; then
            IFS=","
            array=(${gpu})
            env="export CUDA_VISIBLE_DEVICES=${array[0]}"
            IFS="|"
        else
            IFS=";"
            array=(${gpu})
            ips=${array[0]}
            gpu=${array[1]}
            IFS="|"
            env=" "
        fi
        for autocast in ${autocast_list[*]}; do
            # set amp
            if [ ${autocast} = "amp" ]; then
                set_amp_config="AMP.use_amp=True"
            else
                set_amp_config=" "
            fi
            if [ ${run_train_py} = "null" ]; then
                continue
            fi
            set_autocast=$(func_set_params "${autocast_key}" "${autocast}")
            set_epoch=$(func_set_params "${epoch_key}" "${epoch_num}")
            set_pretrain=$(func_set_params "${pretrain_model_key}" "${pretrain_model_value}")
            set_checkpoints=$(func_set_params "${checkpoints_key}" "${checkpoints_value}")
            set_batchsize=$(func_set_params "${train_batch_key}" "${train_batch_value}")
            set_use_gpu=$(func_set_params "${train_use_gpu_key}" "${train_use_gpu}")
            for custom_op in ${use_custom_list[*]}; do
                for model_type in ${model_type_list[*]}; do
                    for share_conv in ${use_share_conv_list[*]}; do
                        set_use_custom_op=$(func_set_params "${use_custom_key}" "${custom_op}")
                        set_model_type=$(func_set_params "${model_type_key}" "${model_type}")
                        set_use_share_conv=$(func_set_params "${use_share_conv_key}" "${share_conv}")
                        # NOTE: ${save_log} is never assigned in this script,
                        # so set_save_model resolves to an empty value here.
                        set_save_model=$(func_set_params "${save_model_key}" "${save_log}")
                        if [ ${#gpu} -le 2 ]; then
                            # train with cpu or single gpu
                            cmd="${python} ${run_train_py} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_checkpoints} ${set_autocast} ${set_batchsize} ${set_use_custom_op} ${set_model_type} ${set_use_share_conv} ${set_amp_config}"
                        elif [ ${#ips} -le 26 ]; then
                            # train with multi-gpu
                            cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train_py} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_checkpoints} ${set_autocast} ${set_batchsize} ${set_use_custom_op} ${set_model_type} ${set_use_share_conv} ${set_amp_config}"
                        fi
                        # run train
                        eval "unset CUDA_VISIBLE_DEVICES"
                        # echo $cmd
                        eval $cmd
                        status_check $? "${cmd}" "${status_log}"
                    done
                done
            done
        done
    done
fi
```
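The `func_parser_*` and `func_set_params` helpers come from `test_tipc/common_func.sh`, which is not part of this commit. As a rough sketch of what the parsers do with the `key:value` lines in the config files below (a Python rendering under that assumption, not the actual shell implementation):

```python
# Hedged sketch: plausible equivalents of the common_func.sh parsers.
def func_parser_key(line: str) -> str:
    # "epoch:lite_train_lite_infer=2|..." -> "epoch"
    return line.split(":", 1)[0].strip()

def func_parser_value(line: str) -> str:
    # "epoch:lite_train_lite_infer=2|..." -> "lite_train_lite_infer=2|..."
    return line.split(":", 1)[1].strip()

def func_parser_params(line: str, mode: str) -> str:
    # pick the alternative matching MODE:
    # ("epoch:lite_train_lite_infer=2|whole_train_whole_infer=1000",
    #  "lite_train_lite_infer") -> "2"
    for item in func_parser_value(line).split("|"):
        if "=" in item and item.split("=", 1)[0] == mode:
            return item.split("=", 1)[1]
    return func_parser_value(line)
```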
test_tipc/supplementary/test_tipc/tipc_train.png (new file, mode 100644): binary image, 1.02 MB.
test_tipc/supplementary/test_tipc/train_infer_python.txt (new file, mode 100644)
```
===========================train_params===========================
model_name:ch_PPOCRv2_det
python:python3.7
gpu_list:0|0,1
use_gpu:True|True
AMP.use_amp:True|False
epoch:lite_train_lite_infer=2|whole_train_whole_infer=1000
save_model_dir:./output/
TRAIN.batch_size:lite_train_lite_infer=1280|whole_train_whole_infer=1280
pretrained_model:null
checkpoints:null
use_custom_relu:False|True
model_type:cls|cls_distill|cls_distill_multiopt
MODEL.siamese:False|True
norm_train:train.py -c mv3_large_x0_5.yml -o
quant_train:False
prune_train:False
```
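The harness is invoked with a config file and a mode, e.g. `bash test_tipc/supplementary/test_tipc/test_train_python.sh test_tipc/supplementary/test_tipc/train_infer_python.txt lite_train_lite_infer` (inferred from `FILENAME=$1` / `MODE=$2` above). Fields holding `|`-separated alternatives span a test matrix that the script's nested loops walk exhaustively; a quick way to count the combinations (illustrative only):

```python
import itertools

# |-separated alternatives from train_infer_python.txt
gpu_list = ["0", "0,1"]
use_custom_relu = ["False", "True"]
model_type = ["cls", "cls_distill", "cls_distill_multiopt"]
siamese = ["False", "True"]

runs = list(itertools.product(gpu_list, use_custom_relu, model_type, siamese))
print(len(runs))  # 2 * 2 * 3 * 2 = 24 training runs per autocast setting
```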
test_tipc/supplementary/test_tipc/train_infer_python_FPGM.txt (new file, mode 100644). Same as train_infer_python.txt except for the epoch and batch-size values and the FPGM pruning override `prune_train=True` appended to `norm_train`.
```
===========================train_params===========================
model_name:ch_PPOCRv2_det
python:python3.7
gpu_list:0|0,1
use_gpu:True|True
AMP.use_amp:True|False
epoch:lite_train_lite_infer=20|whole_train_whole_infer=1000
save_model_dir:./output/
TRAIN.batch_size:lite_train_lite_infer=2|whole_train_whole_infer=4
pretrained_model:null
checkpoints:null
use_custom_relu:False|True
model_type:cls|cls_distill|cls_distill_multiopt
MODEL.siamese:False|True
norm_train:train.py -c mv3_large_x0_5.yml -o prune_train=True
quant_train:False
prune_train:False
```
test_tipc/supplementary/test_tipc/train_infer_python_PACT.txt (new file, mode 100644). Same as the FPGM variant except that `norm_train` carries the PACT quantization override `quant_train=True`.
```
===========================train_params===========================
model_name:ch_PPOCRv2_det
python:python3.7
gpu_list:0|0,1
use_gpu:True|True
AMP.use_amp:True|False
epoch:lite_train_lite_infer=20|whole_train_whole_infer=1000
save_model_dir:./output/
TRAIN.batch_size:lite_train_lite_infer=2|whole_train_whole_infer=4
pretrained_model:null
checkpoints:null
use_custom_relu:False|True
model_type:cls|cls_distill|cls_distill_multiopt
MODEL.siamese:False|True
norm_train:train.py -c mv3_large_x0_5.yml -o quant_train=True
quant_train:False
prune_train:False
```
test_tipc/supplementary/train.py (new file, mode 100644)
```python
import errno  # used by _mkdir_if_not_exist; missing from the original imports
import os
import time

import numpy as np
import paddle
import paddle.nn as nn
import paddle.distributed as dist

dist.get_world_size()
dist.init_parallel_env()

from loss import build_loss, LossDistill, DMLLoss, KLJSLoss
from optimizer import create_optimizer
from data_loader import build_dataloader
from metric import create_metric
from mv3 import MobileNetV3_large_x0_5, distillmv3_large_x0_5, build_model
from config import preprocess

from paddleslim.dygraph.quant import QAT
from slim.slim_quant import PACT, quant_config
from slim.slim_fpgm import prune_model
from utils import load_model


def _mkdir_if_not_exist(path, logger):
    """
    mkdir if not exists; ignore the exception when multiple processes mkdir together
    """
    if not os.path.exists(path):
        try:
            os.makedirs(path)
        except OSError as e:
            if e.errno == errno.EEXIST and os.path.isdir(path):
                logger.warning(
                    'be happy if some process has already created {}'.format(
                        path))
            else:
                raise OSError('Failed to mkdir {}'.format(path))


def save_model(model,
               optimizer,
               model_path,
               logger,
               is_best=False,
               prefix='ppocr',
               **kwargs):
    """
    save model to the target path
    """
    _mkdir_if_not_exist(model_path, logger)
    model_prefix = os.path.join(model_path, prefix)
    paddle.save(model.state_dict(), model_prefix + '.pdparams')
    if type(optimizer) is list:
        # two optimizers, e.g. one per student in multi-optimizer distillation
        paddle.save(optimizer[0].state_dict(), model_prefix + '.pdopt')
        paddle.save(optimizer[1].state_dict(), model_prefix + '_1' + '.pdopt')
    else:
        paddle.save(optimizer.state_dict(), model_prefix + '.pdopt')

    # # save metric and config
    # with open(model_prefix + '.states', 'wb') as f:
    #     pickle.dump(kwargs, f, protocol=2)
    if is_best:
        logger.info('save best model to {}'.format(model_prefix))
    else:
        logger.info("save model in {}".format(model_prefix))
```
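`save_model` writes a `.pdparams` / `.pdopt` pair under `save_model_dir`; the matching load path is `utils.load_model`, shown later in this diff. A minimal sketch of reading such a checkpoint back (the `./output/cls` prefix matches `save_model(..., prefix="cls")` in `train()` below; `model` and `optimizer` are assumed to be built as in that function):

```python
# Hedged sketch: reload a checkpoint written by save_model(prefix="cls").
import paddle

params = paddle.load('./output/cls.pdparams')    # parameter tensors
opt_state = paddle.load('./output/cls.pdopt')    # optimizer state
model.set_state_dict(params)         # `model` built via build_model(config)
optimizer.set_state_dict(opt_state)  # `optimizer` from create_optimizer(...)
```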
```python
def amp_scaler(config):
    if 'AMP' in config and config['AMP']['use_amp'] is True:
        AMP_RELATED_FLAGS_SETTING = {
            'FLAGS_cudnn_batchnorm_spatial_persistent': 1,
            'FLAGS_max_inplace_grad_add': 8,
        }
        paddle.fluid.set_flags(AMP_RELATED_FLAGS_SETTING)
        scale_loss = config["AMP"].get("scale_loss", 1.0)
        use_dynamic_loss_scaling = config["AMP"].get(
            "use_dynamic_loss_scaling", False)
        scaler = paddle.amp.GradScaler(
            init_loss_scaling=scale_loss,
            use_dynamic_loss_scaling=use_dynamic_loss_scaling)
        return scaler
    else:
        return None
```
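`amp_scaler` only returns a `GradScaler` when the config carries an `AMP` section; the test configs above toggle `AMP.use_amp` through the `-o` override. A plausible fully-specified section, with the field names taken from the `.get()` calls above and the values purely illustrative:

```python
# Illustrative config fragment consumed by amp_scaler():
config = {
    'AMP': {
        'use_amp': True,
        'scale_loss': 1024.0,              # initial loss scaling factor
        'use_dynamic_loss_scaling': True,  # let the scaler adapt at runtime
    },
}
```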
```python
def set_seed(seed):
    paddle.seed(seed)
    np.random.seed(seed)


def train(config, scaler=None):
    EPOCH = config['epoch']
    topk = config['topk']

    batch_size = config['TRAIN']['batch_size']
    num_workers = config['TRAIN']['num_workers']
    train_loader = build_dataloader(
        'train', batch_size=batch_size, num_workers=num_workers)

    # build metric
    metric_func = create_metric

    # build model
    # model = MobileNetV3_large_x0_5(class_dim=100)
    model = build_model(config)

    # build optimizer
    optimizer, lr_scheduler = create_optimizer(
        config, parameter_list=model.parameters())

    # load model
    pre_best_model_dict = load_model(config, model, optimizer)
    if len(pre_best_model_dict) > 0:
        pre_str = 'The metrics of the loaded model are as follows: {}'.format(
            ', '.join('{}: {}'.format(k, v)
                      for k, v in pre_best_model_dict.items()))
        logger.info(pre_str)

    # slim: quantization-aware or pruning-aware training
    if "quant_train" in config and config['quant_train'] is True:
        quanter = QAT(config=quant_config, act_preprocess=PACT)
        quanter.quantize(model)
    elif "prune_train" in config and config['prune_train'] is True:
        model = prune_model(model, [1, 3, 32, 32], 0.1)
    else:
        pass

    # distributed training
    model.train()
    model = paddle.DataParallel(model)

    # build loss function
    loss_func = build_loss(config)

    data_num = len(train_loader)

    best_acc = {}
    for epoch in range(EPOCH):
        st = time.time()
        for idx, data in enumerate(train_loader):
            img_batch, label = data
            # NHWC -> NCHW
            img_batch = paddle.transpose(img_batch, [0, 3, 1, 2])
            label = paddle.unsqueeze(label, -1)

            if scaler is not None:
                with paddle.amp.auto_cast():
                    outs = model(img_batch)
            else:
                outs = model(img_batch)

            # calculate metric
            acc = metric_func(outs, label)

            # calculate loss
            avg_loss = loss_func(outs, label)

            if scaler is None:
                # backward
                avg_loss.backward()
                optimizer.step()
                optimizer.clear_grad()
            else:
                scaled_avg_loss = scaler.scale(avg_loss)
                scaled_avg_loss.backward()
                scaler.minimize(optimizer, scaled_avg_loss)

            if not isinstance(lr_scheduler, float):
                lr_scheduler.step()

            if idx % 10 == 0:
                et = time.time()
                strs = f"epoch: [{epoch}/{EPOCH}], iter: [{idx}/{data_num}], "
                strs += f"loss: {avg_loss.numpy()[0]}"
                strs += f", acc_topk1: {acc['top1'].numpy()[0]}, acc_top5: {acc['top5'].numpy()[0]}"
                strs += f", batch_time: {round(et - st, 4)} s"
                logger.info(strs)
                st = time.time()

        if epoch % 10 == 0:
            acc = eval(config, model)
            if len(best_acc) < 1 or acc['top5'].numpy()[0] > best_acc['top5']:
                best_acc = acc
                best_acc['epoch'] = epoch
                is_best = True
            else:
                is_best = False
            logger.info(
                f"The best acc: acc_topk1: {best_acc['top1'].numpy()[0]}, "
                f"acc_top5: {best_acc['top5'].numpy()[0]}, "
                f"best_epoch: {best_acc['epoch']}")
            save_model(
                model,
                optimizer,
                config['save_model_dir'],
                logger,
                is_best,
                prefix="cls")
```
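`create_metric` is imported from `metric.py`, which this commit does not touch; judging by how its result is used (a dict holding `'top1'` and `'top5'` accuracy tensors), it plausibly wraps `paddle.metric.accuracy`. A sketch under that assumption, not the actual implementation:

```python
# Hedged sketch of what metric.create_metric might compute.
import paddle

def create_metric(outs, label):
    # outs: [N, num_classes] predictions; label: [N, 1] class ids
    return {
        'top1': paddle.metric.accuracy(outs, label, k=1),
        'top5': paddle.metric.accuracy(outs, label, k=5),
    }
```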
```python
def train_distill(config, scaler=None):
    EPOCH = config['epoch']
    topk = config['topk']

    batch_size = config['TRAIN']['batch_size']
    num_workers = config['TRAIN']['num_workers']
    train_loader = build_dataloader(
        'train', batch_size=batch_size, num_workers=num_workers)

    # build metric
    metric_func = create_metric

    # build model
    # model = distillmv3_large_x0_5(class_dim=100)
    model = build_model(config)

    # pact quant train
    if "quant_train" in config and config['quant_train'] is True:
        quanter = QAT(config=quant_config, act_preprocess=PACT)
        quanter.quantize(model)
    elif "prune_train" in config and config['prune_train'] is True:
        model = prune_model(model, [1, 3, 32, 32], 0.1)
    else:
        pass

    # build optimizer
    optimizer, lr_scheduler = create_optimizer(
        config, parameter_list=model.parameters())

    # load model
    pre_best_model_dict = load_model(config, model, optimizer)
    if len(pre_best_model_dict) > 0:
        pre_str = 'The metrics of the loaded model are as follows: {}'.format(
            ', '.join('{}: {}'.format(k, v)
                      for k, v in pre_best_model_dict.items()))
        logger.info(pre_str)

    model.train()
    model = paddle.DataParallel(model)

    # build loss functions
    loss_func_distill = LossDistill(model_name_list=['student', 'student1'])
    loss_func_dml = DMLLoss(model_name_pairs=['student', 'student1'])
    loss_func_js = KLJSLoss(mode='js')

    data_num = len(train_loader)

    best_acc = {}
    for epoch in range(EPOCH):
        st = time.time()
        for idx, data in enumerate(train_loader):
            img_batch, label = data
            # NHWC -> NCHW
            img_batch = paddle.transpose(img_batch, [0, 3, 1, 2])
            label = paddle.unsqueeze(label, -1)

            if scaler is not None:
                with paddle.amp.auto_cast():
                    outs = model(img_batch)
            else:
                outs = model(img_batch)

            # calculate metric on the first student's outputs
            acc = metric_func(outs['student'], label)

            # calculate loss: each student's supervised loss plus the
            # mutual (DML) loss between the two students
            avg_loss = loss_func_distill(outs, label)['student'] + \
                loss_func_distill(outs, label)['student1'] + \
                loss_func_dml(outs, label)['student_student1']

            # backward
            if scaler is None:
                avg_loss.backward()
                optimizer.step()
                optimizer.clear_grad()
            else:
                scaled_avg_loss = scaler.scale(avg_loss)
                scaled_avg_loss.backward()
                scaler.minimize(optimizer, scaled_avg_loss)

            if not isinstance(lr_scheduler, float):
                lr_scheduler.step()

            if idx % 10 == 0:
                et = time.time()
                strs = f"epoch: [{epoch}/{EPOCH}], iter: [{idx}/{data_num}], "
                strs += f"loss: {avg_loss.numpy()[0]}"
                strs += f", acc_topk1: {acc['top1'].numpy()[0]}, acc_top5: {acc['top5'].numpy()[0]}"
                strs += f", batch_time: {round(et - st, 4)} s"
                logger.info(strs)
                st = time.time()

        if epoch % 10 == 0:
            acc = eval(config, model._layers.student)
            if len(best_acc) < 1 or acc['top5'].numpy()[0] > best_acc['top5']:
                best_acc = acc
                best_acc['epoch'] = epoch
                is_best = True
            else:
                is_best = False
            logger.info(
                f"The best acc: acc_topk1: {best_acc['top1'].numpy()[0]}, "
                f"acc_top5: {best_acc['top5'].numpy()[0]}, "
                f"best_epoch: {best_acc['epoch']}")
            save_model(
                model,
                optimizer,
                config['save_model_dir'],
                logger,
                is_best,
                prefix="cls_distill")
```
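`LossDistill`, `DMLLoss`, and `KLJSLoss` come from `loss.py`, also outside this diff. Deep mutual learning trains the two students against each other by pulling their output distributions together; a minimal sketch of a symmetric-KL mutual term like the one `DMLLoss` presumably returns under `'student_student1'` (the reduction and exact form are assumptions):

```python
# Hedged sketch of a DML-style mutual loss between two student outputs.
import paddle.nn.functional as F

def dml_loss(logits_a, logits_b):
    log_pa = F.log_softmax(logits_a, axis=-1)
    log_pb = F.log_softmax(logits_b, axis=-1)
    pa, pb = log_pa.exp(), log_pb.exp()
    # symmetric KL: KL(pa || pb) + KL(pb || pa), averaged over the batch
    kl_ab = (pa * (log_pa - log_pb)).sum(axis=-1).mean()
    kl_ba = (pb * (log_pb - log_pa)).sum(axis=-1).mean()
    return 0.5 * (kl_ab + kl_ba)
```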
```python
def train_distill_multiopt(config, scaler=None):
    EPOCH = config['epoch']
    topk = config['topk']

    batch_size = config['TRAIN']['batch_size']
    num_workers = config['TRAIN']['num_workers']
    train_loader = build_dataloader(
        'train', batch_size=batch_size, num_workers=num_workers)

    # build metric
    metric_func = create_metric

    # build model
    # model = distillmv3_large_x0_5(class_dim=100)
    model = build_model(config)

    # build one optimizer per student
    optimizer, lr_scheduler = create_optimizer(
        config, parameter_list=model.student.parameters())
    optimizer1, lr_scheduler1 = create_optimizer(
        config, parameter_list=model.student1.parameters())

    # load model
    pre_best_model_dict = load_model(config, model, optimizer)
    if len(pre_best_model_dict) > 0:
        pre_str = 'The metrics of the loaded model are as follows: {}'.format(
            ', '.join('{}: {}'.format(k, v)
                      for k, v in pre_best_model_dict.items()))
        logger.info(pre_str)

    # quant train
    if "quant_train" in config and config['quant_train'] is True:
        quanter = QAT(config=quant_config, act_preprocess=PACT)
        quanter.quantize(model)
    elif "prune_train" in config and config['prune_train'] is True:
        model = prune_model(model, [1, 3, 32, 32], 0.1)
    else:
        pass

    model.train()
    model = paddle.DataParallel(model)

    # build loss functions
    loss_func_distill = LossDistill(model_name_list=['student', 'student1'])
    loss_func_dml = DMLLoss(model_name_pairs=['student', 'student1'])
    loss_func_js = KLJSLoss(mode='js')

    data_num = len(train_loader)

    best_acc = {}
    for epoch in range(EPOCH):
        st = time.time()
        for idx, data in enumerate(train_loader):
            img_batch, label = data
            # NHWC -> NCHW
            img_batch = paddle.transpose(img_batch, [0, 3, 1, 2])
            label = paddle.unsqueeze(label, -1)

            if scaler is not None:
                with paddle.amp.auto_cast():
                    outs = model(img_batch)
            else:
                outs = model(img_batch)

            # calculate metric
            acc = metric_func(outs['student'], label)

            # calculate one loss per student
            avg_loss = loss_func_distill(outs, label)['student'] + \
                loss_func_dml(outs, label)['student_student1']
            avg_loss1 = loss_func_distill(outs, label)['student1'] + \
                loss_func_dml(outs, label)['student_student1']

            if scaler is None:
                # backward: retain the graph so avg_loss1 can reuse the
                # activations of the shared forward pass
                avg_loss.backward(retain_graph=True)
                optimizer.step()
                optimizer.clear_grad()

                avg_loss1.backward()
                optimizer1.step()
                optimizer1.clear_grad()
            else:
                # NOTE: unlike the FP32 branch, the first backward here does
                # not retain the graph; it likely needs retain_graph=True as
                # well, since both losses share the same forward pass.
                scaled_avg_loss = scaler.scale(avg_loss)
                scaled_avg_loss.backward()
                scaler.minimize(optimizer, scaled_avg_loss)

                scaled_avg_loss = scaler.scale(avg_loss1)
                scaled_avg_loss.backward()
                scaler.minimize(optimizer1, scaled_avg_loss)

            if not isinstance(lr_scheduler, float):
                lr_scheduler.step()
            if not isinstance(lr_scheduler1, float):
                lr_scheduler1.step()

            if idx % 10 == 0:
                et = time.time()
                strs = f"epoch: [{epoch}/{EPOCH}], iter: [{idx}/{data_num}], "
                strs += f"loss: {avg_loss.numpy()[0]}, loss1: {avg_loss1.numpy()[0]}"
                strs += f", acc_topk1: {acc['top1'].numpy()[0]}, acc_top5: {acc['top5'].numpy()[0]}"
                strs += f", batch_time: {round(et - st, 4)} s"
                logger.info(strs)
                st = time.time()

        if epoch % 10 == 0:
            acc = eval(config, model._layers.student)
            if len(best_acc) < 1 or acc['top5'].numpy()[0] > best_acc['top5']:
                best_acc = acc
                best_acc['epoch'] = epoch
                is_best = True
            else:
                is_best = False
            logger.info(
                f"The best acc: acc_topk1: {best_acc['top1'].numpy()[0]}, "
                f"acc_top5: {best_acc['top5'].numpy()[0]}, "
                f"best_epoch: {best_acc['epoch']}")
            save_model(
                model, [optimizer, optimizer1],
                config['save_model_dir'],
                logger,
                is_best,
                prefix="cls_distill_multiopt")
```
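The `retain_graph=True` on the first backward matters because `avg_loss` and `avg_loss1` are computed from the same forward pass: without it, the first backward frees the intermediate activations that the second backward still needs. A standalone illustration (assuming Paddle's default free-graph-after-backward behavior in dygraph mode):

```python
# Hedged illustration of why the first backward retains the graph.
import paddle

x = paddle.randn([4, 8])
w = paddle.create_parameter([8, 2], 'float32')
y = paddle.matmul(x, w)          # shared forward pass

loss_a = y.mean()
loss_b = (y * y).mean()

loss_a.backward(retain_graph=True)  # keep activations for the next backward
loss_b.backward()                   # would fail if the graph had been freed
```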
```python
def eval(config, model):
    batch_size = config['VALID']['batch_size']
    num_workers = config['VALID']['num_workers']
    valid_loader = build_dataloader(
        'test', batch_size=batch_size, num_workers=num_workers)

    # build metric
    metric_func = create_metric

    outs = []
    labels = []
    for idx, data in enumerate(valid_loader):
        img_batch, label = data
        # NHWC -> NCHW
        img_batch = paddle.transpose(img_batch, [0, 3, 1, 2])
        label = paddle.unsqueeze(label, -1)
        out = model(img_batch)

        outs.append(out)
        labels.append(label)

    outs = paddle.concat(outs, axis=0)
    labels = paddle.concat(labels, axis=0)
    acc = metric_func(outs, labels)

    strs = f"The metrics are as follows: acc_topk1: {acc['top1'].numpy()[0]}, acc_top5: {acc['top5'].numpy()[0]}"
    logger.info(strs)
    return acc


if __name__ == "__main__":
    config, logger = preprocess(is_train=False)

    # AMP scaler
    scaler = amp_scaler(config)

    model_type = config['model_type']

    # hand the AMP scaler to the trainer so AMP.use_amp actually takes effect
    if model_type == "cls":
        train(config, scaler)
    elif model_type == "cls_distill":
        train_distill(config, scaler)
    elif model_type == "cls_distill_multiopt":
        train_distill_multiopt(config, scaler)
    else:
        raise ValueError(
            "model_type should be one of "
            "['cls', 'cls_distill', 'cls_distill_multiopt']")
```
test_tipc/supplementary/train.sh (new file, mode 100644)
```bash
# single GPU
python3.7 train.py -c mv3_large_x0_5.yml

# distributed training
python3.7 -m paddle.distributed.launch --log_dir=./debug/ --gpus '0,1' train.py -c mv3_large_x0_5.yml
```
test_tipc/supplementary/utils.py (new file, mode 100644)
```python
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import logging
import functools
import pickle  # used by load_model for '.states' files; missing in the original
import six     # used by load_model for the PY2 branch; missing in the original
import paddle  # used by load_model for paddle.load; missing in the original
import paddle.distributed as dist

logger_initialized = {}


def print_dict(d, logger, delimiter=0):
    """
    Recursively visualize a dict,
    indenting according to the relationship of keys.
    """
    for k, v in sorted(d.items()):
        if isinstance(v, dict):
            logger.info("{}{} : ".format(delimiter * " ", str(k)))
            print_dict(v, logger, delimiter + 4)
        elif isinstance(v, list) and len(v) >= 1 and isinstance(v[0], dict):
            logger.info("{}{} : ".format(delimiter * " ", str(k)))
            for value in v:
                print_dict(value, logger, delimiter + 4)
        else:
            logger.info("{}{} : {}".format(delimiter * " ", k, v))


@functools.lru_cache()
def get_logger(name='root', log_file=None, log_level=logging.DEBUG):
    """Initialize and get a logger by name.
    If the logger has not been initialized, this method will initialize the
    logger by adding one or two handlers, otherwise the initialized logger will
    be directly returned. During initialization, a StreamHandler will always be
    added. If `log_file` is specified a FileHandler will also be added.
    Args:
        name (str): Logger name.
        log_file (str | None): The log filename. If specified, a FileHandler
            will be added to the logger.
        log_level (int): The logger level. Note that only the process of
            rank 0 is affected, and other processes will set the level to
            "Error" thus be silent most of the time.
    Returns:
        logging.Logger: The expected logger.
    """
    logger = logging.getLogger(name)
    if name in logger_initialized:
        return logger
    for logger_name in logger_initialized:
        if name.startswith(logger_name):
            return logger

    formatter = logging.Formatter(
        '[%(asctime)s] %(name)s %(levelname)s: %(message)s',
        datefmt="%Y/%m/%d %H:%M:%S")

    stream_handler = logging.StreamHandler(stream=sys.stdout)
    stream_handler.setFormatter(formatter)
    logger.addHandler(stream_handler)
    if log_file is not None and dist.get_rank() == 0:
        log_file_folder = os.path.split(log_file)[0]
        os.makedirs(log_file_folder, exist_ok=True)
        file_handler = logging.FileHandler(log_file, 'a')
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
    if dist.get_rank() == 0:
        logger.setLevel(log_level)
    else:
        logger.setLevel(logging.ERROR)
    logger_initialized[name] = True
    return logger


def load_model(config, model, optimizer=None):
    """
    load model from checkpoint or pretrained_model
    """
    logger = get_logger()
    checkpoints = config.get('checkpoints')
    pretrained_model = config.get('pretrained_model')
    best_model_dict = {}
    if checkpoints:
        if checkpoints.endswith('.pdparams'):
            checkpoints = checkpoints.replace('.pdparams', '')
        assert os.path.exists(checkpoints + ".pdparams"), \
            "The {}.pdparams does not exist!".format(checkpoints)

        # load params from trained model
        params = paddle.load(checkpoints + '.pdparams')
        state_dict = model.state_dict()
        new_state_dict = {}
        for key, value in state_dict.items():
            if key not in params:
                logger.warning("{} not in loaded params {} !".format(
                    key, params.keys()))
                continue
            pre_value = params[key]
            if list(value.shape) == list(pre_value.shape):
                new_state_dict[key] = pre_value
            else:
                logger.warning(
                    "The shape of model params {} {} does not match the loaded params shape {} !".
                    format(key, value.shape, pre_value.shape))
        model.set_state_dict(new_state_dict)

        if optimizer is not None:
            if os.path.exists(checkpoints + '.pdopt'):
                optim_dict = paddle.load(checkpoints + '.pdopt')
                optimizer.set_state_dict(optim_dict)
            else:
                logger.warning(
                    "{}.pdopt does not exist, optimizer params are not loaded".
                    format(checkpoints))

        if os.path.exists(checkpoints + '.states'):
            with open(checkpoints + '.states', 'rb') as f:
                states_dict = pickle.load(f) if six.PY2 else pickle.load(
                    f, encoding='latin1')
            best_model_dict = states_dict.get('best_model_dict', {})
            if 'epoch' in states_dict:
                best_model_dict['start_epoch'] = states_dict['epoch'] + 1
        logger.info("resume from {}".format(checkpoints))
    elif pretrained_model:
        load_pretrained_params(model, pretrained_model)
    else:
        logger.info('train from scratch')
    return best_model_dict


def load_pretrained_params(model, path):
    logger = get_logger()
    if path.endswith('.pdparams'):
        path = path.replace('.pdparams', '')
    assert os.path.exists(path + ".pdparams"), \
        "The {}.pdparams does not exist!".format(path)

    params = paddle.load(path + '.pdparams')
    state_dict = model.state_dict()
    new_state_dict = {}
    for k1 in params.keys():
        if k1 not in state_dict.keys():
            logger.warning(
                "The pretrained params {} are not in the model".format(k1))
        else:
            if list(state_dict[k1].shape) == list(params[k1].shape):
                new_state_dict[k1] = params[k1]
            else:
                logger.warning(
                    "The shape of model params {} {} does not match the loaded params {} {} !".
                    format(k1, state_dict[k1].shape, k1, params[k1].shape))
    model.set_state_dict(new_state_dict)
    logger.info("load pretrain successfully from {}".format(path))
    return model
```
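A short usage sketch for these helpers (paths and config values are illustrative; `model` stands for any `paddle.nn.Layer` built elsewhere, e.g. via `build_model`):

```python
# Hedged usage sketch for utils.get_logger / utils.load_model.
from utils import get_logger, load_model

logger = get_logger(name='root', log_file='./output/train.log')

config = {
    'checkpoints': None,                            # resume prefix, no extension
    'pretrained_model': './pretrain/cls.pdparams',  # fallback pretrained weights
}
best_model_dict = load_model(config, model)  # fills `model` where shapes match
logger.info('resumed state: {}'.format(best_model_dict))
```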