Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
paddle_dbnet
Commits
338ba3ee
Commit
338ba3ee
authored
May 10, 2020
by
LDOUBLEV
Browse files
upload PaddleOCR code
parent
bc93c549
Changes
84
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
679 additions
and
0 deletions
+679
-0
tools/tmp/test_rec_benchmark.py
tools/tmp/test_rec_benchmark.py
+128
-0
tools/tmp/train_det.py
tools/tmp/train_det.py
+216
-0
tools/tmp/train_rec.py
tools/tmp/train_rec.py
+222
-0
tools/train.py
tools/train.py
+113
-0
No files found.
tools/tmp/test_rec_benchmark.py
0 → 100755
View file @
338ba3ee
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
import
time
import
multiprocessing
import
numpy
as
np
def set_paddle_flags(**kwargs):
    """Export Paddle runtime flags as environment variables.

    Each keyword argument names an environment variable. Its value is
    converted with ``str`` and exported only when the variable is not
    already present, so a value that is already set in the environment is
    never overwritten.

    Args:
        **kwargs: Mapping of environment-variable name to flag value.
    """
    for flag_name, flag_value in kwargs.items():
        # setdefault leaves an existing value untouched.
        os.environ.setdefault(flag_name, str(flag_value))


# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, it would
# not take any effect.
set_paddle_flags(
    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
)
from
paddle
import
fluid
from
ppocr.utils.utility
import
load_config
,
merge_config
import
ppocr.data.rec.reader_main
as
reader
from
ppocr.utils.utility
import
ArgsParser
from
ppocr.utils.character
import
CharacterOps
,
cal_predicts_accuracy
from
ppocr.utils.check
import
check_gpu
from
ppocr.utils.utility
import
create_module
from
ppocr.utils.eval_utils
import
eval_run
from
ppocr.utils.utility
import
initial_logger
logger
=
initial_logger
()
def main():
    """Evaluate a recognition model on the benchmark sets and log accuracy.

    Reads the configuration named by the module-level ``FLAGS``, builds the
    evaluation program, loads ``Global.pretrain_weights`` when it is not
    None, then calls ``eval_run`` once for every benchmark set located under
    ``TestReader.lmdb_sets_dir``. Per-set accuracy, the overall accuracy and
    the average wall-clock time per evaluated sample are logged.

    Note:
        ``config['TestReader']['lmdb_sets_dir']`` is overwritten for each
        benchmark set and is left pointing at the last one.
    """
    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    char_ops = CharacterOps(config['Global'])
    config['Global']['char_num'] = char_ops.get_char_num()

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']
    check_gpu(use_gpu)

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    rec_model = create_module(
        config['Architecture']['function'])(params=config)

    startup_prog = fluid.Program()
    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog):
        with fluid.unique_name.guard():
            eval_loader, eval_outputs = rec_model(mode="eval")
            eval_fetch_list = [v.name for v in eval_outputs]
    eval_prog = eval_prog.clone(for_test=True)
    exe.run(startup_prog)

    pretrain_weights = config['Global']['pretrain_weights']
    if pretrain_weights is not None:
        fluid.load(eval_prog, pretrain_weights)

    eval_data_list = [
        'IIIT5k_3000', 'SVT', 'IC03_860', 'IC03_867',
        'IC13_857', 'IC13_1015', 'IC15_1811', 'IC15_2077',
        'SVTP', 'CUTE80',
    ]
    eval_data_dir = config['TestReader']['lmdb_sets_dir']
    total_forward_time = 0
    total_evaluation_data_number = 0
    total_correct_number = 0
    eval_data_acc_info = {}
    for eval_data in eval_data_list:
        # Point the reader at this benchmark's sub-directory.
        config['TestReader']['lmdb_sets_dir'] = \
            eval_data_dir + "/" + eval_data
        eval_reader = reader.train_eval_reader(
            config=config, char_ops=char_ops, mode="test")
        eval_loader.set_sample_list_generator(eval_reader, places=place)

        start_time = time.time()
        outs = eval_run(exe, eval_prog, eval_loader, eval_fetch_list,
                        char_ops, "best", "test")
        infer_time = time.time() - start_time

        eval_acc, acc_num, sample_num = outs
        total_forward_time += infer_time
        total_evaluation_data_number += sample_num
        total_correct_number += acc_num
        eval_data_acc_info[eval_data] = outs

    # Guard the averages: with zero evaluated samples the divisions below
    # would raise ZeroDivisionError before anything is reported.
    if total_evaluation_data_number > 0:
        avg_forward_time = total_forward_time / total_evaluation_data_number
        avg_acc = total_correct_number * 1.0 / total_evaluation_data_number
    else:
        avg_forward_time = 0.0
        avg_acc = 0.0

    logger.info('-' * 50)
    # eval_data_acc_info[name][0] is the accuracy returned by eval_run.
    strs = "".join(
        "\n{}, accuracy:{:.6f}".format(name, eval_data_acc_info[name][0])
        for name in eval_data_list)
    strs += "\naverage, accuracy:{:.6f}, time:{:.6f}".format(
        avg_acc, avg_forward_time)
    logger.info(strs)
    logger.info('-' * 50)
if __name__ == '__main__':
    # main() reads FLAGS as a module-level global, so it must be bound here
    # before main() is invoked.
    FLAGS = ArgsParser().parse_args()
    main()
tools/tmp/train_det.py
0 → 100755
View file @
338ba3ee
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
import
sys
import
time
import
multiprocessing
import
numpy
as
np
# from paddle.fluid.contrib.model_stat import summary
def set_paddle_flags(**kwargs):
    """Export Paddle runtime flags as environment variables.

    Each keyword argument names an environment variable. Its value is
    converted with ``str`` and exported only when the variable is not
    already present, so a value that is already set in the environment is
    never overwritten.

    Args:
        **kwargs: Mapping of environment-variable name to flag value.
    """
    for flag_name, flag_value in kwargs.items():
        # setdefault leaves an existing value untouched.
        os.environ.setdefault(flag_name, str(flag_value))


# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, it would
# not take any effect.
set_paddle_flags(
    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
)
from
paddle
import
fluid
from
ppocr.utils.utility
import
create_module
from
ppocr.utils.utility
import
load_config
,
merge_config
import
ppocr.data.det.reader_main
as
reader
from
ppocr.utils.utility
import
ArgsParser
from
ppocr.utils.character
import
CharacterOps
,
cal_predicts_accuracy
from
ppocr.utils.check
import
check_gpu
from
ppocr.utils.stats
import
TrainingStats
from
ppocr.utils.checkpoint
import
load_pretrain
,
load_checkpoint
,
save
,
save_model
from
ppocr.utils.eval_utils
import
eval_run
from
ppocr.utils.eval_utils
import
eval_det_run
from
ppocr.utils.utility
import
initial_logger
logger
=
initial_logger
()
from
ppocr.utils.utility
import
create_multi_devices_program
def main():
    """Train an EAST or DB detection model with periodic evaluation.

    The configuration named by the module-level ``FLAGS`` drives everything:
    the train and eval programs are built from ``Architecture.function``,
    the optimizer from ``Optimizer.function``, and the loop runs for
    ``Global.epoch_num`` epochs. Smoothed losses are logged every
    ``print_step`` batches; every ``eval_step`` batches ``eval_det_run`` is
    called and the model is saved to ``<save_dir>/best_accuracy`` whenever
    the returned ``hmean`` is at least the best seen so far. A snapshot is
    also saved every ``save_epoch_step`` epochs (epoch 0 is skipped).
    """
    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    print(config)

    alg = config['Global']['algorithm']
    assert alg in ['EAST', 'DB']

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']
    check_gpu(use_gpu)

    if use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    model_factory = create_module(config['Architecture']['function'])
    det_model = model_factory(params=config)

    startup_prog = fluid.Program()

    # ---- training graph -------------------------------------------------
    train_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            train_loader, train_outputs = det_model(mode="train")
            train_fetch_list = [v.name for v in train_outputs]
            train_loss = train_outputs[0]
            opt_params = config['Optimizer']
            optimizer = create_module(opt_params['function'])(opt_params)
            optimizer.minimize(train_loss)
            # Fetch the learning rate last so the loop can read it back
            # as train_outs[-1].
            global_lr = optimizer._global_learning_rate()
            global_lr.persistable = True
            train_fetch_list.append(global_lr.name)

    # ---- evaluation graph -----------------------------------------------
    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog):
        with fluid.unique_name.guard():
            eval_loader, eval_outputs = det_model(mode="eval")
            eval_fetch_list = [v.name for v in eval_outputs]
    eval_prog = eval_prog.clone(for_test=True)

    train_reader = reader.train_reader(config=config)
    train_loader.set_sample_list_generator(train_reader, places=place)

    exe.run(startup_prog)

    # compile program for multi-devices
    train_compile_program = create_multi_devices_program(
        train_prog, train_loss.name)

    pretrain_weights = config['Global']['pretrain_weights']
    if pretrain_weights is not None:
        load_pretrain(exe, train_prog, pretrain_weights)
        print("pretrain weights loaded!")

    train_batch_id = 0
    # Key i names the statistic taken from train_outs[i].
    if alg == 'EAST':
        train_log_keys = ['loss_total', 'loss_cls', 'loss_offset']
    elif alg == 'DB':
        train_log_keys = ['loss_total', 'loss_shrink',
                          'loss_threshold', 'loss_binary']

    global_cfg = config['Global']
    log_smooth_window = global_cfg['log_smooth_window']
    epoch_num = global_cfg['epoch_num']
    print_step = global_cfg['print_step']
    eval_step = global_cfg['eval_step']
    save_epoch_step = global_cfg['save_epoch_step']
    save_dir = global_cfg['save_dir']

    train_stats = TrainingStats(log_smooth_window, train_log_keys)
    best_eval_hmean = -1
    best_batch_id = 0
    best_epoch = 0

    for epoch in range(epoch_num):
        train_loader.start()
        try:
            # The loader signals the end of an epoch with EOFException.
            while True:
                batch_begin = time.time()
                train_outs = exe.run(
                    program=train_compile_program,
                    fetch_list=train_fetch_list,
                    return_numpy=False)
                stats = {
                    key: np.mean(np.array(train_outs[position]))
                    for position, key in enumerate(train_log_keys)
                }
                lr = np.mean(np.array(train_outs[-1]))
                batch_end = time.time()
                train_batch_elapse = batch_end - batch_begin

                train_stats.update(stats)

                is_first_batch = train_batch_id <= 0
                if not is_first_batch and train_batch_id % print_step == 0:
                    logs = train_stats.log()
                    logger.info(
                        'epoch: {}, iter: {}, lr: {:.6f}, {}, time: {:.3f}'.
                        format(epoch, train_batch_id, lr, logs,
                               train_batch_elapse))

                if not is_first_batch and train_batch_id % eval_step == 0:
                    metrics = eval_det_run(exe, eval_prog, eval_fetch_list,
                                           config, "eval")
                    hmean = metrics['hmean']
                    if hmean >= best_eval_hmean:
                        best_eval_hmean = hmean
                        best_batch_id = train_batch_id
                        best_epoch = epoch
                        save_model(train_prog, save_dir + "/best_accuracy")
                    logger.info(
                        'Test iter: {}, metrics:{}, best_hmean:{:.6f}, best_epoch:{}, best_batch_id:{}'.
                        format(train_batch_id, metrics, best_eval_hmean,
                               best_epoch, best_batch_id))
                train_batch_id += 1
        except fluid.core.EOFException:
            train_loader.reset()

        if epoch > 0 and epoch % save_epoch_step == 0:
            snapshot_path = save_dir + "/iter_epoch_%d" % (epoch)
            save_model(train_prog, snapshot_path)
def test_reader():
    """Debug helper: iterate the detection train reader forever, timing it.

    For every item yielded by the reader, prints the running item count,
    ``len(data)`` and the wall-clock time since the previous item. After
    each full pass it prints the pass total and starts over; the function
    never returns.

    NOTE(review): the nesting of the two trailing prints was reconstructed.
    They are placed inside the endless loop because they would otherwise be
    unreachable; confirm against the upstream file.
    """
    import time

    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    print(config)
    tmp_reader = reader.train_reader(config=config)

    while True:
        last_tick = time.time()
        count = 0
        for data in tmp_reader():
            count += 1
            # Time elapsed since the previous item (or since pass start).
            batch_time = time.time() - last_tick
            last_tick = time.time()
            print("reader:", count, len(data), batch_time)
        print("finish reader:", count)
        print("success")
if __name__ == '__main__':
    cli = ArgsParser()
    # NOTE: main() in this file does not read FLAGS.resume_checkpoint; the
    # option is only parsed here.
    cli.add_argument(
        "-r",
        "--resume_checkpoint",
        default=None,
        type=str,
        help="Checkpoint path for resuming training.")
    # main() and test_reader() read FLAGS as a module-level global.
    FLAGS = cli.parse_args()
    main()
    # Swap in test_reader() instead of main() to benchmark the data reader.
tools/tmp/train_rec.py
0 → 100755
View file @
338ba3ee
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
import
sys
import
time
import
multiprocessing
import
numpy
as
np
# from paddle.fluid.contrib.model_stat import summary
def set_paddle_flags(**kwargs):
    """Export Paddle runtime flags as environment variables.

    Each keyword argument names an environment variable. Its value is
    converted with ``str`` and exported only when the variable is not
    already present, so a value that is already set in the environment is
    never overwritten.

    Args:
        **kwargs: Mapping of environment-variable name to flag value.
    """
    for flag_name, flag_value in kwargs.items():
        # setdefault leaves an existing value untouched.
        os.environ.setdefault(flag_name, str(flag_value))


# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, it would
# not take any effect.
set_paddle_flags(
    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
)
from
paddle
import
fluid
from
ppocr.utils.utility
import
create_module
from
ppocr.utils.utility
import
load_config
,
merge_config
import
ppocr.data.rec.reader_main
as
reader
from
ppocr.utils.utility
import
ArgsParser
from
ppocr.utils.character
import
CharacterOps
,
cal_predicts_accuracy
from
ppocr.utils.check
import
check_gpu
from
ppocr.utils.stats
import
TrainingStats
from
ppocr.utils.checkpoint
import
load_pretrain
,
load_checkpoint
,
save
,
save_model
from
ppocr.utils.eval_utils
import
eval_run
from
ppocr.utils.utility
import
initial_logger
logger
=
initial_logger
()
from
ppocr.utils.utility
import
create_multi_devices_program
def main():
    """Train a text-recognition model with periodic evaluation.

    The configuration named by the module-level ``FLAGS`` drives everything:
    programs are built from ``Architecture.function``, the optimizer from
    ``Optimizer.function``, and global-norm gradient clipping is installed
    when ``Global.gradient_clip`` is present. Training runs for
    ``Global.epoch_num`` epochs; smoothed loss and accuracy are logged every
    ``print_step`` batches, ``eval_run`` is called every ``eval_step``
    batches, and the model is saved to ``<save_dir>/best_accuracy`` whenever
    the evaluation accuracy strictly improves. A snapshot is also saved
    every ``save_epoch_step`` epochs (epoch 0 is skipped).
    """
    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    char_ops = CharacterOps(config['Global'])
    config['Global']['char_num'] = char_ops.get_char_num()
    print(config)

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']
    check_gpu(use_gpu)

    if use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    model_factory = create_module(config['Architecture']['function'])
    rec_model = model_factory(params=config)

    startup_prog = fluid.Program()

    # ---- training graph -------------------------------------------------
    train_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            train_loader, train_outputs = rec_model(mode="train")
            # Not used below; kept so the index access still happens.
            save_var = train_outputs[1]

            if "gradient_clip" in config['Global']:
                clip_norm = config['Global']['gradient_clip']
                fluid.clip.set_gradient_clip(
                    fluid.clip.GradientClipByGlobalNorm(clip_norm),
                    program=train_prog)

            train_fetch_list = [v.name for v in train_outputs]
            train_loss = train_outputs[0]
            opt_params = config['Optimizer']
            optimizer = create_module(opt_params['function'])(opt_params)
            optimizer.minimize(train_loss)
            # Fetch the learning rate last so the loop can read it back
            # as train_outs[-1].
            global_lr = optimizer._global_learning_rate()
            global_lr.persistable = True
            train_fetch_list.append(global_lr.name)

    train_reader = reader.train_eval_reader(
        config=config, char_ops=char_ops, mode="train")
    train_loader.set_sample_list_generator(train_reader, places=place)

    # ---- evaluation graph -----------------------------------------------
    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog):
        with fluid.unique_name.guard():
            eval_loader, eval_outputs = rec_model(mode="eval")
            eval_fetch_list = [v.name for v in eval_outputs]
    eval_prog = eval_prog.clone(for_test=True)

    exe.run(startup_prog)

    eval_reader = reader.train_eval_reader(
        config=config, char_ops=char_ops, mode="eval")
    eval_loader.set_sample_list_generator(eval_reader, places=place)

    # compile program for multi-devices
    train_compile_program = create_multi_devices_program(
        train_prog, train_loss.name)

    pretrain_weights = config['Global']['pretrain_weights']
    if pretrain_weights is not None:
        load_pretrain(exe, train_prog, pretrain_weights)

    train_batch_id = 0
    train_log_keys = ['loss', 'acc']
    global_cfg = config['Global']
    log_smooth_window = global_cfg['log_smooth_window']
    epoch_num = global_cfg['epoch_num']
    # Not used below; kept so a missing 'loss_type' key still raises here.
    loss_type = global_cfg['loss_type']
    print_step = global_cfg['print_step']
    eval_step = global_cfg['eval_step']
    save_epoch_step = global_cfg['save_epoch_step']
    save_dir = global_cfg['save_dir']

    train_stats = TrainingStats(log_smooth_window, train_log_keys)
    best_eval_acc = -1
    best_batch_id = 0
    best_epoch = 0

    for epoch in range(epoch_num):
        train_loader.start()
        try:
            # The loader signals the end of an epoch with EOFException.
            while True:
                batch_begin = time.time()
                train_outs = exe.run(
                    program=train_compile_program,
                    fetch_list=train_fetch_list,
                    return_numpy=False)
                loss = np.mean(np.array(train_outs[0]))
                lr = np.mean(np.array(train_outs[-1]))

                # Fetches 1 and 2 are passed to cal_predicts_accuracy as
                # predictions and labels, each with its first LoD level.
                pred_tensor = train_outs[1]
                label_tensor = train_outs[2]
                preds = np.array(pred_tensor)
                preds_lod = pred_tensor.lod()[0]
                labels = np.array(label_tensor)
                labels_lod = label_tensor.lod()[0]

                acc, acc_num, img_num = cal_predicts_accuracy(
                    char_ops, preds, preds_lod, labels, labels_lod)

                batch_end = time.time()
                train_batch_elapse = batch_end - batch_begin

                train_stats.update({'loss': loss, 'acc': acc})

                is_first_batch = train_batch_id <= 0
                if not is_first_batch and train_batch_id % print_step == 0:
                    logs = train_stats.log()
                    logger.info(
                        'epoch: {}, iter: {}, lr: {:.6f}, {}, time: {:.3f}'.
                        format(epoch, train_batch_id, lr, logs,
                               train_batch_elapse))

                if not is_first_batch and train_batch_id % eval_step == 0:
                    outs = eval_run(exe, eval_prog, eval_loader,
                                    eval_fetch_list, char_ops,
                                    train_batch_id, "eval")
                    eval_acc, acc_num, sample_num = outs
                    if eval_acc > best_eval_acc:
                        best_eval_acc = eval_acc
                        best_batch_id = train_batch_id
                        best_epoch = epoch
                        save_model(train_prog, save_dir + "/best_accuracy")
                    logger.info(
                        'Test iter: {}, acc:{:.6f}, best_acc:{:.6f}, best_epoch:{}, best_batch_id:{}, sample_num:{}'.
                        format(train_batch_id, eval_acc, best_eval_acc,
                               best_epoch, best_batch_id, sample_num))
                train_batch_id += 1
        except fluid.core.EOFException:
            train_loader.reset()

        if epoch > 0 and epoch % save_epoch_step == 0:
            snapshot_path = save_dir + "/iter_epoch_%d" % (epoch)
            save_model(train_prog, snapshot_path)
def test_reader():
    """Debug helper: run one pass over the recognition eval reader.

    Accumulates ``len(data)`` over every item the reader yields and, on
    every tenth item, prints the running total, the size of the current
    item and the mean wall-clock time per item so far. Prints the final
    total when the reader is exhausted.
    """
    import time

    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    char_ops = CharacterOps(config['Global'])
    config['Global']['char_num'] = char_ops.get_char_num()
    print(config)

    # Change mode to "train" here to exercise the training reader instead.
    tmp_reader = reader.train_eval_reader(
        config=config, char_ops=char_ops, mode="eval")

    count = 0
    starttime = time.time()
    for print_count, data in enumerate(tmp_reader(), 1):
        count += len(data)
        if print_count % 10 != 0:
            continue
        batch_time = (time.time() - starttime) / print_count
        print("reader:", count, len(data), batch_time)
    print("finish reader:", count)
    print("success")
if __name__ == '__main__':
    cli = ArgsParser()
    # NOTE: main() in this file does not read FLAGS.resume_checkpoint; the
    # option is only parsed here.
    cli.add_argument(
        "-r",
        "--resume_checkpoint",
        default=None,
        type=str,
        help="Checkpoint path for resuming training.")
    # main() and test_reader() read FLAGS as a module-level global.
    FLAGS = cli.parse_args()
    main()
    # Swap in test_reader() instead of main() to benchmark the data reader.
tools/train.py
0 → 100755
View file @
338ba3ee
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
import
sys
import
time
import
multiprocessing
import
numpy
as
np
def set_paddle_flags(**kwargs):
    """Export Paddle runtime flags as environment variables.

    Each keyword argument names an environment variable. Its value is
    converted with ``str`` and exported only when the variable is not
    already present, so a value that is already set in the environment is
    never overwritten.

    Args:
        **kwargs: Mapping of environment-variable name to flag value.
    """
    for flag_name, flag_value in kwargs.items():
        # setdefault leaves an existing value untouched.
        os.environ.setdefault(flag_name, str(flag_value))


# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, it would
# not take any effect.
set_paddle_flags(
    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
)
import
program
from
paddle
import
fluid
from
ppocr.utils.utility
import
initial_logger
logger
=
initial_logger
()
from
ppocr.data.reader_main
import
reader_main
from
ppocr.utils.save_load
import
init_model
from
ppocr.utils.character
import
CharacterOps
def main():
    """Build the train/eval programs for the configured algorithm and train.

    Reads the configuration named by the module-level ``FLAGS``, checks the
    requested device, builds the train and eval programs through
    ``program.build``, initialises the model with ``init_model`` and hands
    control to ``program.train_eval_det_run`` for 'EAST'/'DB' or to
    ``program.train_eval_rec_run`` for the other supported algorithms.
    """
    config = program.load_config(FLAGS.config)
    program.merge_config(FLAGS.opt)
    logger.info(config)

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']
    # Pass the configured value: a hard-coded True makes the check behave as
    # if the GPU were always requested, even when use_gpu is False.
    program.check_gpu(use_gpu)

    alg = config['Global']['algorithm']
    assert alg in ['EAST', 'DB', 'Rosetta', 'CRNN', 'STARNet', 'RARE']
    if alg in ['Rosetta', 'CRNN', 'STARNet', 'RARE']:
        # These algorithms get a CharacterOps instance stored in the config.
        config['Global']['char_ops'] = CharacterOps(config['Global'])

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    startup_program = fluid.Program()

    train_program = fluid.Program()
    train_build_outputs = program.build(
        config, train_program, startup_program, mode='train')
    train_loader = train_build_outputs[0]
    train_fetch_name_list = train_build_outputs[1]
    train_fetch_varname_list = train_build_outputs[2]
    train_opt_loss_name = train_build_outputs[3]

    eval_program = fluid.Program()
    eval_build_outputs = program.build(
        config, eval_program, startup_program, mode='eval')
    eval_fetch_name_list = eval_build_outputs[1]
    eval_fetch_varname_list = eval_build_outputs[2]
    eval_program = eval_program.clone(for_test=True)

    train_reader = reader_main(config=config, mode="train")
    train_loader.set_sample_list_generator(train_reader, places=place)

    eval_reader = reader_main(config=config, mode="eval")

    exe = fluid.Executor(place)
    exe.run(startup_program)

    # compile program for multi-devices
    train_compile_program = program.create_multi_devices_program(
        train_program, train_opt_loss_name)
    init_model(config, train_program, exe)

    train_info_dict = {
        'compile_program': train_compile_program,
        'train_program': train_program,
        'reader': train_loader,
        'fetch_name_list': train_fetch_name_list,
        'fetch_varname_list': train_fetch_varname_list,
    }

    # Unlike training, evaluation is handed the raw reader (not a loader).
    eval_info_dict = {
        'program': eval_program,
        'reader': eval_reader,
        'fetch_name_list': eval_fetch_name_list,
        'fetch_varname_list': eval_fetch_varname_list,
    }

    if alg in ['EAST', 'DB']:
        program.train_eval_det_run(config, exe, train_info_dict,
                                   eval_info_dict)
    else:
        program.train_eval_rec_run(config, exe, train_info_dict,
                                   eval_info_dict)
if __name__ == '__main__':
    # main() reads FLAGS as a module-level global, so it must be bound here
    # before main() is invoked.
    FLAGS = program.ArgsParser().parse_args()
    main()
# test_reader()
Prev
1
2
3
4
5
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment