Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
CRNN_Paddle
Commits
9646af88
Commit
9646af88
authored
Jul 07, 2025
by
wanglch
Browse files
Update program_prof.py
parent
993cdf7b
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
54 additions
and
63 deletions
+54
-63
tools/program_prof.py
tools/program_prof.py
+54
-63
No files found.
tools/program_prof.py
View file @
9646af88
...
@@ -16,6 +16,7 @@ from __future__ import absolute_import
...
@@ -16,6 +16,7 @@ from __future__ import absolute_import
from
__future__
import
division
from
__future__
import
division
from
__future__
import
print_function
from
__future__
import
print_function
import
paddle.profiler
as
profiler
import
os
import
os
import
gc
import
gc
import
sys
import
sys
...
@@ -30,22 +31,29 @@ import cv2
...
@@ -30,22 +31,29 @@ import cv2
import
numpy
as
np
import
numpy
as
np
import
copy
import
copy
from
argparse
import
ArgumentParser
,
RawDescriptionHelpFormatter
from
argparse
import
ArgumentParser
,
RawDescriptionHelpFormatter
from
paddle.profiler
import
export_chrome_tracing
from
ppocr.utils.stats
import
TrainingStats
from
ppocr.utils.stats
import
TrainingStats
from
ppocr.utils.save_load
import
save_model
from
ppocr.utils.save_load
import
save_model
from
ppocr.utils.utility
import
print_dict
,
AverageMeter
from
ppocr.utils.utility
import
print_dict
,
AverageMeter
from
ppocr.utils.logging
import
get_logger
from
ppocr.utils.logging
import
get_logger
from
ppocr.utils.loggers
import
WandbLogger
,
Loggers
from
ppocr.utils.loggers
import
WandbLogger
,
Loggers
from
ppocr.utils
import
profiler
from
ppocr.data
import
build_dataloader
from
ppocr.data
import
build_dataloader
from
ppocr.utils.export_model
import
export
from
ppocr.utils.export_model
import
export
class
ArgsParser
(
ArgumentParser
):
class
ArgsParser
(
ArgumentParser
):
def
__init__
(
self
):
def
__init__
(
self
):
super
(
ArgsParser
,
self
).
__init__
(
formatter_class
=
RawDescriptionHelpFormatter
)
super
(
ArgsParser
,
self
).
__init__
(
formatter_class
=
RawDescriptionHelpFormatter
)
self
.
add_argument
(
"-c"
,
"--config"
,
help
=
"configuration file to use"
)
self
.
add_argument
(
"-c"
,
"--config"
,
help
=
"configuration file to use"
)
self
.
add_argument
(
"-o"
,
"--opt"
,
nargs
=
"+"
,
help
=
"set configuration options"
)
self
.
add_argument
(
"-o"
,
"--opt"
,
nargs
=
"+"
,
help
=
"set configuration options"
)
self
.
add_argument
(
"-p"
,
"--profiler_options"
,
type
=
str
,
default
=
None
,
help
=
"The option of profiler, which should be in format "
'"key1=value1;key2=value2;key3=value3".'
,
)
def
parse_args
(
self
,
argv
=
None
):
def
parse_args
(
self
,
argv
=
None
):
args
=
super
(
ArgsParser
,
self
).
parse_args
(
argv
)
args
=
super
(
ArgsParser
,
self
).
parse_args
(
argv
)
...
@@ -132,11 +140,13 @@ def check_device(use_gpu, use_xpu=False, use_npu=False, use_mlu=False, use_gcu=F
...
@@ -132,11 +140,13 @@ def check_device(use_gpu, use_xpu=False, use_npu=False, use_mlu=False, use_gcu=F
if
use_npu
:
if
use_npu
:
if
(
if
(
int
(
paddle
.
version
.
major
)
!=
0
int
(
paddle
.
version
.
major
)
!=
0
and
int
(
paddle
.
version
.
major
)
<=
2
and
int
(
paddle
.
version
.
minor
)
<=
4
and
int
(
paddle
.
version
.
minor
)
<=
4
):
):
if
not
paddle
.
device
.
is_compiled_with_npu
():
if
not
paddle
.
device
.
is_compiled_with_npu
():
print
(
err
.
format
(
"use_npu"
,
"npu"
,
"npu"
,
"use_npu"
))
print
(
err
.
format
(
"use_npu"
,
"npu"
,
"npu"
,
"use_npu"
))
sys
.
exit
(
1
)
sys
.
exit
(
1
)
# is_compiled_with_npu() has been updated after paddle-2.4
else
:
else
:
if
not
paddle
.
device
.
is_compiled_with_custom_device
(
"npu"
):
if
not
paddle
.
device
.
is_compiled_with_custom_device
(
"npu"
):
print
(
err
.
format
(
"use_npu"
,
"npu"
,
"npu"
,
"use_npu"
))
print
(
err
.
format
(
"use_npu"
,
"npu"
,
"npu"
,
"use_npu"
))
...
@@ -171,6 +181,7 @@ def to_float32(preds):
...
@@ -171,6 +181,7 @@ def to_float32(preds):
return
preds
return
preds
def
train
(
def
train
(
config
,
config
,
train_dataloader
,
train_dataloader
,
...
@@ -199,23 +210,9 @@ def train(
...
@@ -199,23 +210,9 @@ def train(
print_batch_step
=
config
[
"Global"
][
"print_batch_step"
]
print_batch_step
=
config
[
"Global"
][
"print_batch_step"
]
eval_batch_step
=
config
[
"Global"
][
"eval_batch_step"
]
eval_batch_step
=
config
[
"Global"
][
"eval_batch_step"
]
eval_batch_epoch
=
config
[
"Global"
].
get
(
"eval_batch_epoch"
,
None
)
eval_batch_epoch
=
config
[
"Global"
].
get
(
"eval_batch_epoch"
,
None
)
profiler_options
=
config
.
get
(
"profiler_options"
,
{})
profiler_options
=
config
[
"profiler_options"
]
enable_profiler
=
profiler_options
.
get
(
"enable"
,
True
)
batch_range
=
profiler_options
.
get
(
"batch_range"
,
[
1
,
10
])
profile_path
=
profiler_options
.
get
(
"profile_path"
,
"./profiler_log"
)
print_mem_info
=
config
[
"Global"
].
get
(
"print_mem_info"
,
True
)
print_mem_info
=
config
[
"Global"
].
get
(
"print_mem_info"
,
True
)
uniform_output_enabled
=
config
[
"Global"
].
get
(
"uniform_output_enabled"
,
False
)
uniform_output_enabled
=
config
[
"Global"
].
get
(
"uniform_output_enabled"
,
False
)
options_list
=
[]
for
k
,
v
in
profiler_options
.
items
():
if
isinstance
(
v
,
bool
):
options_list
.
append
(
f
"
{
k
}
=
{
str
(
v
)
}
"
)
elif
isinstance
(
v
,
list
):
options_list
.
append
(
f
"
{
k
}
=
{
v
}
"
)
else
:
options_list
.
append
(
f
"
{
k
}
=
{
v
}
"
)
options_str
=
";"
.
join
(
options_list
)
global_step
=
0
global_step
=
0
if
"global_step"
in
pre_best_model_dict
:
if
"global_step"
in
pre_best_model_dict
:
...
@@ -302,42 +299,20 @@ def train(
...
@@ -302,42 +299,20 @@ def train(
else
len
(
train_dataloader
)
else
len
(
train_dataloader
)
)
)
#
创建性能分析器相关的回调函数
#
Initialize profiler
def
my_
on_trace_ready
(
prof
):
def
on_trace_ready
(
prof
):
callback
=
export_chrome_tracing
(
profile
_path
)
callback
=
profiler
.
export_chrome_tracing
(
'./
profile
r_log'
)
callback
(
prof
)
callback
(
prof
)
prof
.
summary
(
sorted_by
=
profiler
.
SortedKeys
.
GPUTotal
,
op_detail
=
True
,
thread_sep
=
False
,
time_unit
=
'ms'
)
# 将 Overview Summary 和 Operator Summary 保存到文件
summary_path
=
os
.
path
.
join
(
profile_path
,
"summary.txt"
)
train_prof
=
profiler
.
Profiler
(
with
open
(
summary_path
,
'w'
)
as
f
:
targets
=
[
profiler
.
ProfilerTarget
.
CPU
,
profiler
.
ProfilerTarget
.
GPU
],
f
.
write
(
"Overview Summary:
\n
"
)
scheduler
=
[
2
,
20
],
# warmup for 3 steps, profile for 5 steps
summary_overview
=
prof
.
summary
(
sorted_by
=
paddle
.
profiler
.
SortedKeys
.
GPUTotal
,
on_trace_ready
=
on_trace_ready
,
op_detail
=
False
,
timer_only
=
False
thread_sep
=
False
,
)
time_unit
=
'ms'
)
if
summary_overview
is
not
None
:
train_prof
.
start
()
f
.
write
(
summary_overview
)
else
:
f
.
write
(
"No summary available for Overview.
\n
"
)
f
.
write
(
"
\n\n
Operator Summary:
\n
"
)
summary_operator
=
prof
.
summary
(
sorted_by
=
paddle
.
profiler
.
SortedKeys
.
GPUTotal
,
op_detail
=
True
,
thread_sep
=
False
,
time_unit
=
'ms'
)
if
summary_operator
is
not
None
:
f
.
write
(
summary_operator
)
else
:
f
.
write
(
"No summary available for Operator.
\n
"
)
# 初始化 Profiler
if
enable_profiler
:
p
=
paddle
.
profiler
.
Profiler
(
scheduler
=
batch_range
,
on_trace_ready
=
my_on_trace_ready
,
timer_only
=
False
)
p
.
start
()
for
epoch
in
range
(
start_epoch
,
epoch_num
+
1
):
for
epoch
in
range
(
start_epoch
,
epoch_num
+
1
):
if
train_dataloader
.
dataset
.
need_reset
:
if
train_dataloader
.
dataset
.
need_reset
:
...
@@ -351,11 +326,10 @@ def train(
...
@@ -351,11 +326,10 @@ def train(
)
)
for
idx
,
batch
in
enumerate
(
train_dataloader
):
for
idx
,
batch
in
enumerate
(
train_dataloader
):
train_prof
.
step
()
# Notify profiler at each step
model
.
train
()
model
.
train
()
if
enable_profiler
:
p
.
step
()
# 每个 step 调用一次 Profiler 的 step
profiler
.
add_profiler_step
(
options_str
)
train_reader_cost
+=
time
.
time
()
-
reader_start
train_reader_cost
+=
time
.
time
()
-
reader_start
if
idx
>=
max_iter
:
if
idx
>=
max_iter
:
break
break
...
@@ -513,7 +487,6 @@ def train(
...
@@ -513,7 +487,6 @@ def train(
total_samples
=
0
total_samples
=
0
train_reader_cost
=
0.0
train_reader_cost
=
0.0
train_batch_cost
=
0.0
train_batch_cost
=
0.0
# eval
# eval
if
(
if
(
global_step
>
start_eval_step
global_step
>
start_eval_step
...
@@ -666,6 +639,7 @@ def train(
...
@@ -666,6 +639,7 @@ def train(
is_best
=
False
,
prefix
=
"iter_epoch_{}"
.
format
(
epoch
)
is_best
=
False
,
prefix
=
"iter_epoch_{}"
.
format
(
epoch
)
)
)
train_prof
.
stop
()
# Ensure profiler is stopped after training
best_str
=
"best metric, {}"
.
format
(
best_str
=
"best metric, {}"
.
format
(
", "
.
join
([
"{}: {}"
.
format
(
k
,
v
)
for
k
,
v
in
best_model_dict
.
items
()])
", "
.
join
([
"{}: {}"
.
format
(
k
,
v
)
for
k
,
v
in
best_model_dict
.
items
()])
)
)
...
@@ -688,6 +662,23 @@ def eval(
...
@@ -688,6 +662,23 @@ def eval(
amp_dtype
=
"float16"
,
amp_dtype
=
"float16"
,
):
):
model
.
eval
()
model
.
eval
()
def
on_trace_ready
(
prof
):
# Export timeline trace
callback
=
profiler
.
export_chrome_tracing
(
"./eval_trace"
)
callback
(
prof
)
# Optional: print summary
prof
.
summary
(
sorted_by
=
profiler
.
SortedKeys
.
GPUTotal
,
op_detail
=
True
,
thread_sep
=
False
,
time_unit
=
'ms'
)
# Profile first 10 evaluation steps (adjust as needed)
p
=
profiler
.
Profiler
(
targets
=
[
profiler
.
ProfilerTarget
.
CPU
,
profiler
.
ProfilerTarget
.
GPU
],
scheduler
=
[
0
,
20
],
# start at step 0, stop after step 10
on_trace_ready
=
on_trace_ready
,
timer_only
=
False
# capture full operator trace
)
p
.
start
()
with
paddle
.
no_grad
():
with
paddle
.
no_grad
():
total_frame
=
0.0
total_frame
=
0.0
total_time
=
0.0
total_time
=
0.0
...
@@ -773,9 +764,10 @@ def eval(
...
@@ -773,9 +764,10 @@ def eval(
pbar
.
update
(
1
)
pbar
.
update
(
1
)
total_frame
+=
len
(
images
)
total_frame
+=
len
(
images
)
sum_images
+=
1
sum_images
+=
1
p
.
step
()
# Get final metric,eg. acc or hmean
# Get final metric,eg. acc or hmean
p
.
stop
()
metric
=
eval_class
.
get_metric
()
metric
=
eval_class
.
get_metric
()
pbar
.
close
()
pbar
.
close
()
model
.
train
()
model
.
train
()
# Avoid ZeroDivisionError
# Avoid ZeroDivisionError
...
@@ -840,12 +832,10 @@ def get_center(model, eval_dataloader, post_process_class):
...
@@ -840,12 +832,10 @@ def get_center(model, eval_dataloader, post_process_class):
def
preprocess
(
is_train
=
False
):
def
preprocess
(
is_train
=
False
):
FLAGS
=
ArgsParser
().
parse_args
()
FLAGS
=
ArgsParser
().
parse_args
()
profiler_options
=
FLAGS
.
profiler_options
config
=
load_config
(
FLAGS
.
config
)
config
=
load_config
(
FLAGS
.
config
)
config
=
merge_config
(
config
,
FLAGS
.
opt
)
config
=
merge_config
(
config
,
FLAGS
.
opt
)
profile_dic
=
{
"profiler_options"
:
FLAGS
.
profiler_options
}
# 从 config 中读取 profiler_options
profiler_options
=
config
.
get
(
"profiler_options"
,
{})
profile_dic
=
{
"profiler_options"
:
profiler_options
}
config
=
merge_config
(
config
,
profile_dic
)
config
=
merge_config
(
config
,
profile_dic
)
if
is_train
:
if
is_train
:
...
@@ -965,4 +955,5 @@ def preprocess(is_train=False):
...
@@ -965,4 +955,5 @@ def preprocess(is_train=False):
log_writer
=
None
log_writer
=
None
logger
.
info
(
"train with paddle {} and device {}"
.
format
(
paddle
.
__version__
,
device
))
logger
.
info
(
"train with paddle {} and device {}"
.
format
(
paddle
.
__version__
,
device
))
return
config
,
device
,
logger
,
log_writer
return
config
,
device
,
logger
,
log_writer
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment