Commit 9646af88 authored by wanglch

Update program_prof.py

parent 993cdf7b
@@ -16,6 +16,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+import paddle.profiler as profiler
 import os
 import gc
 import sys
@@ -30,22 +31,29 @@ import cv2
 import numpy as np
 import copy
 from argparse import ArgumentParser, RawDescriptionHelpFormatter
-from paddle.profiler import export_chrome_tracing
 from ppocr.utils.stats import TrainingStats
 from ppocr.utils.save_load import save_model
 from ppocr.utils.utility import print_dict, AverageMeter
 from ppocr.utils.logging import get_logger
 from ppocr.utils.loggers import WandbLogger, Loggers
-from ppocr.utils import profiler
 from ppocr.data import build_dataloader
 from ppocr.utils.export_model import export


 class ArgsParser(ArgumentParser):
     def __init__(self):
         super(ArgsParser, self).__init__(formatter_class=RawDescriptionHelpFormatter)
         self.add_argument("-c", "--config", help="configuration file to use")
         self.add_argument("-o", "--opt", nargs="+", help="set configuration options")
+        self.add_argument(
+            "-p",
+            "--profiler_options",
+            type=str,
+            default=None,
+            help="The option of profiler, which should be in format "
+            '"key1=value1;key2=value2;key3=value3".',
+        )

     def parse_args(self, argv=None):
         args = super(ArgsParser, self).parse_args(argv)
@@ -132,11 +140,13 @@ def check_device(use_gpu, use_xpu=False, use_npu=False, use_mlu=False, use_gcu=F
     if use_npu:
         if (
             int(paddle.version.major) != 0
+            and int(paddle.version.major) <= 2
             and int(paddle.version.minor) <= 4
         ):
             if not paddle.device.is_compiled_with_npu():
                 print(err.format("use_npu", "npu", "npu", "use_npu"))
                 sys.exit(1)
+        # is_compiled_with_npu() has been updated after paddle-2.4
         else:
             if not paddle.device.is_compiled_with_custom_device("npu"):
                 print(err.format("use_npu", "npu", "npu", "use_npu"))
@@ -171,6 +181,7 @@ def to_float32(preds):
     return preds


 def train(
     config,
     train_dataloader,
@@ -199,23 +210,9 @@ def train(
     print_batch_step = config["Global"]["print_batch_step"]
     eval_batch_step = config["Global"]["eval_batch_step"]
     eval_batch_epoch = config["Global"].get("eval_batch_epoch", None)
-    profiler_options = config.get("profiler_options", {})
-    enable_profiler = profiler_options.get("enable", True)
-    batch_range = profiler_options.get("batch_range", [1, 10])
-    profile_path = profiler_options.get("profile_path", "./profiler_log")
+    profiler_options = config["profiler_options"]
     print_mem_info = config["Global"].get("print_mem_info", True)
     uniform_output_enabled = config["Global"].get("uniform_output_enabled", False)
-    options_list = []
-    for k, v in profiler_options.items():
-        if isinstance(v, bool):
-            options_list.append(f"{k}={str(v)}")
-        elif isinstance(v, list):
-            options_list.append(f"{k}={v}")
-        else:
-            options_list.append(f"{k}={v}")
-    options_str = ";".join(options_list)

     global_step = 0
     if "global_step" in pre_best_model_dict:
@@ -302,42 +299,20 @@ def train(
         else len(train_dataloader)
     )

-    # Create the callback used by the profiler
-    def my_on_trace_ready(prof):
-        callback = export_chrome_tracing(profile_path)
-        callback(prof)
-        # Save the overview summary and the operator summary to a file
-        summary_path = os.path.join(profile_path, "summary.txt")
-        with open(summary_path, 'w') as f:
-            f.write("Overview Summary:\n")
-            summary_overview = prof.summary(sorted_by=paddle.profiler.SortedKeys.GPUTotal,
-                                            op_detail=False,
-                                            thread_sep=False,
-                                            time_unit='ms')
-            if summary_overview is not None:
-                f.write(summary_overview)
-            else:
-                f.write("No summary available for Overview.\n")
-            f.write("\n\nOperator Summary:\n")
-            summary_operator = prof.summary(sorted_by=paddle.profiler.SortedKeys.GPUTotal,
-                                            op_detail=True,
-                                            thread_sep=False,
-                                            time_unit='ms')
-            if summary_operator is not None:
-                f.write(summary_operator)
-            else:
-                f.write("No summary available for Operator.\n")
-
-    # Initialize the profiler
-    if enable_profiler:
-        p = paddle.profiler.Profiler(
-            scheduler=batch_range,
-            on_trace_ready=my_on_trace_ready,
-            timer_only=False
-        )
-        p.start()
+    # Initialize profiler
+    def on_trace_ready(prof):
+        callback = profiler.export_chrome_tracing('./profiler_log')
+        callback(prof)
+        prof.summary(sorted_by=profiler.SortedKeys.GPUTotal, op_detail=True, thread_sep=False, time_unit='ms')
+
+    train_prof = profiler.Profiler(
+        targets=[profiler.ProfilerTarget.CPU, profiler.ProfilerTarget.GPU],
+        scheduler=[2, 20],  # profile batches in the range [2, 20)
+        on_trace_ready=on_trace_ready,
+        timer_only=False
+    )
+    train_prof.start()
     for epoch in range(start_epoch, epoch_num + 1):
         if train_dataloader.dataset.need_reset:
@@ -351,11 +326,10 @@ def train(
             )

         for idx, batch in enumerate(train_dataloader):
+            train_prof.step()  # Notify the profiler at each step
             model.train()
-            if enable_profiler:
-                p.step()  # Advance the profiler once per step
-            profiler.add_profiler_step(options_str)
             train_reader_cost += time.time() - reader_start
             if idx >= max_iter:
                 break
@@ -513,7 +487,6 @@ def train(
                 total_samples = 0
                 train_reader_cost = 0.0
                 train_batch_cost = 0.0
             # eval
             if (
                 global_step > start_eval_step
@@ -666,6 +639,7 @@ def train(
                     is_best=False, prefix="iter_epoch_{}".format(epoch)
                 )

+    train_prof.stop()  # Ensure the profiler is stopped after training
     best_str = "best metric, {}".format(
         ", ".join(["{}: {}".format(k, v) for k, v in best_model_dict.items()])
     )
@@ -688,6 +662,23 @@ def eval(
     amp_dtype="float16",
 ):
     model.eval()
+
+    def on_trace_ready(prof):
+        # Export timeline trace
+        callback = profiler.export_chrome_tracing("./eval_trace")
+        callback(prof)
+        # Optional: print summary
+        prof.summary(sorted_by=profiler.SortedKeys.GPUTotal, op_detail=True, thread_sep=False, time_unit='ms')
+
+    # Profile the first 20 evaluation steps (adjust as needed)
+    p = profiler.Profiler(
+        targets=[profiler.ProfilerTarget.CPU, profiler.ProfilerTarget.GPU],
+        scheduler=[0, 20],  # profile steps in the range [0, 20)
+        on_trace_ready=on_trace_ready,
+        timer_only=False  # capture the full operator trace
+    )
+    p.start()
+
     with paddle.no_grad():
         total_frame = 0.0
         total_time = 0.0
@@ -773,9 +764,10 @@ def eval(
                 pbar.update(1)
                 total_frame += len(images)
                 sum_images += 1
+                p.step()
         # Get final metric,eg. acc or hmean
+        p.stop()
         metric = eval_class.get_metric()
         pbar.close()
     model.train()
     # Avoid ZeroDivisionError
@@ -840,12 +832,10 @@ def get_center(model, eval_dataloader, post_process_class):

 def preprocess(is_train=False):
     FLAGS = ArgsParser().parse_args()
+    profiler_options = FLAGS.profiler_options
     config = load_config(FLAGS.config)
     config = merge_config(config, FLAGS.opt)
-    # Read profiler_options from the config
-    profiler_options = config.get("profiler_options", {})
-    profile_dic = {"profiler_options": profiler_options}
+    profile_dic = {"profiler_options": FLAGS.profiler_options}
     config = merge_config(config, profile_dic)

     if is_train:
@@ -965,4 +955,5 @@ def preprocess(is_train=False):
         log_writer = None
     logger.info("train with paddle {} and device {}".format(paddle.__version__, device))
     return config, device, logger, log_writer
\ No newline at end of file
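
Note: for context, the snippet below is a minimal, self-contained sketch (not part of this commit) of the paddle.profiler pattern the new code wires into train() and eval(): build a Profiler, call step() once per batch, and stop() when the loop ends. The toy model, optimizer, and step count are stand-ins for the real training loop; it assumes a CUDA-enabled Paddle build, so on a CPU-only install drop ProfilerTarget.GPU and sort the summary by SortedKeys.CPUTotal.

import paddle
import paddle.profiler as profiler


def on_trace_ready(prof):
    # Write a Chrome trace to ./profiler_log (viewable in chrome://tracing or Perfetto)
    profiler.export_chrome_tracing("./profiler_log")(prof)
    # Print an operator-level summary sorted by total GPU time
    prof.summary(sorted_by=profiler.SortedKeys.GPUTotal, op_detail=True, time_unit="ms")


# Toy model and data standing in for the real training loop
model = paddle.nn.Linear(64, 10)
opt = paddle.optimizer.SGD(learning_rate=0.01, parameters=model.parameters())
loss_fn = paddle.nn.MSELoss()

prof = profiler.Profiler(
    targets=[profiler.ProfilerTarget.CPU, profiler.ProfilerTarget.GPU],
    scheduler=(2, 20),  # record batches in the range [2, 20)
    on_trace_ready=on_trace_ready,
    timer_only=False,
)
prof.start()
for step in range(30):
    x = paddle.randn([8, 64])
    y = paddle.randn([8, 10])
    loss = loss_fn(model(x), y)
    loss.backward()
    opt.step()
    opt.clear_grad()
    prof.step()  # advance the profiler once per batch, as train() now does
prof.stop()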