Update program_prof.py

9646af88 · wanglch · 993cdf7b · 9646af88
Commit 9646af88 authored Jul 07, 2025 by wanglch
Hide whitespace changes
Inline Side-by-side

Showing with 54 additions and 63 deletions

tools/program_prof.py tools/program_prof.py +54 -63

No files found.
--- a/tools/program_prof.py
+++ b/tools/program_prof.py
@@ -16,6 +16,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+import paddle.profiler as profiler
 import os
 import gc
 import sys
@@ -30,22 +31,29 @@ import cv2
 import numpy as np
 import copy
 from argparse import ArgumentParser, RawDescriptionHelpFormatter
-from paddle.profiler import export_chrome_tracing
 from ppocr.utils.stats import TrainingStats
 from ppocr.utils.save_load import save_model
 from ppocr.utils.utility import print_dict, AverageMeter
 from ppocr.utils.logging import get_logger
 from ppocr.utils.loggers import WandbLogger, Loggers
-from ppocr.utils import profiler
 from ppocr.data import build_dataloader
 from ppocr.utils.export_model import export
 class ArgsParser(ArgumentParser):
    def __init__(self):
        super(ArgsParser, self).__init__(formatter_class=RawDescriptionHelpFormatter)
        self.add_argument("-c", "--config", help="configuration file to use")
        self.add_argument("-o", "--opt", nargs="+", help="set configuration options")
+        self.add_argument(
+            "-p",
+            "--profiler_options",
+            type=str,
+            default=None,
+            help="The option of profiler, which should be in format "
+            '"key1=value1;key2=value2;key3=value3".',
+        )
    def parse_args(self, argv=None):
        args = super(ArgsParser, self).parse_args(argv)
@@ -132,11 +140,13 @@ def check_device(use_gpu, use_xpu=False, use_npu=False, use_mlu=False, use_gcu=F
        if use_npu:
            if (
                int(paddle.version.major) != 0
+                and int(paddle.version.major) <= 2
                and int(paddle.version.minor) <= 4
            ):
                if not paddle.device.is_compiled_with_npu():
                    print(err.format("use_npu", "npu", "npu", "use_npu"))
                    sys.exit(1)
+            # is_compiled_with_npu() has been updated after paddle-2.4
            else:
                if not paddle.device.is_compiled_with_custom_device("npu"):
                    print(err.format("use_npu", "npu", "npu", "use_npu"))
@@ -171,6 +181,7 @@ def to_float32(preds):
    return preds
 def train(
    config,
    train_dataloader,
@@ -199,23 +210,9 @@ def train(
    print_batch_step = config["Global"]["print_batch_step"]
    eval_batch_step = config["Global"]["eval_batch_step"]
    eval_batch_epoch = config["Global"].get("eval_batch_epoch", None)
-    profiler_options = config.get("profiler_options", {})
+    profiler_options = config["profiler_options"]
-    enable_profiler = profiler_options.get("enable", True)
-    batch_range = profiler_options.get("batch_range", [1, 10])
-    profile_path = profiler_options.get("profile_path", "./profiler_log")
    print_mem_info = config["Global"].get("print_mem_info", True)
    uniform_output_enabled = config["Global"].get("uniform_output_enabled", False)
-    options_list = []
-    for k, v in profiler_options.items():
-        if isinstance(v, bool):
-            options_list.append(f"{k}={str(v)}")
-        elif isinstance(v, list):
-            options_list.append(f"{k}={v}")
-        else:
-            options_list.append(f"{k}={v}")
-    options_str = ";".join(options_list)
    global_step = 0
    if "global_step" in pre_best_model_dict:
@@ -302,42 +299,20 @@ def train(
        else len(train_dataloader)
    )
-    # 创建性能分析器相关的回调函数
+    # Initialize profiler
-    def my_on_trace_ready(prof):
+    def on_trace_ready(prof):
-        callback = export_chrome_tracing(profile_path)
+        callback = profiler.export_chrome_tracing('./profiler_log')
        callback(prof)
+        prof.summary(sorted_by=profiler.SortedKeys.GPUTotal, op_detail=True, thread_sep=False, time_unit='ms')
-        # 将 Overview Summary 和 Operator Summary 保存到文件
-        summary_path = os.path.join(profile_path, "summary.txt")
+    train_prof = profiler.Profiler(
-        with open(summary_path, 'w') as f:
+        targets=[profiler.ProfilerTarget.CPU, profiler.ProfilerTarget.GPU],
-            f.write("Overview Summary:\n")
+        scheduler=[2, 20],  # record batches in the half-open range [2, 20), i.e. steps 2-19
-            summary_overview = prof.summary(sorted_by=paddle.profiler.SortedKeys.GPUTotal,
+        on_trace_ready=on_trace_ready,
-                                            op_detail=False,
+        timer_only=False
-                                            thread_sep=False,
+    )
-                                            time_unit='ms')
-            if summary_overview is not None:
+    train_prof.start()
-                f.write(summary_overview)
-            else:
-                f.write("No summary available for Overview.\n")
-            f.write("\n\nOperator Summary:\n")
-            summary_operator = prof.summary(sorted_by=paddle.profiler.SortedKeys.GPUTotal,
-                                            op_detail=True,
-                                            thread_sep=False,
-                                            time_unit='ms')
-            if summary_operator is not None:
-                f.write(summary_operator)
-            else:
-                f.write("No summary available for Operator.\n")
-    # 初始化 Profiler
-    if enable_profiler:
-        p = paddle.profiler.Profiler(
-            scheduler=batch_range,
-            on_trace_ready=my_on_trace_ready,
-            timer_only=False
-        )
-        p.start()
    for epoch in range(start_epoch, epoch_num + 1):
        if train_dataloader.dataset.need_reset:
@@ -351,11 +326,10 @@ def train(
            )
        for idx, batch in enumerate(train_dataloader):
+            train_prof.step()  # Notify profiler at each step
            model.train()
-            if enable_profiler:
-                p.step()  # 每个 step 调用一次 Profiler 的 step
-            profiler.add_profiler_step(options_str)
            train_reader_cost += time.time() - reader_start
            if idx >= max_iter:
                break
@@ -513,7 +487,6 @@ def train(
                total_samples = 0
                train_reader_cost = 0.0
                train_batch_cost = 0.0
            # eval
            if (
                global_step > start_eval_step
@@ -666,6 +639,7 @@ def train(
                    is_best=False, prefix="iter_epoch_{}".format(epoch)
                )
+    train_prof.stop()  # Ensure profiler is stopped after training
    best_str = "best metric, {}".format(
        ", ".join(["{}: {}".format(k, v) for k, v in best_model_dict.items()])
    )
@@ -688,6 +662,23 @@ def eval(
    amp_dtype="float16",
 ):
    model.eval()
+    def on_trace_ready(prof):
+        # Export timeline trace
+        callback = profiler.export_chrome_tracing("./eval_trace")
+        callback(prof)
+        # Optional: print summary
+        prof.summary(sorted_by=profiler.SortedKeys.GPUTotal, op_detail=True, thread_sep=False, time_unit='ms')
+    # Profile the first 20 evaluation steps (adjust the scheduler range as needed)
+    p = profiler.Profiler(
+        targets=[profiler.ProfilerTarget.CPU, profiler.ProfilerTarget.GPU],
+        scheduler=[0, 20],          # record batches in the half-open range [0, 20), i.e. steps 0-19
+        on_trace_ready=on_trace_ready,
+        timer_only=False            # capture full operator trace
+    )
+    p.start()
    with paddle.no_grad():
        total_frame = 0.0
        total_time = 0.0
@@ -773,9 +764,10 @@ def eval(
            pbar.update(1)
            total_frame += len(images)
            sum_images += 1
+            p.step()
        # Get final metric，eg. acc or hmean
+        p.stop()
        metric = eval_class.get_metric()
    pbar.close()
    model.train()
    # Avoid ZeroDivisionError
@@ -840,12 +832,10 @@ def get_center(model, eval_dataloader, post_process_class):
 def preprocess(is_train=False):
    FLAGS = ArgsParser().parse_args()
+    profiler_options = FLAGS.profiler_options
    config = load_config(FLAGS.config)
    config = merge_config(config, FLAGS.opt)
+    profile_dic = {"profiler_options": FLAGS.profiler_options}
-    # 从 config 中读取 profiler_options
-    profiler_options = config.get("profiler_options", {})
-    profile_dic = {"profiler_options": profiler_options}
    config = merge_config(config, profile_dic)
    if is_train:
@@ -965,4 +955,5 @@ def preprocess(is_train=False):
        log_writer = None
    logger.info("train with paddle {} and device {}".format(paddle.__version__, device))
    return config, device, logger, log_writer
\ No newline at end of file