Commit 60ffce35 authored by silencealiang
Browse files

增加profiling table可显示shape(需打开record_shapes参数)

增加tensorboard可视化功能(需替换on_trace_ready参数)
修复由于kernel名称过长显示不全的问题
parent 64266070
Pipeline #2207 passed with stage
......@@ -1420,7 +1420,13 @@ def train(forward_step_func, model, optimizer, opt_param_scheduler,
from pathlib import Path
Path(f"{args.profile_dir}").mkdir(parents=True, exist_ok=True)
if args.rank in [0]:
print(p.key_averages().table(sort_by="self_cuda_time_total", row_limit=-1))
print(p.key_averages(group_by_input_shape=True,
group_by_stack_n=5).table(sort_by="self_cuda_time_total",
row_limit=-1,
max_src_column_width=100,
max_name_column_width=250,
max_shapes_column_width=200))
p.export_chrome_trace("{path}/trace_rank{rank}_step{step}.json".format(
path=args.profile_dir, rank=torch.distributed.get_rank(), step=p.step_num))
......@@ -1434,6 +1440,8 @@ def train(forward_step_func, model, optimizer, opt_param_scheduler,
warmup=1 if args.profile_step_start > 0 else 0,
active=args.profile_step_end-args.profile_step_start,
repeat=1),
#record_shapes=True,
#on_trace_ready=torch.profiler.tensorboard_trace_handler('./torch_prof_data'))
on_trace_ready=trace_handler)
prof.start()
elif args.profile and torch.distributed.get_rank() in args.profile_ranks and args.use_hip_profiler:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment