Unverified commit 6535fda1 authored by Cheng Wan, committed by GitHub

[Profile] dump memory trace when cuda graph profile is enabled (#11083)

parent 3713eb61
@@ -437,6 +437,7 @@ class CudaGraphRunner:
                 activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
                 record_shapes=True,
             )
+            torch.cuda.memory._record_memory_history()
         # Trigger CUDA graph capture for specific shapes.
         # Capture the large shapes first so that the smaller shapes
@@ -485,6 +486,8 @@ class CudaGraphRunner:
         save_gemlite_cache()
         if self.enable_profile_cuda_graph:
+            torch.cuda.memory._dump_snapshot(f"cuda_graph_runner_memory_usage.pickle")
+            torch.cuda.memory._record_memory_history(enabled=None)
             log_message = (
                 "Sorted by CUDA Time:\n"
                 + prof.key_averages(group_by_input_shape=True).table(
@@ -494,6 +497,7 @@ class CudaGraphRunner:
                 + prof.key_averages(group_by_input_shape=True).table(
                     sort_by="cpu_time_total", row_limit=10
                 )
+                + "\n\nMemory Usage is saved to cuda_graph_runner_memory_usage.pickle\n"
             )
             logger.info(log_message)
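The change wires PyTorch's CUDA caching-allocator history recorder into the existing enable_profile_cuda_graph path: recording starts right after the profiler is set up, and once graph capture finishes the history is dumped to cuda_graph_runner_memory_usage.pickle and recording is turned off with enabled=None. Below is a minimal, standalone sketch of the same pattern outside SGLang; the profile_with_memory_trace helper, the matmul workload, and the snapshot file name are illustrative placeholders, not part of the commit.

import torch
from torch.profiler import ProfilerActivity, profile


def profile_with_memory_trace(fn, snapshot_path="memory_usage.pickle"):
    # Start recording CUDA caching-allocator events (alloc/free history).
    torch.cuda.memory._record_memory_history()
    with profile(
        activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
        record_shapes=True,
    ) as prof:
        fn()
    # Persist the recorded history, then stop recording (enabled=None).
    torch.cuda.memory._dump_snapshot(snapshot_path)
    torch.cuda.memory._record_memory_history(enabled=None)
    # Mirror the commit's logging: top ops by time plus a pointer to the snapshot.
    print(
        prof.key_averages(group_by_input_shape=True).table(
            sort_by="cuda_time_total", row_limit=10
        )
    )
    print(f"Memory usage is saved to {snapshot_path}")


if __name__ == "__main__":
    x = torch.randn(1024, 1024, device="cuda")
    profile_with_memory_trace(lambda: (x @ x).sum().item())

The saved pickle can be opened in the PyTorch memory visualizer at https://pytorch.org/memory_viz to browse allocation history over time.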