Unverified commit f6a71139, authored by fzyzcjy and committed by GitHub
Browse files

Make profiler output file names consistent (#5548)

parent 1e0806f3
...@@ -834,6 +834,7 @@ class ProfileReq: ...@@ -834,6 +834,7 @@ class ProfileReq:
activities: Optional[List[str]] = None activities: Optional[List[str]] = None
with_stack: Optional[bool] = None with_stack: Optional[bool] = None
record_shapes: Optional[bool] = None record_shapes: Optional[bool] = None
profile_id: Optional[str] = None
@dataclass @dataclass
......
...@@ -391,6 +391,7 @@ class Scheduler( ...@@ -391,6 +391,7 @@ class Scheduler(
self.torch_profiler = None self.torch_profiler = None
self.torch_profiler_output_dir: Optional[str] = None self.torch_profiler_output_dir: Optional[str] = None
self.profiler_activities: Optional[List[str]] = None self.profiler_activities: Optional[List[str]] = None
self.profiler_id: Optional[str] = None
self.profiler_target_forward_ct: Optional[int] = None self.profiler_target_forward_ct: Optional[int] = None
# Init metrics stats # Init metrics stats
...@@ -1805,6 +1806,7 @@ class Scheduler( ...@@ -1805,6 +1806,7 @@ class Scheduler(
recv_req.activities, recv_req.activities,
recv_req.with_stack, recv_req.with_stack,
recv_req.record_shapes, recv_req.record_shapes,
recv_req.profile_id,
) )
else: else:
return self.stop_profile() return self.stop_profile()
...@@ -1816,6 +1818,7 @@ class Scheduler( ...@@ -1816,6 +1818,7 @@ class Scheduler(
activities: Optional[List[str]], activities: Optional[List[str]],
with_stack: Optional[bool], with_stack: Optional[bool],
record_shapes: Optional[bool], record_shapes: Optional[bool],
profile_id: Optional[str],
) -> None: ) -> None:
if self.profiler_activities: if self.profiler_activities:
return ProfileReqOutput( return ProfileReqOutput(
...@@ -1830,9 +1833,11 @@ class Scheduler( ...@@ -1830,9 +1833,11 @@ class Scheduler(
self.torch_profiler_output_dir = output_dir self.torch_profiler_output_dir = output_dir
self.profiler_activities = activities self.profiler_activities = activities
self.profiler_id = profile_id
logger.info( logger.info(
"Profiling starts. Traces will be saved to: %s", "Profiling starts. Traces will be saved to: %s (with id %s)",
self.torch_profiler_output_dir, self.torch_profiler_output_dir,
self.profiler_id,
) )
activity_map = { activity_map = {
...@@ -1874,14 +1879,14 @@ class Scheduler( ...@@ -1874,14 +1879,14 @@ class Scheduler(
self.torch_profiler.export_chrome_trace( self.torch_profiler.export_chrome_trace(
os.path.join( os.path.join(
self.torch_profiler_output_dir, self.torch_profiler_output_dir,
str(time.time()) + f"-TP-{self.tp_rank}" + ".trace.json.gz", self.profiler_id + f"-TP-{self.tp_rank}" + ".trace.json.gz",
) )
) )
if "MEM" in self.profiler_activities: if "MEM" in self.profiler_activities:
memory_profile_path = os.path.join( memory_profile_path = os.path.join(
self.torch_profiler_output_dir, self.torch_profiler_output_dir,
str(time.time()) + f"-TP-{self.tp_rank}-memory" + ".pickle", self.profiler_id + f"-TP-{self.tp_rank}-memory" + ".pickle",
) )
torch.cuda.memory._dump_snapshot(memory_profile_path) torch.cuda.memory._dump_snapshot(memory_profile_path)
torch.cuda.memory._record_memory_history(enabled=None) torch.cuda.memory._record_memory_history(enabled=None)
......
...@@ -650,6 +650,7 @@ class TokenizerManager: ...@@ -650,6 +650,7 @@ class TokenizerManager:
output_dir=output_dir, output_dir=output_dir,
num_steps=num_steps, num_steps=num_steps,
activities=activities, activities=activities,
profile_id=str(time.time()),
) )
result = (await self.start_profile_communicator(req))[0] result = (await self.start_profile_communicator(req))[0]
if not result.success: if not result.success:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment