Unverified commit 17676585, authored by Dayeol Lee, committed by GitHub
Browse files

[Debugging] Add annotation for easier trace analysis (#22496)

parent efe73e9b
...@@ -509,6 +509,19 @@ class Worker(WorkerBase): ...@@ -509,6 +509,19 @@ class Worker(WorkerBase):
def get_supported_tasks(self) -> tuple[SupportedTask, ...]: def get_supported_tasks(self) -> tuple[SupportedTask, ...]:
return self.model_runner.get_supported_tasks() return self.model_runner.get_supported_tasks()
def annotate_profile(self, scheduler_output):
    """Return a context manager that labels this iteration in the trace.

    The label encodes how many new and how many cached requests are
    scheduled, so individual iterations can be told apart when reading
    a profiler trace.

    Args:
        scheduler_output: Object exposing ``scheduled_new_reqs`` (sized)
            and ``scheduled_cached_reqs.req_ids`` (sized).

    Returns:
        A ``torch.profiler.record_function`` named
        ``execute_new_<N>_cached_<M>`` when ``self.profiler`` is truthy;
        otherwise a no-op ``nullcontext()``.
    """
    if self.profiler:
        num_new = len(scheduler_output.scheduled_new_reqs)
        cached_reqs = scheduler_output.scheduled_cached_reqs
        num_cached = len(cached_reqs.req_ids)
        return torch.profiler.record_function(
            f"execute_new_{num_new}_cached_{num_cached}"
        )
    # Profiling is off: hand back a context manager that does nothing.
    return nullcontext()
@torch.inference_mode() @torch.inference_mode()
def sample_tokens( def sample_tokens(
self, grammar_output: "GrammarOutput | None" self, grammar_output: "GrammarOutput | None"
...@@ -536,7 +549,10 @@ class Worker(WorkerBase): ...@@ -536,7 +549,10 @@ class Worker(WorkerBase):
) )
) )
output = self.model_runner.execute_model(scheduler_output, intermediate_tensors) with self.annotate_profile(scheduler_output):
output = self.model_runner.execute_model(
scheduler_output, intermediate_tensors
)
if isinstance(output, (ModelRunnerOutput, NoneType)): if isinstance(output, (ModelRunnerOutput, NoneType)):
return output return output
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment