Unverified commit 53a2c3b4, authored by fzyzcjy and committed by GitHub
Browse files

Support controlling nsys start and end range programmatically (#4688)

parent 550586ef
......@@ -20,7 +20,7 @@ import copy
import uuid
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List, Literal, Optional, Union
from sglang.srt.managers.schedule_batch import BaseFinishReason
from sglang.srt.sampling.sampling_params import SamplingParams
......@@ -650,7 +650,7 @@ class ProfileReqInput:
# If it is set, profiling is automatically stopped after this step, and
# the caller doesn't need to run stop_profile.
num_steps: Optional[int] = None
activities: Optional[List[str]] = None
activities: Optional[List[Literal["CPU", "GPU", "MEM", "CUDA_PROFILER"]]] = None
class ProfileReqType(Enum):
......
......@@ -379,7 +379,7 @@ class Scheduler(
# Init profiler
self.torch_profiler = None
self.torch_profiler_output_dir: Optional[str] = None
self.torch_profiler_activities: Optional[List[str]] = None
self.profiler_activities: Optional[List[str]] = None
self.profiler_target_forward_ct: Optional[int] = None
# Init metrics stats
......@@ -1818,7 +1818,7 @@ class Scheduler(
num_steps: Optional[int],
activities: Optional[List[str]],
) -> None:
if self.torch_profiler_activities:
if self.profiler_activities:
return ProfileReqOutput(
success=False,
message="Profiling is already in progress. Call /stop_profile first.",
......@@ -1830,7 +1830,7 @@ class Scheduler(
activities = ["CPU", "GPU"]
self.torch_profiler_output_dir = output_dir
self.torch_profiler_activities = activities
self.profiler_activities = activities
logger.info(
"Profiling starts. Traces will be saved to: %s",
self.torch_profiler_output_dir,
......@@ -1854,6 +1854,9 @@ class Scheduler(
if "MEM" in activities:
torch.cuda.memory._record_memory_history(max_entries=100000)
if "CUDA_PROFILER" in activities:
torch.cuda.cudart().cudaProfilerStart()
if num_steps:
self.profiler_target_forward_ct = self.forward_ct + num_steps
# The caller will be notified when reaching profiler_target_forward_ct
......@@ -1862,7 +1865,7 @@ class Scheduler(
return ProfileReqOutput(success=True, message="Succeeded")
def stop_profile(self) -> None:
if self.torch_profiler_activities is None:
if self.profiler_activities is None:
return
logger.info("Stop profiling...")
......@@ -1875,7 +1878,7 @@ class Scheduler(
)
)
if "MEM" in self.torch_profiler_activities:
if "MEM" in self.profiler_activities:
memory_profile_path = os.path.join(
self.torch_profiler_trace_dir,
str(time.time()) + f"-TP-{self.tp_rank}-memory" + ".pickle",
......@@ -1883,13 +1886,16 @@ class Scheduler(
torch.cuda.memory._dump_snapshot(memory_profile_path)
torch.cuda.memory._record_memory_history(enabled=None)
if "CUDA_PROFILER" in self.profiler_activities:
torch.cuda.cudart().cudaProfilerStop()
logger.info(
"Profiling done. Traces are saved to: %s",
self.torch_profiler_output_dir,
)
self.torch_profiler = None
self.torch_profiler_output_dir = None
self.torch_profiler_activities = None
self.profiler_activities = None
if self.profiler_target_forward_ct:
self.send_to_tokenizer.send_pyobj(
......@@ -1957,7 +1963,6 @@ def run_scheduler_process(
dp_rank: Optional[int],
pipe_writer,
):
# Generate the prefix
if dp_rank is None:
prefix = f" TP{tp_rank}"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment