"vscode:/vscode.git/clone" did not exist on "14bffe97286030a9efd1cc1a0832c7fc21413fbe"
Unverified Commit 53a2c3b4 authored by fzyzcjy, committed by GitHub

Support controlling nsys start and end range programmatically (#4688)

parent 550586ef
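
The new "CUDA_PROFILER" activity drives Nsight Systems capture ranges through CUDA's profiler API. A minimal sketch of the pattern this commit wires into the scheduler (the nsys command line and `run_workload` are illustrative context, not part of the commit):

```python
# Sketch of the capture-range pattern, assuming the process was launched
# under nsys with e.g.:
#   nsys profile --capture-range=cudaProfilerApi --capture-range-end=stop <cmd>
import torch

torch.cuda.cudart().cudaProfilerStart()  # nsys begins recording here
run_workload()                           # hypothetical: the steps to profile
torch.cuda.cudart().cudaProfilerStop()   # nsys stops recording here
```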
@@ -20,7 +20,7 @@ import copy
 import uuid
 from dataclasses import dataclass, field
 from enum import Enum
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Literal, Optional, Union
 
 from sglang.srt.managers.schedule_batch import BaseFinishReason
 from sglang.srt.sampling.sampling_params import SamplingParams
@@ -650,7 +650,7 @@ class ProfileReqInput:
     # If it is set, profiling is automatically stopped after this step, and
     # the caller doesn't need to run stop_profile.
     num_steps: Optional[int] = None
-    activities: Optional[List[str]] = None
+    activities: Optional[List[Literal["CPU", "GPU", "MEM", "CUDA_PROFILER"]]] = None
 
 
 class ProfileReqType(Enum):
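
For reference, the Literal annotation narrows the accepted activity names so a static type checker can reject typos. A short sketch of the field as changed above (trimmed to the lines shown in this hunk):

```python
from dataclasses import dataclass
from typing import List, Literal, Optional

@dataclass
class ProfileReqInput:  # trimmed to the fields visible in this hunk
    num_steps: Optional[int] = None
    activities: Optional[List[Literal["CPU", "GPU", "MEM", "CUDA_PROFILER"]]] = None

req = ProfileReqInput(activities=["CUDA_PROFILER"], num_steps=10)
# ProfileReqInput(activities=["TPU"]) would be flagged by mypy/pyright,
# though it still constructs at runtime (dataclasses do not enforce Literal).
```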
@@ -379,7 +379,7 @@ class Scheduler(
         # Init profiler
         self.torch_profiler = None
         self.torch_profiler_output_dir: Optional[str] = None
-        self.torch_profiler_activities: Optional[List[str]] = None
+        self.profiler_activities: Optional[List[str]] = None
         self.profiler_target_forward_ct: Optional[int] = None
 
         # Init metrics stats
@@ -1818,7 +1818,7 @@ class Scheduler(
         num_steps: Optional[int],
         activities: Optional[List[str]],
     ) -> None:
-        if self.torch_profiler_activities:
+        if self.profiler_activities:
             return ProfileReqOutput(
                 success=False,
                 message="Profiling is already in progress. Call /stop_profile first.",
@@ -1830,7 +1830,7 @@ class Scheduler(
             activities = ["CPU", "GPU"]
 
         self.torch_profiler_output_dir = output_dir
-        self.torch_profiler_activities = activities
+        self.profiler_activities = activities
         logger.info(
             "Profiling starts. Traces will be saved to: %s",
             self.torch_profiler_output_dir,
@@ -1854,6 +1854,9 @@ class Scheduler(
         if "MEM" in activities:
             torch.cuda.memory._record_memory_history(max_entries=100000)
 
+        if "CUDA_PROFILER" in activities:
+            torch.cuda.cudart().cudaProfilerStart()
+
         if num_steps:
             self.profiler_target_forward_ct = self.forward_ct + num_steps
             # The caller will be notified when reaching profiler_target_forward_ct
@@ -1862,7 +1865,7 @@ class Scheduler(
         return ProfileReqOutput(success=True, message="Succeeded")
 
     def stop_profile(self) -> None:
-        if self.torch_profiler_activities is None:
+        if self.profiler_activities is None:
             return
 
         logger.info("Stop profiling...")
@@ -1875,7 +1878,7 @@ class Scheduler(
                 )
             )
 
-        if "MEM" in self.torch_profiler_activities:
+        if "MEM" in self.profiler_activities:
             memory_profile_path = os.path.join(
                 self.torch_profiler_trace_dir,
                 str(time.time()) + f"-TP-{self.tp_rank}-memory" + ".pickle",
@@ -1883,13 +1886,16 @@ class Scheduler(
             torch.cuda.memory._dump_snapshot(memory_profile_path)
             torch.cuda.memory._record_memory_history(enabled=None)
 
+        if "CUDA_PROFILER" in self.profiler_activities:
+            torch.cuda.cudart().cudaProfilerStop()
+
         logger.info(
             "Profiling done. Traces are saved to: %s",
             self.torch_profiler_output_dir,
         )
         self.torch_profiler = None
         self.torch_profiler_output_dir = None
-        self.torch_profiler_activities = None
+        self.profiler_activities = None
 
         if self.profiler_target_forward_ct:
             self.send_to_tokenizer.send_pyobj(
@@ -1957,7 +1963,6 @@ def run_scheduler_process(
     dp_rank: Optional[int],
     pipe_writer,
 ):
-
     # Generate the prefix
     if dp_rank is None:
         prefix = f" TP{tp_rank}"
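
Taken together, these changes let a client toggle an nsys capture range over HTTP. A hedged usage sketch: the diff confirms a /stop_profile endpoint exists, while the /start_profile path, port, and JSON field names are assumptions based on ProfileReqInput:

```python
import requests

BASE = "http://localhost:30000"  # hypothetical server address

# Begin an nsys capture range; with num_steps set, the scheduler ends
# profiling on its own after that many forward steps.
requests.post(f"{BASE}/start_profile",
              json={"activities": ["CUDA_PROFILER"], "num_steps": 10})

# Without num_steps, end the capture range explicitly:
requests.post(f"{BASE}/stop_profile")
```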