Unverified commit dd445a41, authored by kyleliang-nv, committed by GitHub

[feature] Add start step profile argument in /start_profile (#7608)

parent 7590f522
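
For context, here is a minimal client-side sketch of the new argument (not part of the commit; the local server address is an assumption, and the /generate payload is borrowed from the test added below). With start_step set, the scheduler defers profiling until its forward counter reaches that step (or the next step, whichever comes later); with num_steps also set, profiling stops by itself after that many steps, so no /stop_profile call is needed.

import requests

# Assumed local SGLang server address; adjust for your deployment.
BASE_URL = "http://127.0.0.1:30000"

# Ask the server to begin profiling at forward step 15 and capture 5 steps.
resp = requests.post(
    f"{BASE_URL}/start_profile",
    json={"start_step": 15, "num_steps": 5},
)
print(resp.status_code, resp.text)

# Drive some traffic so the scheduler actually reaches the requested step.
requests.post(
    f"{BASE_URL}/generate",
    json={
        "text": "The capital of France is",
        "sampling_params": {"temperature": 0, "max_new_tokens": 32},
    },
)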
@@ -418,6 +418,7 @@ async def start_profile_async(obj: Optional[ProfileReqInput] = None):
     await _global_state.tokenizer_manager.start_profile(
         output_dir=obj.output_dir,
+        start_step=obj.start_step,
         num_steps=obj.num_steps,
         activities=obj.activities,
         with_stack=obj.with_stack,
...
@@ -905,6 +905,7 @@ class ProfileReqInput:
     # If set, it profile as many as this number of steps.
     # If it is set, profiling is automatically stopped after this step, and
     # the caller doesn't need to run stop_profile.
+    start_step: Optional[int] = None
     num_steps: Optional[int] = None
     activities: Optional[List[str]] = None
     profile_by_stage: bool = False
@@ -932,6 +933,7 @@ class ExpertDistributionReqOutput:
 class ProfileReq:
     type: ProfileReqType
     output_dir: Optional[str] = None
+    start_step: Optional[int] = None
     num_steps: Optional[int] = None
     activities: Optional[List[str]] = None
     profile_by_stage: bool = False
...
@@ -485,6 +485,8 @@ class Scheduler(
             enable=server_args.enable_memory_saver
         )
         self.init_profier()
+
+        # Init metrics stats
         self.init_metrics()
         self.init_kv_events(server_args.kv_events_config)
@@ -628,6 +630,7 @@ class Scheduler(
         self.torch_profiler_output_dir: Optional[str] = None
         self.profiler_activities: Optional[List[str]] = None
         self.profile_id: Optional[str] = None
+        self.profiler_start_forward_ct: Optional[int] = None
         self.profiler_target_forward_ct: Optional[int] = None
         self.profiler_target_prefill_ct: Optional[int] = None
         self.profiler_target_decode_ct: Optional[int] = None
@@ -2389,9 +2392,10 @@ class Scheduler(
     def profile(self, recv_req: ProfileReq):
         if recv_req.type == ProfileReqType.START_PROFILE:
-            if recv_req.profile_by_stage:
+            if recv_req.profile_by_stage or recv_req.start_step:
                 return self.init_profile(
                     recv_req.output_dir,
+                    recv_req.start_step,
                     recv_req.num_steps,
                     recv_req.activities,
                     recv_req.with_stack,
@@ -2402,6 +2406,7 @@ class Scheduler(
             else:
                 self.init_profile(
                     recv_req.output_dir,
+                    recv_req.start_step,
                     recv_req.num_steps,
                     recv_req.activities,
                     recv_req.with_stack,
@@ -2416,6 +2421,7 @@ class Scheduler(
     def init_profile(
         self,
         output_dir: Optional[str],
+        start_step: Optional[int],
         num_steps: Optional[int],
         activities: Optional[List[str]],
         with_stack: Optional[bool],
@@ -2442,6 +2448,9 @@ class Scheduler(
         self.profiler_activities = activities
         self.profile_id = profile_id
+        if start_step:
+            self.profiler_start_forward_ct = max(start_step, self.forward_ct + 1)
+
         if num_steps:
             self.profile_steps = num_steps
             if self.profile_by_stage:
@@ -2449,6 +2458,10 @@ class Scheduler(
                 self.profiler_target_decode_ct = num_steps
                 self.profiler_prefill_ct = 0
                 self.profiler_decode_ct = 0
+            elif start_step:
+                self.profiler_target_forward_ct = (
+                    self.profiler_start_forward_ct + num_steps
+                )
             else:
                 self.profiler_target_forward_ct = self.forward_ct + num_steps
             # The caller will be notified when reaching profiler_target_forward_ct
@@ -2521,6 +2534,7 @@ class Scheduler(
         if "CUDA_PROFILER" in activities:
             torch.cuda.cudart().cudaProfilerStart()
+        self.profile_in_progress = True
         return ProfileReqOutput(success=True, message="Succeeded")
@@ -2584,6 +2598,7 @@ class Scheduler(
         )
         self.torch_profiler = None
         self.profile_in_progress = False
+        self.profiler_start_forward_ct = None
         return ProfileReqOutput(success=True, message="Succeeded.")
@@ -2617,6 +2632,11 @@ class Scheduler(
             and self.profiler_target_forward_ct <= self.forward_ct
         ):
             self.stop_profile()
+        if (
+            self.profiler_start_forward_ct
+            and self.profiler_start_forward_ct == self.forward_ct
+        ):
+            self.start_profile()

     def expert_distribution_handle(self, recv_req: ExpertDistributionReq):
         if recv_req == ExpertDistributionReq.START_RECORD:
...
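
As a reading aid for the scheduler changes above, the following standalone sketch mirrors the step arithmetic from init_profile and the per-forward start/stop checks (it is not part of the commit, and the profile_by_stage path is omitted):

from typing import Optional, Tuple


def plan_profile_window(
    forward_ct: int, start_step: Optional[int], num_steps: Optional[int]
) -> Tuple[Optional[int], Optional[int]]:
    """Return (start_forward_ct, target_forward_ct) as the diff computes them."""
    start_forward_ct = None
    target_forward_ct = None
    if start_step:
        # Never schedule the start in the past.
        start_forward_ct = max(start_step, forward_ct + 1)
    if num_steps:
        if start_forward_ct is not None:
            # Deferred start: stop num_steps after the scheduled start step.
            target_forward_ct = start_forward_ct + num_steps
        else:
            # Immediate start: stop num_steps from the current step.
            target_forward_ct = forward_ct + num_steps
    return start_forward_ct, target_forward_ct


# At forward step 3 with start_step=15 and num_steps=5, profiling is started
# when forward_ct reaches 15 and stopped once it reaches 20.
print(plan_profile_window(3, 15, 5))  # (15, 20)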
@@ -863,6 +863,7 @@ class TokenizerManager:
     async def start_profile(
         self,
         output_dir: Optional[str] = None,
+        start_step: Optional[int] = None,
         num_steps: Optional[int] = None,
         activities: Optional[List[str]] = None,
         with_stack: Optional[bool] = None,
@@ -875,6 +876,7 @@ class TokenizerManager:
         req = ProfileReq(
             type=ProfileReqType.START_PROFILE,
             output_dir=output_dir,
+            start_step=start_step,
             num_steps=num_steps,
             activities=activities,
             with_stack=with_stack,
...
@@ -87,6 +87,7 @@ suites = {
         TestFile("test_skip_tokenizer_init.py", 117),
         TestFile("test_srt_engine.py", 261),
         TestFile("test_srt_endpoint.py", 130),
+        TestFile("test_start_profile.py", 60),
         TestFile("test_torch_compile.py", 76),
         TestFile("test_torch_compile_moe.py", 172),
         TestFile("test_torch_native_attention_backend.py", 123),
...
"""
Usage:
python3 -m unittest test_srt_engine.TestSRTEngine.test_4_sync_async_stream_combination
"""
import os
import shutil
import unittest
import requests
from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
OUTPUT_DIR = "./profiler_dir"
class TestStartProfile(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server(
cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
)
@classmethod
def tearDownClass(cls):
kill_process_tree(cls.process.pid)
def setUp(self):
self._clear_profile_dir()
def test_start_profile_1(self):
"""Test /start_profile with start_step and num_steps argument. This have to be the first test for start_step to work"""
response = self._start_profile(start_step="15", num_steps=5)
self._post_request()
self._check_non_empty_profile_dir()
def test_start_profile_2(self):
"""Test /start_profile with no argument"""
response = self._start_profile()
self._post_request()
# Before /stop_profile, the profile directory should be empty
self._check_empty_profile_dir()
# Post /stop_profile and check the profile directory is non-empty
response = requests.post(
f"{DEFAULT_URL_FOR_TEST}/stop_profile",
)
self._check_non_empty_profile_dir()
def test_start_profile_3(self):
"""Test /start_profile with num_steps argument"""
response = self._start_profile(num_steps=5)
self._post_request()
self._check_non_empty_profile_dir()
def _start_profile(self, **kwargs):
"""Start profiling with optional parameters."""
response = requests.post(
f"{DEFAULT_URL_FOR_TEST}/start_profile",
json=kwargs if kwargs else None,
)
self.assertEqual(response.status_code, 200)
def _post_request(self):
response = requests.post(
f"{DEFAULT_URL_FOR_TEST}/generate",
json={
"text": "The capital of France is",
"sampling_params": {
"temperature": 0,
"max_new_tokens": 32,
},
},
)
self.assertEqual(response.status_code, 200)
def _clear_profile_dir(self):
if os.path.isdir(OUTPUT_DIR):
# Remove the directory and all its contents
shutil.rmtree(OUTPUT_DIR)
def _check_non_empty_profile_dir(self):
self.assertTrue(os.path.isdir(OUTPUT_DIR), "Output directory does not exist.")
self.assertNotEqual(
len(os.listdir(OUTPUT_DIR)), 0, "Output directory is empty!"
)
def _check_empty_profile_dir(self):
if os.path.isdir(OUTPUT_DIR):
self.assertEqual(
len(os.listdir(OUTPUT_DIR)), 0, "Output directory is non-empty!"
)
if __name__ == "__main__":
os.environ["SGLANG_TORCH_PROFILER_DIR"] = OUTPUT_DIR
unittest.main()
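
Note that the SGLANG_TORCH_PROFILER_DIR override in the __main__ block only takes effect when the file is executed directly. When invoking the tests through another runner, a setup step like the following sketch (an assumption about local usage, not part of the commit) points the profiler output at the directory the assertions inspect, before the server is launched:

import os

# Assumed setup for external test runners: mirror what the __main__ block does
# so the torch profiler writes into the directory checked by the assertions.
os.environ.setdefault("SGLANG_TORCH_PROFILER_DIR", "./profiler_dir")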