Unverified commit bda3758f, authored by Liangsheng Yin, committed by GitHub
Browse files

[log] Make forward iter count optional (#12116)

parent 7b36c47b
...@@ -111,18 +111,21 @@ class Envs: ...@@ -111,18 +111,21 @@ class Envs:
# Model & File Download # Model & File Download
SGLANG_USE_MODELSCOPE = EnvBool(False) SGLANG_USE_MODELSCOPE = EnvBool(False)
# Logging Options
SGLANG_LOG_GC = EnvBool(False)
SGLANG_LOG_FORWARD_ITERS = EnvBool(False)
SGLANG_DISABLE_REQUEST_LOGGING = EnvBool(False)
# Test & Debug # Test & Debug
SGLANG_IS_IN_CI = EnvBool(False) SGLANG_IS_IN_CI = EnvBool(False)
SGLANG_IS_IN_CI_AMD = EnvBool(False) SGLANG_IS_IN_CI_AMD = EnvBool(False)
SGLANG_SET_CPU_AFFINITY = EnvBool(False) SGLANG_SET_CPU_AFFINITY = EnvBool(False)
SGLANG_PROFILE_WITH_STACK = EnvBool(True) SGLANG_PROFILE_WITH_STACK = EnvBool(True)
SGLANG_RECORD_STEP_TIME = EnvBool(False) SGLANG_RECORD_STEP_TIME = EnvBool(False)
SGLANG_GC_LOG = EnvBool(False)
SGLANG_FORCE_SHUTDOWN = EnvBool(False) SGLANG_FORCE_SHUTDOWN = EnvBool(False)
SGLANG_DEBUG_MEMORY_POOL = EnvBool(False) SGLANG_DEBUG_MEMORY_POOL = EnvBool(False)
SGLANG_TEST_REQUEST_TIME_STATS = EnvBool(False) SGLANG_TEST_REQUEST_TIME_STATS = EnvBool(False)
SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK = EnvBool(False) SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK = EnvBool(False)
SGLANG_DISABLE_REQUEST_LOGGING = EnvBool(False)
SGLANG_SIMULATE_ACC_LEN = EnvFloat(-1) SGLANG_SIMULATE_ACC_LEN = EnvFloat(-1)
SGLANG_SIMULATE_ACC_METHOD = EnvStr("multinomial") SGLANG_SIMULATE_ACC_METHOD = EnvStr("multinomial")
SGLANG_TORCH_PROFILER_DIR = EnvStr("/tmp") SGLANG_TORCH_PROFILER_DIR = EnvStr("/tmp")
...@@ -251,7 +254,17 @@ class Envs: ...@@ -251,7 +254,17 @@ class Envs:
envs = Envs() envs = Envs()
def _print_deprecated_env(new_name: str, old_name: str) -> None:
    """Warn about a deprecated environment variable and forward its value.

    If ``old_name`` is present in ``os.environ``, a warning is emitted and its
    value is copied to ``new_name``. When ``new_name`` is already set, the
    explicitly provided new-style value is kept instead of being overwritten
    by the deprecated one; the warning is still emitted.

    Args:
        new_name: Name of the environment variable that replaces ``old_name``.
        old_name: Deprecated environment variable name to look for.
    """
    if old_name not in os.environ:
        return

    warnings.warn(
        f"Environment variable {old_name} will be deprecated, please use {new_name} instead"
    )
    # Do not clobber a value the user already supplied under the new name;
    # only fall back to the legacy value when the new one is absent.
    os.environ.setdefault(new_name, os.environ[old_name])
def _convert_SGL_to_SGLANG(): def _convert_SGL_to_SGLANG():
_print_deprecated_env("SGLANG_LOG_GC", "SGLANG_GC_LOG")
for key, value in os.environ.items(): for key, value in os.environ.items():
if key.startswith("SGL_"): if key.startswith("SGL_"):
new_key = key.replace("SGL_", "SGLANG_", 1) new_key = key.replace("SGL_", "SGLANG_", 1)
......
...@@ -494,7 +494,7 @@ class Scheduler( ...@@ -494,7 +494,7 @@ class Scheduler(
) )
self.init_disaggregation() self.init_disaggregation()
if get_bool_env_var("SGLANG_GC_LOG"): if envs.SGLANG_LOG_GC.get():
configure_gc_logger() configure_gc_logger()
# Init prefill kv split size when deterministic inference is enabled with various attention backends # Init prefill kv split size when deterministic inference is enabled with various attention backends
......
...@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, List, Optional ...@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, List, Optional
from sglang.srt.disaggregation.kv_events import EventPublisherFactory, KVEventBatch from sglang.srt.disaggregation.kv_events import EventPublisherFactory, KVEventBatch
from sglang.srt.disaggregation.utils import DisaggregationMode from sglang.srt.disaggregation.utils import DisaggregationMode
from sglang.srt.environ import envs
from sglang.srt.managers.schedule_policy import PrefillAdder from sglang.srt.managers.schedule_policy import PrefillAdder
from sglang.srt.managers.scheduler import Req, ScheduleBatch from sglang.srt.managers.scheduler import Req, ScheduleBatch
from sglang.srt.metrics.collector import SchedulerMetricsCollector, SchedulerStats from sglang.srt.metrics.collector import SchedulerMetricsCollector, SchedulerStats
...@@ -18,6 +19,7 @@ if TYPE_CHECKING: ...@@ -18,6 +19,7 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
RECORD_STEP_TIME = get_bool_env_var("SGLANG_RECORD_STEP_TIME") RECORD_STEP_TIME = get_bool_env_var("SGLANG_RECORD_STEP_TIME")
LOG_FORWARD_ITERS = envs.SGLANG_LOG_FORWARD_ITERS.get()
class KvMetrics: class KvMetrics:
...@@ -125,8 +127,10 @@ class SchedulerMetricsMixin: ...@@ -125,8 +127,10 @@ class SchedulerMetricsMixin:
num_used, token_usage, _, _ = self._get_token_info() num_used, token_usage, _, _ = self._get_token_info()
token_usage_msg = f"token usage: {token_usage:.2f}, " token_usage_msg = f"token usage: {token_usage:.2f}, "
iter_msg = f" [{self.forward_ct + 1}]" if LOG_FORWARD_ITERS else ""
f = ( f = (
f"Prefill batch. " f"Prefill batch{iter_msg}, "
f"#new-seq: {len(can_run_list)}, " f"#new-seq: {len(can_run_list)}, "
f"#new-token: {adder.log_input_tokens}, " f"#new-token: {adder.log_input_tokens}, "
f"#cached-token: {adder.log_hit_tokens}, " f"#cached-token: {adder.log_hit_tokens}, "
...@@ -249,7 +253,8 @@ class SchedulerMetricsMixin: ...@@ -249,7 +253,8 @@ class SchedulerMetricsMixin:
gap_latency / self.server_args.decode_log_interval gap_latency / self.server_args.decode_log_interval
) )
msg = f"Decode batch. #running-req: {num_running_reqs}, {token_usage_msg}" iter_msg = f" [{self.forward_ct}]" if LOG_FORWARD_ITERS else ""
msg = f"Decode batch{iter_msg}, #running-req: {num_running_reqs}, {token_usage_msg}"
if self.spec_algorithm.is_none(): if self.spec_algorithm.is_none():
spec_accept_length = 0 spec_accept_length = 0
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment