"vscode:/vscode.git/clone" did not exist on "6cabab3afcbcbb395afc4c5a9d40907dd38a5add"
Unverified commit bda3758f, authored by Liangsheng Yin, committed by GitHub
Browse files

[log] Make forward iter count optional (#12116)

parent 7b36c47b
......@@ -111,18 +111,21 @@ class Envs:
# Model & File Download
SGLANG_USE_MODELSCOPE = EnvBool(False)
# Logging Options
SGLANG_LOG_GC = EnvBool(False)
SGLANG_LOG_FORWARD_ITERS = EnvBool(False)
SGLANG_DISABLE_REQUEST_LOGGING = EnvBool(False)
# Test & Debug
SGLANG_IS_IN_CI = EnvBool(False)
SGLANG_IS_IN_CI_AMD = EnvBool(False)
SGLANG_SET_CPU_AFFINITY = EnvBool(False)
SGLANG_PROFILE_WITH_STACK = EnvBool(True)
SGLANG_RECORD_STEP_TIME = EnvBool(False)
SGLANG_GC_LOG = EnvBool(False)
SGLANG_FORCE_SHUTDOWN = EnvBool(False)
SGLANG_DEBUG_MEMORY_POOL = EnvBool(False)
SGLANG_TEST_REQUEST_TIME_STATS = EnvBool(False)
SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK = EnvBool(False)
SGLANG_DISABLE_REQUEST_LOGGING = EnvBool(False)
SGLANG_SIMULATE_ACC_LEN = EnvFloat(-1)
SGLANG_SIMULATE_ACC_METHOD = EnvStr("multinomial")
SGLANG_TORCH_PROFILER_DIR = EnvStr("/tmp")
......@@ -251,7 +254,17 @@ class Envs:
envs = Envs()
def _print_deprecated_env(new_name: str, old_name: str):
    """Warn about a deprecated environment variable and forward its value.

    If ``old_name`` is present in ``os.environ``, a warning is emitted and its
    value is copied to ``new_name``. A value that is already set under
    ``new_name`` is kept, so an explicit setting of the replacement variable
    is never clobbered by the deprecated one.

    Args:
        new_name: Name of the replacement environment variable.
        old_name: Name of the deprecated environment variable.
    """
    if old_name not in os.environ:
        return

    warnings.warn(
        f"Environment variable {old_name} will be deprecated, please use {new_name} instead"
    )
    # The new name wins when both are set; only fill it in when it is absent.
    os.environ.setdefault(new_name, os.environ[old_name])
def _convert_SGL_to_SGLANG():
_print_deprecated_env("SGLANG_LOG_GC", "SGLANG_GC_LOG")
for key, value in os.environ.items():
if key.startswith("SGL_"):
new_key = key.replace("SGL_", "SGLANG_", 1)
......
......@@ -494,7 +494,7 @@ class Scheduler(
)
self.init_disaggregation()
if get_bool_env_var("SGLANG_GC_LOG"):
if envs.SGLANG_LOG_GC.get():
configure_gc_logger()
# Init prefill kv split size when deterministic inference is enabled with various attention backends
......
......@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, List, Optional
from sglang.srt.disaggregation.kv_events import EventPublisherFactory, KVEventBatch
from sglang.srt.disaggregation.utils import DisaggregationMode
from sglang.srt.environ import envs
from sglang.srt.managers.schedule_policy import PrefillAdder
from sglang.srt.managers.scheduler import Req, ScheduleBatch
from sglang.srt.metrics.collector import SchedulerMetricsCollector, SchedulerStats
......@@ -18,6 +19,7 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
# Module-level flags: both are evaluated once, when this module is imported.
RECORD_STEP_TIME = get_bool_env_var("SGLANG_RECORD_STEP_TIME")
# When truthy, the "Prefill batch" / "Decode batch" log lines include the
# forward iteration counter in square brackets.
LOG_FORWARD_ITERS = envs.SGLANG_LOG_FORWARD_ITERS.get()
class KvMetrics:
......@@ -125,8 +127,10 @@ class SchedulerMetricsMixin:
num_used, token_usage, _, _ = self._get_token_info()
token_usage_msg = f"token usage: {token_usage:.2f}, "
iter_msg = f" [{self.forward_ct + 1}]" if LOG_FORWARD_ITERS else ""
f = (
f"Prefill batch. "
f"Prefill batch{iter_msg}, "
f"#new-seq: {len(can_run_list)}, "
f"#new-token: {adder.log_input_tokens}, "
f"#cached-token: {adder.log_hit_tokens}, "
......@@ -249,7 +253,8 @@ class SchedulerMetricsMixin:
gap_latency / self.server_args.decode_log_interval
)
msg = f"Decode batch. #running-req: {num_running_reqs}, {token_usage_msg}"
iter_msg = f" [{self.forward_ct}]" if LOG_FORWARD_ITERS else ""
msg = f"Decode batch{iter_msg}, #running-req: {num_running_reqs}, {token_usage_msg}"
if self.spec_algorithm.is_none():
spec_accept_length = 0
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment