Unverified commit 93b77c8e, authored by Lianmin Zheng, committed by GitHub
Browse files

Fix request logging so that logged requests can be fully and easily replayed (#2973)

parent 7906d1d2
...@@ -27,6 +27,7 @@ import requests ...@@ -27,6 +27,7 @@ import requests
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--url", type=str, default="http://localhost:30000") parser.add_argument("--url", type=str, default="http://localhost:30000")
parser.add_argument("--log-requests", action="store_true")
parser.add_argument( parser.add_argument(
"--dump-requests-folder", type=str, default="/tmp/sglang_request_dump" "--dump-requests-folder", type=str, default="/tmp/sglang_request_dump"
) )
...@@ -36,6 +37,8 @@ if __name__ == "__main__": ...@@ -36,6 +37,8 @@ if __name__ == "__main__":
response = requests.post( response = requests.post(
args.url + "/configure_logging", args.url + "/configure_logging",
json={ json={
"log_requests": args.log_requests,
"log_requests_level": 1, # Log full requests
"dump_requests_folder": args.dump_requests_folder, "dump_requests_folder": args.dump_requests_folder,
"dump_requests_threshold": args.dump_requests_threshold, "dump_requests_threshold": args.dump_requests_threshold,
}, },
......
...@@ -495,6 +495,7 @@ class ProfileReq(Enum): ...@@ -495,6 +495,7 @@ class ProfileReq(Enum):
@dataclass @dataclass
class ConfigureLoggingReq: class ConfigureLoggingReq:
log_requests: Optional[bool] = None log_requests: Optional[bool] = None
log_requests_level: Optional[int] = None
dump_requests_folder: Optional[str] = None dump_requests_folder: Optional[str] = None
dump_requests_threshold: Optional[int] = None dump_requests_threshold: Optional[int] = None
......
...@@ -117,6 +117,7 @@ class TokenizerManager: ...@@ -117,6 +117,7 @@ class TokenizerManager:
self.server_args = server_args self.server_args = server_args
self.enable_metrics = server_args.enable_metrics self.enable_metrics = server_args.enable_metrics
self.log_requests = server_args.log_requests self.log_requests = server_args.log_requests
self.log_requests_level = 0
# Init inter-process communication # Init inter-process communication
context = zmq.asyncio.Context(2) context = zmq.asyncio.Context(2)
...@@ -276,7 +277,10 @@ class TokenizerManager: ...@@ -276,7 +277,10 @@ class TokenizerManager:
obj.normalize_batch_and_arguments() obj.normalize_batch_and_arguments()
if self.log_requests: if self.log_requests:
logger.info(f"Receive: obj={dataclass_to_string_truncated(obj)}") max_length = 2048 if self.log_requests_level == 0 else 1 << 30
logger.info(
f"Receive: obj={dataclass_to_string_truncated(obj, max_length)}"
)
async with self.model_update_lock.reader_lock: async with self.model_update_lock.reader_lock:
is_single = obj.is_single is_single = obj.is_single
...@@ -419,7 +423,8 @@ class TokenizerManager: ...@@ -419,7 +423,8 @@ class TokenizerManager:
state.out_list = [] state.out_list = []
if state.finished: if state.finished:
if self.log_requests: if self.log_requests:
msg = f"Finish: obj={dataclass_to_string_truncated(obj)}, out={dataclass_to_string_truncated(out)}" max_length = 2048 if self.log_requests_level == 0 else 1 << 30
msg = f"Finish: obj={dataclass_to_string_truncated(obj, max_length)}, out={dataclass_to_string_truncated(out, max_length)}"
logger.info(msg) logger.info(msg)
del self.rid_to_state[obj.rid] del self.rid_to_state[obj.rid]
...@@ -682,6 +687,8 @@ class TokenizerManager: ...@@ -682,6 +687,8 @@ class TokenizerManager:
def configure_logging(self, obj: ConfigureLoggingReq): def configure_logging(self, obj: ConfigureLoggingReq):
if obj.log_requests is not None: if obj.log_requests is not None:
self.log_requests = obj.log_requests self.log_requests = obj.log_requests
if obj.log_requests_level is not None:
self.log_requests_level = obj.log_requests_level
if obj.dump_requests_folder is not None: if obj.dump_requests_folder is not None:
self.dump_requests_folder = obj.dump_requests_folder self.dump_requests_folder = obj.dump_requests_folder
if obj.dump_requests_threshold is not None: if obj.dump_requests_threshold is not None:
......
...@@ -1262,9 +1262,9 @@ def dataclass_to_string_truncated(data, max_length=2048): ...@@ -1262,9 +1262,9 @@ def dataclass_to_string_truncated(data, max_length=2048):
if isinstance(data, str): if isinstance(data, str):
if len(data) > max_length: if len(data) > max_length:
half_length = max_length // 2 half_length = max_length // 2
return f'"{data[:half_length]} ... {data[-half_length:]}"' return f"{repr(data[:half_length])} ... {repr(data[-half_length:])}"
else: else:
return f'"{data}"' return f"{repr(data)}"
elif isinstance(data, (list, tuple)): elif isinstance(data, (list, tuple)):
if len(data) > max_length: if len(data) > max_length:
half_length = max_length // 2 half_length = max_length // 2
...@@ -1275,7 +1275,7 @@ def dataclass_to_string_truncated(data, max_length=2048): ...@@ -1275,7 +1275,7 @@ def dataclass_to_string_truncated(data, max_length=2048):
return ( return (
"{" "{"
+ ", ".join( + ", ".join(
f"{k}: {dataclass_to_string_truncated(v, max_length)}" f"'{k}': {dataclass_to_string_truncated(v, max_length)}"
for k, v in data.items() for k, v in data.items()
) )
+ "}" + "}"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment