Unverified commit 14b8ae02, authored by Tao He, committed by GitHub
Browse files

Fixes the misuse/mixuse of time.time()/time.monotonic() (#3220)


Signed-off-by: Tao He <sighingnow@gmail.com>
Co-authored-by: simon-mo <simon.mo@hey.com>
parent 03d37f24
......@@ -160,7 +160,7 @@ class Scheduler:
blocks_to_copy: Dict[int, List[int]] = {}
# Fix the current time.
-now = time.monotonic()
+now = time.time()
# Join waiting sequences if possible.
if not self.swapped:
......
......@@ -604,8 +604,7 @@ class AsyncLLMEngine:
>>> ...
"""
# Preprocess the request.
-# This should not be used for logging, as it is monotonic time.
-arrival_time = time.monotonic()
+arrival_time = time.time()
try:
stream = await self.add_request(
......
......@@ -244,7 +244,7 @@ class LLMEngine:
raise ValueError(f"Cannot request more than "
f"{max_logprobs} logprobs.")
if arrival_time is None:
-arrival_time = time.monotonic()
+arrival_time = time.time()
prompt_token_ids = self.encode_request(
request_id=request_id,
prompt=prompt,
......@@ -628,7 +628,7 @@ class LLMEngine:
def _get_stats(self,
scheduler_outputs: Optional[SchedulerOutputs]) -> Stats:
"""Get Stats to be Logged to Prometheus."""
-now = time.monotonic()
+now = time.time()
# KV Cache Usage in %.
num_total_gpu = self.cache_config.num_gpu_blocks
......
......@@ -103,7 +103,7 @@ class OpenAIServingChat(OpenAIServing):
) -> Union[ErrorResponse, AsyncGenerator[str, None]]:
model_name = request.model
-created_time = int(time.monotonic())
+created_time = int(time.time())
chunk_object_type = "chat.completion.chunk"
first_iteration = True
......@@ -244,7 +244,7 @@ class OpenAIServingChat(OpenAIServing):
request_id: str) -> Union[ErrorResponse, ChatCompletionResponse]:
model_name = request.model
-created_time = int(time.monotonic())
+created_time = int(time.time())
final_res: RequestOutput = None
async for res in result_generator:
......
......@@ -118,7 +118,7 @@ class OpenAIServingCompletion(OpenAIServing):
model_name = request.model
request_id = f"cmpl-{random_uuid()}"
-created_time = int(time.monotonic())
+created_time = int(time.time())
# Schedule the request and get the result generator.
generators = []
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment