Unverified commit 823ab796, authored by Harry Mellor, committed by GitHub
Browse files

Update `pre-commit` hooks (#12475)


Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent 6116ca8c
...@@ -311,8 +311,8 @@ class RequestStats: ...@@ -311,8 +311,8 @@ class RequestStats:
return [] return []
latency_s_lst = [] latency_s_lst = []
for i in range(1, len(self.output_token_ts_s_lst)): for i in range(1, len(self.output_token_ts_s_lst)):
assert (self.output_token_ts_s_lst[i] >= assert (self.output_token_ts_s_lst[i]
self.output_token_ts_s_lst[i - 1]) >= self.output_token_ts_s_lst[i - 1])
latency_s = (self.output_token_ts_s_lst[i] - latency_s = (self.output_token_ts_s_lst[i] -
self.output_token_ts_s_lst[i - 1]) self.output_token_ts_s_lst[i - 1])
latency_s_lst.append(latency_s) latency_s_lst.append(latency_s)
......
...@@ -205,7 +205,7 @@ class GPUModelRunner: ...@@ -205,7 +205,7 @@ class GPUModelRunner:
def _update_states(self, scheduler_output: "SchedulerOutput") -> None: def _update_states(self, scheduler_output: "SchedulerOutput") -> None:
# Remove stopped requests from the cached states. # Remove stopped requests from the cached states.
# Keep the states of the pre-empted requests. # Keep the states of the preempted requests.
for req_id in scheduler_output.finished_req_ids: for req_id in scheduler_output.finished_req_ids:
self.requests.pop(req_id, None) self.requests.pop(req_id, None)
self.encoder_cache.pop(req_id, None) self.encoder_cache.pop(req_id, None)
......
...@@ -173,13 +173,13 @@ class HPUWorker(LocalOrDistributedWorkerBase): ...@@ -173,13 +173,13 @@ class HPUWorker(LocalOrDistributedWorkerBase):
cpu_fallback_ctx as cpu_fallback_local_metric: cpu_fallback_ctx as cpu_fallback_local_metric:
output = LocalOrDistributedWorkerBase.execute_model( output = LocalOrDistributedWorkerBase.execute_model(
self, execute_model_req) self, execute_model_req)
if (log_graph_compilation and gc_local_metric.stats()[0][1] > 0 if (log_graph_compilation and gc_local_metric.stats()[0][1]
) or log_graph_compilation_all: > 0) or log_graph_compilation_all:
msg = ("VLLM_HPU_STEP_GRAPH_COMPILATION: " msg = ("VLLM_HPU_STEP_GRAPH_COMPILATION: "
f"{gc_local_metric.stats()}, {input_stats}") f"{gc_local_metric.stats()}, {input_stats}")
logger.warning(msg) logger.warning(msg)
if (log_cpu_fallbacks and cpu_fallback_local_metric.stats()[0][1] > if (log_cpu_fallbacks and cpu_fallback_local_metric.stats()[0][1]
0) or log_cpu_fallbacks_all: > 0) or log_cpu_fallbacks_all:
msg = ("VLLM_HPU_STEP_CPU_FALLBACK: " msg = ("VLLM_HPU_STEP_CPU_FALLBACK: "
f"{cpu_fallback_local_metric.stats()}, {input_stats}") f"{cpu_fallback_local_metric.stats()}, {input_stats}")
logger.warning(msg) logger.warning(msg)
......
...@@ -316,8 +316,8 @@ class TPUModelRunner(ModelRunnerBase[ModelInputForTPU]): ...@@ -316,8 +316,8 @@ class TPUModelRunner(ModelRunnerBase[ModelInputForTPU]):
logger.info("batch_size: %d, seq_len: %d", batch_size, logger.info("batch_size: %d, seq_len: %d", batch_size,
seq_len) seq_len)
num_tokens = batch_size * seq_len num_tokens = batch_size * seq_len
if (num_tokens >= if (num_tokens
self.scheduler_config.max_num_batched_tokens): >= self.scheduler_config.max_num_batched_tokens):
break break
seq_len = seq_len * 2 seq_len = seq_len * 2
end = time.time() end = time.time()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment