Unverified commit 823ab796, authored by Harry Mellor, committed by GitHub
Browse files

Update `pre-commit` hooks (#12475)


Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent 6116ca8c
......@@ -311,8 +311,8 @@ class RequestStats:
return []
latency_s_lst = []
for i in range(1, len(self.output_token_ts_s_lst)):
assert (self.output_token_ts_s_lst[i] >=
self.output_token_ts_s_lst[i - 1])
assert (self.output_token_ts_s_lst[i]
>= self.output_token_ts_s_lst[i - 1])
latency_s = (self.output_token_ts_s_lst[i] -
self.output_token_ts_s_lst[i - 1])
latency_s_lst.append(latency_s)
......
......@@ -205,7 +205,7 @@ class GPUModelRunner:
def _update_states(self, scheduler_output: "SchedulerOutput") -> None:
# Remove stopped requests from the cached states.
# Keep the states of the pre-empted requests.
# Keep the states of the preempted requests.
for req_id in scheduler_output.finished_req_ids:
self.requests.pop(req_id, None)
self.encoder_cache.pop(req_id, None)
......
......@@ -173,13 +173,13 @@ class HPUWorker(LocalOrDistributedWorkerBase):
cpu_fallback_ctx as cpu_fallback_local_metric:
output = LocalOrDistributedWorkerBase.execute_model(
self, execute_model_req)
if (log_graph_compilation and gc_local_metric.stats()[0][1] > 0
) or log_graph_compilation_all:
if (log_graph_compilation and gc_local_metric.stats()[0][1]
> 0) or log_graph_compilation_all:
msg = ("VLLM_HPU_STEP_GRAPH_COMPILATION: "
f"{gc_local_metric.stats()}, {input_stats}")
logger.warning(msg)
if (log_cpu_fallbacks and cpu_fallback_local_metric.stats()[0][1] >
0) or log_cpu_fallbacks_all:
if (log_cpu_fallbacks and cpu_fallback_local_metric.stats()[0][1]
> 0) or log_cpu_fallbacks_all:
msg = ("VLLM_HPU_STEP_CPU_FALLBACK: "
f"{cpu_fallback_local_metric.stats()}, {input_stats}")
logger.warning(msg)
......
......@@ -316,8 +316,8 @@ class TPUModelRunner(ModelRunnerBase[ModelInputForTPU]):
logger.info("batch_size: %d, seq_len: %d", batch_size,
seq_len)
num_tokens = batch_size * seq_len
if (num_tokens >=
self.scheduler_config.max_num_batched_tokens):
if (num_tokens
>= self.scheduler_config.max_num_batched_tokens):
break
seq_len = seq_len * 2
end = time.time()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment