Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
823ab796
Unverified
Commit
823ab796
authored
Jan 28, 2025
by
Harry Mellor
Committed by
GitHub
Jan 27, 2025
Browse files
Update `pre-commit` hooks (#12475)
Signed-off-by:
Harry Mellor
<
19981378+hmellor@users.noreply.github.com
>
parent
6116ca8c
Changes
64
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
9 additions
and
9 deletions
+9
-9
vllm/v1/stats/common.py
vllm/v1/stats/common.py
+2
-2
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+1
-1
vllm/worker/hpu_worker.py
vllm/worker/hpu_worker.py
+4
-4
vllm/worker/tpu_model_runner.py
vllm/worker/tpu_model_runner.py
+2
-2
No files found.
vllm/v1/stats/common.py
View file @
823ab796
...
@@ -311,8 +311,8 @@ class RequestStats:
...
@@ -311,8 +311,8 @@ class RequestStats:
return
[]
return
[]
latency_s_lst
=
[]
latency_s_lst
=
[]
for
i
in
range
(
1
,
len
(
self
.
output_token_ts_s_lst
)):
for
i
in
range
(
1
,
len
(
self
.
output_token_ts_s_lst
)):
assert
(
self
.
output_token_ts_s_lst
[
i
]
>=
assert
(
self
.
output_token_ts_s_lst
[
i
]
self
.
output_token_ts_s_lst
[
i
-
1
])
>=
self
.
output_token_ts_s_lst
[
i
-
1
])
latency_s
=
(
self
.
output_token_ts_s_lst
[
i
]
-
latency_s
=
(
self
.
output_token_ts_s_lst
[
i
]
-
self
.
output_token_ts_s_lst
[
i
-
1
])
self
.
output_token_ts_s_lst
[
i
-
1
])
latency_s_lst
.
append
(
latency_s
)
latency_s_lst
.
append
(
latency_s
)
...
...
vllm/v1/worker/gpu_model_runner.py
View file @
823ab796
...
@@ -205,7 +205,7 @@ class GPUModelRunner:
...
@@ -205,7 +205,7 @@ class GPUModelRunner:
def
_update_states
(
self
,
scheduler_output
:
"SchedulerOutput"
)
->
None
:
def
_update_states
(
self
,
scheduler_output
:
"SchedulerOutput"
)
->
None
:
# Remove stopped requests from the cached states.
# Remove stopped requests from the cached states.
# Keep the states of the pre
-
empted requests.
# Keep the states of the preempted requests.
for
req_id
in
scheduler_output
.
finished_req_ids
:
for
req_id
in
scheduler_output
.
finished_req_ids
:
self
.
requests
.
pop
(
req_id
,
None
)
self
.
requests
.
pop
(
req_id
,
None
)
self
.
encoder_cache
.
pop
(
req_id
,
None
)
self
.
encoder_cache
.
pop
(
req_id
,
None
)
...
...
vllm/worker/hpu_worker.py
View file @
823ab796
...
@@ -173,13 +173,13 @@ class HPUWorker(LocalOrDistributedWorkerBase):
...
@@ -173,13 +173,13 @@ class HPUWorker(LocalOrDistributedWorkerBase):
cpu_fallback_ctx
as
cpu_fallback_local_metric
:
cpu_fallback_ctx
as
cpu_fallback_local_metric
:
output
=
LocalOrDistributedWorkerBase
.
execute_model
(
output
=
LocalOrDistributedWorkerBase
.
execute_model
(
self
,
execute_model_req
)
self
,
execute_model_req
)
if
(
log_graph_compilation
and
gc_local_metric
.
stats
()[
0
][
1
]
>
0
if
(
log_graph_compilation
and
gc_local_metric
.
stats
()[
0
][
1
]
)
or
log_graph_compilation_all
:
>
0
)
or
log_graph_compilation_all
:
msg
=
(
"VLLM_HPU_STEP_GRAPH_COMPILATION: "
msg
=
(
"VLLM_HPU_STEP_GRAPH_COMPILATION: "
f
"
{
gc_local_metric
.
stats
()
}
,
{
input_stats
}
"
)
f
"
{
gc_local_metric
.
stats
()
}
,
{
input_stats
}
"
)
logger
.
warning
(
msg
)
logger
.
warning
(
msg
)
if
(
log_cpu_fallbacks
and
cpu_fallback_local_metric
.
stats
()[
0
][
1
]
>
if
(
log_cpu_fallbacks
and
cpu_fallback_local_metric
.
stats
()[
0
][
1
]
0
)
or
log_cpu_fallbacks_all
:
>
0
)
or
log_cpu_fallbacks_all
:
msg
=
(
"VLLM_HPU_STEP_CPU_FALLBACK: "
msg
=
(
"VLLM_HPU_STEP_CPU_FALLBACK: "
f
"
{
cpu_fallback_local_metric
.
stats
()
}
,
{
input_stats
}
"
)
f
"
{
cpu_fallback_local_metric
.
stats
()
}
,
{
input_stats
}
"
)
logger
.
warning
(
msg
)
logger
.
warning
(
msg
)
...
...
vllm/worker/tpu_model_runner.py
View file @
823ab796
...
@@ -316,8 +316,8 @@ class TPUModelRunner(ModelRunnerBase[ModelInputForTPU]):
...
@@ -316,8 +316,8 @@ class TPUModelRunner(ModelRunnerBase[ModelInputForTPU]):
logger
.
info
(
"batch_size: %d, seq_len: %d"
,
batch_size
,
logger
.
info
(
"batch_size: %d, seq_len: %d"
,
batch_size
,
seq_len
)
seq_len
)
num_tokens
=
batch_size
*
seq_len
num_tokens
=
batch_size
*
seq_len
if
(
num_tokens
>=
if
(
num_tokens
self
.
scheduler_config
.
max_num_batched_tokens
):
>=
self
.
scheduler_config
.
max_num_batched_tokens
):
break
break
seq_len
=
seq_len
*
2
seq_len
=
seq_len
*
2
end
=
time
.
time
()
end
=
time
.
time
()
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment