Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
norm
vllm
Commits
343cea3d
Commit
343cea3d
authored
Feb 23, 2023
by
Woosuk Kwon
Browse files
Add seq_ids to input metadata
parent
4f6f4967
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
6 additions
and
3 deletions
+6
-3
cacheflow/models/input_metadata.py
cacheflow/models/input_metadata.py
+3
-0
cacheflow/worker/worker.py
cacheflow/worker/worker.py
+3
-3
No files found.
cacheflow/models/input_metadata.py
View file @
343cea3d
...
...
@@ -7,12 +7,14 @@ class InputMetadata:
def
__init__
(
self
,
seq_ids
:
List
[
int
],
prompt_lens
:
List
[
int
],
slot_mapping
:
torch
.
Tensor
,
context_lens
:
torch
.
Tensor
,
max_context_len
:
int
,
block_tables
:
torch
.
Tensor
,
)
->
None
:
self
.
seq_ids
=
seq_ids
self
.
prompt_lens
=
prompt_lens
self
.
slot_mapping
=
slot_mapping
self
.
context_lens
=
context_lens
...
...
@@ -23,3 +25,4 @@ class InputMetadata:
self
.
num_generation_tokens
=
context_lens
.
shape
[
0
]
self
.
max_num_blocks_per_seq
=
block_tables
.
shape
[
1
]
assert
self
.
num_generation_tokens
==
block_tables
.
shape
[
0
]
assert
self
.
num_prompts
+
self
.
num_generation_tokens
==
len
(
seq_ids
)
cacheflow/worker/worker.py
View file @
343cea3d
from
typing
import
Dict
,
List
,
Tuple
from
typing
import
Dict
,
List
,
Tuple
,
Union
import
torch
...
...
@@ -120,6 +120,7 @@ class Worker:
padded_block_tables
,
dtype
=
int
,
device
=
self
.
device
)
input_metadata
=
InputMetadata
(
seq_ids
=
prompt_seq_ids
+
generation_seq_ids
,
prompt_lens
=
prompt_lens
,
slot_mapping
=
slot_mapping_tensor
,
context_lens
=
context_lens_tensor
,
...
...
@@ -128,7 +129,6 @@ class Worker:
)
return
tokens_tensor
,
positions_tensor
,
input_metadata
@
torch
.
inference_mode
()
def
execute_stage
(
self
,
...
...
@@ -139,7 +139,7 @@ class Worker:
blocks_to_swap_in
:
Dict
[
int
,
int
],
blocks_to_swap_out
:
Dict
[
int
,
int
],
blocks_to_copy
:
Dict
[
int
,
int
],
)
->
torch
.
Tensor
:
)
->
Union
[
torch
.
Tensor
,
Dict
[
int
,
Tuple
[
int
,
int
]]]
:
# Issue cache operations.
command_issued
=
False
if
blocks_to_swap_in
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment