Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
norm
vllm
Commits
4b1ac23f
Commit
4b1ac23f
authored
Feb 23, 2023
by
Woosuk Kwon
Browse files
Fix slot mapping
parent
8290fce4
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
16 additions
and
6 deletions
+16
-6
cacheflow/models/input_metadata.py
cacheflow/models/input_metadata.py
+1
-1
cacheflow/worker/worker.py
cacheflow/worker/worker.py
+15
-5
No files found.
cacheflow/models/input_metadata.py
View file @
4b1ac23f
...
@@ -14,7 +14,7 @@ class InputMetadata:
...
@@ -14,7 +14,7 @@ class InputMetadata:
block_tables
:
torch
.
Tensor
,
block_tables
:
torch
.
Tensor
,
)
->
None
:
)
->
None
:
self
.
prompt_lens
=
prompt_lens
self
.
prompt_lens
=
prompt_lens
self
.
prompt_block_table
=
slot_mapping
self
.
slot_mapping
=
slot_mapping
self
.
context_lens
=
context_lens
self
.
context_lens
=
context_lens
self
.
max_context_len
=
max_context_len
self
.
max_context_len
=
max_context_len
self
.
block_tables
=
block_tables
self
.
block_tables
=
block_tables
...
...
cacheflow/worker/worker.py
View file @
4b1ac23f
...
@@ -83,12 +83,20 @@ class Worker:
...
@@ -83,12 +83,20 @@ class Worker:
generation_seq_ids
=
sorted
(
generation_tokens
.
keys
())
generation_seq_ids
=
sorted
(
generation_tokens
.
keys
())
for
seq_id
in
generation_seq_ids
:
for
seq_id
in
generation_seq_ids
:
input_tokens
.
append
(
generation_tokens
[
seq_id
])
input_tokens
.
append
(
generation_tokens
[
seq_id
])
input_positions
.
append
(
context_lens
[
seq_id
]
-
1
)
position_id
=
context_lens
[
seq_id
]
-
1
generation_block_tables
.
append
(
block_tables
[
seq_id
])
input_positions
.
append
(
position_id
)
block_table
=
block_tables
[
seq_id
]
generation_block_tables
.
append
(
block_table
)
max_context_len
=
max
(
max_context_len
,
context_lens
[
seq_id
])
max_context_len
=
max
(
max_context_len
,
context_lens
[
seq_id
])
max_num_blocks_per_seq
=
max
(
max_num_blocks_per_seq
=
max
(
max_num_blocks_per_seq
,
len
(
block_tables
[
seq_id
]))
max_num_blocks_per_seq
,
len
(
block_table
))
block_number
=
block_table
[
position_id
//
self
.
block_size
]
block_offset
=
position_id
%
self
.
block_size
slot
=
block_number
*
self
.
block_size
+
block_offset
slot_mapping
.
append
(
slot
)
# Optimization: Pad the input length to be a multiple of 8.
# Optimization: Pad the input length to be a multiple of 8.
# This is required for utilizing the Tensor Cores in NVIDIA GPUs.
# This is required for utilizing the Tensor Cores in NVIDIA GPUs.
...
@@ -105,9 +113,11 @@ class Worker:
...
@@ -105,9 +113,11 @@ class Worker:
context_lens_tensor
=
torch
.
tensor
(
context_lens_tensor
=
torch
.
tensor
(
[
context_lens
[
seq_id
]
for
seq_id
in
generation_seq_ids
],
[
context_lens
[
seq_id
]
for
seq_id
in
generation_seq_ids
],
dtype
=
torch
.
int
,
device
=
self
.
device
)
dtype
=
torch
.
int
,
device
=
self
.
device
)
padded_block_tables
=
[
_pad_to_max
(
block_table
,
max_num_blocks_per_seq
)
for
block_table
in
generation_block_tables
]
block_tables_tensor
=
torch
.
tensor
(
block_tables_tensor
=
torch
.
tensor
(
[
_pad_to_max
(
block_table
)
for
block_table
in
generation_block_tables
],
padded_block_tables
,
dtype
=
int
,
device
=
self
.
device
)
dtype
=
int
,
device
=
self
.
device
)
input_metadata
=
InputMetadata
(
input_metadata
=
InputMetadata
(
prompt_lens
=
prompt_lens
,
prompt_lens
=
prompt_lens
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment