Commit c8848191 (unverified), authored by Woosuk Kwon and committed by GitHub
Browse files

Fix eager mode performance (#2377)

parent 05921a9a
...@@ -235,9 +235,11 @@ class ModelRunner: ...@@ -235,9 +235,11 @@ class ModelRunner:
input_block_tables[i, :len(block_table)] = block_table input_block_tables[i, :len(block_table)] = block_table
block_tables = torch.tensor(input_block_tables, device="cuda") block_tables = torch.tensor(input_block_tables, device="cuda")
else: else:
max_block_table_len = (max_context_len + self.block_size -
1) // self.block_size
block_tables = _make_tensor_with_pad( block_tables = _make_tensor_with_pad(
block_tables, block_tables,
max_len=max_context_len, max_len=max_block_table_len,
pad=0, pad=0,
dtype=torch.int, dtype=torch.int,
device="cuda", device="cuda",
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment