Commit cd6239dc authored by zhuwenwen
Browse files

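[fix]修复零消耗引入的cudagraph模式数据准备cpu耗时问题 ([fix] Fix the CPU time overhead of data preparation in cudagraph mode that was introduced by the zero-overhead change)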

parent f7be09fc
......@@ -242,8 +242,8 @@ class CommonMetadataBuilder(AttentionMetadataBuilder[TAttentionMetadata]):
input_block_tables[i, :len(block_table)] = block_table
# block_tables = torch.from_numpy(input_block_tables).to(
# device, non_blocking=True)
- block_tables = async_tensor_h2d(input_block_tables.tolist(), torch.int32,
- device, self.runner.pin_memory)
+ block_tables = torch.from_numpy(input_block_tables).pin_memory().to(
+ device, non_blocking=True)
else:
block_tables = make_tensor_with_pad(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment