Commit 9f659bf0 (unverified), authored by Roy, committed by GitHub
Browse files

[Minor] Optimize cuda graph memory usage (#2437)

parent 35c4bc20
...@@ -506,7 +506,9 @@ class ModelRunner: ...@@ -506,7 +506,9 @@ class ModelRunner:
"use '--enforce-eager' in the CLI.") "use '--enforce-eager' in the CLI.")
logger.info("CUDA graphs can take additional 1~3 GiB memory per GPU. " logger.info("CUDA graphs can take additional 1~3 GiB memory per GPU. "
"If you are running out of memory, consider decreasing " "If you are running out of memory, consider decreasing "
"`gpu_memory_utilization` or enforcing eager mode.") "`gpu_memory_utilization` or enforcing eager mode. "
"You can also reduce the `max_num_seqs` as needed "
"to decrease memory usage.")
start_time = time.perf_counter() start_time = time.perf_counter()
# Prepare dummy inputs. These will be reused for all batch sizes. # Prepare dummy inputs. These will be reused for all batch sizes.
...@@ -519,9 +521,15 @@ class ModelRunner: ...@@ -519,9 +521,15 @@ class ModelRunner:
context_lens = torch.ones(max_batch_size, dtype=torch.int32).cuda() context_lens = torch.ones(max_batch_size, dtype=torch.int32).cuda()
block_tables = torch.from_numpy(self.graph_block_tables).cuda() block_tables = torch.from_numpy(self.graph_block_tables).cuda()
graph_batch_size = _get_graph_batch_size(
self.scheduler_config.max_num_seqs)
batch_size_capture_list = [
bs for bs in _BATCH_SIZES_TO_CAPTURE if bs <= graph_batch_size
]
# NOTE: Capturing the largest batch size first may help reduce the # NOTE: Capturing the largest batch size first may help reduce the
# memory usage of CUDA graph. # memory usage of CUDA graph.
for batch_size in reversed(_BATCH_SIZES_TO_CAPTURE): for batch_size in reversed(batch_size_capture_list):
# Create dummy input_metadata. # Create dummy input_metadata.
input_metadata = InputMetadata( input_metadata = InputMetadata(
is_prompt=False, is_prompt=False,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment