Update Help Text for --gpu-memory-utilization Argument (#2183)

290e015c · Suhong Moon · GitHub · 1b7c791d · 290e015c
Unverified commit 290e015c, authored Dec 18, 2023 by Suhong Moon; committed by GitHub on Dec 18, 2023.
Hide whitespace changes
Inline Side-by-side

Showing with 7 additions and 5 deletions

vllm/engine/arg_utils.py vllm/engine/arg_utils.py +7 -5

No files found.
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -156,11 +156,13 @@ class EngineArgs:
                            type=int,
                            default=EngineArgs.swap_space,
                            help='CPU swap space size (GiB) per GPU')
-        parser.add_argument('--gpu-memory-utilization',
+        parser.add_argument(
-                            type=float,
+            '--gpu-memory-utilization',
-                            default=EngineArgs.gpu_memory_utilization,
+            type=float,
-                            help='the percentage of GPU memory to be used for '
+            default=EngineArgs.gpu_memory_utilization,
-                            'the model executor')
+            help='the fraction of GPU memory to be used for '
+            'the model executor, which can range from 0 to 1. '
+            'If unspecified, will use the default value of 0.9.')
        parser.add_argument('--max-num-batched-tokens',
                            type=int,
                            default=EngineArgs.max_num_batched_tokens,