"...git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "4f359a911580cfac6da55e2f840d8264d4fa3231"
Unverified commit 616e600e, authored by Marut Pandya and committed by GitHub
Browse files

[Misc] add gpu_memory_utilization arg (#5079)


Signed-off-by: pandyamarut <pandyamarut@gmail.com>
parent dfba529b
...@@ -35,7 +35,8 @@ def main(args: argparse.Namespace): ...@@ -35,7 +35,8 @@ def main(args: argparse.Namespace):
use_v2_block_manager=args.use_v2_block_manager, use_v2_block_manager=args.use_v2_block_manager,
enable_chunked_prefill=args.enable_chunked_prefill, enable_chunked_prefill=args.enable_chunked_prefill,
download_dir=args.download_dir, download_dir=args.download_dir,
block_size=args.block_size) block_size=args.block_size,
gpu_memory_utilization=args.gpu_memory_utilization)
sampling_params = SamplingParams( sampling_params = SamplingParams(
n=args.n, n=args.n,
...@@ -214,5 +215,11 @@ if __name__ == '__main__': ...@@ -214,5 +215,11 @@ if __name__ == '__main__':
type=str, type=str,
default=None, default=None,
help='Path to save the latency results in JSON format.') help='Path to save the latency results in JSON format.')
# CLI flag for the GPU memory fraction; main() forwards the parsed value as
# the `gpu_memory_utilization` keyword argument.
# NOTE: adjacent string literals are concatenated verbatim, so each piece
# must carry its own trailing space or the help text runs together.
parser.add_argument('--gpu-memory-utilization',
                    type=float,
                    default=0.9,
                    help='the fraction of GPU memory to be used for '
                    'the model executor, which can range from 0 to 1. '
                    'If unspecified, will use the default value of 0.9.')
args = parser.parse_args() args = parser.parse_args()
main(args) main(args)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment