"...ssh:/git@developer.sourcefind.cn:2222/OpenDAS/dynamo.git" did not exist on "b73c571f0fa63a31224248da54d26b5899823694"
Unverified commit f6fef485, authored by Tanmay Verma, committed by GitHub
Browse files

fix(ci): Reduce the free gpu memory fraction (#2433)

parent cebe9219
...@@ -22,7 +22,7 @@ backend: pytorch ...@@ -22,7 +22,7 @@ backend: pytorch
enable_chunked_prefill: true enable_chunked_prefill: true
kv_cache_config: kv_cache_config:
free_gpu_memory_fraction: 0.95 free_gpu_memory_fraction: 0.85
# NOTE: pytorch_backend_config section flattened since: https://github.com/NVIDIA/TensorRT-LLM/pull/4603 # NOTE: pytorch_backend_config section flattened since: https://github.com/NVIDIA/TensorRT-LLM/pull/4603
# NOTE: overlap_scheduler enabled by default since this commit and changed # NOTE: overlap_scheduler enabled by default since this commit and changed
......
...@@ -25,7 +25,7 @@ cuda_graph_config: ...@@ -25,7 +25,7 @@ cuda_graph_config:
max_batch_size: 16 max_batch_size: 16
kv_cache_config: kv_cache_config:
free_gpu_memory_fraction: 0.95 free_gpu_memory_fraction: 0.85
cache_transceiver_config: cache_transceiver_config:
backend: default backend: default
...@@ -24,7 +24,7 @@ disable_overlap_scheduler: true ...@@ -24,7 +24,7 @@ disable_overlap_scheduler: true
cuda_graph_config: cuda_graph_config:
max_batch_size: 16 max_batch_size: 16
kv_cache_config: kv_cache_config:
free_gpu_memory_fraction: 0.95 free_gpu_memory_fraction: 0.85
cache_transceiver_config: cache_transceiver_config:
backend: default backend: default
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment