"lib/vscode:/vscode.git/clone" did not exist on "db88c9530b76bae22f5fdd790ee0097160bca6ec"
Unverified Commit e8cb972e authored by Indrajit Bhosale, committed by GitHub
Browse files

chore: Update trtllm version to 1.1.0rc3 (#2930)


Signed-off-by: Indrajit Bhosale <iamindrajitb@gmail.com>
parent 241bd014
...@@ -199,7 +199,7 @@ It is recommended to use [NGC PyTorch Container](https://catalog.ngc.nvidia.com/ ...@@ -199,7 +199,7 @@ It is recommended to use [NGC PyTorch Container](https://catalog.ngc.nvidia.com/
> [!Note] > [!Note]
> Ensure that you select a PyTorch container image version that matches the version of TensorRT-LLM you are using. > Ensure that you select a PyTorch container image version that matches the version of TensorRT-LLM you are using.
> For example, if you are using `tensorrt-llm==1.0.0rc6`, use the PyTorch container image version `25.06`. > For example, if you are using `tensorrt-llm==1.1.0rc3`, use the PyTorch container image version `25.06`.
> To find the correct PyTorch container version for your desired `tensorrt-llm` release, visit the [TensorRT-LLM Dockerfile.multi](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docker/Dockerfile.multi) on GitHub. Switch to the branch that matches your `tensorrt-llm` version, and look for the `BASE_TAG` line to identify the recommended PyTorch container tag. > To find the correct PyTorch container version for your desired `tensorrt-llm` release, visit the [TensorRT-LLM Dockerfile.multi](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docker/Dockerfile.multi) on GitHub. Switch to the branch that matches your `tensorrt-llm` version, and look for the `BASE_TAG` line to identify the recommended PyTorch container tag.
> [!Important] > [!Important]
......
...@@ -28,4 +28,4 @@ kv_cache_config: ...@@ -28,4 +28,4 @@ kv_cache_config:
free_gpu_memory_fraction: 0.85 free_gpu_memory_fraction: 0.85
cache_transceiver_config: cache_transceiver_config:
backend: default backend: DEFAULT
...@@ -54,4 +54,4 @@ cuda_graph_config: ...@@ -54,4 +54,4 @@ cuda_graph_config:
print_iter_log: true print_iter_log: true
cache_transceiver_config: cache_transceiver_config:
backend: default backend: DEFAULT
...@@ -38,4 +38,4 @@ speculative_config: ...@@ -38,4 +38,4 @@ speculative_config:
num_nextn_predict_layers: 1 num_nextn_predict_layers: 1
cache_transceiver_config: cache_transceiver_config:
backend: default backend: DEFAULT
...@@ -57,4 +57,4 @@ cuda_graph_config: ...@@ -57,4 +57,4 @@ cuda_graph_config:
print_iter_log: true print_iter_log: true
cache_transceiver_config: cache_transceiver_config:
backend: default backend: DEFAULT
...@@ -36,4 +36,4 @@ disable_overlap_scheduler: true ...@@ -36,4 +36,4 @@ disable_overlap_scheduler: true
print_iter_log: true print_iter_log: true
cache_transceiver_config: cache_transceiver_config:
backend: default backend: DEFAULT
...@@ -63,4 +63,4 @@ cuda_graph_config: ...@@ -63,4 +63,4 @@ cuda_graph_config:
print_iter_log: true print_iter_log: true
cache_transceiver_config: cache_transceiver_config:
backend: default backend: DEFAULT
...@@ -41,4 +41,4 @@ disable_overlap_scheduler: true ...@@ -41,4 +41,4 @@ disable_overlap_scheduler: true
print_iter_log: true print_iter_log: true
cache_transceiver_config: cache_transceiver_config:
backend: default backend: DEFAULT
...@@ -27,4 +27,4 @@ kv_cache_config: ...@@ -27,4 +27,4 @@ kv_cache_config:
free_gpu_memory_fraction: 0.85 free_gpu_memory_fraction: 0.85
cache_transceiver_config: cache_transceiver_config:
backend: default backend: DEFAULT
...@@ -26,4 +26,4 @@ kv_cache_config: ...@@ -26,4 +26,4 @@ kv_cache_config:
- 32768 - 32768
cache_transceiver_config: cache_transceiver_config:
backend: default backend: DEFAULT
...@@ -27,4 +27,4 @@ kv_cache_config: ...@@ -27,4 +27,4 @@ kv_cache_config:
- 32768 - 32768
cache_transceiver_config: cache_transceiver_config:
backend: default backend: DEFAULT
...@@ -19,7 +19,7 @@ moe_config: ...@@ -19,7 +19,7 @@ moe_config:
cuda_graph_config: cuda_graph_config:
enable_padding: true enable_padding: true
cache_transceiver_config: cache_transceiver_config:
backend: ucx backend: UCX
max_tokens_in_buffer: 65536 max_tokens_in_buffer: 65536
print_iter_log: false print_iter_log: false
stream_interval: 10 stream_interval: 10
...@@ -21,7 +21,7 @@ cuda_graph_config: ...@@ -21,7 +21,7 @@ cuda_graph_config:
max_batch_size: 32 max_batch_size: 32
enable_padding: true enable_padding: true
cache_transceiver_config: cache_transceiver_config:
backend: ucx backend: UCX
max_tokens_in_buffer: 65536 max_tokens_in_buffer: 65536
print_iter_log: false print_iter_log: false
stream_interval: 10 stream_interval: 10
...@@ -49,4 +49,4 @@ cuda_graph_config: ...@@ -49,4 +49,4 @@ cuda_graph_config:
print_iter_log: true print_iter_log: true
cache_transceiver_config: cache_transceiver_config:
backend: default backend: DEFAULT
...@@ -34,4 +34,4 @@ kv_cache_config: ...@@ -34,4 +34,4 @@ kv_cache_config:
enable_block_reuse: false enable_block_reuse: false
cache_transceiver_config: cache_transceiver_config:
backend: default backend: DEFAULT
...@@ -26,7 +26,7 @@ kv_cache_config: ...@@ -26,7 +26,7 @@ kv_cache_config:
enable_block_reuse: false enable_block_reuse: false
cache_transceiver_config: cache_transceiver_config:
backend: default backend: DEFAULT
# NOTE: pytorch_backend_config section flattened since: https://github.com/NVIDIA/TensorRT-LLM/pull/4603 # NOTE: pytorch_backend_config section flattened since: https://github.com/NVIDIA/TensorRT-LLM/pull/4603
# NOTE: overlap_scheduler enabled by default since this commit and changed # NOTE: overlap_scheduler enabled by default since this commit and changed
# config field from 'enable_overlap_scheduler' to 'disable_overlap_scheduler': # config field from 'enable_overlap_scheduler' to 'disable_overlap_scheduler':
......
...@@ -26,4 +26,4 @@ kv_cache_config: ...@@ -26,4 +26,4 @@ kv_cache_config:
enable_block_reuse: false enable_block_reuse: false
cache_transceiver_config: cache_transceiver_config:
backend: default backend: DEFAULT
\ No newline at end of file \ No newline at end of file
...@@ -26,4 +26,4 @@ kv_cache_config: ...@@ -26,4 +26,4 @@ kv_cache_config:
enable_block_reuse: false enable_block_reuse: false
cache_transceiver_config: cache_transceiver_config:
backend: default backend: DEFAULT
\ No newline at end of file \ No newline at end of file
...@@ -28,4 +28,4 @@ kv_cache_config: ...@@ -28,4 +28,4 @@ kv_cache_config:
enable_block_reuse: false enable_block_reuse: false
cache_transceiver_config: cache_transceiver_config:
backend: default backend: DEFAULT
\ No newline at end of file \ No newline at end of file
...@@ -28,4 +28,4 @@ kv_cache_config: ...@@ -28,4 +28,4 @@ kv_cache_config:
enable_block_reuse: false enable_block_reuse: false
cache_transceiver_config: cache_transceiver_config:
backend: default backend: DEFAULT
\ No newline at end of file \ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment