Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
7dfbe4fd
Unverified
Commit
7dfbe4fd
authored
Mar 27, 2026
by
Alec
Committed by
GitHub
Mar 28, 2026
Browse files
chore: remove stale example assets (#7059)
parent
310f8ca9
Changes
70
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
0 additions
and
632 deletions
+0
-632
examples/backends/trtllm/engine_configs/deepseek-r1/disagg/wide_ep/eplb.yaml
...rtllm/engine_configs/deepseek-r1/disagg/wide_ep/eplb.yaml
+0
-7
examples/backends/trtllm/engine_configs/deepseek-r1/disagg/wide_ep/wide_ep_decode.yaml
...ne_configs/deepseek-r1/disagg/wide_ep/wide_ep_decode.yaml
+0
-66
examples/backends/trtllm/engine_configs/deepseek-r1/disagg/wide_ep/wide_ep_prefill.yaml
...e_configs/deepseek-r1/disagg/wide_ep/wide_ep_prefill.yaml
+0
-44
examples/backends/trtllm/engine_configs/gemma3/vswa_agg.yaml
examples/backends/trtllm/engine_configs/gemma3/vswa_agg.yaml
+0
-26
examples/backends/trtllm/engine_configs/gemma3/vswa_decode.yaml
...es/backends/trtllm/engine_configs/gemma3/vswa_decode.yaml
+0
-29
examples/backends/trtllm/engine_configs/gemma3/vswa_prefill.yaml
...s/backends/trtllm/engine_configs/gemma3/vswa_prefill.yaml
+0
-30
examples/backends/trtllm/engine_configs/gpt-oss-120b/agg.yaml
...ples/backends/trtllm/engine_configs/gpt-oss-120b/agg.yaml
+0
-0
examples/backends/trtllm/engine_configs/llama4/eagle/eagle_agg.yml
...backends/trtllm/engine_configs/llama4/eagle/eagle_agg.yml
+0
-39
examples/backends/trtllm/engine_configs/llama4/eagle/eagle_decode.yaml
...ends/trtllm/engine_configs/llama4/eagle/eagle_decode.yaml
+0
-52
examples/backends/trtllm/engine_configs/llama4/eagle/eagle_prefill.yaml
...nds/trtllm/engine_configs/llama4/eagle/eagle_prefill.yaml
+0
-37
examples/backends/trtllm/engine_configs/llama4/multimodal/agg.yaml
...backends/trtllm/engine_configs/llama4/multimodal/agg.yaml
+0
-33
examples/backends/trtllm/engine_configs/llama4/multimodal/decode.yaml
...kends/trtllm/engine_configs/llama4/multimodal/decode.yaml
+0
-29
examples/backends/trtllm/engine_configs/llama4/multimodal/prefill.yaml
...ends/trtllm/engine_configs/llama4/multimodal/prefill.yaml
+0
-31
examples/backends/trtllm/engine_configs/llava-v1.6-mistral-7b-hf/agg.yaml
...s/trtllm/engine_configs/llava-v1.6-mistral-7b-hf/agg.yaml
+0
-29
examples/backends/trtllm/engine_configs/llava-v1.6-mistral-7b-hf/decode.yaml
...rtllm/engine_configs/llava-v1.6-mistral-7b-hf/decode.yaml
+0
-29
examples/backends/trtllm/engine_configs/llava-v1.6-mistral-7b-hf/encode.yaml
...rtllm/engine_configs/llava-v1.6-mistral-7b-hf/encode.yaml
+0
-30
examples/backends/trtllm/engine_configs/llava-v1.6-mistral-7b-hf/prefill.yaml
...tllm/engine_configs/llava-v1.6-mistral-7b-hf/prefill.yaml
+0
-31
examples/backends/trtllm/engine_configs/qwen2-vl-7b-instruct/decode.yaml
...ds/trtllm/engine_configs/qwen2-vl-7b-instruct/decode.yaml
+0
-29
examples/backends/trtllm/engine_configs/qwen2-vl-7b-instruct/encode.yaml
...ds/trtllm/engine_configs/qwen2-vl-7b-instruct/encode.yaml
+0
-30
examples/backends/trtllm/engine_configs/qwen2-vl-7b-instruct/prefill.yaml
...s/trtllm/engine_configs/qwen2-vl-7b-instruct/prefill.yaml
+0
-31
No files found.
examples/backends/trtllm/engine_configs/deepseek-r1/disagg/wide_ep/eplb.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# moe_load_balancer settings for TRTLLM based on:
# https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/ep_load_balancer/README.md#online-ep-load-balancer
num_slots: 288
layer_updates_per_iter: 2
examples/backends/trtllm/engine_configs/deepseek-r1/disagg/wide_ep/wide_ep_decode.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
backend: pytorch
# WideEP related settings
moe_config:
  backend: WIDEEP
  load_balancer: /mnt/examples/backends/trtllm/engine_configs/deepseek-r1/disagg/wide_ep/eplb.yaml
# TP/EP/PP/DP
tensor_parallel_size: 16
moe_expert_parallel_size: 16
pipeline_parallel_size: 1
enable_attention_dp: true
max_batch_size: 256
max_num_tokens: 256
# 8448 = 8192 ISL + 256 OSL
max_seq_len: 8448
kv_cache_config:
  # With dp attention disabled: high free_gpu_memory_fraction is fine.
  # free_gpu_memory_fraction: 0.85
  # With dp attention enabled: large ISL at high concurrency may need
  # free_gpu_memory_fraction low to have enough available memory.
  free_gpu_memory_fraction: 0.30
  dtype: fp8
# NOTE: pytorch_backend_config section flattened since: https://github.com/NVIDIA/TensorRT-LLM/pull/4603
# NOTE: overlap_scheduler enabled by default since this commit and changed
# config field from 'enable_overlap_scheduler' to 'disable_overlap_scheduler':
# https://github.com/NVIDIA/TensorRT-LLM/commit/b4e5df0ee0024eda3eeb83a6ba822245a30ab428
disable_overlap_scheduler: false
cuda_graph_config:
  enable_padding: true
  # NOTE: For larger max batch size, you may want to
  # add larger cuda graph batch sizes below to match.
  batch_sizes:
    - 1
    - 2
    - 4
    - 8
    - 16
    - 32
    - 64
    - 128
    - 256
print_iter_log: true
cache_transceiver_config:
  backend: DEFAULT
examples/backends/trtllm/engine_configs/deepseek-r1/disagg/wide_ep/wide_ep_prefill.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
backend: pytorch
# WideEP related settings
moe_config:
  backend: WIDEEP
  load_balancer: /mnt/examples/backends/trtllm/engine_configs/deepseek-r1/disagg/wide_ep/eplb.yaml
# TP/EP/PP/DP
tensor_parallel_size: 16
moe_expert_parallel_size: 16
pipeline_parallel_size: 1
enable_attention_dp: true
max_batch_size: 1
max_num_tokens: 4096
max_seq_len: 8192
kv_cache_config:
  free_gpu_memory_fraction: 0.3
  dtype: fp8
  # NOTE: This dtype must match in both prefill/decode configs
# NOTE: pytorch_backend_config section flattened since: https://github.com/NVIDIA/TensorRT-LLM/pull/4603
# NOTE: overlap_scheduler enabled by default since this commit and changed
# config field from 'enable_overlap_scheduler' to 'disable_overlap_scheduler':
# https://github.com/NVIDIA/TensorRT-LLM/commit/b4e5df0ee0024eda3eeb83a6ba822245a30ab428
disable_overlap_scheduler: true
print_iter_log: true
cache_transceiver_config:
  backend: DEFAULT
examples/backends/trtllm/engine_configs/gemma3/vswa_agg.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size: 1
backend: pytorch
kv_cache_config:
  max_attention_window:
    - 512
    - 512
    - 512
    - 512
    - 512
    - 32768
examples/backends/trtllm/engine_configs/gemma3/vswa_decode.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size: 1
backend: pytorch
kv_cache_config:
  max_attention_window:
    - 512
    - 512
    - 512
    - 512
    - 512
    - 32768
cache_transceiver_config:
  backend: DEFAULT
examples/backends/trtllm/engine_configs/gemma3/vswa_prefill.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size: 1
backend: pytorch
disable_overlap_scheduler: true
kv_cache_config:
  max_attention_window:
    - 512
    - 512
    - 512
    - 512
    - 512
    - 32768
cache_transceiver_config:
  backend: DEFAULT
examples/backends/trtllm/engine_configs/gpt-oss-120b/agg.yaml
deleted
100644 → 0
View file @
310f8ca9
examples/backends/trtllm/engine_configs/llama4/eagle/eagle_agg.yml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
backend: pytorch
tensor_parallel_size: 4
moe_expert_parallel_size: 4
max_batch_size: 192
max_num_tokens: 3072
disable_overlap_scheduler: false
# Enable Speculative Decoding in the model engine
speculative_config:
  decoding_type: Eagle
  max_draft_len: 3
  speculative_model_dir: nvidia/Llama-4-Maverick-17B-128E-Eagle3
  eagle3_one_model: true
kv_cache_config:
  free_gpu_memory_fraction: 0.2
  enable_block_reuse: false
cuda_graph_config:
  enable_padding: true
  batch_sizes: [1, 2, 3, 4, 5, 6, 7, 8, 16, 32, 48, 64, 128, 190, 191, 192]
print_iter_log: true
examples/backends/trtllm/engine_configs/llama4/eagle/eagle_decode.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
backend: pytorch
tensor_parallel_size: 4
moe_expert_parallel_size: 4
max_batch_size: 256
max_num_tokens: 1024
# 8704 = 8192 ISL + 512 OSL
max_seq_len: 8704
disable_overlap_scheduler: true
# Enable Speculative Decoding in the model engine
speculative_config:
  decoding_type: Eagle
  max_draft_len: 3
  speculative_model_dir: nvidia/Llama-4-Maverick-17B-128E-Eagle3
  eagle3_one_model: true
kv_cache_config:
  free_gpu_memory_fraction: 0.5
  enable_block_reuse: false
cuda_graph_config:
  enable_padding: true
  batch_sizes:
    - 1
    - 2
    - 4
    - 8
    - 16
    - 32
    - 64
    - 128
    - 256
print_iter_log: true
cache_transceiver_config:
  backend: DEFAULT
examples/backends/trtllm/engine_configs/llama4/eagle/eagle_prefill.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
backend: pytorch
tensor_parallel_size: 4
moe_expert_parallel_size: 4
max_batch_size: 1
max_num_tokens: 8192
max_seq_len: 8192
print_iter_log: true
disable_overlap_scheduler: true
# Enable Speculative Decoding in the model engine
speculative_config:
  decoding_type: Eagle
  max_draft_len: 3
  speculative_model_dir: nvidia/Llama-4-Maverick-17B-128E-Eagle3
  eagle3_one_model: true
kv_cache_config:
  free_gpu_memory_fraction: 0.5
  enable_block_reuse: false
cache_transceiver_config:
  backend: DEFAULT
examples/backends/trtllm/engine_configs/llama4/multimodal/agg.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size: 8
moe_expert_parallel_size: 1
enable_attention_dp: false
max_num_tokens: 4096
max_batch_size: 8
trust_remote_code: true
backend: pytorch
enable_chunked_prefill: true
kv_cache_config:
  free_gpu_memory_fraction: 0.3
  enable_block_reuse: false
cache_transceiver_config:
  backend: DEFAULT
# NOTE: pytorch_backend_config section flattened since: https://github.com/NVIDIA/TensorRT-LLM/pull/4603
# NOTE: overlap_scheduler enabled by default since this commit and changed
# config field from 'enable_overlap_scheduler' to 'disable_overlap_scheduler':
# https://github.com/NVIDIA/TensorRT-LLM/commit/b4e5df0ee0024eda3eeb83a6ba822245a30ab428
examples/backends/trtllm/engine_configs/llama4/multimodal/decode.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size: 8
moe_expert_parallel_size: 1
enable_attention_dp: false
max_num_tokens: 4096
max_batch_size: 8
trust_remote_code: true
backend: pytorch
enable_chunked_prefill: true
disable_overlap_scheduler: false
kv_cache_config:
  free_gpu_memory_fraction: 0.20
  enable_block_reuse: false
cache_transceiver_config:
  backend: DEFAULT
\ No newline at end of file
examples/backends/trtllm/engine_configs/llama4/multimodal/prefill.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size: 8
moe_expert_parallel_size: 1
enable_attention_dp: false
max_num_tokens: 4096
max_batch_size: 8
trust_remote_code: true
backend: pytorch
enable_chunked_prefill: true
# Overlap scheduler not currently supported in prefill only workers.
disable_overlap_scheduler: true
kv_cache_config:
  free_gpu_memory_fraction: 0.20
  enable_block_reuse: false
cache_transceiver_config:
  backend: DEFAULT
\ No newline at end of file
examples/backends/trtllm/engine_configs/llava-v1.6-mistral-7b-hf/agg.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size: 1
moe_expert_parallel_size: 1
enable_attention_dp: false
max_num_tokens: 8192
max_batch_size: 16
trust_remote_code: true
backend: pytorch
enable_chunked_prefill: true
kv_cache_config:
  free_gpu_memory_fraction: 0.60
  enable_block_reuse: false
cache_transceiver_config:
  backend: DEFAULT
examples/backends/trtllm/engine_configs/llava-v1.6-mistral-7b-hf/decode.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size: 1
moe_expert_parallel_size: 1
enable_attention_dp: false
max_num_tokens: 8192
max_batch_size: 16
trust_remote_code: true
backend: pytorch
enable_chunked_prefill: true
disable_overlap_scheduler: false
kv_cache_config:
  free_gpu_memory_fraction: 0.30
  enable_block_reuse: false
cache_transceiver_config:
  backend: DEFAULT
\ No newline at end of file
examples/backends/trtllm/engine_configs/llava-v1.6-mistral-7b-hf/encode.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size: 1
moe_expert_parallel_size: 1
enable_attention_dp: false
max_num_tokens: 8192
max_batch_size: 16
trust_remote_code: true
backend: pytorch
enable_chunked_prefill: true
# Overlap scheduler not currently supported in prefill only workers.
disable_overlap_scheduler: true
# Note: kv_cache_config is not needed for encode workers since MultimodalEncoder
# only runs vision encoder + projector and doesn't need KV cache memory.
cache_transceiver_config:
  backend: DEFAULT
examples/backends/trtllm/engine_configs/llava-v1.6-mistral-7b-hf/prefill.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size: 1
moe_expert_parallel_size: 1
enable_attention_dp: false
max_num_tokens: 8192
max_batch_size: 16
trust_remote_code: true
backend: pytorch
enable_chunked_prefill: true
# Overlap scheduler not currently supported in prefill only workers.
disable_overlap_scheduler: true
kv_cache_config:
  free_gpu_memory_fraction: 0.30
  enable_block_reuse: false
cache_transceiver_config:
  backend: DEFAULT
\ No newline at end of file
examples/backends/trtllm/engine_configs/qwen2-vl-7b-instruct/decode.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size: 1
moe_expert_parallel_size: 1
enable_attention_dp: false
max_num_tokens: 8192
max_batch_size: 16
trust_remote_code: true
backend: pytorch
enable_chunked_prefill: true
disable_overlap_scheduler: false
kv_cache_config:
  free_gpu_memory_fraction: 0.30
  enable_block_reuse: false
cache_transceiver_config:
  backend: DEFAULT
\ No newline at end of file
examples/backends/trtllm/engine_configs/qwen2-vl-7b-instruct/encode.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size: 1
moe_expert_parallel_size: 1
enable_attention_dp: false
max_num_tokens: 8192
trust_remote_code: true
backend: pytorch
disable_overlap_scheduler: false
cuda_graph_config:
  max_batch_size: 16
kv_cache_config:
  free_gpu_memory_fraction: 0.85
cache_transceiver_config:
  backend: DEFAULT
examples/backends/trtllm/engine_configs/qwen2-vl-7b-instruct/prefill.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size: 1
moe_expert_parallel_size: 1
enable_attention_dp: false
max_num_tokens: 8192
max_batch_size: 16
trust_remote_code: true
backend: pytorch
enable_chunked_prefill: true
# Overlap scheduler not currently supported in prefill only workers.
disable_overlap_scheduler: true
kv_cache_config:
  free_gpu_memory_fraction: 0.30
  enable_block_reuse: false
cache_transceiver_config:
  backend: DEFAULT
\ No newline at end of file
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment