Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
7dfbe4fd
"ml/vscode:/vscode.git/clone" did not exist on "aaa7818000c42a82fc030212c35ef83f9799efd7"
Unverified
Commit
7dfbe4fd
authored
Mar 27, 2026
by
Alec
Committed by
GitHub
Mar 28, 2026
Browse files
chore: remove stale example assets (#7059)
parent
310f8ca9
Changes
70
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
0 additions
and
632 deletions
+0
-632
examples/backends/trtllm/engine_configs/deepseek-r1/disagg/wide_ep/eplb.yaml
...rtllm/engine_configs/deepseek-r1/disagg/wide_ep/eplb.yaml
+0
-7
examples/backends/trtllm/engine_configs/deepseek-r1/disagg/wide_ep/wide_ep_decode.yaml
...ne_configs/deepseek-r1/disagg/wide_ep/wide_ep_decode.yaml
+0
-66
examples/backends/trtllm/engine_configs/deepseek-r1/disagg/wide_ep/wide_ep_prefill.yaml
...e_configs/deepseek-r1/disagg/wide_ep/wide_ep_prefill.yaml
+0
-44
examples/backends/trtllm/engine_configs/gemma3/vswa_agg.yaml
examples/backends/trtllm/engine_configs/gemma3/vswa_agg.yaml
+0
-26
examples/backends/trtllm/engine_configs/gemma3/vswa_decode.yaml
...es/backends/trtllm/engine_configs/gemma3/vswa_decode.yaml
+0
-29
examples/backends/trtllm/engine_configs/gemma3/vswa_prefill.yaml
...s/backends/trtllm/engine_configs/gemma3/vswa_prefill.yaml
+0
-30
examples/backends/trtllm/engine_configs/gpt-oss-120b/agg.yaml
...ples/backends/trtllm/engine_configs/gpt-oss-120b/agg.yaml
+0
-0
examples/backends/trtllm/engine_configs/llama4/eagle/eagle_agg.yml
...backends/trtllm/engine_configs/llama4/eagle/eagle_agg.yml
+0
-39
examples/backends/trtllm/engine_configs/llama4/eagle/eagle_decode.yaml
...ends/trtllm/engine_configs/llama4/eagle/eagle_decode.yaml
+0
-52
examples/backends/trtllm/engine_configs/llama4/eagle/eagle_prefill.yaml
...nds/trtllm/engine_configs/llama4/eagle/eagle_prefill.yaml
+0
-37
examples/backends/trtllm/engine_configs/llama4/multimodal/agg.yaml
...backends/trtllm/engine_configs/llama4/multimodal/agg.yaml
+0
-33
examples/backends/trtllm/engine_configs/llama4/multimodal/decode.yaml
...kends/trtllm/engine_configs/llama4/multimodal/decode.yaml
+0
-29
examples/backends/trtllm/engine_configs/llama4/multimodal/prefill.yaml
...ends/trtllm/engine_configs/llama4/multimodal/prefill.yaml
+0
-31
examples/backends/trtllm/engine_configs/llava-v1.6-mistral-7b-hf/agg.yaml
...s/trtllm/engine_configs/llava-v1.6-mistral-7b-hf/agg.yaml
+0
-29
examples/backends/trtllm/engine_configs/llava-v1.6-mistral-7b-hf/decode.yaml
...rtllm/engine_configs/llava-v1.6-mistral-7b-hf/decode.yaml
+0
-29
examples/backends/trtllm/engine_configs/llava-v1.6-mistral-7b-hf/encode.yaml
...rtllm/engine_configs/llava-v1.6-mistral-7b-hf/encode.yaml
+0
-30
examples/backends/trtllm/engine_configs/llava-v1.6-mistral-7b-hf/prefill.yaml
...tllm/engine_configs/llava-v1.6-mistral-7b-hf/prefill.yaml
+0
-31
examples/backends/trtllm/engine_configs/qwen2-vl-7b-instruct/decode.yaml
...ds/trtllm/engine_configs/qwen2-vl-7b-instruct/decode.yaml
+0
-29
examples/backends/trtllm/engine_configs/qwen2-vl-7b-instruct/encode.yaml
...ds/trtllm/engine_configs/qwen2-vl-7b-instruct/encode.yaml
+0
-30
examples/backends/trtllm/engine_configs/qwen2-vl-7b-instruct/prefill.yaml
...s/trtllm/engine_configs/qwen2-vl-7b-instruct/prefill.yaml
+0
-31
No files found.
examples/backends/trtllm/engine_configs/deepseek-r1/disagg/wide_ep/eplb.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# moe_load_balancer settings for TRTLLM based on:
# https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/ep_load_balancer/README.md#online-ep-load-balancer
num_slots
:
288
layer_updates_per_iter
:
2
examples/backends/trtllm/engine_configs/deepseek-r1/disagg/wide_ep/wide_ep_decode.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
backend
:
pytorch
# WideEP related settings
moe_config
:
backend
:
WIDEEP
load_balancer
:
/mnt/examples/backends/trtllm/engine_configs/deepseek-r1/disagg/wide_ep/eplb.yaml
# TP/EP/PP/DP
tensor_parallel_size
:
16
moe_expert_parallel_size
:
16
pipeline_parallel_size
:
1
enable_attention_dp
:
true
max_batch_size
:
256
max_num_tokens
:
256
# 8448 = 8192 ISL + 256 OSL
max_seq_len
:
8448
kv_cache_config
:
# With dp attention disabled: high free_gpu_memory_fraction is fine.
# free_gpu_memory_fraction: 0.85
# With dp attention enabled: large ISL at high concurrency may need
# free_gpu_memory_fraction low to have enough available memory.
free_gpu_memory_fraction
:
0.30
dtype
:
fp8
# NOTE: pytorch_backend_config section flattened since: https://github.com/NVIDIA/TensorRT-LLM/pull/4603
# NOTE: overlap_scheduler enabled by default since this commit and changed
# config field from 'enable_overlap_scheduler' to 'disable_overlap_scheduler':
# https://github.com/NVIDIA/TensorRT-LLM/commit/b4e5df0ee0024eda3eeb83a6ba822245a30ab428
disable_overlap_scheduler
:
false
cuda_graph_config
:
enable_padding
:
true
# NOTE: For larger max batch size, you may want to
# add larger cuda graph batch sizes below to match.
batch_sizes
:
-
1
-
2
-
4
-
8
-
16
-
32
-
64
-
128
-
256
print_iter_log
:
true
cache_transceiver_config
:
backend
:
DEFAULT
examples/backends/trtllm/engine_configs/deepseek-r1/disagg/wide_ep/wide_ep_prefill.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
backend
:
pytorch
# WideEP related settings
moe_config
:
backend
:
WIDEEP
load_balancer
:
/mnt/examples/backends/trtllm/engine_configs/deepseek-r1/disagg/wide_ep/eplb.yaml
# TP/EP/PP/DP
tensor_parallel_size
:
16
moe_expert_parallel_size
:
16
pipeline_parallel_size
:
1
enable_attention_dp
:
true
max_batch_size
:
1
max_num_tokens
:
4096
max_seq_len
:
8192
kv_cache_config
:
free_gpu_memory_fraction
:
0.3
dtype
:
fp8
# NOTE: This dtype must match in both prefill/decode configs
# NOTE: pytorch_backend_config section flattened since: https://github.com/NVIDIA/TensorRT-LLM/pull/4603
# NOTE: overlap_scheduler enabled by default since this commit and changed
# config field from 'enable_overlap_scheduler' to 'disable_overlap_scheduler':
# https://github.com/NVIDIA/TensorRT-LLM/commit/b4e5df0ee0024eda3eeb83a6ba822245a30ab428
disable_overlap_scheduler
:
true
print_iter_log
:
true
cache_transceiver_config
:
backend
:
DEFAULT
examples/backends/trtllm/engine_configs/gemma3/vswa_agg.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size
:
1
backend
:
pytorch
kv_cache_config
:
max_attention_window
:
-
512
-
512
-
512
-
512
-
512
-
32768
examples/backends/trtllm/engine_configs/gemma3/vswa_decode.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size
:
1
backend
:
pytorch
kv_cache_config
:
max_attention_window
:
-
512
-
512
-
512
-
512
-
512
-
32768
cache_transceiver_config
:
backend
:
DEFAULT
examples/backends/trtllm/engine_configs/gemma3/vswa_prefill.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size
:
1
backend
:
pytorch
disable_overlap_scheduler
:
true
kv_cache_config
:
max_attention_window
:
-
512
-
512
-
512
-
512
-
512
-
32768
cache_transceiver_config
:
backend
:
DEFAULT
examples/backends/trtllm/engine_configs/gpt-oss-120b/agg.yaml
deleted
100644 → 0
View file @
310f8ca9
examples/backends/trtllm/engine_configs/llama4/eagle/eagle_agg.yml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
backend
:
pytorch
tensor_parallel_size
:
4
moe_expert_parallel_size
:
4
max_batch_size
:
192
max_num_tokens
:
3072
disable_overlap_scheduler
:
false
# Enable Speculative Decoding in the model engine
speculative_config
:
decoding_type
:
Eagle
max_draft_len
:
3
speculative_model_dir
:
nvidia/Llama-4-Maverick-17B-128E-Eagle3
eagle3_one_model
:
true
kv_cache_config
:
free_gpu_memory_fraction
:
0.2
enable_block_reuse
:
false
cuda_graph_config
:
enable_padding
:
true
batch_sizes
:
[
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
16
,
32
,
48
,
64
,
128
,
190
,
191
,
192
]
print_iter_log
:
true
examples/backends/trtllm/engine_configs/llama4/eagle/eagle_decode.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
backend
:
pytorch
tensor_parallel_size
:
4
moe_expert_parallel_size
:
4
max_batch_size
:
256
max_num_tokens
:
1024
# 8704 = 8192 ISL + 512 OSL
max_seq_len
:
8704
disable_overlap_scheduler
:
true
# Enable Speculative Decoding in the model engine
speculative_config
:
decoding_type
:
Eagle
max_draft_len
:
3
speculative_model_dir
:
nvidia/Llama-4-Maverick-17B-128E-Eagle3
eagle3_one_model
:
true
kv_cache_config
:
free_gpu_memory_fraction
:
0.5
enable_block_reuse
:
false
cuda_graph_config
:
enable_padding
:
true
batch_sizes
:
-
1
-
2
-
4
-
8
-
16
-
32
-
64
-
128
-
256
print_iter_log
:
true
cache_transceiver_config
:
backend
:
DEFAULT
examples/backends/trtllm/engine_configs/llama4/eagle/eagle_prefill.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
backend
:
pytorch
tensor_parallel_size
:
4
moe_expert_parallel_size
:
4
max_batch_size
:
1
max_num_tokens
:
8192
max_seq_len
:
8192
print_iter_log
:
true
disable_overlap_scheduler
:
true
# Enable Speculative Decoding in the model engine
speculative_config
:
decoding_type
:
Eagle
max_draft_len
:
3
speculative_model_dir
:
nvidia/Llama-4-Maverick-17B-128E-Eagle3
eagle3_one_model
:
true
kv_cache_config
:
free_gpu_memory_fraction
:
0.5
enable_block_reuse
:
false
cache_transceiver_config
:
backend
:
DEFAULT
examples/backends/trtllm/engine_configs/llama4/multimodal/agg.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size
:
8
moe_expert_parallel_size
:
1
enable_attention_dp
:
false
max_num_tokens
:
4096
max_batch_size
:
8
trust_remote_code
:
true
backend
:
pytorch
enable_chunked_prefill
:
true
kv_cache_config
:
free_gpu_memory_fraction
:
0.3
enable_block_reuse
:
false
cache_transceiver_config
:
backend
:
DEFAULT
# NOTE: pytorch_backend_config section flattened since: https://github.com/NVIDIA/TensorRT-LLM/pull/4603
# NOTE: overlap_scheduler enabled by default since this commit and changed
# config field from 'enable_overlap_scheduler' to 'disable_overlap_scheduler':
# https://github.com/NVIDIA/TensorRT-LLM/commit/b4e5df0ee0024eda3eeb83a6ba822245a30ab428
examples/backends/trtllm/engine_configs/llama4/multimodal/decode.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size
:
8
moe_expert_parallel_size
:
1
enable_attention_dp
:
false
max_num_tokens
:
4096
max_batch_size
:
8
trust_remote_code
:
true
backend
:
pytorch
enable_chunked_prefill
:
true
disable_overlap_scheduler
:
false
kv_cache_config
:
free_gpu_memory_fraction
:
0.20
enable_block_reuse
:
false
cache_transceiver_config
:
backend
:
DEFAULT
\ No newline at end of file
examples/backends/trtllm/engine_configs/llama4/multimodal/prefill.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size
:
8
moe_expert_parallel_size
:
1
enable_attention_dp
:
false
max_num_tokens
:
4096
max_batch_size
:
8
trust_remote_code
:
true
backend
:
pytorch
enable_chunked_prefill
:
true
# Overlap scheduler not currently supported in prefill only workers.
disable_overlap_scheduler
:
true
kv_cache_config
:
free_gpu_memory_fraction
:
0.20
enable_block_reuse
:
false
cache_transceiver_config
:
backend
:
DEFAULT
\ No newline at end of file
examples/backends/trtllm/engine_configs/llava-v1.6-mistral-7b-hf/agg.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size
:
1
moe_expert_parallel_size
:
1
enable_attention_dp
:
false
max_num_tokens
:
8192
max_batch_size
:
16
trust_remote_code
:
true
backend
:
pytorch
enable_chunked_prefill
:
true
kv_cache_config
:
free_gpu_memory_fraction
:
0.60
enable_block_reuse
:
false
cache_transceiver_config
:
backend
:
DEFAULT
examples/backends/trtllm/engine_configs/llava-v1.6-mistral-7b-hf/decode.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size
:
1
moe_expert_parallel_size
:
1
enable_attention_dp
:
false
max_num_tokens
:
8192
max_batch_size
:
16
trust_remote_code
:
true
backend
:
pytorch
enable_chunked_prefill
:
true
disable_overlap_scheduler
:
false
kv_cache_config
:
free_gpu_memory_fraction
:
0.30
enable_block_reuse
:
false
cache_transceiver_config
:
backend
:
DEFAULT
\ No newline at end of file
examples/backends/trtllm/engine_configs/llava-v1.6-mistral-7b-hf/encode.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size
:
1
moe_expert_parallel_size
:
1
enable_attention_dp
:
false
max_num_tokens
:
8192
max_batch_size
:
16
trust_remote_code
:
true
backend
:
pytorch
enable_chunked_prefill
:
true
# Overlap scheduler not currently supported in prefill only workers.
disable_overlap_scheduler
:
true
# Note: kv_cache_config is not needed for encode workers since MultimodalEncoder
# only runs vision encoder + projector and doesn't need KV cache memory.
cache_transceiver_config
:
backend
:
DEFAULT
examples/backends/trtllm/engine_configs/llava-v1.6-mistral-7b-hf/prefill.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size
:
1
moe_expert_parallel_size
:
1
enable_attention_dp
:
false
max_num_tokens
:
8192
max_batch_size
:
16
trust_remote_code
:
true
backend
:
pytorch
enable_chunked_prefill
:
true
# Overlap scheduler not currently supported in prefill only workers.
disable_overlap_scheduler
:
true
kv_cache_config
:
free_gpu_memory_fraction
:
0.30
enable_block_reuse
:
false
cache_transceiver_config
:
backend
:
DEFAULT
\ No newline at end of file
examples/backends/trtllm/engine_configs/qwen2-vl-7b-instruct/decode.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size
:
1
moe_expert_parallel_size
:
1
enable_attention_dp
:
false
max_num_tokens
:
8192
max_batch_size
:
16
trust_remote_code
:
true
backend
:
pytorch
enable_chunked_prefill
:
true
disable_overlap_scheduler
:
false
kv_cache_config
:
free_gpu_memory_fraction
:
0.30
enable_block_reuse
:
false
cache_transceiver_config
:
backend
:
DEFAULT
\ No newline at end of file
examples/backends/trtllm/engine_configs/qwen2-vl-7b-instruct/encode.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size
:
1
moe_expert_parallel_size
:
1
enable_attention_dp
:
false
max_num_tokens
:
8192
trust_remote_code
:
true
backend
:
pytorch
disable_overlap_scheduler
:
false
cuda_graph_config
:
max_batch_size
:
16
kv_cache_config
:
free_gpu_memory_fraction
:
0.85
cache_transceiver_config
:
backend
:
DEFAULT
examples/backends/trtllm/engine_configs/qwen2-vl-7b-instruct/prefill.yaml
deleted
100644 → 0
View file @
310f8ca9
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tensor_parallel_size
:
1
moe_expert_parallel_size
:
1
enable_attention_dp
:
false
max_num_tokens
:
8192
max_batch_size
:
16
trust_remote_code
:
true
backend
:
pytorch
enable_chunked_prefill
:
true
# Overlap scheduler not currently supported in prefill only workers.
disable_overlap_scheduler
:
true
kv_cache_config
:
free_gpu_memory_fraction
:
0.30
enable_block_reuse
:
false
cache_transceiver_config
:
backend
:
DEFAULT
\ No newline at end of file
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment