Unverified Commit 88dfd1b3 authored by Ben Hamm's avatar Ben Hamm Committed by GitHub
Browse files

docs: Clean up incomplete recipes and clarify Kubernetes-only focus (#4159)


Signed-off-by: default avatarBen Hamm <ben.hamm@gmail.com>
Signed-off-by: default avatarTanmay Verma <tanmay2592@gmail.com>
Signed-off-by: default avataratchernych <atchernych@nvidia.com>
Co-authored-by: default avatarBiswa Panda <biswa.panda@gmail.com>
Co-authored-by: default avatartanmayv25 <tanmay2592@gmail.com>
Co-authored-by: default avatarTanmay Verma <tanmayv@nvidia.com>
Co-authored-by: default avatarAnant Sharma <anants@nvidia.com>
Co-authored-by: default avataratchernych <atchernych@nvidia.com>
parent 09bb1c68
......@@ -11,7 +11,7 @@ moe_config:
# moe_max_num_tokens = max_batch_size * moe_expert_parallel_size
# 4096 = 256 * 16
# moe_max_num_tokens: 4096
load_balancer: /mnt/recipes/deepseek-r1/trtllm/agg/wide_ep/eplb.yaml
load_balancer: /mnt/examples/backends/trtllm/engine_configs/deepseek-r1/agg/wide_ep/eplb.yaml
tensor_parallel_size: 16
moe_expert_parallel_size: 16
......
......@@ -17,7 +17,7 @@ backend: pytorch
# WideEP related settings
moe_config:
backend: WIDEEP
load_balancer: /mnt/recipes/deepseek-r1/trtllm/disagg/wide_ep/eplb.yaml
load_balancer: /mnt/examples/backends/trtllm/engine_configs/deepseek-r1/disagg/wide_ep/eplb.yaml
# TP/EP/PP/DP
tensor_parallel_size: 16
......
......@@ -17,7 +17,7 @@ backend: pytorch
# WideEP related settings
moe_config:
backend: WIDEEP
load_balancer: /mnt/recipes/deepseek-r1/trtllm/disagg/wide_ep/eplb.yaml
load_balancer: /mnt/examples/backends/trtllm/engine_configs/deepseek-r1/disagg/wide_ep/eplb.yaml
# TP/EP/PP/DP
tensor_parallel_size: 16
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
enable_attention_dp: true
disable_overlap_scheduler: false
moe_config:
backend: CUTLASS
cuda_graph_config:
enable_padding: true
cache_transceiver_config:
backend: UCX
max_tokens_in_buffer: 65536
print_iter_log: false
stream_interval: 10
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
enable_attention_dp: false
disable_overlap_scheduler: true
moe_config:
backend: CUTLASS
enable_chunked_prefill: true
cuda_graph_config:
max_batch_size: 32
enable_padding: true
cache_transceiver_config:
backend: UCX
max_tokens_in_buffer: 65536
print_iter_log: false
stream_interval: 10
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment