refactor: move engine configs out of components directory (#3772)

Signed-off-by: Anant Sharma <anants@nvidia.com> Co-authored-by: tanmayv25 <tanmay2592@gmail.com>

refactor: move engine configs out of components directory (#3772)
Signed-off-by: Anant Sharma <anants@nvidia.com> Co-authored-by: tanmayv25 <tanmay2592@gmail.com>
8354d325 · Anant Sharma · GitHub · 90caf3ea · 8354d325 · 8354d325
Unverified Commit 8354d325 authored Oct 24, 2025 by Anant Sharma Committed by GitHub Oct 24, 2025
20 changed files
--- a/benchmarks/router/run_engines.sh
+++ b/benchmarks/router/run_engines.sh
@@ -4,8 +4,10 @@
 # SPDX-License-Identifier: Apache-2.0
 # Parse command-line arguments
+export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
 NUM_WORKERS=8
 MODEL_PATH="deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+RECIPE_PATH="$DYNAMO_HOME/recipes/deepseek-r1-distill-llama-8b/trtllm"
 TENSOR_PARALLEL_SIZE=1
 DATA_PARALLEL_SIZE=1
 USE_MOCKERS=false
@@ -84,13 +86,13 @@ if [ ${#EXTRA_ARGS[@]} -eq 0 ]; then
        )
    elif [ "$USE_TRTLLM" = true ]; then
        # Default args for TensorRT-LLM engine using predefined YAML configs
-        # Config files located at: ../../components/backends/trtllm/engine_configs/{agg,decode,prefill}.yaml
+        # Config files located at: $RECIPE_PATH/{agg,decode,prefill}.yaml
        if [ "$MODE" = "prefill" ]; then
-            ENGINE_CONFIG="../../components/backends/trtllm/engine_configs/prefill.yaml"
+            ENGINE_CONFIG="$RECIPE_PATH/prefill.yaml"
        elif [ "$MODE" = "decode" ]; then
-            ENGINE_CONFIG="../../components/backends/trtllm/engine_configs/decode.yaml"
+            ENGINE_CONFIG="$RECIPE_PATH/decode.yaml"
        else
-            ENGINE_CONFIG="../../components/backends/trtllm/engine_configs/agg.yaml"
+            ENGINE_CONFIG="$RECIPE_PATH/agg.yaml"
        fi
        EXTRA_ARGS=(

--- a/components/backends/trtllm/deploy/agg-with-config.yaml
+++ b/components/backends/trtllm/deploy/agg-with-config.yaml
@@ -55,7 +55,7 @@ spec:
          # mount the configmap as a volume
          volumeMounts:
          - name: nvidia-config
-            mountPath: /workspace/components/backends/trtllm/engine_configs
+            mountPath: /workspace/
            readOnly: true
          command:
          - python3
@@ -67,4 +67,4 @@ spec:
            - --served-model-name
            - Qwen/Qwen3-0.6B
            - --extra-engine-args
-            - engine_configs/agg.yaml
+            - ./recipes/qwen3/trtllm/agg.yaml
--- a/components/backends/trtllm/deploy/agg.yaml
+++ b/components/backends/trtllm/deploy/agg.yaml
@@ -25,7 +25,7 @@ spec:
      extraPodSpec:
        mainContainer:
          image: my-registry/trtllm-runtime:my-tag
-          workingDir: /workspace/components/backends/trtllm
+          workingDir: /workspace/
          command:
          - python3
          - -m
@@ -36,4 +36,4 @@ spec:
            - --served-model-name
            - Qwen/Qwen3-0.6B
            - --extra-engine-args
-            - engine_configs/agg.yaml
+            - ./recipes/qwen3/trtllm/agg.yaml
--- a/components/backends/trtllm/deploy/agg_router.yaml
+++ b/components/backends/trtllm/deploy/agg_router.yaml
@@ -28,7 +28,7 @@ spec:
      extraPodSpec:
        mainContainer:
          image: my-registry/trtllm-runtime:my-tag
-          workingDir: /workspace/components/backends/trtllm
+          workingDir: /workspace/
          command:
          - python3
          - -m
@@ -39,5 +39,5 @@ spec:
            - --served-model-name
            - Qwen/Qwen3-0.6B
            - --extra-engine-args
-            - engine_configs/agg.yaml
+            - ./recipes/qwen3/trtllm/agg.yaml
            - --publish-events-and-metrics
--- a/components/backends/trtllm/deploy/disagg-multinode.yaml
+++ b/components/backends/trtllm/deploy/disagg-multinode.yaml
@@ -125,10 +125,10 @@ spec:
        mainContainer:
          volumeMounts:
            - name: nvidia-config
-              mountPath: /workspace/components/backends/trtllm/engine_configs
+              mountPath: /workspace/
              readOnly: true
          image: my-registry/trtllm-runtime:my-tag
-          workingDir: /workspace/components/backends/trtllm
+          workingDir: /workspace/
          command:
          - python3
          - -m
@@ -139,7 +139,7 @@ spec:
            - --served-model-name
            - Qwen/Qwen3-0.6B
            - --extra-engine-args
-            - engine_configs/prefill.yaml
+            - ./recipes/qwen3/trtllm/prefill.yaml
            - --disaggregation-mode
            - prefill
            - --disaggregation-strategy
@@ -165,10 +165,10 @@ spec:
        mainContainer:
          volumeMounts:
            - name: nvidia-config
-              mountPath: /workspace/components/backends/trtllm/engine_configs
+              mountPath: /workspace/
              readOnly: true
          image: my-registry/trtllm-runtime:my-tag
-          workingDir: /workspace/components/backends/trtllm
+          workingDir: /workspace/
          command:
          - python3
          - -m
@@ -179,7 +179,7 @@ spec:
            - --served-model-name
            - Qwen/Qwen3-0.6B
            - --extra-engine-args
-            - engine_configs/decode.yaml
+            - ./recipes/qwen3/trtllm/decode.yaml
            - --disaggregation-mode
            - decode
            - --disaggregation-strategy

--- a/components/backends/trtllm/deploy/disagg.yaml
+++ b/components/backends/trtllm/deploy/disagg.yaml
@@ -26,7 +26,7 @@ spec:
      extraPodSpec:
        mainContainer:
          image: my-registry/trtllm-runtime:my-tag
-          workingDir: /workspace/components/backends/trtllm
+          workingDir: /workspace/
          command:
          - python3
          - -m
@@ -37,7 +37,7 @@ spec:
            - --served-model-name
            - Qwen/Qwen3-0.6B
            - --extra-engine-args
-            - engine_configs/prefill.yaml
+            - ./recipes/qwen3/trtllm/prefill.yaml
            - --disaggregation-mode
            - prefill
            - --disaggregation-strategy
@@ -54,7 +54,7 @@ spec:
      extraPodSpec:
        mainContainer:
          image: my-registry/trtllm-runtime:my-tag
-          workingDir: /workspace/components/backends/trtllm
+          workingDir: /workspace/
          command:
          - python3
          - -m
@@ -65,7 +65,7 @@ spec:
            - --served-model-name
            - Qwen/Qwen3-0.6B
            - --extra-engine-args
-            - engine_configs/decode.yaml
+            - ./recipes/qwen3/trtllm/decode.yaml
            - --disaggregation-mode
            - decode
            - --disaggregation-strategy

--- a/components/backends/trtllm/deploy/disagg_planner.yaml
+++ b/components/backends/trtllm/deploy/disagg_planner.yaml
@@ -86,7 +86,7 @@ spec:
        terminationGracePeriodSeconds: 600
        mainContainer:
          image: my-registry/trtllm-runtime:my-tag
-          workingDir: /workspace/components/backends/trtllm
+          workingDir: /workspace/
          command:
            - python3
          args:
@@ -97,7 +97,7 @@ spec:
            - --served-model-name
            - Qwen/Qwen3-0.6B
            - --extra-engine-args
-            - engine_configs/decode.yaml
+            - ./recipes/qwen3/trtllm/decode.yaml
            - --disaggregation-mode
            - decode
            - --disaggregation-strategy
@@ -115,7 +115,7 @@ spec:
        terminationGracePeriodSeconds: 600
        mainContainer:
          image: my-registry/trtllm-runtime:my-tag
-          workingDir: /workspace/components/backends/trtllm
+          workingDir: /workspace/
          command:
            - python3
          args:
@@ -126,7 +126,7 @@ spec:
            - --served-model-name
            - Qwen/Qwen3-0.6B
            - --extra-engine-args
-            - engine_configs/prefill.yaml
+            - ./recipes/qwen3/trtllm/prefill.yaml
            - --disaggregation-mode
            - prefill
            - --disaggregation-strategy

--- a/components/backends/trtllm/deploy/disagg_router.yaml
+++ b/components/backends/trtllm/deploy/disagg_router.yaml
@@ -28,7 +28,7 @@ spec:
      extraPodSpec:
        mainContainer:
          image: my-registry/trtllm-runtime:my-tag
-          workingDir: /workspace/components/backends/trtllm
+          workingDir: /workspace/
          command:
          - python3
          - -m
@@ -39,7 +39,7 @@ spec:
            - --served-model-name
            - Qwen/Qwen3-0.6B
            - --extra-engine-args
-            - engine_configs/prefill.yaml
+            - ./recipes/qwen3/trtllm/prefill.yaml
            - --disaggregation-mode
            - prefill
            - --disaggregation-strategy
@@ -56,7 +56,7 @@ spec:
      extraPodSpec:
        mainContainer:
          image: my-registry/trtllm-runtime:my-tag
-          workingDir: /workspace/components/backends/trtllm
+          workingDir: /workspace/
          command:
          - python3
          - -m
@@ -67,7 +67,7 @@ spec:
            - --served-model-name
            - Qwen/Qwen3-0.6B
            - --extra-engine-args
-            - engine_configs/decode.yaml
+            - ./recipes/qwen3/trtllm/decode.yaml
            - --disaggregation-mode
            - decode
            - --disaggregation-strategy

--- a/components/backends/trtllm/launch/agg.sh
+++ b/components/backends/trtllm/launch/agg.sh
@@ -3,9 +3,10 @@
 # SPDX-License-Identifier: Apache-2.0
 # Environment variables with defaults
+export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
 export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen3-0.6B"}
 export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen3-0.6B"}
-export AGG_ENGINE_ARGS=${AGG_ENGINE_ARGS:-"engine_configs/agg.yaml"}
+export AGG_ENGINE_ARGS=${AGG_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/agg.yaml"}
 export MODALITY=${MODALITY:-"text"}
 # If you want to use multimodal, set MODALITY to "multimodal"
 #export MODALITY=${MODALITY:-"multimodal"}

--- a/components/backends/trtllm/launch/agg_metrics.sh
+++ b/components/backends/trtllm/launch/agg_metrics.sh
@@ -3,9 +3,10 @@
 # SPDX-License-Identifier: Apache-2.0
 # Environment variables with defaults
+export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
 export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen3-0.6B"}
 export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen3-0.6B"}
-export AGG_ENGINE_ARGS=${AGG_ENGINE_ARGS:-"engine_configs/agg.yaml"}
+export AGG_ENGINE_ARGS=${AGG_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/agg.yaml"}
 export MODALITY=${MODALITY:-"text"}
 # Setup cleanup trap

--- a/components/backends/trtllm/launch/agg_router.sh
+++ b/components/backends/trtllm/launch/agg_router.sh
@@ -3,9 +3,10 @@
 # SPDX-License-Identifier: Apache-2.0
 # Environment variables with defaults
+export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
 export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen3-0.6B"}
 export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen3-0.6B"}
-export AGG_ENGINE_ARGS=${AGG_ENGINE_ARGS:-"engine_configs/agg.yaml"}
+export AGG_ENGINE_ARGS=${AGG_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/agg.yaml"}
 # Setup cleanup trap
 cleanup() {

--- a/components/backends/trtllm/launch/disagg.sh
+++ b/components/backends/trtllm/launch/disagg.sh
@@ -3,11 +3,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # Environment variables with defaults
+export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
 export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen3-0.6B"}
 export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen3-0.6B"}
 export DISAGGREGATION_STRATEGY=${DISAGGREGATION_STRATEGY:-"decode_first"}
-export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"engine_configs/prefill.yaml"}
+export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/prefill.yaml"}
-export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"engine_configs/decode.yaml"}
+export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/decode.yaml"}
 export PREFILL_CUDA_VISIBLE_DEVICES=${PREFILL_CUDA_VISIBLE_DEVICES:-"0"}
 export DECODE_CUDA_VISIBLE_DEVICES=${DECODE_CUDA_VISIBLE_DEVICES:-"1"}
 export MODALITY=${MODALITY:-"text"}

--- a/components/backends/trtllm/launch/disagg_router.sh
+++ b/components/backends/trtllm/launch/disagg_router.sh
@@ -3,11 +3,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # Environment variables with defaults
+export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
 export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen3-0.6B"}
 export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen3-0.6B"}
 export DISAGGREGATION_STRATEGY=${DISAGGREGATION_STRATEGY:-"prefill_first"}
-export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"engine_configs/prefill.yaml"}
+export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/prefill.yaml"}
-export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"engine_configs/decode.yaml"}
+export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/decode.yaml"}
 export PREFILL_CUDA_VISIBLE_DEVICES=${PREFILL_CUDA_VISIBLE_DEVICES:-"0"}
 export DECODE_CUDA_VISIBLE_DEVICES=${DECODE_CUDA_VISIBLE_DEVICES:-"1"}

--- a/components/backends/trtllm/launch/epd_disagg.sh
+++ b/components/backends/trtllm/launch/epd_disagg.sh
@@ -3,12 +3,13 @@
 # SPDX-License-Identifier: Apache-2.0
 # Environment variables with defaults
+export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
 export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen2-VL-7B-Instruct"}
 export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen2-VL-7B-Instruct"}
 export DISAGGREGATION_STRATEGY=${DISAGGREGATION_STRATEGY:-"decode_first"}
-export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"engine_configs/prefill.yaml"}
+export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen2-vl-7b-instruct/trtllm/prefill.yaml"}
-export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"engine_configs/decode.yaml"}
+export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen2-vl-7b-instruct/trtllm/decode.yaml"}
-export ENCODE_ENGINE_ARGS=${ENCODE_ENGINE_ARGS:-"engine_configs/encode.yaml"}
+export ENCODE_ENGINE_ARGS=${ENCODE_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen2-vl-7b-instruct/trtllm/encode.yaml"}
 export PREFILL_CUDA_VISIBLE_DEVICES=${PREFILL_CUDA_VISIBLE_DEVICES:-"0"}
 export DECODE_CUDA_VISIBLE_DEVICES=${DECODE_CUDA_VISIBLE_DEVICES:-"1"}
 export ENCODE_CUDA_VISIBLE_DEVICES=${ENCODE_CUDA_VISIBLE_DEVICES:-"2"}

--- a/components/backends/trtllm/launch/gpt_oss_disagg.sh
+++ b/components/backends/trtllm/launch/gpt_oss_disagg.sh
@@ -3,11 +3,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # Environment variables with defaults
+export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
 export MODEL_PATH=${MODEL_PATH:-"/model"}
 export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"openai/gpt-oss-120b"}
 export DISAGGREGATION_STRATEGY=${DISAGGREGATION_STRATEGY:-"prefill_first"}
-export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"engine_configs/gpt_oss/prefill.yaml"}
+export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/gpt-oss-120b/trtllm/disagg/prefill.yaml"}
-export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"engine_configs/gpt_oss/decode.yaml"}
+export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/gpt-oss-120b/trtllm/disagg/decode.yaml"}
 set -e
 trap 'echo Cleaning up...; kill 0' EXIT

--- a/container/Dockerfile.trtllm
+++ b/container/Dockerfile.trtllm
@@ -272,6 +272,7 @@ COPY examples /workspace/examples
 COPY benchmarks /workspace/benchmarks
 COPY deploy /workspace/deploy
 COPY components/ /workspace/components/
+COPY recipes/ /workspace/recipes/
 # Copy attribution files
 COPY ATTRIBUTION* LICENSE /workspace/

--- a/docs/backends/trtllm/README.md
+++ b/docs/backends/trtllm/README.md
@@ -162,7 +162,7 @@ cd $DYNAMO_HOME/components/backends/trtllm
 ```bash
 cd $DYNAMO_HOME/components/backends/trtllm
-export AGG_ENGINE_ARGS=./engine_configs/deepseek_r1/mtp/mtp_agg.yaml
+export AGG_ENGINE_ARGS=$DYNAMO_HOME/recipes/deepseek-r1/trtllm/mtp/mtp_agg.yaml
 export SERVED_MODEL_NAME="nvidia/DeepSeek-R1-FP4"
 # nvidia/DeepSeek-R1-FP4 is a large model
 export MODEL_PATH="nvidia/DeepSeek-R1-FP4"

--- a/docs/backends/trtllm/gemma3_sliding_window_attention.md
+++ b/docs/backends/trtllm/gemma3_sliding_window_attention.md
@@ -30,7 +30,7 @@ VSWA is a mechanism in which a model’s layers alternate between multiple slidi
 cd $DYNAMO_HOME/components/backends/trtllm
 export MODEL_PATH=google/gemma-3-1b-it
 export SERVED_MODEL_NAME=$MODEL_PATH
-export AGG_ENGINE_ARGS=engine_configs/gemma3/vswa_agg.yaml
+export AGG_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_agg.yaml
 ./launch/agg.sh
 ```
@@ -39,7 +39,7 @@ export AGG_ENGINE_ARGS=engine_configs/gemma3/vswa_agg.yaml
 cd $DYNAMO_HOME/components/backends/trtllm
 export MODEL_PATH=google/gemma-3-1b-it
 export SERVED_MODEL_NAME=$MODEL_PATH
-export AGG_ENGINE_ARGS=engine_configs/gemma3/vswa_agg.yaml
+export AGG_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_agg.yaml
 ./launch/agg_router.sh
 ```
@@ -48,8 +48,8 @@ export AGG_ENGINE_ARGS=engine_configs/gemma3/vswa_agg.yaml
 cd $DYNAMO_HOME/components/backends/trtllm
 export MODEL_PATH=google/gemma-3-1b-it
 export SERVED_MODEL_NAME=$MODEL_PATH
-export PREFILL_ENGINE_ARGS=engine_configs/gemma3/vswa_prefill.yaml
+export PREFILL_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_prefill.yaml
-export DECODE_ENGINE_ARGS=engine_configs/gemma3/vswa_decode.yaml
+export DECODE_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_decode.yaml
 ./launch/disagg.sh
 ```
@@ -58,7 +58,7 @@ export DECODE_ENGINE_ARGS=engine_configs/gemma3/vswa_decode.yaml
 cd $DYNAMO_HOME/components/backends/trtllm
 export MODEL_PATH=google/gemma-3-1b-it
 export SERVED_MODEL_NAME=$MODEL_PATH
-export PREFILL_ENGINE_ARGS=engine_configs/gemma3/vswa_prefill.yaml
+export PREFILL_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_prefill.yaml
-export DECODE_ENGINE_ARGS=engine_configs/gemma3/vswa_decode.yaml
+export DECODE_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_decode.yaml
 ./launch/disagg_router.sh
 ```
--- a/docs/backends/trtllm/gpt-oss.md
+++ b/docs/backends/trtllm/gpt-oss.md
@@ -90,14 +90,14 @@ The deployment uses configuration files and command-line arguments to control be
 #### Configuration Files
-**Prefill Configuration (`engine_configs/gpt_oss/prefill.yaml`)**:
+**Prefill Configuration (`recipes/gpt-oss-120b/trtllm/disagg/prefill.yaml`)**:
 - `enable_attention_dp: false` - Attention data parallelism disabled for prefill
 - `enable_chunked_prefill: true` - Enables efficient chunked prefill processing
 - `moe_config.backend: CUTLASS` - Uses optimized CUTLASS kernels for MoE layers
 - `cache_transceiver_config.backend: ucx` - Uses UCX for efficient KV cache transfer
 - `cuda_graph_config.max_batch_size: 32` - Maximum batch size for CUDA graphs
-**Decode Configuration (`engine_configs/gpt_oss/decode.yaml`)**:
+**Decode Configuration (`recipes/gpt-oss-120b/trtllm/disagg/decode.yaml`)**:
 - `enable_attention_dp: true` - Attention data parallelism enabled for decode
 - `disable_overlap_scheduler: false` - Enables overlapping for decode efficiency
 - `moe_config.backend: CUTLASS` - Uses optimized CUTLASS kernels for MoE layers
@@ -147,7 +147,7 @@ python3 -m dynamo.frontend --router-mode round-robin --http-port 8000 &
 CUDA_VISIBLE_DEVICES=0,1,2,3 python3 -m dynamo.trtllm \
  --model-path /model \
  --served-model-name openai/gpt-oss-120b \
-  --extra-engine-args engine_configs/gpt_oss/prefill.yaml \
+  --extra-engine-args recipes/gpt-oss-120b/trtllm/disagg/prefill.yaml \
  --dyn-reasoning-parser gpt_oss \
  --dyn-tool-call-parser harmony \
  --disaggregation-mode prefill \
@@ -164,7 +164,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 python3 -m dynamo.trtllm \
 CUDA_VISIBLE_DEVICES=4,5,6,7 python3 -m dynamo.trtllm \
  --model-path /model \
  --served-model-name openai/gpt-oss-120b \
-  --extra-engine-args engine_configs/gpt_oss/decode.yaml \
+  --extra-engine-args recipes/gpt-oss-120b/trtllm/disagg/decode.yaml \
  --dyn-reasoning-parser gpt_oss \
  --dyn-tool-call-parser harmony \
  --disaggregation-mode decode \

--- a/docs/backends/trtllm/llama4_plus_eagle.md
+++ b/docs/backends/trtllm/llama4_plus_eagle.md
@@ -30,7 +30,7 @@ This guide demonstrates how to deploy Llama 4 Maverick Instruct with Eagle Specu
 For advanced control over how requests are routed between prefill and decode workers in disaggregated mode, refer to the [Disaggregation Strategy](./README.md#disaggregation-strategy) section.
 ## Notes
-* Make sure the (`eagle3_one_model: true`) is set in the LLM API config inside the `engine_configs/llama4/eagle` folder.
+* Make sure that `eagle3_one_model: true` is set in the LLM API config inside the `recipes/llama4/trtllm/eagle` folder.
 ## Setup
@@ -54,7 +54,7 @@ See [this](./multinode/multinode-examples.md#setup) section from multinode guide
 ## Aggregated Serving
 ```bash
 export NUM_NODES=1
-export ENGINE_CONFIG="/mnt/engine_configs/llama4/eagle/eagle_agg.yaml"
+export ENGINE_CONFIG="/mnt/recipes/llama4/trtllm/eagle/eagle_agg.yml"
 ./multinode/srun_aggregated.sh
 ```
@@ -62,9 +62,9 @@ export ENGINE_CONFIG="/mnt/engine_configs/llama4/eagle/eagle_agg.yaml"
 ```bash
 export NUM_PREFILL_NODES=1
-export PREFILL_ENGINE_CONFIG="/mnt/engine_configs/llama4/eagle/eagle_prefill.yaml"
+export PREFILL_ENGINE_CONFIG="/mnt/recipes/llama4/trtllm/eagle/eagle_prefill.yaml"
 export NUM_DECODE_NODES=1
-export DECODE_ENGINE_CONFIG="/mnt/engine_configs/llama4/eagle/eagle_decode.yaml"
+export DECODE_ENGINE_CONFIG="/mnt/recipes/llama4/trtllm/eagle/eagle_decode.yaml"
 ./multinode/srun_disaggregated.sh
 ```