fix: operator defaults (#2398)

Signed-off-by: mohammedabdulwahhab <furkhan324@berkeley.edu>

fix: operator defaults (#2398)
Signed-off-by: mohammedabdulwahhab <furkhan324@berkeley.edu>
81c27803 · mohammedabdulwahhab · GitHub · 9ddb3efd · 81c27803 · 81c27803
Unverified commit 81c27803, authored Aug 14, 2025 by mohammedabdulwahhab; committed by GitHub on Aug 14, 2025.
20 changed files
--- a/components/backends/sglang/deploy/agg.yaml
+++ b/components/backends/sglang/deploy/agg.yaml
@@ -8,26 +8,8 @@ metadata:
 spec:
  services:
    Frontend:
-      livenessProbe:
-        httpGet:
-          path: /health
-          port: 8000
-        initialDelaySeconds: 20
-        periodSeconds: 5
-        timeoutSeconds: 5
-        failureThreshold: 3
-      readinessProbe:
-        exec:
-          command:
-            - /bin/sh
-            - -c
-            - 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
-        initialDelaySeconds: 60
-        periodSeconds: 60
-        timeoutSeconds: 30
-        failureThreshold: 10
      dynamoNamespace: sglang-agg
-      componentType: main
+      componentType: frontend
      replicas: 1
      resources:
        requests:
@@ -45,21 +27,6 @@ spec:
            - "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-agg && python3 -m dynamo.frontend --http-port=8000"
    SGLangDecodeWorker:
      envFromSecret: hf-token-secret
-      livenessProbe:
-        httpGet:
-          path: /live
-          port: 9090
-        periodSeconds: 5
-        timeoutSeconds: 30
-        failureThreshold: 1
-      readinessProbe:
-        exec:
-        httpGet:
-          path: /health
-          port: 9090
-        periodSeconds: 10
-        timeoutSeconds: 30
-        failureThreshold: 60
      dynamoNamespace: sglang-agg
      componentType: worker
      replicas: 1
@@ -72,21 +39,8 @@ spec:
          cpu: "32"
          memory: "80Gi"
          gpu: "1"
-      envs:
-        - name: DYN_SYSTEM_ENABLED
-          value: "true"
-        - name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
-          value: "[\"generate\"]"
-        - name: DYN_SYSTEM_PORT
-          value: "9090"
      extraPodSpec:
        mainContainer:
-          startupProbe:
-            httpGet:
-              path: /live
-              port: 9090
-            periodSeconds: 10
-            failureThreshold: 60
          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          command:

--- a/components/backends/sglang/deploy/agg_router.yaml
+++ b/components/backends/sglang/deploy/agg_router.yaml
@@ -8,26 +8,8 @@ metadata:
 spec:
  services:
    Frontend:
-      livenessProbe:
-        httpGet:
-          path: /health
-          port: 8000
-        initialDelaySeconds: 20
-        periodSeconds: 5
-        timeoutSeconds: 5
-        failureThreshold: 3
-      readinessProbe:
-        exec:
-          command:
-            - /bin/sh
-            - -c
-            - 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
-        initialDelaySeconds: 60
-        periodSeconds: 60
-        timeoutSeconds: 30
-        failureThreshold: 10
      dynamoNamespace: sglang-agg-router
-      componentType: main
+      componentType: frontend
      replicas: 1
      resources:
        requests:
@@ -45,21 +27,6 @@ spec:
            - "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-agg-router && python3 -m dynamo.frontend --http-port=8000  --router-mode kv"
    SGLangDecodeWorker:
      envFromSecret: hf-token-secret
-      livenessProbe:
-        httpGet:
-          path: /live
-          port: 9090
-        periodSeconds: 5
-        timeoutSeconds: 30
-        failureThreshold: 1
-      readinessProbe:
-        exec:
-        httpGet:
-          path: /health
-          port: 9090
-        periodSeconds: 10
-        timeoutSeconds: 30
-        failureThreshold: 60
      dynamoNamespace: sglang-agg-router
      componentType: worker
      replicas: 1
@@ -72,21 +39,8 @@ spec:
          cpu: "32"
          memory: "80Gi"
          gpu: "1"
-      envs:
-        - name: DYN_SYSTEM_ENABLED
-          value: "true"
-        - name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
-          value: "[\"generate\"]"
-        - name: DYN_SYSTEM_PORT
-          value: "9090"
      extraPodSpec:
        mainContainer:
-          startupProbe:
-            httpGet:
-              path: /live
-              port: 9090
-            periodSeconds: 10
-            failureThreshold: 60
          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          command:

--- a/components/backends/sglang/deploy/disagg.yaml
+++ b/components/backends/sglang/deploy/disagg.yaml
@@ -8,26 +8,8 @@ metadata:
 spec:
  services:
    Frontend:
-      livenessProbe:
-        httpGet:
-          path: /health
-          port: 8000
-        initialDelaySeconds: 20
-        periodSeconds: 5
-        timeoutSeconds: 5
-        failureThreshold: 3
-      readinessProbe:
-        exec:
-          command:
-            - /bin/sh
-            - -c
-            - 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
-        initialDelaySeconds: 60
-        periodSeconds: 60
-        timeoutSeconds: 30
-        failureThreshold: 10
      dynamoNamespace: sglang-disagg
-      componentType: main
+      componentType: frontend
      replicas: 1
      resources:
        requests:
@@ -45,21 +27,6 @@ spec:
            - "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-disagg && python3 -m dynamo.frontend --http-port=8000"
    SGLangDecodeWorker:
      envFromSecret: hf-token-secret
-      livenessProbe:
-        httpGet:
-          path: /live
-          port: 9090
-        periodSeconds: 5
-        timeoutSeconds: 30
-        failureThreshold: 1
-      readinessProbe:
-        exec:
-        httpGet:
-          path: /health
-          port: 9090
-        periodSeconds: 10
-        timeoutSeconds: 30
-        failureThreshold: 60
      dynamoNamespace: sglang-disagg
      componentType: worker
      replicas: 1
@@ -72,21 +39,8 @@ spec:
          cpu: "32"
          memory: "80Gi"
          gpu: "1"
-      envs:
-        - name: DYN_SYSTEM_ENABLED
-          value: "true"
-        - name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
-          value: "[\"generate\"]"
-        - name: DYN_SYSTEM_PORT
-          value: "9090"
      extraPodSpec:
        mainContainer:
-          startupProbe:
-            httpGet:
-              path: /live
-              port: 9090
-            periodSeconds: 10
-            failureThreshold: 60
          image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0808-07
          workingDir: /workspace/components/backends/sglang
          command:
@@ -112,21 +66,6 @@ spec:
            - "nixl"
    SGLangPrefillWorker:
      envFromSecret: hf-token-secret
-      livenessProbe:
-        httpGet:
-          path: /live
-          port: 9090
-        periodSeconds: 5
-        timeoutSeconds: 30
-        failureThreshold: 1
-      readinessProbe:
-        exec:
-        httpGet:
-          path: /health
-          port: 9090
-        periodSeconds: 10
-        timeoutSeconds: 30
-        failureThreshold: 60
      dynamoNamespace: sglang-disagg
      componentType: worker
      replicas: 1
@@ -139,21 +78,8 @@ spec:
          cpu: "32"
          memory: "80Gi"
          gpu: "1"
-      envs:
-        - name: DYN_SYSTEM_ENABLED
-          value: "true"
-        - name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
-          value: "[\"generate\"]"
-        - name: DYN_SYSTEM_PORT
-          value: "9090"
      extraPodSpec:
        mainContainer:
-          startupProbe:
-            httpGet:
-              path: /health
-              port: 9090
-            periodSeconds: 10
-            failureThreshold: 60
          image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0808-07
          workingDir: /workspace/components/backends/sglang
          command:

--- a/components/backends/sglang/deploy/disagg_planner.yaml
+++ b/components/backends/sglang/deploy/disagg_planner.yaml
@@ -16,25 +16,7 @@ spec:
  services:
    Frontend:
      dynamoNamespace: dynamo
-      livenessProbe:
+      componentType: frontend
-        httpGet:
-          path: /health
-          port: 8000
-        initialDelaySeconds: 20
-        periodSeconds: 5
-        timeoutSeconds: 5
-        failureThreshold: 3
-      readinessProbe:
-        exec:
-          command:
-            - /bin/sh
-            - -c
-            - 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
-        initialDelaySeconds: 60
-        periodSeconds: 60
-        timeoutSeconds: 30
-        failureThreshold: 10
-      componentType: main
      replicas: 1
      resources:
        requests:
@@ -97,9 +79,9 @@ spec:
            - --backend=sglang
            - --adjustment-interval=60
            - --profile-results-dir=/workspace/profiling_results
-    Prometheus:
+    Prometheus: # NOTE: componentType is set to "frontend" on Prometheus only to ensure a Service is created for the Prometheus component. This is a workaround and should be managed differently.
      dynamoNamespace: dynamo
-      componentType: main
+      componentType: frontend
      replicas: 1
      envs:
        - name: PYTHONPATH
@@ -142,20 +124,6 @@ spec:
    SGLangDecodeWorker:
      dynamoNamespace: dynamo
      envFromSecret: hf-token-secret
-      livenessProbe:
-        httpGet:
-          path: /live
-          port: 9090
-        periodSeconds: 5
-        timeoutSeconds: 30
-        failureThreshold: 1
-      readinessProbe:
-        httpGet:
-          path: /health
-          port: 9090
-        periodSeconds: 10
-        timeoutSeconds: 30
-        failureThreshold: 60
      componentType: worker
      replicas: 2
      resources:
@@ -167,21 +135,8 @@ spec:
          cpu: "32"
          memory: "80Gi"
          gpu: "1"
-      envs:
-        - name: DYN_SYSTEM_ENABLED
-          value: "true"
-        - name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
-          value: "[\"generate\"]"
-        - name: DYN_SYSTEM_PORT
-          value: "9090"
      extraPodSpec:
        mainContainer:
-          startupProbe:
-            httpGet:
-              path: /live
-              port: 9090
-            periodSeconds: 10
-            failureThreshold: 60
          image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
          workingDir: /workspace/components/backends/sglang
          args:
@@ -205,20 +160,6 @@ spec:
    SGLangPrefillWorker:
      dynamoNamespace: dynamo
      envFromSecret: hf-token-secret
-      livenessProbe:
-        httpGet:
-          path: /live
-          port: 9090
-        periodSeconds: 5
-        timeoutSeconds: 30
-        failureThreshold: 1
-      readinessProbe:
-        httpGet:
-          path: /health
-          port: 9090
-        periodSeconds: 10
-        timeoutSeconds: 30
-        failureThreshold: 60
      componentType: worker
      replicas: 2
      resources:
@@ -230,21 +171,8 @@ spec:
          cpu: "32"
          memory: "80Gi"
          gpu: "1"
-      envs:
-        - name: DYN_SYSTEM_ENABLED
-          value: "true"
-        - name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
-          value: "[\"generate\"]"
-        - name: DYN_SYSTEM_PORT
-          value: "9090"
      extraPodSpec:
        mainContainer:
-          startupProbe:
-            httpGet:
-              path: /health
-              port: 9090
-            periodSeconds: 10
-            failureThreshold: 60
          image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
          workingDir: /workspace/components/backends/sglang
          args:

--- a/components/backends/trtllm/deploy/agg.yaml
+++ b/components/backends/trtllm/deploy/agg.yaml
@@ -9,26 +9,7 @@ spec:
  services:
    Frontend:
      dynamoNamespace: trtllm-agg
-      componentType: main
+      componentType: frontend
-      livenessProbe:
-        exec:
-          command:
-            - /bin/sh
-            - -c
-            - 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
-        periodSeconds: 5
-        timeoutSeconds: 3
-        failureThreshold: 3
-      readinessProbe:
-        exec:
-          command:
-            - /bin/sh
-            - -c
-            - 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
-        initialDelaySeconds: 60
-        periodSeconds: 60
-        timeoutSeconds: 3
-        failureThreshold: 10
      replicas: 1
      resources:
        requests:
@@ -48,20 +29,6 @@ spec:
            - "python3 -m dynamo.frontend --http-port 8000"
    TRTLLMWorker:
      envFromSecret: hf-token-secret
-      livenessProbe:
-        httpGet:
-          path: /live
-          port: 9090
-        periodSeconds: 5
-        timeoutSeconds: 3
-        failureThreshold: 3
-      readinessProbe:
-        httpGet:
-          path: /health
-          port: 9090
-        periodSeconds: 10
-        timeoutSeconds: 3
-        failureThreshold: 60
      dynamoNamespace: trtllm-agg
      componentType: worker
      replicas: 1
@@ -74,22 +41,8 @@ spec:
          cpu: "10"
          memory: "20Gi"
          gpu: "1"
-      envs:
-        - name: DYN_SYSTEM_ENABLED
-          value: "true"
-        - name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
-          value: "[\"generate\"]"
-        - name: DYN_SYSTEM_PORT
-          value: "9090"
      extraPodSpec:
        mainContainer:
-          startupProbe:
-            httpGet:
-              path: /health
-              port: 9090
-            periodSeconds: 10
-            timeoutSeconds: 3
-            failureThreshold: 60
          image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
          workingDir: /workspace/components/backends/trtllm
          args:

--- a/components/backends/trtllm/deploy/agg_router.yaml
+++ b/components/backends/trtllm/deploy/agg_router.yaml
@@ -8,35 +8,9 @@ metadata:
 spec:
  services:
    Frontend:
-      livenessProbe:
-        exec:
-          command:
-            - /bin/sh
-            - -c
-            - 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
-        periodSeconds: 5
-        timeoutSeconds: 3
-        failureThreshold: 3
-      readinessProbe:
-        exec:
-          command:
-            - /bin/sh
-            - -c
-            - 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
-        initialDelaySeconds: 60
-        periodSeconds: 60
-        timeoutSeconds: 3
-        failureThreshold: 5
      dynamoNamespace: trtllm-agg-router
-      componentType: main
+      componentType: frontend
      replicas: 1
-      resources:
-        requests:
-          cpu: "1"
-          memory: "2Gi"
-        limits:
-          cpu: "1"
-          memory: "2Gi"
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
@@ -48,20 +22,6 @@ spec:
            - "python3 -m dynamo.frontend --http-port 8000 --router-mode kv"
    TRTLLMWorker:
      envFromSecret: hf-token-secret
-      livenessProbe:
-        httpGet:
-          path: /live
-          port: 9090
-        periodSeconds: 5
-        timeoutSeconds: 3
-        failureThreshold: 3
-      readinessProbe:
-        httpGet:
-          path: /health
-          port: 9090
-        periodSeconds: 10
-        timeoutSeconds: 3
-        failureThreshold: 60
      dynamoNamespace: trtllm-agg-router
      componentType: worker
      replicas: 2
@@ -74,22 +34,8 @@ spec:
          cpu: "10"
          memory: "20Gi"
          gpu: "1"
-      envs:
-        - name: DYN_SYSTEM_ENABLED
-          value: "true"
-        - name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
-          value: "[\"generate\"]"
-        - name: DYN_SYSTEM_PORT
-          value: "9090"
      extraPodSpec:
        mainContainer:
-          startupProbe:
-            httpGet:
-              path: /health
-              port: 9090
-            periodSeconds: 10
-            timeoutSeconds: 3
-            failureThreshold: 60
          image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
          workingDir: /workspace/components/backends/trtllm
          args:

--- a/components/backends/trtllm/deploy/disagg.yaml
+++ b/components/backends/trtllm/deploy/disagg.yaml
@@ -9,26 +9,7 @@ spec:
  services:
    Frontend:
      dynamoNamespace: trtllm-disagg
-      componentType: main
+      componentType: frontend
-      livenessProbe:
-        exec:
-          command:
-            - /bin/sh
-            - -c
-            - 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
-        periodSeconds: 5
-        timeoutSeconds: 3
-        failureThreshold: 3
-      readinessProbe:
-        exec:
-          command:
-            - /bin/sh
-            - -c
-            - 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
-        initialDelaySeconds: 60
-        periodSeconds: 60
-        timeoutSeconds: 3
-        failureThreshold: 10
      replicas: 1
      resources:
        requests:
@@ -51,20 +32,6 @@ spec:
      envFromSecret: hf-token-secret
      componentType: worker
      replicas: 1
-      livenessProbe:
-        httpGet:
-          path: /live
-          port: 9090
-        periodSeconds: 5
-        timeoutSeconds: 3
-        failureThreshold: 3
-      readinessProbe:
-        httpGet:
-          path: /health
-          port: 9090
-        periodSeconds: 10
-        timeoutSeconds: 3
-        failureThreshold: 60
      resources:
        requests:
          cpu: "10"
@@ -76,46 +43,18 @@ spec:
          gpu: "1"
      extraPodSpec:
        mainContainer:
-          startupProbe:
-            httpGet:
-              path: /health
-              port: 9090
-            periodSeconds: 10
-            timeoutSeconds: 3
-            failureThreshold: 60
          image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
          workingDir: /workspace/components/backends/trtllm
          command:
            - /bin/sh
            - -c
          args:
-            - "python3 -m dynamo.trtllm --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B --extra-engine-args engine_configs/prefill.yaml --disaggregation-mode prefill --disaggregation-strategy decode_first  2>&1 | tee /tmp/trtllm.log"
+            - "python3 -m dynamo.trtllm --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B --extra-engine-args engine_configs/prefill.yaml --disaggregation-mode prefill --disaggregation-strategy decode_first"
-      envs:
-        - name: DYN_SYSTEM_ENABLED
-          value: "true"
-        - name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
-          value: "[\"generate\"]"
-        - name: DYN_SYSTEM_PORT
-          value: "9090"
    TRTLLMDecodeWorker:
      dynamoNamespace: trtllm-disagg
      envFromSecret: hf-token-secret
      componentType: worker
      replicas: 1
-      livenessProbe:
-        httpGet:
-          path: /live
-          port: 9090
-        periodSeconds: 5
-        timeoutSeconds: 3
-        failureThreshold: 3
-      readinessProbe:
-        httpGet:
-          path: /health
-          port: 9090
-        periodSeconds: 10
-        timeoutSeconds: 3
-        failureThreshold: 60
      resources:
        requests:
          cpu: "10"
@@ -127,24 +66,10 @@ spec:
          gpu: "1"
      extraPodSpec:
        mainContainer:
-          startupProbe:
-            httpGet:
-              path: /health
-              port: 9090
-            periodSeconds: 10
-            timeoutSeconds: 3
-            failureThreshold: 60
          image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
          workingDir: /workspace/components/backends/trtllm
          command:
            - /bin/sh
            - -c
          args:
-            - "python3 -m dynamo.trtllm --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B --extra-engine-args engine_configs/decode.yaml --disaggregation-mode decode --disaggregation-strategy decode_first 2>&1 | tee /tmp/trtllm.log"
+            - "python3 -m dynamo.trtllm --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B --extra-engine-args engine_configs/decode.yaml --disaggregation-mode decode --disaggregation-strategy decode_first"
-      envs:
-        - name: DYN_SYSTEM_ENABLED
-          value: "true"
-        - name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
-          value: "[\"generate\"]"
-        - name: DYN_SYSTEM_PORT
-          value: "9090"
--- a/components/backends/trtllm/deploy/disagg_router.yaml
+++ b/components/backends/trtllm/deploy/disagg_router.yaml
@@ -9,26 +9,7 @@ spec:
  services:
    Frontend:
      dynamoNamespace: trtllm-v1-disagg-router
-      componentType: main
+      componentType: frontend
-      livenessProbe:
-        exec:
-          command:
-            - /bin/sh
-            - -c
-            - 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
-        periodSeconds: 5
-        timeoutSeconds: 3
-        failureThreshold: 3
-      readinessProbe:
-        exec:
-          command:
-            - /bin/sh
-            - -c
-            - 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
-        initialDelaySeconds: 60
-        periodSeconds: 60
-        timeoutSeconds: 3
-        failureThreshold: 10
      replicas: 1
      resources:
        requests:
@@ -51,20 +32,6 @@ spec:
      envFromSecret: hf-token-secret
      componentType: worker
      replicas: 2
-      livenessProbe:
-        httpGet:
-          path: /live
-          port: 9090
-        periodSeconds: 5
-        timeoutSeconds: 3
-        failureThreshold: 3
-      readinessProbe:
-        httpGet:
-          path: /health
-          port: 9090
-        periodSeconds: 10
-        timeoutSeconds: 3
-        failureThreshold: 60
      resources:
        requests:
          cpu: "10"
@@ -76,46 +43,18 @@ spec:
          gpu: "1"
      extraPodSpec:
        mainContainer:
-          startupProbe:
-            httpGet:
-              path: /health
-              port: 9090
-            periodSeconds: 10
-            timeoutSeconds: 3
-            failureThreshold: 60
          image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
          workingDir: /workspace/components/backends/trtllm
          command:
            - /bin/sh
            - -c
          args:
-            - "python3 -m dynamo.trtllm --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B --extra-engine-args engine_configs/prefill.yaml --disaggregation-mode prefill --disaggregation-strategy prefill_first --publish-events-and-metrics 2>&1 | tee /tmp/trtllm.log"
+            - "python3 -m dynamo.trtllm --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B --extra-engine-args engine_configs/prefill.yaml --disaggregation-mode prefill --disaggregation-strategy prefill_first --publish-events-and-metrics"
-      envs:
-        - name: DYN_SYSTEM_ENABLED
-          value: "true"
-        - name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
-          value: "[\"generate\"]"
-        - name: DYN_SYSTEM_PORT
-          value: "9090"
    TRTLLMDecodeWorker:
      dynamoNamespace: trtllm-v1-disagg-router
      envFromSecret: hf-token-secret
      componentType: worker
      replicas: 1
-      livenessProbe:
-        httpGet:
-          path: /live
-          port: 9090
-        periodSeconds: 5
-        timeoutSeconds: 3
-        failureThreshold: 3
-      readinessProbe:
-        httpGet:
-          path: /health
-          port: 9090
-        periodSeconds: 10
-        timeoutSeconds: 3
-        failureThreshold: 60
      resources:
        requests:
          cpu: "10"
@@ -127,24 +66,10 @@ spec:
          gpu: "1"
      extraPodSpec:
        mainContainer:
-          startupProbe:
-            httpGet:
-              path: /health
-              port: 9090
-            periodSeconds: 10
-            timeoutSeconds: 3
-            failureThreshold: 60
          image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
          workingDir: /workspace/components/backends/trtllm
          command:
            - /bin/sh
            - -c
          args:
-            - "python3 -m dynamo.trtllm --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B --extra-engine-args engine_configs/decode.yaml --disaggregation-mode decode --disaggregation-strategy prefill_first  2>&1 | tee /tmp/trtllm.log"
+            - "python3 -m dynamo.trtllm --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B --extra-engine-args engine_configs/decode.yaml --disaggregation-mode decode --disaggregation-strategy prefill_first"
-      envs:
-        - name: DYN_SYSTEM_ENABLED
-          value: "true"
-        - name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
-          value: "[\"generate\"]"
-        - name: DYN_SYSTEM_PORT
-          value: "9090"
--- a/components/backends/vllm/deploy/agg.yaml
+++ b/components/backends/vllm/deploy/agg.yaml
@@ -8,34 +8,9 @@ metadata:
 spec:
  services:
    Frontend:
-      livenessProbe:
-        httpGet:
-          path: /health
-          port: 8000
-        initialDelaySeconds: 20
-        periodSeconds: 5
-        timeoutSeconds: 5
-        failureThreshold: 3
-      readinessProbe:
-        exec:
-          command:
-            - /bin/sh
-            - -c
-            - 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
-        initialDelaySeconds: 10
-        periodSeconds: 5
-        timeoutSeconds: 5
-        failureThreshold: 3
      dynamoNamespace: vllm-agg
-      componentType: main
+      componentType: frontend
      replicas: 1
-      resources:
-        requests:
-          cpu: "1"
-          memory: "2Gi"
-        limits:
-          cpu: "1"
-          memory: "2Gi"
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
@@ -47,55 +22,15 @@ spec:
            - "python3 -m dynamo.frontend --http-port 8000"
    VllmDecodeWorker:
      envFromSecret: hf-token-secret
-      livenessProbe:
-        httpGet:
-          path: /live
-          port: 9090
-        initialDelaySeconds: 30
-        periodSeconds: 10
-        timeoutSeconds: 5
-        failureThreshold: 3
-      readinessProbe:
-        httpGet:
-          path: /health
-          port: 9090
-        initialDelaySeconds: 30
-        periodSeconds: 10
-        timeoutSeconds: 5
-        failureThreshold: 60
      dynamoNamespace: vllm-agg
      componentType: worker
      replicas: 1
-      resources:
-        requests:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-        limits:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-      envs:
-        - name: DYN_SYSTEM_ENABLED
-          value: "true"
-        - name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
-          value: "[\"generate\"]"
-        - name: DYN_SYSTEM_PORT
-          value: "9090"
      extraPodSpec:
        mainContainer:
-          startupProbe:
-            httpGet:
-              path: /health
-              port: 9090
-            initialDelaySeconds: 10
-            periodSeconds: 10
-            timeoutSeconds: 5
-            failureThreshold: 60
          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh
            - -c
          args:
-            - python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B  2>&1 | tee /tmp/vllm.log
+            - python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B
--- a/components/backends/vllm/deploy/agg_router.yaml
+++ b/components/backends/vllm/deploy/agg_router.yaml
@@ -8,34 +8,9 @@ metadata:
 spec:
  services:
    Frontend:
-      livenessProbe:
-        httpGet:
-          path: /health
-          port: 8000
-        initialDelaySeconds: 20
-        periodSeconds: 5
-        timeoutSeconds: 5
-        failureThreshold: 3
-      readinessProbe:
-        exec:
-          command:
-            - /bin/sh
-            - -c
-            - 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
-        initialDelaySeconds: 60
-        periodSeconds: 60
-        timeoutSeconds: 30
-        failureThreshold: 10
      dynamoNamespace: vllm-agg-router
-      componentType: main
+      componentType: frontend
      replicas: 1
-      resources:
-        requests:
-          cpu: "1"
-          memory: "2Gi"
-        limits:
-          cpu: "1"
-          memory: "2Gi"
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
@@ -47,51 +22,15 @@ spec:
            - "python3 -m dynamo.frontend --http-port 8000 --router-mode kv"
    VllmDecodeWorker:
      envFromSecret: hf-token-secret
-      livenessProbe:
-        httpGet:
-          path: /live
-          port: 9090
-        periodSeconds: 5
-        timeoutSeconds: 30
-        failureThreshold: 1
-      readinessProbe:
-        httpGet:
-          path: /health
-          port: 9090
-        periodSeconds: 10
-        timeoutSeconds: 30
-        failureThreshold: 60
      dynamoNamespace: vllm-agg-router
      componentType: worker
      replicas: 2
-      resources:
-        requests:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-        limits:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-      envs:
-        - name: DYN_SYSTEM_ENABLED
-          value: "true"
-        - name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
-          value: "[\"generate\"]"
-        - name: DYN_SYSTEM_PORT
-          value: "9090"
      extraPodSpec:
        mainContainer:
-          startupProbe:
-            httpGet:
-              path: /health
-              port: 9090
-            periodSeconds: 10
-            failureThreshold: 60
          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh
            - -c
          args:
-            - python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B  2>&1 | tee /tmp/vllm.log
+            - python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B
--- a/components/backends/vllm/deploy/disagg.yaml
+++ b/components/backends/vllm/deploy/disagg.yaml
@@ -9,26 +9,8 @@ spec:
  services:
    Frontend:
      dynamoNamespace: vllm-disagg
-      componentType: main
+      componentType: frontend
      replicas: 1
-      livenessProbe:
-        httpGet:
-          path: /health
-          port: 8000
-        initialDelaySeconds: 20
-        periodSeconds: 5
-        timeoutSeconds: 5
-        failureThreshold: 3
-      readinessProbe:
-        exec:
-          command:
-            - /bin/sh
-            - -c
-            - 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
-        initialDelaySeconds: 60
-        periodSeconds: 60
-        timeoutSeconds: 30
-        failureThreshold: 10
      resources:
        requests:
          cpu: "32"
@@ -50,20 +32,6 @@ spec:
      envFromSecret: hf-token-secret
      componentType: worker
      replicas: 1
-      livenessProbe:
-        httpGet:
-          path: /live
-          port: 9090
-        periodSeconds: 5
-        timeoutSeconds: 30
-        failureThreshold: 1
-      readinessProbe:
-        httpGet:
-          path: /health
-          port: 9090
-        periodSeconds: 10
-        timeoutSeconds: 30
-        failureThreshold: 60
      resources:
        requests:
          cpu: "32"
@@ -73,47 +41,20 @@ spec:
          cpu: "32"
          memory: "40Gi"
          gpu: "1"
-      envs:
-        - name: DYN_SYSTEM_ENABLED
-          value: "true"
-        - name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
-          value: "[\"generate\"]"
-        - name: DYN_SYSTEM_PORT
-          value: "9090"
      extraPodSpec:
        mainContainer:
-          startupProbe:
-            httpGet:
-              path: /health
-              port: 9090
-            periodSeconds: 10
-            failureThreshold: 60
          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh
            - -c
          args:
-            - "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B  2>&1 | tee /tmp/vllm.log"
+            - "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B"
    VllmPrefillWorker:
      dynamoNamespace: vllm-disagg
      envFromSecret: hf-token-secret
      componentType: worker
      replicas: 1
-      livenessProbe:
-        httpGet:
-          path: /live
-          port: 9090
-        periodSeconds: 5
-        timeoutSeconds: 30
-        failureThreshold: 1
-      readinessProbe:
-        httpGet:
-          path: /health
-          port: 9090
-        periodSeconds: 10
-        timeoutSeconds: 30
-        failureThreshold: 60
      resources:
        requests:
          cpu: "32"
@@ -123,25 +64,12 @@ spec:
          cpu: "32"
          memory: "40Gi"
          gpu: "1"
-      envs:
-        - name: DYN_SYSTEM_ENABLED
-          value: "true"
-        - name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
-          value: "[\"generate\"]"
-        - name: DYN_SYSTEM_PORT
-          value: "9090"
      extraPodSpec:
        mainContainer:
-          startupProbe:
-            httpGet:
-              path: /health
-              port: 9090
-            periodSeconds: 10
-            failureThreshold: 60
          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh
            - -c
          args:
-            - "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B  --is-prefill-worker 2>&1 | tee /tmp/vllm.log"
+            - "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --is-prefill-worker"
--- a/components/backends/vllm/deploy/disagg_planner.yaml
+++ b/components/backends/vllm/deploy/disagg_planner.yaml
@@ -16,26 +16,8 @@ spec:
  services:
    Frontend:
      dynamoNamespace: vllm-disagg-planner
-      componentType: main
+      componentType: frontend
      replicas: 1
-      livenessProbe:
-        httpGet:
-          path: /health
-          port: 8000
-        initialDelaySeconds: 20
-        periodSeconds: 5
-        timeoutSeconds: 5
-        failureThreshold: 3
-      readinessProbe:
-        exec:
-          command:
-            - /bin/sh
-            - -c
-            - 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
-        initialDelaySeconds: 60
-        periodSeconds: 60
-        timeoutSeconds: 30
-        failureThreshold: 10
      resources:
        requests:
          cpu: "32"
@@ -99,9 +81,9 @@ spec:
            - --backend=vllm
            - --adjustment-interval=60
            - --profile-results-dir=/workspace/profiling_results
-    Prometheus:
+    Prometheus: # NOTE: componentType is set to frontend below only to ensure a service is created for the Prometheus component. This is a workaround and should be managed differently.
      dynamoNamespace: vllm-disagg-planner
-      componentType: main
+      componentType: frontend
      replicas: 1
      envs:
        - name: PYTHONPATH
@@ -146,20 +128,6 @@ spec:
      envFromSecret: hf-token-secret
      componentType: worker
      replicas: 2
-      livenessProbe:
-        httpGet:
-          path: /live
-          port: 9090
-        periodSeconds: 5
-        timeoutSeconds: 30
-        failureThreshold: 1
-      readinessProbe:
-        httpGet:
-          path: /health
-          port: 9090
-        periodSeconds: 10
-        timeoutSeconds: 30
-        failureThreshold: 60
      resources:
        requests:
          cpu: "8"
@@ -169,13 +137,6 @@ spec:
          cpu: "8"
          memory: "16Gi"
          gpu: "1"
-      envs:
-        - name: DYN_SYSTEM_ENABLED
-          value: "true"
-        - name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
-          value: "[\"generate\"]"
-        - name: DYN_SYSTEM_PORT
-          value: "9090"
      extraPodSpec:
        mainContainer:
          startupProbe:
@@ -190,26 +151,12 @@ spec:
            - /bin/sh
            - -c
          args:
-            - "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --migration-limit=3 2>&1 | tee /tmp/vllm.log"
+            - "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --migration-limit=3"
    VllmPrefillWorker:
      dynamoNamespace: vllm-disagg-planner
      envFromSecret: hf-token-secret
      componentType: worker
      replicas: 2
-      livenessProbe:
-        httpGet:
-          path: /health
-          port: 9090
-        periodSeconds: 5
-        timeoutSeconds: 30
-        failureThreshold: 1
-      readinessProbe:
-        httpGet:
-          path: /health
-          port: 9090
-        periodSeconds: 10
-        timeoutSeconds: 30
-        failureThreshold: 60
      resources:
        requests:
          cpu: "8"
@@ -219,13 +166,6 @@ spec:
          cpu: "8"
          memory: "16Gi"
          gpu: "1"
-      envs:
-        - name: DYN_SYSTEM_ENABLED
-          value: "true"
-        - name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
-          value: "[\"generate\"]"
-        - name: DYN_SYSTEM_PORT
-          value: "9090"
      extraPodSpec:
        mainContainer:
          startupProbe:
@@ -240,4 +180,4 @@ spec:
            - /bin/sh
            - -c
          args:
-            - python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --is-prefill-worker --migration-limit=3 2>&1 | tee /tmp/vllm.log
+            - python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --is-prefill-worker --migration-limit=3
--- a/components/backends/vllm/deploy/disagg_router.yaml
+++ b/components/backends/vllm/deploy/disagg_router.yaml
@@ -9,33 +9,8 @@ spec:
  services:
    Frontend:
      dynamoNamespace: vllm-v1-disagg-router
-      componentType: main
+      componentType: frontend
      replicas: 1
-      livenessProbe:
-        httpGet:
-          path: /health
-          port: 8000
-        initialDelaySeconds: 20
-        periodSeconds: 5
-        timeoutSeconds: 5
-        failureThreshold: 3
-      readinessProbe:
-        exec:
-          command:
-            - /bin/sh
-            - -c
-            - 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
-        initialDelaySeconds: 60
-        periodSeconds: 60
-        timeoutSeconds: 30
-        failureThreshold: 10
-      resources:
-        requests:
-          cpu: "1"
-          memory: "2Gi"
-        limits:
-          cpu: "1"
-          memory: "2Gi"
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
@@ -50,96 +25,26 @@ spec:
      envFromSecret: hf-token-secret
      componentType: worker
      replicas: 2
-      livenessProbe:
-        httpGet:
-          path: /live
-          port: 9090
-        periodSeconds: 5
-        timeoutSeconds: 30
-        failureThreshold: 1
-      readinessProbe:
-        httpGet:
-          path: /health
-          port: 9090
-        periodSeconds: 10
-        timeoutSeconds: 30
-        failureThreshold: 60
-      resources:
-        requests:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-        limits:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-      envs:
-        - name: DYN_SYSTEM_ENABLED
-          value: "true"
-        - name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
-          value: "[\"generate\"]"
      extraPodSpec:
        mainContainer:
-          startupProbe:
-            httpGet:
-              path: /health
-              port: 9090
-            periodSeconds: 10
-            failureThreshold: 60
          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh
            - -c
          args:
-            - python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B  2>&1 | tee /tmp/vllm.log
+            - python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B
    VllmPrefillWorker:
      dynamoNamespace: vllm-v1-disagg-router
      envFromSecret: hf-token-secret
      componentType: worker
      replicas: 1
-      livenessProbe:
-        httpGet:
-          path: /live
-          port: 9090
-        periodSeconds: 5
-        timeoutSeconds: 30
-        failureThreshold: 1
-      readinessProbe:
-        httpGet:
-          path: /health
-          port: 9090
-        periodSeconds: 10
-        timeoutSeconds: 30
-        failureThreshold: 60
-      resources:
-        requests:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-        limits:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-      envs:
-        - name: DYN_SYSTEM_ENABLED
-          value: "true"
-        - name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
-          value: "[\"generate\"]"
-        - name: DYN_SYSTEM_PORT
-          value: "9090"
      extraPodSpec:
        mainContainer:
-          startupProbe:
-            httpGet:
-              path: /health
-              port: 9090
-            periodSeconds: 10
-            failureThreshold: 60
          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh
            - -c
          args:
-            - python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B  --is-prefill-worker 2>&1 | tee /tmp/vllm.log
+            - python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --is-prefill-worker
--- a/deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types.go
+++ b/deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types.go
@@ -181,8 +181,8 @@ func (s *DynamoComponentDeployment) SetSpec(spec any) {
 	s.Spec = spec.(DynamoComponentDeploymentSpec)
 }
-func (s *DynamoComponentDeployment) IsMainComponent() bool {
+func (s *DynamoComponentDeployment) IsFrontendComponent() bool {
-	return strings.HasSuffix(s.Spec.DynamoTag, s.Spec.ServiceName) || s.Spec.ComponentType == commonconsts.ComponentTypeMain
+	return strings.HasSuffix(s.Spec.DynamoTag, s.Spec.ServiceName) || s.Spec.ComponentType == commonconsts.ComponentTypeFrontend
 }
 func (s *DynamoComponentDeployment) GetDynamoDeploymentConfig() []byte {

--- a/deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types_test.go
+++ b/deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types_test.go
@@ -28,7 +28,7 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
-func TestDynamoComponentDeployment_IsMainComponent(t *testing.T) {
+func TestDynamoComponentDeployment_IsFrontendComponent(t *testing.T) {
 	type fields struct {
 		TypeMeta   metav1.TypeMeta
 		ObjectMeta metav1.ObjectMeta
@@ -73,8 +73,8 @@ func TestDynamoComponentDeployment_IsMainComponent(t *testing.T) {
 				Spec:       tt.fields.Spec,
 				Status:     tt.fields.Status,
 			}
-			if got := s.IsMainComponent(); got != tt.want {
+			if got := s.IsFrontendComponent(); got != tt.want {
-				t.Errorf("DynamoComponentDeployment.IsMainComponent() = %v, want %v", got, tt.want)
+				t.Errorf("DynamoComponentDeployment.IsFrontendComponent() = %v, want %v", got, tt.want)
 			}
 		})
 	}

--- a/deploy/cloud/operator/internal/consts/consts.go
+++ b/deploy/cloud/operator/internal/consts/consts.go
@@ -38,8 +38,9 @@ const (
 	DynamoDeploymentConfigEnvVar = "DYN_DEPLOYMENT_CONFIG"
 	ComponentTypePlanner      = "planner"
-	ComponentTypeMain         = "main"
+	ComponentTypeFrontend     = "frontend"
 	ComponentTypeWorker       = "worker"
+	ComponentTypeDefault      = "default"
 	PlannerServiceAccountName = "planner-serviceaccount"
 	DefaultIngressSuffix = "local"

--- a/deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller.go
+++ b/deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller.go
@@ -1341,7 +1341,7 @@ func (r *DynamoComponentDeploymentReconciler) generateService(opt generateResour
 		},
 	}
-	if !opt.dynamoComponentDeployment.IsMainComponent() || (!opt.isGenericService && !opt.containsStealingTrafficDebugModeEnabled) {
+	if !opt.dynamoComponentDeployment.IsFrontendComponent() || (!opt.isGenericService && !opt.containsStealingTrafficDebugModeEnabled) {
 		// if it's not the frontend component, or if it's not a generic service and stealing-traffic debug mode is not enabled, we don't need to create the service
 		return kubeService, true, nil
 	}

--- a/deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller_test.go
+++ b/deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller_test.go
@@ -940,12 +940,8 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
 										Image:   "test-image:latest",
 										Command: []string{"sh", "-c"},
 										Args:    []string{"ray start --head --port=6379 && some dynamo command"},
-										Env:     []corev1.EnvVar{{Name: "TEST_ENV_FROM_DYNAMO_COMPONENT_DEPLOYMENT_SPEC", Value: "test_value_from_dynamo_component_deployment_spec"}, {Name: "TEST_ENV_FROM_EXTRA_POD_SPEC", Value: "test_value_from_extra_pod_spec"}, {Name: "DYNAMO_PORT", Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort)}},
+										Env:     []corev1.EnvVar{{Name: "TEST_ENV_FROM_DYNAMO_COMPONENT_DEPLOYMENT_SPEC", Value: "test_value_from_dynamo_component_deployment_spec"}, {Name: "TEST_ENV_FROM_EXTRA_POD_SPEC", Value: "test_value_from_extra_pod_spec"}},
-										Ports: []corev1.ContainerPort{
+										Ports:   nil,
-											{
-												Protocol: corev1.ProtocolTCP, Name: commonconsts.DynamoServicePortName, ContainerPort: commonconsts.DynamoServicePort,
-											},
-										},
 										VolumeMounts: []corev1.VolumeMount{
 											{
 												Name:      "shared-memory",
@@ -1000,8 +996,8 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
 										Image:   "test-image:latest",
 										Command: []string{"sh", "-c"},
 										Args:    []string{"ray start --address=$(LWS_LEADER_ADDRESS):6379 --block"},
-										Env:     []corev1.EnvVar{{Name: "TEST_ENV_FROM_DYNAMO_COMPONENT_DEPLOYMENT_SPEC", Value: "test_value_from_dynamo_component_deployment_spec"}, {Name: "TEST_ENV_FROM_EXTRA_POD_SPEC", Value: "test_value_from_extra_pod_spec"}, {Name: "DYNAMO_PORT", Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort)}},
+										Env:     []corev1.EnvVar{{Name: "TEST_ENV_FROM_DYNAMO_COMPONENT_DEPLOYMENT_SPEC", Value: "test_value_from_dynamo_component_deployment_spec"}, {Name: "TEST_ENV_FROM_EXTRA_POD_SPEC", Value: "test_value_from_extra_pod_spec"}},
-										Ports:   []corev1.ContainerPort{{Protocol: corev1.ProtocolTCP, Name: commonconsts.DynamoServicePortName, ContainerPort: commonconsts.DynamoServicePort}},
+										Ports:   nil,
 										VolumeMounts: []corev1.VolumeMount{
 											{
 												Name:      "shared-memory",

--- a/deploy/cloud/operator/internal/controller/dynamographdeployment_controller.go
+++ b/deploy/cloud/operator/internal/controller/dynamographdeployment_controller.go
@@ -179,7 +179,7 @@ func (r *DynamoGraphDeploymentReconciler) reconcileGroveResources(ctx context.Co
 	})
 	resources := []Resource{groveGangSetAsResource}
 	for componentName, component := range dynamoDeployment.Spec.Services {
-		if component.ComponentType == consts.ComponentTypeMain {
+		if component.ComponentType == consts.ComponentTypeFrontend {
 			// generate the service for the frontend component
 			mainComponentService, err := dynamo.GenerateComponentService(ctx, dynamo.GetDynamoComponentName(dynamoDeployment, componentName), dynamoDeployment.Namespace)
 			if err != nil {

--- a/deploy/cloud/operator/internal/dynamo/component_common.go
+++ b/deploy/cloud/operator/internal/dynamo/component_common.go
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+package dynamo
+import (
+	commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
+	corev1 "k8s.io/api/core/v1"
+)
+// ComponentDefaults is implemented per component type to supply the base
+// container and base pod spec used as that type's defaults.
+type ComponentDefaults interface {
+	// GetBaseContainer returns the base container configuration for this component type
+	// The numberOfNodes parameter indicates the total number of nodes in the deployment
+	GetBaseContainer(numberOfNodes int32) (corev1.Container, error)
+	// GetBasePodSpec returns the base pod spec configuration for this component type
+	// The numberOfNodes parameter indicates the total number of nodes in the deployment
+	GetBasePodSpec(numberOfNodes int32) (corev1.PodSpec, error)
+}
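+// ComponentDefaultsFactory returns the defaults for the given component type
+// (frontend, worker or planner) and falls back to BaseComponentDefaults for any
+// other value. numberOfNodes is accepted but not used for the selection.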
+func ComponentDefaultsFactory(componentType string, numberOfNodes int32) ComponentDefaults {
+	switch componentType {
+	case commonconsts.ComponentTypeFrontend:
+		return NewFrontendDefaults()
+	case commonconsts.ComponentTypeWorker:
+		return NewWorkerDefaults()
+	case commonconsts.ComponentTypePlanner:
+		return NewPlannerDefaults()
+	default:
+		return &BaseComponentDefaults{}
+	}
+}
+// BaseComponentDefaults provides common defaults shared by all components
+type BaseComponentDefaults struct{}
+func (b *BaseComponentDefaults) GetBaseContainer(numberOfNodes int32) (corev1.Container, error) {
+	return b.getCommonContainer(), nil
+}
+func (b *BaseComponentDefaults) GetBasePodSpec(numberOfNodes int32) (corev1.PodSpec, error) {
+	return corev1.PodSpec{}, nil
+}
+func (b *BaseComponentDefaults) getCommonContainer() corev1.Container {
+	container := corev1.Container{
+		Name: "main",
+		Command: []string{
+			"/bin/sh",
+			"-c",
+		},
+	}
+	return container
+}