fix(planner): use dynamo-planner image for profiler job and planner pods...

fix(planner): use dynamo-planner image for profiler job and planner pods [DYN-2733][DYN-2746] (#8407) Signed-off-by: hongkuanz <hongkuanz@nvidia.com> Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

fix(planner): use dynamo-planner image for profiler job and planner pods...
fix(planner): use dynamo-planner image for profiler job and planner pods [DYN-2733][DYN-2746] (#8407) Signed-off-by: hongkuanz <hongkuanz@nvidia.com> Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
e8ecf6ff · Hongkuan Zhou · GitHub · f923777e · e8ecf6ff · e8ecf6ff
Unverified Commit e8ecf6ff authored Apr 21, 2026 by Hongkuan Zhou Committed by GitHub Apr 21, 2026
7 changed files
--- a/deploy/operator/internal/controller/dynamographdeploymentrequest_controller.go
+++ b/deploy/operator/internal/controller/dynamographdeploymentrequest_controller.go
@@ -1253,6 +1253,13 @@ func (r *DynamoGraphDeploymentRequestReconciler) createProfilingJob(ctx context.

 		// Use image from spec; the defaulting webhook fills this in for production builds.
 		// Guard against empty image in case the webhook didn't run (e.g. local dev builds).
+		//
+		// Starting with Dynamo 1.1.0, the profiler's runtime dependencies
+		// (kubernetes_asyncio, pmdarima, prophet, aiconfigurator, ...) live in the
+		// dedicated dynamo-planner image, not in backend runtime or frontend images.
+		// On 1.1.0+, spec.image must therefore point at a planner image
+		// (e.g. nvcr.io/nvidia/ai-dynamo/dynamo-planner:<version>); users on earlier
+		// versions can keep using the frontend/backend image they used before.
 		imageName := dgdr.Spec.Image
 		if imageName == "" {
 			return nil, false, fmt.Errorf("spec.image is required but not set; ensure the defaulting webhook ran or set spec.image explicitly")

--- a/deploy/operator/internal/webhook/defaulting/dynamographdeploymentrequest_handler.go
+++ b/deploy/operator/internal/webhook/defaulting/dynamographdeploymentrequest_handler.go
@@ -35,7 +35,13 @@ const (

 	// defaultImage is the default profiler image used when spec.image is not set.
 	// Default image derivation is only supported for public release versions (1.0.0+).
-	defaultImage = "nvcr.io/nvidia/ai-dynamo/dynamo-frontend"
+	//
+	// Starting with Dynamo 1.1.0, the profiler's runtime dependencies
+	// (kubernetes_asyncio, pmdarima, prophet, aiconfigurator, ...) ship only in the
+	// dedicated dynamo-planner image, so we default to that image here. Users who
+	// pin an earlier version may continue to override spec.image explicitly with
+	// the frontend image they were using before.
+	defaultImage = "nvcr.io/nvidia/ai-dynamo/dynamo-planner"
 )

 // DGDRDefaulter is a mutating webhook handler that fills in default values for
@@ -43,7 +49,7 @@ const (
 //
 // If spec.image is not set, it is derived as:
 //
-//	nvcr.io/nvidia/ai-dynamo/dynamo-frontend:<operatorVersion>
+//	nvcr.io/nvidia/ai-dynamo/dynamo-planner:<operatorVersion>
 //
 // Defaulting requires a known operator version and is only supported for
 // operator versions 1.0.0 and later.

--- a/deploy/operator/internal/webhook/defaulting/dynamographdeploymentrequest_handler_test.go
+++ b/deploy/operator/internal/webhook/defaulting/dynamographdeploymentrequest_handler_test.go
@@ -28,6 +28,8 @@ import (
 )

 func TestDGDRDefaulter_defaultImageFor(t *testing.T) {
+	// Note: the default planner image is only published starting from Dynamo 1.1.0,
+	// so these tests use 1.1.0 as the earliest known valid version.
 	tests := []struct {
 		name            string
 		operatorVersion string
@@ -35,13 +37,13 @@ func TestDGDRDefaulter_defaultImageFor(t *testing.T) {
 	}{
 		{
 			name:            "known version produces default image",
-			operatorVersion: "1.0.0",
-			expectedImage:   "nvcr.io/nvidia/ai-dynamo/dynamo-frontend:1.0.0",
+			operatorVersion: "1.1.0",
+			expectedImage:   "nvcr.io/nvidia/ai-dynamo/dynamo-planner:1.1.0",
 		},
 		{
 			name:            "pre-release version is valid",
-			operatorVersion: "1.0.0-rc1",
-			expectedImage:   "nvcr.io/nvidia/ai-dynamo/dynamo-frontend:1.0.0-rc1",
+			operatorVersion: "1.1.0-rc1",
+			expectedImage:   "nvcr.io/nvidia/ai-dynamo/dynamo-planner:1.1.0-rc1",
 		},
 		{
 			name:            "unknown operator version cannot be defaulted",
@@ -85,14 +87,14 @@ func TestDGDRDefaulter_Default(t *testing.T) {
 	}{
 		{
 			name:          "CREATE with empty image defaults to operator version",
-			version:       "1.0.0",
+			version:       "1.1.0",
 			operation:     admissionv1.Create,
 			initialImage:  "",
-			expectedImage: "nvcr.io/nvidia/ai-dynamo/dynamo-frontend:1.0.0",
+			expectedImage: "nvcr.io/nvidia/ai-dynamo/dynamo-planner:1.1.0",
 		},
 		{
 			name:          "CREATE with preset image is not overwritten",
-			version:       "1.0.0",
+			version:       "1.1.0",
 			operation:     admissionv1.Create,
 			initialImage:  "my-registry/my-image:custom",
 			expectedImage: "my-registry/my-image:custom",
@@ -106,7 +108,7 @@ func TestDGDRDefaulter_Default(t *testing.T) {
 		},
 		{
 			name:          "UPDATE does not default image",
-			version:       "1.0.0",
+			version:       "1.1.0",
 			operation:     admissionv1.Update,
 			initialImage:  "",
 			expectedImage: "",

--- a/docs/kubernetes/dgdr.md
+++ b/docs/kubernetes/dgdr.md
@@ -80,8 +80,12 @@ spec:
  model: Qwen/Qwen3-0.6B

  # Container image for the profiling job — must match your installed platform version.
-  # This is the same dynamo-frontend image used by the deployed inference service.
-  image: "nvcr.io/nvidia/ai-dynamo/dynamo-frontend:${RELEASE_VERSION}"
+  #   Dynamo >= 1.1.0: use the dedicated planner/profiler image (dynamo-planner).
+  #     Planner/profiler runtime deps (kubernetes_asyncio, pmdarima, prophet,
+  #     aiconfigurator, ...) ship only in this image; the frontend and backend
+  #     runtime images do not include them.
+  #   Dynamo <  1.1.0: use the dynamo-frontend image you deploy with.
+  image: "nvcr.io/nvidia/ai-dynamo/dynamo-planner:${RELEASE_VERSION}"
 ```

 Apply it (uses `envsubst` to substitute the `RELEASE_VERSION` shell variable into the YAML):
@@ -95,7 +99,7 @@ envsubst < qwen3-first-model.yaml | kubectl apply -f - -n ${NAMESPACE}
 | Field | Required | Default | Purpose |
 |---|---|---|---|
 | `model` | Yes | — | HuggingFace model ID (e.g. `Qwen/Qwen3-0.6B`) |
-| `image` | No | — | Container image for the profiling job (`dynamo-frontend`) |
+| `image` | No | — | Container image for the profiling job. For Dynamo ≥ 1.1.0 this must be the `dynamo-planner` image; for earlier versions it is the `dynamo-frontend` image. |
 | `backend` | No | `auto` | Inference engine (`auto`, `vllm`, `sglang`, `trtllm`) |
 | `searchStrategy` | No | `rapid` | Profiling depth — `rapid` (~30s, AIC simulation) or `thorough` (2–4h, real GPUs) |
 | `autoApply` | No | `true` | Automatically create and start the deployment after profiling |

--- a/examples/backends/sglang/deploy/disagg_planner.yaml
+++ b/examples/backends/sglang/deploy/disagg_planner.yaml
@@ -62,7 +62,14 @@ spec:
      replicas: 1
      extraPodSpec:
        mainContainer:
-          image: my-registry/sglang-runtime:my-tag
+          # Planner image selection:
+          #   Dynamo >= 1.1.0: use the dedicated planner image
+          #     <registry>/dynamo-planner:<version>
+          #     (backend runtime images no longer ship planner runtime deps
+          #     such as kubernetes_asyncio, pmdarima, prophet, aiconfigurator).
+          #   Dynamo <  1.1.0: use the backend runtime image
+          #     <registry>/sglang-runtime:<version>.
+          image: my-registry/dynamo-planner:my-tag
          command:
          - python3
          - -m

--- a/examples/backends/trtllm/deploy/disagg_planner.yaml
+++ b/examples/backends/trtllm/deploy/disagg_planner.yaml
@@ -79,7 +79,14 @@ spec:
      replicas: 1
      extraPodSpec:
        mainContainer:
-          image: my-registry/tensorrtllm-runtime:my-tag
+          # Planner image selection:
+          #   Dynamo >= 1.1.0: use the dedicated planner image
+          #     <registry>/dynamo-planner:<version>
+          #     (backend runtime images no longer ship planner runtime deps
+          #     such as kubernetes_asyncio, pmdarima, prophet, aiconfigurator).
+          #   Dynamo <  1.1.0: use the backend runtime image
+          #     <registry>/tensorrtllm-runtime:<version>.
+          image: my-registry/dynamo-planner:my-tag
          ports:
            - name: metrics
              containerPort: 9085

--- a/examples/backends/vllm/deploy/disagg_planner.yaml
+++ b/examples/backends/vllm/deploy/disagg_planner.yaml
@@ -61,7 +61,14 @@ spec:
      replicas: 1
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
+          # Planner image selection:
+          #   Dynamo >= 1.1.0: use the dedicated planner image
+          #     nvcr.io/nvidia/ai-dynamo/dynamo-planner:<version>
+          #     (backend runtime images no longer ship planner runtime deps
+          #     such as kubernetes_asyncio, pmdarima, prophet, aiconfigurator).
+          #   Dynamo <  1.1.0: use the backend runtime image
+          #     nvcr.io/nvidia/ai-dynamo/vllm-runtime:<version>.
+          image: nvcr.io/nvidia/ai-dynamo/dynamo-planner:my-tag
          command:
          - python3
          - -m