feat: use generic image and use single node for oss-gpt-120b recipe (#3454)

af7a41c3 · Biswa Panda · GitHub · ff625465 · af7a41c3 · af7a41c3
Unverified commit af7a41c3, authored Oct 07, 2025 by Biswa Panda; committed by GitHub on Oct 07, 2025.
Showing 2 changed files with 6 additions and 6 deletions

recipes/gpt-oss-120b/trtllm/agg/deploy.yaml +5 -5

recipes/gpt-oss-120b/trtllm/agg/perf.yaml +1 -1

No files found.
--- a/recipes/gpt-oss-120b/trtllm/agg/deploy.yaml
+++ b/recipes/gpt-oss-120b/trtllm/agg/deploy.yaml
@@ -30,12 +30,12 @@ spec:
          command:
          - /bin/sh
          - -c
-          image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.5.1-rc0.pre3
+          image: my-registry/trtllm-runtime:my-tag
      pvc:
        create: false
        mountPoint: /model-store
        name: model-cache
-      replicas: 18
+      replicas: 1
    TrtllmWorker:
      componentType: main
      dynamoNamespace: gpt-oss-agg
@@ -69,7 +69,7 @@ spec:
          command:
          - /bin/sh
          - -c
-          image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.5.1-rc0.pre3
+          image: my-registry/trtllm-runtime:my-tag
          env:
          - name: TRTLLM_ENABLE_PDL
            value: "1"
@@ -80,7 +80,7 @@ spec:
          - name: ENGINE_ARGS
            value: "/opt/dynamo/configs/config.yaml"
          - name: MODEL_PATH
-            value: "/model-store/models--openai--gpt-oss-120b/snapshots/b5c939de8f754692c1647ca79fbf85e8c1e70f8a"
+            value: "/model-store/hub/models--openai--gpt-oss-120b/snapshots/b5c939de8f754692c1647ca79fbf85e8c1e70f8a"
          volumeMounts:
          - mountPath: /opt/dynamo/configs
            name: llm-config
@@ -94,7 +94,7 @@ spec:
        create: false
        mountPoint: /model-store
        name: model-cache
-      replicas: 18
+      replicas: 1
      resources:
        limits:
          gpu: "4"

--- a/recipes/gpt-oss-120b/trtllm/agg/perf.yaml
+++ b/recipes/gpt-oss-120b/trtllm/agg/perf.yaml
@@ -114,7 +114,7 @@ spec:
        - name: CONCURRENCY_PER_GPU
          value: "900"
        - name: DEPLOYMENT_GPU_COUNT
-          value: "72"
+          value: "4"
        - name: ISL
          value: "128"
        - name: OSL