Unverified commit af7a41c3, authored by Biswa Panda and committed by GitHub
Browse files

feat: use generic image and use single node for oss-gpt-120b recipe (#3454)

parent ff625465
@@ -30,12 +30,12 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.5.1-rc0.pre3
image: my-registry/trtllm-runtime:my-tag
pvc:
create: false
mountPoint: /model-store
name: model-cache
replicas: 18
replicas: 1
TrtllmWorker:
componentType: main
dynamoNamespace: gpt-oss-agg
@@ -69,7 +69,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.5.1-rc0.pre3
image: my-registry/trtllm-runtime:my-tag
env:
- name: TRTLLM_ENABLE_PDL
value: "1"
@@ -80,7 +80,7 @@ spec:
- name: ENGINE_ARGS
value: "/opt/dynamo/configs/config.yaml"
- name: MODEL_PATH
value: "/model-store/models--openai--gpt-oss-120b/snapshots/b5c939de8f754692c1647ca79fbf85e8c1e70f8a"
value: "/model-store/hub/models--openai--gpt-oss-120b/snapshots/b5c939de8f754692c1647ca79fbf85e8c1e70f8a"
volumeMounts:
- mountPath: /opt/dynamo/configs
name: llm-config
@@ -94,7 +94,7 @@ spec:
create: false
mountPoint: /model-store
name: model-cache
replicas: 18
replicas: 1
resources:
limits:
gpu: "4"
......
@@ -114,7 +114,7 @@ spec:
- name: CONCURRENCY_PER_GPU
value: "900"
- name: DEPLOYMENT_GPU_COUNT
value: "72"
value: "4"
- name: ISL
value: "128"
- name: OSL
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment