"examples/vscode:/vscode.git/clone" did not exist on "9498f016e2675d5cdd76cf846c7b680485fef404"
Unverified Commit 1a5016b0 authored by Thomas Montfort's avatar Thomas Montfort Committed by GitHub
Browse files

feat: add subComponentType in DGD API and uptake in planner (#3200)


Signed-off-by: default avatartmontfort <tmontfort@nvidia.com>
Signed-off-by: default avatarhongkuanz <hongkuanz@nvidia.com>
Co-authored-by: default avatarhongkuanz <hongkuanz@nvidia.com>
parent 13156361
......@@ -216,20 +216,6 @@ COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
# Install prometheus
ARG PROM_VERSION=3.4.1
RUN ARCH=$(dpkg --print-architecture) && \
case "$ARCH" in \
amd64) PLATFORM=linux-amd64 ;; \
arm64) PLATFORM=linux-arm64 ;; \
*) echo "Unsupported architecture: $ARCH" && exit 1 ;; \
esac && \
curl -fsSL --retry 5 --retry-delay 5 "https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/prometheus-${PROM_VERSION}.${PLATFORM}.tar.gz" \
| tar -xz -C /tmp && \
mv "/tmp/prometheus-${PROM_VERSION}.${PLATFORM}/prometheus" /usr/local/bin/ && \
chmod +x /usr/local/bin/prometheus && \
rm -rf "/tmp/prometheus-${PROM_VERSION}.${PLATFORM}"
# Copy UCX from dev image as plugin for NIXL
# Copy NIXL source from dev image
# Copy dynamo wheels for gitlab artifacts
......
......@@ -683,7 +683,7 @@ spec:
Typically corresponds to a component defined in the packaged Dynamo artifacts.
type: string
dynamoNamespace:
description: dynamo namespace of the service (allows to override the dynamo namespace of the service defined in annotations inside the dynamo archive)
description: Dynamo namespace of the service (allows to override the Dynamo namespace of the service defined in annotations inside the Dynamo archive)
type: string
dynamoTag:
description: 'contains the tag of the DynamoComponent: for example, "my_package:MyService"'
......@@ -816,8 +816,9 @@ spec:
type: object
extraPodSpec:
description: |-
ExtraPodSpec merges additional fields into the generated PodSpec for advanced
customization (tolerations, node selectors, affinity, etc.).
ExtraPodSpec allows to override the main pod spec configuration.
It is a k8s standard PodSpec. It also contains a MainContainer (standard k8s Container) field
that allows overriding the main container configuration.
properties:
activeDeadlineSeconds:
description: |-
......@@ -10239,7 +10240,7 @@ spec:
type: object
type: object
serviceName:
description: contains the name of the component
description: The name of the component
type: string
sharedMemory:
description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
......@@ -10253,6 +10254,9 @@ spec:
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
subComponentType:
description: SubComponentType indicates the sub-role of this component (for example, "prefill").
type: string
type: object
status:
description: Status reflects the current observed state of the component deployment.
......
......@@ -785,7 +785,7 @@ spec:
description: ComponentType indicates the role of this component (for example, "main").
type: string
dynamoNamespace:
description: dynamo namespace of the service (allows to override the dynamo namespace of the service defined in annotations inside the dynamo archive)
description: Dynamo namespace of the service (allows to override the Dynamo namespace of the service defined in annotations inside the Dynamo archive)
type: string
envFromSecret:
description: |-
......@@ -915,8 +915,9 @@ spec:
type: object
extraPodSpec:
description: |-
ExtraPodSpec merges additional fields into the generated PodSpec for advanced
customization (tolerations, node selectors, affinity, etc.).
ExtraPodSpec allows to override the main pod spec configuration.
It is a k8s standard PodSpec. It also contains a MainContainer (standard k8s Container) field
that allows overriding the main container configuration.
properties:
activeDeadlineSeconds:
description: |-
......@@ -10338,7 +10339,7 @@ spec:
type: object
type: object
serviceName:
description: contains the name of the component
description: The name of the component
type: string
sharedMemory:
description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
......@@ -10352,6 +10353,9 @@ spec:
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
subComponentType:
description: SubComponentType indicates the sub-role of this component (for example, "prefill").
type: string
type: object
description: |-
Services allows per-service overrides of the component deployment settings.
......
......@@ -27,9 +27,14 @@ spec:
any: true
{{- end }}
podMetricsEndpoints:
- interval: 30s
- interval: 5s
path: /metrics
port: http
relabelings:
- action: replace
sourceLabels:
- __meta_kubernetes_pod_label_nvidia_com_dynamo_namespace
targetLabel: dynamo_namespace
selector:
matchLabels:
nvidia.com/dynamo-component-type: frontend
......@@ -49,7 +54,7 @@ spec:
any: true
{{- end }}
podMetricsEndpoints:
- interval: 30s
- interval: 5s
path: /metrics
port: system
selector:
......@@ -71,7 +76,7 @@ spec:
any: true
{{- end }}
podMetricsEndpoints:
- interval: 30s
- interval: 5s
path: /metrics
port: metrics
selector:
......
......@@ -73,6 +73,9 @@ type DynamoComponentDeploymentSharedSpec struct {
// ComponentType indicates the role of this component (for example, "main").
ComponentType string `json:"componentType,omitempty"`
// SubComponentType indicates the sub-role of this component (for example, "prefill").
SubComponentType string `json:"subComponentType,omitempty"`
// Dynamo namespace of the service (allows to override the Dynamo namespace of the service defined in annotations inside the Dynamo archive)
DynamoNamespace *string `json:"dynamoNamespace,omitempty"`
......
......@@ -683,7 +683,7 @@ spec:
Typically corresponds to a component defined in the packaged Dynamo artifacts.
type: string
dynamoNamespace:
description: dynamo namespace of the service (allows to override the dynamo namespace of the service defined in annotations inside the dynamo archive)
description: Dynamo namespace of the service (allows to override the Dynamo namespace of the service defined in annotations inside the Dynamo archive)
type: string
dynamoTag:
description: 'contains the tag of the DynamoComponent: for example, "my_package:MyService"'
......@@ -816,8 +816,9 @@ spec:
type: object
extraPodSpec:
description: |-
ExtraPodSpec merges additional fields into the generated PodSpec for advanced
customization (tolerations, node selectors, affinity, etc.).
ExtraPodSpec allows to override the main pod spec configuration.
It is a k8s standard PodSpec. It also contains a MainContainer (standard k8s Container) field
that allows overriding the main container configuration.
properties:
activeDeadlineSeconds:
description: |-
......@@ -10239,7 +10240,7 @@ spec:
type: object
type: object
serviceName:
description: contains the name of the component
description: The name of the component
type: string
sharedMemory:
description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
......@@ -10253,6 +10254,9 @@ spec:
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
subComponentType:
description: SubComponentType indicates the sub-role of this component (for example, "prefill").
type: string
type: object
status:
description: Status reflects the current observed state of the component deployment.
......
......@@ -785,7 +785,7 @@ spec:
description: ComponentType indicates the role of this component (for example, "main").
type: string
dynamoNamespace:
description: dynamo namespace of the service (allows to override the dynamo namespace of the service defined in annotations inside the dynamo archive)
description: Dynamo namespace of the service (allows to override the Dynamo namespace of the service defined in annotations inside the Dynamo archive)
type: string
envFromSecret:
description: |-
......@@ -915,8 +915,9 @@ spec:
type: object
extraPodSpec:
description: |-
ExtraPodSpec merges additional fields into the generated PodSpec for advanced
customization (tolerations, node selectors, affinity, etc.).
ExtraPodSpec allows to override the main pod spec configuration.
It is a k8s standard PodSpec. It also contains a MainContainer (standard k8s Container) field
that allows overriding the main container configuration.
properties:
activeDeadlineSeconds:
description: |-
......@@ -10338,7 +10339,7 @@ spec:
type: object
type: object
serviceName:
description: contains the name of the component
description: The name of the component
type: string
sharedMemory:
description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
......@@ -10352,6 +10353,9 @@ spec:
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
subComponentType:
description: SubComponentType indicates the sub-role of this component (for example, "prefill").
type: string
type: object
description: |-
Services allows per-service overrides of the component deployment settings.
......
......@@ -37,6 +37,7 @@ const (
KubeLabelDynamoNamespace = "nvidia.com/dynamo-namespace"
KubeLabelDynamoDeploymentTargetType = "nvidia.com/dynamo-deployment-target-type"
KubeLabelDynamoComponentType = "nvidia.com/dynamo-component-type"
KubeLabelDynamoSubComponentType = "nvidia.com/dynamo-sub-component-type"
KubeLabelValueFalse = "false"
KubeLabelValueTrue = "true"
......
......@@ -1154,6 +1154,10 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
podLabels[commonconsts.KubeLabelDynamoComponentType] = opt.dynamoComponentDeployment.Spec.ComponentType
}
if opt.dynamoComponentDeployment.Spec.SubComponentType != "" {
podLabels[commonconsts.KubeLabelDynamoSubComponentType] = opt.dynamoComponentDeployment.Spec.SubComponentType
}
podAnnotations := make(map[string]string)
kubeName := r.getKubeName(opt.dynamoComponentDeployment, opt.isStealingTrafficDebugModeEnabled)
......
......@@ -698,9 +698,10 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Value: "test_value_from_dynamo_component_deployment_spec",
},
},
ComponentType: string(commonconsts.ComponentTypeWorker),
ServiceName: "test-lws-deploy-service",
DynamoNamespace: &[]string{"default"}[0],
ComponentType: string(commonconsts.ComponentTypeWorker),
SubComponentType: "test-sub-component",
ServiceName: "test-lws-deploy-service",
DynamoNamespace: &[]string{"default"}[0],
Multinode: &v1alpha1.MultinodeSpec{
NodeCount: 2,
},
......@@ -783,6 +784,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
"role": "leader",
"nvidia.com/label1": "label1",
commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeWorker,
commonconsts.KubeLabelDynamoSubComponentType: "test-sub-component",
commonconsts.KubeLabelDynamoGraphDeploymentName: "",
},
Annotations: map[string]string{
......@@ -893,6 +895,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
"role": "worker",
"nvidia.com/label1": "label1",
commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeWorker,
commonconsts.KubeLabelDynamoSubComponentType: "test-sub-component",
commonconsts.KubeLabelDynamoGraphDeploymentName: "",
},
Annotations: map[string]string{
......
......@@ -100,10 +100,6 @@ func (r *DynamoGraphDeploymentReconciler) Reconcile(ctx context.Context, req ctr
if err = r.Get(ctx, req.NamespacedName, dynamoDeployment); err != nil {
return ctrl.Result{}, client.IgnoreNotFound(err)
}
if err != nil {
// not found, nothing to do
return ctrl.Result{}, nil
}
defer func() {
if err != nil {
......@@ -129,7 +125,7 @@ func (r *DynamoGraphDeploymentReconciler) Reconcile(ctx context.Context, req ctr
err = r.Status().Update(ctx, dynamoDeployment)
if err != nil {
logger.Error(err, "Unable to update the CRD status", "crd", req.NamespacedName)
logger.Error(err, "Unable to update the CRD status", "crd", req.NamespacedName, "state", state, "reason", reason, "message", message)
}
logger.Info("Reconciliation done")
}()
......
......@@ -992,6 +992,9 @@ func generateLabels(component *v1alpha1.DynamoComponentDeploymentOverridesSpec,
if component.ComponentType != "" {
labels[commonconsts.KubeLabelDynamoComponentType] = component.ComponentType
}
if component.SubComponentType != "" {
labels[commonconsts.KubeLabelDynamoSubComponentType] = component.SubComponentType
}
setMetricsLabels(labels, dynamoDeployment)
if component.Labels != nil {
err := mergo.Merge(&labels, component.Labels, mergo.WithOverride)
......
......@@ -62,9 +62,10 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
Services: map[string]*v1alpha1.DynamoComponentDeploymentOverridesSpec{
"service1": {
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoNamespace: &[]string{"default"}[0],
ComponentType: "frontend",
Replicas: &[]int32{3}[0],
DynamoNamespace: &[]string{"default"}[0],
ComponentType: "frontend",
SubComponentType: "test-sub-component",
Replicas: &[]int32{3}[0],
Resources: &common.Resources{
Requests: &common.ResourceItem{
CPU: "1",
......@@ -106,10 +107,11 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
},
Spec: v1alpha1.DynamoComponentDeploymentSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ServiceName: "service1",
DynamoNamespace: &[]string{"default"}[0],
ComponentType: "frontend",
Replicas: &[]int32{3}[0],
ServiceName: "service1",
DynamoNamespace: &[]string{"default"}[0],
ComponentType: "frontend",
SubComponentType: "test-sub-component",
Replicas: &[]int32{3}[0],
Resources: &common.Resources{
Requests: &common.ResourceItem{
CPU: "1",
......@@ -1088,7 +1090,8 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Services: map[string]*v1alpha1.DynamoComponentDeploymentOverridesSpec{
"Frontend": {
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: "frontend", // Frontend component
ComponentType: "frontend", // Frontend component
SubComponentType: "test-sub-component",
ExtraPodMetadata: &common.ExtraPodMetadata{
Annotations: map[string]string{
"nvidia.com/annotation1": "annotation1",
......@@ -1240,6 +1243,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-frontend",
commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue,
commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeFrontend,
commonconsts.KubeLabelDynamoSubComponentType: "test-sub-component",
commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment",
"nvidia.com/label1": "label1",
"nvidia.com/label2": "label2",
......@@ -1642,8 +1646,9 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
"nvidia.com/label2": "label2",
},
},
Replicas: &[]int32{5}[0],
ComponentType: commonconsts.ComponentTypeWorker,
Replicas: &[]int32{5}[0],
ComponentType: commonconsts.ComponentTypeWorker,
SubComponentType: "test-sub-component",
ExtraPodSpec: &common.ExtraPodSpec{
MainContainer: &corev1.Container{
Image: "worker-image",
......@@ -1767,6 +1772,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Name: "worker-ldr",
Labels: map[string]string{
commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeWorker,
commonconsts.KubeLabelDynamoSubComponentType: "test-sub-component",
commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue,
commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-worker-ldr",
commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment",
......@@ -1917,6 +1923,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Name: "worker-wkr",
Labels: map[string]string{
commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeWorker,
commonconsts.KubeLabelDynamoSubComponentType: "test-sub-component",
commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue,
commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-worker-wkr",
commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment",
......
......@@ -14,7 +14,12 @@ spec:
podMetricsEndpoints:
- port: http
path: /metrics
interval: 2s
interval: 5s
relabelings:
- action: replace
sourceLabels:
- __meta_kubernetes_pod_label_nvidia_com_dynamo_namespace
targetLabel: dynamo_namespace
namespaceSelector:
matchNames:
- ${NAMESPACE}
......@@ -14,7 +14,7 @@ spec:
podMetricsEndpoints:
- port: metrics
path: /metrics
interval: 2s
interval: 5s
namespaceSelector:
matchNames:
- $NAMESPACE
\ No newline at end of file
......@@ -14,7 +14,7 @@ spec:
podMetricsEndpoints:
- port: system
path: /metrics
interval: 2s
interval: 5s
namespaceSelector:
matchNames:
- ${NAMESPACE}
......@@ -9,10 +9,12 @@ Quick deployment guide for the disaggregated planner with automatic scaling.
**Components:**
- **Frontend**: Serves requests and exposes `/metrics`
- **Prometheus**: Scrapes frontend metrics every 5 seconds
- **Planner**: Queries Prometheus and adjusts worker scaling every 60 seconds
- **Prometheus**: Scrapes frontend metrics at the scrape interval configured in the PodMonitor (5 seconds in the provided manifests)
- **Planner**: Queries Prometheus and adjusts worker scaling every adjustment interval
- **Workers**: prefill and backend workers handle inference
The adjustment interval can be defined in the planner manifest as an argument. The default interval value can be found in this [file](/components/planner/src/dynamo/planner/defaults.py).
```mermaid
flowchart LR
Frontend --"/metrics"--> Prometheus
......@@ -25,6 +27,7 @@ flowchart LR
- Kubernetes cluster with GPU nodes
- [Pre-Deployment Profiling](/docs/benchmarks/pre_deployment_profiling.md) completed and its results saved to `dynamo-pvc` PVC.
- Prefill and decode workers use the best parallelization mapping suggested by the pre-deployment profiling script.
- [kube-prometheus-stack](/docs/kubernetes/metrics.md) installed and running.
> [!NOTE]
> **Important**: The profiling that occurs before Planner deployment requires additional Kubernetes manifests (ServiceAccount, Role, RoleBinding, PVC) that are not included in standard Dynamo deployments. Apply these manifests in the same namespace as `$NAMESPACE`. For a complete setup, start with the [Quick Start guide](/deploy/utils/README.md#quick-start), which provides a fully encapsulated deployment including all required manifests.
......@@ -50,7 +53,6 @@ Expected pods (all should be `1/1 Running`):
```
# For vLLM:
vllm-disagg-planner-frontend-* 1/1 Running
vllm-disagg-planner-prometheus-* 1/1 Running
vllm-disagg-planner-planner-* 1/1 Running
vllm-disagg-planner-backend-* 1/1 Running
vllm-disagg-planner-prefill-* 1/1 Running
......@@ -103,8 +105,8 @@ kubectl logs -n $NAMESPACE deployment/vllm-disagg-planner-planner --tail=10
**Connection Issues:**
```bash
# Verify Prometheus is accessible (runs on port 8000)
kubectl port-forward -n $NAMESPACE deployment/vllm-disagg-planner-prometheus 9090:8000
# Verify Prometheus is accessible
kubectl port-forward svc/prometheus-kube-prometheus-prometheus -n monitoring 9090:9090
curl "http://localhost:9090/api/v1/query?query=up"
```
......@@ -119,3 +121,11 @@ curl http://localhost:8000/metrics | grep nv_llm_http_service
- Large models can take 10+ minutes to initialize
- Check worker logs: `kubectl logs -n $NAMESPACE deployment/vllm-disagg-planner-backend`
- Ensure GPU resources are available for workers
**Unknown Field subComponentType:**
If you encounter the following error when attempting to apply the deployment:
```bash
Error from server (BadRequest): error when creating "components/backends/vllm/deploy/disagg.yaml": DynamoGraphDeployment in version "v1alpha1" cannot be handled as a DynamoGraphDeployment: strict decoding error: unknown field "spec.services.DecodeWorker.subComponentType", unknown field "spec.services.PrefillWorker.subComponentType"
```
This is because the `subComponentType` field was added only in versions of the DynamoGraphDeployment CRD newer than 0.5.0. You can upgrade the CRD by following the [installation guide](/docs/kubernetes/installation_guide.md).
......@@ -55,9 +55,6 @@ spec:
envFromSecret: hf-token-secret
componentType: planner
replicas: 1
envs:
- name: PROMETHEUS_PORT
value: "8000"
livenessProbe:
exec:
command:
......@@ -98,47 +95,11 @@ spec:
--adjustment-interval=60
--prometheus-port=9085
--no-correction
Prometheus:
dynamoNamespace: vllm-disagg-planner
componentType: main
replicas: 1
envs:
- name: PYTHONPATH
value: "/workspace/components/planner/src"
- name: PROMETHEUS_PORT
value: "8000"
livenessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
readinessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
initialDelaySeconds: 30
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
extraPodSpec:
mainContainer:
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.planner.prometheus"
VllmDecodeWorker:
dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret
componentType: worker
subComponentType: decode
replicas: 1
livenessProbe:
httpGet:
......@@ -195,6 +156,7 @@ spec:
dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret
componentType: worker
subComponentType: prefill
replicas: 1
livenessProbe:
httpGet:
......
......@@ -11,8 +11,6 @@ spec:
value: '{"Prometheus":{"global":{"scrape_interval":"5s"},"scrape_configs":[{"job_name":"prometheus","static_configs":[{"targets":["localhost:9090"]}]},{"job_name":"frontend","static_configs":[{"targets":["vllm-disagg-planner-frontend:8000"]}]}]}}'
- name: DYNAMO_NAMESPACE
value: "vllm-disagg-planner"
- name: PROMETHEUS_PORT
value: "8000"
services:
Frontend:
dynamoNamespace: vllm-disagg-planner
......@@ -63,45 +61,11 @@ spec:
--itl=0.01
--load-predictor=constant
--no-correction
Prometheus: # NOTE: this is set on Prometheus to ensure a service is created for the Prometheus component. This is a workaround and should be managed differently.
dynamoNamespace: vllm-disagg-planner
componentType: frontend
replicas: 1
envs:
- name: PYTHONPATH
value: "/workspace/components/planner/src"
livenessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
readinessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
initialDelaySeconds: 30
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
extraPodSpec:
mainContainer:
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.planner.prometheus"
VllmDecodeWorker:
dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret
componentType: worker
subComponentType: decode
replicas: 1
resources:
limits:
......@@ -125,6 +89,7 @@ spec:
dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret
componentType: worker
subComponentType: prefill
replicas: 1
resources:
limits:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment