Unverified Commit a604c7f0 authored by julienmancuso's avatar julienmancuso Committed by GitHub
Browse files

feat: remove DYN_DEPLOYMENT_CONFIG in examples (#1820)

parent 93acc631
...@@ -6,7 +6,7 @@ const ( ...@@ -6,7 +6,7 @@ const (
DefaultUserId = "default" DefaultUserId = "default"
DefaultOrgId = "default" DefaultOrgId = "default"
DynamoServicePort = 3000 DynamoServicePort = 8000
DynamoServicePortName = "http" DynamoServicePortName = "http"
DynamoContainerPortName = "http" DynamoContainerPortName = "http"
......
...@@ -308,7 +308,7 @@ func TestDynamoComponentDeploymentReconciler_generateIngress(t *testing.T) { ...@@ -308,7 +308,7 @@ func TestDynamoComponentDeploymentReconciler_generateIngress(t *testing.T) {
Backend: networkingv1.IngressBackend{ Backend: networkingv1.IngressBackend{
Service: &networkingv1.IngressServiceBackend{ Service: &networkingv1.IngressServiceBackend{
Name: "service1", Name: "service1",
Port: networkingv1.ServiceBackendPort{Number: 3000}, Port: networkingv1.ServiceBackendPort{Number: commonconsts.DynamoServicePort},
}, },
}, },
}, },
...@@ -465,7 +465,7 @@ func TestDynamoComponentDeploymentReconciler_generateVirtualService(t *testing.T ...@@ -465,7 +465,7 @@ func TestDynamoComponentDeploymentReconciler_generateVirtualService(t *testing.T
Destination: &istioNetworking.Destination{ Destination: &istioNetworking.Destination{
Host: "service1", Host: "service1",
Port: &istioNetworking.PortSelector{ Port: &istioNetworking.PortSelector{
Number: 3000, Number: commonconsts.DynamoServicePort,
}, },
}, },
}, },
...@@ -898,7 +898,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing. ...@@ -898,7 +898,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Image: "test-image:latest", Image: "test-image:latest",
Command: []string{"sh", "-c"}, Command: []string{"sh", "-c"},
Args: []string{"ray start --head --port=6379 && cd src && uv run dynamo serve --system-app-port 5000 --enable-system-app --use-default-health-checks --service-name test-lws-deploy-service test-tag --test-lws-deploy-service.ServiceArgs.dynamo.namespace=default"}, Args: []string{"ray start --head --port=6379 && cd src && uv run dynamo serve --system-app-port 5000 --enable-system-app --use-default-health-checks --service-name test-lws-deploy-service test-tag --test-lws-deploy-service.ServiceArgs.dynamo.namespace=default"},
Env: []corev1.EnvVar{{Name: "DYNAMO_PORT", Value: "3000"}}, Env: []corev1.EnvVar{{Name: "DYNAMO_PORT", Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort)}},
VolumeMounts: []corev1.VolumeMount{ VolumeMounts: []corev1.VolumeMount{
{ {
Name: "shared-memory", MountPath: "/dev/shm", Name: "shared-memory", MountPath: "/dev/shm",
...@@ -950,7 +950,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing. ...@@ -950,7 +950,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Image: "test-image:latest", Image: "test-image:latest",
Command: []string{"sh", "-c"}, Command: []string{"sh", "-c"},
Args: []string{"ray start --address=$(LWS_LEADER_ADDRESS):6379 --block"}, Args: []string{"ray start --address=$(LWS_LEADER_ADDRESS):6379 --block"},
Env: []corev1.EnvVar{{Name: "DYNAMO_PORT", Value: "3000"}}, Env: []corev1.EnvVar{{Name: "DYNAMO_PORT", Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort)}},
VolumeMounts: []corev1.VolumeMount{{Name: "shared-memory", MountPath: "/dev/shm"}}, VolumeMounts: []corev1.VolumeMount{{Name: "shared-memory", MountPath: "/dev/shm"}},
Ports: []corev1.ContainerPort{{Protocol: corev1.ProtocolTCP, Name: commonconsts.DynamoServicePortName, ContainerPort: commonconsts.DynamoServicePort}, { Ports: []corev1.ContainerPort{{Protocol: corev1.ProtocolTCP, Name: commonconsts.DynamoServicePortName, ContainerPort: commonconsts.DynamoServicePort}, {
Protocol: corev1.ProtocolTCP, Name: commonconsts.DynamoHealthPortName, ContainerPort: commonconsts.DynamoHealthPort, Protocol: corev1.ProtocolTCP, Name: commonconsts.DynamoHealthPortName, ContainerPort: commonconsts.DynamoHealthPort,
......
...@@ -19,6 +19,7 @@ package dynamo ...@@ -19,6 +19,7 @@ package dynamo
import ( import (
"context" "context"
"fmt"
"reflect" "reflect"
"sort" "sort"
"testing" "testing"
...@@ -513,7 +514,7 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) { ...@@ -513,7 +514,7 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
Envs: []corev1.EnvVar{ Envs: []corev1.EnvVar{
{ {
Name: "DYN_DEPLOYMENT_CONFIG", Name: "DYN_DEPLOYMENT_CONFIG",
Value: `{"service1":{"ServiceArgs":{"Resources":{"CPU":"2","GPU":"2","Memory":"2Gi"},"Workers":2},"port":3000}}`, Value: fmt.Sprintf(`{"service1":{"ServiceArgs":{"Resources":{"CPU":"2","GPU":"2","Memory":"2Gi"},"Workers":2},"port":%d}}`, commonconsts.DynamoServicePort),
}, },
}, },
}, },
...@@ -770,9 +771,9 @@ func Test_updateDynDeploymentConfig(t *testing.T) { ...@@ -770,9 +771,9 @@ func Test_updateDynDeploymentConfig(t *testing.T) {
}, },
}, },
}, },
newPort: 3000, newPort: commonconsts.DynamoServicePort,
}, },
want: []byte(`{"Frontend":{"port":3000},"Planner":{"environment":"kubernetes"}}`), want: []byte(fmt.Sprintf(`{"Frontend":{"port":%d},"Planner":{"environment":"kubernetes"}}`, commonconsts.DynamoServicePort)),
wantErr: false, wantErr: false,
}, },
{ {
...@@ -792,9 +793,9 @@ func Test_updateDynDeploymentConfig(t *testing.T) { ...@@ -792,9 +793,9 @@ func Test_updateDynDeploymentConfig(t *testing.T) {
}, },
}, },
}, },
newPort: 3000, newPort: commonconsts.DynamoServicePort,
}, },
want: []byte(`{"Frontend":{"port":8000},"Planner":{"environment":"kubernetes"}}`), want: []byte(fmt.Sprintf(`{"Frontend":{"port":%d},"Planner":{"environment":"kubernetes"}}`, commonconsts.DynamoServicePort)),
wantErr: false, wantErr: false,
}, },
{ {
......
...@@ -17,9 +17,6 @@ kind: DynamoGraphDeployment ...@@ -17,9 +17,6 @@ kind: DynamoGraphDeployment
metadata: metadata:
name: agg name: agg
spec: spec:
envs:
- name: DYN_DEPLOYMENT_CONFIG
value: '{"Common":{"model":"deepseek-ai/DeepSeek-R1-Distill-Llama-8B","block-size":64,"max-model-len":16384},"Frontend":{"served_model_name":"deepseek-ai/DeepSeek-R1-Distill-Llama-8B","endpoint":"dynamo.VllmWorker.generate","port":8000,"router":"round-robin","common-configs":["block-size"]},"VllmWorker":{"enforce-eager":true,"max-num-batched-tokens":16384,"enable-prefix-caching":true,"common-configs":["model","block-size","max-model-len"]}}'
services: services:
Frontend: Frontend:
dynamoNamespace: vllm-v0-agg dynamoNamespace: vllm-v0-agg
...@@ -46,6 +43,8 @@ spec: ...@@ -46,6 +43,8 @@ spec:
- --use-default-health-checks - --use-default-health-checks
- --service-name - --service-name
- Frontend - Frontend
- -f
- ./configs/agg.yaml
VllmWorker: VllmWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: vllm-v0-agg dynamoNamespace: vllm-v0-agg
...@@ -73,3 +72,5 @@ spec: ...@@ -73,3 +72,5 @@ spec:
- --use-default-health-checks - --use-default-health-checks
- --service-name - --service-name
- VllmWorker - VllmWorker
- -f
- ./configs/agg.yaml
...@@ -17,9 +17,6 @@ kind: DynamoGraphDeployment ...@@ -17,9 +17,6 @@ kind: DynamoGraphDeployment
metadata: metadata:
name: disagg name: disagg
spec: spec:
envs:
- name: DYN_DEPLOYMENT_CONFIG
value: '{"Common":{"model":"deepseek-ai/DeepSeek-R1-Distill-Llama-8B","block-size":64,"max-model-len":16384,"kv-transfer-config":"{\"kv_connector\":\"DynamoNixlConnector\"}"},"Frontend":{"served_model_name":"deepseek-ai/DeepSeek-R1-Distill-Llama-8B","endpoint":"dynamo.VllmWorker.generate","port":8000,"router":"round-robin","common-configs":["block-size"]},"VllmWorker":{"remote-prefill":true,"conditional-disagg":true,"max-local-prefill-length":10,"max-prefill-queue-size":2,"enable-prefix-caching":true,"common-configs":["model","block-size","max-model-len","kv-transfer-config"]},"PrefillWorker":{"max-num-batched-tokens":16384,"common-configs":["model","block-size","max-model-len","kv-transfer-config"]}}'
services: services:
Frontend: Frontend:
dynamoNamespace: vllm-v0-disagg dynamoNamespace: vllm-v0-disagg
...@@ -46,6 +43,8 @@ spec: ...@@ -46,6 +43,8 @@ spec:
- --use-default-health-checks - --use-default-health-checks
- --service-name - --service-name
- Frontend - Frontend
- -f
- ./configs/disagg.yaml
VllmWorker: VllmWorker:
dynamoNamespace: vllm-v0-disagg dynamoNamespace: vllm-v0-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
...@@ -73,6 +72,8 @@ spec: ...@@ -73,6 +72,8 @@ spec:
- --use-default-health-checks - --use-default-health-checks
- --service-name - --service-name
- VllmWorker - VllmWorker
- -f
- ./configs/disagg.yaml
PrefillWorker: PrefillWorker:
dynamoNamespace: vllm-v0-disagg dynamoNamespace: vllm-v0-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
...@@ -100,3 +101,5 @@ spec: ...@@ -100,3 +101,5 @@ spec:
- --use-default-health-checks - --use-default-health-checks
- --service-name - --service-name
- PrefillWorker - PrefillWorker
- -f
- ./configs/disagg.yaml
...@@ -17,9 +17,6 @@ kind: DynamoGraphDeployment ...@@ -17,9 +17,6 @@ kind: DynamoGraphDeployment
metadata: metadata:
name: disagg-planner name: disagg-planner
spec: spec:
envs:
- name: DYN_DEPLOYMENT_CONFIG
value: '{"Common":{"model":"deepseek-ai/DeepSeek-R1-Distill-Llama-8B","block-size":64,"max-model-len":16384,"kv-transfer-config":"{\"kv_connector\":\"DynamoNixlConnector\"}"},"Frontend":{"served_model_name":"deepseek-ai/DeepSeek-R1-Distill-Llama-8B","endpoint":"dynamo.VllmWorker.generate","port":8000,"router":"round-robin","common-configs":["block-size"]},"VllmWorker":{"remote-prefill":true,"conditional-disagg":true,"max-local-prefill-length":10,"max-prefill-queue-size":2,"enable-prefix-caching":true,"common-configs":["model","block-size","max-model-len","kv-transfer-config"]},"PrefillWorker":{"max-num-batched-tokens":16384,"common-configs":["model","block-size","max-model-len","kv-transfer-config"]},"Prometheus":{"global":{"scrape_interval":"5s"},"scrape_configs":[{"job_name":"prometheus","static_configs":[{"targets":["localhost:9090"]}]},{"job_name":"frontend","static_configs":[{"targets":["localhost:8000"]}]}]},"Planner":{"adjustment-interval":180,"profile-results-dir":"/workspace/examples/profiling_results","isl":3000,"osl":150,"ttft":0.5,"itl":0.05,"load-predictor":"arima"}}'
services: services:
Frontend: Frontend:
dynamoNamespace: vllm-v0-disagg-planner dynamoNamespace: vllm-v0-disagg-planner
...@@ -74,7 +71,8 @@ spec: ...@@ -74,7 +71,8 @@ spec:
- --use-default-health-checks - --use-default-health-checks
- --service-name - --service-name
- VllmWorker - VllmWorker
- -f
- ./configs/disagg_planner.yaml
PrefillWorker: PrefillWorker:
dynamoNamespace: vllm-v0-disagg-planner dynamoNamespace: vllm-v0-disagg-planner
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
...@@ -102,7 +100,8 @@ spec: ...@@ -102,7 +100,8 @@ spec:
- --use-default-health-checks - --use-default-health-checks
- --service-name - --service-name
- PrefillWorker - PrefillWorker
- -f
- ./configs/disagg_planner.yaml
Planner: Planner:
dynamoNamespace: vllm-v0-disagg-planner dynamoNamespace: vllm-v0-disagg-planner
replicas: 1 replicas: 1
...@@ -129,7 +128,8 @@ spec: ...@@ -129,7 +128,8 @@ spec:
- --service-name - --service-name
- Planner - Planner
- --Planner.environment=kubernetes - --Planner.environment=kubernetes
- -f
- ./configs/disagg_planner.yaml
Prometheus: Prometheus:
dynamoNamespace: vllm-v0-disagg-planner dynamoNamespace: vllm-v0-disagg-planner
replicas: 1 replicas: 1
...@@ -154,3 +154,5 @@ spec: ...@@ -154,3 +154,5 @@ spec:
- --use-default-health-checks - --use-default-health-checks
- --service-name - --service-name
- Prometheus - Prometheus
- -f
- ./configs/disagg_planner.yaml
...@@ -17,9 +17,6 @@ kind: DynamoGraphDeployment ...@@ -17,9 +17,6 @@ kind: DynamoGraphDeployment
metadata: metadata:
name: agg name: agg
spec: spec:
envs:
- name: DYN_DEPLOYMENT_CONFIG
value: '{"Common":{"model":"deepseek-ai/DeepSeek-R1-Distill-Llama-8B","block-size":64,"max-model-len":16384},"Frontend":{"served_model_name":"deepseek-ai/DeepSeek-R1-Distill-Llama-8B","endpoint":"dynamo.VllmWorker.generate","port":8000,"router":"round-robin","common-configs":["block-size"]},"VllmWorker":{"enforce-eager":true,"max-num-batched-tokens":16384,"enable-prefix-caching":true,"common-configs":["model","block-size","max-model-len"]}}'
services: services:
Frontend: Frontend:
dynamoNamespace: vllm-v1-agg dynamoNamespace: vllm-v1-agg
...@@ -46,6 +43,8 @@ spec: ...@@ -46,6 +43,8 @@ spec:
- --use-default-health-checks - --use-default-health-checks
- --service-name - --service-name
- Frontend - Frontend
- -f
- ./configs/agg.yaml
SimpleLoadBalancer: SimpleLoadBalancer:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: vllm-v1-agg dynamoNamespace: vllm-v1-agg
...@@ -71,6 +70,8 @@ spec: ...@@ -71,6 +70,8 @@ spec:
- --use-default-health-checks - --use-default-health-checks
- --service-name - --service-name
- SimpleLoadBalancer - SimpleLoadBalancer
- -f
- ./configs/agg.yaml
VllmDecodeWorker: VllmDecodeWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: vllm-v1-agg dynamoNamespace: vllm-v1-agg
...@@ -98,3 +99,5 @@ spec: ...@@ -98,3 +99,5 @@ spec:
- --use-default-health-checks - --use-default-health-checks
- --service-name - --service-name
- VllmDecodeWorker - VllmDecodeWorker
- -f
- ./configs/agg.yaml
...@@ -17,9 +17,6 @@ kind: DynamoGraphDeployment ...@@ -17,9 +17,6 @@ kind: DynamoGraphDeployment
metadata: metadata:
name: disagg name: disagg
spec: spec:
envs:
- name: DYN_DEPLOYMENT_CONFIG
value: '{"Common":{"model":"deepseek-ai/DeepSeek-R1-Distill-Llama-8B","block-size":64,"max-model-len":16384,"kv-transfer-config":"{\"kv_connector\":\"DynamoNixlConnector\"}"},"Frontend":{"served_model_name":"deepseek-ai/DeepSeek-R1-Distill-Llama-8B","endpoint":"dynamo.VllmWorker.generate","port":8000,"router":"round-robin","common-configs":["block-size"]},"VllmWorker":{"remote-prefill":true,"conditional-disagg":true,"max-local-prefill-length":10,"max-prefill-queue-size":2,"enable-prefix-caching":true,"common-configs":["model","block-size","max-model-len","kv-transfer-config"]},"PrefillWorker":{"max-num-batched-tokens":16384,"common-configs":["model","block-size","max-model-len","kv-transfer-config"]}}'
services: services:
Frontend: Frontend:
dynamoNamespace: vllm-v1-disagg dynamoNamespace: vllm-v1-disagg
...@@ -46,6 +43,8 @@ spec: ...@@ -46,6 +43,8 @@ spec:
- --use-default-health-checks - --use-default-health-checks
- --service-name - --service-name
- Frontend - Frontend
- -f
- ./configs/disagg.yaml
SimpleLoadBalancer: SimpleLoadBalancer:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: vllm-v1-disagg dynamoNamespace: vllm-v1-disagg
...@@ -71,6 +70,8 @@ spec: ...@@ -71,6 +70,8 @@ spec:
- --use-default-health-checks - --use-default-health-checks
- --service-name - --service-name
- SimpleLoadBalancer - SimpleLoadBalancer
- -f
- ./configs/disagg.yaml
VllmDecodeWorker: VllmDecodeWorker:
dynamoNamespace: vllm-v1-disagg dynamoNamespace: vllm-v1-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
...@@ -98,6 +99,8 @@ spec: ...@@ -98,6 +99,8 @@ spec:
- --use-default-health-checks - --use-default-health-checks
- --service-name - --service-name
- VllmDecodeWorker - VllmDecodeWorker
- -f
- ./configs/disagg.yaml
VllmPrefillWorker: VllmPrefillWorker:
dynamoNamespace: vllm-v1-disagg dynamoNamespace: vllm-v1-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
...@@ -125,3 +128,5 @@ spec: ...@@ -125,3 +128,5 @@ spec:
- --use-default-health-checks - --use-default-health-checks
- --service-name - --service-name
- VllmPrefillWorker - VllmPrefillWorker
- -f
- ./configs/disagg.yaml
...@@ -17,9 +17,6 @@ kind: DynamoGraphDeployment ...@@ -17,9 +17,6 @@ kind: DynamoGraphDeployment
metadata: metadata:
name: disagg-planner name: disagg-planner
spec: spec:
envs:
- name: DYN_DEPLOYMENT_CONFIG
value: '{"Common":{"model":"deepseek-ai/DeepSeek-R1-Distill-Llama-8B","block-size":64,"max-model-len":16384,"kv-transfer-config":"{\"kv_connector\":\"DynamoNixlConnector\"}"},"Frontend":{"served_model_name":"deepseek-ai/DeepSeek-R1-Distill-Llama-8B","endpoint":"dynamo.VllmWorker.generate","port":8000,"router":"round-robin","common-configs":["block-size"]},"VllmWorker":{"remote-prefill":true,"conditional-disagg":true,"max-local-prefill-length":10,"max-prefill-queue-size":2,"enable-prefix-caching":true,"common-configs":["model","block-size","max-model-len","kv-transfer-config"]},"PrefillWorker":{"max-num-batched-tokens":16384,"common-configs":["model","block-size","max-model-len","kv-transfer-config"]},"Prometheus":{"global":{"scrape_interval":"5s"},"scrape_configs":[{"job_name":"prometheus","static_configs":[{"targets":["localhost:9090"]}]},{"job_name":"frontend","static_configs":[{"targets":["localhost:8000"]}]}]},"Planner":{"adjustment-interval":180,"profile-results-dir":"/workspace/examples/profiling_results","isl":3000,"osl":150,"ttft":0.5,"itl":0.05,"load-predictor":"arima"}}'
services: services:
Frontend: Frontend:
dynamoNamespace: vllm-v1-disagg-planner dynamoNamespace: vllm-v1-disagg-planner
...@@ -46,7 +43,8 @@ spec: ...@@ -46,7 +43,8 @@ spec:
- --use-default-health-checks - --use-default-health-checks
- --service-name - --service-name
- Frontend - Frontend
- -f
- ./configs/disagg_planner.yaml
SimpleLoadBalancer: SimpleLoadBalancer:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: vllm-v1-disagg-planner dynamoNamespace: vllm-v1-disagg-planner
...@@ -72,7 +70,8 @@ spec: ...@@ -72,7 +70,8 @@ spec:
- --use-default-health-checks - --use-default-health-checks
- --service-name - --service-name
- SimpleLoadBalancer - SimpleLoadBalancer
- -f
- ./configs/disagg_planner.yaml
VllmDecodeWorker: VllmDecodeWorker:
dynamoNamespace: vllm-v1-disagg-planner dynamoNamespace: vllm-v1-disagg-planner
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
...@@ -100,7 +99,8 @@ spec: ...@@ -100,7 +99,8 @@ spec:
- --use-default-health-checks - --use-default-health-checks
- --service-name - --service-name
- VllmDecodeWorker - VllmDecodeWorker
- -f
- ./configs/disagg_planner.yaml
VllmPrefillWorker: VllmPrefillWorker:
dynamoNamespace: vllm-v1-disagg-planner dynamoNamespace: vllm-v1-disagg-planner
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
...@@ -128,7 +128,8 @@ spec: ...@@ -128,7 +128,8 @@ spec:
- --use-default-health-checks - --use-default-health-checks
- --service-name - --service-name
- VllmPrefillWorker - VllmPrefillWorker
- -f
- ./configs/disagg_planner.yaml
Planner: Planner:
dynamoNamespace: vllm-v1-disagg-planner dynamoNamespace: vllm-v1-disagg-planner
replicas: 1 replicas: 1
...@@ -155,7 +156,8 @@ spec: ...@@ -155,7 +156,8 @@ spec:
- --service-name - --service-name
- Planner - Planner
- --Planner.environment=kubernetes - --Planner.environment=kubernetes
- -f
- ./configs/disagg_planner.yaml
Prometheus: Prometheus:
dynamoNamespace: vllm-v1-disagg-planner dynamoNamespace: vllm-v1-disagg-planner
replicas: 1 replicas: 1
...@@ -180,3 +182,5 @@ spec: ...@@ -180,3 +182,5 @@ spec:
- --use-default-health-checks - --use-default-health-checks
- --service-name - --service-name
- Prometheus - Prometheus
- -f
- ./configs/disagg_planner.yaml
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment