Unverified Commit a604c7f0 authored by julienmancuso, committed by GitHub
Browse files

feat: remove DYN_DEPLOYMENT_CONFIG in examples (#1820)

parent 93acc631
......@@ -6,7 +6,7 @@ const (
DefaultUserId = "default"
DefaultOrgId = "default"
DynamoServicePort = 3000
DynamoServicePort = 8000
DynamoServicePortName = "http"
DynamoContainerPortName = "http"
......
......@@ -308,7 +308,7 @@ func TestDynamoComponentDeploymentReconciler_generateIngress(t *testing.T) {
Backend: networkingv1.IngressBackend{
Service: &networkingv1.IngressServiceBackend{
Name: "service1",
Port: networkingv1.ServiceBackendPort{Number: 3000},
Port: networkingv1.ServiceBackendPort{Number: commonconsts.DynamoServicePort},
},
},
},
......@@ -465,7 +465,7 @@ func TestDynamoComponentDeploymentReconciler_generateVirtualService(t *testing.T
Destination: &istioNetworking.Destination{
Host: "service1",
Port: &istioNetworking.PortSelector{
Number: 3000,
Number: commonconsts.DynamoServicePort,
},
},
},
......@@ -898,7 +898,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Image: "test-image:latest",
Command: []string{"sh", "-c"},
Args: []string{"ray start --head --port=6379 && cd src && uv run dynamo serve --system-app-port 5000 --enable-system-app --use-default-health-checks --service-name test-lws-deploy-service test-tag --test-lws-deploy-service.ServiceArgs.dynamo.namespace=default"},
Env: []corev1.EnvVar{{Name: "DYNAMO_PORT", Value: "3000"}},
Env: []corev1.EnvVar{{Name: "DYNAMO_PORT", Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort)}},
VolumeMounts: []corev1.VolumeMount{
{
Name: "shared-memory", MountPath: "/dev/shm",
......@@ -950,7 +950,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Image: "test-image:latest",
Command: []string{"sh", "-c"},
Args: []string{"ray start --address=$(LWS_LEADER_ADDRESS):6379 --block"},
Env: []corev1.EnvVar{{Name: "DYNAMO_PORT", Value: "3000"}},
Env: []corev1.EnvVar{{Name: "DYNAMO_PORT", Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort)}},
VolumeMounts: []corev1.VolumeMount{{Name: "shared-memory", MountPath: "/dev/shm"}},
Ports: []corev1.ContainerPort{{Protocol: corev1.ProtocolTCP, Name: commonconsts.DynamoServicePortName, ContainerPort: commonconsts.DynamoServicePort}, {
Protocol: corev1.ProtocolTCP, Name: commonconsts.DynamoHealthPortName, ContainerPort: commonconsts.DynamoHealthPort,
......
......@@ -19,6 +19,7 @@ package dynamo
import (
"context"
"fmt"
"reflect"
"sort"
"testing"
......@@ -513,7 +514,7 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
Envs: []corev1.EnvVar{
{
Name: "DYN_DEPLOYMENT_CONFIG",
Value: `{"service1":{"ServiceArgs":{"Resources":{"CPU":"2","GPU":"2","Memory":"2Gi"},"Workers":2},"port":3000}}`,
Value: fmt.Sprintf(`{"service1":{"ServiceArgs":{"Resources":{"CPU":"2","GPU":"2","Memory":"2Gi"},"Workers":2},"port":%d}}`, commonconsts.DynamoServicePort),
},
},
},
......@@ -770,9 +771,9 @@ func Test_updateDynDeploymentConfig(t *testing.T) {
},
},
},
newPort: 3000,
newPort: commonconsts.DynamoServicePort,
},
want: []byte(`{"Frontend":{"port":3000},"Planner":{"environment":"kubernetes"}}`),
want: []byte(fmt.Sprintf(`{"Frontend":{"port":%d},"Planner":{"environment":"kubernetes"}}`, commonconsts.DynamoServicePort)),
wantErr: false,
},
{
......@@ -792,9 +793,9 @@ func Test_updateDynDeploymentConfig(t *testing.T) {
},
},
},
newPort: 3000,
newPort: commonconsts.DynamoServicePort,
},
want: []byte(`{"Frontend":{"port":8000},"Planner":{"environment":"kubernetes"}}`),
want: []byte(fmt.Sprintf(`{"Frontend":{"port":%d},"Planner":{"environment":"kubernetes"}}`, commonconsts.DynamoServicePort)),
wantErr: false,
},
{
......
......@@ -17,9 +17,6 @@ kind: DynamoGraphDeployment
metadata:
name: agg
spec:
envs:
- name: DYN_DEPLOYMENT_CONFIG
value: '{"Common":{"model":"deepseek-ai/DeepSeek-R1-Distill-Llama-8B","block-size":64,"max-model-len":16384},"Frontend":{"served_model_name":"deepseek-ai/DeepSeek-R1-Distill-Llama-8B","endpoint":"dynamo.VllmWorker.generate","port":8000,"router":"round-robin","common-configs":["block-size"]},"VllmWorker":{"enforce-eager":true,"max-num-batched-tokens":16384,"enable-prefix-caching":true,"common-configs":["model","block-size","max-model-len"]}}'
services:
Frontend:
dynamoNamespace: vllm-v0-agg
......@@ -46,6 +43,8 @@ spec:
- --use-default-health-checks
- --service-name
- Frontend
- -f
- ./configs/agg.yaml
VllmWorker:
envFromSecret: hf-token-secret
dynamoNamespace: vllm-v0-agg
......@@ -73,3 +72,5 @@ spec:
- --use-default-health-checks
- --service-name
- VllmWorker
- -f
- ./configs/agg.yaml
......@@ -17,9 +17,6 @@ kind: DynamoGraphDeployment
metadata:
name: disagg
spec:
envs:
- name: DYN_DEPLOYMENT_CONFIG
value: '{"Common":{"model":"deepseek-ai/DeepSeek-R1-Distill-Llama-8B","block-size":64,"max-model-len":16384,"kv-transfer-config":"{\"kv_connector\":\"DynamoNixlConnector\"}"},"Frontend":{"served_model_name":"deepseek-ai/DeepSeek-R1-Distill-Llama-8B","endpoint":"dynamo.VllmWorker.generate","port":8000,"router":"round-robin","common-configs":["block-size"]},"VllmWorker":{"remote-prefill":true,"conditional-disagg":true,"max-local-prefill-length":10,"max-prefill-queue-size":2,"enable-prefix-caching":true,"common-configs":["model","block-size","max-model-len","kv-transfer-config"]},"PrefillWorker":{"max-num-batched-tokens":16384,"common-configs":["model","block-size","max-model-len","kv-transfer-config"]}}'
services:
Frontend:
dynamoNamespace: vllm-v0-disagg
......@@ -46,6 +43,8 @@ spec:
- --use-default-health-checks
- --service-name
- Frontend
- -f
- ./configs/disagg.yaml
VllmWorker:
dynamoNamespace: vllm-v0-disagg
envFromSecret: hf-token-secret
......@@ -73,6 +72,8 @@ spec:
- --use-default-health-checks
- --service-name
- VllmWorker
- -f
- ./configs/disagg.yaml
PrefillWorker:
dynamoNamespace: vllm-v0-disagg
envFromSecret: hf-token-secret
......@@ -100,3 +101,5 @@ spec:
- --use-default-health-checks
- --service-name
- PrefillWorker
- -f
- ./configs/disagg.yaml
......@@ -17,9 +17,6 @@ kind: DynamoGraphDeployment
metadata:
name: disagg-planner
spec:
envs:
- name: DYN_DEPLOYMENT_CONFIG
value: '{"Common":{"model":"deepseek-ai/DeepSeek-R1-Distill-Llama-8B","block-size":64,"max-model-len":16384,"kv-transfer-config":"{\"kv_connector\":\"DynamoNixlConnector\"}"},"Frontend":{"served_model_name":"deepseek-ai/DeepSeek-R1-Distill-Llama-8B","endpoint":"dynamo.VllmWorker.generate","port":8000,"router":"round-robin","common-configs":["block-size"]},"VllmWorker":{"remote-prefill":true,"conditional-disagg":true,"max-local-prefill-length":10,"max-prefill-queue-size":2,"enable-prefix-caching":true,"common-configs":["model","block-size","max-model-len","kv-transfer-config"]},"PrefillWorker":{"max-num-batched-tokens":16384,"common-configs":["model","block-size","max-model-len","kv-transfer-config"]},"Prometheus":{"global":{"scrape_interval":"5s"},"scrape_configs":[{"job_name":"prometheus","static_configs":[{"targets":["localhost:9090"]}]},{"job_name":"frontend","static_configs":[{"targets":["localhost:8000"]}]}]},"Planner":{"adjustment-interval":180,"profile-results-dir":"/workspace/examples/profiling_results","isl":3000,"osl":150,"ttft":0.5,"itl":0.05,"load-predictor":"arima"}}'
services:
Frontend:
dynamoNamespace: vllm-v0-disagg-planner
......@@ -74,7 +71,8 @@ spec:
- --use-default-health-checks
- --service-name
- VllmWorker
- -f
- ./configs/disagg_planner.yaml
PrefillWorker:
dynamoNamespace: vllm-v0-disagg-planner
envFromSecret: hf-token-secret
......@@ -102,7 +100,8 @@ spec:
- --use-default-health-checks
- --service-name
- PrefillWorker
- -f
- ./configs/disagg_planner.yaml
Planner:
dynamoNamespace: vllm-v0-disagg-planner
replicas: 1
......@@ -129,7 +128,8 @@ spec:
- --service-name
- Planner
- --Planner.environment=kubernetes
- -f
- ./configs/disagg_planner.yaml
Prometheus:
dynamoNamespace: vllm-v0-disagg-planner
replicas: 1
......@@ -154,3 +154,5 @@ spec:
- --use-default-health-checks
- --service-name
- Prometheus
- -f
- ./configs/disagg_planner.yaml
......@@ -17,9 +17,6 @@ kind: DynamoGraphDeployment
metadata:
name: agg
spec:
envs:
- name: DYN_DEPLOYMENT_CONFIG
value: '{"Common":{"model":"deepseek-ai/DeepSeek-R1-Distill-Llama-8B","block-size":64,"max-model-len":16384},"Frontend":{"served_model_name":"deepseek-ai/DeepSeek-R1-Distill-Llama-8B","endpoint":"dynamo.VllmWorker.generate","port":8000,"router":"round-robin","common-configs":["block-size"]},"VllmWorker":{"enforce-eager":true,"max-num-batched-tokens":16384,"enable-prefix-caching":true,"common-configs":["model","block-size","max-model-len"]}}'
services:
Frontend:
dynamoNamespace: vllm-v1-agg
......@@ -46,6 +43,8 @@ spec:
- --use-default-health-checks
- --service-name
- Frontend
- -f
- ./configs/agg.yaml
SimpleLoadBalancer:
envFromSecret: hf-token-secret
dynamoNamespace: vllm-v1-agg
......@@ -71,6 +70,8 @@ spec:
- --use-default-health-checks
- --service-name
- SimpleLoadBalancer
- -f
- ./configs/agg.yaml
VllmDecodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: vllm-v1-agg
......@@ -98,3 +99,5 @@ spec:
- --use-default-health-checks
- --service-name
- VllmDecodeWorker
- -f
- ./configs/agg.yaml
......@@ -17,9 +17,6 @@ kind: DynamoGraphDeployment
metadata:
name: disagg
spec:
envs:
- name: DYN_DEPLOYMENT_CONFIG
value: '{"Common":{"model":"deepseek-ai/DeepSeek-R1-Distill-Llama-8B","block-size":64,"max-model-len":16384,"kv-transfer-config":"{\"kv_connector\":\"DynamoNixlConnector\"}"},"Frontend":{"served_model_name":"deepseek-ai/DeepSeek-R1-Distill-Llama-8B","endpoint":"dynamo.VllmWorker.generate","port":8000,"router":"round-robin","common-configs":["block-size"]},"VllmWorker":{"remote-prefill":true,"conditional-disagg":true,"max-local-prefill-length":10,"max-prefill-queue-size":2,"enable-prefix-caching":true,"common-configs":["model","block-size","max-model-len","kv-transfer-config"]},"PrefillWorker":{"max-num-batched-tokens":16384,"common-configs":["model","block-size","max-model-len","kv-transfer-config"]}}'
services:
Frontend:
dynamoNamespace: vllm-v1-disagg
......@@ -46,6 +43,8 @@ spec:
- --use-default-health-checks
- --service-name
- Frontend
- -f
- ./configs/disagg.yaml
SimpleLoadBalancer:
envFromSecret: hf-token-secret
dynamoNamespace: vllm-v1-disagg
......@@ -71,6 +70,8 @@ spec:
- --use-default-health-checks
- --service-name
- SimpleLoadBalancer
- -f
- ./configs/disagg.yaml
VllmDecodeWorker:
dynamoNamespace: vllm-v1-disagg
envFromSecret: hf-token-secret
......@@ -98,6 +99,8 @@ spec:
- --use-default-health-checks
- --service-name
- VllmDecodeWorker
- -f
- ./configs/disagg.yaml
VllmPrefillWorker:
dynamoNamespace: vllm-v1-disagg
envFromSecret: hf-token-secret
......@@ -125,3 +128,5 @@ spec:
- --use-default-health-checks
- --service-name
- VllmPrefillWorker
- -f
- ./configs/disagg.yaml
......@@ -17,9 +17,6 @@ kind: DynamoGraphDeployment
metadata:
name: disagg-planner
spec:
envs:
- name: DYN_DEPLOYMENT_CONFIG
value: '{"Common":{"model":"deepseek-ai/DeepSeek-R1-Distill-Llama-8B","block-size":64,"max-model-len":16384,"kv-transfer-config":"{\"kv_connector\":\"DynamoNixlConnector\"}"},"Frontend":{"served_model_name":"deepseek-ai/DeepSeek-R1-Distill-Llama-8B","endpoint":"dynamo.VllmWorker.generate","port":8000,"router":"round-robin","common-configs":["block-size"]},"VllmWorker":{"remote-prefill":true,"conditional-disagg":true,"max-local-prefill-length":10,"max-prefill-queue-size":2,"enable-prefix-caching":true,"common-configs":["model","block-size","max-model-len","kv-transfer-config"]},"PrefillWorker":{"max-num-batched-tokens":16384,"common-configs":["model","block-size","max-model-len","kv-transfer-config"]},"Prometheus":{"global":{"scrape_interval":"5s"},"scrape_configs":[{"job_name":"prometheus","static_configs":[{"targets":["localhost:9090"]}]},{"job_name":"frontend","static_configs":[{"targets":["localhost:8000"]}]}]},"Planner":{"adjustment-interval":180,"profile-results-dir":"/workspace/examples/profiling_results","isl":3000,"osl":150,"ttft":0.5,"itl":0.05,"load-predictor":"arima"}}'
services:
Frontend:
dynamoNamespace: vllm-v1-disagg-planner
......@@ -46,7 +43,8 @@ spec:
- --use-default-health-checks
- --service-name
- Frontend
- -f
- ./configs/disagg_planner.yaml
SimpleLoadBalancer:
envFromSecret: hf-token-secret
dynamoNamespace: vllm-v1-disagg-planner
......@@ -72,7 +70,8 @@ spec:
- --use-default-health-checks
- --service-name
- SimpleLoadBalancer
- -f
- ./configs/disagg_planner.yaml
VllmDecodeWorker:
dynamoNamespace: vllm-v1-disagg-planner
envFromSecret: hf-token-secret
......@@ -100,7 +99,8 @@ spec:
- --use-default-health-checks
- --service-name
- VllmDecodeWorker
- -f
- ./configs/disagg_planner.yaml
VllmPrefillWorker:
dynamoNamespace: vllm-v1-disagg-planner
envFromSecret: hf-token-secret
......@@ -128,7 +128,8 @@ spec:
- --use-default-health-checks
- --service-name
- VllmPrefillWorker
- -f
- ./configs/disagg_planner.yaml
Planner:
dynamoNamespace: vllm-v1-disagg-planner
replicas: 1
......@@ -155,7 +156,8 @@ spec:
- --service-name
- Planner
- --Planner.environment=kubernetes
- -f
- ./configs/disagg_planner.yaml
Prometheus:
dynamoNamespace: vllm-v1-disagg-planner
replicas: 1
......@@ -180,3 +182,5 @@ spec:
- --use-default-health-checks
- --service-name
- Prometheus
- -f
- ./configs/disagg_planner.yaml
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment