"docs/vscode:/vscode.git/clone" did not exist on "6a1a801c2dd1a29b435cf37f144fd35e2519ff4a"
Unverified Commit 28546bad authored by mohammedabdulwahhab's avatar mohammedabdulwahhab Committed by GitHub
Browse files

fix: set frontend defaults (#2484)

parent 8cde945e
...@@ -14,10 +14,6 @@ spec: ...@@ -14,10 +14,6 @@ spec:
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command: ["sh", "-c"]
args:
- "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-agg && python3 -m dynamo.frontend --http-port=8000"
SGLangDecodeWorker: SGLangDecodeWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: sglang-agg dynamoNamespace: sglang-agg
......
...@@ -14,10 +14,9 @@ spec: ...@@ -14,10 +14,9 @@ spec:
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang envs:
command: ["sh", "-c"] - name: DYN_ROUTER_MODE
args: value: kv
- "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-agg-router && python3 -m dynamo.frontend --http-port=8000 --router-mode kv"
SGLangDecodeWorker: SGLangDecodeWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: sglang-agg-router dynamoNamespace: sglang-agg-router
......
...@@ -23,10 +23,6 @@ spec: ...@@ -23,10 +23,6 @@ spec:
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command: ["sh", "-c"]
args:
- "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-disagg-multinode && python3 -m dynamo.frontend --http-port=8000"
decode: decode:
multinode: multinode:
nodeCount: 2 nodeCount: 2
......
...@@ -14,10 +14,6 @@ spec: ...@@ -14,10 +14,6 @@ spec:
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command: ["sh", "-c"]
args:
- "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-disagg && python3 -m dynamo.frontend --http-port=8000"
SGLangDecodeWorker: SGLangDecodeWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: sglang-disagg dynamoNamespace: sglang-disagg
......
...@@ -21,10 +21,6 @@ spec: ...@@ -21,10 +21,6 @@ spec:
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1 image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
workingDir: /workspace/components/backends/sglang
command: ["sh", "-c"]
args:
- "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-disagg && python3 -m dynamo.frontend --http-port=8000"
Planner: Planner:
dynamoNamespace: dynamo dynamoNamespace: dynamo
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
......
...@@ -14,12 +14,6 @@ spec: ...@@ -14,12 +14,6 @@ spec:
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000"
TRTLLMWorker: TRTLLMWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: trtllm-agg dynamoNamespace: trtllm-agg
......
...@@ -14,12 +14,9 @@ spec: ...@@ -14,12 +14,9 @@ spec:
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
workingDir: /workspace/components/backends/trtllm envs:
command: - name: DYN_ROUTER_MODE
- /bin/sh value: kv
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000 --router-mode kv"
TRTLLMWorker: TRTLLMWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: trtllm-agg-router dynamoNamespace: trtllm-agg-router
......
...@@ -14,12 +14,6 @@ spec: ...@@ -14,12 +14,6 @@ spec:
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000"
TRTLLMPrefillWorker: TRTLLMPrefillWorker:
dynamoNamespace: trtllm-disagg dynamoNamespace: trtllm-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
......
...@@ -14,12 +14,9 @@ spec: ...@@ -14,12 +14,9 @@ spec:
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
workingDir: /workspace/components/backends/trtllm envs:
command: - name: DYN_ROUTER_MODE
- /bin/sh value: kv
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000 --router-mode kv"
TRTLLMPrefillWorker: TRTLLMPrefillWorker:
dynamoNamespace: trtllm-v1-disagg-router dynamoNamespace: trtllm-v1-disagg-router
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
......
...@@ -14,12 +14,6 @@ spec: ...@@ -14,12 +14,6 @@ spec:
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000"
VllmDecodeWorker: VllmDecodeWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: vllm-agg dynamoNamespace: vllm-agg
......
...@@ -14,12 +14,9 @@ spec: ...@@ -14,12 +14,9 @@ spec:
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm envs:
command: - name: DYN_ROUTER_MODE
- /bin/sh value: kv
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000 --router-mode kv"
VllmDecodeWorker: VllmDecodeWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: vllm-agg-router dynamoNamespace: vllm-agg-router
......
...@@ -14,12 +14,6 @@ spec: ...@@ -14,12 +14,6 @@ spec:
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000"
VllmDecodeWorker: VllmDecodeWorker:
dynamoNamespace: vllm-disagg dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
......
...@@ -21,12 +21,6 @@ spec: ...@@ -21,12 +21,6 @@ spec:
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0814-02 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0814-02
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000"
Planner: Planner:
dynamoNamespace: vllm-disagg-planner dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
......
...@@ -14,12 +14,9 @@ spec: ...@@ -14,12 +14,9 @@ spec:
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm envs:
command: - name: DYN_ROUTER_MODE
- /bin/sh value: kv
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000 --router-mode kv"
VllmDecodeWorker: VllmDecodeWorker:
dynamoNamespace: vllm-v1-disagg-router dynamoNamespace: vllm-v1-disagg-router
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
......
...@@ -88,7 +88,10 @@ def parse_args(): ...@@ -88,7 +88,10 @@ def parse_args():
"--kv-cache-block-size", type=int, help="KV cache block size (u32)." "--kv-cache-block-size", type=int, help="KV cache block size (u32)."
) )
parser.add_argument( parser.add_argument(
"--http-port", type=int, default=8080, help="HTTP port for the engine (u16)." "--http-port",
type=int,
default=int(os.environ.get("DYN_HTTP_PORT", "8080")),
help="HTTP port for the engine (u16). Can be set via DYN_HTTP_PORT env var.",
) )
parser.add_argument( parser.add_argument(
"--tls-cert-path", "--tls-cert-path",
...@@ -106,8 +109,8 @@ def parse_args(): ...@@ -106,8 +109,8 @@ def parse_args():
"--router-mode", "--router-mode",
type=str, type=str,
choices=["round-robin", "random", "kv"], choices=["round-robin", "random", "kv"],
default="round-robin", default=os.environ.get("DYN_ROUTER_MODE", "round-robin"),
help="How to route the request", help="How to route the request. Can be set via DYN_ROUTER_MODE env var.",
) )
parser.add_argument( parser.add_argument(
"--kv-overlap-score-weight", "--kv-overlap-score-weight",
......
...@@ -26,6 +26,10 @@ func (f *FrontendDefaults) GetBaseContainer(context ComponentContext) (corev1.Co ...@@ -26,6 +26,10 @@ func (f *FrontendDefaults) GetBaseContainer(context ComponentContext) (corev1.Co
// Frontend doesn't need backend-specific config // Frontend doesn't need backend-specific config
container := f.getCommonContainer(context) container := f.getCommonContainer(context)
// Set default command and args
container.Command = []string{"python3"}
container.Args = []string{"-m", "dynamo.frontend"}
// Add HTTP port // Add HTTP port
container.Ports = []corev1.ContainerPort{ container.Ports = []corev1.ContainerPort{
{ {
...@@ -71,6 +75,10 @@ func (f *FrontendDefaults) GetBaseContainer(context ComponentContext) (corev1.Co ...@@ -71,6 +75,10 @@ func (f *FrontendDefaults) GetBaseContainer(context ComponentContext) (corev1.Co
Name: commonconsts.EnvDynamoServicePort, Name: commonconsts.EnvDynamoServicePort,
Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort), Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort),
}, },
{
Name: "DYN_HTTP_PORT", // TODO: need to reconcile DYNAMO_PORT and DYN_HTTP_PORT
Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort),
},
}...) }...)
return container, nil return container, nil
......
...@@ -1286,6 +1286,10 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -1286,6 +1286,10 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
}, },
}, },
Env: []corev1.EnvVar{ Env: []corev1.EnvVar{
{
Name: "DYN_HTTP_PORT",
Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort),
},
{ {
Name: "DYNAMO_POD_GANG_SET_REPLICAS", Name: "DYNAMO_POD_GANG_SET_REPLICAS",
Value: "1", Value: "1",
...@@ -2036,6 +2040,10 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -2036,6 +2040,10 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
}, },
}, },
Env: []corev1.EnvVar{ Env: []corev1.EnvVar{
{
Name: "DYN_HTTP_PORT",
Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort),
},
{ {
Name: "DYNAMO_POD_GANG_SET_REPLICAS", Name: "DYNAMO_POD_GANG_SET_REPLICAS",
Value: "1", Value: "1",
...@@ -2798,6 +2806,10 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -2798,6 +2806,10 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
}, },
}, },
Env: []corev1.EnvVar{ Env: []corev1.EnvVar{
{
Name: "DYN_HTTP_PORT",
Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort),
},
{ {
Name: "DYNAMO_POD_GANG_SET_REPLICAS", Name: "DYNAMO_POD_GANG_SET_REPLICAS",
Value: "1", Value: "1",
...@@ -4243,6 +4255,87 @@ func XTestGenerateGrovePodGangSet_StartsAfterDependencies(t *testing.T) { ...@@ -4243,6 +4255,87 @@ func XTestGenerateGrovePodGangSet_StartsAfterDependencies(t *testing.T) {
} }
} }
// TestGenerateBasePodSpec_Frontend checks the pod spec that GenerateBasePodSpec
// produces for a frontend component that supplies no overrides of its own.
// For the first container it asserts that the command is "python3", that the
// args are "-m dynamo.frontend", and that every entry of wantEnvVars is
// present with the expected value.
func TestGenerateBasePodSpec_Frontend(t *testing.T) {
	secretsRetriever := &mockSecretsRetriever{}
	controllerConfig := controller_common.Config{}
	dynamoDeployment := &v1alpha1.DynamoGraphDeployment{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "test-deployment",
			Namespace: "default",
		},
	}

	tests := []struct {
		name             string
		component        *v1alpha1.DynamoComponentDeploymentOverridesSpec
		backendFramework BackendFramework
		wantEnvVars      map[string]string
		wantErr          bool
	}{
		{
			name: "frontend with default command",
			component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
				DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
					ComponentType: commonconsts.ComponentTypeFrontend,
				},
			},
			backendFramework: BackendFrameworkVLLM,
			wantEnvVars: map[string]string{
				"DYN_HTTP_PORT": fmt.Sprintf("%d", commonconsts.DynamoServicePort),
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			podSpec, err := GenerateBasePodSpec(
				tt.component,
				tt.backendFramework,
				secretsRetriever,
				dynamoDeployment.Name,
				dynamoDeployment.Namespace,
				RoleMain,
				1,
				controllerConfig,
				commonconsts.MultinodeDeploymentTypeGrove,
				"test-service",
			)
			if (err != nil) != tt.wantErr {
				t.Fatalf("GenerateBasePodSpec() error = %v, wantErr %v", err, tt.wantErr)
			}
			if tt.wantErr {
				return
			}

			// Guard the index below: without it an empty container list
			// would panic instead of producing a readable test failure.
			if len(podSpec.Containers) == 0 {
				t.Fatalf("GenerateBasePodSpec() returned no containers")
			}
			container := podSpec.Containers[0]

			// Check command and args
			wantCommand := []string{"python3"}
			wantArgs := []string{"-m", "dynamo.frontend"}
			if !reflect.DeepEqual(container.Command, wantCommand) {
				t.Errorf("GenerateBasePodSpec() command = %v, want %v",
					container.Command, wantCommand)
			}
			if !reflect.DeepEqual(container.Args, wantArgs) {
				t.Errorf("GenerateBasePodSpec() args = %v, want %v",
					container.Args, wantArgs)
			}

			// Check environment variables
			envVars := make(map[string]string, len(container.Env))
			for _, env := range container.Env {
				envVars[env.Name] = env.Value
			}
			for k, v := range tt.wantEnvVars {
				got, ok := envVars[k]
				if !ok {
					// Report a missing variable distinctly from one that
					// is present but carries the wrong (or empty) value.
					t.Errorf("GenerateBasePodSpec() env var %s not set, want %v", k, v)
					continue
				}
				if got != v {
					t.Errorf("GenerateBasePodSpec() env var %s = %v, want %v",
						k, got, v)
				}
			}
		})
	}
}
func TestGenerateBasePodSpec_PlannerServiceAccount(t *testing.T) { func TestGenerateBasePodSpec_PlannerServiceAccount(t *testing.T) {
secretsRetriever := &mockSecretsRetriever{} secretsRetriever := &mockSecretsRetriever{}
controllerConfig := controller_common.Config{} controllerConfig := controller_common.Config{}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment