Unverified commit 6e213d90, authored by Julien Mancuso and committed by GitHub
Browse files

fix: fix operator tests (#3904)


Signed-off-by: Julien Mancuso <jmancuso@nvidia.com>
parent 59535682
...@@ -734,11 +734,15 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing. ...@@ -734,11 +734,15 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
MainContainer: &corev1.Container{ MainContainer: &corev1.Container{
Image: "test-image:latest", Image: "test-image:latest",
Command: []string{ Command: []string{
"sh", "some",
"-c", "dynamo",
"command",
}, },
Args: []string{ Args: []string{
"some dynamo command", "--tensor-parallel-size",
"4",
"--pipeline-parallel-size",
"1",
}, },
Env: []corev1.EnvVar{ Env: []corev1.EnvVar{
{ {
...@@ -817,8 +821,8 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing. ...@@ -817,8 +821,8 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
{ {
Name: commonconsts.MainContainerName, Name: commonconsts.MainContainerName,
Image: "test-image:latest", Image: "test-image:latest",
Command: []string{"sh", "-c"}, Command: []string{"/bin/sh", "-c"},
Args: []string{"ray start --head --port=6379 && some dynamo command"}, Args: []string{"ray start --head --port=6379 && some dynamo command --tensor-parallel-size 4 --pipeline-parallel-size 1"},
Env: []corev1.EnvVar{ Env: []corev1.EnvVar{
{Name: "DYN_NAMESPACE", Value: "default"}, {Name: "DYN_NAMESPACE", Value: "default"},
{Name: "DYN_PARENT_DGD_K8S_NAME", Value: "test-lws-deploy"}, {Name: "DYN_PARENT_DGD_K8S_NAME", Value: "test-lws-deploy"},
...@@ -931,7 +935,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing. ...@@ -931,7 +935,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
{ {
Name: commonconsts.MainContainerName, Name: commonconsts.MainContainerName,
Image: "test-image:latest", Image: "test-image:latest",
Command: []string{"sh", "-c"}, Command: []string{"/bin/sh", "-c"},
Args: []string{"ray start --address=$(LWS_LEADER_ADDRESS):6379 --block"}, Args: []string{"ray start --address=$(LWS_LEADER_ADDRESS):6379 --block"},
Env: []corev1.EnvVar{ Env: []corev1.EnvVar{
{Name: "DYN_NAMESPACE", Value: "default"}, {Name: "DYN_NAMESPACE", Value: "default"},
......
...@@ -106,7 +106,7 @@ func injectRayDistributedLaunchFlags(container *corev1.Container, role Role, ser ...@@ -106,7 +106,7 @@ func injectRayDistributedLaunchFlags(container *corev1.Container, role Role, ser
leaderHostname := multinodeDeployer.GetLeaderHostname(serviceName) leaderHostname := multinodeDeployer.GetLeaderHostname(serviceName)
container.Args = []string{fmt.Sprintf("ray start --address=%s:%s --block", leaderHostname, VLLMPort)} container.Args = []string{fmt.Sprintf("ray start --address=%s:%s --block", leaderHostname, VLLMPort)}
} }
container.Command = []string{"sh", "-c"} // ensure cmd is a shell container.Command = []string{"/bin/sh", "-c"} // ensure cmd is a shell
} }
func injectDataParallelLaunchFlags(container *corev1.Container, role Role, serviceName string, multinodeDeployer MultinodeDeployer) { func injectDataParallelLaunchFlags(container *corev1.Container, role Role, serviceName string, multinodeDeployer MultinodeDeployer) {
......
...@@ -303,7 +303,7 @@ func TestUpdateVLLMMultinodeArgs(t *testing.T) { ...@@ -303,7 +303,7 @@ func TestUpdateVLLMMultinodeArgs(t *testing.T) {
name: "leader prepends ray start --head", name: "leader prepends ray start --head",
role: RoleLeader, role: RoleLeader,
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
initialContainer: &corev1.Container{Args: []string{"python3", "-m", "dynamo.vllm", tensorParallelSizeFlag, "16"}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("8")}}}, initialContainer: &corev1.Container{Command: []string{"python3"}, Args: []string{"-m", "dynamo.vllm", tensorParallelSizeFlag, "16"}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("8")}}},
expectedArgs: []string{fmt.Sprintf("ray start --head --port=%s && python3 -m dynamo.vllm %s 16", VLLMPort, tensorParallelSizeFlag)}, expectedArgs: []string{fmt.Sprintf("ray start --head --port=%s && python3 -m dynamo.vllm %s 16", VLLMPort, tensorParallelSizeFlag)},
}, },
{ {
......
...@@ -2417,11 +2417,17 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) { ...@@ -2417,11 +2417,17 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
MainContainer: &corev1.Container{ MainContainer: &corev1.Container{
Image: "worker-image", Image: "worker-image",
Command: []string{ Command: []string{
"/bin/sh", "python3",
"-c", "-m",
"dynamo.vllm",
}, },
Args: []string{ Args: []string{
"python3 -m dynamo.vllm --custom-flag custom-value", "--custom-flag",
"custom-value",
"--tensor-parallel-size",
"4",
"--pipeline-parallel-size",
"1",
}, },
StartupProbe: &corev1.Probe{ StartupProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{ ProbeHandler: corev1.ProbeHandler{
...@@ -2598,7 +2604,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) { ...@@ -2598,7 +2604,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
"-c", "-c",
}, },
Args: []string{ Args: []string{
"ray start --head --port=6379 && python3 -m dynamo.vllm --custom-flag custom-value", "ray start --head --port=6379 && python3 -m dynamo.vllm --custom-flag custom-value --tensor-parallel-size 4 --pipeline-parallel-size 1",
}, },
Ports: []corev1.ContainerPort{ Ports: []corev1.ContainerPort{
{ {
...@@ -3345,7 +3351,7 @@ func TestGeneratePodSpecForComponent_VLLM(t *testing.T) { ...@@ -3345,7 +3351,7 @@ func TestGeneratePodSpecForComponent_VLLM(t *testing.T) {
ComponentType: commonconsts.ComponentTypeWorker, ComponentType: commonconsts.ComponentTypeWorker,
ExtraPodSpec: &common.ExtraPodSpec{ ExtraPodSpec: &common.ExtraPodSpec{
MainContainer: &corev1.Container{ MainContainer: &corev1.Container{
Args: []string{"python3", "-m", "dynamo.vllm"}, Args: []string{"python3", "-m", "dynamo.vllm", "--tensor-parallel-size", "4", "--pipeline-parallel-size", "1"},
}, },
}, },
}, },
...@@ -3359,6 +3365,11 @@ func TestGeneratePodSpecForComponent_VLLM(t *testing.T) { ...@@ -3359,6 +3365,11 @@ func TestGeneratePodSpecForComponent_VLLM(t *testing.T) {
name: "VLLM multinode worker", name: "VLLM multinode worker",
component: &v1alpha1.DynamoComponentDeploymentSharedSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeWorker, ComponentType: commonconsts.ComponentTypeWorker,
ExtraPodSpec: &common.ExtraPodSpec{
MainContainer: &corev1.Container{
Args: []string{"python3", "-m", "dynamo.vllm", "--tensor-parallel-size", "4", "--pipeline-parallel-size", "1"},
},
},
}, },
backendFramework: BackendFrameworkVLLM, backendFramework: BackendFrameworkVLLM,
role: RoleWorker, role: RoleWorker,
...@@ -4757,7 +4768,7 @@ func TestGenerateBasePodSpec_ResourceClaims(t *testing.T) { ...@@ -4757,7 +4768,7 @@ func TestGenerateBasePodSpec_ResourceClaims(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
component *v1alpha1.DynamoComponentDeploymentOverridesSpec component *v1alpha1.DynamoComponentDeploymentSharedSpec
expectError bool expectError bool
expectedResourceClaims []corev1.ResourceClaim expectedResourceClaims []corev1.ResourceClaim
expectedPodClaims []corev1.PodResourceClaim expectedPodClaims []corev1.PodResourceClaim
...@@ -4765,8 +4776,7 @@ func TestGenerateBasePodSpec_ResourceClaims(t *testing.T) { ...@@ -4765,8 +4776,7 @@ func TestGenerateBasePodSpec_ResourceClaims(t *testing.T) {
}{ }{
{ {
name: "component with resource claims", name: "component with resource claims",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeWorker, ComponentType: commonconsts.ComponentTypeWorker,
Resources: &common.Resources{ Resources: &common.Resources{
Requests: &common.ResourceItem{ Requests: &common.ResourceItem{
...@@ -4818,7 +4828,6 @@ func TestGenerateBasePodSpec_ResourceClaims(t *testing.T) { ...@@ -4818,7 +4828,6 @@ func TestGenerateBasePodSpec_ResourceClaims(t *testing.T) {
}, },
}, },
}, },
},
expectError: false, expectError: false,
expectedResourceClaims: []corev1.ResourceClaim{ expectedResourceClaims: []corev1.ResourceClaim{
{ {
...@@ -4853,8 +4862,7 @@ func TestGenerateBasePodSpec_ResourceClaims(t *testing.T) { ...@@ -4853,8 +4862,7 @@ func TestGenerateBasePodSpec_ResourceClaims(t *testing.T) {
}, },
{ {
name: "component with multiple resource claims", name: "component with multiple resource claims",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeWorker, ComponentType: commonconsts.ComponentTypeWorker,
Resources: &common.Resources{ Resources: &common.Resources{
Claims: []corev1.ResourceClaim{ Claims: []corev1.ResourceClaim{
...@@ -4886,7 +4894,6 @@ func TestGenerateBasePodSpec_ResourceClaims(t *testing.T) { ...@@ -4886,7 +4894,6 @@ func TestGenerateBasePodSpec_ResourceClaims(t *testing.T) {
}, },
}, },
}, },
},
expectError: false, expectError: false,
expectedResourceClaims: []corev1.ResourceClaim{ expectedResourceClaims: []corev1.ResourceClaim{
{ {
...@@ -4909,8 +4916,7 @@ func TestGenerateBasePodSpec_ResourceClaims(t *testing.T) { ...@@ -4909,8 +4916,7 @@ func TestGenerateBasePodSpec_ResourceClaims(t *testing.T) {
}, },
{ {
name: "component without resource claims", name: "component without resource claims",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeFrontend, ComponentType: commonconsts.ComponentTypeFrontend,
Resources: &common.Resources{ Resources: &common.Resources{
Requests: &common.ResourceItem{ Requests: &common.ResourceItem{
...@@ -4919,7 +4925,6 @@ func TestGenerateBasePodSpec_ResourceClaims(t *testing.T) { ...@@ -4919,7 +4925,6 @@ func TestGenerateBasePodSpec_ResourceClaims(t *testing.T) {
}, },
}, },
}, },
},
expectError: false, expectError: false,
expectedResourceClaims: nil, expectedResourceClaims: nil,
expectedPodClaims: nil, expectedPodClaims: nil,
......
...@@ -39,7 +39,8 @@ helm fetch https://helm.ngc.nvidia.com/nvidia/ai-dynamo/charts/dynamo-platform-$ ...@@ -39,7 +39,8 @@ helm fetch https://helm.ngc.nvidia.com/nvidia/ai-dynamo/charts/dynamo-platform-$
helm install dynamo-platform dynamo-platform-${RELEASE_VERSION}.tgz --namespace ${NAMESPACE} --create-namespace helm install dynamo-platform dynamo-platform-${RELEASE_VERSION}.tgz --namespace ${NAMESPACE} --create-namespace
``` ```
For namespace-restricted installations (shared clusters): For namespace-restricted installations (shared clusters), you'll need to install the Dynamo platform in each namespace you want to deploy to.
Namespace restriction is enabled by setting the `dynamo-operator.namespaceRestriction.enabled` flag to `true`.
```bash ```bash
helm install dynamo-platform dynamo-platform-${RELEASE_VERSION}.tgz \ helm install dynamo-platform dynamo-platform-${RELEASE_VERSION}.tgz \
--namespace ${NAMESPACE} \ --namespace ${NAMESPACE} \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment