Unverified Commit 179f993a authored by Alec's avatar Alec Committed by GitHub
Browse files

fix: profiler bug fixes and doc improvements (#3530)


Signed-off-by: default avataralec-flowers <aflowers@nvidia.com>
Signed-off-by: default avatarhongkuanz <hongkuanz@nvidia.com>
Co-authored-by: default avatarhongkuanz <hongkuanz@nvidia.com>
parent bfbcae7a
...@@ -145,11 +145,6 @@ def remove_valued_arguments(args: list[str], key: str) -> list[str]: ...@@ -145,11 +145,6 @@ def remove_valued_arguments(args: list[str], key: str) -> list[str]:
return args return args
def join_arguments(args: list[str]) -> list[str]:
# Use shlex.join to properly quote arguments that contain spaces or special characters
return [shlex.join(args)]
def append_argument(args: list[str], to_append) -> list[str]: def append_argument(args: list[str], to_append) -> list[str]:
idx = find_arg_index(args) idx = find_arg_index(args)
if isinstance(to_append, list): if isinstance(to_append, list):
...@@ -469,7 +464,7 @@ class VllmV1ConfigModifier: ...@@ -469,7 +464,7 @@ class VllmV1ConfigModifier:
if "--no-enable-prefix-caching" not in args: if "--no-enable-prefix-caching" not in args:
args = append_argument(args, "--no-enable-prefix-caching") args = append_argument(args, "--no-enable-prefix-caching")
worker_service.extraPodSpec.mainContainer.args = join_arguments(args) worker_service.extraPodSpec.mainContainer.args = args
elif target == "decode": elif target == "decode":
# Get service names by inferring from subComponentType first # Get service names by inferring from subComponentType first
...@@ -500,7 +495,7 @@ class VllmV1ConfigModifier: ...@@ -500,7 +495,7 @@ class VllmV1ConfigModifier:
if "--no-enable-prefix-caching" in args: if "--no-enable-prefix-caching" in args:
args.remove("--no-enable-prefix-caching") args.remove("--no-enable-prefix-caching")
worker_service.extraPodSpec.mainContainer.args = join_arguments(args) worker_service.extraPodSpec.mainContainer.args = args
# set num workers to 1 # set num workers to 1
# Use the inferred decode service name # Use the inferred decode service name
...@@ -537,7 +532,7 @@ class VllmV1ConfigModifier: ...@@ -537,7 +532,7 @@ class VllmV1ConfigModifier:
except ValueError: except ValueError:
args = append_argument(args, ["--tensor-parallel-size", str(tp_size)]) args = append_argument(args, ["--tensor-parallel-size", str(tp_size)])
worker_service.extraPodSpec.mainContainer.args = join_arguments(args) worker_service.extraPodSpec.mainContainer.args = args
return cfg.model_dump() return cfg.model_dump()
...@@ -695,7 +690,7 @@ class SGLangConfigModifier: ...@@ -695,7 +690,7 @@ class SGLangConfigModifier:
if "--disable-radix-cache" not in args: if "--disable-radix-cache" not in args:
args = append_argument(args, "--disable-radix-cache") args = append_argument(args, "--disable-radix-cache")
worker_service.extraPodSpec.mainContainer.args = join_arguments(args) worker_service.extraPodSpec.mainContainer.args = args
elif target == "decode": elif target == "decode":
# Get service names by inferring from subComponentType first # Get service names by inferring from subComponentType first
...@@ -739,7 +734,7 @@ class SGLangConfigModifier: ...@@ -739,7 +734,7 @@ class SGLangConfigModifier:
args, ["--load-balance-method", "round_robin"] args, ["--load-balance-method", "round_robin"]
) )
worker_service.extraPodSpec.mainContainer.args = join_arguments(args) worker_service.extraPodSpec.mainContainer.args = args
# set num workers to 1 # set num workers to 1
# Use the inferred decode service name # Use the inferred decode service name
...@@ -772,7 +767,7 @@ class SGLangConfigModifier: ...@@ -772,7 +767,7 @@ class SGLangConfigModifier:
# Set --tp argument # Set --tp argument
args = set_argument_value(args, "--tp", str(tp_size)) args = set_argument_value(args, "--tp", str(tp_size))
worker_service.extraPodSpec.mainContainer.args = join_arguments(args) worker_service.extraPodSpec.mainContainer.args = args
return cfg.model_dump() return cfg.model_dump()
@classmethod @classmethod
...@@ -807,7 +802,7 @@ class SGLangConfigModifier: ...@@ -807,7 +802,7 @@ class SGLangConfigModifier:
if "--enable-dp-attention" in args: if "--enable-dp-attention" in args:
args.remove("--enable-dp-attention") args.remove("--enable-dp-attention")
worker_service.extraPodSpec.mainContainer.args = join_arguments(args) worker_service.extraPodSpec.mainContainer.args = args
return cfg.model_dump() return cfg.model_dump()
@classmethod @classmethod
...@@ -842,7 +837,7 @@ class SGLangConfigModifier: ...@@ -842,7 +837,7 @@ class SGLangConfigModifier:
# 4. Set --ep-size=dep_size (expert parallelism size) # 4. Set --ep-size=dep_size (expert parallelism size)
args = set_argument_value(args, "--ep-size", str(dep_size)) args = set_argument_value(args, "--ep-size", str(dep_size))
worker_service.extraPodSpec.mainContainer.args = join_arguments(args) worker_service.extraPodSpec.mainContainer.args = args
return cfg.model_dump() return cfg.model_dump()
@classmethod @classmethod
...@@ -989,7 +984,7 @@ class TrtllmConfigModifier: ...@@ -989,7 +984,7 @@ class TrtllmConfigModifier:
override_str = json.dumps(override_dict) override_str = json.dumps(override_dict)
args = append_argument(args, ["--override-engine-args", override_str]) args = append_argument(args, ["--override-engine-args", override_str])
worker_service.extraPodSpec.mainContainer.args = join_arguments(args) worker_service.extraPodSpec.mainContainer.args = args
elif target == "decode": elif target == "decode":
# Get service names by inferring from subComponentType first # Get service names by inferring from subComponentType first
...@@ -1037,7 +1032,7 @@ class TrtllmConfigModifier: ...@@ -1037,7 +1032,7 @@ class TrtllmConfigModifier:
override_str = json.dumps(override_dict) override_str = json.dumps(override_dict)
args = append_argument(args, ["--override-engine-args", override_str]) args = append_argument(args, ["--override-engine-args", override_str])
worker_service.extraPodSpec.mainContainer.args = join_arguments(args) worker_service.extraPodSpec.mainContainer.args = args
# Set num workers to 1 # Set num workers to 1
# Use the inferred decode service name # Use the inferred decode service name
...@@ -1082,7 +1077,7 @@ class TrtllmConfigModifier: ...@@ -1082,7 +1077,7 @@ class TrtllmConfigModifier:
override_str = json.dumps(override_dict) override_str = json.dumps(override_dict)
args = append_argument(args, ["--override-engine-args", override_str]) args = append_argument(args, ["--override-engine-args", override_str])
worker_service.extraPodSpec.mainContainer.args = join_arguments(args) worker_service.extraPodSpec.mainContainer.args = args
return cfg.model_dump() return cfg.model_dump()
......
...@@ -34,6 +34,26 @@ This includes: ...@@ -34,6 +34,26 @@ This includes:
After setting up Dynamo Cloud, use this script to prepare your namespace with the additional resources needed for benchmarking and profiling workflows: After setting up Dynamo Cloud, use this script to prepare your namespace with the additional resources needed for benchmarking and profiling workflows:
The setup script creates a `dynamo-pvc` with `ReadWriteMany` (RWX). If your cluster's default `storageClassName` does not support RWX, set `storageClassName` in `deploy/utils/manifests/pvc.yaml` to an RWX-capable class before running the script.
Example (add under `spec` in `deploy/utils/manifests/pvc.yaml`):
```yaml
...
spec:
accessModes:
- ReadWriteMany
storageClassName: <your-rwx-storageclass>
...
```
> [!TIP]
> **Check your cluster's storage classes**
>
> - List storage classes and provisioners:
> ```bash
> kubectl get sc -o wide
> ```
```bash ```bash
export NAMESPACE=your-dynamo-namespace export NAMESPACE=your-dynamo-namespace
export HF_TOKEN=<HF_TOKEN> # Optional: for HuggingFace model access export HF_TOKEN=<HF_TOKEN> # Optional: for HuggingFace model access
......
...@@ -224,6 +224,14 @@ If you see `ErrImagePull` or `ImagePullBackOff` errors with 401 unauthorized mes ...@@ -224,6 +224,14 @@ If you see `ErrImagePull` or `ImagePullBackOff` errors with 401 unauthorized mes
3. The service account should show `imagePullSecrets` containing `nvcr-imagepullsecret`. 3. The service account should show `imagePullSecrets` containing `nvcr-imagepullsecret`.
If it doesn't, create the secret:
```bash
export NGC_API_KEY=<your-ngc-api-key-here>
kubectl create secret docker-registry nvcr-imagepullsecret --docker-server=nvcr.io --docker-username='$oauthtoken' --docker-password=$NGC_API_KEY
```
## Running the Profiling Script with AI Configurator ## Running the Profiling Script with AI Configurator
......
...@@ -144,16 +144,24 @@ kubectl create secret docker-registry docker-imagepullsecret \ ...@@ -144,16 +144,24 @@ kubectl create secret docker-registry docker-imagepullsecret \
--docker-password=${DOCKER_PASSWORD} \ --docker-password=${DOCKER_PASSWORD} \
--namespace=${NAMESPACE} --namespace=${NAMESPACE}
cd deploy/cloud/helm
# 4. Install CRDs # 4. Install CRDs
helm upgrade --install dynamo-crds ./crds/ --namespace default helm upgrade --install dynamo-crds ./crds/ --namespace default
# 5. Install Platform # 5. Install Platform
helm dep build ./platform/ helm dep build ./platform/
# To install cluster-wide instead, set NS_RESTRICT_FLAGS="" (empty) or omit that line entirely.
NS_RESTRICT_FLAGS="--set dynamo-operator.namespaceRestriction.enabled=true"
helm install dynamo-platform ./platform/ \ helm install dynamo-platform ./platform/ \
--namespace ${NAMESPACE} \ --namespace "${NAMESPACE}" \
--set "dynamo-operator.controllerManager.manager.image.repository=${DOCKER_SERVER}/dynamo-operator" \ --set "dynamo-operator.controllerManager.manager.image.repository=${DOCKER_SERVER}/dynamo-operator" \
--set "dynamo-operator.controllerManager.manager.image.tag=${IMAGE_TAG}" \ --set "dynamo-operator.controllerManager.manager.image.tag=${IMAGE_TAG}" \
--set "dynamo-operator.imagePullSecrets[0].name=docker-imagepullsecret" --set "dynamo-operator.imagePullSecrets[0].name=docker-imagepullsecret" \
${NS_RESTRICT_FLAGS}
``` ```
[Verify Installation](#verify-installation) [Verify Installation](#verify-installation)
...@@ -166,7 +174,7 @@ kubectl get crd | grep dynamo ...@@ -166,7 +174,7 @@ kubectl get crd | grep dynamo
# Check operator and platform pods # Check operator and platform pods
kubectl get pods -n ${NAMESPACE} kubectl get pods -n ${NAMESPACE}
# Expected: dynamo-operator-* and etcd-* pods Running # Expected: dynamo-operator-*, etcd-*, and nats-* pods Running
``` ```
## Next Steps ## Next Steps
......
...@@ -39,7 +39,8 @@ flowchart TD ...@@ -39,7 +39,8 @@ flowchart TD
Before deploying the SLA planner, ensure: Before deploying the SLA planner, ensure:
- **Dynamo platform installed** (see [Installation Guide](/docs/kubernetes/installation_guide.md)) - **Dynamo platform installed** (see [Installation Guide](/docs/kubernetes/installation_guide.md))
- **[kube-prometheus-stack](/docs/kubernetes/metrics.md) installed and running.** By default, the Prometheus server is assumed to be deployed in the `monitoring` namespace. If it is deployed to a different namespace, set `dynamo-operator.dynamo.metrics.prometheusEndpoint="http://prometheus-kube-prometheus-prometheus.<namespace>.svc.cluster.local:9090"`. - **[kube-prometheus-stack](/docs/kubernetes/metrics.md) installed and running.** By default, the Prometheus server is assumed to be deployed in the `monitoring` namespace. If it is deployed to a different namespace, set `dynamo-operator.dynamo.metrics.prometheusEndpoint="http://prometheus-kube-prometheus-prometheus.<namespace>.svc.cluster.local:9090"`.
- **Benchmarking resources setup** (see [Kubernetes utilities for Dynamo Benchmarking and Profiling](../../deploy/utils/README.md)) The script will create a `dynamo-pvc` with `ReadWriteMany` access. If your cluster's default storageClassName does not allow `ReadWriteMany`, you need to specify a different storageClassName in `pvc.yaml`. - **Benchmarking resources setup** (see [Kubernetes utilities for Dynamo Benchmarking and Profiling](../../deploy/utils/README.md)) The script will create a `dynamo-pvc` with `ReadWriteMany` access. If your cluster's default storageClassName does not allow `ReadWriteMany`, you need to specify a different storageClassName that supports `ReadWriteMany` in `deploy/utils/manifests/pvc.yaml`.
## Pre-Deployment Profiling ## Pre-Deployment Profiling
...@@ -260,4 +261,4 @@ This is because the `subComponentType` field has only been added in newer versio ...@@ -260,4 +261,4 @@ This is because the `subComponentType` field has only been added in newer versio
--- ---
> [!TIP] > [!TIP]
> **Need Help?** If you encounter issues, check the [troubleshooting section](#troubleshooting) or refer to the detailed guides linked in [Next Steps](#next-steps). > **Need Help?** If you encounter issues, check the [troubleshooting section](#troubleshooting) or refer to the detailed guides linked in [Next Steps](#next-steps).
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment