Unverified Commit 13560ab2 authored by Biswa Panda's avatar Biswa Panda Committed by GitHub
Browse files

feat: sglang examples launch and deploy (#2068)

parent 7a0013be
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment
metadata:
name: sglang-agg
spec:
services:
Frontend:
livenessProbe:
httpGet:
path: /health
port: 8000
initialDelaySeconds: 60
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
readinessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
initialDelaySeconds: 60
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
dynamoNamespace: sglang-agg
componentType: main
replicas: 1
resources:
requests:
cpu: "5"
memory: "10Gi"
limits:
cpu: "5"
memory: "10Gi"
extraPodSpec:
mainContainer:
image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command: ["sh", "-c"]
args:
- "python3 -m dynamo.sglang.utils.clear_namespace --namespace dynamo && python3 -m dynamo.frontend"
SGLangDecodeWorker:
envFromSecret: hf-token-secret
livenessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
readinessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
initialDelaySeconds: 60
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
dynamoNamespace: sglang-agg
componentType: worker
replicas: 1
resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits:
cpu: "10"
memory: "20Gi"
gpu: "1"
extraPodSpec:
mainContainer:
image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
args:
- "python3"
- "-m"
- "dynamo.sglang.worker"
- "--model-path"
- "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
- "--served-model-name"
- "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
- "--page-size"
- "16"
- "--tp"
- "1"
- "--trust-remote-code"
- "--skip-tokenizer-init"
...@@ -25,4 +25,4 @@ python3 -m dynamo.sglang.worker \ ...@@ -25,4 +25,4 @@ python3 -m dynamo.sglang.worker \
--page-size 16 \ --page-size 16 \
--tp 1 \ --tp 1 \
--trust-remote-code \ --trust-remote-code \
--skip-tokenizer-init \ --skip-tokenizer-init
...@@ -25,4 +25,4 @@ python3 -m dynamo.sglang.worker \ ...@@ -25,4 +25,4 @@ python3 -m dynamo.sglang.worker \
--page-size 16 \ --page-size 16 \
--tp 1 \ --tp 1 \
--trust-remote-code \ --trust-remote-code \
--skip-tokenizer-init \ --skip-tokenizer-init
...@@ -463,6 +463,9 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la ...@@ -463,6 +463,9 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la
sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \ sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
echo "cat ~/.launch_screen" >> ~/.bashrc echo "cat ~/.launch_screen" >> ~/.bashrc
# Once UX refactor is merged, we can remove these files
# Python components will have been pip installed and packaged in wheel
COPY components/ /workspace/components/
# Copy benchmarks, examples, and tests for CI # Copy benchmarks, examples, and tests for CI
# TODO: Remove this once we have a functional CI image built on top of the runtime image # TODO: Remove this once we have a functional CI image built on top of the runtime image
COPY tests /workspace/tests COPY tests /workspace/tests
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment