Unverified commit 8f005a61, authored by ishandhanani and committed by GitHub
Browse files

ci: fix sglang paths for nightly (#2275)

parent 26dc6281
......@@ -65,6 +65,7 @@ jobs:
- 'container/Dockerfile.sglang-deepep'
- 'components/backends/sglang/**'
- 'container/build.sh'
- 'tests/serve/test_sglang.py'
- name: Check if Validation Workflow has run
id: check_workflow
uses: actions/github-script@v6
......
......@@ -19,7 +19,7 @@ SGLang allows you to deploy multi-node sized models by adding in the `dist-init-
Node 1: Run HTTP ingress, processor, and 8 shards of the prefill worker
```bash
# run ingress
dynamo run in=http out=dyn &
python3 -m dynamo.frontend --http-port=8000 &
# run prefill worker
python3 -m dynamo.sglang.worker \
--model-path /model/ \
......@@ -102,7 +102,7 @@ SGLang typically requires a warmup period to ensure the DeepGEMM kernels are loa
curl ${HEAD_PREFILL_NODE_IP}:8000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
"model": "deepseek-ai/DeepSeek-R1",
"messages": [
{
"role": "user",
......
......@@ -28,7 +28,7 @@ class SGLangProcess(ManagedProcess):
def __init__(self, script_name, request):
self.port = 8000
sglang_dir = "/workspace/examples/sglang"
sglang_dir = "/workspace/components/backends/sglang"
script_path = os.path.join(sglang_dir, "launch", script_name)
# Verify script exists
......@@ -166,6 +166,9 @@ def test_sglang_disagg_dp_attention(request, runtime_services):
timeout=120,
)
# TODO: Once this is enabled, we can test out the rest of the HTTP endpoints around
# flush_cache and expert distribution recording
assert response.status_code == 200
result = response.json()
assert "choices" in result
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment