Unverified Commit 8f005a61, authored by ishandhanani, committed by GitHub
Browse files

ci: fix sglang paths for nightly (#2275)

parent 26dc6281
...@@ -65,6 +65,7 @@ jobs: ...@@ -65,6 +65,7 @@ jobs:
- 'container/Dockerfile.sglang-deepep' - 'container/Dockerfile.sglang-deepep'
- 'components/backends/sglang/**' - 'components/backends/sglang/**'
- 'container/build.sh' - 'container/build.sh'
- 'tests/serve/test_sglang.py'
- name: Check if Validation Workflow has run - name: Check if Validation Workflow has run
id: check_workflow id: check_workflow
uses: actions/github-script@v6 uses: actions/github-script@v6
......
...@@ -19,7 +19,7 @@ SGLang allows you to deploy multi-node sized models by adding in the `dist-init- ...@@ -19,7 +19,7 @@ SGLang allows you to deploy multi-node sized models by adding in the `dist-init-
Node 1: Run HTTP ingress, processor, and 8 shards of the prefill worker Node 1: Run HTTP ingress, processor, and 8 shards of the prefill worker
```bash ```bash
# run ingress # run ingress
dynamo run in=http out=dyn & python3 -m dynamo.frontend --http-port=8000 &
# run prefill worker # run prefill worker
python3 -m dynamo.sglang.worker \ python3 -m dynamo.sglang.worker \
--model-path /model/ \ --model-path /model/ \
...@@ -102,7 +102,7 @@ SGLang typically requires a warmup period to ensure the DeepGEMM kernels are loa ...@@ -102,7 +102,7 @@ SGLang typically requires a warmup period to ensure the DeepGEMM kernels are loa
curl ${HEAD_PREFILL_NODE_IP}:8000/v1/chat/completions \ curl ${HEAD_PREFILL_NODE_IP}:8000/v1/chat/completions \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-d '{ -d '{
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "model": "deepseek-ai/DeepSeek-R1",
"messages": [ "messages": [
{ {
"role": "user", "role": "user",
......
...@@ -28,7 +28,7 @@ class SGLangProcess(ManagedProcess): ...@@ -28,7 +28,7 @@ class SGLangProcess(ManagedProcess):
def __init__(self, script_name, request): def __init__(self, script_name, request):
self.port = 8000 self.port = 8000
sglang_dir = "/workspace/examples/sglang" sglang_dir = "/workspace/components/backends/sglang"
script_path = os.path.join(sglang_dir, "launch", script_name) script_path = os.path.join(sglang_dir, "launch", script_name)
# Verify script exists # Verify script exists
...@@ -166,6 +166,9 @@ def test_sglang_disagg_dp_attention(request, runtime_services): ...@@ -166,6 +166,9 @@ def test_sglang_disagg_dp_attention(request, runtime_services):
timeout=120, timeout=120,
) )
# TODO: Once this is enabled, we can test out the rest of the HTTP endpoints around
# flush_cache and expert distribution recording
assert response.status_code == 200 assert response.status_code == 200
result = response.json() result = response.json()
assert "choices" in result assert "choices" in result
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment