"vscode:/vscode.git/clone" did not exist on "08df63a6f8b9d27ae80cf705ab9496632c8a18c2"
Unverified Commit 6e95f5e5 authored by Liangsheng Yin, committed by GitHub
Browse files

Simplify `Router` arguments passing and build it in docker image (#9964)

parent 0e9387a9
import os
from setuptools import setup
from setuptools_rust import Binding, RustExtension
# Build-time switch: when SGLANG_ROUTER_BUILD_NO_RUST is exactly "1", no Rust
# extension is registered with setup(); any other value (or an unset variable)
# keeps the extension in the build.
no_rust = os.environ.get("SGLANG_ROUTER_BUILD_NO_RUST") == "1"

# Either an empty list or a single PyO3-bound extension named
# "sglang_router_rs", described by the Cargo.toml next to this script.
rust_extensions = (
    []
    if no_rust
    else [
        RustExtension(
            target="sglang_router_rs",
            path="Cargo.toml",
            binding=Binding.PyO3,
        ),
    ]
)

# Only the Rust extension list and zip_safe are passed explicitly here.
setup(
    rust_extensions=rust_extensions,
    zip_safe=False,
)
import json
import os
import subprocess
import time
import unittest
from types import SimpleNamespace
......@@ -18,6 +17,7 @@ from sglang.test.test_utils import (
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_pd_server,
popen_with_error_check,
)
......@@ -47,7 +47,9 @@ class TestDisaggregationAccuracy(CustomTestCase):
lb_command = [
"python3",
"-m",
"sglang.srt.disaggregation.mini_lb",
"sglang_router.launch_router",
"--pd-disaggregation",
"--mini-lb", # FIXME: remove this
"--prefill",
cls.prefill_url,
"--decode",
......@@ -59,9 +61,7 @@ class TestDisaggregationAccuracy(CustomTestCase):
]
print("Starting load balancer:", " ".join(lb_command))
cls.process_lb = subprocess.Popen(
lb_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
cls.process_lb = popen_with_error_check(lb_command)
cls.wait_server_ready(cls.lb_url + "/health")
@classmethod
......@@ -228,7 +228,9 @@ class TestDisaggregationMooncakeFailure(CustomTestCase):
lb_command = [
"python3",
"-m",
"sglang.srt.disaggregation.mini_lb",
"sglang_router.launch_router",
"--pd-disaggregation",
"--mini-lb", # FIXME: remove this
"--prefill",
cls.prefill_url,
"--decode",
......@@ -240,9 +242,7 @@ class TestDisaggregationMooncakeFailure(CustomTestCase):
]
print("Starting load balancer:", " ".join(lb_command))
cls.process_lb = subprocess.Popen(
lb_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
cls.process_lb = popen_with_error_check(lb_command)
cls.wait_server_ready(cls.lb_url + "/health")
@classmethod
......@@ -383,7 +383,9 @@ class TestDisaggregationMooncakeSpec(CustomTestCase):
lb_command = [
"python3",
"-m",
"sglang.srt.disaggregation.mini_lb",
"sglang_router.launch_router",
"--pd-disaggregation",
"--mini-lb", # FIXME: remove this
"--prefill",
cls.prefill_url,
"--decode",
......@@ -395,9 +397,7 @@ class TestDisaggregationMooncakeSpec(CustomTestCase):
]
print("Starting load balancer:", " ".join(lb_command))
cls.process_lb = subprocess.Popen(
lb_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
cls.process_lb = popen_with_error_check(lb_command)
cls.wait_server_ready(cls.lb_url + "/health")
@classmethod
......@@ -509,7 +509,9 @@ class TestDisaggregationSimulatedRetract(CustomTestCase):
lb_command = [
"python3",
"-m",
"sglang.srt.disaggregation.mini_lb",
"sglang_router.launch_router",
"--pd-disaggregation",
"--mini-lb", # FIXME: remove this
"--prefill",
cls.prefill_url,
"--decode",
......@@ -521,9 +523,7 @@ class TestDisaggregationSimulatedRetract(CustomTestCase):
]
print("Starting load balancer:", " ".join(lb_command))
cls.process_lb = subprocess.Popen(
lb_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
cls.process_lb = popen_with_error_check(lb_command)
cls.wait_server_ready(cls.lb_url + "/health")
@classmethod
......
......@@ -15,7 +15,7 @@ from sglang.test.test_utils import (
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_pd_server,
run_with_timeout,
popen_with_error_check,
)
......@@ -49,7 +49,9 @@ class TestDisaggregationMooncakePrefillLargerTP(CustomTestCase):
lb_command = [
"python3",
"-m",
"sglang.srt.disaggregation.mini_lb",
"sglang_router.launch_router",
"--pd-disaggregation",
"--mini-lb", # FIXME: remove this
"--prefill",
cls.prefill_url,
"--decode",
......@@ -61,9 +63,7 @@ class TestDisaggregationMooncakePrefillLargerTP(CustomTestCase):
]
print("Starting load balancer:", " ".join(lb_command))
cls.process_lb = subprocess.Popen(
lb_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
cls.process_lb = popen_with_error_check(lb_command)
cls.wait_server_ready(cls.lb_url + "/health")
@classmethod
......@@ -183,7 +183,9 @@ class TestDisaggregationMooncakeDecodeLargerTP(CustomTestCase):
lb_command = [
"python3",
"-m",
"sglang.srt.disaggregation.mini_lb",
"sglang_router.launch_router",
"--pd-disaggregation",
"--mini-lb", # FIXME: remove this
"--prefill",
cls.prefill_url,
"--decode",
......@@ -195,9 +197,7 @@ class TestDisaggregationMooncakeDecodeLargerTP(CustomTestCase):
]
print("Starting load balancer:", " ".join(lb_command))
cls.process_lb = subprocess.Popen(
lb_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
cls.process_lb = popen_with_error_check(lb_command)
cls.wait_server_ready(cls.lb_url + "/health")
@classmethod
......
......@@ -49,7 +49,9 @@ class TestPDPPAccuracy(unittest.TestCase):
lb_command = [
"python3",
"-m",
"sglang.srt.disaggregation.mini_lb",
"sglang_router.launch_router",
"--pd-disaggregation",
"--mini-lb", # FIXME: remove this
"--prefill",
cls.prefill_url,
"--decode",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment