# test_worker_management.py
import pytest
import requests


def _post_completion_worker_id(session, base_url, prompt, timeout=10.0):
    """Send one non-streaming completion request and return the worker id.

    Args:
        session: A ``requests.Session`` used to issue the POST.
        base_url: Base URL of the router; ``/v1/completions`` is appended.
        prompt: Prompt text placed in the request body.
        timeout: Seconds to wait for the router before ``requests`` raises,
            so a hung router fails the test instead of blocking the run.

    Returns:
        The value of the ``X-Worker-Id`` response header when present and
        non-empty, otherwise the ``worker_id`` field of the JSON body
        (``None`` if neither is set).
    """
    resp = session.post(
        f"{base_url}/v1/completions",
        json={
            "model": "test-model",
            "prompt": prompt,
            "max_tokens": 1,
            "stream": False,
        },
        timeout=timeout,
    )
    assert resp.status_code == 200, (
        f"completion request for prompt {prompt!r} returned {resp.status_code}"
    )
    return resp.headers.get("X-Worker-Id") or resp.json().get("worker_id")


@pytest.mark.integration
def test_add_and_remove_worker(mock_worker, router_manager, mock_workers):
    """Check that a worker added to a running router serves traffic, and that
    it stops serving traffic once removed.

    Flow: start a router with one worker under the ``round_robin`` policy,
    add a second worker, confirm both worker ids show up in responses, remove
    the second worker, then confirm every later response comes from the first.
    """
    # Start with a single worker
    _proc1, url1, id1 = mock_worker
    rh = router_manager.start_router(worker_urls=[url1], policy="round_robin")

    # Add a second worker
    _procs2, urls2, ids2 = mock_workers(n=1)
    url2 = urls2[0]
    id2 = ids2[0]
    router_manager.add_worker(rh.url, url2)

    # Send some requests and ensure both workers are seen
    expected = {id1, id2}
    seen = set()
    with requests.Session() as session:
        for attempt in range(20):
            seen.add(_post_completion_worker_id(session, rh.url, f"x{attempt}"))
            # Stop as soon as both expected workers have answered; an
            # unexpected or missing id must not end the probe early.
            if expected <= seen:
                break

    assert id1 in seen, f"original worker {id1!r} never served a request; saw {seen!r}"
    assert id2 in seen, f"added worker {id2!r} never served a request; saw {seen!r}"

    # Now remove the second worker
    router_manager.remove_worker(rh.url, url2)

    # After removal, subsequent requests should only come from first worker
    with requests.Session() as session:
        for attempt in range(10):
            wid = _post_completion_worker_id(session, rh.url, f"y{attempt}")
            assert wid == id1, (
                f"request {attempt} after removal was served by {wid!r}, "
                f"expected {id1!r}"
            )
    # mock_workers fixture handles cleanup