client.py 2.32 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""
Simple client demonstration of request cancellation using context.stop_generating()
"""

import asyncio
import sys

from dynamo._core import Context, DistributedRuntime


async def demo_cancellation(client):
    """Stream one request and cancel it once three responses have arrived.

    A fresh ``Context`` is handed to ``client.generate`` together with the
    request, so that calling ``stop_generating()`` on that same object later
    signals that no further output is wanted.

    Args:
        client: Object exposing an awaitable ``generate(request, context=...)``
            that resolves to an async-iterable of responses, each offering a
            ``data()`` method.
    """
    # The context is our handle for cancelling the in-flight request.
    cancel_ctx = Context()

    print("Starting streaming request...")
    response_stream = await client.generate("dummy_request", context=cancel_ctx)

    received = 0
    async for item in response_stream:
        received += 1
        print(f"Client: Received {item.data()}")

        # Keep consuming until the third response has been printed.
        if received < 3:
            continue

        print("Client: Cancelling after 3 responses...")
        cancel_ctx.stop_generating()
        break

    print("Client: Stream stopped")


async def main():
    """Connect to the demo endpoint and run the cancellation demonstration.

    Command line (only the first argument is inspected):
        (no flag)  connect directly to the backend ``server`` component
        --middle   connect through the ``middle`` component

    Any other first argument prints the usage text and returns before a
    runtime is created.

    The runtime is shut down even when connecting or the demo itself raises,
    so a failed run does not leave it open.
    """
    # Parse command line argument
    use_middle_server = False  # Default to direct connection
    if len(sys.argv) > 1:
        if sys.argv[1] != "--middle":
            print("Usage: python3 client.py [--middle]")
            print("  (no flag): Connect directly to backend server (default)")
            print("  --middle: Connect through middle server")
            return
        use_middle_server = True

    # Both modes use the same namespace and endpoint name; only the component
    # and the wording of the progress messages differ.
    component_name = "middle" if use_middle_server else "server"
    target_label = "middle server" if use_middle_server else "backend server"

    loop = asyncio.get_running_loop()
    # NOTE(review): the meaning of the second positional argument (True) is not
    # visible from this file - confirm against the DistributedRuntime API.
    runtime = DistributedRuntime(loop, True)

    try:
        endpoint = (
            runtime.namespace("demo").component(component_name).endpoint("generate")
        )
        if use_middle_server:
            print("Client connecting to middle server...")
        else:
            print("Client connecting directly to backend server...")

        client = await endpoint.client()
        await client.wait_for_instances()

        print(f"Client connected to {target_label}")

        # Perform the generation request with cancellation
        await demo_cancellation(client)
    finally:
        # Always release the runtime, including on errors or task cancellation.
        runtime.shutdown()


if __name__ == "__main__":
    # Script entry point: build the top-level coroutine and let asyncio drive
    # it to completion.
    entry_point = main()
    asyncio.run(entry_point)