runner.py 1.99 KB
Newer Older
1
2
3
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""GPU Memory Service allocation server runner.

This module provides the CLI runner for the GPU Memory Service server,
which manages GPU memory allocations with connection-based RW/RO locking.

Usage:
    python -m gpu_memory_service --device 0
    python -m gpu_memory_service --device 0 --socket-path /tmp/gpu_memory_service_{device}.sock
"""

import asyncio
import logging

import uvloop
18
from gpu_memory_service.server.rpc import GMSRPCServer
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35

from .args import parse_args

# Configure the root logger when this module is imported: INFO level, with
# each record rendered as "<time> - <logger name> - <level> - <message>".
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


async def worker() -> None:
    """Parse the CLI arguments, build the GMS RPC server, and serve.

    Steps, in order:

    1. Read the configuration with ``parse_args()``.
    2. If ``config.verbose`` is truthy, set the root logger and the
       ``gpu_memory_service`` logger to DEBUG.
    3. Log the device, tag, socket path, and allocation retry settings.
    4. Construct ``GMSRPCServer`` from the configuration and await
       ``server.serve()``.
    """
    config = parse_args()

    # Verbose mode: raise both the root logger and the package logger to DEBUG.
    if config.verbose:
        logging.getLogger().setLevel(logging.DEBUG)
        logging.getLogger("gpu_memory_service").setLevel(logging.DEBUG)

    # Lazy %-style arguments throughout, so every call in this function
    # formats its message the same way.
    logger.info("Starting GPU Memory Service Server for device %s", config.device)
    # NOTE(review): config.tag is only logged here; it is not passed to
    # GMSRPCServer below -- confirm that is intended.
    logger.info("GMS tag: %s", config.tag)
    logger.info("Socket path: %s", config.socket_path)

    # A retry timeout of None is reported as the literal string "none";
    # any other value is shown with an "s" suffix.
    retry_timeout = (
        f"{config.alloc_retry_timeout}s"
        if config.alloc_retry_timeout is not None
        else "none"
    )
    logger.info(
        "Allocation retry config: interval=%ss timeout=%s",
        config.alloc_retry_interval,
        retry_timeout,
    )

    server = GMSRPCServer(
        config.socket_path,
        device=config.device,
        allocation_retry_interval=config.alloc_retry_interval,
        allocation_retry_timeout=config.alloc_retry_timeout,
    )

    logger.info("GPU Memory Service Server ready, waiting for connections...")
    logger.info("Clients can connect via socket: %s", config.socket_path)
    await server.serve()
61
62
63
64
65
66
67
68
69
70


def main() -> None:
    """Run the GPU Memory Service server from the command line.

    Installs uvloop, then drives the ``worker()`` coroutine to completion
    with ``asyncio.run``.
    """
    uvloop.install()
    server_coro = worker()
    asyncio.run(server_coro)


# Run the server only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()