setup.py 4.07 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""Build script for GPU Memory Service with C++ extensions.

This setup.py builds the C++ extensions as part of pip install.
The _allocator_ext extension only requires Python headers (no CUDA or PyTorch needed).

Following the torch_memory_saver pattern of using pure setuptools for extension building.
"""

from setuptools import Extension, setup
from setuptools.command.build_ext import build_ext


class BuildExtension(build_ext):
    """Custom build extension for C++ modules."""

    def build_extensions(self):
        import os

        # Use CXX environment variable if set, otherwise default to g++
        cxx = os.environ.get("CXX", "g++")
        self.compiler.set_executable("compiler_so", cxx)
        self.compiler.set_executable("compiler_cxx", cxx)
        self.compiler.set_executable("linker_so", f"{cxx} -shared")

        build_ext.build_extensions(self)


def _create_ext_modules():
    """Create extension modules for gpu_memory_service."""
    # Common compile arguments
    extra_compile_args = ["-std=c++17", "-O3", "-fPIC"]

    # _allocator_ext: CUDAPluggableAllocator shim using only Python C API
    # No CUDA or PyTorch dependency - just provides my_malloc/my_free that call Python callbacks
    return [
        Extension(
            name="gpu_memory_service.client.torch.extensions._allocator_ext",
            sources=["client/torch/extensions/allocator.cpp"],
            extra_compile_args=extra_compile_args,
        )
    ]


setup(
    name="gpu-memory-service",
49
    version="0.9.0",
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
    description="GPU Memory Service for Dynamo - CUDA VMM-based GPU memory allocation and sharing",
    author="NVIDIA Inc.",
    author_email="sw-dl-dynamo@nvidia.com",
    license="Apache-2.0",
    python_requires=">=3.10",
    install_requires=[
        "msgpack>=1.0",
        "uvloop>=0.21.0",
    ],
    extras_require={
        "test": [
            "pytest>=8.3.4",
            "pytest-asyncio",
        ],
    },
    # Package directory mapping: the current directory IS the gpu_memory_service package
    packages=[
        "gpu_memory_service",
68
        "gpu_memory_service.cli",
69
70
71
72
73
74
        "gpu_memory_service.common",
        "gpu_memory_service.common.protocol",
        "gpu_memory_service.server",
        "gpu_memory_service.client",
        "gpu_memory_service.client.torch",
        "gpu_memory_service.client.torch.extensions",
75
76
        "gpu_memory_service.failover_lock",
        "gpu_memory_service.failover_lock.flock",
77
78
79
        "gpu_memory_service.integrations",
        "gpu_memory_service.integrations.common",
        "gpu_memory_service.integrations.sglang",
80
        "gpu_memory_service.integrations.trtllm",
81
        "gpu_memory_service.integrations.vllm",
82
83
84
    ],
    package_dir={
        "gpu_memory_service": ".",
85
        "gpu_memory_service.cli": "cli",
86
87
88
89
90
91
        "gpu_memory_service.common": "common",
        "gpu_memory_service.common.protocol": "common/protocol",
        "gpu_memory_service.server": "server",
        "gpu_memory_service.client": "client",
        "gpu_memory_service.client.torch": "client/torch",
        "gpu_memory_service.client.torch.extensions": "client/torch/extensions",
92
93
        "gpu_memory_service.failover_lock": "failover_lock",
        "gpu_memory_service.failover_lock.flock": "failover_lock/flock",
94
95
96
        "gpu_memory_service.integrations": "integrations",
        "gpu_memory_service.integrations.common": "integrations/common",
        "gpu_memory_service.integrations.sglang": "integrations/sglang",
97
        "gpu_memory_service.integrations.trtllm": "integrations/trtllm",
98
        "gpu_memory_service.integrations.vllm": "integrations/vllm",
99
100
101
102
    },
    package_data={
        "gpu_memory_service.client.torch.extensions": ["*.cpp"],
    },
103
104
105
106
107
108
    entry_points={
        "console_scripts": [
            "gpu-memory-service=gpu_memory_service.cli.runner:main",
            "gms-storage-client=gpu_memory_service.cli.storage_runner:main",
        ]
    },
109
110
111
    ext_modules=_create_ext_modules(),
    cmdclass={"build_ext": BuildExtension},
)