install_nixl_from_source_ubuntu.py 7.89 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# install_nixl_from_source_ubuntu.py
import argparse
import glob
import os
import subprocess
import sys

# --- Configuration ---
# Directory where the repaired NIXL wheel is stored and looked up on later
# runs. Override it with the WHEELS_CACHE_HOME environment variable.
WHEELS_CACHE_HOME = os.environ.get("WHEELS_CACHE_HOME", "/tmp/wheels_cache")
# Directory containing this script; the temporary wheelhouse is created in it.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
# Where the UCX and NIXL git repositories are cloned.
UCX_DIR = os.path.join("/tmp", "ucx_source")
NIXL_DIR = os.path.join("/tmp", "nixl_source")
# Prefix passed to UCX's ./configure; NIXL is built against this install.
UCX_INSTALL_DIR = os.path.join("/tmp", "ucx_install")
# Upstream repositories to clone.
UCX_REPO_URL = "https://github.com/openucx/ucx.git"
NIXL_REPO_URL = "https://github.com/ai-dynamo/nixl.git"
18
19
20


# --- Helper Functions ---
21
def run_command(command, cwd=".", env=None):
    """Echo a command, run it without a shell, and fail loudly on error.

    Args:
        command: Argument list (program followed by its arguments).
        cwd: Directory in which to run the command. Defaults to the current
            working directory.
        env: Optional environment mapping for the child process. ``None``
            lets the child inherit the current environment.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    # flush=True keeps this line ordered before the child's own output when
    # stdout is buffered (for example when piped into a CI log).
    print(f"--> Running command: {' '.join(command)} in '{cwd}'", flush=True)
    subprocess.check_call(command, cwd=cwd, env=env)


def is_pip_package_installed(package_name):
    """Report whether ``pip show`` finds a package for this interpreter.

    The check runs ``<sys.executable> -m pip show <package_name>`` with its
    output discarded and never raises on a non-zero exit status.

    Args:
        package_name: Distribution name to look up.

    Returns:
        True if ``pip show`` exits with status 0, False otherwise.
    """
    result = subprocess.run(
        [sys.executable, "-m", "pip", "show", package_name],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        # A non-zero exit status is the "not installed" answer, not an error.
        check=False,
    )
    return result.returncode == 0


def find_nixl_wheel_in_cache(cache_dir):
    """Return the path of the highest-versioned NIXL wheel in ``cache_dir``.

    Any file matching ``nixl*.whl`` is a candidate, so both freshly built
    wheels and repaired wheels (whose names carry a different platform tag)
    are found.

    Args:
        cache_dir: Directory to search. It does not have to exist.

    Returns:
        The path of the selected wheel, or None when no wheel matches.
    """
    # Imported here because the module-level imports do not include ``re``.
    import re

    def _version_aware_key(path):
        # Splitting on a captured digit run yields text at even indices and
        # digits at odd indices, so keys always compare str with str and int
        # with int. Comparing digit runs as integers ranks 0.10.0 above
        # 0.9.0, which a plain string sort gets backwards.
        tokens = re.split(r"(\d+)", os.path.basename(path))
        return [int(tok) if idx % 2 else tok for idx, tok in enumerate(tokens)]

    wheels = glob.glob(os.path.join(cache_dir, "nixl*.whl"))
    if not wheels:
        return None
    return max(wheels, key=_version_aware_key)


def install_system_dependencies():
    """Install the required build packages with apt-get when running as root.

    When the effective user is not root, nothing is installed; a warning
    listing the required packages is printed instead so the user can install
    them manually.

    Raises:
        subprocess.CalledProcessError: If ``apt-get update`` or
            ``apt-get install`` fails (root path only).
    """
    # Single source of truth for both the apt-get call and the warning text.
    apt_packages = [
        "patchelf",  # used by auditwheel when repairing the wheel
        "build-essential",
        "git",
        "cmake",
        "ninja-build",
        "autotools-dev",
        "automake",
        "meson",
        "libtool",
        "libtool-bin",
    ]

    if os.geteuid() != 0:
        print("\n---", flush=True)
        # Adjacent literals are used instead of a backslash inside the string,
        # which would embed the next line's indentation in the message.
        print(
            "WARNING: Not running as root. "
            "Skipping system dependency installation.",
            flush=True,
        )
        print(
            "Please ensure the listed packages are installed on your system:",
            flush=True,
        )
        print(f"  {' '.join(apt_packages)}", flush=True)
        print("---\n", flush=True)
        return

    print("--- Running as root. Installing system dependencies... ---", flush=True)
    run_command(["apt-get", "update"])
    run_command(["apt-get", "install", "-y"] + apt_packages)
    print("--- System dependencies installed successfully. ---\n", flush=True)


def build_and_install_prerequisites(args):
    """Install NIXL, building UCX and a self-contained NIXL wheel if needed.

    The cheapest available route is taken:

    1. If ``nixl`` is already installed according to ``pip show``, return.
    2. Otherwise, if a NIXL wheel exists in ``WHEELS_CACHE_HOME``, install it.
    3. Otherwise build UCX from source, build a NIXL wheel against it, repair
       the wheel with ``auditwheel`` into ``WHEELS_CACHE_HOME`` and install
       the repaired wheel.

    With ``args.force_reinstall`` set, shortcuts 1 and 2 are skipped and
    ``--force-reinstall`` is passed to the final ``pip install``.

    Args:
        args: Parsed command-line namespace; only ``force_reinstall`` is read.

    Raises:
        RuntimeError: If no NIXL wheel can be found after the build step or
            after the repair step.
        subprocess.CalledProcessError: If any external command fails.
    """
    if not args.force_reinstall and is_pip_package_installed("nixl"):
        print("--> NIXL is already installed. Nothing to do.", flush=True)
        return

    cached_wheel = find_nixl_wheel_in_cache(WHEELS_CACHE_HOME)
    if not args.force_reinstall and cached_wheel:
        # Messages use adjacent string literals rather than a backslash
        # inside the string, which would embed source indentation in them.
        print(
            "\n--> Found self-contained wheel: "
            f"{os.path.basename(cached_wheel)}.",
            flush=True,
        )
        print("--> Installing from cache, skipping all source builds.", flush=True)
        install_command = [sys.executable, "-m", "pip", "install", cached_wheel]
        run_command(install_command)
        print("\n--- Installation from cache complete. ---", flush=True)
        return

    print(
        "\n--> No installed package or cached wheel found. "
        "Starting full build process...",
        flush=True,
    )
    print("\n--> Installing auditwheel...", flush=True)
    run_command([sys.executable, "-m", "pip", "install", "auditwheel"])
    install_system_dependencies()
    ucx_install_path = os.path.abspath(UCX_INSTALL_DIR)
    print(f"--> Using wheel cache directory: {WHEELS_CACHE_HOME}", flush=True)
    os.makedirs(WHEELS_CACHE_HOME, exist_ok=True)

    # -- Step 1: Build UCX from source --
    print("\n[1/3] Configuring and building UCX from source...", flush=True)
    # An existing checkout is reused as-is; it is only cloned when missing.
    if not os.path.exists(UCX_DIR):
        run_command(["git", "clone", UCX_REPO_URL, UCX_DIR])
    ucx_source_path = os.path.abspath(UCX_DIR)
    run_command(["git", "checkout", "v1.19.x"], cwd=ucx_source_path)
    run_command(["./autogen.sh"], cwd=ucx_source_path)
    configure_command = [
        "./configure",
        f"--prefix={ucx_install_path}",
        "--enable-shared",
        "--disable-static",
        "--disable-doxygen-doc",
        "--enable-optimizations",
        "--enable-cma",
        "--enable-devel-headers",
        "--with-verbs",
        "--enable-mt",
        "--with-ze=no",
    ]
    run_command(configure_command, cwd=ucx_source_path)
    # os.cpu_count() may return None, hence the fallback to a single job.
    run_command(["make", "-j", str(os.cpu_count() or 1)], cwd=ucx_source_path)
    run_command(["make", "install"], cwd=ucx_source_path)
    print("--- UCX build and install complete ---", flush=True)

    # -- Step 2: Build NIXL wheel from source --
    print("\n[2/3] Building NIXL wheel from source...", flush=True)
    if not os.path.exists(NIXL_DIR):
        run_command(["git", "clone", NIXL_REPO_URL, NIXL_DIR])

    # Point the NIXL build, and later auditwheel, at the freshly installed
    # UCX through pkg-config and the dynamic loader search path.
    build_env = os.environ.copy()
    build_env["PKG_CONFIG_PATH"] = os.path.join(ucx_install_path, "lib", "pkgconfig")
    ucx_lib_path = os.path.join(ucx_install_path, "lib")
    ucx_plugin_path = os.path.join(ucx_lib_path, "ucx")
    existing_ld_path = os.environ.get("LD_LIBRARY_PATH", "")
    # strip(":") drops the trailing separator left when no LD_LIBRARY_PATH
    # was set beforehand.
    build_env["LD_LIBRARY_PATH"] = (
        f"{ucx_lib_path}:{ucx_plugin_path}:{existing_ld_path}".strip(":")
    )
    print(f"--> Using LD_LIBRARY_PATH: {build_env['LD_LIBRARY_PATH']}", flush=True)

    temp_wheel_dir = os.path.join(ROOT_DIR, "temp_wheelhouse")
    run_command(
        [
            sys.executable,
            "-m",
            "pip",
            "wheel",
            ".",
            "--no-deps",
            f"--wheel-dir={temp_wheel_dir}",
        ],
        cwd=os.path.abspath(NIXL_DIR),
        env=build_env,
    )

    # -- Step 3: Repair the wheel by copying UCX libraries --
    print("\n[3/3] Repairing NIXL wheel to include UCX libraries...", flush=True)
    unrepaired_wheel = find_nixl_wheel_in_cache(temp_wheel_dir)
    if not unrepaired_wheel:
        raise RuntimeError("Failed to find the NIXL wheel after building it.")

    # We tell auditwheel to ignore the plugin that mesonpy already handled.
    auditwheel_command = [
        "auditwheel",
        "repair",
        "--exclude",
        "libplugin_UCX.so",
        unrepaired_wheel,
        f"--wheel-dir={WHEELS_CACHE_HOME}",
    ]
    run_command(auditwheel_command, env=build_env)

    # Only the temporary wheelhouse needs cleaning up; the repaired wheel
    # lives in the cache directory.
    run_command(["rm", "-rf", temp_wheel_dir])

    newly_built_wheel = find_nixl_wheel_in_cache(WHEELS_CACHE_HOME)
    if not newly_built_wheel:
        raise RuntimeError("Failed to find the repaired NIXL wheel.")

    print(
        "--> Successfully built self-contained wheel: "
        f"{os.path.basename(newly_built_wheel)}. Now installing...",
        flush=True,
    )
    install_command = [sys.executable, "-m", "pip", "install", newly_built_wheel]
    if args.force_reinstall:
        # Insert the flag just before the wheel path, which stays last.
        install_command.insert(-1, "--force-reinstall")

    run_command(install_command)
    print("--- NIXL installation complete ---", flush=True)


# Script entry point: parse the command line and run the install flow.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Build and install UCX and NIXL dependencies."
    )
    parser.add_argument(
        "--force-reinstall",
        action="store_true",
        # Adjacent literals avoid embedding source indentation in the text.
        help=(
            "Force rebuild and reinstall of UCX and NIXL "
            "even if they are already installed."
        ),
    )
    args = parser.parse_args()
    build_and_install_prerequisites(args)