# relocate.py
"""Helper script to package wheels and relocate binaries."""

# Standard library imports
import glob
import hashlib
import os
import os.path as osp
import platform
import shutil
import subprocess
import sys
import zipfile
from base64 import urlsafe_b64encode
from collections import deque

# Third party imports
if sys.platform == "linux":
    from auditwheel.lddtree import lddtree


21
# Shared-object names that relocate_elf_library() never bundles: glibc, the
# GCC runtime, the dynamic loader, and X11/GL/glib system libraries.
# Any dependency whose name appears here is skipped during the ELF walk.
ALLOWLIST = {
    "libgcc_s.so.1",
    "libstdc++.so.6",
    "libm.so.6",
    "libdl.so.2",
    "librt.so.1",
    "libc.so.6",
    "libnsl.so.1",
    "libutil.so.1",
    "libpthread.so.0",
    "libresolv.so.2",
    "libX11.so.6",
    "libXext.so.6",
    "libXrender.so.1",
    "libICE.so.6",
    "libSM.so.6",
    "libGL.so.1",
    "libgobject-2.0.so.0",
    "libgthread-2.0.so.0",
    "libglib-2.0.so.0",
    "ld-linux-x86-64.so.2",
    "ld-2.17.so",
}

45
# DLL names that relocate_dll_library() never copies into the wheel: the
# MSVC runtime, KERNEL32 and the Universal CRT API-set forwarders.
WINDOWS_ALLOWLIST = {
    "MSVCP140.dll",
    "KERNEL32.dll",
    "VCRUNTIME140_1.dll",
    "VCRUNTIME140.dll",
    "api-ms-win-crt-heap-l1-1-0.dll",
    "api-ms-win-crt-runtime-l1-1-0.dll",
    "api-ms-win-crt-stdio-l1-1-0.dll",
    "api-ms-win-crt-filesystem-l1-1-0.dll",
    "api-ms-win-crt-string-l1-1-0.dll",
    "api-ms-win-crt-environment-l1-1-0.dll",
    "api-ms-win-crt-math-l1-1-0.dll",
    "api-ms-win-crt-convert-l1-1-0.dll",
}


# Absolute path of the directory that contains this script.
HERE = osp.dirname(osp.abspath(__file__))
# Two directory levels above HERE; patch_linux()/patch_win() look for wheels
# in the "dist" folder underneath it.
PACKAGE_ROOT = osp.dirname(osp.dirname(HERE))
# Machine type string as reported by platform.machine().
PLATFORM_ARCH = platform.machine()
# Version info of the interpreter running this script (sys.version_info).
PYTHON_VERSION = sys.version_info


def rehash(path, blocksize=1 << 20):
    """Return ``(digest, length)`` for the file at `path`.

    The file is read in chunks of `blocksize` bytes so large binaries are
    never loaded into memory at once.

    Returns a tuple of two strings:
      * ``"sha256=" + <urlsafe base64 of the SHA-256 digest, '=' padding removed>``
      * the file size in bytes, as a decimal string

    This is the field format written into a wheel's RECORD file by
    `compress_wheel`.
    """
    hasher = hashlib.sha256()
    length = 0
    with open(path, "rb") as f:
        while block := f.read(blocksize):
            length += len(block)
            hasher.update(block)
    # base64 output is pure ASCII, so the decode can never fail.
    encoded = urlsafe_b64encode(hasher.digest()).decode("ascii").rstrip("=")
    return (f"sha256={encoded}", str(length))


def unzip_file(file, dest):
    """Decompress zip `file` into directory `dest`.

    Every member of the archive is extracted below `dest`; nothing is
    returned.
    """
    with zipfile.ZipFile(file, "r") as zip_ref:
        zip_ref.extractall(dest)


def is_program_installed(basename):
    """
    Return program absolute path if installed in PATH.
    Otherwise, return None
    On macOS systems, a .app is considered installed if
    it exists.

    The returned path is the PATH entry joined with `basename`; the first
    entry that contains a regular file with that name wins.
    """
    if sys.platform == "darwin" and basename.endswith(".app") and osp.exists(basename):
        return basename

    # An unset PATH must not crash the lookup; fall back to the interpreter's
    # default search path instead of raising KeyError.
    search_path = os.environ.get("PATH", os.defpath)
    for directory in search_path.split(os.pathsep):
        candidate = osp.join(directory, basename)
        if osp.isfile(candidate):
            return candidate
    return None


def find_program(basename):
    """
    Find program in PATH and return absolute path
    Try adding .exe, .bat, .cmd or .dll to basename on Windows platforms
    (return None if not found)
    """
    names = [basename]
    if os.name == "nt":
        # Windows platforms
        extensions = (".exe", ".bat", ".cmd", ".dll")
        # File names are case-insensitive on Windows, so "FOO.DLL" already
        # carries a known extension and must not get another one appended.
        if not basename.lower().endswith(extensions):
            names = [basename + ext for ext in extensions] + [basename]
    for name in names:
        path = is_program_installed(name)
        if path:
            return path
    return None


def patch_new_path(library_path, new_dir):
    """Return the destination path for a bundled copy of `library_path`.

    The file name is split at its first dot and an 8-character tag (the
    start of the SHA-256 hex digest of `library_path`) is inserted there,
    e.g. ``libfoo.so.1`` -> ``libfoo.<tag>.so.1``. The result is placed in
    `new_dir`. A name without any dot simply gets ``.<tag>`` appended.
    """
    library = osp.basename(library_path)
    name, sep, rest = library.partition(".")
    hash_id = hashlib.sha256(library_path.encode("utf-8")).hexdigest()[:8]
    parts = [name, hash_id]
    if sep:
        # Only re-attach the suffix when there was one; otherwise the joined
        # name would end in a dangling ".".
        parts.append(rest)
    return osp.join(new_dir, ".".join(parts))


def find_dll_dependencies(dumpbin, binary):
    """Return the DLL names that `binary` imports, according to dumpbin.

    Runs ``<dumpbin> /dependents <binary>`` and returns the non-empty,
    whitespace-stripped lines found between the first "dependencies:"
    marker and the following "Summary" marker. An empty list is returned
    when the report has no "dependencies:" marker at all.
    """
    result = subprocess.run([dumpbin, "/dependents", binary], stdout=subprocess.PIPE)
    # Tool output may contain bytes outside UTF-8 (e.g. localized paths);
    # they are irrelevant to the DLL list, so do not let them abort parsing.
    report = result.stdout.decode("utf-8", errors="replace")

    marker = "dependencies:"
    marker_pos = report.find(marker)
    if marker_pos == -1:
        return []
    start_index = marker_pos + len(marker)

    # Search for the end marker only after the start marker so an earlier
    # occurrence of the word (e.g. in a file path) cannot cut the list short.
    end_index = report.find("Summary", start_index)
    if end_index == -1:
        end_index = len(report)

    # splitlines() copes with both "\n" and "\r\n" regardless of host OS.
    stripped = (line.strip() for line in report[start_index:end_index].splitlines())
    return [dll for dll in stripped if dll]


def relocate_elf_library(patchelf, output_dir, output_library, binary):
    """
    Relocate an ELF shared library to be packaged on a wheel.

    Given a shared library, find the transitive closure of its dependencies,
    rename and copy them into the wheel while updating their respective rpaths.

    Parameters:
        patchelf: path of the ``patchelf`` executable.
        output_dir: root of the unpacked wheel; bundled libraries are copied
            into ``<output_dir>/torchvision.libs``.
        output_library: directory that contains `binary`.
        binary: file name of the extension module to relocate.

    Raises:
        subprocess.CalledProcessError: if any ``patchelf`` invocation fails.
    """
    print(f"Relocating {binary}")
    binary_path = osp.join(output_library, binary)

    ld_tree = lddtree(binary_path)
    tree_libs = ld_tree["libs"]

    # Breadth-first walk over (library, parent) pairs, starting with the
    # direct dependencies of the binary itself.
    binary_queue = deque((needed, binary) for needed in ld_tree["needed"])
    binary_paths = {binary: binary_path}
    binary_dependencies = {}

    while binary_queue:
        library, parent = binary_queue.popleft()
        library_info = tree_libs[library]
        print(library)

        if library_info["path"] is None:
            # lddtree could not resolve it on this system; nothing to copy.
            print(f"Omitting {library}")
            continue

        if library in ALLOWLIST:
            # Omit glibc/gcc/system libraries
            print(f"Omitting {library}")
            continue

        binary_dependencies.setdefault(parent, []).append(library)

        if library in binary_paths:
            # Already scheduled via another parent; only the edge is new.
            continue

        binary_paths[library] = library_info["path"]
        binary_queue.extend((needed, library) for needed in library_info["needed"])

    print("Copying dependencies to wheel directory")
    new_libraries_path = osp.join(output_dir, "torchvision.libs")
    # This function runs once per extension module of the same wheel, so the
    # directory may already exist from a previous call.
    os.makedirs(new_libraries_path, exist_ok=True)

    new_names = {binary: binary_path}

    for library, library_path in binary_paths.items():
        if library == binary:
            continue
        new_library_path = patch_new_path(library_path, new_libraries_path)
        print(f"{library} -> {new_library_path}")
        shutil.copyfile(library_path, new_library_path)
        new_names[library] = new_library_path

    print("Updating dependency names by new files")
    for library in binary_paths:
        # Libraries without any bundled dependency are left untouched.
        if library == binary or library not in binary_dependencies:
            continue
        new_library_name = new_names[library]
        for dep in binary_dependencies[library]:
            new_dep = osp.basename(new_names[dep])
            print(f"{library}: {dep} -> {new_dep}")
            subprocess.check_output(
                [patchelf, "--replace-needed", dep, new_dep, new_library_name], cwd=new_libraries_path
            )

        print("Updating library rpath")
        subprocess.check_output([patchelf, "--set-rpath", "$ORIGIN", new_library_name], cwd=new_libraries_path)

        # Output is discarded; the call only fails loudly if patchelf cannot
        # read back the file it has just rewritten.
        subprocess.check_output([patchelf, "--print-rpath", new_library_name], cwd=new_libraries_path)

    print("Update library dependencies")
    # The binary has no entry when every one of its dependencies was omitted.
    for dep in binary_dependencies.get(binary, []):
        new_dep = osp.basename(new_names[dep])
        print(f"{binary}: {dep} -> {new_dep}")
        subprocess.check_output([patchelf, "--replace-needed", dep, new_dep, binary], cwd=output_library)

    print("Update library rpath")
    subprocess.check_output(
        [patchelf, "--set-rpath", "$ORIGIN:$ORIGIN/../torchvision.libs", binary_path], cwd=output_library
    )


def relocate_dll_library(dumpbin, output_dir, output_library, binary):
    """
    Relocate a DLL/PE shared library to be packaged on a wheel.

    Given a shared library, find the transitive closure of its dependencies
    and copy them, under their original names, into the ``torchvision``
    package directory of the unpacked wheel.

    Parameters:
        dumpbin: path of the ``dumpbin`` executable.
        output_dir: root of the unpacked wheel.
        output_library: directory that contains `binary`.
        binary: file name of the extension module to relocate.
    """
    print(f"Relocating {binary}")
    binary_path = osp.join(output_library, binary)

    # DLL names are case-insensitive on Windows, so compare lower-cased.
    allowlist = {name.lower() for name in WINDOWS_ALLOWLIST}

    binary_queue = deque(find_dll_dependencies(dumpbin, binary_path))
    binary_paths = {binary: binary_path}

    while binary_queue:
        library = binary_queue.popleft()
        if not library:
            # Blank entry from the dumpbin report; nothing to look up.
            continue

        lowered = library.lower()
        if lowered in allowlist or lowered.startswith("api-ms-win"):
            print(f"Omitting {library}")
            continue

        library_path = find_program(library)
        if library_path is None:
            print(f"{library} not found")
            continue

        # System DLLs are never bundled; the folder shows up as "system32"
        # or "System32" depending on how PATH spells it.
        if osp.basename(osp.dirname(library_path)).lower() == "system32":
            continue

        print(f"{library}: {library_path}")

        if library in binary_paths:
            continue

        binary_paths[library] = library_path
        binary_queue.extend(find_dll_dependencies(dumpbin, library_path))

    print("Copying dependencies to wheel directory")
    package_dir = osp.join(output_dir, "torchvision")
    for library, library_path in binary_paths.items():
        if library == binary:
            continue
        new_library_path = osp.join(package_dir, library)
        print(f"{library} -> {new_library_path}")
        shutil.copyfile(library_path, new_library_path)


def compress_wheel(output_dir, wheel, wheel_dir, wheel_name):
    """Create RECORD file and compress wheel distribution.

    Parameters:
        output_dir: directory holding the unpacked (and patched) wheel; it is
            deleted once the archive has been written.
        wheel: path of the original ``.whl`` file, which gets replaced.
        wheel_dir: directory in which the temporary zip archive is created.
        wheel_name: wheel file name without its extension.

    Raises:
        FileNotFoundError: if `output_dir` contains no ``*.dist-info`` folder.
    """
    print("Update RECORD file in wheel")
    dist_info_dirs = glob.glob(osp.join(output_dir, "*.dist-info"))
    if not dist_info_dirs:
        raise FileNotFoundError(f"No *.dist-info directory found in {output_dir}")
    record_file = osp.join(dist_info_dirs[0], "RECORD")

    with open(record_file, "w", encoding="utf-8") as f:
        for root, _, files in os.walk(output_dir):
            for this_file in files:
                full_file = osp.join(root, this_file)
                # Zip member names always use "/", so RECORD entries must as
                # well, even when this runs on Windows.
                rel_file = osp.relpath(full_file, output_dir).replace(os.sep, "/")
                if full_file == record_file:
                    # RECORD cannot contain its own hash; leave both fields empty.
                    f.write(f"{rel_file},,\n")
                else:
                    digest, size = rehash(full_file)
                    f.write(f"{rel_file},{digest},{size}\n")

    print("Compressing wheel")
    base_wheel_name = osp.join(wheel_dir, wheel_name)
    # make_archive appends ".zip"; the result is then moved over the original
    # wheel path to restore the ".whl" name.
    shutil.make_archive(base_wheel_name, "zip", output_dir)
    os.remove(wheel)
    shutil.move(f"{base_wheel_name}.zip", wheel)
    shutil.rmtree(output_dir)


def patch_linux():
    """Bundle ELF dependencies into every wheel found in ``<PACKAGE_ROOT>/dist``.

    Each wheel is unpacked into a scratch directory, the torchvision extension
    modules present in it are relocated with patchelf, and the wheel is then
    re-compressed in place.

    Raises:
        FileNotFoundError: if ``patchelf`` cannot be located.
    """
    # Get patchelf location
    patchelf = find_program("patchelf")
    if patchelf is None:
        raise FileNotFoundError("Patchelf was not found in the system, please make sure that is available on the PATH.")

    # Find wheel
    print("Finding wheels...")
    dist_dir = osp.join(PACKAGE_ROOT, "dist")
    wheels = glob.glob(osp.join(dist_dir, "*.whl"))
    output_dir = osp.join(dist_dir, ".wheel-process")

    torchvision_binaries = ("image.so", "video_reader.so")
    for wheel in wheels:
        # Always start from an empty scratch directory.
        if osp.exists(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir)

        print("Unzipping wheel...")
        wheel_dir, wheel_file = osp.split(wheel)
        print(f"{wheel_file}")
        wheel_name = osp.splitext(wheel_file)[0]
        unzip_file(wheel, output_dir)

        print("Finding ELF dependencies...")
        output_library = osp.join(output_dir, "torchvision")
        for binary in torchvision_binaries:
            if not osp.exists(osp.join(output_library, binary)):
                continue
            relocate_elf_library(patchelf, output_dir, output_library, binary)

        compress_wheel(output_dir, wheel, wheel_dir, wheel_name)


def patch_win():
    """Bundle DLL dependencies into every wheel found in ``<PACKAGE_ROOT>/dist``.

    Each wheel is unpacked into a scratch directory, the DLLs needed by the
    torchvision extension modules present in it are copied in, and the wheel
    is then re-compressed in place.

    Raises:
        FileNotFoundError: if ``dumpbin`` cannot be located.
    """
    # Get dumpbin location
    dumpbin = find_program("dumpbin")
    if dumpbin is None:
        raise FileNotFoundError("Dumpbin was not found in the system, please make sure that is available on the PATH.")

    # Find wheel
    print("Finding wheels...")
    dist_dir = osp.join(PACKAGE_ROOT, "dist")
    wheels = glob.glob(osp.join(dist_dir, "*.whl"))
    output_dir = osp.join(dist_dir, ".wheel-process")

    torchvision_binaries = ("image.pyd", "video_reader.pyd")
    for wheel in wheels:
        # Always start from an empty scratch directory.
        if osp.exists(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir)

        print("Unzipping wheel...")
        wheel_dir, wheel_file = osp.split(wheel)
        print(f"{wheel_file}")
        wheel_name = osp.splitext(wheel_file)[0]
        unzip_file(wheel, output_dir)

        print("Finding DLL/PE dependencies...")
        output_library = osp.join(output_dir, "torchvision")
        for binary in torchvision_binaries:
            if not osp.exists(osp.join(output_library, binary)):
                continue
            relocate_dll_library(dumpbin, output_dir, output_library, binary)

        compress_wheel(output_dir, wheel, wheel_dir, wheel_name)


376
377
if __name__ == "__main__":
    # Pick the relocation routine for the current platform; any platform
    # other than Linux and Windows is silently left alone.
    _patchers = {"linux": patch_linux, "win32": patch_win}
    _patch = _patchers.get(sys.platform)
    if _patch is not None:
        _patch()