"""Helper script to package wheels and relocate binaries."""

# Standard library imports
import glob
import hashlib
import io
import os
import os.path as osp
import platform
import shutil
import subprocess
import sys
import zipfile
from base64 import urlsafe_b64encode
from collections import deque

# Third party imports
if sys.platform == "linux":
    from auditwheel.lddtree import lddtree


# Shared objects that relocate_elf_library never copies into the wheel
# (glibc/gcc/system libraries).
ALLOWLIST = {
    "libgcc_s.so.1",
    "libstdc++.so.6",
    "libm.so.6",
    "libdl.so.2",
    "librt.so.1",
    "libc.so.6",
    "libnsl.so.1",
    "libutil.so.1",
    "libpthread.so.0",
    "libresolv.so.2",
    "libX11.so.6",
    "libXext.so.6",
    "libXrender.so.1",
    "libICE.so.6",
    "libSM.so.6",
    "libGL.so.1",
    "libgobject-2.0.so.0",
    "libgthread-2.0.so.0",
    "libglib-2.0.so.0",
    "ld-linux-x86-64.so.2",
    "ld-2.17.so",
}

# DLLs that relocate_dll_library never copies into the wheel.
WINDOWS_ALLOWLIST = {
    "MSVCP140.dll",
    "KERNEL32.dll",
    "VCRUNTIME140_1.dll",
    "VCRUNTIME140.dll",
    "api-ms-win-crt-heap-l1-1-0.dll",
    "api-ms-win-crt-runtime-l1-1-0.dll",
    "api-ms-win-crt-stdio-l1-1-0.dll",
    "api-ms-win-crt-filesystem-l1-1-0.dll",
    "api-ms-win-crt-string-l1-1-0.dll",
    "api-ms-win-crt-environment-l1-1-0.dll",
    "api-ms-win-crt-math-l1-1-0.dll",
    "api-ms-win-crt-convert-l1-1-0.dll",
}


# Directory containing this script.
HERE = osp.dirname(osp.abspath(__file__))
# Two directory levels above this script; wheels are looked up in its "dist" folder.
PACKAGE_ROOT = osp.dirname(osp.dirname(HERE))
# Machine type string as reported by platform.machine().
PLATFORM_ARCH = platform.machine()
# Version information of the interpreter running this script.
PYTHON_VERSION = sys.version_info


def read_chunks(file, size=io.DEFAULT_BUFFER_SIZE):
    """Generate successive pieces of at most `size` units read from `file`.

    Iteration ends as soon as ``file.read(size)`` returns an empty (falsy)
    result, i.e. at end of file.
    """
    piece = file.read(size)
    while piece:
        yield piece
        piece = file.read(size)


def rehash(path, blocksize=1 << 20):
    """Return ``(digest, length)`` for the file at `path` using SHA-256.

    The file is read in binary mode, `blocksize` bytes at a time.

    Returns:
        A tuple ``(digest, length)`` where ``digest`` is ``"sha256="``
        followed by the URL-safe base64 encoding of the hash with the
        trailing ``=`` padding removed, and ``length`` is the number of bytes
        read, as a decimal string.
    """
    h = hashlib.sha256()
    length = 0
    with open(path, "rb") as f:
        for block in read_chunks(f, size=blocksize):
            length += len(block)
            h.update(block)
    # Base64 output is pure ASCII, so decoding cannot fail.
    encoded = urlsafe_b64encode(h.digest()).decode("ascii").rstrip("=")
    return f"sha256={encoded}", str(length)


def unzip_file(file, dest):
    """Decompress zip `file` into directory `dest`."""
    with zipfile.ZipFile(file, "r") as zip_ref:
        zip_ref.extractall(dest)


def is_program_installed(basename):
    """
    Return program absolute path if installed in PATH.
    Otherwise, return None
    On macOS systems, a .app is considered installed if
    it exists.
    """
    if sys.platform == "darwin" and basename.endswith(".app") and osp.exists(basename):
        return basename

    # Fall back to the platform default search path when PATH is unset
    # instead of failing with KeyError.
    search_path = os.environ.get("PATH", os.defpath)
    for path in search_path.split(os.pathsep):
        abspath = osp.join(path, basename)
        if osp.isfile(abspath):
            return abspath
    return None


def find_program(basename):
    """
    Find program in PATH and return absolute path
    Try adding .exe, .bat, .cmd or .dll to basename on Windows platforms
    (return None if not found)
    """
    names = [basename]
    if os.name == "nt":
        # Windows platforms
        extensions = (".exe", ".bat", ".cmd", ".dll")
        # File names are case-insensitive on Windows, so "FOO.DLL" already
        # carries a known extension.
        if not basename.lower().endswith(extensions):
            names = [basename + ext for ext in extensions] + [basename]
    for name in names:
        path = is_program_installed(name)
        if path:
            return path
    return None


def patch_new_path(library_path, new_dir):
    """Return the renamed destination path for `library_path` inside `new_dir`.

    The first 8 hex digits of the SHA-256 of `library_path` are inserted after
    the first dot-separated component of the file name, e.g.
    ``libfoo.so.1`` becomes ``libfoo.<hash>.so.1``. A file name without any
    dot becomes ``<name>.<hash>`` (no trailing dot).
    """
    library = osp.basename(library_path)
    name, _, rest = library.partition(".")
    hash_id = hashlib.sha256(library_path.encode("utf-8")).hexdigest()[:8]
    parts = [name, hash_id]
    if rest:
        parts.append(rest)
    return osp.join(new_dir, ".".join(parts))


def find_dll_dependencies(dumpbin, binary):
    """Return the DLL names that `binary` depends on, as reported by dumpbin.

    Runs ``dumpbin /dependents binary`` and returns the non-empty, stripped
    lines found between the first ``dependencies:`` marker and the following
    ``Summary`` marker of its standard output.

    Returns an empty list when the output has no ``dependencies:`` marker
    (for instance when dumpbin produced no output at all).
    """
    result = subprocess.run([dumpbin, "/dependents", binary], stdout=subprocess.PIPE)
    out = result.stdout.decode("utf-8", errors="replace")

    marker = "dependencies:"
    marker_index = out.find(marker)
    if marker_index == -1:
        return []

    start_index = marker_index + len(marker)
    end_index = out.find("Summary", start_index)
    if end_index == -1:
        # No summary section: take everything after the marker.
        end_index = len(out)

    # splitlines() copes with both "\n" and "\r\n"; blank lines are dropped.
    return [line.strip() for line in out[start_index:end_index].splitlines() if line.strip()]


def relocate_elf_library(patchelf, output_dir, output_library, binary):
    """
    Relocate an ELF shared library to be packaged on a wheel.

    Given a shared library, find the transitive closure of its dependencies,
    rename and copy them into the wheel while updating their respective rpaths.

    Args:
        patchelf: Path to the ``patchelf`` executable.
        output_dir: Root of the unpacked wheel; dependencies are copied into
            its ``torchvision.libs`` sub-directory.
        output_library: Directory that contains `binary`.
        binary: File name of the shared library to relocate.

    Raises:
        subprocess.CalledProcessError: If a ``patchelf`` invocation fails.
    """

    print(f"Relocating {binary}")
    binary_path = osp.join(output_library, binary)

    ld_tree = lddtree(binary_path)
    tree_libs = ld_tree["libs"]

    # Breadth-first walk over (library, parent) pairs.
    binary_queue = deque((n, binary) for n in ld_tree["needed"])
    binary_paths = {binary: binary_path}
    binary_dependencies = {}

    while binary_queue:
        library, parent = binary_queue.popleft()
        library_info = tree_libs[library]
        print(library)

        if library_info["path"] is None:
            print(f"Omitting {library}")
            continue

        if library in ALLOWLIST:
            # Omit glibc/gcc/system libraries
            print(f"Omitting {library}")
            continue

        binary_dependencies.setdefault(parent, []).append(library)

        if library in binary_paths:
            continue

        binary_paths[library] = library_info["path"]
        binary_queue.extend((n, library) for n in library_info["needed"])

    print("Copying dependencies to wheel directory")
    new_libraries_path = osp.join(output_dir, "torchvision.libs")
    # The directory already exists when a previous binary of the same wheel
    # has been relocated.
    os.makedirs(new_libraries_path, exist_ok=True)

    new_names = {binary: binary_path}

    for library, library_path in binary_paths.items():
        if library == binary:
            continue
        new_library_path = patch_new_path(library_path, new_libraries_path)
        print(f"{library} -> {new_library_path}")
        shutil.copyfile(library_path, new_library_path)
        new_names[library] = new_library_path

    print("Updating dependency names by new files")
    for library in binary_paths:
        # Libraries without bundled dependencies need no patching.
        if library == binary or library not in binary_dependencies:
            continue
        new_library_name = new_names[library]
        for dep in binary_dependencies[library]:
            new_dep = osp.basename(new_names[dep])
            print(f"{library}: {dep} -> {new_dep}")
            subprocess.check_output(
                [patchelf, "--replace-needed", dep, new_dep, new_library_name], cwd=new_libraries_path
            )

        print("Updating library rpath")
        subprocess.check_output([patchelf, "--set-rpath", "$ORIGIN", new_library_name], cwd=new_libraries_path)

        # The output is discarded; check_output still raises if patchelf fails.
        subprocess.check_output([patchelf, "--print-rpath", new_library_name], cwd=new_libraries_path)

    print("Update library dependencies")
    # The binary has no entry when none of its dependencies are bundled.
    for dep in binary_dependencies.get(binary, []):
        new_dep = osp.basename(new_names[dep])
        print(f"{binary}: {dep} -> {new_dep}")
        subprocess.check_output([patchelf, "--replace-needed", dep, new_dep, binary], cwd=output_library)

    print("Update library rpath")
    subprocess.check_output(
        [patchelf, "--set-rpath", "$ORIGIN:$ORIGIN/../torchvision.libs", binary_path], cwd=output_library
    )


def relocate_dll_library(dumpbin, output_dir, output_library, binary):
    """
    Relocate a DLL/PE shared library to be packaged on a wheel.

    Given a shared library, find the transitive closure of its dependencies,
    rename and copy them into the wheel.

    Args:
        dumpbin: Path to the ``dumpbin`` executable.
        output_dir: Root of the unpacked wheel; dependencies are copied into
            its ``torchvision`` sub-directory.
        output_library: Directory that contains `binary`.
        binary: File name of the shared library to relocate.
    """
    print(f"Relocating {binary}")
    binary_path = osp.join(output_library, binary)

    # File names are case-insensitive on Windows, so compare in lower case.
    allowlist = {name.lower() for name in WINDOWS_ALLOWLIST}

    binary_queue = deque(find_dll_dependencies(dumpbin, binary_path))
    binary_paths = {binary: binary_path}

    while binary_queue:
        library = binary_queue.popleft()
        # Skip blank entries and DLLs that were already resolved.
        if not library or library in binary_paths:
            continue

        library_key = library.lower()
        if library_key in allowlist or library_key.startswith("api-ms-win"):
            print(f"Omitting {library}")
            continue

        library_path = find_program(library)
        if library_path is None:
            print(f"{library} not found")
            continue

        # The directory is usually spelled "System32", so compare in lower case.
        if osp.basename(osp.dirname(library_path)).lower() == "system32":
            continue

        print(f"{library}: {library_path}")

        binary_paths[library] = library_path
        binary_queue.extend(find_dll_dependencies(dumpbin, library_path))

    print("Copying dependencies to wheel directory")
    package_dir = osp.join(output_dir, "torchvision")
    for library, library_path in binary_paths.items():
        if library == binary:
            continue
        new_library_path = osp.join(package_dir, library)
        print(f"{library} -> {new_library_path}")
        shutil.copyfile(library_path, new_library_path)


def compress_wheel(output_dir, wheel, wheel_dir, wheel_name):
    """Create RECORD file and compress wheel distribution.

    Args:
        output_dir: Root of the unpacked wheel; removed once the archive has
            been written.
        wheel: Path of the original wheel file, replaced by the new archive.
        wheel_dir: Directory in which the intermediate zip file is created.
        wheel_name: Base name (no extension) of the intermediate zip file.

    Raises:
        FileNotFoundError: If `output_dir` contains no ``*.dist-info`` entry.
    """
    print("Update RECORD file in wheel")
    dist_infos = glob.glob(osp.join(output_dir, "*.dist-info"))
    if not dist_infos:
        raise FileNotFoundError(f"No *.dist-info directory found in {output_dir}")
    record_file = osp.join(dist_infos[0], "RECORD")

    # newline="" keeps "\n" line endings on every platform.
    with open(record_file, "w", encoding="utf-8", newline="") as f:
        for root, _, files in os.walk(output_dir):
            for this_file in files:
                full_file = osp.join(root, this_file)
                # Archive member names use "/" regardless of the platform.
                rel_file = osp.relpath(full_file, output_dir).replace(os.sep, "/")
                if full_file == record_file:
                    # RECORD lists itself without hash and size.
                    f.write(f"{rel_file},,\n")
                else:
                    digest, size = rehash(full_file)
                    f.write(f"{rel_file},{digest},{size}\n")

    print("Compressing wheel")
    base_wheel_name = osp.join(wheel_dir, wheel_name)
    shutil.make_archive(base_wheel_name, "zip", output_dir)
    os.remove(wheel)
    shutil.move(f"{base_wheel_name}.zip", wheel)
    shutil.rmtree(output_dir)


def patch_linux():
    """Relocate the ELF binaries of every wheel found in ``PACKAGE_ROOT/dist``.

    Each wheel is unpacked into ``PACKAGE_ROOT/dist/.wheel-process``, the
    torchvision binaries present in it are relocated, and the wheel is then
    re-packed in place.

    Raises:
        FileNotFoundError: If ``patchelf`` cannot be found.
    """
    # Get patchelf location
    patchelf = find_program("patchelf")
    if patchelf is None:
        raise FileNotFoundError("Patchelf was not found in the system, please make sure that is available on the PATH.")

    # Find wheel
    print("Finding wheels...")
    wheels = glob.glob(osp.join(PACKAGE_ROOT, "dist", "*.whl"))
    output_dir = osp.join(PACKAGE_ROOT, "dist", ".wheel-process")

    torchvision_binaries = ("image.so", "video_reader.so")
    for wheel in wheels:
        # Always start from an empty scratch directory.
        if osp.exists(output_dir):
            shutil.rmtree(output_dir)

        os.makedirs(output_dir)

        print("Unzipping wheel...")
        wheel_file = osp.basename(wheel)
        wheel_dir = osp.dirname(wheel)
        print(f"{wheel_file}")
        wheel_name, _ = osp.splitext(wheel_file)
        unzip_file(wheel, output_dir)

        print("Finding ELF dependencies...")
        output_library = osp.join(output_dir, "torchvision")
        for binary in torchvision_binaries:
            if osp.exists(osp.join(output_library, binary)):
                relocate_elf_library(patchelf, output_dir, output_library, binary)

        compress_wheel(output_dir, wheel, wheel_dir, wheel_name)


def patch_win():
    """Relocate the DLL/PE binaries of every wheel found in ``PACKAGE_ROOT/dist``.

    Each wheel is unpacked into ``PACKAGE_ROOT/dist/.wheel-process``, the
    torchvision binaries present in it are relocated, and the wheel is then
    re-packed in place.

    Raises:
        FileNotFoundError: If ``dumpbin`` cannot be found.
    """
    # Get dumpbin location
    dumpbin = find_program("dumpbin")
    if dumpbin is None:
        raise FileNotFoundError("Dumpbin was not found in the system, please make sure that is available on the PATH.")

    # Find wheel
    print("Finding wheels...")
    wheels = glob.glob(osp.join(PACKAGE_ROOT, "dist", "*.whl"))
    output_dir = osp.join(PACKAGE_ROOT, "dist", ".wheel-process")

    torchvision_binaries = ("image.pyd", "video_reader.pyd")
    for wheel in wheels:
        # Always start from an empty scratch directory.
        if osp.exists(output_dir):
            shutil.rmtree(output_dir)

        os.makedirs(output_dir)

        print("Unzipping wheel...")
        wheel_file = osp.basename(wheel)
        wheel_dir = osp.dirname(wheel)
        print(f"{wheel_file}")
        wheel_name, _ = osp.splitext(wheel_file)
        unzip_file(wheel, output_dir)

        print("Finding DLL/PE dependencies...")
        output_library = osp.join(output_dir, "torchvision")
        for binary in torchvision_binaries:
            if osp.exists(osp.join(output_library, binary)):
                relocate_dll_library(dumpbin, output_dir, output_library, binary)

        compress_wheel(output_dir, wheel, wheel_dir, wheel_name)


if __name__ == "__main__":
    # Only Linux and Windows wheels are post-processed; on any other platform
    # the script does nothing.
    if sys.platform == "linux":
        patch_linux()
    elif sys.platform == "win32":
        patch_win()