# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

import distutils.spawn
import subprocess

from .builder import OpBuilder


class AsyncIOBuilder(OpBuilder):
    """Op builder describing how to compile and link the ``async_io`` extension.

    Supplies the source list, include paths and compiler/linker flags for the
    op, and checks whether libaio can be linked on the build host.
    """

    BUILD_VAR = "DS_BUILD_AIO"
    NAME = "async_io"

    def __init__(self):
        super().__init__(name=self.NAME)

    def absolute_name(self):
        """Return the dotted module name of the built op."""
        return f'deepspeed.ops.aio.{self.NAME}_op'

    def sources(self):
        """Return the relative paths of the C++ sources that make up the op."""
        return [
            'csrc/aio/py_lib/deepspeed_py_copy.cpp', 'csrc/aio/py_lib/py_ds_aio.cpp',
            'csrc/aio/py_lib/deepspeed_py_aio.cpp', 'csrc/aio/py_lib/deepspeed_py_aio_handle.cpp',
            'csrc/aio/py_lib/deepspeed_aio_thread.cpp', 'csrc/aio/common/deepspeed_aio_utils.cpp',
            'csrc/aio/common/deepspeed_aio_common.cpp', 'csrc/aio/common/deepspeed_aio_types.cpp',
            'csrc/aio/py_lib/deepspeed_pin_tensor.cpp'
        ]

    def include_paths(self):
        """Return the relative include directories needed by the sources."""
        return ['csrc/aio/py_lib', 'csrc/aio/common']

    def cxx_args(self):
        """Return the C++ compiler flags for the op.

        The architecture and SIMD flags are whatever ``self.cpu_arch()`` and
        ``self.simd_width()`` return.
        """
        # -O0 for improved debugging, since performance is bound by I/O
        cpu_arch = self.cpu_arch()
        simd_width = self.simd_width()
        return [
            '-g',
            '-Wall',
            '-O0',
            '-std=c++14',
            '-shared',
            '-fPIC',
            '-Wno-reorder',
            cpu_arch,
            '-fopenmp',
            simd_width,
            '-laio',
        ]

    def extra_ldflags(self):
        """Return the extra linker flags: link against libaio and libiomp5."""
        return ['-laio', '-liomp5']

    def check_for_libaio_pkg(self):
        """Ask the first available package manager whether libaio is installed.

        Only the first of dpkg, pacman and rpm that can be located is queried.
        If its query does not exit with status 0 (or it cannot be launched),
        a warning naming the package and the tool to install it with is
        emitted.

        Returns:
            True if the queried package manager reports the package as
            installed; False otherwise, including when none of the known
            package managers is found.
        """
        # package manager -> [query flag, package name, tool used to install it]
        libs = dict(
            dpkg=["-l", "libaio-dev", "apt"],
            pacman=["-Q", "libaio", "pacman"],
            rpm=["-q", "libaio-devel", "yum"],
        )

        found = False
        for pkgmgr, (flag, lib, tool) in libs.items():
            if distutils.spawn.find_executable(pkgmgr) is None:
                continue
            # Run without a shell and discard the output: piping stdout/stderr
            # and then calling wait() without reading them can deadlock once
            # the child fills a pipe, and leaves the pipe handles open.
            try:
                returncode = subprocess.run([pkgmgr, flag, lib],
                                            stdout=subprocess.DEVNULL,
                                            stderr=subprocess.DEVNULL).returncode
            except OSError:
                # e.g. the file exists but is not executable; treat it like a
                # failed query, as the shell-based invocation would have.
                returncode = 1
            if returncode == 0:
                found = True
            else:
                self.warning(f"{self.NAME}: please install the {lib} package with {tool}")
            # Only the first package manager present is consulted.
            break
        return found

    def is_compatible(self, verbose=True):
        """Return whether the op can be built on this system.

        Args:
            verbose: When true and libaio is not usable, emit warnings
                explaining what is missing and how to fix it. Also forwarded
                to the parent class's ``is_compatible``.

        Returns:
            True only if both the parent class's check and the libaio link
            check succeed.
        """
        # Check for the existence of libaio by using distutils
        # to compile and link a test program that calls io_submit,
        # which is a function provided by libaio that is used in the async_io op.
        # If needed, one can define -I and -L entries in CFLAGS and LDFLAGS
        # respectively to specify the directories for libaio.h and libaio.so.
        aio_compatible = self.has_function('io_submit', ('aio', ))
        if verbose and not aio_compatible:
            self.warning(f"{self.NAME} requires the dev libaio .so object and headers but these were not found.")

            # Check for the libaio package via known package managers
            # to print suggestions on which package to install.
            self.check_for_libaio_pkg()

            self.warning(
                "If libaio is already installed (perhaps from source), try setting the CFLAGS and LDFLAGS environment variables to where it can be found."
            )
        return super().is_compatible(verbose) and aio_compatible