Unverified Commit 9a7235fa authored by Quan (Andy) Gan's avatar Quan (Andy) Gan Committed by GitHub
Browse files

[Performance] Use allocator from PyTorch if possible (#2328)

* first commit

* some thoughts

* move around

* more commit

* more fixes

* now it uses torch allocator

* fix symbol export error

* fix

* fixes

* test fix

* add script

* building separate library per version

* fix for vs2019

* more fixes

* fix on windows build

* update jenkinsfile

* auto copy built dlls for windows

* lint and installation guide update

* fix

* specify conda environment

* set environment for ci

* fix

* fix

* fix

* fix again

* revert

* fix cmake

* fix

* switch to using python interpreter path

* remove scripts

* debug

* oops sorry

* Update index.rst

* Update index.rst

* copies automatically, no need for this

* do not print message if library not found

* tiny fixes

* debug on nightly

* replace add_compile_definitions to make CMake 3.5 happy

* fix linking to wrong lib for multiple pytorch envs

* changed building strategy

* fix nightly

* fix windows

* fix windows again

* setup bugfix

* address comments

* change README
parent 4444a43a
...@@ -61,9 +61,13 @@ include_directories("third_party/minigun/minigun") ...@@ -61,9 +61,13 @@ include_directories("third_party/minigun/minigun")
include_directories("third_party/minigun/third_party/moderngpu/src") include_directories("third_party/minigun/third_party/moderngpu/src")
include_directories("third_party/phmap/") include_directories("third_party/phmap/")
include_directories("third_party/xbyak/") include_directories("third_party/xbyak/")
include_directories("tensoradapter/include")
# initial variables # initial variables
set(DGL_LINKER_LIBS "") if(NOT MSVC)
set(DGL_LINKER_LIBS "dl")
endif(NOT MSVC)
if(MSVC OR CMAKE_SYSTEM_NAME STREQUAL "Darwin") if(MSVC OR CMAKE_SYSTEM_NAME STREQUAL "Darwin")
set(DGL_RUNTIME_LINKER_LIBS "") set(DGL_RUNTIME_LINKER_LIBS "")
else(MSVC OR CMAKE_SYSTEM_NAME STREQUAL "Darwin") else(MSVC OR CMAKE_SYSTEM_NAME STREQUAL "Darwin")
...@@ -110,7 +114,8 @@ if(USE_OPENMP) ...@@ -110,7 +114,8 @@ if(USE_OPENMP)
endif(USE_OPENMP) endif(USE_OPENMP)
if(USE_AVX) if(USE_AVX)
add_compile_definitions(USE_AVX) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DUSE_AVX")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_AVX")
endif(USE_AVX) endif(USE_AVX)
# To compile METIS correct for DGL. # To compile METIS correct for DGL.
...@@ -183,6 +188,46 @@ if (LIBCXX_ENABLE_PARALLEL_ALGORITHMS) ...@@ -183,6 +188,46 @@ if (LIBCXX_ENABLE_PARALLEL_ALGORITHMS)
endif(LIBCXX_ENABLE_PARALLEL_ALGORITHMS) endif(LIBCXX_ENABLE_PARALLEL_ALGORITHMS)
target_link_libraries(dgl ${DGL_LINKER_LIBS} ${DGL_RUNTIME_LINKER_LIBS}) target_link_libraries(dgl ${DGL_LINKER_LIBS} ${DGL_RUNTIME_LINKER_LIBS})
if(MSVC)
add_custom_command(
TARGET dgl POST_BUILD COMMAND
cmd.exe /c "COPY /Y Release\\dgl.dll .")
endif(MSVC)
# Tensor adapter libraries
# Linking against LibTorch involves linking against a bunch of other libraries
# returned by PyTorch's CMake (e.g. C10 or NVTools). Because CMake caches
# the found libraries in find_library(), often times CMake will look into the libraries
# of the wrong version when I build everything in the same CMake process. As
# a result, I (BarclayII) am launching an individual CMake build for every PyTorch version.
if(BUILD_TORCH)
file(TO_NATIVE_PATH ${CMAKE_CURRENT_BINARY_DIR} BINDIR)
file(TO_NATIVE_PATH ${CMAKE_COMMAND} CMAKE_CMD)
if(MSVC)
file(TO_NATIVE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/tensoradapter/pytorch/build.bat BUILD_SCRIPT)
add_custom_target(
tensoradapter_pytorch
${CMAKE_COMMAND} -E env
CMAKE_COMMAND=${CMAKE_CMD}
CUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR}
BINDIR=${BINDIR}
cmd /e:on /c ${BUILD_SCRIPT} ${TORCH_PYTHON_INTERPS}
DEPENDS ${BUILD_SCRIPT}
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/tensoradapter/pytorch)
else(MSVC)
file(TO_NATIVE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/tensoradapter/pytorch/build.sh BUILD_SCRIPT)
add_custom_target(
tensoradapter_pytorch
${CMAKE_COMMAND} -E env
CMAKE_COMMAND=${CMAKE_CMD}
CUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR}
BINDIR=${CMAKE_CURRENT_BINARY_DIR}
bash ${BUILD_SCRIPT} ${TORCH_PYTHON_INTERPS}
DEPENDS ${BUILD_SCRIPT}
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/tensoradapter/pytorch)
endif(MSVC)
add_dependencies(dgl tensoradapter_pytorch)
endif(BUILD_TORCH)
# Installation rules # Installation rules
install(TARGETS dgl DESTINATION lib${LIB_SUFFIX}) install(TARGETS dgl DESTINATION lib${LIB_SUFFIX})
......
#!/usr/bin/env groovy #!/usr/bin/env groovy
dgl_linux_libs = "build/libdgl.so, build/runUnitTests, python/dgl/_ffi/_cy3/core.cpython-36m-x86_64-linux-gnu.so" dgl_linux_libs = "build/libdgl.so, build/runUnitTests, python/dgl/_ffi/_cy3/core.cpython-36m-x86_64-linux-gnu.so, build/tensoradapter/pytorch/*.so"
// Currently DGL on Windows is not working with Cython yet // Currently DGL on Windows is not working with Cython yet
dgl_win64_libs = "build\\dgl.dll, build\\runUnitTests.exe" dgl_win64_libs = "build\\dgl.dll, build\\runUnitTests.exe, build\\tensoradapter\\pytorch\\*.dll"
def init_git() { def init_git() {
sh "rm -rf *" sh "rm -rf *"
......
...@@ -288,9 +288,7 @@ Right now, DGL works on [PyTorch](https://pytorch.org) 1.5.0+, [MXNet](https://m ...@@ -288,9 +288,7 @@ Right now, DGL works on [PyTorch](https://pytorch.org) 1.5.0+, [MXNet](https://m
``` ```
conda install -c dglteam dgl # cpu version conda install -c dglteam dgl # cpu version
conda install -c dglteam dgl-cuda9.0 # CUDA 9.0
conda install -c dglteam dgl-cuda9.2 # CUDA 9.2 conda install -c dglteam dgl-cuda9.2 # CUDA 9.2
conda install -c dglteam dgl-cuda10.0 # CUDA 10.0
conda install -c dglteam dgl-cuda10.1 # CUDA 10.1 conda install -c dglteam dgl-cuda10.1 # CUDA 10.1
conda install -c dglteam dgl-cuda10.2 # CUDA 10.2 conda install -c dglteam dgl-cuda10.2 # CUDA 10.2
conda install -c dglteam dgl-cuda11.0 # CUDA 11.0 conda install -c dglteam dgl-cuda11.0 # CUDA 11.0
...@@ -302,9 +300,7 @@ conda install -c dglteam dgl-cuda11.0 # CUDA 11.0 ...@@ -302,9 +300,7 @@ conda install -c dglteam dgl-cuda11.0 # CUDA 11.0
| | Latest Nightly Build Version | Stable Version | | | Latest Nightly Build Version | Stable Version |
|-----------|-------------------------------|-------------------------| |-----------|-------------------------------|-------------------------|
| CPU | `pip install --pre dgl` | `pip install dgl` | | CPU | `pip install --pre dgl` | `pip install dgl` |
| CUDA 9.0 | `pip install --pre dgl-cu90` | `pip install dgl-cu90` |
| CUDA 9.2 | `pip install --pre dgl-cu92` | `pip install dgl-cu92` | | CUDA 9.2 | `pip install --pre dgl-cu92` | `pip install dgl-cu92` |
| CUDA 10.0 | `pip install --pre dgl-cu100` | `pip install dgl-cu100` |
| CUDA 10.1 | `pip install --pre dgl-cu101` | `pip install dgl-cu101` | | CUDA 10.1 | `pip install --pre dgl-cu101` | `pip install dgl-cu101` |
| CUDA 10.2 | `pip install --pre dgl-cu102` | `pip install dgl-cu102` | | CUDA 10.2 | `pip install --pre dgl-cu102` | `pip install dgl-cu102` |
| CUDA 11.0 | `pip install --pre dgl-cu110` | `pip install dgl-cu110` | | CUDA 11.0 | `pip install --pre dgl-cu110` | `pip install dgl-cu110` |
......
...@@ -42,3 +42,6 @@ set(USE_OPENMP ON) ...@@ -42,3 +42,6 @@ set(USE_OPENMP ON)
# Whether to enable Intel's avx optimized kernel # Whether to enable Intel's avx optimized kernel
set(USE_AVX ON) set(USE_AVX ON)
# Whether to build PyTorch plugins
set(BUILD_TORCH ON)
...@@ -246,6 +246,9 @@ macro(dgl_config_cuda out_variable) ...@@ -246,6 +246,9 @@ macro(dgl_config_cuda out_variable)
# 0. Add host flags # 0. Add host flags
message(STATUS "${CMAKE_CXX_FLAGS}") message(STATUS "${CMAKE_CXX_FLAGS}")
string(REGEX REPLACE "[ \t\n\r]" "," CXX_HOST_FLAGS "${CMAKE_CXX_FLAGS}") string(REGEX REPLACE "[ \t\n\r]" "," CXX_HOST_FLAGS "${CMAKE_CXX_FLAGS}")
if(MSVC AND NOT USE_MSVC_MT)
string(CONCAT CXX_HOST_FLAGS ${CXX_HOST_FLAGS} ",/MD")
endif()
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler ,${CXX_HOST_FLAGS}") list(APPEND CUDA_NVCC_FLAGS "-Xcompiler ,${CXX_HOST_FLAGS}")
# 1. Add arch flags # 1. Add arch flags
...@@ -260,7 +263,7 @@ macro(dgl_config_cuda out_variable) ...@@ -260,7 +263,7 @@ macro(dgl_config_cuda out_variable)
include(CheckCXXCompilerFlag) include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-std=c++14" SUPPORT_CXX14) check_cxx_compiler_flag("-std=c++14" SUPPORT_CXX14)
string(REPLACE "-std=c++11" "" CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}") string(REPLACE "-std=c++11" "" CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}")
list(APPEND CUDA_NVCC_FLAGS "--std=c++14") list(APPEND CUDA_NVCC_FLAGS "-std=c++14")
message(STATUS "CUDA flags: ${CUDA_NVCC_FLAGS}") message(STATUS "CUDA flags: ${CUDA_NVCC_FLAGS}")
......
...@@ -11,7 +11,7 @@ DGL works with the following operating systems: ...@@ -11,7 +11,7 @@ DGL works with the following operating systems:
* macOS X * macOS X
* Windows 10 * Windows 10
DGL requires Python version 3.6 or later. DGL requires Python version 3.6, 3.7, 3.8 or 3.9.
DGL supports multiple tensor libraries as backends, e.g., PyTorch, MXNet. For requirements on backends and how to select one, see :ref:`backends`. DGL supports multiple tensor libraries as backends, e.g., PyTorch, MXNet. For requirements on backends and how to select one, see :ref:`backends`.
...@@ -121,34 +121,32 @@ install the Python binding for DGL. ...@@ -121,34 +121,32 @@ install the Python binding for DGL.
Windows Windows
``````` ```````
The Windows source build is tested with CMake and MinGW/GCC. We highly recommend You can build DGL with MSBuild. With `MS Build Tools <https://go.microsoft.com/fwlink/?linkid=840931>`_
using CMake and GCC from `conda installations <https://conda.io/miniconda.html>`_. To and `CMake on Windows <https://cmake.org/download/>`_ installed, run the following
get started, run the following: in VS2019 x64 Native tools command prompt.
.. code:: bash
conda install cmake m2w64-gcc m2w64-make
Build the shared library and install the Python binding.
.. code:: - CPU only build
.. code::
md build MD build
cd build CD build
cmake -DCMAKE_CXX_FLAGS="-DDMLC_LOG_STACK_TRACE=0 -DDGL_EXPORTS" -DCMAKE_MAKE_PROGRAM=mingw32-make .. -G "MSYS Makefiles" cmake -DCMAKE_CXX_FLAGS="/DDGL_EXPORTS" -DCMAKE_CONFIGURATION_TYPES="Release" -DDMLC_FORCE_SHARED_CRT=ON .. -G "Visual Studio 16 2019"
mingw32-make msbuild dgl.sln /m
cd ..\python CD ..\python
python setup.py install python setup.py install
- CUDA build
.. code::
You can also build DGL with MSBuild. With `MS Build Tools <https://go.microsoft.com/fwlink/?linkid=840931>`_ MD build
and `CMake on Windows <https://cmake.org/download/>`_ installed, run the following CD build
in VS2017 x64 Native tools command prompt. cmake -DCMAKE_CXX_FLAGS="/DDGL_EXPORTS" -DCMAKE_CONFIGURATION_TYPES="Release" -DDMLC_FORCE_SHARED_CRT=ON -DUSE_CUDA=ON .. -G "Visual Studio 16 2019"
msbuild dgl.sln /m
CD ..\python
python setup.py install
.. code:: Optional Flags
``````````````
MD build - If you are using PyTorch, you can add ``-DBUILD_TORCH=ON`` flag in CMake
CD build to build PyTorch plugins for further performance optimization. This applies for Linux,
cmake -DCMAKE_CXX_FLAGS="/DDGL_EXPORTS" -DCMAKE_CONFIGURATION_TYPES="Release" .. -G "Visual Studio 15 2017 Win64" Windows, and Mac.
msbuild dgl.sln
cd ..\python
python setup.py install
...@@ -540,6 +540,11 @@ DGL_DLL int DGLStreamStreamSynchronize(int device_type, ...@@ -540,6 +540,11 @@ DGL_DLL int DGLStreamStreamSynchronize(int device_type,
DGLStreamHandle src, DGLStreamHandle src,
DGLStreamHandle dst); DGLStreamHandle dst);
/*!
* \brief Sets the path to the tensoradapter library
*/
DGL_DLL void DGLSetTAPath(const char *path_cstr);
/*! /*!
* \brief Bug report macro. * \brief Bug report macro.
* *
......
/*!
* Copyright (c) 2017 by Contributors
* \file dgl/runtime/env.h
* \brief Structure for holding DGL global environment variables
*/
#ifndef DGL_RUNTIME_ENV_H_
#define DGL_RUNTIME_ENV_H_
#include <string>
/*!
 * \brief Global environment variables.
 *
 * Process-wide singleton holding configuration shared across the DGL
 * runtime (currently only the tensor adapter library path, which is
 * written by DGLSetTAPath and read by TensorDispatcher).
 */
struct Env {
  /*!
   * \brief Get the singleton instance.
   * \return Pointer to the process-wide Env (function-local static,
   *         constructed on first call).
   */
  static Env* Global() {
    static Env inst;
    return &inst;
  }
  /*! \brief the path to the tensoradapter library (empty = no adapter) */
  std::string ta_path;
};
#endif // DGL_RUNTIME_ENV_H_
/*!
* Copyright (c) 2020 by Contributors
* \file array/tensordispatch.h
* \brief This file defines the dispatcher of tensor operators to framework-specific
* implementations.
*
* The dispatcher consists of a TensorDispatcher singleton in DGL C library and
* one separately-built shared library per supported backend.
*
* Those shared libraries contain wrappers of the framework-specific operators.
* The wrappers have almost the same signatures as functions in aten namespace,
* except that they accept and return DLManagedTensors instead of NDArrays.
* The wrappers are defined with extern "C", meaning that the C++ compiler will
* not do name mangling for those functions so that DGL can conveniently locate
* them using dlsym(3) (or GetProcAddress in Windows).
*
* The TensorDispatcher singleton maintains a mapping from an array operator to
* the address of the corresponding symbol in the shared library. During
* initialization, the TensorDispatcher checks which backend DGL is using.
* It then locates and opens the corresponding shared library using dlopen(3) (or
* LoadLibrary in Windows), and populates the said mapping above with dlsym(3)
* (or GetProcAddress in Windows).
*
* A tensor operator in TensorDispatcher first checks whether the corresponding symbol
* address is found in the mapping. If so, it calls the function located at the
* symbol address instead, translating NDArrays to DLManagedTensors using
* NDArray::ToDLPack(), and translates the DLManagedTensors in the return values
* back to NDArrays using NDArray::FromDLPack(). If not, it falls back to the
* implementation in dgl::aten namespace.
*/
#ifndef DGL_RUNTIME_TENSORDISPATCH_H_
#define DGL_RUNTIME_TENSORDISPATCH_H_
#include <dlpack/dlpack.h>
#include <tensoradapter.h>
#if defined(WIN32) || defined(_WIN32)
#include <windows.h>
#endif // WIN32
#include <vector>
#include "ndarray.h"
/*! \brief Casts a pointer \c entry to a function pointer with signature of \c func */
#define FUNCCAST(func, entry) (*reinterpret_cast<decltype(&(func))>(entry))
namespace dgl {
namespace runtime {
/*!
 * \brief Dispatcher that delegates the function calls to framework-specific C++ APIs.
 *
 * Singleton; symbols are resolved once in the constructor from the shared
 * library located at Env::Global()->ta_path.
 */
class TensorDispatcher {
 public:
  /*! \brief Get the singleton instance. */
  static TensorDispatcher* Global() {
    static TensorDispatcher inst;
    return &inst;
  }

  /*! \brief Whether an adapter library was successfully loaded. */
  inline bool IsAvailable() {
    return available_;
  }

  /*!
   * \brief Allocate an empty tensor.
   *
   * Used in NDArray::Empty().  Callers must check IsAvailable() first:
   * when no adapter library is loaded the entrypoint is nullptr and
   * calling through it is undefined behavior.
   *
   * \param shape The shape of the tensor.
   * \param dtype The data type.
   * \param ctx The device context.
   * \return The allocated tensor wrapped as an NDArray.
   */
  inline NDArray Empty(std::vector<int64_t> shape, DLDataType dtype, DLContext ctx) const {
    auto entry = entrypoints_[Op::kEmpty];
    auto result = FUNCCAST(tensoradapter::TAempty, entry)(shape, dtype, ctx);
    return NDArray::FromDLPack(result);
  }

 private:
  /*! \brief ctor; loads the adapter library and resolves symbols */
  TensorDispatcher();
  /*! \brief dtor; closes the adapter library handle if open */
  ~TensorDispatcher();

  /*!
   * \brief List of symbols in the adapter library.
   *
   * Must match the functions in tensoradapter/include/tensoradapter.h.
   */
  static constexpr const char *names_[] = {
    "TAempty",
  };

  /*! \brief Index of each function to the symbol list */
  class Op {
   public:
    static constexpr int kEmpty = 0;
  };

  /*! \brief Number of functions */
  static constexpr int num_entries_ = sizeof(names_) / sizeof(names_[0]);

  /*! \brief Entrypoints of each function; nullptr until resolved */
  void* entrypoints_[num_entries_] = {nullptr};

  /*! \brief Set to true only after the library has been opened */
  bool available_ = false;

  // BUGFIX: initialize handle_ to nullptr.  The constructor returns early
  // without assigning handle_ when ta_path is empty, but the destructor
  // dereferences the "if (handle_)" check — reading an uninitialized member
  // is undefined behavior and could call FreeLibrary/dlclose on garbage.
#if defined(WIN32) || defined(_WIN32)
  HINSTANCE handle_ = nullptr;
#else   // !WIN32
  void* handle_ = nullptr;
#endif  // WIN32
};
}; // namespace runtime
}; // namespace dgl
#endif // DGL_RUNTIME_TENSORDISPATCH_H_
...@@ -9,8 +9,7 @@ and transforming graphs. ...@@ -9,8 +9,7 @@ and transforming graphs.
# This initializes Winsock and performs cleanup at termination as required # This initializes Winsock and performs cleanup at termination as required
import socket import socket
# Need to ensure that the backend framework is imported before load dgl libs, # Should import backend before importing anything else
# otherwise weird cuda problem happens
from .backend import load_backend, backend_name from .backend import load_backend, backend_name
from . import function from . import function
......
...@@ -31,15 +31,17 @@ class DGLError(Exception): ...@@ -31,15 +31,17 @@ class DGLError(Exception):
def _load_lib(): def _load_lib():
"""Load libary by searching possible path.""" """Load libary by searching possible path."""
lib_path = libinfo.find_lib_path() lib_path = libinfo.find_lib_path()
lib = ctypes.CDLL(lib_path[0], ctypes.RTLD_GLOBAL) lib = ctypes.CDLL(lib_path[0])
dirname = os.path.dirname(lib_path[0])
basename = os.path.basename(lib_path[0])
# DMatrix functions # DMatrix functions
lib.DGLGetLastError.restype = ctypes.c_char_p lib.DGLGetLastError.restype = ctypes.c_char_p
return lib, os.path.basename(lib_path[0]) return lib, basename, dirname
# version number # version number
__version__ = libinfo.__version__ __version__ = libinfo.__version__
# library instance of nnvm # library instance of nnvm
_LIB, _LIB_NAME = _load_lib() _LIB, _LIB_NAME, _DIR_NAME = _load_lib()
# The FFI mode of DGL # The FFI mode of DGL
_FFI_MODE = os.environ.get("DGL_FFI", "auto") _FFI_MODE = os.environ.get("DGL_FFI", "auto")
...@@ -109,3 +111,26 @@ def decorate(func, fwrapped): ...@@ -109,3 +111,26 @@ def decorate(func, fwrapped):
""" """
import decorator import decorator
return decorator.decorate(func, fwrapped) return decorator.decorate(func, fwrapped)
def set_ta_path(backend, version):
    """Tell DGL which tensoradapter library to look for symbols.

    Parameters
    ----------
    backend : str
        The backend (currently ``pytorch``, ``mxnet`` or ``tensorflow``).
    version : str
        The version number of the backend.
    """
    # Drop any local-build suffix (e.g. "1.7.0+cu102" -> "1.7.0").
    version = version.split('+')[0]
    if sys.platform.startswith('win'):
        basename = 'tensoradapter_%s_%s.dll' % (backend, version)
    elif sys.platform.startswith('darwin'):
        basename = 'libtensoradapter_%s_%s.dylib' % (backend, version)
    elif sys.platform.startswith('linux'):
        basename = 'libtensoradapter_%s_%s.so' % (backend, version)
    else:
        raise NotImplementedError('Unsupported system: %s' % sys.platform)
    # The adapter libraries live next to the main DGL shared library.
    full_path = os.path.join(_DIR_NAME, 'tensoradapter', backend, basename)
    _LIB.DGLSetTAPath(full_path.encode('utf-8'))
...@@ -19,6 +19,29 @@ def _gen_missing_api(api, mod_name): ...@@ -19,6 +19,29 @@ def _gen_missing_api(api, mod_name):
return _missing_api return _missing_api
def load_backend(mod_name): def load_backend(mod_name):
# Load backend does four things:
# (1) Import backend framework (PyTorch, MXNet, Tensorflow, etc.)
# (2) Import DGL C library. DGL imports it *after* PyTorch/MXNet/Tensorflow. Otherwise
# DGL will crash with errors like `munmap_chunk(): invalid pointer`.
# (3) Sets up the tensoradapter library path.
# (4) Import the Python wrappers of the backend framework. DGL does this last because
# it already depends on both the backend framework and the DGL C library.
if mod_name == 'pytorch':
import torch
mod = torch
elif mod_name == 'mxnet':
import mxnet
mod = mxnet
elif mod_name == 'tensorflow':
import tensorflow
mod = tensorflow
else:
raise NotImplementedError('Unsupported backend: %s' % mod_name)
from .._ffi.base import set_ta_path # imports DGL C library
version = mod.__version__
set_ta_path(mod_name, version)
print('Using backend: %s' % mod_name, file=sys.stderr) print('Using backend: %s' % mod_name, file=sys.stderr)
mod = importlib.import_module('.%s' % mod_name, __name__) mod = importlib.import_module('.%s' % mod_name, __name__)
thismod = sys.modules[__name__] thismod = sys.modules[__name__]
......
...@@ -35,11 +35,45 @@ def get_lib_path(): ...@@ -35,11 +35,45 @@ def get_lib_path():
return libs, version return libs, version
def get_ta_lib_pattern():
    """Return the glob pattern matching tensoradapter shared libraries
    on the current platform.

    Raises
    ------
    NotImplementedError
        If the platform is not Linux, macOS or Windows.
    """
    if sys.platform.startswith('win'):
        return 'tensoradapter_*.dll'
    if sys.platform.startswith('darwin'):
        return 'libtensoradapter_*.dylib'
    if sys.platform.startswith('linux'):
        return 'libtensoradapter_*.so'
    raise NotImplementedError('Unsupported system: %s' % sys.platform)
LIBS, VERSION = get_lib_path() LIBS, VERSION = get_lib_path()
BACKENDS = ['pytorch']
TA_LIB_PATTERN = get_ta_lib_pattern()
def cleanup():
    """Remove build artifacts previously copied into the source tree for
    wheel/conda builds (MANIFEST.in, copied DGL libraries, and the
    per-backend tensoradapter libraries)."""
    def _remove_quietly(path):
        # Best-effort removal: a missing file is fine, but only swallow
        # OS-level errors.  The original bare ``except:`` also caught
        # KeyboardInterrupt/SystemExit, which could mask a user abort.
        try:
            os.remove(path)
        except OSError:
            pass

    # Wheel cleanup
    _remove_quietly("MANIFEST.in")
    for path in LIBS:
        _, libname = os.path.split(path)
        _remove_quietly(os.path.join("dgl", libname))
    for backend in BACKENDS:
        for ta_path in glob.glob(
                os.path.join(CURRENT_DIR, "dgl", "tensoradapter", backend, TA_LIB_PATTERN)):
            _remove_quietly(ta_path)
def config_cython(): def config_cython():
"""Try to configure cython and return cython configuration""" """Try to configure cython and return cython configuration"""
if os.name == 'nt': if sys.platform.startswith('win'):
print("WARNING: Cython is not supported on Windows, will compile without cython module") print("WARNING: Cython is not supported on Windows, will compile without cython module")
return [] return []
sys_cflags = sysconfig.get_config_var("CFLAGS") sys_cflags = sysconfig.get_config_var("CFLAGS")
...@@ -84,6 +118,8 @@ include_libs = False ...@@ -84,6 +118,8 @@ include_libs = False
wheel_include_libs = False wheel_include_libs = False
if "bdist_wheel" in sys.argv or os.getenv('CONDA_BUILD'): if "bdist_wheel" in sys.argv or os.getenv('CONDA_BUILD'):
wheel_include_libs = True wheel_include_libs = True
elif "clean" in sys.argv:
cleanup()
else: else:
include_libs = True include_libs = True
...@@ -94,8 +130,18 @@ if wheel_include_libs: ...@@ -94,8 +130,18 @@ if wheel_include_libs:
with open("MANIFEST.in", "w") as fo: with open("MANIFEST.in", "w") as fo:
for path in LIBS: for path in LIBS:
shutil.copy(path, os.path.join(CURRENT_DIR, 'dgl')) shutil.copy(path, os.path.join(CURRENT_DIR, 'dgl'))
_, libname = os.path.split(path) dir_, libname = os.path.split(path)
fo.write("include dgl/%s\n" % libname) fo.write("include dgl/%s\n" % libname)
for backend in BACKENDS:
for ta_path in glob.glob(os.path.join(dir_, "tensoradapter", backend, TA_LIB_PATTERN)):
ta_name = os.path.basename(ta_path)
os.makedirs(os.path.join(CURRENT_DIR, 'dgl', 'tensoradapter', backend), exist_ok=True)
shutil.copy(
os.path.join(dir_, 'tensoradapter', backend, ta_name),
os.path.join(CURRENT_DIR, 'dgl', 'tensoradapter', backend))
fo.write("include dgl/tensoradapter/%s/%s\n" % (backend, ta_name))
setup_kwargs = { setup_kwargs = {
"include_package_data": True "include_package_data": True
} }
...@@ -104,9 +150,17 @@ if wheel_include_libs: ...@@ -104,9 +150,17 @@ if wheel_include_libs:
# Conda build also includes the binary library # Conda build also includes the binary library
if include_libs: if include_libs:
rpath = [os.path.relpath(path, CURRENT_DIR) for path in LIBS] rpath = [os.path.relpath(path, CURRENT_DIR) for path in LIBS]
data_files = [('dgl', rpath)]
for path in LIBS:
for backend in BACKENDS:
data_files.append((
'dgl/tensoradapter/%s' % backend,
glob.glob(os.path.join(
os.path.dirname(os.path.relpath(path, CURRENT_DIR)),
'tensoradapter', backend, TA_LIB_PATTERN))))
setup_kwargs = { setup_kwargs = {
"include_package_data": True, "include_package_data": True,
"data_files": [('dgl', rpath)] "data_files": data_files
} }
setup( setup(
...@@ -136,8 +190,4 @@ setup( ...@@ -136,8 +190,4 @@ setup(
) )
if wheel_include_libs: if wheel_include_libs:
# Wheel cleanup cleanup()
os.remove("MANIFEST.in")
for path in LIBS:
_, libname = os.path.split(path)
os.remove("dgl/%s" % libname)
...@@ -144,6 +144,11 @@ struct PairIterator : public std::iterator<std::random_access_iterator_tag, ...@@ -144,6 +144,11 @@ struct PairIterator : public std::iterator<std::random_access_iterator_tag,
return PairRef<V1, V2>(row, col); return PairRef<V1, V2>(row, col);
} }
// required for random access iterators in VS2019
PairRef<V1, V2> operator[](size_t offset) const {
return PairRef<V1, V2>(row + offset, col + offset);
}
V1 *row; V1 *row;
V2 *col; V2 *col;
}; };
......
...@@ -151,6 +151,11 @@ struct CooIterator : public std::iterator<std::random_access_iterator_tag, ...@@ -151,6 +151,11 @@ struct CooIterator : public std::iterator<std::random_access_iterator_tag,
return TupleRef<IdType>(row, col, data); return TupleRef<IdType>(row, col, data);
} }
// required for random access iterators in VS2019
TupleRef<IdType> operator[](size_t offset) const {
return TupleRef<IdType>(row + offset, col + offset, data + offset);
}
IdType *row, *col, *data; IdType *row, *col, *data;
}; };
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include <dgl/runtime/module.h> #include <dgl/runtime/module.h>
#include <dgl/runtime/registry.h> #include <dgl/runtime/registry.h>
#include <dgl/runtime/device_api.h> #include <dgl/runtime/device_api.h>
#include <dgl/runtime/env.h>
#include <array> #include <array>
#include <algorithm> #include <algorithm>
#include <string> #include <string>
...@@ -378,6 +379,10 @@ int DGLCbArgToReturn(DGLValue* value, int code) { ...@@ -378,6 +379,10 @@ int DGLCbArgToReturn(DGLValue* value, int code) {
API_END(); API_END();
} }
void DGLSetTAPath(const char *path_cstr) {
Env::Global()->ta_path = std::string(path_cstr);
}
// set device api // set device api
DGL_REGISTER_GLOBAL(dgl::runtime::symbol::dgl_set_device) DGL_REGISTER_GLOBAL(dgl::runtime::symbol::dgl_set_device)
.set_body([](DGLArgs args, DGLRetValue *ret) { .set_body([](DGLArgs args, DGLRetValue *ret) {
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include <dgl/runtime/device_api.h> #include <dgl/runtime/device_api.h>
#include <dgl/runtime/shared_mem.h> #include <dgl/runtime/shared_mem.h>
#include <dgl/zerocopy_serializer.h> #include <dgl/zerocopy_serializer.h>
#include <dgl/runtime/tensordispatch.h>
#include "runtime_base.h" #include "runtime_base.h"
// deleter for arrays used by DLPack exporter // deleter for arrays used by DLPack exporter
...@@ -200,6 +201,10 @@ NDArray NDArray::EmptyShared(const std::string &name, ...@@ -200,6 +201,10 @@ NDArray NDArray::EmptyShared(const std::string &name,
NDArray NDArray::Empty(std::vector<int64_t> shape, NDArray NDArray::Empty(std::vector<int64_t> shape,
DLDataType dtype, DLDataType dtype,
DLContext ctx) { DLContext ctx) {
TensorDispatcher* td = TensorDispatcher::Global();
if (td->IsAvailable())
return td->Empty(shape, dtype, ctx);
NDArray ret = Internal::Create(shape, dtype, ctx); NDArray ret = Internal::Create(shape, dtype, ctx);
// setup memory content // setup memory content
size_t size = GetDataSize(ret.data_->dl_tensor); size_t size = GetDataSize(ret.data_->dl_tensor);
......
/*!
* Copyright (c) 2019 by Contributors
* \file runtime/tensordispatch.cc
* \brief Adapter library caller
*/
#include <dgl/runtime/tensordispatch.h>
#include <dgl/runtime/registry.h>
#include <dgl/runtime/env.h>
#include <dgl/packed_func_ext.h>
#if defined(WIN32) || defined(_WIN32)
#include <windows.h>
#else // !WIN32
#include <dlfcn.h>
#endif // WIN32
#include <cstring>
namespace dgl {
namespace runtime {
constexpr const char *TensorDispatcher::names_[];
/*!
 * \brief Load the tensor adapter library and resolve its symbols.
 *
 * Reads the library path stored via DGLSetTAPath() from
 * Env::Global()->ta_path.  If the path is empty or the library cannot
 * be opened, available_ stays false and every tensor operator falls
 * back to DGL's own implementation.
 *
 * NOTE(review): available_ is set to true even if individual symbols in
 * names_ fail to resolve (their entrypoints stay nullptr) — confirm that
 * every listed symbol is guaranteed to exist in the adapter library.
 */
TensorDispatcher::TensorDispatcher() {
  const std::string& path = Env::Global()->ta_path;
  if (path == "")
    // does not have dispatcher library; all operators fall back to DGL's implementation
    return;
#if defined(WIN32) || defined(_WIN32)
  // Windows equivalents of dlopen/dlsym.
  handle_ = LoadLibrary(path.c_str());
  if (!handle_)
    return;
  for (int i = 0; i < num_entries_; ++i)
    entrypoints_[i] = reinterpret_cast<void*>(GetProcAddress(handle_, names_[i]));
#else  // !WIN32
  // RTLD_LAZY: defer resolution of the library's own undefined symbols
  // until they are first used.
  handle_ = dlopen(path.c_str(), RTLD_LAZY);
  if (!handle_)
    return;
  for (int i = 0; i < num_entries_; ++i)
    entrypoints_[i] = dlsym(handle_, names_[i]);
#endif  // WIN32
  available_ = true;
}
/*!
 * \brief Release the dynamically loaded adapter library, if one was opened.
 */
TensorDispatcher::~TensorDispatcher() {
  if (handle_) {
#if defined(WIN32) || defined(_WIN32)
    FreeLibrary(handle_);
#else  // !WIN32
    dlclose(handle_);
#endif  // WIN32
  }
}
}; // namespace runtime
}; // namespace dgl
/*!
* Copyright (c) 2020 by Contributors
* \file tensoradapter.h
* \brief Header file for functions exposed by the adapter library.
*
* Functions in this library must be exported with extern "C" so that DGL can locate
* them with dlsym(3) (or GetProcAddress on Windows).
*/
#ifndef TENSORADAPTER_H_
#define TENSORADAPTER_H_
#include <dlpack/dlpack.h>
#include <vector>
#if defined(WIN32) || defined(_WIN32)
#define TA_EXPORTS __declspec(dllexport)
#else
#define TA_EXPORTS
#endif
namespace tensoradapter {
extern "C" {
/*!
* \brief Allocate an empty tensor
*
* \param shape The shape
* \param dtype The data type
* \param ctx The device
* \return The allocated tensor
*/
TA_EXPORTS DLManagedTensor* TAempty(
std::vector<int64_t> shape, DLDataType dtype, DLContext ctx);
}
}; // namespace tensoradapter
#endif // TENSORADAPTER_H_
cmake_minimum_required(VERSION 3.5)
project(tensoradapter_pytorch C CXX)

# Find PyTorch cmake files and the PyTorch version with the python
# interpreter $PYTHON_INTERP (or "python" if empty).
file(TO_NATIVE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/find_cmake.py" FIND_CMAKE_PY)
if(NOT PYTHON_INTERP)
  set(PYTHON_INTERP python)
endif()
message(STATUS "Using Python interpreter: ${PYTHON_INTERP}")
execute_process(
  COMMAND ${PYTHON_INTERP} ${FIND_CMAKE_PY}
  RESULT_VARIABLE FIND_CMAKE_STATUS
  OUTPUT_VARIABLE TORCH_PREFIX_VER
  OUTPUT_STRIP_TRAILING_WHITESPACE)
message(STATUS "find_cmake.py output: ${TORCH_PREFIX_VER}")
# Fail early with a clear message instead of a confusing list(GET) error below.
if(NOT FIND_CMAKE_STATUS EQUAL 0)
  message(FATAL_ERROR
    "Running find_cmake.py with ${PYTHON_INTERP} failed (status ${FIND_CMAKE_STATUS})")
endif()
list(LENGTH TORCH_PREFIX_VER TORCH_PREFIX_VER_LEN)
if(TORCH_PREFIX_VER_LEN LESS 2)
  message(FATAL_ERROR
    "find_cmake.py must print the Torch CMake prefix and version; got: ${TORCH_PREFIX_VER}")
endif()
list(GET TORCH_PREFIX_VER 0 TORCH_PREFIX)
list(GET TORCH_PREFIX_VER 1 TORCH_VER)
message(STATUS "Configuring for PyTorch ${TORCH_VER}")

set(Torch_DIR "${TORCH_PREFIX}/Torch")
message(STATUS "Setting directory to ${Torch_DIR}")
find_package(Torch REQUIRED)

# PyTorch's documented convention is to append TORCH_CXX_FLAGS to the global
# flags; they carry definitions (e.g. the libstdc++ dual-ABI macro) that must
# match the LibTorch binaries exactly.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${TORCH_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
if(NOT MSVC)
  # -O0/-g3/-ggdb are GCC/Clang-only options; do not feed them to cl.exe.
  set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g3 -ggdb")
endif()

# One adapter library per PyTorch version so multiple environments can coexist.
set(TORCH_TARGET_NAME "tensoradapter_pytorch_${TORCH_VER}")
file(GLOB TA_TORCH_SRC *.cpp)  # NOTE: adding a source file requires re-running CMake
add_library(${TORCH_TARGET_NAME} SHARED "${TA_TORCH_SRC}")
message(STATUS "tensoradapter found PyTorch includes: ${TORCH_INCLUDE_DIRS}")
message(STATUS "tensoradapter found PyTorch lib: ${TORCH_LIBRARIES}")
target_include_directories(
  ${TORCH_TARGET_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../include")
target_include_directories(
  ${TORCH_TARGET_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../../third_party/dlpack/include")
target_include_directories(
  ${TORCH_TARGET_NAME} PRIVATE "${TORCH_INCLUDE_DIRS}")
target_link_libraries(${TORCH_TARGET_NAME} PRIVATE "${TORCH_LIBRARIES}")
set_property(TARGET ${TORCH_TARGET_NAME} PROPERTY CXX_STANDARD 14)
message(STATUS "Configured target ${TORCH_TARGET_NAME}")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment