Unverified Commit 9ff56d20 authored by Hongzhi (Steve), Chen, committed by GitHub

[Cleanup] Remove featgraph and unused TVM dependency. (#5767)


Co-authored-by: Ubuntu <ubuntu@ip-172-31-28-63.ap-northeast-1.compute.internal>
parent ee00729b
.gitmodules
@@ -13,9 +13,6 @@
[submodule "third_party/phmap"]
    path = third_party/phmap
    url = https://github.com/greg7mdp/parallel-hashmap.git
[submodule "third_party/tvm"]
path = third_party/tvm
url = https://github.com/apache/incubator-tvm
[submodule "third_party/nanoflann"] [submodule "third_party/nanoflann"]
path = third_party/nanoflann path = third_party/nanoflann
url = https://github.com/jlblancoc/nanoflann url = https://github.com/jlblancoc/nanoflann
CMakeLists.txt
@@ -25,7 +25,6 @@ endif()
dgl_option(USE_CUDA "Build with CUDA" OFF)
dgl_option(USE_OPENMP "Build with OpenMP" ON)
dgl_option(USE_LIBXSMM "Build with LIBXSMM library optimization" ON)
dgl_option(USE_TVM "Build with TVM kernels" OFF)
dgl_option(BUILD_CPP_TEST "Build cpp unittest executables" OFF)
dgl_option(LIBCXX_ENABLE_PARALLEL_ALGORITHMS "Enable the parallel algorithms library. This requires the PSTL to be available." OFF)
dgl_option(USE_S3 "Build with S3 support" OFF)
@@ -252,19 +251,6 @@ if(NOT MSVC)
endif()
endif(NOT MSVC)
# Compile TVM Runtime and Featgraph
# (NOTE) We compile a dynamic library called featgraph_runtime, which the DGL library links to.
# Kernels are packed in a separate dynamic library called featgraph_kernels, which DGL
# will load at runtime.
if(USE_TVM)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DUSE_TVM")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_TVM")
target_include_directories(dgl PRIVATE "featgraph/include")
add_subdirectory("featgraph/")
list(APPEND DGL_LINKER_LIBS featgraph_runtime)
message(STATUS "Build with TVM runtime and featgraph kernels.")
endif(USE_TVM)
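The runtime split described above is visible from the Python side: `featgraph_runtime` is linked into `libdgl` at build time, while the packed kernels are loaded explicitly through the registered C API. A minimal sketch (the library path matches the one used in `featgraph/test.py` below), assuming DGL was built with `USE_TVM=ON`:

```python
import dgl

# featgraph_runtime is linked into libdgl at build time; the kernel library
# produced by pack_featgraph.py is loaded on demand at runtime.
dgl.sparse._CAPI_FG_LoadModule("../build/featgraph/libfeatgraph_kernels.so")
```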
# support PARALLEL_ALGORITHMS
if (LIBCXX_ENABLE_PARALLEL_ALGORITHMS)
add_definitions(-DPARALLEL_ALGORITHMS)
@@ -43,8 +43,5 @@ set(USE_OPENMP ON)
# Whether to build PyTorch plugins.
set(BUILD_TORCH ON)
# Whether to enable CUDA kernels compiled with TVM.
set(USE_TVM OFF)
# Whether to build DGL sparse library.
set(BUILD_SPARSE ON)
featgraph/CMakeLists.txt (deleted)
cmake_minimum_required(VERSION 3.5)
project(featgraph C CXX)
message(STATUS "Start configuring project ${PROJECT_NAME}")
# Find CUDA
include(../cmake/util/FindCUDA.cmake)
find_cuda(ON)
message(STATUS "${CUDA_INCLUDE_DIRS}")
add_custom_target(
featgraph_kernel
COMMAND python ${CMAKE_CURRENT_SOURCE_DIR}/pack_featgraph.py
COMMENT "Creating featgraph kernels..."
)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -O2 -fPIC")
file(GLOB FEATGRAPH_SRC
src/featgraph.cc
src/tvm_runtime_pack.cc
)
add_library(featgraph_runtime SHARED ${FEATGRAPH_SRC})
target_include_directories(featgraph_runtime PRIVATE ${CUDA_INCLUDE_DIRS})
target_include_directories(featgraph_runtime PRIVATE "./include")
target_include_directories(featgraph_runtime PRIVATE "../third_party/tvm/include")
target_include_directories(featgraph_runtime PRIVATE "../third_party/tvm/3rdparty/dmlc-core/include")
target_include_directories(featgraph_runtime PRIVATE "../third_party/tvm/3rdparty/dlpack/include")
target_link_libraries(featgraph_runtime "dl" # dynamic linking
${CUDA_CUDART_LIBRARY}
${CUDA_CUDA_LIBRARY}
${CUDA_NVRTC_LIBRARY})
add_dependencies(featgraph_runtime featgraph_kernel)
install(TARGETS featgraph_runtime LIBRARY DESTINATION lib)
featgraph/README.md (deleted)
# FeatGraph-DGL
FeatGraph is an efficient backend for Graph Neural Networks based on TVM.
- Original repo: https://github.com/amazon-research/FeatGraph
- SC2020 Paper: https://www.csl.cornell.edu/~zhiruz/pdfs/featgraph-sc2020.pdf
This folder contains the code for integrating FeatGraph kernels into DGL.
## Usage
After building DGL with `USE_TVM=ON`, you should be able to run:
```bash
python test.py
```
to verify correctness.
## Reference
- [TVM Tutorial on Deploy TVM Module using C++ API](https://tvm.apache.org/docs/deploy/cpp_deploy.html).
featgraph/include/featgraph.h (deleted)
/**
* Copyright (c) 2020 by Contributors
* @file featgraph/include/featgraph.h
* @brief FeatGraph kernel headers.
*/
#ifndef FEATGRAPH_H_
#define FEATGRAPH_H_
#include <dlpack/dlpack.h>
namespace dgl {
namespace featgraph {
/* @brief Load Featgraph module from given path. */
void LoadFeatGraphModule(const std::string& path);
/* @brief Call Featgraph's SDDMM kernel. */
void SDDMMTreeReduction(
DLManagedTensor* row, DLManagedTensor* col, DLManagedTensor* lhs,
DLManagedTensor* rhs, DLManagedTensor* out);
} // namespace featgraph
} // namespace dgl
#endif // FEATGRAPH_H_
""" Export featgraph kernels to a shared library. """
import tvm
from sddmm import sddmm_tree_reduction_gpu
def get_sddmm_kernels_gpu(idtypes, dtypes):
"""
Parameters
----------
idtypes: List[str]
Possible index types.
dtypes: List[str]
Possible data types.
Returns
-------
List[IRModule]:
The list of IRModules.
"""
ret = []
# SDDMM Tree Reduction
for dtype in dtypes:
for idtype in idtypes:
ret.append(sddmm_tree_reduction_gpu(idtype, dtype))
return ret
if __name__ == "__main__":
binary_path = "libfeatgraph_kernels.so"
kernels = []
idtypes = ["int32", "int64"]
dtypes = ["float16", "float64", "float32", "int32", "int64"]
kernels += get_sddmm_kernels_gpu(idtypes, dtypes)
# build kernels and export the module to libfeatgraph_kernels.so
module = tvm.build(kernels, target="cuda", target_host="llvm")
module.export_library(binary_path)
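For a quick sanity check, the exported library can be opened directly with the TVM runtime. A minimal sketch (not part of the original repo), assuming a CUDA-enabled TVM build and that the script above has already produced `libfeatgraph_kernels.so`:

```python
import tvm

# Load the packed kernel library and look up one kernel by its mangled name.
# Names follow "SDDMMTreeReduction_<idtype>_<dtype>" (see sddmm.py below).
mod = tvm.runtime.load_module("libfeatgraph_kernels.so")
func = mod.get_function("SDDMMTreeReduction_int32_float32")
print(func)
```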
""" The compute function and schedules for SDDMM kernels written in TVM. """
import tvm
from tvm import te
def sddmm_tree_reduction_gpu(idx_type, feat_type):
"""SDDMM kernels on GPU optimized with Tree Reduction.
Parameters
----------
idx_type : str
The data type for indexing tensors.
feat_type : str
The data type of feature tensor.
Returns
-------
IRModule
The result IRModule.
"""
# define vars and placeholders
nnz = te.var("nnz", idx_type)
num_rows = te.var("num_rows", idx_type)
num_cols = te.var("num_cols", idx_type)
H = te.var("num_heads", idx_type)
D = te.var("feat_len", idx_type)
row = te.placeholder((nnz,), idx_type, "row")
col = te.placeholder((nnz,), idx_type, "col")
ufeat = te.placeholder((num_rows, H, D), feat_type, "ufeat")
vfeat = te.placeholder((num_cols, H, D), feat_type, "vfeat")
# define edge computation function
def edge_func(eid, h, i):
k = te.reduce_axis((0, D), name="k")
return te.sum(ufeat[row[eid], h, k] * vfeat[col[eid], h, k], axis=k)
out = te.compute(
(nnz, H, tvm.tir.IntImm(idx_type, 1)), edge_func, name="out"
)
# define schedules
sched = te.create_schedule(out.op)
edge_axis, head_axis, _ = out.op.axis
reduce_axis = out.op.reduce_axis[0]
_, red_inner = sched[out].split(reduce_axis, factor=32)
edge_outer, edge_inner = sched[out].split(edge_axis, factor=32)
sched[out].bind(red_inner, te.thread_axis("threadIdx.x"))
sched[out].bind(edge_inner, te.thread_axis("threadIdx.y"))
sched[out].bind(edge_outer, te.thread_axis("blockIdx.x"))
sched[out].bind(head_axis, te.thread_axis("blockIdx.y"))
return tvm.lower(
sched,
[row, col, ufeat, vfeat, out],
name="SDDMMTreeReduction_{}_{}".format(idx_type, feat_type),
)
if __name__ == "__main__":
kernel0 = sddmm_tree_reduction_gpu("int32", "float32")
print(kernel0)
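To make the compute definition concrete, here is a dense NumPy reference (an illustration, not from the repo) of what the kernel produces: for each edge `e` and head `h`, `out[e, h, 0] = sum_k ufeat[row[e], h, k] * vfeat[col[e], h, k]`.

```python
import numpy as np

def sddmm_dot_reference(row, col, ufeat, vfeat):
    """Dense mirror of the edge_func/te.compute definition above."""
    prod = ufeat[row] * vfeat[col]           # (nnz, num_heads, feat_len)
    return prod.sum(axis=-1, keepdims=True)  # (nnz, num_heads, 1)

# Tiny smoke test with random data.
rng = np.random.default_rng(0)
row = rng.integers(0, 4, size=6)
col = rng.integers(0, 5, size=6)
out = sddmm_dot_reference(row, col, rng.random((4, 2, 8)), rng.random((5, 2, 8)))
assert out.shape == (6, 2, 1)
```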
featgraph/src/featgraph.cc (deleted)
/**
* Copyright (c) 2020 by Contributors
* @file featgraph/src/featgraph.cc
* @brief FeatGraph kernels.
*/
#include <dmlc/logging.h>
#include <featgraph.h>
#include <tvm/runtime/module.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/registry.h>
namespace dgl {
namespace featgraph {
/* @brief Singleton that loads the featgraph module. */
class FeatGraphModule {
public:
static FeatGraphModule* Global() {
static FeatGraphModule inst;
return &inst;
}
void Load(const std::string& path) {
mod = tvm::runtime::Module::LoadFromFile(path);
}
inline tvm::runtime::ModuleNode* Get() {
auto ret = mod.operator->();
if (!ret) {
LOG(FATAL) << "FeatGraph module have not been loaded. "
<< "Please set path of featgraph shared library.";
}
return ret;
}
private:
tvm::runtime::Module mod;
FeatGraphModule() {}
};
/* @brief Load Featgraph module from given path. */
void LoadFeatGraphModule(const std::string& path) {
FeatGraphModule::Global()->Load(path);
}
/* @brief Convert DLDataType to string. */
inline std::string DTypeAsStr(const DLDataType& t) {
switch (t.code) {
case 0U:
return "int" + std::to_string(t.bits);
case 1U:
return "uint" + std::to_string(t.bits);
case 2U:
return "float" + std::to_string(t.bits);
case 3U:
return "bfloat" + std::to_string(t.bits);
default:
LOG(FATAL) << "Type code " << t.code << " not recognized";
}
}
/* @brief Get operator filename. */
inline std::string GetOperatorName(
const std::string& base_name, const DLDataType& dtype,
const DLDataType& idtype) {
return base_name + "_" + DTypeAsStr(dtype) + "_" + DTypeAsStr(idtype);
}
/* @brief Call FeatGraph's SDDMM kernel. */
void SDDMMTreeReduction(
DLManagedTensor* row, DLManagedTensor* col, DLManagedTensor* lhs,
DLManagedTensor* rhs, DLManagedTensor* out) {
tvm::runtime::ModuleNode* mod = FeatGraphModule::Global()->Get();
std::string f_name = GetOperatorName(
"SDDMMTreeReduction", (row->dl_tensor).dtype, (lhs->dl_tensor).dtype);
tvm::runtime::PackedFunc f = mod->GetFunction(f_name);
if (f != nullptr) f(row, col, lhs, rhs, out);
}
} // namespace featgraph
} // namespace dgl
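For reference, a hypothetical Python mirror (not in the repo) of `DTypeAsStr`/`GetOperatorName`, showing which symbol `SDDMMTreeReduction` resolves at runtime. Note that the call site passes the row tensor's (index) dtype first, so the result matches the `SDDMMTreeReduction_{idtype}_{dtype}` names emitted by `sddmm.py`:

```python
# DLPack type codes, as handled by DTypeAsStr above.
DLPACK_TYPE_CODES = {0: "int", 1: "uint", 2: "float", 3: "bfloat"}

def operator_name(base, idtype, dtype):
    """idtype/dtype are (code, bits) pairs mimicking DLDataType."""
    as_str = lambda t: DLPACK_TYPE_CODES[t[0]] + str(t[1])
    return base + "_" + as_str(idtype) + "_" + as_str(dtype)

# int32 indices with float32 features resolve to:
assert (operator_name("SDDMMTreeReduction", (0, 32), (2, 32))
        == "SDDMMTreeReduction_int32_float32")
```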
featgraph/src/tvm_runtime_pack.cc (deleted)
/**
* NOTE(zihao): this file was modified from TVM project:
* -
* https://github.com/apache/tvm/blob/9713d675c64ae3075e10be5acadeef1328a44bb5/apps/howto_deploy/tvm_runtime_pack.cc
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/**
* @brief This is an all in one TVM runtime file.
*
* You only have to use this file to compile libtvm_runtime to
* include in your project.
*
* - Copy this file into your project which depends on tvm runtime.
* - Compile with -std=c++14
* - Add the following include path
* - /path/to/tvm/include/
* - /path/to/tvm/3rdparty/dmlc-core/include/
* - /path/to/tvm/3rdparty/dlpack/include/
 * - Add -lpthread -ldl to the linked libraries.
* - You are good to go.
* - See the Makefile in the same folder for example.
*
 * The include files here are referenced with relative paths.
 * Remember to change them to point to the right files.
*
*/
#include <../src/runtime/c_runtime_api.cc>
#include <../src/runtime/cpu_device_api.cc>
#include <../src/runtime/file_utils.cc>
#include <../src/runtime/library_module.cc>
#include <../src/runtime/module.cc>
#include <../src/runtime/ndarray.cc>
#include <../src/runtime/object.cc>
#include <../src/runtime/registry.cc>
#include <../src/runtime/thread_pool.cc>
#include <../src/runtime/threading_backend.cc>
#include <../src/runtime/workspace_pool.cc>
// NOTE: all the files after this are optional modules
// that you can include or remove, depending on how many features you use.
// Likely we only need to enable one of the following
// If you use Module::Load, use dso_module
// For system packed library, use system_lib_module
#include <../src/runtime/dso_library.cc>
// #include <../src/runtime/system_library.cc>
// Graph runtime
// #include "../../src/runtime/graph/graph_runtime.cc"
// #include "../../src/runtime/graph/graph_runtime_factory.cc"
// Uncomment the following lines to enable RPC
// #include "../../src/runtime/rpc/rpc_session.cc"
// #include "../../src/runtime/rpc/rpc_event_impl.cc"
// #include "../../src/runtime/rpc/rpc_server_env.cc"
// These macros enable the device APIs when uncommented.
#define TVM_CUDA_RUNTIME 1
// #define TVM_METAL_RUNTIME 1
// #define TVM_OPENCL_RUNTIME 1
// Uncomment the following lines to enable Metal
// #include "../../src/runtime/metal/metal_device_api.mm"
// #include "../../src/runtime/metal/metal_module.mm"
// Uncomment the following lines to enable CUDA
#include <../src/runtime/cuda/cuda_device_api.cc>
#include <../src/runtime/cuda/cuda_module.cc>
// Uncomment the following lines to enable OpenCL
// #include "../../src/runtime/opencl/opencl_device_api.cc"
// #include "../../src/runtime/opencl/opencl_module.cc"
featgraph/test.py (deleted)
import dgl
import dgl.backend as F
import torch
g = dgl.rand_graph(10, 15).int().to(torch.device(0))
gidx = g._graph
u = torch.rand((10, 2, 8), device=torch.device(0))
v = torch.rand((10, 2, 8), device=torch.device(0))
e = dgl.ops.gsddmm(g, "dot", u, v)
print(e)
e = torch.zeros((15, 2, 1), device=torch.device(0))
u = F.zerocopy_to_dgl_ndarray(u)
v = F.zerocopy_to_dgl_ndarray(v)
e = F.zerocopy_to_dgl_ndarray_for_write(e)
dgl.sparse._CAPI_FG_LoadModule("../build/featgraph/libfeatgraph_kernels.so")
dgl.sparse._CAPI_FG_SDDMMTreeReduction(gidx, u, v, e)
print(e)
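test.py only prints both results; a hedged extension (not in the original file) would compare them numerically. The sketch below assumes DGL was built with `USE_TVM=ON` and that the kernel library exists at the path used above:

```python
import dgl
import dgl.backend as F
import torch

dev = torch.device(0)
g = dgl.rand_graph(10, 15).int().to(dev)
u = torch.rand((10, 2, 8), device=dev)
v = torch.rand((10, 2, 8), device=dev)

expected = dgl.ops.gsddmm(g, "dot", u, v)  # built-in reference path

out = torch.zeros((15, 2, 1), device=dev)
dgl.sparse._CAPI_FG_LoadModule("../build/featgraph/libfeatgraph_kernels.so")
dgl.sparse._CAPI_FG_SDDMMTreeReduction(
    g._graph,
    F.zerocopy_to_dgl_ndarray(u),
    F.zerocopy_to_dgl_ndarray(v),
    F.zerocopy_to_dgl_ndarray_for_write(out),
)
assert torch.allclose(expected, out, atol=1e-4)
```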
@@ -6,11 +6,6 @@
#include <dgl/base_heterograph.h>
#include <dgl/packed_func_ext.h>
#ifdef USE_TVM
#include <dgl/runtime/dlpack_convert.h>
#include <featgraph.h>
#endif // USE_TVM
#include "../c_api_common.h" #include "../c_api_common.h"
#include "./check.h" #include "./check.h"
#include "kernel_decl.h" #include "kernel_decl.h"
...@@ -804,36 +799,5 @@ DGL_REGISTER_GLOBAL("sparse._CAPI_DGLCSRMask") ...@@ -804,36 +799,5 @@ DGL_REGISTER_GLOBAL("sparse._CAPI_DGLCSRMask")
*rv = result; *rv = result;
}); });
#ifdef USE_TVM
DGL_REGISTER_GLOBAL("sparse._CAPI_FG_LoadModule")
.set_body([](DGLArgs args, DGLRetValue* rv) {
const std::string path = args[0];
dgl::featgraph::LoadFeatGraphModule(path);
});
DGL_REGISTER_GLOBAL("sparse._CAPI_FG_SDDMMTreeReduction")
.set_body([](DGLArgs args, DGLRetValue* rv) {
HeteroGraphRef graph = args[0];
NDArray lhs = args[1];
NDArray rhs = args[2];
NDArray out = args[3];
CheckCtx(graph->Context(), {lhs, rhs, out}, {"lhs", "rhs", "out"});
CheckContiguous({lhs, rhs, out}, {"lhs", "rhs", "out"});
CHECK_EQ(graph->NumEdgeTypes(), 1);
// auto pair = graph->meta_graph()->FindEdge(0); // only one etype in the
// graph. const dgl_type_t src_vtype = pair.first; const dgl_type_t
// dst_vtype = pair.second; CheckShape(
// {graph->NumVertices(src_vtype), graph->NumEdges(0),
// graph->NumVertices(dst_vtype)}, {lhs_target, rhs_target, 1}, {lhs,
// rhs, out},
// {"U_data", "E_data", "V_data"});
COOMatrix coo = graph.sptr()->GetCOOMatrix(0);
dgl::featgraph::SDDMMTreeReduction(
DLPackConvert::ToDLPack(coo.row), DLPackConvert::ToDLPack(coo.col),
DLPackConvert::ToDLPack(lhs), DLPackConvert::ToDLPack(rhs),
DLPackConvert::ToDLPack(out));
});
#endif // USE_TVM
} // namespace aten
} // namespace dgl
@@ -20,10 +20,6 @@
#include <vector>
#ifdef USE_TVM
#include <featgraph.h>
#endif // USE_TVM
#include "../c_api_common.h" #include "../c_api_common.h"
#include "./check.h" #include "./check.h"
#include "kernel_decl.h" #include "kernel_decl.h"
third_party/tvm (submodule removed)
Subproject commit b2e418cb109df4cd1f17a2cf2894a1b396a6b838