Unverified Commit 9ff56d20 authored by Hongzhi (Steve), Chen, committed by GitHub

[Cleanup] Remove featgraph and unused TVM dependency. (#5767)


Co-authored-by: Ubuntu <ubuntu@ip-172-31-28-63.ap-northeast-1.compute.internal>
parent ee00729b
.gitmodules
@@ -13,9 +13,6 @@
[submodule "third_party/phmap"]
    path = third_party/phmap
    url = https://github.com/greg7mdp/parallel-hashmap.git
[submodule "third_party/tvm"]
path = third_party/tvm
url = https://github.com/apache/incubator-tvm
[submodule "third_party/nanoflann"] [submodule "third_party/nanoflann"]
path = third_party/nanoflann path = third_party/nanoflann
url = https://github.com/jlblancoc/nanoflann url = https://github.com/jlblancoc/nanoflann
CMakeLists.txt
@@ -25,7 +25,6 @@ endif()
dgl_option(USE_CUDA "Build with CUDA" OFF)
dgl_option(USE_OPENMP "Build with OpenMP" ON)
dgl_option(USE_LIBXSMM "Build with LIBXSMM library optimization" ON)
dgl_option(USE_TVM "Build with TVM kernels" OFF)
dgl_option(BUILD_CPP_TEST "Build cpp unittest executables" OFF)
dgl_option(LIBCXX_ENABLE_PARALLEL_ALGORITHMS "Enable the parallel algorithms library. This requires the PSTL to be available." OFF)
dgl_option(USE_S3 "Build with S3 support" OFF)
@@ -252,19 +251,6 @@ if(NOT MSVC)
endif()
endif(NOT MSVC)
# Compile TVM Runtime and Featgraph
# (NOTE) We compile a dynamic library called featgraph_runtime, which the DGL library links to.
# Kernels are packed in a separate dynamic library called featgraph_kernels, which DGL
# will load at runtime.
if(USE_TVM)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DUSE_TVM")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_TVM")
target_include_directories(dgl PRIVATE "featgraph/include")
add_subdirectory("featgraph/")
list(APPEND DGL_LINKER_LIBS featgraph_runtime)
message(STATUS "Build with TVM runtime and featgraph kernels.")
endif(USE_TVM)
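The runtime split described above is visible from the Python side: `featgraph_runtime` is linked into `libdgl` at build time, while the packed kernels are loaded explicitly through the registered C API. A minimal sketch (the library path matches the one used in `featgraph/test.py` below), assuming DGL was built with `USE_TVM=ON`:

```python
import dgl

# featgraph_runtime is linked into libdgl at build time; the kernel library
# produced by pack_featgraph.py is loaded on demand at runtime.
dgl.sparse._CAPI_FG_LoadModule("../build/featgraph/libfeatgraph_kernels.so")
```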
# support PARALLEL_ALGORITHMS
if (LIBCXX_ENABLE_PARALLEL_ALGORITHMS)
add_definitions(-DPARALLEL_ALGORITHMS)
@@ -43,8 +43,5 @@ set(USE_OPENMP ON)
# Whether to build PyTorch plugins.
set(BUILD_TORCH ON)
# Whether to enable CUDA kernels compiled with TVM.
set(USE_TVM OFF)
# Whether to build DGL sparse library.
set(BUILD_SPARSE ON)
featgraph/CMakeLists.txt (deleted)
cmake_minimum_required(VERSION 3.5)
project(featgraph C CXX)
message(STATUS "Start configuring project ${PROJECT_NAME}")
# Find CUDA
include(../cmake/util/FindCUDA.cmake)
find_cuda(ON)
message(STATUS "${CUDA_INCLUDE_DIRS}")
add_custom_target(
featgraph_kernel
COMMAND python ${CMAKE_CURRENT_SOURCE_DIR}/pack_featgraph.py
COMMENT "Creating featgraph kernels..."
)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -O2 -fPIC")
file(GLOB FEATGRAPH_SRC
src/featgraph.cc
src/tvm_runtime_pack.cc
)
add_library(featgraph_runtime SHARED ${FEATGRAPH_SRC})
target_include_directories(featgraph_runtime PRIVATE ${CUDA_INCLUDE_DIRS})
target_include_directories(featgraph_runtime PRIVATE "./include")
target_include_directories(featgraph_runtime PRIVATE "../third_party/tvm/include")
target_include_directories(featgraph_runtime PRIVATE "../third_party/tvm/3rdparty/dmlc-core/include")
target_include_directories(featgraph_runtime PRIVATE "../third_party/tvm/3rdparty/dlpack/include")
target_link_libraries(featgraph_runtime "dl" # dynamic linking
${CUDA_CUDART_LIBRARY}
${CUDA_CUDA_LIBRARY}
${CUDA_NVRTC_LIBRARY})
add_dependencies(featgraph_runtime featgraph_kernel)
install(TARGETS featgraph_runtime LIBRARY DESTINATION lib)
featgraph/README.md (deleted)
# FeatGraph-DGL
FeatGraph is an efficient backend for Graph Neural Networks based on TVM.
- Original repo: https://github.com/amazon-research/FeatGraph
- SC2020 Paper: https://www.csl.cornell.edu/~zhiruz/pdfs/featgraph-sc2020.pdf
This folder contains the code for integrating FeatGraph kernels into DGL.
## Usage
After building DGL with `USE_TVM=ON`, you should be able to run:
```bash
python test.py
```
to verify correctness.
## Reference
- [TVM Tutorial on Deploy TVM Module using C++ API](https://tvm.apache.org/docs/deploy/cpp_deploy.html).
featgraph/include/featgraph.h (deleted)
/**
* Copyright (c) 2020 by Contributors
* @file featgraph/include/featgraph.h
* @brief FeatGraph kernel headers.
*/
#ifndef FEATGRAPH_H_
#define FEATGRAPH_H_
#include <dlpack/dlpack.h>
namespace dgl {
namespace featgraph {
/* @brief Load Featgraph module from given path. */
void LoadFeatGraphModule(const std::string& path);
/* @brief Call Featgraph's SDDMM kernel. */
void SDDMMTreeReduction(
DLManagedTensor* row, DLManagedTensor* col, DLManagedTensor* lhs,
DLManagedTensor* rhs, DLManagedTensor* out);
} // namespace featgraph
} // namespace dgl
#endif // FEATGRAPH_H_
""" Export featgraph kernels to a shared library. """
import tvm
from sddmm import sddmm_tree_reduction_gpu
def get_sddmm_kernels_gpu(idtypes, dtypes):
"""
Parameters
----------
idtypes: List[str]
Possible index types.
dtypes: List[str]
Possible data types.
Returns
-------
List[IRModule]:
The list of IRModules.
"""
ret = []
# SDDMM Tree Reduction
for dtype in dtypes:
for idtype in idtypes:
ret.append(sddmm_tree_reduction_gpu(idtype, dtype))
return ret
if __name__ == "__main__":
binary_path = "libfeatgraph_kernels.so"
kernels = []
idtypes = ["int32", "int64"]
dtypes = ["float16", "float64", "float32", "int32", "int64"]
kernels += get_sddmm_kernels_gpu(idtypes, dtypes)
# build kernels and export the module to libfeatgraph_kernels.so
module = tvm.build(kernels, target="cuda", target_host="llvm")
module.export_library(binary_path)
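For a quick sanity check, the exported library can be opened directly with the TVM runtime. A minimal sketch (not part of the original repo), assuming a CUDA-enabled TVM build and that the script above has already produced `libfeatgraph_kernels.so`:

```python
import tvm

# Load the packed kernel library and look up one kernel by its mangled name.
# Names follow "SDDMMTreeReduction_<idtype>_<dtype>" (see sddmm.py below).
mod = tvm.runtime.load_module("libfeatgraph_kernels.so")
func = mod.get_function("SDDMMTreeReduction_int32_float32")
print(func)
```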
""" The compute function and schedules for SDDMM kernels written in TVM. """
import tvm
from tvm import te
def sddmm_tree_reduction_gpu(idx_type, feat_type):
"""SDDMM kernels on GPU optimized with Tree Reduction.
Parameters
----------
idx_type : str
The data type for indexing tensors.
feat_type : str
The data type of feature tensor.
Returns
-------
IRModule
The result IRModule.
"""
# define vars and placeholders
nnz = te.var("nnz", idx_type)
num_rows = te.var("num_rows", idx_type)
num_cols = te.var("num_cols", idx_type)
H = te.var("num_heads", idx_type)
D = te.var("feat_len", idx_type)
row = te.placeholder((nnz,), idx_type, "row")
col = te.placeholder((nnz,), idx_type, "col")
ufeat = te.placeholder((num_rows, H, D), feat_type, "ufeat")
vfeat = te.placeholder((num_cols, H, D), feat_type, "vfeat")
# define edge computation function
def edge_func(eid, h, i):
k = te.reduce_axis((0, D), name="k")
return te.sum(ufeat[row[eid], h, k] * vfeat[col[eid], h, k], axis=k)
out = te.compute(
(nnz, H, tvm.tir.IntImm(idx_type, 1)), edge_func, name="out"
)
# define schedules
sched = te.create_schedule(out.op)
edge_axis, head_axis, _ = out.op.axis
reduce_axis = out.op.reduce_axis[0]
_, red_inner = sched[out].split(reduce_axis, factor=32)
edge_outer, edge_inner = sched[out].split(edge_axis, factor=32)
sched[out].bind(red_inner, te.thread_axis("threadIdx.x"))
sched[out].bind(edge_inner, te.thread_axis("threadIdx.y"))
sched[out].bind(edge_outer, te.thread_axis("blockIdx.x"))
sched[out].bind(head_axis, te.thread_axis("blockIdx.y"))
return tvm.lower(
sched,
[row, col, ufeat, vfeat, out],
name="SDDMMTreeReduction_{}_{}".format(idx_type, feat_type),
)
if __name__ == "__main__":
kernel0 = sddmm_tree_reduction_gpu("int32", "float32")
print(kernel0)
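To make the compute definition concrete, here is a dense NumPy reference (an illustration, not from the repo) of what the kernel produces: for each edge `e` and head `h`, `out[e, h, 0] = sum_k ufeat[row[e], h, k] * vfeat[col[e], h, k]`.

```python
import numpy as np

def sddmm_dot_reference(row, col, ufeat, vfeat):
    """Dense mirror of the edge_func/te.compute definition above."""
    prod = ufeat[row] * vfeat[col]           # (nnz, num_heads, feat_len)
    return prod.sum(axis=-1, keepdims=True)  # (nnz, num_heads, 1)

# Tiny smoke test with random data.
rng = np.random.default_rng(0)
row = rng.integers(0, 4, size=6)
col = rng.integers(0, 5, size=6)
out = sddmm_dot_reference(row, col, rng.random((4, 2, 8)), rng.random((5, 2, 8)))
assert out.shape == (6, 2, 1)
```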
featgraph/src/featgraph.cc (deleted)
/**
* Copyright (c) 2020 by Contributors
* @file featgraph/src/featgraph.cc
* @brief FeatGraph kernels.
*/
#include <dmlc/logging.h>
#include <featgraph.h>
#include <tvm/runtime/module.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/registry.h>
namespace dgl {
namespace featgraph {
/* @brief Singleton that loads the featgraph module. */
class FeatGraphModule {
public:
static FeatGraphModule* Global() {
static FeatGraphModule inst;
return &inst;
}
void Load(const std::string& path) {
mod = tvm::runtime::Module::LoadFromFile(path);
}
inline tvm::runtime::ModuleNode* Get() {
auto ret = mod.operator->();
if (!ret) {
LOG(FATAL) << "FeatGraph module have not been loaded. "
<< "Please set path of featgraph shared library.";
}
return ret;
}
private:
tvm::runtime::Module mod;
FeatGraphModule() {}
};
/* @brief Load Featgraph module from given path. */
void LoadFeatGraphModule(const std::string& path) {
FeatGraphModule::Global()->Load(path);
}
/* @brief Convert DLDataType to string. */
inline std::string DTypeAsStr(const DLDataType& t) {
switch (t.code) {
case 0U:
return "int" + std::to_string(t.bits);
case 1U:
return "uint" + std::to_string(t.bits);
case 2U:
return "float" + std::to_string(t.bits);
case 3U:
return "bfloat" + std::to_string(t.bits);
default:
LOG(FATAL) << "Type code " << t.code << " not recognized";
}
}
/* @brief Get operator filename. */
inline std::string GetOperatorName(
const std::string& base_name, const DLDataType& dtype,
const DLDataType& idtype) {
return base_name + "_" + DTypeAsStr(dtype) + "_" + DTypeAsStr(idtype);
}
/* @brief Call FeatGraph's SDDMM kernel. */
void SDDMMTreeReduction(
DLManagedTensor* row, DLManagedTensor* col, DLManagedTensor* lhs,
DLManagedTensor* rhs, DLManagedTensor* out) {
tvm::runtime::ModuleNode* mod = FeatGraphModule::Global()->Get();
std::string f_name = GetOperatorName(
"SDDMMTreeReduction", (row->dl_tensor).dtype, (lhs->dl_tensor).dtype);
tvm::runtime::PackedFunc f = mod->GetFunction(f_name);
if (f != nullptr) f(row, col, lhs, rhs, out);
}
} // namespace featgraph
} // namespace dgl
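For reference, a hypothetical Python mirror (not in the repo) of `DTypeAsStr`/`GetOperatorName`, showing which symbol `SDDMMTreeReduction` resolves at runtime. Note that the call site passes the row tensor's (index) dtype first, so the result matches the `SDDMMTreeReduction_{idtype}_{dtype}` names emitted by `sddmm.py`:

```python
# DLPack type codes, as handled by DTypeAsStr above.
DLPACK_TYPE_CODES = {0: "int", 1: "uint", 2: "float", 3: "bfloat"}

def operator_name(base, idtype, dtype):
    """idtype/dtype are (code, bits) pairs mimicking DLDataType."""
    as_str = lambda t: DLPACK_TYPE_CODES[t[0]] + str(t[1])
    return base + "_" + as_str(idtype) + "_" + as_str(dtype)

# int32 indices with float32 features resolve to:
assert (operator_name("SDDMMTreeReduction", (0, 32), (2, 32))
        == "SDDMMTreeReduction_int32_float32")
```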
featgraph/src/tvm_runtime_pack.cc (deleted)
/**
* NOTE(zihao): this file was modified from TVM project:
* -
* https://github.com/apache/tvm/blob/9713d675c64ae3075e10be5acadeef1328a44bb5/apps/howto_deploy/tvm_runtime_pack.cc
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/**
* @brief This is an all in one TVM runtime file.
*
* You only have to use this file to compile libtvm_runtime to
* include in your project.
*
* - Copy this file into your project which depends on tvm runtime.
* - Compile with -std=c++14
* - Add the following include path
* - /path/to/tvm/include/
* - /path/to/tvm/3rdparty/dmlc-core/include/
* - /path/to/tvm/3rdparty/dlpack/include/
 * - Add -lpthread -ldl to the linked libraries.
* - You are good to go.
* - See the Makefile in the same folder for example.
*
 * The include files here are referenced with relative paths.
 * Remember to change them to point to the right files.
*
*/
#include <../src/runtime/c_runtime_api.cc>
#include <../src/runtime/cpu_device_api.cc>
#include <../src/runtime/file_utils.cc>
#include <../src/runtime/library_module.cc>
#include <../src/runtime/module.cc>
#include <../src/runtime/ndarray.cc>
#include <../src/runtime/object.cc>
#include <../src/runtime/registry.cc>
#include <../src/runtime/thread_pool.cc>
#include <../src/runtime/threading_backend.cc>
#include <../src/runtime/workspace_pool.cc>
// NOTE: all the files after this are optional modules
// that you can include or remove, depending on how many features you use.
// Likely we only need to enable one of the following
// If you use Module::Load, use dso_module
// For system packed library, use system_lib_module
#include <../src/runtime/dso_library.cc>
// #include <../src/runtime/system_library.cc>
// Graph runtime
// #include "../../src/runtime/graph/graph_runtime.cc"
// #include "../../src/runtime/graph/graph_runtime_factory.cc"
// Uncomment the following lines to enable RPC
// #include "../../src/runtime/rpc/rpc_session.cc"
// #include "../../src/runtime/rpc/rpc_event_impl.cc"
// #include "../../src/runtime/rpc/rpc_server_env.cc"
// These macros enable the device APIs when uncommented.
#define TVM_CUDA_RUNTIME 1
// #define TVM_METAL_RUNTIME 1
// #define TVM_OPENCL_RUNTIME 1
// Uncomment the following lines to enable Metal
// #include "../../src/runtime/metal/metal_device_api.mm"
// #include "../../src/runtime/metal/metal_module.mm"
// Uncomment the following lines to enable CUDA
#include <../src/runtime/cuda/cuda_device_api.cc>
#include <../src/runtime/cuda/cuda_module.cc>
// Uncomment the following lines to enable OpenCL
// #include "../../src/runtime/opencl/opencl_device_api.cc"
// #include "../../src/runtime/opencl/opencl_module.cc"
featgraph/test.py (deleted)
import dgl
import dgl.backend as F
import torch
g = dgl.rand_graph(10, 15).int().to(torch.device(0))
gidx = g._graph
u = torch.rand((10, 2, 8), device=torch.device(0))
v = torch.rand((10, 2, 8), device=torch.device(0))
e = dgl.ops.gsddmm(g, "dot", u, v)
print(e)
e = torch.zeros((15, 2, 1), device=torch.device(0))
u = F.zerocopy_to_dgl_ndarray(u)
v = F.zerocopy_to_dgl_ndarray(v)
e = F.zerocopy_to_dgl_ndarray_for_write(e)
dgl.sparse._CAPI_FG_LoadModule("../build/featgraph/libfeatgraph_kernels.so")
dgl.sparse._CAPI_FG_SDDMMTreeReduction(gidx, u, v, e)
print(e)
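test.py only prints both results; a hedged extension (not in the original file) would compare them numerically. The sketch below assumes DGL was built with `USE_TVM=ON` and that the kernel library exists at the path used above:

```python
import dgl
import dgl.backend as F
import torch

dev = torch.device(0)
g = dgl.rand_graph(10, 15).int().to(dev)
u = torch.rand((10, 2, 8), device=dev)
v = torch.rand((10, 2, 8), device=dev)

expected = dgl.ops.gsddmm(g, "dot", u, v)  # built-in reference path

out = torch.zeros((15, 2, 1), device=dev)
dgl.sparse._CAPI_FG_LoadModule("../build/featgraph/libfeatgraph_kernels.so")
dgl.sparse._CAPI_FG_SDDMMTreeReduction(
    g._graph,
    F.zerocopy_to_dgl_ndarray(u),
    F.zerocopy_to_dgl_ndarray(v),
    F.zerocopy_to_dgl_ndarray_for_write(out),
)
assert torch.allclose(expected, out, atol=1e-4)
```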
@@ -6,11 +6,6 @@
#include <dgl/base_heterograph.h>
#include <dgl/packed_func_ext.h>
#ifdef USE_TVM
#include <dgl/runtime/dlpack_convert.h>
#include <featgraph.h>
#endif // USE_TVM
#include "../c_api_common.h" #include "../c_api_common.h"
#include "./check.h" #include "./check.h"
#include "kernel_decl.h" #include "kernel_decl.h"
...@@ -804,36 +799,5 @@ DGL_REGISTER_GLOBAL("sparse._CAPI_DGLCSRMask") ...@@ -804,36 +799,5 @@ DGL_REGISTER_GLOBAL("sparse._CAPI_DGLCSRMask")
*rv = result; *rv = result;
}); });
#ifdef USE_TVM
DGL_REGISTER_GLOBAL("sparse._CAPI_FG_LoadModule")
.set_body([](DGLArgs args, DGLRetValue* rv) {
const std::string path = args[0];
dgl::featgraph::LoadFeatGraphModule(path);
});
DGL_REGISTER_GLOBAL("sparse._CAPI_FG_SDDMMTreeReduction")
.set_body([](DGLArgs args, DGLRetValue* rv) {
HeteroGraphRef graph = args[0];
NDArray lhs = args[1];
NDArray rhs = args[2];
NDArray out = args[3];
CheckCtx(graph->Context(), {lhs, rhs, out}, {"lhs", "rhs", "out"});
CheckContiguous({lhs, rhs, out}, {"lhs", "rhs", "out"});
CHECK_EQ(graph->NumEdgeTypes(), 1);
// auto pair = graph->meta_graph()->FindEdge(0); // only one etype in the
// graph. const dgl_type_t src_vtype = pair.first; const dgl_type_t
// dst_vtype = pair.second; CheckShape(
// {graph->NumVertices(src_vtype), graph->NumEdges(0),
// graph->NumVertices(dst_vtype)}, {lhs_target, rhs_target, 1}, {lhs,
// rhs, out},
// {"U_data", "E_data", "V_data"});
COOMatrix coo = graph.sptr()->GetCOOMatrix(0);
dgl::featgraph::SDDMMTreeReduction(
DLPackConvert::ToDLPack(coo.row), DLPackConvert::ToDLPack(coo.col),
DLPackConvert::ToDLPack(lhs), DLPackConvert::ToDLPack(rhs),
DLPackConvert::ToDLPack(out));
});
#endif // USE_TVM
} // namespace aten
} // namespace dgl
@@ -20,10 +20,6 @@
#include <vector>
#ifdef USE_TVM
#include <featgraph.h>
#endif // USE_TVM
#include "../c_api_common.h" #include "../c_api_common.h"
#include "./check.h" #include "./check.h"
#include "kernel_decl.h" #include "kernel_decl.h"
third_party/tvm (submodule removed)
Subproject commit b2e418cb109df4cd1f17a2cf2894a1b396a6b838