Unverified commit 8f0df39e, authored by Hongzhi (Steve), Chen and committed by GitHub

[Misc] clang-format auto fix. (#4810)



* [Misc] clang-format auto fix.

* manual

* manual
Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent 401e1278
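
Every hunk below applies the same clang-format pattern: when a declaration, call, or template header exceeds the column limit, the argument list is broken immediately after the opening parenthesis and continued with a four-space indent, instead of being aligned under the first argument. The repository's .clang-format options are not part of this commit, so the following is only a minimal illustrative sketch under assumed settings (an 80-column limit with Google-derived defaults) and uses placeholder names rather than DGL code; an in-place run such as `clang-format -i` over the touched files, plus the manual touch-ups noted above, would produce rewrites of this shape.

#include <cstdint>
#include <string>
#include <vector>

namespace formatting_sketch {

// Old style (shown as a comment for contrast): continuation arguments aligned
// under the first argument of the opening line.
// std::vector<int64_t> BuildShape(const std::string &name, int64_t rows,
//                                 int64_t cols);

// New style applied throughout this commit: break right after '(' and
// continue on the next line with a four-space indent.
std::vector<int64_t> BuildShape(
    const std::string &name, int64_t rows, int64_t cols) {
  return {static_cast<int64_t>(name.size()), rows, cols};
}

}  // namespace formatting_sketch

int main() {
  // Call sites are unaffected; only the wrapping of long signatures changes.
  const auto shape = formatting_sketch::BuildShape("feat", 8, 16);
  return shape.size() == 3 ? 0 : 1;
}
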
@@ -13,8 +13,8 @@ namespace dgl {
using dgl::runtime::NDArray;
NDArray CreateNDArrayFromRawData(std::vector<int64_t> shape, DGLDataType dtype,
DGLContext ctx, void* raw) {
NDArray CreateNDArrayFromRawData(
std::vector<int64_t> shape, DGLDataType dtype, DGLContext ctx, void* raw) {
return NDArray::CreateFromRaw(shape, dtype, ctx, raw, true);
}
@@ -40,7 +40,8 @@ void StreamWithBuffer::PushNDArray(const NDArray& tensor) {
// If the stream is for remote communication or the data is not stored in
// shared memory, serialize the data content as a buffer.
this->Write<bool>(false);
// If this is a null ndarray, we will not push it into the underlying buffer_list
// If this is a null ndarray, we will not push it into the underlying
// buffer_list
if (data_byte_size != 0) {
buffer_list_.emplace_back(tensor, tensor->data, data_byte_size);
}
@@ -90,8 +91,8 @@ NDArray StreamWithBuffer::PopNDArray() {
// Mean this is a null ndarray
ret = CreateNDArrayFromRawData(shape, dtype, cpu_ctx, nullptr);
} else {
ret = CreateNDArrayFromRawData(shape, dtype, cpu_ctx,
buffer_list_.front().data);
ret = CreateNDArrayFromRawData(
shape, dtype, cpu_ctx, buffer_list_.front().data);
buffer_list_.pop_front();
}
return ret;
@@ -31,8 +31,8 @@ using namespace dgl::aten;
namespace dgl {
template <>
NDArray SharedMemManager::CopyToSharedMem<NDArray>(const NDArray &data,
std::string name) {
NDArray SharedMemManager::CopyToSharedMem<NDArray>(
const NDArray &data, std::string name) {
DGLContext ctx = {kDGLCPU, 0};
std::vector<int64_t> shape(data->shape, data->shape + data->ndim);
strm_->Write(data->ndim);
@@ -53,21 +53,22 @@ NDArray SharedMemManager::CopyToSharedMem<NDArray>(const NDArray &data,
}
template <>
CSRMatrix SharedMemManager::CopyToSharedMem<CSRMatrix>(const CSRMatrix &csr,
std::string name) {
CSRMatrix SharedMemManager::CopyToSharedMem<CSRMatrix>(
const CSRMatrix &csr, std::string name) {
auto indptr_shared_mem = CopyToSharedMem(csr.indptr, name + "_indptr");
auto indices_shared_mem = CopyToSharedMem(csr.indices, name + "_indices");
auto data_shared_mem = CopyToSharedMem(csr.data, name + "_data");
strm_->Write(csr.num_rows);
strm_->Write(csr.num_cols);
strm_->Write(csr.sorted);
return CSRMatrix(csr.num_rows, csr.num_cols, indptr_shared_mem,
indices_shared_mem, data_shared_mem, csr.sorted);
return CSRMatrix(
csr.num_rows, csr.num_cols, indptr_shared_mem, indices_shared_mem,
data_shared_mem, csr.sorted);
}
template <>
COOMatrix SharedMemManager::CopyToSharedMem<COOMatrix>(const COOMatrix &coo,
std::string name) {
COOMatrix SharedMemManager::CopyToSharedMem<COOMatrix>(
const COOMatrix &coo, std::string name) {
auto row_shared_mem = CopyToSharedMem(coo.row, name + "_row");
auto col_shared_mem = CopyToSharedMem(coo.col, name + "_col");
auto data_shared_mem = CopyToSharedMem(coo.data, name + "_data");
@@ -75,13 +76,14 @@ COOMatrix SharedMemManager::CopyToSharedMem<COOMatrix>(const COOMatrix &coo,
strm_->Write(coo.num_cols);
strm_->Write(coo.row_sorted);
strm_->Write(coo.col_sorted);
return COOMatrix(coo.num_rows, coo.num_cols, row_shared_mem, col_shared_mem,
return COOMatrix(
coo.num_rows, coo.num_cols, row_shared_mem, col_shared_mem,
data_shared_mem, coo.row_sorted, coo.col_sorted);
}
template <>
bool SharedMemManager::CreateFromSharedMem<NDArray>(NDArray *nd,
std::string name) {
bool SharedMemManager::CreateFromSharedMem<NDArray>(
NDArray *nd, std::string name) {
int ndim;
DGLContext ctx = {kDGLCPU, 0};
DGLDataType dtype;
@@ -98,15 +100,14 @@ bool SharedMemManager::CreateFromSharedMem<NDArray>(NDArray *nd,
if (is_null) {
*nd = NDArray::Empty(shape, dtype, ctx);
} else {
*nd =
NDArray::EmptyShared(graph_name_ + name, shape, dtype, ctx, false);
*nd = NDArray::EmptyShared(graph_name_ + name, shape, dtype, ctx, false);
}
return true;
}
template <>
bool SharedMemManager::CreateFromSharedMem<COOMatrix>(COOMatrix *coo,
std::string name) {
bool SharedMemManager::CreateFromSharedMem<COOMatrix>(
COOMatrix *coo, std::string name) {
CreateFromSharedMem(&coo->row, name + "_row");
CreateFromSharedMem(&coo->col, name + "_col");
CreateFromSharedMem(&coo->data, name + "_data");
@@ -118,8 +119,8 @@ bool SharedMemManager::CreateFromSharedMem<COOMatrix>(COOMatrix *coo,
}
template <>
bool SharedMemManager::CreateFromSharedMem<CSRMatrix>(CSRMatrix *csr,
std::string name) {
bool SharedMemManager::CreateFromSharedMem<CSRMatrix>(
CSRMatrix *csr, std::string name) {
CreateFromSharedMem(&csr->indptr, name + "_indptr");
CreateFromSharedMem(&csr->indices, name + "_indices");
CreateFromSharedMem(&csr->data, name + "_data");
@@ -29,8 +29,7 @@ const size_t SHARED_MEM_METAINFO_SIZE_MAX = 1024 * 32;
class SharedMemManager : public dmlc::Stream {
public:
explicit SharedMemManager(std::string graph_name, dmlc::Stream* strm)
: graph_name_(graph_name),
strm_(strm) {}
: graph_name_(graph_name), strm_(strm) {}
template <typename T>
T CopyToSharedMem(const T& data, std::string name);
@@ -11,7 +11,8 @@ namespace dgl {
HeteroSubgraph InEdgeGraphRelabelNodes(
const HeteroGraphPtr graph, const std::vector<IdArray>& vids) {
CHECK_EQ(vids.size(), graph->NumVertexTypes())
<< "Invalid input: the input list size must be the same as the number of vertex types.";
<< "Invalid input: the input list size must be the same as the number of "
"vertex types.";
std::vector<IdArray> eids(graph->NumEdgeTypes());
DGLContext ctx = aten::GetContextOf(vids);
for (dgl_type_t etype = 0; etype < graph->NumEdgeTypes(); ++etype) {
@@ -29,9 +30,11 @@ HeteroSubgraph InEdgeGraphRelabelNodes(
HeteroSubgraph InEdgeGraphNoRelabelNodes(
const HeteroGraphPtr graph, const std::vector<IdArray>& vids) {
// TODO(mufei): This should also use EdgeSubgraph once it is supported for CSR graphs
// TODO(mufei): This should also use EdgeSubgraph once it is supported for CSR
// graphs
CHECK_EQ(vids.size(), graph->NumVertexTypes())
<< "Invalid input: the input list size must be the same as the number of vertex types.";
<< "Invalid input: the input list size must be the same as the number of "
"vertex types.";
std::vector<HeteroGraphPtr> subrels(graph->NumEdgeTypes());
std::vector<IdArray> induced_edges(graph->NumEdgeTypes());
DGLContext ctx = aten::GetContextOf(vids);
@@ -43,30 +46,28 @@ HeteroSubgraph InEdgeGraphNoRelabelNodes(
if (aten::IsNullArray(vids[dst_vtype])) {
// create a placeholder graph
subrels[etype] = UnitGraph::Empty(
relgraph->NumVertexTypes(),
graph->NumVertices(src_vtype),
graph->NumVertices(dst_vtype),
graph->DataType(), ctx);
induced_edges[etype] = IdArray::Empty({0}, graph->DataType(), graph->Context());
relgraph->NumVertexTypes(), graph->NumVertices(src_vtype),
graph->NumVertices(dst_vtype), graph->DataType(), ctx);
induced_edges[etype] =
IdArray::Empty({0}, graph->DataType(), graph->Context());
} else {
const auto& earr = graph->InEdges(etype, {vids[dst_vtype]});
subrels[etype] = UnitGraph::CreateFromCOO(
relgraph->NumVertexTypes(),
graph->NumVertices(src_vtype),
graph->NumVertices(dst_vtype),
earr.src,
earr.dst);
relgraph->NumVertexTypes(), graph->NumVertices(src_vtype),
graph->NumVertices(dst_vtype), earr.src, earr.dst);
induced_edges[etype] = earr.id;
}
}
HeteroSubgraph ret;
ret.graph = CreateHeteroGraph(graph->meta_graph(), subrels, graph->NumVerticesPerType());
ret.graph = CreateHeteroGraph(
graph->meta_graph(), subrels, graph->NumVerticesPerType());
ret.induced_edges = std::move(induced_edges);
return ret;
}
HeteroSubgraph InEdgeGraph(
const HeteroGraphPtr graph, const std::vector<IdArray>& vids, bool relabel_nodes) {
const HeteroGraphPtr graph, const std::vector<IdArray>& vids,
bool relabel_nodes) {
if (relabel_nodes) {
return InEdgeGraphRelabelNodes(graph, vids);
} else {
@@ -77,7 +78,8 @@ HeteroSubgraph InEdgeGraph(
HeteroSubgraph OutEdgeGraphRelabelNodes(
const HeteroGraphPtr graph, const std::vector<IdArray>& vids) {
CHECK_EQ(vids.size(), graph->NumVertexTypes())
<< "Invalid input: the input list size must be the same as the number of vertex types.";
<< "Invalid input: the input list size must be the same as the number of "
"vertex types.";
std::vector<IdArray> eids(graph->NumEdgeTypes());
DGLContext ctx = aten::GetContextOf(vids);
for (dgl_type_t etype = 0; etype < graph->NumEdgeTypes(); ++etype) {
@@ -95,9 +97,11 @@ HeteroSubgraph OutEdgeGraphRelabelNodes(
HeteroSubgraph OutEdgeGraphNoRelabelNodes(
const HeteroGraphPtr graph, const std::vector<IdArray>& vids) {
// TODO(mufei): This should also use EdgeSubgraph once it is supported for CSR graphs
// TODO(mufei): This should also use EdgeSubgraph once it is supported for CSR
// graphs
CHECK_EQ(vids.size(), graph->NumVertexTypes())
<< "Invalid input: the input list size must be the same as the number of vertex types.";
<< "Invalid input: the input list size must be the same as the number of "
"vertex types.";
std::vector<HeteroGraphPtr> subrels(graph->NumEdgeTypes());
std::vector<IdArray> induced_edges(graph->NumEdgeTypes());
DGLContext ctx = aten::GetContextOf(vids);
@@ -109,30 +113,28 @@ HeteroSubgraph OutEdgeGraphNoRelabelNodes(
if (aten::IsNullArray(vids[src_vtype])) {
// create a placeholder graph
subrels[etype] = UnitGraph::Empty(
relgraph->NumVertexTypes(),
graph->NumVertices(src_vtype),
graph->NumVertices(dst_vtype),
graph->DataType(), ctx);
induced_edges[etype] = IdArray::Empty({0}, graph->DataType(), graph->Context());
relgraph->NumVertexTypes(), graph->NumVertices(src_vtype),
graph->NumVertices(dst_vtype), graph->DataType(), ctx);
induced_edges[etype] =
IdArray::Empty({0}, graph->DataType(), graph->Context());
} else {
const auto& earr = graph->OutEdges(etype, {vids[src_vtype]});
subrels[etype] = UnitGraph::CreateFromCOO(
relgraph->NumVertexTypes(),
graph->NumVertices(src_vtype),
graph->NumVertices(dst_vtype),
earr.src,
earr.dst);
relgraph->NumVertexTypes(), graph->NumVertices(src_vtype),
graph->NumVertices(dst_vtype), earr.src, earr.dst);
induced_edges[etype] = earr.id;
}
}
HeteroSubgraph ret;
ret.graph = CreateHeteroGraph(graph->meta_graph(), subrels, graph->NumVerticesPerType());
ret.graph = CreateHeteroGraph(
graph->meta_graph(), subrels, graph->NumVerticesPerType());
ret.induced_edges = std::move(induced_edges);
return ret;
}
HeteroSubgraph OutEdgeGraph(
const HeteroGraphPtr graph, const std::vector<IdArray>& vids, bool relabel_nodes) {
const HeteroGraphPtr graph, const std::vector<IdArray>& vids,
bool relabel_nodes) {
if (relabel_nodes) {
return OutEdgeGraphRelabelNodes(graph, vids);
} else {
@@ -19,18 +19,20 @@
#include "compact.h"
#include <dgl/base_heterograph.h>
#include <dgl/transform.h>
#include <dgl/array.h>
#include <dgl/base_heterograph.h>
#include <dgl/packed_func_ext.h>
#include <dgl/runtime/registry.h>
#include <dgl/runtime/container.h>
#include <vector>
#include <dgl/runtime/registry.h>
#include <dgl/transform.h>
#include <utility>
#include <vector>
#include "../../c_api_common.h"
#include "../unit_graph.h"
// TODO(BarclayII): currently CompactGraphs depend on IdHashMap implementation which
// only works on CPU. Should fix later to make it device agnostic.
// TODO(BarclayII): currently CompactGraphs depend on IdHashMap implementation
// which only works on CPU. Should fix later to make it device agnostic.
#include "../../array/cpu/array_utils.h"
namespace dgl {
@@ -42,16 +44,16 @@ namespace transform {
namespace {
template<typename IdType>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
CompactGraphsCPU(
template <typename IdType>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>> CompactGraphsCPU(
const std::vector<HeteroGraphPtr> &graphs,
const std::vector<IdArray> &always_preserve) {
// TODO(BarclayII): check whether the node space and metagraph of each graph is the same.
// Step 1: Collect the nodes that has connections for each type.
// TODO(BarclayII): check whether the node space and metagraph of each graph
// is the same. Step 1: Collect the nodes that has connections for each type.
const int64_t num_ntypes = graphs[0]->NumVertexTypes();
std::vector<aten::IdHashMap<IdType>> hashmaps(num_ntypes);
std::vector<std::vector<EdgeArray>> all_edges(graphs.size()); // all_edges[i][etype]
std::vector<std::vector<EdgeArray>> all_edges(
graphs.size()); // all_edges[i][etype]
std::vector<int64_t> max_vertex_cnt(num_ntypes, 0);
for (size_t i = 0; i < graphs.size(); ++i) {
@@ -98,7 +100,8 @@ CompactGraphsCPU(
}
}
// Step 2: Relabel the nodes for each type to a smaller ID space and save the mapping.
// Step 2: Relabel the nodes for each type to a smaller ID space and save the
// mapping.
std::vector<IdArray> induced_nodes(num_ntypes);
std::vector<int64_t> num_induced_nodes(num_ntypes);
for (int64_t i = 0; i < num_ntypes; ++i) {
@@ -123,14 +126,12 @@ CompactGraphsCPU(
const IdArray mapped_cols = hashmaps[dsttype].Map(edges.dst, -1);
rel_graphs.push_back(UnitGraph::CreateFromCOO(
srctype == dsttype ? 1 : 2,
induced_nodes[srctype]->shape[0],
induced_nodes[dsttype]->shape[0],
mapped_rows,
mapped_cols));
srctype == dsttype ? 1 : 2, induced_nodes[srctype]->shape[0],
induced_nodes[dsttype]->shape[0], mapped_rows, mapped_cols));
}
new_graphs.push_back(CreateHeteroGraph(meta_graph, rel_graphs, num_induced_nodes));
new_graphs.push_back(
CreateHeteroGraph(meta_graph, rel_graphs, num_induced_nodes));
}
return std::make_pair(new_graphs, induced_nodes);
@@ -138,7 +139,7 @@ CompactGraphsCPU(
}; // namespace
template<>
template <>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
CompactGraphs<kDGLCPU, int32_t>(
const std::vector<HeteroGraphPtr> &graphs,
@@ -146,7 +147,7 @@ CompactGraphs<kDGLCPU, int32_t>(
return CompactGraphsCPU<int32_t>(graphs, always_preserve);
}
template<>
template <>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
CompactGraphs<kDGLCPU, int64_t>(
const std::vector<HeteroGraphPtr> &graphs,
@@ -155,26 +156,26 @@ CompactGraphs<kDGLCPU, int64_t>(
}
DGL_REGISTER_GLOBAL("transform._CAPI_DGLCompactGraphs")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
.set_body([](DGLArgs args, DGLRetValue *rv) {
List<HeteroGraphRef> graph_refs = args[0];
List<Value> always_preserve_refs = args[1];
std::vector<HeteroGraphPtr> graphs;
std::vector<IdArray> always_preserve;
for (HeteroGraphRef gref : graph_refs)
graphs.push_back(gref.sptr());
for (HeteroGraphRef gref : graph_refs) graphs.push_back(gref.sptr());
for (Value array : always_preserve_refs)
always_preserve.push_back(array->data);
// TODO(BarclayII): check for all IdArrays
CHECK(graphs[0]->DataType() == always_preserve[0]->dtype) << "data type mismatch.";
CHECK(graphs[0]->DataType() == always_preserve[0]->dtype)
<< "data type mismatch.";
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>> result_pair;
ATEN_XPU_SWITCH_CUDA(graphs[0]->Context().device_type, XPU, "CompactGraphs", {
ATEN_XPU_SWITCH_CUDA(
graphs[0]->Context().device_type, XPU, "CompactGraphs", {
ATEN_ID_TYPE_SWITCH(graphs[0]->DataType(), IdType, {
result_pair = CompactGraphs<XPU, IdType>(
graphs, always_preserve);
result_pair = CompactGraphs<XPU, IdType>(graphs, always_preserve);
});
});
@@ -24,8 +24,8 @@
#include <dgl/array.h>
#include <dgl/base_heterograph.h>
#include <vector>
#include <utility>
#include <vector>
namespace dgl {
namespace transform {
@@ -41,9 +41,8 @@ namespace transform {
*
* @return The vector of compacted graphs and the vector of induced nodes.
*/
template<DGLDeviceType XPU, typename IdType>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
CompactGraphs(
template <DGLDeviceType XPU, typename IdType>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>> CompactGraphs(
const std::vector<HeteroGraphPtr> &graphs,
const std::vector<IdArray> &always_preserve);
@@ -9,7 +9,9 @@
#include <dgl/array.h>
#include <dmlc/logging.h>
#include <nanoflann.hpp>
#include "../../../c_api_common.h"
namespace dgl {
@@ -17,29 +19,34 @@ namespace transform {
namespace knn_utils {
/*!
* \brief A simple 2D NDArray adapter for nanoflann, without duplicating the storage.
* \brief A simple 2D NDArray adapter for nanoflann, without duplicating the
* storage.
*
* \tparam FloatType: The type of the point coordinates (typically, double or float).
* \tparam IdType: The type for indices in the KD-tree index (typically, size_t of int)
* \tparam FeatureDim: If set to > 0, it specifies a compile-time fixed dimensionality
* for the points in the data set, allowing more compiler optimizations.
* \tparam Dist: The distance metric to use: nanoflann::metric_L1, nanoflann::metric_L2,
* nanoflann::metric_L2_Simple, etc.
* \note The spelling of dgl's adapter ("adapter") is different from naneflann ("adaptor")
* \tparam FloatType: The type of the point coordinates (typically, double or
* float).
* \tparam IdType: The type for indices in the KD-tree index (typically,
* size_t of int)
* \tparam FeatureDim: If set to > 0, it specifies a compile-time fixed
* dimensionality for the points in the data set, allowing more compiler
* optimizations.
* \tparam Dist: The distance metric to use: nanoflann::metric_L1,
nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc.
* \note The spelling of dgl's adapter ("adapter") is different from naneflann
* ("adaptor")
*/
template <typename FloatType,
typename IdType,
int FeatureDim = -1,
template <
typename FloatType, typename IdType, int FeatureDim = -1,
typename Dist = nanoflann::metric_L2>
class KDTreeNDArrayAdapter {
public:
using self_type = KDTreeNDArrayAdapter<FloatType, IdType, FeatureDim, Dist>;
using metric_type = typename Dist::template traits<FloatType, self_type>::distance_t;
using metric_type =
typename Dist::template traits<FloatType, self_type>::distance_t;
using index_type = nanoflann::KDTreeSingleIndexAdaptor<
metric_type, self_type, FeatureDim, IdType>;
KDTreeNDArrayAdapter(const size_t /* dims */,
const NDArray data_points,
KDTreeNDArrayAdapter(
const size_t /* dims */, const NDArray data_points,
const int leaf_max_size = 10)
: data_(data_points) {
CHECK(data_points->shape[0] != 0 && data_points->shape[1] != 0)
@@ -49,46 +56,38 @@ class KDTreeNDArrayAdapter {
<< "Data set feature dimension does not match the 'FeatureDim' "
<< "template argument.";
index_ = new index_type(
static_cast<int>(dims), *this, nanoflann::KDTreeSingleIndexAdaptorParams(leaf_max_size));
static_cast<int>(dims), *this,
nanoflann::KDTreeSingleIndexAdaptorParams(leaf_max_size));
index_->buildIndex();
}
~KDTreeNDArrayAdapter() {
delete index_;
}
~KDTreeNDArrayAdapter() { delete index_; }
index_type* GetIndex() {
return index_;
}
index_type* GetIndex() { return index_; }
/*!
* \brief Query for the \a num_closest points to a given point
* Note that this is a short-cut method for GetIndex()->findNeighbors().
*/
void query(const FloatType* query_pt, const size_t num_closest,
IdType* out_idxs, FloatType* out_dists) const {
void query(
const FloatType* query_pt, const size_t num_closest, IdType* out_idxs,
FloatType* out_dists) const {
nanoflann::KNNResultSet<FloatType, IdType> resultSet(num_closest);
resultSet.init(out_idxs, out_dists);
index_->findNeighbors(resultSet, query_pt, nanoflann::SearchParams());
}
/*! \brief Interface expected by KDTreeSingleIndexAdaptor */
const self_type& derived() const {
return *this;
}
const self_type& derived() const { return *this; }
/*! \brief Interface expected by KDTreeSingleIndexAdaptor */
self_type& derived() {
return *this;
}
self_type& derived() { return *this; }
/*!
* \brief Interface expected by KDTreeSingleIndexAdaptor,
* return the number of data points
*/
size_t kdtree_get_point_count() const {
return data_->shape[0];
}
size_t kdtree_get_point_count() const { return data_->shape[0]; }
/*!
* \brief Interface expected by KDTreeSingleIndexAdaptor,
@@ -18,13 +18,13 @@
* all given graphs with the same set of nodes.
*/
#include <dgl/runtime/device_api.h>
#include <dgl/immutable_graph.h>
#include <cuda_runtime.h>
#include <utility>
#include <dgl/immutable_graph.h>
#include <dgl/runtime/device_api.h>
#include <algorithm>
#include <memory>
#include <utility>
#include "../../../runtime/cuda/cuda_common.h"
#include "../../heterograph.h"
@@ -41,54 +41,45 @@ namespace transform {
namespace {
/**
* \brief This function builds node maps for each node type, preserving the
* order of the input nodes. Here it is assumed the nodes are not unique,
* and thus a unique list is generated.
*
* \param input_nodes The set of input nodes.
* \param node_maps The node maps to be constructed.
* \param count_unique_device The number of unique nodes (on the GPU).
* \param unique_nodes_device The unique nodes (on the GPU).
* \param stream The stream to operate on.
*/
template<typename IdType>
* \brief This function builds node maps for each node type, preserving the
* order of the input nodes. Here it is assumed the nodes are not unique,
* and thus a unique list is generated.
*
* \param input_nodes The set of input nodes.
* \param node_maps The node maps to be constructed.
* \param count_unique_device The number of unique nodes (on the GPU).
* \param unique_nodes_device The unique nodes (on the GPU).
* \param stream The stream to operate on.
*/
template <typename IdType>
void BuildNodeMaps(
const std::vector<IdArray>& input_nodes,
DeviceNodeMap<IdType> * const node_maps,
int64_t * const count_unique_device,
std::vector<IdArray>* const unique_nodes_device,
cudaStream_t stream) {
const std::vector<IdArray> &input_nodes,
DeviceNodeMap<IdType> *const node_maps, int64_t *const count_unique_device,
std::vector<IdArray> *const unique_nodes_device, cudaStream_t stream) {
const int64_t num_ntypes = static_cast<int64_t>(input_nodes.size());
CUDA_CALL(cudaMemsetAsync(
count_unique_device,
0,
num_ntypes*sizeof(*count_unique_device),
count_unique_device, 0, num_ntypes * sizeof(*count_unique_device),
stream));
// possibly duplicated nodes
for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
const IdArray& nodes = input_nodes[ntype];
const IdArray &nodes = input_nodes[ntype];
if (nodes->shape[0] > 0) {
CHECK_EQ(nodes->ctx.device_type, kDGLCUDA);
node_maps->LhsHashTable(ntype).FillWithDuplicates(
nodes.Ptr<IdType>(),
nodes->shape[0],
nodes.Ptr<IdType>(), nodes->shape[0],
(*unique_nodes_device)[ntype].Ptr<IdType>(),
count_unique_device+ntype,
stream);
count_unique_device + ntype, stream);
}
}
}
template<typename IdType>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
CompactGraphsGPU(
template <typename IdType>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>> CompactGraphsGPU(
const std::vector<HeteroGraphPtr> &graphs,
const std::vector<IdArray> &always_preserve) {
const auto& ctx = graphs[0]->Context();
const auto &ctx = graphs[0]->Context();
auto device = runtime::DeviceAPI::Get(ctx);
cudaStream_t stream = runtime::getCurrentCUDAStream();
@@ -96,7 +87,8 @@ CompactGraphsGPU(
// Step 1: Collect the nodes that has connections for each type.
const uint64_t num_ntypes = graphs[0]->NumVertexTypes();
std::vector<std::vector<EdgeArray>> all_edges(graphs.size()); // all_edges[i][etype]
std::vector<std::vector<EdgeArray>> all_edges(
graphs.size()); // all_edges[i][etype]
// count the number of nodes per type
std::vector<int64_t> max_vertex_cnt(num_ntypes, 0);
@@ -123,19 +115,18 @@ CompactGraphsGPU(
std::vector<int64_t> node_offsets(num_ntypes, 0);
for (uint64_t ntype = 0; ntype < num_ntypes; ++ntype) {
all_nodes[ntype] = NewIdArray(max_vertex_cnt[ntype], ctx,
sizeof(IdType)*8);
all_nodes[ntype] =
NewIdArray(max_vertex_cnt[ntype], ctx, sizeof(IdType) * 8);
// copy the nodes in always_preserve
if (ntype < always_preserve.size() && always_preserve[ntype]->shape[0] > 0) {
if (ntype < always_preserve.size() &&
always_preserve[ntype]->shape[0] > 0) {
device->CopyDataFromTo(
always_preserve[ntype].Ptr<IdType>(), 0,
all_nodes[ntype].Ptr<IdType>(),
node_offsets[ntype],
sizeof(IdType)*always_preserve[ntype]->shape[0],
always_preserve[ntype]->ctx,
all_nodes[ntype]->ctx,
all_nodes[ntype].Ptr<IdType>(), node_offsets[ntype],
sizeof(IdType) * always_preserve[ntype]->shape[0],
always_preserve[ntype]->ctx, all_nodes[ntype]->ctx,
always_preserve[ntype]->dtype);
node_offsets[ntype] += sizeof(IdType)*always_preserve[ntype]->shape[0];
node_offsets[ntype] += sizeof(IdType) * always_preserve[ntype]->shape[0];
}
}
@@ -152,25 +143,17 @@ CompactGraphsGPU(
if (edges.src.defined()) {
device->CopyDataFromTo(
edges.src.Ptr<IdType>(), 0,
all_nodes[srctype].Ptr<IdType>(),
node_offsets[srctype],
sizeof(IdType)*edges.src->shape[0],
edges.src->ctx,
all_nodes[srctype]->ctx,
edges.src->dtype);
node_offsets[srctype] += sizeof(IdType)*edges.src->shape[0];
edges.src.Ptr<IdType>(), 0, all_nodes[srctype].Ptr<IdType>(),
node_offsets[srctype], sizeof(IdType) * edges.src->shape[0],
edges.src->ctx, all_nodes[srctype]->ctx, edges.src->dtype);
node_offsets[srctype] += sizeof(IdType) * edges.src->shape[0];
}
if (edges.dst.defined()) {
device->CopyDataFromTo(
edges.dst.Ptr<IdType>(), 0,
all_nodes[dsttype].Ptr<IdType>(),
node_offsets[dsttype],
sizeof(IdType)*edges.dst->shape[0],
edges.dst->ctx,
all_nodes[dsttype]->ctx,
edges.dst->dtype);
node_offsets[dsttype] += sizeof(IdType)*edges.dst->shape[0];
edges.dst.Ptr<IdType>(), 0, all_nodes[dsttype].Ptr<IdType>(),
node_offsets[dsttype], sizeof(IdType) * edges.dst->shape[0],
edges.dst->ctx, all_nodes[dsttype]->ctx, edges.dst->dtype);
node_offsets[dsttype] += sizeof(IdType) * edges.dst->shape[0];
}
all_edges[i].push_back(edges);
}
@@ -185,29 +168,22 @@ CompactGraphsGPU(
// number of unique nodes per type on CPU
std::vector<int64_t> num_induced_nodes(num_ntypes);
// number of unique nodes per type on GPU
int64_t * count_unique_device = static_cast<int64_t*>(
device->AllocWorkspace(ctx, sizeof(int64_t)*num_ntypes));
int64_t *count_unique_device = static_cast<int64_t *>(
device->AllocWorkspace(ctx, sizeof(int64_t) * num_ntypes));
// the set of unique nodes per type
std::vector<IdArray> induced_nodes(num_ntypes);
for (uint64_t ntype = 0; ntype < num_ntypes; ++ntype) {
induced_nodes[ntype] = NewIdArray(max_vertex_cnt[ntype], ctx,
sizeof(IdType)*8);
induced_nodes[ntype] =
NewIdArray(max_vertex_cnt[ntype], ctx, sizeof(IdType) * 8);
}
BuildNodeMaps(
all_nodes,
&node_maps,
count_unique_device,
&induced_nodes,
stream);
all_nodes, &node_maps, count_unique_device, &induced_nodes, stream);
device->CopyDataFromTo(
count_unique_device, 0,
num_induced_nodes.data(), 0,
sizeof(*num_induced_nodes.data())*num_ntypes,
ctx,
DGLContext{kDGLCPU, 0},
DGLDataType{kDGLInt, 64, 1});
count_unique_device, 0, num_induced_nodes.data(), 0,
sizeof(*num_induced_nodes.data()) * num_ntypes, ctx,
DGLContext{kDGLCPU, 0}, DGLDataType{kDGLInt, 64, 1});
device->StreamSync(ctx, stream);
// wait for the node counts to finish transferring
@@ -230,22 +206,20 @@ CompactGraphsGPU(
std::vector<IdArray> new_src;
std::vector<IdArray> new_dst;
std::tie(new_src, new_dst) = MapEdges(
curr_graph, all_edges[i], node_maps, stream);
std::tie(new_src, new_dst) =
MapEdges(curr_graph, all_edges[i], node_maps, stream);
for (IdType etype = 0; etype < num_etypes; ++etype) {
IdType srctype, dsttype;
std::tie(srctype, dsttype) = curr_graph->GetEndpointTypes(etype);
rel_graphs.push_back(UnitGraph::CreateFromCOO(
srctype == dsttype ? 1 : 2,
induced_nodes[srctype]->shape[0],
induced_nodes[dsttype]->shape[0],
new_src[etype],
new_dst[etype]));
srctype == dsttype ? 1 : 2, induced_nodes[srctype]->shape[0],
induced_nodes[dsttype]->shape[0], new_src[etype], new_dst[etype]));
}
new_graphs.push_back(CreateHeteroGraph(meta_graph, rel_graphs, num_induced_nodes));
new_graphs.push_back(
CreateHeteroGraph(meta_graph, rel_graphs, num_induced_nodes));
}
return std::make_pair(new_graphs, induced_nodes);
@@ -253,7 +227,7 @@ CompactGraphsGPU(
} // namespace
template<>
template <>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
CompactGraphs<kDGLCUDA, int32_t>(
const std::vector<HeteroGraphPtr> &graphs,
@@ -261,7 +235,7 @@ CompactGraphs<kDGLCUDA, int32_t>(
return CompactGraphsGPU<int32_t>(graphs, always_preserve);
}
template<>
template <>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
CompactGraphs<kDGLCUDA, int64_t>(
const std::vector<HeteroGraphPtr> &graphs,
@@ -20,13 +20,14 @@
#ifndef DGL_GRAPH_TRANSFORM_CUDA_CUDA_MAP_EDGES_CUH_
#define DGL_GRAPH_TRANSFORM_CUDA_CUDA_MAP_EDGES_CUH_
#include <dgl/runtime/c_runtime_api.h>
#include <cuda_runtime.h>
#include <dgl/runtime/c_runtime_api.h>
#include <algorithm>
#include <memory>
#include <tuple>
#include <vector>
#include <utility>
#include <vector>
#include "../../../runtime/cuda/cuda_common.h"
#include "../../../runtime/cuda/cuda_hashtable.cuh"
@@ -39,48 +40,46 @@ namespace transform {
namespace cuda {
template<typename IdType, int BLOCK_SIZE, IdType TILE_SIZE>
template <typename IdType, int BLOCK_SIZE, IdType TILE_SIZE>
__device__ void map_vertex_ids(
const IdType * const global,
IdType * const new_global,
const IdType num_vertices,
const DeviceOrderedHashTable<IdType>& table) {
const IdType* const global, IdType* const new_global,
const IdType num_vertices, const DeviceOrderedHashTable<IdType>& table) {
assert(BLOCK_SIZE == blockDim.x);
using Mapping = typename OrderedHashTable<IdType>::Mapping;
const IdType tile_start = TILE_SIZE*blockIdx.x;
const IdType tile_end = min(TILE_SIZE*(blockIdx.x+1), num_vertices);
const IdType tile_start = TILE_SIZE * blockIdx.x;
const IdType tile_end = min(TILE_SIZE * (blockIdx.x + 1), num_vertices);
for (IdType idx = threadIdx.x+tile_start; idx < tile_end; idx+=BLOCK_SIZE) {
for (IdType idx = threadIdx.x + tile_start; idx < tile_end;
idx += BLOCK_SIZE) {
const Mapping& mapping = *table.Search(global[idx]);
new_global[idx] = mapping.local;
}
}
/**
* \brief Generate mapped edge endpoint ids.
*
* \tparam IdType The type of id.
* \tparam BLOCK_SIZE The size of each thread block.
* \tparam TILE_SIZE The number of edges to process per thread block.
* \param global_srcs_device The source ids to map.
* \param new_global_srcs_device The mapped source ids (output).
* \param global_dsts_device The destination ids to map.
* \param new_global_dsts_device The mapped destination ids (output).
* \param num_edges The number of edges to map.
* \param src_mapping The mapping of sources ids.
* \param src_hash_size The the size of source id hash table/mapping.
* \param dst_mapping The mapping of destination ids.
* \param dst_hash_size The the size of destination id hash table/mapping.
*/
template<typename IdType, int BLOCK_SIZE, IdType TILE_SIZE>
* \brief Generate mapped edge endpoint ids.
*
* \tparam IdType The type of id.
* \tparam BLOCK_SIZE The size of each thread block.
* \tparam TILE_SIZE The number of edges to process per thread block.
* \param global_srcs_device The source ids to map.
* \param new_global_srcs_device The mapped source ids (output).
* \param global_dsts_device The destination ids to map.
* \param new_global_dsts_device The mapped destination ids (output).
* \param num_edges The number of edges to map.
* \param src_mapping The mapping of sources ids.
* \param src_hash_size The the size of source id hash table/mapping.
* \param dst_mapping The mapping of destination ids.
* \param dst_hash_size The the size of destination id hash table/mapping.
*/
template <typename IdType, int BLOCK_SIZE, IdType TILE_SIZE>
__global__ void map_edge_ids(
const IdType * const global_srcs_device,
IdType * const new_global_srcs_device,
const IdType * const global_dsts_device,
IdType * const new_global_dsts_device,
const IdType num_edges,
const IdType* const global_srcs_device,
IdType* const new_global_srcs_device,
const IdType* const global_dsts_device,
IdType* const new_global_dsts_device, const IdType num_edges,
DeviceOrderedHashTable<IdType> src_mapping,
DeviceOrderedHashTable<IdType> dst_mapping) {
assert(BLOCK_SIZE == blockDim.x);
@@ -88,39 +87,32 @@ __global__ void map_edge_ids(
if (blockIdx.y == 0) {
map_vertex_ids<IdType, BLOCK_SIZE, TILE_SIZE>(
global_srcs_device,
new_global_srcs_device,
num_edges,
src_mapping);
global_srcs_device, new_global_srcs_device, num_edges, src_mapping);
} else {
map_vertex_ids<IdType, BLOCK_SIZE, TILE_SIZE>(
global_dsts_device,
new_global_dsts_device,
num_edges,
dst_mapping);
global_dsts_device, new_global_dsts_device, num_edges, dst_mapping);
}
}
/**
* \brief Device level node maps for each node type.
*
* \param num_nodes Number of nodes per type.
* \param offset When offset is set to 0, LhsHashTable is identical to RhsHashTable.
* Or set to num_nodes.size()/2 to use seperated LhsHashTable and RhsHashTable.
* \param ctx The DGL context.
* \param stream The stream to operate on.
*/
template<typename IdType>
* \brief Device level node maps for each node type.
*
* \param num_nodes Number of nodes per type.
* \param offset When offset is set to 0, LhsHashTable is identical to
* RhsHashTable. Or set to num_nodes.size()/2 to use seperated
* LhsHashTable and RhsHashTable.
* \param ctx The DGL context.
* \param stream The stream to operate on.
*/
template <typename IdType>
class DeviceNodeMap {
public:
using Mapping = typename OrderedHashTable<IdType>::Mapping;
DeviceNodeMap(
const std::vector<int64_t>& num_nodes,
const int64_t offset,
DGLContext ctx,
cudaStream_t stream) :
num_types_(num_nodes.size()),
const std::vector<int64_t>& num_nodes, const int64_t offset,
DGLContext ctx, cudaStream_t stream)
: num_types_(num_nodes.size()),
rhs_offset_(offset),
hash_tables_(),
ctx_(ctx) {
@@ -129,46 +121,33 @@ class DeviceNodeMap {
hash_tables_.reserve(num_types_);
for (int64_t i = 0; i < num_types_; ++i) {
hash_tables_.emplace_back(
new OrderedHashTable<IdType>(
num_nodes[i],
ctx_,
stream));
new OrderedHashTable<IdType>(num_nodes[i], ctx_, stream));
}
}
OrderedHashTable<IdType>& LhsHashTable(
const size_t index) {
OrderedHashTable<IdType>& LhsHashTable(const size_t index) {
return HashData(index);
}
OrderedHashTable<IdType>& RhsHashTable(
const size_t index) {
return HashData(index+rhs_offset_);
OrderedHashTable<IdType>& RhsHashTable(const size_t index) {
return HashData(index + rhs_offset_);
}
const OrderedHashTable<IdType>& LhsHashTable(
const size_t index) const {
const OrderedHashTable<IdType>& LhsHashTable(const size_t index) const {
return HashData(index);
}
const OrderedHashTable<IdType>& RhsHashTable(
const size_t index) const {
return HashData(index+rhs_offset_);
const OrderedHashTable<IdType>& RhsHashTable(const size_t index) const {
return HashData(index + rhs_offset_);
}
IdType LhsHashSize(
const size_t index) const {
return HashSize(index);
}
IdType LhsHashSize(const size_t index) const { return HashSize(index); }
IdType RhsHashSize(
const size_t index) const {
return HashSize(rhs_offset_+index);
IdType RhsHashSize(const size_t index) const {
return HashSize(rhs_offset_ + index);
}
size_t Size() const {
return hash_tables_.size();
}
size_t Size() const { return hash_tables_.size(); }
private:
int64_t num_types_;
@@ -176,45 +155,35 @@ class DeviceNodeMap {
std::vector<std::unique_ptr<OrderedHashTable<IdType>>> hash_tables_;
DGLContext ctx_;
inline OrderedHashTable<IdType>& HashData(
const size_t index) {
inline OrderedHashTable<IdType>& HashData(const size_t index) {
CHECK_LT(index, hash_tables_.size());
return *hash_tables_[index];
}
inline const OrderedHashTable<IdType>& HashData(
const size_t index) const {
inline const OrderedHashTable<IdType>& HashData(const size_t index) const {
CHECK_LT(index, hash_tables_.size());
return *hash_tables_[index];
}
inline IdType HashSize(
const size_t index) const {
inline IdType HashSize(const size_t index) const {
return HashData(index).size();
}
};
template<typename IdType>
inline size_t RoundUpDiv(
const IdType num,
const size_t divisor) {
return static_cast<IdType>(num/divisor) + (num % divisor == 0 ? 0 : 1);
template <typename IdType>
inline size_t RoundUpDiv(const IdType num, const size_t divisor) {
return static_cast<IdType>(num / divisor) + (num % divisor == 0 ? 0 : 1);
}
template<typename IdType>
inline IdType RoundUp(
const IdType num,
const size_t unit) {
return RoundUpDiv(num, unit)*unit;
template <typename IdType>
inline IdType RoundUp(const IdType num, const size_t unit) {
return RoundUpDiv(num, unit) * unit;
}
template<typename IdType>
std::tuple<std::vector<IdArray>, std::vector<IdArray>>
MapEdges(
HeteroGraphPtr graph,
const std::vector<EdgeArray>& edge_sets,
const DeviceNodeMap<IdType>& node_map,
cudaStream_t stream) {
template <typename IdType>
std::tuple<std::vector<IdArray>, std::vector<IdArray>> MapEdges(
HeteroGraphPtr graph, const std::vector<EdgeArray>& edge_sets,
const DeviceNodeMap<IdType>& node_map, cudaStream_t stream) {
constexpr const int BLOCK_SIZE = 128;
constexpr const size_t TILE_SIZE = 1024;
@@ -233,8 +202,8 @@ MapEdges(
if (edges.id.defined() && edges.src->shape[0] > 0) {
const int64_t num_edges = edges.src->shape[0];
new_lhs.emplace_back(NewIdArray(num_edges, ctx, sizeof(IdType)*8));
new_rhs.emplace_back(NewIdArray(num_edges, ctx, sizeof(IdType)*8));
new_lhs.emplace_back(NewIdArray(num_edges, ctx, sizeof(IdType) * 8));
new_rhs.emplace_back(NewIdArray(num_edges, ctx, sizeof(IdType) * 8));
const auto src_dst_types = graph->GetEndpointTypes(etype);
const int src_type = src_dst_types.first;
@@ -244,20 +213,17 @@ MapEdges(
const dim3 block(BLOCK_SIZE);
// map the srcs
CUDA_KERNEL_CALL((map_edge_ids<IdType, BLOCK_SIZE, TILE_SIZE>),
grid, block, 0, stream,
edges.src.Ptr<IdType>(),
new_lhs.back().Ptr<IdType>(),
edges.dst.Ptr<IdType>(),
new_rhs.back().Ptr<IdType>(),
num_edges,
CUDA_KERNEL_CALL(
(map_edge_ids<IdType, BLOCK_SIZE, TILE_SIZE>), grid, block, 0, stream,
edges.src.Ptr<IdType>(), new_lhs.back().Ptr<IdType>(),
edges.dst.Ptr<IdType>(), new_rhs.back().Ptr<IdType>(), num_edges,
node_map.LhsHashTable(src_type).DeviceHandle(),
node_map.RhsHashTable(dst_type).DeviceHandle());
} else {
new_lhs.emplace_back(
aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType)*8, 1}, ctx));
aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType) * 8, 1}, ctx));
new_rhs.emplace_back(
aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType)*8, 1}, ctx));
aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType) * 8, 1}, ctx));
}
}
@@ -265,7 +231,6 @@ MapEdges(
std::move(new_lhs), std::move(new_rhs));
}
} // namespace cuda
} // namespace transform
} // namespace dgl
@@ -18,13 +18,13 @@
* ids.
*/
#include <dgl/runtime/device_api.h>
#include <dgl/immutable_graph.h>
#include <cuda_runtime.h>
#include <utility>
#include <dgl/immutable_graph.h>
#include <dgl/runtime/device_api.h>
#include <algorithm>
#include <memory>
#include <utility>
#include "../../../runtime/cuda/cuda_common.h"
#include "../../heterograph.h"
@@ -40,14 +40,13 @@ namespace transform {
namespace {
template<typename IdType>
template <typename IdType>
class DeviceNodeMapMaker {
public:
explicit DeviceNodeMapMaker(
const std::vector<int64_t>& maxNodesPerType) :
max_num_nodes_(0) {
max_num_nodes_ = *std::max_element(maxNodesPerType.begin(),
maxNodesPerType.end());
explicit DeviceNodeMapMaker(const std::vector<int64_t>& maxNodesPerType)
: max_num_nodes_(0) {
max_num_nodes_ =
*std::max_element(maxNodesPerType.begin(), maxNodesPerType.end());
}
/**
@@ -65,17 +64,12 @@ class DeviceNodeMapMaker {
void Make(
const std::vector<IdArray>& lhs_nodes,
const std::vector<IdArray>& rhs_nodes,
DeviceNodeMap<IdType> * const node_maps,
int64_t * const count_lhs_device,
std::vector<IdArray>* const lhs_device,
cudaStream_t stream) {
DeviceNodeMap<IdType>* const node_maps, int64_t* const count_lhs_device,
std::vector<IdArray>* const lhs_device, cudaStream_t stream) {
const int64_t num_ntypes = lhs_nodes.size() + rhs_nodes.size();
CUDA_CALL(cudaMemsetAsync(
count_lhs_device,
0,
num_ntypes*sizeof(*count_lhs_device),
stream));
count_lhs_device, 0, num_ntypes * sizeof(*count_lhs_device), stream));
// possibly dublicate lhs nodes
const int64_t lhs_num_ntypes = static_cast<int64_t>(lhs_nodes.size());
@@ -84,10 +78,8 @@ class DeviceNodeMapMaker {
if (nodes->shape[0] > 0) {
CHECK_EQ(nodes->ctx.device_type, kDGLCUDA);
node_maps->LhsHashTable(ntype).FillWithDuplicates(
nodes.Ptr<IdType>(),
nodes->shape[0],
(*lhs_device)[ntype].Ptr<IdType>(),
count_lhs_device+ntype,
nodes.Ptr<IdType>(), nodes->shape[0],
(*lhs_device)[ntype].Ptr<IdType>(), count_lhs_device + ntype,
stream);
}
}
@@ -98,9 +90,7 @@ class DeviceNodeMapMaker {
const IdArray& nodes = rhs_nodes[ntype];
if (nodes->shape[0] > 0) {
node_maps->RhsHashTable(ntype).FillWithUnique(
nodes.Ptr<IdType>(),
nodes->shape[0],
stream);
nodes.Ptr<IdType>(), nodes->shape[0], stream);
}
}
}
@@ -118,8 +108,7 @@ class DeviceNodeMapMaker {
void Make(
const std::vector<IdArray>& lhs_nodes,
const std::vector<IdArray>& rhs_nodes,
DeviceNodeMap<IdType> * const node_maps,
cudaStream_t stream) {
DeviceNodeMap<IdType>* const node_maps, cudaStream_t stream) {
const int64_t num_ntypes = lhs_nodes.size() + rhs_nodes.size();
// unique lhs nodes
@@ -129,9 +118,7 @@ class DeviceNodeMapMaker {
if (nodes->shape[0] > 0) {
CHECK_EQ(nodes->ctx.device_type, kDGLCUDA);
node_maps->LhsHashTable(ntype).FillWithUnique(
nodes.Ptr<IdType>(),
nodes->shape[0],
stream);
nodes.Ptr<IdType>(), nodes->shape[0], stream);
}
}
@@ -141,9 +128,7 @@ class DeviceNodeMapMaker {
const IdArray& nodes = rhs_nodes[ntype];
if (nodes->shape[0] > 0) {
node_maps->RhsHashTable(ntype).FillWithUnique(
nodes.Ptr<IdType>(),
nodes->shape[0],
stream);
nodes.Ptr<IdType>(), nodes->shape[0], stream);
}
}
}
@@ -152,20 +137,15 @@ class DeviceNodeMapMaker {
IdType max_num_nodes_;
};
// Since partial specialization is not allowed for functions, use this as an
// intermediate for ToBlock where XPU = kDGLCUDA.
template<typename IdType>
std::tuple<HeteroGraphPtr, std::vector<IdArray>>
ToBlockGPU(
HeteroGraphPtr graph,
const std::vector<IdArray> &rhs_nodes,
const bool include_rhs_in_lhs,
std::vector<IdArray>* const lhs_nodes_ptr) {
template <typename IdType>
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlockGPU(
HeteroGraphPtr graph, const std::vector<IdArray>& rhs_nodes,
const bool include_rhs_in_lhs, std::vector<IdArray>* const lhs_nodes_ptr) {
std::vector<IdArray>& lhs_nodes = *lhs_nodes_ptr;
const bool generate_lhs_nodes = lhs_nodes.empty();
const auto& ctx = graph->Context();
auto device = runtime::DeviceAPI::Get(ctx);
cudaStream_t stream = runtime::getCurrentCUDAStream();
@@ -176,10 +156,11 @@ ToBlockGPU(
}
// Since DST nodes are included in SRC nodes, a common requirement is to fetch
// the DST node features from the SRC nodes features. To avoid expensive sparse lookup,
// the function assures that the DST nodes in both SRC and DST sets have the same ids.
// As a result, given the node feature tensor ``X`` of type ``utype``,
// the following code finds the corresponding DST node features of type ``vtype``:
// the DST node features from the SRC nodes features. To avoid expensive
// sparse lookup, the function assures that the DST nodes in both SRC and DST
// sets have the same ids. As a result, given the node feature tensor ``X`` of
// type ``utype``, the following code finds the corresponding DST node
// features of type ``vtype``:
const int64_t num_etypes = graph->NumEdgeTypes();
const int64_t num_ntypes = graph->NumVertexTypes();
@@ -197,9 +178,9 @@ ToBlockGPU(
}
// count lhs and rhs nodes
std::vector<int64_t> maxNodesPerType(num_ntypes*2, 0);
std::vector<int64_t> maxNodesPerType(num_ntypes * 2, 0);
for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
maxNodesPerType[ntype+num_ntypes] += rhs_nodes[ntype]->shape[0];
maxNodesPerType[ntype + num_ntypes] += rhs_nodes[ntype]->shape[0];
if (generate_lhs_nodes) {
if (include_rhs_in_lhs) {
@@ -226,16 +207,16 @@ ToBlockGPU(
if (generate_lhs_nodes) {
std::vector<int64_t> src_node_offsets(num_ntypes, 0);
for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
src_nodes[ntype] = NewIdArray(maxNodesPerType[ntype], ctx,
sizeof(IdType)*8);
src_nodes[ntype] =
NewIdArray(maxNodesPerType[ntype], ctx, sizeof(IdType) * 8);
if (include_rhs_in_lhs) {
// place rhs nodes first
device->CopyDataFromTo(rhs_nodes[ntype].Ptr<IdType>(), 0,
src_nodes[ntype].Ptr<IdType>(), src_node_offsets[ntype],
sizeof(IdType)*rhs_nodes[ntype]->shape[0],
rhs_nodes[ntype]->ctx, src_nodes[ntype]->ctx,
rhs_nodes[ntype]->dtype);
src_node_offsets[ntype] += sizeof(IdType)*rhs_nodes[ntype]->shape[0];
device->CopyDataFromTo(
rhs_nodes[ntype].Ptr<IdType>(), 0, src_nodes[ntype].Ptr<IdType>(),
src_node_offsets[ntype],
sizeof(IdType) * rhs_nodes[ntype]->shape[0], rhs_nodes[ntype]->ctx,
src_nodes[ntype]->ctx, rhs_nodes[ntype]->dtype);
src_node_offsets[ntype] += sizeof(IdType) * rhs_nodes[ntype]->shape[0];
}
}
for (int64_t etype = 0; etype < num_etypes; ++etype) {
@@ -244,14 +225,13 @@ ToBlockGPU(
if (edge_arrays[etype].src.defined()) {
device->CopyDataFromTo(
edge_arrays[etype].src.Ptr<IdType>(), 0,
src_nodes[srctype].Ptr<IdType>(),
src_node_offsets[srctype],
sizeof(IdType)*edge_arrays[etype].src->shape[0],
rhs_nodes[srctype]->ctx,
src_nodes[srctype]->ctx,
src_nodes[srctype].Ptr<IdType>(), src_node_offsets[srctype],
sizeof(IdType) * edge_arrays[etype].src->shape[0],
rhs_nodes[srctype]->ctx, src_nodes[srctype]->ctx,
rhs_nodes[srctype]->dtype);
src_node_offsets[srctype] += sizeof(IdType)*edge_arrays[etype].src->shape[0];
src_node_offsets[srctype] +=
sizeof(IdType) * edge_arrays[etype].src->shape[0];
}
}
} else {
@@ -267,47 +247,35 @@ ToBlockGPU(
if (generate_lhs_nodes) {
lhs_nodes.reserve(num_ntypes);
for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
lhs_nodes.emplace_back(NewIdArray(
maxNodesPerType[ntype], ctx, sizeof(IdType)*8));
lhs_nodes.emplace_back(
NewIdArray(maxNodesPerType[ntype], ctx, sizeof(IdType) * 8));
}
}
std::vector<int64_t> num_nodes_per_type(num_ntypes*2);
std::vector<int64_t> num_nodes_per_type(num_ntypes * 2);
// populate RHS nodes from what we already know
for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
num_nodes_per_type[num_ntypes+ntype] = rhs_nodes[ntype]->shape[0];
num_nodes_per_type[num_ntypes + ntype] = rhs_nodes[ntype]->shape[0];
}
// populate the mappings
if (generate_lhs_nodes) {
int64_t * count_lhs_device = static_cast<int64_t*>(
device->AllocWorkspace(ctx, sizeof(int64_t)*num_ntypes*2));
int64_t* count_lhs_device = static_cast<int64_t*>(
device->AllocWorkspace(ctx, sizeof(int64_t) * num_ntypes * 2));
maker.Make(
src_nodes,
rhs_nodes,
&node_maps,
count_lhs_device,
&lhs_nodes,
stream);
src_nodes, rhs_nodes, &node_maps, count_lhs_device, &lhs_nodes, stream);
device->CopyDataFromTo(
count_lhs_device, 0,
num_nodes_per_type.data(), 0,
sizeof(*num_nodes_per_type.data())*num_ntypes,
ctx,
DGLContext{kDGLCPU, 0},
DGLDataType{kDGLInt, 64, 1});
count_lhs_device, 0, num_nodes_per_type.data(), 0,
sizeof(*num_nodes_per_type.data()) * num_ntypes, ctx,
DGLContext{kDGLCPU, 0}, DGLDataType{kDGLInt, 64, 1});
device->StreamSync(ctx, stream);
// wait for the node counts to finish transferring
device->FreeWorkspace(ctx, count_lhs_device);
} else {
maker.Make(
lhs_nodes,
rhs_nodes,
&node_maps,
stream);
maker.Make(lhs_nodes, rhs_nodes, &node_maps, stream);
for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
num_nodes_per_type[ntype] = lhs_nodes[ntype]->shape[0];
@@ -321,7 +289,7 @@ ToBlockGPU(
induced_edges.push_back(edge_arrays[etype].id);
} else {
induced_edges.push_back(
aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType)*8, 1}, ctx));
aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType) * 8, 1}, ctx));
}
}
@@ -329,8 +297,8 @@ ToBlockGPU(
const auto meta_graph = graph->meta_graph();
const EdgeArray etypes = meta_graph->Edges("eid");
const IdArray new_dst = Add(etypes.dst, num_ntypes);
const auto new_meta_graph = ImmutableGraph::CreateFromCOO(
num_ntypes * 2, etypes.src, new_dst);
const auto new_meta_graph =
ImmutableGraph::CreateFromCOO(num_ntypes * 2, etypes.src, new_dst);
// allocate vector for graph relations while GPU is busy
std::vector<HeteroGraphPtr> rel_graphs;
@@ -358,20 +326,17 @@ ToBlockGPU(
// No rhs nodes are given for this edge type. Create an empty graph.
rel_graphs.push_back(CreateFromCOO(
2, lhs_nodes[srctype]->shape[0], rhs_nodes[dsttype]->shape[0],
aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType)*8, 1}, ctx),
aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType)*8, 1}, ctx)));
aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType) * 8, 1}, ctx),
aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType) * 8, 1}, ctx)));
} else {
rel_graphs.push_back(CreateFromCOO(
2,
lhs_nodes[srctype]->shape[0],
rhs_nodes[dsttype]->shape[0],
new_lhs[etype],
new_rhs[etype]));
2, lhs_nodes[srctype]->shape[0], rhs_nodes[dsttype]->shape[0],
new_lhs[etype], new_rhs[etype]));
}
}
HeteroGraphPtr new_graph = CreateHeteroGraph(
new_meta_graph, rel_graphs, num_nodes_per_type);
HeteroGraphPtr new_graph =
CreateHeteroGraph(new_meta_graph, rel_graphs, num_nodes_per_type);
// return the new graph, the new src nodes, and new edges
return std::make_tuple(new_graph, induced_edges);
@@ -379,26 +344,22 @@ ToBlockGPU(
} // namespace
// Use explicit names to get around MSVC's broken mangling that thinks the following two
// functions are the same.
// Using template<> fails to export the symbols.
// Use explicit names to get around MSVC's broken mangling that thinks the
// following two functions are the same. Using template<> fails to export the
// symbols.
std::tuple<HeteroGraphPtr, std::vector<IdArray>>
// ToBlock<kDGLCUDA, int32_t>
ToBlockGPU32(
HeteroGraphPtr graph,
const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs,
std::vector<IdArray>* const lhs_nodes) {
HeteroGraphPtr graph, const std::vector<IdArray>& rhs_nodes,
bool include_rhs_in_lhs, std::vector<IdArray>* const lhs_nodes) {
return ToBlockGPU<int32_t>(graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes);
}
std::tuple<HeteroGraphPtr, std::vector<IdArray>>
// ToBlock<kDGLCUDA, int64_t>
ToBlockGPU64(
HeteroGraphPtr graph,
const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs,
std::vector<IdArray>* const lhs_nodes) {
HeteroGraphPtr graph, const std::vector<IdArray>& rhs_nodes,
bool include_rhs_in_lhs, std::vector<IdArray>* const lhs_nodes) {
return ToBlockGPU<int64_t>(graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes);
}
@@ -4,9 +4,11 @@
* \brief k-nearest-neighbor (KNN) interface
*/
#include <dgl/runtime/registry.h>
#include <dgl/runtime/packed_func.h>
#include "knn.h"
#include <dgl/runtime/packed_func.h>
#include <dgl/runtime/registry.h>
#include "../../array/check.h"
using namespace dgl::runtime;
@@ -14,7 +16,7 @@ namespace dgl {
namespace transform {
DGL_REGISTER_GLOBAL("transform._CAPI_DGLKNN")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
.set_body([](DGLArgs args, DGLRetValue* rv) {
const NDArray data_points = args[0];
const IdArray data_offsets = args[1];
const NDArray query_points = args[2];
@@ -25,7 +27,8 @@ DGL_REGISTER_GLOBAL("transform._CAPI_DGLKNN")
aten::CheckContiguous(
{data_points, data_offsets, query_points, query_offsets, result},
{"data_points", "data_offsets", "query_points", "query_offsets", "result"});
{"data_points", "data_offsets", "query_points", "query_offsets",
"result"});
aten::CheckCtx(
data_points->ctx, {data_offsets, query_points, query_offsets, result},
{"data_offsets", "query_points", "query_offsets", "result"});
@@ -34,15 +37,15 @@ DGL_REGISTER_GLOBAL("transform._CAPI_DGLKNN")
ATEN_FLOAT_TYPE_SWITCH(data_points->dtype, FloatType, "data_points", {
ATEN_ID_TYPE_SWITCH(result->dtype, IdType, {
KNN<XPU, FloatType, IdType>(
data_points, data_offsets, query_points,
query_offsets, k, result, algorithm);
data_points, data_offsets, query_points, query_offsets, k,
result, algorithm);
});
});
});
});
DGL_REGISTER_GLOBAL("transform._CAPI_DGLNNDescent")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
.set_body([](DGLArgs args, DGLRetValue* rv) {
const NDArray points = args[0];
const IdArray offsets = args[1];
const IdArray result = args[2];
@@ -54,7 +57,8 @@ DGL_REGISTER_GLOBAL("transform._CAPI_DGLNNDescent")
aten::CheckContiguous(
{points, offsets, result}, {"points", "offsets", "result"});
aten::CheckCtx(
points->ctx, {points, offsets, result}, {"points", "offsets", "result"});
points->ctx, {points, offsets, result},
{"points", "offsets", "result"});
ATEN_XPU_SWITCH_CUDA(points->ctx.device_type, XPU, "NNDescent", {
ATEN_FLOAT_TYPE_SWITCH(points->dtype, FloatType, "points", {
@@ -8,6 +8,7 @@
#define DGL_GRAPH_TRANSFORM_KNN_H_
#include <dgl/array.h>
#include <string>
namespace dgl {
@@ -15,22 +16,24 @@ namespace transform {
/*!
* \brief For each point in each segment in \a query_points, find \a k nearest
* points in the same segment in \a data_points. \a data_offsets and \a query_offsets
* determine the start index of each segment in \a data_points and \a query_points.
* points in the same segment in \a data_points. \a data_offsets and \a
* query_offsets determine the start index of each segment in \a
* data_points and \a query_points.
*
* \param data_points dataset points.
* \param data_offsets offsets of point index in \a data_points.
* \param query_points query points.
* \param query_offsets offsets of point index in \a query_points.
* \param k the number of nearest points.
* \param result output array. A 2D tensor indicating the index
* relation between \a query_points and \a data_points.
* \param result output array. A 2D tensor indicating the index relation
* between \a query_points and \a data_points.
* \param algorithm algorithm used to compute the k-nearest neighbors.
*/
template <DGLDeviceType XPU, typename FloatType, typename IdType>
void KNN(const NDArray& data_points, const IdArray& data_offsets,
const NDArray& query_points, const IdArray& query_offsets,
const int k, IdArray result, const std::string& algorithm);
void KNN(
const NDArray& data_points, const IdArray& data_offsets,
const NDArray& query_points, const IdArray& query_offsets, const int k,
IdArray result, const std::string& algorithm);
/*!
* \brief For each input point, find \a k approximate nearest points in the same
@@ -38,19 +41,20 @@ void KNN(const NDArray& data_points, const IdArray& data_offsets,
*
* \param points input points.
* \param offsets offsets of point index.
* \param result output array. A 2D tensor indicating the index relation between points.
* \param result output array. A 2D tensor indicating the index relation between
* points.
* \param k the number of nearest points.
* \param num_iters The maximum number of NN-descent iterations to perform.
* \param num_candidates The maximum number of candidates to be considered during one iteration.
* \param num_candidates The maximum number of candidates to be considered
* during one iteration.
* \param delta A value controls the early abort.
*/
template <DGLDeviceType XPU, typename FloatType, typename IdType>
void NNDescent(const NDArray& points, const IdArray& offsets,
IdArray result, const int k, const int num_iters,
const int num_candidates, const double delta);
void NNDescent(
const NDArray& points, const IdArray& offsets, IdArray result, const int k,
const int num_iters, const int num_candidates, const double delta);
} // namespace transform
} // namespace dgl
#endif // DGL_GRAPH_TRANSFORM_KNN_H_
@@ -4,12 +4,14 @@
* \brief Line graph implementation
*/
#include <dgl/base_heterograph.h>
#include <dgl/transform.h>
#include <dgl/array.h>
#include <dgl/base_heterograph.h>
#include <dgl/packed_func_ext.h>
#include <vector>
#include <dgl/transform.h>
#include <utility>
#include <vector>
#include "../../c_api_common.h"
#include "../heterograph.h"
@@ -21,26 +23,25 @@ using namespace dgl::aten;
namespace transform {
/*!
* \brief Create Line Graph
* \param hg Graph
* \param backtracking whether the pair of (v, u) (u, v) edges are treated as linked
* \return The Line Graph
* \brief Create Line Graph.
* \param hg Graph.
* \param backtracking whether the pair of (v, u) (u, v) edges are treated as
* linked.
* \return The Line Graph.
*/
HeteroGraphPtr CreateLineGraph(
HeteroGraphPtr hg,
bool backtracking) {
HeteroGraphPtr CreateLineGraph(HeteroGraphPtr hg, bool backtracking) {
const auto hgp = std::dynamic_pointer_cast<HeteroGraph>(hg);
return hgp->LineGraph(backtracking);
}
DGL_REGISTER_GLOBAL("transform._CAPI_DGLHeteroLineGraph")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
.set_body([](DGLArgs args, DGLRetValue* rv) {
HeteroGraphRef hg = args[0];
bool backtracking = args[1];
auto hgptr = CreateLineGraph(hg.sptr(), backtracking);
*rv = HeteroGraphRef(hgptr);
});
});
}; // namespace transform
}; // namespace dgl
@@ -19,8 +19,9 @@ namespace transform {
#if !defined(_WIN32)
IdArray MetisPartition(UnitGraphPtr g, int k, NDArray vwgt_arr,
const std::string &mode, bool obj_cut) {
IdArray MetisPartition(
UnitGraphPtr g, int k, NDArray vwgt_arr, const std::string &mode,
bool obj_cut) {
// Mode can only be "k-way" or "recursive"
CHECK(mode == "k-way" || mode == "recursive")
<< "mode can only be \"k-way\" or \"recursive\"";
......@@ -51,7 +52,8 @@ IdArray MetisPartition(UnitGraphPtr g, int k, NDArray vwgt_arr,
vwgt = static_cast<idx_t *>(vwgt_arr->data);
}
auto partition_func = (mode == "k-way") ? METIS_PartGraphKway : METIS_PartGraphRecursive;
auto partition_func =
(mode == "k-way") ? METIS_PartGraphKway : METIS_PartGraphRecursive;
idx_t options[METIS_NOPTIONS];
METIS_SetDefaultOptions(options);
......
......@@ -37,20 +37,23 @@ HeteroGraphPtr ReorderUnitGraph(UnitGraphPtr ug, IdArray new_order) {
if (format & CSC_CODE) {
auto cscmat = ug->GetCSCMatrix(0);
auto new_cscmat = aten::CSRReorder(cscmat, new_order, new_order);
return UnitGraph::CreateFromCSC(ug->NumVertexTypes(), new_cscmat, ug->GetAllowedFormats());
return UnitGraph::CreateFromCSC(
ug->NumVertexTypes(), new_cscmat, ug->GetAllowedFormats());
} else if (format & CSR_CODE) {
auto csrmat = ug->GetCSRMatrix(0);
auto new_csrmat = aten::CSRReorder(csrmat, new_order, new_order);
return UnitGraph::CreateFromCSR(ug->NumVertexTypes(), new_csrmat, ug->GetAllowedFormats());
return UnitGraph::CreateFromCSR(
ug->NumVertexTypes(), new_csrmat, ug->GetAllowedFormats());
} else {
auto coomat = ug->GetCOOMatrix(0);
auto new_coomat = aten::COOReorder(coomat, new_order, new_order);
return UnitGraph::CreateFromCOO(ug->NumVertexTypes(), new_coomat, ug->GetAllowedFormats());
return UnitGraph::CreateFromCOO(
ug->NumVertexTypes(), new_coomat, ug->GetAllowedFormats());
}
}
HaloHeteroSubgraph GetSubgraphWithHalo(std::shared_ptr<HeteroGraph> hg,
IdArray nodes, int num_hops) {
HaloHeteroSubgraph GetSubgraphWithHalo(
std::shared_ptr<HeteroGraph> hg, IdArray nodes, int num_hops) {
CHECK_EQ(hg->NumBits(), 64) << "halo subgraph only supports 64bits graph";
CHECK_EQ(hg->relation_graphs().size(), 1)
<< "halo subgraph only supports homogeneous graph";
......@@ -113,8 +116,8 @@ HaloHeteroSubgraph GetSubgraphWithHalo(std::shared_ptr<HeteroGraph> hg,
const dgl_id_t *eid_data = static_cast<dgl_id_t *>(eid->data);
for (int64_t i = 0; i < num_edges; i++) {
auto it1 = orig_nodes.find(src_data[i]);
// If the source node is in the partition, we have got this edge when we iterate over
// the out-edges above.
// If the source node is in the partition, we have got this edge when we
// iterate over the out-edges above.
if (it1 == orig_nodes.end()) {
edge_src.push_back(src_data[i]);
edge_dst.push_back(dst_data[i]);
......@@ -164,10 +167,10 @@ HaloHeteroSubgraph GetSubgraphWithHalo(std::shared_ptr<HeteroGraph> hg,
}
num_edges = edge_src.size();
IdArray new_src = IdArray::Empty({num_edges}, DGLDataType{kDGLInt, 64, 1},
DGLContext{kDGLCPU, 0});
IdArray new_dst = IdArray::Empty({num_edges}, DGLDataType{kDGLInt, 64, 1},
DGLContext{kDGLCPU, 0});
IdArray new_src = IdArray::Empty(
{num_edges}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0});
IdArray new_dst = IdArray::Empty(
{num_edges}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0});
dgl_id_t *new_src_data = static_cast<dgl_id_t *>(new_src->data);
dgl_id_t *new_dst_data = static_cast<dgl_id_t *>(new_dst->data);
for (size_t i = 0; i < edge_src.size(); i++) {
......@@ -180,8 +183,8 @@ HaloHeteroSubgraph GetSubgraphWithHalo(std::shared_ptr<HeteroGraph> hg,
dgl_id_t old_nid = old_node_ids[i];
inner_nodes[i] = all_nodes[old_nid];
}
aten::COOMatrix coo(old_node_ids.size(), old_node_ids.size(), new_src,
new_dst);
aten::COOMatrix coo(
old_node_ids.size(), old_node_ids.size(), new_src, new_dst);
HeteroGraphPtr ugptr = UnitGraph::CreateFromCOO(1, coo);
HeteroGraphPtr subg = CreateHeteroGraph(hg->meta_graph(), {ugptr});
HaloHeteroSubgraph halo_subg;
......@@ -245,10 +248,10 @@ DGL_REGISTER_GLOBAL("partition._CAPI_DGLPartitionWithHalo_Hetero")
part_ids.push_back(it->first);
part_nodes.push_back(it->second);
}
// When we construct subgraphs, we need to access both in-edges and out-edges.
// We need to make sure the in-CSR and out-CSR exist. Otherwise, we'll
// try to construct in-CSR and out-CSR in openmp for loop, which will lead
// to some unexpected results.
// When we construct subgraphs, we need to access both in-edges and
// out-edges. We need to make sure the in-CSR and out-CSR exist.
// Otherwise, we'll try to construct in-CSR and out-CSR in openmp for
// loop, which will lead to some unexpected results.
ugptr->GetInCSR();
ugptr->GetOutCSR();
std::vector<std::shared_ptr<HaloHeteroSubgraph>> subgs(max_part_id + 1);
......@@ -270,7 +273,7 @@ DGL_REGISTER_GLOBAL("partition._CAPI_DGLPartitionWithHalo_Hetero")
*rv = ret_list;
});
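The comment above about calling GetInCSR() and GetOutCSR() before the loop reflects a general pattern: lazily built shared state must be materialized before entering a parallel region, otherwise every worker thread may try to build it concurrently. A generic sketch of the pattern with a toy cache (hypothetical types, not the DGL classes):

// Toy illustration: build a lazily constructed index once, up front, so the
// OpenMP workers only ever read it.
#include <vector>

struct LazyIndex {
  std::vector<int> data;
  bool built = false;
  void Build() {  // not thread-safe on its own
    if (!built) {
      data.assign(1000, 7);
      built = true;
    }
  }
  int Lookup(size_t i) const { return data[i % data.size()]; }
};

void Process(LazyIndex* index, std::vector<int>* out) {
  index->Build();  // materialize here, NOT inside the parallel loop
#pragma omp parallel for
  for (int i = 0; i < static_cast<int>(out->size()); ++i) {
    (*out)[i] = index->Lookup(i);  // read-only access is race-free
  }
}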
template<class IdType>
template <class IdType>
struct EdgeProperty {
IdType eid;
int64_t idx;
......@@ -315,15 +318,18 @@ DGL_REGISTER_GLOBAL("partition._CAPI_DGLReassignEdges_Hetero")
indexed_eids[j].part_id = part_id_data[i];
}
}
auto comp = [etype_data](const EdgeProperty<IdType> &a, const EdgeProperty<IdType> &b) {
auto comp = [etype_data](
const EdgeProperty<IdType> &a,
const EdgeProperty<IdType> &b) {
if (a.part_id == b.part_id) {
return etype_data[a.eid] < etype_data[b.eid];
} else {
return a.part_id < b.part_id;
}
};
// We only need to sort the edges if the input graph has multiple relations.
// If it's a homogeneous graph, we'll just assign edge Ids based on its previous order.
// We only need to sort the edges if the input graph has multiple
// relations. If it's a homogeneous graph, we'll just assign edge Ids
// based on its previous order.
if (etype->shape[0] > 0) {
std::sort(indexed_eids.begin(), indexed_eids.end(), comp);
}
......@@ -345,7 +351,6 @@ DGL_REGISTER_GLOBAL("partition._CAPI_GetHaloSubgraphInnerNodes_Hetero")
*rv = gptr->inner_nodes[0];
});
DGL_REGISTER_GLOBAL("partition._CAPI_DGLMakeSymmetric_Hetero")
.set_body([](DGLArgs args, DGLRetValue *rv) {
HeteroGraphRef g = args[0];
......@@ -363,13 +368,14 @@ DGL_REGISTER_GLOBAL("partition._CAPI_DGLMakeSymmetric_Hetero")
gk_csr_Free(&gk_csr);
gk_csr_Free(&sym_gk_csr);
auto new_ugptr = UnitGraph::CreateFromCSC(ugptr->NumVertexTypes(), mat,
ugptr->GetAllowedFormats());
auto new_ugptr = UnitGraph::CreateFromCSC(
ugptr->NumVertexTypes(), mat, ugptr->GetAllowedFormats());
std::vector<HeteroGraphPtr> rel_graphs = {new_ugptr};
*rv = HeteroGraphRef(std::make_shared<HeteroGraph>(
hgptr->meta_graph(), rel_graphs, hgptr->NumVerticesPerType()));
#else
LOG(FATAL) << "The fast version of making symmetric graph is not supported in Windows.";
LOG(FATAL) << "The fast version of making symmetric graph is not "
"supported in Windows.";
#endif // !defined(_WIN32)
});
......
......@@ -4,15 +4,16 @@
* \brief Remove edges.
*/
#include <dgl/base_heterograph.h>
#include <dgl/transform.h>
#include <dgl/array.h>
#include <dgl/base_heterograph.h>
#include <dgl/packed_func_ext.h>
#include <dgl/runtime/registry.h>
#include <dgl/runtime/container.h>
#include <vector>
#include <utility>
#include <dgl/runtime/registry.h>
#include <dgl/transform.h>
#include <tuple>
#include <utility>
#include <vector>
namespace dgl {
......@@ -21,8 +22,8 @@ using namespace dgl::aten;
namespace transform {
std::pair<HeteroGraphPtr, std::vector<IdArray>>
RemoveEdges(const HeteroGraphPtr graph, const std::vector<IdArray> &eids) {
std::pair<HeteroGraphPtr, std::vector<IdArray>> RemoveEdges(
const HeteroGraphPtr graph, const std::vector<IdArray> &eids) {
std::vector<IdArray> induced_eids;
std::vector<HeteroGraphPtr> rel_graphs;
const int64_t num_etypes = graph->NumEdgeTypes();
......@@ -40,23 +41,30 @@ RemoveEdges(const HeteroGraphPtr graph, const std::vector<IdArray> &eids) {
const COOMatrix &coo = graph->GetCOOMatrix(etype);
const COOMatrix &result = COORemove(coo, eids[etype]);
new_rel_graph = CreateFromCOO(
num_ntypes_rel, result.num_rows, result.num_cols, result.row, result.col);
num_ntypes_rel, result.num_rows, result.num_cols, result.row,
result.col);
induced_eids_rel = result.data;
} else if (fmt == SparseFormat::kCSR) {
const CSRMatrix &csr = graph->GetCSRMatrix(etype);
const CSRMatrix &result = CSRRemove(csr, eids[etype]);
new_rel_graph = CreateFromCSR(
num_ntypes_rel, result.num_rows, result.num_cols, result.indptr, result.indices,
num_ntypes_rel, result.num_rows, result.num_cols, result.indptr,
result.indices,
// TODO(BarclayII): make CSR support null eid array
Range(0, result.indices->shape[0], result.indices->dtype.bits, result.indices->ctx));
Range(
0, result.indices->shape[0], result.indices->dtype.bits,
result.indices->ctx));
induced_eids_rel = result.data;
} else if (fmt == SparseFormat::kCSC) {
const CSRMatrix &csc = graph->GetCSCMatrix(etype);
const CSRMatrix &result = CSRRemove(csc, eids[etype]);
new_rel_graph = CreateFromCSC(
num_ntypes_rel, result.num_rows, result.num_cols, result.indptr, result.indices,
num_ntypes_rel, result.num_rows, result.num_cols, result.indptr,
result.indices,
// TODO(BarclayII): make CSR support null eid array
Range(0, result.indices->shape[0], result.indices->dtype.bits, result.indices->ctx));
Range(
0, result.indices->shape[0], result.indices->dtype.bits,
result.indices->ctx));
induced_eids_rel = result.data;
}
......@@ -70,7 +78,7 @@ RemoveEdges(const HeteroGraphPtr graph, const std::vector<IdArray> &eids) {
}
DGL_REGISTER_GLOBAL("transform._CAPI_DGLRemoveEdges")
.set_body([] (DGLArgs args, DGLRetValue *rv) {
.set_body([](DGLArgs args, DGLRetValue *rv) {
const HeteroGraphRef graph_ref = args[0];
const std::vector<IdArray> &eids = ListValueToVector<IdArray>(args[1]);
......
......@@ -19,16 +19,18 @@
#include "to_bipartite.h"
#include <dgl/base_heterograph.h>
#include <dgl/transform.h>
#include <dgl/array.h>
#include <dgl/packed_func_ext.h>
#include <dgl/base_heterograph.h>
#include <dgl/immutable_graph.h>
#include <dgl/runtime/registry.h>
#include <dgl/packed_func_ext.h>
#include <dgl/runtime/container.h>
#include <vector>
#include <dgl/runtime/registry.h>
#include <dgl/transform.h>
#include <tuple>
#include <utility>
#include <vector>
#include "../../array/cpu/array_utils.h"
namespace dgl {
......@@ -42,11 +44,11 @@ namespace {
// Since partial specialization is not allowed for functions, use this as an
// intermediate for ToBlock where XPU = kDGLCPU.
template<typename IdType>
std::tuple<HeteroGraphPtr, std::vector<IdArray>>
ToBlockCPU(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs, std::vector<IdArray>* const lhs_nodes_ptr) {
std::vector<IdArray>& lhs_nodes = *lhs_nodes_ptr;
template <typename IdType>
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlockCPU(
HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs, std::vector<IdArray> *const lhs_nodes_ptr) {
std::vector<IdArray> &lhs_nodes = *lhs_nodes_ptr;
const bool generate_lhs_nodes = lhs_nodes.empty();
const int64_t num_etypes = graph->NumEdgeTypes();
......@@ -56,7 +58,8 @@ ToBlockCPU(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
CHECK(rhs_nodes.size() == static_cast<size_t>(num_ntypes))
<< "rhs_nodes not given for every node type";
const std::vector<IdHashMap<IdType>> rhs_node_mappings(rhs_nodes.begin(), rhs_nodes.end());
const std::vector<IdHashMap<IdType>> rhs_node_mappings(
rhs_nodes.begin(), rhs_nodes.end());
std::vector<IdHashMap<IdType>> lhs_node_mappings;
if (generate_lhs_nodes) {
......@@ -66,16 +69,16 @@ ToBlockCPU(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
else
lhs_node_mappings.resize(num_ntypes);
} else {
lhs_node_mappings = std::vector<IdHashMap<IdType>>(lhs_nodes.begin(), lhs_nodes.end());
lhs_node_mappings =
std::vector<IdHashMap<IdType>>(lhs_nodes.begin(), lhs_nodes.end());
}
for (int64_t etype = 0; etype < num_etypes; ++etype) {
const auto src_dst_types = graph->GetEndpointTypes(etype);
const dgl_type_t srctype = src_dst_types.first;
const dgl_type_t dsttype = src_dst_types.second;
if (!aten::IsNullArray(rhs_nodes[dsttype])) {
const EdgeArray& edges = graph->Edges(etype);
const EdgeArray &edges = graph->Edges(etype);
if (generate_lhs_nodes) {
lhs_node_mappings[srctype].Update(edges.src);
}
......@@ -89,8 +92,8 @@ ToBlockCPU(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
const auto meta_graph = graph->meta_graph();
const EdgeArray etypes = meta_graph->Edges("eid");
const IdArray new_dst = Add(etypes.dst, num_ntypes);
const auto new_meta_graph = ImmutableGraph::CreateFromCOO(
num_ntypes * 2, etypes.src, new_dst);
const auto new_meta_graph =
ImmutableGraph::CreateFromCOO(num_ntypes * 2, etypes.src, new_dst);
for (int64_t ntype = 0; ntype < num_ntypes; ++ntype)
num_nodes_per_type.push_back(lhs_node_mappings[ntype].Size());
......@@ -108,8 +111,8 @@ ToBlockCPU(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
if (rhs_map.Size() == 0) {
// No rhs nodes are given for this edge type. Create an empty graph.
rel_graphs.push_back(CreateFromCOO(
2, lhs_map.Size(), rhs_map.Size(),
aten::NullArray(), aten::NullArray()));
2, lhs_map.Size(), rhs_map.Size(), aten::NullArray(),
aten::NullArray()));
induced_edges.push_back(aten::NullArray());
} else {
IdArray new_src = lhs_map.Map(edge_arrays[etype].src, -1);
......@@ -117,18 +120,18 @@ ToBlockCPU(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
// Check whether there are unmapped IDs and raise error.
for (int64_t i = 0; i < new_dst->shape[0]; ++i)
CHECK_NE(new_dst.Ptr<IdType>()[i], -1)
<< "Node " << edge_arrays[etype].dst.Ptr<IdType>()[i] << " does not exist"
<< "Node " << edge_arrays[etype].dst.Ptr<IdType>()[i]
<< " does not exist"
<< " in `rhs_nodes`. Argument `rhs_nodes` must contain all the edge"
<< " destination nodes.";
rel_graphs.push_back(CreateFromCOO(
2, lhs_map.Size(), rhs_map.Size(),
new_src, new_dst));
rel_graphs.push_back(
CreateFromCOO(2, lhs_map.Size(), rhs_map.Size(), new_src, new_dst));
induced_edges.push_back(edge_arrays[etype].id);
}
}
const HeteroGraphPtr new_graph = CreateHeteroGraph(
new_meta_graph, rel_graphs, num_nodes_per_type);
const HeteroGraphPtr new_graph =
CreateHeteroGraph(new_meta_graph, rel_graphs, num_nodes_per_type);
if (generate_lhs_nodes) {
    CHECK_EQ(lhs_nodes.size(), 0) << "InternalError: lhs_nodes should be empty "
......@@ -141,59 +144,56 @@ ToBlockCPU(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
} // namespace
template<>
std::tuple<HeteroGraphPtr, std::vector<IdArray>>
ToBlock<kDGLCPU, int32_t>(HeteroGraphPtr graph,
const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs,
std::vector<IdArray>* const lhs_nodes) {
template <>
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlock<kDGLCPU, int32_t>(
HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs, std::vector<IdArray> *const lhs_nodes) {
return ToBlockCPU<int32_t>(graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes);
}
template<>
std::tuple<HeteroGraphPtr, std::vector<IdArray>>
ToBlock<kDGLCPU, int64_t>(HeteroGraphPtr graph,
const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs,
std::vector<IdArray>* const lhs_nodes) {
template <>
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlock<kDGLCPU, int64_t>(
HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs, std::vector<IdArray> *const lhs_nodes) {
return ToBlockCPU<int64_t>(graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes);
}
#ifdef DGL_USE_CUDA
// Forward declaration of GPU ToBlock implementations - actual implementation is in
// Forward declaration of GPU ToBlock implementations - actual implementation is
// in
// ./cuda/cuda_to_block.cu
// This is to get around the broken name mangling in VS2019 CL 16.5.5 + CUDA 11.3
// which complains that the two template specializations have the same signature.
std::tuple<HeteroGraphPtr, std::vector<IdArray>>
ToBlockGPU32(HeteroGraphPtr, const std::vector<IdArray>&, bool, std::vector<IdArray>* const);
std::tuple<HeteroGraphPtr, std::vector<IdArray>>
ToBlockGPU64(HeteroGraphPtr, const std::vector<IdArray>&, bool, std::vector<IdArray>* const);
template<>
std::tuple<HeteroGraphPtr, std::vector<IdArray>>
ToBlock<kDGLCUDA, int32_t>(HeteroGraphPtr graph,
const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs,
std::vector<IdArray>* const lhs_nodes) {
// This is to get around the broken name mangling in VS2019 CL 16.5.5 +
// CUDA 11.3 which complains that the two template specializations have the same
// signature.
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlockGPU32(
HeteroGraphPtr, const std::vector<IdArray> &, bool,
std::vector<IdArray> *const);
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlockGPU64(
HeteroGraphPtr, const std::vector<IdArray> &, bool,
std::vector<IdArray> *const);
template <>
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlock<kDGLCUDA, int32_t>(
HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs, std::vector<IdArray> *const lhs_nodes) {
return ToBlockGPU32(graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes);
}
template<>
std::tuple<HeteroGraphPtr, std::vector<IdArray>>
ToBlock<kDGLCUDA, int64_t>(HeteroGraphPtr graph,
const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs,
std::vector<IdArray>* const lhs_nodes) {
template <>
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlock<kDGLCUDA, int64_t>(
HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs, std::vector<IdArray> *const lhs_nodes) {
return ToBlockGPU64(graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes);
}
#endif // DGL_USE_CUDA
DGL_REGISTER_GLOBAL("transform._CAPI_DGLToBlock")
.set_body([] (DGLArgs args, DGLRetValue *rv) {
.set_body([](DGLArgs args, DGLRetValue *rv) {
const HeteroGraphRef graph_ref = args[0];
const std::vector<IdArray> &rhs_nodes = ListValueToVector<IdArray>(args[1]);
const std::vector<IdArray> &rhs_nodes =
ListValueToVector<IdArray>(args[1]);
const bool include_rhs_in_lhs = args[2];
std::vector<IdArray> lhs_nodes = ListValueToVector<IdArray>(args[3]);
......@@ -203,8 +203,7 @@ DGL_REGISTER_GLOBAL("transform._CAPI_DGLToBlock")
ATEN_XPU_SWITCH_CUDA(graph_ref->Context().device_type, XPU, "ToBlock", {
ATEN_ID_TYPE_SWITCH(graph_ref->DataType(), IdType, {
std::tie(new_graph, induced_edges) = ToBlock<XPU, IdType>(
graph_ref.sptr(), rhs_nodes, include_rhs_in_lhs,
&lhs_nodes);
graph_ref.sptr(), rhs_nodes, include_rhs_in_lhs, &lhs_nodes);
});
});
......
......@@ -44,9 +44,9 @@ namespace transform {
*
* @return The block and the induced edges.
*/
template<DGLDeviceType XPU, typename IdType>
std::tuple<HeteroGraphPtr, std::vector<IdArray>>
ToBlock(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
template <DGLDeviceType XPU, typename IdType>
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlock(
HeteroGraphPtr graph, const std::vector<IdArray>& rhs_nodes,
bool include_rhs_in_lhs, std::vector<IdArray>* lhs_nodes);
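To make the lhs/rhs terminology concrete, below is a hypothetical standalone sketch of the core relabelling a block construction performs (the DGL version additionally handles node/edge types, devices, error checking, and induced edge IDs): destination (rhs) nodes get compact IDs from `rhs_nodes`, source (lhs) IDs are assigned on first appearance, optionally seeded with the rhs nodes, and edges are rewritten into those compact ID spaces.

// Standalone block-relabelling sketch (not the DGL internals).
#include <cstdint>
#include <unordered_map>
#include <utility>
#include <vector>

struct BlockSketch {
  std::vector<std::pair<int64_t, int64_t>> edges;  // (new_src, new_dst)
  std::vector<int64_t> lhs_nodes;                  // new lhs id -> original id
};

BlockSketch ToBlockSketch(
    const std::vector<std::pair<int64_t, int64_t>>& edges,
    const std::vector<int64_t>& rhs_nodes, bool include_rhs_in_lhs) {
  std::unordered_map<int64_t, int64_t> rhs_map, lhs_map;
  for (size_t i = 0; i < rhs_nodes.size(); ++i)
    rhs_map[rhs_nodes[i]] = static_cast<int64_t>(i);

  BlockSketch block;
  auto lhs_id = [&](int64_t orig) -> int64_t {
    auto it = lhs_map.find(orig);
    if (it != lhs_map.end()) return it->second;
    const int64_t id = static_cast<int64_t>(block.lhs_nodes.size());
    lhs_map[orig] = id;
    block.lhs_nodes.push_back(orig);
    return id;
  };
  // Optionally let the rhs nodes occupy the leading lhs IDs.
  if (include_rhs_in_lhs)
    for (int64_t v : rhs_nodes) lhs_id(v);

  for (const auto& e : edges) {
    auto dst = rhs_map.find(e.second);
    if (dst == rhs_map.end()) continue;  // edge does not end in rhs_nodes
    block.edges.emplace_back(lhs_id(e.first), dst->second);
  }
  return block;
}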
} // namespace transform
......