/**
 *  Copyright 2019-2021 Contributors
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 * @file graph/transform/to_block.cc
 * @brief Convert a graph to a bipartite-structured graph.
 *
 * Tested via python wrapper: python/dgl/path/to/to_block.py
 */

#include "to_block.h"

#include <dgl/array.h>
#include <dgl/base_heterograph.h>
#include <dgl/immutable_graph.h>
#include <dgl/packed_func_ext.h>
#include <dgl/runtime/container.h>
#include <dgl/runtime/device_api.h>
#include <dgl/runtime/registry.h>
#include <dgl/transform.h>

#include <tuple>
#include <utility>
#include <vector>

#include "../../array/cpu/concurrent_id_hash_map.h"

namespace dgl {

using namespace dgl::runtime;
using namespace dgl::aten;

namespace transform {

namespace {

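// Builds the global-to-local node ID mappings on CPU. For every node type it
// constructs a ConcurrentIdHashMap over the candidate src nodes (and over the
// dst nodes when they are not already contained in the src set), then
// relabels the edge endpoints into the block-local numbering.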
template <typename IdType>
struct CPUIdsMapper {
  std::tuple<std::vector<IdArray>, std::vector<IdArray>> operator()(
      const HeteroGraphPtr &graph, bool include_rhs_in_lhs, int64_t num_ntypes,
      const DGLContext &ctx, const std::vector<int64_t> &max_nodes_per_type,
      const std::vector<EdgeArray> &edge_arrays,
      const std::vector<IdArray> &src_nodes,
      const std::vector<IdArray> &rhs_nodes,
      std::vector<IdArray> *const lhs_nodes_ptr,
      std::vector<int64_t> *const num_nodes_per_type_ptr) {
    std::vector<IdArray> &lhs_nodes = *lhs_nodes_ptr;
    std::vector<int64_t> &num_nodes_per_type = *num_nodes_per_type_ptr;

    const bool generate_lhs_nodes = lhs_nodes.empty();
    if (generate_lhs_nodes) {
      lhs_nodes.reserve(num_ntypes);
    }

    std::vector<ConcurrentIdHashMap<IdType>> lhs_nodes_map(num_ntypes);
    std::vector<ConcurrentIdHashMap<IdType>> rhs_nodes_map(num_ntypes);
    for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
      IdArray unique_ids =
          aten::NullArray(DGLDataTypeTraits<IdType>::dtype, ctx);
      if (!aten::IsNullArray(src_nodes[ntype])) {
        auto num_seeds = include_rhs_in_lhs ? rhs_nodes[ntype]->shape[0] : 0;
        unique_ids = lhs_nodes_map[ntype].Init(src_nodes[ntype], num_seeds);
      }
      if (generate_lhs_nodes) {
        num_nodes_per_type[ntype] = unique_ids->shape[0];
        lhs_nodes.emplace_back(unique_ids);
      }
    }

    // Skip rhs mapping construction to save effort when rhs is already
    // contained in lhs.
    if (!include_rhs_in_lhs) {
      for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
        if (!aten::IsNullArray(rhs_nodes[ntype])) {
          rhs_nodes_map[ntype].Init(
              rhs_nodes[ntype], rhs_nodes[ntype]->shape[0]);
        }
      }
    }

    // Map the node IDs of each edge from the global numbering to the
    // block-local numbering.
    std::vector<IdArray> new_lhs;
    std::vector<IdArray> new_rhs;
    new_lhs.reserve(edge_arrays.size());
    new_rhs.reserve(edge_arrays.size());
    const int64_t num_etypes = static_cast<int64_t>(edge_arrays.size());
    for (int64_t etype = 0; etype < num_etypes; ++etype) {
      const EdgeArray &edges = edge_arrays[etype];
      if (edges.id.defined() && !aten::IsNullArray(edges.src)) {
        const auto src_dst_types = graph->GetEndpointTypes(etype);
        const int src_type = src_dst_types.first;
        const int dst_type = src_dst_types.second;
        new_lhs.emplace_back(lhs_nodes_map[src_type].MapIds(edges.src));
        if (include_rhs_in_lhs) {
          new_rhs.emplace_back(lhs_nodes_map[dst_type].MapIds(edges.dst));
        } else {
          new_rhs.emplace_back(rhs_nodes_map[dst_type].MapIds(edges.dst));
        }
      } else {
        new_lhs.emplace_back(
            aten::NullArray(DGLDataTypeTraits<IdType>::dtype, ctx));
        new_rhs.emplace_back(
            aten::NullArray(DGLDataTypeTraits<IdType>::dtype, ctx));
      }
    }
    return std::tuple<std::vector<IdArray>, std::vector<IdArray>>(
        std::move(new_lhs), std::move(new_rhs));
  }
};

// Since partial specialization is not allowed for functions, use this as an
// intermediate for ToBlock where XPU = kDGLCPU.
template <typename IdType>
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlockCPU(
    HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
    bool include_rhs_in_lhs, std::vector<IdArray> *const lhs_nodes_ptr) {
  return dgl::transform::ProcessToBlock<IdType>(
      graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes_ptr,
      CPUIdsMapper<IdType>());
}

}  // namespace

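// Common driver shared by the CPU and GPU ToBlock paths: gathers the candidate
// src nodes, delegates the global-to-local ID mapping to ids_mapper, and then
// assembles the bipartite block graph together with the induced edge IDs.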
template <typename IdType>
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ProcessToBlock(
    HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
    bool include_rhs_in_lhs, std::vector<IdArray> *const lhs_nodes_ptr,
    IdsMapper &&ids_mapper) {
  std::vector<IdArray> &lhs_nodes = *lhs_nodes_ptr;
  const bool generate_lhs_nodes = lhs_nodes.empty();

  const auto &ctx = graph->Context();
  auto device = runtime::DeviceAPI::Get(ctx);

  // Since DST nodes are included in SRC nodes, a common requirement is to
  // fetch the DST node features from the SRC node features. To avoid an
  // expensive sparse lookup, the function ensures that the DST nodes in both
  // the SRC and DST sets have the same IDs. As a result, given the node
  // feature tensor ``X`` of type ``utype``, a simple slice yields the
  // corresponding DST node features of type ``vtype``:
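  // (Illustrative Python sketch, assuming ``block`` is the returned graph:
  //  ``X[:block.num_dst_nodes('vtype')]``.)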

  const int64_t num_etypes = graph->NumEdgeTypes();
  const int64_t num_ntypes = graph->NumVertexTypes();

  CHECK(rhs_nodes.size() == static_cast<size_t>(num_ntypes))
      << "rhs_nodes not given for every node type";

  std::vector<EdgeArray> edge_arrays(num_etypes);
  for (int64_t etype = 0; etype < num_etypes; ++etype) {
    const auto src_dst_types = graph->GetEndpointTypes(etype);
    const dgl_type_t dsttype = src_dst_types.second;
    if (!aten::IsNullArray(rhs_nodes[dsttype])) {
      edge_arrays[etype] = graph->Edges(etype);
    }
  }

  // Count lhs and rhs nodes.
  std::vector<int64_t> maxNodesPerType(num_ntypes * 2, 0);
  for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
    maxNodesPerType[ntype + num_ntypes] += rhs_nodes[ntype]->shape[0];

    if (generate_lhs_nodes) {
      if (include_rhs_in_lhs) {
        maxNodesPerType[ntype] += rhs_nodes[ntype]->shape[0];
      }
    } else {
      maxNodesPerType[ntype] += lhs_nodes[ntype]->shape[0];
    }
  }
  if (generate_lhs_nodes) {
    // We don't have lhs_nodes, so we need to count inbound edges to get an
    // upper bound.
    for (int64_t etype = 0; etype < num_etypes; ++etype) {
      const auto src_dst_types = graph->GetEndpointTypes(etype);
      const dgl_type_t srctype = src_dst_types.first;
      if (edge_arrays[etype].src.defined()) {
        maxNodesPerType[srctype] += edge_arrays[etype].src->shape[0];
      }
    }
  }

  // Gather lhs_nodes.
  std::vector<IdArray> src_nodes(num_ntypes);
  if (generate_lhs_nodes) {
    std::vector<int64_t> src_node_offsets(num_ntypes, 0);
    for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
      src_nodes[ntype] =
          NewIdArray(maxNodesPerType[ntype], ctx, sizeof(IdType) * 8);
      if (include_rhs_in_lhs) {
        // Place rhs nodes first.
        device->CopyDataFromTo(
            rhs_nodes[ntype].Ptr<IdType>(), 0, src_nodes[ntype].Ptr<IdType>(),
            src_node_offsets[ntype],
            sizeof(IdType) * rhs_nodes[ntype]->shape[0], rhs_nodes[ntype]->ctx,
            src_nodes[ntype]->ctx, rhs_nodes[ntype]->dtype);
        src_node_offsets[ntype] += sizeof(IdType) * rhs_nodes[ntype]->shape[0];
      }
    }
    for (int64_t etype = 0; etype < num_etypes; ++etype) {
      const auto src_dst_types = graph->GetEndpointTypes(etype);
      const dgl_type_t srctype = src_dst_types.first;
      if (edge_arrays[etype].src.defined()) {
        device->CopyDataFromTo(
            edge_arrays[etype].src.Ptr<IdType>(), 0,
            src_nodes[srctype].Ptr<IdType>(), src_node_offsets[srctype],
            sizeof(IdType) * edge_arrays[etype].src->shape[0],
            rhs_nodes[srctype]->ctx, src_nodes[srctype]->ctx,
            rhs_nodes[srctype]->dtype);

        src_node_offsets[srctype] +=
            sizeof(IdType) * edge_arrays[etype].src->shape[0];
      }
    }
  } else {
    for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
      src_nodes[ntype] = lhs_nodes[ntype];
    }
  }

  std::vector<int64_t> num_nodes_per_type(num_ntypes * 2);
  // Populate RHS nodes from what we already know.
  for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
    num_nodes_per_type[num_ntypes + ntype] = rhs_nodes[ntype]->shape[0];
  }

  std::vector<IdArray> new_lhs;
  std::vector<IdArray> new_rhs;
  std::tie(new_lhs, new_rhs) = ids_mapper(
      graph, include_rhs_in_lhs, num_ntypes, ctx, maxNodesPerType, edge_arrays,
      src_nodes, rhs_nodes, lhs_nodes_ptr, &num_nodes_per_type);

  std::vector<IdArray> induced_edges;
  induced_edges.reserve(num_etypes);
  for (int64_t etype = 0; etype < num_etypes; ++etype) {
    if (edge_arrays[etype].id.defined()) {
      induced_edges.push_back(edge_arrays[etype].id);
    } else {
      induced_edges.push_back(
          aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType) * 8, 1}, ctx));
    }
  }

  // Build metagraph.
  const auto meta_graph = graph->meta_graph();
  const EdgeArray etypes = meta_graph->Edges("eid");
  const IdArray new_dst = Add(etypes.dst, num_ntypes);
  const auto new_meta_graph =
      ImmutableGraph::CreateFromCOO(num_ntypes * 2, etypes.src, new_dst);

  // Allocate vector for graph relations while GPU is busy.
  std::vector<HeteroGraphPtr> rel_graphs;
  rel_graphs.reserve(num_etypes);

  // Build the heterograph.
  for (int64_t etype = 0; etype < num_etypes; ++etype) {
    const auto src_dst_types = graph->GetEndpointTypes(etype);
    const dgl_type_t srctype = src_dst_types.first;
    const dgl_type_t dsttype = src_dst_types.second;

    if (rhs_nodes[dsttype]->shape[0] == 0) {
      // No rhs nodes are given for this edge type. Create an empty graph.
      rel_graphs.push_back(CreateFromCOO(
          2, lhs_nodes[srctype]->shape[0], rhs_nodes[dsttype]->shape[0],
          aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType) * 8, 1}, ctx),
          aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType) * 8, 1}, ctx)));
    } else {
      rel_graphs.push_back(CreateFromCOO(
          2, lhs_nodes[srctype]->shape[0], rhs_nodes[dsttype]->shape[0],
          new_lhs[etype], new_rhs[etype]));
    }
  }

  HeteroGraphPtr new_graph =
      CreateHeteroGraph(new_meta_graph, rel_graphs, num_nodes_per_type);

  // Return the new graph and the induced edge IDs; the new src nodes are
  // returned to the caller through lhs_nodes_ptr.
  return std::make_tuple(new_graph, induced_edges);
}

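// Explicit instantiations for the supported node ID types.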
template std::tuple<HeteroGraphPtr, std::vector<IdArray>>
ProcessToBlock<int32_t>(
    HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
    bool include_rhs_in_lhs, std::vector<IdArray> *const lhs_nodes_ptr,
    IdsMapper &&get_mapping_ids);

template std::tuple<HeteroGraphPtr, std::vector<IdArray>>
ProcessToBlock<int64_t>(
    HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
    bool include_rhs_in_lhs, std::vector<IdArray> *const lhs_nodes_ptr,
    IdsMapper &&get_mapping_ids);

template <>
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlock<kDGLCPU, int32_t>(
    HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
    bool include_rhs_in_lhs, std::vector<IdArray> *const lhs_nodes) {
  return ToBlockCPU<int32_t>(graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes);
}

template <>
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlock<kDGLCPU, int64_t>(
    HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
    bool include_rhs_in_lhs, std::vector<IdArray> *const lhs_nodes) {
  return ToBlockCPU<int64_t>(graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes);
}

#ifdef DGL_USE_CUDA

// Forward declaration of GPU ToBlock implementations. The actual
// implementation is in ./cuda/cuda_to_block.cu.
// This is to get around the broken name mangling in VS2019 CL 16.5.5 +
// CUDA 11.3, which complains that the two template specializations have the
// same signature.
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlockGPU32(
    HeteroGraphPtr, const std::vector<IdArray> &, bool,
    std::vector<IdArray> *const);
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlockGPU64(
    HeteroGraphPtr, const std::vector<IdArray> &, bool,
    std::vector<IdArray> *const);

template <>
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlock<kDGLCUDA, int32_t>(
    HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
    bool include_rhs_in_lhs, std::vector<IdArray> *const lhs_nodes) {
  return ToBlockGPU32(graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes);
}

template <>
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlock<kDGLCUDA, int64_t>(
    HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
    bool include_rhs_in_lhs, std::vector<IdArray> *const lhs_nodes) {
  return ToBlockGPU64(graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes);
}

#endif  // DGL_USE_CUDA

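// C API entry point backing the Python ``dgl.to_block`` wrapper.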
DGL_REGISTER_GLOBAL("capi._CAPI_DGLToBlock")
    .set_body([](DGLArgs args, DGLRetValue *rv) {
      const HeteroGraphRef graph_ref = args[0];
      const std::vector<IdArray> &rhs_nodes =
          ListValueToVector<IdArray>(args[1]);
      const bool include_rhs_in_lhs = args[2];
      std::vector<IdArray> lhs_nodes = ListValueToVector<IdArray>(args[3]);

      HeteroGraphPtr new_graph;
      std::vector<IdArray> induced_edges;

      ATEN_XPU_SWITCH_CUDA(graph_ref->Context().device_type, XPU, "ToBlock", {
        ATEN_ID_TYPE_SWITCH(graph_ref->DataType(), IdType, {
          std::tie(new_graph, induced_edges) = ToBlock<XPU, IdType>(
              graph_ref.sptr(), rhs_nodes, include_rhs_in_lhs, &lhs_nodes);
        });
      });

      List<Value> lhs_nodes_ref;
      for (IdArray &array : lhs_nodes)
        lhs_nodes_ref.push_back(Value(MakeValue(array)));
      List<Value> induced_edges_ref;
      for (IdArray &array : induced_edges)
        induced_edges_ref.push_back(Value(MakeValue(array)));

      List<ObjectRef> ret;
      ret.push_back(HeteroGraphRef(new_graph));
      ret.push_back(lhs_nodes_ref);
      ret.push_back(induced_edges_ref);

      *rv = ret;
    });

};  // namespace transform

};  // namespace dgl