cuda_compact_graph.cu 8.68 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/*!
 *  Copyright 2021 Contributors
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 * \file graph/transform/cuda/cuda_compact_graph.cu
 * \brief Functions to find and eliminate the common isolated nodes across
 * all given graphs with the same set of nodes.
 */

#include <cuda_runtime.h>
22
23
24
#include <dgl/immutable_graph.h>
#include <dgl/runtime/device_api.h>

25
26
#include <algorithm>
#include <memory>
27
#include <utility>
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43

#include "../../../runtime/cuda/cuda_common.h"
#include "../../heterograph.h"
#include "../compact.h"
#include "cuda_map_edges.cuh"

using namespace dgl::aten;
using namespace dgl::runtime::cuda;
using namespace dgl::transform::cuda;

namespace dgl {
namespace transform {

namespace {

/**
44
45
46
47
48
49
50
51
52
53
54
 * \brief This function builds node maps for each node type, preserving the
 * order of the input nodes. Here it is assumed the nodes are not unique,
 * and thus a unique list is generated.
 *
 * \param input_nodes The set of input nodes.
 * \param node_maps The node maps to be constructed.
 * \param count_unique_device The number of unique nodes (on the GPU).
 * \param unique_nodes_device The unique nodes (on the GPU).
 * \param stream The stream to operate on.
 */
template <typename IdType>
55
void BuildNodeMaps(
56
57
58
    const std::vector<IdArray> &input_nodes,
    DeviceNodeMap<IdType> *const node_maps, int64_t *const count_unique_device,
    std::vector<IdArray> *const unique_nodes_device, cudaStream_t stream) {
59
60
61
  const int64_t num_ntypes = static_cast<int64_t>(input_nodes.size());

  CUDA_CALL(cudaMemsetAsync(
62
63
      count_unique_device, 0, num_ntypes * sizeof(*count_unique_device),
      stream));
64
65
66

  // possibly duplicated nodes
  for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
67
    const IdArray &nodes = input_nodes[ntype];
68
    if (nodes->shape[0] > 0) {
69
      CHECK_EQ(nodes->ctx.device_type, kDGLCUDA);
70
      node_maps->LhsHashTable(ntype).FillWithDuplicates(
71
          nodes.Ptr<IdType>(), nodes->shape[0],
72
          (*unique_nodes_device)[ntype].Ptr<IdType>(),
73
          count_unique_device + ntype, stream);
74
75
76
77
    }
  }
}

78
79
template <typename IdType>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>> CompactGraphsGPU(
80
81
    const std::vector<HeteroGraphPtr> &graphs,
    const std::vector<IdArray> &always_preserve) {
82
  const auto &ctx = graphs[0]->Context();
83
  auto device = runtime::DeviceAPI::Get(ctx);
84
  cudaStream_t stream = runtime::getCurrentCUDAStream();
85

86
  CHECK_EQ(ctx.device_type, kDGLCUDA);
87
88

  // Step 1: Collect the nodes that has connections for each type.
89
  const uint64_t num_ntypes = graphs[0]->NumVertexTypes();
90
91
  std::vector<std::vector<EdgeArray>> all_edges(
      graphs.size());  // all_edges[i][etype]
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116

  // count the number of nodes per type
  std::vector<int64_t> max_vertex_cnt(num_ntypes, 0);
  for (size_t i = 0; i < graphs.size(); ++i) {
    const HeteroGraphPtr curr_graph = graphs[i];
    const int64_t num_etypes = curr_graph->NumEdgeTypes();

    for (IdType etype = 0; etype < num_etypes; ++etype) {
      IdType srctype, dsttype;
      std::tie(srctype, dsttype) = curr_graph->GetEndpointTypes(etype);

      const int64_t n_edges = curr_graph->NumEdges(etype);
      max_vertex_cnt[srctype] += n_edges;
      max_vertex_cnt[dsttype] += n_edges;
    }
  }

  for (size_t i = 0; i < always_preserve.size(); ++i) {
    max_vertex_cnt[i] += always_preserve[i]->shape[0];
  }

  // gather all nodes
  std::vector<IdArray> all_nodes(num_ntypes);
  std::vector<int64_t> node_offsets(num_ntypes, 0);

117
  for (uint64_t ntype = 0; ntype < num_ntypes; ++ntype) {
118
119
    all_nodes[ntype] =
        NewIdArray(max_vertex_cnt[ntype], ctx, sizeof(IdType) * 8);
120
    // copy the nodes in always_preserve
121
122
    if (ntype < always_preserve.size() &&
        always_preserve[ntype]->shape[0] > 0) {
123
124
      device->CopyDataFromTo(
          always_preserve[ntype].Ptr<IdType>(), 0,
125
126
127
          all_nodes[ntype].Ptr<IdType>(), node_offsets[ntype],
          sizeof(IdType) * always_preserve[ntype]->shape[0],
          always_preserve[ntype]->ctx, all_nodes[ntype]->ctx,
128
          always_preserve[ntype]->dtype);
129
      node_offsets[ntype] += sizeof(IdType) * always_preserve[ntype]->shape[0];
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
    }
  }

  for (size_t i = 0; i < graphs.size(); ++i) {
    const HeteroGraphPtr curr_graph = graphs[i];
    const int64_t num_etypes = curr_graph->NumEdgeTypes();

    all_edges[i].reserve(num_etypes);
    for (int64_t etype = 0; etype < num_etypes; ++etype) {
      dgl_type_t srctype, dsttype;
      std::tie(srctype, dsttype) = curr_graph->GetEndpointTypes(etype);

      const EdgeArray edges = curr_graph->Edges(etype, "eid");

      if (edges.src.defined()) {
        device->CopyDataFromTo(
146
147
148
149
            edges.src.Ptr<IdType>(), 0, all_nodes[srctype].Ptr<IdType>(),
            node_offsets[srctype], sizeof(IdType) * edges.src->shape[0],
            edges.src->ctx, all_nodes[srctype]->ctx, edges.src->dtype);
        node_offsets[srctype] += sizeof(IdType) * edges.src->shape[0];
150
151
152
      }
      if (edges.dst.defined()) {
        device->CopyDataFromTo(
153
154
155
156
            edges.dst.Ptr<IdType>(), 0, all_nodes[dsttype].Ptr<IdType>(),
            node_offsets[dsttype], sizeof(IdType) * edges.dst->shape[0],
            edges.dst->ctx, all_nodes[dsttype]->ctx, edges.dst->dtype);
        node_offsets[dsttype] += sizeof(IdType) * edges.dst->shape[0];
157
158
159
160
161
162
163
164
165
166
167
168
169
170
      }
      all_edges[i].push_back(edges);
    }
  }

  // Step 2: Relabel the nodes for each type to a smaller ID space
  //         using BuildNodeMaps

  // allocate space for map creation
  // the hashmap on GPU
  DeviceNodeMap<IdType> node_maps(max_vertex_cnt, 0, ctx, stream);
  // number of unique nodes per type on CPU
  std::vector<int64_t> num_induced_nodes(num_ntypes);
  // number of unique nodes per type on GPU
171
172
  int64_t *count_unique_device = static_cast<int64_t *>(
      device->AllocWorkspace(ctx, sizeof(int64_t) * num_ntypes));
173
174
  // the set of unique nodes per type
  std::vector<IdArray> induced_nodes(num_ntypes);
175
  for (uint64_t ntype = 0; ntype < num_ntypes; ++ntype) {
176
177
    induced_nodes[ntype] =
        NewIdArray(max_vertex_cnt[ntype], ctx, sizeof(IdType) * 8);
178
179
180
  }

  BuildNodeMaps(
181
      all_nodes, &node_maps, count_unique_device, &induced_nodes, stream);
182
183

  device->CopyDataFromTo(
184
185
186
      count_unique_device, 0, num_induced_nodes.data(), 0,
      sizeof(*num_induced_nodes.data()) * num_ntypes, ctx,
      DGLContext{kDGLCPU, 0}, DGLDataType{kDGLInt, 64, 1});
187
188
189
190
191
192
  device->StreamSync(ctx, stream);

  // wait for the node counts to finish transferring
  device->FreeWorkspace(ctx, count_unique_device);

  // resize induced nodes
193
  for (uint64_t ntype = 0; ntype < num_ntypes; ++ntype) {
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
    induced_nodes[ntype]->shape[0] = num_induced_nodes[ntype];
  }

  // Step 3: Remap the edges of each graph using MapEdges
  std::vector<HeteroGraphPtr> new_graphs;
  for (size_t i = 0; i < graphs.size(); ++i) {
    const HeteroGraphPtr curr_graph = graphs[i];
    const auto meta_graph = curr_graph->meta_graph();
    const int64_t num_etypes = curr_graph->NumEdgeTypes();

    std::vector<HeteroGraphPtr> rel_graphs;
    rel_graphs.reserve(num_etypes);

    std::vector<IdArray> new_src;
    std::vector<IdArray> new_dst;
209
210
    std::tie(new_src, new_dst) =
        MapEdges(curr_graph, all_edges[i], node_maps, stream);
211
212
213
214
215
216

    for (IdType etype = 0; etype < num_etypes; ++etype) {
      IdType srctype, dsttype;
      std::tie(srctype, dsttype) = curr_graph->GetEndpointTypes(etype);

      rel_graphs.push_back(UnitGraph::CreateFromCOO(
217
218
          srctype == dsttype ? 1 : 2, induced_nodes[srctype]->shape[0],
          induced_nodes[dsttype]->shape[0], new_src[etype], new_dst[etype]));
219
220
    }

221
222
    new_graphs.push_back(
        CreateHeteroGraph(meta_graph, rel_graphs, num_induced_nodes));
223
224
225
226
227
228
229
  }

  return std::make_pair(new_graphs, induced_nodes);
}

}  // namespace

230
template <>
231
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
232
CompactGraphs<kDGLCUDA, int32_t>(
233
234
235
236
237
    const std::vector<HeteroGraphPtr> &graphs,
    const std::vector<IdArray> &always_preserve) {
  return CompactGraphsGPU<int32_t>(graphs, always_preserve);
}

238
template <>
239
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
240
CompactGraphs<kDGLCUDA, int64_t>(
241
242
243
244
245
246
247
    const std::vector<HeteroGraphPtr> &graphs,
    const std::vector<IdArray> &always_preserve) {
  return CompactGraphsGPU<int64_t>(graphs, always_preserve);
}

}  // namespace transform
}  // namespace dgl