// !!! This is a file automatically generated by hipify!!!
/**
 *  Copyright 2021 Contributors
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 * @file graph/transform/cuda/cuda_compact_graph.cu
 * @brief Functions to find and eliminate the common isolated nodes across
 * all given graphs with the same set of nodes.
 */

#include <hip/hip_runtime.h>

#include <dgl/immutable_graph.h>
#include <dgl/runtime/device_api.h>

#include <algorithm>
#include <memory>
#include <utility>

#include "../../../runtime/cuda/cuda_common.h"
#include "../../heterograph.h"
#include "../compact.h"
#include "cuda_map_edges.cuh"

using namespace dgl::aten;
using namespace dgl::runtime::cuda;
using namespace dgl::transform::cuda;

namespace dgl {
namespace transform {

namespace {

/**
45
 * @brief This function builds node maps for each node type, preserving the
46
47
48
 * order of the input nodes. Here it is assumed the nodes are not unique,
 * and thus a unique list is generated.
 *
49
50
51
52
53
 * @param input_nodes The set of input nodes.
 * @param node_maps The node maps to be constructed.
 * @param count_unique_device The number of unique nodes (on the GPU).
 * @param unique_nodes_device The unique nodes (on the GPU).
 * @param stream The stream to operate on.
54
55
 */
template <typename IdType>
56
void BuildNodeMaps(
57
58
    const std::vector<IdArray> &input_nodes,
    DeviceNodeMap<IdType> *const node_maps, int64_t *const count_unique_device,
sangwzh's avatar
sangwzh committed
59
    std::vector<IdArray> *const unique_nodes_device, hipStream_t stream) {
60
61
  const int64_t num_ntypes = static_cast<int64_t>(input_nodes.size());

sangwzh's avatar
sangwzh committed
62
  CUDA_CALL(hipMemsetAsync(
63
64
      count_unique_device, 0, num_ntypes * sizeof(*count_unique_device),
      stream));
65
66
67

  // possibly duplicated nodes
  for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
68
    const IdArray &nodes = input_nodes[ntype];
69
    if (nodes->shape[0] > 0) {
70
      CHECK_EQ(nodes->ctx.device_type, kDGLCUDA);
71
      node_maps->LhsHashTable(ntype).FillWithDuplicates(
72
          nodes.Ptr<IdType>(), nodes->shape[0],
73
          (*unique_nodes_device)[ntype].Ptr<IdType>(),
74
          count_unique_device + ntype, stream);
75
76
77
78
    }
  }
}

79
80
template <typename IdType>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>> CompactGraphsGPU(
81
82
    const std::vector<HeteroGraphPtr> &graphs,
    const std::vector<IdArray> &always_preserve) {
83
  const auto &ctx = graphs[0]->Context();
84
  auto device = runtime::DeviceAPI::Get(ctx);
sangwzh's avatar
sangwzh committed
85
  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
86

87
  CHECK_EQ(ctx.device_type, kDGLCUDA);
88
89

  // Step 1: Collect the nodes that has connections for each type.
90
  const uint64_t num_ntypes = graphs[0]->NumVertexTypes();
91
92
  std::vector<std::vector<EdgeArray>> all_edges(
      graphs.size());  // all_edges[i][etype]
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117

  // count the number of nodes per type
  std::vector<int64_t> max_vertex_cnt(num_ntypes, 0);
  for (size_t i = 0; i < graphs.size(); ++i) {
    const HeteroGraphPtr curr_graph = graphs[i];
    const int64_t num_etypes = curr_graph->NumEdgeTypes();

    for (IdType etype = 0; etype < num_etypes; ++etype) {
      IdType srctype, dsttype;
      std::tie(srctype, dsttype) = curr_graph->GetEndpointTypes(etype);

      const int64_t n_edges = curr_graph->NumEdges(etype);
      max_vertex_cnt[srctype] += n_edges;
      max_vertex_cnt[dsttype] += n_edges;
    }
  }

  for (size_t i = 0; i < always_preserve.size(); ++i) {
    max_vertex_cnt[i] += always_preserve[i]->shape[0];
  }

  // gather all nodes
  std::vector<IdArray> all_nodes(num_ntypes);
  std::vector<int64_t> node_offsets(num_ntypes, 0);

118
  for (uint64_t ntype = 0; ntype < num_ntypes; ++ntype) {
119
120
    all_nodes[ntype] =
        NewIdArray(max_vertex_cnt[ntype], ctx, sizeof(IdType) * 8);
121
    // copy the nodes in always_preserve
122
123
    if (ntype < always_preserve.size() &&
        always_preserve[ntype]->shape[0] > 0) {
124
125
      device->CopyDataFromTo(
          always_preserve[ntype].Ptr<IdType>(), 0,
126
127
128
          all_nodes[ntype].Ptr<IdType>(), node_offsets[ntype],
          sizeof(IdType) * always_preserve[ntype]->shape[0],
          always_preserve[ntype]->ctx, all_nodes[ntype]->ctx,
129
          always_preserve[ntype]->dtype);
130
      node_offsets[ntype] += sizeof(IdType) * always_preserve[ntype]->shape[0];
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
    }
  }

  for (size_t i = 0; i < graphs.size(); ++i) {
    const HeteroGraphPtr curr_graph = graphs[i];
    const int64_t num_etypes = curr_graph->NumEdgeTypes();

    all_edges[i].reserve(num_etypes);
    for (int64_t etype = 0; etype < num_etypes; ++etype) {
      dgl_type_t srctype, dsttype;
      std::tie(srctype, dsttype) = curr_graph->GetEndpointTypes(etype);

      const EdgeArray edges = curr_graph->Edges(etype, "eid");

      if (edges.src.defined()) {
        device->CopyDataFromTo(
147
148
149
150
            edges.src.Ptr<IdType>(), 0, all_nodes[srctype].Ptr<IdType>(),
            node_offsets[srctype], sizeof(IdType) * edges.src->shape[0],
            edges.src->ctx, all_nodes[srctype]->ctx, edges.src->dtype);
        node_offsets[srctype] += sizeof(IdType) * edges.src->shape[0];
151
152
153
      }
      if (edges.dst.defined()) {
        device->CopyDataFromTo(
154
155
156
157
            edges.dst.Ptr<IdType>(), 0, all_nodes[dsttype].Ptr<IdType>(),
            node_offsets[dsttype], sizeof(IdType) * edges.dst->shape[0],
            edges.dst->ctx, all_nodes[dsttype]->ctx, edges.dst->dtype);
        node_offsets[dsttype] += sizeof(IdType) * edges.dst->shape[0];
158
159
160
161
162
163
164
165
166
167
168
169
170
171
      }
      all_edges[i].push_back(edges);
    }
  }

  // Step 2: Relabel the nodes for each type to a smaller ID space
  //         using BuildNodeMaps

  // allocate space for map creation
  // the hashmap on GPU
  DeviceNodeMap<IdType> node_maps(max_vertex_cnt, 0, ctx, stream);
  // number of unique nodes per type on CPU
  std::vector<int64_t> num_induced_nodes(num_ntypes);
  // number of unique nodes per type on GPU
172
173
  int64_t *count_unique_device = static_cast<int64_t *>(
      device->AllocWorkspace(ctx, sizeof(int64_t) * num_ntypes));
174
175
  // the set of unique nodes per type
  std::vector<IdArray> induced_nodes(num_ntypes);
176
  for (uint64_t ntype = 0; ntype < num_ntypes; ++ntype) {
177
178
    induced_nodes[ntype] =
        NewIdArray(max_vertex_cnt[ntype], ctx, sizeof(IdType) * 8);
179
180
181
  }

  BuildNodeMaps(
182
      all_nodes, &node_maps, count_unique_device, &induced_nodes, stream);
183
184

  device->CopyDataFromTo(
185
186
187
      count_unique_device, 0, num_induced_nodes.data(), 0,
      sizeof(*num_induced_nodes.data()) * num_ntypes, ctx,
      DGLContext{kDGLCPU, 0}, DGLDataType{kDGLInt, 64, 1});
188
189
190
191
192
193
  device->StreamSync(ctx, stream);

  // wait for the node counts to finish transferring
  device->FreeWorkspace(ctx, count_unique_device);

  // resize induced nodes
194
  for (uint64_t ntype = 0; ntype < num_ntypes; ++ntype) {
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
    induced_nodes[ntype]->shape[0] = num_induced_nodes[ntype];
  }

  // Step 3: Remap the edges of each graph using MapEdges
  std::vector<HeteroGraphPtr> new_graphs;
  for (size_t i = 0; i < graphs.size(); ++i) {
    const HeteroGraphPtr curr_graph = graphs[i];
    const auto meta_graph = curr_graph->meta_graph();
    const int64_t num_etypes = curr_graph->NumEdgeTypes();

    std::vector<HeteroGraphPtr> rel_graphs;
    rel_graphs.reserve(num_etypes);

    std::vector<IdArray> new_src;
    std::vector<IdArray> new_dst;
210
211
    std::tie(new_src, new_dst) =
        MapEdges(curr_graph, all_edges[i], node_maps, stream);
212
213
214
215
216
217

    for (IdType etype = 0; etype < num_etypes; ++etype) {
      IdType srctype, dsttype;
      std::tie(srctype, dsttype) = curr_graph->GetEndpointTypes(etype);

      rel_graphs.push_back(UnitGraph::CreateFromCOO(
218
219
          srctype == dsttype ? 1 : 2, induced_nodes[srctype]->shape[0],
          induced_nodes[dsttype]->shape[0], new_src[etype], new_dst[etype]));
220
221
    }

222
223
    new_graphs.push_back(
        CreateHeteroGraph(meta_graph, rel_graphs, num_induced_nodes));
224
225
226
227
228
229
230
  }

  return std::make_pair(new_graphs, induced_nodes);
}

}  // namespace

231
template <>
232
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
233
CompactGraphs<kDGLCUDA, int32_t>(
234
235
236
237
238
    const std::vector<HeteroGraphPtr> &graphs,
    const std::vector<IdArray> &always_preserve) {
  return CompactGraphsGPU<int32_t>(graphs, always_preserve);
}

239
template <>
240
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
241
CompactGraphs<kDGLCUDA, int64_t>(
242
243
244
245
246
247
248
    const std::vector<HeteroGraphPtr> &graphs,
    const std::vector<IdArray> &always_preserve) {
  return CompactGraphsGPU<int64_t>(graphs, always_preserve);
}

}  // namespace transform
}  // namespace dgl