/**
 *  Copyright (c) 2023 by Contributors
 * @file csc_sampling_graph.cc
 * @brief Source file of sampling graph.
 */
#include <graphbolt/csc_sampling_graph.h>
#include <graphbolt/serialize.h>

#include <torch/torch.h>

#include <tuple>
#include <vector>

#include "./shared_memory_utils.h"
namespace graphbolt {
namespace sampling {

/**
 * @brief Construct a graph in CSC format from its constituent tensors.
 *
 * @param indptr 1-D tensor of per-node offsets into @p indices.
 * @param indices 1-D tensor of neighbor node IDs.
 * @param node_type_offset Optional per-node-type offsets (heterogeneous
 * graphs); validated in FromCSC(), not here.
 * @param type_per_edge Optional per-edge type IDs (heterogeneous graphs);
 * validated in FromCSC(), not here.
 */
CSCSamplingGraph::CSCSamplingGraph(
    const torch::Tensor& indptr, const torch::Tensor& indices,
    const torch::optional<torch::Tensor>& node_type_offset,
    const torch::optional<torch::Tensor>& type_per_edge)
    : indptr_(indptr),
      indices_(indices),
      node_type_offset_(node_type_offset),
      type_per_edge_(type_per_edge) {
  // Structural sanity checks: both CSC arrays must be 1-D and live on the
  // same device.
  TORCH_CHECK(indptr.dim() == 1);
  TORCH_CHECK(indices.dim() == 1);
  TORCH_CHECK(indptr.device() == indices.device());
}

/**
 * @brief Validate optional heterogeneous metadata and build a graph.
 *
 * @param indptr 1-D CSC offset tensor.
 * @param indices 1-D neighbor-ID tensor.
 * @param node_type_offset Optional 1-D node-type offset tensor.
 * @param type_per_edge Optional 1-D per-edge type tensor; must have one
 * entry per edge in @p indices.
 * @return An intrusive pointer to the constructed graph.
 */
c10::intrusive_ptr<CSCSamplingGraph> CSCSamplingGraph::FromCSC(
    const torch::Tensor& indptr, const torch::Tensor& indices,
    const torch::optional<torch::Tensor>& node_type_offset,
    const torch::optional<torch::Tensor>& type_per_edge) {
  // The optional metadata is validated here; the constructor checks the
  // mandatory CSC arrays.
  if (node_type_offset.has_value()) {
    TORCH_CHECK(node_type_offset.value().dim() == 1);
  }
  if (type_per_edge.has_value()) {
    const auto& etype_tensor = type_per_edge.value();
    TORCH_CHECK(etype_tensor.dim() == 1);
    TORCH_CHECK(etype_tensor.size(0) == indices.size(0));
  }
  return c10::make_intrusive<CSCSamplingGraph>(
      indptr, indices, node_type_offset, type_per_edge);
}

/**
 * @brief Deserialize the graph's members from a torch archive.
 *
 * Keys are looked up by name, mirroring the layout written by Save().
 *
 * @param archive The input archive to read from.
 */
void CSCSamplingGraph::Load(torch::serialize::InputArchive& archive) {
  // Reject archives written by an incompatible serialization format.
  const int64_t magic_num =
      read_from_archive(archive, "CSCSamplingGraph/magic_num").toInt();
  TORCH_CHECK(
      magic_num == kCSCSamplingGraphSerializeMagic,
      "Magic numbers mismatch when loading CSCSamplingGraph.");
  indptr_ = read_from_archive(archive, "CSCSamplingGraph/indptr").toTensor();
  indices_ = read_from_archive(archive, "CSCSamplingGraph/indices").toTensor();
  // The optional tensors are present only when their boolean flag was
  // recorded as true by Save().
  const bool has_node_type_offset =
      read_from_archive(archive, "CSCSamplingGraph/has_node_type_offset")
          .toBool();
  if (has_node_type_offset) {
    node_type_offset_ =
        read_from_archive(archive, "CSCSamplingGraph/node_type_offset")
            .toTensor();
  }
  const bool has_type_per_edge =
      read_from_archive(archive, "CSCSamplingGraph/has_type_per_edge").toBool();
  if (has_type_per_edge) {
    type_per_edge_ =
        read_from_archive(archive, "CSCSamplingGraph/type_per_edge").toTensor();
  }
}

/**
 * @brief Serialize the graph's members into a torch archive.
 *
 * A magic number is written first so Load() can detect incompatible
 * payloads; each optional tensor is preceded by a boolean presence flag.
 *
 * @param archive The output archive to write into.
 */
void CSCSamplingGraph::Save(torch::serialize::OutputArchive& archive) const {
  archive.write("CSCSamplingGraph/magic_num", kCSCSamplingGraphSerializeMagic);
  archive.write("CSCSamplingGraph/indptr", indptr_);
  archive.write("CSCSamplingGraph/indices", indices_);
  const bool has_node_type_offset = node_type_offset_.has_value();
  archive.write(
      "CSCSamplingGraph/has_node_type_offset", has_node_type_offset);
  if (has_node_type_offset) {
    archive.write("CSCSamplingGraph/node_type_offset", *node_type_offset_);
  }
  const bool has_type_per_edge = type_per_edge_.has_value();
  archive.write("CSCSamplingGraph/has_type_per_edge", has_type_per_edge);
  if (has_type_per_edge) {
    archive.write("CSCSamplingGraph/type_per_edge", *type_per_edge_);
  }
}

/**
 * @brief Extract the subgraph induced on the incoming edges of @p nodes.
 *
 * @param nodes 1-D tensor of seed node IDs.
 * @return A SampledSubgraph whose indptr covers only the seeds with at least
 * one neighbor, plus their neighbor indices, original node IDs, and original
 * edge IDs (and per-edge types when the graph is heterogeneous).
 *
 * NOTE(review): indices/edge IDs are concatenated in the order seeds appear
 * in `nodes`, while the compacted indptr follows ascending node-ID order
 * (from the nonzero scan below). The two agree only if `nodes` is sorted and
 * duplicate-free — confirm callers guarantee this.
 */
c10::intrusive_ptr<SampledSubgraph> CSCSamplingGraph::InSubgraph(
    const torch::Tensor& nodes) const {
  using namespace torch::indexing;
  const int32_t kDefaultGrainSize = 100;
  // Degree accumulator of the same length as indptr_: slot node_id + 1
  // receives the seed's in-degree so a later cumsum yields a CSC indptr.
  torch::Tensor indptr = torch::zeros_like(indptr_);
  const size_t num_seeds = nodes.size(0);
  std::vector<torch::Tensor> indices_arr(num_seeds);
  std::vector<torch::Tensor> edge_ids_arr(num_seeds);
  std::vector<torch::Tensor> type_per_edge_arr(num_seeds);
  // Each worker writes only its own i-th vector slots and a distinct indptr
  // element per (distinct) seed, so no synchronization is used.
  torch::parallel_for(
      0, num_seeds, kDefaultGrainSize, [&](size_t start, size_t end) {
        for (size_t i = start; i < end; ++i) {
          const int64_t node_id = nodes[i].item<int64_t>();
          const int64_t start_idx = indptr_[node_id].item<int64_t>();
          const int64_t end_idx = indptr_[node_id + 1].item<int64_t>();
          indptr[node_id + 1] = end_idx - start_idx;
          // Slices are views into the original edge arrays; edge IDs are the
          // positions [start_idx, end_idx) in the original graph.
          indices_arr[i] = indices_.slice(0, start_idx, end_idx);
          edge_ids_arr[i] = torch::arange(start_idx, end_idx);
          if (type_per_edge_) {
            type_per_edge_arr[i] =
                type_per_edge_.value().slice(0, start_idx, end_idx);
          }
        }
      });

  // Keep only the indptr slots with a nonzero degree; nonzero_idx holds the
  // positions node_id + 1, hence the `- 1` below to recover node IDs.
  const auto& nonzero_idx = torch::nonzero(indptr).reshape(-1);
  torch::Tensor compact_indptr =
      torch::zeros({nonzero_idx.size(0) + 1}, indptr_.dtype());
  compact_indptr.index_put_({Slice(1, None)}, indptr.index({nonzero_idx}));
  return c10::make_intrusive<SampledSubgraph>(
      compact_indptr.cumsum(0), torch::cat(indices_arr), nonzero_idx - 1,
      torch::arange(0, NumNodes()), torch::cat(edge_ids_arr),
      type_per_edge_
          ? torch::optional<torch::Tensor>{torch::cat(type_per_edge_arr)}
          : torch::nullopt);
}

/**
 * @brief Sample neighboring edges of the given seed nodes.
 *
 * @param nodes 1-D tensor of seed node IDs; each must lie in [0, NumNodes()).
 * @param fanouts Number of edges to pick per node; when it has more than one
 * entry, entry k is the fanout for edge type k.
 * @param replace Whether sampling is performed with replacement.
 * @param return_eids Whether to also return the picked original edge IDs.
 * @param probs_or_mask Optional per-edge (unnormalized) probabilities or
 * boolean mask over the whole graph's edges.
 * @return A SampledSubgraph over the picked edges.
 */
c10::intrusive_ptr<SampledSubgraph> CSCSamplingGraph::SampleNeighbors(
    const torch::Tensor& nodes, const std::vector<int64_t>& fanouts,
    bool replace, bool return_eids,
    torch::optional<torch::Tensor> probs_or_mask) const {
  const int64_t num_nodes = nodes.size(0);
  // Note probs will be passed as input for 'torch.multinomial' in deeper stack,
  // which doesn't support 'torch.half' and 'torch.bool' data types. To avoid
  // crashes, convert 'probs_or_mask' to 'float32' data type.
  if (probs_or_mask.has_value() &&
      (probs_or_mask.value().dtype() == torch::kBool ||
       probs_or_mask.value().dtype() == torch::kFloat16)) {
    probs_or_mask = probs_or_mask.value().to(torch::kFloat32);
  }
  // If true, perform sampling for each edge type of each node, otherwise just
  // sample once for each node with no regard of edge types.
  bool consider_etype = (fanouts.size() > 1);
  // NOTE(review): when consider_etype is true the call below dereferences
  // type_per_edge_ unconditionally — confirm callers guarantee it has a value
  // whenever multiple fanouts are passed.
  std::vector<torch::Tensor> picked_neighbors_per_node(num_nodes);
  // Slot i + 1 holds seed i's pick count so the cumsum below directly yields
  // the subgraph's indptr (slot 0 stays zero).
  torch::Tensor num_picked_neighbors_per_node =
      torch::zeros({num_nodes + 1}, indptr_.options());

  // Each worker writes only its own i-th slots, so no locking is used.
  torch::parallel_for(0, num_nodes, 32, [&](size_t b, size_t e) {
    for (size_t i = b; i < e; ++i) {
      const auto nid = nodes[i].item<int64_t>();
      TORCH_CHECK(
          nid >= 0 && nid < NumNodes(),
          "The seed nodes' IDs should fall within the range of the graph's "
          "node IDs.");
      const auto offset = indptr_[nid].item<int64_t>();
      const auto num_neighbors = indptr_[nid + 1].item<int64_t>() - offset;

      if (num_neighbors == 0) {
        // Initialization is performed here because all tensors will be
        // concatenated in the master thread, and having an undefined tensor
        // during concatenation can result in a crash.
        picked_neighbors_per_node[i] = torch::tensor({}, indptr_.options());
        continue;
      }

      if (consider_etype) {
        picked_neighbors_per_node[i] = PickByEtype(
            offset, num_neighbors, fanouts, replace, indptr_.options(),
            type_per_edge_.value(), probs_or_mask);
      } else {
        picked_neighbors_per_node[i] = Pick(
            offset, num_neighbors, fanouts[0], replace, indptr_.options(),
            probs_or_mask);
      }
      num_picked_neighbors_per_node[i + 1] =
          picked_neighbors_per_node[i].size(0);
    }
  });  // End of the thread.

  torch::Tensor subgraph_indptr =
      torch::cumsum(num_picked_neighbors_per_node, 0);

  // The picked values are edge IDs into the original graph; translate them
  // to neighbor node IDs via indices_.
  torch::Tensor picked_eids = torch::cat(picked_neighbors_per_node);
  torch::Tensor subgraph_indices =
      torch::index_select(indices_, 0, picked_eids);
  torch::optional<torch::Tensor> subgraph_type_per_edge = torch::nullopt;
  if (type_per_edge_.has_value())
    subgraph_type_per_edge =
        torch::index_select(type_per_edge_.value(), 0, picked_eids);
  // Only hand the edge-ID mapping back when the caller asked for it.
  torch::optional<torch::Tensor> subgraph_reverse_edge_ids = torch::nullopt;
  if (return_eids) subgraph_reverse_edge_ids = std::move(picked_eids);
  return c10::make_intrusive<SampledSubgraph>(
      subgraph_indptr, subgraph_indices, nodes, torch::nullopt,
      subgraph_reverse_edge_ids, subgraph_type_per_edge);
}

/**
 * @brief Assemble a graph from tensors already mapped into shared memory.
 *
 * @param shared_memory_tensors Tuple of (metadata shared memory, data shared
 * memory, tensors). The tensor slots are positional: [0] indptr and
 * [1] indices (required), [2] node_type_offset and [3] type_per_edge
 * (optional) — the same order CopyToSharedMemory() writes them.
 * @return The constructed graph, which keeps both shared-memory handles
 * alive since its tensors alias that memory.
 */
c10::intrusive_ptr<CSCSamplingGraph>
CSCSamplingGraph::BuildGraphFromSharedMemoryTensors(
    std::tuple<
        SharedMemoryPtr, SharedMemoryPtr,
        std::vector<torch::optional<torch::Tensor>>>&& shared_memory_tensors) {
  auto& tensors = std::get<2>(shared_memory_tensors);
  auto graph = c10::make_intrusive<CSCSamplingGraph>(
      tensors[0].value(), tensors[1].value(), tensors[2], tensors[3]);
  // Transfer ownership of the shared-memory segments into the graph so they
  // outlive the aliasing tensors.
  graph->tensor_meta_shm_ = std::move(std::get<0>(shared_memory_tensors));
  graph->tensor_data_shm_ = std::move(std::get<1>(shared_memory_tensors));
  return graph;
}

/**
 * @brief Copy this graph's tensors into shared memory and rebuild the graph
 * on top of the shared copies.
 *
 * @param shared_memory_name Name identifying the shared-memory region.
 * @return A new graph backed by shared memory.
 */
c10::intrusive_ptr<CSCSamplingGraph> CSCSamplingGraph::CopyToSharedMemory(
    const std::string& shared_memory_name) {
  // Positional order matters: BuildGraphFromSharedMemoryTensors() reads the
  // tensors back by index.
  std::vector<torch::optional<torch::Tensor>> tensors{
      indptr_, indices_, node_type_offset_, type_per_edge_};
  auto shm_tensors = CopyTensorsToSharedMemory(
      shared_memory_name, tensors, SERIALIZED_METAINFO_SIZE_MAX);
  return BuildGraphFromSharedMemoryTensors(std::move(shm_tensors));
}

/**
 * @brief Attach to an existing shared-memory region (written by
 * CopyToSharedMemory) and rebuild the graph from it.
 *
 * @param shared_memory_name Name identifying the shared-memory region.
 * @return A graph whose tensors alias the shared memory.
 */
c10::intrusive_ptr<CSCSamplingGraph> CSCSamplingGraph::LoadFromSharedMemory(
    const std::string& shared_memory_name) {
  auto shm_tensors = LoadTensorsFromSharedMemory(
      shared_memory_name, SERIALIZED_METAINFO_SIZE_MAX);
  return BuildGraphFromSharedMemoryTensors(std::move(shm_tensors));
}

/**
 * @brief Perform uniform sampling of elements and return the sampled indices.
 *
 * @param offset The starting edge ID for the connected neighbors of the sampled
 * node.
 * @param num_neighbors The number of neighbors to pick.
 * @param fanout The number of edges to be sampled for each node. It should be
 * >= 0 or -1.
 *  - When the value is -1, all neighbors will be chosen for sampling. It is
 * equivalent to selecting all neighbors with non-zero probability when the
 * fanout is >= the number of neighbors (and replacement is set to false).
 *  - When the value is a non-negative integer, it serves as a minimum
 * threshold for selecting neighbors.
 * @param replace Boolean indicating whether the sample is performed with or
 * without replacement. If True, a value can be selected multiple times.
 * Otherwise, each value can be selected only once.
 * @param options Tensor options specifying the desired data type of the result.
 *
 * @return A tensor containing the picked neighbors.
 */
inline torch::Tensor UniformPick(
    int64_t offset, int64_t num_neighbors, int64_t fanout, bool replace,
    const torch::TensorOptions& options) {
  const bool pick_all =
      (fanout == -1) || (num_neighbors <= fanout && !replace);
  if (pick_all) {
    // Every neighbor is selected exactly once.
    return torch::arange(offset, offset + num_neighbors, options);
  }
  if (replace) {
    // With replacement: draw `fanout` independent uniform edge IDs.
    return torch::randint(offset, offset + num_neighbors, {fanout}, options);
  }
  // Without replacement: shuffle the local neighbor positions, keep the
  // first `fanout`, then shift into global edge-ID space.
  auto shuffled = torch::randperm(num_neighbors, options);
  return shuffled.slice(0, 0, fanout) + offset;
}

/**
 * @brief Perform non-uniform sampling of elements based on probabilities and
 * return the sampled indices.
 *
 * If 'probs_or_mask' is provided, it indicates that the sampling is
 * non-uniform. In such cases:
 * - When the number of neighbors with non-zero probability is less than or
 * equal to fanout, all neighbors with non-zero probability will be selected.
 * - When the number of neighbors with non-zero probability exceeds fanout, the
 * sampling process will select 'fanout' elements based on their respective
 * probabilities. Higher probabilities will increase the chances of being chosen
 * during the sampling process.
 *
 * @param offset The starting edge ID for the connected neighbors of the sampled
 * node.
 * @param num_neighbors The number of neighbors to pick.
 * @param fanout The number of edges to be sampled for each node. It should be
 * >= 0 or -1.
 *  - When the value is -1, all neighbors will be chosen for sampling. It is
 * equivalent to selecting all neighbors with non-zero probability when the
 * fanout is >= the number of neighbors (and replacement is set to false).
 *  - When the value is a non-negative integer, it serves as a minimum
 * threshold for selecting neighbors.
 * @param replace Boolean indicating whether the sample is performed with or
 * without replacement. If True, a value can be selected multiple times.
 * Otherwise, each value can be selected only once.
 * @param options Tensor options specifying the desired data type of the result.
 * @param probs_or_mask Optional tensor containing the (unnormalized)
 * probabilities associated with each neighboring edge of a node in the original
 * graph. It must be a 1D floating-point tensor with the number of elements
 * equal to the number of edges in the graph.
 *
 * @return A tensor containing the picked neighbors.
 */
inline torch::Tensor NonUniformPick(
    int64_t offset, int64_t num_neighbors, int64_t fanout, bool replace,
    const torch::TensorOptions& options,
    const torch::optional<torch::Tensor>& probs_or_mask) {
  // Restrict the probabilities to this node's neighborhood.
  const auto local_probs =
      probs_or_mask.value().slice(0, offset, offset + num_neighbors);
  const auto positive_probs_indices = local_probs.nonzero().squeeze(1);
  const auto num_positive_probs = positive_probs_indices.size(0);
  // Nothing is pickable when every neighbor has zero probability.
  if (num_positive_probs == 0) return torch::tensor({}, options);
  const bool pick_all_positive =
      (fanout == -1) || (num_positive_probs <= fanout && !replace);
  if (pick_all_positive) {
    // Select exactly the neighbors with non-zero probability, as edge IDs.
    auto all_edge_ids = torch::arange(offset, offset + num_neighbors, options);
    return torch::index_select(all_edge_ids, 0, positive_probs_indices);
  }
  // Weighted draw; without replacement never request more than the number of
  // positive-probability candidates.
  if (!replace) fanout = std::min(fanout, num_positive_probs);
  return torch::multinomial(local_probs, fanout, replace) + offset;
}

/**
 * @brief Pick neighbors for one node, dispatching on whether per-edge
 * weights were supplied.
 *
 * Delegates to NonUniformPick() when 'probs_or_mask' has a value, otherwise
 * to UniformPick(); see those functions for parameter semantics.
 *
 * @return A tensor containing the picked neighbors.
 */
torch::Tensor Pick(
    int64_t offset, int64_t num_neighbors, int64_t fanout, bool replace,
    const torch::TensorOptions& options,
    const torch::optional<torch::Tensor>& probs_or_mask) {
  return probs_or_mask.has_value()
             ? NonUniformPick(
                   offset, num_neighbors, fanout, replace, options,
                   probs_or_mask)
             : UniformPick(offset, num_neighbors, fanout, replace, options);
}

/**
 * @brief Pick neighbors separately for each edge type of one node.
 *
 * Scans the node's neighborhood as contiguous runs of equal edge type (the
 * per-node edges are iterated in order, so edges of one type are expected to
 * be adjacent) and applies Pick() to each run with that type's fanout.
 *
 * @param offset The starting edge ID of the node's neighbors.
 * @param num_neighbors The number of neighbors of the node.
 * @param fanouts Per-edge-type fanouts; entry k is used for edge type k.
 * @param replace Whether sampling is performed with replacement.
 * @param options Tensor options for the result.
 * @param type_per_edge Per-edge type IDs over the whole graph.
 * @param probs_or_mask Optional per-edge probabilities; see Pick().
 *
 * @return The concatenation of the picks of all edge types.
 */
torch::Tensor PickByEtype(
    int64_t offset, int64_t num_neighbors, const std::vector<int64_t>& fanouts,
    bool replace, const torch::TensorOptions& options,
    const torch::Tensor& type_per_edge,
    const torch::optional<torch::Tensor>& probs_or_mask) {
  // One (possibly empty) result slot per edge type, so types absent from
  // this node still contribute an empty tensor to the concatenation.
  std::vector<torch::Tensor> picked_neighbors(
      fanouts.size(), torch::tensor({}, options));
  const int64_t bound = offset + num_neighbors;
  int64_t segment_begin = offset;
  while (segment_begin < bound) {
    const int64_t etype = type_per_edge[segment_begin].item<int64_t>();
    // Advance to the end of this contiguous run of same-type edges.
    int64_t segment_end = segment_begin + 1;
    while (segment_end < bound &&
           type_per_edge[segment_end].item<int64_t>() == etype) {
      ++segment_end;
    }
    const int64_t fanout = fanouts[etype];
    // A zero fanout means this edge type is skipped entirely.
    if (fanout != 0) {
      picked_neighbors[etype] = Pick(
          segment_begin, segment_end - segment_begin, fanout, replace, options,
          probs_or_mask);
    }
    segment_begin = segment_end;
  }
  return torch::cat(picked_neighbors, 0);
}

}  // namespace sampling
}  // namespace graphbolt