"sgl-kernel/python/vscode:/vscode.git/clone" did not exist on "42c8704560b4bb4baab022a5bad97b1f788eaa08"
Unverified commit 11bdd6e8, authored by czkkkkkk and committed via GitHub
Browse files

[Graphbolt] Refactor the nonuniform pick function to make it reusable. (#6772)

parent 3d657dbf
......@@ -818,66 +818,24 @@ inline int64_t UniformPick(
}
}
/**
* @brief Perform non-uniform sampling of elements based on probabilities and
* return the sampled indices.
*
* If 'probs_or_mask' is provided, it indicates that the sampling is
* non-uniform. In such cases:
* - When the number of neighbors with non-zero probability is less than or
* equal to fanout, all neighbors with non-zero probability will be selected.
* - When the number of neighbors with non-zero probability exceeds fanout, the
* sampling process will select 'fanout' elements based on their respective
* probabilities. Higher probabilities will increase the chances of being chosen
* during the sampling process.
*
* @param offset The starting edge ID for the connected neighbors of the sampled
* node.
* @param num_neighbors The number of neighbors to pick.
* @param fanout The number of edges to be sampled for each node. It should be
* >= 0 or -1.
* - When the value is -1, all neighbors with non-zero probability will be
* sampled once regardless of replacement. It is equivalent to selecting all
* neighbors with non-zero probability when the fanout is >= the number of
* neighbors (and replacement is set to false).
* - When the value is a non-negative integer, it serves as a minimum
* threshold for selecting neighbors.
* @param replace Boolean indicating whether the sample is performed with or
* without replacement. If True, a value can be selected multiple times.
* Otherwise, each value can be selected only once.
* @param options Tensor options specifying the desired data type of the result.
* @param probs_or_mask Optional tensor containing the (unnormalized)
* probabilities associated with each neighboring edge of a node in the original
* graph. It must be a 1D floating-point tensor with the number of elements
* equal to the number of edges in the graph.
* @param picked_data_ptr The destination address where the picked neighbors
* should be put. Enough memory space should be allocated in advance.
*/
template <typename PickedType>
inline int64_t NonUniformPick(
int64_t offset, int64_t num_neighbors, int64_t fanout, bool replace,
const torch::TensorOptions& options,
const torch::optional<torch::Tensor>& probs_or_mask,
PickedType* picked_data_ptr) {
auto local_probs =
probs_or_mask.value().slice(0, offset, offset + num_neighbors);
auto positive_probs_indices = local_probs.nonzero().squeeze(1);
/** @brief An operator to perform non-uniform sampling. */
static torch::Tensor NonUniformPickOp(
torch::Tensor probs, int64_t fanout, bool replace) {
auto positive_probs_indices = probs.nonzero().squeeze(1);
auto num_positive_probs = positive_probs_indices.size(0);
if (num_positive_probs == 0) return 0;
if (num_positive_probs == 0) return torch::empty({0}, torch::kLong);
if ((fanout == -1) || (num_positive_probs <= fanout && !replace)) {
std::memcpy(
picked_data_ptr,
(positive_probs_indices + offset).data_ptr<PickedType>(),
num_positive_probs * sizeof(PickedType));
return num_positive_probs;
} else {
return positive_probs_indices;
}
if (!replace) fanout = std::min(fanout, num_positive_probs);
if (fanout == 0) return 0;
if (fanout == 0) return torch::empty({0}, torch::kLong);
auto ret_tensor = torch::empty({fanout}, torch::kLong);
auto ret_ptr = ret_tensor.data_ptr<int64_t>();
AT_DISPATCH_FLOATING_TYPES(
local_probs.scalar_type(), "MultinomialSampling", ([&] {
auto local_probs_data_ptr = local_probs.data_ptr<scalar_t>();
probs.scalar_type(), "MultinomialSampling", ([&] {
auto probs_data_ptr = probs.data_ptr<scalar_t>();
auto positive_probs_indices_ptr =
positive_probs_indices.data_ptr<PickedType>();
positive_probs_indices.data_ptr<int64_t>();
if (!replace) {
// The algorithm is from gumbel softmax.
......@@ -890,26 +848,24 @@ inline int64_t NonUniformPick(
if (fanout == 1) {
// Return argmax(p / q).
scalar_t max_prob = 0;
PickedType max_prob_index = -1;
int64_t max_prob_index = -1;
// We only care about the neighbors with non-zero probability.
for (auto i = 0; i < num_positive_probs; ++i) {
// Calculate (p / q) for the current neighbor.
scalar_t current_prob =
local_probs_data_ptr[positive_probs_indices_ptr[i]] /
probs_data_ptr[positive_probs_indices_ptr[i]] /
RandomEngine::ThreadLocal()->Exponential(1.);
if (current_prob > max_prob) {
max_prob = current_prob;
max_prob_index = positive_probs_indices_ptr[i];
}
}
*picked_data_ptr = max_prob_index + offset;
ret_ptr[0] = max_prob_index;
} else {
// Return topk(p / q).
std::vector<std::pair<scalar_t, PickedType>> q(
num_positive_probs);
std::vector<std::pair<scalar_t, int64_t>> q(num_positive_probs);
for (auto i = 0; i < num_positive_probs; ++i) {
q[i].first =
local_probs_data_ptr[positive_probs_indices_ptr[i]] /
q[i].first = probs_data_ptr[positive_probs_indices_ptr[i]] /
RandomEngine::ThreadLocal()->Exponential(1.);
q[i].second = positive_probs_indices_ptr[i];
}
......@@ -918,14 +874,14 @@ inline int64_t NonUniformPick(
std::partial_sort(
q.begin(), q.begin() + fanout, q.end(), std::greater{});
for (auto i = 0; i < fanout; ++i) {
picked_data_ptr[i] = q[i].second + offset;
ret_ptr[i] = q[i].second;
}
} else {
// Use nth_element.
std::nth_element(
q.begin(), q.begin() + fanout - 1, q.end(), std::greater{});
for (auto i = 0; i < fanout; ++i) {
picked_data_ptr[i] = q[i].second + offset;
ret_ptr[i] = q[i].second;
}
}
}
......@@ -934,7 +890,7 @@ inline int64_t NonUniformPick(
std::vector<scalar_t> prefix_sum_probs(num_positive_probs);
scalar_t sum_probs = 0;
for (auto i = 0; i < num_positive_probs; ++i) {
sum_probs += local_probs_data_ptr[positive_probs_indices_ptr[i]];
sum_probs += probs_data_ptr[positive_probs_indices_ptr[i]];
prefix_sum_probs[i] = sum_probs;
}
// Normalize.
......@@ -952,13 +908,63 @@ inline int64_t NonUniformPick(
prefix_sum_probs.begin(),
prefix_sum_probs.end(), uniform_sample) -
prefix_sum_probs.begin();
picked_data_ptr[i] =
positive_probs_indices_ptr[sampled_index] + offset;
ret_ptr[i] = positive_probs_indices_ptr[sampled_index];
}
}
}));
return fanout;
return ret_tensor;
}
/**
 * @brief Perform non-uniform sampling of elements based on probabilities and
 * return the sampled indices.
 *
 * If 'probs_or_mask' is provided, it indicates that the sampling is
 * non-uniform. In such cases:
 * - When the number of neighbors with non-zero probability is less than or
 * equal to fanout, all neighbors with non-zero probability will be selected.
 * - When the number of neighbors with non-zero probability exceeds fanout, the
 * sampling process will select 'fanout' elements based on their respective
 * probabilities. Higher probabilities will increase the chances of being chosen
 * during the sampling process.
 *
 * @param offset The starting edge ID for the connected neighbors of the sampled
 * node.
 * @param num_neighbors The number of neighbors to pick.
 * @param fanout The number of edges to be sampled for each node. It should be
 * >= 0 or -1.
 * - When the value is -1, all neighbors with non-zero probability will be
 * sampled once regardless of replacement. It is equivalent to selecting all
 * neighbors with non-zero probability when the fanout is >= the number of
 * neighbors (and replacement is set to false).
 * - When the value is a non-negative integer, it serves as a minimum
 * threshold for selecting neighbors.
 * @param replace Boolean indicating whether the sample is performed with or
 * without replacement. If True, a value can be selected multiple times.
 * Otherwise, each value can be selected only once.
 * @param options Tensor options specifying the desired data type of the result.
 * NOTE(review): 'options' is currently unused in this implementation — the
 * picked indices are written through 'picked_data_ptr' as PickedType; kept for
 * interface compatibility.
 * @param probs_or_mask Optional tensor containing the (unnormalized)
 * probabilities associated with each neighboring edge of a node in the original
 * graph. It must be a 1D floating-point tensor with the number of elements
 * equal to the number of edges in the graph.
 * @param picked_data_ptr The destination address where the picked neighbors
 * should be put. Enough memory space should be allocated in advance.
 *
 * @return The number of neighbors actually picked (<= fanout, or all
 * positive-probability neighbors when fanout == -1).
 */
template <typename PickedType>
inline int64_t NonUniformPick(
    int64_t offset, int64_t num_neighbors, int64_t fanout, bool replace,
    const torch::TensorOptions& options,
    const torch::optional<torch::Tensor>& probs_or_mask,
    PickedType* picked_data_ptr) {
  // Restrict the probability tensor to this node's neighborhood.
  auto local_probs =
      probs_or_mask.value().slice(0, offset, offset + num_neighbors);
  // Delegate the actual sampling to the reusable operator; it returns
  // neighborhood-local indices (int64) of the picked neighbors.
  auto picked_indices = NonUniformPickOp(local_probs, fanout, replace);
  // Hoist numel() out of the loop and use int64_t to match its return type
  // (the original used 'int', a narrowing hazard for very large picks).
  const int64_t num_picked = picked_indices.numel();
  auto picked_indices_ptr = picked_indices.data_ptr<int64_t>();
  // Translate local indices back to global edge IDs by adding the
  // neighborhood's starting offset.
  for (int64_t i = 0; i < num_picked; ++i) {
    picked_data_ptr[i] =
        static_cast<PickedType>(picked_indices_ptr[i]) + offset;
  }
  return num_picked;
}
template <typename PickedType>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment