`rand()` -> `torch::randint()` (#217)

* torch randint
* update
* fix type
* update
* add test

Co-authored-by: rusty1s <matthias.fey@tu-dortmund.de>

`rand()` -> `torch::randint()` (#217)
* torch randint
* update
* fix type
* update
* add test

Co-authored-by: rusty1s <matthias.fey@tu-dortmund.de>
122811a6 · Zeyuan Tan · GitHub · 124bc097 · 122811a6 · 122811a6
Unverified Commit 122811a6 authored Apr 02, 2022 by Zeyuan Tan Committed by GitHub Apr 02, 2022
7 changed files
--- a/csrc/cpu/ego_sample_cpu.cpp
+++ b/csrc/cpu/ego_sample_cpu.cpp
@@ -19,8 +19,6 @@ ego_k_hop_sample_adj_cpu(torch::Tensor rowptr, torch::Tensor col,
                         torch::Tensor idx, int64_t depth,
                         int64_t num_neighbors, bool replace) {
-  srand(time(NULL) + 1000 * getpid()); // Initialize random seed.
  std::vector<torch::Tensor> out_rowptrs(idx.numel() + 1);
  std::vector<torch::Tensor> out_cols(idx.numel());
  std::vector<torch::Tensor> out_n_ids(idx.numel());
@@ -56,14 +54,14 @@ ego_k_hop_sample_adj_cpu(torch::Tensor rowptr, torch::Tensor col,
            }
          } else if (replace) {
            for (int64_t j = 0; j < num_neighbors; j++) {
-              w = col_data[row_start + (rand() % row_count)];
+              w = col_data[row_start + uniform_randint(row_count)];
              n_id_set.insert(w);
              n_ids.push_back(w);
            }
          } else {
            std::unordered_set<int64_t> perm;
            for (int64_t j = row_count - num_neighbors; j < row_count; j++) {
-              if (!perm.insert(rand() % j).second) {
+              if (!perm.insert(uniform_randint(j)).second) {
                perm.insert(j);
              }
            }

--- a/csrc/cpu/hgt_sample_cpu.cpp
+++ b/csrc/cpu/hgt_sample_cpu.cpp
@@ -105,8 +105,6 @@ hgt_sample_cpu(const c10::Dict<rel_t, torch::Tensor> &colptr_dict,
               const c10::Dict<node_t, vector<int64_t>> &num_samples_dict,
               const int64_t num_hops) {
-  srand(time(NULL) + 1000 * getpid()); // Initialize random seed.
  // Create a mapping to convert single string relations to edge type triplets:
  unordered_map<rel_t, edge_t> to_edge_type;
  for (const auto &kv : colptr_dict) {

--- a/csrc/cpu/neighbor_sample_cpu.cpp
+++ b/csrc/cpu/neighbor_sample_cpu.cpp
@@ -15,8 +15,6 @@ tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
 sample(const torch::Tensor &colptr, const torch::Tensor &row,
       const torch::Tensor &input_node, const vector<int64_t> num_neighbors) {
-  srand(time(NULL) + 1000 * getpid()); // Initialize random seed.
  // Initialize some data structures for the sampling process:
  vector<int64_t> samples;
  unordered_map<int64_t, int64_t> to_local_node;
@@ -59,7 +57,7 @@ sample(const torch::Tensor &colptr, const torch::Tensor &row,
        }
      } else if (replace) {
        for (int64_t j = 0; j < num_samples; j++) {
-          const int64_t offset = col_start + rand() % col_count;
+          const int64_t offset = col_start + uniform_randint(col_count);
          const int64_t &v = row_data[offset];
          const auto res = to_local_node.insert({v, samples.size()});
          if (res.second)
@@ -73,7 +71,7 @@ sample(const torch::Tensor &colptr, const torch::Tensor &row,
      } else {
        unordered_set<int64_t> rnd_indices;
        for (int64_t j = col_count - num_samples; j < col_count; j++) {
-          int64_t rnd = rand() % j;
+          int64_t rnd = uniform_randint(j);
          if (!rnd_indices.insert(rnd).second) {
            rnd = j;
            rnd_indices.insert(j);
@@ -127,8 +125,6 @@ hetero_sample(const vector<node_t> &node_types,
              const c10::Dict<rel_t, vector<int64_t>> &num_neighbors_dict,
              const int64_t num_hops) {
-  srand(time(NULL) + 1000 * getpid()); // Initialize random seed.
  // Create a mapping to convert single string relations to edge type triplets:
  unordered_map<rel_t, edge_t> to_edge_type;
  for (const auto &k : edge_types)
@@ -180,8 +176,10 @@ hetero_sample(const vector<node_t> &node_types,
      auto &src_samples = samples_dict.at(src_node_type);
      auto &to_local_src_node = to_local_node_dict.at(src_node_type);
-      const auto *colptr_data = ((torch::Tensor)colptr_dict.at(rel_type)).data_ptr<int64_t>();
-      const auto *row_data = ((torch::Tensor)row_dict.at(rel_type)).data_ptr<int64_t>();
+      const auto *colptr_data =
+          ((torch::Tensor)colptr_dict.at(rel_type)).data_ptr<int64_t>();
+      const auto *row_data =
+          ((torch::Tensor)row_dict.at(rel_type)).data_ptr<int64_t>();
      auto &rows = rows_dict.at(rel_type);
      auto &cols = cols_dict.at(rel_type);
@@ -212,7 +210,7 @@ hetero_sample(const vector<node_t> &node_types,
          }
        } else if (replace) {
          for (int64_t j = 0; j < num_samples; j++) {
-            const int64_t offset = col_start + rand() % col_count;
+            const int64_t offset = col_start + uniform_randint(col_count);
            const int64_t &v = row_data[offset];
            const auto res = to_local_src_node.insert({v, src_samples.size()});
            if (res.second)
@@ -226,7 +224,7 @@ hetero_sample(const vector<node_t> &node_types,
        } else {
          unordered_set<int64_t> rnd_indices;
          for (int64_t j = col_count - num_samples; j < col_count; j++) {
-            int64_t rnd = rand() % j;
+            int64_t rnd = uniform_randint(j);
            if (!rnd_indices.insert(rnd).second) {
              rnd = j;
              rnd_indices.insert(j);
@@ -262,7 +260,8 @@ hetero_sample(const vector<node_t> &node_types,
      auto &to_local_src_node = to_local_node_dict.at(src_node_type);
      const auto *colptr_data = ((torch::Tensor)kv.value()).data_ptr<int64_t>();
-      const auto *row_data = ((torch::Tensor)row_dict.at(rel_type)).data_ptr<int64_t>();
+      const auto *row_data =
+          ((torch::Tensor)row_dict.at(rel_type)).data_ptr<int64_t>();
      auto &rows = rows_dict.at(rel_type);
      auto &cols = cols_dict.at(rel_type);

--- a/csrc/cpu/sample_cpu.cpp
+++ b/csrc/cpu/sample_cpu.cpp
@@ -15,8 +15,6 @@ sample_adj_cpu(torch::Tensor rowptr, torch::Tensor col, torch::Tensor idx,
  CHECK_CPU(idx);
  CHECK_INPUT(idx.dim() == 1);
-  srand(time(NULL) + 1000 * getpid()); // Initialize random seed.
  auto rowptr_data = rowptr.data_ptr<int64_t>();
  auto col_data = col.data_ptr<int64_t>();
  auto idx_data = idx.data_ptr<int64_t>();
@@ -69,7 +67,7 @@ sample_adj_cpu(torch::Tensor rowptr, torch::Tensor col, torch::Tensor idx,
      if (row_count > 0) {
        for (int64_t j = 0; j < num_neighbors; j++) {
-          e = row_start + rand() % row_count;
+          e = row_start + uniform_randint(row_count);
          c = col_data[e];
          if (n_id_map.count(c) == 0) {
@@ -96,7 +94,7 @@ sample_adj_cpu(torch::Tensor rowptr, torch::Tensor col, torch::Tensor idx,
      } else { // See: https://www.nowherenearithaca.com/2013/05/
               //      robert-floyds-tiny-and-beautiful.html
        for (int64_t j = row_count - num_neighbors; j < row_count; j++) {
-          if (!perm.insert(rand() % j).second)
+          if (!perm.insert(uniform_randint(j)).second)
            perm.insert(j);
        }
      }

--- a/csrc/cpu/utils.h
+++ b/csrc/cpu/utils.h
@@ -35,6 +35,18 @@ from_vector(const std::unordered_map<key_t, std::vector<scalar_t>> &vec_dict,
  return out_dict;
 }
+// Draws a single int64 value via torch::randint(low, high, ...), i.e. from
+// the half-open range [low, high) per the torch::randint contract. Requires
+// low < high (enforced by CHECK_LT).
+inline int64_t uniform_randint(int64_t low, int64_t high) {
+  CHECK_LT(low, high);
+  const auto int64_opts = torch::TensorOptions().dtype(torch::kInt64);
+  // Sample a one-element tensor and read its only entry.
+  const auto sample = torch::randint(low, high, {1}, int64_opts);
+  return sample.data_ptr<int64_t>()[0];
+}
+// Convenience overload: forwards to uniform_randint(0, high), so the lower
+// bound is fixed at zero.
+inline int64_t uniform_randint(int64_t high) {
+  constexpr int64_t kLow = 0;
+  return uniform_randint(kLow, high);
+}
 inline torch::Tensor
 choice(int64_t population, int64_t num_samples, bool replace = false,
       torch::optional<torch::Tensor> weight = torch::nullopt) {
@@ -52,7 +64,7 @@ choice(int64_t population, int64_t num_samples, bool replace = false,
    const auto out = torch::empty(num_samples, at::kLong);
    auto *out_data = out.data_ptr<int64_t>();
    for (int64_t i = 0; i < num_samples; i++) {
-      out_data[i] = rand() % population;
+      out_data[i] = uniform_randint(population);
    }
    return out;
@@ -64,7 +76,7 @@ choice(int64_t population, int64_t num_samples, bool replace = false,
    auto *out_data = out.data_ptr<int64_t>();
    std::unordered_set<int64_t> samples;
    for (int64_t i = population - num_samples; i < population; i++) {
-      int64_t sample = rand() % i;
+      int64_t sample = uniform_randint(i);
      if (!samples.insert(sample).second) {
        sample = i;
        samples.insert(sample);
@@ -86,7 +98,7 @@ uniform_choice(const int64_t population, const int64_t num_samples,
  if (replace) {
    for (int64_t i = 0; i < num_samples; i++) {
-      const int64_t &v = idx_data[rand() % population];
+      const int64_t &v = idx_data[uniform_randint(population)];
      if (to_local_node->insert({v, samples->size()}).second)
        samples->push_back(v);
    }
@@ -99,7 +111,7 @@ uniform_choice(const int64_t population, const int64_t num_samples,
  } else {
    std::unordered_set<int64_t> indices;
    for (int64_t i = population - num_samples; i < population; i++) {
-      int64_t j = rand() % i;
+      int64_t j = uniform_randint(i);
      if (!indices.insert(j).second) {
        j = i;
        indices.insert(j);

--- a/csrc/extensions.h
+++ b/csrc/extensions.h
 #include "macros.h"
 #include <torch/extension.h>
-// for getpid()
-#ifdef _WIN32
-#include <process.h>
-#else
-#include <unistd.h>
-#endif
--- a/test/test_neighbor_sample.py
+++ b/test/test_neighbor_sample.py
@@ -25,3 +25,18 @@ def test_neighbor_sample():
    assert out[0].tolist() == [1, 0]
    assert out[1].tolist() == [1]
    assert out[2].tolist() == [0]
+def test_neighbor_sample_seed():
+    colptr = torch.tensor([0, 3, 6, 9])
+    row = torch.tensor([0, 1, 2, 0, 1, 2, 0, 1, 2])
+    input_nodes = torch.tensor([0, 1])
+    torch.manual_seed(42)
+    out1 = neighbor_sample(colptr, row, input_nodes, [1, 1], True, False)
+    torch.manual_seed(42)
+    out2 = neighbor_sample(colptr, row, input_nodes, [1, 1], True, False)
+    for data1, data2 in zip(out1, out2):
+        assert data1.tolist() == data2.tolist()