Commit 122811a6 (unverified), authored by Zeyuan Tan, committed by GitHub
Browse files

`rand()` -> `torch::randint()` (#217)



* torch randint

* update

* fix type

* update

* add test
Co-authored-by: rusty1s <matthias.fey@tu-dortmund.de>
parent 124bc097
...@@ -19,8 +19,6 @@ ego_k_hop_sample_adj_cpu(torch::Tensor rowptr, torch::Tensor col, ...@@ -19,8 +19,6 @@ ego_k_hop_sample_adj_cpu(torch::Tensor rowptr, torch::Tensor col,
torch::Tensor idx, int64_t depth, torch::Tensor idx, int64_t depth,
int64_t num_neighbors, bool replace) { int64_t num_neighbors, bool replace) {
srand(time(NULL) + 1000 * getpid()); // Initialize random seed.
std::vector<torch::Tensor> out_rowptrs(idx.numel() + 1); std::vector<torch::Tensor> out_rowptrs(idx.numel() + 1);
std::vector<torch::Tensor> out_cols(idx.numel()); std::vector<torch::Tensor> out_cols(idx.numel());
std::vector<torch::Tensor> out_n_ids(idx.numel()); std::vector<torch::Tensor> out_n_ids(idx.numel());
...@@ -56,14 +54,14 @@ ego_k_hop_sample_adj_cpu(torch::Tensor rowptr, torch::Tensor col, ...@@ -56,14 +54,14 @@ ego_k_hop_sample_adj_cpu(torch::Tensor rowptr, torch::Tensor col,
} }
} else if (replace) { } else if (replace) {
for (int64_t j = 0; j < num_neighbors; j++) { for (int64_t j = 0; j < num_neighbors; j++) {
w = col_data[row_start + (rand() % row_count)]; w = col_data[row_start + uniform_randint(row_count)];
n_id_set.insert(w); n_id_set.insert(w);
n_ids.push_back(w); n_ids.push_back(w);
} }
} else { } else {
std::unordered_set<int64_t> perm; std::unordered_set<int64_t> perm;
for (int64_t j = row_count - num_neighbors; j < row_count; j++) { for (int64_t j = row_count - num_neighbors; j < row_count; j++) {
if (!perm.insert(rand() % j).second) { if (!perm.insert(uniform_randint(j)).second) {
perm.insert(j); perm.insert(j);
} }
} }
......
...@@ -105,8 +105,6 @@ hgt_sample_cpu(const c10::Dict<rel_t, torch::Tensor> &colptr_dict, ...@@ -105,8 +105,6 @@ hgt_sample_cpu(const c10::Dict<rel_t, torch::Tensor> &colptr_dict,
const c10::Dict<node_t, vector<int64_t>> &num_samples_dict, const c10::Dict<node_t, vector<int64_t>> &num_samples_dict,
const int64_t num_hops) { const int64_t num_hops) {
srand(time(NULL) + 1000 * getpid()); // Initialize random seed.
// Create a mapping to convert single string relations to edge type triplets: // Create a mapping to convert single string relations to edge type triplets:
unordered_map<rel_t, edge_t> to_edge_type; unordered_map<rel_t, edge_t> to_edge_type;
for (const auto &kv : colptr_dict) { for (const auto &kv : colptr_dict) {
......
...@@ -15,8 +15,6 @@ tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor> ...@@ -15,8 +15,6 @@ tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
sample(const torch::Tensor &colptr, const torch::Tensor &row, sample(const torch::Tensor &colptr, const torch::Tensor &row,
const torch::Tensor &input_node, const vector<int64_t> num_neighbors) { const torch::Tensor &input_node, const vector<int64_t> num_neighbors) {
srand(time(NULL) + 1000 * getpid()); // Initialize random seed.
// Initialize some data structures for the sampling process: // Initialize some data structures for the sampling process:
vector<int64_t> samples; vector<int64_t> samples;
unordered_map<int64_t, int64_t> to_local_node; unordered_map<int64_t, int64_t> to_local_node;
...@@ -59,7 +57,7 @@ sample(const torch::Tensor &colptr, const torch::Tensor &row, ...@@ -59,7 +57,7 @@ sample(const torch::Tensor &colptr, const torch::Tensor &row,
} }
} else if (replace) { } else if (replace) {
for (int64_t j = 0; j < num_samples; j++) { for (int64_t j = 0; j < num_samples; j++) {
const int64_t offset = col_start + rand() % col_count; const int64_t offset = col_start + uniform_randint(col_count);
const int64_t &v = row_data[offset]; const int64_t &v = row_data[offset];
const auto res = to_local_node.insert({v, samples.size()}); const auto res = to_local_node.insert({v, samples.size()});
if (res.second) if (res.second)
...@@ -73,7 +71,7 @@ sample(const torch::Tensor &colptr, const torch::Tensor &row, ...@@ -73,7 +71,7 @@ sample(const torch::Tensor &colptr, const torch::Tensor &row,
} else { } else {
unordered_set<int64_t> rnd_indices; unordered_set<int64_t> rnd_indices;
for (int64_t j = col_count - num_samples; j < col_count; j++) { for (int64_t j = col_count - num_samples; j < col_count; j++) {
int64_t rnd = rand() % j; int64_t rnd = uniform_randint(j);
if (!rnd_indices.insert(rnd).second) { if (!rnd_indices.insert(rnd).second) {
rnd = j; rnd = j;
rnd_indices.insert(j); rnd_indices.insert(j);
...@@ -127,8 +125,6 @@ hetero_sample(const vector<node_t> &node_types, ...@@ -127,8 +125,6 @@ hetero_sample(const vector<node_t> &node_types,
const c10::Dict<rel_t, vector<int64_t>> &num_neighbors_dict, const c10::Dict<rel_t, vector<int64_t>> &num_neighbors_dict,
const int64_t num_hops) { const int64_t num_hops) {
srand(time(NULL) + 1000 * getpid()); // Initialize random seed.
// Create a mapping to convert single string relations to edge type triplets: // Create a mapping to convert single string relations to edge type triplets:
unordered_map<rel_t, edge_t> to_edge_type; unordered_map<rel_t, edge_t> to_edge_type;
for (const auto &k : edge_types) for (const auto &k : edge_types)
...@@ -180,8 +176,10 @@ hetero_sample(const vector<node_t> &node_types, ...@@ -180,8 +176,10 @@ hetero_sample(const vector<node_t> &node_types,
auto &src_samples = samples_dict.at(src_node_type); auto &src_samples = samples_dict.at(src_node_type);
auto &to_local_src_node = to_local_node_dict.at(src_node_type); auto &to_local_src_node = to_local_node_dict.at(src_node_type);
const auto *colptr_data = ((torch::Tensor)colptr_dict.at(rel_type)).data_ptr<int64_t>(); const auto *colptr_data =
const auto *row_data = ((torch::Tensor)row_dict.at(rel_type)).data_ptr<int64_t>(); ((torch::Tensor)colptr_dict.at(rel_type)).data_ptr<int64_t>();
const auto *row_data =
((torch::Tensor)row_dict.at(rel_type)).data_ptr<int64_t>();
auto &rows = rows_dict.at(rel_type); auto &rows = rows_dict.at(rel_type);
auto &cols = cols_dict.at(rel_type); auto &cols = cols_dict.at(rel_type);
...@@ -212,7 +210,7 @@ hetero_sample(const vector<node_t> &node_types, ...@@ -212,7 +210,7 @@ hetero_sample(const vector<node_t> &node_types,
} }
} else if (replace) { } else if (replace) {
for (int64_t j = 0; j < num_samples; j++) { for (int64_t j = 0; j < num_samples; j++) {
const int64_t offset = col_start + rand() % col_count; const int64_t offset = col_start + uniform_randint(col_count);
const int64_t &v = row_data[offset]; const int64_t &v = row_data[offset];
const auto res = to_local_src_node.insert({v, src_samples.size()}); const auto res = to_local_src_node.insert({v, src_samples.size()});
if (res.second) if (res.second)
...@@ -226,7 +224,7 @@ hetero_sample(const vector<node_t> &node_types, ...@@ -226,7 +224,7 @@ hetero_sample(const vector<node_t> &node_types,
} else { } else {
unordered_set<int64_t> rnd_indices; unordered_set<int64_t> rnd_indices;
for (int64_t j = col_count - num_samples; j < col_count; j++) { for (int64_t j = col_count - num_samples; j < col_count; j++) {
int64_t rnd = rand() % j; int64_t rnd = uniform_randint(j);
if (!rnd_indices.insert(rnd).second) { if (!rnd_indices.insert(rnd).second) {
rnd = j; rnd = j;
rnd_indices.insert(j); rnd_indices.insert(j);
...@@ -262,7 +260,8 @@ hetero_sample(const vector<node_t> &node_types, ...@@ -262,7 +260,8 @@ hetero_sample(const vector<node_t> &node_types,
auto &to_local_src_node = to_local_node_dict.at(src_node_type); auto &to_local_src_node = to_local_node_dict.at(src_node_type);
const auto *colptr_data = ((torch::Tensor)kv.value()).data_ptr<int64_t>(); const auto *colptr_data = ((torch::Tensor)kv.value()).data_ptr<int64_t>();
const auto *row_data = ((torch::Tensor)row_dict.at(rel_type)).data_ptr<int64_t>(); const auto *row_data =
((torch::Tensor)row_dict.at(rel_type)).data_ptr<int64_t>();
auto &rows = rows_dict.at(rel_type); auto &rows = rows_dict.at(rel_type);
auto &cols = cols_dict.at(rel_type); auto &cols = cols_dict.at(rel_type);
......
...@@ -15,8 +15,6 @@ sample_adj_cpu(torch::Tensor rowptr, torch::Tensor col, torch::Tensor idx, ...@@ -15,8 +15,6 @@ sample_adj_cpu(torch::Tensor rowptr, torch::Tensor col, torch::Tensor idx,
CHECK_CPU(idx); CHECK_CPU(idx);
CHECK_INPUT(idx.dim() == 1); CHECK_INPUT(idx.dim() == 1);
srand(time(NULL) + 1000 * getpid()); // Initialize random seed.
auto rowptr_data = rowptr.data_ptr<int64_t>(); auto rowptr_data = rowptr.data_ptr<int64_t>();
auto col_data = col.data_ptr<int64_t>(); auto col_data = col.data_ptr<int64_t>();
auto idx_data = idx.data_ptr<int64_t>(); auto idx_data = idx.data_ptr<int64_t>();
...@@ -69,7 +67,7 @@ sample_adj_cpu(torch::Tensor rowptr, torch::Tensor col, torch::Tensor idx, ...@@ -69,7 +67,7 @@ sample_adj_cpu(torch::Tensor rowptr, torch::Tensor col, torch::Tensor idx,
if (row_count > 0) { if (row_count > 0) {
for (int64_t j = 0; j < num_neighbors; j++) { for (int64_t j = 0; j < num_neighbors; j++) {
e = row_start + rand() % row_count; e = row_start + uniform_randint(row_count);
c = col_data[e]; c = col_data[e];
if (n_id_map.count(c) == 0) { if (n_id_map.count(c) == 0) {
...@@ -96,7 +94,7 @@ sample_adj_cpu(torch::Tensor rowptr, torch::Tensor col, torch::Tensor idx, ...@@ -96,7 +94,7 @@ sample_adj_cpu(torch::Tensor rowptr, torch::Tensor col, torch::Tensor idx,
} else { // See: https://www.nowherenearithaca.com/2013/05/ } else { // See: https://www.nowherenearithaca.com/2013/05/
// robert-floyds-tiny-and-beautiful.html // robert-floyds-tiny-and-beautiful.html
for (int64_t j = row_count - num_neighbors; j < row_count; j++) { for (int64_t j = row_count - num_neighbors; j < row_count; j++) {
if (!perm.insert(rand() % j).second) if (!perm.insert(uniform_randint(j)).second)
perm.insert(j); perm.insert(j);
} }
} }
......
...@@ -35,6 +35,18 @@ from_vector(const std::unordered_map<key_t, std::vector<scalar_t>> &vec_dict, ...@@ -35,6 +35,18 @@ from_vector(const std::unordered_map<key_t, std::vector<scalar_t>> &vec_dict,
return out_dict; return out_dict;
} }
// Draws a single random 64-bit integer through `torch::randint`, which
// samples uniformly from the half-open interval [low, high).
//
// Going through torch's generator (rather than C `rand()`) is what lets
// callers make sampling reproducible with `torch.manual_seed`.
//
// `CHECK_LT` enforces the precondition low < high before sampling.
inline int64_t uniform_randint(int64_t low, int64_t high) {
  CHECK_LT(low, high);
  // A one-element int64 tensor holds the drawn value.
  const auto sample = torch::randint(
      low, high, {1}, torch::TensorOptions().dtype(torch::kInt64));
  return *sample.data_ptr<int64_t>();
}
// Convenience overload: forwards to `uniform_randint(0, high)`, i.e. the
// lower bound is fixed at zero and `high` is passed through unchanged.
inline int64_t uniform_randint(int64_t high) {
  constexpr int64_t kLowerBound = 0;
  return uniform_randint(kLowerBound, high);
}
inline torch::Tensor inline torch::Tensor
choice(int64_t population, int64_t num_samples, bool replace = false, choice(int64_t population, int64_t num_samples, bool replace = false,
torch::optional<torch::Tensor> weight = torch::nullopt) { torch::optional<torch::Tensor> weight = torch::nullopt) {
...@@ -52,7 +64,7 @@ choice(int64_t population, int64_t num_samples, bool replace = false, ...@@ -52,7 +64,7 @@ choice(int64_t population, int64_t num_samples, bool replace = false,
const auto out = torch::empty(num_samples, at::kLong); const auto out = torch::empty(num_samples, at::kLong);
auto *out_data = out.data_ptr<int64_t>(); auto *out_data = out.data_ptr<int64_t>();
for (int64_t i = 0; i < num_samples; i++) { for (int64_t i = 0; i < num_samples; i++) {
out_data[i] = rand() % population; out_data[i] = uniform_randint(population);
} }
return out; return out;
...@@ -64,7 +76,7 @@ choice(int64_t population, int64_t num_samples, bool replace = false, ...@@ -64,7 +76,7 @@ choice(int64_t population, int64_t num_samples, bool replace = false,
auto *out_data = out.data_ptr<int64_t>(); auto *out_data = out.data_ptr<int64_t>();
std::unordered_set<int64_t> samples; std::unordered_set<int64_t> samples;
for (int64_t i = population - num_samples; i < population; i++) { for (int64_t i = population - num_samples; i < population; i++) {
int64_t sample = rand() % i; int64_t sample = uniform_randint(i);
if (!samples.insert(sample).second) { if (!samples.insert(sample).second) {
sample = i; sample = i;
samples.insert(sample); samples.insert(sample);
...@@ -86,7 +98,7 @@ uniform_choice(const int64_t population, const int64_t num_samples, ...@@ -86,7 +98,7 @@ uniform_choice(const int64_t population, const int64_t num_samples,
if (replace) { if (replace) {
for (int64_t i = 0; i < num_samples; i++) { for (int64_t i = 0; i < num_samples; i++) {
const int64_t &v = idx_data[rand() % population]; const int64_t &v = idx_data[uniform_randint(population)];
if (to_local_node->insert({v, samples->size()}).second) if (to_local_node->insert({v, samples->size()}).second)
samples->push_back(v); samples->push_back(v);
} }
...@@ -99,7 +111,7 @@ uniform_choice(const int64_t population, const int64_t num_samples, ...@@ -99,7 +111,7 @@ uniform_choice(const int64_t population, const int64_t num_samples,
} else { } else {
std::unordered_set<int64_t> indices; std::unordered_set<int64_t> indices;
for (int64_t i = population - num_samples; i < population; i++) { for (int64_t i = population - num_samples; i < population; i++) {
int64_t j = rand() % i; int64_t j = uniform_randint(i);
if (!indices.insert(j).second) { if (!indices.insert(j).second) {
j = i; j = i;
indices.insert(j); indices.insert(j);
......
#include "macros.h" #include "macros.h"
#include <torch/extension.h> #include <torch/extension.h>
// for getpid()
#ifdef _WIN32
#include <process.h>
#else
#include <unistd.h>
#endif
...@@ -25,3 +25,18 @@ def test_neighbor_sample(): ...@@ -25,3 +25,18 @@ def test_neighbor_sample():
assert out[0].tolist() == [1, 0] assert out[0].tolist() == [1, 0]
assert out[1].tolist() == [1] assert out[1].tolist() == [1]
assert out[2].tolist() == [0] assert out[2].tolist() == [0]
def test_neighbor_sample_seed():
    """Check that `neighbor_sample` is reproducible under a fixed torch seed.

    The same call is made twice, each time right after
    `torch.manual_seed(42)`, and every pair of returned tensors must match.
    """
    colptr = torch.tensor([0, 3, 6, 9])
    row = torch.tensor([0, 1, 2, 0, 1, 2, 0, 1, 2])
    input_nodes = torch.tensor([0, 1])

    def seeded_run():
        # Re-seed immediately before sampling so both runs start from the
        # same generator state.
        torch.manual_seed(42)
        return neighbor_sample(colptr, row, input_nodes, [1, 1], True, False)

    first_run = seeded_run()
    second_run = seeded_run()

    for lhs, rhs in zip(first_run, second_run):
        assert lhs.tolist() == rhs.tolist()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment