hgt_sample_cpu.cpp 8.87 KB
Newer Older
rusty1s's avatar
rusty1s committed
1
2
#include "hgt_sample_cpu.h"

rusty1s's avatar
rusty1s committed
3
#include "utils.h"
rusty1s's avatar
rusty1s committed
4

rusty1s's avatar
rusty1s committed
5
#define MAX_NEIGHBORS 50
rusty1s's avatar
update  
rusty1s committed
6

rusty1s's avatar
typos  
rusty1s committed
7
8
using namespace std;

rusty1s's avatar
rusty1s committed
9
edge_t split(const rel_t &rel_type) {
rusty1s's avatar
rusty1s committed
10
11
  vector<string> result(3);
  int start = 0, end;
rusty1s's avatar
rusty1s committed
12
  for (int i = 0; i < 3; i++) {
rusty1s's avatar
rusty1s committed
13
    end = rel_type.find("__", start);
rusty1s's avatar
rusty1s committed
14
15
16
    result[i] = rel_type.substr(start, end - start);
    start = end + 2;
  }
rusty1s's avatar
rusty1s committed
17
  return make_tuple(result[0], result[1], result[2]);
rusty1s's avatar
rusty1s committed
18
19
}

rusty1s's avatar
rusty1s committed
20
21
void update_budget_(
    unordered_map<node_t, unordered_map<int64_t, float>> *budget_dict,
rusty1s's avatar
typos  
rusty1s committed
22
    const node_t &node_type, const vector<int64_t> &samples,
rusty1s's avatar
rusty1s committed
23
24
25
    const unordered_map<node_t, unordered_map<int64_t, int64_t>>
        &to_local_node_dict,
    const unordered_map<rel_t, edge_t> &to_edge_type,
rusty1s's avatar
update  
rusty1s committed
26
27
    const c10::Dict<rel_t, torch::Tensor> &colptr_dict,
    const c10::Dict<rel_t, torch::Tensor> &row_dict) {
rusty1s's avatar
rusty1s committed
28

rusty1s's avatar
rusty1s committed
29
30
31
  if (samples.empty())
    return;

rusty1s's avatar
update  
rusty1s committed
32
  for (const auto &kv : colptr_dict) {
rusty1s's avatar
rusty1s committed
33
    const auto &rel_type = kv.key();
rusty1s's avatar
rusty1s committed
34
35
36
    const auto &edge_type = to_edge_type.at(rel_type);
    const auto &src_node_type = get<0>(edge_type);
    const auto &dst_node_type = get<2>(edge_type);
rusty1s's avatar
rusty1s committed
37
38
39
40

    if (node_type != dst_node_type)
      continue;

rusty1s's avatar
rusty1s committed
41
    const auto &to_local_src_node = to_local_node_dict.at(src_node_type);
rusty1s's avatar
update  
rusty1s committed
42
43
    const auto *colptr_data = kv.value().data_ptr<int64_t>();
    const auto *row_data = row_dict.at(rel_type).data_ptr<int64_t>();
rusty1s's avatar
rusty1s committed
44
    auto &src_budget = budget_dict->at(src_node_type);
rusty1s's avatar
rusty1s committed
45

rusty1s's avatar
rusty1s committed
46
47
48
    for (const auto &w : samples) {
      const auto &col_start = colptr_data[w], &col_end = colptr_data[w + 1];
      if (col_end - col_start > MAX_NEIGHBORS) {
rusty1s's avatar
update  
rusty1s committed
49
        // There might be same neighbors with large neighborhood sizes.
rusty1s's avatar
rusty1s committed
50
51
52
53
54
55
        // In order to prevent that we fill our budget with many values of low
        // probability, we instead sample a fixed amount without replacement:
        auto indices = choice(col_end - col_start, MAX_NEIGHBORS, false);
        auto *indices_data = indices.data_ptr<int64_t>();
        for (int64_t i = 0; i < indices.numel(); i++) {
          const auto &v = row_data[col_start + indices_data[i]];
rusty1s's avatar
update  
rusty1s committed
56
          // Only add the neighbor in case we have not yet seen it before:
rusty1s's avatar
rusty1s committed
57
58
          if (to_local_src_node.find(v) == to_local_src_node.end())
            src_budget[v] += 1.f / float(MAX_NEIGHBORS);
rusty1s's avatar
update  
rusty1s committed
59
        }
rusty1s's avatar
rusty1s committed
60
61

      } else if (col_end != col_start) {
rusty1s's avatar
update  
rusty1s committed
62
        const auto inv_deg = 1.f / float(col_end - col_start);
rusty1s's avatar
rusty1s committed
63
64
        for (int64_t i = col_start; i < col_end; i++) {
          const auto &v = row_data[i];
rusty1s's avatar
update  
rusty1s committed
65
          // Only add the neighbor in case we have not yet seen it before:
rusty1s's avatar
rusty1s committed
66
67
          if (to_local_src_node.find(v) == to_local_src_node.end())
            src_budget[v] += inv_deg;
rusty1s's avatar
rusty1s committed
68
69
70
71
72
73
        }
      }
    }
  }
}

rusty1s's avatar
rusty1s committed
74
75
76
77
78
79
vector<int64_t> sample_from(const unordered_map<int64_t, float> &budget,
                            const int64_t num_samples) {
  vector<int64_t> indices;
  vector<float> weights;
  indices.reserve(budget.size());
  weights.reserve(budget.size());
rusty1s's avatar
update  
rusty1s committed
80
  for (const auto &kv : budget) {
rusty1s's avatar
rusty1s committed
81
82
    indices.push_back(kv.first);
    weights.push_back(kv.second * kv.second);
rusty1s's avatar
update  
rusty1s committed
83
  }
rusty1s's avatar
rusty1s committed
84

rusty1s's avatar
rusty1s committed
85
86
87
  const auto weight = from_vector(weights, true);
  const auto sample = choice(budget.size(), num_samples, false, weight);
  const auto *sample_data = sample.data_ptr<int64_t>();
rusty1s's avatar
rusty1s committed
88

rusty1s's avatar
rusty1s committed
89
90
91
  vector<int64_t> out(sample.numel());
  for (int64_t i = 0; i < sample.numel(); i++) {
    out[i] = indices[sample_data[i]];
rusty1s's avatar
rusty1s committed
92
  }
rusty1s's avatar
rusty1s committed
93
  return out;
rusty1s's avatar
rusty1s committed
94
95
}

rusty1s's avatar
rusty1s committed
96
97
tuple<c10::Dict<node_t, torch::Tensor>, c10::Dict<rel_t, torch::Tensor>,
      c10::Dict<rel_t, torch::Tensor>, c10::Dict<rel_t, torch::Tensor>>
rusty1s's avatar
update  
rusty1s committed
98
99
hgt_sample_cpu(const c10::Dict<rel_t, torch::Tensor> &colptr_dict,
               const c10::Dict<rel_t, torch::Tensor> &row_dict,
rusty1s's avatar
rusty1s committed
100
               const c10::Dict<node_t, torch::Tensor> &input_node_dict,
rusty1s's avatar
rusty1s committed
101
102
               const c10::Dict<node_t, vector<int64_t>> &num_samples_dict,
               const int64_t num_hops) {
rusty1s's avatar
rusty1s committed
103

rusty1s's avatar
rusty1s committed
104
  // Create a mapping to convert single string relations to edge type triplets:
rusty1s's avatar
rusty1s committed
105
  unordered_map<rel_t, edge_t> to_edge_type;
rusty1s's avatar
update  
rusty1s committed
106
  for (const auto &kv : colptr_dict) {
rusty1s's avatar
rusty1s committed
107
    const auto &rel_type = kv.key();
rusty1s's avatar
rusty1s committed
108
    to_edge_type[rel_type] = split(rel_type);
rusty1s's avatar
rusty1s committed
109
110
  }

rusty1s's avatar
rusty1s committed
111
112
113
114
  // Initialize some necessary data structures for the sampling process:
  unordered_map<node_t, vector<int64_t>> nodes_dict;
  unordered_map<node_t, unordered_map<int64_t, int64_t>> to_local_node_dict;
  unordered_map<node_t, unordered_map<int64_t, float>> budget_dict;
rusty1s's avatar
fix  
rusty1s committed
115
116
  for (const auto &kv : num_samples_dict) {
    const auto &node_type = kv.key();
rusty1s's avatar
rusty1s committed
117
118
    nodes_dict[node_type];
    to_local_node_dict[node_type];
rusty1s's avatar
fix  
rusty1s committed
119
120
    budget_dict[node_type];
  }
rusty1s's avatar
rusty1s committed
121

rusty1s's avatar
rusty1s committed
122
  // Add the input nodes to the sampled output nodes (line 1):
rusty1s's avatar
rusty1s committed
123
124
125
126
127
  for (const auto &kv : input_node_dict) {
    const auto &node_type = kv.key();
    const auto &input_node = kv.value();
    const auto *input_node_data = input_node.data_ptr<int64_t>();

rusty1s's avatar
rusty1s committed
128
129
    auto &nodes = nodes_dict.at(node_type);
    auto &to_local_node = to_local_node_dict.at(node_type);
rusty1s's avatar
rusty1s committed
130
    for (int64_t i = 0; i < input_node.numel(); i++) {
rusty1s's avatar
rusty1s committed
131
132
133
      const auto &v = input_node_data[i];
      nodes.push_back(v);
      to_local_node[v] = i;
rusty1s's avatar
rusty1s committed
134
    }
rusty1s's avatar
update  
rusty1s committed
135
  }
rusty1s's avatar
rusty1s committed
136
137
138
139
140
141
142

  // Update the budget based on the initial input set (line 3-5):
  for (const auto &kv : nodes_dict) {
    const auto &node_type = kv.first;
    const auto &last_samples = kv.second;
    update_budget_(&budget_dict, node_type, last_samples, to_local_node_dict,
                   to_edge_type, colptr_dict, row_dict);
rusty1s's avatar
rusty1s committed
143
144
145
  }

  for (int64_t ell = 0; ell < num_hops; ell++) {
rusty1s's avatar
rusty1s committed
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
    unordered_map<node_t, vector<int64_t>> samples_dict;
    for (auto &kv : budget_dict) {
      const auto &node_type = kv.first;
      auto &budget = kv.second;
      const auto num_samples = num_samples_dict.at(node_type)[ell];

      // Sample `num_samples` nodes, according to the budget (line 9-11):
      const auto samples = sample_from(budget, num_samples);
      samples_dict[node_type] = samples;

      // Add samples to the sampled output nodes, and erase them from the budget
      // (line 13/15):
      auto &nodes = nodes_dict.at(node_type);
      auto &to_local_node = to_local_node_dict.at(node_type);
      for (const auto &v : samples) {
        to_local_node[v] = nodes.size();
        nodes.push_back(v);
        budget.erase(v);
      }
rusty1s's avatar
update  
rusty1s committed
165
    }
rusty1s's avatar
rusty1s committed
166

rusty1s's avatar
rusty1s committed
167
168
169
170
171
172
173
174
    if (ell < num_hops - 1) {
      // Add neighbors of newly sampled nodes to the budget (line 14):
      // Note that we do not need to update the budget in the last iteration.
      for (const auto &kv : samples_dict) {
        const auto &node_type = kv.first;
        const auto &last_samples = kv.second;
        update_budget_(&budget_dict, node_type, last_samples,
                       to_local_node_dict, to_edge_type, colptr_dict, row_dict);
rusty1s's avatar
update  
rusty1s committed
175
      }
rusty1s's avatar
rusty1s committed
176
177
178
    }
  }

rusty1s's avatar
rusty1s committed
179
180
181
182
  c10::Dict<node_t, torch::Tensor> out_node_dict;
  c10::Dict<rel_t, torch::Tensor> out_row_dict;
  c10::Dict<rel_t, torch::Tensor> out_col_dict;
  c10::Dict<rel_t, torch::Tensor> out_edge_dict;
rusty1s's avatar
update  
rusty1s committed
183

rusty1s's avatar
rusty1s committed
184
  // Reconstruct the sampled adjacency matrix among the sampled nodes (line 19):
rusty1s's avatar
update  
rusty1s committed
185
  for (const auto &kv : colptr_dict) {
rusty1s's avatar
rusty1s committed
186
    const auto &rel_type = kv.key();
rusty1s's avatar
rusty1s committed
187
188
189
    const auto &edge_type = to_edge_type.at(rel_type);
    const auto &src_node_type = get<0>(edge_type);
    const auto &dst_node_type = get<2>(edge_type);
rusty1s's avatar
rusty1s committed
190

rusty1s's avatar
update  
rusty1s committed
191
192
    const auto *colptr_data = kv.value().data_ptr<int64_t>();
    const auto *row_data = row_dict.at(rel_type).data_ptr<int64_t>();
rusty1s's avatar
rusty1s committed
193

rusty1s's avatar
rusty1s committed
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
    const auto &dst_nodes = nodes_dict.at(dst_node_type);
    const auto &to_local_src_node = to_local_node_dict.at(src_node_type);

    vector<int64_t> rows, cols, edges;
    for (int64_t i = 0; i < (int64_t)dst_nodes.size(); i++) {
      const auto &w = dst_nodes[i];
      const auto &col_start = colptr_data[w], &col_end = colptr_data[w + 1];
      if (col_end - col_start > MAX_NEIGHBORS) {
        auto indices = choice(col_end - col_start, MAX_NEIGHBORS, false);
        auto *indices_data = indices.data_ptr<int64_t>();
        for (int64_t j = 0; j < indices.numel(); j++) {
          const auto &v = row_data[col_start + indices_data[j]];
          if (to_local_src_node.find(v) != to_local_src_node.end()) {
            rows.push_back(to_local_src_node.at(v));
            cols.push_back(i);
            edges.push_back(col_start + j);
          }
        }
      } else {
        for (int64_t j = col_start; j < col_end; j++) {
          const auto &v = row_data[j];
          if (to_local_src_node.find(v) != to_local_src_node.end()) {
            rows.push_back(to_local_src_node.at(v));
            cols.push_back(i);
            edges.push_back(j);
          }
rusty1s's avatar
rusty1s committed
220
221
222
        }
      }
    }
rusty1s's avatar
update  
rusty1s committed
223
    if (rows.size() > 0) {
rusty1s's avatar
rusty1s committed
224
225
226
      out_row_dict.insert(rel_type, from_vector<int64_t>(rows));
      out_col_dict.insert(rel_type, from_vector<int64_t>(cols));
      out_edge_dict.insert(rel_type, from_vector<int64_t>(edges));
rusty1s's avatar
update  
rusty1s committed
227
    }
rusty1s's avatar
rusty1s committed
228
229
  }

rusty1s's avatar
rusty1s committed
230
231
232
233
234
235
  // Generate tensor-valued output node dictionary (line 20):
  for (const auto &kv : nodes_dict) {
    const auto &node_type = kv.first;
    const auto &nodes = kv.second;
    if (!nodes.empty())
      out_node_dict.insert(node_type, from_vector<int64_t>(nodes));
rusty1s's avatar
rusty1s committed
236
237
  }

rusty1s's avatar
rusty1s committed
238
  return make_tuple(out_node_dict, out_row_dict, out_col_dict, out_edge_dict);
rusty1s's avatar
rusty1s committed
239
}