Unverified Commit 162dc19a authored by Da Zheng's avatar Da Zheng Committed by GitHub
Browse files

Accelerate edge sampling (#1151)



* accel edge sampling.

* measure sampling time.

* attach positive.

* add neg_deg_sample.

* fix a minor bug.

* remove unnecessary code.

* Revert "add neg_deg_sample."

This reverts commit 45f3c08c87e33589563ca7e6a3ec91cb1239910e.

* Revert "attach positive."

This reverts commit 503d7821555b69b7ccfed55ce8b87ee24fff852d.
Co-authored-by: default avatarxiang song(charlie.song) <classicxsong@gmail.com>
parent 61b78e6e
......@@ -37,11 +37,14 @@ def train(args, model, train_sampler, valid_samplers=None):
logging.info('{:20}:{}'.format(arg, getattr(args, arg)))
start = time.time()
sample_time = 0
update_time = 0
forward_time = 0
backward_time = 0
for step in range(args.init_step, args.max_step):
start1 = time.time()
pos_g, neg_g = next(train_sampler)
sample_time += time.time() - start1
args.step = step
start1 = time.time()
......@@ -64,9 +67,9 @@ def train(args, model, train_sampler, valid_samplers=None):
logs = []
print('[Train] {} steps take {:.3f} seconds'.format(args.log_interval,
time.time() - start))
print('forward: {:.3f}, backward: {:.3f}, update: {:.3f}'.format(forward_time,
backward_time,
update_time))
print('sample: {:.3f}, forward: {:.3f}, backward: {:.3f}, update: {:.3f}'.format(
sample_time, forward_time, backward_time, update_time))
sample_time = 0
update_time = 0
forward_time = 0
backward_time = 0
......
......@@ -1094,7 +1094,6 @@ NegSubgraph EdgeSamplerObject::genNegEdgeSubgraph(const Subgraph &pos_subg,
int64_t num_neg_edges = num_pos_edges * neg_sample_size;
IdArray neg_dst = IdArray::Empty({num_neg_edges}, coo->dtype, coo->ctx);
IdArray neg_src = IdArray::Empty({num_neg_edges}, coo->dtype, coo->ctx);
IdArray neg_eid = IdArray::Empty({num_neg_edges}, coo->dtype, coo->ctx);
IdArray induced_neg_eid = IdArray::Empty({num_neg_edges}, coo->dtype, coo->ctx);
// These are vids in the positive subgraph.
......@@ -1109,7 +1108,6 @@ NegSubgraph EdgeSamplerObject::genNegEdgeSubgraph(const Subgraph &pos_subg,
dgl_id_t *neg_dst_data = static_cast<dgl_id_t *>(neg_dst->data);
dgl_id_t *neg_src_data = static_cast<dgl_id_t *>(neg_src->data);
dgl_id_t *neg_eid_data = static_cast<dgl_id_t *>(neg_eid->data);
dgl_id_t *induced_neg_eid_data = static_cast<dgl_id_t *>(induced_neg_eid->data);
const dgl_id_t *unchanged;
......@@ -1129,7 +1127,6 @@ NegSubgraph EdgeSamplerObject::genNegEdgeSubgraph(const Subgraph &pos_subg,
std::vector<dgl_id_t> local_pos_vids;
local_pos_vids.reserve(num_pos_edges);
dgl_id_t curr_eid = 0;
std::vector<size_t> neg_vids;
neg_vids.reserve(neg_sample_size);
// If we don't exclude positive edges, we are actually sampling more than
......@@ -1205,7 +1202,6 @@ NegSubgraph EdgeSamplerObject::genNegEdgeSubgraph(const Subgraph &pos_subg,
for (int64_t j = 0; j < neg_sample_size; j++) {
neg_unchanged[neg_idx + j] = local_unchanged;
neg_eid_data[neg_idx + j] = curr_eid++;
dgl_id_t local_changed = global2local_map(neg_vids[j + prev_neg_offset], &neg_map);
neg_changed[neg_idx + j] = local_changed;
// induced negative eid references to the positive one.
......@@ -1285,7 +1281,6 @@ NegSubgraph EdgeSamplerObject::genPBGNegEdgeSubgraph(const Subgraph &pos_subg,
IdArray neg_dst = IdArray::Empty({num_all_neg_edges}, coo->dtype, coo->ctx);
IdArray neg_src = IdArray::Empty({num_all_neg_edges}, coo->dtype, coo->ctx);
IdArray neg_eid = IdArray::Empty({num_all_neg_edges}, coo->dtype, coo->ctx);
IdArray induced_neg_eid = IdArray::Empty({num_all_neg_edges}, coo->dtype, coo->ctx);
// These are vids in the positive subgraph.
......@@ -1300,7 +1295,6 @@ NegSubgraph EdgeSamplerObject::genPBGNegEdgeSubgraph(const Subgraph &pos_subg,
dgl_id_t *neg_dst_data = static_cast<dgl_id_t *>(neg_dst->data);
dgl_id_t *neg_src_data = static_cast<dgl_id_t *>(neg_src->data);
dgl_id_t *neg_eid_data = static_cast<dgl_id_t *>(neg_eid->data);
dgl_id_t *induced_neg_eid_data = static_cast<dgl_id_t *>(induced_neg_eid->data);
const dgl_id_t *unchanged;
......@@ -1317,12 +1311,12 @@ NegSubgraph EdgeSamplerObject::genPBGNegEdgeSubgraph(const Subgraph &pos_subg,
}
// We first sample all negative edges.
std::vector<size_t> neg_vids;
std::vector<size_t> global_neg_vids;
std::vector<size_t> local_neg_vids;
randomSample(num_tot_nodes,
num_chunks * neg_sample_size,
&neg_vids);
&global_neg_vids);
dgl_id_t curr_eid = 0;
std::unordered_map<dgl_id_t, dgl_id_t> neg_map;
dgl_id_t local_vid = 0;
......@@ -1338,6 +1332,13 @@ NegSubgraph EdgeSamplerObject::genPBGNegEdgeSubgraph(const Subgraph &pos_subg,
}
}
// We should map the global negative nodes to local Ids in advance
// to reduce computation overhead.
local_neg_vids.resize(global_neg_vids.size());
for (size_t i = 0; i < global_neg_vids.size(); i++) {
local_neg_vids[i] = global2local_map(global_neg_vids[i], &neg_map);;
}
for (int64_t i_chunk = 0; i_chunk < num_chunks; i_chunk++) {
// for each chunk.
int64_t neg_idx = neg_sample_size * chunk_size * i_chunk;
......@@ -1356,12 +1357,7 @@ NegSubgraph EdgeSamplerObject::genPBGNegEdgeSubgraph(const Subgraph &pos_subg,
dgl_id_t local_unchanged = global2local_map(global_unchanged, &neg_map);
for (int64_t j = 0; j < neg_sample_size; ++j) {
neg_unchanged[neg_idx] = local_unchanged;
neg_eid_data[neg_idx] = curr_eid++;
dgl_id_t global_changed_vid = neg_vids[neg_node_idx + j];
// TODO(zhengda) we can avoid the hashtable lookup here.
dgl_id_t local_changed = global2local_map(global_changed_vid, &neg_map);
neg_changed[neg_idx] = local_changed;
neg_changed[neg_idx] = local_neg_vids[neg_node_idx + j];
induced_neg_eid_data[neg_idx] = induced_eid_data[pos_edge_idx + in_chunk];
neg_idx++;
}
......@@ -1384,11 +1380,11 @@ NegSubgraph EdgeSamplerObject::genPBGNegEdgeSubgraph(const Subgraph &pos_subg,
neg_subg.induced_vertices = induced_neg_vid;
neg_subg.induced_edges = induced_neg_eid;
if (IsNegativeHeadMode(neg_mode)) {
neg_subg.head_nid = aten::VecToIdArray(Global2Local(neg_vids, neg_map));
neg_subg.head_nid = aten::VecToIdArray(Global2Local(global_neg_vids, neg_map));
neg_subg.tail_nid = aten::VecToIdArray(local_pos_vids);
} else {
neg_subg.head_nid = aten::VecToIdArray(local_pos_vids);
neg_subg.tail_nid = aten::VecToIdArray(Global2Local(neg_vids, neg_map));
neg_subg.tail_nid = aten::VecToIdArray(Global2Local(global_neg_vids, neg_map));
}
if (check_false_neg) {
if (relations_->shape[0] == 0) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment