"...text-generation-inference.git" did not exist on "6cb42f49ae47a117e8f1bdfcdb5cbe42332dc360"
Unverified Commit 162dc19a authored by Da Zheng, committed by GitHub
Browse files

Accelerate edge sampling (#1151)



* accel edge sampling.

* measure sampling time.

* attach positive.

* add neg_deg_sample.

* fix a minor bug.

* remove unnecessary code.

* Revert "add neg_deg_sample."

This reverts commit 45f3c08c87e33589563ca7e6a3ec91cb1239910e.

* Revert "attach positive."

This reverts commit 503d7821555b69b7ccfed55ce8b87ee24fff852d.
Co-authored-by: xiang song(charlie.song) <classicxsong@gmail.com>
parent 61b78e6e
...@@ -37,11 +37,14 @@ def train(args, model, train_sampler, valid_samplers=None): ...@@ -37,11 +37,14 @@ def train(args, model, train_sampler, valid_samplers=None):
logging.info('{:20}:{}'.format(arg, getattr(args, arg))) logging.info('{:20}:{}'.format(arg, getattr(args, arg)))
start = time.time() start = time.time()
sample_time = 0
update_time = 0 update_time = 0
forward_time = 0 forward_time = 0
backward_time = 0 backward_time = 0
for step in range(args.init_step, args.max_step): for step in range(args.init_step, args.max_step):
start1 = time.time()
pos_g, neg_g = next(train_sampler) pos_g, neg_g = next(train_sampler)
sample_time += time.time() - start1
args.step = step args.step = step
start1 = time.time() start1 = time.time()
...@@ -64,9 +67,9 @@ def train(args, model, train_sampler, valid_samplers=None): ...@@ -64,9 +67,9 @@ def train(args, model, train_sampler, valid_samplers=None):
logs = [] logs = []
print('[Train] {} steps take {:.3f} seconds'.format(args.log_interval, print('[Train] {} steps take {:.3f} seconds'.format(args.log_interval,
time.time() - start)) time.time() - start))
print('forward: {:.3f}, backward: {:.3f}, update: {:.3f}'.format(forward_time, print('sample: {:.3f}, forward: {:.3f}, backward: {:.3f}, update: {:.3f}'.format(
backward_time, sample_time, forward_time, backward_time, update_time))
update_time)) sample_time = 0
update_time = 0 update_time = 0
forward_time = 0 forward_time = 0
backward_time = 0 backward_time = 0
......
...@@ -1094,7 +1094,6 @@ NegSubgraph EdgeSamplerObject::genNegEdgeSubgraph(const Subgraph &pos_subg, ...@@ -1094,7 +1094,6 @@ NegSubgraph EdgeSamplerObject::genNegEdgeSubgraph(const Subgraph &pos_subg,
int64_t num_neg_edges = num_pos_edges * neg_sample_size; int64_t num_neg_edges = num_pos_edges * neg_sample_size;
IdArray neg_dst = IdArray::Empty({num_neg_edges}, coo->dtype, coo->ctx); IdArray neg_dst = IdArray::Empty({num_neg_edges}, coo->dtype, coo->ctx);
IdArray neg_src = IdArray::Empty({num_neg_edges}, coo->dtype, coo->ctx); IdArray neg_src = IdArray::Empty({num_neg_edges}, coo->dtype, coo->ctx);
IdArray neg_eid = IdArray::Empty({num_neg_edges}, coo->dtype, coo->ctx);
IdArray induced_neg_eid = IdArray::Empty({num_neg_edges}, coo->dtype, coo->ctx); IdArray induced_neg_eid = IdArray::Empty({num_neg_edges}, coo->dtype, coo->ctx);
// These are vids in the positive subgraph. // These are vids in the positive subgraph.
...@@ -1109,7 +1108,6 @@ NegSubgraph EdgeSamplerObject::genNegEdgeSubgraph(const Subgraph &pos_subg, ...@@ -1109,7 +1108,6 @@ NegSubgraph EdgeSamplerObject::genNegEdgeSubgraph(const Subgraph &pos_subg,
dgl_id_t *neg_dst_data = static_cast<dgl_id_t *>(neg_dst->data); dgl_id_t *neg_dst_data = static_cast<dgl_id_t *>(neg_dst->data);
dgl_id_t *neg_src_data = static_cast<dgl_id_t *>(neg_src->data); dgl_id_t *neg_src_data = static_cast<dgl_id_t *>(neg_src->data);
dgl_id_t *neg_eid_data = static_cast<dgl_id_t *>(neg_eid->data);
dgl_id_t *induced_neg_eid_data = static_cast<dgl_id_t *>(induced_neg_eid->data); dgl_id_t *induced_neg_eid_data = static_cast<dgl_id_t *>(induced_neg_eid->data);
const dgl_id_t *unchanged; const dgl_id_t *unchanged;
...@@ -1129,7 +1127,6 @@ NegSubgraph EdgeSamplerObject::genNegEdgeSubgraph(const Subgraph &pos_subg, ...@@ -1129,7 +1127,6 @@ NegSubgraph EdgeSamplerObject::genNegEdgeSubgraph(const Subgraph &pos_subg,
std::vector<dgl_id_t> local_pos_vids; std::vector<dgl_id_t> local_pos_vids;
local_pos_vids.reserve(num_pos_edges); local_pos_vids.reserve(num_pos_edges);
dgl_id_t curr_eid = 0;
std::vector<size_t> neg_vids; std::vector<size_t> neg_vids;
neg_vids.reserve(neg_sample_size); neg_vids.reserve(neg_sample_size);
// If we don't exclude positive edges, we are actually sampling more than // If we don't exclude positive edges, we are actually sampling more than
...@@ -1205,7 +1202,6 @@ NegSubgraph EdgeSamplerObject::genNegEdgeSubgraph(const Subgraph &pos_subg, ...@@ -1205,7 +1202,6 @@ NegSubgraph EdgeSamplerObject::genNegEdgeSubgraph(const Subgraph &pos_subg,
for (int64_t j = 0; j < neg_sample_size; j++) { for (int64_t j = 0; j < neg_sample_size; j++) {
neg_unchanged[neg_idx + j] = local_unchanged; neg_unchanged[neg_idx + j] = local_unchanged;
neg_eid_data[neg_idx + j] = curr_eid++;
dgl_id_t local_changed = global2local_map(neg_vids[j + prev_neg_offset], &neg_map); dgl_id_t local_changed = global2local_map(neg_vids[j + prev_neg_offset], &neg_map);
neg_changed[neg_idx + j] = local_changed; neg_changed[neg_idx + j] = local_changed;
// induced negative eid references to the positive one. // induced negative eid references to the positive one.
...@@ -1285,7 +1281,6 @@ NegSubgraph EdgeSamplerObject::genPBGNegEdgeSubgraph(const Subgraph &pos_subg, ...@@ -1285,7 +1281,6 @@ NegSubgraph EdgeSamplerObject::genPBGNegEdgeSubgraph(const Subgraph &pos_subg,
IdArray neg_dst = IdArray::Empty({num_all_neg_edges}, coo->dtype, coo->ctx); IdArray neg_dst = IdArray::Empty({num_all_neg_edges}, coo->dtype, coo->ctx);
IdArray neg_src = IdArray::Empty({num_all_neg_edges}, coo->dtype, coo->ctx); IdArray neg_src = IdArray::Empty({num_all_neg_edges}, coo->dtype, coo->ctx);
IdArray neg_eid = IdArray::Empty({num_all_neg_edges}, coo->dtype, coo->ctx);
IdArray induced_neg_eid = IdArray::Empty({num_all_neg_edges}, coo->dtype, coo->ctx); IdArray induced_neg_eid = IdArray::Empty({num_all_neg_edges}, coo->dtype, coo->ctx);
// These are vids in the positive subgraph. // These are vids in the positive subgraph.
...@@ -1300,7 +1295,6 @@ NegSubgraph EdgeSamplerObject::genPBGNegEdgeSubgraph(const Subgraph &pos_subg, ...@@ -1300,7 +1295,6 @@ NegSubgraph EdgeSamplerObject::genPBGNegEdgeSubgraph(const Subgraph &pos_subg,
dgl_id_t *neg_dst_data = static_cast<dgl_id_t *>(neg_dst->data); dgl_id_t *neg_dst_data = static_cast<dgl_id_t *>(neg_dst->data);
dgl_id_t *neg_src_data = static_cast<dgl_id_t *>(neg_src->data); dgl_id_t *neg_src_data = static_cast<dgl_id_t *>(neg_src->data);
dgl_id_t *neg_eid_data = static_cast<dgl_id_t *>(neg_eid->data);
dgl_id_t *induced_neg_eid_data = static_cast<dgl_id_t *>(induced_neg_eid->data); dgl_id_t *induced_neg_eid_data = static_cast<dgl_id_t *>(induced_neg_eid->data);
const dgl_id_t *unchanged; const dgl_id_t *unchanged;
...@@ -1317,12 +1311,12 @@ NegSubgraph EdgeSamplerObject::genPBGNegEdgeSubgraph(const Subgraph &pos_subg, ...@@ -1317,12 +1311,12 @@ NegSubgraph EdgeSamplerObject::genPBGNegEdgeSubgraph(const Subgraph &pos_subg,
} }
// We first sample all negative edges. // We first sample all negative edges.
std::vector<size_t> neg_vids; std::vector<size_t> global_neg_vids;
std::vector<size_t> local_neg_vids;
randomSample(num_tot_nodes, randomSample(num_tot_nodes,
num_chunks * neg_sample_size, num_chunks * neg_sample_size,
&neg_vids); &global_neg_vids);
dgl_id_t curr_eid = 0;
std::unordered_map<dgl_id_t, dgl_id_t> neg_map; std::unordered_map<dgl_id_t, dgl_id_t> neg_map;
dgl_id_t local_vid = 0; dgl_id_t local_vid = 0;
...@@ -1338,6 +1332,13 @@ NegSubgraph EdgeSamplerObject::genPBGNegEdgeSubgraph(const Subgraph &pos_subg, ...@@ -1338,6 +1332,13 @@ NegSubgraph EdgeSamplerObject::genPBGNegEdgeSubgraph(const Subgraph &pos_subg,
} }
} }
// We should map the global negative nodes to local Ids in advance
// to reduce computation overhead.
local_neg_vids.resize(global_neg_vids.size());
for (size_t i = 0; i < global_neg_vids.size(); i++) {
local_neg_vids[i] = global2local_map(global_neg_vids[i], &neg_map);;
}
for (int64_t i_chunk = 0; i_chunk < num_chunks; i_chunk++) { for (int64_t i_chunk = 0; i_chunk < num_chunks; i_chunk++) {
// for each chunk. // for each chunk.
int64_t neg_idx = neg_sample_size * chunk_size * i_chunk; int64_t neg_idx = neg_sample_size * chunk_size * i_chunk;
...@@ -1356,12 +1357,7 @@ NegSubgraph EdgeSamplerObject::genPBGNegEdgeSubgraph(const Subgraph &pos_subg, ...@@ -1356,12 +1357,7 @@ NegSubgraph EdgeSamplerObject::genPBGNegEdgeSubgraph(const Subgraph &pos_subg,
dgl_id_t local_unchanged = global2local_map(global_unchanged, &neg_map); dgl_id_t local_unchanged = global2local_map(global_unchanged, &neg_map);
for (int64_t j = 0; j < neg_sample_size; ++j) { for (int64_t j = 0; j < neg_sample_size; ++j) {
neg_unchanged[neg_idx] = local_unchanged; neg_unchanged[neg_idx] = local_unchanged;
neg_eid_data[neg_idx] = curr_eid++; neg_changed[neg_idx] = local_neg_vids[neg_node_idx + j];
dgl_id_t global_changed_vid = neg_vids[neg_node_idx + j];
// TODO(zhengda) we can avoid the hashtable lookup here.
dgl_id_t local_changed = global2local_map(global_changed_vid, &neg_map);
neg_changed[neg_idx] = local_changed;
induced_neg_eid_data[neg_idx] = induced_eid_data[pos_edge_idx + in_chunk]; induced_neg_eid_data[neg_idx] = induced_eid_data[pos_edge_idx + in_chunk];
neg_idx++; neg_idx++;
} }
...@@ -1384,11 +1380,11 @@ NegSubgraph EdgeSamplerObject::genPBGNegEdgeSubgraph(const Subgraph &pos_subg, ...@@ -1384,11 +1380,11 @@ NegSubgraph EdgeSamplerObject::genPBGNegEdgeSubgraph(const Subgraph &pos_subg,
neg_subg.induced_vertices = induced_neg_vid; neg_subg.induced_vertices = induced_neg_vid;
neg_subg.induced_edges = induced_neg_eid; neg_subg.induced_edges = induced_neg_eid;
if (IsNegativeHeadMode(neg_mode)) { if (IsNegativeHeadMode(neg_mode)) {
neg_subg.head_nid = aten::VecToIdArray(Global2Local(neg_vids, neg_map)); neg_subg.head_nid = aten::VecToIdArray(Global2Local(global_neg_vids, neg_map));
neg_subg.tail_nid = aten::VecToIdArray(local_pos_vids); neg_subg.tail_nid = aten::VecToIdArray(local_pos_vids);
} else { } else {
neg_subg.head_nid = aten::VecToIdArray(local_pos_vids); neg_subg.head_nid = aten::VecToIdArray(local_pos_vids);
neg_subg.tail_nid = aten::VecToIdArray(Global2Local(neg_vids, neg_map)); neg_subg.tail_nid = aten::VecToIdArray(Global2Local(global_neg_vids, neg_map));
} }
if (check_false_neg) { if (check_false_neg) {
if (relations_->shape[0] == 0) { if (relations_->shape[0] == 0) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment