"src/graph/sampling/vscode:/vscode.git/clone" did not exist on "44b686411451956012e28c78ac5ebacf4ed69cc6"
Commit 346bc235 authored by Da Zheng, committed by xiang song(charlie.song)

[KG] Disable filter in evaluation (#1162)

* add no_eval_filter

* fix eval.
parent 7451bb2a
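Note on the change: by default, evaluation uses the filtered protocol, i.e. negative candidates that are actually true edges in the graph ("false negatives") are excluded before ranking each test triple, so the model is not penalized for scoring them highly. The hunks below thread a switch through the sampler (filter_false_neg) and the scoring code (args.eval_filter); passing --no_eval_filter turns the filtering off and reports the cheaper raw ranking. A minimal NumPy sketch of the difference between the two protocols (the scores are invented for illustration; this is not the repository's code):

    import numpy as np

    # Hypothetical scores for one test triple: higher = more plausible.
    pos_score = 0.7                              # score of the true (h, r, t)
    neg_scores = np.array([0.9, 0.6, 0.8, 0.2])  # scores of corrupted triples
    false_neg = np.array([1, 0, 1, 0])           # 1 = the corruption is itself a true edge

    # Raw ranking (--no_eval_filter): every corruption competes with the positive.
    raw_rank = 1 + np.sum(neg_scores >= pos_score)                        # -> 3

    # Filtered ranking (default): drop corruptions that are known true edges
    # before counting how many outrank the positive.
    filtered_rank = 1 + np.sum(neg_scores[false_neg == 0] >= pos_score)   # -> 1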
@@ -166,7 +166,8 @@ def create_neg_subgraph(pos_g, neg_g, is_pbg, neg_head, num_nodes):
                                       neg_sample_size, neg_head)
 
 class EvalSampler(object):
-    def __init__(self, g, edges, batch_size, neg_sample_size, mode, num_workers):
+    def __init__(self, g, edges, batch_size, neg_sample_size, mode, num_workers,
+                 filter_false_neg):
         EdgeSampler = getattr(dgl.contrib.sampling, 'EdgeSampler')
         self.sampler = EdgeSampler(g,
                                    batch_size=batch_size,
@@ -177,11 +178,12 @@ class EvalSampler(object):
                                    shuffle=False,
                                    exclude_positive=False,
                                    relations=g.edata['id'],
-                                   return_false_neg=True)
+                                   return_false_neg=filter_false_neg)
         self.sampler_iter = iter(self.sampler)
         self.mode = mode
         self.neg_head = 'head' in mode
         self.g = g
+        self.filter_false_neg = filter_false_neg
 
     def __iter__(self):
         return self
@@ -189,6 +191,7 @@ class EvalSampler(object):
     def __next__(self):
         while True:
             pos_g, neg_g = next(self.sampler_iter)
-            neg_positive = neg_g.edata['false_neg']
+            if self.filter_false_neg:
+                neg_positive = neg_g.edata['false_neg']
             neg_g = create_neg_subgraph(pos_g, neg_g, 'PBG' in self.mode,
                                         self.neg_head, self.g.number_of_nodes())
@@ -197,6 +200,7 @@ class EvalSampler(object):
         pos_g.copy_from_parent()
         neg_g.copy_from_parent()
-        neg_g.edata['bias'] = F.astype(-neg_positive, F.float32)
+        if self.filter_false_neg:
+            neg_g.edata['bias'] = F.astype(-neg_positive, F.float32)
         return pos_g, neg_g
@@ -276,14 +280,15 @@ class EvalDataset(object):
         np.testing.assert_equal(F.asnumpy(dst_id), orig_dst)
         np.testing.assert_equal(F.asnumpy(etype), orig_etype)
 
-    def create_sampler(self, eval_type, batch_size, neg_sample_size, mode='head',
-                       num_workers=5, rank=0, ranks=1):
+    def create_sampler(self, eval_type, batch_size, neg_sample_size,
+                       filter_false_neg, mode='head', num_workers=5, rank=0, ranks=1):
         edges = self.get_edges(eval_type)
         beg = edges.shape[0] * rank // ranks
         end = min(edges.shape[0] * (rank + 1) // ranks, edges.shape[0])
         edges = edges[beg: end]
         print("eval on {} edges".format(len(edges)))
-        return EvalSampler(self.g, edges, batch_size, neg_sample_size, mode, num_workers)
+        return EvalSampler(self.g, edges, batch_size, neg_sample_size,
+                           mode, num_workers, filter_false_neg)
 
 class NewBidirectionalOneShotIterator:
     def __init__(self, dataloader_head, dataloader_tail, is_pbg, num_nodes):
......
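In the sampler changes above, filter_false_neg controls whether the EdgeSampler is asked to return a false_neg mask and whether the negative graph carries a 'bias' edge feature: -1 for a false negative, 0 otherwise, via F.astype(-neg_positive, F.float32). A backend-free sketch of that conversion (the helper name false_neg_to_bias is purely illustrative):

    import numpy as np

    def false_neg_to_bias(false_neg, filter_false_neg=True):
        # When filtering is on, the 0/1 false-negative mask becomes an additive
        # bias of 0/-1 per negative candidate; when filtering is off, no bias is
        # produced at all (mirroring the guarded assignment in __next__).
        if not filter_false_neg:
            return None
        return -false_neg.astype(np.float32)

    print(false_neg_to_bias(np.array([0, 1, 0, 1])))                    # -> [-0. -1. -0. -1.]
    print(false_neg_to_bias(np.array([0, 1]), filter_false_neg=False))  # -> None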
@@ -22,7 +22,7 @@ class ArgParser(argparse.ArgumentParser):
         super(ArgParser, self).__init__()
 
         self.add_argument('--model_name', default='TransE',
-                          choices=['TransE', 'TransH', 'TransR', 'TransD',
+                          choices=['TransE', 'TransE_l1', 'TransE_l2', 'TransH', 'TransR', 'TransD',
                                    'RESCAL', 'DistMult', 'ComplEx', 'RotatE', 'pRotatE'],
                           help='model to use')
         self.add_argument('--data_path', type=str, default='data',
@@ -44,6 +44,8 @@ class ArgParser(argparse.ArgumentParser):
                           help='margin value')
         self.add_argument('--eval_percent', type=float, default=1,
                           help='sample some percentage for evaluation.')
+        self.add_argument('--no_eval_filter', action='store_true',
+                          help='do not filter positive edges among negative edges for evaluation')
         self.add_argument('--gpu', type=int, default=-1,
                           help='use GPU')
@@ -99,17 +101,20 @@ def main(args):
     args.neg_sample_size_test = args.neg_sample_size
     if args.neg_sample_size < 0:
         args.neg_sample_size_test = args.neg_sample_size = eval_dataset.g.number_of_nodes()
+    args.eval_filter = not args.no_eval_filter
     if args.num_proc > 1:
         test_sampler_tails = []
         test_sampler_heads = []
         for i in range(args.num_proc):
             test_sampler_head = eval_dataset.create_sampler('test', args.batch_size,
                                                             args.neg_sample_size,
+                                                            args.eval_filter,
                                                             mode='PBG-head',
                                                             num_workers=args.num_worker,
                                                             rank=i, ranks=args.num_proc)
             test_sampler_tail = eval_dataset.create_sampler('test', args.batch_size,
                                                             args.neg_sample_size,
+                                                            args.eval_filter,
                                                             mode='PBG-tail',
                                                             num_workers=args.num_worker,
                                                             rank=i, ranks=args.num_proc)
@@ -118,11 +123,13 @@ def main(args):
     else:
         test_sampler_head = eval_dataset.create_sampler('test', args.batch_size,
                                                         args.neg_sample_size,
+                                                        args.eval_filter,
                                                         mode='PBG-head',
                                                         num_workers=args.num_worker,
                                                         rank=0, ranks=1)
         test_sampler_tail = eval_dataset.create_sampler('test', args.batch_size,
                                                         args.neg_sample_size,
+                                                        args.eval_filter,
                                                         mode='PBG-tail',
                                                         num_workers=args.num_worker,
                                                         rank=0, ranks=1)
......
@@ -140,6 +140,7 @@ class KEModel(object):
         neg_scores = reshape(logsigmoid(neg_scores), batch_size, -1)
 
         # We need to filter the positive edges in the negative graph.
-        filter_bias = reshape(neg_g.edata['bias'], batch_size, -1)
-        if self.args.gpu >= 0:
-            filter_bias = cuda(filter_bias, self.args.gpu)
+        if self.args.eval_filter:
+            filter_bias = reshape(neg_g.edata['bias'], batch_size, -1)
+            if self.args.gpu >= 0:
+                filter_bias = cuda(filter_bias, self.args.gpu)
......
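The KEModel hunk above only guards the bias lookup with args.eval_filter and ends before the line that consumes filter_bias; the comment states the intent (filter the positive edges in the negative graph) without showing the formula. A hedged sketch of how such an additive bias can be applied per batch before ranking (the 1e9 scale and the ranking rule are assumptions made for this illustration, not the repository's exact computation):

    import numpy as np

    batch_size = 2
    pos_scores = np.array([[-0.13], [-0.70]])       # one score per test triple
    neg_scores = np.array([[-0.26, -0.85, -0.37],
                           [-0.64, -0.34, -1.70]])  # scores of the negative candidates
    # Analogue of reshape(neg_g.edata['bias'], batch_size, -1): -1 marks false negatives.
    bias = np.array([-1., 0., 0., 0., -1., 0.]).reshape(batch_size, -1)

    eval_filter = True
    if eval_filter:
        neg_scores = neg_scores + 1e9 * bias        # demote false negatives far below any real score
    ranks = 1 + np.sum(neg_scores >= pos_scores, axis=1)
    print(ranks)                                    # -> [1 2]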
@@ -59,6 +59,8 @@ class ArgParser(argparse.ArgumentParser):
                           help='margin value')
         self.add_argument('--eval_percent', type=float, default=1,
                           help='sample some percentage for evaluation.')
+        self.add_argument('--no_eval_filter', action='store_true',
+                          help='do not filter positive edges among negative edges for evaluation')
         self.add_argument('--gpu', type=int, default=-1,
                           help='use GPU')
@@ -135,6 +137,7 @@ def run(args, logger):
     n_relations = dataset.n_relations
     if args.neg_sample_size_test < 0:
         args.neg_sample_size_test = n_entities
+    args.eval_filter = not args.no_eval_filter
 
     train_data = TrainDataset(dataset, args, ranks=args.num_proc)
     if args.num_proc > 1:
@@ -179,11 +182,13 @@ def run(args, logger):
         for i in range(args.num_proc):
             valid_sampler_head = eval_dataset.create_sampler('valid', args.batch_size_eval,
                                                              args.neg_sample_size_valid,
+                                                             args.eval_filter,
                                                              mode='PBG-head',
                                                              num_workers=args.num_worker,
                                                              rank=i, ranks=args.num_proc)
             valid_sampler_tail = eval_dataset.create_sampler('valid', args.batch_size_eval,
                                                              args.neg_sample_size_valid,
+                                                             args.eval_filter,
                                                              mode='PBG-tail',
                                                              num_workers=args.num_worker,
                                                              rank=i, ranks=args.num_proc)
@@ -192,11 +197,13 @@ def run(args, logger):
     else:
         valid_sampler_head = eval_dataset.create_sampler('valid', args.batch_size_eval,
                                                          args.neg_sample_size_valid,
+                                                         args.eval_filter,
                                                          mode='PBG-head',
                                                          num_workers=args.num_worker,
                                                          rank=0, ranks=1)
         valid_sampler_tail = eval_dataset.create_sampler('valid', args.batch_size_eval,
                                                          args.neg_sample_size_valid,
+                                                         args.eval_filter,
                                                          mode='PBG-tail',
                                                          num_workers=args.num_worker,
                                                          rank=0, ranks=1)
@@ -209,11 +216,13 @@ def run(args, logger):
         for i in range(args.num_proc):
             test_sampler_head = eval_dataset.create_sampler('test', args.batch_size_eval,
                                                             args.neg_sample_size_test,
+                                                            args.eval_filter,
                                                             mode='PBG-head',
                                                             num_workers=args.num_worker,
                                                             rank=i, ranks=args.num_proc)
             test_sampler_tail = eval_dataset.create_sampler('test', args.batch_size_eval,
                                                             args.neg_sample_size_test,
+                                                            args.eval_filter,
                                                             mode='PBG-tail',
                                                             num_workers=args.num_worker,
                                                             rank=i, ranks=args.num_proc)
@@ -222,11 +231,13 @@ def run(args, logger):
     else:
         test_sampler_head = eval_dataset.create_sampler('test', args.batch_size_eval,
                                                         args.neg_sample_size_test,
+                                                        args.eval_filter,
                                                         mode='PBG-head',
                                                         num_workers=args.num_worker,
                                                         rank=0, ranks=1)
         test_sampler_tail = eval_dataset.create_sampler('test', args.batch_size_eval,
                                                         args.neg_sample_size_test,
+                                                        args.eval_filter,
                                                         mode='PBG-tail',
                                                         num_workers=args.num_worker,
                                                         rank=0, ranks=1)
......
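Both entry points derive args.eval_filter = not args.no_eval_filter, so filtering stays enabled unless the new flag is passed. A small self-contained sketch of that wiring (only the two options shown in the diff are reproduced; the rest of the real parser is omitted):

    import argparse

    # Minimal stand-in for the ArgParser classes in the diff.
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_name', default='TransE')
    parser.add_argument('--no_eval_filter', action='store_true',
                        help='do not filter positive edges among negative edges for evaluation')

    args = parser.parse_args(['--model_name', 'TransE_l2', '--no_eval_filter'])
    args.eval_filter = not args.no_eval_filter      # same derivation as in main()/run()
    print(args.eval_filter)                         # -> False: evaluation uses raw ranking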