"src/git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "ccc8321651ebb879f70e563274b2d03c84c18f2f"
Commit 346bc235 authored by Da Zheng, committed by xiang song (charlie.song)
Browse files

[KG] Disable filter in evaluation (#1162)

* add no_eval_filter

* fix eval.
parent 7451bb2a
......@@ -166,7 +166,8 @@ def create_neg_subgraph(pos_g, neg_g, is_pbg, neg_head, num_nodes):
neg_sample_size, neg_head)
class EvalSampler(object):
def __init__(self, g, edges, batch_size, neg_sample_size, mode, num_workers):
def __init__(self, g, edges, batch_size, neg_sample_size, mode, num_workers,
filter_false_neg):
EdgeSampler = getattr(dgl.contrib.sampling, 'EdgeSampler')
self.sampler = EdgeSampler(g,
batch_size=batch_size,
......@@ -177,11 +178,12 @@ class EvalSampler(object):
shuffle=False,
exclude_positive=False,
relations=g.edata['id'],
return_false_neg=True)
return_false_neg=filter_false_neg)
self.sampler_iter = iter(self.sampler)
self.mode = mode
self.neg_head = 'head' in mode
self.g = g
self.filter_false_neg = filter_false_neg
def __iter__(self):
return self
......@@ -189,7 +191,8 @@ class EvalSampler(object):
def __next__(self):
while True:
pos_g, neg_g = next(self.sampler_iter)
neg_positive = neg_g.edata['false_neg']
if self.filter_false_neg:
neg_positive = neg_g.edata['false_neg']
neg_g = create_neg_subgraph(pos_g, neg_g, 'PBG' in self.mode,
self.neg_head, self.g.number_of_nodes())
if neg_g is not None:
......@@ -197,7 +200,8 @@ class EvalSampler(object):
pos_g.copy_from_parent()
neg_g.copy_from_parent()
neg_g.edata['bias'] = F.astype(-neg_positive, F.float32)
if self.filter_false_neg:
neg_g.edata['bias'] = F.astype(-neg_positive, F.float32)
return pos_g, neg_g
def reset(self):
......@@ -276,14 +280,15 @@ class EvalDataset(object):
np.testing.assert_equal(F.asnumpy(dst_id), orig_dst)
np.testing.assert_equal(F.asnumpy(etype), orig_etype)
def create_sampler(self, eval_type, batch_size, neg_sample_size, mode='head',
num_workers=5, rank=0, ranks=1):
def create_sampler(self, eval_type, batch_size, neg_sample_size,
filter_false_neg, mode='head', num_workers=5, rank=0, ranks=1):
edges = self.get_edges(eval_type)
beg = edges.shape[0] * rank // ranks
end = min(edges.shape[0] * (rank + 1) // ranks, edges.shape[0])
edges = edges[beg: end]
print("eval on {} edges".format(len(edges)))
return EvalSampler(self.g, edges, batch_size, neg_sample_size, mode, num_workers)
return EvalSampler(self.g, edges, batch_size, neg_sample_size,
mode, num_workers, filter_false_neg)
class NewBidirectionalOneShotIterator:
def __init__(self, dataloader_head, dataloader_tail, is_pbg, num_nodes):
......
......@@ -22,7 +22,7 @@ class ArgParser(argparse.ArgumentParser):
super(ArgParser, self).__init__()
self.add_argument('--model_name', default='TransE',
choices=['TransE', 'TransH', 'TransR', 'TransD',
choices=['TransE', 'TransE_l1', 'TransE_l2', 'TransH', 'TransR', 'TransD',
'RESCAL', 'DistMult', 'ComplEx', 'RotatE', 'pRotatE'],
help='model to use')
self.add_argument('--data_path', type=str, default='data',
......@@ -44,6 +44,8 @@ class ArgParser(argparse.ArgumentParser):
help='margin value')
self.add_argument('--eval_percent', type=float, default=1,
help='sample some percentage for evaluation.')
self.add_argument('--no_eval_filter', action='store_true',
help='do not filter positive edges among negative edges for evaluation')
self.add_argument('--gpu', type=int, default=-1,
help='use GPU')
......@@ -99,17 +101,20 @@ def main(args):
args.neg_sample_size_test = args.neg_sample_size
if args.neg_sample_size < 0:
args.neg_sample_size_test = args.neg_sample_size = eval_dataset.g.number_of_nodes()
args.eval_filter = not args.no_eval_filter
if args.num_proc > 1:
test_sampler_tails = []
test_sampler_heads = []
for i in range(args.num_proc):
test_sampler_head = eval_dataset.create_sampler('test', args.batch_size,
args.neg_sample_size,
args.eval_filter,
mode='PBG-head',
num_workers=args.num_worker,
rank=i, ranks=args.num_proc)
test_sampler_tail = eval_dataset.create_sampler('test', args.batch_size,
args.neg_sample_size,
args.eval_filter,
mode='PBG-tail',
num_workers=args.num_worker,
rank=i, ranks=args.num_proc)
......@@ -118,11 +123,13 @@ def main(args):
else:
test_sampler_head = eval_dataset.create_sampler('test', args.batch_size,
args.neg_sample_size,
args.eval_filter,
mode='PBG-head',
num_workers=args.num_worker,
rank=0, ranks=1)
test_sampler_tail = eval_dataset.create_sampler('test', args.batch_size,
args.neg_sample_size,
args.eval_filter,
mode='PBG-tail',
num_workers=args.num_worker,
rank=0, ranks=1)
......
......@@ -140,10 +140,11 @@ class KEModel(object):
neg_scores = reshape(logsigmoid(neg_scores), batch_size, -1)
# We need to filter the positive edges in the negative graph.
filter_bias = reshape(neg_g.edata['bias'], batch_size, -1)
if self.args.gpu >= 0:
filter_bias = cuda(filter_bias, self.args.gpu)
neg_scores += filter_bias
if self.args.eval_filter:
filter_bias = reshape(neg_g.edata['bias'], batch_size, -1)
if self.args.gpu >= 0:
filter_bias = cuda(filter_bias, self.args.gpu)
neg_scores += filter_bias
# To compute the rank of a positive edge among all negative edges,
# we need to know how many negative edges have higher scores than
# the positive edge.
......
......@@ -59,6 +59,8 @@ class ArgParser(argparse.ArgumentParser):
help='margin value')
self.add_argument('--eval_percent', type=float, default=1,
help='sample some percentage for evaluation.')
self.add_argument('--no_eval_filter', action='store_true',
help='do not filter positive edges among negative edges for evaluation')
self.add_argument('--gpu', type=int, default=-1,
help='use GPU')
......@@ -135,6 +137,7 @@ def run(args, logger):
n_relations = dataset.n_relations
if args.neg_sample_size_test < 0:
args.neg_sample_size_test = n_entities
args.eval_filter = not args.no_eval_filter
train_data = TrainDataset(dataset, args, ranks=args.num_proc)
if args.num_proc > 1:
......@@ -179,11 +182,13 @@ def run(args, logger):
for i in range(args.num_proc):
valid_sampler_head = eval_dataset.create_sampler('valid', args.batch_size_eval,
args.neg_sample_size_valid,
args.eval_filter,
mode='PBG-head',
num_workers=args.num_worker,
rank=i, ranks=args.num_proc)
valid_sampler_tail = eval_dataset.create_sampler('valid', args.batch_size_eval,
args.neg_sample_size_valid,
args.eval_filter,
mode='PBG-tail',
num_workers=args.num_worker,
rank=i, ranks=args.num_proc)
......@@ -192,11 +197,13 @@ def run(args, logger):
else:
valid_sampler_head = eval_dataset.create_sampler('valid', args.batch_size_eval,
args.neg_sample_size_valid,
args.eval_filter,
mode='PBG-head',
num_workers=args.num_worker,
rank=0, ranks=1)
valid_sampler_tail = eval_dataset.create_sampler('valid', args.batch_size_eval,
args.neg_sample_size_valid,
args.eval_filter,
mode='PBG-tail',
num_workers=args.num_worker,
rank=0, ranks=1)
......@@ -209,11 +216,13 @@ def run(args, logger):
for i in range(args.num_proc):
test_sampler_head = eval_dataset.create_sampler('test', args.batch_size_eval,
args.neg_sample_size_test,
args.eval_filter,
mode='PBG-head',
num_workers=args.num_worker,
rank=i, ranks=args.num_proc)
test_sampler_tail = eval_dataset.create_sampler('test', args.batch_size_eval,
args.neg_sample_size_test,
args.eval_filter,
mode='PBG-tail',
num_workers=args.num_worker,
rank=i, ranks=args.num_proc)
......@@ -222,11 +231,13 @@ def run(args, logger):
else:
test_sampler_head = eval_dataset.create_sampler('test', args.batch_size_eval,
args.neg_sample_size_test,
args.eval_filter,
mode='PBG-head',
num_workers=args.num_worker,
rank=0, ranks=1)
test_sampler_tail = eval_dataset.create_sampler('test', args.batch_size_eval,
args.neg_sample_size_test,
args.eval_filter,
mode='PBG-tail',
num_workers=args.num_worker,
rank=0, ranks=1)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment