Unverified Commit 1022d5d6 authored by Da Zheng, committed by GitHub

[KG] PBG's way of constructing negative edges (#1159)

* attach positive.

* add neg_deg_sample.

* add comment.

* add neg_deg_sample for eval.

* change the edge sampler.

* rename edge sampler in KG.

* allow specifying chunk size and negative sample size separately.

* fix bugs in KG.

* add check in sampler.

* add more checks.

* fix

* add comment.

* add comments.
parent 1de192f4
......@@ -107,13 +107,14 @@ class TrainDataset(object):
count[(tail, -rel - 1)] += 1
return count
def create_sampler(self, batch_size, neg_sample_size=2, mode='head', num_workers=5,
def create_sampler(self, batch_size, neg_sample_size=2, neg_chunk_size=None, mode='head', num_workers=5,
shuffle=True, exclude_positive=False, rank=0):
EdgeSampler = getattr(dgl.contrib.sampling, 'EdgeSampler')
return EdgeSampler(self.g,
seed_edges=F.tensor(self.edge_parts[rank]),
batch_size=batch_size,
neg_sample_size=neg_sample_size,
chunk_size=neg_chunk_size,
negative_mode=mode,
num_workers=num_workers,
shuffle=shuffle,
......@@ -121,10 +122,10 @@ class TrainDataset(object):
return_false_neg=False)
class PBGNegEdgeSubgraph(dgl.subgraph.DGLSubGraph):
class ChunkNegEdgeSubgraph(dgl.subgraph.DGLSubGraph):
def __init__(self, subg, num_chunks, chunk_size,
neg_sample_size, neg_head):
super(PBGNegEdgeSubgraph, self).__init__(subg._parent, subg.sgi)
super(ChunkNegEdgeSubgraph, self).__init__(subg._parent, subg.sgi)
self.subg = subg
self.num_chunks = num_chunks
self.chunk_size = chunk_size
......@@ -140,7 +141,11 @@ class PBGNegEdgeSubgraph(dgl.subgraph.DGLSubGraph):
return self.subg.tail_nid
def create_neg_subgraph(pos_g, neg_g, is_pbg, neg_head, num_nodes):
# KG models need to know the number of chunks, the chunk size and the negative sample size
# of a negative subgraph to perform the computation more efficiently.
# This function infers all of this information from the negative subgraph
# and creates a wrapper class that carries it along with the subgraph.
def create_neg_subgraph(pos_g, neg_g, chunk_size, is_chunked, neg_head, num_nodes):
assert neg_g.number_of_edges() % pos_g.number_of_edges() == 0
neg_sample_size = int(neg_g.number_of_edges() / pos_g.number_of_edges())
# We use all nodes to create negative edges. Regardless of the sampling algorithm,
......@@ -149,30 +154,32 @@ def create_neg_subgraph(pos_g, neg_g, is_pbg, neg_head, num_nodes):
or (not neg_head and len(neg_g.tail_nid) == num_nodes):
num_chunks = 1
chunk_size = pos_g.number_of_edges()
elif is_pbg:
if pos_g.number_of_edges() < neg_sample_size:
elif is_chunked:
if pos_g.number_of_edges() < chunk_size:
num_chunks = 1
chunk_size = pos_g.number_of_edges()
else:
# This is probably the last batch. Let's ignore it.
if pos_g.number_of_edges() % neg_sample_size > 0:
if pos_g.number_of_edges() % chunk_size > 0:
return None
num_chunks = int(pos_g.number_of_edges()/ neg_sample_size)
chunk_size = neg_sample_size
num_chunks = int(pos_g.number_of_edges()/ chunk_size)
assert num_chunks * chunk_size == pos_g.number_of_edges()
assert num_chunks * neg_sample_size * chunk_size == neg_g.number_of_edges()
else:
num_chunks = pos_g.number_of_edges()
chunk_size = 1
return PBGNegEdgeSubgraph(neg_g, num_chunks, chunk_size,
neg_sample_size, neg_head)
return ChunkNegEdgeSubgraph(neg_g, num_chunks, chunk_size,
neg_sample_size, neg_head)
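For reference, here is a minimal standalone sketch of the chunk inference performed above, assuming only the positive/negative edge counts and the requested chunk size are known. The helper name infer_chunking is hypothetical and not part of this commit, and the "all nodes are used as negatives" special case is omitted for brevity.

```python
def infer_chunking(num_pos_edges, num_neg_edges, chunk_size, is_chunked):
    """Simplified mirror of the bookkeeping in create_neg_subgraph (sketch only)."""
    assert num_neg_edges % num_pos_edges == 0
    neg_sample_size = num_neg_edges // num_pos_edges
    if is_chunked:
        if num_pos_edges < chunk_size:
            # A batch smaller than one chunk is treated as a single chunk.
            return 1, num_pos_edges, neg_sample_size
        if num_pos_edges % chunk_size != 0:
            # Probably the last batch; the caller skips it.
            return None
        return num_pos_edges // chunk_size, chunk_size, neg_sample_size
    # Unchunked sampling: every positive edge forms its own chunk.
    return num_pos_edges, 1, neg_sample_size

# 1024 positive edges with 16 negatives each and a chunk size of 64:
# 16 chunks of 64 positive edges, each sharing 16 sampled negative nodes.
print(infer_chunking(1024, 1024 * 16, 64, True))  # (16, 64, 16)
```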
class EvalSampler(object):
def __init__(self, g, edges, batch_size, neg_sample_size, mode, num_workers,
def __init__(self, g, edges, batch_size, neg_sample_size, neg_chunk_size, mode, num_workers,
filter_false_neg):
EdgeSampler = getattr(dgl.contrib.sampling, 'EdgeSampler')
self.sampler = EdgeSampler(g,
batch_size=batch_size,
seed_edges=edges,
neg_sample_size=neg_sample_size,
chunk_size=neg_chunk_size,
negative_mode=mode,
num_workers=num_workers,
shuffle=False,
......@@ -184,6 +191,7 @@ class EvalSampler(object):
self.neg_head = 'head' in mode
self.g = g
self.filter_false_neg = filter_false_neg
self.neg_chunk_size = neg_chunk_size
def __iter__(self):
return self
......@@ -193,7 +201,7 @@ class EvalSampler(object):
pos_g, neg_g = next(self.sampler_iter)
if self.filter_false_neg:
neg_positive = neg_g.edata['false_neg']
neg_g = create_neg_subgraph(pos_g, neg_g, 'PBG' in self.mode,
neg_g = create_neg_subgraph(pos_g, neg_g, self.neg_chunk_size, 'chunk' in self.mode,
self.neg_head, self.g.number_of_nodes())
if neg_g is not None:
break
......@@ -280,22 +288,22 @@ class EvalDataset(object):
np.testing.assert_equal(F.asnumpy(dst_id), orig_dst)
np.testing.assert_equal(F.asnumpy(etype), orig_etype)
def create_sampler(self, eval_type, batch_size, neg_sample_size,
def create_sampler(self, eval_type, batch_size, neg_sample_size, neg_chunk_size,
filter_false_neg, mode='head', num_workers=5, rank=0, ranks=1):
edges = self.get_edges(eval_type)
beg = edges.shape[0] * rank // ranks
end = min(edges.shape[0] * (rank + 1) // ranks, edges.shape[0])
edges = edges[beg: end]
return EvalSampler(self.g, edges, batch_size, neg_sample_size,
return EvalSampler(self.g, edges, batch_size, neg_sample_size, neg_chunk_size,
mode, num_workers, filter_false_neg)
class NewBidirectionalOneShotIterator:
def __init__(self, dataloader_head, dataloader_tail, is_pbg, num_nodes):
def __init__(self, dataloader_head, dataloader_tail, neg_chunk_size, is_chunked, num_nodes):
self.sampler_head = dataloader_head
self.sampler_tail = dataloader_tail
self.iterator_head = self.one_shot_iterator(dataloader_head, is_pbg,
self.iterator_head = self.one_shot_iterator(dataloader_head, neg_chunk_size, is_chunked,
True, num_nodes)
self.iterator_tail = self.one_shot_iterator(dataloader_tail, is_pbg,
self.iterator_tail = self.one_shot_iterator(dataloader_tail, neg_chunk_size, is_chunked,
False, num_nodes)
self.step = 0
......@@ -308,10 +316,11 @@ class NewBidirectionalOneShotIterator:
return pos_g, neg_g
@staticmethod
def one_shot_iterator(dataloader, is_pbg, neg_head, num_nodes):
def one_shot_iterator(dataloader, neg_chunk_size, is_chunked, neg_head, num_nodes):
while True:
for pos_g, neg_g in dataloader:
neg_g = create_neg_subgraph(pos_g, neg_g, is_pbg, neg_head, num_nodes)
neg_g = create_neg_subgraph(pos_g, neg_g, neg_chunk_size, is_chunked,
neg_head, num_nodes)
if neg_g is None:
continue
......
......@@ -38,6 +38,10 @@ class ArgParser(argparse.ArgumentParser):
help='batch size used for eval and test')
self.add_argument('--neg_sample_size', type=int, default=-1,
help='negative sampling size for testing')
self.add_argument('--neg_deg_sample', action='store_true',
help='negative sampling proportional to vertex degree for testing')
self.add_argument('--neg_chunk_size', type=int, default=-1,
help='chunk size of the negative edges.')
self.add_argument('--hidden_dim', type=int, default=256,
help='hidden dim used by relation and entity')
self.add_argument('-g', '--gamma', type=float, default=12.0,
......@@ -86,6 +90,10 @@ def get_logger(args):
return logger
def main(args):
args.eval_filter = not args.no_eval_filter
if args.neg_deg_sample:
assert not args.eval_filter, "if negative sampling is based on degree, we can't filter positive edges."
# load dataset and samplers
dataset = get_dataset(args.data_path, args.dataset, args.format)
args.pickle_graph = False
......@@ -98,10 +106,14 @@ def main(args):
# Here we want to use the regular negative sampler because we need to ensure that
# all positive edges are excluded.
eval_dataset = EvalDataset(dataset, args)
args.neg_sample_size_test = args.neg_sample_size
args.neg_deg_sample_eval = args.neg_deg_sample
if args.neg_sample_size < 0:
args.neg_sample_size_test = args.neg_sample_size = eval_dataset.g.number_of_nodes()
args.eval_filter = not args.no_eval_filter
if args.neg_chunk_size < 0:
args.neg_chunk_size = args.neg_sample_size
num_workers = args.num_worker
# for multiprocessing evaluation, we don't need to sample multiple batches at a time
# in each process.
......@@ -113,14 +125,16 @@ def main(args):
for i in range(args.num_proc):
test_sampler_head = eval_dataset.create_sampler('test', args.batch_size,
args.neg_sample_size,
args.neg_chunk_size,
args.eval_filter,
mode='PBG-head',
mode='chunk-head',
num_workers=num_workers,
rank=i, ranks=args.num_proc)
test_sampler_tail = eval_dataset.create_sampler('test', args.batch_size,
args.neg_sample_size,
args.neg_chunk_size,
args.eval_filter,
mode='PBG-tail',
mode='chunk-tail',
num_workers=num_workers,
rank=i, ranks=args.num_proc)
test_sampler_heads.append(test_sampler_head)
......@@ -128,14 +142,16 @@ def main(args):
else:
test_sampler_head = eval_dataset.create_sampler('test', args.batch_size,
args.neg_sample_size,
args.neg_chunk_size,
args.eval_filter,
mode='PBG-head',
mode='chunk-head',
num_workers=num_workers,
rank=0, ranks=1)
test_sampler_tail = eval_dataset.create_sampler('test', args.batch_size,
args.neg_sample_size,
args.neg_chunk_size,
args.eval_filter,
mode='PBG-tail',
mode='chunk-tail',
num_workers=num_workers,
rank=0, ranks=1)
......
......@@ -94,36 +94,67 @@ class KEModel(object):
self.score_func(g)
return g.edata['score']
def predict_neg_score(self, pos_g, neg_g, to_device=None, gpu_id=-1, trace=False):
def predict_neg_score(self, pos_g, neg_g, to_device=None, gpu_id=-1, trace=False,
neg_deg_sample=False):
num_chunks = neg_g.num_chunks
chunk_size = neg_g.chunk_size
neg_sample_size = neg_g.neg_sample_size
mask = F.ones((num_chunks, chunk_size * (neg_sample_size + chunk_size)),
dtype=F.float32, ctx=F.context(pos_g.ndata['emb']))
if neg_g.neg_head:
neg_head_ids = neg_g.ndata['id'][neg_g.head_nid]
neg_head = self.entity_emb(neg_head_ids, gpu_id, trace)
_, tail_ids = pos_g.all_edges(order='eid')
head_ids, tail_ids = pos_g.all_edges(order='eid')
if to_device is not None and gpu_id >= 0:
tail_ids = to_device(tail_ids, gpu_id)
tail = pos_g.ndata['emb'][tail_ids]
rel = pos_g.edata['emb']
# When we train on a batch, we can use the head nodes of the positive edges to
# construct negative edges: a negative edge is created between every positive
# head node and every positive tail node in the same chunk.
# Constructed this way, each positive head node has exactly one true positive
# edge among the negatives, so those entries need to be masked
# (see the masking sketch after this function).
if neg_deg_sample:
head = pos_g.ndata['emb'][head_ids]
head = head.reshape(num_chunks, chunk_size, -1)
neg_head = neg_head.reshape(num_chunks, neg_sample_size, -1)
neg_head = F.cat([head, neg_head], 1)
neg_sample_size = chunk_size + neg_sample_size
mask[:,0::(neg_sample_size + 1)] = 0
neg_head = neg_head.reshape(num_chunks * neg_sample_size, -1)
neg_head, tail = self.head_neg_prepare(pos_g.edata['id'], num_chunks, neg_head, tail, gpu_id, trace)
neg_score = self.head_neg_score(neg_head, rel, tail,
num_chunks, chunk_size, neg_sample_size)
else:
neg_tail_ids = neg_g.ndata['id'][neg_g.tail_nid]
neg_tail = self.entity_emb(neg_tail_ids, gpu_id, trace)
head_ids, _ = pos_g.all_edges(order='eid')
head_ids, tail_ids = pos_g.all_edges(order='eid')
if to_device is not None and gpu_id >= 0:
head_ids = to_device(head_ids, gpu_id)
head = pos_g.ndata['emb'][head_ids]
rel = pos_g.edata['emb']
# The same negative edge construction as above, but corrupting tail nodes.
if neg_deg_sample:
tail = pos_g.ndata['emb'][tail_ids]
tail = tail.reshape(num_chunks, chunk_size, -1)
neg_tail = neg_tail.reshape(num_chunks, neg_sample_size, -1)
neg_tail = F.cat([tail, neg_tail], 1)
neg_sample_size = chunk_size + neg_sample_size
mask[:,0::(neg_sample_size + 1)] = 0
neg_tail = neg_tail.reshape(num_chunks * neg_sample_size, -1)
head, neg_tail = self.tail_neg_prepare(pos_g.edata['id'], num_chunks, head, neg_tail, gpu_id, trace)
neg_score = self.tail_neg_score(head, rel, neg_tail,
num_chunks, chunk_size, neg_sample_size)
return neg_score
if neg_deg_sample:
neg_g.neg_sample_size = neg_sample_size
mask = mask.reshape(num_chunks, chunk_size, neg_sample_size)
return neg_score * mask
else:
return neg_score
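The strided mask assignment above is compact, so here is a small NumPy sketch (illustrative only, not part of the commit) showing that mask[:, 0::(neg_sample_size + 1)] = 0 zeroes exactly the entry where each positive edge is scored against its own true head or tail once the positive nodes are prepended to the sampled negatives.

```python
import numpy as np

# Toy sizes: 2 chunks, 3 positive edges per chunk, 4 sampled negatives per chunk.
num_chunks, chunk_size, neg_sample_size = 2, 3, 4

# With neg_deg_sample, each chunk's own positive heads (or tails) are prepended
# to the sampled negatives, so every positive edge is scored against
# chunk_size + neg_sample_size candidates.
total_neg = chunk_size + neg_sample_size

mask = np.ones((num_chunks, chunk_size * total_neg), dtype=np.float32)
# Zero every (total_neg + 1)-th entry; after reshaping this hits entry (j, j)
# of each chunk, i.e. the slot where positive edge j meets its own true
# head (or tail) node.
mask[:, 0::(total_neg + 1)] = 0
mask = mask.reshape(num_chunks, chunk_size, total_neg)

print(mask[0])
# [[0. 1. 1. 1. 1. 1. 1.]
#  [1. 0. 1. 1. 1. 1. 1.]
#  [1. 1. 0. 1. 1. 1. 1.]]
```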
def forward_test(self, pos_g, neg_g, logs, gpu_id=-1):
pos_g.ndata['emb'] = self.entity_emb(pos_g.ndata['id'], gpu_id, False)
......@@ -136,7 +167,8 @@ class KEModel(object):
pos_scores = reshape(logsigmoid(pos_scores), batch_size, -1)
neg_scores = self.predict_neg_score(pos_g, neg_g, to_device=cuda,
gpu_id=gpu_id, trace=False)
gpu_id=gpu_id, trace=False,
neg_deg_sample=self.args.neg_deg_sample_eval)
neg_scores = reshape(logsigmoid(neg_scores), batch_size, -1)
# We need to filter the positive edges in the negative graph.
......@@ -171,9 +203,11 @@ class KEModel(object):
pos_score = logsigmoid(pos_score)
if gpu_id >= 0:
neg_score = self.predict_neg_score(pos_g, neg_g, to_device=cuda,
gpu_id=gpu_id, trace=True)
gpu_id=gpu_id, trace=True,
neg_deg_sample=self.args.neg_deg_sample)
else:
neg_score = self.predict_neg_score(pos_g, neg_g, trace=True)
neg_score = self.predict_neg_score(pos_g, neg_g, trace=True,
neg_deg_sample=self.args.neg_deg_sample)
neg_score = reshape(neg_score, -1, neg_g.neg_sample_size)
# Adversarial sampling
......
......@@ -137,14 +137,14 @@ def check_score_func(func_name):
EdgeSampler = getattr(dgl.contrib.sampling, 'EdgeSampler')
sampler = EdgeSampler(g, batch_size=batch_size,
neg_sample_size=neg_sample_size,
negative_mode='PBG-head',
negative_mode='chunk-head',
num_workers=1,
shuffle=False,
exclude_positive=False,
return_false_neg=False)
for pos_g, neg_g in sampler:
neg_g = create_neg_subgraph(pos_g, neg_g, True, True, g.number_of_nodes())
neg_g = create_neg_subgraph(pos_g, neg_g, neg_sample_size, True, True, g.number_of_nodes())
pos_g.copy_from_parent()
neg_g.copy_from_parent()
score1 = F.reshape(model.predict_score(neg_g), (batch_size, -1))
......
......@@ -47,10 +47,20 @@ class ArgParser(argparse.ArgumentParser):
help='batch size used for eval and test')
self.add_argument('--neg_sample_size', type=int, default=128,
help='negative sampling size')
self.add_argument('--neg_chunk_size', type=int, default=-1,
help='chunk size of the negative edges.')
self.add_argument('--neg_deg_sample', action='store_true',
help='negative sample proportional to vertex degree in the training')
self.add_argument('--neg_deg_sample_eval', action='store_true',
help='negative sampling proportional to vertex degree in the evaluation')
self.add_argument('--neg_sample_size_valid', type=int, default=1000,
help='negative sampling size for validation')
self.add_argument('--neg_chunk_size_valid', type=int, default=-1,
help='chunk size of the negative edges.')
self.add_argument('--neg_sample_size_test', type=int, default=-1,
help='negative sampling size for testing')
self.add_argument('--neg_chunk_size_test', type=int, default=-1,
help='chunk size of the negative edges.')
self.add_argument('--hidden_dim', type=int, default=256,
help='hidden dim used by relation and entity')
self.add_argument('--lr', type=float, default=0.0001,
......@@ -138,37 +148,56 @@ def run(args, logger):
if args.neg_sample_size_test < 0:
args.neg_sample_size_test = n_entities
args.eval_filter = not args.no_eval_filter
if args.neg_deg_sample_eval:
assert not args.eval_filter, "if negative sampling is based on degree, we can't filter positive edges."
# When we generate a batch of negative edges from a set of positive edges,
# we first divide the positive edges into chunks and corrupt all edges in a chunk
# together. By default, the chunk size equals the negative sample size, which
# usually works well, but users can also specify the chunk size themselves
# (see the worked example after this block).
if args.neg_chunk_size < 0:
args.neg_chunk_size = args.neg_sample_size
if args.neg_chunk_size_valid < 0:
args.neg_chunk_size_valid = args.neg_sample_size_valid
if args.neg_chunk_size_test < 0:
args.neg_chunk_size_test = args.neg_sample_size_test
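To make the chunking comment above concrete, here is a small illustrative sketch that materializes chunk-tail negatives for a toy batch. It is not part of the commit; in DGL the corruption happens inside the C++ sampler and only node IDs are produced, and the helper name below is hypothetical.

```python
import random

def chunk_tail_negatives(pos_edges, chunk_size, neg_sample_size, num_nodes):
    """Corrupt the tail of each positive edge, one chunk at a time.

    Every chunk of `chunk_size` positive edges shares one set of
    `neg_sample_size` uniformly sampled candidate tails, so each chunk
    contributes chunk_size * neg_sample_size negative edges.
    """
    assert len(pos_edges) % chunk_size == 0
    neg_edges = []
    for start in range(0, len(pos_edges), chunk_size):
        chunk = pos_edges[start:start + chunk_size]
        shared_tails = random.sample(range(num_nodes), neg_sample_size)
        for head, rel, _tail in chunk:
            neg_edges.extend((head, rel, neg_tail) for neg_tail in shared_tails)
    return neg_edges

# 4 positive triples, chunk size 2, 3 shared negatives per chunk -> 12 negatives.
pos = [(0, 0, 1), (2, 0, 3), (4, 1, 5), (6, 1, 7)]
print(len(chunk_tail_negatives(pos, chunk_size=2, neg_sample_size=3, num_nodes=100)))
```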
train_data = TrainDataset(dataset, args, ranks=args.num_proc)
if args.num_proc > 1:
train_samplers = []
for i in range(args.num_proc):
train_sampler_head = train_data.create_sampler(args.batch_size, args.neg_sample_size,
mode='PBG-head',
args.neg_chunk_size,
mode='chunk-head',
num_workers=args.num_worker,
shuffle=True,
exclude_positive=True,
rank=i)
train_sampler_tail = train_data.create_sampler(args.batch_size, args.neg_sample_size,
mode='PBG-tail',
args.neg_chunk_size,
mode='chunk-tail',
num_workers=args.num_worker,
shuffle=True,
exclude_positive=True,
rank=i)
train_samplers.append(NewBidirectionalOneShotIterator(train_sampler_head, train_sampler_tail,
args.neg_chunk_size,
True, n_entities))
else:
train_sampler_head = train_data.create_sampler(args.batch_size, args.neg_sample_size,
mode='PBG-head',
args.neg_chunk_size,
mode='chunk-head',
num_workers=args.num_worker,
shuffle=True,
exclude_positive=True)
train_sampler_tail = train_data.create_sampler(args.batch_size, args.neg_sample_size,
mode='PBG-tail',
args.neg_chunk_size,
mode='chunk-tail',
num_workers=args.num_worker,
shuffle=True,
exclude_positive=True)
train_sampler = NewBidirectionalOneShotIterator(train_sampler_head, train_sampler_tail,
args.neg_chunk_size,
True, n_entities)
# for multiprocessing evaluation, we don't need to sample multiple batches at a time
......@@ -187,14 +216,16 @@ def run(args, logger):
for i in range(args.num_proc):
valid_sampler_head = eval_dataset.create_sampler('valid', args.batch_size_eval,
args.neg_sample_size_valid,
args.neg_chunk_size_valid,
args.eval_filter,
mode='PBG-head',
mode='chunk-head',
num_workers=num_workers,
rank=i, ranks=args.num_proc)
valid_sampler_tail = eval_dataset.create_sampler('valid', args.batch_size_eval,
args.neg_sample_size_valid,
args.neg_chunk_size_valid,
args.eval_filter,
mode='PBG-tail',
mode='chunk-tail',
num_workers=num_workers,
rank=i, ranks=args.num_proc)
valid_sampler_heads.append(valid_sampler_head)
......@@ -202,14 +233,16 @@ def run(args, logger):
else:
valid_sampler_head = eval_dataset.create_sampler('valid', args.batch_size_eval,
args.neg_sample_size_valid,
args.neg_chunk_size_valid,
args.eval_filter,
mode='PBG-head',
mode='chunk-head',
num_workers=num_workers,
rank=0, ranks=1)
valid_sampler_tail = eval_dataset.create_sampler('valid', args.batch_size_eval,
args.neg_sample_size_valid,
args.neg_chunk_size_valid,
args.eval_filter,
mode='PBG-tail',
mode='chunk-tail',
num_workers=num_workers,
rank=0, ranks=1)
if args.test:
......@@ -221,14 +254,16 @@ def run(args, logger):
for i in range(args.num_proc):
test_sampler_head = eval_dataset.create_sampler('test', args.batch_size_eval,
args.neg_sample_size_test,
args.neg_chunk_size_test,
args.eval_filter,
mode='PBG-head',
mode='chunk-head',
num_workers=num_workers,
rank=i, ranks=args.num_proc)
test_sampler_tail = eval_dataset.create_sampler('test', args.batch_size_eval,
args.neg_sample_size_test,
args.neg_chunk_size_test,
args.eval_filter,
mode='PBG-tail',
mode='chunk-tail',
num_workers=num_workers,
rank=i, ranks=args.num_proc)
test_sampler_heads.append(test_sampler_head)
......@@ -236,14 +271,16 @@ def run(args, logger):
else:
test_sampler_head = eval_dataset.create_sampler('test', args.batch_size_eval,
args.neg_sample_size_test,
args.neg_chunk_size_test,
args.eval_filter,
mode='PBG-head',
mode='chunk-head',
num_workers=num_workers,
rank=0, ranks=1)
test_sampler_tail = eval_dataset.create_sampler('test', args.batch_size_eval,
args.neg_sample_size_test,
args.neg_chunk_size_test,
args.eval_filter,
mode='PBG-tail',
mode='chunk-tail',
num_workers=num_workers,
rank=0, ranks=1)
......
......@@ -500,12 +500,16 @@ class EdgeSampler(object):
* 'tail': the negative edges are generated by corrupting tail nodes with uniformly randomly sampled nodes,
* 'PBG-head': the negative edges are generated by corrupting a set \
of head nodes with the same set of nodes uniformly randomly sampled \
from the graph. Please see Pytorch-BigGraph for more details.
* 'chunk-head': the negative edges are generated for a chunk of positive edges. \
The sampler first groups positive edges into chunks and corrupts the edges in a chunk together \
by replacing their head nodes with the same set of nodes uniformly randomly sampled \
from the graph.
* 'PBG-tail': the negative edges are generated by corrupting a set \
of tail nodes with the same set of nodes similar to 'PBG-head'.
* 'chunk-tail': the negative edges are generated by corrupting a set \
of tail nodes with the same set of nodes, analogous to 'chunk-head'.
When we use chunked negative sampling, a chunk size needs to be specified. By default,
the chunk size is the same as the negative sample size.
The sampler returns EdgeSubgraph, where a user can access the unique head nodes
and tail nodes directly.
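A minimal usage sketch of the chunked mode, modeled on the unit test touched by this commit; the toy graph construction assumes a DGL version contemporary with this change (a DGLGraph built from a SciPy sparse matrix with readonly=True), and all sizes are illustrative.

```python
import numpy as np
import scipy.sparse as sp
import dgl

# Toy read-only graph; EdgeSampler requires an immutable graph.
num_nodes = 100
adj = (sp.random(num_nodes, num_nodes, density=0.1, format='coo') != 0).astype(np.int64)
g = dgl.DGLGraph(adj, readonly=True)

sampler = dgl.contrib.sampling.EdgeSampler(
    g,
    batch_size=50,             # positive edges per batch
    neg_sample_size=20,        # negative nodes shared within a chunk
    chunk_size=10,             # positive edges corrupted together
    negative_mode='chunk-head',
    num_workers=1,
    shuffle=False,
    exclude_positive=False,
    return_false_neg=False)

for pos_g, neg_g in sampler:
    # Each chunk of 10 positive edges shares the same 20 corrupted heads,
    # so neg_g holds batch_size * neg_sample_size = 1000 negative edges.
    print(pos_g.number_of_edges(), neg_g.number_of_edges())
    break
```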
......@@ -577,6 +581,8 @@ class EdgeSampler(object):
The method used to construct negative edges. Possible values are 'head', 'tail'.
neg_sample_size : int, optional
The number of negative edges to sample for each edge.
chunk_size : int, optional
The chunk size for chunked negative sampling.
exclude_positive : int, optional
Whether to exclude positive edges from the negative edges.
return_false_neg: bool, optional
......@@ -615,7 +621,8 @@ class EdgeSampler(object):
neg_sample_size=0,
exclude_positive=False,
return_false_neg=False,
relations=None):
relations=None,
chunk_size=None):
self._g = g
if self.immutable_only and not g._graph.is_readonly():
raise NotImplementedError("This loader only support read-only graphs.")
......@@ -659,8 +666,16 @@ class EdgeSampler(object):
self._replacement = replacement
self._reset = reset
if chunk_size is None and negative_mode in ('chunk-head', 'chunk-tail'):
chunk_size = neg_sample_size
elif chunk_size is None:
chunk_size = -1
assert negative_mode in ('', 'head', 'tail', 'chunk-head', 'chunk-tail')
self._num_workers = int(num_workers)
self._negative_mode = negative_mode
self._chunk_size = chunk_size
self._neg_sample_size = neg_sample_size
self._exclude_positive = exclude_positive
if self._is_uniform:
......@@ -675,7 +690,8 @@ class EdgeSampler(object):
self._neg_sample_size,
self._exclude_positive,
self._return_false_neg,
self._relations)
self._relations,
self._chunk_size)
else:
self._sampler = _CAPI_CreateWeightedEdgeSampler(
self.g._graph,
......@@ -690,7 +706,8 @@ class EdgeSampler(object):
self._neg_sample_size,
self._exclude_positive,
self._return_false_neg,
self._relations)
self._relations,
self._chunk_size)
def fetch(self, current_index):
'''
......
......@@ -125,6 +125,7 @@ class EdgeSamplerObject: public Object {
const bool reset,
const std::string neg_mode,
const int64_t neg_sample_size,
const int64_t chunk_size,
const bool exclude_positive,
const bool check_false_neg,
IdArray relations) {
......@@ -140,6 +141,7 @@ class EdgeSamplerObject: public Object {
neg_sample_size_ = neg_sample_size;
exclude_positive_ = exclude_positive;
check_false_neg_ = check_false_neg;
chunk_size_ = chunk_size;
}
~EdgeSamplerObject() {}
......@@ -157,11 +159,11 @@ class EdgeSamplerObject: public Object {
int64_t neg_sample_size,
bool exclude_positive,
bool check_false_neg);
NegSubgraph genPBGNegEdgeSubgraph(const Subgraph &pos_subg,
const std::string &neg_mode,
int64_t neg_sample_size,
bool exclude_positive,
bool check_false_neg);
NegSubgraph genChunkedNegEdgeSubgraph(const Subgraph &pos_subg,
const std::string &neg_mode,
int64_t neg_sample_size,
bool exclude_positive,
bool check_false_neg);
GraphPtr gptr_;
IdArray seed_edges_;
......@@ -175,6 +177,7 @@ class EdgeSamplerObject: public Object {
int64_t neg_sample_size_;
bool exclude_positive_;
bool check_false_neg_;
int64_t chunk_size_;
};
/*
......@@ -1250,11 +1253,11 @@ NegSubgraph EdgeSamplerObject::genNegEdgeSubgraph(const Subgraph &pos_subg,
return neg_subg;
}
NegSubgraph EdgeSamplerObject::genPBGNegEdgeSubgraph(const Subgraph &pos_subg,
const std::string &neg_mode,
int64_t neg_sample_size,
bool exclude_positive,
bool check_false_neg) {
NegSubgraph EdgeSamplerObject::genChunkedNegEdgeSubgraph(const Subgraph &pos_subg,
const std::string &neg_mode,
int64_t neg_sample_size,
bool exclude_positive,
bool check_false_neg) {
int64_t num_tot_nodes = gptr_->NumVertices();
std::vector<IdArray> adj = pos_subg.graph->GetAdj(false, "coo");
IdArray coo = adj[0];
......@@ -1262,7 +1265,8 @@ NegSubgraph EdgeSamplerObject::genPBGNegEdgeSubgraph(const Subgraph &pos_subg,
if (neg_sample_size > num_tot_nodes)
neg_sample_size = num_tot_nodes;
int64_t chunk_size = neg_sample_size;
int64_t chunk_size = chunk_size_;
CHECK_GT(chunk_size, 0) << "chunk size has to be positive";
// If num_pos_edges isn't divisible by chunk_size, the actual number of chunks
// is num_chunks + 1 and the last chunk size is last_chunk_size.
// Otherwise, the actual number of chunks is num_chunks, the last chunk size
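A quick arithmetic illustration of the chunk bookkeeping described in the comment above. This is a sketch only: the Python below is not part of the commit, and the handling of the trailing chunk in the C++ code is not fully shown in this hunk.

```python
def chunk_counts(num_pos_edges, chunk_size):
    # Full-size chunk count plus an optional smaller trailing chunk when the
    # batch is not evenly divisible by the chunk size.
    num_chunks = num_pos_edges // chunk_size
    last_chunk_size = num_pos_edges % chunk_size
    actual_chunks = num_chunks + (1 if last_chunk_size > 0 else 0)
    return num_chunks, last_chunk_size, actual_chunks

print(chunk_counts(1000, 64))  # (15, 40, 16): 15 full chunks plus one chunk of 40
print(chunk_counts(1024, 64))  # (16, 0, 16): evenly divisible, no trailing chunk
```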
......@@ -1316,6 +1320,7 @@ NegSubgraph EdgeSamplerObject::genPBGNegEdgeSubgraph(const Subgraph &pos_subg,
randomSample(num_tot_nodes,
num_chunks * neg_sample_size,
&global_neg_vids);
CHECK_EQ(num_chunks * neg_sample_size, global_neg_vids.size());
std::unordered_map<dgl_id_t, dgl_id_t> neg_map;
dgl_id_t local_vid = 0;
......@@ -1438,6 +1443,7 @@ public:
const bool reset,
const std::string neg_mode,
const int64_t neg_sample_size,
const int64_t chunk_size,
const bool exclude_positive,
const bool check_false_neg,
IdArray relations)
......@@ -1449,6 +1455,7 @@ public:
reset,
neg_mode,
neg_sample_size,
chunk_size,
exclude_positive,
check_false_neg,
relations) {
......@@ -1499,15 +1506,15 @@ public:
Subgraph subg = gptr_->EdgeSubgraph(worker_seeds, false);
positive_subgs[i] = ConvertRef(subg);
// For PBG negative sampling, we accept "PBG-head" for corrupting head
// nodes and "PBG-tail" for corrupting tail nodes.
if (neg_mode_.substr(0, 3) == "PBG") {
NegSubgraph neg_subg = genPBGNegEdgeSubgraph(subg, neg_mode_.substr(4),
neg_sample_size_,
exclude_positive_,
check_false_neg_);
// For chunked negative sampling, we accept "chunk-head" for corrupting head
// nodes and "chunk-tail" for corrupting tail nodes.
if (neg_mode_.substr(0, 5) == "chunk") {
NegSubgraph neg_subg = genChunkedNegEdgeSubgraph(subg, neg_mode_.substr(6),
neg_sample_size_,
exclude_positive_,
check_false_neg_);
negative_subgs[i] = ConvertRef(neg_subg);
} else if (neg_mode_.size() > 0) {
} else if (neg_mode_ == "head" || neg_mode_ == "tail") {
NegSubgraph neg_subg = genNegEdgeSubgraph(subg, neg_mode_,
neg_sample_size_,
exclude_positive_,
......@@ -1585,6 +1592,7 @@ DGL_REGISTER_GLOBAL("sampling._CAPI_CreateUniformEdgeSampler")
const bool exclude_positive = args[8];
const bool check_false_neg = args[9];
IdArray relations = args[10];
const int64_t chunk_size = args[11];
// process args
auto gptr = std::dynamic_pointer_cast<ImmutableGraph>(g.sptr());
CHECK(gptr) << "sampling isn't implemented in mutable graph";
......@@ -1607,6 +1615,7 @@ DGL_REGISTER_GLOBAL("sampling._CAPI_CreateUniformEdgeSampler")
reset,
neg_mode,
neg_sample_size,
chunk_size,
exclude_positive,
check_false_neg,
relations);
......@@ -1638,6 +1647,7 @@ class WeightedEdgeSamplerObject: public EdgeSamplerObject {
const bool reset,
const std::string neg_mode,
const int64_t neg_sample_size,
const int64_t chunk_size,
const bool exclude_positive,
const bool check_false_neg,
IdArray relations)
......@@ -1649,6 +1659,7 @@ class WeightedEdgeSamplerObject: public EdgeSamplerObject {
reset,
neg_mode,
neg_sample_size,
chunk_size,
exclude_positive,
check_false_neg,
relations) {
......@@ -1723,15 +1734,15 @@ class WeightedEdgeSamplerObject: public EdgeSamplerObject {
// TODO(zhengda) what if there are duplicates in the src and dst vectors.
Subgraph subg = gptr_->EdgeSubgraph(worker_seeds, false);
positive_subgs[i] = ConvertRef(subg);
// For PBG negative sampling, we accept "PBG-head" for corrupting head
// nodes and "PBG-tail" for corrupting tail nodes.
if (neg_mode_.substr(0, 3) == "PBG") {
NegSubgraph neg_subg = genPBGNegEdgeSubgraph(subg, neg_mode_.substr(4),
neg_sample_size_,
exclude_positive_,
check_false_neg_);
// For chunked negative sampling, we accept "chunk-head" for corrupting head
// nodes and "chunk-tail" for corrupting tail nodes.
if (neg_mode_.substr(0, 5) == "chunk") {
NegSubgraph neg_subg = genChunkedNegEdgeSubgraph(subg, neg_mode_.substr(6),
neg_sample_size_,
exclude_positive_,
check_false_neg_);
negative_subgs[i] = ConvertRef(neg_subg);
} else if (neg_mode_.size() > 0) {
} else if (neg_mode_ == "head" || neg_mode_ == "tail") {
NegSubgraph neg_subg = genNegEdgeSubgraph(subg, neg_mode_,
neg_sample_size_,
exclude_positive_,
......@@ -1867,6 +1878,7 @@ DGL_REGISTER_GLOBAL("sampling._CAPI_CreateWeightedEdgeSampler")
const bool exclude_positive = args[10];
const bool check_false_neg = args[11];
IdArray relations = args[12];
const int64_t chunk_size = args[13];
auto gptr = std::dynamic_pointer_cast<ImmutableGraph>(g.sptr());
CHECK(gptr) << "sampling isn't implemented in mutable graph";
......@@ -1904,6 +1916,7 @@ DGL_REGISTER_GLOBAL("sampling._CAPI_CreateWeightedEdgeSampler")
reset,
neg_mode,
neg_sample_size,
chunk_size,
exclude_positive,
check_false_neg,
relations);
......
......@@ -700,10 +700,10 @@ def check_positive_edge_sampler():
@unittest.skipIf(dgl.backend.backend_name == "tensorflow", reason="TF doesn't support item assignment")
def test_negative_sampler():
check_negative_sampler('PBG-head', False, 10)
check_negative_sampler('chunk-head', False, 10)
check_negative_sampler('head', True, 10)
check_negative_sampler('head', False, 10)
check_weighted_negative_sampler('PBG-head', False, 10)
check_weighted_negative_sampler('chunk-head', False, 10)
check_weighted_negative_sampler('head', True, 10)
check_weighted_negative_sampler('head', False, 10)
check_positive_edge_sampler()
......