Unverified Commit 7612af0f authored by Xinyu Yao's avatar Xinyu Yao Committed by GitHub
Browse files

[GraphBolt] Remove old version negative sampler. (#7302)


Co-authored-by: default avatarUbuntu <ubuntu@ip-172-31-0-133.us-west-2.compute.internal>
parent 21aeed86
...@@ -1047,60 +1047,6 @@ class FusedCSCSamplingGraph(SamplingGraph): ...@@ -1047,60 +1047,6 @@ class FusedCSCSamplingGraph(SamplingGraph):
def sample_negative_edges_uniform( def sample_negative_edges_uniform(
self, edge_type, node_pairs, negative_ratio self, edge_type, node_pairs, negative_ratio
):
"""
Sample negative edges by randomly choosing negative source-destination
pairs according to a uniform distribution. For each edge ``(u, v)``,
it is supposed to generate `negative_ratio` pairs of negative edges
``(u, v')``, where ``v'`` is chosen uniformly from all the nodes in
the graph. As ``u`` is exactly same as the corresponding positive edges,
it returns None for negative sources.
Parameters
----------
edge_type: str
The type of edges in the provided node_pairs. Any negative edges
sampled will also have the same type. If set to None, it will be
considered as a homogeneous graph.
node_pairs : Tuple[Tensor, Tensor]
A tuple of two 1D tensors that represent the source and destination
of positive edges, with 'positive' indicating that these edges are
present in the graph. It's important to note that within the
context of a heterogeneous graph, the ids in these tensors signify
heterogeneous ids.
negative_ratio: int
The ratio of the number of negative samples to positive samples.
Returns
-------
Tuple[Tensor, Tensor]
A tuple consisting of two 1D tensors represents the source and
destination of negative edges. In the context of a heterogeneous
graph, both the input nodes and the selected nodes are represented
by heterogeneous IDs, and the formed edges are of the input type
`edge_type`. Note that negative refers to false negatives, which
means the edge could be present or not present in the graph.
"""
if edge_type:
_, _, dst_ntype = etype_str_to_tuple(edge_type)
max_node_id = self.num_nodes[dst_ntype]
else:
max_node_id = self.total_num_nodes
pos_src, _ = node_pairs
num_negative = pos_src.size(0) * negative_ratio
return (
None,
torch.randint(
0,
max_node_id,
(num_negative,),
dtype=pos_src.dtype,
device=pos_src.device,
),
)
def sample_negative_edges_uniform_2(
self, edge_type, node_pairs, negative_ratio
): ):
""" """
Sample negative edges by randomly choosing negative source-destination Sample negative edges by randomly choosing negative source-destination
......
...@@ -61,38 +61,26 @@ class UniformNegativeSampler(NegativeSampler): ...@@ -61,38 +61,26 @@ class UniformNegativeSampler(NegativeSampler):
super().__init__(datapipe, negative_ratio) super().__init__(datapipe, negative_ratio)
self.graph = graph self.graph = graph
def _sample_with_etype(self, node_pairs, etype=None, use_seeds=False): def _sample_with_etype(self, seeds, etype=None):
if use_seeds: assert seeds.ndim == 2 and seeds.shape[1] == 2, (
assert node_pairs.ndim == 2 and node_pairs.shape[1] == 2, (
"Only tensor with shape N*2 is supported for negative" "Only tensor with shape N*2 is supported for negative"
+ f" sampling, but got {node_pairs.shape}." + f" sampling, but got {seeds.shape}."
) )
# Sample negative edges, and concatenate positive edges with them. # Sample negative edges, and concatenate positive edges with them.
seeds = self.graph.sample_negative_edges_uniform_2( all_seeds = self.graph.sample_negative_edges_uniform(
etype, etype,
node_pairs, seeds,
self.negative_ratio, self.negative_ratio,
) )
# Construct indexes for all node pairs. # Construct indexes for all node pairs.
num_pos_node_pairs = node_pairs.shape[0] pos_num = seeds.shape[0]
negative_ratio = self.negative_ratio negative_ratio = self.negative_ratio
pos_indexes = torch.arange( pos_indexes = torch.arange(0, pos_num, device=all_seeds.device)
0,
num_pos_node_pairs,
device=seeds.device,
)
neg_indexes = pos_indexes.repeat_interleave(negative_ratio) neg_indexes = pos_indexes.repeat_interleave(negative_ratio)
indexes = torch.cat((pos_indexes, neg_indexes)) indexes = torch.cat((pos_indexes, neg_indexes))
# Construct labels for all node pairs. # Construct labels for all node pairs.
pos_num = node_pairs.shape[0] neg_num = all_seeds.shape[0] - pos_num
neg_num = seeds.shape[0] - pos_num labels = torch.empty(pos_num + neg_num, device=all_seeds.device)
labels = torch.empty(pos_num + neg_num, device=seeds.device)
labels[:pos_num] = 1 labels[:pos_num] = 1
labels[pos_num:] = 0 labels[pos_num:] = 0
return seeds, labels, indexes return all_seeds, labels, indexes
else:
return self.graph.sample_negative_edges_uniform(
etype,
node_pairs,
self.negative_ratio,
)
...@@ -45,11 +45,9 @@ class NegativeSampler(MiniBatchTransformer): ...@@ -45,11 +45,9 @@ class NegativeSampler(MiniBatchTransformer):
Parameters Parameters
---------- ----------
minibatch : MiniBatch minibatch : MiniBatch
An instance of 'MiniBatch' class requires the 'node_pairs' field. An instance of 'MiniBatch' class requires the 'seeds' field. This
This function is responsible for generating negative edges function is responsible for generating negative edges corresponding
corresponding to the positive edges defined by the 'node_pairs'. In to the positive edges defined by the 'seeds'.
cases where negative edges already exist, this function will
overwrite them.
Returns Returns
------- -------
...@@ -57,20 +55,6 @@ class NegativeSampler(MiniBatchTransformer): ...@@ -57,20 +55,6 @@ class NegativeSampler(MiniBatchTransformer):
An instance of 'MiniBatch' encompasses both positive and negative An instance of 'MiniBatch' encompasses both positive and negative
samples. samples.
""" """
if minibatch.seeds is None:
node_pairs = minibatch.node_pairs
assert node_pairs is not None
if isinstance(node_pairs, Mapping):
minibatch.negative_srcs, minibatch.negative_dsts = {}, {}
for etype, pos_pairs in node_pairs.items():
self._collate(
minibatch,
self._sample_with_etype(pos_pairs, etype),
etype,
)
else:
self._collate(minibatch, self._sample_with_etype(node_pairs))
else:
seeds = minibatch.seeds seeds = minibatch.seeds
if isinstance(seeds, Mapping): if isinstance(seeds, Mapping):
if minibatch.indexes is None: if minibatch.indexes is None:
...@@ -82,27 +66,25 @@ class NegativeSampler(MiniBatchTransformer): ...@@ -82,27 +66,25 @@ class NegativeSampler(MiniBatchTransformer):
minibatch.seeds[etype], minibatch.seeds[etype],
minibatch.labels[etype], minibatch.labels[etype],
minibatch.indexes[etype], minibatch.indexes[etype],
) = self._sample_with_etype( ) = self._sample_with_etype(pos_pairs, etype)
pos_pairs, etype, use_seeds=True
)
else: else:
( (
minibatch.seeds, minibatch.seeds,
minibatch.labels, minibatch.labels,
minibatch.indexes, minibatch.indexes,
) = self._sample_with_etype(seeds, use_seeds=True) ) = self._sample_with_etype(seeds)
return minibatch return minibatch
def _sample_with_etype(self, node_pairs, etype=None, use_seeds=False): def _sample_with_etype(self, seeds, etype=None):
"""Generate negative pairs for a given etype form positive pairs """Generate negative pairs for a given etype form positive pairs
for a given etype. If `node_pairs` is a 2D tensor, which represents for a given etype. If `seeds` is a 2D tensor, which represents
`seeds` is used in minibatch, corresponding labels and indexes will be `seeds` is used in minibatch, corresponding labels and indexes will be
constructed. constructed.
Parameters Parameters
---------- ----------
node_pairs : Tuple[Tensor, Tensor] seeds : Tensor
A tuple of tensors that represent source-destination node pairs of An N*2 tensor that represents source-destination node pairs of
positive edges, where positive means the edge must exist in the positive edges, where positive means the edge must exist in the
graph. graph.
etype : str etype : str
...@@ -110,40 +92,13 @@ class NegativeSampler(MiniBatchTransformer): ...@@ -110,40 +92,13 @@ class NegativeSampler(MiniBatchTransformer):
Returns Returns
------- -------
Tuple[Tensor, Tensor] or Tensor Tensor
A collection of negative node pairs. A collection of positive and negative node pairs.
Tensor or None Tensor
Corresponding labels. If label is True, corresponding edge is Corresponding labels. If label is True, corresponding edge is
positive. If label is False, corresponding edge is negative. positive. If label is False, corresponding edge is negative.
Tensor or None Tensor
Corresponding indexes, indicates to which query an edge belongs. Corresponding indexes, indicates to which query an edge belongs.
""" """
raise NotImplementedError raise NotImplementedError
def _collate(self, minibatch, neg_pairs, etype=None):
    """Write sampled negative pairs into the minibatch.

    Parameters
    ----------
    minibatch : MiniBatch
        The minibatch to fill. Its ``negative_srcs`` and ``negative_dsts``
        fields are assigned here.
    neg_pairs : Tuple[Tensor, Tensor]
        Source and destination node ids of the negative edges. Either
        entry may be None, in which case None is stored unchanged.
    etype : str
        Canonical edge type. If given, the results are stored under this
        key in ``minibatch.negative_srcs`` and ``minibatch.negative_dsts``
        (which must already support item assignment); otherwise the two
        fields themselves are replaced.
    """

    def _as_rows(ids):
        # Fold the flat ids into rows of ``negative_ratio`` columns; a
        # missing side (None) is passed through untouched.
        return ids if ids is None else ids.view(-1, self.negative_ratio)

    src_ids, dst_ids = neg_pairs
    src_rows = _as_rows(src_ids)
    dst_rows = _as_rows(dst_ids)

    if etype is None:
        # Homogeneous case: overwrite the fields directly.
        minibatch.negative_srcs = src_rows
        minibatch.negative_dsts = dst_rows
        return

    # Heterogeneous case: file the result under its edge type.
    minibatch.negative_srcs[etype] = src_rows
    minibatch.negative_dsts[etype] = dst_rows
...@@ -13,7 +13,7 @@ def test_NegativeSampler_invoke(): ...@@ -13,7 +13,7 @@ def test_NegativeSampler_invoke():
# Instantiate graph and required datapipes. # Instantiate graph and required datapipes.
num_seeds = 30 num_seeds = 30
item_set = gb.ItemSet( item_set = gb.ItemSet(
torch.arange(0, 2 * num_seeds).reshape(-1, 2), names="node_pairs" torch.arange(0, 2 * num_seeds).reshape(-1, 2), names="seeds"
) )
batch_size = 10 batch_size = 10
item_sampler = gb.ItemSampler(item_set, batch_size=batch_size).copy_to( item_sampler = gb.ItemSampler(item_set, batch_size=batch_size).copy_to(
...@@ -76,94 +76,6 @@ def test_UniformNegativeSampler_invoke(): ...@@ -76,94 +76,6 @@ def test_UniformNegativeSampler_invoke():
_verify(negative_sampler) _verify(negative_sampler)
def test_UniformNegativeSampler_node_pairs_invoke():
    """Check both ways of building a UniformNegativeSampler.

    The sampler is created once through the class constructor and once
    through the functional form on the item sampler; every minibatch from
    either must carry ``seeds``, ``labels`` and ``indexes`` whose first
    dimension is ``batch_size * (1 + negative_ratio)``.
    """
    # Random graph on the test device.
    graph = gb_test_utils.rand_csc_graph(100, 0.05, bidirection_edge=True)
    graph = graph.to(F.ctx())

    # 30 positive pairs, served 10 at a time.
    num_seeds = 30
    pairs = torch.arange(0, 2 * num_seeds).reshape(-1, 2)
    item_set = gb.ItemSet(pairs, names="node_pairs")
    batch_size = 10
    item_sampler = gb.ItemSampler(item_set, batch_size=batch_size)
    item_sampler = item_sampler.copy_to(F.ctx())
    negative_ratio = 2

    def _verify(negative_sampler):
        # Positives plus ``negative_ratio`` negatives per positive.
        for data in negative_sampler:
            expected_len = batch_size * (1 + negative_ratio)
            assert data.seeds.size(0) == expected_len
            assert data.labels.size(0) == expected_len
            assert data.indexes.size(0) == expected_len

    builders = (
        # Class constructor.
        lambda: gb.UniformNegativeSampler(
            item_sampler, graph, negative_ratio
        ),
        # Functional form.
        lambda: item_sampler.sample_uniform_negative(graph, negative_ratio),
    )
    # Build and verify one sampler at a time, in the order listed.
    for build in builders:
        _verify(build())
@pytest.mark.parametrize("negative_ratio", [1, 5, 10, 20])
def test_Uniform_NegativeSampler_node_pairs(negative_ratio):
    """Check labels, indexes and negative sources of sampled minibatches.

    For every minibatch the first ``batch_size`` rows of ``seeds`` are the
    positives (label 1) and the rest are negatives (label 0). Each negative
    must reuse the source node of the positive it was generated for, and
    ``indexes`` must map every row back to that positive.
    """
    # Construct FusedCSCSamplingGraph.
    graph = gb_test_utils.rand_csc_graph(100, 0.05, bidirection_edge=True)
    graph = graph.to(F.ctx())

    num_seeds = 30
    pairs = torch.arange(0, num_seeds * 2).reshape(-1, 2)
    item_set = gb.ItemSet(pairs, names="node_pairs")
    batch_size = 10
    item_sampler = gb.ItemSampler(item_set, batch_size=batch_size)
    item_sampler = item_sampler.copy_to(F.ctx())

    # Construct NegativeSampler.
    negative_sampler = gb.UniformNegativeSampler(
        item_sampler, graph, negative_ratio
    )

    num_neg = batch_size * negative_ratio
    # Perform negative sampling.
    for data in negative_sampler:
        # Ones for the positives followed by zeros for the negatives.
        expected_labels = torch.cat(
            (
                torch.ones(batch_size, device=F.ctx()),
                torch.zeros(num_neg, device=F.ctx()),
            )
        )
        # Row i of the positives is query i; its negatives repeat that id.
        query_ids = torch.arange(batch_size, device=F.ctx())
        expected_indexes = torch.cat(
            (query_ids, query_ids.repeat_interleave(negative_ratio))
        )
        # Negatives keep the source column of their positive edge.
        pos_src = data.seeds[:batch_size, 0]
        expected_neg_src = pos_src.repeat_interleave(negative_ratio)

        # Assertions.
        assert data.negative_srcs is None
        assert data.negative_dsts is None
        assert data.labels is not None
        assert data.indexes is not None
        assert data.seeds.size(0) == batch_size + num_neg
        assert torch.equal(data.labels, expected_labels)
        assert torch.equal(data.indexes, expected_indexes)
        assert torch.equal(data.seeds[batch_size:, 0], expected_neg_src)
@pytest.mark.parametrize("negative_ratio", [1, 5, 10, 20]) @pytest.mark.parametrize("negative_ratio", [1, 5, 10, 20])
def test_Uniform_NegativeSampler(negative_ratio): def test_Uniform_NegativeSampler(negative_ratio):
# Construct FusedCSCSamplingGraph. # Construct FusedCSCSamplingGraph.
...@@ -307,26 +219,6 @@ def get_hetero_graph(): ...@@ -307,26 +219,6 @@ def get_hetero_graph():
) )
def test_NegativeSampler_Hetero_node_pairs_Data():
    """Count the minibatches produced from a two-edge-type item set.

    Four ``n1:e1:n2`` pairs and six ``n2:e2:n1`` pairs are sampled with
    ``batch_size=2`` and one negative per positive; five minibatches are
    expected in total.
    """
    graph = get_hetero_graph().to(F.ctx())

    # Positive pairs per edge type, one (src, dst) pair per row.
    e1_pairs = torch.LongTensor([[0, 0, 1, 1], [0, 2, 0, 1]]).T
    e2_pairs = torch.LongTensor([[0, 0, 1, 1, 2, 2], [0, 1, 1, 0, 0, 1]]).T
    itemset = gb.ItemSetDict(
        {
            "n1:e1:n2": gb.ItemSet(e1_pairs, names="node_pairs"),
            "n2:e2:n1": gb.ItemSet(e2_pairs, names="node_pairs"),
        }
    )

    item_sampler = gb.ItemSampler(itemset, batch_size=2).copy_to(F.ctx())
    negative_dp = gb.UniformNegativeSampler(item_sampler, graph, 1)

    # NOTE(review): presumably 10 pairs / batch_size 2 = 5 batches --
    # confirm against how ItemSampler batches an ItemSetDict.
    num_batches = sum(1 for _ in negative_dp)
    assert num_batches == 5
def test_NegativeSampler_Hetero_Data(): def test_NegativeSampler_Hetero_Data():
graph = get_hetero_graph().to(F.ctx()) graph = get_hetero_graph().to(F.ctx())
itemset = gb.ItemSetDict( itemset = gb.ItemSetDict(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment