Unverified commit 7612af0f, authored by Xinyu Yao and committed by GitHub
Browse files

[GraphBolt] Remove old version negative sampler. (#7302)


Co-authored-by: Ubuntu <ubuntu@ip-172-31-0-133.us-west-2.compute.internal>
parent 21aeed86
......@@ -1047,60 +1047,6 @@ class FusedCSCSamplingGraph(SamplingGraph):
def sample_negative_edges_uniform(
    self, edge_type, node_pairs, negative_ratio
):
    """Draw uniformly random negative destinations for positive edges.

    For every positive edge ``(u, v)``, `negative_ratio` destination ids
    ``v'`` are drawn uniformly at random. The negative sources would just
    repeat the positive sources, so ``None`` is returned in their place.

    Parameters
    ----------
    edge_type : str
        Type of the edges in `node_pairs`. When it is falsy (e.g. ``None``)
        the graph is handled as homogeneous and destinations are drawn
        from ``[0, self.total_num_nodes)``. Otherwise they are drawn from
        ``[0, self.num_nodes[dst_ntype])``, where ``dst_ntype`` is the
        third item returned by ``etype_str_to_tuple(edge_type)``.
    node_pairs : Tuple[Tensor, Tensor]
        Source and destination tensors of the positive edges. Only the
        source tensor is inspected: its length, dtype and device determine
        the size, dtype and device of the sampled destinations.
    negative_ratio : int
        Number of negative destinations drawn per positive edge.

    Returns
    -------
    Tuple[None, Tensor]
        ``None`` for the negative sources, followed by a 1D tensor holding
        ``len(source) * negative_ratio`` sampled destination ids. The
        sampled pairs are not checked against the graph, so some of them
        may be edges that actually exist.
    """
    # Exclusive upper bound of the id range to draw destinations from.
    if not edge_type:
        upper_bound = self.total_num_nodes
    else:
        _src_ntype, _relation, dst_ntype = etype_str_to_tuple(edge_type)
        upper_bound = self.num_nodes[dst_ntype]

    positive_src, _positive_dst = node_pairs
    sample_shape = (positive_src.size(0) * negative_ratio,)
    negative_dst = torch.randint(
        0,
        upper_bound,
        sample_shape,
        dtype=positive_src.dtype,
        device=positive_src.device,
    )
    return None, negative_dst
def sample_negative_edges_uniform_2(
self, edge_type, node_pairs, negative_ratio
):
"""
Sample negative edges by randomly choosing negative source-destination
......
......@@ -61,38 +61,26 @@ class UniformNegativeSampler(NegativeSampler):
super().__init__(datapipe, negative_ratio)
self.graph = graph
def _sample_with_etype(self, node_pairs, etype=None, use_seeds=False):
if use_seeds:
assert node_pairs.ndim == 2 and node_pairs.shape[1] == 2, (
def _sample_with_etype(self, seeds, etype=None):
assert seeds.ndim == 2 and seeds.shape[1] == 2, (
"Only tensor with shape N*2 is supported for negative"
+ f" sampling, but got {node_pairs.shape}."
+ f" sampling, but got {seeds.shape}."
)
# Sample negative edges, and concatenate positive edges with them.
seeds = self.graph.sample_negative_edges_uniform_2(
all_seeds = self.graph.sample_negative_edges_uniform(
etype,
node_pairs,
seeds,
self.negative_ratio,
)
# Construct indexes for all node pairs.
num_pos_node_pairs = node_pairs.shape[0]
pos_num = seeds.shape[0]
negative_ratio = self.negative_ratio
pos_indexes = torch.arange(
0,
num_pos_node_pairs,
device=seeds.device,
)
pos_indexes = torch.arange(0, pos_num, device=all_seeds.device)
neg_indexes = pos_indexes.repeat_interleave(negative_ratio)
indexes = torch.cat((pos_indexes, neg_indexes))
# Construct labels for all node pairs.
pos_num = node_pairs.shape[0]
neg_num = seeds.shape[0] - pos_num
labels = torch.empty(pos_num + neg_num, device=seeds.device)
neg_num = all_seeds.shape[0] - pos_num
labels = torch.empty(pos_num + neg_num, device=all_seeds.device)
labels[:pos_num] = 1
labels[pos_num:] = 0
return seeds, labels, indexes
else:
return self.graph.sample_negative_edges_uniform(
etype,
node_pairs,
self.negative_ratio,
)
return all_seeds, labels, indexes
......@@ -45,11 +45,9 @@ class NegativeSampler(MiniBatchTransformer):
Parameters
----------
minibatch : MiniBatch
An instance of 'MiniBatch' class requires the 'node_pairs' field.
This function is responsible for generating negative edges
corresponding to the positive edges defined by the 'node_pairs'. In
cases where negative edges already exist, this function will
overwrite them.
An instance of 'MiniBatch' class requires the 'seeds' field. This
function is responsible for generating negative edges corresponding
to the positive edges defined by the 'seeds'.
Returns
-------
......@@ -57,20 +55,6 @@ class NegativeSampler(MiniBatchTransformer):
An instance of 'MiniBatch' encompasses both positive and negative
samples.
"""
if minibatch.seeds is None:
node_pairs = minibatch.node_pairs
assert node_pairs is not None
if isinstance(node_pairs, Mapping):
minibatch.negative_srcs, minibatch.negative_dsts = {}, {}
for etype, pos_pairs in node_pairs.items():
self._collate(
minibatch,
self._sample_with_etype(pos_pairs, etype),
etype,
)
else:
self._collate(minibatch, self._sample_with_etype(node_pairs))
else:
seeds = minibatch.seeds
if isinstance(seeds, Mapping):
if minibatch.indexes is None:
......@@ -82,27 +66,25 @@ class NegativeSampler(MiniBatchTransformer):
minibatch.seeds[etype],
minibatch.labels[etype],
minibatch.indexes[etype],
) = self._sample_with_etype(
pos_pairs, etype, use_seeds=True
)
) = self._sample_with_etype(pos_pairs, etype)
else:
(
minibatch.seeds,
minibatch.labels,
minibatch.indexes,
) = self._sample_with_etype(seeds, use_seeds=True)
) = self._sample_with_etype(seeds)
return minibatch
def _sample_with_etype(self, node_pairs, etype=None, use_seeds=False):
def _sample_with_etype(self, seeds, etype=None):
"""Generate negative pairs for a given etype form positive pairs
for a given etype. If `node_pairs` is a 2D tensor, which represents
for a given etype. If `seeds` is a 2D tensor, which represents
`seeds` is used in minibatch, corresponding labels and indexes will be
constructed.
Parameters
----------
node_pairs : Tuple[Tensor, Tensor]
A tuple of tensors that represent source-destination node pairs of
seeds : Tensor
An N*2 tensor that represents source-destination node pairs of
positive edges, where positive means the edge must exist in the
graph.
etype : str
......@@ -110,40 +92,13 @@ class NegativeSampler(MiniBatchTransformer):
Returns
-------
Tuple[Tensor, Tensor] or Tensor
A collection of negative node pairs.
Tensor or None
Tensor
A collection of positive and negative node pairs.
Tensor
Corresponding labels. If label is True, corresponding edge is
positive. If label is False, corresponding edge is negative.
Tensor or None
Tensor
Corresponding indexes, indicates to which query an edge belongs.
"""
raise NotImplementedError
def _collate(self, minibatch, neg_pairs, etype=None):
"""Collates positive and negative samples into minibatch.
Parameters
----------
minibatch : MiniBatch
The input minibatch, which contains positive node pairs, will be
filled with negative information in this function.
neg_pairs : Tuple[Tensor, Tensor]
A tuple of tensors represents source-destination node pairs of
negative edges, where negative means the edge may not exist in
the graph.
etype : str
Canonical edge type.
"""
neg_src, neg_dst = neg_pairs
if neg_src is not None:
neg_src = neg_src.view(-1, self.negative_ratio)
if neg_dst is not None:
neg_dst = neg_dst.view(-1, self.negative_ratio)
if etype is not None:
minibatch.negative_srcs[etype] = neg_src
minibatch.negative_dsts[etype] = neg_dst
else:
minibatch.negative_srcs = neg_src
minibatch.negative_dsts = neg_dst
......@@ -13,7 +13,7 @@ def test_NegativeSampler_invoke():
# Instantiate graph and required datapipes.
num_seeds = 30
item_set = gb.ItemSet(
torch.arange(0, 2 * num_seeds).reshape(-1, 2), names="node_pairs"
torch.arange(0, 2 * num_seeds).reshape(-1, 2), names="seeds"
)
batch_size = 10
item_sampler = gb.ItemSampler(item_set, batch_size=batch_size).copy_to(
......@@ -76,94 +76,6 @@ def test_UniformNegativeSampler_invoke():
_verify(negative_sampler)
def test_UniformNegativeSampler_node_pairs_invoke():
    """Both ways of building a UniformNegativeSampler yield full batches.

    The item set is named "node_pairs"; every produced minibatch must hold
    the positive pairs plus `negative_ratio` negatives per positive in its
    ``seeds``, ``labels`` and ``indexes`` fields.
    """
    # Build the graph and the upstream datapipe.
    graph = gb_test_utils.rand_csc_graph(100, 0.05, bidirection_edge=True).to(
        F.ctx()
    )
    num_seeds = 30
    positive_pairs = torch.arange(0, 2 * num_seeds).reshape(-1, 2)
    item_set = gb.ItemSet(positive_pairs, names="node_pairs")
    batch_size = 10
    item_sampler = gb.ItemSampler(item_set, batch_size=batch_size).copy_to(
        F.ctx()
    )
    negative_ratio = 2

    def _verify(negative_sampler):
        # Every minibatch carries the positives followed by the negatives.
        for minibatch in negative_sampler:
            expected_len = batch_size * negative_ratio + batch_size
            for field in ("seeds", "labels", "indexes"):
                assert getattr(minibatch, field).size(0) == expected_len

    # Class-constructor form.
    via_constructor = gb.UniformNegativeSampler(
        item_sampler, graph, negative_ratio
    )
    _verify(via_constructor)

    # Functional form.
    via_functional = item_sampler.sample_uniform_negative(
        graph, negative_ratio
    )
    _verify(via_functional)
@pytest.mark.parametrize("negative_ratio", [1, 5, 10, 20])
def test_Uniform_NegativeSampler_node_pairs(negative_ratio):
    """Check labels, indexes and negative sources for "node_pairs" items.

    For each minibatch the first `batch_size` rows are expected to be the
    positives (label 1) and the rest the negatives (label 0), with each
    negative reusing the source of the positive it was sampled for.
    """
    # Build a FusedCSCSamplingGraph and the upstream item sampler.
    graph = gb_test_utils.rand_csc_graph(100, 0.05, bidirection_edge=True).to(
        F.ctx()
    )
    num_seeds = 30
    positive_pairs = torch.arange(0, num_seeds * 2).reshape(-1, 2)
    item_set = gb.ItemSet(positive_pairs, names="node_pairs")
    batch_size = 10
    item_sampler = gb.ItemSampler(item_set, batch_size=batch_size).copy_to(
        F.ctx()
    )
    # Attach the negative sampler under test.
    negative_sampler = gb.UniformNegativeSampler(
        item_sampler, graph, negative_ratio
    )
    # Run negative sampling and compare against hand-built expectations.
    for minibatch in negative_sampler:
        total = batch_size * (negative_ratio + 1)
        # Ones for the leading positives, zeros for the trailing negatives.
        want_labels = torch.ones(total, device=F.ctx())
        want_labels[batch_size:] = 0
        # Positives map to themselves; each negative maps to its positive.
        query_ids = torch.arange(batch_size, device=F.ctx())
        want_indexes = torch.cat(
            (query_ids, query_ids.repeat_interleave(negative_ratio))
        )
        want_neg_src = minibatch.seeds[:batch_size, 0].repeat_interleave(
            negative_ratio
        )
        # Assertions
        assert minibatch.negative_srcs is None
        assert minibatch.negative_dsts is None
        assert minibatch.labels is not None
        assert minibatch.indexes is not None
        assert minibatch.seeds.size(0) == total
        assert torch.equal(minibatch.labels, want_labels)
        assert torch.equal(minibatch.indexes, want_indexes)
        assert torch.equal(minibatch.seeds[batch_size:, 0], want_neg_src)
@pytest.mark.parametrize("negative_ratio", [1, 5, 10, 20])
def test_Uniform_NegativeSampler(negative_ratio):
# Construct FusedCSCSamplingGraph.
......@@ -307,26 +219,6 @@ def get_hetero_graph():
)
def test_NegativeSampler_Hetero_node_pairs_Data():
    """Negative sampling over a heterogeneous "node_pairs" item set.

    The two edge types contribute 4 and 6 positive pairs; with a batch
    size of 2 the pipeline is expected to yield 5 minibatches.
    """
    graph = get_hetero_graph().to(F.ctx())
    # Each tensor is built as (sources, destinations) and transposed to N*2.
    e1_pairs = torch.LongTensor([[0, 0, 1, 1], [0, 2, 0, 1]]).T
    e2_pairs = torch.LongTensor([[0, 0, 1, 1, 2, 2], [0, 1, 1, 0, 0, 1]]).T
    itemset = gb.ItemSetDict(
        {
            "n1:e1:n2": gb.ItemSet(e1_pairs, names="node_pairs"),
            "n2:e2:n1": gb.ItemSet(e2_pairs, names="node_pairs"),
        }
    )

    item_sampler = gb.ItemSampler(itemset, batch_size=2).copy_to(F.ctx())
    negative_dp = gb.UniformNegativeSampler(item_sampler, graph, 1)
    num_batches = sum(1 for _ in negative_dp)
    assert num_batches == 5
def test_NegativeSampler_Hetero_Data():
graph = get_hetero_graph().to(F.ctx())
itemset = gb.ItemSetDict(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment