Unverified Commit 661f8177 authored by peizhou001's avatar peizhou001 Committed by GitHub
Browse files

[Graphbolt]Add data format (#6075)

parent 14f396d0
......@@ -11,14 +11,30 @@ class LinkPredictionEdgeFormat(Enum):
in link prediction:
Attributes:
CONDITIONED: Represents the 'conditioned' format where data is
structured as quadruples `[u, v, [negative heads], [negative tails]]`
indicating the source and destination nodes of positive and negative edges.
INDEPENDENT: Represents the 'independent' format where data is structured
as triples `[u, v, label]` indicating the source and destination nodes of
as triples `(u, v, label)` indicating the source and destination nodes of
an edge, with a label (0 or 1) denoting it as negative or positive.
CONDITIONED: Represents the 'conditioned' format where data is structured
as quadruples `(u, v, neg_u, neg_v)` indicating the source and destination
nodes of positive and negative edges. 'u' and 'v' are 1D tensors with
the same shape, while 'neg_u' and 'neg_v' are 2D tensors with the same
shape.
HEAD_CONDITIONED: Represents the 'head conditioned' format where data is
structured as triples `(u, v, neg_u)`, where '(u, v)' signifies the
source and destination nodes of positive edges, while each node in
'neg_u' is paired with 'v' to form negative edges. 'u' and 'v' are
1D tensors with the same shape, while 'neg_u' is a 2D tensor.
TAIL_CONDITIONED: Represents the 'tail conditioned' format where data is
structured as triples `(u, v, neg_v)`, where '(u, v)' signifies the
source and destination nodes of positive edges, while 'u' is paired
with each node in 'neg_v' to form negative edges. 'u' and 'v' are
1D tensors with the same shape, while 'neg_v' is a 2D tensor.
"""
CONDITIONED = "conditioned"
INDEPENDENT = "independent"
CONDITIONED = "conditioned"
HEAD_CONDITIONED = "head_conditioned"
TAIL_CONDITIONED = "tail_conditioned"
......@@ -31,16 +31,7 @@ class UniformNegativeSampler(NegativeSampler):
negative_ratio : int
The proportion of negative samples to positive samples.
output_format : LinkPredictionEdgeFormat
Determines the format of the output data:
- Conditioned format: Outputs data as quadruples
`[u, v, [negative heads], [negative tails]]`. Here, 'u' and 'v'
are the source and destination nodes of positive edges, while
'negative heads' and 'negative tails' refer to the source and
destination nodes of negative edges.
- Independent format: Outputs data as triples `[u, v, label]`.
In this case, 'u' and 'v' are the source and destination nodes
of an edge, and 'label' indicates whether the edge is negative
(0) or positive (1).
Determines the format of the output data.
graph : CSCSamplingGraph
The graph on which to perform negative sampling.
......
......@@ -30,16 +30,7 @@ class NegativeSampler(Mapper):
negative_ratio : int
The proportion of negative samples to positive samples.
output_format : LinkPredictionEdgeFormat
Determines the edge format of the output data:
- Conditioned format: Outputs data as quadruples
`[u, v, [negative heads], [negative tails]]`. Here, 'u' and 'v'
are the source and destination nodes of positive edges, while
'negative heads' and 'negative tails' refer to the source and
destination nodes of negative edges.
- Independent format: Outputs data as triples `[u, v, label]`.
In this case, 'u' and 'v' are the source and destination nodes
of an edge, and 'label' indicates whether the edge is negative
(0) or positive (1).
Determines the edge format of the output data.
"""
super().__init__(datapipe, self._sample)
assert negative_ratio > 0, "Negative_ratio should be positive Integer."
......@@ -129,5 +120,15 @@ class NegativeSampler(Mapper):
neg_src = neg_src.view(-1, self.negative_ratio)
neg_dst = neg_dst.view(-1, self.negative_ratio)
return (pos_src, pos_dst, neg_src, neg_dst)
elif self.output_format == LinkPredictionEdgeFormat.HEAD_CONDITIONED:
pos_src, pos_dst = pos_pairs
neg_src, _ = neg_pairs
neg_src = neg_src.view(-1, self.negative_ratio)
return (pos_src, pos_dst, neg_src)
elif self.output_format == LinkPredictionEdgeFormat.TAIL_CONDITIONED:
pos_src, pos_dst = pos_pairs
_, neg_dst = neg_pairs
neg_dst = neg_dst.view(-1, self.negative_ratio)
return (pos_src, pos_dst, neg_dst)
else:
raise ValueError("Unsupported output format.")
......@@ -67,3 +67,65 @@ def test_NegativeSampler_Conditioned_Format(negative_ratio):
assert neg_dst.numel() == batch_size * negative_ratio
expected_src = pos_src.repeat(negative_ratio).view(-1, negative_ratio)
assert torch.equal(expected_src, neg_src)
@pytest.mark.parametrize("negative_ratio", [1, 5, 10, 20])
def test_NegativeSampler_Head_Conditioned_Format(negative_ratio):
    """Check the HEAD_CONDITIONED output of ``UniformNegativeSampler``.

    Every minibatch must unpack into ``(pos_src, pos_dst, neg_src)``, where
    the positive tensors hold ``batch_size`` entries each and ``neg_src``
    has ``batch_size`` rows totalling ``batch_size * negative_ratio``
    elements. ``neg_src`` is additionally compared against ``pos_src``
    repeated ``negative_ratio`` times and viewed with ``negative_ratio``
    columns.
    """
    batch_size = 10
    num_seeds = 30

    # Random graph that the negative sampler draws from.
    graph = gb_test_utils.rand_csc_graph(100, 0.05)

    # Positive edges: sources are [0, num_seeds), destinations are
    # [num_seeds, 2 * num_seeds).
    src_seeds = torch.arange(0, num_seeds)
    dst_seeds = torch.arange(num_seeds, num_seeds * 2)
    item_set = gb.ItemSet((src_seeds, dst_seeds))
    batches = gb.MinibatchSampler(item_set, batch_size=batch_size)

    # Negative sampler emitting head-conditioned triples.
    sampler = gb.UniformNegativeSampler(
        batches,
        negative_ratio,
        gb.LinkPredictionEdgeFormat.HEAD_CONDITIONED,
        graph,
    )

    # Run negative sampling and validate each minibatch.
    for pos_src, pos_dst, neg_src in sampler:
        # Assertion
        assert len(pos_src) == batch_size
        assert len(pos_dst) == batch_size
        assert len(neg_src) == batch_size
        assert neg_src.numel() == batch_size * negative_ratio
        repeated_src = pos_src.repeat(negative_ratio)
        expected_src = repeated_src.view(-1, negative_ratio)
        assert torch.equal(expected_src, neg_src)
@pytest.mark.parametrize("negative_ratio", [1, 5, 10, 20])
def test_NegativeSampler_Tail_Conditioned_Format(negative_ratio):
    """Check the TAIL_CONDITIONED output of ``UniformNegativeSampler``.

    Every minibatch must unpack into ``(pos_src, pos_dst, neg_dst)``, where
    the positive tensors hold ``batch_size`` entries each and ``neg_dst``
    has ``batch_size`` rows totalling ``batch_size * negative_ratio``
    elements.
    """
    batch_size = 10
    num_seeds = 30

    # Random graph that the negative sampler draws from.
    graph = gb_test_utils.rand_csc_graph(100, 0.05)

    # Positive edges: sources are [0, num_seeds), destinations are
    # [num_seeds, 2 * num_seeds).
    src_seeds = torch.arange(0, num_seeds)
    dst_seeds = torch.arange(num_seeds, num_seeds * 2)
    item_set = gb.ItemSet((src_seeds, dst_seeds))
    batches = gb.MinibatchSampler(item_set, batch_size=batch_size)

    # Negative sampler emitting tail-conditioned triples.
    sampler = gb.UniformNegativeSampler(
        batches,
        negative_ratio,
        gb.LinkPredictionEdgeFormat.TAIL_CONDITIONED,
        graph,
    )

    # Run negative sampling and validate each minibatch.
    for pos_src, pos_dst, neg_dst in sampler:
        # Assertion
        assert len(pos_src) == batch_size
        assert len(pos_dst) == batch_size
        assert len(neg_dst) == batch_size
        assert neg_dst.numel() == batch_size * negative_ratio
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment