Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
661f8177
Unverified
Commit
661f8177
authored
Aug 02, 2023
by
peizhou001
Committed by
GitHub
Aug 02, 2023
Browse files
[Graphbolt]Add data format (#6075)
parent
14f396d0
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
96 additions
and
26 deletions
+96
-26
python/dgl/graphbolt/data_format.py
python/dgl/graphbolt/data_format.py
+22
-6
python/dgl/graphbolt/impl/uniform_negative_sampler.py
python/dgl/graphbolt/impl/uniform_negative_sampler.py
+1
-10
python/dgl/graphbolt/negative_sampler.py
python/dgl/graphbolt/negative_sampler.py
+11
-10
tests/python/pytorch/graphbolt/impl/test_negative_sampler.py
tests/python/pytorch/graphbolt/impl/test_negative_sampler.py
+62
-0
No files found.
python/dgl/graphbolt/data_format.py
View file @
661f8177
...
...
@@ -11,14 +11,30 @@ class LinkPredictionEdgeFormat(Enum):
in link prediction:
Attributes:
CONDITIONED: Represents the 'conditioned' format where data is
structured as quadruples `[u, v, [negative heads], [negative tails]]`
indicating the source and destination nodes of positive and negative edges.
INDEPENDENT: Represents the 'independent' format where data is structured
as triples `
[
u, v, label
]
` indicating the source and destination nodes of
as triples `
(
u, v, label
)
` indicating the source and destination nodes of
an edge, with a label (0 or 1) denoting it as negative or positive.
CONDITIONED: Represents the 'conditioned' format where data is structured
as quadruples `(u, v, neg_u, neg_v)` indicating the source and destination
nodes of positive and negative edges. 'u' and 'v' are 1D tensors with
the same shape, while 'neg_u' and 'neg_v' are 2D tensors with the same
shape.
HEAD_CONDITIONED: Represents the 'head conditioned' format where data is
structured as triples `(u, v, neg_u)`, where '(u, v)' signifies the
source and destination nodes of positive edges, while each node in
'neg_u' is paired with 'v' to form negative edges. 'u' and 'v' are
1D tensors with the same shape, while 'neg_u' is a 2D tensor.
TAIL_CONDITIONED: Represents the 'tail conditioned' format where data is
structured as triples `(u, v, neg_v)`, where '(u, v)' signifies the
source and destination nodes of positive edges, while 'u' is paired
with each node in 'neg_v' to form negative edges. 'u' and 'v' are
1D tensors with the same shape, while 'neg_v' is a 2D tensor.
"""
CONDITIONED
=
"conditioned"
INDEPENDENT
=
"independent"
CONDITIONED
=
"conditioned"
HEAD_CONDITIONED
=
"head_conditioned"
TAIL_CONDITIONED
=
"tail_conditioned"
python/dgl/graphbolt/impl/uniform_negative_sampler.py
View file @
661f8177
...
...
@@ -31,16 +31,7 @@ class UniformNegativeSampler(NegativeSampler):
negative_ratio : int
The proportion of negative samples to positive samples.
output_format : LinkPredictionEdgeFormat
Determines the format of the output data:
- Conditioned format: Outputs data as quadruples
`[u, v, [negative heads], [negative tails]]`. Here, 'u' and 'v'
are the source and destination nodes of positive edges, while
'negative heads' and 'negative tails' refer to the source and
destination nodes of negative edges.
- Independent format: Outputs data as triples `[u, v, label]`.
In this case, 'u' and 'v' are the source and destination nodes
of an edge, and 'label' indicates whether the edge is negative
(0) or positive (1).
Determines the format of the output data.
graph : CSCSamplingGraph
The graph on which to perform negative sampling.
...
...
python/dgl/graphbolt/negative_sampler.py
View file @
661f8177
...
...
@@ -30,16 +30,7 @@ class NegativeSampler(Mapper):
negative_ratio : int
The proportion of negative samples to positive samples.
output_format : LinkPredictionEdgeFormat
Determines the edge format of the output data:
- Conditioned format: Outputs data as quadruples
`[u, v, [negative heads], [negative tails]]`. Here, 'u' and 'v'
are the source and destination nodes of positive edges, while
'negative heads' and 'negative tails' refer to the source and
destination nodes of negative edges.
- Independent format: Outputs data as triples `[u, v, label]`.
In this case, 'u' and 'v' are the source and destination nodes
of an edge, and 'label' indicates whether the edge is negative
(0) or positive (1).
Determines the edge format of the output data.
"""
super
().
__init__
(
datapipe
,
self
.
_sample
)
assert
negative_ratio
>
0
,
"Negative_ratio should be positive Integer."
...
...
@@ -129,5 +120,15 @@ class NegativeSampler(Mapper):
neg_src
=
neg_src
.
view
(
-
1
,
self
.
negative_ratio
)
neg_dst
=
neg_dst
.
view
(
-
1
,
self
.
negative_ratio
)
return
(
pos_src
,
pos_dst
,
neg_src
,
neg_dst
)
elif
self
.
output_format
==
LinkPredictionEdgeFormat
.
HEAD_CONDITIONED
:
pos_src
,
pos_dst
=
pos_pairs
neg_src
,
_
=
neg_pairs
neg_src
=
neg_src
.
view
(
-
1
,
self
.
negative_ratio
)
return
(
pos_src
,
pos_dst
,
neg_src
)
elif
self
.
output_format
==
LinkPredictionEdgeFormat
.
TAIL_CONDITIONED
:
pos_src
,
pos_dst
=
pos_pairs
_
,
neg_dst
=
neg_pairs
neg_dst
=
neg_dst
.
view
(
-
1
,
self
.
negative_ratio
)
return
(
pos_src
,
pos_dst
,
neg_dst
)
else
:
raise
ValueError
(
"Unsupported output format."
)
tests/python/pytorch/graphbolt/impl/test_negative_sampler.py
View file @
661f8177
...
...
@@ -67,3 +67,65 @@ def test_NegativeSampler_Conditioned_Format(negative_ratio):
assert
neg_dst
.
numel
()
==
batch_size
*
negative_ratio
expected_src
=
pos_src
.
repeat
(
negative_ratio
).
view
(
-
1
,
negative_ratio
)
assert
torch
.
equal
(
expected_src
,
neg_src
)
@pytest.mark.parametrize("negative_ratio", [1, 5, 10, 20])
def test_NegativeSampler_Head_Conditioned_Format(negative_ratio):
    """Check the HEAD_CONDITIONED output of ``UniformNegativeSampler``.

    Every minibatch must unpack into a ``(pos_src, pos_dst, neg_src)``
    triple in which each tensor has ``batch_size`` entries along its first
    dimension, ``neg_src`` holds ``batch_size * negative_ratio`` elements in
    total, and ``neg_src`` equals ``pos_src`` repeated ``negative_ratio``
    times and viewed as ``(-1, negative_ratio)``.
    """
    # Random CSC sampling graph used as the negative sampling source.
    # NOTE(review): the arguments are presumably (num_nodes, sparsity) --
    # confirm against gb_test_utils.rand_csc_graph.
    graph = gb_test_utils.rand_csc_graph(100, 0.05)

    # Seed edges: sources are [0, num_seeds), destinations are
    # [num_seeds, 2 * num_seeds).
    num_seeds = 30
    batch_size = 10
    seed_src = torch.arange(0, num_seeds)
    seed_dst = torch.arange(num_seeds, num_seeds * 2)
    item_set = gb.ItemSet((seed_src, seed_dst))
    minibatch_sampler = gb.MinibatchSampler(item_set, batch_size=batch_size)

    # Negative sampler under test, emitting the head-conditioned format.
    negative_sampler = gb.UniformNegativeSampler(
        minibatch_sampler,
        negative_ratio,
        gb.LinkPredictionEdgeFormat.HEAD_CONDITIONED,
        graph,
    )

    expected_numel = batch_size * negative_ratio
    for pos_src, pos_dst, neg_src in negative_sampler:
        # Every returned tensor carries one row/entry per positive edge.
        for tensor in (pos_src, pos_dst, neg_src):
            assert len(tensor) == batch_size
        assert neg_src.numel() == expected_numel

        # Negative sources must match the tiled positive sources exactly.
        tiled_src = pos_src.repeat(negative_ratio)
        expected_src = tiled_src.view(-1, negative_ratio)
        assert torch.equal(expected_src, neg_src)
@pytest.mark.parametrize("negative_ratio", [1, 5, 10, 20])
def test_NegativeSampler_Tail_Conditioned_Format(negative_ratio):
    """Check the TAIL_CONDITIONED output of ``UniformNegativeSampler``.

    Every minibatch must unpack into a ``(pos_src, pos_dst, neg_dst)``
    triple in which each tensor has ``batch_size`` entries along its first
    dimension and ``neg_dst`` holds ``batch_size * negative_ratio`` elements
    in total.
    """
    # Random CSC sampling graph used as the negative sampling source.
    # NOTE(review): the arguments are presumably (num_nodes, sparsity) --
    # confirm against gb_test_utils.rand_csc_graph.
    graph = gb_test_utils.rand_csc_graph(100, 0.05)

    # Seed edges: sources are [0, num_seeds), destinations are
    # [num_seeds, 2 * num_seeds).
    num_seeds = 30
    batch_size = 10
    seed_edges = (
        torch.arange(0, num_seeds),
        torch.arange(num_seeds, num_seeds * 2),
    )
    item_set = gb.ItemSet(seed_edges)
    minibatch_sampler = gb.MinibatchSampler(item_set, batch_size=batch_size)

    # Negative sampler under test, emitting the tail-conditioned format.
    output_format = gb.LinkPredictionEdgeFormat.TAIL_CONDITIONED
    negative_sampler = gb.UniformNegativeSampler(
        minibatch_sampler,
        negative_ratio,
        output_format,
        graph,
    )

    expected_numel = batch_size * negative_ratio
    for pos_src, pos_dst, neg_dst in negative_sampler:
        # Every returned tensor carries one row/entry per positive edge.
        for tensor in (pos_src, pos_dst, neg_dst):
            assert len(tensor) == batch_size
        assert neg_dst.numel() == expected_numel
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment