Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
661f8177
Unverified
Commit
661f8177
authored
Aug 02, 2023
by
peizhou001
Committed by
GitHub
Aug 02, 2023
Browse files
[Graphbolt]Add data format (#6075)
parent
14f396d0
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
96 additions
and
26 deletions
+96
-26
python/dgl/graphbolt/data_format.py
python/dgl/graphbolt/data_format.py
+22
-6
python/dgl/graphbolt/impl/uniform_negative_sampler.py
python/dgl/graphbolt/impl/uniform_negative_sampler.py
+1
-10
python/dgl/graphbolt/negative_sampler.py
python/dgl/graphbolt/negative_sampler.py
+11
-10
tests/python/pytorch/graphbolt/impl/test_negative_sampler.py
tests/python/pytorch/graphbolt/impl/test_negative_sampler.py
+62
-0
No files found.
python/dgl/graphbolt/data_format.py
View file @
661f8177
...
@@ -11,14 +11,30 @@ class LinkPredictionEdgeFormat(Enum):
...
@@ -11,14 +11,30 @@ class LinkPredictionEdgeFormat(Enum):
in link prediction:
in link prediction:
Attributes:
Attributes:
CONDITIONED: Represents the 'conditioned' format where data is
structured as quadruples `[u, v, [negative heads], [negative tails]]`
indicating the source and destination nodes of positive and negative edges.
INDEPENDENT: Represents the 'independent' format where data is structured
INDEPENDENT: Represents the 'independent' format where data is structured
as triples `
[
u, v, label
]
` indicating the source and destination nodes of
as triples `
(
u, v, label
)
` indicating the source and destination nodes of
an edge, with a label (0 or 1) denoting it as negative or positive.
an edge, with a label (0 or 1) denoting it as negative or positive.
CONDITIONED: Represents the 'conditioned' format where data is structured
as quadruples `(u, v, neg_u, neg_v)` indicating the source and destination
nodes of positive and negative edges. 'u' and 'v' are 1D tensors with
the same shape, while 'neg_u' and 'neg_v' are 2D tensors with the same
shape.
HEAD_CONDITIONED: Represents the 'head conditioned' format where data is
structured as triples `(u, v, neg_u)`, where '(u, v)' signifies the
source and destination nodes of positive edges, while each node in
'neg_u' is paired with 'v' to form negative edges. 'u' and 'v' are
1D tensors with the same shape, while 'neg_u' is a 2D tensor.
TAIL_CONDITIONED: Represents the 'tail conditioned' format where data is
structured as triples `(u, v, neg_v)`, where '(u, v)' signifies the
source and destination nodes of positive edges, while 'u' is paired
with each node in 'neg_v' to form negative edges. 'u' and 'v' are
1D tensors with the same shape, while 'neg_v' is a 2D tensor.
"""
"""
CONDITIONED
=
"conditioned"
INDEPENDENT
=
"independent"
INDEPENDENT
=
"independent"
CONDITIONED
=
"conditioned"
HEAD_CONDITIONED
=
"head_conditioned"
TAIL_CONDITIONED
=
"tail_conditioned"
python/dgl/graphbolt/impl/uniform_negative_sampler.py
View file @
661f8177
...
@@ -31,16 +31,7 @@ class UniformNegativeSampler(NegativeSampler):
...
@@ -31,16 +31,7 @@ class UniformNegativeSampler(NegativeSampler):
negative_ratio : int
negative_ratio : int
The proportion of negative samples to positive samples.
The proportion of negative samples to positive samples.
output_format : LinkPredictionEdgeFormat
output_format : LinkPredictionEdgeFormat
Determines the format of the output data:
Determines the format of the output data.
- Conditioned format: Outputs data as quadruples
`[u, v, [negative heads], [negative tails]]`. Here, 'u' and 'v'
are the source and destination nodes of positive edges, while
'negative heads' and 'negative tails' refer to the source and
destination nodes of negative edges.
- Independent format: Outputs data as triples `[u, v, label]`.
In this case, 'u' and 'v' are the source and destination nodes
of an edge, and 'label' indicates whether the edge is negative
(0) or positive (1).
graph : CSCSamplingGraph
graph : CSCSamplingGraph
The graph on which to perform negative sampling.
The graph on which to perform negative sampling.
...
...
python/dgl/graphbolt/negative_sampler.py
View file @
661f8177
...
@@ -30,16 +30,7 @@ class NegativeSampler(Mapper):
...
@@ -30,16 +30,7 @@ class NegativeSampler(Mapper):
negative_ratio : int
negative_ratio : int
The proportion of negative samples to positive samples.
The proportion of negative samples to positive samples.
output_format : LinkPredictionEdgeFormat
output_format : LinkPredictionEdgeFormat
Determines the edge format of the output data:
Determines the edge format of the output data.
- Conditioned format: Outputs data as quadruples
`[u, v, [negative heads], [negative tails]]`. Here, 'u' and 'v'
are the source and destination nodes of positive edges, while
'negative heads' and 'negative tails' refer to the source and
destination nodes of negative edges.
- Independent format: Outputs data as triples `[u, v, label]`.
In this case, 'u' and 'v' are the source and destination nodes
of an edge, and 'label' indicates whether the edge is negative
(0) or positive (1).
"""
"""
super
().
__init__
(
datapipe
,
self
.
_sample
)
super
().
__init__
(
datapipe
,
self
.
_sample
)
assert
negative_ratio
>
0
,
"Negative_ratio should be positive Integer."
assert
negative_ratio
>
0
,
"Negative_ratio should be positive Integer."
...
@@ -129,5 +120,15 @@ class NegativeSampler(Mapper):
...
@@ -129,5 +120,15 @@ class NegativeSampler(Mapper):
neg_src
=
neg_src
.
view
(
-
1
,
self
.
negative_ratio
)
neg_src
=
neg_src
.
view
(
-
1
,
self
.
negative_ratio
)
neg_dst
=
neg_dst
.
view
(
-
1
,
self
.
negative_ratio
)
neg_dst
=
neg_dst
.
view
(
-
1
,
self
.
negative_ratio
)
return
(
pos_src
,
pos_dst
,
neg_src
,
neg_dst
)
return
(
pos_src
,
pos_dst
,
neg_src
,
neg_dst
)
elif
self
.
output_format
==
LinkPredictionEdgeFormat
.
HEAD_CONDITIONED
:
pos_src
,
pos_dst
=
pos_pairs
neg_src
,
_
=
neg_pairs
neg_src
=
neg_src
.
view
(
-
1
,
self
.
negative_ratio
)
return
(
pos_src
,
pos_dst
,
neg_src
)
elif
self
.
output_format
==
LinkPredictionEdgeFormat
.
TAIL_CONDITIONED
:
pos_src
,
pos_dst
=
pos_pairs
_
,
neg_dst
=
neg_pairs
neg_dst
=
neg_dst
.
view
(
-
1
,
self
.
negative_ratio
)
return
(
pos_src
,
pos_dst
,
neg_dst
)
else
:
else
:
raise
ValueError
(
"Unsupported output format."
)
raise
ValueError
(
"Unsupported output format."
)
tests/python/pytorch/graphbolt/impl/test_negative_sampler.py
View file @
661f8177
...
@@ -67,3 +67,65 @@ def test_NegativeSampler_Conditioned_Format(negative_ratio):
...
@@ -67,3 +67,65 @@ def test_NegativeSampler_Conditioned_Format(negative_ratio):
assert
neg_dst
.
numel
()
==
batch_size
*
negative_ratio
assert
neg_dst
.
numel
()
==
batch_size
*
negative_ratio
expected_src
=
pos_src
.
repeat
(
negative_ratio
).
view
(
-
1
,
negative_ratio
)
expected_src
=
pos_src
.
repeat
(
negative_ratio
).
view
(
-
1
,
negative_ratio
)
assert
torch
.
equal
(
expected_src
,
neg_src
)
assert
torch
.
equal
(
expected_src
,
neg_src
)
@pytest.mark.parametrize("negative_ratio", [1, 5, 10, 20])
def test_NegativeSampler_Head_Conditioned_Format(negative_ratio):
    """Check the output of the sampler in HEAD_CONDITIONED format.

    Every minibatch must unpack into ``(pos_src, pos_dst, neg_src)``. Each
    of the three must have length ``batch_size``, ``neg_src`` must hold
    ``batch_size * negative_ratio`` elements, and ``neg_src`` must equal
    ``pos_src`` repeated ``negative_ratio`` times and viewed with
    ``negative_ratio`` columns.
    """
    # Construct CSCSamplingGraph.
    graph = gb_test_utils.rand_csc_graph(100, 0.05)

    # Seed node pairs: ids [0, num_seeds) paired with [num_seeds, 2 * num_seeds).
    num_seeds = 30
    src_ids = torch.arange(0, num_seeds)
    dst_ids = torch.arange(num_seeds, num_seeds * 2)
    item_set = gb.ItemSet((src_ids, dst_ids))

    batch_size = 10
    minibatch_sampler = gb.MinibatchSampler(item_set, batch_size=batch_size)

    # Construct NegativeSampler.
    output_format = gb.LinkPredictionEdgeFormat.HEAD_CONDITIONED
    negative_sampler = gb.UniformNegativeSampler(
        minibatch_sampler,
        negative_ratio,
        output_format,
        graph,
    )

    expected_numel = batch_size * negative_ratio

    # Perform negative sampling and validate every minibatch.
    for pos_src, pos_dst, neg_src in negative_sampler:
        # Assertions: leading dimension of every output equals the batch size.
        for tensor in (pos_src, pos_dst, neg_src):
            assert len(tensor) == batch_size
        assert neg_src.numel() == expected_numel

        # The negative sources must match the tiled positive sources.
        tiled_src = pos_src.repeat(negative_ratio)
        expected_src = tiled_src.view(-1, negative_ratio)
        assert torch.equal(expected_src, neg_src)
@pytest.mark.parametrize("negative_ratio", [1, 5, 10, 20])
def test_NegativeSampler_Tail_Conditioned_Format(negative_ratio):
    """Check the output of the sampler in TAIL_CONDITIONED format.

    Every minibatch must unpack into ``(pos_src, pos_dst, neg_dst)``. Each
    of the three must have length ``batch_size``, and ``neg_dst`` must hold
    ``batch_size * negative_ratio`` elements.
    """
    # Construct CSCSamplingGraph.
    graph = gb_test_utils.rand_csc_graph(100, 0.05)

    # Seed node pairs: ids [0, num_seeds) paired with [num_seeds, 2 * num_seeds).
    num_seeds = 30
    src_ids = torch.arange(0, num_seeds)
    dst_ids = torch.arange(num_seeds, num_seeds * 2)
    item_set = gb.ItemSet((src_ids, dst_ids))

    batch_size = 10
    minibatch_sampler = gb.MinibatchSampler(item_set, batch_size=batch_size)

    # Construct NegativeSampler.
    output_format = gb.LinkPredictionEdgeFormat.TAIL_CONDITIONED
    negative_sampler = gb.UniformNegativeSampler(
        minibatch_sampler,
        negative_ratio,
        output_format,
        graph,
    )

    expected_numel = batch_size * negative_ratio

    # Perform negative sampling and validate every minibatch.
    for pos_src, pos_dst, neg_dst in negative_sampler:
        # Assertions: leading dimension of every output equals the batch size.
        for tensor in (pos_src, pos_dst, neg_dst):
            assert len(tensor) == batch_size
        assert neg_dst.numel() == expected_numel
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment