Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
86befc63
Unverified
Commit
86befc63
authored
Jul 06, 2023
by
peizhou001
Committed by
GitHub
Jul 06, 2023
Browse files
[Graphbolt] Add negative sampler function in csc graph (#5922)
parent
229e2883
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
95 additions
and
0 deletions
+95
-0
graphbolt/include/graphbolt/csc_sampling_graph.h
graphbolt/include/graphbolt/csc_sampling_graph.h
+26
-0
graphbolt/src/csc_sampling_graph.cc
graphbolt/src/csc_sampling_graph.cc
+12
-0
graphbolt/src/python_binding.cc
graphbolt/src/python_binding.cc
+3
-0
python/dgl/graphbolt/graph_storage/csc_sampling_graph.py
python/dgl/graphbolt/graph_storage/csc_sampling_graph.py
+54
-0
No files found.
graphbolt/include/graphbolt/csc_sampling_graph.h
View file @
86befc63
...
@@ -150,6 +150,32 @@ class CSCSamplingGraph : public torch::CustomClassHolder {
...
@@ -150,6 +150,32 @@ class CSCSamplingGraph : public torch::CustomClassHolder {
bool
replace
,
bool
return_eids
,
bool
replace
,
bool
return_eids
,
torch
::
optional
<
torch
::
Tensor
>
probs_or_mask
)
const
;
torch
::
optional
<
torch
::
Tensor
>
probs_or_mask
)
const
;
/**
* @brief Sample negative edges by randomly choosing negative
* source-destination pairs according to a uniform distribution. For each
* positive edge ``(u, v)``, `negative_ratio` negative edges ``(u, v')`` are
* generated, where ``v'`` is drawn uniformly at random from
* ``[0, max_node_id)``.
*
* @param node_pairs A tuple of two 1D tensors that represent the source and
* destination of positive edges, with 'positive' indicating that these edges
* are present in the graph. It's important to note that within the context of
* a heterogeneous graph, the ids in these tensors signify heterogeneous ids.
* @param negative_ratio The ratio of the number of negative samples to
* positive samples.
* @param max_node_id The exclusive upper bound of the node IDs that may be
* selected as negative destinations, i.e. the largest selectable ID is
* ``max_node_id - 1``. It should correspond to the number of nodes of a
* specific type.
*
* @return A tuple of two 1D tensors that represent the source and
* destination of negative edges. In the context of a heterogeneous
* graph, both the input nodes and the selected nodes are represented
* by heterogeneous IDs. Note that the sampled edges are not checked against
* the graph, so they may be false negatives: a returned edge could be
* present or not present in the graph.
*/
std
::
tuple
<
torch
::
Tensor
,
torch
::
Tensor
>
SampleNegativeEdgesUniform
(
const
std
::
tuple
<
torch
::
Tensor
,
torch
::
Tensor
>&
node_pairs
,
int64_t
negative_ratio
,
int64_t
max_node_id
)
const
;
/**
/**
* @brief Copy the graph to shared memory.
* @brief Copy the graph to shared memory.
* @param shared_memory_name The name of the shared memory.
* @param shared_memory_name The name of the shared memory.
...
...
graphbolt/src/csc_sampling_graph.cc
View file @
86befc63
...
@@ -195,6 +195,18 @@ c10::intrusive_ptr<SampledSubgraph> CSCSamplingGraph::SampleNeighbors(
...
@@ -195,6 +195,18 @@ c10::intrusive_ptr<SampledSubgraph> CSCSamplingGraph::SampleNeighbors(
subgraph_reverse_edge_ids
,
subgraph_type_per_edge
);
subgraph_reverse_edge_ids
,
subgraph_type_per_edge
);
}
}
std::tuple<torch::Tensor, torch::Tensor>
CSCSamplingGraph::SampleNegativeEdgesUniform(
    const std::tuple<torch::Tensor, torch::Tensor>& node_pairs,
    int64_t negative_ratio, int64_t max_node_id) const {
  // Only the sources of the positive edges are used; the positive
  // destinations are discarded and replaced by random draws below.
  const torch::Tensor& positive_src = std::get<0>(node_pairs);
  const auto num_negatives = positive_src.size(0) * negative_ratio;

  // Tile the positive sources `negative_ratio` times so that every positive
  // source is paired with `negative_ratio` sampled destinations.
  auto negative_src = positive_src.repeat(negative_ratio);

  // Destinations are drawn uniformly from [0, max_node_id), reusing the
  // tensor options of the source tensor.
  auto negative_dst = torch::randint(
      0, max_node_id, {num_negatives}, positive_src.options());

  return std::make_tuple(negative_src, negative_dst);
}
c10
::
intrusive_ptr
<
CSCSamplingGraph
>
c10
::
intrusive_ptr
<
CSCSamplingGraph
>
CSCSamplingGraph
::
BuildGraphFromSharedMemoryTensors
(
CSCSamplingGraph
::
BuildGraphFromSharedMemoryTensors
(
std
::
tuple
<
std
::
tuple
<
...
...
graphbolt/src/python_binding.cc
View file @
86befc63
...
@@ -30,6 +30,9 @@ TORCH_LIBRARY(graphbolt, m) {
...
@@ -30,6 +30,9 @@ TORCH_LIBRARY(graphbolt, m) {
.
def
(
"type_per_edge"
,
&
CSCSamplingGraph
::
TypePerEdge
)
.
def
(
"type_per_edge"
,
&
CSCSamplingGraph
::
TypePerEdge
)
.
def
(
"in_subgraph"
,
&
CSCSamplingGraph
::
InSubgraph
)
.
def
(
"in_subgraph"
,
&
CSCSamplingGraph
::
InSubgraph
)
.
def
(
"sample_neighbors"
,
&
CSCSamplingGraph
::
SampleNeighbors
)
.
def
(
"sample_neighbors"
,
&
CSCSamplingGraph
::
SampleNeighbors
)
.
def
(
"sample_negative_edges_uniform"
,
&
CSCSamplingGraph
::
SampleNegativeEdgesUniform
)
.
def
(
"copy_to_shared_memory"
,
&
CSCSamplingGraph
::
CopyToSharedMemory
);
.
def
(
"copy_to_shared_memory"
,
&
CSCSamplingGraph
::
CopyToSharedMemory
);
m
.
def
(
"from_csc"
,
&
CSCSamplingGraph
::
FromCSC
);
m
.
def
(
"from_csc"
,
&
CSCSamplingGraph
::
FromCSC
);
m
.
def
(
"load_csc_sampling_graph"
,
&
LoadCSCSamplingGraph
);
m
.
def
(
"load_csc_sampling_graph"
,
&
LoadCSCSamplingGraph
);
...
...
python/dgl/graphbolt/graph_storage/csc_sampling_graph.py
View file @
86befc63
...
@@ -304,6 +304,60 @@ class CSCSamplingGraph:
...
@@ -304,6 +304,60 @@ class CSCSamplingGraph:
nodes
,
fanouts
.
tolist
(),
replace
,
return_eids
,
probs_or_mask
nodes
,
fanouts
.
tolist
(),
replace
,
return_eids
,
probs_or_mask
)
)
def
sample_negative_edges_uniform
(
self
,
edge_type
,
node_pairs
,
negative_ratio
):
"""
Sample negative edges by randomly choosing negative source-destination
pairs according to a uniform distribution. For each positive edge
``(u, v)``, `negative_ratio` negative edges ``(u, v')`` are requested
from the underlying C++ graph, where the upper bound for ``v'`` is the
number of nodes of the destination node type when `edge_type` is given,
and ``self.num_nodes`` otherwise.
Parameters
----------
edge_type: Tuple[str]
The type of edges in the provided node_pairs. It is unpacked into
three elements, the last of which is the destination node type.
Any negative edges sampled will also have the same type. If it is
None (or any other falsy value), the graph is treated as
homogeneous.
node_pairs : Tuple[Tensor]
A tuple of two 1D tensors that represent the source and destination
of positive edges, with 'positive' indicating that these edges are
present in the graph. It's important to note that within the
context of a heterogeneous graph, the ids in these tensors signify
heterogeneous ids.
negative_ratio: int
The ratio of the number of negative samples to positive samples.
Returns
-------
Tuple[Tensor]
A tuple of two 1D tensors that represent the source and
destination of negative edges. In the context of a heterogeneous
graph, both the input nodes and the selected nodes are represented
by heterogeneous IDs, and the formed edges are of the input type
`edge_type`. Note that negative refers to false negatives, which
means the edge could be present or not present in the graph.
Raises
------
AssertionError
If `edge_type` is given but ``self.node_type_offset`` is None.
"""
if
edge_type
:
assert
(
self
.
node_type_offset
is
not
None
),
"The 'node_type_offset' array is necessary for performing
\
negative sampling by edge type."
_
,
_
,
dst_node_type
=
edge_type
dst_node_type_id
=
self
.
metadata
.
node_type_to_id
[
dst_node_type
]
max_node_id
=
(
self
.
node_type_offset
[
dst_node_type_id
+
1
]
-
self
.
node_type_offset
[
dst_node_type_id
]
)
else
:
max_node_id
=
self
.
num_nodes
return
self
.
_c_csc_graph
.
sample_negative_edges_uniform
(
node_pairs
,
negative_ratio
,
max_node_id
,
)
def
copy_to_shared_memory
(
self
,
shared_memory_name
:
str
):
def
copy_to_shared_memory
(
self
,
shared_memory_name
:
str
):
"""Copy the graph to shared memory.
"""Copy the graph to shared memory.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment