Unverified Commit 86befc63 authored by peizhou001's avatar peizhou001 Committed by GitHub
Browse files

[Graphbolt] Add negative sampler function in csc graph (#5922)

parent 229e2883
......@@ -150,6 +150,32 @@ class CSCSamplingGraph : public torch::CustomClassHolder {
bool replace, bool return_eids,
torch::optional<torch::Tensor> probs_or_mask) const;
/**
* @brief Sample negative edges by pairing every positive source node with
* destination nodes drawn uniformly at random. For each positive edge
* ``(u, v)``, `negative_ratio` candidate edges ``(u, v')`` are produced, where
* ``v'`` is drawn uniformly from the half-open range ``[0, max_node_id)``.
*
* @param node_pairs A tuple of two 1D tensors that represent the source and
* destination of positive edges, with 'positive' indicating that these edges
* are present in the graph. Only the source tensor is read by the
* implementation; the destination tensor is ignored. It's important to note
* that within the context of a heterogeneous graph, the ids in these tensors
* signify heterogeneous ids.
* @param negative_ratio The ratio of the number of negative samples to
* positive samples, i.e. how many negative edges are generated per positive
* edge.
* @param max_node_id The exclusive upper bound of the sampled destination
* IDs: destinations are drawn from ``[0, max_node_id)``. It should correspond
* to the number of nodes of a specific type.
*
* @return A tuple consisting of two 1D tensors that represent the source and
* destination of negative edges. Each tensor holds
* ``negative_ratio * number_of_positive_edges`` elements; the source tensor is
* the positive source tensor tiled `negative_ratio` times, and the destination
* tensor is created with the same tensor options as the positive source
* tensor. In the context of a heterogeneous graph, both the input nodes and
* the selected nodes are represented by heterogeneous IDs. Note that the
* sampled edges are not checked against the graph, so a returned 'negative'
* edge may be a false negative, i.e. it may actually be present in the graph.
*/
std::tuple<torch::Tensor, torch::Tensor> SampleNegativeEdgesUniform(
const std::tuple<torch::Tensor, torch::Tensor>& node_pairs,
int64_t negative_ratio, int64_t max_node_id) const;
/**
* @brief Copy the graph to shared memory.
* @param shared_memory_name The name of the shared memory.
......
......@@ -195,6 +195,18 @@ c10::intrusive_ptr<SampledSubgraph> CSCSamplingGraph::SampleNeighbors(
subgraph_reverse_edge_ids, subgraph_type_per_edge);
}
// Uniform negative sampling: every positive source node is kept (tiled
// `negative_ratio` times) and paired with a destination drawn uniformly from
// [0, max_node_id). The positive destinations are not consulted.
std::tuple<torch::Tensor, torch::Tensor>
CSCSamplingGraph::SampleNegativeEdgesUniform(
    const std::tuple<torch::Tensor, torch::Tensor>& node_pairs,
    int64_t negative_ratio, int64_t max_node_id) const {
  // Only the source half of the positive pairs is needed.
  const torch::Tensor& positive_sources = std::get<0>(node_pairs);
  const auto num_negatives = positive_sources.size(0) * negative_ratio;
  // Tile the sources so that each positive edge contributes `negative_ratio`
  // candidate edges.
  auto negative_sources = positive_sources.repeat(negative_ratio);
  // Random destinations share the dtype/device of the source tensor.
  auto negative_destinations = torch::randint(
      0, max_node_id, {num_negatives}, positive_sources.options());
  return std::make_tuple(negative_sources, negative_destinations);
}
c10::intrusive_ptr<CSCSamplingGraph>
CSCSamplingGraph::BuildGraphFromSharedMemoryTensors(
std::tuple<
......
......@@ -30,6 +30,9 @@ TORCH_LIBRARY(graphbolt, m) {
.def("type_per_edge", &CSCSamplingGraph::TypePerEdge)
.def("in_subgraph", &CSCSamplingGraph::InSubgraph)
.def("sample_neighbors", &CSCSamplingGraph::SampleNeighbors)
.def(
"sample_negative_edges_uniform",
&CSCSamplingGraph::SampleNegativeEdgesUniform)
.def("copy_to_shared_memory", &CSCSamplingGraph::CopyToSharedMemory);
m.def("from_csc", &CSCSamplingGraph::FromCSC);
m.def("load_csc_sampling_graph", &LoadCSCSamplingGraph);
......
......@@ -304,6 +304,60 @@ class CSCSamplingGraph:
nodes, fanouts.tolist(), replace, return_eids, probs_or_mask
)
def sample_negative_edges_uniform(
    self, edge_type, node_pairs, negative_ratio
):
    """
    Sample negative edges by randomly choosing negative source-destination
    pairs according to a uniform distribution. For each edge ``(u, v)``,
    it is supposed to generate `negative_ratio` pairs of negative edges
    ``(u, v')``, where ``v'`` is chosen uniformly from ``[0, max_node_id)``.
    ``max_node_id`` is the number of nodes of the destination type of
    `edge_type`, or the total number of nodes when `edge_type` is not given.

    Parameters
    ----------
    edge_type: Tuple[str]
        The type of edges in the provided node_pairs, given as a
        ``(source type, relation, destination type)`` triple. Any negative
        edges sampled will also have the same type. If set to None, it
        will be considered as a homogeneous graph.
    node_pairs : Tuple[Tensor]
        A tuple of two 1D tensors that represent the source and destination
        of positive edges, with 'positive' indicating that these edges are
        present in the graph. It's important to note that within the
        context of a heterogeneous graph, the ids in these tensors signify
        heterogeneous ids.
    negative_ratio: int
        The ratio of the number of negative samples to positive samples.

    Returns
    -------
    Tuple[Tensor]
        A tuple consisting of two 1D tensors represents the source and
        destination of negative edges. In the context of a heterogeneous
        graph, both the input nodes and the selected nodes are represented
        by heterogeneous IDs, and the formed edges are of the input type
        `edge_type`. Note that negative refers to false negatives, which
        means the edge could be present or not present in the graph.

    Raises
    ------
    AssertionError
        If `edge_type` is given but the graph has no ``node_type_offset``.
    """
    if edge_type:
        # Adjacent literals (rather than a backslash continuation inside the
        # string) keep source indentation out of the runtime message.
        assert self.node_type_offset is not None, (
            "The 'node_type_offset' array is necessary for performing "
            "negative sampling by edge type."
        )
        _, _, dst_node_type = edge_type
        dst_node_type_id = self.metadata.node_type_to_id[dst_node_type]
        # Number of nodes of the destination type. The difference of two
        # offset elements may be a 0-dim tensor, so convert it to a plain
        # Python int to match the integer parameter of the C++ sampler.
        max_node_id = int(
            self.node_type_offset[dst_node_type_id + 1]
            - self.node_type_offset[dst_node_type_id]
        )
    else:
        max_node_id = self.num_nodes
    return self._c_csc_graph.sample_negative_edges_uniform(
        node_pairs,
        negative_ratio,
        max_node_id,
    )
def copy_to_shared_memory(self, shared_memory_name: str):
"""Copy the graph to shared memory.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment