"sgl-kernel/python/vscode:/vscode.git/clone" did not exist on "42c8704560b4bb4baab022a5bad97b1f788eaa08"
Unverified commit 11bdd6e8, authored by czkkkkkk and committed via GitHub
Browse files

[Graphbolt] Refactor the nonuniform pick function to make it reusable. (#6772)

parent 3d657dbf
......@@ -818,66 +818,24 @@ inline int64_t UniformPick(
}
}
/**
* @brief Perform non-uniform sampling of elements based on probabilities and
* return the sampled indices.
*
* If 'probs_or_mask' is provided, it indicates that the sampling is
* non-uniform. In such cases:
* - When the number of neighbors with non-zero probability is less than or
* equal to fanout, all neighbors with non-zero probability will be selected.
* - When the number of neighbors with non-zero probability exceeds fanout, the
* sampling process will select 'fanout' elements based on their respective
* probabilities. Higher probabilities will increase the chances of being chosen
* during the sampling process.
*
* @param offset The starting edge ID for the connected neighbors of the sampled
* node.
* @param num_neighbors The number of neighbors to pick.
* @param fanout The number of edges to be sampled for each node. It should be
* >= 0 or -1.
* - When the value is -1, all neighbors with non-zero probability will be
* sampled once regardless of replacement. It is equivalent to selecting all
* neighbors with non-zero probability when the fanout is >= the number of
* neighbors (and replacement is set to false).
* - When the value is a non-negative integer, it serves as a minimum
* threshold for selecting neighbors.
* @param replace Boolean indicating whether the sample is performed with or
* without replacement. If True, a value can be selected multiple times.
* Otherwise, each value can be selected only once.
* @param options Tensor options specifying the desired data type of the result.
* @param probs_or_mask Optional tensor containing the (unnormalized)
* probabilities associated with each neighboring edge of a node in the original
* graph. It must be a 1D floating-point tensor with the number of elements
* equal to the number of edges in the graph.
* @param picked_data_ptr The destination address where the picked neighbors
* should be put. Enough memory space should be allocated in advance.
*/
template <typename PickedType>
inline int64_t NonUniformPick(
int64_t offset, int64_t num_neighbors, int64_t fanout, bool replace,
const torch::TensorOptions& options,
const torch::optional<torch::Tensor>& probs_or_mask,
PickedType* picked_data_ptr) {
auto local_probs =
probs_or_mask.value().slice(0, offset, offset + num_neighbors);
auto positive_probs_indices = local_probs.nonzero().squeeze(1);
/** @brief An operator to perform non-uniform sampling. */
static torch::Tensor NonUniformPickOp(
torch::Tensor probs, int64_t fanout, bool replace) {
auto positive_probs_indices = probs.nonzero().squeeze(1);
auto num_positive_probs = positive_probs_indices.size(0);
if (num_positive_probs == 0) return 0;
if (num_positive_probs == 0) return torch::empty({0}, torch::kLong);
if ((fanout == -1) || (num_positive_probs <= fanout && !replace)) {
std::memcpy(
picked_data_ptr,
(positive_probs_indices + offset).data_ptr<PickedType>(),
num_positive_probs * sizeof(PickedType));
return num_positive_probs;
} else {
return positive_probs_indices;
}
if (!replace) fanout = std::min(fanout, num_positive_probs);
if (fanout == 0) return 0;
if (fanout == 0) return torch::empty({0}, torch::kLong);
auto ret_tensor = torch::empty({fanout}, torch::kLong);
auto ret_ptr = ret_tensor.data_ptr<int64_t>();
AT_DISPATCH_FLOATING_TYPES(
local_probs.scalar_type(), "MultinomialSampling", ([&] {
auto local_probs_data_ptr = local_probs.data_ptr<scalar_t>();
probs.scalar_type(), "MultinomialSampling", ([&] {
auto probs_data_ptr = probs.data_ptr<scalar_t>();
auto positive_probs_indices_ptr =
positive_probs_indices.data_ptr<PickedType>();
positive_probs_indices.data_ptr<int64_t>();
if (!replace) {
// The algorithm is from gumbel softmax.
......@@ -890,26 +848,24 @@ inline int64_t NonUniformPick(
if (fanout == 1) {
// Return argmax(p / q).
scalar_t max_prob = 0;
PickedType max_prob_index = -1;
int64_t max_prob_index = -1;
// We only care about the neighbors with non-zero probability.
for (auto i = 0; i < num_positive_probs; ++i) {
// Calculate (p / q) for the current neighbor.
scalar_t current_prob =
local_probs_data_ptr[positive_probs_indices_ptr[i]] /
probs_data_ptr[positive_probs_indices_ptr[i]] /
RandomEngine::ThreadLocal()->Exponential(1.);
if (current_prob > max_prob) {
max_prob = current_prob;
max_prob_index = positive_probs_indices_ptr[i];
}
}
*picked_data_ptr = max_prob_index + offset;
ret_ptr[0] = max_prob_index;
} else {
// Return topk(p / q).
std::vector<std::pair<scalar_t, PickedType>> q(
num_positive_probs);
std::vector<std::pair<scalar_t, int64_t>> q(num_positive_probs);
for (auto i = 0; i < num_positive_probs; ++i) {
q[i].first =
local_probs_data_ptr[positive_probs_indices_ptr[i]] /
q[i].first = probs_data_ptr[positive_probs_indices_ptr[i]] /
RandomEngine::ThreadLocal()->Exponential(1.);
q[i].second = positive_probs_indices_ptr[i];
}
......@@ -918,14 +874,14 @@ inline int64_t NonUniformPick(
std::partial_sort(
q.begin(), q.begin() + fanout, q.end(), std::greater{});
for (auto i = 0; i < fanout; ++i) {
picked_data_ptr[i] = q[i].second + offset;
ret_ptr[i] = q[i].second;
}
} else {
// Use nth_element.
std::nth_element(
q.begin(), q.begin() + fanout - 1, q.end(), std::greater{});
for (auto i = 0; i < fanout; ++i) {
picked_data_ptr[i] = q[i].second + offset;
ret_ptr[i] = q[i].second;
}
}
}
......@@ -934,7 +890,7 @@ inline int64_t NonUniformPick(
std::vector<scalar_t> prefix_sum_probs(num_positive_probs);
scalar_t sum_probs = 0;
for (auto i = 0; i < num_positive_probs; ++i) {
sum_probs += local_probs_data_ptr[positive_probs_indices_ptr[i]];
sum_probs += probs_data_ptr[positive_probs_indices_ptr[i]];
prefix_sum_probs[i] = sum_probs;
}
// Normalize.
......@@ -952,13 +908,63 @@ inline int64_t NonUniformPick(
prefix_sum_probs.begin(),
prefix_sum_probs.end(), uniform_sample) -
prefix_sum_probs.begin();
picked_data_ptr[i] =
positive_probs_indices_ptr[sampled_index] + offset;
ret_ptr[i] = positive_probs_indices_ptr[sampled_index];
}
}
}));
return fanout;
return ret_tensor;
}
/**
 * @brief Perform non-uniform sampling of elements based on probabilities and
 * return the sampled indices.
 *
 * If 'probs_or_mask' is provided, it indicates that the sampling is
 * non-uniform. In such cases:
 * - When the number of neighbors with non-zero probability is less than or
 * equal to fanout, all neighbors with non-zero probability will be selected.
 * - When the number of neighbors with non-zero probability exceeds fanout, the
 * sampling process will select 'fanout' elements based on their respective
 * probabilities. Higher probabilities will increase the chances of being chosen
 * during the sampling process.
 *
 * @param offset The starting edge ID for the connected neighbors of the sampled
 * node.
 * @param num_neighbors The number of neighbors to pick.
 * @param fanout The number of edges to be sampled for each node. It should be
 * >= 0 or -1.
 * - When the value is -1, all neighbors with non-zero probability will be
 * sampled once regardless of replacement. It is equivalent to selecting all
 * neighbors with non-zero probability when the fanout is >= the number of
 * neighbors (and replacement is set to false).
 * - When the value is a non-negative integer, it serves as a minimum
 * threshold for selecting neighbors.
 * @param replace Boolean indicating whether the sample is performed with or
 * without replacement. If True, a value can be selected multiple times.
 * Otherwise, each value can be selected only once.
 * @param options Tensor options specifying the desired data type of the result.
 * NOTE(review): 'options' is currently unused in this implementation — the
 * picked indices are written through 'picked_data_ptr' as PickedType; kept for
 * interface compatibility.
 * @param probs_or_mask Optional tensor containing the (unnormalized)
 * probabilities associated with each neighboring edge of a node in the original
 * graph. It must be a 1D floating-point tensor with the number of elements
 * equal to the number of edges in the graph.
 * @param picked_data_ptr The destination address where the picked neighbors
 * should be put. Enough memory space should be allocated in advance.
 *
 * @return The number of neighbors actually picked (<= fanout, or all
 * positive-probability neighbors when fanout == -1).
 */
template <typename PickedType>
inline int64_t NonUniformPick(
    int64_t offset, int64_t num_neighbors, int64_t fanout, bool replace,
    const torch::TensorOptions& options,
    const torch::optional<torch::Tensor>& probs_or_mask,
    PickedType* picked_data_ptr) {
  // Restrict the probability tensor to this node's neighborhood.
  auto local_probs =
      probs_or_mask.value().slice(0, offset, offset + num_neighbors);
  // Delegate the actual sampling to the reusable operator; it returns
  // neighborhood-local indices (int64) of the picked neighbors.
  auto picked_indices = NonUniformPickOp(local_probs, fanout, replace);
  // Hoist numel() out of the loop and use int64_t to match its return type
  // (the original used 'int', a narrowing hazard for very large picks).
  const int64_t num_picked = picked_indices.numel();
  auto picked_indices_ptr = picked_indices.data_ptr<int64_t>();
  // Translate local indices back to global edge IDs by adding the
  // neighborhood's starting offset.
  for (int64_t i = 0; i < num_picked; ++i) {
    picked_data_ptr[i] =
        static_cast<PickedType>(picked_indices_ptr[i]) + offset;
  }
  return num_picked;
}
template <typename PickedType>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment