Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
11bdd6e8
"torchvision/vscode:/vscode.git/clone" did not exist on "2686e1a3c97f035e430e7387e81af8fa4cde9609"
Unverified
Commit
11bdd6e8
authored
Dec 19, 2023
by
czkkkkkk
Committed by
GitHub
Dec 19, 2023
Browse files
[Graphbolt] Refactor the nonuniform pick function to make it reusable. (#6772)
parent
3d657dbf
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
103 additions
and
97 deletions
+103
-97
graphbolt/src/fused_csc_sampling_graph.cc
graphbolt/src/fused_csc_sampling_graph.cc
+103
-97
No files found.
graphbolt/src/fused_csc_sampling_graph.cc
View file @
11bdd6e8
...
...
@@ -818,66 +818,24 @@ inline int64_t UniformPick(
}
}
/**
* @brief Perform non-uniform sampling of elements based on probabilities and
* return the sampled indices.
*
* If 'probs_or_mask' is provided, it indicates that the sampling is
* non-uniform. In such cases:
* - When the number of neighbors with non-zero probability is less than or
* equal to fanout, all neighbors with non-zero probability will be selected.
* - When the number of neighbors with non-zero probability exceeds fanout, the
* sampling process will select 'fanout' elements based on their respective
* probabilities. Higher probabilities will increase the chances of being chosen
* during the sampling process.
*
* @param offset The starting edge ID for the connected neighbors of the sampled
* node.
* @param num_neighbors The number of neighbors of the sampled node, i.e. the
* number of candidate edges starting at 'offset' to pick from.
* @param fanout The number of edges to be sampled for each node. It should be
* >= 0 or -1.
* - When the value is -1, all neighbors with non-zero probability will be
* sampled once regardless of replacement. It is equivalent to selecting all
* neighbors with non-zero probability when the fanout is >= the number of
* neighbors (and replacement is set to false).
* - When the value is a non-negative integer, it is the number of neighbors
* to sample; without replacement it is capped at the number of neighbors
* with non-zero probability.
* @param replace Boolean indicating whether the sample is performed with or
* without replacement. If True, a value can be selected multiple times.
* Otherwise, each value can be selected only once.
* @param options Tensor options specifying the desired data type of the result.
* @param probs_or_mask Optional tensor containing the (unnormalized)
* probabilities associated with each neighboring edge of a node in the original
* graph. It must be a 1D floating-point tensor with the number of elements
* equal to the number of edges in the graph.
* @param picked_data_ptr The destination address where the picked neighbors
* should be put. Enough memory space should be allocated in advance.
*/
template
<
typename
PickedType
>
inline
int64_t
NonUniformPick
(
int64_t
offset
,
int64_t
num_neighbors
,
int64_t
fanout
,
bool
replace
,
const
torch
::
TensorOptions
&
options
,
const
torch
::
optional
<
torch
::
Tensor
>&
probs_or_mask
,
PickedType
*
picked_data_ptr
)
{
auto
local_probs
=
probs_or_mask
.
value
().
slice
(
0
,
offset
,
offset
+
num_neighbors
);
auto
positive_probs_indices
=
local_probs
.
nonzero
().
squeeze
(
1
);
/** @brief An operator to perform non-uniform sampling. */
static
torch
::
Tensor
NonUniformPickOp
(
torch
::
Tensor
probs
,
int64_t
fanout
,
bool
replace
)
{
auto
positive_probs_indices
=
probs
.
nonzero
().
squeeze
(
1
);
auto
num_positive_probs
=
positive_probs_indices
.
size
(
0
);
if
(
num_positive_probs
==
0
)
return
0
;
if
(
num_positive_probs
==
0
)
return
torch
::
empty
({
0
},
torch
::
kLong
)
;
if
((
fanout
==
-
1
)
||
(
num_positive_probs
<=
fanout
&&
!
replace
))
{
std
::
memcpy
(
picked_data_ptr
,
(
positive_probs_indices
+
offset
).
data_ptr
<
PickedType
>
(),
num_positive_probs
*
sizeof
(
PickedType
));
return
num_positive_probs
;
}
else
{
return
positive_probs_indices
;
}
if
(
!
replace
)
fanout
=
std
::
min
(
fanout
,
num_positive_probs
);
if
(
fanout
==
0
)
return
0
;
if
(
fanout
==
0
)
return
torch
::
empty
({
0
},
torch
::
kLong
);
auto
ret_tensor
=
torch
::
empty
({
fanout
},
torch
::
kLong
);
auto
ret_ptr
=
ret_tensor
.
data_ptr
<
int64_t
>
();
AT_DISPATCH_FLOATING_TYPES
(
local_
probs
.
scalar_type
(),
"MultinomialSampling"
,
([
&
]
{
auto
local_
probs_data_ptr
=
local_
probs
.
data_ptr
<
scalar_t
>
();
probs
.
scalar_type
(),
"MultinomialSampling"
,
([
&
]
{
auto
probs_data_ptr
=
probs
.
data_ptr
<
scalar_t
>
();
auto
positive_probs_indices_ptr
=
positive_probs_indices
.
data_ptr
<
PickedType
>
();
positive_probs_indices
.
data_ptr
<
int64_t
>
();
if
(
!
replace
)
{
// The algorithm is from gumbel softmax.
...
...
@@ -890,26 +848,24 @@ inline int64_t NonUniformPick(
if
(
fanout
==
1
)
{
// Return argmax(p / q).
scalar_t
max_prob
=
0
;
PickedType
max_prob_index
=
-
1
;
int64_t
max_prob_index
=
-
1
;
// We only care about the neighbors with non-zero probability.
for
(
auto
i
=
0
;
i
<
num_positive_probs
;
++
i
)
{
// Calculate (p / q) for the current neighbor.
scalar_t
current_prob
=
local_
probs_data_ptr
[
positive_probs_indices_ptr
[
i
]]
/
probs_data_ptr
[
positive_probs_indices_ptr
[
i
]]
/
RandomEngine
::
ThreadLocal
()
->
Exponential
(
1.
);
if
(
current_prob
>
max_prob
)
{
max_prob
=
current_prob
;
max_prob_index
=
positive_probs_indices_ptr
[
i
];
}
}
*
picked_data
_ptr
=
max_prob_index
+
offset
;
ret
_ptr
[
0
]
=
max_prob_index
;
}
else
{
// Return topk(p / q).
std
::
vector
<
std
::
pair
<
scalar_t
,
PickedType
>>
q
(
num_positive_probs
);
std
::
vector
<
std
::
pair
<
scalar_t
,
int64_t
>>
q
(
num_positive_probs
);
for
(
auto
i
=
0
;
i
<
num_positive_probs
;
++
i
)
{
q
[
i
].
first
=
local_probs_data_ptr
[
positive_probs_indices_ptr
[
i
]]
/
q
[
i
].
first
=
probs_data_ptr
[
positive_probs_indices_ptr
[
i
]]
/
RandomEngine
::
ThreadLocal
()
->
Exponential
(
1.
);
q
[
i
].
second
=
positive_probs_indices_ptr
[
i
];
}
...
...
@@ -918,14 +874,14 @@ inline int64_t NonUniformPick(
std
::
partial_sort
(
q
.
begin
(),
q
.
begin
()
+
fanout
,
q
.
end
(),
std
::
greater
{});
for
(
auto
i
=
0
;
i
<
fanout
;
++
i
)
{
picked_data
_ptr
[
i
]
=
q
[
i
].
second
+
offset
;
ret
_ptr
[
i
]
=
q
[
i
].
second
;
}
}
else
{
// Use nth_element.
std
::
nth_element
(
q
.
begin
(),
q
.
begin
()
+
fanout
-
1
,
q
.
end
(),
std
::
greater
{});
for
(
auto
i
=
0
;
i
<
fanout
;
++
i
)
{
picked_data
_ptr
[
i
]
=
q
[
i
].
second
+
offset
;
ret
_ptr
[
i
]
=
q
[
i
].
second
;
}
}
}
...
...
@@ -934,7 +890,7 @@ inline int64_t NonUniformPick(
std
::
vector
<
scalar_t
>
prefix_sum_probs
(
num_positive_probs
);
scalar_t
sum_probs
=
0
;
for
(
auto
i
=
0
;
i
<
num_positive_probs
;
++
i
)
{
sum_probs
+=
local_
probs_data_ptr
[
positive_probs_indices_ptr
[
i
]];
sum_probs
+=
probs_data_ptr
[
positive_probs_indices_ptr
[
i
]];
prefix_sum_probs
[
i
]
=
sum_probs
;
}
// Normalize.
...
...
@@ -952,13 +908,63 @@ inline int64_t NonUniformPick(
prefix_sum_probs
.
begin
(),
prefix_sum_probs
.
end
(),
uniform_sample
)
-
prefix_sum_probs
.
begin
();
picked_data_ptr
[
i
]
=
positive_probs_indices_ptr
[
sampled_index
]
+
offset
;
ret_ptr
[
i
]
=
positive_probs_indices_ptr
[
sampled_index
];
}
}
}));
return
fanout
;
return
ret_tensor
;
}
/**
 * @brief Perform non-uniform sampling of elements based on probabilities and
 * return the sampled indices.
 *
 * If 'probs_or_mask' is provided, it indicates that the sampling is
 * non-uniform. In such cases:
 * - When the number of neighbors with non-zero probability is less than or
 * equal to fanout, all neighbors with non-zero probability will be selected.
 * - When the number of neighbors with non-zero probability exceeds fanout, the
 * sampling process will select 'fanout' elements based on their respective
 * probabilities. Higher probabilities will increase the chances of being chosen
 * during the sampling process.
 *
 * This function is a thin wrapper: it slices the probabilities that belong to
 * the current node, delegates the actual selection to NonUniformPickOp, and
 * converts the slice-relative indices it returns into edge IDs by adding
 * 'offset'.
 *
 * @param offset The starting edge ID for the connected neighbors of the sampled
 * node.
 * @param num_neighbors The number of neighbors of the sampled node, i.e. the
 * length of the slice of 'probs_or_mask' (starting at 'offset') to pick from.
 * @param fanout The number of edges to be sampled for each node. It should be
 * >= 0 or -1.
 *  - When the value is -1, all neighbors with non-zero probability will be
 * sampled once regardless of replacement. It is equivalent to selecting all
 * neighbors with non-zero probability when the fanout is >= the number of
 * neighbors (and replacement is set to false).
 *  - When the value is a non-negative integer, it is the number of neighbors
 * to sample.
 * @param replace Boolean indicating whether the sample is performed with or
 * without replacement. If True, a value can be selected multiple times.
 * Otherwise, each value can be selected only once.
 * @param options Tensor options specifying the desired data type of the result.
 * NOTE(review): not referenced by this function's body; presumably kept so the
 * signature matches the other pick functions - confirm.
 * @param probs_or_mask Optional tensor containing the (unnormalized)
 * probabilities associated with each neighboring edge of a node in the original
 * graph. It must be a 1D floating-point tensor with the number of elements
 * equal to the number of edges in the graph. It must hold a value: the
 * function calls value() on it unconditionally.
 * @param picked_data_ptr The destination address where the picked neighbors
 * should be put. Enough memory space should be allocated in advance.
 *
 * @return The number of picked neighbors written to 'picked_data_ptr'.
 */
template <typename PickedType>
inline int64_t NonUniformPick(
    int64_t offset, int64_t num_neighbors, int64_t fanout, bool replace,
    const torch::TensorOptions& options,
    const torch::optional<torch::Tensor>& probs_or_mask,
    PickedType* picked_data_ptr) {
  // Restrict the probabilities to the edges of the current node.
  auto local_probs =
      probs_or_mask.value().slice(0, offset, offset + num_neighbors);
  // The picked indices are relative to the start of the slice.
  auto picked_indices = NonUniformPickOp(local_probs, fanout, replace);
  auto picked_indices_ptr = picked_indices.data_ptr<int64_t>();
  // numel() returns int64_t; keep the count and the loop index at the same
  // width and query the size only once.
  const int64_t num_picked = picked_indices.numel();
  for (int64_t i = 0; i < num_picked; ++i) {
    // Shift by 'offset' to turn the slice-relative index into an edge ID.
    picked_data_ptr[i] =
        static_cast<PickedType>(picked_indices_ptr[i]) + offset;
  }
  return num_picked;
}
template
<
typename
PickedType
>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment