Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
f0c7efa9
Unverified
Commit
f0c7efa9
authored
Mar 11, 2024
by
Muhammed Fatih BALIN
Committed by
GitHub
Mar 11, 2024
Browse files
[GraphBolt] Implement dependent minibatching for labor. (#7205)
parent
2bda1582
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
126 additions
and
46 deletions
+126
-46
graphbolt/include/graphbolt/continuous_seed.h
graphbolt/include/graphbolt/continuous_seed.h
+97
-0
graphbolt/include/graphbolt/fused_csc_sampling_graph.h
graphbolt/include/graphbolt/fused_csc_sampling_graph.h
+2
-1
graphbolt/src/cuda/neighbor_sampler.cu
graphbolt/src/cuda/neighbor_sampler.cu
+6
-15
graphbolt/src/fused_csc_sampling_graph.cc
graphbolt/src/fused_csc_sampling_graph.cc
+21
-4
graphbolt/src/random.h
graphbolt/src/random.h
+0
-26
No files found.
graphbolt/include/graphbolt/continuous_seed.h
0 → 100644
View file @
f0c7efa9
/**
* Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* @file graphbolt/continuous_seed.h
* @brief CPU and CUDA implementation for continuous random seeds
*/
#ifndef GRAPHBOLT_CONTINUOUS_SEED_H_
#define GRAPHBOLT_CONTINUOUS_SEED_H_
#include <torch/script.h>
#include <cmath>
#ifdef __CUDACC__
#include <curand_kernel.h>
#else
#include <pcg_random.hpp>
#include <random>
#endif // __CUDACC__
#ifndef M_SQRT1_2
#define M_SQRT1_2 0.707106781186547524401
#endif // M_SQRT1_2
namespace
graphbolt
{
/**
 * @brief A random seed that blends two integer seeds with continuous weights.
 *
 * The object stores two 64-bit seeds `s` and two float coefficients `c`.
 * uniform(t) produces one float for the key `t`:
 *  - when both seeds are equal, it is a single uniform draw from the generator
 *    keyed by (s[0], t);
 *  - otherwise, one normal draw is taken per seed, the two draws are combined
 *    as c[0] * n0 + c[1] * n1 and the sum is passed through the standard normal
 *    CDF. The tensor constructor sets c = (cos(pi * r / 2), sin(pi * r / 2)),
 *    whose squares add up to one, so the combination keeps unit variance
 *    provided the two draws are independent standard normals.
 *
 * The class only holds plain arrays, so it can be copied by value (e.g. into a
 * kernel argument).
 */
class continuous_seed {
  uint64_t s[2];
  float c[2];

 public:
  /**
   * @brief Builds a seed from a single integer; both internal seeds become
   * `seed` and both coefficients are zeroed, so uniform() takes the plain
   * uniform path.
   */
  /* implicit */ continuous_seed(const int64_t seed) {  // NOLINT
    s[0] = s[1] = seed;
    c[0] = c[1] = 0;
  }

  /**
   * @brief Builds a seed from the first and last entries of `seed_arr`.
   *
   * @param seed_arr Non-empty int64 tensor; element 0 becomes s[0] and element
   * size(0) - 1 becomes s[1].
   * @param r Interpolation parameter; the coefficients are cos(pi * r / 2) and
   * sin(pi * r / 2).
   *
   * NOTE(review): the data pointer is dereferenced on the host here, so
   * seed_arr presumably has to live in host-accessible memory -- confirm
   * against the callers.
   */
  continuous_seed(torch::Tensor seed_arr, float r) {
    // Without this guard an empty tensor would make the reads below go out of
    // bounds (index 0 and index -1).
    TORCH_CHECK(
        seed_arr.defined() && seed_arr.dim() >= 1 && seed_arr.numel() > 0,
        "continuous_seed expects a non-empty seed tensor.");
    auto seed = seed_arr.data_ptr<int64_t>();
    s[0] = seed[0];
    s[1] = seed[seed_arr.size(0) - 1];
    const auto pi = std::acos(-1.0);
    c[0] = std::cos(pi * r / 2);
    c[1] = std::sin(pi * r / 2);
  }

#ifdef __CUDACC__
  /**
   * @brief Device-side draw for key `t` using Philox via cuRAND.
   *
   * @param t Key of the draw; passed to curand_init as the offset while the
   * seed value is passed as the subsequence.
   * @return The generated float.
   */
  __device__ inline float uniform(const uint64_t t) const {
    const uint64_t kCurandSeed = 999961;  // Could be any random number.
    curandStatePhilox4_32_10_t rng;
    curand_init(kCurandSeed, s[0], t, &rng);
    float rnd;
    if (s[0] != s[1]) {
      // Blend one normal draw per seed, then map back through the normal CDF.
      rnd = c[0] * curand_normal(&rng);
      curand_init(kCurandSeed, s[1], t, &rng);
      rnd += c[1] * curand_normal(&rng);
      rnd = normcdff(rnd);
    } else {
      rnd = curand_uniform(&rng);
    }
    return rnd;
  }
#else
  /**
   * @brief Host-side draw for key `t` using pcg32.
   *
   * @param t Key of the draw; passed as the second constructor argument of
   * pcg32 next to the seed value.
   * @return The generated float.
   */
  inline float uniform(const uint64_t t) const {
    pcg32 ng0(s[0], t);
    float rnd;
    if (s[0] != s[1]) {
      std::normal_distribution<float> norm;
      rnd = c[0] * norm(ng0);
      pcg32 ng1(s[1], t);
      // Drop any value cached by the distribution so that the second draw
      // comes from ng1 only.
      norm.reset();
      rnd += c[1] * norm(ng1);
      // Standard normal CDF written with erfc.
      rnd = std::erfc(-rnd * static_cast<float>(M_SQRT1_2)) / 2.0f;
    } else {
      std::uniform_real_distribution<float> uni;
      rnd = uni(ng0);
    }
    return rnd;
  }
#endif  // __CUDACC__
};
}
// namespace graphbolt
#endif // GRAPHBOLT_CONTINUOUS_SEED_H_
graphbolt/include/graphbolt/fused_csc_sampling_graph.h
View file @
f0c7efa9
...
@@ -6,6 +6,7 @@
...
@@ -6,6 +6,7 @@
#ifndef GRAPHBOLT_CSC_SAMPLING_GRAPH_H_
#ifndef GRAPHBOLT_CSC_SAMPLING_GRAPH_H_
#define GRAPHBOLT_CSC_SAMPLING_GRAPH_H_
#define GRAPHBOLT_CSC_SAMPLING_GRAPH_H_
#include <graphbolt/continuous_seed.h>
#include <graphbolt/fused_sampled_subgraph.h>
#include <graphbolt/fused_sampled_subgraph.h>
#include <graphbolt/shared_memory.h>
#include <graphbolt/shared_memory.h>
#include <torch/torch.h>
#include <torch/torch.h>
...
@@ -27,7 +28,7 @@ struct SamplerArgs<SamplerType::NEIGHBOR> {};
...
@@ -27,7 +28,7 @@ struct SamplerArgs<SamplerType::NEIGHBOR> {};
template
<
>
template
<
>
struct
SamplerArgs
<
SamplerType
::
LABOR
>
{
struct
SamplerArgs
<
SamplerType
::
LABOR
>
{
const
torch
::
Tensor
&
indices
;
const
torch
::
Tensor
&
indices
;
int64_t
random_seed
;
continuous_seed
random_seed
;
int64_t
num_nodes
;
int64_t
num_nodes
;
};
};
...
...
graphbolt/src/cuda/neighbor_sampler.cu
View file @
f0c7efa9
...
@@ -6,6 +6,7 @@
...
@@ -6,6 +6,7 @@
*/
*/
#include <c10/core/ScalarType.h>
#include <c10/core/ScalarType.h>
#include <curand_kernel.h>
#include <curand_kernel.h>
#include <graphbolt/continuous_seed.h>
#include <graphbolt/cuda_ops.h>
#include <graphbolt/cuda_ops.h>
#include <graphbolt/cuda_sampling_ops.h>
#include <graphbolt/cuda_sampling_ops.h>
#include <thrust/gather.h>
#include <thrust/gather.h>
...
@@ -41,27 +42,17 @@ __global__ void _ComputeRandoms(
...
@@ -41,27 +42,17 @@ __global__ void _ComputeRandoms(
const
int64_t
num_edges
,
const
indptr_t
*
const
sliced_indptr
,
const
int64_t
num_edges
,
const
indptr_t
*
const
sliced_indptr
,
const
indptr_t
*
const
sub_indptr
,
const
indices_t
*
const
csr_rows
,
const
indptr_t
*
const
sub_indptr
,
const
indices_t
*
const
csr_rows
,
const
weights_t
*
const
sliced_weights
,
const
indices_t
*
const
indices
,
const
weights_t
*
const
sliced_weights
,
const
indices_t
*
const
indices
,
const
uint64_t
random_seed
,
float_t
*
random_arr
,
edge_id_t
*
edge_ids
)
{
const
continuous_seed
random_seed
,
float_t
*
random_arr
,
edge_id_t
*
edge_ids
)
{
int64_t
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int64_t
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
const
int
stride
=
gridDim
.
x
*
blockDim
.
x
;
const
int
stride
=
gridDim
.
x
*
blockDim
.
x
;
curandStatePhilox4_32_10_t
rng
;
const
auto
labor
=
indices
!=
nullptr
;
const
auto
labor
=
indices
!=
nullptr
;
if
(
!
labor
)
{
curand_init
(
random_seed
,
i
,
0
,
&
rng
);
}
while
(
i
<
num_edges
)
{
while
(
i
<
num_edges
)
{
const
auto
row_position
=
csr_rows
[
i
];
const
auto
row_position
=
csr_rows
[
i
];
const
auto
row_offset
=
i
-
sub_indptr
[
row_position
];
const
auto
row_offset
=
i
-
sub_indptr
[
row_position
];
const
auto
in_idx
=
sliced_indptr
[
row_position
]
+
row_offset
;
const
auto
in_idx
=
sliced_indptr
[
row_position
]
+
row_offset
;
const
auto
rnd
=
random_seed
.
uniform
(
labor
?
indices
[
in_idx
]
:
i
);
if
(
labor
)
{
constexpr
uint64_t
kCurandSeed
=
999961
;
curand_init
(
kCurandSeed
,
random_seed
,
indices
[
in_idx
],
&
rng
);
}
const
auto
rnd
=
curand_uniform
(
&
rng
);
const
auto
prob
=
const
auto
prob
=
sliced_weights
?
sliced_weights
[
i
]
:
static_cast
<
weights_t
>
(
1
);
sliced_weights
?
sliced_weights
[
i
]
:
static_cast
<
weights_t
>
(
1
);
const
auto
exp_rnd
=
-
__logf
(
rnd
);
const
auto
exp_rnd
=
-
__logf
(
rnd
);
...
@@ -211,8 +202,8 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors(
...
@@ -211,8 +202,8 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors(
auto
coo_rows
=
ExpandIndptrImpl
(
auto
coo_rows
=
ExpandIndptrImpl
(
sub_indptr
,
indices
.
scalar_type
(),
torch
::
nullopt
,
num_edges
);
sub_indptr
,
indices
.
scalar_type
(),
torch
::
nullopt
,
num_edges
);
num_edges
=
coo_rows
.
size
(
0
);
num_edges
=
coo_rows
.
size
(
0
);
const
auto
random_seed
=
RandomEngine
::
ThreadLocal
()
->
RandInt
(
const
continuous_seed
random_seed
(
RandomEngine
::
ThreadLocal
()
->
RandInt
(
static_cast
<
int64_t
>
(
0
),
std
::
numeric_limits
<
int64_t
>::
max
());
static_cast
<
int64_t
>
(
0
),
std
::
numeric_limits
<
int64_t
>::
max
())
)
;
auto
output_indptr
=
torch
::
empty_like
(
sub_indptr
);
auto
output_indptr
=
torch
::
empty_like
(
sub_indptr
);
torch
::
Tensor
picked_eids
;
torch
::
Tensor
picked_eids
;
torch
::
Tensor
output_indices
;
torch
::
Tensor
output_indices
;
...
...
graphbolt/src/fused_csc_sampling_graph.cc
View file @
f0c7efa9
...
@@ -1417,6 +1417,25 @@ inline void safe_divide(T& a, U b) {
...
@@ -1417,6 +1417,25 @@ inline void safe_divide(T& a, U b) {
a
=
b
>
0
?
(
T
)(
a
/
b
)
:
std
::
numeric_limits
<
T
>::
infinity
();
a
=
b
>
0
?
(
T
)(
a
/
b
)
:
std
::
numeric_limits
<
T
>::
infinity
();
}
}
namespace
labor
{
/**
 * @brief Evaluates rem * (1 - (1 - u)^(1 / n)).
 *
 * @param u Input value the formula is evaluated at.
 * @param n Divisor of the exponent 1 / n.
 * @param rem Scale applied to the result.
 * @return rem * (1 - (1 - u)^(1 / n)).
 */
template <typename T>
inline T invcdf(T u, int64_t n, T rem) {
  constexpr T kOne = 1;
  const T exponent = kOne / n;
  const T complement = std::pow(kOne - u, exponent);
  return rem * (kOne - complement);
}
/**
 * @brief Draws a value keyed by t + j * c from `seed`, shrinks `rem` by
 * invcdf(draw, n, rem) and returns 1 - rem.
 *
 * @param seed Seed object whose uniform() provides the draw.
 * @param t Base key of the draw.
 * @param c Multiplier applied to j when forming the key.
 * @param j Index combined with c to form the key.
 * @param rem In-out remainder; reduced in place by the computed step.
 * @param n Forwarded to invcdf.
 * @return 1 - rem after the update.
 */
template <typename T>
inline T jth_sorted_uniform_random(
    continuous_seed seed, int64_t t, int64_t c, int64_t j, T& rem, int64_t n) {
  const auto key = t + j * c;
  const T draw = seed.uniform(key);
  // https://mathematica.stackexchange.com/a/256707
  const T step = invcdf(draw, n, rem);
  rem -= step;
  return static_cast<T>(1) - rem;
}
};
// namespace labor
/**
/**
* @brief Perform uniform-nonuniform sampling of elements depending on the
* @brief Perform uniform-nonuniform sampling of elements depending on the
* template parameter NonUniform and return the sampled indices.
* template parameter NonUniform and return the sampled indices.
...
@@ -1563,8 +1582,7 @@ inline int64_t LaborPick(
...
@@ -1563,8 +1582,7 @@ inline int64_t LaborPick(
// O(num_neighbors).
// O(num_neighbors).
for
(
uint32_t
i
=
0
;
i
<
fanout
;
++
i
)
{
for
(
uint32_t
i
=
0
;
i
<
fanout
;
++
i
)
{
const
auto
t
=
local_indices_data
[
i
];
const
auto
t
=
local_indices_data
[
i
];
auto
rnd
=
auto
rnd
=
args
.
random_seed
.
uniform
(
t
);
// r_t
labor
::
uniform_random
<
float
>
(
args
.
random_seed
,
t
);
// r_t
if
constexpr
(
NonUniform
)
{
if
constexpr
(
NonUniform
)
{
safe_divide
(
rnd
,
local_probs_data
[
i
]);
safe_divide
(
rnd
,
local_probs_data
[
i
]);
}
// r_t / \pi_t
}
// r_t / \pi_t
...
@@ -1575,8 +1593,7 @@ inline int64_t LaborPick(
...
@@ -1575,8 +1593,7 @@ inline int64_t LaborPick(
}
}
for
(
uint32_t
i
=
fanout
;
i
<
num_neighbors
;
++
i
)
{
for
(
uint32_t
i
=
fanout
;
i
<
num_neighbors
;
++
i
)
{
const
auto
t
=
local_indices_data
[
i
];
const
auto
t
=
local_indices_data
[
i
];
auto
rnd
=
auto
rnd
=
args
.
random_seed
.
uniform
(
t
);
// r_t
labor
::
uniform_random
<
float
>
(
args
.
random_seed
,
t
);
// r_t
if
constexpr
(
NonUniform
)
{
if
constexpr
(
NonUniform
)
{
safe_divide
(
rnd
,
local_probs_data
[
i
]);
safe_divide
(
rnd
,
local_probs_data
[
i
]);
}
// r_t / \pi_t
}
// r_t / \pi_t
...
...
graphbolt/src/random.h
View file @
f0c7efa9
...
@@ -76,32 +76,6 @@ class RandomEngine {
...
@@ -76,32 +76,6 @@ class RandomEngine {
pcg32
rng_
;
pcg32
rng_
;
};
};
namespace
labor
{
template
<
typename
T
>
inline
T
uniform_random
(
int64_t
random_seed
,
int64_t
t
)
{
pcg32
ng
(
random_seed
,
t
);
std
::
uniform_real_distribution
<
T
>
uni
;
return
uni
(
ng
);
}
/**
 * @brief Evaluates rem * (1 - (1 - u)^(1 / n)).
 *
 * @param u Input value the formula is evaluated at.
 * @param n Divisor of the exponent 1 / n.
 * @param rem Scale applied to the result.
 * @return rem * (1 - (1 - u)^(1 / n)).
 */
template <typename T>
inline T invcdf(T u, int64_t n, T rem) {
  constexpr T unit = 1;
  const T inv_n = unit / n;
  const T tail = std::pow(unit - u, inv_n);
  return rem * (unit - tail);
}
/**
 * @brief Draws uniform_random<T>(random_seed, t + j * c), shrinks `rem` by
 * invcdf(draw, n, rem) and returns 1 - rem.
 *
 * @param random_seed Seed forwarded to uniform_random.
 * @param t Base key of the draw.
 * @param c Multiplier applied to j when forming the key.
 * @param j Index combined with c to form the key.
 * @param rem In-out remainder; reduced in place by the computed step.
 * @param n Forwarded to invcdf.
 * @return 1 - rem after the update.
 */
template <typename T>
inline T jth_sorted_uniform_random(
    int64_t random_seed, int64_t t, int64_t c, int64_t j, T& rem, int64_t n) {
  const auto key = t + j * c;
  const auto draw = uniform_random<T>(random_seed, key);
  // https://mathematica.stackexchange.com/a/256707
  const auto step = invcdf(draw, n, rem);
  rem -= step;
  return static_cast<T>(1) - rem;
}
};
// namespace labor
}
// namespace graphbolt
}
// namespace graphbolt
#endif // GRAPHBOLT_RANDOM_H_
#endif // GRAPHBOLT_RANDOM_H_
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment