OpenDAS / dgl · Commit 6ac701f8
authored Sep 13, 2024 by sangwzh

update src and graphbolt code

parent 1547bd93
Changes: 116 files in the full commit; this page shows 20 changed files, with 105 additions and 81 deletions (+105 -81).
graphbolt/src/cuda/common.h                     +20 -19
graphbolt/src/cuda/cumsum.hip                    +3  -2
graphbolt/src/cuda/expand_indptr.hip             +4  -3
graphbolt/src/cuda/gpu_cache.hip                 +3  -2
graphbolt/src/cuda/index_select_csc_impl.hip     +8  -6
graphbolt/src/cuda/index_select_impl.hip         +7  -5
graphbolt/src/cuda/insubgraph.hip                +2  -1
graphbolt/src/cuda/isin.hip                      +2  -1
graphbolt/src/cuda/max_uva_threads.cc            +2  -1
graphbolt/src/cuda/neighbor_sampler.hip         +15 -12
graphbolt/src/cuda/sampling_utils.hip            +6  -5
graphbolt/src/cuda/sort_impl.hip                 +4  -3
graphbolt/src/cuda/unique_and_compact_impl.hip   +5  -4
graphbolt/src/expand_indptr.cc                   +3  -2
graphbolt/src/fused_csc_sampling_graph.cc        +5  -4
graphbolt/src/index_select.cc                    +3  -2
graphbolt/src/isin.cc                            +3  -2
graphbolt/src/python_binding.cc                  +6  -5
graphbolt/src/random.cc                          +2  -1
graphbolt/src/shared_memory_helper.cc            +2  -1
graphbolt/src/cuda/common.h

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2017-2023 by Contributors
  * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
...
@@ -7,11 +8,11 @@
 #ifndef GRAPHBOLT_CUDA_COMMON_H_
 #define GRAPHBOLT_CUDA_COMMON_H_
-#include <ATen/cuda/CUDAEvent.h>
-#include <c10/cuda/CUDACachingAllocator.h>
-#include <c10/cuda/CUDAException.h>
-#include <c10/cuda/CUDAStream.h>
-#include <cuda_runtime.h>
+#include <ATen/hip/HIPEvent.h>
+#include <ATen/hip/impl/HIPCachingAllocatorMasqueradingAsCUDA.h>
+#include <c10/hip/HIPException.h>
+#include <ATen/hip/impl/HIPStreamMasqueradingAsCUDA.h>
+#include <hip/hip_runtime.h>
 #include <torch/script.h>
 #include <memory>
...
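Background note (added for context, not part of the commit): PyTorch's ROCm build keeps the at::cuda/c10::cuda API surface and the kCUDA device type while backing them with HIP. The *MasqueradingAsCUDA headers provide the shim classes that make HIP streams and the HIP caching allocator look like their CUDA counterparts to the rest of PyTorch, which is why hipify retargets these includes to the masquerading wrappers rather than to plain c10::hip classes.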
@@ -26,8 +27,8 @@ namespace cuda {
  * that uses torch's CUDA memory pool and the current cuda stream:
  *
  * cuda::CUDAWorkspaceAllocator allocator;
- * const auto stream = torch::cuda::getDefaultCUDAStream();
- * const auto exec_policy = thrust::cuda::par_nosync(allocator).on(stream);
+ * const auto stream = torch::hip::getDefaultHIPStreamMasqueradingAsCUDA();
+ * const auto exec_policy = thrust::hip::par_nosync(allocator).on(stream);
  *
  * Now, one can pass exec_policy to thrust functions
  *
...
@@ -47,13 +48,13 @@ struct CUDAWorkspaceAllocator {
   CUDAWorkspaceAllocator& operator=(const CUDAWorkspaceAllocator&) = default;
 
   void operator()(void* ptr) const {
-    c10::cuda::CUDACachingAllocator::raw_delete(ptr);
+    c10::hip::HIPCachingAllocator::raw_delete(ptr);
   }
 
   // Required by thrust to satisfy allocator requirements.
   value_type* allocate(std::ptrdiff_t size) const {
     return reinterpret_cast<value_type*>(
-        c10::cuda::CUDACachingAllocator::raw_alloc(size));
+        c10::hip::HIPCachingAllocator::raw_alloc(size));
   }
 
   // Required by thrust to satisfy allocator requirements.
...
@@ -69,7 +70,7 @@ struct CUDAWorkspaceAllocator {
 inline auto GetAllocator() { return CUDAWorkspaceAllocator{}; }
 
-inline auto GetCurrentStream() { return c10::cuda::getCurrentCUDAStream(); }
+inline auto GetCurrentStream() { return c10::hip::getCurrentHIPStreamMasqueradingAsCUDA(); }
 
 template <typename T>
 inline bool is_zero(T size) {
...
@@ -81,15 +82,15 @@ inline bool is_zero<dim3>(dim3 size) {
   return size.x == 0 || size.y == 0 || size.z == 0;
 }
 
-#define CUDA_CALL(func) C10_CUDA_CHECK((func))
+#define CUDA_CALL(func) C10_HIP_CHECK((func))
 
 #define CUDA_KERNEL_CALL(kernel, nblks, nthrs, shmem, ...)          \
   {                                                                 \
     if (!graphbolt::cuda::is_zero((nblks)) &&                       \
         !graphbolt::cuda::is_zero((nthrs))) {                       \
       auto stream = graphbolt::cuda::GetCurrentStream();            \
-      (kernel)<<<(nblks), (nthrs), (shmem), stream>>>(__VA_ARGS__); \
-      C10_CUDA_KERNEL_LAUNCH_CHECK();                               \
+      hipLaunchKernelGGL(((kernel)), dim3((nblks)), dim3((nthrs)), (shmem), stream, __VA_ARGS__); \
+      C10_HIP_KERNEL_LAUNCH_CHECK();                                \
     }                                                               \
   }
...
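To make the macro change concrete, here is a minimal sketch (the kernel and wrapper are hypothetical, not part of graphbolt) of a call site that compiles unchanged before and after the port, since CUDA_KERNEL_CALL hides the launch syntax:

__global__ void _Fill(int64_t* out, int64_t val, int64_t n) {
  // Grid-stride loop, matching the style of graphbolt's kernels.
  for (int64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n;
       i += gridDim.x * blockDim.x)
    out[i] = val;
}

void Fill(int64_t* out, int64_t val, int64_t n) {
  const dim3 block(256);
  const dim3 grid((n + block.x - 1) / block.x);
  // Expands to <<<...>>> on CUDA and to hipLaunchKernelGGL(...) on ROCm;
  // both variants then run the C10 kernel-launch check.
  CUDA_KERNEL_CALL(_Fill, grid, block, 0, out, val, n);
}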
@@ -98,16 +99,16 @@ inline bool is_zero<dim3>(dim3 size) {
     auto allocator = graphbolt::cuda::GetAllocator();                          \
     auto stream = graphbolt::cuda::GetCurrentStream();                         \
     size_t workspace_size = 0;                                                 \
-    CUDA_CALL(cub::fn(nullptr, workspace_size, __VA_ARGS__, stream));          \
+    CUDA_CALL(hipcub::fn(nullptr, workspace_size, __VA_ARGS__, stream));       \
     auto workspace = allocator.AllocateStorage<char>(workspace_size);          \
-    CUDA_CALL(cub::fn(workspace.get(), workspace_size, __VA_ARGS__, stream));  \
+    CUDA_CALL(hipcub::fn(workspace.get(), workspace_size, __VA_ARGS__, stream)); \
  }
 
 #define THRUST_CALL(fn, ...)                                                   \
   [&] {                                                                        \
     auto allocator = graphbolt::cuda::GetAllocator();                          \
     auto stream = graphbolt::cuda::GetCurrentStream();                         \
-    const auto exec_policy = thrust::cuda::par_nosync(allocator).on(stream);   \
+    const auto exec_policy = thrust::hip::par_nosync(allocator).on(stream);    \
     return thrust::fn(exec_policy, __VA_ARGS__);                               \
   }()
...
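Usage is identical on both backends. A hedged sketch (function and pointer names are placeholders, not from this commit) of how these macros are invoked:

void ExclusiveSumExample(const int64_t* d_in, int64_t* d_out, int64_t num_items) {
  // Two-phase hipcub pattern handled by CUB_CALL: the first expansion queries
  // workspace_size with a null workspace pointer, the macro allocates that many
  // bytes from torch's caching allocator, then the call runs for real on the
  // current stream.
  CUB_CALL(DeviceScan::ExclusiveSum, d_in, d_out, num_items);

  // THRUST_CALL runs a thrust algorithm under the same allocator/stream pair.
  const auto total = THRUST_CALL(reduce, d_in, d_in + num_items, int64_t{0});
  (void)total;
}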
@@ -126,7 +127,7 @@ template <typename scalar_t>
 struct CopyScalar {
   CopyScalar() : is_ready_(true) { init_pinned_storage(); }
 
-  void record(at::cuda::CUDAStream stream = GetCurrentStream()) {
+  void record(at::hip::HIPStreamMasqueradingAsCUDA stream = GetCurrentStream()) {
     copy_event_.record(stream);
     is_ready_ = false;
   }
...
@@ -138,9 +139,9 @@ struct CopyScalar {
   CopyScalar(const scalar_t* device_ptr) {
     init_pinned_storage();
     auto stream = GetCurrentStream();
-    CUDA_CALL(cudaMemcpyAsync(
+    CUDA_CALL(hipMemcpyAsync(
         reinterpret_cast<scalar_t*>(pinned_scalar_.data_ptr()), device_ptr,
-        sizeof(scalar_t), cudaMemcpyDeviceToHost, stream));
+        sizeof(scalar_t), hipMemcpyDeviceToHost, stream));
     record(stream);
   }
...
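A hedged usage sketch of CopyScalar (variable names hypothetical; the conversion back to scalar_t is assumed to synchronize on copy_event_, as the is_ready_ flag above suggests):

int64_t ReadCounter(const int64_t* d_counter) {
  // Enqueues hipMemcpyAsync into pinned memory and records copy_event_.
  cuda::CopyScalar<int64_t> num_sampled(d_counter);
  // ... more work can be queued on the same stream here without blocking ...
  return static_cast<int64_t>(num_sampled);  // assumed to wait on the event only
}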
graphbolt/src/cuda/cumsum.cu → graphbolt/src/cuda/cumsum.hip

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2023 by Contributors
  * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
  * @file cuda/cumsum.cu
  * @brief Cumsum operators implementation on CUDA.
  */
-#include <cub/cub.cuh>
+#include <hipcub/hipcub.hpp>
 
-#include "./common.h"
+#include "common.h"
 
 namespace graphbolt {
 namespace ops {
...
graphbolt/src/cuda/expand_indptr.cu → graphbolt/src/cuda/expand_indptr.hip

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2023 by Contributors
  * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
...
@@ -8,10 +9,10 @@
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/iterator/transform_iterator.h>
-#include <cub/cub.cuh>
+#include <hipcub/hipcub.hpp>
 #include <limits>
 
-#include "./common.h"
+#include "common.h"
 
 namespace graphbolt {
 namespace ops {
...
@@ -86,7 +87,7 @@ torch::Tensor ExpandIndptrImpl(
         CUB_CALL(
             DeviceCopy::Batched, input_buffer + i,
             output_buffer + i, buffer_sizes + i,
-            std::min(num_rows - i, max_copy_at_once));
+            ::min(num_rows - i, max_copy_at_once));
       }
     }));
  }));
...
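A note on the std::min → ::min rewrites in this and the following files (an inference about hipify behavior, not stated in the commit): std::min is not guaranteed to be callable in device code, so hipify substitutes the global ::min/::max overloads that hip/hip_runtime.h declares for host and device alike. The rewrite is textual, which is why it also fires on host-side call sites such as this one.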
graphbolt/src/cuda/gpu_cache.cu → graphbolt/src/cuda/gpu_cache.hip

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2023 by Contributors
  * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
...
@@ -6,8 +7,8 @@
  */
 #include <numeric>
 
-#include "./common.h"
-#include "./gpu_cache.h"
+#include "common.h"
+#include "gpu_cache.h"
 
 namespace graphbolt {
 namespace cuda {
...
graphbolt/src/cuda/index_select_csc_impl.cu → graphbolt/src/cuda/index_select_csc_impl.hip

+// !!! This is a file automatically generated by hipify!!!
+#include "hip/hip_runtime.h"
 /**
  * Copyright (c) 2023 by Contributors
  * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
...
@@ -10,12 +12,12 @@
 #include <thrust/iterator/transform_iterator.h>
 #include <thrust/iterator/zip_iterator.h>
-#include <cub/cub.cuh>
+#include <hipcub/hipcub.hpp>
 #include <numeric>
 
-#include "./common.h"
-#include "./max_uva_threads.h"
-#include "./utils.h"
+#include "common.h"
+#include "max_uva_threads.h"
+#include "utils.h"
 
 namespace graphbolt {
 namespace ops {
...
@@ -132,7 +134,7 @@ std::tuple<torch::Tensor, torch::Tensor> UVAIndexSelectCSCCopyIndices(
       torch::empty(output_size.value(), options.dtype(indices.scalar_type()));
   const dim3 block(BLOCK_SIZE);
   const dim3 grid(
-      (std::min(edge_count_aligned, cuda::max_uva_threads.value_or(1 << 20)) +
+      (::min(edge_count_aligned, cuda::max_uva_threads.value_or(1 << 20)) +
        BLOCK_SIZE - 1) /
       BLOCK_SIZE);
...
@@ -220,7 +222,7 @@ void IndexSelectCSCCopyIndices(
     for (int64_t i = 0; i < num_nodes; i += max_copy_at_once) {
       CUB_CALL(
           DeviceMemcpy::Batched, input_buffer_it + i, output_buffer_it + i,
-          buffer_sizes + i, std::min(num_nodes - i, max_copy_at_once));
+          buffer_sizes + i, ::min(num_nodes - i, max_copy_at_once));
     }
   }
...
graphbolt/src/cuda/index_select_impl.cu → graphbolt/src/cuda/index_select_impl.hip

+// !!! This is a file automatically generated by hipify!!!
+#include "hip/hip_runtime.h"
 /**
  * Copyright (c) 2023 by Contributors
  * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
...
@@ -9,9 +11,9 @@
 #include <numeric>
 
-#include "./common.h"
-#include "./max_uva_threads.h"
-#include "./utils.h"
+#include "common.h"
+#include "max_uva_threads.h"
+#include "utils.h"
 
 namespace graphbolt {
 namespace ops {
...
@@ -124,7 +126,7 @@ torch::Tensor UVAIndexSelectImpl_(torch::Tensor input, torch::Tensor index) {
   // Use a single thread to process each output row to avoid wasting threads.
   const int num_threads = cuda::FindNumThreads(return_len);
   const int num_blocks =
-      (std::min(return_len, cuda::max_uva_threads.value_or(1 << 20)) +
+      (::min(return_len, cuda::max_uva_threads.value_or(1 << 20)) +
        num_threads - 1) /
       num_threads;
   CUDA_KERNEL_CALL(
...
@@ -137,7 +139,7 @@ torch::Tensor UVAIndexSelectImpl_(torch::Tensor input, torch::Tensor index) {
     block.x >>= 1;
     block.y <<= 1;
   }
-  const dim3 grid(std::min(
+  const dim3 grid(::min(
       (return_len + block.y - 1) / block.y,
       cuda::max_uva_threads.value_or(1 << 20) / BLOCK_SIZE));
   if (aligned_feature_size * sizeof(DType) <= GPU_CACHE_LINE_SIZE) {
...
graphbolt/src/cuda/insubgraph.cu → graphbolt/src/cuda/insubgraph.hip

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2023 by Contributors
  * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
...
@@ -8,7 +9,7 @@
 #include <graphbolt/cuda_ops.h>
 #include <graphbolt/cuda_sampling_ops.h>
 
-#include "./common.h"
+#include "common.h"
 
 namespace graphbolt {
 namespace ops {
...
graphbolt/src/cuda/isin.cu → graphbolt/src/cuda/isin.hip

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2023 by Contributors
  * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
...
@@ -7,7 +8,7 @@
 #include <graphbolt/cuda_ops.h>
 #include <thrust/binary_search.h>
 
-#include "./common.h"
+#include "common.h"
 
 namespace graphbolt {
 namespace ops {
...
graphbolt/src/cuda/max_uva_threads.cc

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2023 by Contributors
  * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
  * @file cuda/max_uva_threads.cc
  * @brief Max uva threads variable setter function.
  */
-#include "./max_uva_threads.h"
+#include "max_uva_threads.h"
 
 namespace graphbolt {
 namespace cuda {
...
graphbolt/src/cuda/neighbor_sampler.cu → graphbolt/src/cuda/neighbor_sampler.hip

+// !!! This is a file automatically generated by hipify!!!
+#include "hip/hip_runtime.h"
+#include "hip/hip_bf16.h"
 /**
  * Copyright (c) 2023 by Contributors
  * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
...
@@ -5,7 +8,7 @@
  * @brief Index select operator implementation on CUDA.
  */
 #include <c10/core/ScalarType.h>
-#include <curand_kernel.h>
+#include <hiprand/hiprand_kernel.h>
 #include <graphbolt/cuda_ops.h>
 #include <graphbolt/cuda_sampling_ops.h>
 #include <thrust/gather.h>
...
@@ -15,14 +18,14 @@
 #include <algorithm>
 #include <array>
-#include <cub/cub.cuh>
+#include <hipcub/hipcub.hpp>
 #include <limits>
 #include <numeric>
 #include <type_traits>
 
 #include "../random.h"
-#include "./common.h"
-#include "./utils.h"
+#include "common.h"
+#include "utils.h"
 
 namespace graphbolt {
 namespace ops {
...
@@ -44,11 +47,11 @@ __global__ void _ComputeRandoms(
     const uint64_t random_seed, float_t* random_arr, edge_id_t* edge_ids) {
   int64_t i = blockIdx.x * blockDim.x + threadIdx.x;
   const int stride = gridDim.x * blockDim.x;
-  curandStatePhilox4_32_10_t rng;
+  hiprandStatePhilox4_32_10_t rng;
   const auto labor = indices != nullptr;
 
   if (!labor) {
-    curand_init(random_seed, i, 0, &rng);
+    hiprand_init(random_seed, i, 0, &rng);
   }
 
   while (i < num_edges) {
...
@@ -58,10 +61,10 @@
       if (labor) {
         constexpr uint64_t kCurandSeed = 999961;
-        curand_init(kCurandSeed, random_seed, indices[in_idx], &rng);
+        hiprand_init(kCurandSeed, random_seed, indices[in_idx], &rng);
       }
-      const auto rnd = curand_uniform(&rng);
+      const auto rnd = hiprand_uniform(&rng);
       const auto prob =
           sliced_weights ? sliced_weights[i] : static_cast<weights_t>(1);
       const auto exp_rnd = -__logf(rnd);
...
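The hiprand device API mirrors cuRAND one-to-one here. A self-contained sketch (hypothetical kernel, same Philox4x32-10 pattern as _ComputeRandoms above):

#include <hiprand/hiprand_kernel.h>

__global__ void _Uniforms(uint64_t seed, float* out, int64_t n) {
  const int64_t i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i >= n) return;
  hiprandStatePhilox4_32_10_t rng;
  // Same (seed, subsequence, offset) signature as curand_init.
  hiprand_init(seed, i, 0, &rng);
  out[i] = hiprand_uniform(&rng);  // uniform float in (0, 1]
}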
@@ -152,9 +155,9 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors(
   }
   // Finally, copy the adjusted fanout values to the device memory.
   auto fanouts_device = allocator.AllocateStorage<int64_t>(fanouts.size());
-  CUDA_CALL(cudaMemcpyAsync(
+  CUDA_CALL(hipMemcpyAsync(
       fanouts_device.get(), fanouts_pinned_ptr,
-      sizeof(int64_t) * fanouts.size(), cudaMemcpyHostToDevice,
+      sizeof(int64_t) * fanouts.size(), hipMemcpyHostToDevice,
       cuda::GetCurrentStream()));
   auto in_degree_and_sliced_indptr = SliceCSCIndptr(indptr, nodes);
   auto in_degree = std::get<0>(in_degree_and_sliced_indptr);
...
@@ -271,7 +274,7 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors(
           "Selected edge_id_t must be capable of storing edge_ids.");
       // Using bfloat16 for random numbers works just as reliably as
      // float32 and provides around %30 percent speedup.
-      using rnd_t = nv_bfloat16;
+      using rnd_t = __hip_bfloat16;
       auto randoms =
           allocator.AllocateStorage<rnd_t>(num_edges.value());
       auto randoms_sorted =
...
@@ -362,7 +365,7 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors(
         CUB_CALL(
             DeviceCopy::Batched, input_buffer_it + i,
             output_buffer_it + i, sampled_degree + i,
-            std::min(num_rows - i, max_copy_at_once));
+            ::min(num_rows - i, max_copy_at_once));
       }
     }));
...
graphbolt/src/cuda/sampling_utils.cu → graphbolt/src/cuda/sampling_utils.hip

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2023 by Contributors
  * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
...
@@ -7,10 +8,10 @@
 #include <thrust/for_each.h>
 #include <thrust/iterator/counting_iterator.h>
-#include <cub/cub.cuh>
+#include <hipcub/hipcub.hpp>
 
-#include "./common.h"
-#include "./utils.h"
+#include "common.h"
+#include "utils.h"
 
 namespace graphbolt {
 namespace ops {
...
@@ -72,7 +73,7 @@ std::tuple<torch::Tensor, torch::Tensor> SliceCSCIndptr(
     CUB_CALL(
         DeviceAdjacentDifference::SubtractLeftCopy,
         indptr.data_ptr<indptr_t>(), in_degree.data_ptr<indptr_t>(),
-        num_nodes + 1, cub::Difference{});
+        num_nodes + 1, hipcub::Difference{});
   }));
   in_degree = in_degree.slice(0, 1);
   return {in_degree, sliced_indptr};
...
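Worked reading of this hunk: per the CUB/hipCUB documentation, SubtractLeftCopy writes out[i] = in[i] - in[i-1], with out[0] = in[0]. Applied to an indptr array of length num_nodes + 1 and followed by in_degree.slice(0, 1), which drops the first element, it yields in_degree[i] = indptr[i+1] - indptr[i], i.e. exactly each node's degree.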
@@ -126,7 +127,7 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> SliceCSCIndptrHetero(
     CUB_CALL(
         DeviceAdjacentDifference::SubtractLeftCopy,
         new_sub_indptr.data_ptr<indptr_t>(),
-        new_indegree.data_ptr<indptr_t>(), num_rows + 1, cub::Difference{});
+        new_indegree.data_ptr<indptr_t>(), num_rows + 1, hipcub::Difference{});
   }));
   // Discard the first element of the SubtractLeftCopy result and ensure that
   // new_indegree tensor has size num_rows + 1 so that its ExclusiveCumSum is
...
graphbolt/src/cuda/sort_impl.cu → graphbolt/src/cuda/sort_impl.hip

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2023 by Contributors
  * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
...
@@ -6,10 +7,10 @@
  */
 #include <c10/core/ScalarType.h>
-#include <cub/cub.cuh>
+#include <hipcub/hipcub.hpp>
 
-#include "./common.h"
-#include "./utils.h"
+#include "common.h"
+#include "utils.h"
 
 namespace graphbolt {
 namespace ops {
...
graphbolt/src/cuda/unique_and_compact_impl.cu → graphbolt/src/cuda/unique_and_compact_impl.hip

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2023 by Contributors
  * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
...
@@ -10,11 +11,11 @@
 #include <thrust/gather.h>
 #include <thrust/logical.h>
-#include <cub/cub.cuh>
+#include <hipcub/hipcub.hpp>
 #include <type_traits>
 
-#include "./common.h"
-#include "./utils.h"
+#include "common.h"
+#include "utils.h"
 
 namespace graphbolt {
 namespace ops {
...
@@ -97,7 +98,7 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> UniqueAndCompact(
   // and max_id_dst.
   if (num_bits == 0) {
     num_bits = cuda::NumberOfBits(
-        1 + std::max(
+        1 + ::max(
                 static_cast<scalar_t>(max_id_src),
                 static_cast<scalar_t>(max_id_dst)));
   }
...
graphbolt/src/expand_indptr.cc

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2023 by Contributors
  * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
...
@@ -6,8 +7,8 @@
  */
 #include <graphbolt/cuda_ops.h>
 
-#include "./macro.h"
-#include "./utils.h"
+#include "macro.h"
+#include "utils.h"
 
 namespace graphbolt {
 namespace ops {
...
graphbolt/src/fused_csc_sampling_graph.cc

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2023 by Contributors
  * @file fused_csc_sampling_graph.cc
...
@@ -17,10 +18,10 @@
 #include <tuple>
 #include <vector>
 
-#include "./macro.h"
-#include "./random.h"
-#include "./shared_memory_helper.h"
-#include "./utils.h"
+#include "macro.h"
+#include "random.h"
+#include "shared_memory_helper.h"
+#include "utils.h"
 
 namespace {
 torch::optional<torch::Dict<std::string, torch::Tensor>> TensorizeDict(
...
graphbolt/src/index_select.cc

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2023 by Contributors
  * @file index_select.cc
...
@@ -6,8 +7,8 @@
 #include <graphbolt/cuda_ops.h>
 #include <graphbolt/fused_csc_sampling_graph.h>
 
-#include "./macro.h"
-#include "./utils.h"
+#include "macro.h"
+#include "utils.h"
 
 namespace graphbolt {
 namespace ops {
...
graphbolt/src/isin.cc

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2023 by Contributors
  *
...
@@ -8,8 +9,8 @@
 #include <graphbolt/cuda_ops.h>
 #include <graphbolt/isin.h>
 
-#include "./macro.h"
-#include "./utils.h"
+#include "macro.h"
+#include "utils.h"
 
 namespace {
 static constexpr int kSearchGrainSize = 4096;
...
graphbolt/src/python_binding.cc

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2023 by Contributors
  * @file python_binding.cc
...
@@ -10,14 +11,14 @@
 #include <graphbolt/unique_and_compact.h>
 
 #ifdef GRAPHBOLT_USE_CUDA
-#include "./cuda/max_uva_threads.h"
+#include "cuda/max_uva_threads.h"
 #endif
 
-#include "./expand_indptr.h"
-#include "./index_select.h"
-#include "./random.h"
+#include "expand_indptr.h"
+#include "index_select.h"
+#include "random.h"
 
 #ifdef GRAPHBOLT_USE_CUDA
-#include "./cuda/gpu_cache.h"
+#include "cuda/gpu_cache.h"
 #endif
 
 namespace graphbolt {
...
graphbolt/src/random.cc

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2023 by Contributors
  * @file random.cc
  * @brief Random Engine.
  */
-#include "./random.h"
+#include "random.h"
 
 #include <torch/torch.h>
...
graphbolt/src/shared_memory_helper.cc

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2023 by Contributors
  *
  * @file shared_memory_helper.cc
  * @brief Share memory helper implementation.
  */
-#include "./shared_memory_helper.h"
+#include "shared_memory_helper.h"
 
 #include <graphbolt/serialize.h>
 #include <graphbolt/shared_memory.h>
...