Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
910d6a98
You need to sign in or sign up before continuing.
Commit
910d6a98
authored
Sep 25, 2024
by
sangwzh
Browse files
update atomicAdd and csr2coo.hip
parent
8f9dcabf
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
15 additions
and
13 deletions
+15
-13
src/array/cuda/atomic.cuh
src/array/cuda/atomic.cuh
+11
-10
src/array/cuda/csr2coo.hip
src/array/cuda/csr2coo.hip
+4
-3
No files found.
src/array/cuda/atomic.cuh
View file @
910d6a98
...
@@ -169,7 +169,7 @@ static __host__ __device__ __forceinline__ unsigned short int atomicCASshort( /
...
@@ -169,7 +169,7 @@ static __host__ __device__ __forceinline__ unsigned short int atomicCASshort( /
return Cast<dtype>::Decode(old); \
return Cast<dtype>::Decode(old); \
}
}
#define DEFINE_ATOMIC_16BIT_
BF
(NAME, dtype) \
#define DEFINE_ATOMIC_16BIT_
MAX
(NAME, dtype) \
template <> \
template <> \
__device__ __forceinline__ dtype Atomic##NAME<dtype>( \
__device__ __forceinline__ dtype Atomic##NAME<dtype>( \
dtype * addr, dtype val) { \
dtype * addr, dtype val) { \
...
@@ -181,12 +181,12 @@ static __host__ __device__ __forceinline__ unsigned short int atomicCASshort( /
...
@@ -181,12 +181,12 @@ static __host__ __device__ __forceinline__ unsigned short int atomicCASshort( /
assumed = old; \
assumed = old; \
old = atomicCASshort( \
old = atomicCASshort( \
addr_as_ui, assumed, \
addr_as_ui, assumed, \
Cast<dtype>::Encode(
max((double
)val, (
double
)dtype(old)))); \
Cast<dtype>::Encode(
dtype(max((float
)val, (
float
)dtype(old))))
)
; \
} while (assumed != old); \
} while (assumed != old); \
return Cast<dtype>::Decode(old); \
return Cast<dtype>::Decode(old); \
}
}
#define DEFINE_ATOMIC_16BIT_M
in
(NAME, dtype) \
#define DEFINE_ATOMIC_16BIT_M
IN
(NAME, dtype) \
template <> \
template <> \
__device__ __forceinline__ dtype Atomic##NAME<dtype>( \
__device__ __forceinline__ dtype Atomic##NAME<dtype>( \
dtype * addr, dtype val) { \
dtype * addr, dtype val) { \
...
@@ -198,24 +198,25 @@ static __host__ __device__ __forceinline__ unsigned short int atomicCASshort( /
...
@@ -198,24 +198,25 @@ static __host__ __device__ __forceinline__ unsigned short int atomicCASshort( /
assumed = old; \
assumed = old; \
old = atomicCASshort( \
old = atomicCASshort( \
addr_as_ui, assumed, \
addr_as_ui, assumed, \
Cast<dtype>::Encode(
min(val, dtype(
old)))); \
Cast<dtype>::Encode(
dtype(min((float)val,(float)
old)))); \
} while (assumed != old); \
} while (assumed != old); \
return Cast<dtype>::Decode(old); \
return Cast<dtype>::Decode(old); \
}
}
#define OP(a, b) max(
(double)a, (double)
b)
#define OP(a, b) max(
a,
b)
DEFINE_ATOMIC
(
Max
)
DEFINE_ATOMIC
(
Max
)
DEFINE_ATOMIC_16BIT
(
Max
,
half
)
DEFINE_ATOMIC_16BIT
_MAX
(
Max
,
half
)
#if BF16_ENABLED
#if BF16_ENABLED
DEFINE_ATOMIC_16BIT_BF
(
Max
,
__hip_bfloat16
)
#define OP_BF(a, b) max_bf((float)a, (float)b)
DEFINE_ATOMIC_16BIT_MAX
(
Max
,
__hip_bfloat16
)
#endif // BF16_ENABLED
#endif // BF16_ENABLED
#undef OP
#undef OP
#define OP(a, b) min(
(double)a, (double)
b)
#define OP(a, b) min(
a,
b)
DEFINE_ATOMIC
(
Min
)
DEFINE_ATOMIC
(
Min
)
DEFINE_ATOMIC_16BIT
(
Min
,
half
)
DEFINE_ATOMIC_16BIT
_MIN
(
Min
,
half
)
#if BF16_ENABLED
#if BF16_ENABLED
DEFINE_ATOMIC_16BIT_
BF
(
Min
,
__hip_bfloat16
)
DEFINE_ATOMIC_16BIT_
MIN
(
Min
,
__hip_bfloat16
)
#endif // BF16_ENABLED
#endif // BF16_ENABLED
#undef OP
#undef OP
...
...
src/array/cuda/csr2coo.hip
View file @
910d6a98
...
@@ -9,6 +9,7 @@
...
@@ -9,6 +9,7 @@
#include <thrust/iterator/constant_iterator.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <hipcub/backend/rocprim/device/device_copy.hpp>
#include <hipcub/hipcub.hpp>
#include <hipcub/hipcub.hpp>
...
@@ -103,7 +104,7 @@ __global__ void _RepeatKernel(
...
@@ -103,7 +104,7 @@ __global__ void _RepeatKernel(
}
}
#if
0
#if
1
template <>
template <>
COOMatrix CSRToCOO<kDGLCUDA, int64_t>(CSRMatrix csr) {
COOMatrix CSRToCOO<kDGLCUDA, int64_t>(CSRMatrix csr) {
const auto& ctx = csr.indptr->ctx;
const auto& ctx = csr.indptr->ctx;
...
@@ -126,14 +127,14 @@ COOMatrix CSRToCOO<kDGLCUDA, int64_t>(CSRMatrix csr) {
...
@@ -126,14 +127,14 @@ COOMatrix CSRToCOO<kDGLCUDA, int64_t>(CSRMatrix csr) {
constexpr int64_t max_copy_at_once = std::numeric_limits<int32_t>::max();
constexpr int64_t max_copy_at_once = std::numeric_limits<int32_t>::max();
for (int64_t i = 0; i < csr.num_rows; i += max_copy_at_once) {
for (int64_t i = 0; i < csr.num_rows; i += max_copy_at_once) {
std::size_t temp_storage_bytes = 0;
std::size_t temp_storage_bytes = 0;
CUDA_CALL(cub::DeviceCopy::Batched(
CUDA_CALL(
hip
cub::DeviceCopy::Batched(
nullptr, temp_storage_bytes, input_buffer + i, output_buffer + i,
nullptr, temp_storage_bytes, input_buffer + i, output_buffer + i,
buffer_sizes + i, ::min(csr.num_rows - i, max_copy_at_once),
buffer_sizes + i, ::min(csr.num_rows - i, max_copy_at_once),
stream));
stream));
auto temp = allocator.alloc_unique<char>(temp_storage_bytes);
auto temp = allocator.alloc_unique<char>(temp_storage_bytes);
CUDA_CALL(cub::DeviceCopy::Batched(
CUDA_CALL(
hip
cub::DeviceCopy::Batched(
temp.get(), temp_storage_bytes, input_buffer + i, output_buffer + i,
temp.get(), temp_storage_bytes, input_buffer + i, output_buffer + i,
buffer_sizes + i, ::min(csr.num_rows - i, max_copy_at_once),
buffer_sizes + i, ::min(csr.num_rows - i, max_copy_at_once),
stream));
stream));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment