"vscode:/vscode.git/clone" did not exist on "34dfaf7532daefdb03c2f618d3c421e1c49fb05e"
array_index_select.hip 3.93 KB
Newer Older
sangwzh's avatar
sangwzh committed
1
2
// !!! This is a file automatically generated by hipify!!!
#include "hip/hip_runtime.h"
3
/**
 *  Copyright (c) 2019 by Contributors
 * @file array/cuda/array_index_select.hip
 * @brief Array index select GPU implementation
 */
#include <dgl/array.h>
sangwzh's avatar
sangwzh committed
9
#include "../../../include/dgl/array.h"
10

11
#include "../../runtime/cuda/cuda_common.h"
sangwzh's avatar
sangwzh committed
12
13
#include "array_index_select.cuh"
#include "utils.h"
14
15
16
17
18
19

namespace dgl {
using runtime::NDArray;
namespace aten {
namespace impl {

20
template <DGLDeviceType XPU, typename DType, typename IdType>
21
22
23
NDArray IndexSelect(NDArray array, IdArray index) {
  const int64_t arr_len = array->shape[0];
  const int64_t len = index->shape[0];
24
25
26
27
28
29
30
  int64_t num_feat = 1;
  std::vector<int64_t> shape{len};
  for (int d = 1; d < array->ndim; ++d) {
    num_feat *= array->shape[d];
    shape.emplace_back(array->shape[d]);
  }

31
  // use index->ctx for pinned array
32
  NDArray ret = NDArray::Empty(shape, array->dtype, index->ctx);
33
  if (len == 0 || arr_len * num_feat == 0) return ret;
34
  DType* ret_data = static_cast<DType*>(ret->data);
35

36
  const DType* array_data = static_cast<DType*>(cuda::GetDevicePointer(array));
sangwzh's avatar
sangwzh committed
37
38
  // const IdType* idx_data = static_cast<IdType*>(index->data);
  const IdType* idx_data = static_cast<IdType*>(cuda::GetDevicePointer(index));
39

sangwzh's avatar
sangwzh committed
40
  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
41
  if (num_feat == 1) {
42
43
44
45
46
    const int nt = cuda::FindNumThreads(len);
    const int nb = (len + nt - 1) / nt;
    CUDA_KERNEL_CALL(
        IndexSelectSingleKernel, nb, nt, 0, stream, array_data, idx_data, len,
        arr_len, ret_data);
47
  } else {
48
49
50
51
52
53
54
55
56
    dim3 block(256, 1);
    while (static_cast<int64_t>(block.x) >= 2 * num_feat) {
      block.x /= 2;
      block.y *= 2;
    }
    const dim3 grid((len + block.y - 1) / block.y);
    CUDA_KERNEL_CALL(
        IndexSelectMultiKernel, grid, block, 0, stream, array_data, num_feat,
        idx_data, len, arr_len, ret_data);
57
  }
58
59
60
  return ret;
}

61
62
63
64
65
66
// Explicit instantiations of IndexSelect(NDArray, IdArray) for kDGLCUDA:
// every supported element type paired with 32-bit and 64-bit index types.
template NDArray IndexSelect<kDGLCUDA, int32_t, int32_t>(NDArray, IdArray);
template NDArray IndexSelect<kDGLCUDA, int32_t, int64_t>(NDArray, IdArray);
template NDArray IndexSelect<kDGLCUDA, int64_t, int32_t>(NDArray, IdArray);
template NDArray IndexSelect<kDGLCUDA, int64_t, int64_t>(NDArray, IdArray);
template NDArray IndexSelect<kDGLCUDA, __half, int32_t>(NDArray, IdArray);
template NDArray IndexSelect<kDGLCUDA, __half, int64_t>(NDArray, IdArray);
// bfloat16 variants are only instantiated when BF16_ENABLED is set.
#if BF16_ENABLED
template NDArray IndexSelect<kDGLCUDA, __hip_bfloat16, int32_t>(
    NDArray, IdArray);
template NDArray IndexSelect<kDGLCUDA, __hip_bfloat16, int64_t>(
    NDArray, IdArray);
#endif  // BF16_ENABLED
template NDArray IndexSelect<kDGLCUDA, float, int32_t>(NDArray, IdArray);
template NDArray IndexSelect<kDGLCUDA, float, int64_t>(NDArray, IdArray);
template NDArray IndexSelect<kDGLCUDA, double, int32_t>(NDArray, IdArray);
template NDArray IndexSelect<kDGLCUDA, double, int64_t>(NDArray, IdArray);
77

78
template <DGLDeviceType XPU, typename DType>
79
DType IndexSelect(NDArray array, int64_t index) {
80
  auto device = runtime::DeviceAPI::Get(array->ctx);
81
  DType ret = static_cast<DType>(0.0f);
82
  device->CopyDataFromTo(
83
84
      static_cast<DType*>(array->data) + index, 0, &ret, 0, sizeof(DType),
      array->ctx, DGLContext{kDGLCPU, 0}, array->dtype);
85
  return ret;
86
87
}

88
89
90
91
92
// Explicit instantiations of the scalar IndexSelect(NDArray, int64_t) overload
// for kDGLCUDA, one per supported element type.
template int32_t IndexSelect<kDGLCUDA, int32_t>(NDArray array, int64_t index);
template int64_t IndexSelect<kDGLCUDA, int64_t>(NDArray array, int64_t index);
template uint32_t IndexSelect<kDGLCUDA, uint32_t>(NDArray array, int64_t index);
template uint64_t IndexSelect<kDGLCUDA, uint64_t>(NDArray array, int64_t index);
template __half IndexSelect<kDGLCUDA, __half>(NDArray array, int64_t index);
// The bfloat16 variant is only instantiated when BF16_ENABLED is set.
#if BF16_ENABLED
template __hip_bfloat16 IndexSelect<kDGLCUDA, __hip_bfloat16>(
    NDArray array, int64_t index);
#endif  // BF16_ENABLED
template float IndexSelect<kDGLCUDA, float>(NDArray array, int64_t index);
template double IndexSelect<kDGLCUDA, double>(NDArray array, int64_t index);
99
100
101
102

}  // namespace impl
}  // namespace aten
}  // namespace dgl