"examples/vscode:/vscode.git/clone" did not exist on "5a47442f9221eba3abe3e27c7af8304241ac58ca"
macro.cuh 3.06 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
/*!
 *  Copyright (c) 2020 by Contributors
 * \file array/cuda/macro.cuh
 * \brief Macro to call SPMM/SDDMM cuda kernels.
 */
#ifndef DGL_ARRAY_CUDA_MACRO_CUH_
#define DGL_ARRAY_CUDA_MACRO_CUH_

///////////////////////// Dispatchers //////////////////////////


/*!
 * \brief Dispatch a code body over the (UseBcast, UseIdx) compile-time flags.
 *
 * The body passed as the trailing argument(s) is instantiated four times, once
 * per combination of two `constexpr bool` constants that it may reference:
 *   - UseBcast: true iff `(BCAST).use_bcast` is true at run time.
 *   - UseIdx:   true iff `(EDGE_MAP)` evaluates to true at run time.
 *
 * When broadcasting is required, the host-side offset tables
 * `lhs_offset` / `rhs_offset` of BCAST are copied into device workspace
 * obtained from the DeviceAPI of CTX, and the resulting pointers are stored in
 * LHS_OFF / RHS_OFF before the body runs. Both workspaces are freed right
 * after the body, so LHS_OFF / RHS_OFF must not be used once the macro ends.
 * When broadcasting is not required, LHS_OFF / RHS_OFF are left untouched and
 * CTX is not evaluated.
 *
 * \param BCAST    Expression bindable to `const BcastOff &`. Its offset members
 *                 must provide size() and data() (e.g. std::vector<int64_t>).
 * \param EDGE_MAP Expression tested for truthiness to select UseIdx.
 * \param CTX      DLContext used to look up the device API and allocate the
 *                 workspaces (broadcast branch only).
 * \param LHS_OFF  Assignable `int64_t*` lvalue receiving the device copy of
 *                 `lhs_offset` (broadcast branch only).
 * \param RHS_OFF  Assignable `int64_t*` lvalue receiving the device copy of
 *                 `rhs_offset` (broadcast branch only).
 * \param ...      Statement(s) to execute, typically a kernel launch.
 *
 * \note The locals `info` (always) and `ctx` / `device` (broadcast branch) are
 *       in scope inside the body and shadow same-named outer variables.
 * \note If the body leaves the macro early (`break`, `return`, or an
 *       exception), the FreeWorkspace calls are skipped.
 * \note NOTE(review): the workspaces are released immediately after the body
 *       returns on the host; presumably the workspace allocator makes this
 *       safe for kernels still in flight -- confirm against the DeviceAPI.
 */
#define BCAST_IDX_CTX_SWITCH(BCAST, EDGE_MAP, CTX, LHS_OFF, RHS_OFF, ...) do { \
  const BcastOff &info = (BCAST);                                              \
  if (!info.use_bcast) {                                                       \
    /* No broadcasting: the offset buffers are neither needed nor touched. */  \
    constexpr bool UseBcast = false;                                           \
    if ((EDGE_MAP)) {                                                          \
      constexpr bool UseIdx = true;                                            \
      { __VA_ARGS__ }                                                          \
    } else {                                                                   \
      constexpr bool UseIdx = false;                                           \
      { __VA_ARGS__ }                                                          \
    }                                                                          \
  } else {                                                                     \
    constexpr bool UseBcast = true;                                            \
    const DLContext ctx = (CTX);                                               \
    const auto device = runtime::DeviceAPI::Get(ctx);                          \
    /* Stage both host-side offset tables in device workspace. data() is */    \
    /* used instead of &v[0] so an empty table never indexes element 0.  */    \
    (LHS_OFF) = static_cast<int64_t*>(                                         \
      device->AllocWorkspace(ctx, sizeof(int64_t) * info.lhs_offset.size()));  \
    CUDA_CALL(cudaMemcpy((LHS_OFF), info.lhs_offset.data(),                    \
      sizeof(int64_t) * info.lhs_offset.size(), cudaMemcpyHostToDevice));      \
    (RHS_OFF) = static_cast<int64_t*>(                                         \
      device->AllocWorkspace(ctx, sizeof(int64_t) * info.rhs_offset.size()));  \
    CUDA_CALL(cudaMemcpy((RHS_OFF), info.rhs_offset.data(),                    \
      sizeof(int64_t) * info.rhs_offset.size(), cudaMemcpyHostToDevice));      \
    if ((EDGE_MAP)) {                                                          \
      constexpr bool UseIdx = true;                                            \
      { __VA_ARGS__ }                                                          \
    } else {                                                                   \
      constexpr bool UseIdx = false;                                           \
      { __VA_ARGS__ }                                                          \
    }                                                                          \
    /* The offset copies are only valid for the duration of the body. */       \
    device->FreeWorkspace(ctx, (LHS_OFF));                                     \
    device->FreeWorkspace(ctx, (RHS_OFF));                                     \
  }                                                                            \
} while (0)
50
51

#endif