cuda_utils_kernels.cu 1008 Bytes
Newer Older
1
#include "cuda_utils.h"
2
3
#ifdef USE_ROCM
  #include <hip/hip_runtime.h>
4
  #include <hip/hip_runtime_api.h>
5
#endif
6

7
// Queries a single device attribute through the CUDA runtime.
//
// attribute: numeric value of a cudaDeviceAttr enumerator; it is cast to
//            cudaDeviceAttr before the query.
// device_id: device ordinal to query. A negative value means "use the device
//            reported by cudaGetDevice()".
// Returns the value written by cudaDeviceGetAttribute, widened to int64_t.
//
// Both runtime calls are wrapped in CUDA_CHECK (declared outside this file).
//
// The result is intentionally NOT kept in a function-local static. Such a
// static is initialized exactly once, from the arguments of the very first
// call, so every later call -- even for a different attribute or a different
// device -- would get that first value back.
int64_t get_device_attribute(int64_t attribute, int64_t device_id) {
  int device = static_cast<int>(device_id);
  if (device < 0) {
    // Negative ordinal: resolve to the currently selected device.
    CUDA_CHECK(cudaGetDevice(&device));
  }

  int value = 0;
  CUDA_CHECK(cudaDeviceGetAttribute(
      &value, static_cast<cudaDeviceAttr>(attribute), device));
  return value;
}
22

23
24
// Looks up the "max shared memory per block" device attribute for `device_id`
// by delegating to get_device_attribute(). Only the attribute enumerator
// differs between the ROCm and CUDA builds; `device_id` is forwarded as-is.
int64_t get_max_shared_memory_per_block_device_attribute(int64_t device_id) {
  // Enumerator reference:
  // https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html
  // cudaDevAttrMaxSharedMemoryPerBlockOptin = 97 if not is_hip() else 74
#ifdef USE_ROCM
  const int64_t attr_id = hipDeviceAttributeMaxSharedMemoryPerBlock;
#else
  const int64_t attr_id = cudaDevAttrMaxSharedMemoryPerBlockOptin;
#endif

  return get_device_attribute(attr_id, device_id);
}