cuda_utils_kernels.cu 843 Bytes
Newer Older
1
2
#include <cstdint>
#include <stdexcept>
#include <string>

#ifdef USE_ROCM
  #include <hip/hip_runtime.h>
  #include <hip/hip_runtime_api.h>
#endif
5
// Throws std::runtime_error naming `what` when `status` is anything other
// than cudaSuccess; does nothing on success.
static void throw_on_cuda_error(cudaError_t status, const char* what) {
  if (status != cudaSuccess) {
    throw std::runtime_error(std::string(what) +
                             " failed: " + cudaGetErrorString(status));
  }
}

// Queries a single integer attribute of a device.
//
// attribute: numeric value of a cudaDeviceAttr enumerator.
// device_id: ordinal of the device to query; any negative value means
//            "use the device reported by cudaGetDevice".
//
// Returns the attribute value written by cudaDeviceGetAttribute.
//
// Throws std::out_of_range if `attribute` or a non-negative `device_id` does
// not fit in an int, and std::runtime_error if either runtime call reports an
// error. Previously the status codes were ignored and an indeterminate value
// could be returned on failure.
int64_t get_device_attribute(int64_t attribute, int64_t device_id) {
  // The runtime API takes plain ints; reject values that would be silently
  // truncated instead of querying the wrong attribute.
  const int attr = static_cast<int>(attribute);
  if (attr != attribute) {
    throw std::out_of_range("attribute does not fit in an int");
  }

  int device = 0;
  if (device_id < 0) {
    throw_on_cuda_error(cudaGetDevice(&device), "cudaGetDevice");
  } else {
    device = static_cast<int>(device_id);
    // Same truncation guard: a huge ordinal must not alias a valid device.
    if (device != device_id) {
      throw std::out_of_range("device_id does not fit in an int");
    }
  }

  int value = 0;
  throw_on_cuda_error(
      cudaDeviceGetAttribute(&value, static_cast<cudaDeviceAttr>(attr),
                             device),
      "cudaDeviceGetAttribute");
  return value;
}
16

17
18
// Returns the per-block shared-memory limit attribute for `device_id` by
// forwarding the platform's enumerator to get_device_attribute.
//
// The enumerator is chosen at compile time:
//   - USE_ROCM defined: hipDeviceAttributeMaxSharedMemoryPerBlock
//   - otherwise:        cudaDevAttrMaxSharedMemoryPerBlockOptin
// Reference for the CUDA enumerators:
// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html
// NOTE(review): an earlier comment recorded the raw values as 97 (CUDA) and
// 74 (HIP); confirm against the headers in use before relying on them.
int64_t get_max_shared_memory_per_block_device_attribute(int64_t device_id) {
#ifdef USE_ROCM
  const int64_t max_smem_attr = hipDeviceAttributeMaxSharedMemoryPerBlock;
#else
  const int64_t max_smem_attr = cudaDevAttrMaxSharedMemoryPerBlockOptin;
#endif
  return get_device_attribute(max_smem_attr, device_id);
}