/**#
   *  @file: cuda_runtime.h
   *  @brief: cuda runtime header wrapper
   *  @author: louis
   *  @data: 2020/4/17
   #**/

#if !defined(__CUDA_RUNTIME_H__)
#define __CUDA_RUNTIME_H__

#include "cuda_runtime_api.h"

#if defined(__cplusplus)

template<class T>
static __inline__ __host__ cudaError_t cudaLaunchKernel(
  const T *func,
  dim3 gridDim,
  dim3 blockDim,
  void **args,
  size_t sharedMem = 0,
  cudaStream_t stream = 0
)
{
    return cudaLaunchKernel((const void *)func, gridDim, blockDim, args, sharedMem, stream);
}

static __inline__ __host__ cudaError_t cudaEventCreate(
  cudaEvent_t  *event,
  unsigned int  flags
)
{
  return cudaEventCreateWithFlags(event, flags);
}

static __inline__ __host__ cudaError_t cudaMallocHost(
  void         **ptr,
  size_t         size,
  unsigned int   flags
)
{
  return cudaHostAlloc(ptr, size, flags);
}

template<class T>
static __inline__ __host__ cudaError_t cudaHostAlloc(
  T            **ptr,
  size_t         size,
  unsigned int   flags
)
{
  return cudaHostAlloc((void**)(void*)ptr, size, flags);
}

template<class T>
static __inline__ __host__ cudaError_t cudaHostGetDevicePointer(
  T            **pDevice,
  void          *pHost,
  unsigned int   flags
)
{
  return cudaHostGetDevicePointer((void**)(void*)pDevice, pHost, flags);
}


template<class T>
static __inline__ __host__ cudaError_t cudaMallocManaged(
  T            **devPtr,
  size_t         size,
  unsigned int   flags = cudaMemAttachGlobal
)
{
  return cudaMallocManaged((void**)(void*)devPtr, size, flags);
}

template<class T>
static __inline__ __host__ cudaError_t cudaMalloc(
  T      **devPtr,
  size_t   size
)
{
  return cudaMalloc((void**)(void*)devPtr, size);
}

template<class T>
static __inline__ __host__ cudaError_t cudaMallocHost(
  T            **ptr,
  size_t         size,
  unsigned int   flags = 0
)
{
  return cudaMallocHost((void**)(void*)ptr, size, flags);
}

template<class T>
static __inline__ __host__ cudaError_t cudaMallocPitch(
  T      **devPtr,
  size_t  *pitch,
  size_t   width,
  size_t   height
)
{
  return cudaMallocPitch((void**)(void*)devPtr, pitch, width, height);
}

template<class T>
static __inline__ __host__ cudaError_t cudaMemcpyToSymbol(
  const T                   &symbol,
  const void                *src,
        size_t               count,
        size_t               offset = 0,
        enum cudaMemcpyKind  kind   = cudaMemcpyHostToDevice
)
{
  return cudaMemcpyToSymbol((const void*)&symbol, src, count, offset, kind);
}


template<class T>
static __inline__ __host__ cudaError_t cudaMemcpyToSymbolAsync(
  const T                   &symbol,
  const void                *src,
        size_t               count,
        size_t               offset = 0,
        enum cudaMemcpyKind  kind   = cudaMemcpyHostToDevice,
        cudaStream_t         stream = 0
)
{
  return cudaMemcpyToSymbolAsync((const void*)&symbol, src, count, offset, kind, stream);
}


template<class T>
static __inline__ __host__ cudaError_t cudaMemcpyFromSymbol(
        void                *dst,
  const T                   &symbol,
        size_t               count,
        size_t               offset = 0,
        enum cudaMemcpyKind  kind   = cudaMemcpyDeviceToHost
)
{
  return cudaMemcpyFromSymbol(dst, (const void*)&symbol, count, offset, kind);
}


template<class T>
static __inline__ __host__ cudaError_t cudaMemcpyFromSymbolAsync(
        void                *dst,
  const T                   &symbol,
        size_t               count,
        size_t               offset = 0,
        enum cudaMemcpyKind  kind   = cudaMemcpyDeviceToHost,
        cudaStream_t         stream = 0
)
{
  return cudaMemcpyFromSymbolAsync(dst, (const void*)&symbol, count, offset, kind, stream);
}


template<class T>
static __inline__ __host__ cudaError_t cudaGetSymbolAddress(
        void **devPtr,
  const T     &symbol
)
{
  return cudaGetSymbolAddress(devPtr, (const void*)&symbol);
}


template<class T>
static __inline__ __host__ cudaError_t cudaGetSymbolSize(
        size_t *size,
  const T      &symbol
)
{
  return cudaGetSymbolSize(size, (const void*)&symbol);
}

template<class T>
static __inline__ __host__ cudaError_t cudaFuncSetCacheConfig(
  T                  *func,
  enum cudaFuncCache  cacheConfig
)
{
  return cudaFuncSetCacheConfig((const void*)func, cacheConfig);
}

template<class T>
static __inline__ __host__ cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessor(
    int   *numBlocks,
    T      func,
    int    blockSize,
    size_t dynamicSMemSize)
{
    return cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, (const void*)func, blockSize, dynamicSMemSize, cudaOccupancyDefault);
}


template<class T>
static __inline__ __host__ cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
    int         *numBlocks,
    T            func,
    int          blockSize,
    size_t       dynamicSMemSize,
    unsigned int flags)
{
    return cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, (const void*)func, blockSize, dynamicSMemSize, flags);
}

template<class T>
static __inline__ __host__ cudaError_t cudaOccupancyMaxPotentialBlockSize(
    int    *minGridSize,
    int    *blockSize,
    T       func,
    size_t  dynamicSMemSize = 0,
    int     blockSizeLimit = 0)
{
  return cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func, dynamicSMemSize, blockSizeLimit);
}

template<class T>
static __inline__ __host__ cudaError_t cudaFuncGetAttributes(
  cudaFuncAttributes *attr,
  T                         *entry
)
{
  return cudaFuncGetAttributes(attr, (const void*)entry);
}
/*
template<class T>
static __inline__ __host__ cudaError_t cudaFuncSetAttribute(
  T                         *entry,
  enum cudaFuncAttribute    attr,
  int                       value
)
{
  return cudaFuncSetAttribute((const void*)entry, attr, value);
}
*/

#endif /* __cplusplus */

#endif /* !__CUDA_RUNTIME_H__ */
