/*
 * Copyright 1993-2017 NVIDIA Corporation.  All rights reserved.
 *
 * NOTICE TO LICENSEE:
 *
 * This source code and/or documentation ("Licensed Deliverables") are
 * subject to NVIDIA intellectual property rights under U.S. and
 * international Copyright laws.
 *
 * These Licensed Deliverables contained herein is PROPRIETARY and
 * CONFIDENTIAL to NVIDIA and is being provided under the terms and
 * conditions of a form of NVIDIA software license agreement by and
 * between NVIDIA and Licensee ("License Agreement") or electronically
 * accepted by Licensee.  Notwithstanding any terms or conditions to
 * the contrary in the License Agreement, reproduction or disclosure
 * of the Licensed Deliverables to any third party without the express
 * written consent of NVIDIA is prohibited.
 *
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
 * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THESE LICENSED DELIVERABLES.
 *
 * U.S. Government End Users.  These Licensed Deliverables are a
 * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 * 1995), consisting of "commercial computer software" and "commercial
 * computer software documentation" as such terms are used in 48
 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
 * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 * U.S. Government End Users acquire the Licensed Deliverables with
 * only those rights set forth herein.
 *
 * Any use of the Licensed Deliverables in individual and commercial
 * software must include, in the user documentation and internal
 * comments to the code, the above Disclaimer and U.S. Government End
 * Users Notice.
 */


#ifndef __SURFACE_INDIRECT_FUNCTIONS_H__
#define __SURFACE_INDIRECT_FUNCTIONS_H__


#if defined(__cplusplus) && defined(__CUDACC__)

#include "cuda_runtime_api.h"
#include "ockl_image_internal.h"

#ifndef __CUDA_HIP_PLATFORM_AMD__

/*
 * __nv_isurf_trait<T> lists the element types the surface functions in this
 * header accept.  The primary template is empty; each specialization below
 * exposes a nested `type` that is `void`.  The functions that use
 * `typename __nv_isurf_trait<T>::type` as their return type therefore return
 * void for a listed T and cannot be instantiated for an unlisted one.
 */
template <typename T>
struct __nv_isurf_trait {};

/* Scalars and one-component vector types. */
template <> struct __nv_isurf_trait<char>               { using type = void; };
template <> struct __nv_isurf_trait<signed char>        { using type = void; };
template <> struct __nv_isurf_trait<char1>              { using type = void; };
template <> struct __nv_isurf_trait<unsigned char>      { using type = void; };
template <> struct __nv_isurf_trait<uchar1>             { using type = void; };
template <> struct __nv_isurf_trait<short>              { using type = void; };
template <> struct __nv_isurf_trait<short1>             { using type = void; };
template <> struct __nv_isurf_trait<unsigned short>     { using type = void; };
template <> struct __nv_isurf_trait<ushort1>            { using type = void; };
template <> struct __nv_isurf_trait<int>                { using type = void; };
template <> struct __nv_isurf_trait<int1>               { using type = void; };
template <> struct __nv_isurf_trait<unsigned int>       { using type = void; };
template <> struct __nv_isurf_trait<uint1>              { using type = void; };
template <> struct __nv_isurf_trait<long long>          { using type = void; };
template <> struct __nv_isurf_trait<longlong1>          { using type = void; };
template <> struct __nv_isurf_trait<unsigned long long> { using type = void; };
template <> struct __nv_isurf_trait<ulonglong1>         { using type = void; };
template <> struct __nv_isurf_trait<float>              { using type = void; };
template <> struct __nv_isurf_trait<float1>             { using type = void; };

/* Two-component vector types. */
template <> struct __nv_isurf_trait<char2>      { using type = void; };
template <> struct __nv_isurf_trait<uchar2>     { using type = void; };
template <> struct __nv_isurf_trait<short2>     { using type = void; };
template <> struct __nv_isurf_trait<ushort2>    { using type = void; };
template <> struct __nv_isurf_trait<int2>       { using type = void; };
template <> struct __nv_isurf_trait<uint2>      { using type = void; };
template <> struct __nv_isurf_trait<longlong2>  { using type = void; };
template <> struct __nv_isurf_trait<ulonglong2> { using type = void; };
template <> struct __nv_isurf_trait<float2>     { using type = void; };

/* Four-component vector types. */
template <> struct __nv_isurf_trait<char4>   { using type = void; };
template <> struct __nv_isurf_trait<uchar4>  { using type = void; };
template <> struct __nv_isurf_trait<short4>  { using type = void; };
template <> struct __nv_isurf_trait<ushort4> { using type = void; };
template <> struct __nv_isurf_trait<int4>    { using type = void; };
template <> struct __nv_isurf_trait<uint4>   { using type = void; };
template <> struct __nv_isurf_trait<float4>  { using type = void; };


// Declares a local variable named `i`, of type
// `unsigned int ADDRESS_SPACE_CONSTANT*`, initialised by C-style casting the
// identifier `surfObject`.  The expansion therefore requires a variable called
// `surfObject` to be in scope and introduces the name `i` into the enclosing
// block; the surface functions below pass `i` to the __ockl_image_* routines.
// The expansion already ends with ';', so call sites add no semicolon.
#define __SURFACE_OBJECT_PARAMETERS_INIT                                                            \
    unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)surfObject; 

/*
 * Pointer-output form of the 1D surface read.
 *
 * Forwards surfObject, x and mode to the value-returning surf1Dread<T> and
 * stores the result through `ptr`.  The return type is
 * __nv_isurf_trait<T>::type, so T must be one of the types that trait lists.
 * The body is compiled only when __CUDA_ARCH__ is defined.
 */
template <typename T>
static __device__ typename __nv_isurf_trait<T>::type surf1Dread(T *ptr, cudaSurfaceObject_t surfObject, int x, cudaSurfaceBoundaryMode mode = cudaBoundaryModeTrap)
{
#ifdef __CUDA_ARCH__
  const T fetched = surf1Dread<T>(surfObject, x, mode);
  *ptr = fetched;
#endif /* __CUDA_ARCH__ */
}

/*
 * Value-returning 1D surface read.
 *
 * Steps: bind `i` to the surface object, rewrite x through __hipGetPixelAddr
 * using the image's 1D channel data type and channel order, load with
 * __ockl_image_load_1D, and convert the loaded value to T with mapFrom<T>.
 * NOTE(review): __hipGetPixelAddr presumably turns CUDA's byte offset into a
 * texel index -- confirm against the helper's definition.
 * boundaryMode is not referenced in this body.
 * The body is compiled only when __CUDA_ARCH__ is defined.
 */
template <class T>
static __device__ T surf1Dread(cudaSurfaceObject_t surfObject, int x, cudaSurfaceBoundaryMode boundaryMode = cudaBoundaryModeTrap)
{
#ifdef __CUDA_ARCH__
  __SURFACE_OBJECT_PARAMETERS_INIT
  x = __hipGetPixelAddr(x,
                        __ockl_image_channel_data_type_1D(i),
                        __ockl_image_channel_order_1D(i));
  auto texel = __ockl_image_load_1D(i, x);
  return mapFrom<T>(texel);
#endif /* __CUDA_ARCH__ */
}

/*
 * Pointer-output form of the 2D surface read.
 *
 * Forwards surfObject, x, y and mode to the value-returning surf2Dread<T> and
 * stores the result through `ptr`.  T must be a type listed by
 * __nv_isurf_trait.  The body is compiled only when __CUDA_ARCH__ is defined.
 */
template <typename T>
static __device__ typename __nv_isurf_trait<T>::type  surf2Dread(T *ptr, cudaSurfaceObject_t surfObject, int x, int y, cudaSurfaceBoundaryMode mode = cudaBoundaryModeTrap)
{
#ifdef __CUDA_ARCH__
  const T fetched = surf2Dread<T>(surfObject, x, y, mode);
  *ptr = fetched;
#endif /* __CUDA_ARCH__ */
}

/*
 * Value-returning 2D surface read.
 *
 * Only x is rewritten by __hipGetPixelAddr (using the image's 2D channel data
 * type and channel order); y is passed through unchanged.  The texel is loaded
 * with __ockl_image_load_2D at (x, y) and converted to T with mapFrom<T>.
 * NOTE(review): __hipGetPixelAddr presumably turns CUDA's byte offset into a
 * texel index -- confirm against the helper's definition.
 * boundaryMode is not referenced in this body.
 * The body is compiled only when __CUDA_ARCH__ is defined.
 */
template <class T>
static __device__ T surf2Dread(cudaSurfaceObject_t surfObject, int x, int y, cudaSurfaceBoundaryMode boundaryMode = cudaBoundaryModeTrap)
{
#ifdef __CUDA_ARCH__
  __SURFACE_OBJECT_PARAMETERS_INIT
  x = __hipGetPixelAddr(x,
                        __ockl_image_channel_data_type_2D(i),
                        __ockl_image_channel_order_2D(i));
  auto coord = make_int2(x, y);
  auto texel = __ockl_image_load_2D(i, coord);
  return mapFrom<T>(texel);
#endif /* __CUDA_ARCH__ */
}

/*
 * Pointer-output form of the 3D surface read.
 *
 * Forwards surfObject, x, y, z and mode to the value-returning surf3Dread<T>
 * and stores the result through `ptr`.  T must be a type listed by
 * __nv_isurf_trait.  The body is compiled only when __CUDA_ARCH__ is defined.
 */
template <typename T>
static __device__ typename  __nv_isurf_trait<T>::type  surf3Dread(T *ptr, cudaSurfaceObject_t surfObject, int x, int y, int z, cudaSurfaceBoundaryMode mode = cudaBoundaryModeTrap)
{
#ifdef __CUDA_ARCH__
  const T fetched = surf3Dread<T>(surfObject, x, y, z, mode);
  *ptr = fetched;
#endif /* __CUDA_ARCH__ */
}

/*
 * Value-returning 3D surface read.
 *
 * Only x is rewritten by __hipGetPixelAddr (using the image's 3D channel data
 * type and channel order).  The load coordinate is the int4 (x, y, z, 0); the
 * loaded value is converted to T with mapFrom<T>.
 * NOTE(review): __hipGetPixelAddr presumably turns CUDA's byte offset into a
 * texel index -- confirm against the helper's definition.
 * boundaryMode is not referenced in this body.
 * The body is compiled only when __CUDA_ARCH__ is defined.
 */
template <class T>
static __device__ T surf3Dread(cudaSurfaceObject_t surfObject, int x, int y, int z, cudaSurfaceBoundaryMode boundaryMode = cudaBoundaryModeTrap)
{
#ifdef __CUDA_ARCH__
  __SURFACE_OBJECT_PARAMETERS_INIT
  x = __hipGetPixelAddr(x,
                        __ockl_image_channel_data_type_3D(i),
                        __ockl_image_channel_order_3D(i));
  auto coord = make_int4(x, y, z, 0);
  auto texel = __ockl_image_load_3D(i, coord);
  return mapFrom<T>(texel);
#endif /* __CUDA_ARCH__ */
}

/*
 * Pointer-output form of the 1D layered surface read.
 *
 * Forwards surfObject, x, layer and mode to the value-returning
 * surf1DLayeredread<T> and stores the result through `ptr`.  T must be a type
 * listed by __nv_isurf_trait.  The body is compiled only when __CUDA_ARCH__ is
 * defined.
 */
template <typename T>
static __device__ typename  __nv_isurf_trait<T>::type  surf1DLayeredread(T *ptr, cudaSurfaceObject_t surfObject, int x, int layer, cudaSurfaceBoundaryMode mode = cudaBoundaryModeTrap)
{
#ifdef __CUDA_ARCH__
  const T fetched = surf1DLayeredread<T>(surfObject, x, layer, mode);
  *ptr = fetched;
#endif /* __CUDA_ARCH__ */
}

/*
 * Value-returning 1D layered surface read.
 *
 * x is rewritten by __hipGetPixelAddr using the image's 1D channel data type
 * and channel order; the texel is then loaded with __ockl_image_load_lod_1D,
 * which receives `layer` as its last argument, and converted to T with
 * mapFrom<T>.
 * NOTE(review): `layer` is handed to a routine whose name says "lod" --
 * confirm that this argument selects an array layer on this platform.
 * boundaryMode is not referenced in this body.
 * The body is compiled only when __CUDA_ARCH__ is defined.
 */
template <class T>
static __device__ T surf1DLayeredread(cudaSurfaceObject_t surfObject, int x, int layer, cudaSurfaceBoundaryMode boundaryMode = cudaBoundaryModeTrap)
{
#ifdef __CUDA_ARCH__
  __SURFACE_OBJECT_PARAMETERS_INIT
  x = __hipGetPixelAddr(x,
                        __ockl_image_channel_data_type_1D(i),
                        __ockl_image_channel_order_1D(i));
  auto texel = __ockl_image_load_lod_1D(i, x, layer);
  return mapFrom<T>(texel);
#endif /* __CUDA_ARCH__ */
}

/*
 * Pointer-output form of the 2D layered surface read.
 *
 * Forwards surfObject, x, y, layer and mode to the value-returning
 * surf2DLayeredread<T> and stores the result through `ptr`.  T must be a type
 * listed by __nv_isurf_trait.  The body is compiled only when __CUDA_ARCH__ is
 * defined.
 */
template <typename T>
static __device__  typename __nv_isurf_trait<T>::type  surf2DLayeredread(T *ptr, cudaSurfaceObject_t surfObject, int x, int y, int layer, cudaSurfaceBoundaryMode mode = cudaBoundaryModeTrap)
{
#ifdef __CUDA_ARCH__
  const T fetched = surf2DLayeredread<T>(surfObject, x, y, layer, mode);
  *ptr = fetched;
#endif /* __CUDA_ARCH__ */
}

/*
 * Value-returning 2D layered surface read.
 *
 * Only x is rewritten by __hipGetPixelAddr (using the image's 2D channel data
 * type and channel order).  The texel is loaded with __ockl_image_load_lod_2D
 * at (x, y), with `layer` as the last argument, and converted to T with
 * mapFrom<T>.
 * NOTE(review): `layer` is handed to a routine whose name says "lod" --
 * confirm that this argument selects an array layer on this platform.
 * boundaryMode is not referenced in this body.
 * The body is compiled only when __CUDA_ARCH__ is defined.
 */
template <class T>
static __device__ T surf2DLayeredread(cudaSurfaceObject_t surfObject, int x, int y, int layer, cudaSurfaceBoundaryMode boundaryMode = cudaBoundaryModeTrap)
{
#ifdef __CUDA_ARCH__
  __SURFACE_OBJECT_PARAMETERS_INIT
  x = __hipGetPixelAddr(x,
                        __ockl_image_channel_data_type_2D(i),
                        __ockl_image_channel_order_2D(i));
  auto coord = make_int2(x, y);
  auto texel = __ockl_image_load_lod_2D(i, coord, layer);
  return mapFrom<T>(texel);
#endif /* __CUDA_ARCH__ */
}

/*
 * Pointer-output form of the cubemap surface read.
 *
 * Forwards surfObject, x, y, face and mode to the value-returning
 * surfCubemapread<T> and stores the result through `ptr`.  T must be a type
 * listed by __nv_isurf_trait.  The body is compiled only when __CUDA_ARCH__ is
 * defined.
 */
template <typename T>
static __device__ typename __nv_isurf_trait<T>::type  surfCubemapread(T *ptr, cudaSurfaceObject_t surfObject, int x, int y, int face, cudaSurfaceBoundaryMode mode = cudaBoundaryModeTrap)
{
#ifdef __CUDA_ARCH__
  const T fetched = surfCubemapread<T>(surfObject, x, y, face, mode);
  *ptr = fetched;
#endif /* __CUDA_ARCH__ */
}

/*
 * Value-returning cubemap surface read.
 *
 * Only x is rewritten by __hipGetPixelAddr; the channel data type and channel
 * order are queried with the *_2D routines.  The texel is loaded with
 * __ockl_image_load_CM at (x, y) with `face` as the last argument, and
 * converted to T with mapFrom<T>.
 * NOTE(review): __hipGetPixelAddr presumably turns CUDA's byte offset into a
 * texel index -- confirm against the helper's definition.
 * boundaryMode is not referenced in this body.
 * The body is compiled only when __CUDA_ARCH__ is defined.
 */
template <class T>
static __device__ T surfCubemapread(cudaSurfaceObject_t surfObject, int x, int y, int face, cudaSurfaceBoundaryMode boundaryMode = cudaBoundaryModeTrap)
{
#ifdef __CUDA_ARCH__
  __SURFACE_OBJECT_PARAMETERS_INIT
  x = __hipGetPixelAddr(x,
                        __ockl_image_channel_data_type_2D(i),
                        __ockl_image_channel_order_2D(i));
  auto coord = make_int2(x, y);
  auto texel = __ockl_image_load_CM(i, coord, face);
  return mapFrom<T>(texel);
#endif /* __CUDA_ARCH__ */
}

/*
 * Pointer-output form of the layered cubemap surface read.
 *
 * Stores through `ptr` the value obtained from surfCubemapread<T>, passing
 * `layerface` in that function's `face` position.  T must be a type listed by
 * __nv_isurf_trait.  The body is compiled only when __CUDA_ARCH__ is defined.
 *
 * NOTE(review): unlike the other pointer-output overloads in this header, this
 * one delegates to surfCubemapread rather than to the same-named value
 * overload (surfCubemapLayeredread).  Presumably intentional, relying on the
 * cube load accepting a combined layer/face index -- confirm.
 */
template <typename T>
static __device__  typename __nv_isurf_trait<T>::type  surfCubemapLayeredread(T *ptr, cudaSurfaceObject_t surfObject, int x, int y, int layerface, cudaSurfaceBoundaryMode mode = cudaBoundaryModeTrap)
{
#ifdef __CUDA_ARCH__
  // Delegates to the non-layered cubemap read; layerface is used as `face`.
  *ptr = surfCubemapread<T>(surfObject, x, y, layerface, mode);
#endif /* __CUDA_ARCH__ */
}

// TODO: the CUDA arguments do not align with the HIP arguments
/*
 * Value-returning layered cubemap surface read.
 *
 * Returns surfCubemapread<T>(surfObject, x, y, layerface, boundaryMode), i.e.
 * `layerface` is passed in the `face` position.  This is the same delegation
 * the pointer-output surfCubemapLayeredread overload in this header performs,
 * so both forms of the call now yield the same value.
 *
 * Previously every statement in this function was compiled out with `#if 0`,
 * so control flowed off the end of a non-void function, which is undefined
 * behaviour when the function is called.
 *
 * NOTE(review): whether the underlying cube load accepts a combined
 * layer/face index is not visible from this header -- confirm.
 * The body is compiled only when __CUDA_ARCH__ is defined.
 */
template <class T>
static __device__ T surfCubemapLayeredread(cudaSurfaceObject_t surfObject, int x, int y, int layerface, cudaSurfaceBoundaryMode boundaryMode = cudaBoundaryModeTrap)
{
#ifdef __CUDA_ARCH__
  return surfCubemapread<T>(surfObject, x, y, layerface, boundaryMode);
#endif /* __CUDA_ARCH__ */
#if 0 /* Native layered path, still disabled: the call below takes two index
         arguments after the coordinates, but CUDA supplies only `layerface`. */
  __SURFACE_OBJECT_PARAMETERS_INIT
  x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i));
  auto tmp = __ockl_image_load_lod_CM(i, make_int2(x, y), layerface, layerface);
  return mapFrom<T>(tmp);
#endif /* 0 */
}

/*
 * Writes `val` to a 1D surface.
 *
 * x is rewritten by __hipGetPixelAddr using the image's 1D channel data type
 * and channel order; `val` is converted with mapFrom<float4> and stored with
 * __ockl_image_store_1D.  T must be a type listed by __nv_isurf_trait.
 * NOTE(review): __hipGetPixelAddr presumably turns CUDA's byte offset into a
 * texel index -- confirm against the helper's definition.
 * mode is not referenced in this body.
 * The body is compiled only when __CUDA_ARCH__ is defined.
 */
template <typename T>
static __device__ typename __nv_isurf_trait<T>::type surf1Dwrite(T val, cudaSurfaceObject_t surfObject, int x, cudaSurfaceBoundaryMode mode = cudaBoundaryModeTrap)
{
#ifdef __CUDA_ARCH__
  __SURFACE_OBJECT_PARAMETERS_INIT
  x = __hipGetPixelAddr(x,
                        __ockl_image_channel_data_type_1D(i),
                        __ockl_image_channel_order_1D(i));
  auto texel = mapFrom<float4>(val);
  __ockl_image_store_1D(i, x, texel);
#endif /* __CUDA_ARCH__ */
}

/*
 * Writes `val` to a 2D surface at (x, y).
 *
 * Only x is rewritten by __hipGetPixelAddr (using the image's 2D channel data
 * type and channel order).  `val` is converted with mapFrom<float4> and stored
 * with __ockl_image_store_2D.  T must be a type listed by __nv_isurf_trait.
 * (The original author left an alternative conversion through
 * __hipMapToNativeFloat4 commented out; it is not used.)
 * mode is not referenced in this body.
 * The body is compiled only when __CUDA_ARCH__ is defined.
 */
template <typename T>
static __device__ typename __nv_isurf_trait<T>::type surf2Dwrite(T val, cudaSurfaceObject_t surfObject, int x, int y, cudaSurfaceBoundaryMode mode = cudaBoundaryModeTrap)
{
#ifdef __CUDA_ARCH__
  __SURFACE_OBJECT_PARAMETERS_INIT
  x = __hipGetPixelAddr(x,
                        __ockl_image_channel_data_type_2D(i),
                        __ockl_image_channel_order_2D(i));
  auto texel = mapFrom<float4>(val);
  auto coord = make_int2(x, y);
  __ockl_image_store_2D(i, coord, texel);
#endif /* __CUDA_ARCH__ */
}

/*
 * Writes `val` to a 3D surface at (x, y, z).
 *
 * Only x is rewritten by __hipGetPixelAddr (using the image's 3D channel data
 * type and channel order).  `val` is converted with mapFrom<float4> and stored
 * with __ockl_image_store_3D at the int4 coordinate (x, y, z, 0).  T must be a
 * type listed by __nv_isurf_trait.
 * mode is not referenced in this body.
 * The body is compiled only when __CUDA_ARCH__ is defined.
 */
template <typename T>
static __device__ typename __nv_isurf_trait<T>::type surf3Dwrite(T val, cudaSurfaceObject_t surfObject, int x, int y, int z, cudaSurfaceBoundaryMode mode = cudaBoundaryModeTrap)
{
#ifdef __CUDA_ARCH__
  __SURFACE_OBJECT_PARAMETERS_INIT
  x = __hipGetPixelAddr(x,
                        __ockl_image_channel_data_type_3D(i),
                        __ockl_image_channel_order_3D(i));
  auto texel = mapFrom<float4>(val);
  auto coord = make_int4(x, y, z, 0);
  __ockl_image_store_3D(i, coord, texel);
#endif /* __CUDA_ARCH__ */
}

/*
 * Writes `val` to a 1D layered surface.
 *
 * x is rewritten by __hipGetPixelAddr using the image's 1D channel data type
 * and channel order; `val` is converted with mapFrom<float4> and stored with
 * __ockl_image_store_lod_1D, which receives `layer` after x.  T must be a type
 * listed by __nv_isurf_trait.
 * NOTE(review): `layer` is handed to a routine whose name says "lod" --
 * confirm that this argument selects an array layer on this platform.
 * mode is not referenced in this body.
 * The body is compiled only when __CUDA_ARCH__ is defined.
 */
template <typename T>
static __device__ typename __nv_isurf_trait<T>::type surf1DLayeredwrite(T val, cudaSurfaceObject_t surfObject, int x, int layer, cudaSurfaceBoundaryMode mode = cudaBoundaryModeTrap)
{
#ifdef __CUDA_ARCH__
  __SURFACE_OBJECT_PARAMETERS_INIT
  x = __hipGetPixelAddr(x,
                        __ockl_image_channel_data_type_1D(i),
                        __ockl_image_channel_order_1D(i));
  auto texel = mapFrom<float4>(val);
  __ockl_image_store_lod_1D(i, x, layer, texel);
#endif /* __CUDA_ARCH__ */
}

/*
 * Writes `val` to a 2D layered surface at (x, y).
 *
 * Only x is rewritten by __hipGetPixelAddr (using the image's 2D channel data
 * type and channel order).  `val` is converted with mapFrom<float4> and stored
 * with __ockl_image_store_lod_2D, which receives `layer` after the (x, y)
 * coordinate.  T must be a type listed by __nv_isurf_trait.
 * NOTE(review): `layer` is handed to a routine whose name says "lod" --
 * confirm that this argument selects an array layer on this platform.
 * mode is not referenced in this body.
 * The body is compiled only when __CUDA_ARCH__ is defined.
 */
template <typename T>
static __device__ typename __nv_isurf_trait<T>::type surf2DLayeredwrite(T val, cudaSurfaceObject_t surfObject, int x, int y, int layer, cudaSurfaceBoundaryMode mode = cudaBoundaryModeTrap)
{
#ifdef __CUDA_ARCH__
  __SURFACE_OBJECT_PARAMETERS_INIT
  x = __hipGetPixelAddr(x,
                        __ockl_image_channel_data_type_2D(i),
                        __ockl_image_channel_order_2D(i));
  auto texel = mapFrom<float4>(val);
  auto coord = make_int2(x, y);
  __ockl_image_store_lod_2D(i, coord, layer, texel);
#endif /* __CUDA_ARCH__ */
}

/*
 * Writes `val` to a cubemap surface at (x, y) on `face`.
 *
 * Only x is rewritten by __hipGetPixelAddr; the channel data type and channel
 * order are queried with the *_2D routines.  `val` is converted with
 * mapFrom<float4> and stored with __ockl_image_store_CM, which receives `face`
 * after the (x, y) coordinate.  T must be a type listed by __nv_isurf_trait.
 * mode is not referenced in this body.
 * The body is compiled only when __CUDA_ARCH__ is defined.
 */
template <typename T>
static __device__ typename __nv_isurf_trait<T>::type surfCubemapwrite(T val, cudaSurfaceObject_t surfObject, int x, int y, int face, cudaSurfaceBoundaryMode mode = cudaBoundaryModeTrap)
{
#ifdef __CUDA_ARCH__
  __SURFACE_OBJECT_PARAMETERS_INIT
  x = __hipGetPixelAddr(x,
                        __ockl_image_channel_data_type_2D(i),
                        __ockl_image_channel_order_2D(i));
  auto texel = mapFrom<float4>(val);
  auto coord = make_int2(x, y);
  __ockl_image_store_CM(i, coord, face, texel);
#endif /* __CUDA_ARCH__ */
}

// TODO: the CUDA arguments do not align with the HIP arguments
/*
 * Layered cubemap surface write -- currently a no-op.
 *
 * The whole body sits inside `#if 0`, so no statement is compiled and a call
 * stores nothing; `val`, `surfObject`, `x`, `y`, `layerface` and `mode` are
 * all unused.  T must still be a type listed by __nv_isurf_trait for the
 * declaration to be usable.
 *
 * NOTE(review): callers get no diagnostic that their write was dropped.  The
 * disabled code passes `layerface` twice to __ockl_image_store_lod_CM, which
 * appears to be the argument mismatch the TODO above this function refers to
 * -- confirm before enabling.
 */
template <typename T>
static __device__ typename __nv_isurf_trait<T>::type surfCubemapLayeredwrite(T val, cudaSurfaceObject_t surfObject, int x, int y, int layerface, cudaSurfaceBoundaryMode mode = cudaBoundaryModeTrap)
{
//#ifdef __CUDA_ARCH__
#if 0
  __SURFACE_OBJECT_PARAMETERS_INIT
  x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i));
  auto tmp = mapFrom<float4>(val);
  __ockl_image_store_lod_CM(i, make_int2(x, y), layerface, layerface, tmp);
#endif /* 0 (originally __CUDA_ARCH__) */
}
#endif // __CUDA_HIP_PLATFORM_AMD__
#endif // __cplusplus && __CUDACC__

#endif // __SURFACE_INDIRECT_FUNCTIONS_H__


