


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! ==============================================================================
! hipfort: FORTRAN Interfaces for GPU kernels
! ==============================================================================
! Copyright (c) 2020-2022 Advanced Micro Devices, Inc. All rights reserved.
! [MITx11 License]
! 
! Permission is hereby granted, free of charge, to any person obtaining a copy
! of this software and associated documentation files (the "Software"), to deal
! in the Software without restriction, including without limitation the rights
! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
! copies of the Software, and to permit persons to whom the Software is
! furnished to do so, subject to the following conditions:
! 
! The above copyright notice and this permission notice shall be included in
! all copies or substantial portions of the Software.
! 
! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
! THE SOFTWARE.
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!


module cudafor_cudamemcpy
  
  interface cudaMemcpy
    !>
    !>    @brief Copy data from src to dest.
    !>
    !>    NOTE(review): the description below is carried over from the hipMemcpy
    !>    documentation, while the C symbol bound here is "cudaMemcpy" -- confirm
    !>    that every detail applies to the CUDA entry point.
    !>
    !>    It supports memory from host to device, device to host, device to device
    !>    and host to host. The src and dest must not overlap.
    !>
    !>    For hipMemcpy, the copy is always performed by the current device (set by
    !>    hipSetDevice). For multi-gpu or peer-to-peer configurations, it is
    !>    recommended to set the current device to the device where the src data is
    !>    physically located. For optimal peer-to-peer copies, the copy device must
    !>    be able to access the src and dest pointers (by calling
    !>    hipDeviceEnablePeerAccess with copy agent as the current device and
    !>    src/dest as the peerDevice argument). If this is not done, the hipMemcpy
    !>    will still work, but will perform the copy using a staging buffer on the
    !>    host. Calling hipMemcpy with dest and src pointers that do not match the
    !>    hipMemcpyKind results in undefined behavior.
    !>
    !>    @param[out] dest      Destination the data is copied to
    !>    @param[in]  src       Source the data is copied from
    !>    @param[in]  sizeBytes Data size in bytes
    !>    @param[in]  myKind    Memory copy type
    !>    @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
    !>
    !>    @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,
    !>   hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,
    !>   hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,
    !>   hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,
    !>   hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,
    !>   hipMemHostAlloc, hipMemHostGetDevicePointer
    !>
    function cudaMemcpy_(dest, src, sizeBytes, myKind) &
        bind(c, name="cudaMemcpy")
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      type(c_ptr), value :: dest, src
      integer(c_size_t), value :: sizeBytes
      integer(kind(cudaMemcpyHostToHost)), value :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_
    end function cudaMemcpy_

    ! Typed specifics, one statement per element tag and rank suffix (0-7):
    ! the plain name takes no length argument, the _c_int / _c_size_t names
    ! take a length of that integer kind.

    ! element tag "l"
    module procedure cudaMemcpy_l_0, cudaMemcpy_l_0_c_int, cudaMemcpy_l_0_c_size_t
    module procedure cudaMemcpy_l_1, cudaMemcpy_l_1_c_int, cudaMemcpy_l_1_c_size_t
    module procedure cudaMemcpy_l_2, cudaMemcpy_l_2_c_int, cudaMemcpy_l_2_c_size_t
    module procedure cudaMemcpy_l_3, cudaMemcpy_l_3_c_int, cudaMemcpy_l_3_c_size_t
    module procedure cudaMemcpy_l_4, cudaMemcpy_l_4_c_int, cudaMemcpy_l_4_c_size_t
    module procedure cudaMemcpy_l_5, cudaMemcpy_l_5_c_int, cudaMemcpy_l_5_c_size_t
    module procedure cudaMemcpy_l_6, cudaMemcpy_l_6_c_int, cudaMemcpy_l_6_c_size_t
    module procedure cudaMemcpy_l_7, cudaMemcpy_l_7_c_int, cudaMemcpy_l_7_c_size_t

    ! element tag "i4"
    module procedure cudaMemcpy_i4_0, cudaMemcpy_i4_0_c_int, cudaMemcpy_i4_0_c_size_t
    module procedure cudaMemcpy_i4_1, cudaMemcpy_i4_1_c_int, cudaMemcpy_i4_1_c_size_t
    module procedure cudaMemcpy_i4_2, cudaMemcpy_i4_2_c_int, cudaMemcpy_i4_2_c_size_t
    module procedure cudaMemcpy_i4_3, cudaMemcpy_i4_3_c_int, cudaMemcpy_i4_3_c_size_t
    module procedure cudaMemcpy_i4_4, cudaMemcpy_i4_4_c_int, cudaMemcpy_i4_4_c_size_t
    module procedure cudaMemcpy_i4_5, cudaMemcpy_i4_5_c_int, cudaMemcpy_i4_5_c_size_t
    module procedure cudaMemcpy_i4_6, cudaMemcpy_i4_6_c_int, cudaMemcpy_i4_6_c_size_t
    module procedure cudaMemcpy_i4_7, cudaMemcpy_i4_7_c_int, cudaMemcpy_i4_7_c_size_t

    ! element tag "i8"
    module procedure cudaMemcpy_i8_0, cudaMemcpy_i8_0_c_int, cudaMemcpy_i8_0_c_size_t
    module procedure cudaMemcpy_i8_1, cudaMemcpy_i8_1_c_int, cudaMemcpy_i8_1_c_size_t
    module procedure cudaMemcpy_i8_2, cudaMemcpy_i8_2_c_int, cudaMemcpy_i8_2_c_size_t
    module procedure cudaMemcpy_i8_3, cudaMemcpy_i8_3_c_int, cudaMemcpy_i8_3_c_size_t
    module procedure cudaMemcpy_i8_4, cudaMemcpy_i8_4_c_int, cudaMemcpy_i8_4_c_size_t
    module procedure cudaMemcpy_i8_5, cudaMemcpy_i8_5_c_int, cudaMemcpy_i8_5_c_size_t
    module procedure cudaMemcpy_i8_6, cudaMemcpy_i8_6_c_int, cudaMemcpy_i8_6_c_size_t
    module procedure cudaMemcpy_i8_7, cudaMemcpy_i8_7_c_int, cudaMemcpy_i8_7_c_size_t

    ! element tag "r4"
    module procedure cudaMemcpy_r4_0, cudaMemcpy_r4_0_c_int, cudaMemcpy_r4_0_c_size_t
    module procedure cudaMemcpy_r4_1, cudaMemcpy_r4_1_c_int, cudaMemcpy_r4_1_c_size_t
    module procedure cudaMemcpy_r4_2, cudaMemcpy_r4_2_c_int, cudaMemcpy_r4_2_c_size_t
    module procedure cudaMemcpy_r4_3, cudaMemcpy_r4_3_c_int, cudaMemcpy_r4_3_c_size_t
    module procedure cudaMemcpy_r4_4, cudaMemcpy_r4_4_c_int, cudaMemcpy_r4_4_c_size_t
    module procedure cudaMemcpy_r4_5, cudaMemcpy_r4_5_c_int, cudaMemcpy_r4_5_c_size_t
    module procedure cudaMemcpy_r4_6, cudaMemcpy_r4_6_c_int, cudaMemcpy_r4_6_c_size_t
    module procedure cudaMemcpy_r4_7, cudaMemcpy_r4_7_c_int, cudaMemcpy_r4_7_c_size_t

    ! element tag "r8"
    module procedure cudaMemcpy_r8_0, cudaMemcpy_r8_0_c_int, cudaMemcpy_r8_0_c_size_t
    module procedure cudaMemcpy_r8_1, cudaMemcpy_r8_1_c_int, cudaMemcpy_r8_1_c_size_t
    module procedure cudaMemcpy_r8_2, cudaMemcpy_r8_2_c_int, cudaMemcpy_r8_2_c_size_t
    module procedure cudaMemcpy_r8_3, cudaMemcpy_r8_3_c_int, cudaMemcpy_r8_3_c_size_t
    module procedure cudaMemcpy_r8_4, cudaMemcpy_r8_4_c_int, cudaMemcpy_r8_4_c_size_t
    module procedure cudaMemcpy_r8_5, cudaMemcpy_r8_5_c_int, cudaMemcpy_r8_5_c_size_t
    module procedure cudaMemcpy_r8_6, cudaMemcpy_r8_6_c_int, cudaMemcpy_r8_6_c_size_t
    module procedure cudaMemcpy_r8_7, cudaMemcpy_r8_7_c_int, cudaMemcpy_r8_7_c_size_t

    ! element tag "c4"
    module procedure cudaMemcpy_c4_0, cudaMemcpy_c4_0_c_int, cudaMemcpy_c4_0_c_size_t
    module procedure cudaMemcpy_c4_1, cudaMemcpy_c4_1_c_int, cudaMemcpy_c4_1_c_size_t
    module procedure cudaMemcpy_c4_2, cudaMemcpy_c4_2_c_int, cudaMemcpy_c4_2_c_size_t
    module procedure cudaMemcpy_c4_3, cudaMemcpy_c4_3_c_int, cudaMemcpy_c4_3_c_size_t
    module procedure cudaMemcpy_c4_4, cudaMemcpy_c4_4_c_int, cudaMemcpy_c4_4_c_size_t
    module procedure cudaMemcpy_c4_5, cudaMemcpy_c4_5_c_int, cudaMemcpy_c4_5_c_size_t
    module procedure cudaMemcpy_c4_6, cudaMemcpy_c4_6_c_int, cudaMemcpy_c4_6_c_size_t
    module procedure cudaMemcpy_c4_7, cudaMemcpy_c4_7_c_int, cudaMemcpy_c4_7_c_size_t

    ! element tag "c8"
    module procedure cudaMemcpy_c8_0, cudaMemcpy_c8_0_c_int, cudaMemcpy_c8_0_c_size_t
    module procedure cudaMemcpy_c8_1, cudaMemcpy_c8_1_c_int, cudaMemcpy_c8_1_c_size_t
    module procedure cudaMemcpy_c8_2, cudaMemcpy_c8_2_c_int, cudaMemcpy_c8_2_c_size_t
    module procedure cudaMemcpy_c8_3, cudaMemcpy_c8_3_c_int, cudaMemcpy_c8_3_c_size_t
    module procedure cudaMemcpy_c8_4, cudaMemcpy_c8_4_c_int, cudaMemcpy_c8_4_c_size_t
    module procedure cudaMemcpy_c8_5, cudaMemcpy_c8_5_c_int, cudaMemcpy_c8_5_c_size_t
    module procedure cudaMemcpy_c8_6, cudaMemcpy_c8_6_c_int, cudaMemcpy_c8_6_c_size_t
    module procedure cudaMemcpy_c8_7, cudaMemcpy_c8_7_c_int, cudaMemcpy_c8_7_c_size_t
  end interface cudaMemcpy
  
  interface cudaMemcpyAsync
    !>
    !>    @brief Copy data from src to dest asynchronously.
    !>
    !>    NOTE(review): the description below is carried over from the
    !>    hipMemcpyAsync documentation, while the C symbol bound here is
    !>    "cudaMemcpyAsync" -- confirm that every detail applies to the CUDA
    !>    entry point.
    !>
    !>    @warning If host or dest are not pinned, the memory copy will be performed
    !>   synchronously. For best performance, use hipHostMalloc to allocate host
    !>   memory that is transferred asynchronously.
    !>
    !>    @warning On HCC, hipMemcpyAsync does not support overlapped H2D and D2H copies.
    !>    For hipMemcpy, the copy is always performed by the device associated with
    !>    the specified stream.
    !>
    !>    For multi-gpu or peer-to-peer configurations, it is recommended to use a
    !>   stream which is attached to the device where the src data is physically
    !>   located. For optimal peer-to-peer copies, the copy device must be able to
    !>   access the src and dest pointers (by calling hipDeviceEnablePeerAccess with
    !>   copy agent as the current device and src/dest as the peerDevice argument).
    !>   If this is not done, the hipMemcpy will still work, but will perform the
    !>   copy using a staging buffer on the host.
    !>
    !>    @param[out] dest      Destination the data is copied to
    !>    @param[in]  src       Source the data is copied from
    !>    @param[in]  sizeBytes Data size in bytes
    !>    @param[in]  myKind    Memory copy type
    !>    @param[in]  stream    Handle the copy is enqueued on (the inherited text
    !>                          called this "accelerator_view")
    !>    @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
    !>
    !>    @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray,
    !>   hipMemcpy2DFromArray, hipMemcpyArrayToArray, hipMemcpy2DArrayToArray, hipMemcpyToSymbol,
    !>   hipMemcpyFromSymbol, hipMemcpy2DAsync, hipMemcpyToArrayAsync, hipMemcpy2DToArrayAsync,
    !>   hipMemcpyFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpyToSymbolAsync,
    !>   hipMemcpyFromSymbolAsync
    !>
    function cudaMemcpyAsync_(dest, src, sizeBytes, myKind, stream) &
        bind(c, name="cudaMemcpyAsync")
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      type(c_ptr), value :: dest, src
      integer(c_size_t), value :: sizeBytes
      integer(kind(cudaMemcpyHostToHost)), value :: myKind
      type(c_ptr), value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpyAsync_
    end function cudaMemcpyAsync_

    ! Typed specifics, one statement per element tag and rank suffix (0-7):
    ! the plain name, then the _c_int and _c_size_t variants.

    ! element tag "l"
    module procedure cudaMemcpyAsync_l_0, cudaMemcpyAsync_l_0_c_int, cudaMemcpyAsync_l_0_c_size_t
    module procedure cudaMemcpyAsync_l_1, cudaMemcpyAsync_l_1_c_int, cudaMemcpyAsync_l_1_c_size_t
    module procedure cudaMemcpyAsync_l_2, cudaMemcpyAsync_l_2_c_int, cudaMemcpyAsync_l_2_c_size_t
    module procedure cudaMemcpyAsync_l_3, cudaMemcpyAsync_l_3_c_int, cudaMemcpyAsync_l_3_c_size_t
    module procedure cudaMemcpyAsync_l_4, cudaMemcpyAsync_l_4_c_int, cudaMemcpyAsync_l_4_c_size_t
    module procedure cudaMemcpyAsync_l_5, cudaMemcpyAsync_l_5_c_int, cudaMemcpyAsync_l_5_c_size_t
    module procedure cudaMemcpyAsync_l_6, cudaMemcpyAsync_l_6_c_int, cudaMemcpyAsync_l_6_c_size_t
    module procedure cudaMemcpyAsync_l_7, cudaMemcpyAsync_l_7_c_int, cudaMemcpyAsync_l_7_c_size_t

    ! element tag "i4"
    module procedure cudaMemcpyAsync_i4_0, cudaMemcpyAsync_i4_0_c_int, cudaMemcpyAsync_i4_0_c_size_t
    module procedure cudaMemcpyAsync_i4_1, cudaMemcpyAsync_i4_1_c_int, cudaMemcpyAsync_i4_1_c_size_t
    module procedure cudaMemcpyAsync_i4_2, cudaMemcpyAsync_i4_2_c_int, cudaMemcpyAsync_i4_2_c_size_t
    module procedure cudaMemcpyAsync_i4_3, cudaMemcpyAsync_i4_3_c_int, cudaMemcpyAsync_i4_3_c_size_t
    module procedure cudaMemcpyAsync_i4_4, cudaMemcpyAsync_i4_4_c_int, cudaMemcpyAsync_i4_4_c_size_t
    module procedure cudaMemcpyAsync_i4_5, cudaMemcpyAsync_i4_5_c_int, cudaMemcpyAsync_i4_5_c_size_t
    module procedure cudaMemcpyAsync_i4_6, cudaMemcpyAsync_i4_6_c_int, cudaMemcpyAsync_i4_6_c_size_t
    module procedure cudaMemcpyAsync_i4_7, cudaMemcpyAsync_i4_7_c_int, cudaMemcpyAsync_i4_7_c_size_t

    ! element tag "i8"
    module procedure cudaMemcpyAsync_i8_0, cudaMemcpyAsync_i8_0_c_int, cudaMemcpyAsync_i8_0_c_size_t
    module procedure cudaMemcpyAsync_i8_1, cudaMemcpyAsync_i8_1_c_int, cudaMemcpyAsync_i8_1_c_size_t
    module procedure cudaMemcpyAsync_i8_2, cudaMemcpyAsync_i8_2_c_int, cudaMemcpyAsync_i8_2_c_size_t
    module procedure cudaMemcpyAsync_i8_3, cudaMemcpyAsync_i8_3_c_int, cudaMemcpyAsync_i8_3_c_size_t
    module procedure cudaMemcpyAsync_i8_4, cudaMemcpyAsync_i8_4_c_int, cudaMemcpyAsync_i8_4_c_size_t
    module procedure cudaMemcpyAsync_i8_5, cudaMemcpyAsync_i8_5_c_int, cudaMemcpyAsync_i8_5_c_size_t
    module procedure cudaMemcpyAsync_i8_6, cudaMemcpyAsync_i8_6_c_int, cudaMemcpyAsync_i8_6_c_size_t
    module procedure cudaMemcpyAsync_i8_7, cudaMemcpyAsync_i8_7_c_int, cudaMemcpyAsync_i8_7_c_size_t

    ! element tag "r4"
    module procedure cudaMemcpyAsync_r4_0, cudaMemcpyAsync_r4_0_c_int, cudaMemcpyAsync_r4_0_c_size_t
    module procedure cudaMemcpyAsync_r4_1, cudaMemcpyAsync_r4_1_c_int, cudaMemcpyAsync_r4_1_c_size_t
    module procedure cudaMemcpyAsync_r4_2, cudaMemcpyAsync_r4_2_c_int, cudaMemcpyAsync_r4_2_c_size_t
    module procedure cudaMemcpyAsync_r4_3, cudaMemcpyAsync_r4_3_c_int, cudaMemcpyAsync_r4_3_c_size_t
    module procedure cudaMemcpyAsync_r4_4, cudaMemcpyAsync_r4_4_c_int, cudaMemcpyAsync_r4_4_c_size_t
    module procedure cudaMemcpyAsync_r4_5, cudaMemcpyAsync_r4_5_c_int, cudaMemcpyAsync_r4_5_c_size_t
    module procedure cudaMemcpyAsync_r4_6, cudaMemcpyAsync_r4_6_c_int, cudaMemcpyAsync_r4_6_c_size_t
    module procedure cudaMemcpyAsync_r4_7, cudaMemcpyAsync_r4_7_c_int, cudaMemcpyAsync_r4_7_c_size_t

    ! element tag "r8"
    module procedure cudaMemcpyAsync_r8_0, cudaMemcpyAsync_r8_0_c_int, cudaMemcpyAsync_r8_0_c_size_t
    module procedure cudaMemcpyAsync_r8_1, cudaMemcpyAsync_r8_1_c_int, cudaMemcpyAsync_r8_1_c_size_t
    module procedure cudaMemcpyAsync_r8_2, cudaMemcpyAsync_r8_2_c_int, cudaMemcpyAsync_r8_2_c_size_t
    module procedure cudaMemcpyAsync_r8_3, cudaMemcpyAsync_r8_3_c_int, cudaMemcpyAsync_r8_3_c_size_t
    module procedure cudaMemcpyAsync_r8_4, cudaMemcpyAsync_r8_4_c_int, cudaMemcpyAsync_r8_4_c_size_t
    module procedure cudaMemcpyAsync_r8_5, cudaMemcpyAsync_r8_5_c_int, cudaMemcpyAsync_r8_5_c_size_t
    module procedure cudaMemcpyAsync_r8_6, cudaMemcpyAsync_r8_6_c_int, cudaMemcpyAsync_r8_6_c_size_t
    module procedure cudaMemcpyAsync_r8_7, cudaMemcpyAsync_r8_7_c_int, cudaMemcpyAsync_r8_7_c_size_t

    ! element tag "c4"
    module procedure cudaMemcpyAsync_c4_0, cudaMemcpyAsync_c4_0_c_int, cudaMemcpyAsync_c4_0_c_size_t
    module procedure cudaMemcpyAsync_c4_1, cudaMemcpyAsync_c4_1_c_int, cudaMemcpyAsync_c4_1_c_size_t
    module procedure cudaMemcpyAsync_c4_2, cudaMemcpyAsync_c4_2_c_int, cudaMemcpyAsync_c4_2_c_size_t
    module procedure cudaMemcpyAsync_c4_3, cudaMemcpyAsync_c4_3_c_int, cudaMemcpyAsync_c4_3_c_size_t
    module procedure cudaMemcpyAsync_c4_4, cudaMemcpyAsync_c4_4_c_int, cudaMemcpyAsync_c4_4_c_size_t
    module procedure cudaMemcpyAsync_c4_5, cudaMemcpyAsync_c4_5_c_int, cudaMemcpyAsync_c4_5_c_size_t
    module procedure cudaMemcpyAsync_c4_6, cudaMemcpyAsync_c4_6_c_int, cudaMemcpyAsync_c4_6_c_size_t
    module procedure cudaMemcpyAsync_c4_7, cudaMemcpyAsync_c4_7_c_int, cudaMemcpyAsync_c4_7_c_size_t

    ! element tag "c8"
    module procedure cudaMemcpyAsync_c8_0, cudaMemcpyAsync_c8_0_c_int, cudaMemcpyAsync_c8_0_c_size_t
    module procedure cudaMemcpyAsync_c8_1, cudaMemcpyAsync_c8_1_c_int, cudaMemcpyAsync_c8_1_c_size_t
    module procedure cudaMemcpyAsync_c8_2, cudaMemcpyAsync_c8_2_c_int, cudaMemcpyAsync_c8_2_c_size_t
    module procedure cudaMemcpyAsync_c8_3, cudaMemcpyAsync_c8_3_c_int, cudaMemcpyAsync_c8_3_c_size_t
    module procedure cudaMemcpyAsync_c8_4, cudaMemcpyAsync_c8_4_c_int, cudaMemcpyAsync_c8_4_c_size_t
    module procedure cudaMemcpyAsync_c8_5, cudaMemcpyAsync_c8_5_c_int, cudaMemcpyAsync_c8_5_c_size_t
    module procedure cudaMemcpyAsync_c8_6, cudaMemcpyAsync_c8_6_c_int, cudaMemcpyAsync_c8_6_c_size_t
    module procedure cudaMemcpyAsync_c8_7, cudaMemcpyAsync_c8_7_c_int, cudaMemcpyAsync_c8_7_c_size_t
  end interface cudaMemcpyAsync
  
  interface cudaMemcpy2D
    !>
    !>    @brief Copies data between host and device.
    !>
    !>    NOTE(review): the description is carried over from the hipMemcpy2D
    !>    documentation, while the C symbol bound here is "cudaMemcpy2D" --
    !>    confirm the listed return codes against the CUDA entry point.
    !>
    !>    @param[in]   dest   Destination memory address
    !>    @param[in]   dpitch Pitch of destination memory
    !>    @param[in]   src    Source memory address
    !>    @param[in]   spitch Pitch of source memory
    !>    @param[in]   width  Width of matrix transfer (columns in bytes)
    !>    @param[in]   height Height of matrix transfer (rows)
    !>    @param[in]   myKind Type of transfer
    !>    @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
    !>   #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
    !>
    !>    @see hipMemcpy, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, hipMemcpyToSymbol,
    !>   hipMemcpyAsync
    !>
    function cudaMemcpy2D_(dest, dpitch, src, spitch, width, height, myKind) &
        bind(c, name="cudaMemcpy2D")
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      type(c_ptr), value :: dest, src
      integer(c_size_t), value :: dpitch, spitch
      integer(c_size_t), value :: width, height
      integer(kind(cudaMemcpyHostToHost)), value :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy2D_
    end function cudaMemcpy2D_

    ! Typed specifics, one statement per element tag and rank suffix (0-2).

    ! element tag "l"
    module procedure cudaMemcpy2D_l_0_c_int, cudaMemcpy2D_l_0_c_size_t
    module procedure cudaMemcpy2D_l_1_c_int, cudaMemcpy2D_l_1_c_size_t
    module procedure cudaMemcpy2D_l_2_c_int, cudaMemcpy2D_l_2_c_size_t

    ! element tag "i4"
    module procedure cudaMemcpy2D_i4_0_c_int, cudaMemcpy2D_i4_0_c_size_t
    module procedure cudaMemcpy2D_i4_1_c_int, cudaMemcpy2D_i4_1_c_size_t
    module procedure cudaMemcpy2D_i4_2_c_int, cudaMemcpy2D_i4_2_c_size_t

    ! element tag "i8"
    module procedure cudaMemcpy2D_i8_0_c_int, cudaMemcpy2D_i8_0_c_size_t
    module procedure cudaMemcpy2D_i8_1_c_int, cudaMemcpy2D_i8_1_c_size_t
    module procedure cudaMemcpy2D_i8_2_c_int, cudaMemcpy2D_i8_2_c_size_t

    ! element tag "r4"
    module procedure cudaMemcpy2D_r4_0_c_int, cudaMemcpy2D_r4_0_c_size_t
    module procedure cudaMemcpy2D_r4_1_c_int, cudaMemcpy2D_r4_1_c_size_t
    module procedure cudaMemcpy2D_r4_2_c_int, cudaMemcpy2D_r4_2_c_size_t

    ! element tag "r8"
    module procedure cudaMemcpy2D_r8_0_c_int, cudaMemcpy2D_r8_0_c_size_t
    module procedure cudaMemcpy2D_r8_1_c_int, cudaMemcpy2D_r8_1_c_size_t
    module procedure cudaMemcpy2D_r8_2_c_int, cudaMemcpy2D_r8_2_c_size_t

    ! element tag "c4"
    module procedure cudaMemcpy2D_c4_0_c_int, cudaMemcpy2D_c4_0_c_size_t
    module procedure cudaMemcpy2D_c4_1_c_int, cudaMemcpy2D_c4_1_c_size_t
    module procedure cudaMemcpy2D_c4_2_c_int, cudaMemcpy2D_c4_2_c_size_t

    ! element tag "c8"
    module procedure cudaMemcpy2D_c8_0_c_int, cudaMemcpy2D_c8_0_c_size_t
    module procedure cudaMemcpy2D_c8_1_c_int, cudaMemcpy2D_c8_1_c_size_t
    module procedure cudaMemcpy2D_c8_2_c_int, cudaMemcpy2D_c8_2_c_size_t
  end interface cudaMemcpy2D
  
  interface cudaMemcpy2DAsync
    !>
    !>    @brief Copies data between host and device.
    !>
    !>    NOTE(review): the description is carried over from the hipMemcpy2DAsync
    !>    documentation, while the C symbol bound here is "cudaMemcpy2DAsync" --
    !>    confirm the listed return codes against the CUDA entry point.
    !>
    !>    @param[in]   dest   Destination memory address
    !>    @param[in]   dpitch Pitch of destination memory
    !>    @param[in]   src    Source memory address
    !>    @param[in]   spitch Pitch of source memory
    !>    @param[in]   width  Width of matrix transfer (columns in bytes)
    !>    @param[in]   height Height of matrix transfer (rows)
    !>    @param[in]   myKind Type of transfer
    !>    @param[in]   stream Stream to use
    !>    @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
    !>   #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
    !>
    !>    @see hipMemcpy, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, hipMemcpyToSymbol,
    !>   hipMemcpyAsync
    !>
    function cudaMemcpy2DAsync_(dest, dpitch, src, spitch, width, height, &
        myKind, stream) bind(c, name="cudaMemcpy2DAsync")
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      type(c_ptr), value :: dest, src
      integer(c_size_t), value :: dpitch, spitch
      integer(c_size_t), value :: width, height
      integer(kind(cudaMemcpyHostToHost)), value :: myKind
      type(c_ptr), value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_
    end function cudaMemcpy2DAsync_

    ! Typed specifics, one statement per element tag and rank suffix (0-2).

    ! element tag "l"
    module procedure cudaMemcpy2DAsync_l_0_c_int, cudaMemcpy2DAsync_l_0_c_size_t
    module procedure cudaMemcpy2DAsync_l_1_c_int, cudaMemcpy2DAsync_l_1_c_size_t
    module procedure cudaMemcpy2DAsync_l_2_c_int, cudaMemcpy2DAsync_l_2_c_size_t

    ! element tag "i4"
    module procedure cudaMemcpy2DAsync_i4_0_c_int, cudaMemcpy2DAsync_i4_0_c_size_t
    module procedure cudaMemcpy2DAsync_i4_1_c_int, cudaMemcpy2DAsync_i4_1_c_size_t
    module procedure cudaMemcpy2DAsync_i4_2_c_int, cudaMemcpy2DAsync_i4_2_c_size_t

    ! element tag "i8"
    module procedure cudaMemcpy2DAsync_i8_0_c_int, cudaMemcpy2DAsync_i8_0_c_size_t
    module procedure cudaMemcpy2DAsync_i8_1_c_int, cudaMemcpy2DAsync_i8_1_c_size_t
    module procedure cudaMemcpy2DAsync_i8_2_c_int, cudaMemcpy2DAsync_i8_2_c_size_t

    ! element tag "r4"
    module procedure cudaMemcpy2DAsync_r4_0_c_int, cudaMemcpy2DAsync_r4_0_c_size_t
    module procedure cudaMemcpy2DAsync_r4_1_c_int, cudaMemcpy2DAsync_r4_1_c_size_t
    module procedure cudaMemcpy2DAsync_r4_2_c_int, cudaMemcpy2DAsync_r4_2_c_size_t

    ! element tag "r8"
    module procedure cudaMemcpy2DAsync_r8_0_c_int, cudaMemcpy2DAsync_r8_0_c_size_t
    module procedure cudaMemcpy2DAsync_r8_1_c_int, cudaMemcpy2DAsync_r8_1_c_size_t
    module procedure cudaMemcpy2DAsync_r8_2_c_int, cudaMemcpy2DAsync_r8_2_c_size_t

    ! element tag "c4"
    module procedure cudaMemcpy2DAsync_c4_0_c_int, cudaMemcpy2DAsync_c4_0_c_size_t
    module procedure cudaMemcpy2DAsync_c4_1_c_int, cudaMemcpy2DAsync_c4_1_c_size_t
    module procedure cudaMemcpy2DAsync_c4_2_c_int, cudaMemcpy2DAsync_c4_2_c_size_t

    ! element tag "c8"
    module procedure cudaMemcpy2DAsync_c8_0_c_int, cudaMemcpy2DAsync_c8_0_c_size_t
    module procedure cudaMemcpy2DAsync_c8_1_c_int, cudaMemcpy2DAsync_c8_1_c_size_t
    module procedure cudaMemcpy2DAsync_c8_2_c_int, cudaMemcpy2DAsync_c8_2_c_size_t
  end interface cudaMemcpy2DAsync

  contains
    
                                                              
    !> Copy `logical(c_bool)` data from the scalar `src` to the scalar `dest`.
    !>
    !> Forwards the C addresses of both arguments (`c_loc`) to the `bind(c)`
    !> interface `cudaMemcpy_`. The size argument is `length` times the
    !> factor 1 hard-coded here (presumably the byte size of one
    !> `logical(c_bool)` element -- confirm).
    !>
    !> @param[inout] dest   destination variable
    !> @param[in]    src    source variable
    !> @param[in]    length copy length as `c_int`, widened to `c_size_t`
    !> @param[in]    myKind memory copy type, passed through unchanged
    !> @return status value returned by `cudaMemcpy_`
    function cudaMemcpy_l_0_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest
      logical(c_bool), target, intent(in)    :: src
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)), intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_l_0_c_int
      ! Convert explicitly to c_size_t, the kind cudaMemcpy_ declares for its
      ! size argument, instead of assuming that kind 8 is the same kind.
      cudaMemcpy_l_0_c_int = cudaMemcpy_(c_loc(dest), c_loc(src), &
        int(length, c_size_t) * 1_c_size_t, myKind)
    end function cudaMemcpy_l_0_c_int
                                                              
    !> Copy `logical(c_bool)` data from the scalar `src` to the scalar `dest`.
    !>
    !> Forwards the C addresses of both arguments (`c_loc`) to the `bind(c)`
    !> interface `cudaMemcpy_`. The size argument is `length` times the
    !> factor 1 hard-coded here (presumably the byte size of one
    !> `logical(c_bool)` element -- confirm).
    !>
    !> @param[inout] dest   destination variable
    !> @param[in]    src    source variable
    !> @param[in]    length copy length as `c_size_t`
    !> @param[in]    myKind memory copy type, passed through unchanged
    !> @return status value returned by `cudaMemcpy_`
    function cudaMemcpy_l_0_c_size_t(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest
      logical(c_bool), target, intent(in)    :: src
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)), intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_l_0_c_size_t
      ! The scale factor is a c_size_t literal so the product keeps the kind
      ! cudaMemcpy_ declares for its size argument (no reliance on kind 8).
      cudaMemcpy_l_0_c_size_t = cudaMemcpy_(c_loc(dest), c_loc(src), &
        length * 1_c_size_t, myKind)
    end function cudaMemcpy_l_0_c_size_t

!> Copy one `logical(c_bool)` value from the scalar `src` to the scalar `dest`.
!>
!> Forwards the C addresses of both arguments (`c_loc`) to the `bind(c)`
!> interface `cudaMemcpy_` with a constant size argument of 1.
!>
!> @param[inout] dest   destination variable
!> @param[in]    src    source variable
!> @param[in]    myKind memory copy type, passed through unchanged
!> @return status value returned by `cudaMemcpy_`
function cudaMemcpy_l_0(dest,src,myKind)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  logical(c_bool), target, intent(inout) :: dest
  logical(c_bool), target, intent(in)    :: src
  integer(kind(cudaMemcpyHostToHost)), intent(in) :: myKind
  integer(kind(cudaSuccess)) :: cudaMemcpy_l_0
  ! The size literal carries kind c_size_t, the kind cudaMemcpy_ declares
  ! for its size argument, instead of assuming kind 8.
  cudaMemcpy_l_0 = cudaMemcpy_(c_loc(dest), c_loc(src), 1_c_size_t, myKind)
end function cudaMemcpy_l_0
                                                              
    !> Copy `logical(c_bool)` data from the rank-1 array `src` to `dest`.
    !>
    !> Forwards the C addresses of both arrays (`c_loc`) to the `bind(c)`
    !> interface `cudaMemcpy_`. The size argument is `length` times the
    !> factor 1 hard-coded here (presumably the byte size of one
    !> `logical(c_bool)` element -- confirm).
    !>
    !> @param[inout] dest   destination array
    !> @param[in]    src    source array
    !> @param[in]    length copy length as `c_int`, widened to `c_size_t`
    !> @param[in]    myKind memory copy type, passed through unchanged
    !> @return status value returned by `cudaMemcpy_`
    function cudaMemcpy_l_1_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:)
      logical(c_bool), target, intent(in)    :: src(:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)), intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_l_1_c_int
      ! Convert explicitly to c_size_t, the kind cudaMemcpy_ declares for its
      ! size argument, instead of assuming that kind 8 is the same kind.
      cudaMemcpy_l_1_c_int = cudaMemcpy_(c_loc(dest), c_loc(src), &
        int(length, c_size_t) * 1_c_size_t, myKind)
    end function cudaMemcpy_l_1_c_int
                                                              
    !> Copy `logical(c_bool)` data from the rank-1 array `src` to `dest`.
    !>
    !> Forwards the C addresses of both arrays (`c_loc`) to the `bind(c)`
    !> interface `cudaMemcpy_`. The size argument is `length` times the
    !> factor 1 hard-coded here (presumably the byte size of one
    !> `logical(c_bool)` element -- confirm).
    !>
    !> @param[inout] dest   destination array
    !> @param[in]    src    source array
    !> @param[in]    length copy length as `c_size_t`
    !> @param[in]    myKind memory copy type, passed through unchanged
    !> @return status value returned by `cudaMemcpy_`
    function cudaMemcpy_l_1_c_size_t(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:)
      logical(c_bool), target, intent(in)    :: src(:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)), intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_l_1_c_size_t
      ! The scale factor is a c_size_t literal so the product keeps the kind
      ! cudaMemcpy_ declares for its size argument (no reliance on kind 8).
      cudaMemcpy_l_1_c_size_t = cudaMemcpy_(c_loc(dest), c_loc(src), &
        length * 1_c_size_t, myKind)
    end function cudaMemcpy_l_1_c_size_t

!> Copy `logical(c_bool)` data from the rank-1 array `src` to `dest`,
!> taking the transfer size from `dest`.
!>
!> Forwards the C addresses of both arrays (`c_loc`) to the `bind(c)`
!> interface `cudaMemcpy_`. The size argument is `size(dest)` times the
!> factor 1 hard-coded here (presumably the byte size of one
!> `logical(c_bool)` element -- confirm). The extent of `src` is not
!> consulted.
!>
!> @param[inout] dest   destination array; its element count sets the size
!> @param[in]    src    source array
!> @param[in]    myKind memory copy type, passed through unchanged
!> @return status value returned by `cudaMemcpy_`
function cudaMemcpy_l_1(dest,src,myKind)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  logical(c_bool), target, intent(inout) :: dest(:)
  logical(c_bool), target, intent(in)    :: src(:)
  integer(kind(cudaMemcpyHostToHost)), intent(in) :: myKind
  integer(kind(cudaSuccess)) :: cudaMemcpy_l_1
  ! Ask for the element count directly in c_size_t: a default-integer
  ! size() cannot represent arrays with more than huge(0) elements.
  cudaMemcpy_l_1 = cudaMemcpy_(c_loc(dest), c_loc(src), &
    size(dest, kind=c_size_t) * 1_c_size_t, myKind)
end function cudaMemcpy_l_1
                                                              
    !> Copy `logical(c_bool)` data from the rank-2 array `src` to `dest`.
    !>
    !> Forwards the C addresses of both arrays (`c_loc`) to the `bind(c)`
    !> interface `cudaMemcpy_`. The size argument is `length` times the
    !> factor 1 hard-coded here (presumably the byte size of one
    !> `logical(c_bool)` element -- confirm).
    !>
    !> @param[inout] dest   destination array
    !> @param[in]    src    source array
    !> @param[in]    length copy length as `c_int`, widened to `c_size_t`
    !> @param[in]    myKind memory copy type, passed through unchanged
    !> @return status value returned by `cudaMemcpy_`
    function cudaMemcpy_l_2_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:,:)
      logical(c_bool), target, intent(in)    :: src(:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)), intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_l_2_c_int
      ! Convert explicitly to c_size_t, the kind cudaMemcpy_ declares for its
      ! size argument, instead of assuming that kind 8 is the same kind.
      cudaMemcpy_l_2_c_int = cudaMemcpy_(c_loc(dest), c_loc(src), &
        int(length, c_size_t) * 1_c_size_t, myKind)
    end function cudaMemcpy_l_2_c_int
                                                              
    !> Copy `logical(c_bool)` data from the rank-2 array `src` to `dest`.
    !>
    !> Forwards the C addresses of both arrays (`c_loc`) to the `bind(c)`
    !> interface `cudaMemcpy_`. The size argument is `length` times the
    !> factor 1 hard-coded here (presumably the byte size of one
    !> `logical(c_bool)` element -- confirm).
    !>
    !> @param[inout] dest   destination array
    !> @param[in]    src    source array
    !> @param[in]    length copy length as `c_size_t`
    !> @param[in]    myKind memory copy type, passed through unchanged
    !> @return status value returned by `cudaMemcpy_`
    function cudaMemcpy_l_2_c_size_t(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:,:)
      logical(c_bool), target, intent(in)    :: src(:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)), intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_l_2_c_size_t
      ! The scale factor is a c_size_t literal so the product keeps the kind
      ! cudaMemcpy_ declares for its size argument (no reliance on kind 8).
      cudaMemcpy_l_2_c_size_t = cudaMemcpy_(c_loc(dest), c_loc(src), &
        length * 1_c_size_t, myKind)
    end function cudaMemcpy_l_2_c_size_t

!> Copy `logical(c_bool)` data from the rank-2 array `src` to `dest`,
!> taking the transfer size from `dest`.
!>
!> Forwards the C addresses of both arrays (`c_loc`) to the `bind(c)`
!> interface `cudaMemcpy_`. The size argument is `size(dest)` times the
!> factor 1 hard-coded here (presumably the byte size of one
!> `logical(c_bool)` element -- confirm). The extent of `src` is not
!> consulted.
!>
!> @param[inout] dest   destination array; its element count sets the size
!> @param[in]    src    source array
!> @param[in]    myKind memory copy type, passed through unchanged
!> @return status value returned by `cudaMemcpy_`
function cudaMemcpy_l_2(dest,src,myKind)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  logical(c_bool), target, intent(inout) :: dest(:,:)
  logical(c_bool), target, intent(in)    :: src(:,:)
  integer(kind(cudaMemcpyHostToHost)), intent(in) :: myKind
  integer(kind(cudaSuccess)) :: cudaMemcpy_l_2
  ! Ask for the element count directly in c_size_t: a default-integer
  ! size() cannot represent arrays with more than huge(0) elements.
  cudaMemcpy_l_2 = cudaMemcpy_(c_loc(dest), c_loc(src), &
    size(dest, kind=c_size_t) * 1_c_size_t, myKind)
end function cudaMemcpy_l_2
                                                              
    !> Copy `logical(c_bool)` data from the rank-3 array `src` to `dest`.
    !>
    !> Forwards the C addresses of both arrays (`c_loc`) to the `bind(c)`
    !> interface `cudaMemcpy_`. The size argument is `length` times the
    !> factor 1 hard-coded here (presumably the byte size of one
    !> `logical(c_bool)` element -- confirm).
    !>
    !> @param[inout] dest   destination array
    !> @param[in]    src    source array
    !> @param[in]    length copy length as `c_int`, widened to `c_size_t`
    !> @param[in]    myKind memory copy type, passed through unchanged
    !> @return status value returned by `cudaMemcpy_`
    function cudaMemcpy_l_3_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:,:,:)
      logical(c_bool), target, intent(in)    :: src(:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)), intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_l_3_c_int
      ! Convert explicitly to c_size_t, the kind cudaMemcpy_ declares for its
      ! size argument, instead of assuming that kind 8 is the same kind.
      cudaMemcpy_l_3_c_int = cudaMemcpy_(c_loc(dest), c_loc(src), &
        int(length, c_size_t) * 1_c_size_t, myKind)
    end function cudaMemcpy_l_3_c_int
                                                              
    !> Copy `logical(c_bool)` data from the rank-3 array `src` to `dest`.
    !>
    !> Forwards the C addresses of both arrays (`c_loc`) to the `bind(c)`
    !> interface `cudaMemcpy_`. The size argument is `length` times the
    !> factor 1 hard-coded here (presumably the byte size of one
    !> `logical(c_bool)` element -- confirm).
    !>
    !> @param[inout] dest   destination array
    !> @param[in]    src    source array
    !> @param[in]    length copy length as `c_size_t`
    !> @param[in]    myKind memory copy type, passed through unchanged
    !> @return status value returned by `cudaMemcpy_`
    function cudaMemcpy_l_3_c_size_t(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:,:,:)
      logical(c_bool), target, intent(in)    :: src(:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)), intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_l_3_c_size_t
      ! The scale factor is a c_size_t literal so the product keeps the kind
      ! cudaMemcpy_ declares for its size argument (no reliance on kind 8).
      cudaMemcpy_l_3_c_size_t = cudaMemcpy_(c_loc(dest), c_loc(src), &
        length * 1_c_size_t, myKind)
    end function cudaMemcpy_l_3_c_size_t

!> Copy `logical(c_bool)` data from the rank-3 array `src` to `dest`,
!> taking the transfer size from `dest`.
!>
!> Forwards the C addresses of both arrays (`c_loc`) to the `bind(c)`
!> interface `cudaMemcpy_`. The size argument is `size(dest)` times the
!> factor 1 hard-coded here (presumably the byte size of one
!> `logical(c_bool)` element -- confirm). The extent of `src` is not
!> consulted.
!>
!> @param[inout] dest   destination array; its element count sets the size
!> @param[in]    src    source array
!> @param[in]    myKind memory copy type, passed through unchanged
!> @return status value returned by `cudaMemcpy_`
function cudaMemcpy_l_3(dest,src,myKind)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  logical(c_bool), target, intent(inout) :: dest(:,:,:)
  logical(c_bool), target, intent(in)    :: src(:,:,:)
  integer(kind(cudaMemcpyHostToHost)), intent(in) :: myKind
  integer(kind(cudaSuccess)) :: cudaMemcpy_l_3
  ! Ask for the element count directly in c_size_t: a default-integer
  ! size() cannot represent arrays with more than huge(0) elements.
  cudaMemcpy_l_3 = cudaMemcpy_(c_loc(dest), c_loc(src), &
    size(dest, kind=c_size_t) * 1_c_size_t, myKind)
end function cudaMemcpy_l_3
                                                              
    !> Copy `logical(c_bool)` data from the rank-4 array `src` to `dest`.
    !>
    !> Forwards the C addresses of both arrays (`c_loc`) to the `bind(c)`
    !> interface `cudaMemcpy_`. The size argument is `length` times the
    !> factor 1 hard-coded here (presumably the byte size of one
    !> `logical(c_bool)` element -- confirm).
    !>
    !> @param[inout] dest   destination array
    !> @param[in]    src    source array
    !> @param[in]    length copy length as `c_int`, widened to `c_size_t`
    !> @param[in]    myKind memory copy type, passed through unchanged
    !> @return status value returned by `cudaMemcpy_`
    function cudaMemcpy_l_4_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:,:,:,:)
      logical(c_bool), target, intent(in)    :: src(:,:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)), intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_l_4_c_int
      ! Convert explicitly to c_size_t, the kind cudaMemcpy_ declares for its
      ! size argument, instead of assuming that kind 8 is the same kind.
      cudaMemcpy_l_4_c_int = cudaMemcpy_(c_loc(dest), c_loc(src), &
        int(length, c_size_t) * 1_c_size_t, myKind)
    end function cudaMemcpy_l_4_c_int
                                                              
    !> Copy `logical(c_bool)` data from the rank-4 array `src` to `dest`.
    !>
    !> Forwards the C addresses of both arrays (`c_loc`) to the `bind(c)`
    !> interface `cudaMemcpy_`. The size argument is `length` times the
    !> factor 1 hard-coded here (presumably the byte size of one
    !> `logical(c_bool)` element -- confirm).
    !>
    !> @param[inout] dest   destination array
    !> @param[in]    src    source array
    !> @param[in]    length copy length as `c_size_t`
    !> @param[in]    myKind memory copy type, passed through unchanged
    !> @return status value returned by `cudaMemcpy_`
    function cudaMemcpy_l_4_c_size_t(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:,:,:,:)
      logical(c_bool), target, intent(in)    :: src(:,:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)), intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_l_4_c_size_t
      ! The scale factor is a c_size_t literal so the product keeps the kind
      ! cudaMemcpy_ declares for its size argument (no reliance on kind 8).
      cudaMemcpy_l_4_c_size_t = cudaMemcpy_(c_loc(dest), c_loc(src), &
        length * 1_c_size_t, myKind)
    end function cudaMemcpy_l_4_c_size_t

!> Copy `logical(c_bool)` data from the rank-4 array `src` to `dest`,
!> taking the transfer size from `dest`.
!>
!> Forwards the C addresses of both arrays (`c_loc`) to the `bind(c)`
!> interface `cudaMemcpy_`. The size argument is `size(dest)` times the
!> factor 1 hard-coded here (presumably the byte size of one
!> `logical(c_bool)` element -- confirm). The extent of `src` is not
!> consulted.
!>
!> @param[inout] dest   destination array; its element count sets the size
!> @param[in]    src    source array
!> @param[in]    myKind memory copy type, passed through unchanged
!> @return status value returned by `cudaMemcpy_`
function cudaMemcpy_l_4(dest,src,myKind)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  logical(c_bool), target, intent(inout) :: dest(:,:,:,:)
  logical(c_bool), target, intent(in)    :: src(:,:,:,:)
  integer(kind(cudaMemcpyHostToHost)), intent(in) :: myKind
  integer(kind(cudaSuccess)) :: cudaMemcpy_l_4
  ! Ask for the element count directly in c_size_t: a default-integer
  ! size() cannot represent arrays with more than huge(0) elements.
  cudaMemcpy_l_4 = cudaMemcpy_(c_loc(dest), c_loc(src), &
    size(dest, kind=c_size_t) * 1_c_size_t, myKind)
end function cudaMemcpy_l_4
                                                              
    !> Copy `logical(c_bool)` data from the rank-5 array `src` to `dest`.
    !>
    !> Forwards the C addresses of both arrays (`c_loc`) to the `bind(c)`
    !> interface `cudaMemcpy_`. The size argument is `length` times the
    !> factor 1 hard-coded here (presumably the byte size of one
    !> `logical(c_bool)` element -- confirm).
    !>
    !> @param[inout] dest   destination array
    !> @param[in]    src    source array
    !> @param[in]    length copy length as `c_int`, widened to `c_size_t`
    !> @param[in]    myKind memory copy type, passed through unchanged
    !> @return status value returned by `cudaMemcpy_`
    function cudaMemcpy_l_5_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:,:,:,:,:)
      logical(c_bool), target, intent(in)    :: src(:,:,:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)), intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_l_5_c_int
      ! Convert explicitly to c_size_t, the kind cudaMemcpy_ declares for its
      ! size argument, instead of assuming that kind 8 is the same kind.
      cudaMemcpy_l_5_c_int = cudaMemcpy_(c_loc(dest), c_loc(src), &
        int(length, c_size_t) * 1_c_size_t, myKind)
    end function cudaMemcpy_l_5_c_int
                                                              
    !> Copy `logical(c_bool)` data from the rank-5 array `src` to `dest`.
    !>
    !> Forwards the C addresses of both arrays (`c_loc`) to the `bind(c)`
    !> interface `cudaMemcpy_`. The size argument is `length` times the
    !> factor 1 hard-coded here (presumably the byte size of one
    !> `logical(c_bool)` element -- confirm).
    !>
    !> @param[inout] dest   destination array
    !> @param[in]    src    source array
    !> @param[in]    length copy length as `c_size_t`
    !> @param[in]    myKind memory copy type, passed through unchanged
    !> @return status value returned by `cudaMemcpy_`
    function cudaMemcpy_l_5_c_size_t(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:,:,:,:,:)
      logical(c_bool), target, intent(in)    :: src(:,:,:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)), intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_l_5_c_size_t
      ! The scale factor is a c_size_t literal so the product keeps the kind
      ! cudaMemcpy_ declares for its size argument (no reliance on kind 8).
      cudaMemcpy_l_5_c_size_t = cudaMemcpy_(c_loc(dest), c_loc(src), &
        length * 1_c_size_t, myKind)
    end function cudaMemcpy_l_5_c_size_t

!> Whole-array copy wrapper for rank-5 logical(c_bool) arrays.
!> The byte count is size(dest) times 1 and is passed, together with the C
!> addresses of dest and src and with myKind, to cudaMemcpy_.
!> size is evaluated with kind c_size_t so the element count cannot wrap
!> when dest holds more elements than a default integer can represent.
!> @return the value returned by cudaMemcpy_.
function cudaMemcpy_l_5(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool),target,dimension(:,:,:,:,:),intent(inout) :: dest
      logical(c_bool),target,dimension(:,:,:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_l_5
      !
      cudaMemcpy_l_5 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*1_c_size_t,myKind)
    end function cudaMemcpy_l_5
                                                              
    !> Element-count copy wrapper for rank-6 logical(c_bool) arrays, with
    !> the count given as integer(c_int).
    !> Takes the C addresses of dest and src, multiplies length by 1 in
    !> 8-byte integer arithmetic to form the byte count, and passes these
    !> together with myKind to cudaMemcpy_.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_l_6_c_int(dest,src,length,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool),intent(inout),target :: dest(:,:,:,:,:,:)
      logical(c_bool),intent(in),target    :: src(:,:,:,:,:,:)
      integer(c_int),intent(in)            :: length
      integer(kind(cudaMemcpyHostToHost))  :: myKind
      integer(kind(cudaSuccess))           :: copy_status
      type(c_ptr) :: dest_addr,src_addr
      !
      dest_addr = c_loc(dest)
      src_addr = c_loc(src)
      copy_status = cudaMemcpy_(dest_addr,src_addr,length*1_8,myKind)
    end function cudaMemcpy_l_6_c_int
                                                              
    !> Element-count copy wrapper for rank-6 logical(c_bool) arrays, with
    !> the count given as integer(c_size_t).
    !> Takes the C addresses of dest and src, multiplies length by 1 in
    !> 8-byte integer arithmetic to form the byte count, and passes these
    !> together with myKind to cudaMemcpy_.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_l_6_c_size_t(dest,src,length,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool),intent(inout),target :: dest(:,:,:,:,:,:)
      logical(c_bool),intent(in),target    :: src(:,:,:,:,:,:)
      integer(c_size_t),intent(in)         :: length
      integer(kind(cudaMemcpyHostToHost))  :: myKind
      integer(kind(cudaSuccess))           :: copy_status
      type(c_ptr) :: dest_addr,src_addr
      !
      dest_addr = c_loc(dest)
      src_addr = c_loc(src)
      copy_status = cudaMemcpy_(dest_addr,src_addr,length*1_8,myKind)
    end function cudaMemcpy_l_6_c_size_t

!> Whole-array copy wrapper for rank-6 logical(c_bool) arrays.
!> The byte count is size(dest) times 1 and is passed, together with the C
!> addresses of dest and src and with myKind, to cudaMemcpy_.
!> size is evaluated with kind c_size_t so the element count cannot wrap
!> when dest holds more elements than a default integer can represent.
!> @return the value returned by cudaMemcpy_.
function cudaMemcpy_l_6(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool),target,dimension(:,:,:,:,:,:),intent(inout) :: &
&dest
      logical(c_bool),target,dimension(:,:,:,:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_l_6
      !
      cudaMemcpy_l_6 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*1_c_size_t,myKind)
    end function cudaMemcpy_l_6
                                                              
    !> Element-count copy wrapper for rank-7 logical(c_bool) arrays, with
    !> the count given as integer(c_int).
    !> Takes the C addresses of dest and src, multiplies length by 1 in
    !> 8-byte integer arithmetic to form the byte count, and passes these
    !> together with myKind to cudaMemcpy_.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_l_7_c_int(dest,src,length,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool),intent(inout),target :: dest(:,:,:,:,:,:,:)
      logical(c_bool),intent(in),target    :: src(:,:,:,:,:,:,:)
      integer(c_int),intent(in)            :: length
      integer(kind(cudaMemcpyHostToHost))  :: myKind
      integer(kind(cudaSuccess))           :: copy_status
      type(c_ptr) :: dest_addr,src_addr
      !
      dest_addr = c_loc(dest)
      src_addr = c_loc(src)
      copy_status = cudaMemcpy_(dest_addr,src_addr,length*1_8,myKind)
    end function cudaMemcpy_l_7_c_int
                                                              
    !> Element-count copy wrapper for rank-7 logical(c_bool) arrays, with
    !> the count given as integer(c_size_t).
    !> Takes the C addresses of dest and src, multiplies length by 1 in
    !> 8-byte integer arithmetic to form the byte count, and passes these
    !> together with myKind to cudaMemcpy_.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_l_7_c_size_t(dest,src,length,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool),intent(inout),target :: dest(:,:,:,:,:,:,:)
      logical(c_bool),intent(in),target    :: src(:,:,:,:,:,:,:)
      integer(c_size_t),intent(in)         :: length
      integer(kind(cudaMemcpyHostToHost))  :: myKind
      integer(kind(cudaSuccess))           :: copy_status
      type(c_ptr) :: dest_addr,src_addr
      !
      dest_addr = c_loc(dest)
      src_addr = c_loc(src)
      copy_status = cudaMemcpy_(dest_addr,src_addr,length*1_8,myKind)
    end function cudaMemcpy_l_7_c_size_t

!> Whole-array copy wrapper for rank-7 logical(c_bool) arrays.
!> The byte count is size(dest) times 1 and is passed, together with the C
!> addresses of dest and src and with myKind, to cudaMemcpy_.
!> size is evaluated with kind c_size_t so the element count cannot wrap
!> when dest holds more elements than a default integer can represent.
!> @return the value returned by cudaMemcpy_.
function cudaMemcpy_l_7(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool),target,dimension(:,:,:,:,:,:,:),intent(inout) :: &
&dest
      logical(c_bool),target,dimension(:,:,:,:,:,:,:),intent(in)    :: &
&src
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_l_7
      !
      cudaMemcpy_l_7 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*1_c_size_t,myKind)
    end function cudaMemcpy_l_7
                                                              
    !> Element-count copy wrapper for scalar integer(c_int) arguments, with
    !> the count given as integer(c_int).
    !> Takes the C addresses of dest and src, multiplies length by 4 in
    !> 8-byte integer arithmetic to form the byte count, and passes these
    !> together with myKind to cudaMemcpy_.
    !> NOTE(review): dest and src are presumably the first elements of
    !> larger buffers when length exceeds 1 - confirm against callers.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i4_0_c_int(dest,src,length,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest
      integer(c_int),intent(in),target    :: src
      integer(c_int),intent(in)           :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess))          :: copy_status
      type(c_ptr) :: dest_addr,src_addr
      !
      dest_addr = c_loc(dest)
      src_addr = c_loc(src)
      copy_status = cudaMemcpy_(dest_addr,src_addr,length*4_8,myKind)
    end function cudaMemcpy_i4_0_c_int
                                                              
    !> Element-count copy wrapper for scalar integer(c_int) arguments, with
    !> the count given as integer(c_size_t).
    !> Takes the C addresses of dest and src, multiplies length by 4 in
    !> 8-byte integer arithmetic to form the byte count, and passes these
    !> together with myKind to cudaMemcpy_.
    !> NOTE(review): dest and src are presumably the first elements of
    !> larger buffers when length exceeds 1 - confirm against callers.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i4_0_c_size_t(dest,src,length,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest
      integer(c_int),intent(in),target    :: src
      integer(c_size_t),intent(in)        :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess))          :: copy_status
      type(c_ptr) :: dest_addr,src_addr
      !
      dest_addr = c_loc(dest)
      src_addr = c_loc(src)
      copy_status = cudaMemcpy_(dest_addr,src_addr,length*4_8,myKind)
    end function cudaMemcpy_i4_0_c_size_t

!> Single-value copy wrapper for scalar integer(c_int) arguments.
!> Passes the C addresses of dest and src, a fixed byte count of 4 and
!> myKind to cudaMemcpy_.
!> @return the value returned by cudaMemcpy_.
function cudaMemcpy_i4_0(dest,src,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest
      integer(c_int),intent(in),target    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess))          :: copy_status
      type(c_ptr) :: dest_addr,src_addr
      !
      dest_addr = c_loc(dest)
      src_addr = c_loc(src)
      copy_status = cudaMemcpy_(dest_addr,src_addr,4_8,myKind)
    end function cudaMemcpy_i4_0
                                                              
    !> Element-count copy wrapper for rank-1 integer(c_int) arrays, with the
    !> count given as integer(c_int).
    !> Takes the C addresses of dest and src, multiplies length by 4 in
    !> 8-byte integer arithmetic to form the byte count, and passes these
    !> together with myKind to cudaMemcpy_.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i4_1_c_int(dest,src,length,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest(:)
      integer(c_int),intent(in),target    :: src(:)
      integer(c_int),intent(in)           :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess))          :: copy_status
      type(c_ptr) :: dest_addr,src_addr
      !
      dest_addr = c_loc(dest)
      src_addr = c_loc(src)
      copy_status = cudaMemcpy_(dest_addr,src_addr,length*4_8,myKind)
    end function cudaMemcpy_i4_1_c_int
                                                              
    !> Element-count copy wrapper for rank-1 integer(c_int) arrays, with the
    !> count given as integer(c_size_t).
    !> Takes the C addresses of dest and src, multiplies length by 4 in
    !> 8-byte integer arithmetic to form the byte count, and passes these
    !> together with myKind to cudaMemcpy_.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i4_1_c_size_t(dest,src,length,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest(:)
      integer(c_int),intent(in),target    :: src(:)
      integer(c_size_t),intent(in)        :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess))          :: copy_status
      type(c_ptr) :: dest_addr,src_addr
      !
      dest_addr = c_loc(dest)
      src_addr = c_loc(src)
      copy_status = cudaMemcpy_(dest_addr,src_addr,length*4_8,myKind)
    end function cudaMemcpy_i4_1_c_size_t

!> Whole-array copy wrapper for rank-1 integer(c_int) arrays.
!> The byte count is size(dest) times 4 and is passed, together with the C
!> addresses of dest and src and with myKind, to cudaMemcpy_.
!> size is evaluated with kind c_size_t so the element count cannot wrap
!> when dest holds more elements than a default integer can represent.
!> @return the value returned by cudaMemcpy_.
function cudaMemcpy_i4_1(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),target,dimension(:),intent(inout) :: dest
      integer(c_int),target,dimension(:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i4_1
      !
      cudaMemcpy_i4_1 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*4_c_size_t,myKind)
    end function cudaMemcpy_i4_1
                                                              
    !> Element-count copy wrapper for rank-2 integer(c_int) arrays, with the
    !> count given as integer(c_int).
    !> Takes the C addresses of dest and src, multiplies length by 4 in
    !> 8-byte integer arithmetic to form the byte count, and passes these
    !> together with myKind to cudaMemcpy_.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i4_2_c_int(dest,src,length,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest(:,:)
      integer(c_int),intent(in),target    :: src(:,:)
      integer(c_int),intent(in)           :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess))          :: copy_status
      type(c_ptr) :: dest_addr,src_addr
      !
      dest_addr = c_loc(dest)
      src_addr = c_loc(src)
      copy_status = cudaMemcpy_(dest_addr,src_addr,length*4_8,myKind)
    end function cudaMemcpy_i4_2_c_int
                                                              
    !> Element-count copy wrapper for rank-2 integer(c_int) arrays, with the
    !> count given as integer(c_size_t).
    !> Takes the C addresses of dest and src, multiplies length by 4 in
    !> 8-byte integer arithmetic to form the byte count, and passes these
    !> together with myKind to cudaMemcpy_.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i4_2_c_size_t(dest,src,length,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest(:,:)
      integer(c_int),intent(in),target    :: src(:,:)
      integer(c_size_t),intent(in)        :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess))          :: copy_status
      type(c_ptr) :: dest_addr,src_addr
      !
      dest_addr = c_loc(dest)
      src_addr = c_loc(src)
      copy_status = cudaMemcpy_(dest_addr,src_addr,length*4_8,myKind)
    end function cudaMemcpy_i4_2_c_size_t

!> Whole-array copy wrapper for rank-2 integer(c_int) arrays.
!> The byte count is size(dest) times 4 and is passed, together with the C
!> addresses of dest and src and with myKind, to cudaMemcpy_.
!> size is evaluated with kind c_size_t so the element count cannot wrap
!> when dest holds more elements than a default integer can represent.
!> @return the value returned by cudaMemcpy_.
function cudaMemcpy_i4_2(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),target,dimension(:,:),intent(inout) :: dest
      integer(c_int),target,dimension(:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i4_2
      !
      cudaMemcpy_i4_2 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*4_c_size_t,myKind)
    end function cudaMemcpy_i4_2
                                                              
    !> Element-count copy wrapper for rank-3 integer(c_int) arrays, with the
    !> count given as integer(c_int).
    !> Takes the C addresses of dest and src, multiplies length by 4 in
    !> 8-byte integer arithmetic to form the byte count, and passes these
    !> together with myKind to cudaMemcpy_.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i4_3_c_int(dest,src,length,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest(:,:,:)
      integer(c_int),intent(in),target    :: src(:,:,:)
      integer(c_int),intent(in)           :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess))          :: copy_status
      type(c_ptr) :: dest_addr,src_addr
      !
      dest_addr = c_loc(dest)
      src_addr = c_loc(src)
      copy_status = cudaMemcpy_(dest_addr,src_addr,length*4_8,myKind)
    end function cudaMemcpy_i4_3_c_int
                                                              
    !> Element-count copy wrapper for rank-3 integer(c_int) arrays, with the
    !> count given as integer(c_size_t).
    !> Takes the C addresses of dest and src, multiplies length by 4 in
    !> 8-byte integer arithmetic to form the byte count, and passes these
    !> together with myKind to cudaMemcpy_.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i4_3_c_size_t(dest,src,length,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest(:,:,:)
      integer(c_int),intent(in),target    :: src(:,:,:)
      integer(c_size_t),intent(in)        :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess))          :: copy_status
      type(c_ptr) :: dest_addr,src_addr
      !
      dest_addr = c_loc(dest)
      src_addr = c_loc(src)
      copy_status = cudaMemcpy_(dest_addr,src_addr,length*4_8,myKind)
    end function cudaMemcpy_i4_3_c_size_t

!> Whole-array copy wrapper for rank-3 integer(c_int) arrays.
!> The byte count is size(dest) times 4 and is passed, together with the C
!> addresses of dest and src and with myKind, to cudaMemcpy_.
!> size is evaluated with kind c_size_t so the element count cannot wrap
!> when dest holds more elements than a default integer can represent.
!> @return the value returned by cudaMemcpy_.
function cudaMemcpy_i4_3(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),target,dimension(:,:,:),intent(inout) :: dest
      integer(c_int),target,dimension(:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i4_3
      !
      cudaMemcpy_i4_3 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*4_c_size_t,myKind)
    end function cudaMemcpy_i4_3
                                                              
    !> Element-count copy wrapper for rank-4 integer(c_int) arrays, with the
    !> count given as integer(c_int).
    !> Takes the C addresses of dest and src, multiplies length by 4 in
    !> 8-byte integer arithmetic to form the byte count, and passes these
    !> together with myKind to cudaMemcpy_.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i4_4_c_int(dest,src,length,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest(:,:,:,:)
      integer(c_int),intent(in),target    :: src(:,:,:,:)
      integer(c_int),intent(in)           :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess))          :: copy_status
      type(c_ptr) :: dest_addr,src_addr
      !
      dest_addr = c_loc(dest)
      src_addr = c_loc(src)
      copy_status = cudaMemcpy_(dest_addr,src_addr,length*4_8,myKind)
    end function cudaMemcpy_i4_4_c_int
                                                              
    !> Element-count copy wrapper for rank-4 integer(c_int) arrays, with the
    !> count given as integer(c_size_t).
    !> Takes the C addresses of dest and src, multiplies length by 4 in
    !> 8-byte integer arithmetic to form the byte count, and passes these
    !> together with myKind to cudaMemcpy_.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i4_4_c_size_t(dest,src,length,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest(:,:,:,:)
      integer(c_int),intent(in),target    :: src(:,:,:,:)
      integer(c_size_t),intent(in)        :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess))          :: copy_status
      type(c_ptr) :: dest_addr,src_addr
      !
      dest_addr = c_loc(dest)
      src_addr = c_loc(src)
      copy_status = cudaMemcpy_(dest_addr,src_addr,length*4_8,myKind)
    end function cudaMemcpy_i4_4_c_size_t

!> Whole-array copy wrapper for rank-4 integer(c_int) arrays.
!> The byte count is size(dest) times 4 and is passed, together with the C
!> addresses of dest and src and with myKind, to cudaMemcpy_.
!> size is evaluated with kind c_size_t so the element count cannot wrap
!> when dest holds more elements than a default integer can represent.
!> @return the value returned by cudaMemcpy_.
function cudaMemcpy_i4_4(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),target,dimension(:,:,:,:),intent(inout) :: dest
      integer(c_int),target,dimension(:,:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i4_4
      !
      cudaMemcpy_i4_4 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*4_c_size_t,myKind)
    end function cudaMemcpy_i4_4
                                                              
    !> Element-count copy wrapper for rank-5 integer(c_int) arrays, with the
    !> count given as integer(c_int).
    !> Takes the C addresses of dest and src, multiplies length by 4 in
    !> 8-byte integer arithmetic to form the byte count, and passes these
    !> together with myKind to cudaMemcpy_.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i4_5_c_int(dest,src,length,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest(:,:,:,:,:)
      integer(c_int),intent(in),target    :: src(:,:,:,:,:)
      integer(c_int),intent(in)           :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess))          :: copy_status
      type(c_ptr) :: dest_addr,src_addr
      !
      dest_addr = c_loc(dest)
      src_addr = c_loc(src)
      copy_status = cudaMemcpy_(dest_addr,src_addr,length*4_8,myKind)
    end function cudaMemcpy_i4_5_c_int
                                                              
    !> Element-count copy wrapper for rank-5 integer(c_int) arrays, with the
    !> count given as integer(c_size_t).
    !> Takes the C addresses of dest and src, multiplies length by 4 in
    !> 8-byte integer arithmetic to form the byte count, and passes these
    !> together with myKind to cudaMemcpy_.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i4_5_c_size_t(dest,src,length,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest(:,:,:,:,:)
      integer(c_int),intent(in),target    :: src(:,:,:,:,:)
      integer(c_size_t),intent(in)        :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess))          :: copy_status
      type(c_ptr) :: dest_addr,src_addr
      !
      dest_addr = c_loc(dest)
      src_addr = c_loc(src)
      copy_status = cudaMemcpy_(dest_addr,src_addr,length*4_8,myKind)
    end function cudaMemcpy_i4_5_c_size_t

!> Whole-array copy wrapper for rank-5 integer(c_int) arrays.
!> The byte count is size(dest) times 4 and is passed, together with the C
!> addresses of dest and src and with myKind, to cudaMemcpy_.
!> size is evaluated with kind c_size_t so the element count cannot wrap
!> when dest holds more elements than a default integer can represent.
!> @return the value returned by cudaMemcpy_.
function cudaMemcpy_i4_5(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),target,dimension(:,:,:,:,:),intent(inout) :: dest
      integer(c_int),target,dimension(:,:,:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i4_5
      !
      cudaMemcpy_i4_5 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*4_c_size_t,myKind)
    end function cudaMemcpy_i4_5
                                                              
    !> Element-count copy wrapper for rank-6 integer(c_int) arrays, with the
    !> count given as integer(c_int).
    !> Takes the C addresses of dest and src, multiplies length by 4 in
    !> 8-byte integer arithmetic to form the byte count, and passes these
    !> together with myKind to cudaMemcpy_.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i4_6_c_int(dest,src,length,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest(:,:,:,:,:,:)
      integer(c_int),intent(in),target    :: src(:,:,:,:,:,:)
      integer(c_int),intent(in)           :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess))          :: copy_status
      type(c_ptr) :: dest_addr,src_addr
      !
      dest_addr = c_loc(dest)
      src_addr = c_loc(src)
      copy_status = cudaMemcpy_(dest_addr,src_addr,length*4_8,myKind)
    end function cudaMemcpy_i4_6_c_int
                                                              
    !> Element-count copy wrapper for rank-6 integer(c_int) arrays, with the
    !> count given as integer(c_size_t).
    !> Takes the C addresses of dest and src, multiplies length by 4 in
    !> 8-byte integer arithmetic to form the byte count, and passes these
    !> together with myKind to cudaMemcpy_.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i4_6_c_size_t(dest,src,length,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest(:,:,:,:,:,:)
      integer(c_int),intent(in),target    :: src(:,:,:,:,:,:)
      integer(c_size_t),intent(in)        :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess))          :: copy_status
      type(c_ptr) :: dest_addr,src_addr
      !
      dest_addr = c_loc(dest)
      src_addr = c_loc(src)
      copy_status = cudaMemcpy_(dest_addr,src_addr,length*4_8,myKind)
    end function cudaMemcpy_i4_6_c_size_t

!> Whole-array copy wrapper for rank-6 integer(c_int) arrays.
!> The byte count is size(dest) times 4 and is passed, together with the C
!> addresses of dest and src and with myKind, to cudaMemcpy_.
!> size is evaluated with kind c_size_t so the element count cannot wrap
!> when dest holds more elements than a default integer can represent.
!> @return the value returned by cudaMemcpy_.
function cudaMemcpy_i4_6(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),target,dimension(:,:,:,:,:,:),intent(inout) :: dest
      integer(c_int),target,dimension(:,:,:,:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i4_6
      !
      cudaMemcpy_i4_6 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*4_c_size_t,myKind)
    end function cudaMemcpy_i4_6
                                                              
    !> Element-count copy wrapper for rank-7 integer(c_int) arrays, with the
    !> count given as integer(c_int).
    !> Takes the C addresses of dest and src, multiplies length by 4 in
    !> 8-byte integer arithmetic to form the byte count, and passes these
    !> together with myKind to cudaMemcpy_.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i4_7_c_int(dest,src,length,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest(:,:,:,:,:,:,:)
      integer(c_int),intent(in),target    :: src(:,:,:,:,:,:,:)
      integer(c_int),intent(in)           :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess))          :: copy_status
      type(c_ptr) :: dest_addr,src_addr
      !
      dest_addr = c_loc(dest)
      src_addr = c_loc(src)
      copy_status = cudaMemcpy_(dest_addr,src_addr,length*4_8,myKind)
    end function cudaMemcpy_i4_7_c_int
                                                              
    !> Element-count copy wrapper for rank-7 integer(c_int) arrays, with the
    !> count given as integer(c_size_t).
    !> Takes the C addresses of dest and src, multiplies length by 4 in
    !> 8-byte integer arithmetic to form the byte count, and passes these
    !> together with myKind to cudaMemcpy_.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i4_7_c_size_t(dest,src,length,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest(:,:,:,:,:,:,:)
      integer(c_int),intent(in),target    :: src(:,:,:,:,:,:,:)
      integer(c_size_t),intent(in)        :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess))          :: copy_status
      type(c_ptr) :: dest_addr,src_addr
      !
      dest_addr = c_loc(dest)
      src_addr = c_loc(src)
      copy_status = cudaMemcpy_(dest_addr,src_addr,length*4_8,myKind)
    end function cudaMemcpy_i4_7_c_size_t

!> Whole-array copy wrapper for rank-7 integer(c_int) arrays.
!> The byte count is size(dest) times 4 and is passed, together with the C
!> addresses of dest and src and with myKind, to cudaMemcpy_.
!> size is evaluated with kind c_size_t so the element count cannot wrap
!> when dest holds more elements than a default integer can represent.
!> @return the value returned by cudaMemcpy_.
function cudaMemcpy_i4_7(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),target,dimension(:,:,:,:,:,:,:),intent(inout) :: &
&dest
      integer(c_int),target,dimension(:,:,:,:,:,:,:),intent(in)    :: &
&src
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i4_7
      !
      cudaMemcpy_i4_7 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*4_c_size_t,myKind)
    end function cudaMemcpy_i4_7
                                                              
    !> Element-count copy wrapper for scalar integer(c_long) arguments, with
    !> the count given as integer(c_int).
    !> Passes the C addresses of dest and src, a byte count and myKind to
    !> cudaMemcpy_. The byte count is length times the element width, which
    !> is taken from storage_size(dest) (a size in bits) divided by 8 rather
    !> than a literal 8, so it stays correct where c_long is not 8 bytes.
    !> NOTE(review): dest and src are presumably the first elements of
    !> larger buffers when length exceeds 1 - confirm against callers.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i8_0_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,intent(inout) :: dest
      integer(c_long),target,intent(in)    :: src
      integer(c_int),intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i8_0_c_int
      !
      cudaMemcpy_i8_0_c_int = cudaMemcpy_(c_loc(dest),c_loc(src),&
&length*(storage_size(dest,kind=c_size_t)/8_c_size_t),myKind)
    end function cudaMemcpy_i8_0_c_int
                                                              
    !> Element-count copy wrapper for scalar integer(c_long) arguments, with
    !> the count given as integer(c_size_t).
    !> Passes the C addresses of dest and src, a byte count and myKind to
    !> cudaMemcpy_. The byte count is length times the element width, which
    !> is taken from storage_size(dest) (a size in bits) divided by 8 rather
    !> than a literal 8, so it stays correct where c_long is not 8 bytes.
    !> NOTE(review): dest and src are presumably the first elements of
    !> larger buffers when length exceeds 1 - confirm against callers.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i8_0_c_size_t(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,intent(inout) :: dest
      integer(c_long),target,intent(in)    :: src
      integer(c_size_t),intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i8_0_c_size_t
      !
      cudaMemcpy_i8_0_c_size_t = cudaMemcpy_(c_loc(dest),c_loc(src),&
&length*(storage_size(dest,kind=c_size_t)/8_c_size_t),myKind)
    end function cudaMemcpy_i8_0_c_size_t

!> Single-value copy wrapper for scalar integer(c_long) arguments.
!> Passes the C addresses of dest and src, the byte width of one element
!> and myKind to cudaMemcpy_. The width is taken from storage_size(dest)
!> (a size in bits) divided by 8 rather than a literal 8, so it stays
!> correct where c_long is not 8 bytes.
!> @return the value returned by cudaMemcpy_.
function cudaMemcpy_i8_0(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,intent(inout) :: dest
      integer(c_long),target,intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i8_0
      !
      cudaMemcpy_i8_0 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&storage_size(dest,kind=c_size_t)/8_c_size_t,myKind)
    end function cudaMemcpy_i8_0
                                                              
    !> Element-count copy wrapper for rank-1 integer(c_long) arrays, with
    !> the count given as integer(c_int).
    !> Passes the C addresses of dest and src, a byte count and myKind to
    !> cudaMemcpy_. The byte count is length times the element width, which
    !> is taken from storage_size(dest) (a size in bits) divided by 8 rather
    !> than a literal 8, so it stays correct where c_long is not 8 bytes.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i8_1_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:),intent(inout) :: dest
      integer(c_long),target,dimension(:),intent(in)    :: src
      integer(c_int),intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i8_1_c_int
      !
      cudaMemcpy_i8_1_c_int = cudaMemcpy_(c_loc(dest),c_loc(src),&
&length*(storage_size(dest,kind=c_size_t)/8_c_size_t),myKind)
    end function cudaMemcpy_i8_1_c_int
                                                              
    !> Element-count copy wrapper for rank-1 integer(c_long) arrays, with
    !> the count given as integer(c_size_t).
    !> Passes the C addresses of dest and src, a byte count and myKind to
    !> cudaMemcpy_. The byte count is length times the element width, which
    !> is taken from storage_size(dest) (a size in bits) divided by 8 rather
    !> than a literal 8, so it stays correct where c_long is not 8 bytes.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i8_1_c_size_t(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:),intent(inout) :: dest
      integer(c_long),target,dimension(:),intent(in)    :: src
      integer(c_size_t),intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i8_1_c_size_t
      !
      cudaMemcpy_i8_1_c_size_t = cudaMemcpy_(c_loc(dest),c_loc(src),&
&length*(storage_size(dest,kind=c_size_t)/8_c_size_t),myKind)
    end function cudaMemcpy_i8_1_c_size_t

!> Whole-array copy wrapper for rank-1 integer(c_long) arrays.
!> Passes the C addresses of dest and src, a byte count and myKind to
!> cudaMemcpy_. The byte count is size(dest) times the element width.
!> size is evaluated with kind c_size_t so the element count cannot wrap
!> for very large arrays, and the width comes from storage_size(dest)
!> (bits) divided by 8 so it stays correct where c_long is not 8 bytes.
!> @return the value returned by cudaMemcpy_.
function cudaMemcpy_i8_1(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:),intent(inout) :: dest
      integer(c_long),target,dimension(:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i8_1
      !
      cudaMemcpy_i8_1 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*&
&(storage_size(dest,kind=c_size_t)/8_c_size_t),myKind)
    end function cudaMemcpy_i8_1
                                                              
    !> Element-count copy wrapper for rank-2 integer(c_long) arrays, with
    !> the count given as integer(c_int).
    !> Passes the C addresses of dest and src, a byte count and myKind to
    !> cudaMemcpy_. The byte count is length times the element width, which
    !> is taken from storage_size(dest) (a size in bits) divided by 8 rather
    !> than a literal 8, so it stays correct where c_long is not 8 bytes.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i8_2_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:),intent(inout) :: dest
      integer(c_long),target,dimension(:,:),intent(in)    :: src
      integer(c_int),intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i8_2_c_int
      !
      cudaMemcpy_i8_2_c_int = cudaMemcpy_(c_loc(dest),c_loc(src),&
&length*(storage_size(dest,kind=c_size_t)/8_c_size_t),myKind)
    end function cudaMemcpy_i8_2_c_int
                                                              
    !> Element-count copy wrapper for rank-2 integer(c_long) arrays, with
    !> the count given as integer(c_size_t).
    !> Passes the C addresses of dest and src, a byte count and myKind to
    !> cudaMemcpy_. The byte count is length times the element width, which
    !> is taken from storage_size(dest) (a size in bits) divided by 8 rather
    !> than a literal 8, so it stays correct where c_long is not 8 bytes.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i8_2_c_size_t(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:),intent(inout) :: dest
      integer(c_long),target,dimension(:,:),intent(in)    :: src
      integer(c_size_t),intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i8_2_c_size_t
      !
      cudaMemcpy_i8_2_c_size_t = cudaMemcpy_(c_loc(dest),c_loc(src),&
&length*(storage_size(dest,kind=c_size_t)/8_c_size_t),myKind)
    end function cudaMemcpy_i8_2_c_size_t

!> Whole-array copy wrapper for rank-2 integer(c_long) arrays.
!> Passes the C addresses of dest and src, a byte count and myKind to
!> cudaMemcpy_. The byte count is size(dest) times the element width.
!> size is evaluated with kind c_size_t so the element count cannot wrap
!> for very large arrays, and the width comes from storage_size(dest)
!> (bits) divided by 8 so it stays correct where c_long is not 8 bytes.
!> @return the value returned by cudaMemcpy_.
function cudaMemcpy_i8_2(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:),intent(inout) :: dest
      integer(c_long),target,dimension(:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i8_2
      !
      cudaMemcpy_i8_2 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*&
&(storage_size(dest,kind=c_size_t)/8_c_size_t),myKind)
    end function cudaMemcpy_i8_2
                                                              
    !> Element-count copy wrapper for rank-3 integer(c_long) arrays, with
    !> the count given as integer(c_int).
    !> Passes the C addresses of dest and src, a byte count and myKind to
    !> cudaMemcpy_. The byte count is length times the element width, which
    !> is taken from storage_size(dest) (a size in bits) divided by 8 rather
    !> than a literal 8, so it stays correct where c_long is not 8 bytes.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i8_3_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:,:),intent(inout) :: dest
      integer(c_long),target,dimension(:,:,:),intent(in)    :: src
      integer(c_int),intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i8_3_c_int
      !
      cudaMemcpy_i8_3_c_int = cudaMemcpy_(c_loc(dest),c_loc(src),&
&length*(storage_size(dest,kind=c_size_t)/8_c_size_t),myKind)
    end function cudaMemcpy_i8_3_c_int
                                                              
    !> Element-count copy wrapper for rank-3 integer(c_long) arrays, with
    !> the count given as integer(c_size_t).
    !> Passes the C addresses of dest and src, a byte count and myKind to
    !> cudaMemcpy_. The byte count is length times the element width, which
    !> is taken from storage_size(dest) (a size in bits) divided by 8 rather
    !> than a literal 8, so it stays correct where c_long is not 8 bytes.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i8_3_c_size_t(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:,:),intent(inout) :: dest
      integer(c_long),target,dimension(:,:,:),intent(in)    :: src
      integer(c_size_t),intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i8_3_c_size_t
      !
      cudaMemcpy_i8_3_c_size_t = cudaMemcpy_(c_loc(dest),c_loc(src),&
&length*(storage_size(dest,kind=c_size_t)/8_c_size_t),myKind)
    end function cudaMemcpy_i8_3_c_size_t

!> Whole-array copy wrapper for rank-3 integer(c_long) arrays.
!> Passes the C addresses of dest and src, a byte count and myKind to
!> cudaMemcpy_. The byte count is size(dest) times the element width.
!> size is evaluated with kind c_size_t so the element count cannot wrap
!> for very large arrays, and the width comes from storage_size(dest)
!> (bits) divided by 8 so it stays correct where c_long is not 8 bytes.
!> @return the value returned by cudaMemcpy_.
function cudaMemcpy_i8_3(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:,:),intent(inout) :: dest
      integer(c_long),target,dimension(:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i8_3
      !
      cudaMemcpy_i8_3 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*&
&(storage_size(dest,kind=c_size_t)/8_c_size_t),myKind)
    end function cudaMemcpy_i8_3
                                                              
    !> Element-count copy wrapper for rank-4 integer(c_long) arrays, with
    !> the count given as integer(c_int).
    !> Passes the C addresses of dest and src, a byte count and myKind to
    !> cudaMemcpy_. The byte count is length times the element width, which
    !> is taken from storage_size(dest) (a size in bits) divided by 8 rather
    !> than a literal 8, so it stays correct where c_long is not 8 bytes.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i8_4_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:,:,:),intent(inout) :: dest
      integer(c_long),target,dimension(:,:,:,:),intent(in)    :: src
      integer(c_int),intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i8_4_c_int
      !
      cudaMemcpy_i8_4_c_int = cudaMemcpy_(c_loc(dest),c_loc(src),&
&length*(storage_size(dest,kind=c_size_t)/8_c_size_t),myKind)
    end function cudaMemcpy_i8_4_c_int
                                                              
    !> Element-count copy wrapper for rank-4 integer(c_long) arrays, with
    !> the count given as integer(c_size_t).
    !> Passes the C addresses of dest and src, a byte count and myKind to
    !> cudaMemcpy_. The byte count is length times the element width, which
    !> is taken from storage_size(dest) (a size in bits) divided by 8 rather
    !> than a literal 8, so it stays correct where c_long is not 8 bytes.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i8_4_c_size_t(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:,:,:),intent(inout) :: dest
      integer(c_long),target,dimension(:,:,:,:),intent(in)    :: src
      integer(c_size_t),intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i8_4_c_size_t
      !
      cudaMemcpy_i8_4_c_size_t = cudaMemcpy_(c_loc(dest),c_loc(src),&
&length*(storage_size(dest,kind=c_size_t)/8_c_size_t),myKind)
    end function cudaMemcpy_i8_4_c_size_t

!> Whole-array copy wrapper for rank-4 integer(c_long) arrays.
!> Passes the C addresses of dest and src, a byte count and myKind to
!> cudaMemcpy_. The byte count is size(dest) times the element width.
!> size is evaluated with kind c_size_t so the element count cannot wrap
!> for very large arrays, and the width comes from storage_size(dest)
!> (bits) divided by 8 so it stays correct where c_long is not 8 bytes.
!> @return the value returned by cudaMemcpy_.
function cudaMemcpy_i8_4(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:,:,:),intent(inout) :: dest
      integer(c_long),target,dimension(:,:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i8_4
      !
      cudaMemcpy_i8_4 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*&
&(storage_size(dest,kind=c_size_t)/8_c_size_t),myKind)
    end function cudaMemcpy_i8_4
                                                              
    !> Element-count copy wrapper for rank-5 integer(c_long) arrays, with
    !> the count given as integer(c_int).
    !> Passes the C addresses of dest and src, a byte count and myKind to
    !> cudaMemcpy_. The byte count is length times the element width, which
    !> is taken from storage_size(dest) (a size in bits) divided by 8 rather
    !> than a literal 8, so it stays correct where c_long is not 8 bytes.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i8_5_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:,:,:,:),intent(inout) :: dest
      integer(c_long),target,dimension(:,:,:,:,:),intent(in)    :: src
      integer(c_int),intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i8_5_c_int
      !
      cudaMemcpy_i8_5_c_int = cudaMemcpy_(c_loc(dest),c_loc(src),&
&length*(storage_size(dest,kind=c_size_t)/8_c_size_t),myKind)
    end function cudaMemcpy_i8_5_c_int
                                                              
    !> Element-count copy wrapper for rank-5 integer(c_long) arrays, with
    !> the count given as integer(c_size_t).
    !> Passes the C addresses of dest and src, a byte count and myKind to
    !> cudaMemcpy_. The byte count is length times the element width, which
    !> is taken from storage_size(dest) (a size in bits) divided by 8 rather
    !> than a literal 8, so it stays correct where c_long is not 8 bytes.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i8_5_c_size_t(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:,:,:,:),intent(inout) :: dest
      integer(c_long),target,dimension(:,:,:,:,:),intent(in)    :: src
      integer(c_size_t),intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i8_5_c_size_t
      !
      cudaMemcpy_i8_5_c_size_t = cudaMemcpy_(c_loc(dest),c_loc(src),&
&length*(storage_size(dest,kind=c_size_t)/8_c_size_t),myKind)
    end function cudaMemcpy_i8_5_c_size_t

!> Whole-array copy wrapper for rank-5 integer(c_long) arrays.
!> Passes the C addresses of dest and src, a byte count and myKind to
!> cudaMemcpy_. The byte count is size(dest) times the element width.
!> size is evaluated with kind c_size_t so the element count cannot wrap
!> for very large arrays, and the width comes from storage_size(dest)
!> (bits) divided by 8 so it stays correct where c_long is not 8 bytes.
!> @return the value returned by cudaMemcpy_.
function cudaMemcpy_i8_5(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:,:,:,:),intent(inout) :: dest
      integer(c_long),target,dimension(:,:,:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i8_5
      !
      cudaMemcpy_i8_5 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*&
&(storage_size(dest,kind=c_size_t)/8_c_size_t),myKind)
    end function cudaMemcpy_i8_5
                                                              
    !> Element-count copy wrapper for rank-6 integer(c_long) arrays, with
    !> the count given as integer(c_int).
    !> Passes the C addresses of dest and src, a byte count and myKind to
    !> cudaMemcpy_. The byte count is length times the element width, which
    !> is taken from storage_size(dest) (a size in bits) divided by 8 rather
    !> than a literal 8, so it stays correct where c_long is not 8 bytes.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i8_6_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:,:,:,:,:),intent(inout) :: &
&dest
      integer(c_long),target,dimension(:,:,:,:,:,:),intent(in)    :: src
      integer(c_int),intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i8_6_c_int
      !
      cudaMemcpy_i8_6_c_int = cudaMemcpy_(c_loc(dest),c_loc(src),&
&length*(storage_size(dest,kind=c_size_t)/8_c_size_t),myKind)
    end function cudaMemcpy_i8_6_c_int
                                                              
    !> Element-count copy wrapper for rank-6 integer(c_long) arrays, with
    !> the count given as integer(c_size_t).
    !> Passes the C addresses of dest and src, a byte count and myKind to
    !> cudaMemcpy_. The byte count is length times the element width, which
    !> is taken from storage_size(dest) (a size in bits) divided by 8 rather
    !> than a literal 8, so it stays correct where c_long is not 8 bytes.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i8_6_c_size_t(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:,:,:,:,:),intent(inout) :: &
&dest
      integer(c_long),target,dimension(:,:,:,:,:,:),intent(in)    :: src
      integer(c_size_t),intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i8_6_c_size_t
      !
      cudaMemcpy_i8_6_c_size_t = cudaMemcpy_(c_loc(dest),c_loc(src),&
&length*(storage_size(dest,kind=c_size_t)/8_c_size_t),myKind)
    end function cudaMemcpy_i8_6_c_size_t

!> Whole-array copy wrapper for rank-6 integer(c_long) arrays.
!> Passes the C addresses of dest and src, a byte count and myKind to
!> cudaMemcpy_. The byte count is size(dest) times the element width.
!> size is evaluated with kind c_size_t so the element count cannot wrap
!> for very large arrays, and the width comes from storage_size(dest)
!> (bits) divided by 8 so it stays correct where c_long is not 8 bytes.
!> @return the value returned by cudaMemcpy_.
function cudaMemcpy_i8_6(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:,:,:,:,:),intent(inout) :: &
&dest
      integer(c_long),target,dimension(:,:,:,:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i8_6
      !
      cudaMemcpy_i8_6 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*&
&(storage_size(dest,kind=c_size_t)/8_c_size_t),myKind)
    end function cudaMemcpy_i8_6
                                                              
    !> Element-count copy wrapper for rank-7 integer(c_long) arrays, with
    !> the count given as integer(c_int).
    !> Passes the C addresses of dest and src, a byte count and myKind to
    !> cudaMemcpy_. The byte count is length times the element width, which
    !> is taken from storage_size(dest) (a size in bits) divided by 8 rather
    !> than a literal 8, so it stays correct where c_long is not 8 bytes.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i8_7_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:,:,:,:,:,:),intent(inout) :: &
&dest
      integer(c_long),target,dimension(:,:,:,:,:,:,:),intent(in)    :: &
&src
      integer(c_int),intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i8_7_c_int
      !
      cudaMemcpy_i8_7_c_int = cudaMemcpy_(c_loc(dest),c_loc(src),&
&length*(storage_size(dest,kind=c_size_t)/8_c_size_t),myKind)
    end function cudaMemcpy_i8_7_c_int
                                                              
    !> Element-count copy wrapper for rank-7 integer(c_long) arrays, with
    !> the count given as integer(c_size_t).
    !> Passes the C addresses of dest and src, a byte count and myKind to
    !> cudaMemcpy_. The byte count is length times the element width, which
    !> is taken from storage_size(dest) (a size in bits) divided by 8 rather
    !> than a literal 8, so it stays correct where c_long is not 8 bytes.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_i8_7_c_size_t(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:,:,:,:,:,:),intent(inout) :: &
&dest
      integer(c_long),target,dimension(:,:,:,:,:,:,:),intent(in)    :: &
&src
      integer(c_size_t),intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i8_7_c_size_t
      !
      cudaMemcpy_i8_7_c_size_t = cudaMemcpy_(c_loc(dest),c_loc(src),&
&length*(storage_size(dest,kind=c_size_t)/8_c_size_t),myKind)
    end function cudaMemcpy_i8_7_c_size_t

!> Whole-array copy wrapper for rank-7 integer(c_long) arrays.
!> Passes the C addresses of dest and src, a byte count and myKind to
!> cudaMemcpy_. The byte count is size(dest) times the element width.
!> size is evaluated with kind c_size_t so the element count cannot wrap
!> for very large arrays, and the width comes from storage_size(dest)
!> (bits) divided by 8 so it stays correct where c_long is not 8 bytes.
!> @return the value returned by cudaMemcpy_.
function cudaMemcpy_i8_7(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:,:,:,:,:,:),intent(inout) :: &
&dest
      integer(c_long),target,dimension(:,:,:,:,:,:,:),intent(in)    :: &
&src
      integer(kind(cudaMemcpyHostToHost)),intent(in) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_i8_7
      !
      cudaMemcpy_i8_7 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*&
&(storage_size(dest,kind=c_size_t)/8_c_size_t),myKind)
    end function cudaMemcpy_i8_7
                                                              
    !> Element-count copy wrapper for scalar real(c_float) arguments, with
    !> the count given as integer(c_int).
    !> Takes the C addresses of dest and src, multiplies length by 4 in
    !> 8-byte integer arithmetic to form the byte count, and passes these
    !> together with myKind to cudaMemcpy_.
    !> NOTE(review): dest and src are presumably the first elements of
    !> larger buffers when length exceeds 1 - confirm against callers.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_r4_0_c_int(dest,src,length,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_float),intent(inout),target  :: dest
      real(c_float),intent(in),target     :: src
      integer(c_int),intent(in)           :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess))          :: copy_status
      type(c_ptr) :: dest_addr,src_addr
      !
      dest_addr = c_loc(dest)
      src_addr = c_loc(src)
      copy_status = cudaMemcpy_(dest_addr,src_addr,length*4_8,myKind)
    end function cudaMemcpy_r4_0_c_int
                                                              
    !> Element-count copy wrapper for scalar real(c_float) arguments, with
    !> the count given as integer(c_size_t).
    !> Takes the C addresses of dest and src, multiplies length by 4 in
    !> 8-byte integer arithmetic to form the byte count, and passes these
    !> together with myKind to cudaMemcpy_.
    !> NOTE(review): dest and src are presumably the first elements of
    !> larger buffers when length exceeds 1 - confirm against callers.
    !> @return the value returned by cudaMemcpy_.
    function cudaMemcpy_r4_0_c_size_t(dest,src,length,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_float),intent(inout),target  :: dest
      real(c_float),intent(in),target     :: src
      integer(c_size_t),intent(in)        :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess))          :: copy_status
      type(c_ptr) :: dest_addr,src_addr
      !
      dest_addr = c_loc(dest)
      src_addr = c_loc(src)
      copy_status = cudaMemcpy_(dest_addr,src_addr,length*4_8,myKind)
    end function cudaMemcpy_r4_0_c_size_t

!> Scalar real(c_float) variant without a length argument.
!> Passes c_loc(dest), c_loc(src) and the constant size 4 to
!> cudaMemcpy_; myKind is forwarded unchanged.
!> Returns the value produced by cudaMemcpy_.
function cudaMemcpy_r4_0(dest,src,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_float),intent(in),target :: src
      real(c_float),intent(inout),target :: dest
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t),parameter :: byte_count = 4_8
      !
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function
                                                              
    !> real(c_float), rank 1, c_int length.
    !> Passes c_loc(dest), c_loc(src) and length*4 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r4_1_c_int(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(in) :: length
      real(c_float),intent(in),target :: src(:)
      real(c_float),intent(inout),target :: dest(:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*4_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function
                                                              
    !> real(c_float), rank 1, c_size_t length.
    !> Passes c_loc(dest), c_loc(src) and length*4 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r4_1_c_size_t(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_size_t),intent(in) :: length
      real(c_float),intent(in),target :: src(:)
      real(c_float),intent(inout),target :: dest(:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*4_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function

!> Whole-array real(c_float) variant, rank 1, no length argument.
!> Passes c_loc(dest), c_loc(src) and size(dest)*4 (as the size
!> argument) to cudaMemcpy_; myKind is forwarded unchanged.
!> Only the extent of dest is consulted; src is not size-checked.
!> Returns the value produced by cudaMemcpy_.
function cudaMemcpy_r4_1(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_float),target,dimension(:),intent(inout) :: dest
      real(c_float),target,dimension(:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_r4_1
      !
      ! Ask size() for a c_size_t result: a default-integer element
      ! count would wrap for arrays beyond huge(0) elements.
      cudaMemcpy_r4_1 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*4_c_size_t,myKind)
    end function
                                                              
    !> real(c_float), rank 2, c_int length.
    !> Passes c_loc(dest), c_loc(src) and length*4 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r4_2_c_int(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(in) :: length
      real(c_float),intent(in),target :: src(:,:)
      real(c_float),intent(inout),target :: dest(:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*4_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function
                                                              
    !> real(c_float), rank 2, c_size_t length.
    !> Passes c_loc(dest), c_loc(src) and length*4 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r4_2_c_size_t(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_size_t),intent(in) :: length
      real(c_float),intent(in),target :: src(:,:)
      real(c_float),intent(inout),target :: dest(:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*4_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function

!> Whole-array real(c_float) variant, rank 2, no length argument.
!> Passes c_loc(dest), c_loc(src) and size(dest)*4 (as the size
!> argument) to cudaMemcpy_; myKind is forwarded unchanged.
!> Only the extent of dest is consulted; src is not size-checked.
!> Returns the value produced by cudaMemcpy_.
function cudaMemcpy_r4_2(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_float),target,dimension(:,:),intent(inout) :: dest
      real(c_float),target,dimension(:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_r4_2
      !
      ! Ask size() for a c_size_t result: a default-integer element
      ! count would wrap for arrays beyond huge(0) elements.
      cudaMemcpy_r4_2 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*4_c_size_t,myKind)
    end function
                                                              
    !> real(c_float), rank 3, c_int length.
    !> Passes c_loc(dest), c_loc(src) and length*4 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r4_3_c_int(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(in) :: length
      real(c_float),intent(in),target :: src(:,:,:)
      real(c_float),intent(inout),target :: dest(:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*4_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function
                                                              
    !> real(c_float), rank 3, c_size_t length.
    !> Passes c_loc(dest), c_loc(src) and length*4 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r4_3_c_size_t(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_size_t),intent(in) :: length
      real(c_float),intent(in),target :: src(:,:,:)
      real(c_float),intent(inout),target :: dest(:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*4_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function

!> Whole-array real(c_float) variant, rank 3, no length argument.
!> Passes c_loc(dest), c_loc(src) and size(dest)*4 (as the size
!> argument) to cudaMemcpy_; myKind is forwarded unchanged.
!> Only the extent of dest is consulted; src is not size-checked.
!> Returns the value produced by cudaMemcpy_.
function cudaMemcpy_r4_3(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_float),target,dimension(:,:,:),intent(inout) :: dest
      real(c_float),target,dimension(:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_r4_3
      !
      ! Ask size() for a c_size_t result: a default-integer element
      ! count would wrap for arrays beyond huge(0) elements.
      cudaMemcpy_r4_3 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*4_c_size_t,myKind)
    end function
                                                              
    !> real(c_float), rank 4, c_int length.
    !> Passes c_loc(dest), c_loc(src) and length*4 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r4_4_c_int(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(in) :: length
      real(c_float),intent(in),target :: src(:,:,:,:)
      real(c_float),intent(inout),target :: dest(:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*4_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function
                                                              
    !> real(c_float), rank 4, c_size_t length.
    !> Passes c_loc(dest), c_loc(src) and length*4 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r4_4_c_size_t(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_size_t),intent(in) :: length
      real(c_float),intent(in),target :: src(:,:,:,:)
      real(c_float),intent(inout),target :: dest(:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*4_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function

!> Whole-array real(c_float) variant, rank 4, no length argument.
!> Passes c_loc(dest), c_loc(src) and size(dest)*4 (as the size
!> argument) to cudaMemcpy_; myKind is forwarded unchanged.
!> Only the extent of dest is consulted; src is not size-checked.
!> Returns the value produced by cudaMemcpy_.
function cudaMemcpy_r4_4(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_float),target,dimension(:,:,:,:),intent(inout) :: dest
      real(c_float),target,dimension(:,:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_r4_4
      !
      ! Ask size() for a c_size_t result: a default-integer element
      ! count would wrap for arrays beyond huge(0) elements.
      cudaMemcpy_r4_4 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*4_c_size_t,myKind)
    end function
                                                              
    !> real(c_float), rank 5, c_int length.
    !> Passes c_loc(dest), c_loc(src) and length*4 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r4_5_c_int(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(in) :: length
      real(c_float),intent(in),target :: src(:,:,:,:,:)
      real(c_float),intent(inout),target :: dest(:,:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*4_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function
                                                              
    !> real(c_float), rank 5, c_size_t length.
    !> Passes c_loc(dest), c_loc(src) and length*4 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r4_5_c_size_t(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_size_t),intent(in) :: length
      real(c_float),intent(in),target :: src(:,:,:,:,:)
      real(c_float),intent(inout),target :: dest(:,:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*4_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function

!> Whole-array real(c_float) variant, rank 5, no length argument.
!> Passes c_loc(dest), c_loc(src) and size(dest)*4 (as the size
!> argument) to cudaMemcpy_; myKind is forwarded unchanged.
!> Only the extent of dest is consulted; src is not size-checked.
!> Returns the value produced by cudaMemcpy_.
function cudaMemcpy_r4_5(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_float),target,dimension(:,:,:,:,:),intent(inout) :: dest
      real(c_float),target,dimension(:,:,:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_r4_5
      !
      ! Ask size() for a c_size_t result: a default-integer element
      ! count would wrap for arrays beyond huge(0) elements.
      cudaMemcpy_r4_5 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*4_c_size_t,myKind)
    end function
                                                              
    !> real(c_float), rank 6, c_int length.
    !> Passes c_loc(dest), c_loc(src) and length*4 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r4_6_c_int(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(in) :: length
      real(c_float),intent(in),target :: src(:,:,:,:,:,:)
      real(c_float),intent(inout),target :: dest(:,:,:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*4_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function
                                                              
    !> real(c_float), rank 6, c_size_t length.
    !> Passes c_loc(dest), c_loc(src) and length*4 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r4_6_c_size_t(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_size_t),intent(in) :: length
      real(c_float),intent(in),target :: src(:,:,:,:,:,:)
      real(c_float),intent(inout),target :: dest(:,:,:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*4_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function

!> Whole-array real(c_float) variant, rank 6, no length argument.
!> Passes c_loc(dest), c_loc(src) and size(dest)*4 (as the size
!> argument) to cudaMemcpy_; myKind is forwarded unchanged.
!> Only the extent of dest is consulted; src is not size-checked.
!> Returns the value produced by cudaMemcpy_.
function cudaMemcpy_r4_6(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_float),target,dimension(:,:,:,:,:,:),intent(inout) :: dest
      real(c_float),target,dimension(:,:,:,:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_r4_6
      !
      ! Ask size() for a c_size_t result: a default-integer element
      ! count would wrap for arrays beyond huge(0) elements.
      cudaMemcpy_r4_6 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*4_c_size_t,myKind)
    end function
                                                              
    !> real(c_float), rank 7, c_int length.
    !> Passes c_loc(dest), c_loc(src) and length*4 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r4_7_c_int(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(in) :: length
      real(c_float),intent(in),target :: src(:,:,:,:,:,:,:)
      real(c_float),intent(inout),target :: dest(:,:,:,:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*4_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function
                                                              
    !> real(c_float), rank 7, c_size_t length.
    !> Passes c_loc(dest), c_loc(src) and length*4 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r4_7_c_size_t(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_size_t),intent(in) :: length
      real(c_float),intent(in),target :: src(:,:,:,:,:,:,:)
      real(c_float),intent(inout),target :: dest(:,:,:,:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*4_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function

!> Whole-array real(c_float) variant, rank 7, no length argument.
!> Passes c_loc(dest), c_loc(src) and size(dest)*4 (as the size
!> argument) to cudaMemcpy_; myKind is forwarded unchanged.
!> Only the extent of dest is consulted; src is not size-checked.
!> Returns the value produced by cudaMemcpy_.
function cudaMemcpy_r4_7(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_float),target,dimension(:,:,:,:,:,:,:),intent(inout) :: &
&dest
      real(c_float),target,dimension(:,:,:,:,:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_r4_7
      !
      ! Ask size() for a c_size_t result: a default-integer element
      ! count would wrap for arrays beyond huge(0) elements.
      cudaMemcpy_r4_7 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*4_c_size_t,myKind)
    end function
                                                              
    !> real(c_double), scalar arguments, c_int length.
    !> Passes c_loc(dest), c_loc(src) and length*8 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r8_0_c_int(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(in) :: length
      real(c_double),intent(in),target :: src
      real(c_double),intent(inout),target :: dest
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*8_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function
                                                              
    !> real(c_double), scalar arguments, c_size_t length.
    !> Passes c_loc(dest), c_loc(src) and length*8 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r8_0_c_size_t(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_size_t),intent(in) :: length
      real(c_double),intent(in),target :: src
      real(c_double),intent(inout),target :: dest
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*8_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function

!> Scalar real(c_double) variant without a length argument.
!> Passes c_loc(dest), c_loc(src) and the constant size 8 to
!> cudaMemcpy_; myKind is forwarded unchanged.
!> Returns the value produced by cudaMemcpy_.
function cudaMemcpy_r8_0(dest,src,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_double),intent(in),target :: src
      real(c_double),intent(inout),target :: dest
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t),parameter :: byte_count = 8_8
      !
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function
                                                              
    !> real(c_double), rank 1, c_int length.
    !> Passes c_loc(dest), c_loc(src) and length*8 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r8_1_c_int(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(in) :: length
      real(c_double),intent(in),target :: src(:)
      real(c_double),intent(inout),target :: dest(:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*8_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function
                                                              
    !> real(c_double), rank 1, c_size_t length.
    !> Passes c_loc(dest), c_loc(src) and length*8 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r8_1_c_size_t(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_size_t),intent(in) :: length
      real(c_double),intent(in),target :: src(:)
      real(c_double),intent(inout),target :: dest(:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*8_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function

!> Whole-array real(c_double) variant, rank 1, no length argument.
!> Passes c_loc(dest), c_loc(src) and size(dest)*8 (as the size
!> argument) to cudaMemcpy_; myKind is forwarded unchanged.
!> Only the extent of dest is consulted; src is not size-checked.
!> Returns the value produced by cudaMemcpy_.
function cudaMemcpy_r8_1(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_double),target,dimension(:),intent(inout) :: dest
      real(c_double),target,dimension(:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_r8_1
      !
      ! Ask size() for a c_size_t result: a default-integer element
      ! count would wrap for arrays beyond huge(0) elements.
      cudaMemcpy_r8_1 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*8_c_size_t,myKind)
    end function
                                                              
    !> real(c_double), rank 2, c_int length.
    !> Passes c_loc(dest), c_loc(src) and length*8 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r8_2_c_int(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(in) :: length
      real(c_double),intent(in),target :: src(:,:)
      real(c_double),intent(inout),target :: dest(:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*8_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function
                                                              
    !> real(c_double), rank 2, c_size_t length.
    !> Passes c_loc(dest), c_loc(src) and length*8 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r8_2_c_size_t(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_size_t),intent(in) :: length
      real(c_double),intent(in),target :: src(:,:)
      real(c_double),intent(inout),target :: dest(:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*8_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function

!> Whole-array real(c_double) variant, rank 2, no length argument.
!> Passes c_loc(dest), c_loc(src) and size(dest)*8 (as the size
!> argument) to cudaMemcpy_; myKind is forwarded unchanged.
!> Only the extent of dest is consulted; src is not size-checked.
!> Returns the value produced by cudaMemcpy_.
function cudaMemcpy_r8_2(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_double),target,dimension(:,:),intent(inout) :: dest
      real(c_double),target,dimension(:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_r8_2
      !
      ! Ask size() for a c_size_t result: a default-integer element
      ! count would wrap for arrays beyond huge(0) elements.
      cudaMemcpy_r8_2 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*8_c_size_t,myKind)
    end function
                                                              
    !> real(c_double), rank 3, c_int length.
    !> Passes c_loc(dest), c_loc(src) and length*8 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r8_3_c_int(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(in) :: length
      real(c_double),intent(in),target :: src(:,:,:)
      real(c_double),intent(inout),target :: dest(:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*8_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function
                                                              
    !> real(c_double), rank 3, c_size_t length.
    !> Passes c_loc(dest), c_loc(src) and length*8 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r8_3_c_size_t(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_size_t),intent(in) :: length
      real(c_double),intent(in),target :: src(:,:,:)
      real(c_double),intent(inout),target :: dest(:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*8_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function

!> Whole-array real(c_double) variant, rank 3, no length argument.
!> Passes c_loc(dest), c_loc(src) and size(dest)*8 (as the size
!> argument) to cudaMemcpy_; myKind is forwarded unchanged.
!> Only the extent of dest is consulted; src is not size-checked.
!> Returns the value produced by cudaMemcpy_.
function cudaMemcpy_r8_3(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_double),target,dimension(:,:,:),intent(inout) :: dest
      real(c_double),target,dimension(:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_r8_3
      !
      ! Ask size() for a c_size_t result: a default-integer element
      ! count would wrap for arrays beyond huge(0) elements.
      cudaMemcpy_r8_3 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*8_c_size_t,myKind)
    end function
                                                              
    !> real(c_double), rank 4, c_int length.
    !> Passes c_loc(dest), c_loc(src) and length*8 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r8_4_c_int(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(in) :: length
      real(c_double),intent(in),target :: src(:,:,:,:)
      real(c_double),intent(inout),target :: dest(:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*8_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function
                                                              
    !> real(c_double), rank 4, c_size_t length.
    !> Passes c_loc(dest), c_loc(src) and length*8 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r8_4_c_size_t(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_size_t),intent(in) :: length
      real(c_double),intent(in),target :: src(:,:,:,:)
      real(c_double),intent(inout),target :: dest(:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*8_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function

!> Whole-array real(c_double) variant, rank 4, no length argument.
!> Passes c_loc(dest), c_loc(src) and size(dest)*8 (as the size
!> argument) to cudaMemcpy_; myKind is forwarded unchanged.
!> Only the extent of dest is consulted; src is not size-checked.
!> Returns the value produced by cudaMemcpy_.
function cudaMemcpy_r8_4(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_double),target,dimension(:,:,:,:),intent(inout) :: dest
      real(c_double),target,dimension(:,:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_r8_4
      !
      ! Ask size() for a c_size_t result: a default-integer element
      ! count would wrap for arrays beyond huge(0) elements.
      cudaMemcpy_r8_4 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*8_c_size_t,myKind)
    end function
                                                              
    !> real(c_double), rank 5, c_int length.
    !> Passes c_loc(dest), c_loc(src) and length*8 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r8_5_c_int(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(in) :: length
      real(c_double),intent(in),target :: src(:,:,:,:,:)
      real(c_double),intent(inout),target :: dest(:,:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*8_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function
                                                              
    !> real(c_double), rank 5, c_size_t length.
    !> Passes c_loc(dest), c_loc(src) and length*8 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r8_5_c_size_t(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_size_t),intent(in) :: length
      real(c_double),intent(in),target :: src(:,:,:,:,:)
      real(c_double),intent(inout),target :: dest(:,:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*8_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function

!> Whole-array real(c_double) variant, rank 5, no length argument.
!> Passes c_loc(dest), c_loc(src) and size(dest)*8 (as the size
!> argument) to cudaMemcpy_; myKind is forwarded unchanged.
!> Only the extent of dest is consulted; src is not size-checked.
!> Returns the value produced by cudaMemcpy_.
function cudaMemcpy_r8_5(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_double),target,dimension(:,:,:,:,:),intent(inout) :: dest
      real(c_double),target,dimension(:,:,:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_r8_5
      !
      ! Ask size() for a c_size_t result: a default-integer element
      ! count would wrap for arrays beyond huge(0) elements.
      cudaMemcpy_r8_5 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*8_c_size_t,myKind)
    end function
                                                              
    !> real(c_double), rank 6, c_int length.
    !> Passes c_loc(dest), c_loc(src) and length*8 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r8_6_c_int(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(in) :: length
      real(c_double),intent(in),target :: src(:,:,:,:,:,:)
      real(c_double),intent(inout),target :: dest(:,:,:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*8_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function
                                                              
    !> real(c_double), rank 6, c_size_t length.
    !> Passes c_loc(dest), c_loc(src) and length*8 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r8_6_c_size_t(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_size_t),intent(in) :: length
      real(c_double),intent(in),target :: src(:,:,:,:,:,:)
      real(c_double),intent(inout),target :: dest(:,:,:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*8_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function

!> Whole-array real(c_double) variant, rank 6, no length argument.
!> Passes c_loc(dest), c_loc(src) and size(dest)*8 (as the size
!> argument) to cudaMemcpy_; myKind is forwarded unchanged.
!> Only the extent of dest is consulted; src is not size-checked.
!> Returns the value produced by cudaMemcpy_.
function cudaMemcpy_r8_6(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_double),target,dimension(:,:,:,:,:,:),intent(inout) :: dest
      real(c_double),target,dimension(:,:,:,:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_r8_6
      !
      ! Ask size() for a c_size_t result: a default-integer element
      ! count would wrap for arrays beyond huge(0) elements.
      cudaMemcpy_r8_6 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*8_c_size_t,myKind)
    end function
                                                              
    !> real(c_double), rank 7, c_int length.
    !> Passes c_loc(dest), c_loc(src) and length*8 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r8_7_c_int(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(in) :: length
      real(c_double),intent(in),target :: src(:,:,:,:,:,:,:)
      real(c_double),intent(inout),target :: dest(:,:,:,:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*8_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function
                                                              
    !> real(c_double), rank 7, c_size_t length.
    !> Passes c_loc(dest), c_loc(src) and length*8 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_r8_7_c_size_t(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_size_t),intent(in) :: length
      real(c_double),intent(in),target :: src(:,:,:,:,:,:,:)
      real(c_double),intent(inout),target :: dest(:,:,:,:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*8_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function

!> Whole-array real(c_double) variant, rank 7, no length argument.
!> Passes c_loc(dest), c_loc(src) and size(dest)*8 (as the size
!> argument) to cudaMemcpy_; myKind is forwarded unchanged.
!> Only the extent of dest is consulted; src is not size-checked.
!> Returns the value produced by cudaMemcpy_.
function cudaMemcpy_r8_7(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_double),target,dimension(:,:,:,:,:,:,:),intent(inout) :: &
&dest
      real(c_double),target,dimension(:,:,:,:,:,:,:),intent(in)    :: &
&src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_r8_7
      !
      ! Ask size() for a c_size_t result: a default-integer element
      ! count would wrap for arrays beyond huge(0) elements.
      cudaMemcpy_r8_7 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*8_c_size_t,myKind)
    end function
                                                              
    !> complex(c_float_complex), scalar arguments, c_int length.
    !> Passes c_loc(dest), c_loc(src) and length*2*4 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_c4_0_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex),target,intent(inout) :: dest
      complex(c_float_complex),target,intent(in)    :: src
      integer(c_int),intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c4_0_c_int
      !
      ! Widen length before multiplying: length*2 would otherwise be
      ! evaluated in c_int and can overflow for length >= 2**30.
      cudaMemcpy_c4_0_c_int = cudaMemcpy_(c_loc(dest),c_loc(src),&
&int(length,c_size_t)*2*4_c_size_t,myKind)
    end function
                                                              
    !> complex(c_float_complex), scalar arguments, c_size_t length.
    !> Passes c_loc(dest), c_loc(src) and length*2*4 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_c4_0_c_size_t(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_size_t),intent(in) :: length
      complex(c_float_complex),intent(in),target :: src
      complex(c_float_complex),intent(inout),target :: dest
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*2*4_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function

!> Scalar complex(c_float_complex) variant without a length
!> argument. Passes c_loc(dest), c_loc(src) and the constant size
!> 2*4 to cudaMemcpy_; myKind is forwarded unchanged.
!> Returns the value produced by cudaMemcpy_.
function cudaMemcpy_c4_0(dest,src,myKind) result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex),intent(in),target :: src
      complex(c_float_complex),intent(inout),target :: dest
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t),parameter :: byte_count = 2*4_8
      !
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function
                                                              
    !> complex(c_float_complex), rank 1, c_int length.
    !> Passes c_loc(dest), c_loc(src) and length*2*4 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_c4_1_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex),target,dimension(:),intent(inout) :: dest
      complex(c_float_complex),target,dimension(:),intent(in)    :: src
      integer(c_int),intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c4_1_c_int
      !
      ! Widen length before multiplying: length*2 would otherwise be
      ! evaluated in c_int and can overflow for length >= 2**30.
      cudaMemcpy_c4_1_c_int = cudaMemcpy_(c_loc(dest),c_loc(src),&
&int(length,c_size_t)*2*4_c_size_t,myKind)
    end function
                                                              
    !> complex(c_float_complex), rank 1, c_size_t length.
    !> Passes c_loc(dest), c_loc(src) and length*2*4 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_c4_1_c_size_t(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_size_t),intent(in) :: length
      complex(c_float_complex),intent(in),target :: src(:)
      complex(c_float_complex),intent(inout),target :: dest(:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*2*4_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function

!> Whole-array complex(c_float_complex) variant, rank 1, no length
!> argument. Passes c_loc(dest), c_loc(src) and size(dest)*2*4 (as
!> the size argument) to cudaMemcpy_; myKind is forwarded unchanged.
!> Only the extent of dest is consulted; src is not size-checked.
!> Returns the value produced by cudaMemcpy_.
function cudaMemcpy_c4_1(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex),target,dimension(:),intent(inout) :: dest
      complex(c_float_complex),target,dimension(:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c4_1
      !
      ! Ask size() for a c_size_t result so that neither the element
      ! count nor the *2 product is formed in default integer, where
      ! it could overflow for large arrays.
      cudaMemcpy_c4_1 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*2*4_c_size_t,myKind)
    end function
                                                              
    !> complex(c_float_complex), rank 2, c_int length.
    !> Passes c_loc(dest), c_loc(src) and length*2*4 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_c4_2_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex),target,dimension(:,:),intent(inout) :: &
&dest
      complex(c_float_complex),target,dimension(:,:),intent(in)    :: &
&src
      integer(c_int),intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c4_2_c_int
      !
      ! Widen length before multiplying: length*2 would otherwise be
      ! evaluated in c_int and can overflow for length >= 2**30.
      cudaMemcpy_c4_2_c_int = cudaMemcpy_(c_loc(dest),c_loc(src),&
&int(length,c_size_t)*2*4_c_size_t,myKind)
    end function
                                                              
    !> complex(c_float_complex), rank 2, c_size_t length.
    !> Passes c_loc(dest), c_loc(src) and length*2*4 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_c4_2_c_size_t(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_size_t),intent(in) :: length
      complex(c_float_complex),intent(in),target :: src(:,:)
      complex(c_float_complex),intent(inout),target :: dest(:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*2*4_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function

!> Whole-array complex(c_float_complex) variant, rank 2, no length
!> argument. Passes c_loc(dest), c_loc(src) and size(dest)*2*4 (as
!> the size argument) to cudaMemcpy_; myKind is forwarded unchanged.
!> Only the extent of dest is consulted; src is not size-checked.
!> Returns the value produced by cudaMemcpy_.
function cudaMemcpy_c4_2(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex),target,dimension(:,:),intent(inout) :: &
&dest
      complex(c_float_complex),target,dimension(:,:),intent(in)    :: &
&src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c4_2
      !
      ! Ask size() for a c_size_t result so that neither the element
      ! count nor the *2 product is formed in default integer, where
      ! it could overflow for large arrays.
      cudaMemcpy_c4_2 = cudaMemcpy_(c_loc(dest),c_loc(src),&
&size(dest,kind=c_size_t)*2*4_c_size_t,myKind)
    end function
                                                              
    !> complex(c_float_complex), rank 3, c_int length.
    !> Passes c_loc(dest), c_loc(src) and length*2*4 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_c4_3_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex),target,dimension(:,:,:),intent(inout) &
&:: dest
      complex(c_float_complex),target,dimension(:,:,:),intent(in)    &
&:: src
      integer(c_int),intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c4_3_c_int
      !
      ! Widen length before multiplying: length*2 would otherwise be
      ! evaluated in c_int and can overflow for length >= 2**30.
      cudaMemcpy_c4_3_c_int = cudaMemcpy_(c_loc(dest),c_loc(src),&
&int(length,c_size_t)*2*4_c_size_t,myKind)
    end function
                                                              
    !> complex(c_float_complex), rank 3, c_size_t length.
    !> Passes c_loc(dest), c_loc(src) and length*2*4 (as the size
    !> argument) to cudaMemcpy_; myKind is forwarded unchanged.
    !> Returns the value produced by cudaMemcpy_.
    function cudaMemcpy_c4_3_c_size_t(dest,src,length,myKind) &
        result(copy_status)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_size_t),intent(in) :: length
      complex(c_float_complex),intent(in),target :: src(:,:,:)
      complex(c_float_complex),intent(inout),target :: dest(:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: copy_status
      integer(c_size_t) :: byte_count
      !
      byte_count = length*2*4_8
      copy_status = cudaMemcpy_(c_loc(dest),c_loc(src),byte_count,&
        myKind)
    end function

!> Copy a whole rank-3 complex(c_float_complex) array via cudaMemcpy_.
!> The byte count is size(dest)*2*4; `src` is not measured, only its
!> address (c_loc) is passed. Returns the status from cudaMemcpy_.
function cudaMemcpy_c4_3(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:)
      complex(c_float_complex), target, intent(in)    :: src(:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c4_3
      ! Ask size() for a c_size_t result: a default-integer size(dest)*2
      ! overflows once dest holds 2**30 or more elements.
      cudaMemcpy_c4_3 = cudaMemcpy_(c_loc(dest), c_loc(src), &
        size(dest,kind=c_size_t)*2*4_8, myKind)
    end function cudaMemcpy_c4_3
                                                              
    !> Copy `length` elements between rank-4 complex(c_float_complex) arrays.
    !> Passes c_loc(dest), c_loc(src), length*2*4 bytes and myKind to
    !> cudaMemcpy_ and returns its status.
    !> @param[in] length element count (c_int), widened before scaling
    function cudaMemcpy_c4_4_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:,:)
      complex(c_float_complex), target, intent(in)    :: src(:,:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c4_4_c_int
      ! Widen first: length*2 evaluated in c_int overflows for
      ! length >= 2**30.
      cudaMemcpy_c4_4_c_int = cudaMemcpy_(c_loc(dest), c_loc(src), &
        int(length,c_size_t)*2*4_8, myKind)
    end function cudaMemcpy_c4_4_c_int
                                                              
    !> Rank-4 complex(c_float_complex) copy taking a c_size_t element count.
    !> Forwards c_loc(dest), c_loc(src), length*2*4 bytes and myKind to
    !> cudaMemcpy_; the function result is that call's return value.
    function cudaMemcpy_c4_4_c_size_t(dest,src,length,myKind) result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:,:)
      complex(c_float_complex), target, intent(in)    :: src(:,:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: ierr
      ! two reals per complex element, four bytes per real
      ierr = cudaMemcpy_(c_loc(dest), c_loc(src), length*2*4_8, myKind)
    end function cudaMemcpy_c4_4_c_size_t

!> Copy a whole rank-4 complex(c_float_complex) array via cudaMemcpy_.
!> The byte count is size(dest)*2*4; `src` is not measured, only its
!> address (c_loc) is passed. Returns the status from cudaMemcpy_.
function cudaMemcpy_c4_4(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:,:)
      complex(c_float_complex), target, intent(in)    :: src(:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c4_4
      ! Ask size() for a c_size_t result: a default-integer size(dest)*2
      ! overflows once dest holds 2**30 or more elements.
      cudaMemcpy_c4_4 = cudaMemcpy_(c_loc(dest), c_loc(src), &
        size(dest,kind=c_size_t)*2*4_8, myKind)
    end function cudaMemcpy_c4_4
                                                              
    !> Copy `length` elements between rank-5 complex(c_float_complex) arrays.
    !> Passes c_loc(dest), c_loc(src), length*2*4 bytes and myKind to
    !> cudaMemcpy_ and returns its status.
    !> @param[in] length element count (c_int), widened before scaling
    function cudaMemcpy_c4_5_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:,:,:)
      complex(c_float_complex), target, intent(in)    :: src(:,:,:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c4_5_c_int
      ! Widen first: length*2 evaluated in c_int overflows for
      ! length >= 2**30.
      cudaMemcpy_c4_5_c_int = cudaMemcpy_(c_loc(dest), c_loc(src), &
        int(length,c_size_t)*2*4_8, myKind)
    end function cudaMemcpy_c4_5_c_int
                                                              
    !> Rank-5 complex(c_float_complex) copy taking a c_size_t element count.
    !> Forwards c_loc(dest), c_loc(src), length*2*4 bytes and myKind to
    !> cudaMemcpy_; the function result is that call's return value.
    function cudaMemcpy_c4_5_c_size_t(dest,src,length,myKind) result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:,:,:)
      complex(c_float_complex), target, intent(in)    :: src(:,:,:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: ierr
      ! two reals per complex element, four bytes per real
      ierr = cudaMemcpy_(c_loc(dest), c_loc(src), length*2*4_8, myKind)
    end function cudaMemcpy_c4_5_c_size_t

!> Copy a whole rank-5 complex(c_float_complex) array via cudaMemcpy_.
!> The byte count is size(dest)*2*4; `src` is not measured, only its
!> address (c_loc) is passed. Returns the status from cudaMemcpy_.
function cudaMemcpy_c4_5(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:,:,:)
      complex(c_float_complex), target, intent(in)    :: src(:,:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c4_5
      ! Ask size() for a c_size_t result: a default-integer size(dest)*2
      ! overflows once dest holds 2**30 or more elements.
      cudaMemcpy_c4_5 = cudaMemcpy_(c_loc(dest), c_loc(src), &
        size(dest,kind=c_size_t)*2*4_8, myKind)
    end function cudaMemcpy_c4_5
                                                              
    !> Copy `length` elements between rank-6 complex(c_float_complex) arrays.
    !> Passes c_loc(dest), c_loc(src), length*2*4 bytes and myKind to
    !> cudaMemcpy_ and returns its status.
    !> @param[in] length element count (c_int), widened before scaling
    function cudaMemcpy_c4_6_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:,:,:,:)
      complex(c_float_complex), target, intent(in)    :: src(:,:,:,:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c4_6_c_int
      ! Widen first: length*2 evaluated in c_int overflows for
      ! length >= 2**30.
      cudaMemcpy_c4_6_c_int = cudaMemcpy_(c_loc(dest), c_loc(src), &
        int(length,c_size_t)*2*4_8, myKind)
    end function cudaMemcpy_c4_6_c_int
                                                              
    !> Rank-6 complex(c_float_complex) copy taking a c_size_t element count.
    !> Forwards c_loc(dest), c_loc(src), length*2*4 bytes and myKind to
    !> cudaMemcpy_; the function result is that call's return value.
    function cudaMemcpy_c4_6_c_size_t(dest,src,length,myKind) result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:,:,:,:)
      complex(c_float_complex), target, intent(in)    :: src(:,:,:,:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: ierr
      ! two reals per complex element, four bytes per real
      ierr = cudaMemcpy_(c_loc(dest), c_loc(src), length*2*4_8, myKind)
    end function cudaMemcpy_c4_6_c_size_t

!> Copy a whole rank-6 complex(c_float_complex) array via cudaMemcpy_.
!> The byte count is size(dest)*2*4; `src` is not measured, only its
!> address (c_loc) is passed. Returns the status from cudaMemcpy_.
function cudaMemcpy_c4_6(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:,:,:,:)
      complex(c_float_complex), target, intent(in)    :: src(:,:,:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c4_6
      ! Ask size() for a c_size_t result: a default-integer size(dest)*2
      ! overflows once dest holds 2**30 or more elements.
      cudaMemcpy_c4_6 = cudaMemcpy_(c_loc(dest), c_loc(src), &
        size(dest,kind=c_size_t)*2*4_8, myKind)
    end function cudaMemcpy_c4_6
                                                              
    !> Copy `length` elements between rank-7 complex(c_float_complex) arrays.
    !> Passes c_loc(dest), c_loc(src), length*2*4 bytes and myKind to
    !> cudaMemcpy_ and returns its status.
    !> @param[in] length element count (c_int), widened before scaling
    function cudaMemcpy_c4_7_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:,:,:,:,:)
      complex(c_float_complex), target, intent(in)    :: src(:,:,:,:,:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c4_7_c_int
      ! Widen first: length*2 evaluated in c_int overflows for
      ! length >= 2**30.
      cudaMemcpy_c4_7_c_int = cudaMemcpy_(c_loc(dest), c_loc(src), &
        int(length,c_size_t)*2*4_8, myKind)
    end function cudaMemcpy_c4_7_c_int
                                                              
    !> Rank-7 complex(c_float_complex) copy taking a c_size_t element count.
    !> Forwards c_loc(dest), c_loc(src), length*2*4 bytes and myKind to
    !> cudaMemcpy_; the function result is that call's return value.
    function cudaMemcpy_c4_7_c_size_t(dest,src,length,myKind) result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:,:,:,:,:)
      complex(c_float_complex), target, intent(in)    :: src(:,:,:,:,:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: ierr
      ! two reals per complex element, four bytes per real
      ierr = cudaMemcpy_(c_loc(dest), c_loc(src), length*2*4_8, myKind)
    end function cudaMemcpy_c4_7_c_size_t

!> Copy a whole rank-7 complex(c_float_complex) array via cudaMemcpy_.
!> The byte count is size(dest)*2*4; `src` is not measured, only its
!> address (c_loc) is passed. Returns the status from cudaMemcpy_.
function cudaMemcpy_c4_7(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:,:,:,:,:)
      complex(c_float_complex), target, intent(in)    :: src(:,:,:,:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c4_7
      ! Ask size() for a c_size_t result: a default-integer size(dest)*2
      ! overflows once dest holds 2**30 or more elements.
      cudaMemcpy_c4_7 = cudaMemcpy_(c_loc(dest), c_loc(src), &
        size(dest,kind=c_size_t)*2*4_8, myKind)
    end function cudaMemcpy_c4_7
                                                              
    !> Copy `length` complex(c_double_complex) elements starting at the
    !> addresses of the scalars `dest` and `src`.
    !> Passes c_loc(dest), c_loc(src), length*2*8 bytes and myKind to
    !> cudaMemcpy_ and returns its status.
    !> NOTE(review): with length > 1 the scalars presumably stand for the
    !> first element of larger buffers - confirm against callers.
    function cudaMemcpy_c8_0_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest
      complex(c_double_complex), target, intent(in)    :: src
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c8_0_c_int
      ! Widen first: length*2 evaluated in c_int overflows for
      ! length >= 2**30.
      cudaMemcpy_c8_0_c_int = cudaMemcpy_(c_loc(dest), c_loc(src), &
        int(length,c_size_t)*2*8_8, myKind)
    end function cudaMemcpy_c8_0_c_int
                                                              
    !> Scalar complex(c_double_complex) copy taking a c_size_t element count.
    !> Forwards c_loc(dest), c_loc(src), length*2*8 bytes and myKind to
    !> cudaMemcpy_; the function result is that call's return value.
    function cudaMemcpy_c8_0_c_size_t(dest,src,length,myKind) result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest
      complex(c_double_complex), target, intent(in)    :: src
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: ierr
      ! two reals per complex element, eight bytes per real
      ierr = cudaMemcpy_(c_loc(dest), c_loc(src), length*2*8_8, myKind)
    end function cudaMemcpy_c8_0_c_size_t

!> Copy one complex(c_double_complex) scalar via cudaMemcpy_.
!> The byte count is the constant 2*8; the function result is the value
!> returned by cudaMemcpy_.
function cudaMemcpy_c8_0(dest,src,myKind) result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest
      complex(c_double_complex), target, intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: ierr
      ! a single element: two reals of eight bytes each
      ierr = cudaMemcpy_(c_loc(dest), c_loc(src), 2*8_8, myKind)
    end function cudaMemcpy_c8_0
                                                              
    !> Copy `length` elements between rank-1 complex(c_double_complex) arrays.
    !> Passes c_loc(dest), c_loc(src), length*2*8 bytes and myKind to
    !> cudaMemcpy_ and returns its status.
    !> @param[in] length element count (c_int), widened before scaling
    function cudaMemcpy_c8_1_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:)
      complex(c_double_complex), target, intent(in)    :: src(:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c8_1_c_int
      ! Widen first: length*2 evaluated in c_int overflows for
      ! length >= 2**30.
      cudaMemcpy_c8_1_c_int = cudaMemcpy_(c_loc(dest), c_loc(src), &
        int(length,c_size_t)*2*8_8, myKind)
    end function cudaMemcpy_c8_1_c_int
                                                              
    !> Rank-1 complex(c_double_complex) copy taking a c_size_t element count.
    !> Forwards c_loc(dest), c_loc(src), length*2*8 bytes and myKind to
    !> cudaMemcpy_; the function result is that call's return value.
    function cudaMemcpy_c8_1_c_size_t(dest,src,length,myKind) result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:)
      complex(c_double_complex), target, intent(in)    :: src(:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: ierr
      ! two reals per complex element, eight bytes per real
      ierr = cudaMemcpy_(c_loc(dest), c_loc(src), length*2*8_8, myKind)
    end function cudaMemcpy_c8_1_c_size_t

!> Copy a whole rank-1 complex(c_double_complex) array via cudaMemcpy_.
!> The byte count is size(dest)*2*8; `src` is not measured, only its
!> address (c_loc) is passed. Returns the status from cudaMemcpy_.
function cudaMemcpy_c8_1(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:)
      complex(c_double_complex), target, intent(in)    :: src(:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c8_1
      ! Ask size() for a c_size_t result: a default-integer size(dest)*2
      ! overflows once dest holds 2**30 or more elements.
      cudaMemcpy_c8_1 = cudaMemcpy_(c_loc(dest), c_loc(src), &
        size(dest,kind=c_size_t)*2*8_8, myKind)
    end function cudaMemcpy_c8_1
                                                              
    !> Copy `length` elements between rank-2 complex(c_double_complex) arrays.
    !> Passes c_loc(dest), c_loc(src), length*2*8 bytes and myKind to
    !> cudaMemcpy_ and returns its status.
    !> @param[in] length element count (c_int), widened before scaling
    function cudaMemcpy_c8_2_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:)
      complex(c_double_complex), target, intent(in)    :: src(:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c8_2_c_int
      ! Widen first: length*2 evaluated in c_int overflows for
      ! length >= 2**30.
      cudaMemcpy_c8_2_c_int = cudaMemcpy_(c_loc(dest), c_loc(src), &
        int(length,c_size_t)*2*8_8, myKind)
    end function cudaMemcpy_c8_2_c_int
                                                              
    !> Rank-2 complex(c_double_complex) copy taking a c_size_t element count.
    !> Forwards c_loc(dest), c_loc(src), length*2*8 bytes and myKind to
    !> cudaMemcpy_; the function result is that call's return value.
    function cudaMemcpy_c8_2_c_size_t(dest,src,length,myKind) result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:)
      complex(c_double_complex), target, intent(in)    :: src(:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: ierr
      ! two reals per complex element, eight bytes per real
      ierr = cudaMemcpy_(c_loc(dest), c_loc(src), length*2*8_8, myKind)
    end function cudaMemcpy_c8_2_c_size_t

!> Copy a whole rank-2 complex(c_double_complex) array via cudaMemcpy_.
!> The byte count is size(dest)*2*8; `src` is not measured, only its
!> address (c_loc) is passed. Returns the status from cudaMemcpy_.
function cudaMemcpy_c8_2(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:)
      complex(c_double_complex), target, intent(in)    :: src(:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c8_2
      ! Ask size() for a c_size_t result: a default-integer size(dest)*2
      ! overflows once dest holds 2**30 or more elements.
      cudaMemcpy_c8_2 = cudaMemcpy_(c_loc(dest), c_loc(src), &
        size(dest,kind=c_size_t)*2*8_8, myKind)
    end function cudaMemcpy_c8_2
                                                              
    !> Copy `length` elements between rank-3 complex(c_double_complex) arrays.
    !> Passes c_loc(dest), c_loc(src), length*2*8 bytes and myKind to
    !> cudaMemcpy_ and returns its status.
    !> @param[in] length element count (c_int), widened before scaling
    function cudaMemcpy_c8_3_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:)
      complex(c_double_complex), target, intent(in)    :: src(:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c8_3_c_int
      ! Widen first: length*2 evaluated in c_int overflows for
      ! length >= 2**30.
      cudaMemcpy_c8_3_c_int = cudaMemcpy_(c_loc(dest), c_loc(src), &
        int(length,c_size_t)*2*8_8, myKind)
    end function cudaMemcpy_c8_3_c_int
                                                              
    !> Rank-3 complex(c_double_complex) copy taking a c_size_t element count.
    !> Forwards c_loc(dest), c_loc(src), length*2*8 bytes and myKind to
    !> cudaMemcpy_; the function result is that call's return value.
    function cudaMemcpy_c8_3_c_size_t(dest,src,length,myKind) result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:)
      complex(c_double_complex), target, intent(in)    :: src(:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: ierr
      ! two reals per complex element, eight bytes per real
      ierr = cudaMemcpy_(c_loc(dest), c_loc(src), length*2*8_8, myKind)
    end function cudaMemcpy_c8_3_c_size_t

!> Copy a whole rank-3 complex(c_double_complex) array via cudaMemcpy_.
!> The byte count is size(dest)*2*8; `src` is not measured, only its
!> address (c_loc) is passed. Returns the status from cudaMemcpy_.
function cudaMemcpy_c8_3(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:)
      complex(c_double_complex), target, intent(in)    :: src(:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c8_3
      ! Ask size() for a c_size_t result: a default-integer size(dest)*2
      ! overflows once dest holds 2**30 or more elements.
      cudaMemcpy_c8_3 = cudaMemcpy_(c_loc(dest), c_loc(src), &
        size(dest,kind=c_size_t)*2*8_8, myKind)
    end function cudaMemcpy_c8_3
                                                              
    !> Copy `length` elements between rank-4 complex(c_double_complex) arrays.
    !> Passes c_loc(dest), c_loc(src), length*2*8 bytes and myKind to
    !> cudaMemcpy_ and returns its status.
    !> @param[in] length element count (c_int), widened before scaling
    function cudaMemcpy_c8_4_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:,:)
      complex(c_double_complex), target, intent(in)    :: src(:,:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c8_4_c_int
      ! Widen first: length*2 evaluated in c_int overflows for
      ! length >= 2**30.
      cudaMemcpy_c8_4_c_int = cudaMemcpy_(c_loc(dest), c_loc(src), &
        int(length,c_size_t)*2*8_8, myKind)
    end function cudaMemcpy_c8_4_c_int
                                                              
    !> Rank-4 complex(c_double_complex) copy taking a c_size_t element count.
    !> Forwards c_loc(dest), c_loc(src), length*2*8 bytes and myKind to
    !> cudaMemcpy_; the function result is that call's return value.
    function cudaMemcpy_c8_4_c_size_t(dest,src,length,myKind) result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:,:)
      complex(c_double_complex), target, intent(in)    :: src(:,:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: ierr
      ! two reals per complex element, eight bytes per real
      ierr = cudaMemcpy_(c_loc(dest), c_loc(src), length*2*8_8, myKind)
    end function cudaMemcpy_c8_4_c_size_t

!> Copy a whole rank-4 complex(c_double_complex) array via cudaMemcpy_.
!> The byte count is size(dest)*2*8; `src` is not measured, only its
!> address (c_loc) is passed. Returns the status from cudaMemcpy_.
function cudaMemcpy_c8_4(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:,:)
      complex(c_double_complex), target, intent(in)    :: src(:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c8_4
      ! Ask size() for a c_size_t result: a default-integer size(dest)*2
      ! overflows once dest holds 2**30 or more elements.
      cudaMemcpy_c8_4 = cudaMemcpy_(c_loc(dest), c_loc(src), &
        size(dest,kind=c_size_t)*2*8_8, myKind)
    end function cudaMemcpy_c8_4
                                                              
    !> Copy `length` elements between rank-5 complex(c_double_complex) arrays.
    !> Passes c_loc(dest), c_loc(src), length*2*8 bytes and myKind to
    !> cudaMemcpy_ and returns its status.
    !> @param[in] length element count (c_int), widened before scaling
    function cudaMemcpy_c8_5_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:,:,:)
      complex(c_double_complex), target, intent(in)    :: src(:,:,:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c8_5_c_int
      ! Widen first: length*2 evaluated in c_int overflows for
      ! length >= 2**30.
      cudaMemcpy_c8_5_c_int = cudaMemcpy_(c_loc(dest), c_loc(src), &
        int(length,c_size_t)*2*8_8, myKind)
    end function cudaMemcpy_c8_5_c_int
                                                              
    !> Rank-5 complex(c_double_complex) copy taking a c_size_t element count.
    !> Forwards c_loc(dest), c_loc(src), length*2*8 bytes and myKind to
    !> cudaMemcpy_; the function result is that call's return value.
    function cudaMemcpy_c8_5_c_size_t(dest,src,length,myKind) result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:,:,:)
      complex(c_double_complex), target, intent(in)    :: src(:,:,:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: ierr
      ! two reals per complex element, eight bytes per real
      ierr = cudaMemcpy_(c_loc(dest), c_loc(src), length*2*8_8, myKind)
    end function cudaMemcpy_c8_5_c_size_t

!> Copy a whole rank-5 complex(c_double_complex) array via cudaMemcpy_.
!> The byte count is size(dest)*2*8; `src` is not measured, only its
!> address (c_loc) is passed. Returns the status from cudaMemcpy_.
function cudaMemcpy_c8_5(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:,:,:)
      complex(c_double_complex), target, intent(in)    :: src(:,:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c8_5
      ! Ask size() for a c_size_t result: a default-integer size(dest)*2
      ! overflows once dest holds 2**30 or more elements.
      cudaMemcpy_c8_5 = cudaMemcpy_(c_loc(dest), c_loc(src), &
        size(dest,kind=c_size_t)*2*8_8, myKind)
    end function cudaMemcpy_c8_5
                                                              
    !> Copy `length` elements between rank-6 complex(c_double_complex) arrays.
    !> Passes c_loc(dest), c_loc(src), length*2*8 bytes and myKind to
    !> cudaMemcpy_ and returns its status.
    !> @param[in] length element count (c_int), widened before scaling
    function cudaMemcpy_c8_6_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:,:,:,:)
      complex(c_double_complex), target, intent(in)    :: src(:,:,:,:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c8_6_c_int
      ! Widen first: length*2 evaluated in c_int overflows for
      ! length >= 2**30.
      cudaMemcpy_c8_6_c_int = cudaMemcpy_(c_loc(dest), c_loc(src), &
        int(length,c_size_t)*2*8_8, myKind)
    end function cudaMemcpy_c8_6_c_int
                                                              
    !> Rank-6 complex(c_double_complex) copy taking a c_size_t element count.
    !> Forwards c_loc(dest), c_loc(src), length*2*8 bytes and myKind to
    !> cudaMemcpy_; the function result is that call's return value.
    function cudaMemcpy_c8_6_c_size_t(dest,src,length,myKind) result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:,:,:,:)
      complex(c_double_complex), target, intent(in)    :: src(:,:,:,:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: ierr
      ! two reals per complex element, eight bytes per real
      ierr = cudaMemcpy_(c_loc(dest), c_loc(src), length*2*8_8, myKind)
    end function cudaMemcpy_c8_6_c_size_t

!> Copy a whole rank-6 complex(c_double_complex) array via cudaMemcpy_.
!> The byte count is size(dest)*2*8; `src` is not measured, only its
!> address (c_loc) is passed. Returns the status from cudaMemcpy_.
function cudaMemcpy_c8_6(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:,:,:,:)
      complex(c_double_complex), target, intent(in)    :: src(:,:,:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c8_6
      ! Ask size() for a c_size_t result: a default-integer size(dest)*2
      ! overflows once dest holds 2**30 or more elements.
      cudaMemcpy_c8_6 = cudaMemcpy_(c_loc(dest), c_loc(src), &
        size(dest,kind=c_size_t)*2*8_8, myKind)
    end function cudaMemcpy_c8_6
                                                              
    !> Copy `length` elements between rank-7 complex(c_double_complex) arrays.
    !> Passes c_loc(dest), c_loc(src), length*2*8 bytes and myKind to
    !> cudaMemcpy_ and returns its status.
    !> @param[in] length element count (c_int), widened before scaling
    function cudaMemcpy_c8_7_c_int(dest,src,length,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: &
        dest(:,:,:,:,:,:,:)
      complex(c_double_complex), target, intent(in)    :: &
        src(:,:,:,:,:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c8_7_c_int
      ! Widen first: length*2 evaluated in c_int overflows for
      ! length >= 2**30.
      cudaMemcpy_c8_7_c_int = cudaMemcpy_(c_loc(dest), c_loc(src), &
        int(length,c_size_t)*2*8_8, myKind)
    end function cudaMemcpy_c8_7_c_int
                                                              
    !> Rank-7 complex(c_double_complex) copy taking a c_size_t element count.
    !> Forwards c_loc(dest), c_loc(src), length*2*8 bytes and myKind to
    !> cudaMemcpy_; the function result is that call's return value.
    function cudaMemcpy_c8_7_c_size_t(dest,src,length,myKind) result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: &
        dest(:,:,:,:,:,:,:)
      complex(c_double_complex), target, intent(in)    :: &
        src(:,:,:,:,:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: ierr
      ! two reals per complex element, eight bytes per real
      ierr = cudaMemcpy_(c_loc(dest), c_loc(src), length*2*8_8, myKind)
    end function cudaMemcpy_c8_7_c_size_t

!> Copy a whole rank-7 complex(c_double_complex) array via cudaMemcpy_.
!> The byte count is size(dest)*2*8; `src` is not measured, only its
!> address (c_loc) is passed. Returns the status from cudaMemcpy_.
function cudaMemcpy_c8_7(dest,src,myKind)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: &
        dest(:,:,:,:,:,:,:)
      complex(c_double_complex), target, intent(in)    :: &
        src(:,:,:,:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      integer(kind(cudaSuccess)) :: cudaMemcpy_c8_7
      ! Ask size() for a c_size_t result: a default-integer size(dest)*2
      ! overflows once dest holds 2**30 or more elements.
      cudaMemcpy_c8_7 = cudaMemcpy_(c_loc(dest), c_loc(src), &
        size(dest,kind=c_size_t)*2*8_8, myKind)
    end function cudaMemcpy_c8_7

!> Scalar logical(c_bool) copy with a c_int element count.
!> Calls cudaMemcpyAsync_ with c_loc(dest), c_loc(src), length*1 bytes,
!> myKind and stream, and returns that call's result.
function cudaMemcpyAsync_l_0_c_int(dest,src,length,myKind,stream) &
    result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest
      logical(c_bool), target, intent(in)    :: src
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: ierr
      ! the 1_8 factor promotes the count to kind 8
      ierr = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*1_8, &
        myKind, stream)
    end function cudaMemcpyAsync_l_0_c_int
!> Scalar logical(c_bool) copy with a c_size_t element count.
!> Calls cudaMemcpyAsync_ with c_loc(dest), c_loc(src), length*1 bytes,
!> myKind and stream, and returns that call's result.
function cudaMemcpyAsync_l_0_c_size_t(dest,src,length,myKind,stream) &
    result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest
      logical(c_bool), target, intent(in)    :: src
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: ierr
      ! element size factor is 1, so the byte count equals length
      ierr = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*1_8, &
        myKind, stream)
    end function cudaMemcpyAsync_l_0_c_size_t

!> Copy one logical(c_bool) scalar via cudaMemcpyAsync_.
!> The byte count is the constant 1_8; myKind and stream are passed
!> through and the call's return value becomes the function result.
function cudaMemcpyAsync_l_0(dest,src,myKind,stream) result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest
      logical(c_bool), target, intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: ierr
      ! a single element, size factor 1
      ierr = cudaMemcpyAsync_(c_loc(dest), c_loc(src), 1_8, myKind, stream)
    end function cudaMemcpyAsync_l_0
!> Rank-1 logical(c_bool) copy with a c_int element count.
!> Calls cudaMemcpyAsync_ with c_loc(dest), c_loc(src), length*1 bytes,
!> myKind and stream, and returns that call's result.
function cudaMemcpyAsync_l_1_c_int(dest,src,length,myKind,stream) &
    result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:)
      logical(c_bool), target, intent(in)    :: src(:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: ierr
      ! the 1_8 factor promotes the count to kind 8
      ierr = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*1_8, &
        myKind, stream)
    end function cudaMemcpyAsync_l_1_c_int
!> Rank-1 logical(c_bool) copy with a c_size_t element count.
!> Calls cudaMemcpyAsync_ with c_loc(dest), c_loc(src), length*1 bytes,
!> myKind and stream, and returns that call's result.
function cudaMemcpyAsync_l_1_c_size_t(dest,src,length,myKind,stream) &
    result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:)
      logical(c_bool), target, intent(in)    :: src(:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: ierr
      ! element size factor is 1, so the byte count equals length
      ierr = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*1_8, &
        myKind, stream)
    end function cudaMemcpyAsync_l_1_c_size_t

!> Copy a whole rank-1 logical(c_bool) array via cudaMemcpyAsync_.
!> The byte count is size(dest)*1; `src` is not measured, only its
!> address (c_loc) is passed. myKind and stream are passed through and
!> the status from cudaMemcpyAsync_ is returned.
function cudaMemcpyAsync_l_1(dest,src,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:)
      logical(c_bool), target, intent(in)    :: src(:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpyAsync_l_1
      ! Request a c_size_t result from size(): the default-integer form
      ! cannot represent element counts above huge(0).
      cudaMemcpyAsync_l_1 = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        size(dest,kind=c_size_t)*1_8, myKind, stream)
    end function cudaMemcpyAsync_l_1
!> Rank-2 logical(c_bool) copy with a c_int element count.
!> Calls cudaMemcpyAsync_ with c_loc(dest), c_loc(src), length*1 bytes,
!> myKind and stream, and returns that call's result.
function cudaMemcpyAsync_l_2_c_int(dest,src,length,myKind,stream) &
    result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:,:)
      logical(c_bool), target, intent(in)    :: src(:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: ierr
      ! the 1_8 factor promotes the count to kind 8
      ierr = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*1_8, &
        myKind, stream)
    end function cudaMemcpyAsync_l_2_c_int
!> Rank-2 logical(c_bool) copy with a c_size_t element count.
!> Calls cudaMemcpyAsync_ with c_loc(dest), c_loc(src), length*1 bytes,
!> myKind and stream, and returns that call's result.
function cudaMemcpyAsync_l_2_c_size_t(dest,src,length,myKind,stream) &
    result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:,:)
      logical(c_bool), target, intent(in)    :: src(:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: ierr
      ! element size factor is 1, so the byte count equals length
      ierr = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*1_8, &
        myKind, stream)
    end function cudaMemcpyAsync_l_2_c_size_t

!> Copy a whole rank-2 logical(c_bool) array via cudaMemcpyAsync_.
!> The byte count is size(dest)*1; `src` is not measured, only its
!> address (c_loc) is passed. myKind and stream are passed through and
!> the status from cudaMemcpyAsync_ is returned.
function cudaMemcpyAsync_l_2(dest,src,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:,:)
      logical(c_bool), target, intent(in)    :: src(:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpyAsync_l_2
      ! Request a c_size_t result from size(): the default-integer form
      ! cannot represent element counts above huge(0).
      cudaMemcpyAsync_l_2 = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        size(dest,kind=c_size_t)*1_8, myKind, stream)
    end function cudaMemcpyAsync_l_2
!> Rank-3 logical(c_bool) copy with a c_int element count.
!> Calls cudaMemcpyAsync_ with c_loc(dest), c_loc(src), length*1 bytes,
!> myKind and stream, and returns that call's result.
function cudaMemcpyAsync_l_3_c_int(dest,src,length,myKind,stream) &
    result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:,:,:)
      logical(c_bool), target, intent(in)    :: src(:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: ierr
      ! the 1_8 factor promotes the count to kind 8
      ierr = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*1_8, &
        myKind, stream)
    end function cudaMemcpyAsync_l_3_c_int
!> Rank-3 logical(c_bool) copy with a c_size_t element count.
!> Calls cudaMemcpyAsync_ with c_loc(dest), c_loc(src), length*1 bytes,
!> myKind and stream, and returns that call's result.
function cudaMemcpyAsync_l_3_c_size_t(dest,src,length,myKind,stream) &
    result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:,:,:)
      logical(c_bool), target, intent(in)    :: src(:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: ierr
      ! element size factor is 1, so the byte count equals length
      ierr = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*1_8, &
        myKind, stream)
    end function cudaMemcpyAsync_l_3_c_size_t

!> Copy a whole rank-3 logical(c_bool) array via cudaMemcpyAsync_.
!> The byte count is size(dest)*1; `src` is not measured, only its
!> address (c_loc) is passed. myKind and stream are passed through and
!> the status from cudaMemcpyAsync_ is returned.
function cudaMemcpyAsync_l_3(dest,src,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:,:,:)
      logical(c_bool), target, intent(in)    :: src(:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpyAsync_l_3
      ! Request a c_size_t result from size(): the default-integer form
      ! cannot represent element counts above huge(0).
      cudaMemcpyAsync_l_3 = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        size(dest,kind=c_size_t)*1_8, myKind, stream)
    end function cudaMemcpyAsync_l_3
!> Rank-4 logical(c_bool) copy with a c_int element count.
!> Calls cudaMemcpyAsync_ with c_loc(dest), c_loc(src), length*1 bytes,
!> myKind and stream, and returns that call's result.
function cudaMemcpyAsync_l_4_c_int(dest,src,length,myKind,stream) &
    result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:,:,:,:)
      logical(c_bool), target, intent(in)    :: src(:,:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: ierr
      ! the 1_8 factor promotes the count to kind 8
      ierr = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*1_8, &
        myKind, stream)
    end function cudaMemcpyAsync_l_4_c_int
!> Rank-4 logical(c_bool) copy with a c_size_t element count.
!> Calls cudaMemcpyAsync_ with c_loc(dest), c_loc(src), length*1 bytes,
!> myKind and stream, and returns that call's result.
function cudaMemcpyAsync_l_4_c_size_t(dest,src,length,myKind,stream) &
    result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:,:,:,:)
      logical(c_bool), target, intent(in)    :: src(:,:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: ierr
      ! element size factor is 1, so the byte count equals length
      ierr = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*1_8, &
        myKind, stream)
    end function cudaMemcpyAsync_l_4_c_size_t

!> Copy a whole rank-4 logical(c_bool) array via cudaMemcpyAsync_.
!> The byte count is size(dest)*1; `src` is not measured, only its
!> address (c_loc) is passed. myKind and stream are passed through and
!> the status from cudaMemcpyAsync_ is returned.
function cudaMemcpyAsync_l_4(dest,src,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:,:,:,:)
      logical(c_bool), target, intent(in)    :: src(:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpyAsync_l_4
      ! Request a c_size_t result from size(): the default-integer form
      ! cannot represent element counts above huge(0).
      cudaMemcpyAsync_l_4 = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        size(dest,kind=c_size_t)*1_8, myKind, stream)
    end function cudaMemcpyAsync_l_4
!> Rank-5 logical(c_bool) copy with a c_int element count.
!> Calls cudaMemcpyAsync_ with c_loc(dest), c_loc(src), length*1 bytes,
!> myKind and stream, and returns that call's result.
function cudaMemcpyAsync_l_5_c_int(dest,src,length,myKind,stream) &
    result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:,:,:,:,:)
      logical(c_bool), target, intent(in)    :: src(:,:,:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: ierr
      ! the 1_8 factor promotes the count to kind 8
      ierr = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*1_8, &
        myKind, stream)
    end function cudaMemcpyAsync_l_5_c_int
!> Rank-5 logical(c_bool) copy with a c_size_t element count.
!> Calls cudaMemcpyAsync_ with c_loc(dest), c_loc(src), length*1 bytes,
!> myKind and stream, and returns that call's result.
function cudaMemcpyAsync_l_5_c_size_t(dest,src,length,myKind,stream) &
    result(ierr)
      use, intrinsic :: iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:,:,:,:,:)
      logical(c_bool), target, intent(in)    :: src(:,:,:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: ierr
      ! element size factor is 1, so the byte count equals length
      ierr = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*1_8, &
        myKind, stream)
    end function cudaMemcpyAsync_l_5_c_size_t

!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of size(dest) times 1 (presumably a byte count).
!> The element count is requested with kind=8 because the default-kind
!> result of size(dest) cannot represent the extent of very large arrays.
!> @param[in,out] dest   logical(c_bool) rank-5 destination; its size alone
!>                       determines the size argument
!> @param[in]     src    logical(c_bool) rank-5 source
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_l_5(dest,src,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool),target,dimension(:,:,:,:,:),intent(inout) :: dest
      logical(c_bool),target,dimension(:,:,:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpyAsync_l_5
      ! kind=8 keeps the element count from overflowing a default integer.
      cudaMemcpyAsync_l_5 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
&size(dest,kind=8)*1_8,myKind,stream)
    end function
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 1 (presumably a byte count).
!> @param[in,out] dest   logical(c_bool) rank-6 destination
!> @param[in]     src    logical(c_bool) rank-6 source
!> @param[in]     length integer(c_int) count, widened to kind 8 and scaled
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_l_6_c_int(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  logical(c_bool),target,intent(inout) :: dest(:,:,:,:,:,:)
  logical(c_bool),target,intent(in)    :: src(:,:,:,:,:,:)
  integer(c_int),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*1_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_l_6_c_int
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 1 (presumably a byte count).
!> @param[in,out] dest   logical(c_bool) rank-6 destination
!> @param[in]     src    logical(c_bool) rank-6 source
!> @param[in]     length integer(c_size_t) count, scaled by 1_8
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_l_6_c_size_t(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  logical(c_bool),target,intent(inout) :: dest(:,:,:,:,:,:)
  logical(c_bool),target,intent(in)    :: src(:,:,:,:,:,:)
  integer(c_size_t),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*1_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_l_6_c_size_t

!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of size(dest) times 1 (presumably a byte count).
!> The element count is requested with kind=8 because the default-kind
!> result of size(dest) cannot represent the extent of very large arrays.
!> @param[in,out] dest   logical(c_bool) rank-6 destination; its size alone
!>                       determines the size argument
!> @param[in]     src    logical(c_bool) rank-6 source
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_l_6(dest,src,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool),target,dimension(:,:,:,:,:,:),intent(inout) :: &
&dest
      logical(c_bool),target,dimension(:,:,:,:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpyAsync_l_6
      ! kind=8 keeps the element count from overflowing a default integer.
      cudaMemcpyAsync_l_6 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
&size(dest,kind=8)*1_8,myKind,stream)
    end function
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 1 (presumably a byte count).
!> @param[in,out] dest   logical(c_bool) rank-7 destination
!> @param[in]     src    logical(c_bool) rank-7 source
!> @param[in]     length integer(c_int) count, widened to kind 8 and scaled
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_l_7_c_int(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  logical(c_bool),target,intent(inout) :: dest(:,:,:,:,:,:,:)
  logical(c_bool),target,intent(in)    :: src(:,:,:,:,:,:,:)
  integer(c_int),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*1_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_l_7_c_int
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 1 (presumably a byte count).
!> @param[in,out] dest   logical(c_bool) rank-7 destination
!> @param[in]     src    logical(c_bool) rank-7 source
!> @param[in]     length integer(c_size_t) count, scaled by 1_8
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_l_7_c_size_t(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  logical(c_bool),target,intent(inout) :: dest(:,:,:,:,:,:,:)
  logical(c_bool),target,intent(in)    :: src(:,:,:,:,:,:,:)
  integer(c_size_t),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*1_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_l_7_c_size_t

!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of size(dest) times 1 (presumably a byte count).
!> The element count is requested with kind=8 because the default-kind
!> result of size(dest) cannot represent the extent of very large arrays.
!> @param[in,out] dest   logical(c_bool) rank-7 destination; its size alone
!>                       determines the size argument
!> @param[in]     src    logical(c_bool) rank-7 source
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_l_7(dest,src,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool),target,dimension(:,:,:,:,:,:,:),intent(inout) :: &
&dest
      logical(c_bool),target,dimension(:,:,:,:,:,:,:),intent(in)    :: &
&src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpyAsync_l_7
      ! kind=8 keeps the element count from overflowing a default integer.
      cudaMemcpyAsync_l_7 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
&size(dest,kind=8)*1_8,myKind,stream)
    end function
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 4 (presumably a byte count).
!> @param[in,out] dest   integer(c_int) scalar destination
!> @param[in]     src    integer(c_int) scalar source
!> @param[in]     length integer(c_int) count, widened to kind 8 and scaled
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i4_0_c_int(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_int),target,intent(inout) :: dest
  integer(c_int),target,intent(in)    :: src
  integer(c_int),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*4_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i4_0_c_int
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 4 (presumably a byte count).
!> @param[in,out] dest   integer(c_int) scalar destination
!> @param[in]     src    integer(c_int) scalar source
!> @param[in]     length integer(c_size_t) count, scaled by 4_8
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i4_0_c_size_t(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_int),target,intent(inout) :: dest
  integer(c_int),target,intent(in)    :: src
  integer(c_size_t),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*4_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i4_0_c_size_t

!> Hands the C addresses of the scalars `dest` and `src` to
!> cudaMemcpyAsync_ with a fixed size argument of 4 (presumably bytes).
!> @param[in,out] dest   integer(c_int) scalar destination
!> @param[in]     src    integer(c_int) scalar source
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i4_0(dest,src,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_int),target,intent(inout) :: dest
  integer(c_int),target,intent(in)    :: src
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Constant size argument used for a single scalar.
  integer(kind=8),parameter :: xfer_size = 4_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i4_0
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 4 (presumably a byte count).
!> @param[in,out] dest   integer(c_int) rank-1 destination
!> @param[in]     src    integer(c_int) rank-1 source
!> @param[in]     length integer(c_int) count, widened to kind 8 and scaled
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i4_1_c_int(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_int),target,intent(inout) :: dest(:)
  integer(c_int),target,intent(in)    :: src(:)
  integer(c_int),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*4_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i4_1_c_int
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 4 (presumably a byte count).
!> @param[in,out] dest   integer(c_int) rank-1 destination
!> @param[in]     src    integer(c_int) rank-1 source
!> @param[in]     length integer(c_size_t) count, scaled by 4_8
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i4_1_c_size_t(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_int),target,intent(inout) :: dest(:)
  integer(c_int),target,intent(in)    :: src(:)
  integer(c_size_t),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*4_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i4_1_c_size_t

!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of size(dest) times 4 (presumably a byte count).
!> The element count is requested with kind=8 because the default-kind
!> result of size(dest) cannot represent the extent of very large arrays.
!> @param[in,out] dest   integer(c_int) rank-1 destination; its size alone
!>                       determines the size argument
!> @param[in]     src    integer(c_int) rank-1 source
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i4_1(dest,src,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),target,dimension(:),intent(inout) :: dest
      integer(c_int),target,dimension(:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpyAsync_i4_1
      ! kind=8 keeps the element count from overflowing a default integer.
      cudaMemcpyAsync_i4_1 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
&size(dest,kind=8)*4_8,myKind,stream)
    end function
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 4 (presumably a byte count).
!> @param[in,out] dest   integer(c_int) rank-2 destination
!> @param[in]     src    integer(c_int) rank-2 source
!> @param[in]     length integer(c_int) count, widened to kind 8 and scaled
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i4_2_c_int(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_int),target,intent(inout) :: dest(:,:)
  integer(c_int),target,intent(in)    :: src(:,:)
  integer(c_int),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*4_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i4_2_c_int
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 4 (presumably a byte count).
!> @param[in,out] dest   integer(c_int) rank-2 destination
!> @param[in]     src    integer(c_int) rank-2 source
!> @param[in]     length integer(c_size_t) count, scaled by 4_8
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i4_2_c_size_t(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_int),target,intent(inout) :: dest(:,:)
  integer(c_int),target,intent(in)    :: src(:,:)
  integer(c_size_t),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*4_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i4_2_c_size_t

!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of size(dest) times 4 (presumably a byte count).
!> The element count is requested with kind=8 because the default-kind
!> result of size(dest) cannot represent the extent of very large arrays.
!> @param[in,out] dest   integer(c_int) rank-2 destination; its size alone
!>                       determines the size argument
!> @param[in]     src    integer(c_int) rank-2 source
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i4_2(dest,src,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),target,dimension(:,:),intent(inout) :: dest
      integer(c_int),target,dimension(:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpyAsync_i4_2
      ! kind=8 keeps the element count from overflowing a default integer.
      cudaMemcpyAsync_i4_2 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
&size(dest,kind=8)*4_8,myKind,stream)
    end function
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 4 (presumably a byte count).
!> @param[in,out] dest   integer(c_int) rank-3 destination
!> @param[in]     src    integer(c_int) rank-3 source
!> @param[in]     length integer(c_int) count, widened to kind 8 and scaled
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i4_3_c_int(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_int),target,intent(inout) :: dest(:,:,:)
  integer(c_int),target,intent(in)    :: src(:,:,:)
  integer(c_int),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*4_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i4_3_c_int
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 4 (presumably a byte count).
!> @param[in,out] dest   integer(c_int) rank-3 destination
!> @param[in]     src    integer(c_int) rank-3 source
!> @param[in]     length integer(c_size_t) count, scaled by 4_8
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i4_3_c_size_t(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_int),target,intent(inout) :: dest(:,:,:)
  integer(c_int),target,intent(in)    :: src(:,:,:)
  integer(c_size_t),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*4_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i4_3_c_size_t

!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of size(dest) times 4 (presumably a byte count).
!> The element count is requested with kind=8 because the default-kind
!> result of size(dest) cannot represent the extent of very large arrays.
!> @param[in,out] dest   integer(c_int) rank-3 destination; its size alone
!>                       determines the size argument
!> @param[in]     src    integer(c_int) rank-3 source
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i4_3(dest,src,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),target,dimension(:,:,:),intent(inout) :: dest
      integer(c_int),target,dimension(:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpyAsync_i4_3
      ! kind=8 keeps the element count from overflowing a default integer.
      cudaMemcpyAsync_i4_3 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
&size(dest,kind=8)*4_8,myKind,stream)
    end function
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 4 (presumably a byte count).
!> @param[in,out] dest   integer(c_int) rank-4 destination
!> @param[in]     src    integer(c_int) rank-4 source
!> @param[in]     length integer(c_int) count, widened to kind 8 and scaled
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i4_4_c_int(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_int),target,intent(inout) :: dest(:,:,:,:)
  integer(c_int),target,intent(in)    :: src(:,:,:,:)
  integer(c_int),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*4_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i4_4_c_int
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 4 (presumably a byte count).
!> @param[in,out] dest   integer(c_int) rank-4 destination
!> @param[in]     src    integer(c_int) rank-4 source
!> @param[in]     length integer(c_size_t) count, scaled by 4_8
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i4_4_c_size_t(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_int),target,intent(inout) :: dest(:,:,:,:)
  integer(c_int),target,intent(in)    :: src(:,:,:,:)
  integer(c_size_t),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*4_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i4_4_c_size_t

!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of size(dest) times 4 (presumably a byte count).
!> The element count is requested with kind=8 because the default-kind
!> result of size(dest) cannot represent the extent of very large arrays.
!> @param[in,out] dest   integer(c_int) rank-4 destination; its size alone
!>                       determines the size argument
!> @param[in]     src    integer(c_int) rank-4 source
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i4_4(dest,src,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),target,dimension(:,:,:,:),intent(inout) :: dest
      integer(c_int),target,dimension(:,:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpyAsync_i4_4
      ! kind=8 keeps the element count from overflowing a default integer.
      cudaMemcpyAsync_i4_4 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
&size(dest,kind=8)*4_8,myKind,stream)
    end function
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 4 (presumably a byte count).
!> @param[in,out] dest   integer(c_int) rank-5 destination
!> @param[in]     src    integer(c_int) rank-5 source
!> @param[in]     length integer(c_int) count, widened to kind 8 and scaled
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i4_5_c_int(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_int),target,intent(inout) :: dest(:,:,:,:,:)
  integer(c_int),target,intent(in)    :: src(:,:,:,:,:)
  integer(c_int),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*4_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i4_5_c_int
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 4 (presumably a byte count).
!> @param[in,out] dest   integer(c_int) rank-5 destination
!> @param[in]     src    integer(c_int) rank-5 source
!> @param[in]     length integer(c_size_t) count, scaled by 4_8
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i4_5_c_size_t(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_int),target,intent(inout) :: dest(:,:,:,:,:)
  integer(c_int),target,intent(in)    :: src(:,:,:,:,:)
  integer(c_size_t),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*4_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i4_5_c_size_t

!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of size(dest) times 4 (presumably a byte count).
!> The element count is requested with kind=8 because the default-kind
!> result of size(dest) cannot represent the extent of very large arrays.
!> @param[in,out] dest   integer(c_int) rank-5 destination; its size alone
!>                       determines the size argument
!> @param[in]     src    integer(c_int) rank-5 source
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i4_5(dest,src,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),target,dimension(:,:,:,:,:),intent(inout) :: dest
      integer(c_int),target,dimension(:,:,:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpyAsync_i4_5
      ! kind=8 keeps the element count from overflowing a default integer.
      cudaMemcpyAsync_i4_5 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
&size(dest,kind=8)*4_8,myKind,stream)
    end function
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 4 (presumably a byte count).
!> @param[in,out] dest   integer(c_int) rank-6 destination
!> @param[in]     src    integer(c_int) rank-6 source
!> @param[in]     length integer(c_int) count, widened to kind 8 and scaled
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i4_6_c_int(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_int),target,intent(inout) :: dest(:,:,:,:,:,:)
  integer(c_int),target,intent(in)    :: src(:,:,:,:,:,:)
  integer(c_int),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*4_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i4_6_c_int
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 4 (presumably a byte count).
!> @param[in,out] dest   integer(c_int) rank-6 destination
!> @param[in]     src    integer(c_int) rank-6 source
!> @param[in]     length integer(c_size_t) count, scaled by 4_8
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i4_6_c_size_t(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_int),target,intent(inout) :: dest(:,:,:,:,:,:)
  integer(c_int),target,intent(in)    :: src(:,:,:,:,:,:)
  integer(c_size_t),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*4_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i4_6_c_size_t

!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of size(dest) times 4 (presumably a byte count).
!> The element count is requested with kind=8 because the default-kind
!> result of size(dest) cannot represent the extent of very large arrays.
!> @param[in,out] dest   integer(c_int) rank-6 destination; its size alone
!>                       determines the size argument
!> @param[in]     src    integer(c_int) rank-6 source
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i4_6(dest,src,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),target,dimension(:,:,:,:,:,:),intent(inout) :: dest
      integer(c_int),target,dimension(:,:,:,:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpyAsync_i4_6
      ! kind=8 keeps the element count from overflowing a default integer.
      cudaMemcpyAsync_i4_6 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
&size(dest,kind=8)*4_8,myKind,stream)
    end function
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 4 (presumably a byte count).
!> @param[in,out] dest   integer(c_int) rank-7 destination
!> @param[in]     src    integer(c_int) rank-7 source
!> @param[in]     length integer(c_int) count, widened to kind 8 and scaled
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i4_7_c_int(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_int),target,intent(inout) :: dest(:,:,:,:,:,:,:)
  integer(c_int),target,intent(in)    :: src(:,:,:,:,:,:,:)
  integer(c_int),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*4_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i4_7_c_int
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 4 (presumably a byte count).
!> @param[in,out] dest   integer(c_int) rank-7 destination
!> @param[in]     src    integer(c_int) rank-7 source
!> @param[in]     length integer(c_size_t) count, scaled by 4_8
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i4_7_c_size_t(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_int),target,intent(inout) :: dest(:,:,:,:,:,:,:)
  integer(c_int),target,intent(in)    :: src(:,:,:,:,:,:,:)
  integer(c_size_t),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*4_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i4_7_c_size_t

!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of size(dest) times 4 (presumably a byte count).
!> The element count is requested with kind=8 because the default-kind
!> result of size(dest) cannot represent the extent of very large arrays.
!> @param[in,out] dest   integer(c_int) rank-7 destination; its size alone
!>                       determines the size argument
!> @param[in]     src    integer(c_int) rank-7 source
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i4_7(dest,src,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),target,dimension(:,:,:,:,:,:,:),intent(inout) :: &
&dest
      integer(c_int),target,dimension(:,:,:,:,:,:,:),intent(in)    :: &
&src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpyAsync_i4_7
      ! kind=8 keeps the element count from overflowing a default integer.
      cudaMemcpyAsync_i4_7 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
&size(dest,kind=8)*4_8,myKind,stream)
    end function
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 8 (presumably a byte count).
!> @param[in,out] dest   integer(c_long) scalar destination
!> @param[in]     src    integer(c_long) scalar source
!> @param[in]     length integer(c_int) count, widened to kind 8 and scaled
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i8_0_c_int(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_long),target,intent(inout) :: dest
  integer(c_long),target,intent(in)    :: src
  integer(c_int),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*8_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i8_0_c_int
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 8 (presumably a byte count).
!> @param[in,out] dest   integer(c_long) scalar destination
!> @param[in]     src    integer(c_long) scalar source
!> @param[in]     length integer(c_size_t) count, scaled by 8_8
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i8_0_c_size_t(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_long),target,intent(inout) :: dest
  integer(c_long),target,intent(in)    :: src
  integer(c_size_t),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*8_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i8_0_c_size_t

!> Hands the C addresses of the scalars `dest` and `src` to
!> cudaMemcpyAsync_ with a fixed size argument of 8 (presumably bytes).
!> @param[in,out] dest   integer(c_long) scalar destination
!> @param[in]     src    integer(c_long) scalar source
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i8_0(dest,src,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_long),target,intent(inout) :: dest
  integer(c_long),target,intent(in)    :: src
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Constant size argument used for a single scalar.
  integer(kind=8),parameter :: xfer_size = 8_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i8_0
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 8 (presumably a byte count).
!> @param[in,out] dest   integer(c_long) rank-1 destination
!> @param[in]     src    integer(c_long) rank-1 source
!> @param[in]     length integer(c_int) count, widened to kind 8 and scaled
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i8_1_c_int(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_long),target,intent(inout) :: dest(:)
  integer(c_long),target,intent(in)    :: src(:)
  integer(c_int),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*8_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i8_1_c_int
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 8 (presumably a byte count).
!> @param[in,out] dest   integer(c_long) rank-1 destination
!> @param[in]     src    integer(c_long) rank-1 source
!> @param[in]     length integer(c_size_t) count, scaled by 8_8
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i8_1_c_size_t(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_long),target,intent(inout) :: dest(:)
  integer(c_long),target,intent(in)    :: src(:)
  integer(c_size_t),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*8_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i8_1_c_size_t

!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of size(dest) times 8 (presumably a byte count).
!> The element count is requested with kind=8 because the default-kind
!> result of size(dest) cannot represent the extent of very large arrays.
!> @param[in,out] dest   integer(c_long) rank-1 destination; its size alone
!>                       determines the size argument
!> @param[in]     src    integer(c_long) rank-1 source
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i8_1(dest,src,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:),intent(inout) :: dest
      integer(c_long),target,dimension(:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpyAsync_i8_1
      ! kind=8 keeps the element count from overflowing a default integer.
      cudaMemcpyAsync_i8_1 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
&size(dest,kind=8)*8_8,myKind,stream)
    end function
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 8 (presumably a byte count).
!> @param[in,out] dest   integer(c_long) rank-2 destination
!> @param[in]     src    integer(c_long) rank-2 source
!> @param[in]     length integer(c_int) count, widened to kind 8 and scaled
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i8_2_c_int(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_long),target,intent(inout) :: dest(:,:)
  integer(c_long),target,intent(in)    :: src(:,:)
  integer(c_int),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*8_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i8_2_c_int
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 8 (presumably a byte count).
!> @param[in,out] dest   integer(c_long) rank-2 destination
!> @param[in]     src    integer(c_long) rank-2 source
!> @param[in]     length integer(c_size_t) count, scaled by 8_8
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i8_2_c_size_t(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_long),target,intent(inout) :: dest(:,:)
  integer(c_long),target,intent(in)    :: src(:,:)
  integer(c_size_t),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*8_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i8_2_c_size_t

!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of size(dest) times 8 (presumably a byte count).
!> The element count is requested with kind=8 because the default-kind
!> result of size(dest) cannot represent the extent of very large arrays.
!> @param[in,out] dest   integer(c_long) rank-2 destination; its size alone
!>                       determines the size argument
!> @param[in]     src    integer(c_long) rank-2 source
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i8_2(dest,src,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:),intent(inout) :: dest
      integer(c_long),target,dimension(:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpyAsync_i8_2
      ! kind=8 keeps the element count from overflowing a default integer.
      cudaMemcpyAsync_i8_2 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
&size(dest,kind=8)*8_8,myKind,stream)
    end function
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 8 (presumably a byte count).
!> @param[in,out] dest   integer(c_long) rank-3 destination
!> @param[in]     src    integer(c_long) rank-3 source
!> @param[in]     length integer(c_int) count, widened to kind 8 and scaled
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i8_3_c_int(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_long),target,intent(inout) :: dest(:,:,:)
  integer(c_long),target,intent(in)    :: src(:,:,:)
  integer(c_int),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*8_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i8_3_c_int
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 8 (presumably a byte count).
!> @param[in,out] dest   integer(c_long) rank-3 destination
!> @param[in]     src    integer(c_long) rank-3 source
!> @param[in]     length integer(c_size_t) count, scaled by 8_8
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i8_3_c_size_t(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_long),target,intent(inout) :: dest(:,:,:)
  integer(c_long),target,intent(in)    :: src(:,:,:)
  integer(c_size_t),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*8_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i8_3_c_size_t

!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of size(dest) times 8 (presumably a byte count).
!> The element count is requested with kind=8 because the default-kind
!> result of size(dest) cannot represent the extent of very large arrays.
!> @param[in,out] dest   integer(c_long) rank-3 destination; its size alone
!>                       determines the size argument
!> @param[in]     src    integer(c_long) rank-3 source
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i8_3(dest,src,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:,:),intent(inout) :: dest
      integer(c_long),target,dimension(:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpyAsync_i8_3
      ! kind=8 keeps the element count from overflowing a default integer.
      cudaMemcpyAsync_i8_3 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
&size(dest,kind=8)*8_8,myKind,stream)
    end function
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 8 (presumably a byte count).
!> @param[in,out] dest   integer(c_long) rank-4 destination
!> @param[in]     src    integer(c_long) rank-4 source
!> @param[in]     length integer(c_int) count, widened to kind 8 and scaled
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i8_4_c_int(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_long),target,intent(inout) :: dest(:,:,:,:)
  integer(c_long),target,intent(in)    :: src(:,:,:,:)
  integer(c_int),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*8_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i8_4_c_int
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 8 (presumably a byte count).
!> @param[in,out] dest   integer(c_long) rank-4 destination
!> @param[in]     src    integer(c_long) rank-4 source
!> @param[in]     length integer(c_size_t) count, scaled by 8_8
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i8_4_c_size_t(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_long),target,intent(inout) :: dest(:,:,:,:)
  integer(c_long),target,intent(in)    :: src(:,:,:,:)
  integer(c_size_t),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*8_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i8_4_c_size_t

!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of size(dest) times 8 (presumably a byte count).
!> The element count is requested with kind=8 because the default-kind
!> result of size(dest) cannot represent the extent of very large arrays.
!> @param[in,out] dest   integer(c_long) rank-4 destination; its size alone
!>                       determines the size argument
!> @param[in]     src    integer(c_long) rank-4 source
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i8_4(dest,src,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:,:,:),intent(inout) :: dest
      integer(c_long),target,dimension(:,:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpyAsync_i8_4
      ! kind=8 keeps the element count from overflowing a default integer.
      cudaMemcpyAsync_i8_4 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
&size(dest,kind=8)*8_8,myKind,stream)
    end function
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 8 (presumably a byte count).
!> @param[in,out] dest   integer(c_long) rank-5 destination
!> @param[in]     src    integer(c_long) rank-5 source
!> @param[in]     length integer(c_int) count, widened to kind 8 and scaled
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i8_5_c_int(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_long),target,intent(inout) :: dest(:,:,:,:,:)
  integer(c_long),target,intent(in)    :: src(:,:,:,:,:)
  integer(c_int),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*8_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i8_5_c_int
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 8 (presumably a byte count).
!> @param[in,out] dest   integer(c_long) rank-5 destination
!> @param[in]     src    integer(c_long) rank-5 source
!> @param[in]     length integer(c_size_t) count, scaled by 8_8
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i8_5_c_size_t(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_long),target,intent(inout) :: dest(:,:,:,:,:)
  integer(c_long),target,intent(in)    :: src(:,:,:,:,:)
  integer(c_size_t),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*8_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i8_5_c_size_t

!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of size(dest) times 8 (presumably a byte count).
!> The element count is requested with kind=8 because the default-kind
!> result of size(dest) cannot represent the extent of very large arrays.
!> @param[in,out] dest   integer(c_long) rank-5 destination; its size alone
!>                       determines the size argument
!> @param[in]     src    integer(c_long) rank-5 source
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i8_5(dest,src,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:,:,:,:),intent(inout) :: dest
      integer(c_long),target,dimension(:,:,:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpyAsync_i8_5
      ! kind=8 keeps the element count from overflowing a default integer.
      cudaMemcpyAsync_i8_5 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
&size(dest,kind=8)*8_8,myKind,stream)
    end function
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 8 (presumably a byte count).
!> @param[in,out] dest   integer(c_long) rank-6 destination
!> @param[in]     src    integer(c_long) rank-6 source
!> @param[in]     length integer(c_int) count, widened to kind 8 and scaled
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i8_6_c_int(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_long),target,intent(inout) :: dest(:,:,:,:,:,:)
  integer(c_long),target,intent(in)    :: src(:,:,:,:,:,:)
  integer(c_int),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*8_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i8_6_c_int
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 8 (presumably a byte count).
!> @param[in,out] dest   integer(c_long) rank-6 destination
!> @param[in]     src    integer(c_long) rank-6 source
!> @param[in]     length integer(c_size_t) count, scaled by 8_8
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i8_6_c_size_t(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_long),target,intent(inout) :: dest(:,:,:,:,:,:)
  integer(c_long),target,intent(in)    :: src(:,:,:,:,:,:)
  integer(c_size_t),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*8_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i8_6_c_size_t

!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of size(dest) times 8 (presumably a byte count).
!> The element count is requested with kind=8 because the default-kind
!> result of size(dest) cannot represent the extent of very large arrays.
!> @param[in,out] dest   integer(c_long) rank-6 destination; its size alone
!>                       determines the size argument
!> @param[in]     src    integer(c_long) rank-6 source
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i8_6(dest,src,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:,:,:,:,:),intent(inout) :: &
&dest
      integer(c_long),target,dimension(:,:,:,:,:,:),intent(in)    :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpyAsync_i8_6
      ! kind=8 keeps the element count from overflowing a default integer.
      cudaMemcpyAsync_i8_6 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
&size(dest,kind=8)*8_8,myKind,stream)
    end function
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 8 (presumably a byte count).
!> @param[in,out] dest   integer(c_long) rank-7 destination
!> @param[in]     src    integer(c_long) rank-7 source
!> @param[in]     length integer(c_int) count, widened to kind 8 and scaled
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i8_7_c_int(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_long),target,intent(inout) :: dest(:,:,:,:,:,:,:)
  integer(c_long),target,intent(in)    :: src(:,:,:,:,:,:,:)
  integer(c_int),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*8_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i8_7_c_int
!> Hands the C addresses of `dest` and `src` to cudaMemcpyAsync_ together
!> with a size argument of `length` times 8 (presumably a byte count).
!> @param[in,out] dest   integer(c_long) rank-7 destination
!> @param[in]     src    integer(c_long) rank-7 source
!> @param[in]     length integer(c_size_t) count, scaled by 8_8
!> @param         myKind forwarded unchanged
!> @param         stream forwarded unchanged
!> @return value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_i8_7_c_size_t(dest,src,length,myKind,stream) result(ret)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_long),target,intent(inout) :: dest(:,:,:,:,:,:,:)
  integer(c_long),target,intent(in)    :: src(:,:,:,:,:,:,:)
  integer(c_size_t),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  integer(kind=8) :: xfer_size
  ! Scale the caller-supplied count in 8-byte integer arithmetic.
  xfer_size = length*8_8
  ret = cudaMemcpyAsync_(c_loc(dest),c_loc(src),xfer_size,myKind,stream)
end function cudaMemcpyAsync_i8_7_c_size_t

!> Hand a whole-array copy of integer(c_long) data to cudaMemcpyAsync_.
!> The byte count is size(dest) times 8; the extent of `src` is not consulted.
!> @param[in,out] dest   rank-7 assumed-shape array; c_loc(dest) is the destination address
!> @param[in]     src    rank-7 assumed-shape array; c_loc(src) is the source address
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_i8_7(dest,src,myKind,stream)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  integer(c_long),target,dimension(:,:,:,:,:,:,:),intent(inout) :: dest
  integer(c_long),target,dimension(:,:,:,:,:,:,:),intent(in)    :: src
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: cudaMemcpyAsync_i8_7
  ! Request the element count as c_size_t: the default-integer result of
  ! size() cannot represent more than huge(0) elements, so very large
  ! arrays would otherwise yield a wrong byte count.
  cudaMemcpyAsync_i8_7 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
    size(dest,kind=c_size_t)*8_8,myKind,stream)
end function cudaMemcpyAsync_i8_7
!> Hand a copy of `length` elements (4 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_float) scalar; c_loc(dest) is the destination address
!> @param[in]     src    real(c_float) scalar; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_int); multiplied by 4 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r4_0_c_int(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_int, c_float, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_float), target, intent(inout) :: dest
  real(c_float), target, intent(in) :: src
  integer(c_int), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, kept kind-8 so length*elem_bytes is a 64-bit product.
  integer(kind=8), parameter :: elem_bytes = 4_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r4_0_c_int
!> Hand a copy of `length` elements (4 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_float) scalar; c_loc(dest) is the destination address
!> @param[in]     src    real(c_float) scalar; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_size_t); multiplied by 4 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r4_0_c_size_t(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_size_t, c_float, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_float), target, intent(inout) :: dest
  real(c_float), target, intent(in) :: src
  integer(c_size_t), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, expressed as a kind-8 constant.
  integer(kind=8), parameter :: elem_bytes = 4_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r4_0_c_size_t

!> Hand a copy of one real(c_float) value (fixed 4 bytes) to cudaMemcpyAsync_.
!> @param[in,out] dest   scalar whose C address is the destination
!> @param[in]     src    scalar whose C address is the source
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r4_0(dest,src,myKind,stream) result(ret)
  use, intrinsic :: iso_c_binding, only: c_float, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_float), target, intent(inout) :: dest
  real(c_float), target, intent(in) :: src
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Byte count of a single element, as a kind-8 constant.
  integer(kind=8), parameter :: nbytes = 4_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), nbytes, myKind, stream)
end function cudaMemcpyAsync_r4_0
!> Hand a copy of `length` elements (4 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_float), rank-1 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_float), rank-1 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_int); multiplied by 4 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r4_1_c_int(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_int, c_float, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_float), dimension(:), target, intent(inout) :: dest
  real(c_float), dimension(:), target, intent(in) :: src
  integer(c_int), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, kept kind-8 so length*elem_bytes is a 64-bit product.
  integer(kind=8), parameter :: elem_bytes = 4_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r4_1_c_int
!> Hand a copy of `length` elements (4 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_float), rank-1 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_float), rank-1 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_size_t); multiplied by 4 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r4_1_c_size_t(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_size_t, c_float, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_float), dimension(:), target, intent(inout) :: dest
  real(c_float), dimension(:), target, intent(in) :: src
  integer(c_size_t), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, expressed as a kind-8 constant.
  integer(kind=8), parameter :: elem_bytes = 4_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r4_1_c_size_t

!> Hand a whole-array copy of real(c_float) data to cudaMemcpyAsync_.
!> The byte count is size(dest) times 4; the extent of `src` is not consulted.
!> @param[in,out] dest   rank-1 assumed-shape array; c_loc(dest) is the destination address
!> @param[in]     src    rank-1 assumed-shape array; c_loc(src) is the source address
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r4_1(dest,src,myKind,stream)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_float),target,dimension(:),intent(inout) :: dest
  real(c_float),target,dimension(:),intent(in)    :: src
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: cudaMemcpyAsync_r4_1
  ! Request the element count as c_size_t: the default-integer result of
  ! size() cannot represent more than huge(0) elements, so very large
  ! arrays would otherwise yield a wrong byte count.
  cudaMemcpyAsync_r4_1 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
    size(dest,kind=c_size_t)*4_8,myKind,stream)
end function cudaMemcpyAsync_r4_1
!> Hand a copy of `length` elements (4 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_float), rank-2 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_float), rank-2 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_int); multiplied by 4 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r4_2_c_int(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_int, c_float, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_float), dimension(:,:), target, intent(inout) :: dest
  real(c_float), dimension(:,:), target, intent(in) :: src
  integer(c_int), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, kept kind-8 so length*elem_bytes is a 64-bit product.
  integer(kind=8), parameter :: elem_bytes = 4_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r4_2_c_int
!> Hand a copy of `length` elements (4 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_float), rank-2 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_float), rank-2 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_size_t); multiplied by 4 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r4_2_c_size_t(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_size_t, c_float, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_float), dimension(:,:), target, intent(inout) :: dest
  real(c_float), dimension(:,:), target, intent(in) :: src
  integer(c_size_t), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, expressed as a kind-8 constant.
  integer(kind=8), parameter :: elem_bytes = 4_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r4_2_c_size_t

!> Hand a whole-array copy of real(c_float) data to cudaMemcpyAsync_.
!> The byte count is size(dest) times 4; the extent of `src` is not consulted.
!> @param[in,out] dest   rank-2 assumed-shape array; c_loc(dest) is the destination address
!> @param[in]     src    rank-2 assumed-shape array; c_loc(src) is the source address
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r4_2(dest,src,myKind,stream)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_float),target,dimension(:,:),intent(inout) :: dest
  real(c_float),target,dimension(:,:),intent(in)    :: src
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: cudaMemcpyAsync_r4_2
  ! Request the element count as c_size_t: the default-integer result of
  ! size() cannot represent more than huge(0) elements, so very large
  ! arrays would otherwise yield a wrong byte count.
  cudaMemcpyAsync_r4_2 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
    size(dest,kind=c_size_t)*4_8,myKind,stream)
end function cudaMemcpyAsync_r4_2
!> Hand a copy of `length` elements (4 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_float), rank-3 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_float), rank-3 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_int); multiplied by 4 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r4_3_c_int(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_int, c_float, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_float), dimension(:,:,:), target, intent(inout) :: dest
  real(c_float), dimension(:,:,:), target, intent(in) :: src
  integer(c_int), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, kept kind-8 so length*elem_bytes is a 64-bit product.
  integer(kind=8), parameter :: elem_bytes = 4_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r4_3_c_int
!> Hand a copy of `length` elements (4 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_float), rank-3 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_float), rank-3 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_size_t); multiplied by 4 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r4_3_c_size_t(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_size_t, c_float, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_float), dimension(:,:,:), target, intent(inout) :: dest
  real(c_float), dimension(:,:,:), target, intent(in) :: src
  integer(c_size_t), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, expressed as a kind-8 constant.
  integer(kind=8), parameter :: elem_bytes = 4_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r4_3_c_size_t

!> Hand a whole-array copy of real(c_float) data to cudaMemcpyAsync_.
!> The byte count is size(dest) times 4; the extent of `src` is not consulted.
!> @param[in,out] dest   rank-3 assumed-shape array; c_loc(dest) is the destination address
!> @param[in]     src    rank-3 assumed-shape array; c_loc(src) is the source address
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r4_3(dest,src,myKind,stream)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_float),target,dimension(:,:,:),intent(inout) :: dest
  real(c_float),target,dimension(:,:,:),intent(in)    :: src
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: cudaMemcpyAsync_r4_3
  ! Request the element count as c_size_t: the default-integer result of
  ! size() cannot represent more than huge(0) elements, so very large
  ! arrays would otherwise yield a wrong byte count.
  cudaMemcpyAsync_r4_3 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
    size(dest,kind=c_size_t)*4_8,myKind,stream)
end function cudaMemcpyAsync_r4_3
!> Hand a copy of `length` elements (4 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_float), rank-4 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_float), rank-4 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_int); multiplied by 4 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r4_4_c_int(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_int, c_float, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_float), dimension(:,:,:,:), target, intent(inout) :: dest
  real(c_float), dimension(:,:,:,:), target, intent(in) :: src
  integer(c_int), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, kept kind-8 so length*elem_bytes is a 64-bit product.
  integer(kind=8), parameter :: elem_bytes = 4_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r4_4_c_int
!> Hand a copy of `length` elements (4 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_float), rank-4 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_float), rank-4 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_size_t); multiplied by 4 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r4_4_c_size_t(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_size_t, c_float, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_float), dimension(:,:,:,:), target, intent(inout) :: dest
  real(c_float), dimension(:,:,:,:), target, intent(in) :: src
  integer(c_size_t), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, expressed as a kind-8 constant.
  integer(kind=8), parameter :: elem_bytes = 4_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r4_4_c_size_t

!> Hand a whole-array copy of real(c_float) data to cudaMemcpyAsync_.
!> The byte count is size(dest) times 4; the extent of `src` is not consulted.
!> @param[in,out] dest   rank-4 assumed-shape array; c_loc(dest) is the destination address
!> @param[in]     src    rank-4 assumed-shape array; c_loc(src) is the source address
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r4_4(dest,src,myKind,stream)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_float),target,dimension(:,:,:,:),intent(inout) :: dest
  real(c_float),target,dimension(:,:,:,:),intent(in)    :: src
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: cudaMemcpyAsync_r4_4
  ! Request the element count as c_size_t: the default-integer result of
  ! size() cannot represent more than huge(0) elements, so very large
  ! arrays would otherwise yield a wrong byte count.
  cudaMemcpyAsync_r4_4 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
    size(dest,kind=c_size_t)*4_8,myKind,stream)
end function cudaMemcpyAsync_r4_4
!> Hand a copy of `length` elements (4 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_float), rank-5 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_float), rank-5 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_int); multiplied by 4 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r4_5_c_int(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_int, c_float, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_float), dimension(:,:,:,:,:), target, intent(inout) :: dest
  real(c_float), dimension(:,:,:,:,:), target, intent(in) :: src
  integer(c_int), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, kept kind-8 so length*elem_bytes is a 64-bit product.
  integer(kind=8), parameter :: elem_bytes = 4_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r4_5_c_int
!> Hand a copy of `length` elements (4 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_float), rank-5 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_float), rank-5 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_size_t); multiplied by 4 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r4_5_c_size_t(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_size_t, c_float, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_float), dimension(:,:,:,:,:), target, intent(inout) :: dest
  real(c_float), dimension(:,:,:,:,:), target, intent(in) :: src
  integer(c_size_t), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, expressed as a kind-8 constant.
  integer(kind=8), parameter :: elem_bytes = 4_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r4_5_c_size_t

!> Hand a whole-array copy of real(c_float) data to cudaMemcpyAsync_.
!> The byte count is size(dest) times 4; the extent of `src` is not consulted.
!> @param[in,out] dest   rank-5 assumed-shape array; c_loc(dest) is the destination address
!> @param[in]     src    rank-5 assumed-shape array; c_loc(src) is the source address
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r4_5(dest,src,myKind,stream)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_float),target,dimension(:,:,:,:,:),intent(inout) :: dest
  real(c_float),target,dimension(:,:,:,:,:),intent(in)    :: src
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: cudaMemcpyAsync_r4_5
  ! Request the element count as c_size_t: the default-integer result of
  ! size() cannot represent more than huge(0) elements, so very large
  ! arrays would otherwise yield a wrong byte count.
  cudaMemcpyAsync_r4_5 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
    size(dest,kind=c_size_t)*4_8,myKind,stream)
end function cudaMemcpyAsync_r4_5
!> Hand a copy of `length` elements (4 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_float), rank-6 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_float), rank-6 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_int); multiplied by 4 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r4_6_c_int(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_int, c_float, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_float), dimension(:,:,:,:,:,:), target, intent(inout) :: dest
  real(c_float), dimension(:,:,:,:,:,:), target, intent(in) :: src
  integer(c_int), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, kept kind-8 so length*elem_bytes is a 64-bit product.
  integer(kind=8), parameter :: elem_bytes = 4_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r4_6_c_int
!> Hand a copy of `length` elements (4 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_float), rank-6 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_float), rank-6 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_size_t); multiplied by 4 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r4_6_c_size_t(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_size_t, c_float, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_float), dimension(:,:,:,:,:,:), target, intent(inout) :: dest
  real(c_float), dimension(:,:,:,:,:,:), target, intent(in) :: src
  integer(c_size_t), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, expressed as a kind-8 constant.
  integer(kind=8), parameter :: elem_bytes = 4_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r4_6_c_size_t

!> Hand a whole-array copy of real(c_float) data to cudaMemcpyAsync_.
!> The byte count is size(dest) times 4; the extent of `src` is not consulted.
!> @param[in,out] dest   rank-6 assumed-shape array; c_loc(dest) is the destination address
!> @param[in]     src    rank-6 assumed-shape array; c_loc(src) is the source address
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r4_6(dest,src,myKind,stream)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_float),target,dimension(:,:,:,:,:,:),intent(inout) :: dest
  real(c_float),target,dimension(:,:,:,:,:,:),intent(in)    :: src
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: cudaMemcpyAsync_r4_6
  ! Request the element count as c_size_t: the default-integer result of
  ! size() cannot represent more than huge(0) elements, so very large
  ! arrays would otherwise yield a wrong byte count.
  cudaMemcpyAsync_r4_6 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
    size(dest,kind=c_size_t)*4_8,myKind,stream)
end function cudaMemcpyAsync_r4_6
!> Hand a copy of `length` elements (4 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_float), rank-7 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_float), rank-7 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_int); multiplied by 4 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r4_7_c_int(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_int, c_float, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_float), dimension(:,:,:,:,:,:,:), target, intent(inout) :: dest
  real(c_float), dimension(:,:,:,:,:,:,:), target, intent(in) :: src
  integer(c_int), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, kept kind-8 so length*elem_bytes is a 64-bit product.
  integer(kind=8), parameter :: elem_bytes = 4_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r4_7_c_int
!> Hand a copy of `length` elements (4 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_float), rank-7 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_float), rank-7 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_size_t); multiplied by 4 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r4_7_c_size_t(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_size_t, c_float, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_float), dimension(:,:,:,:,:,:,:), target, intent(inout) :: dest
  real(c_float), dimension(:,:,:,:,:,:,:), target, intent(in) :: src
  integer(c_size_t), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, expressed as a kind-8 constant.
  integer(kind=8), parameter :: elem_bytes = 4_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r4_7_c_size_t

!> Hand a whole-array copy of real(c_float) data to cudaMemcpyAsync_.
!> The byte count is size(dest) times 4; the extent of `src` is not consulted.
!> @param[in,out] dest   rank-7 assumed-shape array; c_loc(dest) is the destination address
!> @param[in]     src    rank-7 assumed-shape array; c_loc(src) is the source address
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r4_7(dest,src,myKind,stream)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_float),target,dimension(:,:,:,:,:,:,:),intent(inout) :: dest
  real(c_float),target,dimension(:,:,:,:,:,:,:),intent(in)    :: src
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: cudaMemcpyAsync_r4_7
  ! Request the element count as c_size_t: the default-integer result of
  ! size() cannot represent more than huge(0) elements, so very large
  ! arrays would otherwise yield a wrong byte count.
  cudaMemcpyAsync_r4_7 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
    size(dest,kind=c_size_t)*4_8,myKind,stream)
end function cudaMemcpyAsync_r4_7
!> Hand a copy of `length` elements (8 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_double) scalar; c_loc(dest) is the destination address
!> @param[in]     src    real(c_double) scalar; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_int); multiplied by 8 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r8_0_c_int(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_int, c_double, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_double), target, intent(inout) :: dest
  real(c_double), target, intent(in) :: src
  integer(c_int), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, kept kind-8 so length*elem_bytes is a 64-bit product.
  integer(kind=8), parameter :: elem_bytes = 8_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r8_0_c_int
!> Hand a copy of `length` elements (8 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_double) scalar; c_loc(dest) is the destination address
!> @param[in]     src    real(c_double) scalar; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_size_t); multiplied by 8 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r8_0_c_size_t(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_size_t, c_double, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_double), target, intent(inout) :: dest
  real(c_double), target, intent(in) :: src
  integer(c_size_t), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, expressed as a kind-8 constant.
  integer(kind=8), parameter :: elem_bytes = 8_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r8_0_c_size_t

!> Hand a copy of one real(c_double) value (fixed 8 bytes) to cudaMemcpyAsync_.
!> @param[in,out] dest   scalar whose C address is the destination
!> @param[in]     src    scalar whose C address is the source
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r8_0(dest,src,myKind,stream) result(ret)
  use, intrinsic :: iso_c_binding, only: c_double, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_double), target, intent(inout) :: dest
  real(c_double), target, intent(in) :: src
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Byte count of a single element, as a kind-8 constant.
  integer(kind=8), parameter :: nbytes = 8_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), nbytes, myKind, stream)
end function cudaMemcpyAsync_r8_0
!> Hand a copy of `length` elements (8 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_double), rank-1 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_double), rank-1 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_int); multiplied by 8 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r8_1_c_int(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_int, c_double, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_double), dimension(:), target, intent(inout) :: dest
  real(c_double), dimension(:), target, intent(in) :: src
  integer(c_int), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, kept kind-8 so length*elem_bytes is a 64-bit product.
  integer(kind=8), parameter :: elem_bytes = 8_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r8_1_c_int
!> Hand a copy of `length` elements (8 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_double), rank-1 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_double), rank-1 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_size_t); multiplied by 8 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r8_1_c_size_t(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_size_t, c_double, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_double), dimension(:), target, intent(inout) :: dest
  real(c_double), dimension(:), target, intent(in) :: src
  integer(c_size_t), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, expressed as a kind-8 constant.
  integer(kind=8), parameter :: elem_bytes = 8_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r8_1_c_size_t

!> Hand a whole-array copy of real(c_double) data to cudaMemcpyAsync_.
!> The byte count is size(dest) times 8; the extent of `src` is not consulted.
!> @param[in,out] dest   rank-1 assumed-shape array; c_loc(dest) is the destination address
!> @param[in]     src    rank-1 assumed-shape array; c_loc(src) is the source address
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r8_1(dest,src,myKind,stream)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_double),target,dimension(:),intent(inout) :: dest
  real(c_double),target,dimension(:),intent(in)    :: src
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: cudaMemcpyAsync_r8_1
  ! Request the element count as c_size_t: the default-integer result of
  ! size() cannot represent more than huge(0) elements, so very large
  ! arrays would otherwise yield a wrong byte count.
  cudaMemcpyAsync_r8_1 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
    size(dest,kind=c_size_t)*8_8,myKind,stream)
end function cudaMemcpyAsync_r8_1
!> Hand a copy of `length` elements (8 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_double), rank-2 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_double), rank-2 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_int); multiplied by 8 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r8_2_c_int(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_int, c_double, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_double), dimension(:,:), target, intent(inout) :: dest
  real(c_double), dimension(:,:), target, intent(in) :: src
  integer(c_int), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, kept kind-8 so length*elem_bytes is a 64-bit product.
  integer(kind=8), parameter :: elem_bytes = 8_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r8_2_c_int
!> Hand a copy of `length` elements (8 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_double), rank-2 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_double), rank-2 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_size_t); multiplied by 8 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r8_2_c_size_t(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_size_t, c_double, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_double), dimension(:,:), target, intent(inout) :: dest
  real(c_double), dimension(:,:), target, intent(in) :: src
  integer(c_size_t), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, expressed as a kind-8 constant.
  integer(kind=8), parameter :: elem_bytes = 8_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r8_2_c_size_t

!> Hand a whole-array copy of real(c_double) data to cudaMemcpyAsync_.
!> The byte count is size(dest) times 8; the extent of `src` is not consulted.
!> @param[in,out] dest   rank-2 assumed-shape array; c_loc(dest) is the destination address
!> @param[in]     src    rank-2 assumed-shape array; c_loc(src) is the source address
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r8_2(dest,src,myKind,stream)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_double),target,dimension(:,:),intent(inout) :: dest
  real(c_double),target,dimension(:,:),intent(in)    :: src
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: cudaMemcpyAsync_r8_2
  ! Request the element count as c_size_t: the default-integer result of
  ! size() cannot represent more than huge(0) elements, so very large
  ! arrays would otherwise yield a wrong byte count.
  cudaMemcpyAsync_r8_2 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
    size(dest,kind=c_size_t)*8_8,myKind,stream)
end function cudaMemcpyAsync_r8_2
!> Hand a copy of `length` elements (8 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_double), rank-3 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_double), rank-3 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_int); multiplied by 8 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r8_3_c_int(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_int, c_double, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_double), dimension(:,:,:), target, intent(inout) :: dest
  real(c_double), dimension(:,:,:), target, intent(in) :: src
  integer(c_int), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, kept kind-8 so length*elem_bytes is a 64-bit product.
  integer(kind=8), parameter :: elem_bytes = 8_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r8_3_c_int
!> Hand a copy of `length` elements (8 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_double), rank-3 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_double), rank-3 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_size_t); multiplied by 8 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r8_3_c_size_t(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_size_t, c_double, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_double), dimension(:,:,:), target, intent(inout) :: dest
  real(c_double), dimension(:,:,:), target, intent(in) :: src
  integer(c_size_t), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, expressed as a kind-8 constant.
  integer(kind=8), parameter :: elem_bytes = 8_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r8_3_c_size_t

!> Hand a whole-array copy of real(c_double) data to cudaMemcpyAsync_.
!> The byte count is size(dest) times 8; the extent of `src` is not consulted.
!> @param[in,out] dest   rank-3 assumed-shape array; c_loc(dest) is the destination address
!> @param[in]     src    rank-3 assumed-shape array; c_loc(src) is the source address
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r8_3(dest,src,myKind,stream)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_double),target,dimension(:,:,:),intent(inout) :: dest
  real(c_double),target,dimension(:,:,:),intent(in)    :: src
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: cudaMemcpyAsync_r8_3
  ! Request the element count as c_size_t: the default-integer result of
  ! size() cannot represent more than huge(0) elements, so very large
  ! arrays would otherwise yield a wrong byte count.
  cudaMemcpyAsync_r8_3 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
    size(dest,kind=c_size_t)*8_8,myKind,stream)
end function cudaMemcpyAsync_r8_3
!> Hand a copy of `length` elements (8 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_double), rank-4 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_double), rank-4 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_int); multiplied by 8 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r8_4_c_int(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_int, c_double, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_double), dimension(:,:,:,:), target, intent(inout) :: dest
  real(c_double), dimension(:,:,:,:), target, intent(in) :: src
  integer(c_int), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, kept kind-8 so length*elem_bytes is a 64-bit product.
  integer(kind=8), parameter :: elem_bytes = 8_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r8_4_c_int
!> Hand a copy of `length` elements (8 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_double), rank-4 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_double), rank-4 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_size_t); multiplied by 8 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r8_4_c_size_t(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_size_t, c_double, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_double), dimension(:,:,:,:), target, intent(inout) :: dest
  real(c_double), dimension(:,:,:,:), target, intent(in) :: src
  integer(c_size_t), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, expressed as a kind-8 constant.
  integer(kind=8), parameter :: elem_bytes = 8_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r8_4_c_size_t

!> Hand a whole-array copy of real(c_double) data to cudaMemcpyAsync_.
!> The byte count is size(dest) times 8; the extent of `src` is not consulted.
!> @param[in,out] dest   rank-4 assumed-shape array; c_loc(dest) is the destination address
!> @param[in]     src    rank-4 assumed-shape array; c_loc(src) is the source address
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r8_4(dest,src,myKind,stream)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_double),target,dimension(:,:,:,:),intent(inout) :: dest
  real(c_double),target,dimension(:,:,:,:),intent(in)    :: src
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: cudaMemcpyAsync_r8_4
  ! Request the element count as c_size_t: the default-integer result of
  ! size() cannot represent more than huge(0) elements, so very large
  ! arrays would otherwise yield a wrong byte count.
  cudaMemcpyAsync_r8_4 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
    size(dest,kind=c_size_t)*8_8,myKind,stream)
end function cudaMemcpyAsync_r8_4
!> Hand a copy of `length` elements (8 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_double), rank-5 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_double), rank-5 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_int); multiplied by 8 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r8_5_c_int(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_int, c_double, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_double), dimension(:,:,:,:,:), target, intent(inout) :: dest
  real(c_double), dimension(:,:,:,:,:), target, intent(in) :: src
  integer(c_int), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, kept kind-8 so length*elem_bytes is a 64-bit product.
  integer(kind=8), parameter :: elem_bytes = 8_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r8_5_c_int
!> Hand a copy of `length` elements (8 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_double), rank-5 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_double), rank-5 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_size_t); multiplied by 8 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r8_5_c_size_t(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_size_t, c_double, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_double), dimension(:,:,:,:,:), target, intent(inout) :: dest
  real(c_double), dimension(:,:,:,:,:), target, intent(in) :: src
  integer(c_size_t), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, expressed as a kind-8 constant.
  integer(kind=8), parameter :: elem_bytes = 8_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r8_5_c_size_t

!> Hand a whole-array copy of real(c_double) data to cudaMemcpyAsync_.
!> The byte count is size(dest) times 8; the extent of `src` is not consulted.
!> @param[in,out] dest   rank-5 assumed-shape array; c_loc(dest) is the destination address
!> @param[in]     src    rank-5 assumed-shape array; c_loc(src) is the source address
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r8_5(dest,src,myKind,stream)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_double),target,dimension(:,:,:,:,:),intent(inout) :: dest
  real(c_double),target,dimension(:,:,:,:,:),intent(in)    :: src
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: cudaMemcpyAsync_r8_5
  ! Request the element count as c_size_t: the default-integer result of
  ! size() cannot represent more than huge(0) elements, so very large
  ! arrays would otherwise yield a wrong byte count.
  cudaMemcpyAsync_r8_5 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
    size(dest,kind=c_size_t)*8_8,myKind,stream)
end function cudaMemcpyAsync_r8_5
!> Hand a copy of `length` elements (8 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_double), rank-6 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_double), rank-6 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_int); multiplied by 8 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r8_6_c_int(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_int, c_double, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_double), dimension(:,:,:,:,:,:), target, intent(inout) :: dest
  real(c_double), dimension(:,:,:,:,:,:), target, intent(in) :: src
  integer(c_int), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, kept kind-8 so length*elem_bytes is a 64-bit product.
  integer(kind=8), parameter :: elem_bytes = 8_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r8_6_c_int
!> Hand a copy of `length` elements (8 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_double), rank-6 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_double), rank-6 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_size_t); multiplied by 8 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r8_6_c_size_t(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_size_t, c_double, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_double), dimension(:,:,:,:,:,:), target, intent(inout) :: dest
  real(c_double), dimension(:,:,:,:,:,:), target, intent(in) :: src
  integer(c_size_t), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, expressed as a kind-8 constant.
  integer(kind=8), parameter :: elem_bytes = 8_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r8_6_c_size_t

!> Hand a whole-array copy of real(c_double) data to cudaMemcpyAsync_.
!> The byte count is size(dest) times 8; the extent of `src` is not consulted.
!> @param[in,out] dest   rank-6 assumed-shape array; c_loc(dest) is the destination address
!> @param[in]     src    rank-6 assumed-shape array; c_loc(src) is the source address
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r8_6(dest,src,myKind,stream)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_double),target,dimension(:,:,:,:,:,:),intent(inout) :: dest
  real(c_double),target,dimension(:,:,:,:,:,:),intent(in)    :: src
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: cudaMemcpyAsync_r8_6
  ! Request the element count as c_size_t: the default-integer result of
  ! size() cannot represent more than huge(0) elements, so very large
  ! arrays would otherwise yield a wrong byte count.
  cudaMemcpyAsync_r8_6 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
    size(dest,kind=c_size_t)*8_8,myKind,stream)
end function cudaMemcpyAsync_r8_6
!> Hand a copy of `length` elements (8 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_double), rank-7 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_double), rank-7 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_int); multiplied by 8 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r8_7_c_int(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_int, c_double, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_double), dimension(:,:,:,:,:,:,:), target, intent(inout) :: dest
  real(c_double), dimension(:,:,:,:,:,:,:), target, intent(in) :: src
  integer(c_int), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, kept kind-8 so length*elem_bytes is a 64-bit product.
  integer(kind=8), parameter :: elem_bytes = 8_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r8_7_c_int
!> Hand a copy of `length` elements (8 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   real(c_double), rank-7 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    real(c_double), rank-7 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_size_t); multiplied by 8 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r8_7_c_size_t(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_size_t, c_double, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_double), dimension(:,:,:,:,:,:,:), target, intent(inout) :: dest
  real(c_double), dimension(:,:,:,:,:,:,:), target, intent(in) :: src
  integer(c_size_t), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Bytes per element, expressed as a kind-8 constant.
  integer(kind=8), parameter :: elem_bytes = 8_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_r8_7_c_size_t

!> Hand a whole-array copy of real(c_double) data to cudaMemcpyAsync_.
!> The byte count is size(dest) times 8; the extent of `src` is not consulted.
!> @param[in,out] dest   rank-7 assumed-shape array; c_loc(dest) is the destination address
!> @param[in]     src    rank-7 assumed-shape array; c_loc(src) is the source address
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_r8_7(dest,src,myKind,stream)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  real(c_double),target,dimension(:,:,:,:,:,:,:),intent(inout) :: dest
  real(c_double),target,dimension(:,:,:,:,:,:,:),intent(in)    :: src
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: cudaMemcpyAsync_r8_7
  ! Request the element count as c_size_t: the default-integer result of
  ! size() cannot represent more than huge(0) elements, so very large
  ! arrays would otherwise yield a wrong byte count.
  cudaMemcpyAsync_r8_7 = cudaMemcpyAsync_(c_loc(dest),c_loc(src),&
    size(dest,kind=c_size_t)*8_8,myKind,stream)
end function cudaMemcpyAsync_r8_7
!> Hand a copy of `length` complex elements (2*4 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   complex(c_float_complex) scalar; c_loc(dest) is the destination address
!> @param[in]     src    complex(c_float_complex) scalar; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_int); multiplied by 8 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_c4_0_c_int(dest,src,length,myKind,stream)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  complex(c_float_complex),target,intent(inout) :: dest
  complex(c_float_complex),target,intent(in)    :: src
  integer(c_int),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: cudaMemcpyAsync_c4_0_c_int
  ! Parenthesize the element size so it is formed in kind 8 first:
  ! `length*2*4_8` groups as (length*2)*4_8, and length*2 is a c_int
  ! product that can overflow before it is widened.
  cudaMemcpyAsync_c4_0_c_int = cudaMemcpyAsync_(c_loc(dest),&
    c_loc(src),length*(2*4_8),myKind,stream)
end function cudaMemcpyAsync_c4_0_c_int
!> Hand a copy of `length` complex elements (2*4 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   complex(c_float_complex) scalar; c_loc(dest) is the destination address
!> @param[in]     src    complex(c_float_complex) scalar; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_size_t); multiplied by 8 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_c4_0_c_size_t(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_size_t, c_float_complex, &
                                         c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  complex(c_float_complex), target, intent(inout) :: dest
  complex(c_float_complex), target, intent(in) :: src
  integer(c_size_t), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Two 4-byte parts per complex element, as a kind-8 constant.
  integer(kind=8), parameter :: elem_bytes = 2*4_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_c4_0_c_size_t

!> Hand a copy of one complex(c_float_complex) value (fixed 2*4 bytes) to
!> cudaMemcpyAsync_.
!> @param[in,out] dest   scalar whose C address is the destination
!> @param[in]     src    scalar whose C address is the source
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_c4_0(dest,src,myKind,stream) result(ret)
  use, intrinsic :: iso_c_binding, only: c_float_complex, c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  complex(c_float_complex), target, intent(inout) :: dest
  complex(c_float_complex), target, intent(in) :: src
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Byte count of a single complex element, as a kind-8 constant.
  integer(kind=8), parameter :: nbytes = 2*4_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), nbytes, myKind, stream)
end function cudaMemcpyAsync_c4_0
!> Hand a copy of `length` complex elements (2*4 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   complex(c_float_complex), rank-1 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    complex(c_float_complex), rank-1 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_int); multiplied by 8 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_c4_1_c_int(dest,src,length,myKind,stream)
  use iso_c_binding
  use cudafor_enums
  use cudafor_types
  implicit none
  complex(c_float_complex),target,dimension(:),intent(inout) :: dest
  complex(c_float_complex),target,dimension(:),intent(in)    :: src
  integer(c_int),intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: cudaMemcpyAsync_c4_1_c_int
  ! Parenthesize the element size so it is formed in kind 8 first:
  ! `length*2*4_8` groups as (length*2)*4_8, and length*2 is a c_int
  ! product that can overflow before it is widened.
  cudaMemcpyAsync_c4_1_c_int = cudaMemcpyAsync_(c_loc(dest),&
    c_loc(src),length*(2*4_8),myKind,stream)
end function cudaMemcpyAsync_c4_1_c_int
!> Hand a copy of `length` complex elements (2*4 bytes each) to cudaMemcpyAsync_.
!> @param[in,out] dest   complex(c_float_complex), rank-1 assumed shape; c_loc(dest) is the destination address
!> @param[in]     src    complex(c_float_complex), rank-1 assumed shape; c_loc(src) is the source address
!> @param[in]     length element count as integer(c_size_t); multiplied by 8 for the byte count
!> @param         myKind memcpy-kind enumerator, passed through as-is
!> @param         stream type(c_ptr) stream handle, passed through as-is
!> @return status value produced by cudaMemcpyAsync_
function cudaMemcpyAsync_c4_1_c_size_t(dest,src,length,myKind,stream) &
    result(ret)
  use, intrinsic :: iso_c_binding, only: c_size_t, c_float_complex, &
                                         c_loc, c_ptr
  use cudafor_enums
  use cudafor_types
  implicit none
  complex(c_float_complex), dimension(:), target, intent(inout) :: dest
  complex(c_float_complex), dimension(:), target, intent(in) :: src
  integer(c_size_t), intent(in) :: length
  integer(kind(cudaMemcpyHostToHost)) :: myKind
  type(c_ptr) :: stream
  integer(kind(cudaSuccess)) :: ret
  ! Two 4-byte parts per complex element, as a kind-8 constant.
  integer(kind=8), parameter :: elem_bytes = 2*4_8

  ret = cudaMemcpyAsync_(c_loc(dest), c_loc(src), length*elem_bytes, &
                         myKind, stream)
end function cudaMemcpyAsync_c4_1_c_size_t

!> Wrapper around cudaMemcpyAsync_ for rank-1 complex(c_float_complex)
!> arrays; the element count is taken from size(dest).
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c4_1(dest,src,myKind,stream) result(istat)
      use, intrinsic :: iso_c_binding, only: c_float_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:)
      complex(c_float_complex), target, intent(in) :: src(:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 4 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 8_c_size_t
      ! Ask for the element count in c_size_t directly: a default-integer
      ! size() result multiplied by 2 could overflow for very large arrays.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        size(dest, kind=c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c4_1
!> Wrapper around cudaMemcpyAsync_ for rank-2 complex(c_float_complex)
!> arrays with an integer(c_int) element count.
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> length    : element count; scaled to bytes (2*4 per element) below
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c4_2_c_int(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_float_complex, c_int, &
        c_loc, c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:)
      complex(c_float_complex), target, intent(in) :: src(:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 4 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 8_c_size_t
      ! Widen length before multiplying so the byte count is never formed
      ! in the narrower c_int arithmetic, where it could overflow.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        int(length, c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c4_2_c_int
!> Wrapper around cudaMemcpyAsync_ for rank-2 complex(c_float_complex)
!> arrays with an integer(c_size_t) element count `length`.
!> dest and src are handed over as C addresses (c_loc); the byte count is
!> length*(2*4); myKind and stream are forwarded unchanged. The result is
!> the status value returned by cudaMemcpyAsync_.
function cudaMemcpyAsync_c4_2_c_size_t(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_float_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:)
      complex(c_float_complex), target, intent(in) :: src(:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! NOTE(review): the _8 suffix presumes kind 8 is what the callee
      ! expects for its size argument - confirm.
      associate (nbytes => length*(2*4_8))
        istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), nbytes, &
          myKind, stream)
      end associate
    end function cudaMemcpyAsync_c4_2_c_size_t

!> Wrapper around cudaMemcpyAsync_ for rank-2 complex(c_float_complex)
!> arrays; the element count is taken from size(dest).
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c4_2(dest,src,myKind,stream) result(istat)
      use, intrinsic :: iso_c_binding, only: c_float_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:)
      complex(c_float_complex), target, intent(in) :: src(:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 4 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 8_c_size_t
      ! Ask for the element count in c_size_t directly: a default-integer
      ! size() result multiplied by 2 could overflow for very large arrays.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        size(dest, kind=c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c4_2
!> Wrapper around cudaMemcpyAsync_ for rank-3 complex(c_float_complex)
!> arrays with an integer(c_int) element count.
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> length    : element count; scaled to bytes (2*4 per element) below
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c4_3_c_int(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_float_complex, c_int, &
        c_loc, c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:)
      complex(c_float_complex), target, intent(in) :: src(:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 4 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 8_c_size_t
      ! Widen length before multiplying so the byte count is never formed
      ! in the narrower c_int arithmetic, where it could overflow.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        int(length, c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c4_3_c_int
!> Wrapper around cudaMemcpyAsync_ for rank-3 complex(c_float_complex)
!> arrays with an integer(c_size_t) element count `length`.
!> dest and src are handed over as C addresses (c_loc); the byte count is
!> length*(2*4); myKind and stream are forwarded unchanged. The result is
!> the status value returned by cudaMemcpyAsync_.
function cudaMemcpyAsync_c4_3_c_size_t(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_float_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:)
      complex(c_float_complex), target, intent(in) :: src(:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! NOTE(review): the _8 suffix presumes kind 8 is what the callee
      ! expects for its size argument - confirm.
      associate (nbytes => length*(2*4_8))
        istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), nbytes, &
          myKind, stream)
      end associate
    end function cudaMemcpyAsync_c4_3_c_size_t

!> Wrapper around cudaMemcpyAsync_ for rank-3 complex(c_float_complex)
!> arrays; the element count is taken from size(dest).
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c4_3(dest,src,myKind,stream) result(istat)
      use, intrinsic :: iso_c_binding, only: c_float_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:)
      complex(c_float_complex), target, intent(in) :: src(:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 4 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 8_c_size_t
      ! Ask for the element count in c_size_t directly: a default-integer
      ! size() result multiplied by 2 could overflow for very large arrays.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        size(dest, kind=c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c4_3
!> Wrapper around cudaMemcpyAsync_ for rank-4 complex(c_float_complex)
!> arrays with an integer(c_int) element count.
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> length    : element count; scaled to bytes (2*4 per element) below
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c4_4_c_int(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_float_complex, c_int, &
        c_loc, c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:,:)
      complex(c_float_complex), target, intent(in) :: src(:,:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 4 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 8_c_size_t
      ! Widen length before multiplying so the byte count is never formed
      ! in the narrower c_int arithmetic, where it could overflow.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        int(length, c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c4_4_c_int
!> Wrapper around cudaMemcpyAsync_ for rank-4 complex(c_float_complex)
!> arrays with an integer(c_size_t) element count `length`.
!> dest and src are handed over as C addresses (c_loc); the byte count is
!> length*(2*4); myKind and stream are forwarded unchanged. The result is
!> the status value returned by cudaMemcpyAsync_.
function cudaMemcpyAsync_c4_4_c_size_t(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_float_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:,:)
      complex(c_float_complex), target, intent(in) :: src(:,:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! NOTE(review): the _8 suffix presumes kind 8 is what the callee
      ! expects for its size argument - confirm.
      associate (nbytes => length*(2*4_8))
        istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), nbytes, &
          myKind, stream)
      end associate
    end function cudaMemcpyAsync_c4_4_c_size_t

!> Wrapper around cudaMemcpyAsync_ for rank-4 complex(c_float_complex)
!> arrays; the element count is taken from size(dest).
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c4_4(dest,src,myKind,stream) result(istat)
      use, intrinsic :: iso_c_binding, only: c_float_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:,:)
      complex(c_float_complex), target, intent(in) :: src(:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 4 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 8_c_size_t
      ! Ask for the element count in c_size_t directly: a default-integer
      ! size() result multiplied by 2 could overflow for very large arrays.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        size(dest, kind=c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c4_4
!> Wrapper around cudaMemcpyAsync_ for rank-5 complex(c_float_complex)
!> arrays with an integer(c_int) element count.
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> length    : element count; scaled to bytes (2*4 per element) below
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c4_5_c_int(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_float_complex, c_int, &
        c_loc, c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:,:,:)
      complex(c_float_complex), target, intent(in) :: src(:,:,:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 4 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 8_c_size_t
      ! Widen length before multiplying so the byte count is never formed
      ! in the narrower c_int arithmetic, where it could overflow.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        int(length, c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c4_5_c_int
!> Wrapper around cudaMemcpyAsync_ for rank-5 complex(c_float_complex)
!> arrays with an integer(c_size_t) element count `length`.
!> dest and src are handed over as C addresses (c_loc); the byte count is
!> length*(2*4); myKind and stream are forwarded unchanged. The result is
!> the status value returned by cudaMemcpyAsync_.
function cudaMemcpyAsync_c4_5_c_size_t(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_float_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:,:,:)
      complex(c_float_complex), target, intent(in) :: src(:,:,:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! NOTE(review): the _8 suffix presumes kind 8 is what the callee
      ! expects for its size argument - confirm.
      associate (nbytes => length*(2*4_8))
        istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), nbytes, &
          myKind, stream)
      end associate
    end function cudaMemcpyAsync_c4_5_c_size_t

!> Wrapper around cudaMemcpyAsync_ for rank-5 complex(c_float_complex)
!> arrays; the element count is taken from size(dest).
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c4_5(dest,src,myKind,stream) result(istat)
      use, intrinsic :: iso_c_binding, only: c_float_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:,:,:)
      complex(c_float_complex), target, intent(in) :: src(:,:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 4 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 8_c_size_t
      ! Ask for the element count in c_size_t directly: a default-integer
      ! size() result multiplied by 2 could overflow for very large arrays.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        size(dest, kind=c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c4_5
!> Wrapper around cudaMemcpyAsync_ for rank-6 complex(c_float_complex)
!> arrays with an integer(c_int) element count.
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> length    : element count; scaled to bytes (2*4 per element) below
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c4_6_c_int(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_float_complex, c_int, &
        c_loc, c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:,:,:,:)
      complex(c_float_complex), target, intent(in) :: src(:,:,:,:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 4 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 8_c_size_t
      ! Widen length before multiplying so the byte count is never formed
      ! in the narrower c_int arithmetic, where it could overflow.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        int(length, c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c4_6_c_int
!> Wrapper around cudaMemcpyAsync_ for rank-6 complex(c_float_complex)
!> arrays with an integer(c_size_t) element count `length`.
!> dest and src are handed over as C addresses (c_loc); the byte count is
!> length*(2*4); myKind and stream are forwarded unchanged. The result is
!> the status value returned by cudaMemcpyAsync_.
function cudaMemcpyAsync_c4_6_c_size_t(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_float_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:,:,:,:)
      complex(c_float_complex), target, intent(in) :: src(:,:,:,:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! NOTE(review): the _8 suffix presumes kind 8 is what the callee
      ! expects for its size argument - confirm.
      associate (nbytes => length*(2*4_8))
        istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), nbytes, &
          myKind, stream)
      end associate
    end function cudaMemcpyAsync_c4_6_c_size_t

!> Wrapper around cudaMemcpyAsync_ for rank-6 complex(c_float_complex)
!> arrays; the element count is taken from size(dest).
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c4_6(dest,src,myKind,stream) result(istat)
      use, intrinsic :: iso_c_binding, only: c_float_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:,:,:,:)
      complex(c_float_complex), target, intent(in) :: src(:,:,:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 4 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 8_c_size_t
      ! Ask for the element count in c_size_t directly: a default-integer
      ! size() result multiplied by 2 could overflow for very large arrays.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        size(dest, kind=c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c4_6
!> Wrapper around cudaMemcpyAsync_ for rank-7 complex(c_float_complex)
!> arrays with an integer(c_int) element count.
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> length    : element count; scaled to bytes (2*4 per element) below
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c4_7_c_int(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_float_complex, c_int, &
        c_loc, c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:,:,:,:,:)
      complex(c_float_complex), target, intent(in) :: src(:,:,:,:,:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 4 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 8_c_size_t
      ! Widen length before multiplying so the byte count is never formed
      ! in the narrower c_int arithmetic, where it could overflow.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        int(length, c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c4_7_c_int
!> Wrapper around cudaMemcpyAsync_ for rank-7 complex(c_float_complex)
!> arrays with an integer(c_size_t) element count `length`.
!> dest and src are handed over as C addresses (c_loc); the byte count is
!> length*(2*4); myKind and stream are forwarded unchanged. The result is
!> the status value returned by cudaMemcpyAsync_.
function cudaMemcpyAsync_c4_7_c_size_t(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_float_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:,:,:,:,:)
      complex(c_float_complex), target, intent(in) :: src(:,:,:,:,:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! NOTE(review): the _8 suffix presumes kind 8 is what the callee
      ! expects for its size argument - confirm.
      associate (nbytes => length*(2*4_8))
        istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), nbytes, &
          myKind, stream)
      end associate
    end function cudaMemcpyAsync_c4_7_c_size_t

!> Wrapper around cudaMemcpyAsync_ for rank-7 complex(c_float_complex)
!> arrays; the element count is taken from size(dest).
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c4_7(dest,src,myKind,stream) result(istat)
      use, intrinsic :: iso_c_binding, only: c_float_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex), target, intent(inout) :: dest(:,:,:,:,:,:,:)
      complex(c_float_complex), target, intent(in) :: src(:,:,:,:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 4 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 8_c_size_t
      ! Ask for the element count in c_size_t directly: a default-integer
      ! size() result multiplied by 2 could overflow for very large arrays.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        size(dest, kind=c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c4_7
!> Wrapper around cudaMemcpyAsync_ for scalar complex(c_double_complex)
!> dest/src with an integer(c_int) element count.
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> length    : element count; scaled to bytes (2*8 per element) below
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c8_0_c_int(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_double_complex, c_int, &
        c_loc, c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest
      complex(c_double_complex), target, intent(in) :: src
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 8 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 16_c_size_t
      ! Widen length before multiplying so the byte count is never formed
      ! in the narrower c_int arithmetic, where it could overflow.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        int(length, c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c8_0_c_int
!> Wrapper around cudaMemcpyAsync_ for scalar complex(c_double_complex)
!> dest/src with an integer(c_size_t) element count `length`.
!> dest and src are handed over as C addresses (c_loc); the byte count is
!> length*(2*8); myKind and stream are forwarded unchanged. The result is
!> the status value returned by cudaMemcpyAsync_.
function cudaMemcpyAsync_c8_0_c_size_t(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_double_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest
      complex(c_double_complex), target, intent(in) :: src
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! NOTE(review): the _8 suffix presumes kind 8 is what the callee
      ! expects for its size argument - confirm.
      associate (nbytes => length*(2*8_8))
        istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), nbytes, &
          myKind, stream)
      end associate
    end function cudaMemcpyAsync_c8_0_c_size_t

!> Wrapper around cudaMemcpyAsync_ for a single complex(c_double_complex)
!> value. dest and src are handed over as C addresses (c_loc) together with
!> a fixed byte count of 2*8; myKind and stream are forwarded unchanged.
!> The result is the status value returned by cudaMemcpyAsync_.
function cudaMemcpyAsync_c8_0(dest,src,myKind,stream) result(istat)
      use, intrinsic :: iso_c_binding, only: c_double_complex, c_loc, c_ptr
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest
      complex(c_double_complex), target, intent(in) :: src
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! NOTE(review): the _8 suffix presumes kind 8 is what the callee
      ! expects for its size argument - confirm.
      associate (nbytes => 2*8_8)
        istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), nbytes, &
          myKind, stream)
      end associate
    end function cudaMemcpyAsync_c8_0
!> Wrapper around cudaMemcpyAsync_ for rank-1 complex(c_double_complex)
!> arrays with an integer(c_int) element count.
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> length    : element count; scaled to bytes (2*8 per element) below
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c8_1_c_int(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_double_complex, c_int, &
        c_loc, c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:)
      complex(c_double_complex), target, intent(in) :: src(:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 8 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 16_c_size_t
      ! Widen length before multiplying so the byte count is never formed
      ! in the narrower c_int arithmetic, where it could overflow.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        int(length, c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c8_1_c_int
!> Wrapper around cudaMemcpyAsync_ for rank-1 complex(c_double_complex)
!> arrays with an integer(c_size_t) element count `length`.
!> dest and src are handed over as C addresses (c_loc); the byte count is
!> length*(2*8); myKind and stream are forwarded unchanged. The result is
!> the status value returned by cudaMemcpyAsync_.
function cudaMemcpyAsync_c8_1_c_size_t(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_double_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:)
      complex(c_double_complex), target, intent(in) :: src(:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! NOTE(review): the _8 suffix presumes kind 8 is what the callee
      ! expects for its size argument - confirm.
      associate (nbytes => length*(2*8_8))
        istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), nbytes, &
          myKind, stream)
      end associate
    end function cudaMemcpyAsync_c8_1_c_size_t

!> Wrapper around cudaMemcpyAsync_ for rank-1 complex(c_double_complex)
!> arrays; the element count is taken from size(dest).
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c8_1(dest,src,myKind,stream) result(istat)
      use, intrinsic :: iso_c_binding, only: c_double_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:)
      complex(c_double_complex), target, intent(in) :: src(:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 8 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 16_c_size_t
      ! Ask for the element count in c_size_t directly: a default-integer
      ! size() result multiplied by 2 could overflow for very large arrays.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        size(dest, kind=c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c8_1
!> Wrapper around cudaMemcpyAsync_ for rank-2 complex(c_double_complex)
!> arrays with an integer(c_int) element count.
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> length    : element count; scaled to bytes (2*8 per element) below
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c8_2_c_int(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_double_complex, c_int, &
        c_loc, c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:)
      complex(c_double_complex), target, intent(in) :: src(:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 8 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 16_c_size_t
      ! Widen length before multiplying so the byte count is never formed
      ! in the narrower c_int arithmetic, where it could overflow.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        int(length, c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c8_2_c_int
!> Wrapper around cudaMemcpyAsync_ for rank-2 complex(c_double_complex)
!> arrays with an integer(c_size_t) element count `length`.
!> dest and src are handed over as C addresses (c_loc); the byte count is
!> length*(2*8); myKind and stream are forwarded unchanged. The result is
!> the status value returned by cudaMemcpyAsync_.
function cudaMemcpyAsync_c8_2_c_size_t(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_double_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:)
      complex(c_double_complex), target, intent(in) :: src(:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! NOTE(review): the _8 suffix presumes kind 8 is what the callee
      ! expects for its size argument - confirm.
      associate (nbytes => length*(2*8_8))
        istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), nbytes, &
          myKind, stream)
      end associate
    end function cudaMemcpyAsync_c8_2_c_size_t

!> Wrapper around cudaMemcpyAsync_ for rank-2 complex(c_double_complex)
!> arrays; the element count is taken from size(dest).
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c8_2(dest,src,myKind,stream) result(istat)
      use, intrinsic :: iso_c_binding, only: c_double_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:)
      complex(c_double_complex), target, intent(in) :: src(:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 8 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 16_c_size_t
      ! Ask for the element count in c_size_t directly: a default-integer
      ! size() result multiplied by 2 could overflow for very large arrays.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        size(dest, kind=c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c8_2
!> Wrapper around cudaMemcpyAsync_ for rank-3 complex(c_double_complex)
!> arrays with an integer(c_int) element count.
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> length    : element count; scaled to bytes (2*8 per element) below
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c8_3_c_int(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_double_complex, c_int, &
        c_loc, c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:)
      complex(c_double_complex), target, intent(in) :: src(:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 8 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 16_c_size_t
      ! Widen length before multiplying so the byte count is never formed
      ! in the narrower c_int arithmetic, where it could overflow.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        int(length, c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c8_3_c_int
!> Wrapper around cudaMemcpyAsync_ for rank-3 complex(c_double_complex)
!> arrays with an integer(c_size_t) element count `length`.
!> dest and src are handed over as C addresses (c_loc); the byte count is
!> length*(2*8); myKind and stream are forwarded unchanged. The result is
!> the status value returned by cudaMemcpyAsync_.
function cudaMemcpyAsync_c8_3_c_size_t(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_double_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:)
      complex(c_double_complex), target, intent(in) :: src(:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! NOTE(review): the _8 suffix presumes kind 8 is what the callee
      ! expects for its size argument - confirm.
      associate (nbytes => length*(2*8_8))
        istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), nbytes, &
          myKind, stream)
      end associate
    end function cudaMemcpyAsync_c8_3_c_size_t

!> Wrapper around cudaMemcpyAsync_ for rank-3 complex(c_double_complex)
!> arrays; the element count is taken from size(dest).
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c8_3(dest,src,myKind,stream) result(istat)
      use, intrinsic :: iso_c_binding, only: c_double_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:)
      complex(c_double_complex), target, intent(in) :: src(:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 8 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 16_c_size_t
      ! Ask for the element count in c_size_t directly: a default-integer
      ! size() result multiplied by 2 could overflow for very large arrays.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        size(dest, kind=c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c8_3
!> Wrapper around cudaMemcpyAsync_ for rank-4 complex(c_double_complex)
!> arrays with an integer(c_int) element count.
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> length    : element count; scaled to bytes (2*8 per element) below
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c8_4_c_int(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_double_complex, c_int, &
        c_loc, c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:,:)
      complex(c_double_complex), target, intent(in) :: src(:,:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 8 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 16_c_size_t
      ! Widen length before multiplying so the byte count is never formed
      ! in the narrower c_int arithmetic, where it could overflow.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        int(length, c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c8_4_c_int
!> Wrapper around cudaMemcpyAsync_ for rank-4 complex(c_double_complex)
!> arrays with an integer(c_size_t) element count `length`.
!> dest and src are handed over as C addresses (c_loc); the byte count is
!> length*(2*8); myKind and stream are forwarded unchanged. The result is
!> the status value returned by cudaMemcpyAsync_.
function cudaMemcpyAsync_c8_4_c_size_t(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_double_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:,:)
      complex(c_double_complex), target, intent(in) :: src(:,:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! NOTE(review): the _8 suffix presumes kind 8 is what the callee
      ! expects for its size argument - confirm.
      associate (nbytes => length*(2*8_8))
        istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), nbytes, &
          myKind, stream)
      end associate
    end function cudaMemcpyAsync_c8_4_c_size_t

!> Wrapper around cudaMemcpyAsync_ for rank-4 complex(c_double_complex)
!> arrays; the element count is taken from size(dest).
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c8_4(dest,src,myKind,stream) result(istat)
      use, intrinsic :: iso_c_binding, only: c_double_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:,:)
      complex(c_double_complex), target, intent(in) :: src(:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 8 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 16_c_size_t
      ! Ask for the element count in c_size_t directly: a default-integer
      ! size() result multiplied by 2 could overflow for very large arrays.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        size(dest, kind=c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c8_4
!> Wrapper around cudaMemcpyAsync_ for rank-5 complex(c_double_complex)
!> arrays with an integer(c_int) element count.
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> length    : element count; scaled to bytes (2*8 per element) below
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c8_5_c_int(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_double_complex, c_int, &
        c_loc, c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:,:,:)
      complex(c_double_complex), target, intent(in) :: src(:,:,:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 8 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 16_c_size_t
      ! Widen length before multiplying so the byte count is never formed
      ! in the narrower c_int arithmetic, where it could overflow.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        int(length, c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c8_5_c_int
!> Wrapper around cudaMemcpyAsync_ for rank-5 complex(c_double_complex)
!> arrays with an integer(c_size_t) element count `length`.
!> dest and src are handed over as C addresses (c_loc); the byte count is
!> length*(2*8); myKind and stream are forwarded unchanged. The result is
!> the status value returned by cudaMemcpyAsync_.
function cudaMemcpyAsync_c8_5_c_size_t(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_double_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:,:,:)
      complex(c_double_complex), target, intent(in) :: src(:,:,:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! NOTE(review): the _8 suffix presumes kind 8 is what the callee
      ! expects for its size argument - confirm.
      associate (nbytes => length*(2*8_8))
        istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), nbytes, &
          myKind, stream)
      end associate
    end function cudaMemcpyAsync_c8_5_c_size_t

!> Wrapper around cudaMemcpyAsync_ for rank-5 complex(c_double_complex)
!> arrays; the element count is taken from size(dest).
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c8_5(dest,src,myKind,stream) result(istat)
      use, intrinsic :: iso_c_binding, only: c_double_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:,:,:)
      complex(c_double_complex), target, intent(in) :: src(:,:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 8 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 16_c_size_t
      ! Ask for the element count in c_size_t directly: a default-integer
      ! size() result multiplied by 2 could overflow for very large arrays.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        size(dest, kind=c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c8_5
!> Wrapper around cudaMemcpyAsync_ for rank-6 complex(c_double_complex)
!> arrays with an integer(c_int) element count.
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> length    : element count; scaled to bytes (2*8 per element) below
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c8_6_c_int(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_double_complex, c_int, &
        c_loc, c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:,:,:,:)
      complex(c_double_complex), target, intent(in) :: src(:,:,:,:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 8 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 16_c_size_t
      ! Widen length before multiplying so the byte count is never formed
      ! in the narrower c_int arithmetic, where it could overflow.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        int(length, c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c8_6_c_int
!> Wrapper around cudaMemcpyAsync_ for rank-6 complex(c_double_complex)
!> arrays with an integer(c_size_t) element count `length`.
!> dest and src are handed over as C addresses (c_loc); the byte count is
!> length*(2*8); myKind and stream are forwarded unchanged. The result is
!> the status value returned by cudaMemcpyAsync_.
function cudaMemcpyAsync_c8_6_c_size_t(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_double_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:,:,:,:)
      complex(c_double_complex), target, intent(in) :: src(:,:,:,:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! NOTE(review): the _8 suffix presumes kind 8 is what the callee
      ! expects for its size argument - confirm.
      associate (nbytes => length*(2*8_8))
        istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), nbytes, &
          myKind, stream)
      end associate
    end function cudaMemcpyAsync_c8_6_c_size_t

!> Wrapper around cudaMemcpyAsync_ for rank-6 complex(c_double_complex)
!> arrays; the element count is taken from size(dest).
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c8_6(dest,src,myKind,stream) result(istat)
      use, intrinsic :: iso_c_binding, only: c_double_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:,:,:,:)
      complex(c_double_complex), target, intent(in) :: src(:,:,:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 8 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 16_c_size_t
      ! Ask for the element count in c_size_t directly: a default-integer
      ! size() result multiplied by 2 could overflow for very large arrays.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        size(dest, kind=c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c8_6
!> Wrapper around cudaMemcpyAsync_ for rank-7 complex(c_double_complex)
!> arrays with an integer(c_int) element count.
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> length    : element count; scaled to bytes (2*8 per element) below
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c8_7_c_int(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_double_complex, c_int, &
        c_loc, c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:,:,:,:,:)
      complex(c_double_complex), target, intent(in) :: src(:,:,:,:,:,:,:)
      integer(c_int), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 8 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 16_c_size_t
      ! Widen length before multiplying so the byte count is never formed
      ! in the narrower c_int arithmetic, where it could overflow.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        int(length, c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c8_7_c_int
!> Wrapper around cudaMemcpyAsync_ for rank-7 complex(c_double_complex)
!> arrays with an integer(c_size_t) element count `length`.
!> dest and src are handed over as C addresses (c_loc); the byte count is
!> length*(2*8); myKind and stream are forwarded unchanged. The result is
!> the status value returned by cudaMemcpyAsync_.
function cudaMemcpyAsync_c8_7_c_size_t(dest,src,length,myKind,stream) &
    result(istat)
      use, intrinsic :: iso_c_binding, only: c_double_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:,:,:,:,:)
      complex(c_double_complex), target, intent(in) :: src(:,:,:,:,:,:,:)
      integer(c_size_t), intent(in) :: length
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! NOTE(review): the _8 suffix presumes kind 8 is what the callee
      ! expects for its size argument - confirm.
      associate (nbytes => length*(2*8_8))
        istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), nbytes, &
          myKind, stream)
      end associate
    end function cudaMemcpyAsync_c8_7_c_size_t

!> Wrapper around cudaMemcpyAsync_ for rank-7 complex(c_double_complex)
!> arrays; the element count is taken from size(dest).
!>
!> dest, src : passed to the callee as C addresses obtained with c_loc
!> myKind, stream : forwarded unchanged
!> Result    : status value returned by cudaMemcpyAsync_
function cudaMemcpyAsync_c8_7(dest,src,myKind,stream) result(istat)
      use, intrinsic :: iso_c_binding, only: c_double_complex, c_loc, &
        c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex), target, intent(inout) :: dest(:,:,:,:,:,:,:)
      complex(c_double_complex), target, intent(in) :: src(:,:,:,:,:,:,:)
      integer(kind(cudaMemcpyHostToHost)) :: myKind
      type(c_ptr) :: stream
      integer(kind(cudaSuccess)) :: istat
      ! Element size in bytes: 2 components of 8 bytes each.
      integer(c_size_t), parameter :: elem_bytes = 16_c_size_t
      ! Ask for the element count in c_size_t directly: a default-integer
      ! size() result multiplied by 2 could overflow for very large arrays.
      istat = cudaMemcpyAsync_(c_loc(dest), c_loc(src), &
        size(dest, kind=c_size_t)*elem_bytes, myKind, stream)
    end function cudaMemcpyAsync_c8_7
 

!> Wrapper around cudaMemcpy2D_ for scalar logical(c_bool) dest/src with
!> integer(c_int) dpitch, spitch, width and height.
!> dest and src are handed over as C addresses (c_loc); each of the four
!> integer arguments is multiplied by the kind-8 constant 1_8 before the
!> call; myKind is forwarded. The result is the status value returned by
!> cudaMemcpy2D_.
function cudaMemcpy2D_l_0_c_int(dest,dpitch,src,spitch,width,height, &
    myKind) result(istat)
      use, intrinsic :: iso_c_binding, only: c_bool, c_int, c_loc, c_ptr
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest
      integer(c_int) :: dpitch
      logical(c_bool), target, intent(in) :: src
      integer(c_int) :: spitch
      integer(c_int) :: width
      integer(c_int) :: height
      integer(kind(cudaMemcpyHostToHost)), value :: myKind
      integer(kind(cudaSuccess)) :: istat
      associate (dpitch8 => 1_8*dpitch, spitch8 => 1_8*spitch, &
                 width8 => 1_8*width, height8 => height*1_8)
        istat = cudaMemcpy2D_(c_loc(dest), dpitch8, c_loc(src), spitch8, &
          width8, height8, myKind)
      end associate
    end function cudaMemcpy2D_l_0_c_int
!> Wrapper around cudaMemcpy2D_ for scalar logical(c_bool) dest/src with
!> integer(c_size_t) dpitch, spitch, width and height.
!> dest and src are handed over as C addresses (c_loc); each of the four
!> integer arguments is multiplied by the kind-8 constant 1_8 before the
!> call; myKind is forwarded. The result is the status value returned by
!> cudaMemcpy2D_.
function cudaMemcpy2D_l_0_c_size_t(dest,dpitch,src,spitch,width,height, &
    myKind) result(istat)
      use, intrinsic :: iso_c_binding, only: c_bool, c_loc, c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest
      integer(c_size_t) :: dpitch
      logical(c_bool), target, intent(in) :: src
      integer(c_size_t) :: spitch
      integer(c_size_t) :: width
      integer(c_size_t) :: height
      integer(kind(cudaMemcpyHostToHost)), value :: myKind
      integer(kind(cudaSuccess)) :: istat
      associate (dpitch8 => 1_8*dpitch, spitch8 => 1_8*spitch, &
                 width8 => 1_8*width, height8 => height*1_8)
        istat = cudaMemcpy2D_(c_loc(dest), dpitch8, c_loc(src), spitch8, &
          width8, height8, myKind)
      end associate
    end function cudaMemcpy2D_l_0_c_size_t
!> Wrapper around cudaMemcpy2D_ for rank-1 logical(c_bool) arrays with
!> integer(c_int) dpitch, spitch, width and height.
!> dest and src are handed over as C addresses (c_loc); each of the four
!> integer arguments is multiplied by the kind-8 constant 1_8 before the
!> call; myKind is forwarded. The result is the status value returned by
!> cudaMemcpy2D_.
function cudaMemcpy2D_l_1_c_int(dest,dpitch,src,spitch,width,height, &
    myKind) result(istat)
      use, intrinsic :: iso_c_binding, only: c_bool, c_int, c_loc, c_ptr
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:)
      integer(c_int) :: dpitch
      logical(c_bool), target, intent(in) :: src(:)
      integer(c_int) :: spitch
      integer(c_int) :: width
      integer(c_int) :: height
      integer(kind(cudaMemcpyHostToHost)), value :: myKind
      integer(kind(cudaSuccess)) :: istat
      associate (dpitch8 => 1_8*dpitch, spitch8 => 1_8*spitch, &
                 width8 => 1_8*width, height8 => height*1_8)
        istat = cudaMemcpy2D_(c_loc(dest), dpitch8, c_loc(src), spitch8, &
          width8, height8, myKind)
      end associate
    end function cudaMemcpy2D_l_1_c_int
!> Wrapper around cudaMemcpy2D_ for rank-1 logical(c_bool) arrays with
!> integer(c_size_t) dpitch, spitch, width and height.
!> dest and src are handed over as C addresses (c_loc); each of the four
!> integer arguments is multiplied by the kind-8 constant 1_8 before the
!> call; myKind is forwarded. The result is the status value returned by
!> cudaMemcpy2D_.
function cudaMemcpy2D_l_1_c_size_t(dest,dpitch,src,spitch,width,height, &
    myKind) result(istat)
      use, intrinsic :: iso_c_binding, only: c_bool, c_loc, c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:)
      integer(c_size_t) :: dpitch
      logical(c_bool), target, intent(in) :: src(:)
      integer(c_size_t) :: spitch
      integer(c_size_t) :: width
      integer(c_size_t) :: height
      integer(kind(cudaMemcpyHostToHost)), value :: myKind
      integer(kind(cudaSuccess)) :: istat
      associate (dpitch8 => 1_8*dpitch, spitch8 => 1_8*spitch, &
                 width8 => 1_8*width, height8 => height*1_8)
        istat = cudaMemcpy2D_(c_loc(dest), dpitch8, c_loc(src), spitch8, &
          width8, height8, myKind)
      end associate
    end function cudaMemcpy2D_l_1_c_size_t
!> Wrapper around cudaMemcpy2D_ for rank-2 logical(c_bool) arrays with
!> integer(c_int) dpitch, spitch, width and height.
!> dest and src are handed over as C addresses (c_loc); each of the four
!> integer arguments is multiplied by the kind-8 constant 1_8 before the
!> call; myKind is forwarded. The result is the status value returned by
!> cudaMemcpy2D_.
function cudaMemcpy2D_l_2_c_int(dest,dpitch,src,spitch,width,height, &
    myKind) result(istat)
      use, intrinsic :: iso_c_binding, only: c_bool, c_int, c_loc, c_ptr
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:,:)
      integer(c_int) :: dpitch
      logical(c_bool), target, intent(in) :: src(:,:)
      integer(c_int) :: spitch
      integer(c_int) :: width
      integer(c_int) :: height
      integer(kind(cudaMemcpyHostToHost)), value :: myKind
      integer(kind(cudaSuccess)) :: istat
      associate (dpitch8 => 1_8*dpitch, spitch8 => 1_8*spitch, &
                 width8 => 1_8*width, height8 => height*1_8)
        istat = cudaMemcpy2D_(c_loc(dest), dpitch8, c_loc(src), spitch8, &
          width8, height8, myKind)
      end associate
    end function cudaMemcpy2D_l_2_c_int
!> Wrapper around cudaMemcpy2D_ for rank-2 logical(c_bool) arrays with
!> integer(c_size_t) dpitch, spitch, width and height.
!> dest and src are handed over as C addresses (c_loc); each of the four
!> integer arguments is multiplied by the kind-8 constant 1_8 before the
!> call; myKind is forwarded. The result is the status value returned by
!> cudaMemcpy2D_.
function cudaMemcpy2D_l_2_c_size_t(dest,dpitch,src,spitch,width,height, &
    myKind) result(istat)
      use, intrinsic :: iso_c_binding, only: c_bool, c_loc, c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool), target, intent(inout) :: dest(:,:)
      integer(c_size_t) :: dpitch
      logical(c_bool), target, intent(in) :: src(:,:)
      integer(c_size_t) :: spitch
      integer(c_size_t) :: width
      integer(c_size_t) :: height
      integer(kind(cudaMemcpyHostToHost)), value :: myKind
      integer(kind(cudaSuccess)) :: istat
      associate (dpitch8 => 1_8*dpitch, spitch8 => 1_8*spitch, &
                 width8 => 1_8*width, height8 => height*1_8)
        istat = cudaMemcpy2D_(c_loc(dest), dpitch8, c_loc(src), spitch8, &
          width8, height8, myKind)
      end associate
    end function cudaMemcpy2D_l_2_c_size_t
!> Wrapper around cudaMemcpy2D_ for scalar integer(c_int) dest/src with
!> integer(c_int) dpitch, spitch, width and height.
!> dest and src are handed over as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 4_8 and height by 1_8 before the call; myKind
!> is forwarded. The result is the status value returned by cudaMemcpy2D_.
function cudaMemcpy2D_i4_0_c_int(dest,dpitch,src,spitch,width,height, &
    myKind) result(istat)
      use, intrinsic :: iso_c_binding, only: c_int, c_loc, c_ptr
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int), target, intent(inout) :: dest
      integer(c_int) :: dpitch
      integer(c_int), target, intent(in) :: src
      integer(c_int) :: spitch
      integer(c_int) :: width
      integer(c_int) :: height
      integer(kind(cudaMemcpyHostToHost)), value :: myKind
      integer(kind(cudaSuccess)) :: istat
      ! The kind-8 factor is the left operand, so each product is formed
      ! in kind-8 arithmetic.
      associate (dpitch_x4 => 4_8*dpitch, spitch_x4 => 4_8*spitch, &
                 width_x4 => 4_8*width, height8 => height*1_8)
        istat = cudaMemcpy2D_(c_loc(dest), dpitch_x4, c_loc(src), &
          spitch_x4, width_x4, height8, myKind)
      end associate
    end function cudaMemcpy2D_i4_0_c_int
!> Wrapper around cudaMemcpy2D_ for scalar integer(c_int) dest/src with
!> integer(c_size_t) dpitch, spitch, width and height.
!> dest and src are handed over as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 4_8 and height by 1_8 before the call; myKind
!> is forwarded. The result is the status value returned by cudaMemcpy2D_.
function cudaMemcpy2D_i4_0_c_size_t(dest,dpitch,src,spitch,width,height, &
    myKind) result(istat)
      use, intrinsic :: iso_c_binding, only: c_int, c_loc, c_ptr, c_size_t
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int), target, intent(inout) :: dest
      integer(c_size_t) :: dpitch
      integer(c_int), target, intent(in) :: src
      integer(c_size_t) :: spitch
      integer(c_size_t) :: width
      integer(c_size_t) :: height
      integer(kind(cudaMemcpyHostToHost)), value :: myKind
      integer(kind(cudaSuccess)) :: istat
      associate (dpitch_x4 => 4_8*dpitch, spitch_x4 => 4_8*spitch, &
                 width_x4 => 4_8*width, height8 => height*1_8)
        istat = cudaMemcpy2D_(c_loc(dest), dpitch_x4, c_loc(src), &
          spitch_x4, width_x4, height8, myKind)
      end associate
    end function cudaMemcpy2D_i4_0_c_size_t
!> Wrapper around cudaMemcpy2D_ for rank-1 assumed-shape integer(c_int)
!> dest/src with integer(c_int) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 4 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_i4_1_c_int(dest,dpitch,src,spitch,width,height,&
&myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest(:)
      integer(c_int),intent(in),target    :: src(:)
      integer(c_int) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 4_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_i4_1_c_int
!> Wrapper around cudaMemcpy2D_ for rank-1 assumed-shape integer(c_int)
!> dest/src with integer(c_size_t) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 4 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_i4_1_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest(:)
      integer(c_int),intent(in),target    :: src(:)
      integer(c_size_t) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 4_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_i4_1_c_size_t
!> Wrapper around cudaMemcpy2D_ for rank-2 assumed-shape integer(c_int)
!> dest/src with integer(c_int) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 4 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_i4_2_c_int(dest,dpitch,src,spitch,width,height,&
&myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest(:,:)
      integer(c_int),intent(in),target    :: src(:,:)
      integer(c_int) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 4_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_i4_2_c_int
!> Wrapper around cudaMemcpy2D_ for rank-2 assumed-shape integer(c_int)
!> dest/src with integer(c_size_t) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 4 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_i4_2_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest(:,:)
      integer(c_int),intent(in),target    :: src(:,:)
      integer(c_size_t) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 4_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_i4_2_c_size_t
!> Wrapper around cudaMemcpy2D_ for scalar integer(c_long) dest/src with
!> integer(c_int) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 8 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
!> NOTE(review): the factor 8 assumes integer(c_long) occupies 8 bytes -
!> confirm for the target ABI.
function cudaMemcpy2D_i8_0_c_int(dest,dpitch,src,spitch,width,height,&
&myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),intent(inout),target :: dest
      integer(c_long),intent(in),target    :: src
      integer(c_int) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 8_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_i8_0_c_int
!> Wrapper around cudaMemcpy2D_ for scalar integer(c_long) dest/src with
!> integer(c_size_t) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 8 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
!> NOTE(review): the factor 8 assumes integer(c_long) occupies 8 bytes -
!> confirm for the target ABI.
function cudaMemcpy2D_i8_0_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),intent(inout),target :: dest
      integer(c_long),intent(in),target    :: src
      integer(c_size_t) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 8_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_i8_0_c_size_t
!> Wrapper around cudaMemcpy2D_ for rank-1 assumed-shape integer(c_long)
!> dest/src with integer(c_int) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 8 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
!> NOTE(review): the factor 8 assumes integer(c_long) occupies 8 bytes -
!> confirm for the target ABI.
function cudaMemcpy2D_i8_1_c_int(dest,dpitch,src,spitch,width,height,&
&myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),intent(inout),target :: dest(:)
      integer(c_long),intent(in),target    :: src(:)
      integer(c_int) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 8_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_i8_1_c_int
!> Wrapper around cudaMemcpy2D_ for rank-1 assumed-shape integer(c_long)
!> dest/src with integer(c_size_t) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 8 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
!> NOTE(review): the factor 8 assumes integer(c_long) occupies 8 bytes -
!> confirm for the target ABI.
function cudaMemcpy2D_i8_1_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),intent(inout),target :: dest(:)
      integer(c_long),intent(in),target    :: src(:)
      integer(c_size_t) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 8_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_i8_1_c_size_t
!> Wrapper around cudaMemcpy2D_ for rank-2 assumed-shape integer(c_long)
!> dest/src with integer(c_int) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 8 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
!> NOTE(review): the factor 8 assumes integer(c_long) occupies 8 bytes -
!> confirm for the target ABI.
function cudaMemcpy2D_i8_2_c_int(dest,dpitch,src,spitch,width,height,&
&myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),intent(inout),target :: dest(:,:)
      integer(c_long),intent(in),target    :: src(:,:)
      integer(c_int) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 8_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_i8_2_c_int
!> Wrapper around cudaMemcpy2D_ for rank-2 assumed-shape integer(c_long)
!> dest/src with integer(c_size_t) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 8 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
!> NOTE(review): the factor 8 assumes integer(c_long) occupies 8 bytes -
!> confirm for the target ABI.
function cudaMemcpy2D_i8_2_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),intent(inout),target :: dest(:,:)
      integer(c_long),intent(in),target    :: src(:,:)
      integer(c_size_t) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 8_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_i8_2_c_size_t
!> Wrapper around cudaMemcpy2D_ for scalar real(c_float) dest/src with
!> integer(c_int) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 4 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_r4_0_c_int(dest,dpitch,src,spitch,width,height,&
&myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_float),intent(inout),target :: dest
      real(c_float),intent(in),target    :: src
      integer(c_int) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 4_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_r4_0_c_int
!> Wrapper around cudaMemcpy2D_ for scalar real(c_float) dest/src with
!> integer(c_size_t) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 4 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_r4_0_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_float),intent(inout),target :: dest
      real(c_float),intent(in),target    :: src
      integer(c_size_t) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 4_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_r4_0_c_size_t
!> Wrapper around cudaMemcpy2D_ for rank-1 assumed-shape real(c_float)
!> dest/src with integer(c_int) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 4 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_r4_1_c_int(dest,dpitch,src,spitch,width,height,&
&myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_float),intent(inout),target :: dest(:)
      real(c_float),intent(in),target    :: src(:)
      integer(c_int) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 4_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_r4_1_c_int
!> Wrapper around cudaMemcpy2D_ for rank-1 assumed-shape real(c_float)
!> dest/src with integer(c_size_t) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 4 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_r4_1_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_float),intent(inout),target :: dest(:)
      real(c_float),intent(in),target    :: src(:)
      integer(c_size_t) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 4_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_r4_1_c_size_t
!> Wrapper around cudaMemcpy2D_ for rank-2 assumed-shape real(c_float)
!> dest/src with integer(c_int) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 4 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_r4_2_c_int(dest,dpitch,src,spitch,width,height,&
&myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_float),intent(inout),target :: dest(:,:)
      real(c_float),intent(in),target    :: src(:,:)
      integer(c_int) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 4_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_r4_2_c_int
!> Wrapper around cudaMemcpy2D_ for rank-2 assumed-shape real(c_float)
!> dest/src with integer(c_size_t) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 4 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_r4_2_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_float),intent(inout),target :: dest(:,:)
      real(c_float),intent(in),target    :: src(:,:)
      integer(c_size_t) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 4_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_r4_2_c_size_t
!> Wrapper around cudaMemcpy2D_ for scalar real(c_double) dest/src with
!> integer(c_int) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 8 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_r8_0_c_int(dest,dpitch,src,spitch,width,height,&
&myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_double),intent(inout),target :: dest
      real(c_double),intent(in),target    :: src
      integer(c_int) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 8_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_r8_0_c_int
!> Wrapper around cudaMemcpy2D_ for scalar real(c_double) dest/src with
!> integer(c_size_t) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 8 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_r8_0_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_double),intent(inout),target :: dest
      real(c_double),intent(in),target    :: src
      integer(c_size_t) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 8_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_r8_0_c_size_t
!> Wrapper around cudaMemcpy2D_ for rank-1 assumed-shape real(c_double)
!> dest/src with integer(c_int) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 8 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_r8_1_c_int(dest,dpitch,src,spitch,width,height,&
&myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_double),intent(inout),target :: dest(:)
      real(c_double),intent(in),target    :: src(:)
      integer(c_int) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 8_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_r8_1_c_int
!> Wrapper around cudaMemcpy2D_ for rank-1 assumed-shape real(c_double)
!> dest/src with integer(c_size_t) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 8 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_r8_1_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_double),intent(inout),target :: dest(:)
      real(c_double),intent(in),target    :: src(:)
      integer(c_size_t) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 8_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_r8_1_c_size_t
!> Wrapper around cudaMemcpy2D_ for rank-2 assumed-shape real(c_double)
!> dest/src with integer(c_int) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 8 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_r8_2_c_int(dest,dpitch,src,spitch,width,height,&
&myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_double),intent(inout),target :: dest(:,:)
      real(c_double),intent(in),target    :: src(:,:)
      integer(c_int) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 8_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_r8_2_c_int
!> Wrapper around cudaMemcpy2D_ for rank-2 assumed-shape real(c_double)
!> dest/src with integer(c_size_t) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 8 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_r8_2_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_double),intent(inout),target :: dest(:,:)
      real(c_double),intent(in),target    :: src(:,:)
      integer(c_size_t) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 8_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_r8_2_c_size_t
!> Wrapper around cudaMemcpy2D_ for scalar complex(c_float_complex)
!> dest/src with integer(c_int) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 2*4 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_c4_0_c_int(dest,dpitch,src,spitch,width,height,&
&myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex),intent(inout),target :: dest
      complex(c_float_complex),intent(in),target    :: src
      integer(c_int) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 2*4_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_c4_0_c_int
!> Wrapper around cudaMemcpy2D_ for scalar complex(c_float_complex)
!> dest/src with integer(c_size_t) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 2*4 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_c4_0_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex),intent(inout),target :: dest
      complex(c_float_complex),intent(in),target    :: src
      integer(c_size_t) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 2*4_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_c4_0_c_size_t
!> Wrapper around cudaMemcpy2D_ for rank-1 assumed-shape
!> complex(c_float_complex) dest/src with integer(c_int) dpitch, spitch,
!> width and height.
!> dpitch, spitch and width are each multiplied by 2*4 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_c4_1_c_int(dest,dpitch,src,spitch,width,height,&
&myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex),intent(inout),target :: dest(:)
      complex(c_float_complex),intent(in),target    :: src(:)
      integer(c_int) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 2*4_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_c4_1_c_int
!> Wrapper around cudaMemcpy2D_ for rank-1 assumed-shape
!> complex(c_float_complex) dest/src with integer(c_size_t) dpitch,
!> spitch, width and height.
!> dpitch, spitch and width are each multiplied by 2*4 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_c4_1_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex),intent(inout),target :: dest(:)
      complex(c_float_complex),intent(in),target    :: src(:)
      integer(c_size_t) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 2*4_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_c4_1_c_size_t
!> Wrapper around cudaMemcpy2D_ for rank-2 assumed-shape
!> complex(c_float_complex) dest/src with integer(c_int) dpitch, spitch,
!> width and height.
!> dpitch, spitch and width are each multiplied by 2*4 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_c4_2_c_int(dest,dpitch,src,spitch,width,height,&
&myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex),intent(inout),target :: dest(:,:)
      complex(c_float_complex),intent(in),target    :: src(:,:)
      integer(c_int) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 2*4_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_c4_2_c_int
!> Wrapper around cudaMemcpy2D_ for rank-2 assumed-shape
!> complex(c_float_complex) dest/src with integer(c_size_t) dpitch,
!> spitch, width and height.
!> dpitch, spitch and width are each multiplied by 2*4 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_c4_2_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex),intent(inout),target :: dest(:,:)
      complex(c_float_complex),intent(in),target    :: src(:,:)
      integer(c_size_t) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 2*4_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_c4_2_c_size_t
!> Wrapper around cudaMemcpy2D_ for scalar complex(c_double_complex)
!> dest/src with integer(c_int) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 2*8 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_c8_0_c_int(dest,dpitch,src,spitch,width,height,&
&myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex),intent(inout),target :: dest
      complex(c_double_complex),intent(in),target    :: src
      integer(c_int) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 2*8_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_c8_0_c_int
!> Wrapper around cudaMemcpy2D_ for scalar complex(c_double_complex)
!> dest/src with integer(c_size_t) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 2*8 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_c8_0_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex),intent(inout),target :: dest
      complex(c_double_complex),intent(in),target    :: src
      integer(c_size_t) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 2*8_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_c8_0_c_size_t
!> Wrapper around cudaMemcpy2D_ for rank-1 assumed-shape
!> complex(c_double_complex) dest/src with integer(c_int) dpitch, spitch,
!> width and height.
!> dpitch, spitch and width are each multiplied by 2*8 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_c8_1_c_int(dest,dpitch,src,spitch,width,height,&
&myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex),intent(inout),target :: dest(:)
      complex(c_double_complex),intent(in),target    :: src(:)
      integer(c_int) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 2*8_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_c8_1_c_int
!> Wrapper around cudaMemcpy2D_ for rank-1 assumed-shape
!> complex(c_double_complex) dest/src with integer(c_size_t) dpitch,
!> spitch, width and height.
!> dpitch, spitch and width are each multiplied by 2*8 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_c8_1_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex),intent(inout),target :: dest(:)
      complex(c_double_complex),intent(in),target    :: src(:)
      integer(c_size_t) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 2*8_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_c8_1_c_size_t
!> Wrapper around cudaMemcpy2D_ for rank-2 assumed-shape
!> complex(c_double_complex) dest/src with integer(c_int) dpitch, spitch,
!> width and height.
!> dpitch, spitch and width are each multiplied by 2*8 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_c8_2_c_int(dest,dpitch,src,spitch,width,height,&
&myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex),intent(inout),target :: dest(:,:)
      complex(c_double_complex),intent(in),target    :: src(:,:)
      integer(c_int) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 2*8_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_c8_2_c_int
!> Wrapper around cudaMemcpy2D_ for rank-2 assumed-shape
!> complex(c_double_complex) dest/src with integer(c_size_t) dpitch,
!> spitch, width and height.
!> dpitch, spitch and width are each multiplied by 2*8 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses.
!> The function result is the value returned by cudaMemcpy2D_.
function cudaMemcpy2D_c8_2_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex),intent(inout),target :: dest(:,:)
      complex(c_double_complex),intent(in),target    :: src(:,:)
      integer(c_size_t) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 2*8_8
      !
      ret = cudaMemcpy2D_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind)
    end function cudaMemcpy2D_c8_2_c_size_t
    
!> Wrapper around cudaMemcpy2DAsync_ for scalar logical(c_bool) dest/src
!> with integer(c_int) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 1 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses, and
!> myKind and stream are forwarded unchanged.
!> The function result is the value returned by cudaMemcpy2DAsync_.
function cudaMemcpy2DAsync_l_0_c_int(dest,dpitch,src,spitch,width,&
&height,myKind,stream) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool),intent(inout),target :: dest
      logical(c_bool),intent(in),target    :: src
      integer(c_int) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 1_8
      !
      ret = cudaMemcpy2DAsync_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind,stream)
    end function cudaMemcpy2DAsync_l_0_c_int
!> Wrapper around cudaMemcpy2DAsync_ for scalar logical(c_bool) dest/src
!> with integer(c_size_t) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 1 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses, and
!> myKind and stream are forwarded unchanged.
!> The function result is the value returned by cudaMemcpy2DAsync_.
function cudaMemcpy2DAsync_l_0_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind,stream) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool),intent(inout),target :: dest
      logical(c_bool),intent(in),target    :: src
      integer(c_size_t) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 1_8
      !
      ret = cudaMemcpy2DAsync_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind,stream)
    end function cudaMemcpy2DAsync_l_0_c_size_t
!> Wrapper around cudaMemcpy2DAsync_ for rank-1 assumed-shape
!> logical(c_bool) dest/src with integer(c_int) dpitch, spitch, width
!> and height.
!> dpitch, spitch and width are each multiplied by 1 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses, and
!> myKind and stream are forwarded unchanged.
!> The function result is the value returned by cudaMemcpy2DAsync_.
function cudaMemcpy2DAsync_l_1_c_int(dest,dpitch,src,spitch,width,&
&height,myKind,stream) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool),intent(inout),target :: dest(:)
      logical(c_bool),intent(in),target    :: src(:)
      integer(c_int) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 1_8
      !
      ret = cudaMemcpy2DAsync_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind,stream)
    end function cudaMemcpy2DAsync_l_1_c_int
!> Wrapper around cudaMemcpy2DAsync_ for rank-1 assumed-shape
!> logical(c_bool) dest/src with integer(c_size_t) dpitch, spitch, width
!> and height.
!> dpitch, spitch and width are each multiplied by 1 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses, and
!> myKind and stream are forwarded unchanged.
!> The function result is the value returned by cudaMemcpy2DAsync_.
function cudaMemcpy2DAsync_l_1_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind,stream) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool),intent(inout),target :: dest(:)
      logical(c_bool),intent(in),target    :: src(:)
      integer(c_size_t) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 1_8
      !
      ret = cudaMemcpy2DAsync_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind,stream)
    end function cudaMemcpy2DAsync_l_1_c_size_t
!> Wrapper around cudaMemcpy2DAsync_ for rank-2 assumed-shape
!> logical(c_bool) dest/src with integer(c_int) dpitch, spitch, width
!> and height.
!> dpitch, spitch and width are each multiplied by 1 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses, and
!> myKind and stream are forwarded unchanged.
!> The function result is the value returned by cudaMemcpy2DAsync_.
function cudaMemcpy2DAsync_l_2_c_int(dest,dpitch,src,spitch,width,&
&height,myKind,stream) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool),intent(inout),target :: dest(:,:)
      logical(c_bool),intent(in),target    :: src(:,:)
      integer(c_int) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 1_8
      !
      ret = cudaMemcpy2DAsync_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind,stream)
    end function cudaMemcpy2DAsync_l_2_c_int
!> Wrapper around cudaMemcpy2DAsync_ for rank-2 assumed-shape
!> logical(c_bool) dest/src with integer(c_size_t) dpitch, spitch, width
!> and height.
!> dpitch, spitch and width are each multiplied by 1 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses, and
!> myKind and stream are forwarded unchanged.
!> The function result is the value returned by cudaMemcpy2DAsync_.
function cudaMemcpy2DAsync_l_2_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind,stream) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      logical(c_bool),intent(inout),target :: dest(:,:)
      logical(c_bool),intent(in),target    :: src(:,:)
      integer(c_size_t) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 1_8
      !
      ret = cudaMemcpy2DAsync_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind,stream)
    end function cudaMemcpy2DAsync_l_2_c_size_t
!> Wrapper around cudaMemcpy2DAsync_ for scalar integer(c_int) dest/src
!> with integer(c_int) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 4 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses, and
!> myKind and stream are forwarded unchanged.
!> The function result is the value returned by cudaMemcpy2DAsync_.
function cudaMemcpy2DAsync_i4_0_c_int(dest,dpitch,src,spitch,width,&
&height,myKind,stream) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest
      integer(c_int),intent(in),target    :: src
      integer(c_int) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 4_8
      !
      ret = cudaMemcpy2DAsync_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind,stream)
    end function cudaMemcpy2DAsync_i4_0_c_int
!> Wrapper around cudaMemcpy2DAsync_ for scalar integer(c_int) dest/src
!> with integer(c_size_t) dpitch, spitch, width and height.
!> dpitch, spitch and width are each multiplied by 4 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses, and
!> myKind and stream are forwarded unchanged.
!> The function result is the value returned by cudaMemcpy2DAsync_.
function cudaMemcpy2DAsync_i4_0_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind,stream) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest
      integer(c_int),intent(in),target    :: src
      integer(c_size_t) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 4_8
      !
      ret = cudaMemcpy2DAsync_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind,stream)
    end function cudaMemcpy2DAsync_i4_0_c_size_t
!> Wrapper around cudaMemcpy2DAsync_ for rank-1 assumed-shape
!> integer(c_int) dest/src with integer(c_int) dpitch, spitch, width
!> and height.
!> dpitch, spitch and width are each multiplied by 4 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses, and
!> myKind and stream are forwarded unchanged.
!> The function result is the value returned by cudaMemcpy2DAsync_.
function cudaMemcpy2DAsync_i4_1_c_int(dest,dpitch,src,spitch,width,&
&height,myKind,stream) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest(:)
      integer(c_int),intent(in),target    :: src(:)
      integer(c_int) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 4_8
      !
      ret = cudaMemcpy2DAsync_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind,stream)
    end function cudaMemcpy2DAsync_i4_1_c_int
!> Wrapper around cudaMemcpy2DAsync_ for rank-1 assumed-shape
!> integer(c_int) dest/src with integer(c_size_t) dpitch, spitch, width
!> and height.
!> dpitch, spitch and width are each multiplied by 4 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses, and
!> myKind and stream are forwarded unchanged.
!> The function result is the value returned by cudaMemcpy2DAsync_.
function cudaMemcpy2DAsync_i4_1_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind,stream) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest(:)
      integer(c_int),intent(in),target    :: src(:)
      integer(c_size_t) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 4_8
      !
      ret = cudaMemcpy2DAsync_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind,stream)
    end function cudaMemcpy2DAsync_i4_1_c_size_t
!> Wrapper around cudaMemcpy2DAsync_ for rank-2 assumed-shape
!> integer(c_int) dest/src with integer(c_int) dpitch, spitch, width
!> and height.
!> dpitch, spitch and width are each multiplied by 4 (presumably the
!> element size in bytes) and height is passed unscaled, all as kind-8
!> integers; c_loc(dest) and c_loc(src) supply the addresses, and
!> myKind and stream are forwarded unchanged.
!> The function result is the value returned by cudaMemcpy2DAsync_.
function cudaMemcpy2DAsync_i4_2_c_int(dest,dpitch,src,spitch,width,&
&height,myKind,stream) result(ret)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),intent(inout),target :: dest(:,:)
      integer(c_int),intent(in),target    :: src(:,:)
      integer(c_int) :: dpitch, spitch, width, height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: ret
      ! Factor applied to dpitch, spitch and width.
      integer(8),parameter :: nbytes = 4_8
      !
      ret = cudaMemcpy2DAsync_(c_loc(dest),nbytes*dpitch,c_loc(src),&
        &nbytes*spitch,nbytes*width,int(height,8),myKind,stream)
    end function cudaMemcpy2DAsync_i4_2_c_int
!> Wrapper around cudaMemcpy2DAsync_ for rank-2 integer(c_int) dest/src
!> with integer(c_size_t) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 4 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_i4_2_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_int),target,dimension(:,:),intent(inout) :: dest
      integer(c_int),target,dimension(:,:),intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_size_t),intent(in) :: dpitch
      integer(c_size_t),intent(in) :: spitch
      integer(c_size_t),intent(in) :: width
      integer(c_size_t),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_i4_2_c_size_t
      !
      cudaMemcpy2DAsync_i4_2_c_size_t = cudaMemcpy2DAsync_( &
        c_loc(dest), 4_8*dpitch, &
        c_loc(src), 4_8*spitch, &
        4_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_i4_2_c_size_t
!> Wrapper around cudaMemcpy2DAsync_ for scalar integer(c_long) dest/src
!> with integer(c_int) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 8 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_i8_0_c_int(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,intent(inout) :: dest
      integer(c_long),target,intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_int),intent(in) :: dpitch
      integer(c_int),intent(in) :: spitch
      integer(c_int),intent(in) :: width
      integer(c_int),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_i8_0_c_int
      !
      ! Multiplying by a kind-8 literal makes each product a kind-8
      ! integer before it is handed to cudaMemcpy2DAsync_.
      cudaMemcpy2DAsync_i8_0_c_int = cudaMemcpy2DAsync_( &
        c_loc(dest), 8_8*dpitch, &
        c_loc(src), 8_8*spitch, &
        8_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_i8_0_c_int
!> Wrapper around cudaMemcpy2DAsync_ for scalar integer(c_long) dest/src
!> with integer(c_size_t) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 8 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_i8_0_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,intent(inout) :: dest
      integer(c_long),target,intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_size_t),intent(in) :: dpitch
      integer(c_size_t),intent(in) :: spitch
      integer(c_size_t),intent(in) :: width
      integer(c_size_t),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_i8_0_c_size_t
      !
      cudaMemcpy2DAsync_i8_0_c_size_t = cudaMemcpy2DAsync_( &
        c_loc(dest), 8_8*dpitch, &
        c_loc(src), 8_8*spitch, &
        8_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_i8_0_c_size_t
!> Wrapper around cudaMemcpy2DAsync_ for rank-1 integer(c_long) dest/src
!> with integer(c_int) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 8 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_i8_1_c_int(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:),intent(inout) :: dest
      integer(c_long),target,dimension(:),intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_int),intent(in) :: dpitch
      integer(c_int),intent(in) :: spitch
      integer(c_int),intent(in) :: width
      integer(c_int),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_i8_1_c_int
      !
      ! Multiplying by a kind-8 literal makes each product a kind-8
      ! integer before it is handed to cudaMemcpy2DAsync_.
      cudaMemcpy2DAsync_i8_1_c_int = cudaMemcpy2DAsync_( &
        c_loc(dest), 8_8*dpitch, &
        c_loc(src), 8_8*spitch, &
        8_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_i8_1_c_int
!> Wrapper around cudaMemcpy2DAsync_ for rank-1 integer(c_long) dest/src
!> with integer(c_size_t) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 8 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_i8_1_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:),intent(inout) :: dest
      integer(c_long),target,dimension(:),intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_size_t),intent(in) :: dpitch
      integer(c_size_t),intent(in) :: spitch
      integer(c_size_t),intent(in) :: width
      integer(c_size_t),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_i8_1_c_size_t
      !
      cudaMemcpy2DAsync_i8_1_c_size_t = cudaMemcpy2DAsync_( &
        c_loc(dest), 8_8*dpitch, &
        c_loc(src), 8_8*spitch, &
        8_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_i8_1_c_size_t
!> Wrapper around cudaMemcpy2DAsync_ for rank-2 integer(c_long) dest/src
!> with integer(c_int) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 8 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_i8_2_c_int(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:),intent(inout) :: dest
      integer(c_long),target,dimension(:,:),intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_int),intent(in) :: dpitch
      integer(c_int),intent(in) :: spitch
      integer(c_int),intent(in) :: width
      integer(c_int),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_i8_2_c_int
      !
      ! Multiplying by a kind-8 literal makes each product a kind-8
      ! integer before it is handed to cudaMemcpy2DAsync_.
      cudaMemcpy2DAsync_i8_2_c_int = cudaMemcpy2DAsync_( &
        c_loc(dest), 8_8*dpitch, &
        c_loc(src), 8_8*spitch, &
        8_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_i8_2_c_int
!> Wrapper around cudaMemcpy2DAsync_ for rank-2 integer(c_long) dest/src
!> with integer(c_size_t) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 8 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_i8_2_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      integer(c_long),target,dimension(:,:),intent(inout) :: dest
      integer(c_long),target,dimension(:,:),intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_size_t),intent(in) :: dpitch
      integer(c_size_t),intent(in) :: spitch
      integer(c_size_t),intent(in) :: width
      integer(c_size_t),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_i8_2_c_size_t
      !
      cudaMemcpy2DAsync_i8_2_c_size_t = cudaMemcpy2DAsync_( &
        c_loc(dest), 8_8*dpitch, &
        c_loc(src), 8_8*spitch, &
        8_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_i8_2_c_size_t
!> Wrapper around cudaMemcpy2DAsync_ for scalar real(c_float) dest/src
!> with integer(c_int) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 4 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_r4_0_c_int(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_float),target,intent(inout) :: dest
      real(c_float),target,intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_int),intent(in) :: dpitch
      integer(c_int),intent(in) :: spitch
      integer(c_int),intent(in) :: width
      integer(c_int),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_r4_0_c_int
      !
      ! Multiplying by a kind-8 literal makes each product a kind-8
      ! integer before it is handed to cudaMemcpy2DAsync_.
      cudaMemcpy2DAsync_r4_0_c_int = cudaMemcpy2DAsync_( &
        c_loc(dest), 4_8*dpitch, &
        c_loc(src), 4_8*spitch, &
        4_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_r4_0_c_int
!> Wrapper around cudaMemcpy2DAsync_ for scalar real(c_float) dest/src
!> with integer(c_size_t) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 4 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_r4_0_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_float),target,intent(inout) :: dest
      real(c_float),target,intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_size_t),intent(in) :: dpitch
      integer(c_size_t),intent(in) :: spitch
      integer(c_size_t),intent(in) :: width
      integer(c_size_t),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_r4_0_c_size_t
      !
      cudaMemcpy2DAsync_r4_0_c_size_t = cudaMemcpy2DAsync_( &
        c_loc(dest), 4_8*dpitch, &
        c_loc(src), 4_8*spitch, &
        4_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_r4_0_c_size_t
!> Wrapper around cudaMemcpy2DAsync_ for rank-1 real(c_float) dest/src
!> with integer(c_int) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 4 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_r4_1_c_int(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_float),target,dimension(:),intent(inout) :: dest
      real(c_float),target,dimension(:),intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_int),intent(in) :: dpitch
      integer(c_int),intent(in) :: spitch
      integer(c_int),intent(in) :: width
      integer(c_int),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_r4_1_c_int
      !
      ! Multiplying by a kind-8 literal makes each product a kind-8
      ! integer before it is handed to cudaMemcpy2DAsync_.
      cudaMemcpy2DAsync_r4_1_c_int = cudaMemcpy2DAsync_( &
        c_loc(dest), 4_8*dpitch, &
        c_loc(src), 4_8*spitch, &
        4_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_r4_1_c_int
!> Wrapper around cudaMemcpy2DAsync_ for rank-1 real(c_float) dest/src
!> with integer(c_size_t) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 4 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_r4_1_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_float),target,dimension(:),intent(inout) :: dest
      real(c_float),target,dimension(:),intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_size_t),intent(in) :: dpitch
      integer(c_size_t),intent(in) :: spitch
      integer(c_size_t),intent(in) :: width
      integer(c_size_t),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_r4_1_c_size_t
      !
      cudaMemcpy2DAsync_r4_1_c_size_t = cudaMemcpy2DAsync_( &
        c_loc(dest), 4_8*dpitch, &
        c_loc(src), 4_8*spitch, &
        4_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_r4_1_c_size_t
!> Wrapper around cudaMemcpy2DAsync_ for rank-2 real(c_float) dest/src
!> with integer(c_int) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 4 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_r4_2_c_int(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_float),target,dimension(:,:),intent(inout) :: dest
      real(c_float),target,dimension(:,:),intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_int),intent(in) :: dpitch
      integer(c_int),intent(in) :: spitch
      integer(c_int),intent(in) :: width
      integer(c_int),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_r4_2_c_int
      !
      ! Multiplying by a kind-8 literal makes each product a kind-8
      ! integer before it is handed to cudaMemcpy2DAsync_.
      cudaMemcpy2DAsync_r4_2_c_int = cudaMemcpy2DAsync_( &
        c_loc(dest), 4_8*dpitch, &
        c_loc(src), 4_8*spitch, &
        4_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_r4_2_c_int
!> Wrapper around cudaMemcpy2DAsync_ for rank-2 real(c_float) dest/src
!> with integer(c_size_t) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 4 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_r4_2_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_float),target,dimension(:,:),intent(inout) :: dest
      real(c_float),target,dimension(:,:),intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_size_t),intent(in) :: dpitch
      integer(c_size_t),intent(in) :: spitch
      integer(c_size_t),intent(in) :: width
      integer(c_size_t),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_r4_2_c_size_t
      !
      cudaMemcpy2DAsync_r4_2_c_size_t = cudaMemcpy2DAsync_( &
        c_loc(dest), 4_8*dpitch, &
        c_loc(src), 4_8*spitch, &
        4_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_r4_2_c_size_t
!> Wrapper around cudaMemcpy2DAsync_ for scalar real(c_double) dest/src
!> with integer(c_int) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 8 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_r8_0_c_int(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_double),target,intent(inout) :: dest
      real(c_double),target,intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_int),intent(in) :: dpitch
      integer(c_int),intent(in) :: spitch
      integer(c_int),intent(in) :: width
      integer(c_int),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_r8_0_c_int
      !
      ! Multiplying by a kind-8 literal makes each product a kind-8
      ! integer before it is handed to cudaMemcpy2DAsync_.
      cudaMemcpy2DAsync_r8_0_c_int = cudaMemcpy2DAsync_( &
        c_loc(dest), 8_8*dpitch, &
        c_loc(src), 8_8*spitch, &
        8_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_r8_0_c_int
!> Wrapper around cudaMemcpy2DAsync_ for scalar real(c_double) dest/src
!> with integer(c_size_t) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 8 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_r8_0_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_double),target,intent(inout) :: dest
      real(c_double),target,intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_size_t),intent(in) :: dpitch
      integer(c_size_t),intent(in) :: spitch
      integer(c_size_t),intent(in) :: width
      integer(c_size_t),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_r8_0_c_size_t
      !
      cudaMemcpy2DAsync_r8_0_c_size_t = cudaMemcpy2DAsync_( &
        c_loc(dest), 8_8*dpitch, &
        c_loc(src), 8_8*spitch, &
        8_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_r8_0_c_size_t
!> Wrapper around cudaMemcpy2DAsync_ for rank-1 real(c_double) dest/src
!> with integer(c_int) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 8 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_r8_1_c_int(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_double),target,dimension(:),intent(inout) :: dest
      real(c_double),target,dimension(:),intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_int),intent(in) :: dpitch
      integer(c_int),intent(in) :: spitch
      integer(c_int),intent(in) :: width
      integer(c_int),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_r8_1_c_int
      !
      ! Multiplying by a kind-8 literal makes each product a kind-8
      ! integer before it is handed to cudaMemcpy2DAsync_.
      cudaMemcpy2DAsync_r8_1_c_int = cudaMemcpy2DAsync_( &
        c_loc(dest), 8_8*dpitch, &
        c_loc(src), 8_8*spitch, &
        8_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_r8_1_c_int
!> Wrapper around cudaMemcpy2DAsync_ for rank-1 real(c_double) dest/src
!> with integer(c_size_t) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 8 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_r8_1_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_double),target,dimension(:),intent(inout) :: dest
      real(c_double),target,dimension(:),intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_size_t),intent(in) :: dpitch
      integer(c_size_t),intent(in) :: spitch
      integer(c_size_t),intent(in) :: width
      integer(c_size_t),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_r8_1_c_size_t
      !
      cudaMemcpy2DAsync_r8_1_c_size_t = cudaMemcpy2DAsync_( &
        c_loc(dest), 8_8*dpitch, &
        c_loc(src), 8_8*spitch, &
        8_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_r8_1_c_size_t
!> Wrapper around cudaMemcpy2DAsync_ for rank-2 real(c_double) dest/src
!> with integer(c_int) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 8 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_r8_2_c_int(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_double),target,dimension(:,:),intent(inout) :: dest
      real(c_double),target,dimension(:,:),intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_int),intent(in) :: dpitch
      integer(c_int),intent(in) :: spitch
      integer(c_int),intent(in) :: width
      integer(c_int),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_r8_2_c_int
      !
      ! Multiplying by a kind-8 literal makes each product a kind-8
      ! integer before it is handed to cudaMemcpy2DAsync_.
      cudaMemcpy2DAsync_r8_2_c_int = cudaMemcpy2DAsync_( &
        c_loc(dest), 8_8*dpitch, &
        c_loc(src), 8_8*spitch, &
        8_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_r8_2_c_int
!> Wrapper around cudaMemcpy2DAsync_ for rank-2 real(c_double) dest/src
!> with integer(c_size_t) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 8 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_r8_2_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      real(c_double),target,dimension(:,:),intent(inout) :: dest
      real(c_double),target,dimension(:,:),intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_size_t),intent(in) :: dpitch
      integer(c_size_t),intent(in) :: spitch
      integer(c_size_t),intent(in) :: width
      integer(c_size_t),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_r8_2_c_size_t
      !
      cudaMemcpy2DAsync_r8_2_c_size_t = cudaMemcpy2DAsync_( &
        c_loc(dest), 8_8*dpitch, &
        c_loc(src), 8_8*spitch, &
        8_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_r8_2_c_size_t
!> Wrapper around cudaMemcpy2DAsync_ for scalar complex(c_float_complex)
!> dest/src with integer(c_int) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 2*4 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_c4_0_c_int(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex),target,intent(inout) :: dest
      complex(c_float_complex),target,intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_int),intent(in) :: dpitch
      integer(c_int),intent(in) :: spitch
      integer(c_int),intent(in) :: width
      integer(c_int),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_c4_0_c_int
      !
      ! Multiplying by a kind-8 literal makes each product a kind-8
      ! integer before it is handed to cudaMemcpy2DAsync_.
      cudaMemcpy2DAsync_c4_0_c_int = cudaMemcpy2DAsync_( &
        c_loc(dest), 2*4_8*dpitch, &
        c_loc(src), 2*4_8*spitch, &
        2*4_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_c4_0_c_int
!> Wrapper around cudaMemcpy2DAsync_ for scalar complex(c_float_complex)
!> dest/src with integer(c_size_t) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 2*4 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_c4_0_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex),target,intent(inout) :: dest
      complex(c_float_complex),target,intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_size_t),intent(in) :: dpitch
      integer(c_size_t),intent(in) :: spitch
      integer(c_size_t),intent(in) :: width
      integer(c_size_t),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_c4_0_c_size_t
      !
      cudaMemcpy2DAsync_c4_0_c_size_t = cudaMemcpy2DAsync_( &
        c_loc(dest), 2*4_8*dpitch, &
        c_loc(src), 2*4_8*spitch, &
        2*4_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_c4_0_c_size_t
!> Wrapper around cudaMemcpy2DAsync_ for rank-1 complex(c_float_complex)
!> dest/src with integer(c_int) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 2*4 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_c4_1_c_int(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex),target,dimension(:),intent(inout) :: dest
      complex(c_float_complex),target,dimension(:),intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_int),intent(in) :: dpitch
      integer(c_int),intent(in) :: spitch
      integer(c_int),intent(in) :: width
      integer(c_int),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_c4_1_c_int
      !
      ! Multiplying by a kind-8 literal makes each product a kind-8
      ! integer before it is handed to cudaMemcpy2DAsync_.
      cudaMemcpy2DAsync_c4_1_c_int = cudaMemcpy2DAsync_( &
        c_loc(dest), 2*4_8*dpitch, &
        c_loc(src), 2*4_8*spitch, &
        2*4_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_c4_1_c_int
!> Wrapper around cudaMemcpy2DAsync_ for rank-1 complex(c_float_complex)
!> dest/src with integer(c_size_t) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 2*4 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_c4_1_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex),target,dimension(:),intent(inout) :: dest
      complex(c_float_complex),target,dimension(:),intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_size_t),intent(in) :: dpitch
      integer(c_size_t),intent(in) :: spitch
      integer(c_size_t),intent(in) :: width
      integer(c_size_t),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_c4_1_c_size_t
      !
      cudaMemcpy2DAsync_c4_1_c_size_t = cudaMemcpy2DAsync_( &
        c_loc(dest), 2*4_8*dpitch, &
        c_loc(src), 2*4_8*spitch, &
        2*4_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_c4_1_c_size_t
!> Wrapper around cudaMemcpy2DAsync_ for rank-2 complex(c_float_complex)
!> dest/src with integer(c_int) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 2*4 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_c4_2_c_int(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex),target,dimension(:,:),intent(inout) :: &
&dest
      complex(c_float_complex),target,dimension(:,:),intent(in)    :: &
&src
      ! The size arguments are only read here, hence intent(in).
      integer(c_int),intent(in) :: dpitch
      integer(c_int),intent(in) :: spitch
      integer(c_int),intent(in) :: width
      integer(c_int),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_c4_2_c_int
      !
      ! Multiplying by a kind-8 literal makes each product a kind-8
      ! integer before it is handed to cudaMemcpy2DAsync_.
      cudaMemcpy2DAsync_c4_2_c_int = cudaMemcpy2DAsync_( &
        c_loc(dest), 2*4_8*dpitch, &
        c_loc(src), 2*4_8*spitch, &
        2*4_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_c4_2_c_int
!> Wrapper around cudaMemcpy2DAsync_ for rank-2 complex(c_float_complex)
!> dest/src with integer(c_size_t) pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 2*4 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_c4_2_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_float_complex),target,dimension(:,:),intent(inout) :: &
&dest
      complex(c_float_complex),target,dimension(:,:),intent(in)    :: &
&src
      ! The size arguments are only read here, hence intent(in).
      integer(c_size_t),intent(in) :: dpitch
      integer(c_size_t),intent(in) :: spitch
      integer(c_size_t),intent(in) :: width
      integer(c_size_t),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_c4_2_c_size_t
      !
      cudaMemcpy2DAsync_c4_2_c_size_t = cudaMemcpy2DAsync_( &
        c_loc(dest), 2*4_8*dpitch, &
        c_loc(src), 2*4_8*spitch, &
        2*4_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_c4_2_c_size_t
!> Wrapper around cudaMemcpy2DAsync_ for scalar
!> complex(c_double_complex) dest/src with integer(c_int)
!> pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 2*8 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_c8_0_c_int(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex),target,intent(inout) :: dest
      complex(c_double_complex),target,intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_int),intent(in) :: dpitch
      integer(c_int),intent(in) :: spitch
      integer(c_int),intent(in) :: width
      integer(c_int),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_c8_0_c_int
      !
      ! Multiplying by a kind-8 literal makes each product a kind-8
      ! integer before it is handed to cudaMemcpy2DAsync_.
      cudaMemcpy2DAsync_c8_0_c_int = cudaMemcpy2DAsync_( &
        c_loc(dest), 2*8_8*dpitch, &
        c_loc(src), 2*8_8*spitch, &
        2*8_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_c8_0_c_int
!> Wrapper around cudaMemcpy2DAsync_ for scalar
!> complex(c_double_complex) dest/src with integer(c_size_t)
!> pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 2*8 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_c8_0_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex),target,intent(inout) :: dest
      complex(c_double_complex),target,intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_size_t),intent(in) :: dpitch
      integer(c_size_t),intent(in) :: spitch
      integer(c_size_t),intent(in) :: width
      integer(c_size_t),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_c8_0_c_size_t
      !
      cudaMemcpy2DAsync_c8_0_c_size_t = cudaMemcpy2DAsync_( &
        c_loc(dest), 2*8_8*dpitch, &
        c_loc(src), 2*8_8*spitch, &
        2*8_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_c8_0_c_size_t
!> Wrapper around cudaMemcpy2DAsync_ for rank-1
!> complex(c_double_complex) dest/src with integer(c_int)
!> pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 2*8 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_c8_1_c_int(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex),target,dimension(:),intent(inout) :: &
&dest
      complex(c_double_complex),target,dimension(:),intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_int),intent(in) :: dpitch
      integer(c_int),intent(in) :: spitch
      integer(c_int),intent(in) :: width
      integer(c_int),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_c8_1_c_int
      !
      ! Multiplying by a kind-8 literal makes each product a kind-8
      ! integer before it is handed to cudaMemcpy2DAsync_.
      cudaMemcpy2DAsync_c8_1_c_int = cudaMemcpy2DAsync_( &
        c_loc(dest), 2*8_8*dpitch, &
        c_loc(src), 2*8_8*spitch, &
        2*8_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_c8_1_c_int
!> Wrapper around cudaMemcpy2DAsync_ for rank-1
!> complex(c_double_complex) dest/src with integer(c_size_t)
!> pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 2*8 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_c8_1_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex),target,dimension(:),intent(inout) :: &
&dest
      complex(c_double_complex),target,dimension(:),intent(in)    :: src
      ! The size arguments are only read here, hence intent(in).
      integer(c_size_t),intent(in) :: dpitch
      integer(c_size_t),intent(in) :: spitch
      integer(c_size_t),intent(in) :: width
      integer(c_size_t),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_c8_1_c_size_t
      !
      cudaMemcpy2DAsync_c8_1_c_size_t = cudaMemcpy2DAsync_( &
        c_loc(dest), 2*8_8*dpitch, &
        c_loc(src), 2*8_8*spitch, &
        2*8_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_c8_1_c_size_t
!> Wrapper around cudaMemcpy2DAsync_ for rank-2
!> complex(c_double_complex) dest/src with integer(c_int)
!> pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 2*8 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_c8_2_c_int(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex),target,dimension(:,:),intent(inout) :: &
&dest
      complex(c_double_complex),target,dimension(:,:),intent(in)    :: &
&src
      ! The size arguments are only read here, hence intent(in).
      integer(c_int),intent(in) :: dpitch
      integer(c_int),intent(in) :: spitch
      integer(c_int),intent(in) :: width
      integer(c_int),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_c8_2_c_int
      !
      ! Multiplying by a kind-8 literal makes each product a kind-8
      ! integer before it is handed to cudaMemcpy2DAsync_.
      cudaMemcpy2DAsync_c8_2_c_int = cudaMemcpy2DAsync_( &
        c_loc(dest), 2*8_8*dpitch, &
        c_loc(src), 2*8_8*spitch, &
        2*8_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_c8_2_c_int
!> Wrapper around cudaMemcpy2DAsync_ for rank-2
!> complex(c_double_complex) dest/src with integer(c_size_t)
!> pitch/width/height arguments.
!> dest and src are passed as C addresses (c_loc); dpitch, spitch and
!> width are multiplied by 2*8 (presumably the element size in bytes --
!> confirm), height is converted to a kind-8 integer, and myKind and
!> stream are forwarded as given.
!> @return the value returned by cudaMemcpy2DAsync_
function cudaMemcpy2DAsync_c8_2_c_size_t(dest,dpitch,src,spitch,width,&
&height,myKind,stream)
      use iso_c_binding
      use cudafor_enums
      use cudafor_types
      implicit none
      complex(c_double_complex),target,dimension(:,:),intent(inout) :: &
&dest
      complex(c_double_complex),target,dimension(:,:),intent(in)    :: &
&src
      ! The size arguments are only read here, hence intent(in).
      integer(c_size_t),intent(in) :: dpitch
      integer(c_size_t),intent(in) :: spitch
      integer(c_size_t),intent(in) :: width
      integer(c_size_t),intent(in) :: height
      integer(kind(cudaMemcpyHostToHost)),value :: myKind
      type(c_ptr),value :: stream
      integer(kind(cudaSuccess)) :: cudaMemcpy2DAsync_c8_2_c_size_t
      !
      cudaMemcpy2DAsync_c8_2_c_size_t = cudaMemcpy2DAsync_( &
        c_loc(dest), 2*8_8*dpitch, &
        c_loc(src), 2*8_8*spitch, &
        2*8_8*width, height*1_8, myKind, stream)
    end function cudaMemcpy2DAsync_c8_2_c_size_t

end module