Commit c0b0318b authored by ccfd's avatar ccfd
Browse files

first commit

parents
#ifndef __OCFD_NS_SOLVER_H
#define __OCFD_NS_SOLVER_H
#ifdef __cplusplus
extern "C"{
#endif
void NS_solver_real();
#ifdef __cplusplus
}
#endif
#endif
\ No newline at end of file
#ifndef __OCFD_SCHEME_H
#define __OCFD_SCHEME_H
#include "parameters.h"
#include "cuda_commen.h"
#ifdef __cplusplus
extern "C"{
#endif
__device__ REAL minmod2(REAL x1, REAL x2);
__device__ REAL minmod4(REAL x1, REAL x2, REAL x3, REAL x4);
__global__ void OCFD_CD6_kernel(dim3 flagxyzb, cudaField pf, cudaField pfy, cudaJobPackage job);
__global__ void OCFD_CD8_kernel(dim3 flagxyzb, cudaField pf, cudaField pfy, cudaJobPackage job);
__global__ void OCFD_dx0_CD6_kernel(cudaField pf , cudaField pfx , cudaJobPackage job);
__global__ void OCFD_dy0_CD6_kernel(cudaField pf , cudaField pfy , cudaJobPackage job);
__global__ void OCFD_dz0_CD6_kernel(cudaField pf , cudaField pfz , cudaJobPackage job);
__global__ void OCFD_dx0_CD8_kernel(cudaField pf , cudaField pfx , cudaJobPackage job);
__global__ void OCFD_dy0_CD8_kernel(cudaField pf , cudaField pfy , cudaJobPackage job);
__global__ void OCFD_dz0_CD8_kernel(cudaField pf , cudaField pfz , cudaJobPackage job);
__device__ REAL OCFD_weno7_SYMBO_kernel_P(int WENO_LMT_FLAG, REAL *stencil);
__device__ REAL OCFD_weno7_SYMBO_kernel_M(int WENO_LMT_FLAG, REAL *stencil);
__global__ void OCFD_weno7_SYMBO_P_kernel(int i, int WENO_LMT_FLAG, dim3 flagxyzb, cudaSoA f, cudaSoA du, cudaField Ajac, cudaJobPackage job);
__global__ void OCFD_weno7_SYMBO_M_kernel(int i, int WENO_LMT_FLAG, dim3 flagxyzb, cudaSoA f, cudaSoA du, cudaField Ajac, cudaJobPackage job);
__device__ void aa(int a, ...);
__device__ REAL OCFD_weno5_kernel_P(REAL *stencil);
__device__ REAL OCFD_weno5_kernel_M(REAL *stencil);
__global__ void OCFD_weno5_P_kernel(int i, dim3 flagxyzb, cudaSoA f, cudaSoA du, cudaField Ajac, cudaJobPackage job);
__global__ void OCFD_weno5_M_kernel(int i, dim3 flagxyzb, cudaSoA f, cudaSoA du, cudaField Ajac, cudaJobPackage job);
__device__ REAL OCFD_weno7_kernel_P(REAL *stencil);
__device__ REAL OCFD_weno7_kernel_M(REAL *stencil);
__global__ void OCFD_weno7_P_kernel(int i, dim3 flagxyzb, cudaSoA f, cudaSoA du, cudaField Ajac, cudaJobPackage job);
__global__ void OCFD_weno7_M_kernel(int i, dim3 flagxyzb, cudaSoA f, cudaSoA du, cudaField Ajac, cudaJobPackage job);
__device__ REAL OCFD_NND2_kernel_P(REAL *stencil);
__device__ REAL OCFD_NND2_kernel_M(REAL *stencil);
__global__ void OCFD_NND2_P_kernel(int i, dim3 flagxyzb, cudaSoA f, cudaSoA du, cudaField Ajac, cudaJobPackage job);
__global__ void OCFD_NND2_M_kernel(int i, dim3 flagxyzb, cudaSoA f, cudaSoA du, cudaField Ajac, cudaJobPackage job);
__device__ REAL OCFD_UP7_kernel_P(REAL *stencil);
__device__ REAL OCFD_UP7_kernel_M(REAL *stencil);
__global__ void OCFD_UP7_P_kernel(int i, dim3 flagxyzb, cudaSoA f, cudaSoA du, cudaField Ajac, cudaJobPackage job);
__global__ void OCFD_UP7_M_kernel(int i, dim3 flagxyzb, cudaSoA f, cudaSoA du, cudaField Ajac, cudaJobPackage job);
__device__ REAL OCFD_OMP6_kernel_P(int OMP6_FLAG, REAL *stencil);
__device__ REAL OCFD_OMP6_kernel_M(int OMP6_FLAG, REAL *stencil);
__global__ void OCFD_OMP6_P_kernel(int i, int OMP6_FLAG, dim3 flagxyzb, cudaSoA f, cudaSoA du, cudaField Ajac, cudaJobPackage job);
__global__ void OCFD_OMP6_M_kernel(int i, int OMP6_FLAG, dim3 flagxyzb, cudaSoA f, cudaSoA du, cudaField Ajac, cudaJobPackage job);
__device__ REAL OCFD_weno5_kernel_P(REAL *stencil);
__device__ REAL OCFD_weno5_kernel_M(REAL *stencil);
__device__ int get_data_kernel(int flagxyz, dim3 *coords, cudaSoA f, int num, REAL *stencil, int ka1, int kb1, REAL *sort, cudaJobPackage job);
//__device__ void put_du_p_kernel(dim3 flagxyzb, dim3 coords, REAL tmp_r, REAL tmp_l, cudaSoA du, REAL *stencil, int num, cudaField Ajac, cudaJobPackage job);
__device__ void put_du_p_kernel(dim3 flagxyz, dim3 coords, REAL tmp_r, REAL tmp_l, cudaSoA du, int num, cudaField Ajac, cudaJobPackage job);
__device__ void put_du_m_kernel(dim3 flagxyz, dim3 coords, REAL tmp_r, REAL tmp_l, cudaSoA du, int num, cudaField Ajac, cudaJobPackage job);
//__device__ void put_du_m_kernel(dim3 flagxyzb, dim3 coords, REAL tmp_r, REAL tmp_l, cudaSoA du, REAL *stencil, int num, cudaField Ajac, cudaJobPackage job);
#ifdef __cplusplus
}
#endif
#endif
\ No newline at end of file
#ifndef __OCFD_SCHEME_CHOOSE_H
#define __OCFD_SCHEME_CHOOSE_H
#include "parameters.h"
#include "cuda_commen.h"
#ifdef __cplusplus
extern "C"{
#endif
void OCFD_dx0(cudaField pf , cudaField pfx , cudaJobPackage job_in , dim3 blockdim_in, cudaStream_t *stream, int boundl, int boundr);
void OCFD_dx1(cudaSoA pf, cudaSoA pdu, cudaField Ajac, cudaField u, cudaField v, cudaField w, cudaField cc,
cudaField Ax, cudaField Ay, cudaField Az, cudaJobPackage job_in, dim3 blockdim_in, cudaStream_t *stream, int boundl, int boundr);
void OCFD_dx2(cudaSoA pf, cudaSoA pdu, cudaField Ajac, cudaField u, cudaField v, cudaField w, cudaField cc,
cudaField Ax, cudaField Ay, cudaField Az, cudaJobPackage job_in, dim3 blockdim_in, cudaStream_t *stream, int boundl, int boundr);
void OCFD_dy0(cudaField pf , cudaField pfx , cudaJobPackage job_in , dim3 blockdim_in, cudaStream_t *stream, int boundl, int boundr);
void OCFD_dy1(cudaSoA pf, cudaSoA pdu, cudaField Ajac, cudaField u, cudaField v, cudaField w, cudaField cc,
cudaField Ax, cudaField Ay, cudaField Az, cudaJobPackage job_in, dim3 blockdim_in, cudaStream_t *stream, int boundl, int boundr);
void OCFD_dy2(cudaSoA pf, cudaSoA pdu, cudaField Ajac, cudaField u, cudaField v, cudaField w, cudaField cc,
cudaField Ax, cudaField Ay, cudaField Az, cudaJobPackage job_in, dim3 blockdim_in, cudaStream_t *stream, int boundl, int boundr);
void OCFD_dz0(cudaField pf , cudaField pfx , cudaJobPackage job_in , dim3 blockdim_in, cudaStream_t *stream, int boundl, int boundr);
void OCFD_dz1(cudaSoA pf, cudaSoA pdu, cudaField Ajac, cudaField u, cudaField v, cudaField w, cudaField cc,
cudaField Ax, cudaField Ay, cudaField Az, cudaJobPackage job_in, dim3 blockdim_in, cudaStream_t *stream, int boundl, int boundr);
void OCFD_dz2(cudaSoA pf, cudaSoA pdu, cudaField Ajac, cudaField u, cudaField v, cudaField w, cudaField cc,
cudaField Ax, cudaField Ay, cudaField Az, cudaJobPackage job_in, dim3 blockdim_in, cudaStream_t *stream, int boundl, int boundr);
void OCFD_dx0_jac(cudaField pf , cudaField pfx , cudaJobPackage job_in , dim3 blockdim_in, cudaStream_t *stream, int bound);
void OCFD_dy0_jac(cudaField pf , cudaField pfx , cudaJobPackage job_in , dim3 blockdim_in, cudaStream_t *stream, int bound);
void OCFD_dz0_jac(cudaField pf , cudaField pfx , cudaJobPackage job_in , dim3 blockdim_in, cudaStream_t *stream, int bound);
#ifdef __cplusplus
}
#endif
#endif
\ No newline at end of file
#ifndef __OCFD_SCHEMES_HYBRID_AUTO_H
#define __OCFD_SCHEMES_HYBRID_AUTO_H
#include "parameters.h"
#include "cuda_commen.h"
#ifdef __cplusplus
extern "C"{
#endif
// Parameter/work-array bundle for the hybrid-auto scheme selection.
// NOTE(review): per-field meanings below are inferred from the field names
// and the prototypes in this header only — confirm against the implementation.
typedef struct HybridAuto_TYPE_{ //used in SCHEME_HYBRIDAUTO
REAL *P_intvs; // array of REAL values; presumably the indicator intervals that separate schemes — TODO confirm
cudaField_int *scheme_x; // integer field handle; presumably the per-point scheme flag for the x direction — TODO confirm
cudaField_int *scheme_y; // as scheme_x, presumably for the y direction — TODO confirm
cudaField_int *scheme_z; // as scheme_x, presumably for the z direction — TODO confirm
int Num_Patch_zones; // presumably the number of entries in use in zones / Pa_zones — TODO confirm
int *zones; //[6][Patch_max]
REAL *Pa_zones; // presumably one REAL value per patch zone — TODO confirm
int IF_Smooth_dp; // flag; presumably enables the Smoothing_dp() step — TODO confirm
int Style; // presumably selects between the default and Jameson variants — TODO confirm
} HybridAuto_TYPE;
void Set_Scheme_HybridAuto(cudaStream_t *stream);
void Comput_P(cudaField *d, cudaField *T, cudaField *P, cudaStream_t *stream);
void Comput_grad(cudaField *f, cudaStream_t *stream);
void Smoothing_dp(cudaStream_t *stream);
void Patch_zones(cudaStream_t *stream);
void Boundary_dp(cudaStream_t *stream);
void Comput_Scheme_point(cudaStream_t *stream);
void Comput_Scheme_point_Jameson(cudaStream_t *stream);
__global__ void add_kernel(REAL *g_odata, int g_odata_size);
__device__ REAL warpReduce(REAL mySum);
__device__ int get_Hyscheme_flag_p_kernel(int flagxyz, dim3 coords, cudaField_int scheme, cudaJobPackage job);
__device__ int get_Hyscheme_flag_m_kernel(int flagxyz, dim3 coords, cudaField_int scheme, cudaJobPackage job);
__global__ void OCFD_HybridAuto_P_kernel(int i, dim3 flagxyzb, cudaSoA f, cudaSoA du, cudaField Ajac, cudaField_int scheme, cudaJobPackage job);
__global__ void OCFD_HybridAuto_M_kernel(int i, dim3 flagxyzb, cudaSoA f, cudaSoA du, cudaField Ajac, cudaField_int scheme, cudaJobPackage job);
__global__ void OCFD_HybridAuto_P_Jameson_kernel(int i, dim3 flagxyzb, cudaSoA f, cudaSoA du, cudaField Ajac, cudaField_int scheme, cudaJobPackage job);
__global__ void OCFD_HybridAuto_M_Jameson_kernel(int i, dim3 flagxyzb, cudaSoA f, cudaSoA du, cudaField Ajac, cudaField_int scheme, cudaJobPackage job);
void HybridAuto_scheme_IO();
void HybridAuto_scheme_Proportion();
void modify_NT(cudaStream_t *stream);
#ifdef __cplusplus
}
#endif
#endif
\ No newline at end of file
#ifndef __OCFD_STREAM_H
#define __OCFD_STREAM_H
#include "cuda.h"
#include "cuda_runtime.h"
#ifdef __cplusplus
extern "C"{
#endif
void du_comput(int KRK);
void opencfd_mem_init_Stream();
void opencfd_mem_finalize_Stream();
void* du_Jacobian3d_all(void* pthread_id);
void* du_invis_Jacobian3d(void* pthread_id);
void* du_invis_Jacobian3d_all(void* pthread_id);
void* du_invis_Jacobian3d_outer_exchange(cudaStream_t *stream);
void* du_invis_Jacobian3d_outer_x(cudaStream_t *stream);
void* du_invis_Jacobian3d_outer_y(cudaStream_t *stream);
void* du_invis_Jacobian3d_outer_z(cudaStream_t *stream);
void* du_vis_Jacobian3d(void* pthread_id);
void* du_vis_Jacobian3d_all(void* pthread_id);
void* du_vis_Jacobian3d_inner_x(cudaStream_t *stream);
void* du_vis_Jacobian3d_inner_y(cudaStream_t *stream);
void* du_vis_Jacobian3d_inner_z(cudaStream_t *stream);
void* du_vis_Jacobian3d_outer_x(cudaStream_t *stream);
void* du_vis_Jacobian3d_outer_y(cudaStream_t *stream);
void* du_vis_Jacobian3d_outer_z(cudaStream_t *stream);
#ifdef __cplusplus
}
#endif
#endif
#ifndef __OCFD_ANA_H
#define __OCFD_ANA_H
#include "cuda_commen.h"
#include "parameters.h"
#include "parameters_d.h"
#include "cuda_utility.h"
#include "math.h"
#include "OCFD_Schemes_hybrid_auto.h"
#include "OCFD_Schemes_Choose.h"
#include "OCFD_IO_mpi.h"
#include "utility.h"
#include "OCFD_IO.h"
#ifdef __cplusplus
extern "C"{
#endif
void ana_residual(cudaField PE_d, REAL *E0);
void ana_Jac();
void OCFD_ana(int style, int ID);
void ana_NAN_and_NT();
void init_time_average();
void get_inner(cudaField x1, cudaField x2);
#ifdef __cplusplus
}
#endif
#endif
\ No newline at end of file
#ifndef __OCFD_BOUND_SCHEME_H
#define __OCFD_BOUND_SCHEME_H
#include "cuda_commen.h"
#ifdef __cplusplus
extern "C"{
#endif
void OCFD_Dx0_bound(cudaField pf , cudaField pfx , cudaJobPackage job_in , dim3 blockdim_in, cudaStream_t *stream);
void OCFD_Dy0_bound(cudaField pf , cudaField pfx , cudaJobPackage job_in , dim3 blockdim_in, cudaStream_t *stream);
void OCFD_Dz0_bound(cudaField pf , cudaField pfx , cudaJobPackage job_in , dim3 blockdim_in, cudaStream_t *stream);
void OCFD_bound(dim3 *flagxyzb, int boundp, int boundm, cudaJobPackage job);
__device__ int OCFD_D0bound_scheme_kernel(REAL* tmp, dim3 flagxyzb, dim3 coords, REAL *stencil, int ka1, cudaJobPackage job);
__device__ int OCFD_bound_scheme_kernel_p(REAL* flag, dim3 flagxyzb, dim3 coords, REAL *stencil, int ka1, int kb1, cudaJobPackage job);
__device__ int OCFD_bound_scheme_kernel_m(REAL* flag, dim3 flagxyzb, dim3 coords, REAL *stencil, int ka1, int kb1, cudaJobPackage job);
#ifdef __cplusplus
}
#endif
#endif
#ifndef __OCFD_BOUNDARY_H
#define __OCFD_BOUNDARY_H
#include "parameters.h"
#ifdef __cplusplus
extern "C"{
#endif
void OCFD_bc();
#ifdef __cplusplus
}
#endif
#endif
\ No newline at end of file
#ifndef __OCFD_BOUNDARY_LIFTBODY3D_H
#define __OCFD_BOUNDARY_LIFTBODY3D_H
#include "parameters.h"
#ifdef __cplusplus
extern "C"{
#endif
void bc_user_Liftbody3d();
void bc_user_Liftbody3d_simple();
void bc_user_Liftbody3d_plus();
#ifdef __cplusplus
}
#endif
#endif
\ No newline at end of file
#ifndef __OCFD_BOUNDARY_COMPRESSION_H
#define __OCFD_BOUNDARY_COMPRESSION_H
#include "parameters.h"
#ifdef __cplusplus
extern "C"{
#endif
void get_ht_multifrequancy(REAL HT, REAL TT, int MT_MAX, REAL beta);
void bc_user_Compression_conner();
#ifdef __cplusplus
}
#endif
#endif
\ No newline at end of file
#ifndef __OCFD_BOUNDARY_INIT_H
#define __OCFD_BOUNDARY_INIT_H
#include "parameters.h"
#include "cuda_commen.h"
#ifdef __cplusplus
extern "C"{
#endif
// used in boundary_liftbody********************************************************************
extern REAL *pu2d_inlet; //[5][nz][ny]
extern REAL *pu2d_upper; //[5][ny][nx]
extern REAL * pv_dist_wall; // [ny][nx]
extern REAL *pv_dist_coeff; // [3][ny][nx]
extern REAL * pu_dist_upper; // [ny][nx]
extern cudaField *pu2d_inlet_d; //[5][nz][ny]
extern cudaField *pu2d_upper_d; //[5][ny][nx]
//extern cudaField *pv_dist_wall_d; // [ny][nx]
extern cudaField *pv_dist_coeff_d; // [3][ny][nx]
extern cudaField *pu_dist_upper_d; // [ny][nx]
void bc_parameter();
void bc_user_Liftbody3d_init();
//**********************************************************************************************
// used in boundary_compressible_conner*********************************************************
extern REAL *pub1; // [ny][4]
extern REAL *pfx; // [nx]
extern REAL *pgz; // [nz]
extern REAL *TM; // [MTMAX]
extern REAL *fait; // [MTMAX]
extern REAL SLZ;
extern cudaField *pub1_d; // [ny][4]
extern cudaField *pfx_d; // [nx]
extern cudaField *pgz_d; // [nz]
void bc_user_Compression_conner_init();
//**********************************************************************************************
void get_fait_multifrequancy(int MT_MAX);
void get_xy_blow_suction_multiwave(int NX, int MZ_MAX, REAL *xx,
REAL *fx, REAL *gz, REAL DIST_BEGIN, REAL DIST_END);
void get_xs_blow_suction_multiwave(int NX, int NZ, int MZ_MAX, REAL *xx,
REAL *zz, REAL SL, REAL *fx, REAL *gz, REAL DIST_BEGIN, REAL DIST_END);
#ifdef __cplusplus
}
#endif
#endif
#ifndef __OCFD_FILTERINT_H
#define __OCFD_FILTERINT_H
#include "parameters.h"
#include "cuda_commen.h"
#ifdef __cplusplus
extern "C"{
#endif
void filtering(REAL *pf,REAL *pf0,REAL *pp);
void filter_x3d(REAL *pf, REAL *pf0, REAL s0, int ib, int ie, int jb, int je, int kb, int ke);
void filter_y3d(REAL *pf, REAL *pf0, REAL s0, int ib, int ie, int jb, int je, int kb, int ke);
void filter_z3d(REAL *pf, REAL *pf0, REAL s0, int ib, int ie, int jb, int je, int kb, int ke);
void filter_x3d_shock(cudaSoA *pf, cudaSoA *pf0, cudaField *pp, REAL s0, REAL rth, int ib, int ie, int jb, int je, int kb, int ke, int IF_Filter);
void filter_y3d_shock(cudaSoA *pf, cudaSoA *pf0, cudaField *pp, REAL s0, REAL rth, int ib, int ie, int jb, int je, int kb, int ke, int IF_Filter);
void filter_z3d_shock(cudaSoA *pf, cudaSoA *pf0, cudaField *pp, REAL s0, REAL rth, int ib, int ie, int jb, int je, int kb, int ke, int IF_Filter);
void set_para_filtering();
#ifdef __cplusplus
}
#endif
#endif
\ No newline at end of file
#ifndef __OCFD_FLUX_CHARTERIC_H
#define __OCFD_FLUX_CHARTERIC_H
#include "parameters.h"
#include "cuda_commen.h"
#ifdef __cplusplus
extern "C"{
#endif
__global__ void OCFD_weno7_SYMBO_character_P_kernel(int WENO_LMT_FLAG,
dim3 flagxyzb, cudaSoA f, cudaSoA du, cudaField Ajac, cudaField u, cudaField v,
cudaField w, cudaField cc, cudaField Ax, cudaField Ay, cudaField Az,
cudaJobPackage job);
__global__ void OCFD_weno7_SYMBO_character_M_kernel(int WENO_LMT_FLAG,
dim3 flagxyzb, cudaSoA f, cudaSoA du, cudaField Ajac, cudaField u, cudaField v,
cudaField w, cudaField cc, cudaField Ax, cudaField Ay, cudaField Az,
cudaJobPackage job);
__global__ void OCFD_HybridAuto_character_P_kernel(
dim3 flagxyzb, cudaSoA f, cudaSoA du, cudaField Ajac, cudaField u, cudaField v,
cudaField w, cudaField cc, cudaField Ax, cudaField Ay, cudaField Az,
cudaField_int scheme, cudaJobPackage job);
__global__ void OCFD_HybridAuto_character_M_kernel(
dim3 flagxyzb, cudaSoA f, cudaSoA du, cudaField Ajac, cudaField u, cudaField v,
cudaField w, cudaField cc, cudaField Ax, cudaField Ay, cudaField Az,
cudaField_int scheme, cudaJobPackage job);
__global__ void OCFD_HybridAuto_character_P_Jameson_kernel(
dim3 flagxyzb, cudaSoA f, cudaSoA du, cudaField Ajac, cudaField u, cudaField v,
cudaField w, cudaField cc, cudaField Ax, cudaField Ay, cudaField Az,
cudaField_int scheme, cudaJobPackage job);
__global__ void OCFD_HybridAuto_character_M_Jameson_kernel(
dim3 flagxyzb, cudaSoA f, cudaSoA du, cudaField Ajac, cudaField u, cudaField v,
cudaField w, cudaField cc, cudaField Ax, cudaField Ay, cudaField Az,
cudaField_int scheme, cudaJobPackage job);
#ifdef __cplusplus
}
#endif
#endif
\ No newline at end of file
#ifndef __OCFD_INIT_H
#define __OCFD_INIT_H
#include "parameters.h"
#ifdef __cplusplus
extern "C"{
#endif
void init();
void opencfd_mem_init_all();
void opencfd_mem_finalize_all();
void opencfd_mem_init();
void opencfd_mem_finalize();
void opencfd_para_init();
void opencfd_mem_init_boundary();
void opencfd_mem_finalize_boundary();
void opencfd_mem_init_dev();
void opencfd_mem_finalize_dev();
void opencfd_para_init_dev();
#ifdef __cplusplus
}
#endif
#endif
#ifndef __OCFD_MPI_H
#define __OCFD_MPI_H
#include "parameters.h"
#ifdef __cplusplus
extern "C" {
#endif
void opencfd_mem_init_mpi();
void opencfd_mem_finalize_mpi();
void mpi_init(int * , char ***);
void mpi_finalize();
void part();
// Flatten the index triple (i, j, k) into a single linear offset.
// The x extent used is nx + 2*LAP and the y extent is ny + 2*LAP;
// i varies fastest, then j, then k.
static inline int idx2int(int i,int j,int k){
    const int row_len = nx + 2*LAP;   // number of entries per x-row
    const int col_len = ny + 2*LAP;   // number of x-rows per k-plane
    return i + j*row_len + k*row_len*col_len;
}
int my_mod1(int i,int n);
void New_MPI_datatype();
void get_i_node(int i_global,int * node_i, int * i_local);
void get_j_node(int j_global,int * node_j, int * j_local);
void get_k_node(int k_global,int * node_k, int * k_local);
int get_id(int npx1,int npy1,int npz1);
void exchange_boundary_xyz(REAL *pf);
void exchange_boundary_x(REAL *pf,int Iperiodic1);
void exchange_boundary_y(REAL *pf,int Iperiodic1);
void exchange_boundary_z(REAL *pf,int Iperiodic1);
void exchange_boundary_x_standard(REAL *pf, int Iperiodic1);
void exchange_boundary_y_standard(REAL *pf, int Iperiodic1);
void exchange_boundary_z_standard(REAL *pf, int Iperiodic1);
void exchange_boundary_x_deftype(REAL * pf);
void exchange_boundary_y_deftype(REAL * pf);
void exchange_boundary_z_deftype(REAL * pf);
#ifdef __cplusplus
}
#endif
#endif
\ No newline at end of file
#ifndef __OCFD_MPI_DEV_H
#define __OCFD_MPI_DEV_H
#include "cuda_commen.h"
#ifdef __cplusplus
extern "C" {
#endif
void exchange_boundary_xyz_dev(REAL *hostptr, cudaField * devptr);
void exchange_boundary_x_dev(REAL *hostptr, cudaField * devptr, int Iperiodic1);
void exchange_boundary_y_dev(REAL *hostptr, cudaField * devptr, int Iperiodic1);
void exchange_boundary_z_dev(REAL *hostptr, cudaField * devptr, int Iperiodic1);
void exchange_boundary_x_standard_dev(REAL *hostptr, cudaField * devptr, int Iperiodic1);
void exchange_boundary_y_standard_dev(REAL *hostptr, cudaField * devptr, int Iperiodic1);
void exchange_boundary_z_standard_dev(REAL *hostptr, cudaField * devptr, int Iperiodic1);
void opencfd_mem_init_mpi_dev();
void opencfd_mem_finalize_mpi_dev();
void exchange_boundary_xyz_Async_packed_dev(REAL *hostptr, cudaField * devptr , cudaStream_t *stream);
void exchange_boundary_x_Async_packed_dev(REAL *hostptr , cudaField * devptr, int Iperiodic1 , cudaStream_t *stream);
void exchange_boundary_y_Async_packed_dev(REAL *hostptr , cudaField * devptr, int Iperiodic1 , cudaStream_t *stream);
void exchange_boundary_z_Async_packed_dev(REAL *hostptr , cudaField * devptr, int Iperiodic1 , cudaStream_t *stream);
void exchange_boundary_xyz_packed_dev(REAL *hostptr, cudaField * devptr);
void exchange_boundary_x_packed_dev(REAL *hostptr , cudaField * devptr, int Iperiodic1);
void exchange_boundary_y_packed_dev(REAL *hostptr , cudaField * devptr, int Iperiodic1);
void exchange_boundary_z_packed_dev(REAL *hostptr , cudaField * devptr, int Iperiodic1);
#ifdef __cplusplus
}
#endif
#endif
\ No newline at end of file
#ifndef __OCFD_SPLIT_H
#define __OCFD_SPLIT_H
#include "cuda_commen.h"
#ifdef __cplusplus
extern "C"{
#endif
//__global__ void split_Jac3d_Stager_Warming_ker(cudaField d0, cudaField u0, cudaField v0, cudaField w0, cudaField cc0, cudaSoA fp, cudaSoA fm, cudaField Akx, cudaField Aky, cudaField Akz, REAL tmp0, REAL split_C1, REAL split_C3, cudaJobPackage job);
// Group of cudaField handles that is passed by value as the first argument
// of split_Jac3d_Stager_Warming_ker (declared just below this struct).
// NOTE(review): field meanings are inferred from names only — confirm.
typedef struct sw_split_
{
cudaField d; // presumably density — TODO confirm
cudaField u; // presumably velocity components u, v, w — TODO confirm
cudaField v;
cudaField w;
cudaField cc; // presumably sound speed — TODO confirm
// Three triples of fields; presumably the metric terms for the three
// coordinate directions, matching the kernel's fp_x/fp_y/fp_z outputs — TODO confirm
cudaField Akx;
cudaField Aky;
cudaField Akz;
cudaField Aix;
cudaField Aiy;
cudaField Aiz;
cudaField Asx;
cudaField Asy;
cudaField Asz;
} sw_split;
__global__ void split_Jac3d_Stager_Warming_ker(sw_split sw, cudaSoA fp_x, cudaSoA fm_x, cudaSoA fp_y, cudaSoA fm_y, cudaSoA fp_z, cudaSoA fm_z, REAL tmp0, REAL split_C1, REAL split_C3, cudaJobPackage job);
void Stager_Warming(cudaJobPackage job_in, cudaSoA *fp_x, cudaSoA *fm_x, cudaSoA *fp_y, cudaSoA *fm_y, cudaSoA *fp_z, cudaSoA *fm_z, cudaStream_t *stream);
// Smaller counterpart of sw_split carrying a single (Ax, Ay, Az) triple.
// It is not referenced by any prototype visible in this header;
// presumably it is used inside the implementation of Stager_Warming_out — TODO confirm.
typedef struct sw_split_out_
{
cudaField d; // presumably density — TODO confirm
cudaField u; // presumably velocity components u, v, w — TODO confirm
cudaField v;
cudaField w;
cudaField cc; // presumably sound speed — TODO confirm
cudaField Ax; // presumably metric terms of one coordinate direction — TODO confirm
cudaField Ay;
cudaField Az;
} sw_split_out;
void Stager_Warming_out(cudaJobPackage job_in, cudaSoA *fp, cudaSoA *fm, cudaStream_t *stream);
#ifdef __cplusplus
}
#endif
#endif
\ No newline at end of file
#ifndef __OCFD_TIME_H
#define __OCFD_TIME_H
#include "cuda_commen.h"
#ifdef __cplusplus
extern "C"{
#endif
void OCFD_time_advance(int KRK);
void OCFD_time_advance_plus(int KRK);
__global__ void OCFD_time_advance_ker1(cudaSoA f , cudaSoA fn , cudaSoA du , cudaJobPackage job);
__global__ void OCFD_time_advance_ker2(cudaSoA f , cudaSoA fn , cudaSoA du , cudaJobPackage job);
__global__ void OCFD_time_advance_ker3(cudaSoA f , cudaSoA fn , cudaSoA du , cudaSoA pf_lap , cudaJobPackage job);
#ifdef __cplusplus
}
#endif
#endif
\ No newline at end of file
#ifndef __OCFD_WARP_SHUFFLE_H
#define __OCFD_WARP_SHUFFLE_H
#include "cuda_commen.h"
#include "cuda_runtime.h"
#include "cuda.h"
#include "cuda_runtime_api.h"
#ifdef __cplusplus
extern "C"{
#endif
#ifdef __NVCC__
// Thin wrapper over __shfl_up_sync for a double value.
// The participation mask is fixed to all 32 lanes (0xffffffff);
// delta and width are forwarded unchanged.
__device__ __forceinline__ double __shfl_up_double(double & val , unsigned char delta , unsigned char width ){
    const double received = __shfl_up_sync(0xffffffff , val , delta , width);
    return received;
}
// Thin wrapper over __shfl_down_sync for a double value.
// The participation mask is fixed to all 32 lanes (0xffffffff);
// delta and width are forwarded unchanged.
__device__ __forceinline__ double __shfl_down_double(double & val , unsigned char delta , unsigned char width ){
    const double received = __shfl_down_sync(0xffffffff , val , delta , width);
    return received;
}
// Thin wrapper over __shfl_sync for a double value.
// The participation mask is fixed to all 32 lanes (0xffffffff);
// srcLane and width are forwarded unchanged.
__device__ __forceinline__ double __shfl_double(double & val , unsigned char srcLane , unsigned char width){
    const double received = __shfl_sync(0xffffffff , val , srcLane , width);
    return received;
}
// Thin wrapper over __shfl_xor_sync for a double value.
// The participation mask is fixed to all 32 lanes (0xffffffff).
// Note: the second parameter is named srcLane here but is forwarded to
// __shfl_xor_sync in the position of its lane-mask argument.
__device__ __forceinline__ double __shfl_xor_double(double & val , unsigned char srcLane , unsigned char width){
    const double received = __shfl_xor_sync(0xffffffff , val , srcLane , width);
    return received;
}
#else
// Non-NVCC path: shuffle a 64-bit value as two 32-bit halves.
// The macro takes the address of its first argument and reinterprets it as
// an int2, so `val` must be an addressable 8-byte object (a double lvalue).
// All macro arguments are parenthesized so that compound argument
// expressions bind as the caller wrote them.
#define __shfl_up_double(val , delta , width) __shfl_up_double_( *( (int2*)(&(val)) ) , (delta) , (width))
__device__ __forceinline__ double __shfl_up_double_(int2 & val , unsigned char delta , unsigned char width ){
    // Work on a copy so the caller's value is left untouched.
    int2 out = val;
    // Each 32-bit half is shuffled independently with the same delta/width.
    out.x = __shfl_up(out.x , delta , width);
    out.y = __shfl_up(out.y , delta , width);
    // Reassemble the two halves into a double.
    return ( *( (double*)(&out) ) );
}
// Non-NVCC path: shuffle a 64-bit value as two 32-bit halves.
// The macro takes the address of its first argument and reinterprets it as
// an int2, so `val` must be an addressable 8-byte object (a double lvalue).
// All macro arguments are parenthesized so that compound argument
// expressions bind as the caller wrote them.
#define __shfl_down_double(val , delta , width) __shfl_down_double_( *( (int2*)(&(val)) ) , (delta) , (width))
__device__ __forceinline__ double __shfl_down_double_(int2 & val , unsigned char delta , unsigned char width ){
    // Work on a copy so the caller's value is left untouched.
    int2 out = val;
    // Each 32-bit half is shuffled independently with the same delta/width.
    out.x = __shfl_down(out.x , delta , width);
    out.y = __shfl_down(out.y , delta , width);
    // Reassemble the two halves into a double.
    return ( *( (double*)(&out) ) );
}
// Non-NVCC path: shuffle a 64-bit value as two 32-bit halves.
// The macro takes the address of its first argument and reinterprets it as
// an int2, so `val` must be an addressable 8-byte object (a double lvalue).
// All macro arguments are parenthesized so that compound argument
// expressions bind as the caller wrote them.
#define __shfl_double(val , delta , width) __shfl_double_( *( (int2*)(&(val)) ) , (delta) , (width))
__device__ __forceinline__ double __shfl_double_(int2 & val , unsigned char srcLane , unsigned char width){
    // Work on a copy so the caller's value is left untouched.
    int2 out = val;
    // Each 32-bit half is fetched from the same source lane.
    out.x = __shfl(out.x , srcLane , width);
    out.y = __shfl(out.y , srcLane , width);
    // Reassemble the two halves into a double.
    return ( *( (double*)(&out) ) );
}
// Non-NVCC path: shuffle a 64-bit value as two 32-bit halves.
// The macro takes the address of its first argument and reinterprets it as
// an int2, so `val` must be an addressable 8-byte object (a double lvalue).
// All macro arguments are parenthesized so that compound argument
// expressions bind as the caller wrote them.
#define __shfl_xor_double(val , delta , width) __shfl_xor_double_( *( (int2*)(&(val)) ) , (delta) , (width))
__device__ __forceinline__ double __shfl_xor_double_(int2 & val , unsigned char srcLane , unsigned char width){
    // Work on a copy so the caller's value is left untouched.
    int2 out = val;
    // Each 32-bit half is exchanged using the same second argument
    // (named srcLane here, passed in __shfl_xor's lane-mask position).
    out.x = __shfl_xor(out.x , srcLane , width);
    out.y = __shfl_xor(out.y , srcLane , width);
    // Reassemble the two halves into a double.
    return ( *( (double*)(&out) ) );
}
#endif
#ifdef __cplusplus
}
#endif
#endif
#ifndef _COMMEN_KERNEL_H
#define _COMMEN_KERNEL_H
#include "cuda_commen.h"
#ifdef __cplusplus
extern "C"{
#endif
__global__ void cuda_mem_value_init(REAL value , REAL * ptr , unsigned int pitch , unsigned int size_x , unsigned int size_y , unsigned int size_z);
void cuda_mem_value_init_warp(REAL value , REAL * ptr , unsigned int pitch , unsigned int size_x , unsigned int size_y , unsigned int size_z);
// eyes on no-lap region
__global__ void pri_to_cons_kernel(cudaSoA pcons , cudaField pd , cudaField pu , cudaField pv , cudaField pw , cudaField pT , cudaJobPackage job);
void pri_to_cons_kernel_warp(cudaSoA *pcons , cudaField *pd , cudaField *pu , cudaField *pv , cudaField *pw , cudaField *pT , cudaJobPackage job_in , dim3 blockdim_in );
__global__ void cons_to_pri_kernel(cudaSoA f, cudaField d , cudaField u , cudaField v , cudaField w , cudaField T , cudaField P , cudaJobPackage job);
void get_duvwT();
__global__ void get_Amu_kernal(cudaField Amu , cudaField T , cudaJobPackage job);
void get_Amu();
__global__ void sound_speed_kernel(cudaField T , cudaField cc , cudaJobPackage job);
__global__ void YF_Pe_XF(cudaField yF , cudaField xF , cudaField AJac , cudaJobPackage job);
__global__ void ZF_e_XF_P_YF(cudaField out , cudaField xF , cudaField yF , cudaJobPackage job);
__global__ void ZF_e_XF_P_YF_LAP(cudaField out , cudaField xF , cudaField yF , cudaJobPackage job);
__global__ void ZF_Pe_XF_P_YF(cudaField zF , cudaField xF , cudaField yF , cudaField AJac , cudaJobPackage job);
/* ========================================= */
// inline function
// #include "config_parameters.h"
// #include "cuda_commen.h"
// #include "cuda_utility.h"
// #include "parameters_d.h"
// __device__ inline void cons_to_pri_dev_fun(cudaField & d , cudaField & u , cudaField & v , cudaField & w , cudaField & T , cudaField & P , REAL & f0 , REAL & f1 , REAL & f2 , REAL & f3 , REAL & f4 ){
// get_Field_LAP(d , x+LAP , y+LAP , z+LAP) = f0;
// REAL u = f1/f0;
// get_Field_LAP(u , x+LAP , y+LAP , z+LAP) = u;
// REAL v = f2/f0;
// get_Field_LAP(v , x+LAP , y+LAP , z+LAP) = v;
// REAL w = f3/f0;
// get_Field_LAP(w , x+LAP , y+LAP , z+LAP) = w;
// REAL tmp = f4 - 0.5*f0*(u*u + v*v + w*w);
// get_Field_LAP(T , x+LAP , y+LAP , z+LAP) = tmp/(f0*Cv_d);
// tmp = tmp/d1;
// get_Field_LAP(P , x+LAP , y+LAP , z+LAP) = tmp*(Gamma_d - 1.0);
// }
// __device__ inline void get_Amu_kernal(cudaField & Amu , REAL & t){
// get_Field(Amu , x,y,z) = amu_C0_d * sqrt(t * t * t) / (Tsb_d + t);
// }
// __device__ inline void sound_speed_kernel(cudaField & cc , REAL & t){
// get_Field_LAP(cc , x,y,z) = sqrt( get_Field_LAP(T , x,y,z) )/Ama_d;
// }
#ifdef __cplusplus
}
#endif
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment