// boundary scheme #include "parameters.h" #include "utility.h" #include "OCFD_Schemes.h" #include "parameters_d.h" #include "cuda_commen.h" #include "cuda_utility.h" #include "mpi.h" #define PREPARE_x \ dim3 blockdim , griddim;\ cal_grid_block_dim(&griddim , &blockdim , blockdim_in.x , blockdim_in.y , blockdim_in.z , 1 , size.y , size.z);\ #define PREPARE_y \ dim3 blockdim , griddim;\ cal_grid_block_dim(&griddim , &blockdim , blockdim_in.x , blockdim_in.y , blockdim_in.z , size.x , 1 , size.z);\ #define PREPARE_z \ dim3 blockdim , griddim;\ cal_grid_block_dim(&griddim , &blockdim , blockdim_in.x , blockdim_in.y , blockdim_in.z , size.x , size.y , 1);\ #ifdef DEBUG_MODE #define CHECK_SIZE(dir , call)\ if(size.dir >= LAP){\ PREPARE_ ##dir\ call;\ }else{\ printf("job_in.start." #dir " = %d , job_in.size." #dir " = %d\n",job_in.start.dir , size.dir);\ printf("illegal size , to launch %s , size." #dir " >= LAP (%d) is required\n" , __FUNCTION__ ,LAP);\ MPI_Abort(MPI_COMM_WORLD , 1);\ } #else #define CHECK_SIZE(dir , call) \ PREPARE_ ##dir\ call; #endif #define CHECK_X(callm , callp)\ dim3 size;\ jobsize(&job_in , &size);\ if(npx == 0 && job_in.start.x == LAP){\ CHECK_SIZE(x , callm)\ }\ if(npx == NPX0-1 && (job_in.start.x + size.x == nx_lap) ){\ CHECK_SIZE(x , callp)\ } #define CHECK_Y(callm , callp)\ dim3 size;\ jobsize(&job_in , &size);\ if(npy == 0 && job_in.start.y == LAP){\ CHECK_SIZE(y , callm)\ }\ if(npy == NPY0-1 && (job_in.start.y + size.y == ny_lap) ){\ CHECK_SIZE(y , callp)\ } #define CHECK_Z(callm , callp)\ dim3 size;\ jobsize(&job_in , &size);\ if(npz == 0 && job_in.start.z == LAP){\ CHECK_SIZE(z , callm)\ }\ if(npz == NPZ0-1 && (job_in.start.z + size.z == nz_lap) ){\ CHECK_SIZE(z , callp)\ } #ifdef __cplusplus extern "C"{ #endif #define D0bound1(coords)\ if(coords == 0){\ *tmp = (stencil[-ka1+1] - stencil[-ka1]);\ return 0;\ }else if(coords == 1){\ *tmp = (stencil[-ka1+1] - stencil[-ka1-1])*0.5;\ return 0;\ }else if(coords == 2){\ *tmp = (stencil[-ka1-2] - 8.0*stencil[-ka1-1] + 8.0*stencil[-ka1+1] - stencil[-ka1+2])/12.0;\ return 0;\ }else if(coords == 3){\ *tmp = (stencil[-ka1+3] - stencil[-ka1-3]\ -9.0*(stencil[-ka1+2] - stencil[-ka1-2])\ +45.0*(stencil[-ka1+1] - stencil[-ka1-1]))/60.0;\ return 0;\ }\ #define D0bound2(coords)\ if(coords == -1){\ *tmp = (stencil[-ka1] - stencil[-ka1-1]);\ return 0;\ }else if(coords == -2){\ *tmp = (stencil[-ka1+1] - stencil[-ka1-1])*0.5;\ return 0;\ }else if(coords == -3){\ *tmp = (stencil[-ka1-2] - 8.0*stencil[-ka1-1] + 8.0*stencil[-ka1+1] - stencil[-ka1+2])/12.0;\ return 0;\ }else if(coords == -4){\ *tmp = (stencil[-ka1+3] - stencil[-ka1-3]\ -9.0*(stencil[-ka1+2] - stencil[-ka1-2])\ +45.0*(stencil[-ka1+1] - stencil[-ka1-1]))/60.0;\ return 0;\ }\ // =========================================================================================================== // __device__ int OCFD_D0bound_scheme_kernel(REAL* tmp, dim3 flagxyzb, dim3 coords, REAL *stencil, int ka1, cudaJobPackage job){ int tmp1; switch(flagxyzb.y){ case 1: { D0bound1(coords.x) } break; case 2: { D0bound1(coords.y) } break; case 3: { D0bound1(coords.z) } break; case 4: { tmp1 = coords.x + job.start.x - job.end.x; D0bound2(tmp1) } break; case 5: { tmp1 = coords.y + job.start.y - job.end.y; D0bound2(tmp1) } break; case 6: { tmp1 = coords.z + job.start.z - job.end.z; D0bound2(tmp1) } break; case 7: { D0bound1(coords.x) tmp1 = coords.x + job.start.x - job.end.x; D0bound2(tmp1) } break; case 8: { D0bound1(coords.y) tmp1 = coords.y + job.start.y - job.end.y; D0bound2(tmp1) } break; case 9: { D0bound1(coords.z) tmp1 = coords.z + job.start.z - job.end.z; D0bound2(tmp1) } break; } return 1; } __global__ void OCFD_Dx0_bound_kernel_m(cudaField f , cudaField fx , cudaJobPackage job){ // eyes on cells WITHOUT LAP unsigned int y = blockDim.y * blockIdx.y + threadIdx.y + job.start.y; unsigned int z = blockDim.z * blockIdx.z + threadIdx.z + job.start.z; if(y < job.end.y && z < job.end.z){ // 0 get_Field(fx , 0 , y-LAP, z-LAP) = ( get_Field_LAP(f , LAP+1 , y , z) - get_Field_LAP(f , LAP , y , z))/hx_d; get_Field(fx , 1 , y-LAP, z-LAP) = ( get_Field_LAP(f , LAP+2 , y , z) - get_Field_LAP(f , LAP , y , z))*0.5/hx_d; get_Field(fx , 2 , y-LAP, z-LAP) = ( get_Field_LAP(f , LAP , y , z) - 8.0*get_Field_LAP(f , LAP+1 , y , z) + 8.0*get_Field_LAP(f , LAP+3 , y , z) - get_Field_LAP(f , LAP+4 , y , z))/(12.0*hx_d); get_Field(fx , 3 , y-LAP , z-LAP) = ( get_Field_LAP(f , LAP+6 , y , z) - get_Field_LAP(f , LAP , y , z) -9.0*(get_Field_LAP(f , LAP+5 , y , z) - get_Field_LAP(f , LAP+1 , y , z) ) +45.0*(get_Field_LAP(f , LAP+4 , y , z) - get_Field_LAP(f , LAP+2 , y , z)) )/(60.0*hx_d); } } __global__ void OCFD_Dx0_bound_kernel_p(cudaField f , cudaField fx , cudaJobPackage job){ // eyes on cells WITHOUT LAP unsigned int y = blockDim.y * blockIdx.y + threadIdx.y + job.start.y; unsigned int z = blockDim.z * blockIdx.z + threadIdx.z + job.start.z; if(y < job.end.y && z < job.end.z){ unsigned int tmp = nx_d+LAP-1; get_Field(fx , nx_d - 1 , y-LAP , z-LAP) = ( get_Field_LAP(f , tmp , y , z) - get_Field_LAP(f , tmp-1 , y , z))/hx_d; get_Field(fx , nx_d - 2 , y-LAP , z-LAP) = ( get_Field_LAP(f , tmp , y , z) - get_Field_LAP(f , tmp-2 , y , z))*0.5/hx_d; get_Field(fx , nx_d - 3 , y-LAP , z-LAP) = ( get_Field_LAP(f , tmp-4 , y , z) - 8.0*get_Field_LAP(f , tmp-3 , y , z) + 8.0*get_Field_LAP(f , tmp-1 , y , z) - get_Field_LAP(f , tmp , y , z))/(12.0*hx_d); get_Field(fx , nx_d - 4 , y-LAP , z-LAP) = ( get_Field_LAP(f , tmp , y , z) - get_Field_LAP(f , tmp-6 , y , z) -9.0*( get_Field_LAP(f , tmp - 1 , y , z) - get_Field_LAP(f , tmp - 5 , y , z) ) +45.0*( get_Field_LAP(f , tmp - 2 , y , z) - get_Field_LAP(f , tmp - 4 , y , z)) )/(60.0*hx_d); } } // =========================================================================================================== // __global__ void OCFD_Dy0_bound_kernel_m(cudaField f , cudaField fx , cudaJobPackage job){ // eyes on cells WITHOUT LAP unsigned int x = blockDim.x * blockIdx.x + threadIdx.x + job.start.x; unsigned int z = blockDim.z * blockIdx.z + threadIdx.z + job.start.z; if(z < job.end.z && x < job.end.x){ get_Field(fx , x-LAP, 0, z-LAP) = ( get_Field_LAP(f , x, LAP+1, z) - get_Field_LAP(f ,x, LAP, z))/hy_d; get_Field(fx , x-LAP, 1, z-LAP) = ( get_Field_LAP(f , x, LAP+2, z) - get_Field_LAP(f ,x, LAP, z))*0.5/hy_d; get_Field(fx , x-LAP, 2, z-LAP) = ( get_Field_LAP(f , x, LAP, z) - 8.0*get_Field_LAP(f, x, LAP+1, z) + 8.0*get_Field_LAP(f , x, LAP+3, z) - get_Field_LAP(f, x, LAP+4, z))/(12.0*hy_d); get_Field(fx , x-LAP, 3, z-LAP) = ( get_Field_LAP(f , x, LAP+6, z) - get_Field_LAP(f , x, LAP, z) -9.0*(get_Field_LAP(f , x, LAP+5, z) - get_Field_LAP(f , x, LAP+1, z) ) +45.0*(get_Field_LAP(f , x, LAP+4, z) - get_Field_LAP(f , x, LAP+2, z)) )/(60.0*hy_d); } } __global__ void OCFD_Dy0_bound_kernel_p(cudaField f , cudaField fx , cudaJobPackage job){ // eyes on cells WITHOUT LAP unsigned int x = blockDim.x * blockIdx.x + threadIdx.x + job.start.x; unsigned int z = blockDim.z * blockIdx.z + threadIdx.z + job.start.z; if(z < job.end.z && x < job.end.x){ unsigned int tmp = ny_d+LAP-1; get_Field(fx, x-LAP, ny_d-1, z-LAP) = -( get_Field_LAP(f , x, tmp-1, z) - get_Field_LAP(f ,x, tmp, z))/hy_d; get_Field(fx, x-LAP, ny_d-2, z-LAP) = -( get_Field_LAP(f , x, tmp-2, z) - get_Field_LAP(f ,x, tmp, z))*0.5/hy_d; get_Field(fx, x-LAP, ny_d-3, z-LAP) = -( get_Field_LAP(f , x, tmp, z) - 8.0*get_Field_LAP(f, x, tmp-1, z) + 8.0*get_Field_LAP(f , x, tmp-3, z) - get_Field_LAP(f, x, tmp-4, z))/(12.0*hy_d); get_Field(fx, x-LAP, ny_d-4, z-LAP) = -( get_Field_LAP(f , x, tmp-6, z) - get_Field_LAP(f , x, tmp, z) -9.0*(get_Field_LAP(f , x, tmp-5, z) - get_Field_LAP(f , x, tmp-1, z) ) +45.0*(get_Field_LAP(f , x, tmp-4, z) - get_Field_LAP(f , x, tmp-2, z)) )/(60.0*hy_d); } } // =========================================================================================================== // __global__ void OCFD_Dz0_bound_kernel_m(cudaField f , cudaField fx , cudaJobPackage job){ // eyes on cells WITHOUT LAP unsigned int x = blockDim.x * blockIdx.x + threadIdx.x + job.start.x; unsigned int y = blockDim.y * blockIdx.y + threadIdx.y + job.start.y; if(y < job.end.y && x < job.end.x){ get_Field(fx , x-LAP,y-LAP, 0) = ( get_Field_LAP(f , x,y, LAP+1) - get_Field_LAP(f ,x,y, LAP))/hz_d; get_Field(fx , x-LAP,y-LAP, 1) = ( get_Field_LAP(f , x,y, LAP+2) - get_Field_LAP(f ,x,y, LAP))*0.5/hz_d; get_Field(fx , x-LAP,y-LAP, 2) = ( get_Field_LAP(f , x, y , LAP ) - 8.0*get_Field_LAP(f , x,y, LAP+1) + 8.0*get_Field_LAP(f , x, y , LAP+3) - get_Field_LAP(f , x,y, LAP+4))/(12.0*hz_d); get_Field(fx , x-LAP,y-LAP, 3) = ( get_Field_LAP(f , x,y, LAP+6) - get_Field_LAP(f , x,y, LAP) -9.0*(get_Field_LAP(f , x,y, LAP+5) - get_Field_LAP(f , x,y, LAP+1) ) +45.0*(get_Field_LAP(f , x,y, LAP+4) - get_Field_LAP(f , x,y, LAP+2)) )/(60.0*hz_d); } } __global__ void OCFD_Dz0_bound_kernel_p(cudaField f , cudaField fx , cudaJobPackage job){ // eyes on cells WITHOUT LAP unsigned int x = blockDim.x * blockIdx.x + threadIdx.x + job.start.x; unsigned int y = blockDim.y * blockIdx.y + threadIdx.y + job.start.y; if(y < job.end.y && x < job.end.x){ unsigned int tmp = nz_d+LAP-1; get_Field(fx , x-LAP,y-LAP, nz_d - 1)= ( get_Field_LAP(f , x,y, tmp) - get_Field_LAP(f , x,y, tmp-1))/hz_d; get_Field(fx , x-LAP,y-LAP, nz_d - 2) = ( get_Field_LAP(f , x,y, tmp) - get_Field_LAP(f , x,y, tmp-2))*0.5/hz_d; get_Field(fx , x-LAP,y-LAP, nz_d - 3) = ( get_Field_LAP(f , x,y, tmp-4) - 8.0*get_Field_LAP(f , x,y, tmp-3) + 8.0*get_Field_LAP(f , x,y, tmp-1) - get_Field_LAP(f , x,y, tmp ))/(12.0*hz_d); get_Field(fx , x-LAP,y-LAP, nz_d - 4) = ( get_Field_LAP(f , x,y, tmp ) - get_Field_LAP(f , x,y, tmp - 6) -9.0*(get_Field_LAP(f , x,y, tmp-1) - get_Field_LAP(f , x,y, tmp - 5) ) +45.0*(get_Field_LAP(f , x,y, tmp-2) - get_Field_LAP(f , x,y, tmp - 4)) )/(60.0*hz_d); } } void OCFD_Dx0_bound(cudaField f , cudaField fx , cudaJobPackage job_in , dim3 blockdim_in, cudaStream_t *stream){ CHECK_X( { CUDA_LAUNCH(( OCFD_Dx0_bound_kernel_m<<>>(f, fx, job_in) )); },{ CUDA_LAUNCH(( OCFD_Dx0_bound_kernel_p<<>>(f, fx, job_in) )); } ) } void OCFD_Dy0_bound(cudaField f , cudaField fx , cudaJobPackage job_in , dim3 blockdim_in, cudaStream_t *stream){ CHECK_Y( { CUDA_LAUNCH(( OCFD_Dy0_bound_kernel_m<<>>(f, fx, job_in) )); }, { CUDA_LAUNCH(( OCFD_Dy0_bound_kernel_p<<>>(f, fx, job_in) )); } ) } void OCFD_Dz0_bound(cudaField f , cudaField fx , cudaJobPackage job_in , dim3 blockdim_in, cudaStream_t *stream){ CHECK_Z( { CUDA_LAUNCH(( OCFD_Dz0_bound_kernel_m<<>>(f, fx, job_in) )); }, { CUDA_LAUNCH(( OCFD_Dz0_bound_kernel_p<<>>(f, fx, job_in) )); } ) } void OCFD_bound_non_ref(dim3 *flagxyzb, int Non_ref, cudaJobPackage job){ // eyes on field WITH LAPs dim3 size; int flag = 0; jobsize(&job, &size); switch(flagxyzb->x){ case 1: { if(npx == 0 && job.start.x == LAP){ flagxyzb->z = Non_ref; }else{ flagxyzb->z = 0; } } break; case 2: { if(npy == 0 && job.start.y == LAP){ flagxyzb->z = Non_ref; }else{ flagxyzb->z = 0; } } break; case 3: { if(npz == 0 && job.start.z == LAP){ flagxyzb->z = Non_ref; }else{ flagxyzb->z = 0; } } break; case 4: { if(npx == NPX0-1 && job.end.x == nx_lap){ flagxyzb->z = Non_ref; }else{ flagxyzb->z = 0; } } break; case 5: { if(npy == NPY0-1 && job.end.y == ny_lap){ flagxyzb->z = Non_ref; }else{ flagxyzb->z = 0; } } break; case 6: { if(npz == NPZ0-1 && job.end.z == nz_lap){ flagxyzb->z = Non_ref; }else{ flagxyzb->z = 0; } } break; } } void OCFD_bound(dim3 *flagxyzb, int boundp, int boundm, cudaJobPackage job){ // eyes on field WITH LAPs dim3 size; int flag = 0; jobsize(&job, &size); switch(flagxyzb->x){ case 1: case 4: { if(npx == 0 && job.start.x == LAP && boundp == 1){ flagxyzb->y = 1; flag = 1; } if(npx == NPX0-1 && job.end.x == nx_lap && boundm == 1){ flagxyzb->y = 4; if(flag == 1) flagxyzb->y = 7; } } break; case 2: case 5: { if(npy == 0 && job.start.y == LAP && boundp == 1){ flagxyzb->y = 2; flag = 1; } if(npy == NPY0-1 && job.end.y == ny_lap && boundm == 1){ flagxyzb->y = 5; if(flag == 1) flagxyzb->y = 8; } } break; case 3: case 6: { if(npz == 0 && job.start.z == LAP && boundp == 1){ flagxyzb->y = 3; flag = 1; } if(npz == NPZ0-1 && job.end.z == nz_lap && boundm == 1){ flagxyzb->y = 6; if(flag == 1) flagxyzb->y = 9; } } break; } } /*__device__ int OCFD_bound_scheme_kernel_p(int flag, dim3 flagxyzb, dim3 coords, cudaSoA du, int num, cudaField fx, REAL *stencil, int ka1, int kb1, cudaJobPackage job){ unsigned int offset_out = job.start.x + fx.pitch*(job.start.y + ny_d*job.start.z); if(flag != 0){ switch(flagxyzb.x){ case 4: if(threadIdx.x != 0) get_Field(fx, coords.x-LAP, coords.y-LAP, coords.z-LAP, offset_out) = (tmp_r - tmp_l)/hx_d; break; case 5: if(threadIdx.x != 0) get_Field(fx, coords.x-LAP, coords.y-LAP, coords.z-LAP, offset_out) = (tmp_r - tmp_l)/hy_d; break; case 6: if(threadIdx.x != 0) get_Field(fx, coords.x-LAP, coords.y-LAP, coords.z-LAP, offset_out) = (tmp_r - tmp_l)/hz_d; break; } switch(flagxyzb.y){ case 4: { if(coords.x == job.end.x-job.start.x-1){ REAL tmp = stencil[-ka1] + 0.5*minmod2(stencil[-ka1] - stencil[-ka1-1], stencil[-ka1] - stencil[-ka1-1]); REAL tmp2 = stencil[-ka1-1] + 0.5*minmod2(stencil[-ka1] - stencil[-ka1-1], stencil[-ka1-1] - stencil[-ka1-2]); get_Field(fx, coords.x-LAP, coords.y-LAP, coords.z-LAP, offset_out) = (tmp - tmp2)/hx_d; return 0; }else if(coords.x >= job.end.x-job.start.x-kb1){ REAL tmp = stencil[-ka1] + 0.5*minmod2(stencil[-ka1+1] - stencil[-ka1], stencil[-ka1] - stencil[-ka1-1]); REAL tmp2 = stencil[-ka1-1] + 0.5*minmod2(stencil[-ka1] - stencil[-ka1-1], stencil[-ka1-1] - stencil[-ka1-2]); get_Field(fx, coords.x-LAP, coords.y-LAP, coords.z-LAP, offset_out) = (tmp - tmp2)/hx_d; return 0; } } break; case 5: { if(coords.y == job.end.y-job.start.y-1){ REAL tmp = stencil[-ka1] + 0.5*minmod2(stencil[-ka1] - stencil[-ka1-1], stencil[-ka1] - stencil[-ka1-1]); REAL tmp2 = stencil[-ka1-1] + 0.5*minmod2(stencil[-ka1] - stencil[-ka1-1], stencil[-ka1-1] - stencil[-ka1-2]); get_Field(fx, coords.x-LAP, coords.y-LAP, coords.z-LAP, offset_out) = (tmp - tmp2)/hy_d; return 0; }else if(coords.y >= job.end.y-job.start.y-kb1){ REAL tmp = stencil[-ka1] + 0.5*minmod2(stencil[-ka1+1] - stencil[-ka1], stencil[-ka1] - stencil[-ka1-1]); REAL tmp2 = stencil[-ka1-1] + 0.5*minmod2(stencil[-ka1] - stencil[-ka1-1], stencil[-ka1-1] - stencil[-ka1-2]); get_Field(fx, coords.x-LAP, coords.y-LAP, coords.z-LAP, offset_out) = (tmp - tmp2)/hy_d; return 0; } } break; case 6: { if(coords.z == job.end.z-job.start.z-1){ REAL tmp = stencil[-ka1] + 0.5*minmod2(stencil[-ka1] - stencil[-ka1-1], stencil[-ka1] - stencil[-ka1-1]); REAL tmp2 = stencil[-ka1-1] + 0.5*minmod2(stencil[-ka1] - stencil[-ka1-1], stencil[-ka1-1] - stencil[-ka1-2]); get_Field(fx, coords.x-LAP, coords.y-LAP, coords.z-LAP, offset_out) = (tmp - tmp2)/hz_d; return 0; }else if(coords.z >= job.end.z-job.start.z-kb1){ REAL tmp = stencil[-ka1] + 0.5*minmod2(stencil[-ka1+1] - stencil[-ka1], stencil[-ka1] - stencil[-ka1-1]); REAL tmp2 = stencil[-ka1-1] + 0.5*minmod2(stencil[-ka1] - stencil[-ka1-1], stencil[-ka1-1] - stencil[-ka1-2]); get_Field(fx, coords.x-LAP, coords.y-LAP, coords.z-LAP, offset_out) = (tmp - tmp2)/hz_d; return 0; } } break; } } return flag; }*/ __device__ REAL OCFD_weno5_kernel_P_right(REAL *stencil){ REAL S2 = 0.0; REAL tmp; REAL ep = 1e-6; tmp = stencil[0] - 2.0*stencil[1] + stencil[2]; S2 += 13*tmp*tmp; tmp = stencil[0] - 4.0*stencil[1] + 3.0*stencil[2]; S2 += 3*tmp*tmp; REAL a2 = 1.0/((12.0*ep + S2)*(12.0*ep + S2)); REAL q23 = (2.0*stencil[0] - 7.0*stencil[1] + 11.0*stencil[2]); tmp = a2*q23/(6.0*a2); return tmp; } __device__ REAL OCFD_weno5_kernel_P_lift(REAL *stencil){ REAL S0 = 0.0; REAL tmp; REAL ep = 1e-6; tmp = stencil[2] - 2.0*stencil[3] + stencil[4]; S0 += 13*tmp*tmp; tmp = 3.0*stencil[2] - 4.0*stencil[3] + stencil[4]; S0 += 3*tmp*tmp; REAL a0 = 1.0/((12.0*ep + S0)*(12.0*ep + S0)); REAL q03 = (2.0*stencil[2] + 5.0*stencil[3] - stencil[4]); tmp = a0*q03/(6.0*a0); return tmp; } __device__ REAL OCFD_weno5_kernel_M_right(REAL *stencil){ REAL S0 = 0.0; REAL tmp; REAL ep = 1e-6; tmp = stencil[2] - 2.0*stencil[1] + stencil[0]; S0 += 13*tmp*tmp; tmp = 3.0*stencil[2] - 4.0*stencil[1] + stencil[0]; S0 += 3*tmp*tmp; REAL a0 = 1.0/((12.0*ep + S0)*(12.0*ep + S0)); REAL q03 = (2.0*stencil[2] + 5.0*stencil[1] - stencil[0]); tmp = a0*q03/(6.0*a0); return tmp; } __device__ REAL OCFD_weno5_kernel_M_lift(REAL *stencil){ REAL S2 = 0.0; REAL tmp; REAL ep = 1e-6; tmp = stencil[4] - 2.0*stencil[3] + stencil[2]; S2 += 13*tmp*tmp; tmp = stencil[4] - 4.0*stencil[3] + 3.0*stencil[2]; S2 += 3*tmp*tmp; REAL a2 = 1.0/((12.0*ep + S2)*(12.0*ep + S2)); REAL q23 = (2.0*stencil[4] - 7.0*stencil[3] + 11.0*stencil[2]); tmp = a2*q23/(6.0*a2); return tmp; } __device__ REAL OCFD_weno5_kernel_P_right_plus(REAL *stencil){ REAL S1 = 0.0, S2 = 0.0; REAL tmp; REAL ep = 1e-6; tmp = stencil[1] - 2.0*stencil[2] + stencil[3]; S1 += 13*tmp*tmp; tmp = stencil[1] - stencil[3]; S1 += 3*tmp*tmp; REAL a1 = 6.0/((12.0*ep + S1)*(12.0*ep + S1)); REAL q13 = (-stencil[1] + 5.0*stencil[2] + 2.0*stencil[3]); tmp = stencil[0] - 2.0*stencil[1] + stencil[2]; S2 += 13*tmp*tmp; tmp = stencil[0] - 4.0*stencil[1] + 3.0*stencil[2]; S2 += 3*tmp*tmp; REAL a2 = 1.0/((12.0*ep + S2)*(12.0*ep + S2)); REAL q23 = (2.0*stencil[0] - 7.0*stencil[1] + 11.0*stencil[2]); tmp = (a1*q13 + a2*q23)/(6.0*(a1 + a2)); return tmp; } __device__ REAL OCFD_weno5_kernel_P_lift_plus(REAL *stencil){ REAL S0 = 0.0, S1 = 0.0; REAL tmp; REAL ep = 1e-6; tmp = stencil[2] - 2.0*stencil[3] + stencil[4]; S0 += 13*tmp*tmp; tmp = 3.0*stencil[2] - 4.0*stencil[3] + stencil[4]; S0 += 3*tmp*tmp; REAL a0 = 3.0/((12.0*ep + S0)*(12.0*ep + S0)); REAL q03 = (2.0*stencil[2] + 5.0*stencil[3] - stencil[4]); tmp = stencil[1] - 2.0*stencil[2] + stencil[3]; S1 += 13*tmp*tmp; tmp = stencil[1] - stencil[3]; S1 += 3*tmp*tmp; REAL a1 = 6.0/((12.0*ep + S1)*(12.0*ep + S1)); REAL q13 = (-stencil[1] + 5.0*stencil[2] + 2.0*stencil[3]); tmp = (a0*q03 + a1*q13)/(6.0*(a0 + a1)); return tmp; } __device__ REAL OCFD_weno5_kernel_M_right_plus(REAL *stencil){ REAL S0 = 0.0, S1 = 0.0; REAL tmp; REAL ep = 1e-6; tmp = stencil[2] - 2.0*stencil[1] + stencil[0]; S0 += 13*tmp*tmp; tmp = 3.0*stencil[2] - 4.0*stencil[1] + stencil[0]; S0 += 3*tmp*tmp; REAL a0 = 3.0/((12.0*ep + S0)*(12.0*ep + S0)); REAL q03 = (2.0*stencil[2] + 5.0*stencil[1] - stencil[0]); tmp = stencil[3] - 2.0*stencil[2] + stencil[1]; S1 += 13*tmp*tmp; tmp = stencil[3] - stencil[1]; S1 += 3*tmp*tmp; REAL a1 = 6.0/((12.0*ep + S1)*(12.0*ep + S1)); REAL q13 = (-stencil[3] + 5.0*stencil[2] + 2.0*stencil[1]); tmp = (a0*q03 + a1*q13)/(6.0*(a0 + a1)); return tmp; } __device__ REAL OCFD_weno5_kernel_M_lift_plus(REAL *stencil){ REAL S1 = 0.0, S2 = 0.0; REAL tmp; REAL ep = 1e-6; tmp = stencil[3] - 2.0*stencil[2] + stencil[1]; S1 += 13*tmp*tmp; tmp = stencil[3] - stencil[1]; S1 += 3*tmp*tmp; REAL a1 = 6.0/((12.0*ep + S1)*(12.0*ep + S1)); REAL q13 = (-stencil[3] + 5.0*stencil[2] + 2.0*stencil[1]); tmp = stencil[4] - 2.0*stencil[3] + stencil[2]; S2 += 13*tmp*tmp; tmp = stencil[4] - 4.0*stencil[3] + 3.0*stencil[2]; S2 += 3*tmp*tmp; REAL a2 = 1.0/((12.0*ep + S2)*(12.0*ep + S2)); REAL q23 = (2.0*stencil[4] - 7.0*stencil[3] + 11.0*stencil[2]); tmp = (a1*q13 + a2*q23)/(6.0*(a1 + a2)); return tmp; } //tmp = (2.0*stencil[-ka1+1] + 5.0*stencil[-ka1+2] + stencil[-ka1+3])/6.0; //tmp = (11.0*stencil[-ka1] - 7.0*stencil[-ka1-1] + 2.0*stencil[-ka1-2])/6.0; #define boundp1(coords)\ if(coords <= -ka1){\ if(coords == 0){\ *tmp = stencil[-ka1+1];\ }\ if(coords == 1){\ *tmp = OCFD_weno5_kernel_P_lift(&stencil[-ka1-2]);\ }\ if(coords == 2){\ *tmp = OCFD_weno5_kernel_P_lift_plus(&stencil[-ka1-2]);\ }\ if(coords == 3){\ *tmp = OCFD_weno5_kernel_P(&stencil[-ka1-2]);\ }\ return 0;\ }\ #define boundp2(coords)\ if(coords > -kb1){\ if(coords == -1){\ *tmp = stencil[-ka1] + 0.5*minmod2(stencil[-ka1] - stencil[-ka1-1], stencil[-ka1] - stencil[-ka1-1]);\ }\ if(coords == -2){\ *tmp = OCFD_weno5_kernel_P_right_plus(&stencil[-ka1-2]);\ }\ if(coords <= -3){\ *tmp = OCFD_weno5_kernel_P(&stencil[-ka1-2]);\ }\ return 0;\ }\ __device__ int OCFD_bound_scheme_kernel_p(REAL* tmp, dim3 flagxyzb, dim3 coords, REAL *stencil, int ka1, int kb1, cudaJobPackage job){ int tmp1; switch(flagxyzb.y){ case 1: { boundp1(coords.x) } break; case 2: { boundp1(coords.y) } break; case 3: { boundp1(coords.z) } break; case 4: { tmp1 = coords.x + job.start.x - job.end.x; boundp2(tmp1) } break; case 5: { tmp1 = coords.y + job.start.y - job.end.y; boundp2(tmp1) } break; case 6: { tmp1 = coords.z + job.start.z - job.end.z; boundp2(tmp1) } break; case 7: { boundp1(coords.x) tmp1 = coords.x + job.start.x - job.end.x; boundp2(tmp1) } break; case 8: { boundp1(coords.y) tmp1 = coords.y + job.start.y - job.end.y; boundp2(tmp1) } break; case 9: { boundp1(coords.z) tmp1 = coords.z + job.start.z - job.end.z; boundp2(tmp1) } break; } return 1; } #define boundm1(coords)\ if(coords < -ka1){\ if(coords == 0){\ *tmp = OCFD_weno5_kernel_M_lift(&stencil[-ka1-2]);\ }\ if(coords == 1){\ *tmp = OCFD_weno5_kernel_M_lift_plus(&stencil[-ka1-2]);\ }\ if(coords >= 2){\ *tmp = OCFD_weno5_kernel_M(&stencil[-ka1-2]);\ }\ return 0;\ }\ #define boundm2(coords)\ if(coords >= -kb1-1){\ if(coords == -1){\ *tmp = stencil[-ka1-1];\ }\ if(coords == -2){\ *tmp = stencil[-ka1] - 0.5*minmod2(stencil[-ka1] - stencil[-ka1-1], stencil[-ka1] - stencil[-ka1-1]);\ }\ if(coords == -3){\ *tmp = OCFD_weno5_kernel_M_right_plus(&stencil[-ka1-2]);\ }\ if(coords == -4){\ *tmp = OCFD_weno5_kernel_M(&stencil[-ka1-2]);\ }\ return 0;\ }\ __device__ int OCFD_bound_scheme_kernel_m(REAL* tmp, dim3 flagxyzb, dim3 coords, REAL *stencil, int ka1, int kb1, cudaJobPackage job){ int tmp1; switch(flagxyzb.y){ case 1: { boundm1(coords.x) } break; case 2: { boundm1(coords.y) } break; case 3: { boundm1(coords.z) } break; case 4: { tmp1 = coords.x + job.start.x - job.end.x; boundm2(tmp1) } break; case 5: { tmp1 = coords.y + job.start.y - job.end.y; boundm2(tmp1) } break; case 6: { tmp1 = coords.z + job.start.z - job.end.z; boundm2(tmp1) } break; case 7: { boundm1(coords.x) tmp1 = coords.x + job.start.x - job.end.x; boundm2(tmp1) } break; case 8: { boundm1(coords.y) tmp1 = coords.y + job.start.y - job.end.y; boundm2(tmp1) } break; case 9: { boundm1(coords.z) tmp1 = coords.z + job.start.z - job.end.z; boundm2(tmp1) } break; } return 1; } #ifdef __cplusplus } #endif