#include "hip/hip_runtime.h" // Boundary condition for flow over a 3D Liftbody ------------------------- #include #include #include #include "parameters.h" #include "parameters_d.h" #include "utility.h" #include "io_warp.h" #include "cuda_commen.h" #include "cuda_utility.h" //#include "OCFD_boundary_init.h" #include "OCFD_init.h" #ifdef __cplusplus extern "C"{ #endif extern cudaField *pu2d_inlet_d; //[5][nz][ny] extern cudaField *pu2d_upper_d; //[5][ny][nx] //extern cudaField *pv_dist_wall_d; // [ny][nx] extern cudaField *pv_dist_coeff_d; // [3][ny][nx] extern cudaField *pu_dist_upper_d; // [ny][nx] extern const char v_dist_need; extern const char TW_postive; extern REAL *fait; extern REAL *TM; __global__ void do_u2d_inlet_kernel(cudaField d , cudaField u , cudaField v , cudaField w , cudaField T , cudaField inlet , cudaJobPackage job){ // with LAPs unsigned int y = blockDim.y * blockIdx.y + threadIdx.y + job.start.y; unsigned int z = blockDim.z * blockIdx.z + threadIdx.z + job.start.z; if(y < job.end.y && z < job.end.z){ unsigned int ylap = y-LAP; unsigned int zlap = z-LAP; for(int i = 0; i <= LAP; i++){ get_Field_LAP(d, i, y, z) = *(inlet.ptr + ylap + inlet.pitch * zlap); get_Field_LAP(u, i, y, z) = *(inlet.ptr + ylap + inlet.pitch *( zlap + 1*nz_d) ); get_Field_LAP(v, i, y, z) = *(inlet.ptr + ylap + inlet.pitch *( zlap + 2*nz_d) ); get_Field_LAP(w, i, y, z) = *(inlet.ptr + ylap + inlet.pitch *( zlap + 3*nz_d) ); get_Field_LAP(T, i, y, z) = *(inlet.ptr + ylap + inlet.pitch *( zlap + 4*nz_d) ); } } } /* ================================= */ __global__ void do_u2d_upper_kernel(cudaField d , cudaField u , cudaField v , cudaField w , cudaField T , cudaField upper , cudaField dist , cudaJobPackage job){ // with LAPs unsigned int x = blockDim.x * blockIdx.x + threadIdx.x + job.start.x; unsigned int y = blockDim.y * blockIdx.y + threadIdx.y + job.start.y; if(x < job.end.x && y < job.end.y){ unsigned int xlap = x-LAP; unsigned int ylap = y-LAP; unsigned int ztmp = nz_lap_d - 1; get_Field_LAP(d , x , y , ztmp) = *(upper.ptr + xlap + upper.pitch * ylap ); get_Field_LAP(u , x , y , ztmp) = *(upper.ptr + xlap + upper.pitch *( ylap + 1*ny_d) ) + *(dist.ptr + xlap + dist.pitch * ylap ); get_Field_LAP(v , x , y , ztmp) = *(upper.ptr + xlap + upper.pitch *( ylap + 2*ny_d) ); get_Field_LAP(w , x , y , ztmp) = *(upper.ptr + xlap + upper.pitch *( ylap + 3*ny_d) ); get_Field_LAP(T , x , y , ztmp) = *(upper.ptr + xlap + upper.pitch *( ylap + 4*ny_d) ); } } __global__ void do_u_dist_upper_kernel(REAL sin_aoa , REAL cos_aoa , cudaField d , cudaField u , cudaField v , cudaField w , cudaField T , cudaField dist , cudaJobPackage job){ // with LAPs unsigned int x = blockDim.x * blockIdx.x + threadIdx.x + job.start.x; unsigned int y = blockDim.y * blockIdx.y + threadIdx.y + job.start.y; if(x < job.end.x && y < job.end.y){ unsigned int xlap = x-LAP; unsigned int ylap = y-LAP; unsigned int ztmp = nz_lap_d - 1; get_Field_LAP(d , x , y , ztmp) = 1.0; get_Field_LAP(u , x , y , ztmp) = cos_aoa + *(dist.ptr + xlap + dist.pitch * ylap ); get_Field_LAP(v , x , y , ztmp) = 0.0; get_Field_LAP(w , x , y , ztmp) = sin_aoa; get_Field_LAP(T , x , y , ztmp) = 1.0; } } /* ============================================= */ __global__ void do_symmetry_kernel_m(cudaField d , cudaField u , cudaField v , cudaField w , cudaField T , cudaJobPackage job){ // with LAPs unsigned int x = blockDim.x * blockIdx.x + threadIdx.x + job.start.x; unsigned int y = blockDim.y * blockIdx.y + threadIdx.y + job.start.y; unsigned int z = blockDim.z * blockIdx.z + threadIdx.z + job.start.z; if(x < job.end.x && y < job.end.y && z < job.end.z){ unsigned int ys = 2*LAP - y; get_Field_LAP(d, x, y, z) = get_Field_LAP(d, x, ys, z); get_Field_LAP(u, x, y, z) = get_Field_LAP(u, x, ys, z); get_Field_LAP(v, x, y, z) = -1.0*get_Field_LAP(v, x, ys, z); get_Field_LAP(w, x, y, z) = get_Field_LAP(w, x, ys, z); get_Field_LAP(T, x, y, z) = get_Field_LAP(T, x, ys, z); } } __global__ void do_symmetry_kernel_p(cudaField d , cudaField u , cudaField v , cudaField w , cudaField T , cudaJobPackage job){ // with LAPs unsigned int x = blockDim.x * blockIdx.x + threadIdx.x + job.start.x; unsigned int y = blockDim.y * blockIdx.y + threadIdx.y + job.start.y; unsigned int z = blockDim.z * blockIdx.z + threadIdx.z + job.start.z; if(x < job.end.x && y < job.end.y && z < job.end.z){ unsigned int ys = 2*(ny_lap_d - 1) - y; get_Field_LAP(d, x, y, z) = get_Field_LAP(d, x, ys, z); get_Field_LAP(u, x, y, z) = get_Field_LAP(u, x, ys, z); get_Field_LAP(v, x, y, z) = -1.0*get_Field_LAP(v, x, ys, z); get_Field_LAP(w, x, y, z) = get_Field_LAP(w, x, ys, z); get_Field_LAP(T, x, y, z) = get_Field_LAP(T, x, ys, z); } } /* =============================================== */ __global__ void do_wall_kernel_T_V(REAL tw , cudaField d , cudaField u , cudaField v , cudaField w , cudaField T , cudaField coeff , REAL HT , cudaJobPackage job){ // with LAPs unsigned int x = blockDim.x * blockIdx.x + threadIdx.x + job.start.x; unsigned int y = blockDim.y * blockIdx.y + threadIdx.y + job.start.y; if(x < job.end.x && y < job.end.y){ { unsigned int xlap = x-LAP; unsigned int ylap = y-LAP; get_Field_LAP(u , x , y , LAP) = *(coeff.ptr + xlap + coeff.pitch * ylap ) * HT; get_Field_LAP(v , x , y , LAP) = *(coeff.ptr + xlap + coeff.pitch *( ylap + 1*ny_d) ) * HT; get_Field_LAP(w , x , y , LAP) = *(coeff.ptr + xlap + coeff.pitch *( ylap + 2*ny_d) ) * HT; } get_Field_LAP(T , x , y , LAP) = tw; get_Field_LAP(d , x , y , LAP) = (4.0 * get_Field_LAP(d , x,y,LAP+1) * get_Field_LAP(T , x , y , LAP+1) - get_Field_LAP(d , x,y,LAP+2) * get_Field_LAP(T , x , y , LAP+2))/(3.0*tw); for(int i = 0; i < LAP; i++){ get_Field_LAP(d, x, y, i) = get_Field_LAP(d, x, y, 2*LAP-i); get_Field_LAP(u, x, y, i) = -get_Field_LAP(u, x, y, 2*LAP-i); get_Field_LAP(v, x, y, i) = -get_Field_LAP(v, x, y, 2*LAP-i); get_Field_LAP(w, x, y, i) = -get_Field_LAP(w, x, y, 2*LAP-i); get_Field_LAP(T, x, y, i) = get_Field_LAP(T, x, y, 2*LAP-i); } } } __global__ void do_wall_kernel_NT_V(cudaField d , cudaField u , cudaField v , cudaField w , cudaField T , cudaField coeff , REAL HT , cudaJobPackage job){ // with LAPs unsigned int x = blockDim.x * blockIdx.x + threadIdx.x + job.start.x; unsigned int y = blockDim.y * blockIdx.y + threadIdx.y + job.start.y; if(x < job.end.x && y < job.end.y){ { unsigned int xlap = x-LAP; unsigned int ylap = y-LAP; get_Field_LAP(u , x , y , LAP) = *(coeff.ptr + xlap + coeff.pitch * ylap ) * HT; get_Field_LAP(v , x , y , LAP) = *(coeff.ptr + xlap + coeff.pitch *( ylap + 1*ny_d) ) * HT; get_Field_LAP(w , x , y , LAP) = *(coeff.ptr + xlap + coeff.pitch *( ylap + 2*ny_d) ) * HT; } get_Field_LAP(T , x , y , LAP) = (4.0 * get_Field_LAP(T , x , y , LAP+1) - get_Field_LAP(T , x , y , LAP+2))/3.0; get_Field_LAP(d , x , y , LAP) = (4.0 * get_Field_LAP(d , x , y , LAP+1) - get_Field_LAP(d , x , y , LAP+2))/3.0; for(int i = 0; i < LAP; i++){ get_Field_LAP(d, x, y, i) = get_Field_LAP(d, x, y, 2*LAP-i); get_Field_LAP(u, x, y, i) = -get_Field_LAP(u, x, y, 2*LAP-i); get_Field_LAP(v, x, y, i) = -get_Field_LAP(v, x, y, 2*LAP-i); get_Field_LAP(w, x, y, i) = -get_Field_LAP(w, x, y, 2*LAP-i); get_Field_LAP(T, x, y, i) = get_Field_LAP(T, x, y, 2*LAP-i); } } } __global__ void do_wall_kernel_T_NV(REAL tw , cudaField d , cudaField u , cudaField v , cudaField w , cudaField T , cudaField coeff , cudaJobPackage job){ // with LAPs unsigned int x = blockDim.x * blockIdx.x + threadIdx.x + job.start.x; unsigned int y = blockDim.y * blockIdx.y + threadIdx.y + job.start.y; if(x < job.end.x && y < job.end.y){ get_Field_LAP(u , x , y , LAP) = 0.0; get_Field_LAP(v , x , y , LAP) = 0.0; get_Field_LAP(w , x , y , LAP) = 0.0; get_Field_LAP(T , x , y , LAP) = tw; get_Field_LAP(d , x , y , LAP) = (4.0 * get_Field_LAP(d , x,y,LAP+1) * get_Field_LAP(T , x , y , LAP+1) - get_Field_LAP(d , x,y,LAP+2) * get_Field_LAP(T , x , y , LAP+2))/(3.0*tw); for(int i = 0; i < LAP; i++){ get_Field_LAP(d, x, y, i) = get_Field_LAP(d, x, y, 2*LAP-i); get_Field_LAP(u, x, y, i) = -get_Field_LAP(u, x, y, 2*LAP-i); get_Field_LAP(v, x, y, i) = -get_Field_LAP(v, x, y, 2*LAP-i); get_Field_LAP(w, x, y, i) = -get_Field_LAP(w, x, y, 2*LAP-i); get_Field_LAP(T, x, y, i) = get_Field_LAP(T, x, y, 2*LAP-i); } } } __global__ void do_wall_kernel_NT_NV(cudaField d , cudaField u , cudaField v , cudaField w , cudaField T , cudaField coeff , cudaJobPackage job){ // with LAPs unsigned int x = blockDim.x * blockIdx.x + threadIdx.x + job.start.x; unsigned int y = blockDim.y * blockIdx.y + threadIdx.y + job.start.y; if(x < job.end.x && y < job.end.y){ get_Field_LAP(u , x , y , LAP) = 0.0; get_Field_LAP(v , x , y , LAP) = 0.0; get_Field_LAP(w , x , y , LAP) = 0.0; get_Field_LAP(T , x , y , LAP) = (4.0 * get_Field_LAP(T , x , y , LAP+1) - get_Field_LAP(T , x , y , LAP+2))/3.0; get_Field_LAP(d , x , y , LAP) = (4.0 * get_Field_LAP(d , x , y , LAP+1) - get_Field_LAP(d , x , y , LAP+2))/3.0; for(int i = 0; i < LAP; i++){ get_Field_LAP(d, x, y, i) = get_Field_LAP(d, x, y, 2*LAP-i); get_Field_LAP(u, x, y, i) = -get_Field_LAP(u, x, y, 2*LAP-i); get_Field_LAP(v, x, y, i) = -get_Field_LAP(v, x, y, 2*LAP-i); get_Field_LAP(w, x, y, i) = -get_Field_LAP(w, x, y, 2*LAP-i); get_Field_LAP(T, x, y, i) = get_Field_LAP(T, x, y, 2*LAP-i); } } } /* ---------------------------------------------- */ /* ======================================== */ void bc_user_Liftbody3d(){ //-------------- boundary condition at i=1 (inlet) ----------------------------------------- if (npx == 0) { if (IF_WITHLEADING == 1) { printf(" Lift body with leading is not support yet \n"); exit(EXIT_FAILURE); } else { // without leading dim3 blockdim , griddim; cal_grid_block_dim(&griddim , &blockdim , 1 , BlockDimY , BlockDimZ , 1 , ny , nz); cudaJobPackage job( dim3(LAP,LAP,LAP) , dim3(LAP+1, ny_lap ,nz_lap) ); CUDA_LAUNCH(( hipLaunchKernelGGL(do_u2d_inlet_kernel, dim3(griddim ), dim3(blockdim), 0, 0, *pd_d , *pu_d , *pv_d , *pw_d , *pT_d , *pu2d_inlet_d , job) )); } } //---------------------bounrary at k=nz (upper) ------------------------------------------ if (npz == NPZ0 - 1) { if (IFLAG_UPPERBOUNDARY == 0) { // Out of blow shock dim3 blockdim , griddim; cal_grid_block_dim(&griddim , &blockdim , BlockDimX , BlockDimY , 1 , nx , ny , 1); cudaJobPackage job( dim3(LAP,LAP,LAP) , dim3(nx_lap, ny_lap ,LAP+1) ); CUDA_LAUNCH(( hipLaunchKernelGGL(do_u_dist_upper_kernel, dim3(griddim ), dim3(blockdim), 0, 0, Sin_AOA , Cos_AOA ,*pd_d , *pu_d , *pv_d , *pw_d , *pT_d , *pu_dist_upper_d, job) )); } else if (IFLAG_UPPERBOUNDARY == 1) { // In the blow shock dim3 blockdim , griddim; cal_grid_block_dim(&griddim , &blockdim , BlockDimX , BlockDimY , 1 , nx , ny , 1); cudaJobPackage job( dim3(LAP,LAP,LAP) , dim3(nx_lap, ny_lap ,LAP+1) ); CUDA_LAUNCH(( hipLaunchKernelGGL(do_u2d_upper_kernel, dim3(griddim ), dim3(blockdim), 0, 0, *pd_d , *pu_d , *pv_d , *pw_d , *pT_d , *pu2d_upper_d , *pu_dist_upper_d, job) )); } } //---------------------wall ------------------------------------ REAL ht = 0.; if(BETA > 0.){ for(int m = 0; m < MTMAX; m++){ //ht = ht + TM[m] * sin((m + 1)*BETA*tt + 2.*PI*fait[m]); ht = ht + TM[m] * sin((m + 1)*BETA*tt); } }else{ ht = 1.; } if (npz == 0) { if(v_dist_need){ if(TW_postive){ dim3 blockdim , griddim; cal_grid_block_dim(&griddim , &blockdim , BlockDimX , BlockDimY , 1 , nx , ny , 1); cudaJobPackage job( dim3(LAP,LAP,LAP) , dim3(nx_lap, ny_lap ,LAP+1) ); CUDA_LAUNCH(( hipLaunchKernelGGL(do_wall_kernel_T_V, dim3(griddim ), dim3(blockdim), 0, 0, TW , *pd_d , *pu_d , *pv_d , *pw_d , *pT_d , *pv_dist_coeff_d , ht , job) )); }else{ dim3 blockdim , griddim; cal_grid_block_dim(&griddim , &blockdim , BlockDimX , BlockDimY , 1 , nx , ny , 1); cudaJobPackage job( dim3(LAP,LAP,LAP) , dim3(nx_lap, ny_lap ,LAP+1) ); CUDA_LAUNCH(( hipLaunchKernelGGL(do_wall_kernel_NT_V, dim3(griddim ), dim3(blockdim), 0, 0, *pd_d , *pu_d , *pv_d , *pw_d , *pT_d , *pv_dist_coeff_d, ht , job) )); } }else{ if(TW_postive){ dim3 blockdim , griddim; cal_grid_block_dim(&griddim , &blockdim , BlockDimX , BlockDimY , 1 , nx , ny , 1); cudaJobPackage job( dim3(LAP,LAP,LAP) , dim3(nx_lap, ny_lap ,LAP+1) ); CUDA_LAUNCH(( hipLaunchKernelGGL(do_wall_kernel_T_NV, dim3(griddim ), dim3(blockdim), 0, 0, TW , *pd_d , *pu_d , *pv_d , *pw_d , *pT_d , *pv_dist_coeff_d , job) )); }else{ dim3 blockdim , griddim; cal_grid_block_dim(&griddim , &blockdim , BlockDimX , BlockDimY , 1 , nx , ny , 1); cudaJobPackage job( dim3(LAP,LAP,LAP) , dim3(nx_lap, ny_lap ,LAP+1) ); CUDA_LAUNCH(( hipLaunchKernelGGL(do_wall_kernel_NT_NV, dim3(griddim ), dim3(blockdim), 0, 0, *pd_d , *pu_d , *pv_d , *pw_d , *pT_d , *pv_dist_coeff_d , job) )); } } } //------------------------------------------------------------ //------------ Symmetry ----------- if (npy == 0 && IF_SYMMETRY == 1) { dim3 blockdim , griddim; cal_grid_block_dim(&griddim , &blockdim , BlockDimX , BlockDimY , BlockDimZ , nx , LAP , nz); cudaJobPackage job( dim3(LAP,0,LAP) , dim3(nx_lap, LAP ,nz_lap) ); CUDA_LAUNCH(( hipLaunchKernelGGL(do_symmetry_kernel_m, dim3(griddim ), dim3(blockdim), 0, 0, *pd_d , *pu_d , *pv_d , *pw_d , *pT_d , job) )); } if (npy == NPY0 - 1 && IF_SYMMETRY == 1) { dim3 blockdim , griddim; cal_grid_block_dim(&griddim , &blockdim , BlockDimX , BlockDimY , BlockDimZ , nx , LAP , nz); cudaJobPackage job( dim3(LAP,ny_lap,LAP) , dim3(nx_lap, ny_2lap ,nz_lap) ); CUDA_LAUNCH(( hipLaunchKernelGGL(do_symmetry_kernel_p, dim3(griddim ), dim3(blockdim), 0, 0, *pd_d , *pu_d , *pv_d , *pw_d , *pT_d , job) )); } } /* =============================================================================== */ __global__ void simple_boundary_condition(cudaField d , cudaField u , cudaField v , cudaField w , cudaField T , cudaJobPackage job){ // with LAPs unsigned int x = blockDim.x * blockIdx.x + threadIdx.x + job.start.x; unsigned int y = blockDim.y * blockIdx.y + threadIdx.y + job.start.y; unsigned int z = blockDim.z * blockIdx.z + threadIdx.z + job.start.z; if(x < job.end.x && y < job.end.y && z < job.end.z ){ get_Field_LAP(d , x , y , z) = 1.0; get_Field_LAP(u , x , y , z) = 1.0; get_Field_LAP(v , x , y , z) = 0.0; get_Field_LAP(w , x , y , z) = 0.0; get_Field_LAP(T , x , y , z) = 1.0; } } void bc_user_Liftbody3d_simple(){ //-------------- boundary condition at i=1 (inlet) ----------------------------------------- if (npx == 0) { dim3 blockdim , griddim; cal_grid_block_dim(&griddim , &blockdim , 1 , BlockDimY , BlockDimZ , 1 , ny , nz); cudaJobPackage job( dim3(LAP,LAP,LAP) , dim3(LAP+1, ny_lap ,nz_lap) ); CUDA_LAUNCH(( hipLaunchKernelGGL(simple_boundary_condition, dim3(griddim ), dim3(blockdim), 0, 0, *pd_d , *pu_d , *pv_d , *pw_d , *pT_d , job) )); } //---------------------bounrary at k=nz (upper) ------------------------------------------ if (npz == NPZ0 - 1) { dim3 blockdim , griddim; cal_grid_block_dim(&griddim , &blockdim , BlockDimX , BlockDimY , 1 , nx , ny , 1); cudaJobPackage job( dim3(LAP,LAP,nz_lap - 1) , dim3(nx_lap, ny_lap ,nz_lap) ); CUDA_LAUNCH(( hipLaunchKernelGGL(simple_boundary_condition, dim3(griddim ), dim3(blockdim), 0, 0, *pd_d , *pu_d , *pv_d , *pw_d , *pT_d , job) )); } //---------------------wall ------------------------------------ if (npz == 0) { dim3 blockdim , griddim; cal_grid_block_dim(&griddim , &blockdim , BlockDimX , BlockDimY , 1 , nx , ny , 1); cudaJobPackage job( dim3(LAP,LAP,LAP) , dim3(nx_lap, ny_lap ,LAP+1) ); CUDA_LAUNCH(( hipLaunchKernelGGL(simple_boundary_condition, dim3(griddim ), dim3(blockdim), 0, 0, *pd_d , *pu_d , *pv_d , *pw_d , *pT_d , job) )); } //------------------------------------------------------------ //------------ Symmetry ----------- if (npy == 0 && IF_SYMMETRY == 1) { dim3 blockdim , griddim; cal_grid_block_dim(&griddim , &blockdim , BlockDimX , BlockDimY , BlockDimZ , nx , LAP , nz); cudaJobPackage job( dim3(LAP,0,LAP) , dim3(nx_lap, LAP ,nz_lap) ); CUDA_LAUNCH(( hipLaunchKernelGGL(do_symmetry_kernel_m, dim3(griddim ), dim3(blockdim), 0, 0, *pd_d , *pu_d , *pv_d , *pw_d , *pT_d , job) )); } if (npy == NPY0 - 1 && IF_SYMMETRY == 1) { dim3 blockdim , griddim; cal_grid_block_dim(&griddim , &blockdim , BlockDimX , BlockDimY , BlockDimZ , nx , LAP , nz); cudaJobPackage job( dim3(LAP,ny_lap,LAP) , dim3(nx_lap, ny_2lap ,nz_lap) ); CUDA_LAUNCH(( hipLaunchKernelGGL(do_symmetry_kernel_p, dim3(griddim ), dim3(blockdim), 0, 0, *pd_d , *pu_d , *pv_d , *pw_d , *pT_d , job) )); } } #ifdef __cplusplus } #endif