#include "cuda.h" #include "cuda_runtime.h" #include "cuda_commen.h" #include "cuda_utility.h" #include "parameters.h" #include "parameters_d.h" #ifdef __cplusplus extern "C"{ #endif __global__ void cuda_mem_value_init(REAL value, REAL *ptr, unsigned int pitch, unsigned int size_x, unsigned int size_y, unsigned int size_z) { unsigned int x = blockIdx.x * blockDim.x + threadIdx.x; unsigned int y = blockIdx.y * blockDim.y + threadIdx.y; unsigned int z = blockIdx.z * blockDim.z + threadIdx.z; if (x < size_x && y < size_y && z < size_z) { *(ptr + x + pitch * (y + z * size_y)) = value; } } void cuda_mem_value_init_warp(REAL value, REAL *ptr, unsigned int pitch, unsigned int size_x, unsigned int size_y, unsigned int size_z){ dim3 griddim ; dim3 blockdim ; cal_grid_block_dim(&griddim , &blockdim , BlockDimX , BlockDimY , BlockDimZ , size_x , size_y , size_z); cuda_mem_value_init<<>>(value, ptr , pitch , size_x , size_y , size_z); } /* ========================= */ __global__ void pri_to_cons_kernel(cudaSoA pcons , cudaField pd , cudaField pu , cudaField pv , cudaField pw , cudaField pT ,cudaJobPackage job ){ // eyes on cells WITHOUT LAPs unsigned int x = blockIdx.x * blockDim.x + threadIdx.x + job.start.x; unsigned int y = blockIdx.y * blockDim.y + threadIdx.y + job.start.y; unsigned int z = blockIdx.z * blockDim.z + threadIdx.z + job.start.z; REAL d,u,v,w,T; if(x>>(*pcons , *pd , *pu , *pv , *pw , *pT , job_in) )) } /* ========================= */ __global__ void cons_to_pri_kernel(cudaSoA f, cudaField d , cudaField u , cudaField v , cudaField w , cudaField T , cudaField P , cudaJobPackage job){ // eyes on no-lap region unsigned int x = blockDim.x * blockIdx.x + threadIdx.x + job.start.x; unsigned int y = blockDim.y * blockIdx.y + threadIdx.y + job.start.y; unsigned int z = blockDim.z * blockIdx.z + threadIdx.z + job.start.z; if(x < job.end.x && y < job.end.y && z < job.end.z){ REAL d1,u1,v1,w1,T1,d2,T2; d1 = get_SoA(f, x, y, z, 0); get_Field_LAP(d, x+LAP, y+LAP, z+LAP) = d2 = d1; u1 = get_SoA(f, x, y, z, 1); u1 = u1/d1; get_Field_LAP(u, x+LAP, y+LAP, z+LAP) = u1; v1 = get_SoA(f, x, y, z, 2); v1 = v1/d1; get_Field_LAP(v, x+LAP, y+LAP, z+LAP) = v1; w1 = get_SoA(f, x, y, z, 3); w1 = w1/d1; get_Field_LAP(w, x+LAP, y+LAP, z+LAP) = w1; T1 = get_SoA(f, x, y, z, 4); get_Field_LAP(T, x+LAP, y+LAP, z+LAP) = T2 = (T1 - 0.5*d1*(u1*u1 + v1*v1 + w1*w1))/(d1*Cv_d); // T1 = T1/(d1*Cv_d); //T1 = T1 - 0.5*(u1*u1 + v1*v1 + w1*w1)/d1; // get_Field_LAP(P, x+LAP, y+LAP, z+LAP) = T1*(Gamma_d - 1.0); // get_Field_LAP(T , x+LAP , y+LAP , z+LAP) = T1/Cv_d; get_Field_LAP(P, x+LAP, y+LAP, z+LAP) = T2*d2/(Gamma_d*Ama_d*Ama_d); } } void get_duvwT() { dim3 griddim , blockdim; cal_grid_block_dim(&griddim , &blockdim , BlockDimX , BlockDimY , BlockDimZ , nx,ny,nz); cudaJobPackage job(dim3(0,0,0) , dim3(nx,ny,nz)); CUDA_LAUNCH(( cons_to_pri_kernel<<>>(*pf_d , *pd_d , *pu_d , *pv_d , *pw_d , *pT_d , *pP_d , job) )) } // -----Computation of viscousity --------------------------------------------- __global__ void get_Amu_kernal(cudaField Amu , cudaField T , cudaJobPackage job){ // eyes on field without LAP unsigned int x = blockDim.x * blockIdx.x + threadIdx.x + job.start.x; unsigned int y = blockDim.y * blockIdx.y + threadIdx.y + job.start.y; unsigned int z = blockDim.z * blockIdx.z + threadIdx.z + job.start.z; if(x < job.end.x && y < job.end.y && z < job.end.z){ REAL tmp = get_Field_LAP(T , x+LAP , y+LAP , z+LAP); get_Field(Amu , x,y,z) = amu_C0_d * sqrt(tmp * tmp * tmp) / (Tsb_d + tmp); } } void get_Amu() { dim3 griddim , blockdim; cal_grid_block_dim(&griddim , &blockdim , BlockDimX , BlockDimY , BlockDimZ , nx,ny,nz); cudaJobPackage job(dim3(0,0,0) , dim3(nx,ny,nz)); CUDA_LAUNCH(( get_Amu_kernal<<>>(*pAmu_d , *pT_d , job) )) } /* ======================================================== */ __global__ void sound_speed_kernel(cudaField T , cudaField cc , cudaJobPackage job){ // eyes on no-lap region unsigned int x = blockDim.x * blockIdx.x + threadIdx.x + job.start.x; unsigned int y = blockDim.y * blockIdx.y + threadIdx.y + job.start.y; unsigned int z = blockDim.z * blockIdx.z + threadIdx.z + job.start.z; if( x