Commit 96f3680d authored by Peter Eastman's avatar Peter Eastman
Browse files

Fixed CUDA implementation of O(n^3/2) Ewald

parent 86a4baba
...@@ -300,7 +300,7 @@ struct cudaGmxSimulation { ...@@ -300,7 +300,7 @@ struct cudaGmxSimulation {
float cellVolume; // Ewald parameter alpha (a.k.a. kappa) float cellVolume; // Ewald parameter alpha (a.k.a. kappa)
float alphaEwald; // Ewald parameter alpha (a.k.a. kappa) float alphaEwald; // Ewald parameter alpha (a.k.a. kappa)
float factorEwald; // - 1 ( 4 * alphaEwald * alphaEwald) float factorEwald; // - 1 ( 4 * alphaEwald * alphaEwald)
float kmax; // Maximum number of reciprocal vectors int kmax; // Maximum number of reciprocal vectors
float reactionFieldK; // Constant for reaction field correction float reactionFieldK; // Constant for reaction field correction
float probeRadius; // SASA probe radius float probeRadius; // SASA probe radius
float surfaceAreaFactor; // ACE approximation surface area factor float surfaceAreaFactor; // ACE approximation surface area factor
...@@ -337,9 +337,8 @@ struct cudaGmxSimulation { ...@@ -337,9 +337,8 @@ struct cudaGmxSimulation {
float collisionProbability; // Collision probability for Andersen thermostat float collisionProbability; // Collision probability for Andersen thermostat
float2* pObcData; // Pointer to fixed Born data float2* pObcData; // Pointer to fixed Born data
float2* pAttr; // Pointer to additional atom attributes (sig, eps) float2* pAttr; // Pointer to additional atom attributes (sig, eps)
float2* pEikr; // Pointer to exponents of reciprocal vectors and atom coordinates (ewald) float2* pEwaldEikr; // Pointer to exponents of reciprocal vectors and atom coordinates (ewald)
float2* pStructureFactor; // Pointer to the structure factors (ewald) float2* pEwaldCosSinSum; // Pointer to the cos/sin sums (ewald)
float2* pCosSinSum; // Pointer to the cos/sin sums (ewald)
unsigned int bonds; // Number of bonds unsigned int bonds; // Number of bonds
int4* pBondID; // Bond atom and output buffer IDs int4* pBondID; // Bond atom and output buffer IDs
float2* pBondParameter; // Bond parameters float2* pBondParameter; // Bond parameters
......
...@@ -429,18 +429,13 @@ void gpuSetEwaldParameters(gpuContext gpu)//, float alphaEwald, int kmax ) ...@@ -429,18 +429,13 @@ void gpuSetEwaldParameters(gpuContext gpu)//, float alphaEwald, int kmax )
// hard coded alphaEwald and kmax, no interface yet // hard coded alphaEwald and kmax, no interface yet
float alpha = 3.123413f; float alpha = 3.123413f;
float PI = 3.14159265358979323846f;
float TWO_PI = 2.0f * PI;
gpu->sim.recipBoxSizeX = TWO_PI / gpu->sim.periodicBoxSizeX ;
gpu->sim.recipBoxSizeY = TWO_PI / gpu->sim.periodicBoxSizeY ;
gpu->sim.recipBoxSizeZ = TWO_PI / gpu->sim.periodicBoxSizeZ ;
gpu->sim.cellVolume = gpu->sim.periodicBoxSizeX * gpu->sim.periodicBoxSizeY * gpu->sim.periodicBoxSizeZ;
gpu->sim.alphaEwald = alpha; gpu->sim.alphaEwald = alpha;
gpu->sim.factorEwald = -1 / (4*alpha*alpha); gpu->sim.factorEwald = -1 / (4*alpha*alpha);
gpu->sim.kmax = 20+1; gpu->sim.kmax = 20+1;
gpu->psEwaldEikr = new CUDAStream<float2>(gpu->sim.paddedNumberOfAtoms*gpu->sim.kmax, 1, "EwaldEikr");
gpu->sim.pEwaldEikr = gpu->psEwaldEikr->_pDevStream[0];
gpu->psEwaldCosSinSum = new CUDAStream<float2>((gpu->sim.kmax*2-1) * (gpu->sim.kmax*2-1) * (gpu->sim.kmax*2-1), 1, "EwaldCosSinSum");
gpu->sim.pEwaldCosSinSum = gpu->psEwaldCosSinSum->_pDevStream[0];
} }
extern "C" extern "C"
...@@ -449,6 +444,10 @@ void gpuSetPeriodicBoxSize(gpuContext gpu, float xsize, float ysize, float zsize ...@@ -449,6 +444,10 @@ void gpuSetPeriodicBoxSize(gpuContext gpu, float xsize, float ysize, float zsize
gpu->sim.periodicBoxSizeX = xsize; gpu->sim.periodicBoxSizeX = xsize;
gpu->sim.periodicBoxSizeY = ysize; gpu->sim.periodicBoxSizeY = ysize;
gpu->sim.periodicBoxSizeZ = zsize; gpu->sim.periodicBoxSizeZ = zsize;
gpu->sim.recipBoxSizeX = 2.0f*PI/gpu->sim.periodicBoxSizeX;
gpu->sim.recipBoxSizeY = 2.0f*PI/gpu->sim.periodicBoxSizeY;
gpu->sim.recipBoxSizeZ = 2.0f*PI/gpu->sim.periodicBoxSizeZ;
gpu->sim.cellVolume = gpu->sim.periodicBoxSizeX*gpu->sim.periodicBoxSizeY*gpu->sim.periodicBoxSizeZ;
} }
extern "C" extern "C"
...@@ -1002,12 +1001,6 @@ int gpuAllocateInitialBuffers(gpuContext gpu) ...@@ -1002,12 +1001,6 @@ int gpuAllocateInitialBuffers(gpuContext gpu)
gpu->sim.pObcChain = gpu->psObcChain->_pDevStream[0]; gpu->sim.pObcChain = gpu->psObcChain->_pDevStream[0];
gpu->psSigEps2 = new CUDAStream<float2>(gpu->sim.paddedNumberOfAtoms, 1, "SigEps2"); gpu->psSigEps2 = new CUDAStream<float2>(gpu->sim.paddedNumberOfAtoms, 1, "SigEps2");
gpu->sim.pAttr = gpu->psSigEps2->_pDevStream[0]; gpu->sim.pAttr = gpu->psSigEps2->_pDevStream[0];
gpu->psEwaldEikr = new CUDAStream<float2>(gpu->sim.paddedNumberOfAtoms, 1, "EwaldEikr");
gpu->sim.pEikr = gpu->psEwaldEikr->_pDevStream[0];
gpu->psEwaldStructureFactor = new CUDAStream<float2>(gpu->sim.paddedNumberOfAtoms, 1, "EwaldStructureFactor");
gpu->sim.pStructureFactor = gpu->psEwaldStructureFactor->_pDevStream[0];
gpu->psEwaldCosSinSum = new CUDAStream<float2>(gpu->sim.paddedNumberOfAtoms, 1, "EwaldCosSinSum");
gpu->sim.pCosSinSum = gpu->psEwaldCosSinSum->_pDevStream[0];
gpu->psObcData = new CUDAStream<float2>(gpu->sim.paddedNumberOfAtoms, 1, "ObcData"); gpu->psObcData = new CUDAStream<float2>(gpu->sim.paddedNumberOfAtoms, 1, "ObcData");
gpu->sim.pObcData = gpu->psObcData->_pDevStream[0]; gpu->sim.pObcData = gpu->psObcData->_pDevStream[0];
gpu->psStepSize = new CUDAStream<float2>(1, 1, "StepSize"); gpu->psStepSize = new CUDAStream<float2>(1, 1, "StepSize");
...@@ -1281,6 +1274,8 @@ void* gpuInit(int numAtoms) ...@@ -1281,6 +1274,8 @@ void* gpuInit(int numAtoms)
gpu->psRbDihedralParameter2 = NULL; gpu->psRbDihedralParameter2 = NULL;
gpu->psLJ14ID = NULL; gpu->psLJ14ID = NULL;
gpu->psLJ14Parameter = NULL; gpu->psLJ14Parameter = NULL;
gpu->psEwaldEikr = NULL;
gpu->psEwaldCosSinSum = NULL;
gpu->psShakeID = NULL; gpu->psShakeID = NULL;
gpu->psShakeParameter = NULL; gpu->psShakeParameter = NULL;
gpu->psSettleID = NULL; gpu->psSettleID = NULL;
...@@ -1409,9 +1404,11 @@ void gpuShutDown(gpuContext gpu) ...@@ -1409,9 +1404,11 @@ void gpuShutDown(gpuContext gpu)
delete gpu->psxVector4; delete gpu->psxVector4;
delete gpu->psvVector4; delete gpu->psvVector4;
delete gpu->psSigEps2; delete gpu->psSigEps2;
if (gpu->psEwaldEikr != NULL)
{
delete gpu->psEwaldEikr; delete gpu->psEwaldEikr;
delete gpu->psEwaldStructureFactor;
delete gpu->psEwaldCosSinSum; delete gpu->psEwaldCosSinSum;
}
delete gpu->psObcData; delete gpu->psObcData;
delete gpu->psObcChain; delete gpu->psObcChain;
delete gpu->psBornForce; delete gpu->psBornForce;
......
...@@ -87,7 +87,6 @@ struct _gpuContext { ...@@ -87,7 +87,6 @@ struct _gpuContext {
CUDAStream<float4>* psvVector4; CUDAStream<float4>* psvVector4;
CUDAStream<float2>* psSigEps2; CUDAStream<float2>* psSigEps2;
CUDAStream<float2>* psEwaldEikr; CUDAStream<float2>* psEwaldEikr;
CUDAStream<float2>* psEwaldStructureFactor;
CUDAStream<float2>* psEwaldCosSinSum; CUDAStream<float2>* psEwaldCosSinSum;
CUDAStream<float2>* psObcData; CUDAStream<float2>* psObcData;
CUDAStream<float>* psObcChain; CUDAStream<float>* psObcChain;
......
...@@ -44,24 +44,6 @@ ...@@ -44,24 +44,6 @@
} }
__device__ float2 FloatMultFloat2(float r, float2 a)
{
float2 b;
b.x = r*a.x;
b.y = r*a.y;
return b;
}
__device__ float2 FloatMultConjFloat2(float r, float2 a)
{
float2 b;
b.x = r*a.y;
b.y = r*a.x;
return b;
}
__global__ void kCalculateEwaldFastEikr_kernel() __global__ void kCalculateEwaldFastEikr_kernel()
{ {
...@@ -81,23 +63,23 @@ __global__ void kCalculateEwaldFastEikr_kernel() ...@@ -81,23 +63,23 @@ __global__ void kCalculateEwaldFastEikr_kernel()
// k = 0, explicitly // k = 0, explicitly
for(unsigned int m = 0; (m < 3); m++) { for(unsigned int m = 0; (m < 3); m++) {
cSim.pEikr[atom*kmax*3 + 0 + m].x = 1; cSim.pEwaldEikr[atom*kmax*3 + 0 + m].x = 1;
cSim.pEikr[atom*kmax*3 + 0 + m].y = 0; cSim.pEwaldEikr[atom*kmax*3 + 0 + m].y = 0;
} }
// k = 1, explicitly // k = 1, explicitly
cSim.pEikr[atom*kmax*3 + 3 + 0].x = cos(apos.x*cSim.recipBoxSizeX); cSim.pEwaldEikr[atom*kmax*3 + 3 + 0].x = cos(apos.x*cSim.recipBoxSizeX);
cSim.pEikr[atom*kmax*3 + 3 + 0].y = sin(apos.x*cSim.recipBoxSizeX); cSim.pEwaldEikr[atom*kmax*3 + 3 + 0].y = sin(apos.x*cSim.recipBoxSizeX);
cSim.pEikr[atom*kmax*3 + 3 + 1].x = cos(apos.y*cSim.recipBoxSizeY); cSim.pEwaldEikr[atom*kmax*3 + 3 + 1].x = cos(apos.y*cSim.recipBoxSizeY);
cSim.pEikr[atom*kmax*3 + 3 + 1].y = sin(apos.y*cSim.recipBoxSizeY); cSim.pEwaldEikr[atom*kmax*3 + 3 + 1].y = sin(apos.y*cSim.recipBoxSizeY);
cSim.pEikr[atom*kmax*3 + 3 + 2].x = cos(apos.z*cSim.recipBoxSizeZ); cSim.pEwaldEikr[atom*kmax*3 + 3 + 2].x = cos(apos.z*cSim.recipBoxSizeZ);
cSim.pEikr[atom*kmax*3 + 3 + 2].y = sin(apos.z*cSim.recipBoxSizeZ); cSim.pEwaldEikr[atom*kmax*3 + 3 + 2].y = sin(apos.z*cSim.recipBoxSizeZ);
// k > 1, by recursion // k > 1, by recursion
for(unsigned int k=2; (k<kmax); k++) { for(unsigned int k=2; (k<kmax); k++) {
for(unsigned int m=0; (m<3); m++) { for(unsigned int m=0; (m<3); m++) {
cSim.pEikr[atom*kmax*3 + k*3 + m] = MultofFloat2 (cSim.pEikr[atom*kmax*3 + (k-1)*3 + m] , cSim.pEikr[atom*kmax*3 + 3 + m]); cSim.pEwaldEikr[atom*kmax*3 + k*3 + m] = MultofFloat2 (cSim.pEwaldEikr[atom*kmax*3 + (k-1)*3 + m] , cSim.pEwaldEikr[atom*kmax*3 + 3 + m]);
} }
} }
...@@ -105,122 +87,32 @@ __global__ void kCalculateEwaldFastEikr_kernel() ...@@ -105,122 +87,32 @@ __global__ void kCalculateEwaldFastEikr_kernel()
} }
} }
__global__ void kCalculateEwaldFastStructureFactors_kernel()
{
// hard-coded maximum k-vectors, no interface yet
int kmax = cSim.kmax;
float4 apos;
int lowry = 0;
int lowrz = 1;
int numRx = 20+1;
int numRy = 20+1;
int numRz = 20+1;
unsigned int totalK = (numRx*2 - 1) * (numRy*2 - 1) * (numRz*2 - 1);
float2 tab_xy;
int index;
unsigned int atom = threadIdx.x + blockIdx.x * blockDim.x;
while (atom < cSim.atoms)
{
apos = cSim.pPosq[atom];
// cSim.pEikr[atom*kmax*3 + k*3 + m]
for(int rx = 0; rx < numRx; rx++)
{
for(int ry = lowry; ry < numRy; ry++)
{
if(ry >= 0)
{
tab_xy = MultofFloat2 (cSim.pEikr[atom*kmax*3 + rx*3 + 0] , cSim.pEikr[atom*kmax*3 + ry*3 + 1]);
}
else
{
tab_xy = ConjMultofFloat2 (cSim.pEikr[atom*kmax*3 + rx*3 + 0] , cSim.pEikr[atom*kmax*3 - ry*3 + 1]);
}
for (int rz = lowrz; rz < numRz; rz++)
{
index = rx * (numRy*2 - 1 ) * (numRz*2 - 1) + (ry + numRy -1 ) * (numRz * 2 - 1) + (rz + numRz -1 );
if( rz >= 0)
{
cSim.pStructureFactor[atom*totalK + index] = FloatMultFloat2 ( (apos.w ), MultofFloat2 (tab_xy ,cSim.pEikr[atom*kmax*3 + rz*3 + 2] ));
}
else
{
cSim.pStructureFactor[atom*totalK + index] = FloatMultFloat2 ( ( apos.w ), ConjMultofFloat2 (tab_xy ,cSim.pEikr[atom*kmax*3 - rz*3 + 2] ));
}
cSim.pCosSinSum[index].x = 0.0;
cSim.pCosSinSum[index].y = 0.0;
lowrz = 1 - numRz;
}
lowry = 1 - numRy;
}
}
atom += blockDim.x * gridDim.x;
}
}
__global__ void kCalculateEwaldFastCosSinSums_kernel() __global__ void kCalculateEwaldFastCosSinSums_kernel()
{ {
const unsigned int ksize = 2*cSim.kmax-1;
// float2 eikr; const unsigned int totalK = ksize*ksize*ksize;
int lowry = 0; unsigned int index = threadIdx.x + blockIdx.x * blockDim.x;
int lowrz = 1; while (index < totalK)
int numRx = 20+1; {
int numRy = 20+1; int rx = index/(ksize*ksize);
int numRz = 20+1; int remainder = index - rx*ksize*ksize;
unsigned int totalK = (numRx*2 - 1) * (numRy*2 - 1) * (numRz*2 - 1); int ry = remainder/ksize;
int rz = remainder - ry*ksize - cSim.kmax + 1;
int index; ry += -cSim.kmax + 1;
float2 sum = make_float2(0.0f, 0.0f);
unsigned int rx = threadIdx.x + blockIdx.x * blockDim.x;
while (rx < numRx)
{
// **********************************************************************
// cSim.pEikr[atom*kmax*3 + k*3 + m]
// for(int rx = 0; rx < numRx; rx++) {
for(int ry = lowry; ry < numRy; ry++) {
for (int rz = lowrz; rz < numRz; rz++) {
index = rx * (numRy*2 - 1 ) * (numRz*2 - 1) + (ry + numRy -1 ) * (numRz * 2 - 1) + (rz + numRz -1 );
for (int atom = 0; atom < cSim.atoms; atom++) for (int atom = 0; atom < cSim.atoms; atom++)
{ {
cSim.pCosSinSum[index].x += cSim.pStructureFactor[atom*totalK + index].x; float2 tab_xy = (ry >= 0 ? MultofFloat2 (cSim.pEwaldEikr[atom*cSim.kmax*3 + rx*3 + 0], cSim.pEwaldEikr[atom*cSim.kmax*3 + ry*3 + 1]) :
cSim.pCosSinSum[index].y += cSim.pStructureFactor[atom*totalK + index].y; ConjMultofFloat2 (cSim.pEwaldEikr[atom*cSim.kmax*3 + rx*3 + 0], cSim.pEwaldEikr[atom*cSim.kmax*3 - ry*3 + 1]));
} float charge = cSim.pPosq[atom].w;
float2 structureFactor = (rz >= 0 ? MultofFloat2(tab_xy, cSim.pEwaldEikr[atom*cSim.kmax*3 + rz*3 + 2]) :
lowrz = 1 - numRz; ConjMultofFloat2(tab_xy, cSim.pEwaldEikr[atom*cSim.kmax*3 - rz*3 + 2]));
} sum.x += charge*structureFactor.x;
lowry = 1 - numRy; sum.y += charge*structureFactor.y;
} }
cSim.pEwaldCosSinSum[index] = sum;
rx += blockDim.x * gridDim.x; index += blockDim.x * gridDim.x;
} }
} }
__global__ void kCalculateEwaldFastForces_kernel() __global__ void kCalculateEwaldFastForces_kernel()
...@@ -228,53 +120,41 @@ __global__ void kCalculateEwaldFastForces_kernel() ...@@ -228,53 +120,41 @@ __global__ void kCalculateEwaldFastForces_kernel()
float PI = 3.14159265358979323846f; float PI = 3.14159265358979323846f;
const float epsilon = 1.0; const float epsilon = 1.0;
float recipCoeff = (4*PI/cSim.V/epsilon); float recipCoeff = cSim.epsfac*(4*PI/cSim.cellVolume/epsilon);
int lowry = 0; int lowry = 0;
int lowrz = 1; int lowrz = 1;
int numRx = 20+1; const int numRx = cSim.kmax;
int numRy = 20+1; const int numRy = cSim.kmax;
int numRz = 20+1; const int numRz = cSim.kmax;
unsigned int totalK = (numRx*2 - 1) * (numRy*2 - 1) * (numRz*2 - 1);
int index;
unsigned int atom = threadIdx.x + blockIdx.x * blockDim.x; unsigned int atom = threadIdx.x + blockIdx.x * blockDim.x;
while (atom < cSim.atoms) while (atom < cSim.atoms)
{ {
float charge = cSim.pPosq[atom].w;
for(int rx = 0; rx < numRx; rx++) { for (int rx = 0; rx < numRx; rx++) {
float kx = rx * cSim.recipBoxSizeX; float kx = rx * cSim.recipBoxSizeX;
for (int ry = lowry; ry < numRy; ry++) {
for(int ry = lowry; ry < numRy; ry++) {
float ky = ry * cSim.recipBoxSizeY; float ky = ry * cSim.recipBoxSizeY;
float2 tab_xy = (ry >= 0 ? MultofFloat2(cSim.pEwaldEikr[atom*cSim.kmax*3 + rx*3 + 0], cSim.pEwaldEikr[atom*cSim.kmax*3 + ry*3 + 1]) :
ConjMultofFloat2(cSim.pEwaldEikr[atom*cSim.kmax*3 + rx*3 + 0], cSim.pEwaldEikr[atom*cSim.kmax*3 - ry*3 + 1]));
for (int rz = lowrz; rz < numRz; rz++) { for (int rz = lowrz; rz < numRz; rz++) {
float kz = rz * cSim.recipBoxSizeZ; float kz = rz * cSim.recipBoxSizeZ;
int index = rx*(numRy*2-1)*(numRz*2-1) + (ry+numRy-1)*(numRz*2-1) + (rz+numRz-1);
// next one is scary! float k2 = kx*kx + ky*ky + kz*kz;
index = rx * (numRy*2 - 1 ) * (numRz*2 - 1) + (ry + numRy -1 ) * (numRz * 2 - 1) + (rz + numRz -1 );
float k2 = kx * kx + ky * ky + kz * kz;
float ak = exp(k2*cSim.factorEwald) / k2; float ak = exp(k2*cSim.factorEwald) / k2;
float2 structureFactor = (rz >= 0 ? MultofFloat2(tab_xy, cSim.pEwaldEikr[atom*cSim.kmax*3 + rz*3 + 2]) :
float dEdR = ak * (cSim.pCosSinSum[index].x * cSim.pStructureFactor[atom*totalK + index].y - cSim.pCosSinSum[index].y * cSim.pStructureFactor[atom*totalK + index].x); ConjMultofFloat2(tab_xy, cSim.pEwaldEikr[atom*cSim.kmax*3 - rz*3 + 2]));
float dEdR = ak * charge * (cSim.pEwaldCosSinSum[index].x*structureFactor.y - cSim.pEwaldCosSinSum[index].y*structureFactor.x);
cSim.pForce4[atom].x += 2 * recipCoeff * dEdR * kx ; cSim.pForce4[atom].x += 2 * recipCoeff * dEdR * kx;
cSim.pForce4[atom].y += 2 * recipCoeff * dEdR * ky ; cSim.pForce4[atom].y += 2 * recipCoeff * dEdR * ky;
cSim.pForce4[atom].z += 2 * recipCoeff * dEdR * kz ; cSim.pForce4[atom].z += 2 * recipCoeff * dEdR * kz;
lowrz = 1 - numRz; lowrz = 1 - numRz;
} }
lowry = 1 - numRy; lowry = 1 - numRy;
} }
} }
atom += blockDim.x * gridDim.x; atom += blockDim.x * gridDim.x;
} }
} }
...@@ -207,22 +207,15 @@ void kCalculateCDLJForces(gpuContext gpu) ...@@ -207,22 +207,15 @@ void kCalculateCDLJForces(gpuContext gpu)
} }
else else
{ {
// Vanilla (N2) Ewald
kCalculateCDLJEwaldDirectForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kCalculateCDLJEwaldDirectForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); (sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJEwaldDirectForces"); LAUNCHERROR("kCalculateCDLJEwaldDirectForces");
kCalculateCDLJEwaldReciprocalForces_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>(); kCalculateEwaldFastEikr_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kCalculateCDLJEwaldReciprocalForces"); LAUNCHERROR("kCalculateEwaldFastEikr");
// If using Fast Ewald, uncomment the lines below kCalculateEwaldFastCosSinSums_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kCalculateEwaldFastCosSinSums");
// kCalculateEwaldFastEikr_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>(); kCalculateEwaldFastForces_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// LAUNCHERROR("kCalculateEwaldFastEikr"); LAUNCHERROR("kCalculateEwaldFastForces");
// kCalculateEwaldFastStructureFactors_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// LAUNCHERROR("kCalculateEwaldFastStructureFactors_kernel");
// kCalculateEwaldFastCosSinSums_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// LAUNCHERROR("kCalculateEwaldFastCosSinSums");
// kCalculateEwaldFastForces_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// LAUNCHERROR("kCalculateEwaldFastForces");
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment