/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Rossen P. Apostolov, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see . *
* -------------------------------------------------------------------------- */
/* Define multiply operations for floats */
__device__ float2 MultofFloat2(float2 a, float2 b)
{
float2 c;
c.x = a.x * b.x - a.y * b.y;
c.y = a.x * b.y + a.y * b.x;
return c;
}
__device__ float2 ConjMultofFloat2(float2 a, float2 b)
{
float2 c;
c.x = a.x*b.x + a.y*b.y;
c.y = a.y*b.x - a.x*b.y;
return c;
}
__device__ float2 FloatMultFloat2(float r, float2 a)
{
float2 b;
b.x = r*a.x;
b.y = r*a.y;
return b;
}
__device__ float2 FloatMultConjFloat2(float r, float2 a)
{
float2 b;
b.x = r*a.y;
b.y = r*a.x;
return b;
}
__global__ void kCalculateEwaldFastEikr_kernel()
{
int kmax = cSim.kmax;
float4 apos;
unsigned int atom = threadIdx.x + blockIdx.x * blockDim.x;
while (atom < cSim.atoms)
{
apos = cSim.pPosq[atom];
//generic form of the array
// pEikr[ atomID*kmax*3 + k*3 + m]
// k = 0, explicitly
for(unsigned int m = 0; (m < 3); m++) {
cSim.pEikr[atom*kmax*3 + 0 + m].x = 1;
cSim.pEikr[atom*kmax*3 + 0 + m].y = 0;
}
// k = 1, explicitly
cSim.pEikr[atom*kmax*3 + 3 + 0].x = cos(apos.x*cSim.recipBoxSizeX);
cSim.pEikr[atom*kmax*3 + 3 + 0].y = sin(apos.x*cSim.recipBoxSizeX);
cSim.pEikr[atom*kmax*3 + 3 + 1].x = cos(apos.y*cSim.recipBoxSizeY);
cSim.pEikr[atom*kmax*3 + 3 + 1].y = sin(apos.y*cSim.recipBoxSizeY);
cSim.pEikr[atom*kmax*3 + 3 + 2].x = cos(apos.z*cSim.recipBoxSizeZ);
cSim.pEikr[atom*kmax*3 + 3 + 2].y = sin(apos.z*cSim.recipBoxSizeZ);
// k > 1, by recursion
for(unsigned int k=2; (k= 0)
{
tab_xy = MultofFloat2 (cSim.pEikr[atom*kmax*3 + rx*3 + 0] , cSim.pEikr[atom*kmax*3 + ry*3 + 1]);
}
else
{
tab_xy = ConjMultofFloat2 (cSim.pEikr[atom*kmax*3 + rx*3 + 0] , cSim.pEikr[atom*kmax*3 - ry*3 + 1]);
}
for (int rz = lowrz; rz < numRz; rz++)
{
index = rx * (numRy*2 - 1 ) * (numRz*2 - 1) + (ry + numRy -1 ) * (numRz * 2 - 1) + (rz + numRz -1 );
if( rz >= 0)
{
cSim.pStructureFactor[atom*totalK + index] = FloatMultFloat2 ( (apos.w ), MultofFloat2 (tab_xy ,cSim.pEikr[atom*kmax*3 + rz*3 + 2] ));
}
else
{
cSim.pStructureFactor[atom*totalK + index] = FloatMultFloat2 ( ( apos.w ), ConjMultofFloat2 (tab_xy ,cSim.pEikr[atom*kmax*3 - rz*3 + 2] ));
}
cSim.pCosSinSum[index].x = 0.0;
cSim.pCosSinSum[index].y = 0.0;
lowrz = 1 - numRz;
}
lowry = 1 - numRy;
}
}
atom += blockDim.x * gridDim.x;
}
}
__global__ void kCalculateEwaldFastCosSinSums_kernel()
{
// float2 eikr;
int lowry = 0;
int lowrz = 1;
int numRx = 20+1;
int numRy = 20+1;
int numRz = 20+1;
unsigned int totalK = (numRx*2 - 1) * (numRy*2 - 1) * (numRz*2 - 1);
int index;
unsigned int rx = threadIdx.x + blockIdx.x * blockDim.x;
while (rx < numRx)
{
// **********************************************************************
// cSim.pEikr[atom*kmax*3 + k*3 + m]
// for(int rx = 0; rx < numRx; rx++) {
for(int ry = lowry; ry < numRy; ry++) {
for (int rz = lowrz; rz < numRz; rz++) {
index = rx * (numRy*2 - 1 ) * (numRz*2 - 1) + (ry + numRy -1 ) * (numRz * 2 - 1) + (rz + numRz -1 );
for (int atom = 0; atom < cSim.atoms; atom++)
{
cSim.pCosSinSum[index].x += cSim.pStructureFactor[atom*totalK + index].x;
cSim.pCosSinSum[index].y += cSim.pStructureFactor[atom*totalK + index].y;
}
lowrz = 1 - numRz;
}
lowry = 1 - numRy;
}
rx += blockDim.x * gridDim.x;
}
}
__global__ void kCalculateEwaldFastForces_kernel()
{
float PI = 3.14159265358979323846f;
const float epsilon = 1.0;
float recipCoeff = (4*PI/cSim.V/epsilon);
int lowry = 0;
int lowrz = 1;
int numRx = 20+1;
int numRy = 20+1;
int numRz = 20+1;
unsigned int totalK = (numRx*2 - 1) * (numRy*2 - 1) * (numRz*2 - 1);
int index;
unsigned int atom = threadIdx.x + blockIdx.x * blockDim.x;
while (atom < cSim.atoms)
{
for(int rx = 0; rx < numRx; rx++) {
float kx = rx * cSim.recipBoxSizeX;
for(int ry = lowry; ry < numRy; ry++) {
float ky = ry * cSim.recipBoxSizeY;
for (int rz = lowrz; rz < numRz; rz++) {
float kz = rz * cSim.recipBoxSizeZ;
// next one is scary!
index = rx * (numRy*2 - 1 ) * (numRz*2 - 1) + (ry + numRy -1 ) * (numRz * 2 - 1) + (rz + numRz -1 );
float k2 = kx * kx + ky * ky + kz * kz;
float ak = exp(k2*cSim.factorEwald) / k2;
float dEdR = ak * (cSim.pCosSinSum[index].x * cSim.pStructureFactor[atom*totalK + index].y - cSim.pCosSinSum[index].y * cSim.pStructureFactor[atom*totalK + index].x);
cSim.pForce4[atom].x += 2 * recipCoeff * dEdR * kx ;
cSim.pForce4[atom].y += 2 * recipCoeff * dEdR * ky ;
cSim.pForce4[atom].z += 2 * recipCoeff * dEdR * kz ;
lowrz = 1 - numRz;
}
lowry = 1 - numRy;
}
}
atom += blockDim.x * gridDim.x;
}
}