Commit 630bdd40 authored by Peter Eastman's avatar Peter Eastman
Browse files

Added other files that somehow got ommitted from earlier checkin

parent 9b7a9f65
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernels for evalauating nonbonded forces. It is included
* several times in kCalculateCDLJForces.cu with different #defines to generate
* different versions of the kernels.
*/
__global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUnit, int numWorkUnits)
{
extern __shared__ Atom sA[];
unsigned int totalWarps = cSim.nonbond_blocks*cSim.nonbond_threads_per_block/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
int pos = warp*numWorkUnits/totalWarps;
int end = (warp+1)*numWorkUnits/totalWarps;
int lasty = -1;
while (pos < end)
{
// Extract cell coordinates from appropriate work unit
unsigned int x = workUnit[pos];
unsigned int y = ((x >> 2) & 0x7fff) << GRIDBITS;
bool bExclusionFlag = (x & 0x1);
x = (x >> 17) << GRIDBITS;
float4 apos; // Local atom x, y, z, q
float3 af; // Local atom fx, fy, fz
float dx;
float dy;
float dz;
float r2;
float invR;
float sig;
float sig2;
float sig6;
float eps;
float dEdR;
unsigned int tgx = threadIdx.x & (GRID - 1);
unsigned int tbx = threadIdx.x - tgx;
int tj = tgx;
Atom* psA = &sA[tbx];
unsigned int i = x + tgx;
apos = cSim.pPosq[i];
float2 a = cSim.pAttr[i];
af.x = 0.0f;
af.y = 0.0f;
af.z = 0.0f;
if (x == y) // Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
sA[threadIdx.x].x = apos.x;
sA[threadIdx.x].y = apos.y;
sA[threadIdx.x].z = apos.z;
sA[threadIdx.x].q = apos.w;
sA[threadIdx.x].sig = a.x;
sA[threadIdx.x].eps = a.y;
apos.w *= cSim.epsfac;
if (!bExclusionFlag)
{
for (unsigned int j = 0; j < GRID; j++)
{
dx = psA[j].x - apos.x;
dy = psA[j].y - apos.y;
dz = psA[j].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
r2 = dx * dx + dy * dy + dz * dz;
invR = 1.0f / sqrt(r2);
sig = a.x + psA[j].sig;
sig2 = invR * sig;
sig2 *= sig2;
sig6 = sig2 * sig2 * sig2;
eps = a.y * psA[j].eps;
dEdR = eps * (12.0f * sig6 - 6.0f) * sig6;
#ifdef USE_CUTOFF
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
#else
dEdR += apos.w * psA[j].q * invR;
#endif
dEdR *= invR * invR;
#ifdef USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dEdR = 0.0f;
}
#endif
dx *= dEdR;
dy *= dEdR;
dz *= dEdR;
af.x -= dx;
af.y -= dy;
af.z -= dz;
}
}
else // bExclusion
{
unsigned int excl = cSim.pExclusion[x * cSim.exclusionStride + y + tgx];
for (unsigned int j = 0; j < GRID; j++)
{
dx = psA[j].x - apos.x;
dy = psA[j].y - apos.y;
dz = psA[j].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
r2 = dx * dx + dy * dy + dz * dz;
invR = 1.0f / sqrt(r2);
sig = a.x + psA[j].sig;
sig2 = invR * sig;
sig2 *= sig2;
sig6 = sig2 * sig2 * sig2;
eps = a.y * psA[j].eps;
dEdR = eps * (12.0f * sig6 - 6.0f) * sig6;
#ifdef USE_CUTOFF
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
#else
dEdR += apos.w * psA[j].q * invR;
#endif
dEdR *= invR * invR;
#ifdef USE_CUTOFF
if (!(excl & 0x1) || r2 > cSim.nonbondedCutoffSqr)
#else
if (!(excl & 0x1))
#endif
{
dEdR = 0.0f;
}
dx *= dEdR;
dy *= dEdR;
dz *= dEdR;
af.x -= dx;
af.y -= dy;
af.z -= dz;
excl >>= 1;
}
}
// Write results
float4 of;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
int offset = x + tgx + warp*cSim.stride;
of = cSim.pForce4a[offset];
of.x += af.x;
of.y += af.y;
of.z += af.z;
cSim.pForce4a[offset] = of;
#else
of.x = af.x;
of.y = af.y;
of.z = af.z;
of.w = 0.0f;
int offset = x + tgx + (x >> GRIDBITS) * cSim.stride;
cSim.pForce4a[offset] = of;
#endif
}
else // 100% utilization
{
// Read fixed atom data into registers and GRF
if (lasty != y)
{
int j = y + tgx;
float4 temp = cSim.pPosq[j];
float2 temp1 = cSim.pAttr[j];
sA[threadIdx.x].x = temp.x;
sA[threadIdx.x].y = temp.y;
sA[threadIdx.x].z = temp.z;
sA[threadIdx.x].q = temp.w;
sA[threadIdx.x].sig = temp1.x;
sA[threadIdx.x].eps = temp1.y;
}
sA[threadIdx.x].fx = 0.0f;
sA[threadIdx.x].fy = 0.0f;
sA[threadIdx.x].fz = 0.0f;
apos.w *= cSim.epsfac;
if (!bExclusionFlag)
{
for (unsigned int j = 0; j < GRID; j++)
{
dx = psA[tj].x - apos.x;
dy = psA[tj].y - apos.y;
dz = psA[tj].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
r2 = dx * dx + dy * dy + dz * dz;
invR = 1.0f / sqrt(r2);
sig = a.x + psA[tj].sig;
sig2 = invR * sig;
sig2 *= sig2;
sig6 = sig2 * sig2 * sig2;
eps = a.y * psA[tj].eps;
dEdR = eps * (12.0f * sig6 - 6.0f) * sig6;
#ifdef USE_CUTOFF
dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2);
#else
dEdR += apos.w * psA[tj].q * invR;
#endif
dEdR *= invR * invR;
#ifdef USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dEdR = 0.0f;
}
#endif
dx *= dEdR;
dy *= dEdR;
dz *= dEdR;
af.x -= dx;
af.y -= dy;
af.z -= dz;
psA[tj].fx += dx;
psA[tj].fy += dy;
psA[tj].fz += dz;
tj = (tj + 1) & (GRID - 1);
}
}
else // bExclusion
{
// Read fixed atom data into registers and GRF
unsigned int excl = cSim.pExclusion[x * cSim.exclusionStride + y + tgx];
excl = (excl >> tgx) | (excl << (GRID - tgx));
for (unsigned int j = 0; j < GRID; j++)
{
dx = psA[tj].x - apos.x;
dy = psA[tj].y - apos.y;
dz = psA[tj].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
r2 = dx * dx + dy * dy + dz * dz;
invR = 1.0f / sqrt(r2);
sig = a.x + psA[tj].sig;
sig2 = invR * sig;
sig2 *= sig2;
sig6 = sig2 * sig2 * sig2;
eps = a.y * psA[tj].eps;
dEdR = eps * (12.0f * sig6 - 6.0f) * sig6;
#ifdef USE_CUTOFF
dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2);
#else
dEdR += apos.w * psA[tj].q * invR;
#endif
dEdR *= invR * invR;
#ifdef USE_CUTOFF
if (!(excl & 0x1) || r2 > cSim.nonbondedCutoffSqr)
#else
if (!(excl & 0x1))
#endif
{
dEdR = 0.0f;
}
dx *= dEdR;
dy *= dEdR;
dz *= dEdR;
af.x -= dx;
af.y -= dy;
af.z -= dz;
psA[tj].fx += dx;
psA[tj].fy += dy;
psA[tj].fz += dz;
excl >>= 1;
tj = (tj + 1) & (GRID - 1);
}
}
// Write results
float4 of;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
int offset = x + tgx + warp*cSim.stride;
of = cSim.pForce4a[offset];
of.x += af.x;
of.y += af.y;
of.z += af.z;
cSim.pForce4a[offset] = of;
offset = y + tgx + warp*cSim.stride;
of = cSim.pForce4a[offset];
of.x += sA[threadIdx.x].fx;
of.y += sA[threadIdx.x].fy;
of.z += sA[threadIdx.x].fz;
cSim.pForce4a[offset] = of;
#else
of.x = af.x;
of.y = af.y;
of.z = af.z;
of.w = 0.0f;
int offset = x + tgx + (y >> GRIDBITS) * cSim.stride;
cSim.pForce4a[offset] = of;
of.x = sA[threadIdx.x].fx;
of.y = sA[threadIdx.x].fy;
of.z = sA[threadIdx.x].fz;
offset = y + tgx + (x >> GRIDBITS) * cSim.stride;
cSim.pForce4a[offset] = of;
#endif
lasty = y;
}
pos++;
}
}
This diff is collapsed.
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernel for calculating Born sums. It is included
* several times in kCalculateObcGbsaBornSum.cu with different #defines to generate
* different versions of the kernels.
*/
__global__ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* workUnit, int workUnits)
{
extern __shared__ Atom sA[];
int end = workUnits / gridDim.x;
int pos = end - (threadIdx.x >> GRIDBITS) - 1;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
#endif
while (pos >= 0)
{
// Extract cell coordinates from appropriate work unit
unsigned int x = workUnit[pos + (blockIdx.x*workUnits)/gridDim.x];
unsigned int y = ((x >> 2) & 0x7fff) << GRIDBITS;
x = (x >> 17) << GRIDBITS;
float dx;
float dy;
float dz;
float r2;
float r;
unsigned int tgx = threadIdx.x & (GRID - 1);
unsigned int tbx = threadIdx.x - tgx;
int tj = tgx;
Atom* psA = &sA[tbx];
if (x == y) // Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
unsigned int i = x + tgx;
float4 apos = cSim.pPosq[i]; // Local atom x, y, z, sum
float2 ar = cSim.pObcData[i]; // Local atom vr, sr
sA[threadIdx.x].x = apos.x;
sA[threadIdx.x].y = apos.y;
sA[threadIdx.x].z = apos.z;
sA[threadIdx.x].r = ar.x;
sA[threadIdx.x].sr = ar.y;
apos.w = 0.0f;
for (unsigned int j = 0; j < GRID; j++)
{
dx = psA[j].x - apos.x;
dy = psA[j].y - apos.y;
dz = psA[j].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
r2 = dx * dx + dy * dy + dz * dz;
#if defined USE_PERIODIC
if (i < cSim.atoms && x+j < cSim.atoms && r2 < cSim.nonbondedCutoffSqr)
#elif defined USE_CUTOFF
if (r2 < cSim.nonbondedCutoffSqr)
#endif
{
r = sqrt(r2);
float rInverse = 1.0f / r;
float rScaledRadiusJ = r + psA[j].sr;
if ((j != tgx) && (ar.x < rScaledRadiusJ))
{
float l_ij = 1.0f / max(ar.x, fabs(r - psA[j].sr));
float u_ij = 1.0f / rScaledRadiusJ;
float l_ij2 = l_ij * l_ij;
float u_ij2 = u_ij * u_ij;
float ratio = log(u_ij / l_ij);
apos.w += l_ij -
u_ij +
0.25f * r * (u_ij2 - l_ij2) +
(0.50f * rInverse * ratio) +
(0.25f * psA[j].sr * psA[j].sr * rInverse) *
(l_ij2 - u_ij2);
if (ar.x < (psA[j].r - r))
{
apos.w += 2.0f * ((1.0f / ar.x) - l_ij);
}
}
}
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
int offset = x + tgx + warp*cSim.stride;
cSim.pBornSum[offset] += apos.w;
#else
int offset = x + tgx + (x >> GRIDBITS) * cSim.stride;
cSim.pBornSum[offset] = apos.w;
#endif
}
else // 100% utilization
{
// Read fixed atom data into registers and GRF
int j = y + tgx;
unsigned int i = x + tgx;
float4 temp = cSim.pPosq[j];
float2 temp1 = cSim.pObcData[j];
float4 apos = cSim.pPosq[i]; // Local atom x, y, z, sum
float2 ar = cSim.pObcData[i]; // Local atom vr, sr
sA[threadIdx.x].x = temp.x;
sA[threadIdx.x].y = temp.y;
sA[threadIdx.x].z = temp.z;
sA[threadIdx.x].r = temp1.x;
sA[threadIdx.x].sr = temp1.y;
sA[threadIdx.x].sum = apos.w = 0.0f;
for (unsigned int j = 0; j < GRID; j++)
{
dx = psA[tj].x - apos.x;
dy = psA[tj].y - apos.y;
dz = psA[tj].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
r2 = dx * dx + dy * dy + dz * dz;
#ifdef USE_PERIODIC
if (i < cSim.atoms && y+tj < cSim.atoms && r2 < cSim.nonbondedCutoffSqr)
#elif defined USE_CUTOFF
if (r2 < cSim.nonbondedCutoffSqr)
#endif
{
r = sqrt(r2);
float rInverse = 1.0f / r;
float rScaledRadiusJ = r + psA[tj].sr;
if (ar.x < rScaledRadiusJ)
{
float l_ij = 1.0f / max(ar.x, fabs(r - psA[tj].sr));
float u_ij = 1.0f / rScaledRadiusJ;
float l_ij2 = l_ij * l_ij;
float u_ij2 = u_ij * u_ij;
float ratio = log(u_ij / l_ij);
float term = l_ij -
u_ij +
0.25f * r * (u_ij2 - l_ij2) +
(0.50f * rInverse * ratio) +
(0.25f * psA[tj].sr * psA[tj].sr * rInverse) *
(l_ij2 - u_ij2);
if (ar.x < (psA[tj].sr - r))
{
term += 2.0f * ((1.0f / ar.x) - l_ij);
}
apos.w += term;
}
float rScaledRadiusI = r + ar.y;
if (psA[tj].r < rScaledRadiusI)
{
float l_ij = 1.0f / max(psA[tj].r, fabs(r - ar.y));
float u_ij = 1.0f / rScaledRadiusI;
float l_ij2 = l_ij * l_ij;
float u_ij2 = u_ij * u_ij;
float ratio = log(u_ij / l_ij);
float term = l_ij -
u_ij +
0.25f * r * (u_ij2 - l_ij2) +
(0.50f * rInverse * ratio) +
(0.25f * ar.y * ar.y * rInverse) *
(l_ij2 - u_ij2);
if (psA[tj].r < (ar.y - r))
{
term += 2.0f * ((1.0f / psA[tj].r) - l_ij);
}
psA[tj].sum += term;
}
}
tj = (tj - 1) & (GRID - 1);
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
int offset = x + tgx + warp*cSim.stride;
cSim.pBornSum[offset] += apos.w;
offset = y + tgx + warp*cSim.stride;
cSim.pBornSum[offset] += sA[threadIdx.x].sum;
#else
int offset = x + tgx + (y >> GRIDBITS) * cSim.stride;
cSim.pBornSum[offset] = apos.w;
offset = y + tgx + (x >> GRIDBITS) * cSim.stride;
cSim.pBornSum[offset] = sA[threadIdx.x].sum;
#endif
}
pos -= cSim.nonbond_workBlock;
}
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernel for evalauating the second stage of GBSA. It is included
* several times in kCalculateObcGbsaForces2.cu with different #defines to generate
* different versions of the kernels.
*/
__global__ void METHOD_NAME(kCalculateObcGbsa, Forces2_kernel)(unsigned int* workUnit, int numWorkUnits)
{
extern __shared__ Atom sA[];
unsigned int totalWarps = cSim.nonbond_blocks*cSim.nonbond_threads_per_block/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
int pos = warp*numWorkUnits/totalWarps;
int end = (warp+1)*numWorkUnits/totalWarps;
int lasty = -1;
while (pos < end)
{
// Extract cell coordinates from appropriate work unit
unsigned int x = workUnit[pos];
unsigned int y = ((x >> 2) & 0x7fff) << GRIDBITS;
x = (x >> 17) << GRIDBITS;
unsigned int tgx = threadIdx.x & (GRID - 1);
unsigned int i = x + tgx;
float4 apos = cSim.pPosq[i];
float2 a = cSim.pObcData[i];
float fb = cSim.pBornForce[i];
unsigned int tbx = threadIdx.x - tgx;
int tj = tgx;
Atom* psA = &sA[tbx];
float3 af;
sA[threadIdx.x].fx = af.x = 0.0f;
sA[threadIdx.x].fy = af.y = 0.0f;
sA[threadIdx.x].fz = af.z = 0.0f;
if (x == y) // Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
sA[threadIdx.x].x = apos.x;
sA[threadIdx.x].y = apos.y;
sA[threadIdx.x].z = apos.z;
sA[threadIdx.x].r = a.x;
sA[threadIdx.x].sr = a.y;
sA[threadIdx.x].sr2 = a.y * a.y;
sA[threadIdx.x].fb = fb;
for (unsigned int j = (tgx+1)&(GRID-1); j != tgx; j = (j+1)&(GRID-1))
{
float dx = psA[j].x - apos.x;
float dy = psA[j].y - apos.y;
float dz = psA[j].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
float r2 = dx * dx + dy * dy + dz * dz;
float r = sqrt(r2);
// Atom I Born forces and sum
float rScaledRadiusJ = r + psA[j].sr;
float l_ij = 1.0f / max(a.x, fabs(r - psA[j].sr));
float u_ij = 1.0f / rScaledRadiusJ;
float rInverse = 1.0f / r;
float l_ij2 = l_ij * l_ij;
float u_ij2 = u_ij * u_ij;
float r2Inverse = rInverse * rInverse;
float t1 = log (u_ij / l_ij);
float t2 = (l_ij2 - u_ij2);
float t3 = t2 * rInverse;
t1 *= rInverse;
// Born Forces term
float term = 0.125f *
(1.000f + psA[j].sr2 * r2Inverse) * t3 +
0.250f * t1 * r2Inverse;
float dE = fb * term;
#if defined USE_PERIODIC
if (a.x >= rScaledRadiusJ || i >= cSim.atoms || x+j >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
#elif defined USE_CUTOFF
if (a.x >= rScaledRadiusJ || r2 > cSim.nonbondedCutoffSqr)
#else
if (a.x >= rScaledRadiusJ)
#endif
{
dE = 0.0f;
}
float d = dx * dE;
af.x -= d;
psA[j].fx += d;
d = dy * dE;
af.y -= d;
psA[j].fy += d;
d = dz * dE;
af.z -= d;
psA[j].fz += d;
}
// Write results
float4 of;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
int offset = x + tgx + warp*cSim.stride;
of = cSim.pForce4b[offset];
of.x += af.x + sA[threadIdx.x].fx;
of.y += af.y + sA[threadIdx.x].fy;
of.z += af.z + sA[threadIdx.x].fz;
cSim.pForce4b[offset] = of;
#else
int offset = x + tgx + (x >> GRIDBITS) * cSim.stride;
of.x = af.x + sA[threadIdx.x].fx;
of.y = af.y + sA[threadIdx.x].fy;
of.z = af.z + sA[threadIdx.x].fz;
of.w = 0.0f;
cSim.pForce4b[offset] = of;
#endif
}
else
{
// Read fixed atom data into registers and GRF
if (lasty != y)
{
int j = y + tgx;
float4 temp = cSim.pPosq[j];
float2 temp1 = cSim.pObcData[j];
sA[threadIdx.x].fb = cSim.pBornForce[j];
sA[threadIdx.x].x = temp.x;
sA[threadIdx.x].y = temp.y;
sA[threadIdx.x].z = temp.z;
sA[threadIdx.x].r = temp1.x;
sA[threadIdx.x].sr = temp1.y;
sA[threadIdx.x].sr2 = temp1.y * temp1.y;
}
float sr2 = a.y * a.y;
for (int j = 0; j < GRID; j++)
{
float dx = psA[tj].x - apos.x;
float dy = psA[tj].y - apos.y;
float dz = psA[tj].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
float r2 = dx * dx + dy * dy + dz * dz;
float r = sqrt(r2);
// Interleaved Atom I and J Born Forces and sum components
float r2Inverse = 1.0f / r2;
float rScaledRadiusJ = r + psA[tj].sr;
float rScaledRadiusI = r + a.y;
float rInverse = 1.0f / r;
float l_ijJ = 1.0f / max(a.x, fabs(r - psA[tj].sr));
float l_ijI = 1.0f / max(psA[tj].r, fabs(r - a.y));
float u_ijJ = 1.0f / rScaledRadiusJ;
float u_ijI = 1.0f / rScaledRadiusI;
float l_ij2J = l_ijJ * l_ijJ;
float l_ij2I = l_ijI * l_ijI;
float u_ij2J = u_ijJ * u_ijJ;
float u_ij2I = u_ijI * u_ijI;
float t1J = log (u_ijJ / l_ijJ);
float t1I = log (u_ijI / l_ijI);
float t2J = (l_ij2J - u_ij2J);
float t2I = (l_ij2I - u_ij2I);
float t3J = t2J * rInverse;
float t3I = t2I * rInverse;
t1J *= rInverse;
t1I *= rInverse;
// Born Forces term
float term = 0.125f *
(1.000f + psA[tj].sr2 * r2Inverse) * t3J +
0.250f * t1J * r2Inverse;
float dE = fb * term;
#if defined USE_PERIODIC
if (a.x >= rScaledRadiusJ || i >= cSim.atoms || y+tj >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
#elif defined USE_CUTOFF
if (a.x >= rScaledRadiusJ || r2 > cSim.nonbondedCutoffSqr)
#else
if (a.x >= rScaledRadiusJ)
#endif
{
dE = 0.0f;
}
float d = dx * dE;
af.x -= d;
psA[tj].fx += d;
d = dy * dE;
af.y -= d;
psA[tj].fy += d;
d = dz * dE;
af.z -= d;
psA[tj].fz += d;
// Atom J Born sum term
term = 0.125f *
(1.000f + sr2 * r2Inverse) * t3I +
0.250f * t1I * r2Inverse;
dE = psA[tj].fb * term;
#ifdef USE_PERIODIC
if (psA[tj].r >= rScaledRadiusI || i >= cSim.atoms || y+tj >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
#elif defined USE_CUTOFF
if (psA[tj].r >= rScaledRadiusI || r2 > cSim.nonbondedCutoffSqr)
#else
if (psA[tj].r >= rScaledRadiusI)
#endif
{
dE = 0.0f;
}
dx *= dE;
dy *= dE;
dz *= dE;
psA[tj].fx += dx;
psA[tj].fy += dy;
psA[tj].fz += dz;
af.x -= dx;
af.y -= dy;
af.z -= dz;
tj = (tj + 1) & (GRID - 1);
}
// Write results
float4 of;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
int offset = x + tgx + warp*cSim.stride;
of = cSim.pForce4b[offset];
of.x += af.x;
of.y += af.y;
of.z += af.z;
cSim.pForce4b[offset] = of;
offset = y + tgx + warp*cSim.stride;
of = cSim.pForce4b[offset];
of.x += sA[threadIdx.x].fx;
of.y += sA[threadIdx.x].fy;
of.z += sA[threadIdx.x].fz;
cSim.pForce4b[offset] = of;
#else
int offset = x + tgx + (y >> GRIDBITS) * cSim.stride;
of.x = af.x;
of.y = af.y;
of.z = af.z;
of.w = 0.0f;
cSim.pForce4b[offset] = of;
offset = y + tgx + (x >> GRIDBITS) * cSim.stride;
of.x = sA[threadIdx.x].fx;
of.y = sA[threadIdx.x].fy;
of.z = sA[threadIdx.x].fz;
cSim.pForce4b[offset] = of;
#endif
}
lasty = y;
pos++;
}
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernels for identifying interacting blocks. It is included
* several times in kCalculateCDLJForces.cu with different #defines to generate
* different versions of the kernels.
*/
/**
* Find a bounding box for the atoms in each block.
*/
__global__ void METHOD_NAME(kFindBlockBounds, _kernel)()
{
unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int base = pos << GRIDBITS;
if (base < cSim.atoms)
{
float4 apos = cSim.pPosq[base];
#ifdef USE_PERIODIC
apos.x -= floor(apos.x/cSim.periodicBoxSizeX)*cSim.periodicBoxSizeX;
apos.y -= floor(apos.y/cSim.periodicBoxSizeY)*cSim.periodicBoxSizeY;
apos.z -= floor(apos.z/cSim.periodicBoxSizeZ)*cSim.periodicBoxSizeZ;
float4 firstPoint = apos;
#endif
float minx = apos.x;
float maxx = apos.x;
float miny = apos.y;
float maxy = apos.y;
float minz = apos.z;
float maxz = apos.z;
for (unsigned int i = 1; i < GRID; i++)
{
apos = cSim.pPosq[base+i];
#ifdef USE_PERIODIC
apos.x -= floor((apos.x-firstPoint.x)/cSim.periodicBoxSizeX+0.5)*cSim.periodicBoxSizeX;
apos.y -= floor((apos.y-firstPoint.y)/cSim.periodicBoxSizeY+0.5)*cSim.periodicBoxSizeY;
apos.z -= floor((apos.z-firstPoint.z)/cSim.periodicBoxSizeZ+0.5)*cSim.periodicBoxSizeZ;
#endif
minx = min(minx, apos.x);
maxx = max(maxx, apos.x);
miny = min(miny, apos.y);
maxy = max(maxy, apos.y);
minz = min(minz, apos.z);
maxz = max(maxz, apos.z);
}
cSim.pGridBoundingBox[pos] = make_float4(0.5f*(maxx-minx), 0.5f*(maxy-miny), 0.5f*(maxz-minz), 0);
cSim.pGridCenter[pos] = make_float4(0.5f*(maxx+minx), 0.5f*(maxy+miny), 0.5f*(maxz+minz), 0);
}
}
/**
* Compare the bounding boxes for each pair of blocks. If they are sufficiently far apart,
* mark them as non-interacting.
*/
__global__ void METHOD_NAME(kFindBlocksWithInteractions, _kernel)()
{
unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
if (pos < cSim.workUnits)
{
// Extract cell coordinates from appropriate work unit
unsigned int x = cSim.pWorkUnit[pos];
unsigned int y = ((x >> 2) & 0x7fff);
x = (x >> 17);
// Find the distance between the bounding boxes of the two cells.
float4 centera = cSim.pGridCenter[x];
float4 centerb = cSim.pGridCenter[y];
float dx = centera.x-centerb.x;
float dy = centera.y-centerb.y;
float dz = centera.z-centerb.z;
#ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
float4 boxSizea = cSim.pGridBoundingBox[x];
float4 boxSizeb = cSim.pGridBoundingBox[y];
dx = max(0.0f, abs(dx)-boxSizea.x-boxSizeb.x);
dy = max(0.0f, abs(dy)-boxSizea.y-boxSizeb.y);
dz = max(0.0f, abs(dz)-boxSizea.z-boxSizeb.z);
cSim.pInteractionFlag[pos] = (dx*dx+dy*dy+dz*dz > cSim.nonbondedCutoffSqr ? 0 : 1);
}
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
//#include <fstream>
using namespace std;
#define DeltaShake
#include "gputypes.h"
static __constant__ cudaGmxSimulation cSim;
void SetSettleSim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed");
}
void GetSettleSim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
}
/**
* This is based on the setlep FORTRAN routine by Shuichi Miyamoto. See
* S. Miyamoto and P. Kollman, J. Comp. Chem., vol 13, no. 8, pp. 952-962 (1992).
*/
__global__ void kApplyFirstSettle_kernel()
{
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
while (pos < cSim.settleConstraints)
{
int4 atomID = cSim.pSettleID[pos];
float2 params = cSim.pSettleParameter[pos];
float4 apos0 = cSim.pOldPosq[atomID.x];
float4 xp0 = cSim.pPosqP[atomID.x];
float4 apos1 = cSim.pOldPosq[atomID.y];
float4 xp1 = cSim.pPosqP[atomID.y];
float4 apos2 = cSim.pOldPosq[atomID.z];
float4 xp2 = cSim.pPosqP[atomID.z];
float m0 = 1.0/cSim.pVelm4[atomID.x].w;
float m1 = 1.0/cSim.pVelm4[atomID.y].w;
float m2 = 1.0/cSim.pVelm4[atomID.z].w;
float xb0 = apos1.x-apos0.x;
float yb0 = apos1.y-apos0.y;
float zb0 = apos1.z-apos0.z;
float xc0 = apos2.x-apos0.x;
float yc0 = apos2.y-apos0.y;
float zc0 = apos2.z-apos0.z;
float dist1 = sqrt(xb0*xb0 + yb0*yb0 + zb0*zb0);
float dist2 = sqrt(xc0*xc0 + yc0*yc0 + zc0*zc0);
float totalMass = m0+m1+m2;
float xcom = ((apos0.x+xp0.x)*m0 + (apos1.x+xp1.x)*m1 + (apos2.x+xp2.x)*m2) / totalMass;
float ycom = ((apos0.y+xp0.y)*m0 + (apos1.y+xp1.y)*m1 + (apos2.y+xp2.y)*m2) / totalMass;
float zcom = ((apos0.z+xp0.z)*m0 + (apos1.z+xp1.z)*m1 + (apos2.z+xp2.z)*m2) / totalMass;
float xa1 = apos0.x + xp0.x - xcom;
float ya1 = apos0.y + xp0.y - ycom;
float za1 = apos0.z + xp0.z - zcom;
float xb1 = apos1.x + xp1.x - xcom;
float yb1 = apos1.y + xp1.y - ycom;
float zb1 = apos1.z + xp1.z - zcom;
float xc1 = apos2.x + xp2.x - xcom;
float yc1 = apos2.y + xp2.y - ycom;
float zc1 = apos2.z + xp2.z - zcom;
float xaksZd = yb0*zc0 - zb0*yc0;
float yaksZd = zb0*xc0 - xb0*zc0;
float zaksZd = xb0*yc0 - yb0*xc0;
float xaksXd = ya1*zaksZd - za1*yaksZd;
float yaksXd = za1*xaksZd - xa1*zaksZd;
float zaksXd = xa1*yaksZd - ya1*xaksZd;
float xaksYd = yaksZd*zaksXd - zaksZd*yaksXd;
float yaksYd = zaksZd*xaksXd - xaksZd*zaksXd;
float zaksYd = xaksZd*yaksXd - yaksZd*xaksXd;
float axlng = sqrt(xaksXd * xaksXd + yaksXd * yaksXd + zaksXd * zaksXd);
float aylng = sqrt(xaksYd * xaksYd + yaksYd * yaksYd + zaksYd * zaksYd);
float azlng = sqrt(xaksZd * xaksZd + yaksZd * yaksZd + zaksZd * zaksZd);
float trns11 = xaksXd / axlng;
float trns21 = yaksXd / axlng;
float trns31 = zaksXd / axlng;
float trns12 = xaksYd / aylng;
float trns22 = yaksYd / aylng;
float trns32 = zaksYd / aylng;
float trns13 = xaksZd / azlng;
float trns23 = yaksZd / azlng;
float trns33 = zaksZd / azlng;
float xb0d = trns11*xb0 + trns21*yb0 + trns31*zb0;
float yb0d = trns12*xb0 + trns22*yb0 + trns32*zb0;
float xc0d = trns11*xc0 + trns21*yc0 + trns31*zc0;
float yc0d = trns12*xc0 + trns22*yc0 + trns32*zc0;
// float xa1d = trns11*xa1 + trns21*ya1 + trns31*za1;
// float ya1d = trns12*xa1 + trns22*ya1 + trns32*za1;
float za1d = trns13*xa1 + trns23*ya1 + trns33*za1;
float xb1d = trns11*xb1 + trns21*yb1 + trns31*zb1;
float yb1d = trns12*xb1 + trns22*yb1 + trns32*zb1;
float zb1d = trns13*xb1 + trns23*yb1 + trns33*zb1;
float xc1d = trns11*xc1 + trns21*yc1 + trns31*zc1;
float yc1d = trns12*xc1 + trns22*yc1 + trns32*zc1;
float zc1d = trns13*xc1 + trns23*yc1 + trns33*zc1;
// --- Step2 A2' ---
float rc = 0.5*params.y;
float rb = sqrt(params.x*params.x-rc*rc);
float ra = rb*(m1+m2)/totalMass;
rb -= ra;
float sinphi = za1d / ra;
float cosphi = sqrt(1.0 - sinphi*sinphi);
float sinpsi = ( zb1d - zc1d ) / (2*rc*cosphi);
float cospsi = sqrt(1.0 - sinpsi*sinpsi);
float ya2d = ra * cosphi;
float xb2d = - rc * cospsi;
float yb2d = - rb * cosphi - rc *sinpsi * sinphi;
float yc2d = - rb * cosphi + rc *sinpsi * sinphi;
float xb2d2 = xb2d * xb2d;
float hh2 = 4.0 * xb2d2 + (yb2d-yc2d) * (yb2d-yc2d) + (zb1d-zc1d) * (zb1d-zc1d);
float deltx = 2.0 * xb2d + sqrt ( 4.0 * xb2d2 - hh2 + params.y*params.y );
xb2d -= deltx * 0.5;
// --- Step3 al,be,ga ---
float alpa = ( xb2d * (xb0d-xc0d) + yb0d * yb2d + yc0d * yc2d );
float beta = ( xb2d * (yc0d-yb0d) + xb0d * yb2d + xc0d * yc2d );
float gama = xb0d * yb1d - xb1d * yb0d + xc0d * yc1d - xc1d * yc0d;
float al2be2 = alpa * alpa + beta * beta;
float sinthe = ( alpa*gama - beta * sqrt ( al2be2 - gama * gama ) ) / al2be2;
// --- Step4 A3' ---
float costhe = sqrt (1.0 - sinthe * sinthe );
float xa3d = - ya2d * sinthe;
float ya3d = ya2d * costhe;
float za3d = za1d;
float xb3d = xb2d * costhe - yb2d * sinthe;
float yb3d = xb2d * sinthe + yb2d * costhe;
float zb3d = zb1d;
float xc3d = - xb2d * costhe - yc2d * sinthe;
float yc3d = - xb2d * sinthe + yc2d * costhe;
float zc3d = zc1d;
// --- Step5 A3 ---
float xa3 = trns11*xa3d + trns12*ya3d + trns13*za3d;
float ya3 = trns21*xa3d + trns22*ya3d + trns23*za3d;
float za3 = trns31*xa3d + trns32*ya3d + trns33*za3d;
float xb3 = trns11*xb3d + trns12*yb3d + trns13*zb3d;
float yb3 = trns21*xb3d + trns22*yb3d + trns23*zb3d;
float zb3 = trns31*xb3d + trns32*yb3d + trns33*zb3d;
float xc3 = trns11*xc3d + trns12*yc3d + trns13*zc3d;
float yc3 = trns21*xc3d + trns22*yc3d + trns23*zc3d;
float zc3 = trns31*xc3d + trns32*yc3d + trns33*zc3d;
xp0.x = xcom + xa3 - apos0.x;
xp0.y = ycom + ya3 - apos0.y;
xp0.z = zcom + za3 - apos0.z;
xp1.x = xcom + xb3 - apos1.x;
xp1.y = ycom + yb3 - apos1.y;
xp1.z = zcom + zb3 - apos1.z;
xp2.x = xcom + xc3 - apos2.x;
xp2.y = ycom + yc3 - apos2.y;
xp2.z = zcom + zc3 - apos2.z;
cSim.pPosqP[atomID.x] = xp0;
cSim.pPosqP[atomID.y] = xp1;
cSim.pPosqP[atomID.z] = xp2;
pos += blockDim.x * gridDim.x;
}
}
void kApplyFirstSettle(gpuContext gpu)
{
// printf("kApplyFirstSettle\n");
if (gpu->sim.settleConstraints > 0)
{
kApplyFirstSettle_kernel<<<gpu->sim.blocks, gpu->sim.settle_threads_per_block>>>();
LAUNCHERROR("kApplyFirstSettle");
}
}
__global__ void kApplySecondSettle_kernel()
{
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
while (pos < cSim.settleConstraints)
{
int4 atomID = cSim.pSettleID[pos];
float2 params = cSim.pSettleParameter[pos];
float4 apos0 = cSim.pOldPosq[atomID.x];
float4 xp0 = cSim.pPosq[atomID.x];
float4 apos1 = cSim.pOldPosq[atomID.y];
float4 xp1 = cSim.pPosq[atomID.y];
float4 apos2 = cSim.pOldPosq[atomID.z];
float4 xp2 = cSim.pPosq[atomID.z];
float m0 = 1.0/cSim.pVelm4[atomID.x].w;
float m1 = 1.0/cSim.pVelm4[atomID.y].w;
float m2 = 1.0/cSim.pVelm4[atomID.z].w;
float xb0 = apos1.x-apos0.x;
float yb0 = apos1.y-apos0.y;
float zb0 = apos1.z-apos0.z;
float xc0 = apos2.x-apos0.x;
float yc0 = apos2.y-apos0.y;
float zc0 = apos2.z-apos0.z;
float dist1 = sqrt(xb0*xb0 + yb0*yb0 + zb0*zb0);
float dist2 = sqrt(xc0*xc0 + yc0*yc0 + zc0*zc0);
float totalMass = m0+m1+m2;
float xcom = ((apos0.x+xp0.x)*m0 + (apos1.x+xp1.x)*m1 + (apos2.x+xp2.x)*m2) / totalMass;
float ycom = ((apos0.y+xp0.y)*m0 + (apos1.y+xp1.y)*m1 + (apos2.y+xp2.y)*m2) / totalMass;
float zcom = ((apos0.z+xp0.z)*m0 + (apos1.z+xp1.z)*m1 + (apos2.z+xp2.z)*m2) / totalMass;
float xa1 = apos0.x + xp0.x - xcom;
float ya1 = apos0.y + xp0.y - ycom;
float za1 = apos0.z + xp0.z - zcom;
float xb1 = apos1.x + xp1.x - xcom;
float yb1 = apos1.y + xp1.y - ycom;
float zb1 = apos1.z + xp1.z - zcom;
float xc1 = apos2.x + xp2.x - xcom;
float yc1 = apos2.y + xp2.y - ycom;
float zc1 = apos2.z + xp2.z - zcom;
float xaksZd = yb0*zc0 - zb0*yc0;
float yaksZd = zb0*xc0 - xb0*zc0;
float zaksZd = xb0*yc0 - yb0*xc0;
float xaksXd = ya1*zaksZd - za1*yaksZd;
float yaksXd = za1*xaksZd - xa1*zaksZd;
float zaksXd = xa1*yaksZd - ya1*xaksZd;
float xaksYd = yaksZd*zaksXd - zaksZd*yaksXd;
float yaksYd = zaksZd*xaksXd - xaksZd*zaksXd;
float zaksYd = xaksZd*yaksXd - yaksZd*xaksXd;
float axlng = sqrt(xaksXd * xaksXd + yaksXd * yaksXd + zaksXd * zaksXd);
float aylng = sqrt(xaksYd * xaksYd + yaksYd * yaksYd + zaksYd * zaksYd);
float azlng = sqrt(xaksZd * xaksZd + yaksZd * yaksZd + zaksZd * zaksZd);
float trns11 = xaksXd / axlng;
float trns21 = yaksXd / axlng;
float trns31 = zaksXd / axlng;
float trns12 = xaksYd / aylng;
float trns22 = yaksYd / aylng;
float trns32 = zaksYd / aylng;
float trns13 = xaksZd / azlng;
float trns23 = yaksZd / azlng;
float trns33 = zaksZd / azlng;
float xb0d = trns11*xb0 + trns21*yb0 + trns31*zb0;
float yb0d = trns12*xb0 + trns22*yb0 + trns32*zb0;
float xc0d = trns11*xc0 + trns21*yc0 + trns31*zc0;
float yc0d = trns12*xc0 + trns22*yc0 + trns32*zc0;
// float xa1d = trns11*xa1 + trns21*ya1 + trns31*za1;
// float ya1d = trns12*xa1 + trns22*ya1 + trns32*za1;
float za1d = trns13*xa1 + trns23*ya1 + trns33*za1;
float xb1d = trns11*xb1 + trns21*yb1 + trns31*zb1;
float yb1d = trns12*xb1 + trns22*yb1 + trns32*zb1;
float zb1d = trns13*xb1 + trns23*yb1 + trns33*zb1;
float xc1d = trns11*xc1 + trns21*yc1 + trns31*zc1;
float yc1d = trns12*xc1 + trns22*yc1 + trns32*zc1;
float zc1d = trns13*xc1 + trns23*yc1 + trns33*zc1;
// --- Step2 A2' ---
float rc = 0.5*params.y;
float rb = sqrt(params.x*params.x-rc*rc);
float ra = rb*(m1+m2)/totalMass;
rb -= ra;
float sinphi = za1d / ra;
float cosphi = sqrt(1.0 - sinphi*sinphi);
float sinpsi = ( zb1d - zc1d ) / (2*rc*cosphi);
float cospsi = sqrt(1.0 - sinpsi*sinpsi);
float ya2d = ra * cosphi;
float xb2d = - rc * cospsi;
float yb2d = - rb * cosphi - rc *sinpsi * sinphi;
float yc2d = - rb * cosphi + rc *sinpsi * sinphi;
float xb2d2 = xb2d * xb2d;
float hh2 = 4.0 * xb2d2 + (yb2d-yc2d) * (yb2d-yc2d) + (zb1d-zc1d) * (zb1d-zc1d);
float deltx = 2.0 * xb2d + sqrt ( 4.0 * xb2d2 - hh2 + params.y*params.y );
xb2d -= deltx * 0.5;
// --- Step3 al,be,ga ---
float alpa = ( xb2d * (xb0d-xc0d) + yb0d * yb2d + yc0d * yc2d );
float beta = ( xb2d * (yc0d-yb0d) + xb0d * yb2d + xc0d * yc2d );
float gama = xb0d * yb1d - xb1d * yb0d + xc0d * yc1d - xc1d * yc0d;
float al2be2 = alpa * alpa + beta * beta;
float sinthe = ( alpa*gama - beta * sqrt ( al2be2 - gama * gama ) ) / al2be2;
// --- Step4 A3' ---
float costhe = sqrt (1.0 - sinthe * sinthe );
float xa3d = - ya2d * sinthe;
float ya3d = ya2d * costhe;
float za3d = za1d;
float xb3d = xb2d * costhe - yb2d * sinthe;
float yb3d = xb2d * sinthe + yb2d * costhe;
float zb3d = zb1d;
float xc3d = - xb2d * costhe - yc2d * sinthe;
float yc3d = - xb2d * sinthe + yc2d * costhe;
float zc3d = zc1d;
// --- Step5 A3 ---
float xa3 = trns11*xa3d + trns12*ya3d + trns13*za3d;
float ya3 = trns21*xa3d + trns22*ya3d + trns23*za3d;
float za3 = trns31*xa3d + trns32*ya3d + trns33*za3d;
float xb3 = trns11*xb3d + trns12*yb3d + trns13*zb3d;
float yb3 = trns21*xb3d + trns22*yb3d + trns23*zb3d;
float zb3 = trns31*xb3d + trns32*yb3d + trns33*zb3d;
float xc3 = trns11*xc3d + trns12*yc3d + trns13*zc3d;
float yc3 = trns21*xc3d + trns22*yc3d + trns23*zc3d;
float zc3 = trns31*xc3d + trns32*yc3d + trns33*zc3d;
xp0.x = xcom + xa3;
xp0.y = ycom + ya3;
xp0.z = zcom + za3;
xp1.x = xcom + xb3;
xp1.y = ycom + yb3;
xp1.z = zcom + zb3;
xp2.x = xcom + xc3;
xp2.y = ycom + yc3;
xp2.z = zcom + zc3;
cSim.pPosq[atomID.x] = xp0;
cSim.pPosq[atomID.y] = xp1;
cSim.pPosq[atomID.z] = xp2;
pos += blockDim.x * gridDim.x;
}
}
void kApplySecondSettle(gpuContext gpu)
{
// printf("kApplySecondSettle\n");
if (gpu->sim.settleConstraints > 0)
{
kApplySecondSettle_kernel<<<gpu->sim.blocks, gpu->sim.settle_threads_per_block>>>();
LAUNCHERROR("kApplySecondSettle");
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment