/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see . *
* -------------------------------------------------------------------------- */
#include "amoebaScaleFactors.h"
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif
void METHOD_NAME(kCalculateAmoebaPmeMutualInducedField, _kernel)(
unsigned int* workUnit,
float* outputField, float* outputFieldPolar
#ifdef AMOEBA_DEBUG
, float4* debugArray, unsigned int targetAtom
#endif
){
extern __shared__ MutualInducedParticle sA[];
unsigned int totalWarps = gridDim.x*blockDim.x/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int numWorkUnits = cSim.pInteractionCount[0];
unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps;
unsigned int lasty = 0xFFFFFFFF;
const float uscale = 1.0f;
#ifdef AMOEBA_DEBUG
float4 pullBack[4];
#endif
while (pos < end)
{
unsigned int x;
unsigned int y;
bool bExclusionFlag;
// Extract cell coordinates
decodeCell( workUnit[pos], &x, &y, &bExclusionFlag );
unsigned int tgx = threadIdx.x & (GRID - 1);
unsigned int tbx = threadIdx.x - tgx;
unsigned int tj = tgx;
MutualInducedParticle* psA = &sA[tbx];
unsigned int atomI = x + tgx;
MutualInducedParticle localParticle;
loadMutualInducedShared( &localParticle, atomI );
float fieldSum[3];
float fieldPolarSum[3];
// 0: field at i due to j
// 1: field at i due to j polar
fieldSum[0] = 0.0f;
fieldSum[1] = 0.0f;
fieldSum[2] = 0.0f;
fieldPolarSum[0] = 0.0f;
fieldPolarSum[1] = 0.0f;
fieldPolarSum[2] = 0.0f;
if (x == y) // Handle diagonals uniquely at 50% efficiency
{
// load shared data
loadMutualInducedShared( &(sA[threadIdx.x]), atomI );
for (unsigned int j = 0; j < GRID; j++)
{
// load coords, charge, ...
float4 delta;
float prefactor2;
if( ( (atomI != (y + j)) && (atomI < cSim.atoms) && ((y+j) < cSim.atoms) ) ){
setupMutualInducedFieldPairIxn_kernel( localParticle, psA[j], uscale, &delta, &prefactor2 );
//delta.w = prefactor2 = 0.0f;
calculateMutualInducedFieldPairIxn_kernel( psA[j].inducedDipole, delta, prefactor2, fieldSum );
calculateMutualInducedFieldPairIxn_kernel( psA[j].inducedDipolePolar, delta, prefactor2, fieldPolarSum );
}
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = 3*(x + tgx + warp*cSim.paddedNumberOfAtoms);
#else
unsigned int offset = 3*(x + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
#endif
load3dArray( offset, fieldSum, outputField );
load3dArray( offset, fieldPolarSum, outputFieldPolar);
} else {
if (lasty != y)
{
unsigned int atomJ = y + tgx;
loadMutualInducedShared( &(sA[threadIdx.x]), atomJ );
}
unsigned int flags = cSim.pInteractionFlag[pos];
if (flags == 0) {
// No interactions in this block.
} else {
#ifndef INCLUDE_MI_FIELD_BUFFERS
flags = 0xFFFFFFFF;
#endif
// zero shared fields
zeroMutualInducedParticleSharedField( &(sA[threadIdx.x]) );
for (unsigned int j = 0; j < GRID; j++){
if ((flags&(1<> GRIDBITS) * cSim.paddedNumberOfAtoms);
load3dArray( offset, fieldSum, outputField );
load3dArray( offset, fieldPolarSum, outputFieldPolar);
offset = 3*(y + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
load3dArray( offset, sA[threadIdx.x].field, outputField );
load3dArray( offset, sA[threadIdx.x].fieldPolar, outputFieldPolar);
#endif
lasty = y;
} // end of pInteractionFlag block
} // end of x == y block
pos++;
}
}