Commit 072fc120 authored by Peter Eastman's avatar Peter Eastman
Browse files

Refactored nonbonded kernel to allow the same code to be used for other forces

parent d70147d5
...@@ -121,6 +121,10 @@ void OpenCLContext::addForce(OpenCLForceInfo* force) { ...@@ -121,6 +121,10 @@ void OpenCLContext::addForce(OpenCLForceInfo* force) {
} }
string OpenCLContext::loadSourceFromFile(const string& filename) const { string OpenCLContext::loadSourceFromFile(const string& filename) const {
return loadSourceFromFile(filename, map<string, string>());
}
string OpenCLContext::loadSourceFromFile(const string& filename, const std::map<std::string, std::string>& replacements) const {
ifstream file((Platform::getDefaultPluginsDirectory()+"/opencl/"+filename).c_str()); ifstream file((Platform::getDefaultPluginsDirectory()+"/opencl/"+filename).c_str());
if (!file.is_open()) if (!file.is_open())
throw OpenMMException("Unable to load kernel: "+filename); throw OpenMMException("Unable to load kernel: "+filename);
...@@ -132,6 +136,14 @@ string OpenCLContext::loadSourceFromFile(const string& filename) const { ...@@ -132,6 +136,14 @@ string OpenCLContext::loadSourceFromFile(const string& filename) const {
kernel += '\n'; kernel += '\n';
} }
file.close(); file.close();
for (map<string, string>::const_iterator iter = replacements.begin(); iter != replacements.end(); iter++) {
int index = -1;
do {
index = kernel.find(iter->first);
if (index != kernel.npos)
kernel.replace(index, iter->first.size(), iter->second);
} while (index != kernel.npos);
}
return kernel; return kernel;
} }
......
...@@ -137,6 +137,13 @@ public: ...@@ -137,6 +137,13 @@ public:
* Load OpenCL source code from a file in the kernels directory. * Load OpenCL source code from a file in the kernels directory.
*/ */
std::string loadSourceFromFile(const std::string& filename) const; std::string loadSourceFromFile(const std::string& filename) const;
/**
* Load OpenCL source code from a file in the kernels directory.
*
* @param filename the file to load
* @param replacements a set of strings that should be replaced with new strings wherever they appear in the
*/
std::string loadSourceFromFile(const std::string& filename, const std::map<std::string, std::string>& replacements) const;
/** /**
* Create an OpenCL Program from source code. * Create an OpenCL Program from source code.
*/ */
......
...@@ -576,7 +576,8 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -576,7 +576,8 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
// } // }
// data.nonbondedMethod = method; // data.nonbondedMethod = method;
// gpuSetCoulombParameters(gpu, 138.935485f, particle, c6, c12, q, symbol, exclusionList, method); // gpuSetCoulombParameters(gpu, 138.935485f, particle, c6, c12, q, symbol, exclusionList, method);
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getCutoffDistance(), exclusionList, defines); string source = cl.loadSourceFromFile("coulombLennardJones.cl", defines);
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getCutoffDistance(), exclusionList, source);
cl.getNonbondedUtilities().addParameter("sigmaEpsilon", "float2", 8, sigmaEpsilon->getDeviceBuffer()); cl.getNonbondedUtilities().addParameter("sigmaEpsilon", "float2", 8, sigmaEpsilon->getDeviceBuffer());
cutoffSquared = force.getCutoffDistance()*force.getCutoffDistance(); cutoffSquared = force.getCutoffDistance()*force.getCutoffDistance();
......
...@@ -68,7 +68,7 @@ OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() { ...@@ -68,7 +68,7 @@ OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() {
delete compact; delete compact;
} }
void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, double cutoffDistance, const vector<vector<int> >& exclusionList, const map<string, string>& defines) { void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel) {
if (cutoff != -1.0) { if (cutoff != -1.0) {
if (usesCutoff != useCutoff) if (usesCutoff != useCutoff)
throw OpenMMException("All Forces must agree on whether to use a cutoff"); throw OpenMMException("All Forces must agree on whether to use a cutoff");
...@@ -86,18 +86,13 @@ void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic ...@@ -86,18 +86,13 @@ void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic
} }
if (!sameExclusions) if (!sameExclusions)
throw OpenMMException("All Forces must have identical exceptions"); throw OpenMMException("All Forces must have identical exceptions");
for (map<string, string>::const_iterator iter = defines.begin(); iter != defines.end(); ++iter) {
map<string, string>::const_iterator existing = defines.find(iter->first);
if (existing != defines.end() && existing->second != iter->second)
throw OpenMMException("Two Forces define different values for "+iter->first);
}
} }
else { else {
useCutoff = usesCutoff; useCutoff = usesCutoff;
usePeriodic = usesPeriodic; usePeriodic = usesPeriodic;
cutoff = cutoffDistance; cutoff = cutoffDistance;
atomExclusions = exclusionList; atomExclusions = exclusionList;
kernelDefines.insert(defines.begin(), defines.end()); kernelSource += kernel+"\n";
} }
} }
...@@ -122,14 +117,58 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -122,14 +117,58 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
// Create kernels. // Create kernels.
map<string, string> defines = kernelDefines; map<string, string> replacements;
replacements["COMPUTE_INTERACTION"] = kernelSource;
stringstream args;
for (int i = 0; i < parameters.size(); i++) {
args << ", __global ";
args << parameters[i].type;
args << "* global_";
args << parameters[i].name;
args << ", __local ";
args << parameters[i].type;
args << "* local_";
args << parameters[i].name;
}
replacements["PARAMETER_ARGUMENTS"] = args.str();
stringstream load1;
for (int i = 0; i < parameters.size(); i++) {
load1 << parameters[i].type;
load1 << " ";
load1 << parameters[i].name;
load1 << "1 = global_";
load1 << parameters[i].name;
load1 << "[i];";
}
replacements["LOAD_ATOM1_PARAMETERS"] = load1.str();
stringstream load2j;
for (int i = 0; i < parameters.size(); i++) {
load2j << parameters[i].type;
load2j << " ";
load2j << parameters[i].name;
load2j << "2 = local_";
load2j << parameters[i].name;
load2j << "[tbx+j];";
}
replacements["LOAD_ATOM2_PARAMETERS_J"] = load2j.str();
stringstream load2tj;
for (int i = 0; i < parameters.size(); i++) {
load2tj << parameters[i].type;
load2tj << " ";
load2tj << parameters[i].name;
load2tj << "2 = local_";
load2tj << parameters[i].name;
load2tj << "[tbx+tj];";
}
replacements["LOAD_ATOM2_PARAMETERS_TJ"] = load2tj.str();
map<string, string> defines;
if (forceBufferPerAtomBlock) if (forceBufferPerAtomBlock)
defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1"; defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1";
if (useCutoff) if (useCutoff)
defines["USE_CUTOFF"] = "1"; defines["USE_CUTOFF"] = "1";
if (usePeriodic) if (usePeriodic)
defines["USE_PERIODIC"] = "1"; defines["USE_PERIODIC"] = "1";
cl::Program forceProgram = context.createProgram(context.loadSourceFromFile("nonbonded.cl"), defines); cl::Program forceProgram = context.createProgram(context.loadSourceFromFile("nonbonded.cl", replacements), defines);
forceKernel = cl::Kernel(forceProgram, "computeNonbonded"); forceKernel = cl::Kernel(forceProgram, "computeNonbonded");
cl::Program interactingBlocksProgram = context.createProgram(context.loadSourceFromFile("findInteractingBlocks.cl"), defines); cl::Program interactingBlocksProgram = context.createProgram(context.loadSourceFromFile("findInteractingBlocks.cl"), defines);
findBlockBoundsKernel = cl::Kernel(interactingBlocksProgram, "findBlockBounds"); findBlockBoundsKernel = cl::Kernel(interactingBlocksProgram, "findBlockBounds");
......
...@@ -52,9 +52,9 @@ public: ...@@ -52,9 +52,9 @@ public:
* @param usesPeriodic specifies whether periodic boundary conditions should be applied to this interaction * @param usesPeriodic specifies whether periodic boundary conditions should be applied to this interaction
* @param cutoffDistance the cutoff distance for this interaction (ignored if usesCutoff is false) * @param cutoffDistance the cutoff distance for this interaction (ignored if usesCutoff is false)
* @param exclusionList for each atom, specifies the list of other atoms whose interactions should be excluded * @param exclusionList for each atom, specifies the list of other atoms whose interactions should be excluded
* @param defines preprocessor macros to define when compiling the kernel * @param kernel the code to evaluate the interaction
*/ */
void addInteraction(bool usesCutoff, bool usesPeriodic, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::map<std::string, std::string>& defines); void addInteraction(bool usesCutoff, bool usesPeriodic, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel);
/** /**
* Add a per-atom parameter that interactions may depend on. * Add a per-atom parameter that interactions may depend on.
* *
...@@ -143,6 +143,7 @@ private: ...@@ -143,6 +143,7 @@ private:
std::vector<std::vector<int> > atomExclusions; std::vector<std::vector<int> > atomExclusions;
std::vector<ParameterInfo> parameters; std::vector<ParameterInfo> parameters;
OpenCLCompact* compact; OpenCLCompact* compact;
std::string kernelSource;
std::map<std::string, std::string> kernelDefines; std::map<std::string, std::string> kernelDefines;
double cutoff; double cutoff;
bool useCutoff, usePeriodic, forceBufferPerAtomBlock, hasComputedInteractions; bool useCutoff, usePeriodic, forceBufferPerAtomBlock, hasComputedInteractions;
......
{
float sig = sigmaEpsilon1.x + sigmaEpsilon2.x;
float sig2 = invR*sig;
sig2 *= sig2;
float sig6 = sig2*sig2*sig2;
float eps = sigmaEpsilon1.y*sigmaEpsilon2.y;
float tempForce = eps*(12.0f*sig6 - 6.0f)*sig6;
tempEnergy += eps*(sig6 - 1.0f)*sig6;
const float EpsilonFactor = 138.935485f;
#ifdef USE_CUTOFF
tempForce += EpsilonFactor*posq1.w*posq2.w*(invR - 2.0f*REACTION_FIELD_K*r2);
tempEnergy += EpsilonFactor*posq1.w*posq2.w*(invR + REACTION_FIELD_K*r2 - REACTION_FIELD_C);
#else
tempForce += EpsilonFactor*posq1.w*posq2.w*invR;
tempEnergy += EpsilonFactor*posq1.w*posq2.w*invR;
#endif
dEdR += tempForce*invR*invR;
}
const unsigned int TileSize = 32; const unsigned int TileSize = 32;
const float EpsilonFactor = 138.935485f;
/** /**
* Compute nonbonded interactions. * Compute nonbonded interactions.
...@@ -7,11 +6,11 @@ const float EpsilonFactor = 138.935485f; ...@@ -7,11 +6,11 @@ const float EpsilonFactor = 138.935485f;
__kernel void computeNonbonded(int numTiles, int paddedNumAtoms, __kernel void computeNonbonded(int numTiles, int paddedNumAtoms,
__global float4* forceBuffers, __global float* energyBuffer, __global float4* posq, __global unsigned int* tiles, __global float4* forceBuffers, __global float* energyBuffer, __global float4* posq, __global unsigned int* tiles,
__global unsigned int* exclusions, __global unsigned int* exclusionIndices, __local float4* local_posq, __local float4* local_force, __global unsigned int* exclusions, __global unsigned int* exclusionIndices, __local float4* local_posq, __local float4* local_force
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
float cutoffSquared, float4 periodicBoxSize, __global unsigned int* interactionFlags, __local float4* tempBuffer, , float cutoffSquared, float4 periodicBoxSize, __global unsigned int* interactionFlags, __local float4* tempBuffer
#endif #endif
__global float2* sigmaEpsilon, __local float2* local_sigmaEpsilon) { PARAMETER_ARGUMENTS) {
unsigned int totalWarps = get_global_size(0)/TileSize; unsigned int totalWarps = get_global_size(0)/TileSize;
unsigned int warp = get_global_id(0)/TileSize; unsigned int warp = get_global_id(0)/TileSize;
unsigned int pos = warp*numTiles/totalWarps; unsigned int pos = warp*numTiles/totalWarps;
...@@ -20,54 +19,42 @@ __kernel void computeNonbonded(int numTiles, int paddedNumAtoms, ...@@ -20,54 +19,42 @@ __kernel void computeNonbonded(int numTiles, int paddedNumAtoms,
unsigned int lasty = 0xFFFFFFFF; unsigned int lasty = 0xFFFFFFFF;
while (pos < end) { while (pos < end) {
// Extract tile coordinates from appropriate work unit // Extract the coordinates of this tile
unsigned int x = tiles[pos]; unsigned int x = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff)*TileSize; unsigned int y = ((x >> 2) & 0x7fff)*TileSize;
bool hasExclusions = (x & 0x1); bool hasExclusions = (x & 0x1);
x = (x>>17)*TileSize; x = (x>>17)*TileSize;
float4 apos; // Local atom x, y, z, q
float4 af = 0.0f; // Local atom fx, fy, fz
unsigned int tgx = get_local_id(0) & (TileSize-1); unsigned int tgx = get_local_id(0) & (TileSize-1);
unsigned int tbx = get_local_id(0) - tgx; unsigned int tbx = get_local_id(0) - tgx;
unsigned int tj = tgx; unsigned int tj = tgx;
unsigned int i = x + tgx; unsigned int i = x + tgx;
apos = posq[i]; float4 force = 0.0f;
float2 a = sigmaEpsilon[i]; float4 posq1 = posq[i];
LOAD_ATOM1_PARAMETERS
if (x == y) { if (x == y) {
// Handle diagonals uniquely at 50% efficiency // This tile is on the diagonal.
// Read fixed atom data into registers and GRF
local_posq[get_local_id(0)] = apos; local_posq[get_local_id(0)] = posq1;
local_sigmaEpsilon[get_local_id(0)] = a; local_sigmaEpsilon[get_local_id(0)] = sigmaEpsilon1;
apos.w *= EpsilonFactor;
unsigned int xi = x/TileSize; unsigned int xi = x/TileSize;
unsigned int tile = xi+xi*paddedNumAtoms/TileSize-xi*(xi+1)/2; unsigned int tile = xi+xi*paddedNumAtoms/TileSize-xi*(xi+1)/2;
unsigned int excl = exclusions[exclusionIndices[tile]+tgx]; unsigned int excl = exclusions[exclusionIndices[tile]+tgx];
for (unsigned int j = 0; j < TileSize; j++) { for (unsigned int j = 0; j < TileSize; j++) {
bool isExcluded = !(excl & 0x1); bool isExcluded = !(excl & 0x1);
float4 delta = (float4) (local_posq[tbx+j].xyz - apos.xyz, 0.0f); float4 delta = (float4) (local_posq[tbx+j].xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/periodicBoxSize.x+0.5f)*periodicBoxSize.x; delta.x -= floor(delta.x/periodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y/periodicBoxSize.y+0.5f)*periodicBoxSize.y; delta.y -= floor(delta.y/periodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z/periodicBoxSize.z+0.5f)*periodicBoxSize.z; delta.z -= floor(delta.z/periodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
float r2 = delta.x * delta.x + delta.y * delta.y + delta.z * delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float invR = 1.0f / sqrt(r2); float r = sqrt(r2);
float sig = a.x + local_sigmaEpsilon[tbx+j].x; float invR = 1.0f / r;
float sig2 = invR * sig; float4 posq2 = local_posq[tbx+j];
sig2 *= sig2; LOAD_ATOM2_PARAMETERS_J
float sig6 = sig2 * sig2 * sig2; float dEdR = 0.0f;
float eps = a.y * local_sigmaEpsilon[tbx+j].y; float tempEnergy = 0.0f;
float dEdR = eps * (12.0f * sig6 - 6.0f) * sig6; COMPUTE_INTERACTION
float tempEnergy = eps * (sig6 - 1.0f) * sig6;
#ifdef USE_CUTOFF
dEdR += apos.w * local_posq[tbx+j].w * (invR - 2.0f * REACTION_FIELD_K * r2);
tempEnergy += apos.w * local_posq[tbx+j].w * (invR + REACTION_FIELD_K * r2 - REACTION_FIELD_C);
#else
dEdR += apos.w * local_posq[tbx+j].w * invR;
tempEnergy += apos.w * local_posq[tbx+j].w * invR;
#endif
dEdR *= invR * invR;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (isExcluded || r2 > cutoffSquared) { if (isExcluded || r2 > cutoffSquared) {
#else #else
...@@ -78,35 +65,33 @@ __kernel void computeNonbonded(int numTiles, int paddedNumAtoms, ...@@ -78,35 +65,33 @@ __kernel void computeNonbonded(int numTiles, int paddedNumAtoms,
} }
energy += 0.5f*tempEnergy; energy += 0.5f*tempEnergy;
delta.xyz *= dEdR; delta.xyz *= dEdR;
af.xyz -= delta.xyz; force.xyz -= delta.xyz;
excl >>= 1; excl >>= 1;
} }
// Write results // Write results
float4 of; float4 of;
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK #ifdef USE_OUTPUT_BUFFER_PER_BLOCK
of.xyz = af.xyz; of.xyz = force.xyz;
of.w = 0.0f; of.w = 0.0f;
unsigned int offset = x + tgx + (x/TileSize) * paddedNumAtoms; unsigned int offset = x + tgx + (x/TileSize)*paddedNumAtoms;
forceBuffers[offset] = of; forceBuffers[offset] = of;
#else #else
unsigned int offset = x + tgx + warp*paddedNumAtoms; unsigned int offset = x + tgx + warp*paddedNumAtoms;
of = forceBuffers[offset]; of = forceBuffers[offset];
of.xyz += af.xyz; of.xyz += force.xyz;
forceBuffers[offset] = of; forceBuffers[offset] = of;
#endif #endif
} }
else { else {
// 100% utilization // This is an off-diagonal tile.
// Read fixed atom data into registers and GRF
if (lasty != y) { if (lasty != y) {
unsigned int j = y + tgx; unsigned int j = y + tgx;
float2 temp1 = sigmaEpsilon[j];
local_posq[get_local_id(0)] = posq[j]; local_posq[get_local_id(0)] = posq[j];
local_sigmaEpsilon[get_local_id(0)] = sigmaEpsilon[j]; local_sigmaEpsilon[get_local_id(0)] = global_sigmaEpsilon[j];
} }
local_force[get_local_id(0)] = 0.0f; local_force[get_local_id(0)] = 0.0f;
apos.w *= EpsilonFactor;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int flags = interactionFlags[pos]; unsigned int flags = interactionFlags[pos];
if (!hasExclusions && flags != 0xFFFFFFFF) { if (!hasExclusions && flags != 0xFFFFFFFF) {
...@@ -119,29 +104,20 @@ __kernel void computeNonbonded(int numTiles, int paddedNumAtoms, ...@@ -119,29 +104,20 @@ __kernel void computeNonbonded(int numTiles, int paddedNumAtoms,
for (unsigned int j = 0; j < TileSize; j++) { for (unsigned int j = 0; j < TileSize; j++) {
if ((flags&(1<<j)) != 0) { if ((flags&(1<<j)) != 0) {
bool isExcluded = false; bool isExcluded = false;
float4 delta = (float4) (local_posq[tbx+j].xyz - apos.xyz, 0.0f); float4 delta = (float4) (local_posq[tbx+j].xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/periodicBoxSize.x+0.5f)*periodicBoxSize.x; delta.x -= floor(delta.x/periodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y/periodicBoxSize.y+0.5f)*periodicBoxSize.y; delta.y -= floor(delta.y/periodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z/periodicBoxSize.z+0.5f)*periodicBoxSize.z; delta.z -= floor(delta.z/periodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
float r2 = delta.x * delta.x + delta.y * delta.y + delta.z * delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float invR = 1.0f / sqrt(r2); float r = sqrt(r2);
float sig = a.x + local_sigmaEpsilon[tbx+j].x; float invR = 1.0f / r;
float sig2 = invR * sig; float4 posq2 = local_posq[tbx+j];
sig2 *= sig2; LOAD_ATOM2_PARAMETERS_J
float sig6 = sig2 * sig2 * sig2; float dEdR = 0.0f;
float eps = a.y * local_sigmaEpsilon[tbx+j].y; float tempEnergy = 0.0f;
float dEdR = eps * (12.0f * sig6 - 6.0f) * sig6; COMPUTE_INTERACTION
float tempEnergy = eps * (sig6 - 1.0f) * sig6;
#ifdef USE_CUTOFF
dEdR += apos.w * local_posq[tbx+j].w * (invR - 2.0f * REACTION_FIELD_K * r2);
tempEnergy += apos.w * local_posq[tbx+j].w * (invR + REACTION_FIELD_K * r2 - REACTION_FIELD_C);
#else
dEdR += apos.w * local_posq[tbx+j].w * invR;
tempEnergy += apos.w * local_posq[tbx+j].w * invR;
#endif
dEdR *= invR * invR;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (r2 > cutoffSquared) { if (r2 > cutoffSquared) {
dEdR = 0.0f; dEdR = 0.0f;
...@@ -150,7 +126,7 @@ __kernel void computeNonbonded(int numTiles, int paddedNumAtoms, ...@@ -150,7 +126,7 @@ __kernel void computeNonbonded(int numTiles, int paddedNumAtoms,
#endif #endif
energy += tempEnergy; energy += tempEnergy;
delta.xyz *= dEdR; delta.xyz *= dEdR;
af.xyz -= delta.xyz; force.xyz -= delta.xyz;
tempBuffer[get_local_id(0)] = delta; tempBuffer[get_local_id(0)] = delta;
// Sum the forces on atom j. // Sum the forces on atom j.
...@@ -172,7 +148,8 @@ __kernel void computeNonbonded(int numTiles, int paddedNumAtoms, ...@@ -172,7 +148,8 @@ __kernel void computeNonbonded(int numTiles, int paddedNumAtoms,
else else
#endif #endif
{ {
// Read fixed atom data into registers and GRF // Compute the full set of interactions in this tile.
unsigned int xi = x/TileSize; unsigned int xi = x/TileSize;
unsigned int yi = y/TileSize; unsigned int yi = y/TileSize;
unsigned int tile = xi+yi*paddedNumAtoms/TileSize-yi*(yi+1)/2; unsigned int tile = xi+yi*paddedNumAtoms/TileSize-yi*(yi+1)/2;
...@@ -180,29 +157,20 @@ __kernel void computeNonbonded(int numTiles, int paddedNumAtoms, ...@@ -180,29 +157,20 @@ __kernel void computeNonbonded(int numTiles, int paddedNumAtoms,
excl = (excl >> tgx) | (excl << (TileSize - tgx)); excl = (excl >> tgx) | (excl << (TileSize - tgx));
for (unsigned int j = 0; j < TileSize; j++) { for (unsigned int j = 0; j < TileSize; j++) {
bool isExcluded = !(excl & 0x1); bool isExcluded = !(excl & 0x1);
float4 delta = (float4) (local_posq[tbx+tj].xyz - apos.xyz, 0.0f); float4 delta = (float4) (local_posq[tbx+tj].xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/periodicBoxSize.x+0.5f)*periodicBoxSize.x; delta.x -= floor(delta.x/periodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y/periodicBoxSize.y+0.5f)*periodicBoxSize.y; delta.y -= floor(delta.y/periodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z/periodicBoxSize.z+0.5f)*periodicBoxSize.z; delta.z -= floor(delta.z/periodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
float r2 = delta.x * delta.x + delta.y * delta.y + delta.z * delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float invR = 1.0f / sqrt(r2); float r = sqrt(r2);
float sig = a.x + local_sigmaEpsilon[tbx+tj].x; float invR = 1.0f / r;
float sig2 = invR * sig; float4 posq2 = local_posq[tbx+tj];
sig2 *= sig2; LOAD_ATOM2_PARAMETERS_TJ
float sig6 = sig2 * sig2 * sig2; float dEdR = 0.0f;
float eps = a.y * local_sigmaEpsilon[tbx+tj].y; float tempEnergy = 0.0f;
float dEdR = eps * (12.0f * sig6 - 6.0f) * sig6; COMPUTE_INTERACTION
float tempEnergy = eps * (sig6 - 1.0f) * sig6;
#ifdef USE_CUTOFF
dEdR += apos.w * local_posq[tbx+tj].w * (invR - 2.0f * REACTION_FIELD_K * r2);
tempEnergy += apos.w * local_posq[tbx+tj].w * (invR + REACTION_FIELD_K * r2 - REACTION_FIELD_C);
#else
dEdR += apos.w * local_posq[tbx+tj].w * invR;
tempEnergy += apos.w * local_posq[tbx+tj].w * invR;
#endif
dEdR *= invR * invR;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (isExcluded || r2 > cutoffSquared) { if (isExcluded || r2 > cutoffSquared) {
#else #else
...@@ -213,7 +181,7 @@ __kernel void computeNonbonded(int numTiles, int paddedNumAtoms, ...@@ -213,7 +181,7 @@ __kernel void computeNonbonded(int numTiles, int paddedNumAtoms,
} }
energy += tempEnergy; energy += tempEnergy;
delta.xyz *= dEdR; delta.xyz *= dEdR;
af.xyz -= delta.xyz; force.xyz -= delta.xyz;
local_force[tbx+tj].xyz += delta.xyz; local_force[tbx+tj].xyz += delta.xyz;
excl >>= 1; excl >>= 1;
tj = (tj + 1) & (TileSize - 1); tj = (tj + 1) & (TileSize - 1);
...@@ -223,17 +191,17 @@ __kernel void computeNonbonded(int numTiles, int paddedNumAtoms, ...@@ -223,17 +191,17 @@ __kernel void computeNonbonded(int numTiles, int paddedNumAtoms,
// Write results // Write results
float4 of; float4 of;
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK #ifdef USE_OUTPUT_BUFFER_PER_BLOCK
of.xyz = af.xyz; of.xyz = force.xyz;
of.w = 0.0f; of.w = 0.0f;
unsigned int offset = x + tgx + (y/TileSize) * paddedNumAtoms; unsigned int offset = x + tgx + (y/TileSize)*paddedNumAtoms;
forceBuffers[offset] = of; forceBuffers[offset] = of;
of = local_force[get_local_id(0)]; of = local_force[get_local_id(0)];
offset = y + tgx + (x/TileSize) * paddedNumAtoms; offset = y + tgx + (x/TileSize)*paddedNumAtoms;
forceBuffers[offset] = of; forceBuffers[offset] = of;
#else #else
unsigned int offset = x + tgx + warp*paddedNumAtoms; unsigned int offset = x + tgx + warp*paddedNumAtoms;
of = forceBuffers[offset]; of = forceBuffers[offset];
of.xyz += af.xyz; of.xyz += force.xyz;
forceBuffers[offset] = of; forceBuffers[offset] = of;
offset = y + tgx + warp*paddedNumAtoms; offset = y + tgx + warp*paddedNumAtoms;
of = forceBuffers[offset]; of = forceBuffers[offset];
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment