"plugins/vscode:/vscode.git/clone" did not exist on "80d8311ecb148f019fd9eb8493ad4f03d8f8e5f6"
Commit f4591cad authored by Yutong Zhao
Browse files

bugfix to ifdef conditions in nonbonded.cu

parent f2276667
...@@ -415,11 +415,14 @@ void CudaNonbondedUtilities::setAtomBlockRange(double startFraction, double endF ...@@ -415,11 +415,14 @@ void CudaNonbondedUtilities::setAtomBlockRange(double startFraction, double endF
numTiles = (int) (endFraction*totalTiles)-startTileIndex; numTiles = (int) (endFraction*totalTiles)-startTileIndex;
} }
#include <map>
CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source, vector<ParameterInfo>& params, vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric) { CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source, vector<ParameterInfo>& params, vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric) {
map<string, string> defines; map<string, string> defines;
if (context.getComputeCapability() >= 3.0 && !context.getUseDoublePrecision()) if (context.getComputeCapability() >= 3.0 && !context.getUseDoublePrecision()) {
defines["ENABLE_SHUFFLE"] = "1"; defines["ENABLE_SHUFFLE"] = "1";
}
map<string, string> replacements; map<string, string> replacements;
replacements["COMPUTE_INTERACTION"] = source; replacements["COMPUTE_INTERACTION"] = source;
...@@ -462,7 +465,12 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source, ...@@ -462,7 +465,12 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
} }
replacements["LOAD_ATOM1_PARAMETERS"] = load1.str(); replacements["LOAD_ATOM1_PARAMETERS"] = load1.str();
bool useShuffle = (defines["ENABLE_SHUFFLE"]=="1"); bool useShuffle;
if(defines.find("ENABLE_SHUFFLE") != defines.end()) {
useShuffle = true;
} else {
useShuffle = false;
}
// Part 1. Defines for on diagonal exclusion tiles // Part 1. Defines for on diagonal exclusion tiles
stringstream loadLocal1; stringstream loadLocal1;
...@@ -510,11 +518,6 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source, ...@@ -510,11 +518,6 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
if(useShuffle) { if(useShuffle) {
for(int i=0; i< (int) params.size(); i++) { for(int i=0; i< (int) params.size(); i++) {
declareLocal2<<params[i].getType()<<" shfl"<<params[i].getName()<<";\n"; declareLocal2<<params[i].getType()<<" shfl"<<params[i].getName()<<";\n";
//if (params[i].getNumComponents() == 1) {
//declareLocal2<<params[i].getType()<<" "<<params[i].getName()<<" = global_"<<params[i].getName()<<"[j];\n";
//} else {
// declareLocal2<<params[i].getType()<<" temp"<<params[i].getName()<<";\n";
//}
} }
} else { } else {
// not used if using shared memory // not used if using shared memory
...@@ -576,8 +579,7 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source, ...@@ -576,8 +579,7 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
shuffleWarpData<<"shfl"<<params[i].getName()<<"=real_shfl(shfl"<<params[i].getName()<<", tgx+1);\n"; shuffleWarpData<<"shfl"<<params[i].getName()<<"=real_shfl(shfl"<<params[i].getName()<<", tgx+1);\n";
} else { } else {
for(int j=0;j<params[i].getNumComponents();j++) { for(int j=0;j<params[i].getNumComponents();j++) {
// looks something like // looks something like shflsigmaEpsilon.x = real_shfl(shflsigmaEpsilon.x,tgx+1);
// shflsigmaEpsilon.x = real_shfl(shflsigmaEpsilon.x,tgx+1);
shuffleWarpData<<"shfl"<<params[i].getName() shuffleWarpData<<"shfl"<<params[i].getName()
<<"."<<suffixes[j]<<"=real_shfl(shfl" <<"."<<suffixes[j]<<"=real_shfl(shfl"
<<params[i].getName()<<"."<<suffixes[j] <<params[i].getName()<<"."<<suffixes[j]
...@@ -614,8 +616,8 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source, ...@@ -614,8 +616,8 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
defines["LAST_EXCLUSION_TILE"] = context.intToString(endExclusionIndex); defines["LAST_EXCLUSION_TILE"] = context.intToString(endExclusionIndex);
if ((localDataSize/4)%2 == 0 && !context.getUseDoublePrecision()) if ((localDataSize/4)%2 == 0 && !context.getUseDoublePrecision())
defines["PARAMETER_SIZE_IS_EVEN"] = "1"; defines["PARAMETER_SIZE_IS_EVEN"] = "1";
if (context.getComputeCapability() >= 3.0 && !context.getUseDoublePrecision()) //if (context.getComputeCapability() >= 3.0 && !context.getUseDoublePrecision())
defines["ENABLE_SHUFFLE"] = "1"; // defines["ENABLE_SHUFFLE"] = "1";
CUmodule program = context.createModule(CudaKernelSources::vectorOps+context.replaceStrings(CudaKernelSources::nonbonded, replacements), defines); CUmodule program = context.createModule(CudaKernelSources::vectorOps+context.replaceStrings(CudaKernelSources::nonbonded, replacements), defines);
CUfunction kernel = context.getKernel(program, "computeNonbonded"); CUfunction kernel = context.getKernel(program, "computeNonbonded");
......
...@@ -124,7 +124,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -124,7 +124,7 @@ extern "C" __global__ void computeNonbonded(
// This tile is on the diagonal. // This tile is on the diagonal.
#ifdef ENABLE_SHUFFLE #ifdef ENABLE_SHUFFLE
real4 shflPosq = posq1; real4 shflPosq = posq1;
#elif #else
localData[threadIdx.x].x = posq1.x; localData[threadIdx.x].x = posq1.x;
localData[threadIdx.x].y = posq1.y; localData[threadIdx.x].y = posq1.y;
localData[threadIdx.x].z = posq1.z; localData[threadIdx.x].z = posq1.z;
...@@ -139,7 +139,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -139,7 +139,7 @@ extern "C" __global__ void computeNonbonded(
real4 posq2; real4 posq2;
#ifdef ENABLE_SHUFFLE #ifdef ENABLE_SHUFFLE
BROADCAST_WARP_DATA BROADCAST_WARP_DATA
#elif #else
posq2 = make_real4(localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q); posq2 = make_real4(localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q);
#endif #endif
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z); real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
...@@ -188,7 +188,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -188,7 +188,7 @@ extern "C" __global__ void computeNonbonded(
shflForce.x = 0.0f; shflForce.x = 0.0f;
shflForce.y = 0.0f; shflForce.y = 0.0f;
shflForce.z = 0.0f; shflForce.z = 0.0f;
#elif #else
localData[threadIdx.x].x = shflPosq.x; localData[threadIdx.x].x = shflPosq.x;
localData[threadIdx.x].y = shflPosq.y; localData[threadIdx.x].y = shflPosq.y;
localData[threadIdx.x].z = shflPosq.z; localData[threadIdx.x].z = shflPosq.z;
...@@ -207,7 +207,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -207,7 +207,7 @@ extern "C" __global__ void computeNonbonded(
int atom2 = tbx+tj; int atom2 = tbx+tj;
#ifdef ENABLE_SHUFFLE #ifdef ENABLE_SHUFFLE
real4 posq2 = shflPosq; real4 posq2 = shflPosq;
#elif #else
real4 posq2 = make_real4(localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q); real4 posq2 = make_real4(localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q);
#endif #endif
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z); real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
...@@ -246,7 +246,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -246,7 +246,7 @@ extern "C" __global__ void computeNonbonded(
shflForce.y += delta.y; shflForce.y += delta.y;
shflForce.z += delta.z; shflForce.z += delta.z;
#elif #else
localData[tbx+tj].fx += delta.x; localData[tbx+tj].fx += delta.x;
localData[tbx+tj].fy += delta.y; localData[tbx+tj].fy += delta.y;
localData[tbx+tj].fz += delta.z; localData[tbx+tj].fz += delta.z;
...@@ -259,7 +259,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -259,7 +259,7 @@ extern "C" __global__ void computeNonbonded(
shflForce.x += dEdR2.x; shflForce.x += dEdR2.x;
shflForce.y += dEdR2.y; shflForce.y += dEdR2.y;
shflForce.z += dEdR2.z; shflForce.z += dEdR2.z;
#elif #else
localData[tbx+tj].fx += dEdR2.x; localData[tbx+tj].fx += dEdR2.x;
localData[tbx+tj].fy += dEdR2.y; localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z; localData[tbx+tj].fz += dEdR2.z;
...@@ -284,7 +284,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -284,7 +284,7 @@ extern "C" __global__ void computeNonbonded(
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (shflForce.x*0x100000000))); atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (shflForce.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (shflForce.y*0x100000000))); atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (shflForce.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (shflForce.z*0x100000000))); atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (shflForce.z*0x100000000)));
#elif #else
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fx*0x100000000))); atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fx*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fy*0x100000000))); atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fy*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fz*0x100000000))); atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fz*0x100000000)));
...@@ -383,7 +383,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -383,7 +383,7 @@ extern "C" __global__ void computeNonbonded(
// Load position of atom j from from global memory // Load position of atom j from from global memory
#ifdef ENABLE_SHUFFLE #ifdef ENABLE_SHUFFLE
shflPosq = posq[j]; shflPosq = posq[j];
#elif #else
localData[threadIdx.x].x = posq[j].x; localData[threadIdx.x].x = posq[j].x;
localData[threadIdx.x].y = posq[j].y; localData[threadIdx.x].y = posq[j].y;
localData[threadIdx.x].z = posq[j].z; localData[threadIdx.x].z = posq[j].z;
...@@ -406,7 +406,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -406,7 +406,7 @@ extern "C" __global__ void computeNonbonded(
shflPosq.x -= floor((shflPosq.x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; shflPosq.x -= floor((shflPosq.x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
shflPosq.y -= floor((shflPosq.y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y; shflPosq.y -= floor((shflPosq.y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
shflPosq.z -= floor((shflPosq.z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z; shflPosq.z -= floor((shflPosq.z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#elif #else
localData[threadIdx.x].x -= floor((localData[threadIdx.x].x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; localData[threadIdx.x].x -= floor((localData[threadIdx.x].x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
localData[threadIdx.x].y -= floor((localData[threadIdx.x].y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y; localData[threadIdx.x].y -= floor((localData[threadIdx.x].y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
localData[threadIdx.x].z -= floor((localData[threadIdx.x].z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z; localData[threadIdx.x].z -= floor((localData[threadIdx.x].z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
...@@ -416,7 +416,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -416,7 +416,7 @@ extern "C" __global__ void computeNonbonded(
int atom2 = tbx+tj; int atom2 = tbx+tj;
#ifdef ENABLE_SHUFFLE #ifdef ENABLE_SHUFFLE
real4 posq2 = shflPosq; real4 posq2 = shflPosq;
#elif #else
real4 posq2 = make_real4(localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q); real4 posq2 = make_real4(localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q);
#endif #endif
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z); real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
...@@ -448,7 +448,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -448,7 +448,7 @@ extern "C" __global__ void computeNonbonded(
shflForce.y += delta.y; shflForce.y += delta.y;
shflForce.z += delta.z; shflForce.z += delta.z;
#elif #else
localData[tbx+tj].fx += delta.x; localData[tbx+tj].fx += delta.x;
localData[tbx+tj].fy += delta.y; localData[tbx+tj].fy += delta.y;
localData[tbx+tj].fz += delta.z; localData[tbx+tj].fz += delta.z;
...@@ -461,7 +461,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -461,7 +461,7 @@ extern "C" __global__ void computeNonbonded(
shflForce.x += dEdR2.x; shflForce.x += dEdR2.x;
shflForce.y += dEdR2.y; shflForce.y += dEdR2.y;
shflForce.z += dEdR2.z; shflForce.z += dEdR2.z;
#elif #else
localData[tbx+tj].fx += dEdR2.x; localData[tbx+tj].fx += dEdR2.x;
localData[tbx+tj].fy += dEdR2.y; localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z; localData[tbx+tj].fz += dEdR2.z;
...@@ -483,7 +483,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -483,7 +483,7 @@ extern "C" __global__ void computeNonbonded(
int atom2 = tbx+tj; int atom2 = tbx+tj;
#ifdef ENABLE_SHUFFLE #ifdef ENABLE_SHUFFLE
real4 posq2 = shflPosq; real4 posq2 = shflPosq;
#elif #else
real4 posq2 = make_real4(localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q); real4 posq2 = make_real4(localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q);
#endif #endif
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z); real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
...@@ -522,7 +522,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -522,7 +522,7 @@ extern "C" __global__ void computeNonbonded(
shflForce.y += delta.y; shflForce.y += delta.y;
shflForce.z += delta.z; shflForce.z += delta.z;
#elif #else
localData[tbx+tj].fx += delta.x; localData[tbx+tj].fx += delta.x;
localData[tbx+tj].fy += delta.y; localData[tbx+tj].fy += delta.y;
localData[tbx+tj].fz += delta.z; localData[tbx+tj].fz += delta.z;
...@@ -535,7 +535,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -535,7 +535,7 @@ extern "C" __global__ void computeNonbonded(
shflForce.x += dEdR2.x; shflForce.x += dEdR2.x;
shflForce.y += dEdR2.y; shflForce.y += dEdR2.y;
shflForce.z += dEdR2.z; shflForce.z += dEdR2.z;
#elif #else
localData[tbx+tj].fx += dEdR2.x; localData[tbx+tj].fx += dEdR2.x;
localData[tbx+tj].fy += dEdR2.y; localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z; localData[tbx+tj].fz += dEdR2.z;
...@@ -565,7 +565,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -565,7 +565,7 @@ extern "C" __global__ void computeNonbonded(
atomicAdd(&forceBuffers[atom2], static_cast<unsigned long long>((long long) (shflForce.x*0x100000000))); atomicAdd(&forceBuffers[atom2], static_cast<unsigned long long>((long long) (shflForce.x*0x100000000)));
atomicAdd(&forceBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (shflForce.y*0x100000000))); atomicAdd(&forceBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (shflForce.y*0x100000000)));
atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (shflForce.z*0x100000000))); atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (shflForce.z*0x100000000)));
#elif #else
atomicAdd(&forceBuffers[atom2], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fx*0x100000000))); atomicAdd(&forceBuffers[atom2], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fx*0x100000000)));
atomicAdd(&forceBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fy*0x100000000))); atomicAdd(&forceBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fy*0x100000000)));
atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fz*0x100000000))); atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fz*0x100000000)));
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment