Commit 6943ef5b authored by Yutong Zhao
Browse files

Merge pull request #9 from peastman/master

Bug fixes to use of shuffle
parents 178aa003 8a8873c6
......@@ -416,12 +416,6 @@ void CudaNonbondedUtilities::setAtomBlockRange(double startFraction, double endF
}
CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source, vector<ParameterInfo>& params, vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric) {
map<string, string> defines;
if (context.getComputeCapability() >= 3.0 && !context.getUseDoublePrecision()) {
defines["ENABLE_SHUFFLE"] = "1";
}
map<string, string> replacements;
replacements["COMPUTE_INTERACTION"] = source;
const string suffixes[] = {"x", "y", "z", "w"};
......@@ -463,12 +457,7 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
}
replacements["LOAD_ATOM1_PARAMETERS"] = load1.str();
bool useShuffle;
if(defines.find("ENABLE_SHUFFLE") != defines.end()) {
useShuffle = true;
} else {
useShuffle = false;
}
bool useShuffle = (context.getComputeCapability() >= 3.0);
// Part 1. Defines for on diagonal exclusion tiles
stringstream loadLocal1;
......@@ -589,6 +578,7 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
}
replacements["SHUFFLE_WARP_DATA"] = shuffleWarpData.str();
map<string, string> defines;
if (useCutoff)
defines["USE_CUTOFF"] = "1";
if (usePeriodic)
......@@ -597,6 +587,8 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
defines["USE_EXCLUSIONS"] = "1";
if (isSymmetric)
defines["USE_SYMMETRIC"] = "1";
if (useShuffle)
defines["ENABLE_SHUFFLE"] = "1";
defines["THREAD_BLOCK_SIZE"] = context.intToString(forceThreadBlockSize);
defines["CUTOFF_SQUARED"] = context.doubleToString(cutoff*cutoff);
defines["CUTOFF"] = context.doubleToString(cutoff);
......
......@@ -12,6 +12,7 @@ typedef struct {
} AtomData;
#endif
#ifdef ENABLE_SHUFFLE
// Support for 64-bit shuffles
static __inline__ __device__ float real_shfl(float var, int srcLane) {
return __shfl(var, srcLane);
......@@ -24,6 +25,7 @@ static __inline__ __device__ double real_shfl(double var, int srcLane) {
lo = __shfl(lo, srcLane);
return __hiloint2double( hi, lo );
}
#endif
/**
* Compute nonbonded interactions. The kernel is separated into two parts,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment