Commit 69e75377 authored by Peter Eastman's avatar Peter Eastman
Browse files

Added "const" and "restrict" to lots of kernel arguments to let the compiler do more optimizations

parent bf8b9f30
...@@ -154,12 +154,12 @@ void OpenCLBondedUtilities::initialize(const System& system) { ...@@ -154,12 +154,12 @@ void OpenCLBondedUtilities::initialize(const System& system) {
const vector<int>& set = *iter; const vector<int>& set = *iter;
int setSize = set.size(); int setSize = set.size();
stringstream s; stringstream s;
s<<"__kernel void computeBondedForces(__global float4* forceBuffers, __global float* energyBuffer, __global float4* posq"; s<<"__kernel void computeBondedForces(__global float4* restrict forceBuffers, __global float* restrict energyBuffer, __global const float4* restrict posq";
for (int i = 0; i < setSize; i++) { for (int i = 0; i < setSize; i++) {
int force = set[i]; int force = set[i];
string indexType = "uint"+(indexWidth[force] == 1 ? "" : OpenCLExpressionUtilities::intToString(indexWidth[force])); string indexType = "uint"+(indexWidth[force] == 1 ? "" : OpenCLExpressionUtilities::intToString(indexWidth[force]));
s<<", __global "<<indexType<<"* atomIndices"<<i; s<<", __global const "<<indexType<<"* restrict atomIndices"<<i;
s<<", __global "<<indexType<<"* bufferIndices"<<i; s<<", __global const "<<indexType<<"* restrict bufferIndices"<<i;
} }
for (int i = 0; i < (int) arguments.size(); i++) for (int i = 0; i < (int) arguments.size(); i++)
s<<", __global "<<argTypes[i]<<"* customArg"<<(i+1); s<<", __global "<<argTypes[i]<<"* customArg"<<(i+1);
......
...@@ -1739,7 +1739,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -1739,7 +1739,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
tabulatedFunctions.push_back(new OpenCLArray<mm_float4>(cl, values.size()-1, "TabulatedFunction")); tabulatedFunctions.push_back(new OpenCLArray<mm_float4>(cl, values.size()-1, "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f); tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float", 4, sizeof(cl_float4), tabulatedFunctions[tabulatedFunctions.size()-1]->getDeviceBuffer())); cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float", 4, sizeof(cl_float4), tabulatedFunctions[tabulatedFunctions.size()-1]->getDeviceBuffer()));
tableArgs << ", __global float4* " << arrayName; tableArgs << ", __global const float4* restrict " << arrayName;
} }
if (force.getNumFunctions() > 0) { if (force.getNumFunctions() > 0) {
tabulatedFunctionParams = new OpenCLArray<mm_float4>(cl, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", false, CL_MEM_READ_ONLY); tabulatedFunctionParams = new OpenCLArray<mm_float4>(cl, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", false, CL_MEM_READ_ONLY);
...@@ -1837,7 +1837,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -1837,7 +1837,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = "params"+intToString(i+1); string paramName = "params"+intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* global_" << paramName << ", __local " << buffer.getType() << "* local_" << paramName; extraArgs << ", __global const " << buffer.getType() << "* restrict global_" << paramName << ", __local " << buffer.getType() << "* restrict local_" << paramName;
loadLocal1 << "local_" << paramName << "[localAtomIndex] = " << paramName << "1;\n"; loadLocal1 << "local_" << paramName << "[localAtomIndex] = " << paramName << "1;\n";
loadLocal2 << "local_" << paramName << "[localAtomIndex] = global_" << paramName << "[j];\n"; loadLocal2 << "local_" << paramName << "[localAtomIndex] = global_" << paramName << "[j];\n";
load1 << buffer.getType() << " " << paramName << "1 = global_" << paramName << "[atom1];\n"; load1 << buffer.getType() << " " << paramName << "1 = global_" << paramName << "[atom1];\n";
...@@ -1884,12 +1884,12 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -1884,12 +1884,12 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = "params"+intToString(i+1); string paramName = "params"+intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* " << paramName; extraArgs << ", __global const " << buffer.getType() << "* restrict " << paramName;
} }
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) { for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
string valueName = "values"+intToString(i+1); string valueName = "values"+intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* global_" << valueName; extraArgs << ", __global " << buffer.getType() << "* restrict global_" << valueName;
reductionSource << buffer.getType() << " local_" << valueName << ";\n"; reductionSource << buffer.getType() << " local_" << valueName << ";\n";
} }
reductionSource << "local_values" << computedValues->getParameterSuffix(0) << " = sum;\n"; reductionSource << "local_values" << computedValues->getParameterSuffix(0) << " = sum;\n";
...@@ -1977,7 +1977,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -1977,7 +1977,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = "params"+intToString(i+1); string paramName = "params"+intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* global_" << paramName << ", __local " << buffer.getType() << "* local_" << paramName; extraArgs << ", __global const " << buffer.getType() << "* restrict global_" << paramName << ", __local " << buffer.getType() << "* restrict local_" << paramName;
loadLocal1 << "local_" << paramName << "[localAtomIndex] = " << paramName << "1;\n"; loadLocal1 << "local_" << paramName << "[localAtomIndex] = " << paramName << "1;\n";
loadLocal2 << "local_" << paramName << "[localAtomIndex] = global_" << paramName << "[j];\n"; loadLocal2 << "local_" << paramName << "[localAtomIndex] = global_" << paramName << "[j];\n";
load1 << buffer.getType() << " " << paramName << "1 = global_" << paramName << "[atom1];\n"; load1 << buffer.getType() << " " << paramName << "1 = global_" << paramName << "[atom1];\n";
...@@ -1986,17 +1986,17 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -1986,17 +1986,17 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) { for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
string valueName = "values"+intToString(i+1); string valueName = "values"+intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* global_" << valueName << ", __local " << buffer.getType() << "* local_" << valueName; extraArgs << ", __global const " << buffer.getType() << "* restrict global_" << valueName << ", __local " << buffer.getType() << "* restrict local_" << valueName;
loadLocal1 << "local_" << valueName << "[localAtomIndex] = " << valueName << "1;\n"; loadLocal1 << "local_" << valueName << "[localAtomIndex] = " << valueName << "1;\n";
loadLocal2 << "local_" << valueName << "[localAtomIndex] = global_" << valueName << "[j];\n"; loadLocal2 << "local_" << valueName << "[localAtomIndex] = global_" << valueName << "[j];\n";
load1 << buffer.getType() << " " << valueName << "1 = global_" << valueName << "[atom1];\n"; load1 << buffer.getType() << " " << valueName << "1 = global_" << valueName << "[atom1];\n";
load2 << buffer.getType() << " " << valueName << "2 = local_" << valueName << "[atom2];\n"; load2 << buffer.getType() << " " << valueName << "2 = local_" << valueName << "[atom2];\n";
} }
if (useLong) { if (useLong) {
extraArgs << ", __global long* derivBuffers"; extraArgs << ", __global long* restrict derivBuffers";
for (int i = 0; i < force.getNumComputedValues(); i++) { for (int i = 0; i < force.getNumComputedValues(); i++) {
string index = intToString(i+1); string index = intToString(i+1);
extraArgs << ", __local float* local_deriv" << index; extraArgs << ", __local float* restrict local_deriv" << index;
clearLocal << "local_deriv" << index << "[localAtomIndex] = 0.0f;\n"; clearLocal << "local_deriv" << index << "[localAtomIndex] = 0.0f;\n";
declare1 << "float deriv" << index << "_1 = 0.0f;\n"; declare1 << "float deriv" << index << "_1 = 0.0f;\n";
load2 << "float deriv" << index << "_2 = 0.0f;\n"; load2 << "float deriv" << index << "_2 = 0.0f;\n";
...@@ -2011,7 +2011,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -2011,7 +2011,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) { for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
string index = intToString(i+1); string index = intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* derivBuffers" << index << ", __local " << buffer.getType() << "* local_deriv" << index; extraArgs << ", __global " << buffer.getType() << "* restrict derivBuffers" << index << ", __local " << buffer.getType() << "* restrict local_deriv" << index;
clearLocal << "local_deriv" << index << "[localAtomIndex] = 0.0f;\n"; clearLocal << "local_deriv" << index << "[localAtomIndex] = 0.0f;\n";
declare1 << buffer.getType() << " deriv" << index << "_1 = 0.0f;\n"; declare1 << buffer.getType() << " deriv" << index << "_1 = 0.0f;\n";
load2 << buffer.getType() << " deriv" << index << "_2 = 0.0f;\n"; load2 << buffer.getType() << " deriv" << index << "_2 = 0.0f;\n";
...@@ -2068,21 +2068,21 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -2068,21 +2068,21 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = "params"+intToString(i+1); string paramName = "params"+intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* " << paramName; extraArgs << ", __global const " << buffer.getType() << "* restrict " << paramName;
} }
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) { for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
string valueName = "values"+intToString(i+1); string valueName = "values"+intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* " << valueName; extraArgs << ", __global const " << buffer.getType() << "* restrict " << valueName;
} }
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) { for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
string index = intToString(i+1); string index = intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* derivBuffers" << index; extraArgs << ", __global " << buffer.getType() << "* restrict derivBuffers" << index;
compute << buffer.getType() << " deriv" << index << " = derivBuffers" << index << "[index];\n"; compute << buffer.getType() << " deriv" << index << " = derivBuffers" << index << "[index];\n";
} }
if (useLong) { if (useLong) {
extraArgs << ", __global long* derivBuffersIn"; extraArgs << ", __global const long* restrict derivBuffersIn";
for (int i = 0; i < energyDerivs->getNumParameters(); ++i) for (int i = 0; i < energyDerivs->getNumParameters(); ++i)
reduce << "derivBuffers" << energyDerivs->getParameterSuffix(i, "[index]") << reduce << "derivBuffers" << energyDerivs->getParameterSuffix(i, "[index]") <<
" = (1.0f/0xFFFFFFFF)*derivBuffersIn[index+PADDED_NUM_ATOMS*" << intToString(i) << "];\n"; " = (1.0f/0xFFFFFFFF)*derivBuffersIn[index+PADDED_NUM_ATOMS*" << intToString(i) << "];\n";
...@@ -2147,17 +2147,17 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -2147,17 +2147,17 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = "params"+intToString(i+1); string paramName = "params"+intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* " << paramName; extraArgs << ", __global const " << buffer.getType() << "* restrict " << paramName;
} }
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) { for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
string valueName = "values"+intToString(i+1); string valueName = "values"+intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* " << valueName; extraArgs << ", __global const " << buffer.getType() << "* restrict " << valueName;
} }
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) { for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
string index = intToString(i+1); string index = intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* derivBuffers" << index; extraArgs << ", __global " << buffer.getType() << "* restrict derivBuffers" << index;
compute << buffer.getType() << " deriv" << index << " = derivBuffers" << index << "[index];\n"; compute << buffer.getType() << " deriv" << index << " = derivBuffers" << index << "[index];\n";
} }
map<string, string> variables; map<string, string> variables;
...@@ -2866,12 +2866,12 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu ...@@ -2866,12 +2866,12 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
vector<mm_float4> f = OpenCLExpressionUtilities::computeFunctionCoefficients(values, min, max); vector<mm_float4> f = OpenCLExpressionUtilities::computeFunctionCoefficients(values, min, max);
tabulatedFunctions.push_back(new OpenCLArray<mm_float4>(cl, values.size()-1, "TabulatedFunction")); tabulatedFunctions.push_back(new OpenCLArray<mm_float4>(cl, values.size()-1, "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f); tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
tableArgs << ", __global float4* " << arrayName; tableArgs << ", __global const float4* restrict " << arrayName;
} }
if (force.getNumFunctions() > 0) { if (force.getNumFunctions() > 0) {
tabulatedFunctionParams = new OpenCLArray<mm_float4>(cl, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", false, CL_MEM_READ_ONLY); tabulatedFunctionParams = new OpenCLArray<mm_float4>(cl, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", false, CL_MEM_READ_ONLY);
tabulatedFunctionParams->upload(tabulatedFunctionParamsVec); tabulatedFunctionParams->upload(tabulatedFunctionParamsVec);
tableArgs << ", __global float4* functionParams"; tableArgs << ", __global const float4* restrict functionParams";
} }
// Record information about parameters. // Record information about parameters.
...@@ -2973,15 +2973,15 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu ...@@ -2973,15 +2973,15 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
// Next it needs to load parameters from global memory. // Next it needs to load parameters from global memory.
if (force.getNumGlobalParameters() > 0) if (force.getNumGlobalParameters() > 0)
extraArgs << ", __global float* globals"; extraArgs << ", __global const float* restrict globals";
for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) { for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i];
extraArgs << ", __global "+buffer.getType()+"* donor"+buffer.getName(); extraArgs << ", __global const "+buffer.getType()+"* restrict donor"+buffer.getName();
addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" donorParams"+intToString(i+1)+" = donor"+buffer.getName()+"[index];\n"); addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" donorParams"+intToString(i+1)+" = donor"+buffer.getName()+"[index];\n");
} }
for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) { for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i];
extraArgs << ", __global "+buffer.getType()+"* acceptor"+buffer.getName(); extraArgs << ", __global const "+buffer.getType()+"* restrict acceptor"+buffer.getName();
addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" acceptorParams"+intToString(i+1)+" = acceptor"+buffer.getName()+"[index];\n"); addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" acceptorParams"+intToString(i+1)+" = acceptor"+buffer.getName()+"[index];\n");
} }
......
...@@ -399,9 +399,9 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -399,9 +399,9 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
replacements["ATOM_PARAMETER_DATA"] = localData.str(); replacements["ATOM_PARAMETER_DATA"] = localData.str();
stringstream args; stringstream args;
for (int i = 0; i < (int) params.size(); i++) { for (int i = 0; i < (int) params.size(); i++) {
args << ", __global "; args << ", __global const ";
args << params[i].getType(); args << params[i].getType();
args << "* global_"; args << "* restrict global_";
args << params[i].getName(); args << params[i].getName();
} }
for (int i = 0; i < (int) arguments.size(); i++) { for (int i = 0; i < (int) arguments.size(); i++) {
...@@ -411,11 +411,11 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -411,11 +411,11 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
} }
else { else {
if ((arguments[i].getMemory().getInfo<CL_MEM_FLAGS>() & CL_MEM_READ_ONLY) == 0) if ((arguments[i].getMemory().getInfo<CL_MEM_FLAGS>() & CL_MEM_READ_ONLY) == 0)
args << ", __global "; args << ", __global const ";
else else
args << ", __constant "; args << ", __constant ";
args << arguments[i].getType(); args << arguments[i].getType();
args << "* "; args << "* restrict ";
args << arguments[i].getName(); args << arguments[i].getName();
} }
} }
......
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
* Apply the Andersen thermostat to adjust particle velocities. * Apply the Andersen thermostat to adjust particle velocities.
*/ */
__kernel void applyAndersenThermostat(float collisionFrequency, float kT, __global float4* velm, __global float2* stepSize, __global float4* random, __kernel void applyAndersenThermostat(float collisionFrequency, float kT, __global float4* velm, __global const float2* restrict stepSize, __global const float4* restrict random,
unsigned int randomIndex, __global int* atomGroups) { unsigned int randomIndex, __global const int* restrict atomGroups) {
float collisionProbability = 1.0f-exp(-collisionFrequency*stepSize[0].y); float collisionProbability = 1.0f-exp(-collisionFrequency*stepSize[0].y);
float randomRange = erf(collisionProbability/sqrt(2.0f)); float randomRange = erf(collisionProbability/sqrt(2.0f));
for (int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) { for (int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) {
......
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
* Perform the first step of Brownian integration. * Perform the first step of Brownian integration.
*/ */
__kernel void integrateBrownianPart1(float tauDeltaT, float noiseAmplitude, __global float4* force, __kernel void integrateBrownianPart1(float tauDeltaT, float noiseAmplitude, __global const float4* restrict force,
__global float4* posDelta, __global float4* velm, __global float4* random, unsigned int randomIndex) { __global float4* restrict posDelta, __global const float4* restrict velm, __global const float4* restrict random, unsigned int randomIndex) {
randomIndex += get_global_id(0); randomIndex += get_global_id(0);
for (int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) { for (int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) {
float invMass = velm[index].w; float invMass = velm[index].w;
...@@ -16,7 +16,7 @@ __kernel void integrateBrownianPart1(float tauDeltaT, float noiseAmplitude, __gl ...@@ -16,7 +16,7 @@ __kernel void integrateBrownianPart1(float tauDeltaT, float noiseAmplitude, __gl
* Perform the second step of Brownian integration. * Perform the second step of Brownian integration.
*/ */
__kernel void integrateBrownianPart2(float oneOverDeltaT, __global float4* posq, __global float4* velm, __global float4* posDelta) { __kernel void integrateBrownianPart2(float oneOverDeltaT, __global float4* posq, __global float4* velm, __global const float4* restrict posDelta) {
for (int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) { for (int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) {
float4 delta = posDelta[index]; float4 delta = posDelta[index];
velm[index].xyz = oneOverDeltaT*delta.xyz; velm[index].xyz = oneOverDeltaT*delta.xyz;
......
/** /**
* Compute the direction each constraint is pointing in. This is called once at the beginning of constraint evaluation. * Compute the direction each constraint is pointing in. This is called once at the beginning of constraint evaluation.
*/ */
__kernel void computeConstraintDirections(__global int2* constraintAtoms, __global float4* constraintDistance, __global float4* atomPositions) { __kernel void computeConstraintDirections(__global const int2* restrict constraintAtoms, __global float4* restrict constraintDistance, __global const float4* restrict atomPositions) {
for (int index = get_global_id(0); index < NUM_CONSTRAINTS; index += get_global_size(0)) { for (int index = get_global_id(0); index < NUM_CONSTRAINTS; index += get_global_size(0)) {
// Compute the direction for this constraint. // Compute the direction for this constraint.
...@@ -19,8 +19,8 @@ __kernel void computeConstraintDirections(__global int2* constraintAtoms, __glob ...@@ -19,8 +19,8 @@ __kernel void computeConstraintDirections(__global int2* constraintAtoms, __glob
/** /**
* Compute the force applied by each constraint. * Compute the force applied by each constraint.
*/ */
__kernel void computeConstraintForce(__global int2* constraintAtoms, __global float4* constraintDistance, __global float4* atomPositions, __kernel void computeConstraintForce(__global const int2* restrict constraintAtoms, __global const float4* restrict constraintDistance, __global const float4* restrict atomPositions,
__global float* reducedMass, __global float* delta1, __global int* converged, float tol, int iteration) { __global const float* restrict reducedMass, __global float* restrict delta1, __global int* restrict converged, float tol, int iteration) {
__local int groupConverged; __local int groupConverged;
if (converged[1-iteration%2]) { if (converged[1-iteration%2]) {
if (get_global_id(0) == 0) if (get_global_id(0) == 0)
...@@ -58,8 +58,8 @@ __kernel void computeConstraintForce(__global int2* constraintAtoms, __global fl ...@@ -58,8 +58,8 @@ __kernel void computeConstraintForce(__global int2* constraintAtoms, __global fl
/** /**
* Multiply the vector of constraint forces by the constraint matrix. * Multiply the vector of constraint forces by the constraint matrix.
*/ */
__kernel void multiplyByConstraintMatrix(__global float* delta1, __global float* delta2, __global int* constraintMatrixColumn, __kernel void multiplyByConstraintMatrix(__global const float* restrict delta1, __global float* restrict delta2, __global const int* restrict constraintMatrixColumn,
__global float* constraintMatrixValue, __global int* converged, int iteration) { __global const float* restrict constraintMatrixValue, __global const int* restrict converged, int iteration) {
if (converged[iteration%2]) if (converged[iteration%2])
return; // The constraint iteration has already converged. return; // The constraint iteration has already converged.
...@@ -81,8 +81,8 @@ __kernel void multiplyByConstraintMatrix(__global float* delta1, __global float* ...@@ -81,8 +81,8 @@ __kernel void multiplyByConstraintMatrix(__global float* delta1, __global float*
/** /**
* Update the atom positions based on constraint forces. * Update the atom positions based on constraint forces.
*/ */
__kernel void updateAtomPositions(__global int* numAtomConstraints, __global int* atomConstraints, __global float4* constraintDistance, __kernel void updateAtomPositions(__global const int* restrict numAtomConstraints, __global const int* restrict atomConstraints, __global const float4* restrict constraintDistance,
__global float4* atomPositions, __global float4* velm, __global float* delta1, __global float* delta2, __global int* converged, int iteration) { __global float4* restrict atomPositions, __global const float4* restrict velm, __global const float* restrict delta1, __global const float* restrict delta2, __global int* restrict converged, int iteration) {
if (get_global_id(0) == 0) if (get_global_id(0) == 0)
converged[1-iteration%2] = 1; converged[1-iteration%2] = 1;
if (converged[iteration%2]) if (converged[iteration%2])
......
...@@ -56,7 +56,7 @@ unsigned int sumReduce128(__local unsigned int* arr) { ...@@ -56,7 +56,7 @@ unsigned int sumReduce128(__local unsigned int* arr) {
return arr[0]; return arr[0];
} }
__kernel void countElts(__global unsigned int* dgBlockCounts, __global unsigned int* dgValid, const unsigned int len, __local unsigned int* dsCount) { __kernel void countElts(__global unsigned int* restrict dgBlockCounts, __global const unsigned int* restrict dgValid, const unsigned int len, __local unsigned int* restrict dsCount) {
dsCount[get_local_id(0)] = 0; dsCount[get_local_id(0)] = 0;
unsigned int ub; unsigned int ub;
const unsigned int eltsPerBlock = len/get_num_groups(0) + ((len % get_num_groups(0)) ? 1 : 0); const unsigned int eltsPerBlock = len/get_num_groups(0) + ((len % get_num_groups(0)) ? 1 : 0);
...@@ -110,9 +110,9 @@ int compactSIMDPrefixSum(__local const unsigned int* dsData, __local const unsig ...@@ -110,9 +110,9 @@ int compactSIMDPrefixSum(__local const unsigned int* dsData, __local const unsig
return numValid; return numValid;
} }
__kernel void moveValidElementsStaged(__global unsigned int* dgData, __global unsigned int* dgCompact, __global unsigned int* dgValid, __kernel void moveValidElementsStaged(__global const unsigned int* restrict dgData, __global unsigned int* restrict dgCompact, __global const unsigned int* restrict dgValid,
__global unsigned int* dgBlockCounts, unsigned int len, __global unsigned int* dNumValidElements, __global const unsigned int* restrict dgBlockCounts, unsigned int len, __global unsigned int* restrict dNumValidElements,
__local unsigned int* inBlock, __local unsigned int* validBlock, __local unsigned int* compactBlock) { __local unsigned int* restrict inBlock, __local unsigned int* restrict validBlock, __local unsigned int* restrict compactBlock) {
__local unsigned int dsLocalIndex[256]; __local unsigned int dsLocalIndex[256];
int blockOutOffset=0; int blockOutOffset=0;
// Sum up the blockCounts before us to find our offset // Sum up the blockCounts before us to find our offset
......
...@@ -6,11 +6,11 @@ ...@@ -6,11 +6,11 @@
* Compute a force based on pair interactions. * Compute a force based on pair interactions.
*/ */
__kernel void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer, __local float4* local_force, __kernel void computeN2Energy(__global float4* restrict forceBuffers, __global float* restrict energyBuffer, __local float4* restrict local_force,
__global float4* posq, __local float4* local_posq, __global unsigned int* exclusions, __global unsigned int* exclusionIndices, __global const float4* restrict posq, __local float4* restrict local_posq, __global const unsigned int* restrict exclusions, __global const unsigned int* restrict exclusionIndices,
__global unsigned int* exclusionRowIndices, __local float4* tempBuffer, __global const unsigned int* restrict exclusionRowIndices, __local float4* restrict tempBuffer,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles, __global unsigned int* interactionFlags __global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles, __global const unsigned int* restrict interactionFlags
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
......
...@@ -7,11 +7,11 @@ ...@@ -7,11 +7,11 @@
*/ */
__kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1))) __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer, __local float4* local_force, void computeN2Energy(__global float4* restrict forceBuffers, __global float* restrict energyBuffer, __local float4* restrict local_force,
__global float4* posq, __local float4* local_posq, __global unsigned int* exclusions, __global unsigned int* exclusionIndices, __global const float4* restrict posq, __local float4* restrict local_posq, __global const unsigned int* restrict exclusions, __global const unsigned int* restrict exclusionIndices,
__global unsigned int* exclusionRowIndices, __local float4* tempForceBuffer, __global const unsigned int* restrict exclusionRowIndices, __local float4* restrict tempForceBuffer,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles __global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
......
...@@ -14,15 +14,15 @@ ...@@ -14,15 +14,15 @@
*/ */
__kernel void computeN2Energy( __kernel void computeN2Energy(
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
__global long* forceBuffers, __global long* restrict forceBuffers,
#else #else
__global float4* forceBuffers, __global float4* restrict forceBuffers,
#endif #endif
__global float* energyBuffer, __local float4* local_force, __global float* restrict energyBuffer, __local float4* restrict local_force,
__global float4* posq, __local float4* local_posq, __global unsigned int* exclusions, __global unsigned int* exclusionIndices, __global const float4* restrict posq, __local float4* restrict local_posq, __global const unsigned int* restrict exclusions, __global const unsigned int* restrict exclusionIndices,
__global unsigned int* exclusionRowIndices, __local float4* tempBuffer, __global const unsigned int* restrict exclusionRowIndices, __local float4* restrict tempBuffer,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles, __global unsigned int* interactionFlags __global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles, __global const unsigned int* restrict interactionFlags
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Reduce the derivatives computed in the N^2 energy kernel, and compute all per-particle energy terms. * Reduce the derivatives computed in the N^2 energy kernel, and compute all per-particle energy terms.
*/ */
__kernel void computePerParticleEnergy(int bufferSize, int numBuffers, __global float4* forceBuffers, __global float* energyBuffer, __global float4* posq __kernel void computePerParticleEnergy(int bufferSize, int numBuffers, __global float4* restrict forceBuffers, __global float* restrict energyBuffer, __global const float4* restrict posq
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
float energy = 0.0f; float energy = 0.0f;
unsigned int index = get_global_id(0); unsigned int index = get_global_id(0);
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* Compute chain rule terms for computed values that depend explicitly on particle coordinates. * Compute chain rule terms for computed values that depend explicitly on particle coordinates.
*/ */
__kernel void computeGradientChainRuleTerms(__global float4* forceBuffers, __global float4* posq __kernel void computeGradientChainRuleTerms(__global float4* restrict forceBuffers, __global const float4* restrict posq
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
unsigned int index = get_global_id(0); unsigned int index = get_global_id(0);
while (index < NUM_ATOMS) { while (index < NUM_ATOMS) {
......
...@@ -4,11 +4,11 @@ ...@@ -4,11 +4,11 @@
* Compute a value based on pair interactions. * Compute a value based on pair interactions.
*/ */
__kernel void computeN2Value(__global float4* posq, __local float4* local_posq, __global unsigned int* exclusions, __kernel void computeN2Value(__global const float4* restrict posq, __local float4* restrict local_posq, __global const unsigned int* restrict exclusions,
__global unsigned int* exclusionIndices, __global unsigned int* exclusionRowIndices, __global float* global_value, __local float* local_value, __global const unsigned int* restrict exclusionIndices, __global const unsigned int* restrict exclusionRowIndices, __global float* restrict global_value, __local float* restrict local_value,
__local float* tempBuffer, __local float* restrict tempBuffer,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles, __global unsigned int* interactionFlags __global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles, __global const unsigned int* restrict interactionFlags
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
......
...@@ -5,11 +5,11 @@ ...@@ -5,11 +5,11 @@
*/ */
__kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1))) __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
void computeN2Value(__global float4* posq, __local float4* local_posq, __global unsigned int* exclusions, void computeN2Value(__global const float4* restrict posq, __local float4* restrict local_posq, __global const unsigned int* restrict exclusions,
__global unsigned int* exclusionIndices, __global unsigned int* exclusionRowIndices, __global float* global_value, __local float* local_value, __global const unsigned int* restrict exclusionIndices, __global const unsigned int* restrict exclusionRowIndices, __global float* restrict global_value, __local float* restrict local_value,
__local float* tempBuffer, __local float* restrict tempBuffer,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles __global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
......
...@@ -7,16 +7,16 @@ ...@@ -7,16 +7,16 @@
/** /**
* Compute a value based on pair interactions. * Compute a value based on pair interactions.
*/ */
__kernel void computeN2Value(__global float4* posq, __local float4* local_posq, __global unsigned int* exclusions, __kernel void computeN2Value(__global const float4* restrict posq, __local float4* restrict local_posq, __global const unsigned int* restrict exclusions,
__global unsigned int* exclusionIndices, __global unsigned int* exclusionRowIndices, __global const unsigned int* restrict exclusionIndices, __global const unsigned int* restrict exclusionRowIndices,
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
__global long* global_value, __global long* restrict global_value,
#else #else
__global float* global_value, __global float* restrict global_value,
#endif #endif
__local float* local_value, __local float* tempBuffer, __local float* restrict local_value, __local float* restrict tempBuffer,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles, __global unsigned int* interactionFlags __global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles, __global const unsigned int* restrict interactionFlags
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
......
...@@ -55,8 +55,8 @@ float4 computeCross(float4 vec1, float4 vec2) { ...@@ -55,8 +55,8 @@ float4 computeCross(float4 vec1, float4 vec2) {
/** /**
* Compute forces on donors. * Compute forces on donors.
*/ */
__kernel void computeDonorForces(__global float4* forceBuffers, __global float* energyBuffer, __global float4* posq, __global int4* exclusions, __kernel void computeDonorForces(__global float4* restrict forceBuffers, __global float* restrict energyBuffer, __global const float4* restrict posq, __global const int4* restrict exclusions,
__global int4* donorAtoms, __global int4* acceptorAtoms, __global int4* donorBufferIndices, __local float4* posBuffer, float4 periodicBoxSize, float4 invPeriodicBoxSize __global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict donorBufferIndices, __local float4* posBuffer, float4 periodicBoxSize, float4 invPeriodicBoxSize
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
float energy = 0.0f; float energy = 0.0f;
float4 f1 = (float4) 0; float4 f1 = (float4) 0;
...@@ -141,8 +141,8 @@ __kernel void computeDonorForces(__global float4* forceBuffers, __global float* ...@@ -141,8 +141,8 @@ __kernel void computeDonorForces(__global float4* forceBuffers, __global float*
/** /**
* Compute forces on acceptors. * Compute forces on acceptors.
*/ */
__kernel void computeAcceptorForces(__global float4* forceBuffers, __global float* energyBuffer, __global float4* posq, __global int4* exclusions, __kernel void computeAcceptorForces(__global float4* restrict forceBuffers, __global float* restrict energyBuffer, __global const float4* restrict posq, __global const int4* restrict exclusions,
__global int4* donorAtoms, __global int4* acceptorAtoms, __global int4* acceptorBufferIndices, __local float4* posBuffer, float4 periodicBoxSize, float4 invPeriodicBoxSize __global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict acceptorBufferIndices, __local float4* restrict posBuffer, float4 periodicBoxSize, float4 invPeriodicBoxSize
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
float4 f1 = (float4) 0; float4 f1 = (float4) 0;
float4 f2 = (float4) 0; float4 f2 = (float4) 0;
......
...@@ -7,7 +7,7 @@ float2 multofFloat2(float2 a, float2 b) { ...@@ -7,7 +7,7 @@ float2 multofFloat2(float2 a, float2 b) {
* Precompute the cosine and sine sums which appear in each force term. * Precompute the cosine and sine sums which appear in each force term.
*/ */
__kernel void calculateEwaldCosSinSums(__global float* energyBuffer, __global float4* posq, __global float2* cosSinSum, float4 reciprocalPeriodicBoxSize, float reciprocalCoefficient) { __kernel void calculateEwaldCosSinSums(__global float* restrict energyBuffer, __global const float4* restrict posq, __global float2* restrict cosSinSum, float4 reciprocalPeriodicBoxSize, float reciprocalCoefficient) {
const unsigned int ksizex = 2*KMAX_X-1; const unsigned int ksizex = 2*KMAX_X-1;
const unsigned int ksizey = 2*KMAX_Y-1; const unsigned int ksizey = 2*KMAX_Y-1;
const unsigned int ksizez = 2*KMAX_Z-1; const unsigned int ksizez = 2*KMAX_Z-1;
...@@ -58,7 +58,7 @@ __kernel void calculateEwaldCosSinSums(__global float* energyBuffer, __global fl ...@@ -58,7 +58,7 @@ __kernel void calculateEwaldCosSinSums(__global float* energyBuffer, __global fl
* previous routine. * previous routine.
*/ */
__kernel void calculateEwaldForces(__global float4* forceBuffers, __global float4* posq, __global float2* cosSinSum, float4 reciprocalPeriodicBoxSize, float reciprocalCoefficient) { __kernel void calculateEwaldForces(__global float4* restrict forceBuffers, __global const float4* restrict posq, __global const float2* restrict cosSinSum, float4 reciprocalPeriodicBoxSize, float reciprocalCoefficient) {
unsigned int atom = get_global_id(0); unsigned int atom = get_global_id(0);
while (atom < NUM_ATOMS) { while (atom < NUM_ATOMS) {
float4 force = forceBuffers[atom]; float4 force = forceBuffers[atom];
......
...@@ -6,7 +6,8 @@ float2 multiplyComplex(float2 c1, float2 c2) { ...@@ -6,7 +6,8 @@ float2 multiplyComplex(float2 c1, float2 c2) {
* Perform a 1D FFT on each row along one axis. * Perform a 1D FFT on each row along one axis.
*/ */
__kernel void execFFT(__global float2* in, __global float2* out, float sign, __local float2* w, __local float2* data0, __local float2* data1) { __kernel void execFFT(__global const float2* restrict in, __global float2* restrict out, float sign, __local float2* restrict w,
__local float2* restrict data0, __local float2* restrict data1) {
for (int i = get_local_id(0); i < ZSIZE; i += get_local_size(0)) for (int i = get_local_id(0); i < ZSIZE; i += get_local_size(0))
w[i] = (float2) (cos(-sign*i*2*M_PI/ZSIZE), sin(-sign*i*2*M_PI/ZSIZE)); w[i] = (float2) (cos(-sign*i*2*M_PI/ZSIZE), sin(-sign*i*2*M_PI/ZSIZE));
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
/** /**
* Find a bounding box for the atoms in each block. * Find a bounding box for the atoms in each block.
*/ */
__kernel void findBlockBounds(int numAtoms, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global float4* posq, __global float4* blockCenter, __global float4* blockBoundingBox, __global unsigned int* interactionCount) { __kernel void findBlockBounds(int numAtoms, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global const float4* restrict posq, __global float4* restrict blockCenter, __global float4* restrict blockBoundingBox, __global unsigned int* restrict interactionCount) {
int index = get_global_id(0); int index = get_global_id(0);
int base = index*TILE_SIZE; int base = index*TILE_SIZE;
while (base < numAtoms) { while (base < numAtoms) {
...@@ -47,7 +47,7 @@ __kernel void findBlockBounds(int numAtoms, float4 periodicBoxSize, float4 invPe ...@@ -47,7 +47,7 @@ __kernel void findBlockBounds(int numAtoms, float4 periodicBoxSize, float4 invPe
*/ */
void storeInteractionData(__local ushort2* buffer, __local int* valid, __local short* sum, __local ushort2* temp, __local int* baseIndex, void storeInteractionData(__local ushort2* buffer, __local int* valid, __local short* sum, __local ushort2* temp, __local int* baseIndex,
__global unsigned int* interactionCount, __global ushort2* interactingTiles, float cutoffSquared, float4 periodicBoxSize, __global unsigned int* interactionCount, __global ushort2* interactingTiles, float cutoffSquared, float4 periodicBoxSize,
float4 invPeriodicBoxSize, __global float4* posq, __global float4* blockCenter, __global float4* blockBoundingBox, unsigned int maxTiles) { float4 invPeriodicBoxSize, __global const float4* posq, __global const float4* blockCenter, __global const float4* blockBoundingBox, unsigned int maxTiles) {
// The buffer is full, so we need to compact it and write out results. Start by doing a parallel prefix sum. // The buffer is full, so we need to compact it and write out results. Start by doing a parallel prefix sum.
for (int i = get_local_id(0); i < BUFFER_SIZE; i += GROUP_SIZE) for (int i = get_local_id(0); i < BUFFER_SIZE; i += GROUP_SIZE)
...@@ -144,9 +144,9 @@ void storeInteractionData(__local ushort2* buffer, __local int* valid, __local s ...@@ -144,9 +144,9 @@ void storeInteractionData(__local ushort2* buffer, __local int* valid, __local s
* Compare the bounding boxes for each pair of blocks. If they are sufficiently far apart, * Compare the bounding boxes for each pair of blocks. If they are sufficiently far apart,
* mark them as non-interacting. * mark them as non-interacting.
*/ */
__kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global float4* blockCenter, __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global const float4* restrict blockCenter,
__global float4* blockBoundingBox, __global unsigned int* interactionCount, __global ushort2* interactingTiles, __global const float4* restrict blockBoundingBox, __global unsigned int* restrict interactionCount, __global ushort2* restrict interactingTiles,
__global unsigned int* interactionFlags, __global float4* posq, unsigned int maxTiles, unsigned int startTileIndex, __global unsigned int* restrict interactionFlags, __global const float4* restrict posq, unsigned int maxTiles, unsigned int startTileIndex,
unsigned int endTileIndex) { unsigned int endTileIndex) {
__local ushort2 buffer[BUFFER_SIZE]; __local ushort2 buffer[BUFFER_SIZE];
__local int valid[BUFFER_SIZE]; __local int valid[BUFFER_SIZE];
...@@ -220,8 +220,8 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox ...@@ -220,8 +220,8 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
* Compare each atom in one block to the bounding box of another block, and set * Compare each atom in one block to the bounding box of another block, and set
* flags for which ones are interacting. * flags for which ones are interacting.
*/ */
__kernel void findInteractionsWithinBlocks(float cutoffSquared, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global float4* posq, __global ushort2* tiles, __global float4* blockCenter, __kernel void findInteractionsWithinBlocks(float cutoffSquared, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global const float4* restrict posq, __global const ushort2* restrict tiles, __global const float4* restrict blockCenter,
__global float4* blockBoundingBox, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, __local unsigned int* flags, unsigned int maxTiles) { __global const float4* restrict blockBoundingBox, __global unsigned int* restrict interactionFlags, __global const unsigned int* restrict interactionCount, __local unsigned int* restrict flags, unsigned int maxTiles) {
unsigned int totalWarps = get_global_size(0)/TILE_SIZE; unsigned int totalWarps = get_global_size(0)/TILE_SIZE;
unsigned int warp = get_global_id(0)/TILE_SIZE; unsigned int warp = get_global_id(0)/TILE_SIZE;
unsigned int numTiles = interactionCount[0]; unsigned int numTiles = interactionCount[0];
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
/** /**
* Find a bounding box for the atoms in each block. * Find a bounding box for the atoms in each block.
*/ */
__kernel void findBlockBounds(int numAtoms, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global float4* posq, __global float4* blockCenter, __global float4* blockBoundingBox, __global unsigned int* interactionCount) { __kernel void findBlockBounds(int numAtoms, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global const float4* restrict posq, __global float4* restrict blockCenter, __global float4* restrict blockBoundingBox, __global unsigned int* restrict interactionCount) {
int index = get_global_id(0); int index = get_global_id(0);
int base = index*TILE_SIZE; int base = index*TILE_SIZE;
while (base < numAtoms) { while (base < numAtoms) {
...@@ -121,9 +121,9 @@ void storeInteractionData(ushort2* buffer, int numValid, __global unsigned int* ...@@ -121,9 +121,9 @@ void storeInteractionData(ushort2* buffer, int numValid, __global unsigned int*
* Compare the bounding boxes for each pair of blocks. If they are sufficiently far apart, * Compare the bounding boxes for each pair of blocks. If they are sufficiently far apart,
* mark them as non-interacting. * mark them as non-interacting.
*/ */
__kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global float4* blockCenter, __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global const float4* restrict blockCenter,
__global float4* blockBoundingBox, __global unsigned int* interactionCount, __global ushort2* interactingTiles, __global const float4* restrict blockBoundingBox, __global unsigned int* restrict interactionCount, __global ushort2* restrict interactingTiles,
__global unsigned int* interactionFlags, __global float4* posq, unsigned int maxTiles, unsigned int startTileIndex, __global unsigned int* restrict interactionFlags, __global const float4* restrict posq, unsigned int maxTiles, unsigned int startTileIndex,
unsigned int endTileIndex) { unsigned int endTileIndex) {
ushort2 buffer[BUFFER_SIZE]; ushort2 buffer[BUFFER_SIZE];
int valuesInBuffer = 0; int valuesInBuffer = 0;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment