Commit 4949017b authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing CUDA implementation of parameter derivatives

parent eae8def5
...@@ -826,13 +826,15 @@ public: ...@@ -826,13 +826,15 @@ public:
void copyParametersToContext(ContextImpl& context, const CustomGBForce& force); void copyParametersToContext(ContextImpl& context, const CustomGBForce& force);
private: private:
double cutoff; double cutoff;
bool hasInitializedKernels, needParameterGradient; bool hasInitializedKernels, needParameterGradient, needEnergyParamDerivs;
int maxTiles, numComputedValues; int maxTiles, numComputedValues;
CudaContext& cu; CudaContext& cu;
CudaParameterSet* params; CudaParameterSet* params;
CudaParameterSet* computedValues; CudaParameterSet* computedValues;
CudaParameterSet* energyDerivs; CudaParameterSet* energyDerivs;
CudaParameterSet* energyDerivChain; CudaParameterSet* energyDerivChain;
std::vector<CudaParameterSet*> dValuedParam;
std::vector<CudaArray*> dValue0dParam;
CudaArray* longEnergyDerivs; CudaArray* longEnergyDerivs;
CudaArray* globals; CudaArray* globals;
CudaArray* valueBuffers; CudaArray* valueBuffers;
......
...@@ -2917,6 +2917,10 @@ CudaCalcCustomGBForceKernel::~CudaCalcCustomGBForceKernel() { ...@@ -2917,6 +2917,10 @@ CudaCalcCustomGBForceKernel::~CudaCalcCustomGBForceKernel() {
delete valueBuffers; delete valueBuffers;
for (int i = 0; i < (int) tabulatedFunctions.size(); i++) for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
delete tabulatedFunctions[i]; delete tabulatedFunctions[i];
for (int i = 0; i < dValue0dParam.size(); i++)
delete dValue0dParam[i];
for (int i = 0; i < dValuedParam.size(); i++)
delete dValuedParam[i];
} }
void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomGBForce& force) { void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomGBForce& force) {
...@@ -3009,18 +3013,24 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG ...@@ -3009,18 +3013,24 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
vector<vector<Lepton::ParsedExpression> > valueGradientExpressions(force.getNumComputedValues()); vector<vector<Lepton::ParsedExpression> > valueGradientExpressions(force.getNumComputedValues());
vector<vector<Lepton::ParsedExpression> > valueDerivExpressions(force.getNumComputedValues()); vector<vector<Lepton::ParsedExpression> > valueDerivExpressions(force.getNumComputedValues());
vector<vector<Lepton::ParsedExpression> > valueParamDerivExpressions(force.getNumComputedValues());
needParameterGradient = false; needParameterGradient = false;
for (int i = 1; i < force.getNumComputedValues(); i++) { for (int i = 0; i < force.getNumComputedValues(); i++) {
Lepton::ParsedExpression ex = Lepton::Parser::parse(computedValueExpressions[i], functions).optimize(); Lepton::ParsedExpression ex = Lepton::Parser::parse(computedValueExpressions[i], functions).optimize();
valueGradientExpressions[i].push_back(ex.differentiate("x").optimize()); if (i > 0) {
valueGradientExpressions[i].push_back(ex.differentiate("y").optimize()); valueGradientExpressions[i].push_back(ex.differentiate("x").optimize());
valueGradientExpressions[i].push_back(ex.differentiate("z").optimize()); valueGradientExpressions[i].push_back(ex.differentiate("y").optimize());
if (!isZeroExpression(valueGradientExpressions[i][0]) || !isZeroExpression(valueGradientExpressions[i][1]) || !isZeroExpression(valueGradientExpressions[i][2])) valueGradientExpressions[i].push_back(ex.differentiate("z").optimize());
needParameterGradient = true; if (!isZeroExpression(valueGradientExpressions[i][0]) || !isZeroExpression(valueGradientExpressions[i][1]) || !isZeroExpression(valueGradientExpressions[i][2]))
for (int j = 0; j < i; j++) needParameterGradient = true;
valueDerivExpressions[i].push_back(ex.differentiate(computedValueNames[j]).optimize()); for (int j = 0; j < i; j++)
valueDerivExpressions[i].push_back(ex.differentiate(computedValueNames[j]).optimize());
}
for (int j = 0; j < force.getNumEnergyParameterDerivatives(); j++)
valueParamDerivExpressions[i].push_back(ex.differentiate(force.getEnergyParameterDerivativeName(j)).optimize());
} }
vector<vector<Lepton::ParsedExpression> > energyDerivExpressions(force.getNumEnergyTerms()); vector<vector<Lepton::ParsedExpression> > energyDerivExpressions(force.getNumEnergyTerms());
vector<vector<Lepton::ParsedExpression> > energyParamDerivExpressions(force.getNumEnergyTerms());
vector<bool> needChainForValue(force.getNumComputedValues(), false); vector<bool> needChainForValue(force.getNumComputedValues(), false);
for (int i = 0; i < force.getNumEnergyTerms(); i++) { for (int i = 0; i < force.getNumEnergyTerms(); i++) {
string expression; string expression;
...@@ -3042,10 +3052,21 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG ...@@ -3042,10 +3052,21 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
needChainForValue[j] = true; needChainForValue[j] = true;
} }
} }
for (int j = 0; j < force.getNumEnergyParameterDerivatives(); j++)
energyParamDerivExpressions[i].push_back(ex.differentiate(force.getEnergyParameterDerivativeName(j)).optimize());
} }
longEnergyDerivs = CudaArray::create<long long>(cu, force.getNumComputedValues()*cu.getPaddedNumAtoms(), "customGBLongEnergyDerivatives"); longEnergyDerivs = CudaArray::create<long long>(cu, force.getNumComputedValues()*cu.getPaddedNumAtoms(), "customGBLongEnergyDerivatives");
energyDerivs = new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "customGBEnergyDerivatives", true); energyDerivs = new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "customGBEnergyDerivatives", true);
energyDerivChain = new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "customGBEnergyDerivativeChain", true); energyDerivChain = new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "customGBEnergyDerivativeChain", true);
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
needEnergyParamDerivs = (force.getNumEnergyParameterDerivatives() > 0);
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
dValuedParam.push_back(new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "dValuedParam", true));
dValue0dParam.push_back(CudaArray::create<long long>(cu, cu.getPaddedNumAtoms(), "dValue0dParam"));
cu.addAutoclearBuffer(*dValue0dParam.back());
string name = force.getEnergyParameterDerivativeName(i);
cu.addEnergyParameterDerivative(name);
}
// Create the kernels. // Create the kernels.
...@@ -3077,11 +3098,18 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG ...@@ -3077,11 +3098,18 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
Lepton::ParsedExpression ex = Lepton::Parser::parse(computedValueExpressions[0], functions).optimize(); Lepton::ParsedExpression ex = Lepton::Parser::parse(computedValueExpressions[0], functions).optimize();
n2ValueExpressions["tempValue1 = "] = ex; n2ValueExpressions["tempValue1 = "] = ex;
n2ValueExpressions["tempValue2 = "] = ex.renameVariables(rename); n2ValueExpressions["tempValue2 = "] = ex.renameVariables(rename);
for (int i = 0; i < valueParamDerivExpressions[0].size(); i++) {
string variableBase = "temp_dValue0dParam"+cu.intToString(i+1);
if (!isZeroExpression(valueParamDerivExpressions[0][i])) {
n2ValueExpressions[variableBase+"_1 = "] = valueParamDerivExpressions[0][i];
n2ValueExpressions[variableBase+"_2 = "] = valueParamDerivExpressions[0][i].renameVariables(rename);
}
}
n2ValueSource << cu.getExpressionUtilities().createExpressions(n2ValueExpressions, variables, functionList, functionDefinitions, "temp"); n2ValueSource << cu.getExpressionUtilities().createExpressions(n2ValueExpressions, variables, functionList, functionDefinitions, "temp");
map<string, string> replacements; map<string, string> replacements;
string n2ValueStr = n2ValueSource.str(); string n2ValueStr = n2ValueSource.str();
replacements["COMPUTE_VALUE"] = n2ValueStr; replacements["COMPUTE_VALUE"] = n2ValueStr;
stringstream extraArgs, atomParams, loadLocal1, loadLocal2, load1, load2; stringstream extraArgs, atomParams, loadLocal1, loadLocal2, load1, load2, tempDerivs1, tempDerivs2, storeDeriv1, storeDeriv2;
if (force.getNumGlobalParameters() > 0) if (force.getNumGlobalParameters() > 0)
extraArgs << ", const float* globals"; extraArgs << ", const float* globals";
pairValueUsesParam.resize(params->getBuffers().size(), false); pairValueUsesParam.resize(params->getBuffers().size(), false);
...@@ -3100,12 +3128,31 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG ...@@ -3100,12 +3128,31 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
atomParamSize += buffer.getNumComponents(); atomParamSize += buffer.getNumComponents();
} }
} }
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
string derivName = "dValue0dParam"+cu.intToString(i+1);
extraArgs << ", unsigned long long* __restrict__ global_" << derivName;
atomParams << "real " << derivName << ";\n";
loadLocal2 << "localData[localAtomIndex]." << derivName << " = 0;\n";
load1 << "real " << derivName << " = 0;\n";
if (!isZeroExpression(valueParamDerivExpressions[0][i])) {
load2 << "real temp_" << derivName << "_1 = 0;\n";
load2 << "real temp_" << derivName << "_2 = 0;\n";
tempDerivs1 << derivName << " += temp_" << derivName << "_1;\n";
tempDerivs2 << "localData[tbx+tj]." << derivName << " += temp_" << derivName << "_2;\n";
storeDeriv1 << "atomicAdd(&global_" << derivName << "[offset1], static_cast<unsigned long long>((long long) (" << derivName << "*0x100000000)));\n";
storeDeriv2 << "atomicAdd(&global_" << derivName << "[offset2], static_cast<unsigned long long>((long long) (localData[threadIdx.x]." << derivName << "*0x100000000)));\n";
}
}
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str(); replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
replacements["ATOM_PARAMETER_DATA"] = atomParams.str(); replacements["ATOM_PARAMETER_DATA"] = atomParams.str();
replacements["LOAD_LOCAL_PARAMETERS_FROM_1"] = loadLocal1.str(); replacements["LOAD_LOCAL_PARAMETERS_FROM_1"] = loadLocal1.str();
replacements["LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"] = loadLocal2.str(); replacements["LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"] = loadLocal2.str();
replacements["LOAD_ATOM1_PARAMETERS"] = load1.str(); replacements["LOAD_ATOM1_PARAMETERS"] = load1.str();
replacements["LOAD_ATOM2_PARAMETERS"] = load2.str(); replacements["LOAD_ATOM2_PARAMETERS"] = load2.str();
replacements["ADD_TEMP_DERIVS1"] = tempDerivs1.str();
replacements["ADD_TEMP_DERIVS2"] = tempDerivs2.str();
replacements["STORE_PARAM_DERIVS1"] = storeDeriv1.str();
replacements["STORE_PARAM_DERIVS2"] = storeDeriv2.str();
if (useCutoff) if (useCutoff)
pairValueDefines["USE_CUTOFF"] = "1"; pairValueDefines["USE_CUTOFF"] = "1";
if (usePeriodic) if (usePeriodic)
...@@ -3128,7 +3175,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG ...@@ -3128,7 +3175,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
{ {
// Create the kernel to reduce the N2 value and calculate other values. // Create the kernel to reduce the N2 value and calculate other values.
stringstream reductionSource, extraArgs; stringstream reductionSource, extraArgs, deriv0;
if (force.getNumGlobalParameters() > 0) if (force.getNumGlobalParameters() > 0)
extraArgs << ", const float* globals"; extraArgs << ", const float* globals";
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
...@@ -3142,6 +3189,14 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG ...@@ -3142,6 +3189,14 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
extraArgs << ", " << buffer.getType() << "* __restrict__ global_" << valueName; extraArgs << ", " << buffer.getType() << "* __restrict__ global_" << valueName;
reductionSource << buffer.getType() << " local_" << valueName << ";\n"; reductionSource << buffer.getType() << " local_" << valueName << ";\n";
} }
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
string variableName = "dValuedParam_0_"+cu.intToString(i);
extraArgs << ", const long long* __restrict__ dValue0dParam" << i;
deriv0 << "real " << variableName << " = (1.0f/0x100000000)*dValue0dParam" << i << "[index];\n";
for (int j = 0; j < dValuedParam[i]->getBuffers().size(); j++)
extraArgs << ", real* __restrict__ global_dValuedParam_" << j << "_" << i;
deriv0 << "global_dValuedParam_0_" << i << "[index] = dValuedParam_0_" << i << ";\n";
}
reductionSource << "local_values" << computedValues->getParameterSuffix(0) << " = sum;\n"; reductionSource << "local_values" << computedValues->getParameterSuffix(0) << " = sum;\n";
map<string, string> variables; map<string, string> variables;
variables["x"] = "pos.x"; variables["x"] = "pos.x";
...@@ -3161,8 +3216,26 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG ...@@ -3161,8 +3216,26 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
string valueName = "values"+cu.intToString(i+1); string valueName = "values"+cu.intToString(i+1);
reductionSource << "global_" << valueName << "[index] = local_" << valueName << ";\n"; reductionSource << "global_" << valueName << "[index] = local_" << valueName << ";\n";
} }
if (needEnergyParamDerivs) {
map<string, Lepton::ParsedExpression> derivExpressions;
for (int i = 1; i < force.getNumComputedValues(); i++) {
for (int j = 0; j < valueParamDerivExpressions[i].size(); j++)
derivExpressions["real dValuedParam_"+cu.intToString(i)+"_"+cu.intToString(j)+" = "] = valueParamDerivExpressions[i][j];
for (int j = 0; j < i; j++)
derivExpressions["real dVdV_"+cu.intToString(i)+"_"+cu.intToString(j)+" = "] = valueDerivExpressions[i][j];
}
reductionSource << cu.getExpressionUtilities().createExpressions(derivExpressions, variables, functionList, functionDefinitions, "derivChain_temp");
for (int i = 1; i < force.getNumComputedValues(); i++) {
for (int j = 0; j < i; j++)
for (int k = 0; k < valueParamDerivExpressions[i].size(); k++)
reductionSource << "dValuedParam_" << i << "_" << k << " += dVdV_" << i << "_" << j << "*dValuedParam_" << j <<"_" << k << ";\n";
for (int j = 0; j < valueParamDerivExpressions[i].size(); j++)
reductionSource << "global_dValuedParam_" << i << "_" << j << "[index] = dValuedParam_" << i << "_" << j << ";\n";
}
}
map<string, string> replacements; map<string, string> replacements;
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str(); replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
replacements["REDUCE_PARAM0_DERIV"] = deriv0.str();
replacements["COMPUTE_VALUES"] = reductionSource.str(); replacements["COMPUTE_VALUES"] = reductionSource.str();
map<string, string> defines; map<string, string> defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms()); defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
...@@ -3207,6 +3280,8 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG ...@@ -3207,6 +3280,8 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
n2EnergyExpressions["/*"+cu.intToString(i+1)+"*/ deriv"+index+"_2 += "] = energyDerivExpressions[i][2*j+1]; n2EnergyExpressions["/*"+cu.intToString(i+1)+"*/ deriv"+index+"_2 += "] = energyDerivExpressions[i][2*j+1];
} }
} }
for (int j = 0; j < force.getNumEnergyParameterDerivatives(); j++)
n2EnergyExpressions["energyParamDeriv"+cu.intToString(j)+" += interactionScale*"] = energyParamDerivExpressions[i][j];
if (exclude) if (exclude)
n2EnergySource << "if (!isExcluded) {\n"; n2EnergySource << "if (!isExcluded) {\n";
n2EnergySource << cu.getExpressionUtilities().createExpressions(n2EnergyExpressions, variables, functionList, functionDefinitions, "temp"); n2EnergySource << cu.getExpressionUtilities().createExpressions(n2EnergyExpressions, variables, functionList, functionDefinitions, "temp");
...@@ -3216,7 +3291,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG ...@@ -3216,7 +3291,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
map<string, string> replacements; map<string, string> replacements;
string n2EnergyStr = n2EnergySource.str(); string n2EnergyStr = n2EnergySource.str();
replacements["COMPUTE_INTERACTION"] = n2EnergyStr; replacements["COMPUTE_INTERACTION"] = n2EnergyStr;
stringstream extraArgs, atomParams, loadLocal1, loadLocal2, clearLocal, load1, load2, declare1, recordDeriv, storeDerivs1, storeDerivs2; stringstream extraArgs, atomParams, loadLocal1, loadLocal2, clearLocal, load1, load2, declare1, recordDeriv, storeDerivs1, storeDerivs2, initParamDerivs, saveParamDerivs;
if (force.getNumGlobalParameters() > 0) if (force.getNumGlobalParameters() > 0)
extraArgs << ", const float* globals"; extraArgs << ", const float* globals";
pairEnergyUsesParam.resize(params->getBuffers().size(), false); pairEnergyUsesParam.resize(params->getBuffers().size(), false);
...@@ -3262,6 +3337,17 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG ...@@ -3262,6 +3337,17 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
storeDerivs2 << "STORE_DERIVATIVE_2(" << index << ")\n"; storeDerivs2 << "STORE_DERIVATIVE_2(" << index << ")\n";
atomParamSize++; atomParamSize++;
} }
if (needEnergyParamDerivs) {
extraArgs << ", mixed* __restrict__ energyParamDerivs";
const vector<string>& allParamDerivNames = cu.getEnergyParamDerivNames();
int numDerivs = allParamDerivNames.size();
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
initParamDerivs << "mixed energyParamDeriv" << i << " = 0;\n";
for (int index = 0; index < numDerivs; index++)
if (allParamDerivNames[index] == force.getEnergyParameterDerivativeName(i))
saveParamDerivs << "energyParamDerivs[(blockIdx.x*blockDim.x+threadIdx.x)*" << numDerivs << "+" << index << "] += energyParamDeriv" << i << ";\n";
}
}
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str(); replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
replacements["ATOM_PARAMETER_DATA"] = atomParams.str(); replacements["ATOM_PARAMETER_DATA"] = atomParams.str();
replacements["LOAD_LOCAL_PARAMETERS_FROM_1"] = loadLocal1.str(); replacements["LOAD_LOCAL_PARAMETERS_FROM_1"] = loadLocal1.str();
...@@ -3273,6 +3359,8 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG ...@@ -3273,6 +3359,8 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
replacements["RECORD_DERIVATIVE_2"] = recordDeriv.str(); replacements["RECORD_DERIVATIVE_2"] = recordDeriv.str();
replacements["STORE_DERIVATIVES_1"] = storeDerivs1.str(); replacements["STORE_DERIVATIVES_1"] = storeDerivs1.str();
replacements["STORE_DERIVATIVES_2"] = storeDerivs2.str(); replacements["STORE_DERIVATIVES_2"] = storeDerivs2.str();
replacements["INIT_PARAM_DERIVS"] = initParamDerivs.str();
replacements["SAVE_PARAM_DERIVS"] = saveParamDerivs.str();
if (useCutoff) if (useCutoff)
pairEnergyDefines["USE_CUTOFF"] = "1"; pairEnergyDefines["USE_CUTOFF"] = "1";
if (usePeriodic) if (usePeriodic)
...@@ -3293,7 +3381,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG ...@@ -3293,7 +3381,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
{ {
// Create the kernel to reduce the derivatives and calculate per-particle energy terms. // Create the kernel to reduce the derivatives and calculate per-particle energy terms.
stringstream compute, extraArgs, load; stringstream compute, extraArgs, load, initParamDerivs, saveParamDerivs;
if (force.getNumGlobalParameters() > 0) if (force.getNumGlobalParameters() > 0)
extraArgs << ", const float* globals"; extraArgs << ", const float* globals";
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
...@@ -3321,6 +3409,17 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG ...@@ -3321,6 +3409,17 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
for (int i = 0; i < energyDerivs->getNumParameters(); ++i) for (int i = 0; i < energyDerivs->getNumParameters(); ++i)
load << "derivBuffers" << energyDerivs->getParameterSuffix(i, "[index]") << load << "derivBuffers" << energyDerivs->getParameterSuffix(i, "[index]") <<
" = RECIP(0x100000000)*derivBuffersIn[index+PADDED_NUM_ATOMS*" << cu.intToString(i) << "];\n"; " = RECIP(0x100000000)*derivBuffersIn[index+PADDED_NUM_ATOMS*" << cu.intToString(i) << "];\n";
if (needEnergyParamDerivs) {
extraArgs << ", mixed* __restrict__ energyParamDerivs";
const vector<string>& allParamDerivNames = cu.getEnergyParamDerivNames();
int numDerivs = allParamDerivNames.size();
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
initParamDerivs << "mixed energyParamDeriv" << i << " = 0;\n";
for (int index = 0; index < numDerivs; index++)
if (allParamDerivNames[index] == force.getEnergyParameterDerivativeName(i))
saveParamDerivs << "energyParamDerivs[(blockIdx.x*blockDim.x+threadIdx.x)*" << numDerivs << "+" << index << "] += energyParamDeriv" << i << ";\n";
}
}
// Compute the various expressions. // Compute the various expressions.
...@@ -3354,6 +3453,8 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG ...@@ -3354,6 +3453,8 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
expressions["/*"+cu.intToString(i+1)+"*/ force.y -= "] = grady; expressions["/*"+cu.intToString(i+1)+"*/ force.y -= "] = grady;
if (!isZeroExpression(gradz)) if (!isZeroExpression(gradz))
expressions["/*"+cu.intToString(i+1)+"*/ force.z -= "] = gradz; expressions["/*"+cu.intToString(i+1)+"*/ force.z -= "] = gradz;
for (int j = 0; j < force.getNumEnergyParameterDerivatives(); j++)
expressions["/*"+cu.intToString(i+1)+"*/ energyParamDeriv"+cu.intToString(j)+" += "] = energyParamDerivExpressions[i][j];
} }
for (int i = 1; i < force.getNumComputedValues(); i++) for (int i = 1; i < force.getNumComputedValues(); i++)
for (int j = 0; j < i; j++) for (int j = 0; j < i; j++)
...@@ -3384,16 +3485,19 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG ...@@ -3384,16 +3485,19 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str(); replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
replacements["LOAD_DERIVATIVES"] = load.str(); replacements["LOAD_DERIVATIVES"] = load.str();
replacements["COMPUTE_ENERGY"] = compute.str(); replacements["COMPUTE_ENERGY"] = compute.str();
replacements["INIT_PARAM_DERIVS"] = initParamDerivs.str();
replacements["SAVE_PARAM_DERIVS"] = saveParamDerivs.str();
map<string, string> defines; map<string, string> defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms()); defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customGBEnergyPerParticle, replacements), defines); CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customGBEnergyPerParticle, replacements), defines);
perParticleEnergyKernel = cu.getKernel(module, "computePerParticleEnergy"); perParticleEnergyKernel = cu.getKernel(module, "computePerParticleEnergy");
} }
if (needParameterGradient) { if (needParameterGradient || needEnergyParamDerivs) {
// Create the kernel to compute chain rule terms for computed values that depend explicitly on particle coordinates. // Create the kernel to compute chain rule terms for computed values that depend explicitly on particle coordinates, and for
// derivatives with respect to global parameters.
stringstream compute, extraArgs; stringstream compute, extraArgs, initParamDerivs, saveParamDerivs;
if (force.getNumGlobalParameters() > 0) if (force.getNumGlobalParameters() > 0)
extraArgs << ", const float* globals"; extraArgs << ", const float* globals";
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
...@@ -3412,6 +3516,19 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG ...@@ -3412,6 +3516,19 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
extraArgs << ", " << buffer.getType() << "* __restrict__ derivBuffers" << index; extraArgs << ", " << buffer.getType() << "* __restrict__ derivBuffers" << index;
compute << buffer.getType() << " deriv" << index << " = derivBuffers" << index << "[index];\n"; compute << buffer.getType() << " deriv" << index << " = derivBuffers" << index << "[index];\n";
} }
if (needEnergyParamDerivs) {
extraArgs << ", mixed* __restrict__ energyParamDerivs";
const vector<string>& allParamDerivNames = cu.getEnergyParamDerivNames();
int numDerivs = allParamDerivNames.size();
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
for (int j = 0; j < dValuedParam[i]->getBuffers().size(); j++)
extraArgs << ", real* __restrict__ dValuedParam_" << j << "_" << i;
initParamDerivs << "mixed energyParamDeriv" << i << " = 0;\n";
for (int index = 0; index < numDerivs; index++)
if (allParamDerivNames[index] == force.getEnergyParameterDerivativeName(i))
saveParamDerivs << "energyParamDerivs[(blockIdx.x*blockDim.x+threadIdx.x)*" << numDerivs << "+" << index << "] += energyParamDeriv" << i << ";\n";
}
}
map<string, string> variables; map<string, string> variables;
variables["x"] = "pos.x"; variables["x"] = "pos.x";
variables["y"] = "pos.y"; variables["y"] = "pos.y";
...@@ -3422,34 +3539,42 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG ...@@ -3422,34 +3539,42 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
variables[force.getGlobalParameterName(i)] = "globals["+cu.intToString(i)+"]"; variables[force.getGlobalParameterName(i)] = "globals["+cu.intToString(i)+"]";
for (int i = 0; i < force.getNumComputedValues(); i++) for (int i = 0; i < force.getNumComputedValues(); i++)
variables[computedValueNames[i]] = "values"+computedValues->getParameterSuffix(i, "[index]"); variables[computedValueNames[i]] = "values"+computedValues->getParameterSuffix(i, "[index]");
for (int i = 1; i < force.getNumComputedValues(); i++) { if (needParameterGradient) {
string is = cu.intToString(i); for (int i = 1; i < force.getNumComputedValues(); i++) {
compute << "real3 dV"<<is<<"dR = make_real3(0);\n"; string is = cu.intToString(i);
for (int j = 1; j < i; j++) { compute << "real3 dV"<<is<<"dR = make_real3(0);\n";
if (!isZeroExpression(valueDerivExpressions[i][j])) { for (int j = 1; j < i; j++) {
map<string, Lepton::ParsedExpression> derivExpressions; if (!isZeroExpression(valueDerivExpressions[i][j])) {
string js = cu.intToString(j); map<string, Lepton::ParsedExpression> derivExpressions;
derivExpressions["real dV"+is+"dV"+js+" = "] = valueDerivExpressions[i][j]; string js = cu.intToString(j);
compute << cu.getExpressionUtilities().createExpressions(derivExpressions, variables, functionList, functionDefinitions, "temp_"+is+"_"+js); derivExpressions["real dV"+is+"dV"+js+" = "] = valueDerivExpressions[i][j];
compute << "dV"<<is<<"dR += dV"<<is<<"dV"<<js<<"*dV"<<js<<"dR;\n"; compute << cu.getExpressionUtilities().createExpressions(derivExpressions, variables, functionList, functionDefinitions, "temp_"+is+"_"+js);
compute << "dV"<<is<<"dR += dV"<<is<<"dV"<<js<<"*dV"<<js<<"dR;\n";
}
} }
map<string, Lepton::ParsedExpression> gradientExpressions;
if (!isZeroExpression(valueGradientExpressions[i][0]))
gradientExpressions["dV"+is+"dR.x += "] = valueGradientExpressions[i][0];
if (!isZeroExpression(valueGradientExpressions[i][1]))
gradientExpressions["dV"+is+"dR.y += "] = valueGradientExpressions[i][1];
if (!isZeroExpression(valueGradientExpressions[i][2]))
gradientExpressions["dV"+is+"dR.z += "] = valueGradientExpressions[i][2];
compute << cu.getExpressionUtilities().createExpressions(gradientExpressions, variables, functionList, functionDefinitions, "temp");
}
for (int i = 1; i < force.getNumComputedValues(); i++) {
string is = cu.intToString(i);
compute << "force -= deriv"<<energyDerivs->getParameterSuffix(i)<<"*dV"<<is<<"dR;\n";
} }
map<string, Lepton::ParsedExpression> gradientExpressions;
if (!isZeroExpression(valueGradientExpressions[i][0]))
gradientExpressions["dV"+is+"dR.x += "] = valueGradientExpressions[i][0];
if (!isZeroExpression(valueGradientExpressions[i][1]))
gradientExpressions["dV"+is+"dR.y += "] = valueGradientExpressions[i][1];
if (!isZeroExpression(valueGradientExpressions[i][2]))
gradientExpressions["dV"+is+"dR.z += "] = valueGradientExpressions[i][2];
compute << cu.getExpressionUtilities().createExpressions(gradientExpressions, variables, functionList, functionDefinitions, "temp");
}
for (int i = 1; i < force.getNumComputedValues(); i++) {
string is = cu.intToString(i);
compute << "force -= deriv"<<energyDerivs->getParameterSuffix(i)<<"*dV"<<is<<"dR;\n";
} }
if (needEnergyParamDerivs)
for (int i = 0; i < force.getNumComputedValues(); i++)
for (int j = 0; j < dValuedParam.size(); j++)
compute << "energyParamDeriv"<<j<<" += deriv"<<energyDerivs->getParameterSuffix(i)<<"*dValuedParam_"<<i<<"_"<<j<<"[index];\n";
map<string, string> replacements; map<string, string> replacements;
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str(); replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
replacements["COMPUTE_FORCES"] = compute.str(); replacements["COMPUTE_FORCES"] = compute.str();
replacements["INIT_PARAM_DERIVS"] = initParamDerivs.str();
replacements["SAVE_PARAM_DERIVS"] = saveParamDerivs.str();
map<string, string> defines; map<string, string> defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms()); defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
...@@ -3605,6 +3730,8 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -3605,6 +3730,8 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
if (pairValueUsesParam[i]) if (pairValueUsesParam[i])
pairValueArgs.push_back(&params->getBuffers()[i].getMemory()); pairValueArgs.push_back(&params->getBuffers()[i].getMemory());
} }
for (int i = 0; i < dValue0dParam.size(); i++)
pairValueArgs.push_back(&dValue0dParam[i]->getDevicePointer());
for (int i = 0; i < (int) tabulatedFunctions.size(); i++) for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
pairValueArgs.push_back(&tabulatedFunctions[i]->getDevicePointer()); pairValueArgs.push_back(&tabulatedFunctions[i]->getDevicePointer());
perParticleValueArgs.push_back(&cu.getPosq().getDevicePointer()); perParticleValueArgs.push_back(&cu.getPosq().getDevicePointer());
...@@ -3615,6 +3742,11 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -3615,6 +3742,11 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
perParticleValueArgs.push_back(&params->getBuffers()[i].getMemory()); perParticleValueArgs.push_back(&params->getBuffers()[i].getMemory());
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) for (int i = 0; i < (int) computedValues->getBuffers().size(); i++)
perParticleValueArgs.push_back(&computedValues->getBuffers()[i].getMemory()); perParticleValueArgs.push_back(&computedValues->getBuffers()[i].getMemory());
for (int i = 0; i < dValuedParam.size(); i++) {
perParticleValueArgs.push_back(&dValue0dParam[i]->getDevicePointer());
for (int j = 0; j < dValuedParam[i]->getBuffers().size(); j++)
perParticleValueArgs.push_back(&dValuedParam[i]->getBuffers()[j].getMemory());
}
for (int i = 0; i < (int) tabulatedFunctions.size(); i++) for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
perParticleValueArgs.push_back(&tabulatedFunctions[i]->getDevicePointer()); perParticleValueArgs.push_back(&tabulatedFunctions[i]->getDevicePointer());
pairEnergyArgs.push_back(&cu.getForce().getDevicePointer()); pairEnergyArgs.push_back(&cu.getForce().getDevicePointer());
...@@ -3648,6 +3780,8 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -3648,6 +3780,8 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
pairEnergyArgs.push_back(&computedValues->getBuffers()[i].getMemory()); pairEnergyArgs.push_back(&computedValues->getBuffers()[i].getMemory());
} }
pairEnergyArgs.push_back(&longEnergyDerivs->getDevicePointer()); pairEnergyArgs.push_back(&longEnergyDerivs->getDevicePointer());
if (needEnergyParamDerivs)
pairEnergyArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer());
for (int i = 0; i < (int) tabulatedFunctions.size(); i++) for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
pairEnergyArgs.push_back(&tabulatedFunctions[i]->getDevicePointer()); pairEnergyArgs.push_back(&tabulatedFunctions[i]->getDevicePointer());
perParticleEnergyArgs.push_back(&cu.getForce().getDevicePointer()); perParticleEnergyArgs.push_back(&cu.getForce().getDevicePointer());
...@@ -3664,9 +3798,11 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -3664,9 +3798,11 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
for (int i = 0; i < (int) energyDerivChain->getBuffers().size(); i++) for (int i = 0; i < (int) energyDerivChain->getBuffers().size(); i++)
perParticleEnergyArgs.push_back(&energyDerivChain->getBuffers()[i].getMemory()); perParticleEnergyArgs.push_back(&energyDerivChain->getBuffers()[i].getMemory());
perParticleEnergyArgs.push_back(&longEnergyDerivs->getDevicePointer()); perParticleEnergyArgs.push_back(&longEnergyDerivs->getDevicePointer());
if (needEnergyParamDerivs)
perParticleEnergyArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer());
for (int i = 0; i < (int) tabulatedFunctions.size(); i++) for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
perParticleEnergyArgs.push_back(&tabulatedFunctions[i]->getDevicePointer()); perParticleEnergyArgs.push_back(&tabulatedFunctions[i]->getDevicePointer());
if (needParameterGradient) { if (needParameterGradient || needEnergyParamDerivs) {
gradientChainRuleArgs.push_back(&cu.getForce().getDevicePointer()); gradientChainRuleArgs.push_back(&cu.getForce().getDevicePointer());
gradientChainRuleArgs.push_back(&cu.getPosq().getDevicePointer()); gradientChainRuleArgs.push_back(&cu.getPosq().getDevicePointer());
if (globals != NULL) if (globals != NULL)
...@@ -3677,6 +3813,12 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -3677,6 +3813,12 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
gradientChainRuleArgs.push_back(&computedValues->getBuffers()[i].getMemory()); gradientChainRuleArgs.push_back(&computedValues->getBuffers()[i].getMemory());
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++)
gradientChainRuleArgs.push_back(&energyDerivs->getBuffers()[i].getMemory()); gradientChainRuleArgs.push_back(&energyDerivs->getBuffers()[i].getMemory());
if (needEnergyParamDerivs) {
gradientChainRuleArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer());
for (int i = 0; i < dValuedParam.size(); i++)
for (int j = 0; j < dValuedParam[i]->getBuffers().size(); j++)
gradientChainRuleArgs.push_back(&dValuedParam[i]->getBuffers()[j].getMemory());
}
} }
} }
if (globals != NULL) { if (globals != NULL) {
...@@ -3703,7 +3845,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -3703,7 +3845,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
cu.executeKernel(perParticleValueKernel, &perParticleValueArgs[0], cu.getPaddedNumAtoms()); cu.executeKernel(perParticleValueKernel, &perParticleValueArgs[0], cu.getPaddedNumAtoms());
cu.executeKernel(pairEnergyKernel, &pairEnergyArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize()); cu.executeKernel(pairEnergyKernel, &pairEnergyArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
cu.executeKernel(perParticleEnergyKernel, &perParticleEnergyArgs[0], cu.getPaddedNumAtoms()); cu.executeKernel(perParticleEnergyKernel, &perParticleEnergyArgs[0], cu.getPaddedNumAtoms());
if (needParameterGradient) if (needParameterGradient || needEnergyParamDerivs)
cu.executeKernel(gradientChainRuleKernel, &gradientChainRuleArgs[0], cu.getPaddedNumAtoms()); cu.executeKernel(gradientChainRuleKernel, &gradientChainRuleArgs[0], cu.getPaddedNumAtoms());
return 0.0; return 0.0;
} }
......
...@@ -28,6 +28,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -28,6 +28,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx; const unsigned int tbx = threadIdx.x - tgx;
mixed energy = 0; mixed energy = 0;
INIT_PARAM_DERIVS
__shared__ AtomData localData[THREAD_BLOCK_SIZE]; __shared__ AtomData localData[THREAD_BLOCK_SIZE];
// First loop: process tiles that contain exclusions. // First loop: process tiles that contain exclusions.
...@@ -69,6 +70,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -69,6 +70,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
real dEdR = 0; real dEdR = 0;
real tempEnergy = 0; real tempEnergy = 0;
const real interactionScale = 0.5f;
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1); bool isExcluded = !(excl & 0x1);
#endif #endif
...@@ -120,6 +122,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -120,6 +122,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2 = y*TILE_SIZE+tj; atom2 = y*TILE_SIZE+tj;
real dEdR = 0; real dEdR = 0;
real tempEnergy = 0; real tempEnergy = 0;
const real interactionScale = 1;
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1); bool isExcluded = !(excl & 0x1);
#endif #endif
...@@ -266,6 +269,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -266,6 +269,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2 = atomIndices[tbx+tj]; atom2 = atomIndices[tbx+tj];
real dEdR = 0; real dEdR = 0;
real tempEnergy = 0; real tempEnergy = 0;
const real interactionScale = 1;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
COMPUTE_INTERACTION COMPUTE_INTERACTION
dEdR /= -r; dEdR /= -r;
...@@ -309,6 +313,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -309,6 +313,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2 = atomIndices[tbx+tj]; atom2 = atomIndices[tbx+tj];
real dEdR = 0; real dEdR = 0;
real tempEnergy = 0; real tempEnergy = 0;
const real interactionScale = 1;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
COMPUTE_INTERACTION COMPUTE_INTERACTION
dEdR /= -r; dEdR /= -r;
...@@ -353,4 +358,5 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -353,4 +358,5 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
pos++; pos++;
} }
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy; energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
SAVE_PARAM_DERIVS
} }
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
extern "C" __global__ void computePerParticleEnergy(long long* __restrict__ forceBuffers, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq extern "C" __global__ void computePerParticleEnergy(long long* __restrict__ forceBuffers, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
mixed energy = 0; mixed energy = 0;
INIT_PARAM_DERIVS
for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) { for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
// Load the derivatives // Load the derivatives
...@@ -17,4 +18,5 @@ extern "C" __global__ void computePerParticleEnergy(long long* __restrict__ forc ...@@ -17,4 +18,5 @@ extern "C" __global__ void computePerParticleEnergy(long long* __restrict__ forc
COMPUTE_ENERGY COMPUTE_ENERGY
} }
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy; energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
SAVE_PARAM_DERIVS
} }
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
extern "C" __global__ void computeGradientChainRuleTerms(long long* __restrict__ forceBuffers, const real4* __restrict__ posq extern "C" __global__ void computeGradientChainRuleTerms(long long* __restrict__ forceBuffers, const real4* __restrict__ posq
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
INIT_PARAM_DERIVS
const real scale = RECIP((real) 0x100000000); const real scale = RECIP((real) 0x100000000);
for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) { for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
real4 pos = posq[index]; real4 pos = posq[index];
...@@ -13,4 +14,5 @@ extern "C" __global__ void computeGradientChainRuleTerms(long long* __restrict__ ...@@ -13,4 +14,5 @@ extern "C" __global__ void computeGradientChainRuleTerms(long long* __restrict__
forceBuffers[index+PADDED_NUM_ATOMS] = (long long) (force.y*0x100000000); forceBuffers[index+PADDED_NUM_ATOMS] = (long long) (force.y*0x100000000);
forceBuffers[index+PADDED_NUM_ATOMS*2] = (long long) (force.z*0x100000000); forceBuffers[index+PADDED_NUM_ATOMS*2] = (long long) (force.z*0x100000000);
} }
SAVE_PARAM_DERIVS
} }
...@@ -73,6 +73,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -73,6 +73,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
COMPUTE_VALUE COMPUTE_VALUE
} }
value += tempValue1; value += tempValue1;
ADD_TEMP_DERIVS1
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
} }
#endif #endif
...@@ -121,6 +122,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -121,6 +122,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
} }
value += tempValue1; value += tempValue1;
localData[tbx+tj].value += tempValue2; localData[tbx+tj].value += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
} }
#endif #endif
...@@ -133,11 +136,13 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -133,11 +136,13 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// Write results. // Write results.
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset1 = x*TILE_SIZE + tgx;
atomicAdd(&global_value[offset], static_cast<unsigned long long>((long long) (value*0x100000000))); atomicAdd(&global_value[offset1], static_cast<unsigned long long>((long long) (value*0x100000000)));
STORE_PARAM_DERIVS1
if (x != y) { if (x != y) {
offset = y*TILE_SIZE + tgx; unsigned int offset2 = y*TILE_SIZE + tgx;
atomicAdd(&global_value[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].value*0x100000000))); atomicAdd(&global_value[offset2], static_cast<unsigned long long>((long long) (localData[threadIdx.x].value*0x100000000)));
STORE_PARAM_DERIVS2
} }
} }
...@@ -244,6 +249,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -244,6 +249,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
} }
value += tempValue1; value += tempValue1;
localData[tbx+tj].value += tempValue2; localData[tbx+tj].value += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
} }
tj = (tj + 1) & (TILE_SIZE - 1); tj = (tj + 1) & (TILE_SIZE - 1);
} }
...@@ -276,6 +283,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -276,6 +283,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
} }
value += tempValue1; value += tempValue1;
localData[tbx+tj].value += tempValue2; localData[tbx+tj].value += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
} }
#endif #endif
...@@ -285,14 +294,19 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -285,14 +294,19 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// Write results. // Write results.
atomicAdd(&global_value[atom1], static_cast<unsigned long long>((long long) (value*0x100000000))); unsigned int offset1 = atom1;
atomicAdd(&global_value[offset1], static_cast<unsigned long long>((long long) (value*0x100000000)));
STORE_PARAM_DERIVS1
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int atom2 = atomIndices[threadIdx.x]; unsigned int atom2 = atomIndices[threadIdx.x];
#else #else
unsigned int atom2 = y*TILE_SIZE + tgx; unsigned int atom2 = y*TILE_SIZE + tgx;
#endif #endif
if (atom2 < PADDED_NUM_ATOMS) if (atom2 < PADDED_NUM_ATOMS) {
atomicAdd(&global_value[atom2], static_cast<unsigned long long>((long long) (localData[threadIdx.x].value*0x100000000))); unsigned int offset2 = atom2;
atomicAdd(&global_value[offset2], static_cast<unsigned long long>((long long) (localData[threadIdx.x].value*0x100000000)));
STORE_PARAM_DERIVS2
}
} }
pos++; pos++;
} }
......
...@@ -8,6 +8,7 @@ extern "C" __global__ void computePerParticleValues(real4* posq, long long* valu ...@@ -8,6 +8,7 @@ extern "C" __global__ void computePerParticleValues(real4* posq, long long* valu
// Load the pairwise value // Load the pairwise value
real sum = valueBuffers[index]/(real) 0x100000000; real sum = valueBuffers[index]/(real) 0x100000000;
REDUCE_PARAM0_DERIV
// Now calculate other values // Now calculate other values
......
...@@ -3301,7 +3301,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -3301,7 +3301,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
string variableName = "dValuedParam_0_"+cl.intToString(i); string variableName = "dValuedParam_0_"+cl.intToString(i);
if (useLong) { if (useLong) {
extraArgs << ", __global const long* restrict dValue0dParam" << i; extraArgs << ", __global const long* restrict dValue0dParam" << i;
deriv0 << "real " << variableName << " = (1.0f/0x100000000)*dValue0dParam[index];\n"; deriv0 << "real " << variableName << " = (1.0f/0x100000000)*dValue0dParam" << i << "[index];\n";
} }
else { else {
extraArgs << ", __global const real* restrict dValue0dParam" << i; extraArgs << ", __global const real* restrict dValue0dParam" << i;
......
...@@ -320,7 +320,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -320,7 +320,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
unsigned int atom2 = y*TILE_SIZE + tgx; unsigned int atom2 = y*TILE_SIZE + tgx;
#endif #endif
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
unsigned in offset1 = atom1; unsigned int offset1 = atom1;
atom_add(&global_value[offset1], (long) (value*0x100000000)); atom_add(&global_value[offset1], (long) (value*0x100000000));
STORE_PARAM_DERIVS1 STORE_PARAM_DERIVS1
if (atom2 < PADDED_NUM_ATOMS) { if (atom2 < PADDED_NUM_ATOMS) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment