Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
4949017b
Commit
4949017b
authored
Jul 27, 2016
by
Peter Eastman
Browse files
Continuing CUDA implementation of parameter derivatives
parent
eae8def5
Changes
9
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
218 additions
and
49 deletions
+218
-49
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+3
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+181
-39
platforms/cuda/src/kernels/customGBEnergyN2.cu
platforms/cuda/src/kernels/customGBEnergyN2.cu
+6
-0
platforms/cuda/src/kernels/customGBEnergyPerParticle.cu
platforms/cuda/src/kernels/customGBEnergyPerParticle.cu
+2
-0
platforms/cuda/src/kernels/customGBGradientChainRule.cu
platforms/cuda/src/kernels/customGBGradientChainRule.cu
+2
-0
platforms/cuda/src/kernels/customGBValueN2.cu
platforms/cuda/src/kernels/customGBValueN2.cu
+21
-7
platforms/cuda/src/kernels/customGBValuePerParticle.cu
platforms/cuda/src/kernels/customGBValuePerParticle.cu
+1
-0
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+1
-1
platforms/opencl/src/kernels/customGBValueN2.cl
platforms/opencl/src/kernels/customGBValueN2.cl
+1
-1
No files found.
platforms/cuda/include/CudaKernels.h
View file @
4949017b
...
...
@@ -826,13 +826,15 @@ public:
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomGBForce
&
force
);
private:
double
cutoff
;
bool
hasInitializedKernels
,
needParameterGradient
;
bool
hasInitializedKernels
,
needParameterGradient
,
needEnergyParamDerivs
;
int
maxTiles
,
numComputedValues
;
CudaContext
&
cu
;
CudaParameterSet
*
params
;
CudaParameterSet
*
computedValues
;
CudaParameterSet
*
energyDerivs
;
CudaParameterSet
*
energyDerivChain
;
std
::
vector
<
CudaParameterSet
*>
dValuedParam
;
std
::
vector
<
CudaArray
*>
dValue0dParam
;
CudaArray
*
longEnergyDerivs
;
CudaArray
*
globals
;
CudaArray
*
valueBuffers
;
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
4949017b
...
...
@@ -2917,6 +2917,10 @@ CudaCalcCustomGBForceKernel::~CudaCalcCustomGBForceKernel() {
delete
valueBuffers
;
for
(
int
i
=
0
;
i
<
(
int
)
tabulatedFunctions
.
size
();
i
++
)
delete
tabulatedFunctions
[
i
];
for
(
int
i
=
0
;
i
<
dValue0dParam
.
size
();
i
++
)
delete
dValue0dParam
[
i
];
for
(
int
i
=
0
;
i
<
dValuedParam
.
size
();
i
++
)
delete
dValuedParam
[
i
];
}
void
CudaCalcCustomGBForceKernel
::
initialize
(
const
System
&
system
,
const
CustomGBForce
&
force
)
{
...
...
@@ -3009,9 +3013,11 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
vector
<
vector
<
Lepton
::
ParsedExpression
>
>
valueGradientExpressions
(
force
.
getNumComputedValues
());
vector
<
vector
<
Lepton
::
ParsedExpression
>
>
valueDerivExpressions
(
force
.
getNumComputedValues
());
vector
<
vector
<
Lepton
::
ParsedExpression
>
>
valueParamDerivExpressions
(
force
.
getNumComputedValues
());
needParameterGradient
=
false
;
for
(
int
i
=
1
;
i
<
force
.
getNumComputedValues
();
i
++
)
{
for
(
int
i
=
0
;
i
<
force
.
getNumComputedValues
();
i
++
)
{
Lepton
::
ParsedExpression
ex
=
Lepton
::
Parser
::
parse
(
computedValueExpressions
[
i
],
functions
).
optimize
();
if
(
i
>
0
)
{
valueGradientExpressions
[
i
].
push_back
(
ex
.
differentiate
(
"x"
).
optimize
());
valueGradientExpressions
[
i
].
push_back
(
ex
.
differentiate
(
"y"
).
optimize
());
valueGradientExpressions
[
i
].
push_back
(
ex
.
differentiate
(
"z"
).
optimize
());
...
...
@@ -3020,7 +3026,11 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
for
(
int
j
=
0
;
j
<
i
;
j
++
)
valueDerivExpressions
[
i
].
push_back
(
ex
.
differentiate
(
computedValueNames
[
j
]).
optimize
());
}
for
(
int
j
=
0
;
j
<
force
.
getNumEnergyParameterDerivatives
();
j
++
)
valueParamDerivExpressions
[
i
].
push_back
(
ex
.
differentiate
(
force
.
getEnergyParameterDerivativeName
(
j
)).
optimize
());
}
vector
<
vector
<
Lepton
::
ParsedExpression
>
>
energyDerivExpressions
(
force
.
getNumEnergyTerms
());
vector
<
vector
<
Lepton
::
ParsedExpression
>
>
energyParamDerivExpressions
(
force
.
getNumEnergyTerms
());
vector
<
bool
>
needChainForValue
(
force
.
getNumComputedValues
(),
false
);
for
(
int
i
=
0
;
i
<
force
.
getNumEnergyTerms
();
i
++
)
{
string
expression
;
...
...
@@ -3042,10 +3052,21 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
needChainForValue
[
j
]
=
true
;
}
}
for
(
int
j
=
0
;
j
<
force
.
getNumEnergyParameterDerivatives
();
j
++
)
energyParamDerivExpressions
[
i
].
push_back
(
ex
.
differentiate
(
force
.
getEnergyParameterDerivativeName
(
j
)).
optimize
());
}
longEnergyDerivs
=
CudaArray
::
create
<
long
long
>
(
cu
,
force
.
getNumComputedValues
()
*
cu
.
getPaddedNumAtoms
(),
"customGBLongEnergyDerivatives"
);
energyDerivs
=
new
CudaParameterSet
(
cu
,
force
.
getNumComputedValues
(),
cu
.
getPaddedNumAtoms
(),
"customGBEnergyDerivatives"
,
true
);
energyDerivChain
=
new
CudaParameterSet
(
cu
,
force
.
getNumComputedValues
(),
cu
.
getPaddedNumAtoms
(),
"customGBEnergyDerivativeChain"
,
true
);
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
needEnergyParamDerivs
=
(
force
.
getNumEnergyParameterDerivatives
()
>
0
);
for
(
int
i
=
0
;
i
<
force
.
getNumEnergyParameterDerivatives
();
i
++
)
{
dValuedParam
.
push_back
(
new
CudaParameterSet
(
cu
,
force
.
getNumComputedValues
(),
cu
.
getPaddedNumAtoms
(),
"dValuedParam"
,
true
));
dValue0dParam
.
push_back
(
CudaArray
::
create
<
long
long
>
(
cu
,
cu
.
getPaddedNumAtoms
(),
"dValue0dParam"
));
cu
.
addAutoclearBuffer
(
*
dValue0dParam
.
back
());
string
name
=
force
.
getEnergyParameterDerivativeName
(
i
);
cu
.
addEnergyParameterDerivative
(
name
);
}
// Create the kernels.
...
...
@@ -3077,11 +3098,18 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
Lepton
::
ParsedExpression
ex
=
Lepton
::
Parser
::
parse
(
computedValueExpressions
[
0
],
functions
).
optimize
();
n2ValueExpressions
[
"tempValue1 = "
]
=
ex
;
n2ValueExpressions
[
"tempValue2 = "
]
=
ex
.
renameVariables
(
rename
);
for
(
int
i
=
0
;
i
<
valueParamDerivExpressions
[
0
].
size
();
i
++
)
{
string
variableBase
=
"temp_dValue0dParam"
+
cu
.
intToString
(
i
+
1
);
if
(
!
isZeroExpression
(
valueParamDerivExpressions
[
0
][
i
]))
{
n2ValueExpressions
[
variableBase
+
"_1 = "
]
=
valueParamDerivExpressions
[
0
][
i
];
n2ValueExpressions
[
variableBase
+
"_2 = "
]
=
valueParamDerivExpressions
[
0
][
i
].
renameVariables
(
rename
);
}
}
n2ValueSource
<<
cu
.
getExpressionUtilities
().
createExpressions
(
n2ValueExpressions
,
variables
,
functionList
,
functionDefinitions
,
"temp"
);
map
<
string
,
string
>
replacements
;
string
n2ValueStr
=
n2ValueSource
.
str
();
replacements
[
"COMPUTE_VALUE"
]
=
n2ValueStr
;
stringstream
extraArgs
,
atomParams
,
loadLocal1
,
loadLocal2
,
load1
,
load2
;
stringstream
extraArgs
,
atomParams
,
loadLocal1
,
loadLocal2
,
load1
,
load2
,
tempDerivs1
,
tempDerivs2
,
storeDeriv1
,
storeDeriv2
;
if
(
force
.
getNumGlobalParameters
()
>
0
)
extraArgs
<<
", const float* globals"
;
pairValueUsesParam
.
resize
(
params
->
getBuffers
().
size
(),
false
);
...
...
@@ -3100,12 +3128,31 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
atomParamSize
+=
buffer
.
getNumComponents
();
}
}
for
(
int
i
=
0
;
i
<
force
.
getNumEnergyParameterDerivatives
();
i
++
)
{
string
derivName
=
"dValue0dParam"
+
cu
.
intToString
(
i
+
1
);
extraArgs
<<
", unsigned long long* __restrict__ global_"
<<
derivName
;
atomParams
<<
"real "
<<
derivName
<<
";
\n
"
;
loadLocal2
<<
"localData[localAtomIndex]."
<<
derivName
<<
" = 0;
\n
"
;
load1
<<
"real "
<<
derivName
<<
" = 0;
\n
"
;
if
(
!
isZeroExpression
(
valueParamDerivExpressions
[
0
][
i
]))
{
load2
<<
"real temp_"
<<
derivName
<<
"_1 = 0;
\n
"
;
load2
<<
"real temp_"
<<
derivName
<<
"_2 = 0;
\n
"
;
tempDerivs1
<<
derivName
<<
" += temp_"
<<
derivName
<<
"_1;
\n
"
;
tempDerivs2
<<
"localData[tbx+tj]."
<<
derivName
<<
" += temp_"
<<
derivName
<<
"_2;
\n
"
;
storeDeriv1
<<
"atomicAdd(&global_"
<<
derivName
<<
"[offset1], static_cast<unsigned long long>((long long) ("
<<
derivName
<<
"*0x100000000)));
\n
"
;
storeDeriv2
<<
"atomicAdd(&global_"
<<
derivName
<<
"[offset2], static_cast<unsigned long long>((long long) (localData[threadIdx.x]."
<<
derivName
<<
"*0x100000000)));
\n
"
;
}
}
replacements
[
"PARAMETER_ARGUMENTS"
]
=
extraArgs
.
str
()
+
tableArgs
.
str
();
replacements
[
"ATOM_PARAMETER_DATA"
]
=
atomParams
.
str
();
replacements
[
"LOAD_LOCAL_PARAMETERS_FROM_1"
]
=
loadLocal1
.
str
();
replacements
[
"LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"
]
=
loadLocal2
.
str
();
replacements
[
"LOAD_ATOM1_PARAMETERS"
]
=
load1
.
str
();
replacements
[
"LOAD_ATOM2_PARAMETERS"
]
=
load2
.
str
();
replacements
[
"ADD_TEMP_DERIVS1"
]
=
tempDerivs1
.
str
();
replacements
[
"ADD_TEMP_DERIVS2"
]
=
tempDerivs2
.
str
();
replacements
[
"STORE_PARAM_DERIVS1"
]
=
storeDeriv1
.
str
();
replacements
[
"STORE_PARAM_DERIVS2"
]
=
storeDeriv2
.
str
();
if
(
useCutoff
)
pairValueDefines
[
"USE_CUTOFF"
]
=
"1"
;
if
(
usePeriodic
)
...
...
@@ -3128,7 +3175,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
{
// Create the kernel to reduce the N2 value and calculate other values.
stringstream
reductionSource
,
extraArgs
;
stringstream
reductionSource
,
extraArgs
,
deriv0
;
if
(
force
.
getNumGlobalParameters
()
>
0
)
extraArgs
<<
", const float* globals"
;
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
...
...
@@ -3142,6 +3189,14 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
extraArgs
<<
", "
<<
buffer
.
getType
()
<<
"* __restrict__ global_"
<<
valueName
;
reductionSource
<<
buffer
.
getType
()
<<
" local_"
<<
valueName
<<
";
\n
"
;
}
for
(
int
i
=
0
;
i
<
force
.
getNumEnergyParameterDerivatives
();
i
++
)
{
string
variableName
=
"dValuedParam_0_"
+
cu
.
intToString
(
i
);
extraArgs
<<
", const long long* __restrict__ dValue0dParam"
<<
i
;
deriv0
<<
"real "
<<
variableName
<<
" = (1.0f/0x100000000)*dValue0dParam"
<<
i
<<
"[index];
\n
"
;
for
(
int
j
=
0
;
j
<
dValuedParam
[
i
]
->
getBuffers
().
size
();
j
++
)
extraArgs
<<
", real* __restrict__ global_dValuedParam_"
<<
j
<<
"_"
<<
i
;
deriv0
<<
"global_dValuedParam_0_"
<<
i
<<
"[index] = dValuedParam_0_"
<<
i
<<
";
\n
"
;
}
reductionSource
<<
"local_values"
<<
computedValues
->
getParameterSuffix
(
0
)
<<
" = sum;
\n
"
;
map
<
string
,
string
>
variables
;
variables
[
"x"
]
=
"pos.x"
;
...
...
@@ -3161,8 +3216,26 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
string
valueName
=
"values"
+
cu
.
intToString
(
i
+
1
);
reductionSource
<<
"global_"
<<
valueName
<<
"[index] = local_"
<<
valueName
<<
";
\n
"
;
}
if
(
needEnergyParamDerivs
)
{
map
<
string
,
Lepton
::
ParsedExpression
>
derivExpressions
;
for
(
int
i
=
1
;
i
<
force
.
getNumComputedValues
();
i
++
)
{
for
(
int
j
=
0
;
j
<
valueParamDerivExpressions
[
i
].
size
();
j
++
)
derivExpressions
[
"real dValuedParam_"
+
cu
.
intToString
(
i
)
+
"_"
+
cu
.
intToString
(
j
)
+
" = "
]
=
valueParamDerivExpressions
[
i
][
j
];
for
(
int
j
=
0
;
j
<
i
;
j
++
)
derivExpressions
[
"real dVdV_"
+
cu
.
intToString
(
i
)
+
"_"
+
cu
.
intToString
(
j
)
+
" = "
]
=
valueDerivExpressions
[
i
][
j
];
}
reductionSource
<<
cu
.
getExpressionUtilities
().
createExpressions
(
derivExpressions
,
variables
,
functionList
,
functionDefinitions
,
"derivChain_temp"
);
for
(
int
i
=
1
;
i
<
force
.
getNumComputedValues
();
i
++
)
{
for
(
int
j
=
0
;
j
<
i
;
j
++
)
for
(
int
k
=
0
;
k
<
valueParamDerivExpressions
[
i
].
size
();
k
++
)
reductionSource
<<
"dValuedParam_"
<<
i
<<
"_"
<<
k
<<
" += dVdV_"
<<
i
<<
"_"
<<
j
<<
"*dValuedParam_"
<<
j
<<
"_"
<<
k
<<
";
\n
"
;
for
(
int
j
=
0
;
j
<
valueParamDerivExpressions
[
i
].
size
();
j
++
)
reductionSource
<<
"global_dValuedParam_"
<<
i
<<
"_"
<<
j
<<
"[index] = dValuedParam_"
<<
i
<<
"_"
<<
j
<<
";
\n
"
;
}
}
map
<
string
,
string
>
replacements
;
replacements
[
"PARAMETER_ARGUMENTS"
]
=
extraArgs
.
str
()
+
tableArgs
.
str
();
replacements
[
"REDUCE_PARAM0_DERIV"
]
=
deriv0
.
str
();
replacements
[
"COMPUTE_VALUES"
]
=
reductionSource
.
str
();
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
...
...
@@ -3207,6 +3280,8 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
n2EnergyExpressions
[
"/*"
+
cu
.
intToString
(
i
+
1
)
+
"*/ deriv"
+
index
+
"_2 += "
]
=
energyDerivExpressions
[
i
][
2
*
j
+
1
];
}
}
for
(
int
j
=
0
;
j
<
force
.
getNumEnergyParameterDerivatives
();
j
++
)
n2EnergyExpressions
[
"energyParamDeriv"
+
cu
.
intToString
(
j
)
+
" += interactionScale*"
]
=
energyParamDerivExpressions
[
i
][
j
];
if
(
exclude
)
n2EnergySource
<<
"if (!isExcluded) {
\n
"
;
n2EnergySource
<<
cu
.
getExpressionUtilities
().
createExpressions
(
n2EnergyExpressions
,
variables
,
functionList
,
functionDefinitions
,
"temp"
);
...
...
@@ -3216,7 +3291,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
map
<
string
,
string
>
replacements
;
string
n2EnergyStr
=
n2EnergySource
.
str
();
replacements
[
"COMPUTE_INTERACTION"
]
=
n2EnergyStr
;
stringstream
extraArgs
,
atomParams
,
loadLocal1
,
loadLocal2
,
clearLocal
,
load1
,
load2
,
declare1
,
recordDeriv
,
storeDerivs1
,
storeDerivs2
;
stringstream
extraArgs
,
atomParams
,
loadLocal1
,
loadLocal2
,
clearLocal
,
load1
,
load2
,
declare1
,
recordDeriv
,
storeDerivs1
,
storeDerivs2
,
initParamDerivs
,
saveParamDerivs
;
if
(
force
.
getNumGlobalParameters
()
>
0
)
extraArgs
<<
", const float* globals"
;
pairEnergyUsesParam
.
resize
(
params
->
getBuffers
().
size
(),
false
);
...
...
@@ -3262,6 +3337,17 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
storeDerivs2
<<
"STORE_DERIVATIVE_2("
<<
index
<<
")
\n
"
;
atomParamSize
++
;
}
if
(
needEnergyParamDerivs
)
{
extraArgs
<<
", mixed* __restrict__ energyParamDerivs"
;
const
vector
<
string
>&
allParamDerivNames
=
cu
.
getEnergyParamDerivNames
();
int
numDerivs
=
allParamDerivNames
.
size
();
for
(
int
i
=
0
;
i
<
force
.
getNumEnergyParameterDerivatives
();
i
++
)
{
initParamDerivs
<<
"mixed energyParamDeriv"
<<
i
<<
" = 0;
\n
"
;
for
(
int
index
=
0
;
index
<
numDerivs
;
index
++
)
if
(
allParamDerivNames
[
index
]
==
force
.
getEnergyParameterDerivativeName
(
i
))
saveParamDerivs
<<
"energyParamDerivs[(blockIdx.x*blockDim.x+threadIdx.x)*"
<<
numDerivs
<<
"+"
<<
index
<<
"] += energyParamDeriv"
<<
i
<<
";
\n
"
;
}
}
replacements
[
"PARAMETER_ARGUMENTS"
]
=
extraArgs
.
str
()
+
tableArgs
.
str
();
replacements
[
"ATOM_PARAMETER_DATA"
]
=
atomParams
.
str
();
replacements
[
"LOAD_LOCAL_PARAMETERS_FROM_1"
]
=
loadLocal1
.
str
();
...
...
@@ -3273,6 +3359,8 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
replacements
[
"RECORD_DERIVATIVE_2"
]
=
recordDeriv
.
str
();
replacements
[
"STORE_DERIVATIVES_1"
]
=
storeDerivs1
.
str
();
replacements
[
"STORE_DERIVATIVES_2"
]
=
storeDerivs2
.
str
();
replacements
[
"INIT_PARAM_DERIVS"
]
=
initParamDerivs
.
str
();
replacements
[
"SAVE_PARAM_DERIVS"
]
=
saveParamDerivs
.
str
();
if
(
useCutoff
)
pairEnergyDefines
[
"USE_CUTOFF"
]
=
"1"
;
if
(
usePeriodic
)
...
...
@@ -3293,7 +3381,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
{
// Create the kernel to reduce the derivatives and calculate per-particle energy terms.
stringstream
compute
,
extraArgs
,
load
;
stringstream
compute
,
extraArgs
,
load
,
initParamDerivs
,
saveParamDerivs
;
if
(
force
.
getNumGlobalParameters
()
>
0
)
extraArgs
<<
", const float* globals"
;
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
...
...
@@ -3321,6 +3409,17 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
for
(
int
i
=
0
;
i
<
energyDerivs
->
getNumParameters
();
++
i
)
load
<<
"derivBuffers"
<<
energyDerivs
->
getParameterSuffix
(
i
,
"[index]"
)
<<
" = RECIP(0x100000000)*derivBuffersIn[index+PADDED_NUM_ATOMS*"
<<
cu
.
intToString
(
i
)
<<
"];
\n
"
;
if
(
needEnergyParamDerivs
)
{
extraArgs
<<
", mixed* __restrict__ energyParamDerivs"
;
const
vector
<
string
>&
allParamDerivNames
=
cu
.
getEnergyParamDerivNames
();
int
numDerivs
=
allParamDerivNames
.
size
();
for
(
int
i
=
0
;
i
<
force
.
getNumEnergyParameterDerivatives
();
i
++
)
{
initParamDerivs
<<
"mixed energyParamDeriv"
<<
i
<<
" = 0;
\n
"
;
for
(
int
index
=
0
;
index
<
numDerivs
;
index
++
)
if
(
allParamDerivNames
[
index
]
==
force
.
getEnergyParameterDerivativeName
(
i
))
saveParamDerivs
<<
"energyParamDerivs[(blockIdx.x*blockDim.x+threadIdx.x)*"
<<
numDerivs
<<
"+"
<<
index
<<
"] += energyParamDeriv"
<<
i
<<
";
\n
"
;
}
}
// Compute the various expressions.
...
...
@@ -3354,6 +3453,8 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
expressions
[
"/*"
+
cu
.
intToString
(
i
+
1
)
+
"*/ force.y -= "
]
=
grady
;
if
(
!
isZeroExpression
(
gradz
))
expressions
[
"/*"
+
cu
.
intToString
(
i
+
1
)
+
"*/ force.z -= "
]
=
gradz
;
for
(
int
j
=
0
;
j
<
force
.
getNumEnergyParameterDerivatives
();
j
++
)
expressions
[
"/*"
+
cu
.
intToString
(
i
+
1
)
+
"*/ energyParamDeriv"
+
cu
.
intToString
(
j
)
+
" += "
]
=
energyParamDerivExpressions
[
i
][
j
];
}
for
(
int
i
=
1
;
i
<
force
.
getNumComputedValues
();
i
++
)
for
(
int
j
=
0
;
j
<
i
;
j
++
)
...
...
@@ -3384,16 +3485,19 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
replacements
[
"PARAMETER_ARGUMENTS"
]
=
extraArgs
.
str
()
+
tableArgs
.
str
();
replacements
[
"LOAD_DERIVATIVES"
]
=
load
.
str
();
replacements
[
"COMPUTE_ENERGY"
]
=
compute
.
str
();
replacements
[
"INIT_PARAM_DERIVS"
]
=
initParamDerivs
.
str
();
replacements
[
"SAVE_PARAM_DERIVS"
]
=
saveParamDerivs
.
str
();
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
CUmodule
module
=
cu
.
createModule
(
cu
.
replaceStrings
(
CudaKernelSources
::
customGBEnergyPerParticle
,
replacements
),
defines
);
perParticleEnergyKernel
=
cu
.
getKernel
(
module
,
"computePerParticleEnergy"
);
}
if
(
needParameterGradient
)
{
// Create the kernel to compute chain rule terms for computed values that depend explicitly on particle coordinates.
if
(
needParameterGradient
||
needEnergyParamDerivs
)
{
// Create the kernel to compute chain rule terms for computed values that depend explicitly on particle coordinates, and for
// derivatives with respect to global parameters.
stringstream
compute
,
extraArgs
;
stringstream
compute
,
extraArgs
,
initParamDerivs
,
saveParamDerivs
;
if
(
force
.
getNumGlobalParameters
()
>
0
)
extraArgs
<<
", const float* globals"
;
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
...
...
@@ -3412,6 +3516,19 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
extraArgs
<<
", "
<<
buffer
.
getType
()
<<
"* __restrict__ derivBuffers"
<<
index
;
compute
<<
buffer
.
getType
()
<<
" deriv"
<<
index
<<
" = derivBuffers"
<<
index
<<
"[index];
\n
"
;
}
if
(
needEnergyParamDerivs
)
{
extraArgs
<<
", mixed* __restrict__ energyParamDerivs"
;
const
vector
<
string
>&
allParamDerivNames
=
cu
.
getEnergyParamDerivNames
();
int
numDerivs
=
allParamDerivNames
.
size
();
for
(
int
i
=
0
;
i
<
force
.
getNumEnergyParameterDerivatives
();
i
++
)
{
for
(
int
j
=
0
;
j
<
dValuedParam
[
i
]
->
getBuffers
().
size
();
j
++
)
extraArgs
<<
", real* __restrict__ dValuedParam_"
<<
j
<<
"_"
<<
i
;
initParamDerivs
<<
"mixed energyParamDeriv"
<<
i
<<
" = 0;
\n
"
;
for
(
int
index
=
0
;
index
<
numDerivs
;
index
++
)
if
(
allParamDerivNames
[
index
]
==
force
.
getEnergyParameterDerivativeName
(
i
))
saveParamDerivs
<<
"energyParamDerivs[(blockIdx.x*blockDim.x+threadIdx.x)*"
<<
numDerivs
<<
"+"
<<
index
<<
"] += energyParamDeriv"
<<
i
<<
";
\n
"
;
}
}
map
<
string
,
string
>
variables
;
variables
[
"x"
]
=
"pos.x"
;
variables
[
"y"
]
=
"pos.y"
;
...
...
@@ -3422,6 +3539,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
variables
[
force
.
getGlobalParameterName
(
i
)]
=
"globals["
+
cu
.
intToString
(
i
)
+
"]"
;
for
(
int
i
=
0
;
i
<
force
.
getNumComputedValues
();
i
++
)
variables
[
computedValueNames
[
i
]]
=
"values"
+
computedValues
->
getParameterSuffix
(
i
,
"[index]"
);
if
(
needParameterGradient
)
{
for
(
int
i
=
1
;
i
<
force
.
getNumComputedValues
();
i
++
)
{
string
is
=
cu
.
intToString
(
i
);
compute
<<
"real3 dV"
<<
is
<<
"dR = make_real3(0);
\n
"
;
...
...
@@ -3447,9 +3565,16 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
string
is
=
cu
.
intToString
(
i
);
compute
<<
"force -= deriv"
<<
energyDerivs
->
getParameterSuffix
(
i
)
<<
"*dV"
<<
is
<<
"dR;
\n
"
;
}
}
if
(
needEnergyParamDerivs
)
for
(
int
i
=
0
;
i
<
force
.
getNumComputedValues
();
i
++
)
for
(
int
j
=
0
;
j
<
dValuedParam
.
size
();
j
++
)
compute
<<
"energyParamDeriv"
<<
j
<<
" += deriv"
<<
energyDerivs
->
getParameterSuffix
(
i
)
<<
"*dValuedParam_"
<<
i
<<
"_"
<<
j
<<
"[index];
\n
"
;
map
<
string
,
string
>
replacements
;
replacements
[
"PARAMETER_ARGUMENTS"
]
=
extraArgs
.
str
()
+
tableArgs
.
str
();
replacements
[
"COMPUTE_FORCES"
]
=
compute
.
str
();
replacements
[
"INIT_PARAM_DERIVS"
]
=
initParamDerivs
.
str
();
replacements
[
"SAVE_PARAM_DERIVS"
]
=
saveParamDerivs
.
str
();
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
...
...
@@ -3605,6 +3730,8 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
if
(
pairValueUsesParam
[
i
])
pairValueArgs
.
push_back
(
&
params
->
getBuffers
()[
i
].
getMemory
());
}
for
(
int
i
=
0
;
i
<
dValue0dParam
.
size
();
i
++
)
pairValueArgs
.
push_back
(
&
dValue0dParam
[
i
]
->
getDevicePointer
());
for
(
int
i
=
0
;
i
<
(
int
)
tabulatedFunctions
.
size
();
i
++
)
pairValueArgs
.
push_back
(
&
tabulatedFunctions
[
i
]
->
getDevicePointer
());
perParticleValueArgs
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
...
...
@@ -3615,6 +3742,11 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
perParticleValueArgs
.
push_back
(
&
params
->
getBuffers
()[
i
].
getMemory
());
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
perParticleValueArgs
.
push_back
(
&
computedValues
->
getBuffers
()[
i
].
getMemory
());
for
(
int
i
=
0
;
i
<
dValuedParam
.
size
();
i
++
)
{
perParticleValueArgs
.
push_back
(
&
dValue0dParam
[
i
]
->
getDevicePointer
());
for
(
int
j
=
0
;
j
<
dValuedParam
[
i
]
->
getBuffers
().
size
();
j
++
)
perParticleValueArgs
.
push_back
(
&
dValuedParam
[
i
]
->
getBuffers
()[
j
].
getMemory
());
}
for
(
int
i
=
0
;
i
<
(
int
)
tabulatedFunctions
.
size
();
i
++
)
perParticleValueArgs
.
push_back
(
&
tabulatedFunctions
[
i
]
->
getDevicePointer
());
pairEnergyArgs
.
push_back
(
&
cu
.
getForce
().
getDevicePointer
());
...
...
@@ -3648,6 +3780,8 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
pairEnergyArgs
.
push_back
(
&
computedValues
->
getBuffers
()[
i
].
getMemory
());
}
pairEnergyArgs
.
push_back
(
&
longEnergyDerivs
->
getDevicePointer
());
if
(
needEnergyParamDerivs
)
pairEnergyArgs
.
push_back
(
&
cu
.
getEnergyParamDerivBuffer
().
getDevicePointer
());
for
(
int
i
=
0
;
i
<
(
int
)
tabulatedFunctions
.
size
();
i
++
)
pairEnergyArgs
.
push_back
(
&
tabulatedFunctions
[
i
]
->
getDevicePointer
());
perParticleEnergyArgs
.
push_back
(
&
cu
.
getForce
().
getDevicePointer
());
...
...
@@ -3664,9 +3798,11 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivChain
->
getBuffers
().
size
();
i
++
)
perParticleEnergyArgs
.
push_back
(
&
energyDerivChain
->
getBuffers
()[
i
].
getMemory
());
perParticleEnergyArgs
.
push_back
(
&
longEnergyDerivs
->
getDevicePointer
());
if
(
needEnergyParamDerivs
)
perParticleEnergyArgs
.
push_back
(
&
cu
.
getEnergyParamDerivBuffer
().
getDevicePointer
());
for
(
int
i
=
0
;
i
<
(
int
)
tabulatedFunctions
.
size
();
i
++
)
perParticleEnergyArgs
.
push_back
(
&
tabulatedFunctions
[
i
]
->
getDevicePointer
());
if
(
needParameterGradient
)
{
if
(
needParameterGradient
||
needEnergyParamDerivs
)
{
gradientChainRuleArgs
.
push_back
(
&
cu
.
getForce
().
getDevicePointer
());
gradientChainRuleArgs
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
if
(
globals
!=
NULL
)
...
...
@@ -3677,6 +3813,12 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
gradientChainRuleArgs
.
push_back
(
&
computedValues
->
getBuffers
()[
i
].
getMemory
());
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
gradientChainRuleArgs
.
push_back
(
&
energyDerivs
->
getBuffers
()[
i
].
getMemory
());
if
(
needEnergyParamDerivs
)
{
gradientChainRuleArgs
.
push_back
(
&
cu
.
getEnergyParamDerivBuffer
().
getDevicePointer
());
for
(
int
i
=
0
;
i
<
dValuedParam
.
size
();
i
++
)
for
(
int
j
=
0
;
j
<
dValuedParam
[
i
]
->
getBuffers
().
size
();
j
++
)
gradientChainRuleArgs
.
push_back
(
&
dValuedParam
[
i
]
->
getBuffers
()[
j
].
getMemory
());
}
}
}
if
(
globals
!=
NULL
)
{
...
...
@@ -3703,7 +3845,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
cu
.
executeKernel
(
perParticleValueKernel
,
&
perParticleValueArgs
[
0
],
cu
.
getPaddedNumAtoms
());
cu
.
executeKernel
(
pairEnergyKernel
,
&
pairEnergyArgs
[
0
],
nb
.
getNumForceThreadBlocks
()
*
nb
.
getForceThreadBlockSize
(),
nb
.
getForceThreadBlockSize
());
cu
.
executeKernel
(
perParticleEnergyKernel
,
&
perParticleEnergyArgs
[
0
],
cu
.
getPaddedNumAtoms
());
if
(
needParameterGradient
)
if
(
needParameterGradient
||
needEnergyParamDerivs
)
cu
.
executeKernel
(
gradientChainRuleKernel
,
&
gradientChainRuleArgs
[
0
],
cu
.
getPaddedNumAtoms
());
return
0.0
;
}
...
...
platforms/cuda/src/kernels/customGBEnergyN2.cu
View file @
4949017b
...
...
@@ -28,6 +28,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
mixed
energy
=
0
;
INIT_PARAM_DERIVS
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
// First loop: process tiles that contain exclusions.
...
...
@@ -69,6 +70,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2
=
y
*
TILE_SIZE
+
j
;
real
dEdR
=
0
;
real
tempEnergy
=
0
;
const
real
interactionScale
=
0.5
f
;
#ifdef USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
);
#endif
...
...
@@ -120,6 +122,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2
=
y
*
TILE_SIZE
+
tj
;
real
dEdR
=
0
;
real
tempEnergy
=
0
;
const
real
interactionScale
=
1
;
#ifdef USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
);
#endif
...
...
@@ -266,6 +269,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2
=
atomIndices
[
tbx
+
tj
];
real
dEdR
=
0
;
real
tempEnergy
=
0
;
const
real
interactionScale
=
1
;
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
COMPUTE_INTERACTION
dEdR
/=
-
r
;
...
...
@@ -309,6 +313,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2
=
atomIndices
[
tbx
+
tj
];
real
dEdR
=
0
;
real
tempEnergy
=
0
;
const
real
interactionScale
=
1
;
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
COMPUTE_INTERACTION
dEdR
/=
-
r
;
...
...
@@ -353,4 +358,5 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
pos
++
;
}
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
SAVE_PARAM_DERIVS
}
platforms/cuda/src/kernels/customGBEnergyPerParticle.cu
View file @
4949017b
...
...
@@ -5,6 +5,7 @@
extern
"C"
__global__
void
computePerParticleEnergy
(
long
long
*
__restrict__
forceBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
PARAMETER_ARGUMENTS
)
{
mixed
energy
=
0
;
INIT_PARAM_DERIVS
for
(
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
// Load the derivatives
...
...
@@ -17,4 +18,5 @@ extern "C" __global__ void computePerParticleEnergy(long long* __restrict__ forc
COMPUTE_ENERGY
}
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
SAVE_PARAM_DERIVS
}
platforms/cuda/src/kernels/customGBGradientChainRule.cu
View file @
4949017b
...
...
@@ -4,6 +4,7 @@
extern
"C"
__global__
void
computeGradientChainRuleTerms
(
long
long
*
__restrict__
forceBuffers
,
const
real4
*
__restrict__
posq
PARAMETER_ARGUMENTS
)
{
INIT_PARAM_DERIVS
const
real
scale
=
RECIP
((
real
)
0x100000000
);
for
(
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
real4
pos
=
posq
[
index
];
...
...
@@ -13,4 +14,5 @@ extern "C" __global__ void computeGradientChainRuleTerms(long long* __restrict__
forceBuffers
[
index
+
PADDED_NUM_ATOMS
]
=
(
long
long
)
(
force
.
y
*
0x100000000
);
forceBuffers
[
index
+
PADDED_NUM_ATOMS
*
2
]
=
(
long
long
)
(
force
.
z
*
0x100000000
);
}
SAVE_PARAM_DERIVS
}
platforms/cuda/src/kernels/customGBValueN2.cu
View file @
4949017b
...
...
@@ -73,6 +73,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
COMPUTE_VALUE
}
value
+=
tempValue1
;
ADD_TEMP_DERIVS1
#ifdef USE_CUTOFF
}
#endif
...
...
@@ -121,6 +122,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
}
value
+=
tempValue1
;
localData
[
tbx
+
tj
].
value
+=
tempValue2
;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
#ifdef USE_CUTOFF
}
#endif
...
...
@@ -133,11 +136,13 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// Write results.
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
global_value
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
value
*
0x100000000
)));
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
global_value
[
offset1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
value
*
0x100000000
)));
STORE_PARAM_DERIVS1
if
(
x
!=
y
)
{
offset
=
y
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
global_value
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
value
*
0x100000000
)));
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
global_value
[
offset2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
value
*
0x100000000
)));
STORE_PARAM_DERIVS2
}
}
...
...
@@ -244,6 +249,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
}
value
+=
tempValue1
;
localData
[
tbx
+
tj
].
value
+=
tempValue2
;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
}
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
...
...
@@ -276,6 +283,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
}
value
+=
tempValue1
;
localData
[
tbx
+
tj
].
value
+=
tempValue2
;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
#ifdef USE_CUTOFF
}
#endif
...
...
@@ -285,14 +294,19 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// Write results.
atomicAdd
(
&
global_value
[
atom1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
value
*
0x100000000
)));
unsigned
int
offset1
=
atom1
;
atomicAdd
(
&
global_value
[
offset1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
value
*
0x100000000
)));
STORE_PARAM_DERIVS1
#ifdef USE_CUTOFF
unsigned
int
atom2
=
atomIndices
[
threadIdx
.
x
];
#else
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
atomicAdd
(
&
global_value
[
atom2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
value
*
0x100000000
)));
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
unsigned
int
offset2
=
atom2
;
atomicAdd
(
&
global_value
[
offset2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
value
*
0x100000000
)));
STORE_PARAM_DERIVS2
}
}
pos
++
;
}
...
...
platforms/cuda/src/kernels/customGBValuePerParticle.cu
View file @
4949017b
...
...
@@ -8,6 +8,7 @@ extern "C" __global__ void computePerParticleValues(real4* posq, long long* valu
// Load the pairwise value
real
sum
=
valueBuffers
[
index
]
/
(
real
)
0x100000000
;
REDUCE_PARAM0_DERIV
// Now calculate other values
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
4949017b
...
...
@@ -3301,7 +3301,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
string variableName = "dValuedParam_0_"+cl.intToString(i);
if (useLong) {
extraArgs << ", __global const long* restrict dValue0dParam" << i;
deriv0 << "real " << variableName << " = (1.0f/0x100000000)*dValue0dParam[index];\n";
deriv0 << "real " << variableName << " = (1.0f/0x100000000)*dValue0dParam
" << i << "
[index];\n";
}
else {
extraArgs << ", __global const real* restrict dValue0dParam" << i;
...
...
platforms/opencl/src/kernels/customGBValueN2.cl
View file @
4949017b
...
...
@@ -320,7 +320,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
unsigned
int
atom2
=
y*TILE_SIZE
+
tgx
;
#
endif
#
ifdef
SUPPORTS_64_BIT_ATOMICS
unsigned
in
offset1
=
atom1
;
unsigned
in
t
offset1
=
atom1
;
atom_add
(
&global_value[offset1],
(
long
)
(
value*0x100000000
))
;
STORE_PARAM_DERIVS1
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment