Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
f15f591b
Commit
f15f591b
authored
Feb 14, 2018
by
Peter Eastman
Browse files
Continuing to convert CudaArrays.
parent
b8c86406
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
430 additions
and
598 deletions
+430
-598
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+86
-98
platforms/cuda/src/CudaBondedUtilities.cpp
platforms/cuda/src/CudaBondedUtilities.cpp
+5
-4
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+339
-496
No files found.
platforms/cuda/include/CudaKernels.h
View file @
f15f591b
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-201
7
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -265,9 +265,8 @@ private:
class
CudaCalcHarmonicBondForceKernel
:
public
CalcHarmonicBondForceKernel
{
public:
CudaCalcHarmonicBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcHarmonicBondForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
)
,
params
(
NULL
)
{
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
)
{
}
~
CudaCalcHarmonicBondForceKernel
();
/**
* Initialize the kernel.
*
...
...
@@ -298,7 +297,7 @@ private:
CudaContext
&
cu
;
ForceInfo
*
info
;
const
System
&
system
;
CudaArray
*
params
;
CudaArray
params
;
};
/**
...
...
@@ -307,7 +306,7 @@ private:
class
CudaCalcCustomBondForceKernel
:
public
CalcCustomBondForceKernel
{
public:
CudaCalcCustomBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomBondForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
),
params
(
NULL
)
,
globals
(
NULL
)
{
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
),
params
(
NULL
)
{
}
~
CudaCalcCustomBondForceKernel
();
/**
...
...
@@ -341,7 +340,7 @@ private:
ForceInfo
*
info
;
const
System
&
system
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
globals
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
};
...
...
@@ -352,9 +351,8 @@ private:
class
CudaCalcHarmonicAngleForceKernel
:
public
CalcHarmonicAngleForceKernel
{
public:
CudaCalcHarmonicAngleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcHarmonicAngleForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
)
,
params
(
NULL
)
{
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
)
{
}
~
CudaCalcHarmonicAngleForceKernel
();
/**
* Initialize the kernel.
*
...
...
@@ -385,7 +383,7 @@ private:
CudaContext
&
cu
;
ForceInfo
*
info
;
const
System
&
system
;
CudaArray
*
params
;
CudaArray
params
;
};
/**
...
...
@@ -394,7 +392,7 @@ private:
class
CudaCalcCustomAngleForceKernel
:
public
CalcCustomAngleForceKernel
{
public:
CudaCalcCustomAngleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomAngleForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
),
params
(
NULL
)
,
globals
(
NULL
)
{
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
),
params
(
NULL
)
{
}
~
CudaCalcCustomAngleForceKernel
();
/**
...
...
@@ -428,7 +426,7 @@ private:
ForceInfo
*
info
;
const
System
&
system
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
globals
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
};
...
...
@@ -439,9 +437,8 @@ private:
class
CudaCalcPeriodicTorsionForceKernel
:
public
CalcPeriodicTorsionForceKernel
{
public:
CudaCalcPeriodicTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcPeriodicTorsionForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
)
,
params
(
NULL
)
{
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
)
{
}
~
CudaCalcPeriodicTorsionForceKernel
();
/**
* Initialize the kernel.
*
...
...
@@ -472,7 +469,7 @@ private:
CudaContext
&
cu
;
ForceInfo
*
info
;
const
System
&
system
;
CudaArray
*
params
;
CudaArray
params
;
};
/**
...
...
@@ -481,9 +478,8 @@ private:
class
CudaCalcRBTorsionForceKernel
:
public
CalcRBTorsionForceKernel
{
public:
CudaCalcRBTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcRBTorsionForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
)
,
params1
(
NULL
),
params2
(
NULL
)
{
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
)
{
}
~
CudaCalcRBTorsionForceKernel
();
/**
* Initialize the kernel.
*
...
...
@@ -514,8 +510,8 @@ private:
CudaContext
&
cu
;
ForceInfo
*
info
;
const
System
&
system
;
CudaArray
*
params1
;
CudaArray
*
params2
;
CudaArray
params1
;
CudaArray
params2
;
};
/**
...
...
@@ -524,9 +520,8 @@ private:
class
CudaCalcCMAPTorsionForceKernel
:
public
CalcCMAPTorsionForceKernel
{
public:
CudaCalcCMAPTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCMAPTorsionForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
)
,
coefficients
(
NULL
),
mapPositions
(
NULL
),
torsionMaps
(
NULL
)
{
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
)
{
}
~
CudaCalcCMAPTorsionForceKernel
();
/**
* Initialize the kernel.
*
...
...
@@ -558,9 +553,9 @@ private:
ForceInfo
*
info
;
const
System
&
system
;
std
::
vector
<
int2
>
mapPositionsVec
;
CudaArray
*
coefficients
;
CudaArray
*
mapPositions
;
CudaArray
*
torsionMaps
;
CudaArray
coefficients
;
CudaArray
mapPositions
;
CudaArray
torsionMaps
;
};
/**
...
...
@@ -569,7 +564,7 @@ private:
class
CudaCalcCustomTorsionForceKernel
:
public
CalcCustomTorsionForceKernel
{
public:
CudaCalcCustomTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomTorsionForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
),
params
(
NULL
)
,
globals
(
NULL
)
{
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
),
params
(
NULL
)
{
}
~
CudaCalcCustomTorsionForceKernel
();
/**
...
...
@@ -603,7 +598,7 @@ private:
ForceInfo
*
info
;
const
System
&
system
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
globals
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
};
...
...
@@ -614,9 +609,7 @@ private:
class
CudaCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
public:
CudaCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
cu
(
cu
),
hasInitializedFFT
(
false
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
directPmeGrid
(
NULL
),
reciprocalPmeGrid
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeDispersionBsplineModuliX
(
NULL
),
pmeDispersionBsplineModuliY
(
NULL
),
pmeDispersionBsplineModuliZ
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
dispersionFft
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
{
cu
(
cu
),
hasInitializedFFT
(
false
),
sort
(
NULL
),
dispersionFft
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
{
}
~
CudaCalcNonbondedForceKernel
();
/**
...
...
@@ -682,20 +675,20 @@ private:
CudaContext
&
cu
;
ForceInfo
*
info
;
bool
hasInitializedFFT
;
CudaArray
*
sigmaEpsilon
;
CudaArray
*
exceptionParams
;
CudaArray
*
cosSinSums
;
CudaArray
*
directPmeGrid
;
CudaArray
*
reciprocalPmeGrid
;
CudaArray
*
pmeBsplineModuliX
;
CudaArray
*
pmeBsplineModuliY
;
CudaArray
*
pmeBsplineModuliZ
;
CudaArray
*
pmeDispersionBsplineModuliX
;
CudaArray
*
pmeDispersionBsplineModuliY
;
CudaArray
*
pmeDispersionBsplineModuliZ
;
CudaArray
*
pmeAtomRange
;
CudaArray
*
pmeAtomGridIndex
;
CudaArray
*
pmeEnergyBuffer
;
CudaArray
sigmaEpsilon
;
CudaArray
exceptionParams
;
CudaArray
cosSinSums
;
CudaArray
directPmeGrid
;
CudaArray
reciprocalPmeGrid
;
CudaArray
pmeBsplineModuliX
;
CudaArray
pmeBsplineModuliY
;
CudaArray
pmeBsplineModuliZ
;
CudaArray
pmeDispersionBsplineModuliX
;
CudaArray
pmeDispersionBsplineModuliY
;
CudaArray
pmeDispersionBsplineModuliZ
;
CudaArray
pmeAtomRange
;
CudaArray
pmeAtomGridIndex
;
CudaArray
pmeEnergyBuffer
;
CudaSort
*
sort
;
Kernel
cpuPme
;
PmeIO
*
pmeio
;
...
...
@@ -737,7 +730,7 @@ private:
class
CudaCalcCustomNonbondedForceKernel
:
public
CalcCustomNonbondedForceKernel
{
public:
CudaCalcCustomNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomNonbondedForceKernel
(
name
,
platform
),
cu
(
cu
),
params
(
NULL
),
globals
(
NULL
),
interactionGroupData
(
NULL
),
forceCopy
(
NULL
),
system
(
system
),
hasInitializedKernel
(
false
)
{
cu
(
cu
),
params
(
NULL
),
forceCopy
(
NULL
),
system
(
system
),
hasInitializedKernel
(
false
)
{
}
~
CudaCalcCustomNonbondedForceKernel
();
/**
...
...
@@ -769,13 +762,13 @@ private:
CudaContext
&
cu
;
ForceInfo
*
info
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
*
interactionGroupData
;
CudaArray
globals
;
CudaArray
interactionGroupData
;
CUfunction
interactionGroupKernel
;
std
::
vector
<
void
*>
interactionGroupArgs
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
CudaArray
*
>
tabulatedFunctions
;
std
::
vector
<
CudaArray
>
tabulatedFunctions
;
double
longRangeCoefficient
;
std
::
vector
<
double
>
longRangeCoefficientDerivs
;
bool
hasInitializedLongRangeCorrection
,
hasInitializedKernel
,
hasParamDerivs
;
...
...
@@ -790,9 +783,8 @@ private:
class
CudaCalcGBSAOBCForceKernel
:
public
CalcGBSAOBCForceKernel
{
public:
CudaCalcGBSAOBCForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
)
:
CalcGBSAOBCForceKernel
(
name
,
platform
),
cu
(
cu
),
hasCreatedKernels
(
false
)
,
params
(
NULL
),
bornSum
(
NULL
),
bornRadii
(
NULL
),
bornForce
(
NULL
),
obcChain
(
NULL
)
{
hasCreatedKernels
(
false
)
{
}
~
CudaCalcGBSAOBCForceKernel
();
/**
* Initialize the kernel.
*
...
...
@@ -823,11 +815,11 @@ private:
int
maxTiles
;
CudaContext
&
cu
;
ForceInfo
*
info
;
CudaArray
*
params
;
CudaArray
*
bornSum
;
CudaArray
*
bornRadii
;
CudaArray
*
bornForce
;
CudaArray
*
obcChain
;
CudaArray
params
;
CudaArray
bornSum
;
CudaArray
bornRadii
;
CudaArray
bornForce
;
CudaArray
obcChain
;
CUfunction
computeBornSumKernel
;
CUfunction
reduceBornSumKernel
;
CUfunction
force1Kernel
;
...
...
@@ -841,8 +833,7 @@ private:
class
CudaCalcCustomGBForceKernel
:
public
CalcCustomGBForceKernel
{
public:
CudaCalcCustomGBForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomGBForceKernel
(
name
,
platform
),
hasInitializedKernels
(
false
),
cu
(
cu
),
params
(
NULL
),
computedValues
(
NULL
),
energyDerivs
(
NULL
),
energyDerivChain
(
NULL
),
longEnergyDerivs
(
NULL
),
globals
(
NULL
),
valueBuffers
(
NULL
),
system
(
system
)
{
hasInitializedKernels
(
false
),
cu
(
cu
),
params
(
NULL
),
computedValues
(
NULL
),
energyDerivs
(
NULL
),
energyDerivChain
(
NULL
),
system
(
system
)
{
}
~
CudaCalcCustomGBForceKernel
();
/**
...
...
@@ -880,13 +871,13 @@ private:
CudaParameterSet
*
energyDerivs
;
CudaParameterSet
*
energyDerivChain
;
std
::
vector
<
CudaParameterSet
*>
dValuedParam
;
std
::
vector
<
CudaArray
*
>
dValue0dParam
;
CudaArray
*
longEnergyDerivs
;
CudaArray
*
globals
;
CudaArray
*
valueBuffers
;
std
::
vector
<
CudaArray
>
dValue0dParam
;
CudaArray
longEnergyDerivs
;
CudaArray
globals
;
CudaArray
valueBuffers
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
CudaArray
*
>
tabulatedFunctions
;
std
::
vector
<
CudaArray
>
tabulatedFunctions
;
std
::
vector
<
bool
>
pairValueUsesParam
,
pairEnergyUsesParam
,
pairEnergyUsesValue
;
const
System
&
system
;
CUfunction
pairValueKernel
,
perParticleValueKernel
,
pairEnergyKernel
,
perParticleEnergyKernel
,
gradientChainRuleKernel
;
...
...
@@ -901,7 +892,7 @@ private:
class
CudaCalcCustomExternalForceKernel
:
public
CalcCustomExternalForceKernel
{
public:
CudaCalcCustomExternalForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomExternalForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
),
params
(
NULL
)
,
globals
(
NULL
)
{
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
),
params
(
NULL
)
{
}
~
CudaCalcCustomExternalForceKernel
();
/**
...
...
@@ -935,7 +926,7 @@ private:
ForceInfo
*
info
;
const
System
&
system
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
globals
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
};
...
...
@@ -946,8 +937,7 @@ private:
class
CudaCalcCustomHbondForceKernel
:
public
CalcCustomHbondForceKernel
{
public:
CudaCalcCustomHbondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomHbondForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cu
(
cu
),
donorParams
(
NULL
),
acceptorParams
(
NULL
),
donors
(
NULL
),
acceptors
(
NULL
),
globals
(
NULL
),
donorExclusions
(
NULL
),
acceptorExclusions
(
NULL
),
system
(
system
)
{
hasInitializedKernel
(
false
),
cu
(
cu
),
donorParams
(
NULL
),
acceptorParams
(
NULL
),
system
(
system
)
{
}
~
CudaCalcCustomHbondForceKernel
();
/**
...
...
@@ -981,14 +971,14 @@ private:
ForceInfo
*
info
;
CudaParameterSet
*
donorParams
;
CudaParameterSet
*
acceptorParams
;
CudaArray
*
globals
;
CudaArray
*
donors
;
CudaArray
*
acceptors
;
CudaArray
*
donorExclusions
;
CudaArray
*
acceptorExclusions
;
CudaArray
globals
;
CudaArray
donors
;
CudaArray
acceptors
;
CudaArray
donorExclusions
;
CudaArray
acceptorExclusions
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
CudaArray
*
>
tabulatedFunctions
;
std
::
vector
<
CudaArray
>
tabulatedFunctions
;
std
::
vector
<
void
*>
donorArgs
,
acceptorArgs
;
const
System
&
system
;
CUfunction
donorKernel
,
acceptorKernel
;
...
...
@@ -1000,7 +990,7 @@ private:
class
CudaCalcCustomCentroidBondForceKernel
:
public
CalcCustomCentroidBondForceKernel
{
public:
CudaCalcCustomCentroidBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomCentroidBondForceKernel
(
name
,
platform
),
cu
(
cu
),
params
(
NULL
),
globals
(
NULL
),
groupParticles
(
NULL
),
groupWeights
(
NULL
),
groupOffsets
(
NULL
),
groupForces
(
NULL
),
bondGroups
(
NULL
),
centerPositions
(
NULL
),
system
(
system
)
{
cu
(
cu
),
params
(
NULL
),
system
(
system
)
{
}
~
CudaCalcCustomCentroidBondForceKernel
();
/**
...
...
@@ -1034,16 +1024,16 @@ private:
CudaContext
&
cu
;
ForceInfo
*
info
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
*
groupParticles
;
CudaArray
*
groupWeights
;
CudaArray
*
groupOffsets
;
CudaArray
*
groupForces
;
CudaArray
*
bondGroups
;
CudaArray
*
centerPositions
;
CudaArray
globals
;
CudaArray
groupParticles
;
CudaArray
groupWeights
;
CudaArray
groupOffsets
;
CudaArray
groupForces
;
CudaArray
bondGroups
;
CudaArray
centerPositions
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
CudaArray
*
>
tabulatedFunctions
;
std
::
vector
<
CudaArray
>
tabulatedFunctions
;
std
::
vector
<
void
*>
groupForcesArgs
;
CUfunction
computeCentersKernel
,
groupForcesKernel
,
applyForcesKernel
;
const
System
&
system
;
...
...
@@ -1055,7 +1045,7 @@ private:
class
CudaCalcCustomCompoundBondForceKernel
:
public
CalcCustomCompoundBondForceKernel
{
public:
CudaCalcCustomCompoundBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomCompoundBondForceKernel
(
name
,
platform
),
cu
(
cu
),
params
(
NULL
),
globals
(
NULL
),
system
(
system
)
{
cu
(
cu
),
params
(
NULL
),
system
(
system
)
{
}
~
CudaCalcCustomCompoundBondForceKernel
();
/**
...
...
@@ -1088,10 +1078,10 @@ private:
CudaContext
&
cu
;
ForceInfo
*
info
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
globals
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
CudaArray
*
>
tabulatedFunctions
;
std
::
vector
<
CudaArray
>
tabulatedFunctions
;
const
System
&
system
;
};
...
...
@@ -1101,9 +1091,7 @@ private:
class
CudaCalcCustomManyParticleForceKernel
:
public
CalcCustomManyParticleForceKernel
{
public:
CudaCalcCustomManyParticleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomManyParticleForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cu
(
cu
),
params
(
NULL
),
particleTypes
(
NULL
),
orderIndex
(
NULL
),
particleOrder
(
NULL
),
exclusions
(
NULL
),
exclusionStartIndex
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
neighborPairs
(
NULL
),
numNeighborPairs
(
NULL
),
neighborStartIndex
(
NULL
),
numNeighborsForAtom
(
NULL
),
neighbors
(
NULL
),
system
(
system
)
{
hasInitializedKernel
(
false
),
cu
(
cu
),
params
(
NULL
),
system
(
system
)
{
}
~
CudaCalcCustomManyParticleForceKernel
();
/**
...
...
@@ -1138,21 +1126,21 @@ private:
NonbondedMethod
nonbondedMethod
;
int
maxNeighborPairs
,
forceWorkgroupSize
,
findNeighborsWorkgroupSize
;
CudaParameterSet
*
params
;
CudaArray
*
particleTypes
;
CudaArray
*
orderIndex
;
CudaArray
*
particleOrder
;
CudaArray
*
exclusions
;
CudaArray
*
exclusionStartIndex
;
CudaArray
*
blockCenter
;
CudaArray
*
blockBoundingBox
;
CudaArray
*
neighborPairs
;
CudaArray
*
numNeighborPairs
;
CudaArray
*
neighborStartIndex
;
CudaArray
*
numNeighborsForAtom
;
CudaArray
*
neighbors
;
CudaArray
particleTypes
;
CudaArray
orderIndex
;
CudaArray
particleOrder
;
CudaArray
exclusions
;
CudaArray
exclusionStartIndex
;
CudaArray
blockCenter
;
CudaArray
blockBoundingBox
;
CudaArray
neighborPairs
;
CudaArray
numNeighborPairs
;
CudaArray
neighborStartIndex
;
CudaArray
numNeighborsForAtom
;
CudaArray
neighbors
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
CudaArray
*
>
tabulatedFunctions
;
std
::
vector
<
CudaArray
>
tabulatedFunctions
;
std
::
vector
<
void
*>
forceArgs
,
blockBoundsArgs
,
neighborsArgs
,
startIndicesArgs
,
copyPairsArgs
;
const
System
&
system
;
CUfunction
forceKernel
,
blockBoundsKernel
,
neighborsKernel
,
startIndicesKernel
,
copyPairsKernel
;
...
...
platforms/cuda/src/CudaBondedUtilities.cpp
View file @
f15f591b
...
...
@@ -84,8 +84,10 @@ void CudaBondedUtilities::initialize(const System& system) {
for
(
int
i
=
0
;
i
<
numForces
;
i
++
)
{
int
numBonds
=
forceAtoms
[
i
].
size
();
int
numAtoms
=
forceAtoms
[
i
][
0
].
size
();
int
numArrays
=
(
numAtoms
+
3
)
/
4
;
int
startAtom
=
0
;
while
(
startAtom
<
numAtoms
)
{
atomIndices
[
i
].
resize
(
numArrays
);
for
(
int
j
=
0
;
j
<
numArrays
;
j
++
)
{
int
width
=
min
(
numAtoms
-
startAtom
,
4
);
int
paddedWidth
=
(
width
==
3
?
4
:
width
);
vector
<
unsigned
int
>
indexVec
(
paddedWidth
*
numBonds
);
...
...
@@ -93,9 +95,8 @@ void CudaBondedUtilities::initialize(const System& system) {
for
(
int
atom
=
0
;
atom
<
width
;
atom
++
)
indexVec
[
bond
*
paddedWidth
+
atom
]
=
forceAtoms
[
i
][
bond
][
startAtom
+
atom
];
}
atomIndices
[
i
].
push_back
(
CudaArray
());
atomIndices
[
i
].
back
().
initialize
(
context
,
numBonds
,
4
*
paddedWidth
,
"bondedIndices"
);
atomIndices
[
i
].
back
().
upload
(
&
indexVec
[
0
]);
atomIndices
[
i
][
j
].
initialize
(
context
,
numBonds
,
4
*
paddedWidth
,
"bondedIndices"
);
atomIndices
[
i
][
j
].
upload
(
&
indexVec
[
0
]);
startAtom
+=
width
;
}
}
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
f15f591b
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-201
7
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -505,12 +505,6 @@ private:
const HarmonicBondForce& force;
};
CudaCalcHarmonicBondForceKernel::~CudaCalcHarmonicBondForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
}
void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
...
...
@@ -520,18 +514,18 @@ void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const Har
if (numBonds == 0)
return;
vector<vector<int> > atoms(numBonds, vector<int>(2));
params
= CudaArray::creat
e<float2>(cu, numBonds, "bondParams");
params
.initializ
e<float2>(cu, numBonds, "bondParams");
vector<float2> paramVector(numBonds);
for (int i = 0; i < numBonds; i++) {
double length, k;
force.getBondParameters(startIndex+i, atoms[i][0], atoms[i][1], length, k);
paramVector[i] = make_float2((float) length, (float) k);
}
params
->
upload(paramVector);
params
.
upload(paramVector);
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = CudaKernelSources::harmonicBondForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params
->
getDevicePointer(), "float2");
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params
.
getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::bondForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cu.addForce(info);
...
...
@@ -560,7 +554,7 @@ void CudaCalcHarmonicBondForceKernel::copyParametersToContext(ContextImpl& conte
force.getBondParameters(startIndex+i, atom1, atom2, length, k);
paramVector[i] = make_float2((float) length, (float) k);
}
params
->
upload(paramVector);
params
.
upload(paramVector);
// Mark that the current reordering may be invalid.
...
...
@@ -600,8 +594,6 @@ CudaCalcCustomBondForceKernel::~CudaCalcCustomBondForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
}
void CudaCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
...
...
@@ -649,9 +641,9 @@ void CudaCalcCustomBondForceKernel::initialize(const System& system, const Custo
variables[name] = "bondParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals
= CudaArray::creat
e<float>(cu, force.getNumGlobalParameters(), "customBondGlobals");
globals
->
upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals
->
getDevicePointer(), "float");
globals
.initializ
e<float>(cu, force.getNumGlobalParameters(), "customBondGlobals");
globals
.
upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals
.
getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
...
...
@@ -680,7 +672,7 @@ void CudaCalcCustomBondForceKernel::initialize(const System& system, const Custo
}
double CudaCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
...
...
@@ -689,7 +681,7 @@ double CudaCalcCustomBondForceKernel::execute(ContextImpl& context, bool include
globalParamValues[i] = value;
}
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
return 0.0;
}
...
...
@@ -749,12 +741,6 @@ private:
const HarmonicAngleForce& force;
};
CudaCalcHarmonicAngleForceKernel::~CudaCalcHarmonicAngleForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
}
void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
...
...
@@ -764,7 +750,7 @@ void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const Ha
if (numAngles == 0)
return;
vector<vector<int> > atoms(numAngles, vector<int>(3));
params
= CudaArray::creat
e<float2>(cu, numAngles, "angleParams");
params
.initializ
e<float2>(cu, numAngles, "angleParams");
vector<float2> paramVector(numAngles);
for (int i = 0; i < numAngles; i++) {
double angle, k;
...
...
@@ -772,11 +758,11 @@ void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const Ha
paramVector[i] = make_float2((float) angle, (float) k);
}
params
->
upload(paramVector);
params
.
upload(paramVector);
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = CudaKernelSources::harmonicAngleForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params
->
getDevicePointer(), "float2");
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params
.
getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::angleForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cu.addForce(info);
...
...
@@ -805,7 +791,7 @@ void CudaCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& cont
force.getAngleParameters(startIndex+i, atom1, atom2, atom3, angle, k);
paramVector[i] = make_float2((float) angle, (float) k);
}
params
->
upload(paramVector);
params
.
upload(paramVector);
// Mark that the current reordering may be invalid.
...
...
@@ -846,8 +832,6 @@ CudaCalcCustomAngleForceKernel::~CudaCalcCustomAngleForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
}
void CudaCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
...
...
@@ -895,9 +879,9 @@ void CudaCalcCustomAngleForceKernel::initialize(const System& system, const Cust
variables[name] = "angleParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals
= CudaArray::creat
e<float>(cu, force.getNumGlobalParameters(), "customAngleGlobals");
globals
->
upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals
->
getDevicePointer(), "float");
globals
.initializ
e<float>(cu, force.getNumGlobalParameters(), "customAngleGlobals");
globals
.
upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals
.
getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
...
...
@@ -926,7 +910,7 @@ void CudaCalcCustomAngleForceKernel::initialize(const System& system, const Cust
}
double CudaCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
...
...
@@ -935,7 +919,7 @@ double CudaCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includ
globalParamValues[i] = value;
}
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
return 0.0;
}
...
...
@@ -996,12 +980,6 @@ private:
const PeriodicTorsionForce& force;
};
CudaCalcPeriodicTorsionForceKernel::~CudaCalcPeriodicTorsionForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
}
void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
...
...
@@ -1011,7 +989,7 @@ void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const
if (numTorsions == 0)
return;
vector<vector<int> > atoms(numTorsions, vector<int>(4));
params
= CudaArray::creat
e<float4>(cu, numTorsions, "periodicTorsionParams");
params
.initializ
e<float4>(cu, numTorsions, "periodicTorsionParams");
vector<float4> paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
int periodicity;
...
...
@@ -1019,11 +997,11 @@ void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const
force.getTorsionParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], periodicity, phase, k);
paramVector[i] = make_float4((float) k, (float) phase, (float) periodicity, 0.0f);
}
params
->
upload(paramVector);
params
.
upload(paramVector);
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = CudaKernelSources::periodicTorsionForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params
->
getDevicePointer(), "float4");
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params
.
getDevicePointer(), "float4");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cu.addForce(info);
...
...
@@ -1052,7 +1030,7 @@ void CudaCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl& co
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, periodicity, phase, k);
paramVector[i] = make_float4((float) k, (float) phase, (float) periodicity, 0.0f);
}
params
->
upload(paramVector);
params
.
upload(paramVector);
// Mark that the current reordering may be invalid.
...
...
@@ -1087,14 +1065,6 @@ private:
const RBTorsionForce& force;
};
CudaCalcRBTorsionForceKernel::~CudaCalcRBTorsionForceKernel() {
cu.setAsCurrent();
if (params1 != NULL)
delete params1;
if (params2 != NULL)
delete params2;
}
void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
...
...
@@ -1104,8 +1074,8 @@ void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTors
if (numTorsions == 0)
return;
vector<vector<int> > atoms(numTorsions, vector<int>(4));
params1
= CudaArray::creat
e<float4>(cu, numTorsions, "rbTorsionParams1");
params2
= CudaArray::creat
e<float2>(cu, numTorsions, "rbTorsionParams2");
params1
.initializ
e<float4>(cu, numTorsions, "rbTorsionParams1");
params2
.initializ
e<float2>(cu, numTorsions, "rbTorsionParams2");
vector<float4> paramVector1(numTorsions);
vector<float2> paramVector2(numTorsions);
for (int i = 0; i < numTorsions; i++) {
...
...
@@ -1115,13 +1085,13 @@ void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTors
paramVector2[i] = make_float2((float) c4, (float) c5);
}
params1
->
upload(paramVector1);
params2
->
upload(paramVector2);
params1
.
upload(paramVector1);
params2
.
upload(paramVector2);
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = CudaKernelSources::rbTorsionForce;
replacements["PARAMS1"] = cu.getBondedUtilities().addArgument(params1
->
getDevicePointer(), "float4");
replacements["PARAMS2"] = cu.getBondedUtilities().addArgument(params2
->
getDevicePointer(), "float2");
replacements["PARAMS1"] = cu.getBondedUtilities().addArgument(params1
.
getDevicePointer(), "float4");
replacements["PARAMS2"] = cu.getBondedUtilities().addArgument(params2
.
getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cu.addForce(info);
...
...
@@ -1152,8 +1122,8 @@ void CudaCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& context,
paramVector1[i] = make_float4((float) c0, (float) c1, (float) c2, (float) c3);
paramVector2[i] = make_float2((float) c4, (float) c5);
}
params1
->
upload(paramVector1);
params2
->
upload(paramVector2);
params1
.
upload(paramVector1);
params2
.
upload(paramVector2);
// Mark that the current reordering may be invalid.
...
...
@@ -1190,15 +1160,6 @@ private:
const CMAPTorsionForce& force;
};
CudaCalcCMAPTorsionForceKernel::~CudaCalcCMAPTorsionForceKernel() {
if (coefficients != NULL)
delete coefficients;
if (mapPositions != NULL)
delete mapPositions;
if (torsionMaps != NULL)
delete torsionMaps;
}
void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
...
...
@@ -1230,17 +1191,17 @@ void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAP
vector<int> torsionMapsVec(numTorsions);
for (int i = 0; i < numTorsions; i++)
force.getTorsionParameters(startIndex+i, torsionMapsVec[i], atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], atoms[i][4], atoms[i][5], atoms[i][6], atoms[i][7]);
coefficients
= CudaArray::creat
e<float4>(cu, coeffVec.size(), "cmapTorsionCoefficients");
mapPositions
= CudaArray::creat
e<int2>(cu, numMaps, "cmapTorsionMapPositions");
torsionMaps
= CudaArray::creat
e<int>(cu, numTorsions, "cmapTorsionMaps");
coefficients
->
upload(coeffVec);
mapPositions
->
upload(mapPositionsVec);
torsionMaps
->
upload(torsionMapsVec);
coefficients
.initializ
e<float4>(cu, coeffVec.size(), "cmapTorsionCoefficients");
mapPositions
.initializ
e<int2>(cu, numMaps, "cmapTorsionMapPositions");
torsionMaps
.initializ
e<int>(cu, numTorsions, "cmapTorsionMaps");
coefficients
.
upload(coeffVec);
mapPositions
.
upload(mapPositionsVec);
torsionMaps
.
upload(torsionMapsVec);
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COEFF"] = cu.getBondedUtilities().addArgument(coefficients
->
getDevicePointer(), "float4");
replacements["MAP_POS"] = cu.getBondedUtilities().addArgument(mapPositions
->
getDevicePointer(), "int2");
replacements["MAPS"] = cu.getBondedUtilities().addArgument(torsionMaps
->
getDevicePointer(), "int");
replacements["COEFF"] = cu.getBondedUtilities().addArgument(coefficients
.
getDevicePointer(), "float4");
replacements["MAP_POS"] = cu.getBondedUtilities().addArgument(mapPositions
.
getDevicePointer(), "int2");
replacements["MAPS"] = cu.getBondedUtilities().addArgument(torsionMaps
.
getDevicePointer(), "int");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::cmapTorsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cu.addForce(info);
...
...
@@ -1256,9 +1217,9 @@ void CudaCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& contex
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (mapPositions
->
getSize() != numMaps)
if (mapPositions
.
getSize() != numMaps)
throw OpenMMException("updateParametersInContext: The number of maps has changed");
if (torsionMaps
->
getSize() != numTorsions)
if (torsionMaps
.
getSize() != numTorsions)
throw OpenMMException("updateParametersInContext: The number of CMAP torsions has changed");
// Update the maps.
...
...
@@ -1281,7 +1242,7 @@ void CudaCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& contex
coeffVec.push_back(make_float4((float) c[j][12], (float) c[j][13], (float) c[j][14], (float) c[j][15]));
}
}
coefficients
->
upload(coeffVec);
coefficients
.
upload(coeffVec);
// Update the indices.
...
...
@@ -1290,7 +1251,7 @@ void CudaCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& contex
int index[8];
force.getTorsionParameters(i, torsionMapsVec[i], index[0], index[1], index[2], index[3], index[4], index[5], index[6], index[7]);
}
torsionMaps
->
upload(torsionMapsVec);
torsionMaps
.
upload(torsionMapsVec);
}
class CudaCalcCustomTorsionForceKernel::ForceInfo : public CudaForceInfo {
...
...
@@ -1327,8 +1288,6 @@ private:
CudaCalcCustomTorsionForceKernel::~CudaCalcCustomTorsionForceKernel() {
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
}
void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
...
...
@@ -1376,9 +1335,9 @@ void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const Cu
variables[name] = "torsionParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals
= CudaArray::creat
e<float>(cu, force.getNumGlobalParameters(), "customTorsionGlobals");
globals
->
upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals
->
getDevicePointer(), "float");
globals
.initializ
e<float>(cu, force.getNumGlobalParameters(), "customTorsionGlobals");
globals
.
upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals
.
getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
...
...
@@ -1407,7 +1366,7 @@ void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const Cu
}
double CudaCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
...
...
@@ -1416,7 +1375,7 @@ double CudaCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool incl
globalParamValues[i] = value;
}
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
return 0.0;
}
...
...
@@ -1576,34 +1535,6 @@ private:
CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
cu.setAsCurrent();
if (sigmaEpsilon != NULL)
delete sigmaEpsilon;
if (exceptionParams != NULL)
delete exceptionParams;
if (cosSinSums != NULL)
delete cosSinSums;
if (directPmeGrid != NULL)
delete directPmeGrid;
if (reciprocalPmeGrid != NULL)
delete reciprocalPmeGrid;
if (pmeBsplineModuliX != NULL)
delete pmeBsplineModuliX;
if (pmeBsplineModuliY != NULL)
delete pmeBsplineModuliY;
if (pmeBsplineModuliZ != NULL)
delete pmeBsplineModuliZ;
if (pmeDispersionBsplineModuliX != NULL)
delete pmeDispersionBsplineModuliX;
if (pmeDispersionBsplineModuliY != NULL)
delete pmeDispersionBsplineModuliY;
if (pmeDispersionBsplineModuliZ != NULL)
delete pmeDispersionBsplineModuliZ;
if (pmeAtomRange != NULL)
delete pmeAtomRange;
if (pmeAtomGridIndex != NULL)
delete pmeAtomGridIndex;
if (pmeEnergyBuffer != NULL)
delete pmeEnergyBuffer;
if (sort != NULL)
delete sort;
if (fft != NULL)
...
...
@@ -1647,7 +1578,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Initialize nonbonded interactions.
int numParticles = force.getNumParticles();
sigmaEpsilon
= CudaArray::creat
e<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
sigmaEpsilon
.initializ
e<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
CudaArray& posq = cu.getPosq();
vector<double4> temp(posq.getSize());
float4* posqf = (float4*) &temp[0];
...
...
@@ -1682,7 +1613,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
exclusionList[exclusion.second].push_back(exclusion.first);
}
posq.upload(&temp[0]);
sigmaEpsilon
->
upload(sigmaEpsilonVector);
sigmaEpsilon
.
upload(sigmaEpsilonVector);
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
bool useCutoff = (nonbondedMethod != NoCutoff);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
...
...
@@ -1740,7 +1671,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
ewaldSumsKernel = cu.getKernel(module, "calculateEwaldCosSinSums");
ewaldForcesKernel = cu.getKernel(module, "calculateEwaldForces");
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double2) : sizeof(float2));
cosSinSums
= new CudaArray
(cu, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
cosSinSums
.initialize
(cu, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
}
}
else if (nonbondedMethod == PME || nonbondedMethod == LJPME) {
...
...
@@ -1844,22 +1775,22 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
int gridElements = gridSizeX*gridSizeY*gridSizeZ;
if (doLJPME)
gridElements = max(gridElements, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
directPmeGrid
= new CudaArray
(cu, gridElements, cu.getComputeCapability() >= 2.0 ? 2*elementSize : 2*sizeof(long long), "originalPmeGrid");
reciprocalPmeGrid
= new CudaArray
(cu, gridElements, 2*elementSize, "reciprocalPmeGrid");
cu.addAutoclearBuffer(
*
directPmeGrid);
pmeBsplineModuliX
= new CudaArray
(cu, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY
= new CudaArray
(cu, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ
= new CudaArray
(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ");
directPmeGrid
.initialize
(cu, gridElements, cu.getComputeCapability() >= 2.0 ? 2*elementSize : 2*sizeof(long long), "originalPmeGrid");
reciprocalPmeGrid
.initialize
(cu, gridElements, 2*elementSize, "reciprocalPmeGrid");
cu.addAutoclearBuffer(directPmeGrid);
pmeBsplineModuliX
.initialize
(cu, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY
.initialize
(cu, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ
.initialize
(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ");
if (doLJPME) {
pmeDispersionBsplineModuliX
= new CudaArray
(cu, dispersionGridSizeX, elementSize, "pmeDispersionBsplineModuliX");
pmeDispersionBsplineModuliY
= new CudaArray
(cu, dispersionGridSizeY, elementSize, "pmeDispersionBsplineModuliY");
pmeDispersionBsplineModuliZ
= new CudaArray
(cu, dispersionGridSizeZ, elementSize, "pmeDispersionBsplineModuliZ");
pmeDispersionBsplineModuliX
.initialize
(cu, dispersionGridSizeX, elementSize, "pmeDispersionBsplineModuliX");
pmeDispersionBsplineModuliY
.initialize
(cu, dispersionGridSizeY, elementSize, "pmeDispersionBsplineModuliY");
pmeDispersionBsplineModuliZ
.initialize
(cu, dispersionGridSizeZ, elementSize, "pmeDispersionBsplineModuliZ");
}
pmeAtomRange
= CudaArray::creat
e<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex
= CudaArray::creat
e<int2>(cu, numParticles, "pmeAtomGridIndex");
pmeAtomRange
.initializ
e<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex
.initializ
e<int2>(cu, numParticles, "pmeAtomGridIndex");
int energyElementSize = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
pmeEnergyBuffer
= new CudaArray
(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer");
cu.clearBuffer(
*
pmeEnergyBuffer);
pmeEnergyBuffer
.initialize
(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer");
cu.clearBuffer(pmeEnergyBuffer);
sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms());
int cufftVersion;
cufftGetVersion(&cufftVersion);
...
...
@@ -1903,7 +1834,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
if (recipForceGroup < 0)
recipForceGroup = force.getForceGroup();
cu.addPreComputation(new SyncStreamPreComputation(cu, pmeStream, pmeSyncEvent, recipForceGroup));
cu.addPostComputation(new SyncStreamPostComputation(cu, pmeSyncEvent, cu.getKernel(module, "addEnergy"),
*
pmeEnergyBuffer, recipForceGroup));
cu.addPostComputation(new SyncStreamPostComputation(cu, pmeSyncEvent, cu.getKernel(module, "addEnergy"), pmeEnergyBuffer, recipForceGroup));
}
hasInitializedFFT = true;
...
...
@@ -1916,9 +1847,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
xsize = gridSizeX;
ysize = gridSizeY;
zsize = gridSizeZ;
xmoduli = pmeBsplineModuliX;
ymoduli = pmeBsplineModuliY;
zmoduli = pmeBsplineModuliZ;
xmoduli =
&
pmeBsplineModuliX;
ymoduli =
&
pmeBsplineModuliY;
zmoduli =
&
pmeBsplineModuliZ;
}
else {
if (!doLJPME)
...
...
@@ -1926,9 +1857,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
xsize = dispersionGridSizeX;
ysize = dispersionGridSizeY;
zsize = dispersionGridSizeZ;
xmoduli = pmeDispersionBsplineModuliX;
ymoduli = pmeDispersionBsplineModuliY;
zmoduli = pmeDispersionBsplineModuliZ;
xmoduli =
&
pmeDispersionBsplineModuliX;
ymoduli =
&
pmeDispersionBsplineModuliY;
zmoduli =
&
pmeDispersionBsplineModuliZ;
}
int maxSize = max(max(xsize, ysize), zsize);
vector<double> data(PmeOrder);
...
...
@@ -2008,7 +1939,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true);
if (hasLJ)
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2,
sizeof(float2), sigmaEpsilon
->
getDevicePointer()));
sizeof(float2), sigmaEpsilon
.
getDevicePointer()));
// Initialize the exceptions.
...
...
@@ -2019,7 +1950,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
if (numExceptions > 0) {
exceptionAtoms.resize(numExceptions);
vector<vector<int> > atoms(numExceptions, vector<int>(2));
exceptionParams
= CudaArray::creat
e<float4>(cu, numExceptions, "exceptionParams");
exceptionParams
.initializ
e<float4>(cu, numExceptions, "exceptionParams");
vector<float4> exceptionParamsVector(numExceptions);
for (int i = 0; i < numExceptions; i++) {
double chargeProd, sigma, epsilon;
...
...
@@ -2027,9 +1958,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
exceptionParamsVector[i] = make_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
}
exceptionParams
->
upload(exceptionParamsVector);
exceptionParams
.
upload(exceptionParamsVector);
map<string, string> replacements;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams
->
getDevicePointer(), "float4");
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams
.
getDevicePointer(), "float4");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
}
info = new ForceInfo(force);
...
...
@@ -2037,13 +1968,13 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
if (cosSinSums
!= NULL
&& includeReciprocal) {
void* sumsArgs[] = {&cu.getEnergyBuffer().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums
->
getDevicePointer(), cu.getPeriodicBoxSizePointer()};
cu.executeKernel(ewaldSumsKernel, sumsArgs, cosSinSums
->
getSize());
void* forcesArgs[] = {&cu.getForce().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums
->
getDevicePointer(), cu.getPeriodicBoxSizePointer()};
if (cosSinSums
.isInitialized()
&& includeReciprocal) {
void* sumsArgs[] = {&cu.getEnergyBuffer().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums
.
getDevicePointer(), cu.getPeriodicBoxSizePointer()};
cu.executeKernel(ewaldSumsKernel, sumsArgs, cosSinSums
.
getSize());
void* forcesArgs[] = {&cu.getForce().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums
.
getDevicePointer(), cu.getPeriodicBoxSizePointer()};
cu.executeKernel(ewaldForcesKernel, forcesArgs, cu.getNumAtoms());
}
if (directPmeGrid
!= NULL
&& includeReciprocal) {
if (directPmeGrid
.isInitialized()
&& includeReciprocal) {
if (usePmeStream)
cu.setCurrentStream(pmeStream);
...
...
@@ -2075,118 +2006,118 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
// Execute the reciprocal space kernels.
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex
->
getDevicePointer(), cu.getPeriodicBoxSizePointer(),
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex
.
getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeGridIndexKernel, gridIndexArgs, cu.getNumAtoms());
sort->sort(
*
pmeAtomGridIndex);
sort->sort(pmeAtomGridIndex);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid
->
getDevicePointer(), cu.getPeriodicBoxSizePointer(),
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid
.
getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex
->
getDevicePointer()};
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex
.
getDevicePointer()};
cu.executeKernel(pmeSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);
if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) {
void* finishSpreadArgs[] = {&directPmeGrid
->
getDevicePointer()};
void* finishSpreadArgs[] = {&directPmeGrid
.
getDevicePointer()};
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs, gridSizeX*gridSizeY*gridSizeZ, 256);
}
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
cufftExecD2Z(fftForward, (double*) directPmeGrid
->
getDevicePointer(), (double2*) reciprocalPmeGrid
->
getDevicePointer());
cufftExecD2Z(fftForward, (double*) directPmeGrid
.
getDevicePointer(), (double2*) reciprocalPmeGrid
.
getDevicePointer());
else
cufftExecR2C(fftForward, (float*) directPmeGrid
->
getDevicePointer(), (float2*) reciprocalPmeGrid
->
getDevicePointer());
cufftExecR2C(fftForward, (float*) directPmeGrid
.
getDevicePointer(), (float2*) reciprocalPmeGrid
.
getDevicePointer());
}
else {
fft->execFFT(
*
directPmeGrid,
*
reciprocalPmeGrid, true);
fft->execFFT(directPmeGrid, reciprocalPmeGrid, true);
}
if (includeEnergy) {
void* computeEnergyArgs[] = {&reciprocalPmeGrid
->
getDevicePointer(), usePmeStream ? &pmeEnergyBuffer
->
getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
&pmeBsplineModuliX
->
getDevicePointer(), &pmeBsplineModuliY
->
getDevicePointer(), &pmeBsplineModuliZ
->
getDevicePointer(),
void* computeEnergyArgs[] = {&reciprocalPmeGrid
.
getDevicePointer(), usePmeStream ? &pmeEnergyBuffer
.
getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
&pmeBsplineModuliX
.
getDevicePointer(), &pmeBsplineModuliY
.
getDevicePointer(), &pmeBsplineModuliZ
.
getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeEvalEnergyKernel, computeEnergyArgs, gridSizeX*gridSizeY*gridSizeZ);
}
void* convolutionArgs[] = {&reciprocalPmeGrid
->
getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&pmeBsplineModuliX
->
getDevicePointer(), &pmeBsplineModuliY
->
getDevicePointer(), &pmeBsplineModuliZ
->
getDevicePointer(),
void* convolutionArgs[] = {&reciprocalPmeGrid
.
getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&pmeBsplineModuliX
.
getDevicePointer(), &pmeBsplineModuliY
.
getDevicePointer(), &pmeBsplineModuliZ
.
getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeConvolutionKernel, convolutionArgs, gridSizeX*gridSizeY*gridSizeZ, 256);
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
cufftExecZ2D(fftBackward, (double2*) reciprocalPmeGrid
->
getDevicePointer(), (double*) directPmeGrid
->
getDevicePointer());
cufftExecZ2D(fftBackward, (double2*) reciprocalPmeGrid
.
getDevicePointer(), (double*) directPmeGrid
.
getDevicePointer());
else
cufftExecC2R(fftBackward, (float2*) reciprocalPmeGrid
->
getDevicePointer(), (float*) directPmeGrid
->
getDevicePointer());
cufftExecC2R(fftBackward, (float2*) reciprocalPmeGrid
.
getDevicePointer(), (float*) directPmeGrid
.
getDevicePointer());
}
else {
fft->execFFT(
*
reciprocalPmeGrid,
*
directPmeGrid, false);
fft->execFFT(reciprocalPmeGrid, directPmeGrid, false);
}
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid
->
getDevicePointer(), cu.getPeriodicBoxSizePointer(),
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid
.
getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex
->
getDevicePointer()};
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex
.
getDevicePointer()};
cu.executeKernel(pmeInterpolateForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
// As written, we check only the Electrostatic grid pointer to get here. We could separate them out, but for
// now we assume that LJPME can only be used if electrostatic PME is also active.
if (doLJPME) {
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex
->
getDevicePointer(), cu.getPeriodicBoxSizePointer(),
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex
.
getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeDispersionGridIndexKernel, gridIndexArgs, cu.getNumAtoms());
sort->sort(
*
pmeAtomGridIndex);
sort->sort(pmeAtomGridIndex);
cu.clearBuffer(
*
directPmeGrid);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid
->
getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.clearBuffer(directPmeGrid);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid
.
getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex
->
getDevicePointer(),
&sigmaEpsilon
->
getDevicePointer()};
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex
.
getDevicePointer(),
&sigmaEpsilon
.
getDevicePointer()};
cu.executeKernel(pmeDispersionSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);
if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) {
void* finishSpreadArgs[] = {&directPmeGrid
->
getDevicePointer()};
void* finishSpreadArgs[] = {&directPmeGrid
.
getDevicePointer()};
cu.executeKernel(pmeDispersionFinishSpreadChargeKernel, finishSpreadArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ, 256);
}
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
cufftExecD2Z(dispersionFftForward, (double*) directPmeGrid
->
getDevicePointer(), (double2*) reciprocalPmeGrid
->
getDevicePointer());
cufftExecD2Z(dispersionFftForward, (double*) directPmeGrid
.
getDevicePointer(), (double2*) reciprocalPmeGrid
.
getDevicePointer());
else
cufftExecR2C(dispersionFftForward, (float*) directPmeGrid
->
getDevicePointer(), (float2*) reciprocalPmeGrid
->
getDevicePointer());
cufftExecR2C(dispersionFftForward, (float*) directPmeGrid
.
getDevicePointer(), (float2*) reciprocalPmeGrid
.
getDevicePointer());
}
else {
dispersionFft->execFFT(
*
directPmeGrid,
*
reciprocalPmeGrid, true);
dispersionFft->execFFT(directPmeGrid, reciprocalPmeGrid, true);
}
if (includeEnergy) {
void* computeEnergyArgs[] = {&reciprocalPmeGrid
->
getDevicePointer(), usePmeStream ? &pmeEnergyBuffer
->
getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
&pmeDispersionBsplineModuliX
->
getDevicePointer(), &pmeDispersionBsplineModuliY
->
getDevicePointer(), &pmeDispersionBsplineModuliZ
->
getDevicePointer(),
void* computeEnergyArgs[] = {&reciprocalPmeGrid
.
getDevicePointer(), usePmeStream ? &pmeEnergyBuffer
.
getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
&pmeDispersionBsplineModuliX
.
getDevicePointer(), &pmeDispersionBsplineModuliY
.
getDevicePointer(), &pmeDispersionBsplineModuliZ
.
getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeEvalDispersionEnergyKernel, computeEnergyArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
}
void* convolutionArgs[] = {&reciprocalPmeGrid
->
getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&pmeDispersionBsplineModuliX
->
getDevicePointer(), &pmeDispersionBsplineModuliY
->
getDevicePointer(), &pmeDispersionBsplineModuliZ
->
getDevicePointer(),
void* convolutionArgs[] = {&reciprocalPmeGrid
.
getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&pmeDispersionBsplineModuliX
.
getDevicePointer(), &pmeDispersionBsplineModuliY
.
getDevicePointer(), &pmeDispersionBsplineModuliZ
.
getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeDispersionConvolutionKernel, convolutionArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ, 256);
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
cufftExecZ2D(dispersionFftBackward, (double2*) reciprocalPmeGrid
->
getDevicePointer(), (double*) directPmeGrid
->
getDevicePointer());
cufftExecZ2D(dispersionFftBackward, (double2*) reciprocalPmeGrid
.
getDevicePointer(), (double*) directPmeGrid
.
getDevicePointer());
else
cufftExecC2R(dispersionFftBackward, (float2*) reciprocalPmeGrid
->
getDevicePointer(), (float*) directPmeGrid
->
getDevicePointer());
cufftExecC2R(dispersionFftBackward, (float2*) reciprocalPmeGrid
.
getDevicePointer(), (float*) directPmeGrid
.
getDevicePointer());
}
else {
dispersionFft->execFFT(
*
reciprocalPmeGrid,
*
directPmeGrid, false);
dispersionFft->execFFT(reciprocalPmeGrid, directPmeGrid, false);
}
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid
->
getDevicePointer(), cu.getPeriodicBoxSizePointer(),
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid
.
getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex
->
getDevicePointer(),
&sigmaEpsilon
->
getDevicePointer()};
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex
.
getDevicePointer(),
&sigmaEpsilon
.
getDevicePointer()};
cu.executeKernel(pmeInterpolateDispersionForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
}
if (usePmeStream) {
...
...
@@ -2253,7 +2184,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
sumSquaredCharges += charge*charge;
}
cu.setCharges(chargeVector);
sigmaEpsilon
->
upload(sigmaEpsilonVector);
sigmaEpsilon
.
upload(sigmaEpsilonVector);
// Record the exceptions.
...
...
@@ -2265,7 +2196,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
exceptionParamsVector[i] = make_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
}
exceptionParams
->
upload(exceptionParamsVector);
exceptionParams
.
upload(exceptionParamsVector);
}
// Compute other values.
...
...
@@ -2355,12 +2286,6 @@ CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
if (interactionGroupData != NULL)
delete interactionGroupData;
for (auto function : tabulatedFunctions)
delete function;
if (forceCopy != NULL)
delete forceCopy;
}
...
...
@@ -2377,7 +2302,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
int numParticles = force.getNumParticles();
params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), numParticles, "customNonbondedParameters");
if (force.getNumGlobalParameters() > 0)
globals
= CudaArray::creat
e<float>(cu, force.getNumGlobalParameters(), "customNonbondedGlobals");
globals
.initializ
e<float>(cu, force.getNumGlobalParameters(), "customNonbondedGlobals");
vector<vector<float> > paramVector(numParticles);
vector<vector<int> > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) {
...
...
@@ -2402,6 +2327,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
vector<string> tableTypes;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
...
...
@@ -2410,9 +2336,9 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions
.push_back(CudaArray::creat
e<float>(cu, f.size(), "TabulatedFunction")
)
;
tabulatedFunctions[
tabulatedFunctions.size()-1]->
upload(f);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[
tabulatedFunctions.size()-1]->
getDevicePointer()));
tabulatedFunctions
[i].initializ
e<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions[
i].
upload(f);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[
i].
getDevicePointer()));
if (width == 1)
tableTypes.push_back("float");
else
...
...
@@ -2427,8 +2353,8 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
if (globals
!= NULL
)
globals
->
upload(globalParamValues);
if (globals
.isInitialized()
)
globals
.
upload(globalParamValues);
bool useCutoff = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff && force.getNonbondedMethod() != CustomNonbondedForce::CutoffNonPeriodic);
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
...
...
@@ -2482,9 +2408,9 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"params"+cu.intToString(i+1), buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
}
if (globals
!= NULL
) {
globals
->
upload(globalParamValues);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals
->
getDevicePointer()));
if (globals
.isInitialized()
) {
globals
.
upload(globalParamValues);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals
.
getDevicePointer()));
}
}
info = new ForceInfo(force);
...
...
@@ -2660,8 +2586,8 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
for (; indexInTileSet < 32; indexInTileSet++)
groupData.push_back(make_int4(0, 0, 0, 0));
}
interactionGroupData
= CudaArray::creat
e<int4>(cu, groupData.size(), "interactionGroupData");
interactionGroupData
->
upload(groupData);
interactionGroupData
.initializ
e<int4>(cu, groupData.size(), "interactionGroupData");
interactionGroupData
.
upload(groupData);
// Create the kernel.
...
...
@@ -2687,7 +2613,7 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
args<<", const "<<buffers[i].getType()<<"* __restrict__ global_params"<<(i+1);
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
args << ", const " << tableTypes[i]<< "* __restrict__ table" << i;
if (globals
!= NULL
)
if (globals
.isInitialized()
)
args<<", const float* __restrict__ globals";
if (hasParamDerivs)
args << ", mixed* __restrict__ energyParamDerivs";
...
...
@@ -2758,7 +2684,7 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
}
double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
...
...
@@ -2767,7 +2693,7 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
globalParamValues[i] = value;
}
if (changed) {
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
if (forceCopy != NULL) {
CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
hasInitializedLongRangeCorrection = true;
...
...
@@ -2778,13 +2704,13 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
hasInitializedLongRangeCorrection = true;
}
if (interactionGroupData
!= NULL
) {
if (interactionGroupData
.isInitialized()
) {
if (!hasInitializedKernel) {
hasInitializedKernel = true;
interactionGroupArgs.push_back(&cu.getForce().getDevicePointer());
interactionGroupArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
interactionGroupArgs.push_back(&cu.getPosq().getDevicePointer());
interactionGroupArgs.push_back(&interactionGroupData
->
getDevicePointer());
interactionGroupArgs.push_back(&interactionGroupData
.
getDevicePointer());
interactionGroupArgs.push_back(cu.getPeriodicBoxSizePointer());
interactionGroupArgs.push_back(cu.getInvPeriodicBoxSizePointer());
interactionGroupArgs.push_back(cu.getPeriodicBoxVecXPointer());
...
...
@@ -2792,10 +2718,10 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
interactionGroupArgs.push_back(cu.getPeriodicBoxVecZPointer());
for (auto& buffer : params->getBuffers())
interactionGroupArgs.push_back(&buffer.getMemory());
for (auto function : tabulatedFunctions)
interactionGroupArgs.push_back(&function
->
getDevicePointer());
if (globals
!= NULL
)
interactionGroupArgs.push_back(&globals
->
getDevicePointer());
for (auto
&
function : tabulatedFunctions)
interactionGroupArgs.push_back(&function
.
getDevicePointer());
if (globals
.isInitialized()
)
interactionGroupArgs.push_back(&globals
.
getDevicePointer());
if (hasParamDerivs)
interactionGroupArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer());
}
...
...
@@ -2855,38 +2781,24 @@ private:
const GBSAOBCForce& force;
};
CudaCalcGBSAOBCForceKernel::~CudaCalcGBSAOBCForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
if (bornSum != NULL)
delete bornSum;
if (bornRadii != NULL)
delete bornRadii;
if (bornForce != NULL)
delete bornForce;
if (obcChain != NULL)
delete obcChain;
}
void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
cu.setAsCurrent();
if (cu.getPlatformData().contexts.size() > 1)
throw OpenMMException("GBSAOBCForce does not support using multiple CUDA devices");
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
params
= CudaArray::creat
e<float2>(cu, cu.getPaddedNumAtoms(), "gbsaObcParams");
params
.initializ
e<float2>(cu, cu.getPaddedNumAtoms(), "gbsaObcParams");
if (cu.getUseDoublePrecision()) {
bornRadii
= CudaArray::creat
e<double>(cu, cu.getPaddedNumAtoms(), "bornRadii");
obcChain
= CudaArray::creat
e<double>(cu, cu.getPaddedNumAtoms(), "obcChain");
bornRadii
.initializ
e<double>(cu, cu.getPaddedNumAtoms(), "bornRadii");
obcChain
.initializ
e<double>(cu, cu.getPaddedNumAtoms(), "obcChain");
}
else {
bornRadii
= CudaArray::creat
e<float>(cu, cu.getPaddedNumAtoms(), "bornRadii");
obcChain
= CudaArray::creat
e<float>(cu, cu.getPaddedNumAtoms(), "obcChain");
bornRadii
.initializ
e<float>(cu, cu.getPaddedNumAtoms(), "bornRadii");
obcChain
.initializ
e<float>(cu, cu.getPaddedNumAtoms(), "obcChain");
}
bornSum
= CudaArray::creat
e<long long>(cu, cu.getPaddedNumAtoms(), "bornSum");
bornForce
= CudaArray::creat
e<long long>(cu, cu.getPaddedNumAtoms(), "bornForce");
cu.addAutoclearBuffer(
*
bornSum);
cu.addAutoclearBuffer(
*
bornForce);
bornSum
.initializ
e<long long>(cu, cu.getPaddedNumAtoms(), "bornSum");
bornForce
.initializ
e<long long>(cu, cu.getPaddedNumAtoms(), "bornForce");
cu.addAutoclearBuffer(bornSum);
cu.addAutoclearBuffer(bornForce);
CudaArray& posq = cu.getPosq();
vector<double4> temp(posq.getSize());
float4* posqf = (float4*) &temp[0];
...
...
@@ -2904,7 +2816,7 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
posqf[i] = make_float4(0, 0, 0, (float) charge);
}
posq.upload(&temp[0]);
params
->
upload(paramsVector);
params
.
upload(paramsVector);
prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
surfaceAreaFactor = -6.0*4*M_PI*force.getSurfaceAreaEnergy();
bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
...
...
@@ -2912,8 +2824,8 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
cutoff = force.getCutoffDistance();
string source = CudaKernelSources::gbsaObc2;
nb.addInteraction(useCutoff, usePeriodic, false, cutoff, vector<vector<int> >(), source, force.getForceGroup());
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(float2), params
->
getDevicePointer()));
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("bornForce", "long long", 1, sizeof(long long), bornForce
->
getDevicePointer()));
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(float2), params
.
getDevicePointer()));
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("bornForce", "long long", 1, sizeof(long long), bornForce
.
getDevicePointer()));
info = new ForceInfo(force);
cu.addForce(info);
}
...
...
@@ -2951,9 +2863,9 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
map<string, string> replacements;
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+cu.replaceStrings(CudaKernelSources::gbsaObc1, replacements), defines);
computeBornSumKernel = cu.getKernel(module, "computeBornSum");
computeSumArgs.push_back(&bornSum
->
getDevicePointer());
computeSumArgs.push_back(&bornSum
.
getDevicePointer());
computeSumArgs.push_back(&cu.getPosq().getDevicePointer());
computeSumArgs.push_back(¶ms
->
getDevicePointer());
computeSumArgs.push_back(¶ms
.
getDevicePointer());
if (nb.getUseCutoff()) {
computeSumArgs.push_back(&nb.getInteractingTiles().getDevicePointer());
computeSumArgs.push_back(&nb.getInteractionCount().getDevicePointer());
...
...
@@ -2972,10 +2884,10 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
computeSumArgs.push_back(&nb.getExclusionTiles().getDevicePointer());
force1Kernel = cu.getKernel(module, "computeGBSAForce1");
force1Args.push_back(&cu.getForce().getDevicePointer());
force1Args.push_back(&bornForce
->
getDevicePointer());
force1Args.push_back(&bornForce
.
getDevicePointer());
force1Args.push_back(&cu.getEnergyBuffer().getDevicePointer());
force1Args.push_back(&cu.getPosq().getDevicePointer());
force1Args.push_back(&bornRadii
->
getDevicePointer());
force1Args.push_back(&bornRadii
.
getDevicePointer());
force1Args.push_back(NULL);
if (nb.getUseCutoff()) {
force1Args.push_back(&nb.getInteractingTiles().getDevicePointer());
...
...
@@ -3008,12 +2920,12 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
}
cu.executeKernel(computeBornSumKernel, &computeSumArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
float alpha = 1.0f, beta = 0.8f, gamma = 4.85f;
void* reduceSumArgs[] = {&alpha, &beta, &gamma, &bornSum
->
getDevicePointer(), ¶ms
->
getDevicePointer(),
&bornRadii
->
getDevicePointer(), &obcChain
->
getDevicePointer()};
void* reduceSumArgs[] = {&alpha, &beta, &gamma, &bornSum
.
getDevicePointer(), ¶ms
.
getDevicePointer(),
&bornRadii
.
getDevicePointer(), &obcChain
.
getDevicePointer()};
cu.executeKernel(reduceBornSumKernel, reduceSumArgs, cu.getPaddedNumAtoms());
cu.executeKernel(force1Kernel, &force1Args[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
void* reduceForceArgs[] = {&bornForce
->
getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(), ¶ms
->
getDevicePointer(),
&bornRadii
->
getDevicePointer(), &obcChain
->
getDevicePointer()};
void* reduceForceArgs[] = {&bornForce
.
getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(), ¶ms
.
getDevicePointer(),
&bornRadii
.
getDevicePointer(), &obcChain
.
getDevicePointer()};
cu.executeKernel(reduceBornForceKernel, &reduceForceArgs[0], cu.getPaddedNumAtoms());
return 0.0;
}
...
...
@@ -3039,7 +2951,7 @@ void CudaCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, c
paramsVector[i] = make_float2((float) radius, (float) (scalingFactor*radius));
}
cu.setCharges(chargeVector);
params
->
upload(paramsVector);
params
.
upload(paramsVector);
// Mark that the current reordering may be invalid.
...
...
@@ -3087,16 +2999,6 @@ CudaCalcCustomGBForceKernel::~CudaCalcCustomGBForceKernel() {
delete energyDerivs;
if (energyDerivChain != NULL)
delete energyDerivChain;
if (longEnergyDerivs != NULL)
delete longEnergyDerivs;
if (globals != NULL)
delete globals;
if (valueBuffers != NULL)
delete valueBuffers;
for (auto function : tabulatedFunctions)
delete function;
for (auto d : dValue0dParam)
delete d;
for (auto d : dValuedParam)
delete d;
}
...
...
@@ -3135,7 +3037,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), paddedNumParticles, "customGBParameters", true);
computedValues = new CudaParameterSet(cu, force.getNumComputedValues(), paddedNumParticles, "customGBComputedValues", true, cu.getUseDoublePrecision());
if (force.getNumGlobalParameters() > 0)
globals
= CudaArray::creat
e<float>(cu, force.getNumGlobalParameters(), "customGBGlobals");
globals
.initializ
e<float>(cu, force.getNumGlobalParameters(), "customGBGlobals");
vector<vector<float> > paramVector(paddedNumParticles, vector<float>(numParams, 0));
vector<vector<int> > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) {
...
...
@@ -3159,6 +3061,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
stringstream tableArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
...
...
@@ -3167,9 +3070,9 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions
.push_back(CudaArray::creat
e<float>(cu, f.size(), "TabulatedFunction")
)
;
tabulatedFunctions[
tabulatedFunctions.size()-1]->
upload(f);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[
tabulatedFunctions.size()-1]->
getDevicePointer()));
tabulatedFunctions
[i].initializ
e<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions[
i].
upload(f);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[
i].
getDevicePointer()));
tableArgs << ", const float";
if (width > 1)
tableArgs << width;
...
...
@@ -3184,8 +3087,8 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
if (globals
!= NULL
)
globals
->
upload(globalParamValues);
if (globals
.isInitialized()
)
globals
.
upload(globalParamValues);
// Record derivatives of expressions needed for the chain rule terms.
...
...
@@ -3233,15 +3136,16 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
for (int j = 0; j < force.getNumEnergyParameterDerivatives(); j++)
energyParamDerivExpressions[i].push_back(ex.differentiate(force.getEnergyParameterDerivativeName(j)).optimize());
}
longEnergyDerivs
= CudaArray::creat
e<long long>(cu, force.getNumComputedValues()*cu.getPaddedNumAtoms(), "customGBLongEnergyDerivatives");
longEnergyDerivs
.initializ
e<long long>(cu, force.getNumComputedValues()*cu.getPaddedNumAtoms(), "customGBLongEnergyDerivatives");
energyDerivs = new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "customGBEnergyDerivatives", true);
energyDerivChain = new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "customGBEnergyDerivativeChain", true);
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
needEnergyParamDerivs = (force.getNumEnergyParameterDerivatives() > 0);
dValue0dParam.resize(force.getNumEnergyParameterDerivatives());
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
dValuedParam.push_back(new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "dValuedParam", true, cu.getUseDoublePrecision()));
dValue0dParam
.push_back(CudaArray::creat
e<long long>(cu, cu.getPaddedNumAtoms(), "dValue0dParam")
)
;
cu.addAutoclearBuffer(
*
dValue0dParam
.back()
);
dValue0dParam
[i].initializ
e<long long>(cu, cu.getPaddedNumAtoms(), "dValue0dParam");
cu.addAutoclearBuffer(dValue0dParam
[i]
);
string name = force.getEnergyParameterDerivativeName(i);
cu.addEnergyParameterDerivative(name);
}
...
...
@@ -3826,9 +3730,9 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
parameters.push_back(CudaNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
}
}
if (globals
!= NULL
) {
globals
->
upload(globalParamValues);
arguments.push_back(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals
->
getDevicePointer()));
if (globals
.isInitialized()
) {
globals
.
upload(globalParamValues);
arguments.push_back(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals
.
getDevicePointer()));
}
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getNumExclusions() > 0, cutoff, exclusionList, source, force.getForceGroup());
for (auto param : parameters)
...
...
@@ -3838,7 +3742,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
}
info = new ForceInfo(force);
cu.addForce(info);
cu.addAutoclearBuffer(
*
longEnergyDerivs);
cu.addAutoclearBuffer(longEnergyDerivs);
}
double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
...
...
@@ -3881,13 +3785,13 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
// Set arguments for kernels.
maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : cu.getNumAtomBlocks()*(cu.getNumAtomBlocks()+1)/2);
valueBuffers
= CudaArray::creat
e<long long>(cu, cu.getPaddedNumAtoms(), "customGBValueBuffers");
cu.addAutoclearBuffer(
*
valueBuffers);
cu.clearBuffer(valueBuffers
->
getDevicePointer(), sizeof(long long)*valueBuffers
->
getSize());
valueBuffers
.initializ
e<long long>(cu, cu.getPaddedNumAtoms(), "customGBValueBuffers");
cu.addAutoclearBuffer(valueBuffers);
cu.clearBuffer(valueBuffers
.
getDevicePointer(), sizeof(long long)*valueBuffers
.
getSize());
pairValueArgs.push_back(&cu.getPosq().getDevicePointer());
pairValueArgs.push_back(&cu.getNonbondedUtilities().getExclusions().getDevicePointer());
pairValueArgs.push_back(&cu.getNonbondedUtilities().getExclusionTiles().getDevicePointer());
pairValueArgs.push_back(&valueBuffers
->
getDevicePointer());
pairValueArgs.push_back(&valueBuffers
.
getDevicePointer());
if (nb.getUseCutoff()) {
pairValueArgs.push_back(&nb.getInteractingTiles().getDevicePointer());
pairValueArgs.push_back(&nb.getInteractionCount().getDevicePointer());
...
...
@@ -3903,31 +3807,31 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
}
else
pairValueArgs.push_back(&maxTiles);
if (globals
!= NULL
)
pairValueArgs.push_back(&globals
->
getDevicePointer());
if (globals
.isInitialized()
)
pairValueArgs.push_back(&globals
.
getDevicePointer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
if (pairValueUsesParam[i])
pairValueArgs.push_back(¶ms->getBuffers()[i].getMemory());
}
for (auto d : dValue0dParam)
pairValueArgs.push_back(&d
->
getDevicePointer());
for (auto function : tabulatedFunctions)
pairValueArgs.push_back(&function
->
getDevicePointer());
for (auto
&
d : dValue0dParam)
pairValueArgs.push_back(&d
.
getDevicePointer());
for (auto
&
function : tabulatedFunctions)
pairValueArgs.push_back(&function
.
getDevicePointer());
perParticleValueArgs.push_back(&cu.getPosq().getDevicePointer());
perParticleValueArgs.push_back(&valueBuffers
->
getDevicePointer());
if (globals
!= NULL
)
perParticleValueArgs.push_back(&globals
->
getDevicePointer());
perParticleValueArgs.push_back(&valueBuffers
.
getDevicePointer());
if (globals
.isInitialized()
)
perParticleValueArgs.push_back(&globals
.
getDevicePointer());
for (auto& buffer : params->getBuffers())
perParticleValueArgs.push_back(&buffer.getMemory());
for (auto& buffer : computedValues->getBuffers())
perParticleValueArgs.push_back(&buffer.getMemory());
for (int i = 0; i < dValuedParam.size(); i++) {
perParticleValueArgs.push_back(&dValue0dParam[i]
->
getDevicePointer());
perParticleValueArgs.push_back(&dValue0dParam[i]
.
getDevicePointer());
for (int j = 0; j < dValuedParam[i]->getBuffers().size(); j++)
perParticleValueArgs.push_back(&dValuedParam[i]->getBuffers()[j].getMemory());
}
for (auto function : tabulatedFunctions)
perParticleValueArgs.push_back(&function
->
getDevicePointer());
for (auto
&
function : tabulatedFunctions)
perParticleValueArgs.push_back(&function
.
getDevicePointer());
pairEnergyArgs.push_back(&cu.getForce().getDevicePointer());
pairEnergyArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
pairEnergyArgs.push_back(&cu.getPosq().getDevicePointer());
...
...
@@ -3949,8 +3853,8 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
}
else
pairEnergyArgs.push_back(&maxTiles);
if (globals
!= NULL
)
pairEnergyArgs.push_back(&globals
->
getDevicePointer());
if (globals
.isInitialized()
)
pairEnergyArgs.push_back(&globals
.
getDevicePointer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
if (pairEnergyUsesParam[i])
pairEnergyArgs.push_back(¶ms->getBuffers()[i].getMemory());
...
...
@@ -3959,16 +3863,16 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
if (pairEnergyUsesValue[i])
pairEnergyArgs.push_back(&computedValues->getBuffers()[i].getMemory());
}
pairEnergyArgs.push_back(&longEnergyDerivs
->
getDevicePointer());
pairEnergyArgs.push_back(&longEnergyDerivs
.
getDevicePointer());
if (needEnergyParamDerivs)
pairEnergyArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer());
for (auto function : tabulatedFunctions)
pairEnergyArgs.push_back(&function
->
getDevicePointer());
for (auto
&
function : tabulatedFunctions)
pairEnergyArgs.push_back(&function
.
getDevicePointer());
perParticleEnergyArgs.push_back(&cu.getForce().getDevicePointer());
perParticleEnergyArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
perParticleEnergyArgs.push_back(&cu.getPosq().getDevicePointer());
if (globals
!= NULL
)
perParticleEnergyArgs.push_back(&globals
->
getDevicePointer());
if (globals
.isInitialized()
)
perParticleEnergyArgs.push_back(&globals
.
getDevicePointer());
for (auto& buffer : params->getBuffers())
perParticleEnergyArgs.push_back(&buffer.getMemory());
for (auto& buffer : computedValues->getBuffers())
...
...
@@ -3977,16 +3881,16 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
perParticleEnergyArgs.push_back(&buffer.getMemory());
for (auto& buffer : energyDerivChain->getBuffers())
perParticleEnergyArgs.push_back(&buffer.getMemory());
perParticleEnergyArgs.push_back(&longEnergyDerivs
->
getDevicePointer());
perParticleEnergyArgs.push_back(&longEnergyDerivs
.
getDevicePointer());
if (needEnergyParamDerivs)
perParticleEnergyArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer());
for (auto function : tabulatedFunctions)
perParticleEnergyArgs.push_back(&function
->
getDevicePointer());
for (auto
&
function : tabulatedFunctions)
perParticleEnergyArgs.push_back(&function
.
getDevicePointer());
if (needParameterGradient || needEnergyParamDerivs) {
gradientChainRuleArgs.push_back(&cu.getForce().getDevicePointer());
gradientChainRuleArgs.push_back(&cu.getPosq().getDevicePointer());
if (globals
!= NULL
)
gradientChainRuleArgs.push_back(&globals
->
getDevicePointer());
if (globals
.isInitialized()
)
gradientChainRuleArgs.push_back(&globals
.
getDevicePointer());
for (auto& buffer : params->getBuffers())
gradientChainRuleArgs.push_back(&buffer.getMemory());
for (auto& buffer : computedValues->getBuffers())
...
...
@@ -4001,7 +3905,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
}
}
}
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
...
...
@@ -4010,7 +3914,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
globalParamValues[i] = value;
}
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
pairEnergyArgs[5] = &includeEnergy;
if (nb.getUseCutoff()) {
...
...
@@ -4089,8 +3993,6 @@ CudaCalcCustomExternalForceKernel::~CudaCalcCustomExternalForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
}
void CudaCalcCustomExternalForceKernel::initialize(const System& system, const CustomExternalForce& force) {
...
...
@@ -4146,9 +4048,9 @@ void CudaCalcCustomExternalForceKernel::initialize(const System& system, const C
variables[name] = "particleParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals
= CudaArray::creat
e<float>(cu, force.getNumGlobalParameters(), "customExternalGlobals");
globals
->
upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals
->
getDevicePointer(), "float");
globals
.initializ
e<float>(cu, force.getNumGlobalParameters(), "customExternalGlobals");
globals
.
upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals
.
getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
...
...
@@ -4170,7 +4072,7 @@ void CudaCalcCustomExternalForceKernel::initialize(const System& system, const C
}
double CudaCalcCustomExternalForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
...
...
@@ -4179,7 +4081,7 @@ double CudaCalcCustomExternalForceKernel::execute(ContextImpl& context, bool inc
globalParamValues[i] = value;
}
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
return 0.0;
}
...
...
@@ -4294,18 +4196,6 @@ CudaCalcCustomHbondForceKernel::~CudaCalcCustomHbondForceKernel() {
delete donorParams;
if (acceptorParams != NULL)
delete acceptorParams;
if (donors != NULL)
delete donors;
if (acceptors != NULL)
delete acceptors;
if (globals != NULL)
delete globals;
if (donorExclusions != NULL)
delete donorExclusions;
if (acceptorExclusions != NULL)
delete acceptorExclusions;
for (auto function : tabulatedFunctions)
delete function;
}
static void addDonorAndAcceptorCode(stringstream& computeDonor, stringstream& computeAcceptor, const string& value) {
...
...
@@ -4333,12 +4223,12 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
if (numDonors == 0 || numAcceptors == 0)
return;
int numParticles = system.getNumParticles();
donors
= CudaArray::creat
e<int4>(cu, numDonors, "customHbondDonors");
acceptors
= CudaArray::creat
e<int4>(cu, numAcceptors, "customHbondAcceptors");
donors
.initializ
e<int4>(cu, numDonors, "customHbondDonors");
acceptors
.initializ
e<int4>(cu, numAcceptors, "customHbondAcceptors");
donorParams = new CudaParameterSet(cu, force.getNumPerDonorParameters(), numDonors, "customHbondDonorParameters");
acceptorParams = new CudaParameterSet(cu, force.getNumPerAcceptorParameters(), numAcceptors, "customHbondAcceptorParameters");
if (force.getNumGlobalParameters() > 0)
globals
= CudaArray::creat
e<float>(cu, force.getNumGlobalParameters(), "customHbondGlobals");
globals
.initializ
e<float>(cu, force.getNumGlobalParameters(), "customHbondGlobals");
vector<vector<float> > donorParamVector(numDonors);
vector<int4> donorVector(numDonors);
for (int i = 0; i < numDonors; i++) {
...
...
@@ -4348,7 +4238,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
for (int j = 0; j < (int) parameters.size(); j++)
donorParamVector[i][j] = (float) parameters[j];
}
donors
->
upload(donorVector);
donors
.
upload(donorVector);
donorParams->setParameterValues(donorParamVector);
vector<vector<float> > acceptorParamVector(numAcceptors);
vector<int4> acceptorVector(numAcceptors);
...
...
@@ -4359,7 +4249,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
for (int j = 0; j < (int) parameters.size(); j++)
acceptorParamVector[i][j] = (float) parameters[j];
}
acceptors
->
upload(acceptorVector);
acceptors
.
upload(acceptorVector);
acceptorParams->setParameterValues(acceptorParamVector);
info = new ForceInfo(force);
cu.addForce(info);
...
...
@@ -4395,10 +4285,10 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
else
throw OpenMMException("CustomHbondForce: CudaPlatform does not support more than four exclusions per acceptor");
}
donorExclusions
= CudaArray::creat
e<int4>(cu, numDonors, "customHbondDonorExclusions");
acceptorExclusions
= CudaArray::creat
e<int4>(cu, numAcceptors, "customHbondAcceptorExclusions");
donorExclusions
->
upload(donorExclusionVector);
acceptorExclusions
->
upload(acceptorExclusionVector);
donorExclusions
.initializ
e<int4>(cu, numDonors, "customHbondDonorExclusions");
acceptorExclusions
.initializ
e<int4>(cu, numAcceptors, "customHbondAcceptorExclusions");
donorExclusions
.
upload(donorExclusionVector);
acceptorExclusions
.
upload(acceptorExclusionVector);
// Record the tabulated functions.
...
...
@@ -4406,6 +4296,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
stringstream tableArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
...
...
@@ -4414,8 +4305,8 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions
.push_back(CudaArray::creat
e<float>(cu, f.size(), "TabulatedFunction")
)
;
tabulatedFunctions[
tabulatedFunctions.size()-1]->
upload(f);
tabulatedFunctions
[i].initializ
e<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions[
i].
upload(f);
tableArgs << ", const float";
if (width > 1)
tableArgs << width;
...
...
@@ -4430,8 +4321,8 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
if (globals
!= NULL
)
globals
->
upload(globalParamValues);
if (globals
.isInitialized()
)
globals
.
upload(globalParamValues);
map<string, string> variables;
for (int i = 0; i < force.getNumPerDonorParameters(); i++) {
const string& name = force.getPerDonorParameterName(i);
...
...
@@ -4623,7 +4514,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numDonors == 0 || numAcceptors == 0)
return 0.0;
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
...
...
@@ -4632,7 +4523,7 @@ double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includ
globalParamValues[i] = value;
}
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
if (!hasInitializedKernel) {
hasInitializedKernel = true;
...
...
@@ -4640,42 +4531,42 @@ double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includ
donorArgs.push_back(&cu.getForce().getDevicePointer());
donorArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
donorArgs.push_back(&cu.getPosq().getDevicePointer());
donorArgs.push_back(&donorExclusions
->
getDevicePointer());
donorArgs.push_back(&donors
->
getDevicePointer());
donorArgs.push_back(&acceptors
->
getDevicePointer());
donorArgs.push_back(&donorExclusions
.
getDevicePointer());
donorArgs.push_back(&donors
.
getDevicePointer());
donorArgs.push_back(&acceptors
.
getDevicePointer());
donorArgs.push_back(cu.getPeriodicBoxSizePointer());
donorArgs.push_back(cu.getInvPeriodicBoxSizePointer());
donorArgs.push_back(cu.getPeriodicBoxVecXPointer());
donorArgs.push_back(cu.getPeriodicBoxVecYPointer());
donorArgs.push_back(cu.getPeriodicBoxVecZPointer());
if (globals
!= NULL
)
donorArgs.push_back(&globals
->
getDevicePointer());
if (globals
.isInitialized()
)
donorArgs.push_back(&globals
.
getDevicePointer());
for (auto& buffer : donorParams->getBuffers())
donorArgs.push_back(&buffer.getMemory());
for (auto& buffer : acceptorParams->getBuffers())
donorArgs.push_back(&buffer.getMemory());
for (auto function : tabulatedFunctions)
donorArgs.push_back(&function
->
getDevicePointer());
for (auto
&
function : tabulatedFunctions)
donorArgs.push_back(&function
.
getDevicePointer());
index = 0;
acceptorArgs.push_back(&cu.getForce().getDevicePointer());
acceptorArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
acceptorArgs.push_back(&cu.getPosq().getDevicePointer());
acceptorArgs.push_back(&acceptorExclusions
->
getDevicePointer());
acceptorArgs.push_back(&donors
->
getDevicePointer());
acceptorArgs.push_back(&acceptors
->
getDevicePointer());
acceptorArgs.push_back(&acceptorExclusions
.
getDevicePointer());
acceptorArgs.push_back(&donors
.
getDevicePointer());
acceptorArgs.push_back(&acceptors
.
getDevicePointer());
acceptorArgs.push_back(cu.getPeriodicBoxSizePointer());
acceptorArgs.push_back(cu.getInvPeriodicBoxSizePointer());
acceptorArgs.push_back(cu.getPeriodicBoxVecXPointer());
acceptorArgs.push_back(cu.getPeriodicBoxVecYPointer());
acceptorArgs.push_back(cu.getPeriodicBoxVecZPointer());
if (globals
!= NULL
)
acceptorArgs.push_back(&globals
->
getDevicePointer());
if (globals
.isInitialized()
)
acceptorArgs.push_back(&globals
.
getDevicePointer());
for (auto& buffer : donorParams->getBuffers())
acceptorArgs.push_back(&buffer.getMemory());
for (auto& buffer : acceptorParams->getBuffers())
acceptorArgs.push_back(&buffer.getMemory());
for (auto function : tabulatedFunctions)
acceptorArgs.push_back(&function
->
getDevicePointer());
for (auto
&
function : tabulatedFunctions)
acceptorArgs.push_back(&function
.
getDevicePointer());
}
int sharedMemorySize = 3*CudaContext::ThreadBlockSize*sizeof(float4);
cu.executeKernel(donorKernel, &donorArgs[0], max(numDonors, numAcceptors), CudaContext::ThreadBlockSize, sharedMemorySize);
...
...
@@ -4775,22 +4666,6 @@ CudaCalcCustomCentroidBondForceKernel::~CudaCalcCustomCentroidBondForceKernel()
cu.setAsCurrent();
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
if (groupParticles != NULL)
delete groupParticles;
if (groupWeights != NULL)
delete groupWeights;
if (groupOffsets != NULL)
delete groupOffsets;
if (groupForces != NULL)
delete groupForces;
if (bondGroups != NULL)
delete bondGroups;
if (centerPositions != NULL)
delete centerPositions;
for (auto function : tabulatedFunctions)
delete function;
}
void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, const CustomCentroidBondForce& force) {
...
...
@@ -4827,22 +4702,22 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
for (int j = 0; j < normalizedWeights[i].size(); j++)
groupWeightVecFloat.push_back((float) normalizedWeights[i][j]);
}
groupParticles
= CudaArray::creat
e<int>(cu, groupParticleVec.size(), "groupParticles");
groupParticles
->
upload(groupParticleVec);
groupParticles
.initializ
e<int>(cu, groupParticleVec.size(), "groupParticles");
groupParticles
.
upload(groupParticleVec);
if (cu.getUseDoublePrecision()) {
groupWeights
= CudaArray::creat
e<double>(cu, groupParticleVec.size(), "groupWeights");
groupWeights
->
upload(groupWeightVecDouble);
centerPositions
= CudaArray::creat
e<double4>(cu, numGroups, "centerPositions");
groupWeights
.initializ
e<double>(cu, groupParticleVec.size(), "groupWeights");
groupWeights
.
upload(groupWeightVecDouble);
centerPositions
.initializ
e<double4>(cu, numGroups, "centerPositions");
}
else {
groupWeights
= CudaArray::creat
e<float>(cu, groupParticleVec.size(), "groupWeights");
groupWeights
->
upload(groupWeightVecFloat);
centerPositions
= CudaArray::creat
e<float4>(cu, numGroups, "centerPositions");
groupWeights
.initializ
e<float>(cu, groupParticleVec.size(), "groupWeights");
groupWeights
.
upload(groupWeightVecFloat);
centerPositions
.initializ
e<float4>(cu, numGroups, "centerPositions");
}
groupOffsets
= CudaArray::creat
e<int>(cu, groupOffsetVec.size(), "groupOffsets");
groupOffsets
->
upload(groupOffsetVec);
groupForces
= CudaArray::creat
e<long long>(cu, numGroups*3, "groupForces");
cu.addAutoclearBuffer(
*
groupForces);
groupOffsets
.initializ
e<int>(cu, groupOffsetVec.size(), "groupOffsets");
groupOffsets
.
upload(groupOffsetVec);
groupForces
.initializ
e<long long>(cu, numGroups*3, "groupForces");
cu.addAutoclearBuffer(groupForces);
// Record the bonds.
...
...
@@ -4861,15 +4736,15 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
bondGroups
= CudaArray::creat
e<int>(cu, bondGroupVec.size(), "bondGroups");
bondGroups
->
upload(bondGroupVec);
bondGroups
.initializ
e<int>(cu, bondGroupVec.size(), "bondGroups");
bondGroups
.
upload(bondGroupVec);
// Record the arguments to the force kernel.
groupForcesArgs.push_back(&groupForces
->
getDevicePointer());
groupForcesArgs.push_back(&groupForces
.
getDevicePointer());
groupForcesArgs.push_back(NULL); // Energy buffer hasn't been created yet
groupForcesArgs.push_back(¢erPositions
->
getDevicePointer());
groupForcesArgs.push_back(&bondGroups
->
getDevicePointer());
groupForcesArgs.push_back(¢erPositions
.
getDevicePointer());
groupForcesArgs.push_back(&bondGroups
.
getDevicePointer());
groupForcesArgs.push_back(cu.getPeriodicBoxSizePointer());
groupForcesArgs.push_back(cu.getInvPeriodicBoxSizePointer());
groupForcesArgs.push_back(cu.getPeriodicBoxVecXPointer());
...
...
@@ -4885,6 +4760,7 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
stringstream extraArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
...
...
@@ -4893,13 +4769,13 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions
.push_back(CudaArray::creat
e<float>(cu, f.size(), "TabulatedFunction")
)
;
tabulatedFunctions
.back()->
upload(f);
tabulatedFunctions
[i].initializ
e<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions
[i].
upload(f);
extraArgs << ", const float";
if (width > 1)
extraArgs << width;
extraArgs << "* __restrict__ " << arrayName;
groupForcesArgs.push_back(&tabulatedFunctions
.back()->
getDevicePointer());
groupForcesArgs.push_back(&tabulatedFunctions
[i].
getDevicePointer());
}
// Record information about parameters.
...
...
@@ -4924,15 +4800,15 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
if (needEnergyParamDerivs)
extraArgs << ", mixed* __restrict__ energyParamDerivs";
if (force.getNumGlobalParameters() > 0) {
globals
= CudaArray::creat
e<float>(cu, force.getNumGlobalParameters(), "customCentroidBondGlobals");
globals
->
upload(globalParamValues);
globals
.initializ
e<float>(cu, force.getNumGlobalParameters(), "customCentroidBondGlobals");
globals
.
upload(globalParamValues);
extraArgs << ", const float* __restrict__ globals";
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = "globals["+cu.intToString(i)+"]";
variables[name] = value;
}
groupForcesArgs.push_back(&globals
->
getDevicePointer());
groupForcesArgs.push_back(&globals
.
getDevicePointer());
}
// Now to generate the kernel. First, it needs to calculate all distances, angles,
...
...
@@ -5144,7 +5020,7 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
double CudaCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numBonds == 0)
return 0.0;
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
...
...
@@ -5153,17 +5029,17 @@ double CudaCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bool
globalParamValues[i] = value;
}
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
void* computeCentersArgs[] = {&cu.getPosq().getDevicePointer(), &groupParticles
->
getDevicePointer(), &groupWeights
->
getDevicePointer(),
&groupOffsets
->
getDevicePointer(), ¢erPositions
->
getDevicePointer()};
void* computeCentersArgs[] = {&cu.getPosq().getDevicePointer(), &groupParticles
.
getDevicePointer(), &groupWeights
.
getDevicePointer(),
&groupOffsets
.
getDevicePointer(), ¢erPositions
.
getDevicePointer()};
cu.executeKernel(computeCentersKernel, computeCentersArgs, CudaContext::TileSize*numGroups);
groupForcesArgs[1] = &cu.getEnergyBuffer().getDevicePointer();
if (needEnergyParamDerivs)
groupForcesArgs[9] = &cu.getEnergyParamDerivBuffer().getDevicePointer();
cu.executeKernel(groupForcesKernel, &groupForcesArgs[0], numBonds);
void* applyForcesArgs[] = {&groupParticles
->
getDevicePointer(), &groupWeights
->
getDevicePointer(), &groupOffsets
->
getDevicePointer(),
&groupForces
->
getDevicePointer(), &cu.getForce().getDevicePointer()};
void* applyForcesArgs[] = {&groupParticles
.
getDevicePointer(), &groupWeights
.
getDevicePointer(), &groupOffsets
.
getDevicePointer(),
&groupForces
.
getDevicePointer(), &cu.getForce().getDevicePointer()};
cu.executeKernel(applyForcesKernel, applyForcesArgs, CudaContext::TileSize*numGroups);
return 0.0;
}
...
...
@@ -5222,10 +5098,6 @@ CudaCalcCustomCompoundBondForceKernel::~CudaCalcCustomCompoundBondForceKernel()
cu.setAsCurrent();
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
for (auto function : tabulatedFunctions)
delete function;
}
void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, const CustomCompoundBondForce& force) {
...
...
@@ -5256,16 +5128,16 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
map<string, Lepton::CustomFunction*> functions;
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
CudaArray* array = CudaArray::create<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions.push_back(array);
array->upload(f);
string arrayName = cu.getBondedUtilities().addArgument(array->getDevicePointer(), width == 1 ? "float" : "float"+cu.intToString(width));
tabulatedFunctions[i].initialize<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions[i].upload(f);
string arrayName = cu.getBondedUtilities().addArgument(tabulatedFunctions[i].getDevicePointer(), width == 1 ? "float" : "float"+cu.intToString(width));
functionDefinitions.push_back(make_pair(name, arrayName));
}
...
...
@@ -5289,9 +5161,9 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
variables[name] = "bondParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals
= CudaArray::creat
e<float>(cu, force.getNumGlobalParameters(), "customCompoundBondGlobals");
globals
->
upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals
->
getDevicePointer(), "float");
globals
.initializ
e<float>(cu, force.getNumGlobalParameters(), "customCompoundBondGlobals");
globals
.
upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals
.
getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
...
...
@@ -5474,7 +5346,7 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
}
double CudaCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
...
...
@@ -5483,7 +5355,7 @@ double CudaCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool
globalParamValues[i] = value;
}
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
return 0.0;
}
...
...
@@ -5553,32 +5425,6 @@ CudaCalcCustomManyParticleForceKernel::~CudaCalcCustomManyParticleForceKernel()
cu.setAsCurrent();
if (params != NULL)
delete params;
if (orderIndex != NULL)
delete orderIndex;
if (particleOrder != NULL)
delete particleOrder;
if (particleTypes != NULL)
delete particleTypes;
if (exclusions != NULL)
delete exclusions;
if (exclusionStartIndex != NULL)
delete exclusionStartIndex;
if (blockCenter != NULL)
delete blockCenter;
if (blockBoundingBox != NULL)
delete blockBoundingBox;
if (neighborPairs != NULL)
delete neighborPairs;
if (numNeighborPairs != NULL)
delete numNeighborPairs;
if (neighborStartIndex != NULL)
delete neighborStartIndex;
if (neighbors != NULL)
delete neighbors;
if (numNeighborsForAtom != NULL)
delete numNeighborsForAtom;
for (auto function : tabulatedFunctions)
delete function;
}
void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, const CustomManyParticleForce& force) {
...
...
@@ -5612,6 +5458,7 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
stringstream tableArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
...
...
@@ -5620,8 +5467,8 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions
.push_back(CudaArray::creat
e<float>(cu, f.size(), "TabulatedFunction")
)
;
tabulatedFunctions[
tabulatedFunctions.size()-1]->
upload(f);
tabulatedFunctions
[i].initializ
e<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions[
i].
upload(f);
tableArgs << ", const float";
if (width > 1)
tableArgs << width;
...
...
@@ -5667,16 +5514,16 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
CustomManyParticleForceImpl::buildFilterArrays(force, numTypes, particleTypesVec, orderIndexVec, particleOrderVec);
bool hasTypeFilters = (particleOrderVec.size() > 1);
if (hasTypeFilters) {
particleTypes
= CudaArray::creat
e<int>(cu, particleTypesVec.size(), "customManyParticleTypes");
orderIndex
= CudaArray::creat
e<int>(cu, orderIndexVec.size(), "customManyParticleOrderIndex");
particleOrder
= CudaArray::creat
e<int>(cu, particleOrderVec.size()*particlesPerSet, "customManyParticleOrder");
particleTypes
->
upload(particleTypesVec);
orderIndex
->
upload(orderIndexVec);
vector<int> flattenedOrder(particleOrder
->
getSize());
particleTypes
.initializ
e<int>(cu, particleTypesVec.size(), "customManyParticleTypes");
orderIndex
.initializ
e<int>(cu, orderIndexVec.size(), "customManyParticleOrderIndex");
particleOrder
.initializ
e<int>(cu, particleOrderVec.size()*particlesPerSet, "customManyParticleOrder");
particleTypes
.
upload(particleTypesVec);
orderIndex
.
upload(orderIndexVec);
vector<int> flattenedOrder(particleOrder
.
getSize());
for (int i = 0; i < (int) particleOrderVec.size(); i++)
for (int j = 0; j < particlesPerSet; j++)
flattenedOrder[i*particlesPerSet+j] = particleOrderVec[i][j];
particleOrder
->
upload(flattenedOrder);
particleOrder
.
upload(flattenedOrder);
}
// Build data structures for exclusions.
...
...
@@ -5697,10 +5544,10 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
exclusionsVec.insert(exclusionsVec.end(), particleExclusions[i].begin(), particleExclusions[i].end());
exclusionStartIndexVec[i+1] = exclusionsVec.size();
}
exclusions
= CudaArray::creat
e<int>(cu, exclusionsVec.size(), "customManyParticleExclusions");
exclusionStartIndex
= CudaArray::creat
e<int>(cu, exclusionStartIndexVec.size(), "customManyParticleExclusionStart");
exclusions
->
upload(exclusionsVec);
exclusionStartIndex
->
upload(exclusionStartIndexVec);
exclusions
.initializ
e<int>(cu, exclusionsVec.size(), "customManyParticleExclusions");
exclusionStartIndex
.initializ
e<int>(cu, exclusionStartIndexVec.size(), "customManyParticleExclusionStart");
exclusions
.
upload(exclusionsVec);
exclusionStartIndex
.
upload(exclusionStartIndexVec);
}
// Build data structures for the neighbor list.
...
...
@@ -5708,19 +5555,19 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
if (nonbondedMethod != NoCutoff) {
int numAtomBlocks = cu.getNumAtomBlocks();
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
blockCenter
= new CudaArray
(cu, numAtomBlocks, 4*elementSize, "blockCenter");
blockBoundingBox
= new CudaArray
(cu, numAtomBlocks, 4*elementSize, "blockBoundingBox");
numNeighborPairs
= CudaArray::creat
e<int>(cu, 1, "customManyParticleNumNeighborPairs");
neighborStartIndex
= CudaArray::creat
e<int>(cu, numParticles+1, "customManyParticleNeighborStartIndex");
numNeighborsForAtom
= CudaArray::creat
e<int>(cu, numParticles, "customManyParticleNumNeighborsForAtom");
blockCenter
.initialize
(cu, numAtomBlocks, 4*elementSize, "blockCenter");
blockBoundingBox
.initialize
(cu, numAtomBlocks, 4*elementSize, "blockBoundingBox");
numNeighborPairs
.initializ
e<int>(cu, 1, "customManyParticleNumNeighborPairs");
neighborStartIndex
.initializ
e<int>(cu, numParticles+1, "customManyParticleNeighborStartIndex");
numNeighborsForAtom
.initializ
e<int>(cu, numParticles, "customManyParticleNumNeighborsForAtom");
CHECK_RESULT(cuEventCreate(&event, CU_EVENT_DISABLE_TIMING), "Error creating event for CustomManyParticleForce");
// Select a size for the array that holds the neighbor list. We have to make a fairly
// arbitrary guess, but if this turns out to be too small we'll increase it later.
maxNeighborPairs = 150*numParticles;
neighborPairs
= CudaArray::creat
e<int2>(cu, maxNeighborPairs, "customManyParticleNeighborPairs");
neighbors
= CudaArray::creat
e<int>(cu, maxNeighborPairs, "customManyParticleNeighbors");
neighborPairs
.initializ
e<int2>(cu, maxNeighborPairs, "customManyParticleNeighborPairs");
neighbors
.initializ
e<int>(cu, maxNeighborPairs, "customManyParticleNeighbors");
}
// Now to generate the kernel. First, it needs to calculate all distances, angles,
...
...
@@ -6045,22 +5892,22 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
forceArgs.push_back(cu.getPeriodicBoxVecYPointer());
forceArgs.push_back(cu.getPeriodicBoxVecZPointer());
if (nonbondedMethod != NoCutoff) {
forceArgs.push_back(&neighbors
->
getDevicePointer());
forceArgs.push_back(&neighborStartIndex
->
getDevicePointer());
forceArgs.push_back(&neighbors
.
getDevicePointer());
forceArgs.push_back(&neighborStartIndex
.
getDevicePointer());
}
if (particleTypes
!= NULL
) {
forceArgs.push_back(&particleTypes
->
getDevicePointer());
forceArgs.push_back(&orderIndex
->
getDevicePointer());
forceArgs.push_back(&particleOrder
->
getDevicePointer());
if (particleTypes
.isInitialized()
) {
forceArgs.push_back(&particleTypes
.
getDevicePointer());
forceArgs.push_back(&orderIndex
.
getDevicePointer());
forceArgs.push_back(&particleOrder
.
getDevicePointer());
}
if (exclusions
!= NULL
) {
forceArgs.push_back(&exclusions
->
getDevicePointer());
forceArgs.push_back(&exclusionStartIndex
->
getDevicePointer());
if (exclusions
.isInitialized()
) {
forceArgs.push_back(&exclusions
.
getDevicePointer());
forceArgs.push_back(&exclusionStartIndex
.
getDevicePointer());
}
for (auto& buffer : params->getBuffers())
forceArgs.push_back(&buffer.getMemory());
for (auto function : tabulatedFunctions)
forceArgs.push_back(&function
->
getDevicePointer());
for (auto
&
function : tabulatedFunctions)
forceArgs.push_back(&function
.
getDevicePointer());
if (nonbondedMethod != NoCutoff) {
// Set arguments for the block bounds kernel.
...
...
@@ -6071,9 +5918,9 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
blockBoundsArgs.push_back(cu.getPeriodicBoxVecYPointer());
blockBoundsArgs.push_back(cu.getPeriodicBoxVecZPointer());
blockBoundsArgs.push_back(&cu.getPosq().getDevicePointer());
blockBoundsArgs.push_back(&blockCenter
->
getDevicePointer());
blockBoundsArgs.push_back(&blockBoundingBox
->
getDevicePointer());
blockBoundsArgs.push_back(&numNeighborPairs
->
getDevicePointer());
blockBoundsArgs.push_back(&blockCenter
.
getDevicePointer());
blockBoundsArgs.push_back(&blockBoundingBox
.
getDevicePointer());
blockBoundsArgs.push_back(&numNeighborPairs
.
getDevicePointer());
// Set arguments for the neighbor list kernel.
...
...
@@ -6083,32 +5930,32 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
neighborsArgs.push_back(cu.getPeriodicBoxVecYPointer());
neighborsArgs.push_back(cu.getPeriodicBoxVecZPointer());
neighborsArgs.push_back(&cu.getPosq().getDevicePointer());
neighborsArgs.push_back(&blockCenter
->
getDevicePointer());
neighborsArgs.push_back(&blockBoundingBox
->
getDevicePointer());
neighborsArgs.push_back(&neighborPairs
->
getDevicePointer());
neighborsArgs.push_back(&numNeighborPairs
->
getDevicePointer());
neighborsArgs.push_back(&numNeighborsForAtom
->
getDevicePointer());
neighborsArgs.push_back(&blockCenter
.
getDevicePointer());
neighborsArgs.push_back(&blockBoundingBox
.
getDevicePointer());
neighborsArgs.push_back(&neighborPairs
.
getDevicePointer());
neighborsArgs.push_back(&numNeighborPairs
.
getDevicePointer());
neighborsArgs.push_back(&numNeighborsForAtom
.
getDevicePointer());
neighborsArgs.push_back(&maxNeighborPairs);
if (exclusions
!= NULL
) {
neighborsArgs.push_back(&exclusions
->
getDevicePointer());
neighborsArgs.push_back(&exclusionStartIndex
->
getDevicePointer());
if (exclusions
.isInitialized()
) {
neighborsArgs.push_back(&exclusions
.
getDevicePointer());
neighborsArgs.push_back(&exclusionStartIndex
.
getDevicePointer());
}
// Set arguments for the kernel to find neighbor list start indices.
startIndicesArgs.push_back(&numNeighborsForAtom
->
getDevicePointer());
startIndicesArgs.push_back(&neighborStartIndex
->
getDevicePointer());
startIndicesArgs.push_back(&numNeighborPairs
->
getDevicePointer());
startIndicesArgs.push_back(&numNeighborsForAtom
.
getDevicePointer());
startIndicesArgs.push_back(&neighborStartIndex
.
getDevicePointer());
startIndicesArgs.push_back(&numNeighborPairs
.
getDevicePointer());
startIndicesArgs.push_back(&maxNeighborPairs);
// Set arguments for the kernel to assemble the final neighbor list.
copyPairsArgs.push_back(&neighborPairs
->
getDevicePointer());
copyPairsArgs.push_back(&neighbors
->
getDevicePointer());
copyPairsArgs.push_back(&numNeighborPairs
->
getDevicePointer());
copyPairsArgs.push_back(&neighborPairs
.
getDevicePointer());
copyPairsArgs.push_back(&neighbors
.
getDevicePointer());
copyPairsArgs.push_back(&numNeighborPairs
.
getDevicePointer());
copyPairsArgs.push_back(&maxNeighborPairs);
copyPairsArgs.push_back(&numNeighborsForAtom
->
getDevicePointer());
copyPairsArgs.push_back(&neighborStartIndex
->
getDevicePointer());
copyPairsArgs.push_back(&numNeighborsForAtom
.
getDevicePointer());
copyPairsArgs.push_back(&neighborStartIndex
.
getDevicePointer());
}
}
if (globalParamValues.size() > 0) {
...
...
@@ -6131,7 +5978,7 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
// We need to make sure there was enough memory for the neighbor list. Download the
// information asynchronously so kernels can be running at the same time.
numNeighborPairs
->
download(numPairs, false);
numNeighborPairs
.
download(numPairs, false);
CHECK_RESULT(cuEventRecord(event, 0), "Error recording event for CustomManyParticleForce");
cu.executeKernel(startIndicesKernel, &startIndicesArgs[0], 256, 256, 256*sizeof(int));
cu.executeKernel(copyPairsKernel, ©PairsArgs[0], maxNeighborPairs);
...
...
@@ -6145,17 +5992,13 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
if (*numPairs > maxNeighborPairs) {
// Resize the arrays and run the calculation again.
delete neighborPairs;
neighborPairs = NULL;
delete neighbors;
neighbors = NULL;
maxNeighborPairs = (int) (1.1*(*numPairs));
neighborPairs
= CudaArray::create<int2>(cu, maxNeighborPairs, "customManyParticleNeighborPairs"
);
neighbors
= CudaArray::create<int>(cu, maxNeighborPairs, "customManyParticleNeighbors"
);
forceArgs[5] = &neighbors
->
getDevicePointer();
neighborsArgs[5] = &neighborPairs
->
getDevicePointer();
copyPairsArgs[0] = &neighborPairs
->
getDevicePointer();
copyPairsArgs[1] = &neighbors
->
getDevicePointer();
neighborPairs
.resize(maxNeighborPairs
);
neighbors
.resize(maxNeighborPairs
);
forceArgs[5] = &neighbors
.
getDevicePointer();
neighborsArgs[5] = &neighborPairs
.
getDevicePointer();
copyPairsArgs[0] = &neighborPairs
.
getDevicePointer();
copyPairsArgs[1] = &neighbors
.
getDevicePointer();
continue;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment