Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
ca4c03c3
Unverified
Commit
ca4c03c3
authored
Mar 04, 2020
by
peastman
Committed by
GitHub
Mar 04, 2020
Browse files
Merge pull request #2527 from andysim/nhcommon
Convert Nose Hoover integrator to Common plaform
parents
d169badc
2270a28a
Changes
14
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
844 additions
and
1866 deletions
+844
-1866
openmmapi/include/openmm/NoseHooverIntegrator.h
openmmapi/include/openmm/NoseHooverIntegrator.h
+4
-0
openmmapi/src/NoseHooverIntegrator.cpp
openmmapi/src/NoseHooverIntegrator.cpp
+7
-1
platforms/common/include/openmm/common/CommonKernels.h
platforms/common/include/openmm/common/CommonKernels.h
+110
-0
platforms/common/src/CommonKernels.cpp
platforms/common/src/CommonKernels.cpp
+544
-0
platforms/common/src/kernels/noseHooverChain.cc
platforms/common/src/kernels/noseHooverChain.cc
+56
-52
platforms/common/src/kernels/velocityVerlet.cc
platforms/common/src/kernels/velocityVerlet.cc
+119
-95
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+0
-103
platforms/cuda/src/CudaKernelFactory.cpp
platforms/cuda/src/CudaKernelFactory.cpp
+2
-2
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+0
-463
platforms/cuda/src/kernels/noseHooverChain.cu
platforms/cuda/src/kernels/noseHooverChain.cu
+0
-172
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+0
-103
platforms/opencl/src/OpenCLKernelFactory.cpp
platforms/opencl/src/OpenCLKernelFactory.cpp
+2
-2
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+0
-529
platforms/opencl/src/kernels/velocityVerlet.cl
platforms/opencl/src/kernels/velocityVerlet.cl
+0
-344
No files found.
openmmapi/include/openmm/NoseHooverIntegrator.h
View file @
ca4c03c3
...
@@ -264,6 +264,10 @@ protected:
...
@@ -264,6 +264,10 @@ protected:
* Compute the kinetic energy of the system at the current time.
* Compute the kinetic energy of the system at the current time.
*/
*/
virtual
double
computeKineticEnergy
();
virtual
double
computeKineticEnergy
();
/**
* Computing kinetic energy for this integrator does not require forces.
*/
bool
kineticEnergyRequiresForce
()
const
override
;
std
::
vector
<
NoseHooverChain
>
noseHooverChains
;
std
::
vector
<
NoseHooverChain
>
noseHooverChains
;
std
::
vector
<
int
>
allAtoms
;
std
::
vector
<
int
>
allAtoms
;
...
...
openmmapi/src/NoseHooverIntegrator.cpp
View file @
ca4c03c3
...
@@ -267,6 +267,7 @@ void NoseHooverIntegrator::setRelativeCollisionFrequency(double frequency, int c
...
@@ -267,6 +267,7 @@ void NoseHooverIntegrator::setRelativeCollisionFrequency(double frequency, int c
}
}
double
NoseHooverIntegrator
::
computeKineticEnergy
()
{
double
NoseHooverIntegrator
::
computeKineticEnergy
()
{
forcesAreValid
=
false
;
double
kE
=
0.0
;
double
kE
=
0.0
;
if
(
noseHooverChains
.
size
()
>
0
)
{
if
(
noseHooverChains
.
size
()
>
0
)
{
for
(
const
auto
&
nhc
:
noseHooverChains
){
for
(
const
auto
&
nhc
:
noseHooverChains
){
...
@@ -278,6 +279,10 @@ double NoseHooverIntegrator::computeKineticEnergy() {
...
@@ -278,6 +279,10 @@ double NoseHooverIntegrator::computeKineticEnergy() {
return
kE
;
return
kE
;
}
}
bool
NoseHooverIntegrator
::
kineticEnergyRequiresForce
()
const
{
return
false
;
}
double
NoseHooverIntegrator
::
computeHeatBathEnergy
()
{
double
NoseHooverIntegrator
::
computeHeatBathEnergy
()
{
double
energy
=
0
;
double
energy
=
0
;
for
(
auto
&
nhc
:
noseHooverChains
)
{
for
(
auto
&
nhc
:
noseHooverChains
)
{
...
@@ -340,7 +345,8 @@ void NoseHooverIntegrator::step(int steps) {
...
@@ -340,7 +345,8 @@ void NoseHooverIntegrator::step(int steps) {
throw
OpenMMException
(
"This Integrator is not bound to a context!"
);
throw
OpenMMException
(
"This Integrator is not bound to a context!"
);
std
::
pair
<
double
,
double
>
scale
,
kineticEnergy
;
std
::
pair
<
double
,
double
>
scale
,
kineticEnergy
;
for
(
int
i
=
0
;
i
<
steps
;
++
i
)
{
for
(
int
i
=
0
;
i
<
steps
;
++
i
)
{
context
->
updateContextState
();
if
(
context
->
updateContextState
())
forcesAreValid
=
false
;
for
(
auto
&
nhc
:
noseHooverChains
)
{
for
(
auto
&
nhc
:
noseHooverChains
)
{
kineticEnergy
=
nhcKernel
.
getAs
<
NoseHooverChainKernel
>
().
computeMaskedKineticEnergy
(
*
context
,
nhc
,
false
);
kineticEnergy
=
nhcKernel
.
getAs
<
NoseHooverChainKernel
>
().
computeMaskedKineticEnergy
(
*
context
,
nhc
,
false
);
scale
=
nhcKernel
.
getAs
<
NoseHooverChainKernel
>
().
propagateChain
(
*
context
,
nhc
,
kineticEnergy
,
getStepSize
());
scale
=
nhcKernel
.
getAs
<
NoseHooverChainKernel
>
().
propagateChain
(
*
context
,
nhc
,
kineticEnergy
,
getStepSize
());
...
...
platforms/common/include/openmm/common/CommonKernels.h
View file @
ca4c03c3
...
@@ -941,6 +941,116 @@ private:
...
@@ -941,6 +941,116 @@ private:
ComputeKernel
kernel1
,
kernel2
,
kernel3
;
ComputeKernel
kernel1
,
kernel2
,
kernel3
;
};
};
/*
* This kernel is invoked by NoseHooverIntegrator to take one time step.
*/
class
CommonIntegrateVelocityVerletStepKernel
:
public
IntegrateVelocityVerletStepKernel
{
public:
CommonIntegrateVelocityVerletStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
ComputeContext
&
cc
)
:
IntegrateVelocityVerletStepKernel
(
name
,
platform
),
cc
(
cc
),
hasInitializedKernels
(
false
)
{
}
~
CommonIntegrateVelocityVerletStepKernel
()
{}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param integrator the NoseHooverIntegrator this kernel will be used for
*/
void
initialize
(
const
System
&
system
,
const
NoseHooverIntegrator
&
integrator
);
/**
* Execute the kernel.
*
* @param context the context in which to execute this kernel
* @param integrator the VerletIntegrator this kernel is being used for
* @param forcesAreValid a reference to the parent integrator's boolean for keeping
* track of the validity of the current forces.
*/
void
execute
(
ContextImpl
&
context
,
const
NoseHooverIntegrator
&
integrator
,
bool
&
forcesAreValid
);
/**
* Compute the kinetic energy.
*
* @param context the context in which to execute this kernel
* @param integrator the NoseHooverIntegrator this kernel is being used for
*/
double
computeKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverIntegrator
&
integrator
);
private:
ComputeContext
&
cc
;
float
prevMaxPairDistance
;
ComputeArray
maxPairDistanceBuffer
,
pairListBuffer
,
atomListBuffer
,
pairTemperatureBuffer
;
ComputeKernel
kernel1
,
kernel2
,
kernel3
,
kernelHardWall
;
bool
hasInitializedKernels
;
};
/**
* This kernel is invoked by NoseHooverChain at the start of each time step to adjust the thermostat
* and update the associated particle velocities.
*/
class
CommonNoseHooverChainKernel
:
public
NoseHooverChainKernel
{
public:
CommonNoseHooverChainKernel
(
std
::
string
name
,
const
Platform
&
platform
,
ComputeContext
&
cc
)
:
NoseHooverChainKernel
(
name
,
platform
),
cc
(
cc
),
hasInitializedPropagateKernel
(
false
),
hasInitializedKineticEnergyKernel
(
false
),
hasInitializedHeatBathEnergyKernel
(
false
),
hasInitializedScaleVelocitiesKernel
(
false
)
{}
~
CommonNoseHooverChainKernel
()
{}
/**
* Initialize the kernel.
*/
void
initialize
();
/**
* Execute the kernel that propagates the Nose Hoover chain and determines the velocity scale factor.
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the object describing the chain to be propagated.
* @param kineticEnergies the {absolute, relative} kineticEnergy of the particles being thermostated by this chain.
* @param timeStep the time step used by the integrator.
* @return the {absolute, relative} velocity scale factor to apply to the particles associated with this heat bath.
*/
std
::
pair
<
double
,
double
>
propagateChain
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
std
::
pair
<
double
,
double
>
kineticEnergies
,
double
timeStep
);
/**
* Execute the kernal that computes the total (kinetic + potential) heat bath energy.
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @return the total heat bath energy.
*/
double
computeHeatBathEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
);
/**
* Execute the kernel that computes the kinetic energy for a subset of atoms,
* or the relative kinetic energy of Drude particles with respect to their parent atoms
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @param downloadValue whether the computed value should be downloaded and returned.
*
*/
std
::
pair
<
double
,
double
>
computeMaskedKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
noseHooverChain
,
bool
downloadValue
);
/**
* Execute the kernel that scales the velocities of particles associated with a nose hoover chain
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @param scaleFactors the {absolute, relative} multiplicative factor by which velocities are scaled.
*/
void
scaleVelocities
(
ContextImpl
&
context
,
const
NoseHooverChain
&
noseHooverChain
,
std
::
pair
<
double
,
double
>
scaleFactors
);
private:
int
sumWorkGroupSize
;
ComputeContext
&
cc
;
ComputeArray
energyBuffer
,
scaleFactorBuffer
,
kineticEnergyBuffer
,
chainMasses
,
chainForces
,
heatBathEnergy
;
std
::
map
<
int
,
ComputeArray
>
atomlists
,
pairlists
;
std
::
map
<
int
,
ComputeKernel
>
propagateKernels
;
bool
hasInitializedPropagateKernel
;
bool
hasInitializedKineticEnergyKernel
;
bool
hasInitializedHeatBathEnergyKernel
;
bool
hasInitializedScaleVelocitiesKernel
;
ComputeKernel
reduceEnergyKernel
;
ComputeKernel
computeHeatBathEnergyKernel
;
ComputeKernel
computeAtomsKineticEnergyKernel
;
ComputeKernel
computePairsKineticEnergyKernel
;
ComputeKernel
scaleAtomsVelocitiesKernel
;
ComputeKernel
scalePairsVelocitiesKernel
;
};
/**
/**
* This kernel is invoked by BrownianIntegrator to take one time step.
* This kernel is invoked by BrownianIntegrator to take one time step.
*/
*/
...
...
platforms/common/src/CommonKernels.cpp
View file @
ca4c03c3
...
@@ -5640,6 +5640,550 @@ double CommonIntegrateLangevinMiddleStepKernel::computeKineticEnergy(ContextImpl
...
@@ -5640,6 +5640,550 @@ double CommonIntegrateLangevinMiddleStepKernel::computeKineticEnergy(ContextImpl
return
cc
.
getIntegrationUtilities
().
computeKineticEnergy
(
0.0
);
return
cc
.
getIntegrationUtilities
().
computeKineticEnergy
(
0.0
);
}
}
void
CommonIntegrateVelocityVerletStepKernel
::
initialize
(
const
System
&
system
,
const
NoseHooverIntegrator
&
integrator
)
{
cc
.
initializeContexts
();
map
<
string
,
string
>
defines
;
defines
[
"BOLTZ"
]
=
cc
.
doubleToString
(
BOLTZ
);
ComputeProgram
program
=
cc
.
compileProgram
(
CommonKernelSources
::
velocityVerlet
,
defines
);
kernel1
=
program
->
createKernel
(
"integrateVelocityVerletPart1"
);
kernel2
=
program
->
createKernel
(
"integrateVelocityVerletPart2"
);
kernel3
=
program
->
createKernel
(
"integrateVelocityVerletPart3"
);
kernelHardWall
=
program
->
createKernel
(
"integrateVelocityVerletHardWall"
);
prevMaxPairDistance
=
-
1.0
f
;
maxPairDistanceBuffer
.
initialize
<
float
>
(
cc
,
1
,
"maxPairDistanceBuffer"
);
}
void
CommonIntegrateVelocityVerletStepKernel
::
execute
(
ContextImpl
&
context
,
const
NoseHooverIntegrator
&
integrator
,
bool
&
forcesAreValid
)
{
IntegrationUtilities
&
integration
=
cc
.
getIntegrationUtilities
();
int
paddedNumAtoms
=
cc
.
getPaddedNumAtoms
();
double
dt
=
integrator
.
getStepSize
();
cc
.
getIntegrationUtilities
().
setNextStepSize
(
dt
);
// If the atom reordering has occured, the forces from the previous step are permuted and thus invalid.
// They need to be either sorted or recomputed; here we choose the latter.
if
(
!
forcesAreValid
||
cc
.
getAtomsWereReordered
())
context
.
calcForcesAndEnergy
(
true
,
false
);
const
auto
&
atomList
=
integrator
.
getAllThermostatedIndividualParticles
();
const
auto
&
pairList
=
integrator
.
getAllThermostatedPairs
();
int
numAtoms
=
atomList
.
size
();
int
numPairs
=
pairList
.
size
();
int
numParticles
=
numAtoms
+
2
*
numPairs
;
float
maxPairDistance
=
integrator
.
getMaximumPairDistance
();
// Make sure atom and pair metadata is uploaded and has the correct dimensions
if
(
prevMaxPairDistance
!=
maxPairDistance
)
{
std
::
vector
<
float
>
tmp
(
1
,
maxPairDistance
);
maxPairDistanceBuffer
.
upload
(
tmp
);
prevMaxPairDistance
=
maxPairDistance
;
}
if
(
numAtoms
!=
0
&&
(
!
atomListBuffer
.
isInitialized
()
||
atomListBuffer
.
getSize
()
!=
numAtoms
))
{
if
(
atomListBuffer
.
isInitialized
())
atomListBuffer
.
resize
(
atomList
.
size
());
else
atomListBuffer
.
initialize
<
int
>
(
cc
,
atomList
.
size
(),
"atomListBuffer"
);
atomListBuffer
.
upload
(
atomList
);
}
if
(
numPairs
!=
0
&&
(
!
pairListBuffer
.
isInitialized
()
||
pairListBuffer
.
getSize
()
!=
numPairs
))
{
if
(
pairListBuffer
.
isInitialized
())
{
pairListBuffer
.
resize
(
pairList
.
size
());
pairTemperatureBuffer
.
resize
(
pairList
.
size
());
}
else
{
pairListBuffer
.
initialize
<
mm_int2
>
(
cc
,
pairList
.
size
(),
"pairListBuffer"
);
pairTemperatureBuffer
.
initialize
<
float
>
(
cc
,
pairList
.
size
(),
"pairTemperatureBuffer"
);
}
std
::
vector
<
mm_int2
>
tmp
;
std
::
vector
<
float
>
tmp2
;
for
(
const
auto
&
pair
:
pairList
)
{
tmp
.
push_back
(
mm_int2
(
std
::
get
<
0
>
(
pair
),
std
::
get
<
1
>
(
pair
)));
tmp2
.
push_back
(
std
::
get
<
2
>
(
pair
));
}
pairListBuffer
.
upload
(
tmp
);
pairTemperatureBuffer
.
upload
(
tmp2
);
}
if
(
!
hasInitializedKernels
)
{
hasInitializedKernels
=
true
;
kernel1
->
addArg
(
numAtoms
);
kernel1
->
addArg
(
numPairs
);
kernel1
->
addArg
(
paddedNumAtoms
);
kernel1
->
addArg
(
cc
.
getIntegrationUtilities
().
getStepSize
());
kernel1
->
addArg
(
cc
.
getPosq
());
kernel1
->
addArg
(
cc
.
getVelm
());
kernel1
->
addArg
(
cc
.
getLongForceBuffer
());
kernel1
->
addArg
(
integration
.
getPosDelta
());
kernel1
->
addArg
(
numAtoms
>
0
?
atomListBuffer
:
cc
.
getEnergyBuffer
());
// The array is not used if num == 0
kernel1
->
addArg
(
numPairs
>
0
?
pairListBuffer
:
cc
.
getEnergyBuffer
());
// The array is not used if num == 0
if
(
cc
.
getUseMixedPrecision
())
kernel1
->
addArg
(
cc
.
getPosqCorrection
());
kernel2
->
addArg
(
numParticles
);
kernel2
->
addArg
(
cc
.
getIntegrationUtilities
().
getStepSize
());
kernel2
->
addArg
(
cc
.
getPosq
());
kernel2
->
addArg
(
cc
.
getVelm
());
kernel2
->
addArg
(
integration
.
getPosDelta
());
if
(
cc
.
getUseMixedPrecision
())
kernel2
->
addArg
(
cc
.
getPosqCorrection
());
if
(
numPairs
>
0
)
{
kernelHardWall
->
addArg
(
numPairs
);
kernelHardWall
->
addArg
(
maxPairDistanceBuffer
);
kernelHardWall
->
addArg
(
cc
.
getIntegrationUtilities
().
getStepSize
());
kernelHardWall
->
addArg
(
cc
.
getPosq
());
kernelHardWall
->
addArg
(
cc
.
getVelm
());
kernelHardWall
->
addArg
(
pairListBuffer
);
kernelHardWall
->
addArg
(
pairTemperatureBuffer
);
if
(
cc
.
getUseMixedPrecision
())
kernelHardWall
->
addArg
(
cc
.
getPosqCorrection
());
}
kernel3
->
addArg
(
numAtoms
);
kernel3
->
addArg
(
numPairs
);
kernel3
->
addArg
(
paddedNumAtoms
);
kernel3
->
addArg
(
cc
.
getIntegrationUtilities
().
getStepSize
());
kernel3
->
addArg
(
cc
.
getPosq
());
kernel3
->
addArg
(
cc
.
getVelm
());
kernel3
->
addArg
(
cc
.
getLongForceBuffer
());
kernel3
->
addArg
(
integration
.
getPosDelta
());
kernel3
->
addArg
(
numAtoms
>
0
?
atomListBuffer
:
cc
.
getEnergyBuffer
());
// The array is not used if num == 0
kernel3
->
addArg
(
numPairs
>
0
?
pairListBuffer
:
cc
.
getEnergyBuffer
());
// The array is not used if num == 0
if
(
cc
.
getUseMixedPrecision
())
kernel3
->
addArg
(
cc
.
getPosqCorrection
());
}
/*
* Carry out the integration
*/
// Advance the velocities a half step
kernel1
->
execute
(
std
::
max
(
numAtoms
,
numPairs
));
integration
.
applyConstraints
(
integrator
.
getConstraintTolerance
());
// Advance particle positions a full step
kernel2
->
execute
(
numParticles
);
// Make sure any Drude-like particles have not wandered too far from home
if
(
numPairs
>
0
)
kernelHardWall
->
execute
(
numPairs
);
integration
.
computeVirtualSites
();
context
.
calcForcesAndEnergy
(
true
,
false
);
forcesAreValid
=
true
;
// Update velocities another half step
kernel3
->
execute
(
std
::
max
(
numAtoms
,
numPairs
));
integration
.
applyVelocityConstraints
(
integrator
.
getConstraintTolerance
());
cc
.
setTime
(
cc
.
getTime
()
+
dt
);
cc
.
setStepCount
(
cc
.
getStepCount
()
+
1
);
cc
.
reorderAtoms
();
}
double
CommonIntegrateVelocityVerletStepKernel
::
computeKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverIntegrator
&
integrator
)
{
return
cc
.
getIntegrationUtilities
().
computeKineticEnergy
(
0
);
}
void
CommonNoseHooverChainKernel
::
initialize
()
{
bool
useDouble
=
cc
.
getUseDoublePrecision
()
||
cc
.
getUseMixedPrecision
();
map
<
string
,
string
>
defines
;
int
workGroupSize
=
std
::
min
(
cc
.
getMaxThreadBlockSize
(),
512
);
defines
[
"WORK_GROUP_SIZE"
]
=
std
::
to_string
(
workGroupSize
);
defines
[
"BEGIN_YS_LOOP"
]
=
"const real arr[1] = {1.0}; for(int i=0;i<1;++i) { const real ys = arr[i];"
;
defines
[
"END_YS_LOOP"
]
=
"}"
;
ComputeProgram
program
=
cc
.
compileProgram
(
CommonKernelSources
::
noseHooverChain
,
defines
);
propagateKernels
[
1
]
=
program
->
createKernel
(
"propagateNoseHooverChain"
);
defines
[
"BEGIN_YS_LOOP"
]
=
"const real arr[3] = {0.828981543588751, -0.657963087177502, 0.828981543588751}; for(int i=0;i<3;++i) { const real ys = arr[i];"
;
program
=
cc
.
compileProgram
(
CommonKernelSources
::
noseHooverChain
,
defines
);
propagateKernels
[
3
]
=
program
->
createKernel
(
"propagateNoseHooverChain"
);
defines
[
"BEGIN_YS_LOOP"
]
=
"const real arr[5] = {0.2967324292201065, 0.2967324292201065, -0.186929716880426, 0.2967324292201065, 0.2967324292201065}; for(int i=0;i<5;++i) { const real ys = arr[i];"
;
program
=
cc
.
compileProgram
(
CommonKernelSources
::
noseHooverChain
,
defines
);
propagateKernels
[
5
]
=
program
->
createKernel
(
"propagateNoseHooverChain"
);
program
=
cc
.
compileProgram
(
CommonKernelSources
::
noseHooverChain
,
defines
);
reduceEnergyKernel
=
program
->
createKernel
(
"reduceEnergyPair"
);
computeHeatBathEnergyKernel
=
program
->
createKernel
(
"computeHeatBathEnergy"
);
computeAtomsKineticEnergyKernel
=
program
->
createKernel
(
"computeAtomsKineticEnergy"
);
computePairsKineticEnergyKernel
=
program
->
createKernel
(
"computePairsKineticEnergy"
);
scaleAtomsVelocitiesKernel
=
program
->
createKernel
(
"scaleAtomsVelocities"
);
scalePairsVelocitiesKernel
=
program
->
createKernel
(
"scalePairsVelocities"
);
int
energyBufferSize
=
cc
.
getEnergyBuffer
().
getSize
();
if
(
cc
.
getUseDoublePrecision
()
||
cc
.
getUseMixedPrecision
())
energyBuffer
.
initialize
<
mm_double2
>
(
cc
,
energyBufferSize
,
"energyBuffer"
);
else
energyBuffer
.
initialize
<
mm_float2
>
(
cc
,
energyBufferSize
,
"energyBuffer"
);
}
std
::
pair
<
double
,
double
>
CommonNoseHooverChainKernel
::
propagateChain
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
std
::
pair
<
double
,
double
>
kineticEnergies
,
double
timeStep
)
{
bool
useDouble
=
cc
.
getUseDoublePrecision
()
||
cc
.
getUseMixedPrecision
();
int
chainID
=
nhc
.
getChainID
();
int
nAtoms
=
nhc
.
getThermostatedAtoms
().
size
();
int
nPairs
=
nhc
.
getThermostatedPairs
().
size
();
int
chainLength
=
nhc
.
getChainLength
();
int
numYS
=
nhc
.
getNumYoshidaSuzukiTimeSteps
();
int
numMTS
=
nhc
.
getNumMultiTimeSteps
();
if
(
numYS
!=
1
&&
numYS
!=
3
&&
numYS
!=
5
)
{
throw
OpenMMException
(
"Number of Yoshida Suzuki time steps has to be 1, 3, or 5."
);
}
auto
&
chainState
=
cc
.
getIntegrationUtilities
().
getNoseHooverChainState
();
if
(
!
scaleFactorBuffer
.
isInitialized
()
||
scaleFactorBuffer
.
getSize
()
==
0
)
{
if
(
useDouble
)
{
std
::
vector
<
mm_double2
>
zeros
{{
0
,
0
}};
if
(
scaleFactorBuffer
.
isInitialized
())
scaleFactorBuffer
.
resize
(
1
);
else
scaleFactorBuffer
.
initialize
<
mm_double2
>
(
cc
,
1
,
"scaleFactorBuffer"
);
scaleFactorBuffer
.
upload
(
zeros
);
}
else
{
std
::
vector
<
mm_float2
>
zeros
{{
0
,
0
}};
if
(
scaleFactorBuffer
.
isInitialized
())
scaleFactorBuffer
.
resize
(
1
);
else
scaleFactorBuffer
.
initialize
<
mm_float2
>
(
cc
,
1
,
"scaleFactorBuffer"
);
scaleFactorBuffer
.
upload
(
zeros
);
}
}
if
(
!
chainForces
.
isInitialized
()
||
!
chainMasses
.
isInitialized
())
{
if
(
useDouble
)
{
std
::
vector
<
double
>
zeros
(
chainLength
,
0
);
if
(
chainForces
.
isInitialized
())
{
chainMasses
.
resize
(
chainLength
);
chainForces
.
resize
(
chainLength
);
}
else
{
chainMasses
.
initialize
<
double
>
(
cc
,
chainLength
,
"chainMasses"
);
chainForces
.
initialize
<
double
>
(
cc
,
chainLength
,
"chainForces"
);
}
chainMasses
.
upload
(
zeros
);
chainForces
.
upload
(
zeros
);
}
else
{
std
::
vector
<
float
>
zeros
(
chainLength
,
0
);
if
(
chainForces
.
isInitialized
())
{
chainMasses
.
resize
(
chainLength
);
chainForces
.
resize
(
chainLength
);
}
else
{
chainMasses
.
initialize
<
float
>
(
cc
,
chainLength
,
"chainMasses"
);
chainForces
.
initialize
<
float
>
(
cc
,
chainLength
,
"chainForces"
);
}
chainMasses
.
upload
(
zeros
);
chainForces
.
upload
(
zeros
);
}
}
if
(
chainForces
.
getSize
()
<
chainLength
)
chainForces
.
resize
(
chainLength
);
if
(
chainMasses
.
getSize
()
<
chainLength
)
chainMasses
.
resize
(
chainLength
);
// N.B. We ignore the incoming kineticEnergy and grab it from the device buffer instead
if
(
nAtoms
)
{
if
(
!
chainState
.
count
(
2
*
chainID
))
chainState
[
2
*
chainID
]
=
ComputeArray
();
if
(
!
chainState
.
at
(
2
*
chainID
).
isInitialized
()
||
chainState
.
at
(
2
*
chainID
).
getSize
()
!=
chainLength
)
{
// We need to upload the Common array
if
(
useDouble
)
{
if
(
chainState
.
at
(
2
*
chainID
).
isInitialized
())
chainState
.
at
(
2
*
chainID
).
resize
(
chainLength
);
else
chainState
.
at
(
2
*
chainID
).
initialize
<
mm_double2
>
(
cc
,
chainLength
,
"chainState"
+
std
::
to_string
(
2
*
chainID
));
std
::
vector
<
mm_double2
>
zeros
(
chainLength
,
mm_double2
(
0.0
,
0.0
));
chainState
.
at
(
2
*
chainID
).
upload
(
zeros
.
data
());
}
else
{
if
(
chainState
.
at
(
2
*
chainID
).
isInitialized
())
chainState
.
at
(
2
*
chainID
).
resize
(
chainLength
);
else
chainState
.
at
(
2
*
chainID
).
initialize
<
mm_float2
>
(
cc
,
chainLength
,
"chainState"
+
std
::
to_string
(
2
*
chainID
));
std
::
vector
<
mm_float2
>
zeros
(
chainLength
,
mm_float2
(
0.0
f
,
0.0
f
));
chainState
.
at
(
2
*
chainID
).
upload
(
zeros
.
data
());
}
}
}
if
(
nPairs
)
{
if
(
!
chainState
.
count
(
2
*
chainID
+
1
))
chainState
[
2
*
chainID
+
1
]
=
ComputeArray
();
if
(
!
chainState
.
at
(
2
*
chainID
+
1
).
isInitialized
()
||
chainState
.
at
(
2
*
chainID
+
1
).
getSize
()
!=
chainLength
)
{
// We need to upload the Common array
if
(
useDouble
)
{
if
(
chainState
.
at
(
2
*
chainID
+
1
).
isInitialized
())
chainState
.
at
(
2
*
chainID
+
1
).
resize
(
chainLength
);
else
chainState
.
at
(
2
*
chainID
+
1
).
initialize
<
mm_double2
>
(
cc
,
chainLength
,
"chainState"
+
std
::
to_string
(
2
*
chainID
+
1
));
std
::
vector
<
mm_double2
>
zeros
(
chainLength
,
mm_double2
(
0.0
,
0.0
));
chainState
.
at
(
2
*
chainID
+
1
).
upload
(
zeros
.
data
());
}
else
{
if
(
chainState
.
at
(
2
*
chainID
+
1
).
isInitialized
())
chainState
.
at
(
2
*
chainID
+
1
).
resize
(
chainLength
);
else
chainState
.
at
(
2
*
chainID
+
1
).
initialize
<
mm_float2
>
(
cc
,
chainLength
,
"chainState"
+
std
::
to_string
(
2
*
chainID
+
1
));
std
::
vector
<
mm_float2
>
zeros
(
chainLength
,
mm_float2
(
0.0
f
,
0.0
f
));
chainState
.
at
(
2
*
chainID
+
1
).
upload
(
zeros
.
data
());
}
}
}
if
(
!
hasInitializedPropagateKernel
)
{
hasInitializedPropagateKernel
=
true
;
propagateKernels
[
numYS
]
->
addArg
();
// ChainState
propagateKernels
[
numYS
]
->
addArg
(
kineticEnergyBuffer
);
propagateKernels
[
numYS
]
->
addArg
(
scaleFactorBuffer
);
propagateKernels
[
numYS
]
->
addArg
(
chainMasses
);
propagateKernels
[
numYS
]
->
addArg
(
chainForces
);
propagateKernels
[
numYS
]
->
addArg
();
// ChainType
propagateKernels
[
numYS
]
->
addArg
(
chainLength
);
propagateKernels
[
numYS
]
->
addArg
(
numMTS
);
propagateKernels
[
numYS
]
->
addArg
();
// numDoFs
propagateKernels
[
numYS
]
->
addArg
((
float
)
timeStep
);
propagateKernels
[
numYS
]
->
addArg
();
// kT
propagateKernels
[
numYS
]
->
addArg
();
// frequency
}
if
(
nAtoms
)
{
int
chainType
=
0
;
double
temperature
=
nhc
.
getTemperature
();
float
frequency
=
nhc
.
getCollisionFrequency
();
double
kT
=
BOLTZ
*
temperature
;
int
numDOFs
=
nhc
.
getNumDegreesOfFreedom
();
propagateKernels
[
numYS
]
->
setArg
(
0
,
chainState
[
2
*
chainID
]);
propagateKernels
[
numYS
]
->
setArg
(
5
,
chainType
);
propagateKernels
[
numYS
]
->
setArg
(
8
,
numDOFs
);
if
(
useDouble
)
{
propagateKernels
[
numYS
]
->
setArg
(
10
,
kT
);
}
else
{
propagateKernels
[
numYS
]
->
setArg
(
10
,
(
float
)
kT
);
}
propagateKernels
[
numYS
]
->
setArg
(
11
,
frequency
);
propagateKernels
[
numYS
]
->
execute
(
1
,
1
);
}
if
(
nPairs
)
{
int
chainType
=
1
;
double
relativeTemperature
=
nhc
.
getRelativeTemperature
();
float
relativeFrequency
=
nhc
.
getRelativeCollisionFrequency
();
double
kT
=
BOLTZ
*
relativeTemperature
;
int
ndf
=
3
*
nPairs
;
propagateKernels
[
numYS
]
->
setArg
(
0
,
chainState
[
2
*
chainID
+
1
]);
propagateKernels
[
numYS
]
->
setArg
(
5
,
chainType
);
propagateKernels
[
numYS
]
->
setArg
(
8
,
ndf
);
if
(
useDouble
)
{
propagateKernels
[
numYS
]
->
setArg
(
10
,
kT
);
}
else
{
propagateKernels
[
numYS
]
->
setArg
(
10
,
(
float
)
kT
);
}
propagateKernels
[
numYS
]
->
setArg
(
11
,
relativeFrequency
);
propagateKernels
[
numYS
]
->
execute
(
1
,
1
);
}
return
{
0
,
0
};
}
double
CommonNoseHooverChainKernel
::
computeHeatBathEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
)
{
bool
useDouble
=
cc
.
getUseDoublePrecision
()
||
cc
.
getUseMixedPrecision
();
int
chainID
=
nhc
.
getChainID
();
int
chainLength
=
nhc
.
getChainLength
();
auto
&
chainState
=
cc
.
getIntegrationUtilities
().
getNoseHooverChainState
();
bool
absChainIsValid
=
chainState
.
count
(
2
*
chainID
)
!=
0
&&
chainState
[
2
*
chainID
].
isInitialized
()
&&
chainState
[
2
*
chainID
].
getSize
()
==
chainLength
;
bool
relChainIsValid
=
chainState
.
count
(
2
*
chainID
+
1
)
!=
0
&&
chainState
[
2
*
chainID
+
1
].
isInitialized
()
&&
chainState
[
2
*
chainID
+
1
].
getSize
()
==
chainLength
;
if
(
!
absChainIsValid
&&
!
relChainIsValid
)
return
0.0
;
if
(
!
heatBathEnergy
.
isInitialized
()
||
heatBathEnergy
.
getSize
()
==
0
)
{
if
(
useDouble
)
{
std
::
vector
<
double
>
one
(
1
);
heatBathEnergy
.
initialize
<
double
>
(
cc
,
1
,
"heatBathEnergy"
);
heatBathEnergy
.
upload
(
one
);
}
else
{
std
::
vector
<
float
>
one
(
1
);
heatBathEnergy
.
initialize
<
float
>
(
cc
,
1
,
"heatBathEnergy"
);
heatBathEnergy
.
upload
(
one
);
}
}
cc
.
clearBuffer
(
heatBathEnergy
);
if
(
!
hasInitializedHeatBathEnergyKernel
)
{
hasInitializedHeatBathEnergyKernel
=
true
;
computeHeatBathEnergyKernel
->
addArg
(
heatBathEnergy
);
computeHeatBathEnergyKernel
->
addArg
(
chainLength
);
computeHeatBathEnergyKernel
->
addArg
();
// numDOFs
computeHeatBathEnergyKernel
->
addArg
();
// kT
computeHeatBathEnergyKernel
->
addArg
();
// frequency
computeHeatBathEnergyKernel
->
addArg
();
// chainstate
}
if
(
absChainIsValid
)
{
int
numDOFs
=
nhc
.
getNumDegreesOfFreedom
();
double
temperature
=
nhc
.
getTemperature
();
float
frequency
=
nhc
.
getCollisionFrequency
();
double
kT
=
BOLTZ
*
temperature
;
computeHeatBathEnergyKernel
->
setArg
(
2
,
numDOFs
);
if
(
useDouble
)
{
computeHeatBathEnergyKernel
->
setArg
(
3
,
kT
);
}
else
{
computeHeatBathEnergyKernel
->
setArg
(
3
,
(
float
)
kT
);
}
computeHeatBathEnergyKernel
->
setArg
(
4
,
frequency
);
computeHeatBathEnergyKernel
->
setArg
(
5
,
chainState
[
2
*
chainID
]);
computeHeatBathEnergyKernel
->
execute
(
1
,
1
);
}
if
(
relChainIsValid
)
{
int
numDOFs
=
3
*
nhc
.
getThermostatedPairs
().
size
();
double
temperature
=
nhc
.
getRelativeTemperature
();
float
frequency
=
nhc
.
getRelativeCollisionFrequency
();
double
kT
=
BOLTZ
*
temperature
;
computeHeatBathEnergyKernel
->
setArg
(
2
,
numDOFs
);
if
(
useDouble
)
{
computeHeatBathEnergyKernel
->
setArg
(
3
,
kT
);
}
else
{
computeHeatBathEnergyKernel
->
setArg
(
3
,
(
float
)
kT
);
}
computeHeatBathEnergyKernel
->
setArg
(
4
,
frequency
);
computeHeatBathEnergyKernel
->
setArg
(
5
,
chainState
[
2
*
chainID
+
1
]);
computeHeatBathEnergyKernel
->
execute
(
1
,
1
);
}
void
*
pinnedBuffer
=
cc
.
getPinnedBuffer
();
heatBathEnergy
.
download
(
pinnedBuffer
);
if
(
useDouble
)
return
*
((
double
*
)
pinnedBuffer
);
else
return
*
((
float
*
)
pinnedBuffer
);
}
std
::
pair
<
double
,
double
>
CommonNoseHooverChainKernel
::
computeMaskedKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
bool
downloadValue
)
{
bool
useDouble
=
cc
.
getUseDoublePrecision
()
||
cc
.
getUseMixedPrecision
();
int
chainID
=
nhc
.
getChainID
();
const
auto
&
nhcAtoms
=
nhc
.
getThermostatedAtoms
();
const
auto
&
nhcPairs
=
nhc
.
getThermostatedPairs
();
int
nAtoms
=
nhcAtoms
.
size
();
int
nPairs
=
nhcPairs
.
size
();
if
(
nAtoms
)
{
if
(
!
atomlists
.
count
(
chainID
))
{
// We need to upload the Common array
atomlists
[
chainID
]
=
ComputeArray
();
atomlists
[
chainID
].
initialize
<
int
>
(
cc
,
nAtoms
,
"atomlist"
+
std
::
to_string
(
chainID
));
atomlists
[
chainID
].
upload
(
nhcAtoms
);
}
if
(
atomlists
[
chainID
].
getSize
()
!=
nAtoms
)
{
throw
OpenMMException
(
"Number of atoms changed. Cannot be handled by the same Nose-Hoover thermostat."
);
}
}
if
(
nPairs
)
{
if
(
!
pairlists
.
count
(
chainID
))
{
// We need to upload the Common array
pairlists
[
chainID
]
=
ComputeArray
();
pairlists
[
chainID
].
initialize
<
mm_int2
>
(
cc
,
nPairs
,
"pairlist"
+
std
::
to_string
(
chainID
));
std
::
vector
<
mm_int2
>
int2vec
;
for
(
const
auto
&
p
:
nhcPairs
)
int2vec
.
push_back
(
mm_int2
(
p
.
first
,
p
.
second
));
pairlists
[
chainID
].
upload
(
int2vec
);
}
if
(
pairlists
[
chainID
].
getSize
()
!=
nPairs
)
{
throw
OpenMMException
(
"Number of thermostated pairs changed. Cannot be handled by the same Nose-Hoover thermostat."
);
}
}
if
(
!
kineticEnergyBuffer
.
isInitialized
()
||
kineticEnergyBuffer
.
getSize
()
==
0
)
{
if
(
useDouble
)
{
std
::
vector
<
mm_double2
>
zeros
{{
0
,
0
}};
kineticEnergyBuffer
.
initialize
<
mm_double2
>
(
cc
,
1
,
"kineticEnergyBuffer"
);
kineticEnergyBuffer
.
upload
(
zeros
);
}
else
{
std
::
vector
<
mm_float2
>
zeros
{{
0
,
0
}};
kineticEnergyBuffer
.
initialize
<
mm_float2
>
(
cc
,
1
,
"kineticEnergyBuffer"
);
kineticEnergyBuffer
.
upload
(
zeros
);
}
}
int
workGroupSize
=
std
::
min
(
cc
.
getMaxThreadBlockSize
(),
512
);
if
(
!
hasInitializedKineticEnergyKernel
)
{
hasInitializedKineticEnergyKernel
=
true
;
computeAtomsKineticEnergyKernel
->
addArg
(
energyBuffer
);
computeAtomsKineticEnergyKernel
->
addArg
();
// nAtoms
computeAtomsKineticEnergyKernel
->
addArg
(
cc
.
getVelm
());
computeAtomsKineticEnergyKernel
->
addArg
();
// atom list
computePairsKineticEnergyKernel
->
addArg
(
energyBuffer
);
computePairsKineticEnergyKernel
->
addArg
();
// nPairs
computePairsKineticEnergyKernel
->
addArg
(
cc
.
getVelm
());
computePairsKineticEnergyKernel
->
addArg
();
// pair list
reduceEnergyKernel
->
addArg
(
energyBuffer
);
reduceEnergyKernel
->
addArg
(
kineticEnergyBuffer
);
reduceEnergyKernel
->
addArg
(
energyBuffer
.
getSize
());
}
cc
.
clearBuffer
(
energyBuffer
);
if
(
nAtoms
)
{
computeAtomsKineticEnergyKernel
->
setArg
(
1
,
nAtoms
);
computeAtomsKineticEnergyKernel
->
setArg
(
3
,
atomlists
[
chainID
]);
computeAtomsKineticEnergyKernel
->
execute
(
nAtoms
);
}
if
(
nPairs
)
{
computePairsKineticEnergyKernel
->
setArg
(
1
,
nPairs
);
computePairsKineticEnergyKernel
->
setArg
(
3
,
pairlists
[
chainID
]);
computePairsKineticEnergyKernel
->
execute
(
nPairs
);
}
reduceEnergyKernel
->
execute
(
workGroupSize
,
workGroupSize
);
std
::
pair
<
double
,
double
>
KEs
=
{
0
,
0
};
if
(
downloadValue
)
{
if
(
useDouble
)
{
mm_double2
tmp
;
kineticEnergyBuffer
.
download
(
&
tmp
);
KEs
.
first
=
tmp
.
x
;
KEs
.
second
=
tmp
.
y
;
}
else
{
mm_float2
tmp
;
kineticEnergyBuffer
.
download
(
&
tmp
);
KEs
.
first
=
tmp
.
x
;
KEs
.
second
=
tmp
.
y
;
}
}
return
KEs
;
}
void
CommonNoseHooverChainKernel
::
scaleVelocities
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
std
::
pair
<
double
,
double
>
scaleFactor
)
{
// For now we assume that the atoms and pairs info is valid, because compute{Atoms|Pairs}KineticEnergy must have been
// called before this kernel. If that ever ceases to be true, some sanity checks are needed here.
int
chainID
=
nhc
.
getChainID
();
int
nAtoms
=
nhc
.
getThermostatedAtoms
().
size
();
int
nPairs
=
nhc
.
getThermostatedPairs
().
size
();
if
(
!
hasInitializedScaleVelocitiesKernel
)
{
hasInitializedScaleVelocitiesKernel
=
true
;
scaleAtomsVelocitiesKernel
->
addArg
(
scaleFactorBuffer
);
scaleAtomsVelocitiesKernel
->
addArg
();
// nAtoms
scaleAtomsVelocitiesKernel
->
addArg
(
cc
.
getVelm
());
scaleAtomsVelocitiesKernel
->
addArg
();
// atom list
scalePairsVelocitiesKernel
->
addArg
(
scaleFactorBuffer
);
scalePairsVelocitiesKernel
->
addArg
();
// nPairs
scalePairsVelocitiesKernel
->
addArg
(
cc
.
getVelm
());
scalePairsVelocitiesKernel
->
addArg
();
// pair list
}
if
(
nAtoms
)
{
scaleAtomsVelocitiesKernel
->
setArg
(
1
,
nAtoms
);
scaleAtomsVelocitiesKernel
->
setArg
(
3
,
atomlists
[
chainID
]);
scaleAtomsVelocitiesKernel
->
execute
(
nAtoms
);
}
if
(
nPairs
)
{
scalePairsVelocitiesKernel
->
setArg
(
1
,
nPairs
);
scalePairsVelocitiesKernel
->
setArg
(
3
,
pairlists
[
chainID
]);
scalePairsVelocitiesKernel
->
execute
(
nPairs
);
}
}
void
CommonIntegrateBrownianStepKernel
::
initialize
(
const
System
&
system
,
const
BrownianIntegrator
&
integrator
)
{
void
CommonIntegrateBrownianStepKernel
::
initialize
(
const
System
&
system
,
const
BrownianIntegrator
&
integrator
)
{
cc
.
initializeContexts
();
cc
.
initializeContexts
();
cc
.
setAsCurrent
();
cc
.
setAsCurrent
();
...
...
platforms/
opencl
/src/kernels/noseHooverChain.c
l
→
platforms/
common
/src/kernels/noseHooverChain.c
c
View file @
ca4c03c3
KERNEL
void
propagateNoseHooverChain
(
GLOBAL
mixed2
*
RESTRICT
chainData
,
GLOBAL
const
mixed2
*
RESTRICT
energySum
,
GLOBAL
mixed2
*
RESTRICT
scaleFactor
,
//#include
<initializer_list>
GLOBAL
mixed
*
RESTRICT
chainMasses
,
GLOBAL
mixed
*
RESTRICT
chainForces
,
int
chainType
,
int
chainLength
,
int
numMTS
,
int
numDOFs
,
float
timeStep
,
mixed
kT
,
float
frequency
){
__kernel
void
propagateNoseHooverChain
(
__global
mixed2*
restrict
chainData,
__global
const
mixed2
*
restrict
energySum,
__global
mixed2*
restrict
scaleFactor,
__global
mixed*
restrict
chainMasses,
__global
mixed*
restrict
chainForces,
int
chainType,
int
chainLength,
int
numMTS,
int
numDOFs,
float
timeStep,
mixed
kT,
float
frequency
)
{
const
mixed
kineticEnergy
=
chainType
==
0
?
energySum
[
0
].
x
:
energySum
[
0
].
y
;
const
mixed
kineticEnergy
=
chainType
==
0
?
energySum
[
0
].
x
:
energySum
[
0
].
y
;
mixed
scale
=
1
;
mixed
scale
=
1
;
if
(
kineticEnergy
<
1e-8
)
return
;
if
(
kineticEnergy
<
1e-8
)
return
;
...
@@ -54,10 +50,9 @@ __kernel void propagateNoseHooverChain(__global mixed2* restrict chainData, __gl
...
@@ -54,10 +50,9 @@ __kernel void propagateNoseHooverChain(__global mixed2* restrict chainData, __gl
/**
/**
* Compute total (potential + kinetic) energy of the Nose-Hoover beads
* Compute total (potential + kinetic) energy of the Nose-Hoover beads
*/
*/
__kernel
void
computeHeatBathEnergy
(
__global
mixed*
restrict
heatBathEnergy,
int
chainLength,
int
numDOFs,
KERNEL
void
computeHeatBathEnergy
(
GLOBAL
mixed
*
RESTRICT
heatBathEnergy
,
int
chainLength
,
int
numDOFs
,
mixed
kT,
float
frequency,
__global
const
mixed2*
restrict
chainData
)
{
mixed
kT
,
float
frequency
,
GLOBAL
const
mixed2
*
RESTRICT
chainData
){
// Note that this is always incremented; make sure it's zeroed properly before the first call
// Note that this is always incremented; make sure it's zeroed properly before the first call
for
(
int
i
=
0
;
i
<
chainLength
;
++
i
)
{
for
(
int
i
=
0
;
i
<
chainLength
;
++
i
)
{
mixed
prefac
=
i
?
1
:
numDOFs
;
mixed
prefac
=
i
?
1
:
numDOFs
;
mixed
mass
=
prefac
*
kT
/
(
frequency
*
frequency
);
mixed
mass
=
prefac
*
kT
/
(
frequency
*
frequency
);
...
@@ -70,25 +65,24 @@ __kernel void computeHeatBathEnergy(__global mixed* restrict heatBathEnergy, int
...
@@ -70,25 +65,24 @@ __kernel void computeHeatBathEnergy(__global mixed* restrict heatBathEnergy, int
}
}
}
}
__kernel
void
computeAtomsKineticEnergy
(
__global
mixed2
*
restrict
energyBuffer,
int
numAtoms,
KERNEL
void
computeAtomsKineticEnergy
(
GLOBAL
mixed2
*
RESTRICT
energyBuffer
,
int
numAtoms
,
__global
const
mixed4*
restrict
velm,
__global
const
int
*restrict
atoms
)
{
GLOBAL
const
mixed4
*
RESTRICT
velm
,
GLOBAL
const
int
*
RESTRICT
atoms
){
mixed2
energy
=
(
mixed2
)
(
0
,
0
)
;
mixed2
energy
=
make_mixed2
(
0
,
0
);
//energy
=
1
; return;
int
index
=
GLOBAL_ID
;
int
index
=
get_global_id
(
0
)
;
while
(
index
<
numAtoms
){
while
(
index
<
numAtoms
){
int
atom
=
atoms
[
index
];
int
atom
=
atoms
[
index
];
mixed4
v
=
velm
[
atom
];
mixed4
v
=
velm
[
atom
];
mixed
mass
=
v
.
w
==
0
?
0
:
1
/
v
.
w
;
mixed
mass
=
v
.
w
==
0
?
0
:
1
/
v
.
w
;
energy
.
x
+=
0.5
f
*
mass
*
(
v
.
x
*
v
.
x
+
v
.
y
*
v
.
y
+
v
.
z
*
v
.
z
);
energy
.
x
+=
0.5
f
*
mass
*
(
v
.
x
*
v
.
x
+
v
.
y
*
v
.
y
+
v
.
z
*
v
.
z
);
index
+=
get_global_size
(
0
)
;
index
+=
GLOBAL_SIZE
;
}
}
energyBuffer[
get_global_id
(
0
)
]
=
energy
;
energyBuffer
[
GLOBAL_ID
]
=
energy
;
}
}
__kernel
void
computePairsKineticEnergy
(
__global
mixed2
*
restrict
energyBuffer,
int
numPairs,
KERNEL
void
computePairsKineticEnergy
(
GLOBAL
mixed2
*
RESTRICT
energyBuffer
,
int
numPairs
,
__global
const
mixed4*
restrict
velm,
__global
const
int2
*
restrict
pairs
)
{
GLOBAL
const
mixed4
*
RESTRICT
velm
,
GLOBAL
const
int2
*
RESTRICT
pairs
){
mixed2
energy
=
(
mixed2
)
(
0
,
0
)
;
mixed2
energy
=
make_
mixed2
(
0
,
0
);
int
index
=
get_global_id
(
0
)
;
int
index
=
GLOBAL_ID
;
while
(
index
<
numPairs
){
while
(
index
<
numPairs
){
int2
pair
=
pairs
[
index
];
int2
pair
=
pairs
[
index
];
int
atom1
=
pair
.
x
;
int
atom1
=
pair
.
x
;
...
@@ -107,64 +101,74 @@ __kernel void computePairsKineticEnergy(__global mixed2 * restrict energyBuffer,
...
@@ -107,64 +101,74 @@ __kernel void computePairsKineticEnergy(__global mixed2 * restrict energyBuffer,
rv
.
z
=
v2
.
z
-
v1
.
z
;
rv
.
z
=
v2
.
z
-
v1
.
z
;
energy
.
x
+=
0.5
f
*
(
m1
+
m2
)
*
(
cv
.
x
*
cv
.
x
+
cv
.
y
*
cv
.
y
+
cv
.
z
*
cv
.
z
);
energy
.
x
+=
0.5
f
*
(
m1
+
m2
)
*
(
cv
.
x
*
cv
.
x
+
cv
.
y
*
cv
.
y
+
cv
.
z
*
cv
.
z
);
energy
.
y
+=
0.5
f
*
(
m1
*
m2
/
(
m1
+
m2
))
*
(
rv
.
x
*
rv
.
x
+
rv
.
y
*
rv
.
y
+
rv
.
z
*
rv
.
z
);
energy
.
y
+=
0.5
f
*
(
m1
*
m2
/
(
m1
+
m2
))
*
(
rv
.
x
*
rv
.
x
+
rv
.
y
*
rv
.
y
+
rv
.
z
*
rv
.
z
);
index
+=
get_global_size
(
0
)
;
index
+=
GLOBAL_SIZE
;
}
}
// The atoms version of this has been called already, so accumulate instead of assigning here
// The atoms version of this has been called already, so accumulate instead of assigning here
energyBuffer[get_global_id
(
0
)
].xy
+=
energy.xy
;
energyBuffer
[
GLOBAL_ID
].
x
+=
energy
.
x
;
energyBuffer
[
GLOBAL_ID
].
y
+=
energy
.
y
;
}
}
__kernel
void
scaleAtomsVelocities
(
__global
mixed2*
restrict
scaleFactor,
int
numAtoms,
KERNEL
void
scaleAtomsVelocities
(
GLOBAL
mixed2
*
RESTRICT
scaleFactor
,
int
numAtoms
,
__global
mixed4*
restrict
velm,
__global
const
int
*
restrict
atoms
)
{
GLOBAL
mixed4
*
RESTRICT
velm
,
GLOBAL
const
int
*
RESTRICT
atoms
){
const
mixed
scale
=
scaleFactor
[
0
].
x
;
const
mixed
scale
=
scaleFactor
[
0
].
x
;
int
index
=
get_global_id
(
0
)
;
int
index
=
GLOBAL_ID
;
while
(
index
<
numAtoms
){
while
(
index
<
numAtoms
){
int
atom
=
atoms
[
index
];
int
atom
=
atoms
[
index
];
velm
[
atom
].
x
*=
scale
;
velm
[
atom
].
x
*=
scale
;
velm
[
atom
].
y
*=
scale
;
velm
[
atom
].
y
*=
scale
;
velm
[
atom
].
z
*=
scale
;
velm
[
atom
].
z
*=
scale
;
index
+=
get_global_size
(
0
)
;
index
+=
GLOBAL_SIZE
;
}
}
}
}
__kernel
void
scalePairsVelocities
(
__global
mixed2
*
restrict
scaleFactor,
int
numPairs,
KERNEL
void
scalePairsVelocities
(
GLOBAL
mixed2
*
RESTRICT
scaleFactor
,
int
numPairs
,
__global
mixed4*
restrict
velm,
__global
const
int2
*restrict
pairs
)
{
GLOBAL
mixed4
*
RESTRICT
velm
,
GLOBAL
const
int2
*
RESTRICT
pairs
){
int
index
=
get_global_id
(
0
)
;
int
index
=
GLOBAL_ID
;
mixed
comScale
=
scaleFactor
[
0
].
x
;
mixed
relScale
=
scaleFactor
[
0
].
y
;
while
(
index
<
numPairs
){
while
(
index
<
numPairs
){
int
atom1
=
pairs
[
index
].
x
;
int
atom1
=
pairs
[
index
].
x
;
int
atom2
=
pairs
[
index
].
y
;
int
atom2
=
pairs
[
index
].
y
;
mixed
m1
=
velm
[
atom1
].
w
==
0
?
0
:
1
/
velm
[
atom1
].
w
;
mixed
m1
=
velm
[
atom1
].
w
==
0
?
0
:
1
/
velm
[
atom1
].
w
;
mixed
m2
=
velm
[
atom2
].
w
==
0
?
0
:
1
/
velm
[
atom2
].
w
;
mixed
m2
=
velm
[
atom2
].
w
==
0
?
0
:
1
/
velm
[
atom2
].
w
;
mixed4
cv
;
mixed4
cv
;
cv.xyz
=
(
m1*velm[atom1].xyz
+
m2*velm[atom2].xyz
)
/
(
m1
+
m2
)
;
cv
.
x
=
(
m1
*
velm
[
atom1
].
x
+
m2
*
velm
[
atom2
].
x
)
/
(
m1
+
m2
);
cv
.
y
=
(
m1
*
velm
[
atom1
].
y
+
m2
*
velm
[
atom2
].
y
)
/
(
m1
+
m2
);
cv
.
z
=
(
m1
*
velm
[
atom1
].
z
+
m2
*
velm
[
atom2
].
z
)
/
(
m1
+
m2
);
mixed4
rv
;
mixed4
rv
;
rv.xyz
=
velm[atom2].xyz
-
velm[atom1].xyz
;
rv
.
x
=
velm
[
atom2
].
x
-
velm
[
atom1
].
x
;
velm[atom1].x
=
scaleFactor[0].x
*
cv.x
-
scaleFactor[0].y
*
rv.x
*
m2
/
(
m1
+
m2
)
;
rv
.
y
=
velm
[
atom2
].
y
-
velm
[
atom1
].
y
;
velm[atom1].y
=
scaleFactor[0].x
*
cv.y
-
scaleFactor[0].y
*
rv.y
*
m2
/
(
m1
+
m2
)
;
rv
.
z
=
velm
[
atom2
].
z
-
velm
[
atom1
].
z
;
velm[atom1].z
=
scaleFactor[0].x
*
cv.z
-
scaleFactor[0].y
*
rv.z
*
m2
/
(
m1
+
m2
)
;
velm
[
atom1
].
x
=
comScale
*
cv
.
x
-
relScale
*
rv
.
x
*
m2
/
(
m1
+
m2
);
velm[atom2].x
=
scaleFactor[0].x
*
cv.x
+
scaleFactor[0].y
*
rv.x
*
m1
/
(
m1
+
m2
)
;
velm
[
atom1
].
y
=
comScale
*
cv
.
y
-
relScale
*
rv
.
y
*
m2
/
(
m1
+
m2
);
velm[atom2].y
=
scaleFactor[0].x
*
cv.y
+
scaleFactor[0].y
*
rv.y
*
m1
/
(
m1
+
m2
)
;
velm
[
atom1
].
z
=
comScale
*
cv
.
z
-
relScale
*
rv
.
z
*
m2
/
(
m1
+
m2
);
velm[atom2].z
=
scaleFactor[0].x
*
cv.z
+
scaleFactor[0].y
*
rv.z
*
m1
/
(
m1
+
m2
)
;
velm
[
atom2
].
x
=
comScale
*
cv
.
x
+
relScale
*
rv
.
x
*
m1
/
(
m1
+
m2
);
index
+=
get_global_size
(
0
)
;
velm
[
atom2
].
y
=
comScale
*
cv
.
y
+
relScale
*
rv
.
y
*
m1
/
(
m1
+
m2
);
velm
[
atom2
].
z
=
comScale
*
cv
.
z
+
relScale
*
rv
.
z
*
m1
/
(
m1
+
m2
);
index
+=
GLOBAL_SIZE
;
}
}
}
}
/**
/**
*
Sum
the
energy
buffer
containing
a
pair
of
energies
stored
as
mixed2.
This
is
copied
from
utilities.cu
with
small
modifications
* Sum the energy buffer containing a pair of energies stored as mixed2. This is
taken from the analogous customIntegrator code
*/
*/
__kernel
void
reduceEnergyPair
(
__global
const
mixed2*
restrict
energyBuffer,
__global
mixed2*
restrict
result,
int
bufferSize,
int
workGroupSize,
__local
mixed2*
restrict
tempBuffer
)
{
KERNEL
void
reduceEnergyPair
(
GLOBAL
const
mixed2
*
RESTRICT
sumBuffer
,
GLOBAL
mixed2
*
result
,
int
bufferSize
)
{
const
unsigned
int
thread
=
get_local_id
(
0
)
;
LOCAL
mixed2
tempBuffer
[
WORK_GROUP_SIZE
];
mixed2
sum
=
(
mixed2
)
(
0
,
0
)
;
const
unsigned
int
thread
=
LOCAL_ID
;
for
(
unsigned
int
index
=
thread
; index < bufferSize; index += get_local_size(0)) {
mixed2
sum
=
make_mixed2
(
0
,
0
);
sum.xy
+=
energyBuffer[index].xy
;
for
(
unsigned
int
index
=
thread
;
index
<
bufferSize
;
index
+=
LOCAL_SIZE
)
{
sum
.
x
+=
sumBuffer
[
index
].
x
;
sum
.
y
+=
sumBuffer
[
index
].
y
;
}
}
tempBuffer[thread].xy
=
sum.xy
;
tempBuffer
[
thread
].
x
=
sum
.
x
;
for
(
int
i
=
1
; i < workGroupSize; i *= 2) {
tempBuffer
[
thread
].
y
=
sum
.
y
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
i
=
1
;
i
<
WORK_GROUP_SIZE
;
i
*=
2
)
{
if
(
thread%
(
i*2
)
==
0
&&
thread+i
<
workGroupSize
)
{
SYNC_THREADS
;
tempBuffer[thread].xy
+=
tempBuffer[thread+i].xy
;
if
(
thread
%
(
i
*
2
)
==
0
&&
thread
+
i
<
WORK_GROUP_SIZE
)
{
tempBuffer
[
thread
].
x
+=
tempBuffer
[
thread
+
i
].
x
;
tempBuffer
[
thread
].
y
+=
tempBuffer
[
thread
+
i
].
y
;
}
}
}
}
if
(
thread
==
0
)
{
if
(
thread
==
0
)
*
result
=
tempBuffer
[
0
];
*
result
=
tempBuffer
[
0
];
}
}
}
platforms/c
uda
/src/kernels/velocityVerlet.c
u
→
platforms/c
ommon
/src/kernels/velocityVerlet.c
c
View file @
ca4c03c3
/**
/**
* Perform the first step of Velocity Verlet integration.
* Perform the first step of Velocity Verlet integration.
*
* update displacements (posDelta) and velocities (velm)
*/
*/
extern
"C"
__global__
void
integrateVelocityVerletPart1
(
int
numAtoms
,
int
numPairs
,
int
paddedNumAtoms
,
const
mixed2
*
__restrict__
dt
,
const
real4
*
__restrict__
posq
,
KERNEL
void
integrateVelocityVerletPart1
(
int
numAtoms
,
int
numPairs
,
int
paddedNumAtoms
,
GLOBAL
const
mixed2
*
RESTRICT
dt
,
GLOBAL
const
real4
*
RESTRICT
posq
,
const
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
mixed4
*
__restrict__
posDelta
,
GLOBAL
mixed4
*
RESTRICT
velm
,
GLOBAL
const
mm_long
*
RESTRICT
force
,
GLOBAL
mixed4
*
RESTRICT
posDelta
,
const
int
*
__restrict__
atomList
,
const
int2
*
__restrict__
pairList
)
{
GLOBAL
const
int
*
RESTRICT
atomList
,
GLOBAL
const
int2
*
RESTRICT
pairList
#ifdef USE_MIXED_PRECISION
,
GLOBAL
const
real4
*
RESTRICT
posqCorrection
#endif
){
const
mixed2
stepSize
=
dt
[
0
];
const
mixed2
stepSize
=
dt
[
0
];
const
mixed
dtPos
=
stepSize
.
y
;
const
mixed
dtPos
=
stepSize
.
y
;
const
mixed
dtVel
=
0.5
f
*
(
stepSize
.
x
+
stepSize
.
y
);
const
mixed
dtVel
=
0.5
f
*
(
stepSize
.
x
+
stepSize
.
y
);
const
mixed
scale
=
0.5
f
*
dtVel
/
(
mixed
)
0x100000000
;
const
mixed
scale
=
0.5
f
*
dtVel
/
(
mixed
)
0x100000000
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
index
=
GLOBAL_ID
;
while
(
index
<
numAtoms
)
{
int
atom
=
atomList
[
index
];
int
atom
=
atomList
[
index
];
mixed4
velocity
=
velm
[
atom
];
mixed4
velocity
=
velm
[
atom
];
if
(
velocity
.
w
!=
0.0
)
{
if
(
velocity
.
w
!=
0.0
)
{
...
@@ -31,8 +34,10 @@ extern "C" __global__ void integrateVelocityVerletPart1(int numAtoms, int numPai
...
@@ -31,8 +34,10 @@ extern "C" __global__ void integrateVelocityVerletPart1(int numAtoms, int numPai
posDelta
[
atom
]
=
pos
;
posDelta
[
atom
]
=
pos
;
velm
[
atom
]
=
velocity
;
velm
[
atom
]
=
velocity
;
}
}
index
+=
GLOBAL_SIZE
;
}
}
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numPairs
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
=
GLOBAL_ID
;
while
(
index
<
numPairs
){
int
atom1
=
pairList
[
index
].
x
;
int
atom1
=
pairList
[
index
].
x
;
int
atom2
=
pairList
[
index
].
y
;
int
atom2
=
pairList
[
index
].
y
;
mixed4
v1
=
velm
[
atom1
];
mixed4
v1
=
velm
[
atom1
];
...
@@ -51,21 +56,27 @@ extern "C" __global__ void integrateVelocityVerletPart1(int numAtoms, int numPai
...
@@ -51,21 +56,27 @@ extern "C" __global__ void integrateVelocityVerletPart1(int numAtoms, int numPai
relVel
.
x
=
v2
.
x
-
v1
.
x
;
relVel
.
x
=
v2
.
x
-
v1
.
x
;
relVel
.
y
=
v2
.
y
-
v1
.
y
;
relVel
.
y
=
v2
.
y
-
v1
.
y
;
relVel
.
z
=
v2
.
z
-
v1
.
z
;
relVel
.
z
=
v2
.
z
-
v1
.
z
;
//
mixed3
comFrc
;
mixed3
comFrc
;
comFrc
.
x
=
force
[
atom1
]
+
force
[
atom2
];
mixed
F1x
=
scale
*
force
[
atom1
];
comFrc
.
y
=
force
[
atom1
+
paddedNumAtoms
]
+
force
[
atom2
+
paddedNumAtoms
];
mixed
F1y
=
scale
*
force
[
atom1
+
paddedNumAtoms
];
comFrc
.
z
=
force
[
atom1
+
paddedNumAtoms
*
2
]
+
force
[
atom2
+
paddedNumAtoms
*
2
];
mixed
F1z
=
scale
*
force
[
atom1
+
paddedNumAtoms
*
2
];
mixed
F2x
=
scale
*
force
[
atom2
];
mixed
F2y
=
scale
*
force
[
atom2
+
paddedNumAtoms
];
mixed
F2z
=
scale
*
force
[
atom2
+
paddedNumAtoms
*
2
];
comFrc
.
x
=
F1x
+
F2x
;
comFrc
.
y
=
F1y
+
F2y
;
comFrc
.
z
=
F1z
+
F2z
;
mixed3
relFrc
;
mixed3
relFrc
;
relFrc
.
x
=
mass1fract
*
force
[
atom2
]
-
mass2fract
*
force
[
atom1
]
;
relFrc
.
x
=
mass1fract
*
F2x
-
mass2fract
*
F1x
;
relFrc
.
y
=
mass1fract
*
force
[
atom2
+
paddedNumAtoms
]
-
mass2fract
*
force
[
atom1
+
paddedNumAtoms
]
;
relFrc
.
y
=
mass1fract
*
F2y
-
mass2fract
*
F1y
;
relFrc
.
z
=
mass1fract
*
force
[
atom2
+
paddedNumAtoms
*
2
]
-
mass2fract
*
force
[
atom1
+
paddedNumAtoms
*
2
]
;
relFrc
.
z
=
mass1fract
*
F2z
-
mass2fract
*
F1z
;
comVel
.
x
+=
comFrc
.
x
*
scale
*
invTotMass
;
comVel
.
x
+=
comFrc
.
x
*
invTotMass
;
comVel
.
y
+=
comFrc
.
y
*
scale
*
invTotMass
;
comVel
.
y
+=
comFrc
.
y
*
invTotMass
;
comVel
.
z
+=
comFrc
.
z
*
scale
*
invTotMass
;
comVel
.
z
+=
comFrc
.
z
*
invTotMass
;
relVel
.
x
+=
relFrc
.
x
*
scale
*
invRedMass
;
relVel
.
x
+=
relFrc
.
x
*
invRedMass
;
relVel
.
y
+=
relFrc
.
y
*
scale
*
invRedMass
;
relVel
.
y
+=
relFrc
.
y
*
invRedMass
;
relVel
.
z
+=
relFrc
.
z
*
scale
*
invRedMass
;
relVel
.
z
+=
relFrc
.
z
*
invRedMass
;
#ifdef USE_MIXED_PRECISION
#ifdef USE_MIXED_PRECISION
real4
posv1
=
posq
[
atom1
];
real4
posv1
=
posq
[
atom1
];
real4
posv2
=
posq
[
atom2
];
real4
posv2
=
posq
[
atom2
];
...
@@ -97,22 +108,25 @@ extern "C" __global__ void integrateVelocityVerletPart1(int numAtoms, int numPai
...
@@ -97,22 +108,25 @@ extern "C" __global__ void integrateVelocityVerletPart1(int numAtoms, int numPai
posDelta
[
atom2
]
=
pos2
;
posDelta
[
atom2
]
=
pos2
;
velm
[
atom2
]
=
v2
;
velm
[
atom2
]
=
v2
;
}
}
}
index
+=
GLOBAL_SIZE
;
}
}
}
/**
/**
* Perform the second step of Velocity Verlet integration.
* Perform the second step of Velocity Verlet integration.
*
* apply displacements to positions (posq) after constraints have been enforced
*/
*/
extern
"C"
__global__
void
integrateVelocityVerletPart2
(
int
numAtoms
,
mixed2
*
__restrict__
dt
,
real4
*
__restrict__
posq
,
KERNEL
void
integrateVelocityVerletPart2
(
int
numAtoms
,
GLOBAL
mixed2
*
RESTRICT
dt
,
GLOBAL
real4
*
RESTRICT
posq
,
GLOBAL
mixed4
*
RESTRICT
velm
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
velm
,
const
mixed4
*
__restrict__
posDelta
)
{
GLOBAL
const
mixed4
*
RESTRICT
posDelta
#ifdef USE_MIXED_PRECISION
,
GLOBAL
real4
*
RESTRICT
posqCorrection
#endif
){
mixed2
stepSize
=
dt
[
0
];
mixed2
stepSize
=
dt
[
0
];
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
index
=
GLOBAL_ID
;
if
(
index
==
0
)
if
(
index
==
0
)
dt
[
0
].
x
=
stepSize
.
y
;
dt
[
0
].
x
=
stepSize
.
y
;
for
(;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
while
(
index
<
numAtoms
)
{
mixed4
velocity
=
velm
[
index
];
mixed4
velocity
=
velm
[
index
];
if
(
velocity
.
w
!=
0.0
)
{
if
(
velocity
.
w
!=
0.0
)
{
#ifdef USE_MIXED_PRECISION
#ifdef USE_MIXED_PRECISION
...
@@ -133,21 +147,23 @@ extern "C" __global__ void integrateVelocityVerletPart2(int numAtoms, mixed2* __
...
@@ -133,21 +147,23 @@ extern "C" __global__ void integrateVelocityVerletPart2(int numAtoms, mixed2* __
posq
[
index
]
=
pos
;
posq
[
index
]
=
pos
;
#endif
#endif
}
}
index
+=
GLOBAL_SIZE
;
}
}
}
}
/**
/**
* Perform the third step of Velocity Verlet integration.
* Perform the third step of Velocity Verlet integration.
*
* modify the velocities (velm) after the force update
*/
*/
extern
"C"
__global__
void
integrateVelocityVerletPart3
(
int
numAtoms
,
int
numPairs
,
int
paddedNumAtoms
,
mixed2
*
__restrict__
dt
,
real4
*
__restrict__
posq
,
KERNEL
void
integrateVelocityVerletPart3
(
int
numAtoms
,
int
numPairs
,
int
paddedNumAtoms
,
GLOBAL
mixed2
*
RESTRICT
dt
,
GLOBAL
real4
*
RESTRICT
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
const
mixed4
*
__restrict__
posDelta
,
GLOBAL
mixed4
*
RESTRICT
velm
,
GLOBAL
const
mm_long
*
RESTRICT
force
,
GLOBAL
const
mixed4
*
RESTRICT
posDelta
,
const
int
*
__restrict__
atomList
,
const
int2
*
__restrict__
pairList
)
{
GLOBAL
const
int
*
RESTRICT
atomList
,
GLOBAL
const
int2
*
RESTRICT
pairList
#ifdef USE_MIXED_PRECISION
,
GLOBAL
const
real4
*
RESTRICT
posqCorrection
#endif
){
mixed2
stepSize
=
dt
[
0
];
mixed2
stepSize
=
dt
[
0
];
#if
__CUDA_ARCH__ >= 130
#if
def SUPPORTS_DOUBLE_PRECISION
double
oneOverDt
=
1.0
/
stepSize
.
y
;
double
oneOverDt
=
1.0
/
stepSize
.
y
;
#else
#else
float
oneOverDt
=
1.0
f
/
stepSize
.
y
;
float
oneOverDt
=
1.0
f
/
stepSize
.
y
;
...
@@ -155,11 +171,10 @@ extern "C" __global__ void integrateVelocityVerletPart3(int numAtoms, int numPai
...
@@ -155,11 +171,10 @@ extern "C" __global__ void integrateVelocityVerletPart3(int numAtoms, int numPai
#endif
#endif
const
mixed
dtVel
=
0.5
f
*
(
stepSize
.
x
+
stepSize
.
y
);
const
mixed
dtVel
=
0.5
f
*
(
stepSize
.
x
+
stepSize
.
y
);
const
mixed
scale
=
0.5
f
*
dtVel
/
(
mixed
)
0x100000000
;
const
mixed
scale
=
0.5
f
*
dtVel
/
(
mixed
)
0x100000000
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
index
=
GLOBAL_ID
;
if
(
index
==
0
)
if
(
index
==
0
)
dt
[
0
].
x
=
stepSize
.
y
;
dt
[
0
].
x
=
stepSize
.
y
;
while
(
index
<
numAtoms
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
atom
=
atomList
[
index
];
int
atom
=
atomList
[
index
];
mixed4
velocity
=
velm
[
atom
];
mixed4
velocity
=
velm
[
atom
];
if
(
velocity
.
w
!=
0.0
)
{
if
(
velocity
.
w
!=
0.0
)
{
...
@@ -167,15 +182,17 @@ extern "C" __global__ void integrateVelocityVerletPart3(int numAtoms, int numPai
...
@@ -167,15 +182,17 @@ extern "C" __global__ void integrateVelocityVerletPart3(int numAtoms, int numPai
velocity
.
x
+=
scale
*
force
[
atom
]
*
velocity
.
w
+
(
deltaXconstrained
.
x
-
velocity
.
x
*
stepSize
.
y
)
*
oneOverDt
;
velocity
.
x
+=
scale
*
force
[
atom
]
*
velocity
.
w
+
(
deltaXconstrained
.
x
-
velocity
.
x
*
stepSize
.
y
)
*
oneOverDt
;
velocity
.
y
+=
scale
*
force
[
atom
+
paddedNumAtoms
]
*
velocity
.
w
+
(
deltaXconstrained
.
y
-
velocity
.
y
*
stepSize
.
y
)
*
oneOverDt
;
velocity
.
y
+=
scale
*
force
[
atom
+
paddedNumAtoms
]
*
velocity
.
w
+
(
deltaXconstrained
.
y
-
velocity
.
y
*
stepSize
.
y
)
*
oneOverDt
;
velocity
.
z
+=
scale
*
force
[
atom
+
paddedNumAtoms
*
2
]
*
velocity
.
w
+
(
deltaXconstrained
.
z
-
velocity
.
z
*
stepSize
.
y
)
*
oneOverDt
;
velocity
.
z
+=
scale
*
force
[
atom
+
paddedNumAtoms
*
2
]
*
velocity
.
w
+
(
deltaXconstrained
.
z
-
velocity
.
z
*
stepSize
.
y
)
*
oneOverDt
;
#if
__CUDA_ARCH__ < 130
#if
ndef SUPPORTS_DOUBLE_PRECISION
velocity
.
x
+=
(
deltaXconstrained
.
x
-
velocity
.
x
*
stepSize
.
y
)
*
correction
;
velocity
.
x
+=
(
deltaXconstrained
.
x
-
velocity
.
x
*
stepSize
.
y
)
*
correction
;
velocity
.
y
+=
(
deltaXconstrained
.
y
-
velocity
.
y
*
stepSize
.
y
)
*
correction
;
velocity
.
y
+=
(
deltaXconstrained
.
y
-
velocity
.
y
*
stepSize
.
y
)
*
correction
;
velocity
.
z
+=
(
deltaXconstrained
.
z
-
velocity
.
z
*
stepSize
.
y
)
*
correction
;
velocity
.
z
+=
(
deltaXconstrained
.
z
-
velocity
.
z
*
stepSize
.
y
)
*
correction
;
#endif
#endif
velm
[
atom
]
=
velocity
;
velm
[
atom
]
=
velocity
;
}
}
index
+=
GLOBAL_SIZE
;
}
}
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numPairs
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
index
=
GLOBAL_ID
;
while
(
index
<
numPairs
)
{
int
atom1
=
pairList
[
index
].
x
;
int
atom1
=
pairList
[
index
].
x
;
int
atom2
=
pairList
[
index
].
y
;
int
atom2
=
pairList
[
index
].
y
;
mixed4
v1
=
velm
[
atom1
];
mixed4
v1
=
velm
[
atom1
];
...
@@ -194,27 +211,33 @@ extern "C" __global__ void integrateVelocityVerletPart3(int numAtoms, int numPai
...
@@ -194,27 +211,33 @@ extern "C" __global__ void integrateVelocityVerletPart3(int numAtoms, int numPai
relVel
.
x
=
v2
.
x
-
v1
.
x
;
relVel
.
x
=
v2
.
x
-
v1
.
x
;
relVel
.
y
=
v2
.
y
-
v1
.
y
;
relVel
.
y
=
v2
.
y
-
v1
.
y
;
relVel
.
z
=
v2
.
z
-
v1
.
z
;
relVel
.
z
=
v2
.
z
-
v1
.
z
;
//
mixed3
comFrc
;
mixed3
comFrc
;
comFrc
.
x
=
force
[
atom1
]
+
force
[
atom2
];
mixed
F1x
=
scale
*
force
[
atom1
];
comFrc
.
y
=
force
[
atom1
+
paddedNumAtoms
]
+
force
[
atom2
+
paddedNumAtoms
];
mixed
F1y
=
scale
*
force
[
atom1
+
paddedNumAtoms
];
comFrc
.
z
=
force
[
atom1
+
paddedNumAtoms
*
2
]
+
force
[
atom2
+
paddedNumAtoms
*
2
];
mixed
F1z
=
scale
*
force
[
atom1
+
paddedNumAtoms
*
2
];
mixed
F2x
=
scale
*
force
[
atom2
];
mixed
F2y
=
scale
*
force
[
atom2
+
paddedNumAtoms
];
mixed
F2z
=
scale
*
force
[
atom2
+
paddedNumAtoms
*
2
];
comFrc
.
x
=
F1x
+
F2x
;
comFrc
.
y
=
F1y
+
F2y
;
comFrc
.
z
=
F1z
+
F2z
;
mixed3
relFrc
;
mixed3
relFrc
;
relFrc
.
x
=
mass1fract
*
force
[
atom2
]
-
mass2fract
*
force
[
atom1
]
;
relFrc
.
x
=
mass1fract
*
F2x
-
mass2fract
*
F1x
;
relFrc
.
y
=
mass1fract
*
force
[
atom2
+
paddedNumAtoms
]
-
mass2fract
*
force
[
atom1
+
paddedNumAtoms
]
;
relFrc
.
y
=
mass1fract
*
F2y
-
mass2fract
*
F1y
;
relFrc
.
z
=
mass1fract
*
force
[
atom2
+
paddedNumAtoms
*
2
]
-
mass2fract
*
force
[
atom1
+
paddedNumAtoms
*
2
]
;
relFrc
.
z
=
mass1fract
*
F2z
-
mass2fract
*
F1z
;
comVel
.
x
+=
comFrc
.
x
*
scale
*
invTotMass
;
comVel
.
x
+=
comFrc
.
x
*
invTotMass
;
comVel
.
y
+=
comFrc
.
y
*
scale
*
invTotMass
;
comVel
.
y
+=
comFrc
.
y
*
invTotMass
;
comVel
.
z
+=
comFrc
.
z
*
scale
*
invTotMass
;
comVel
.
z
+=
comFrc
.
z
*
invTotMass
;
relVel
.
x
+=
relFrc
.
x
*
scale
*
invRedMass
;
relVel
.
x
+=
relFrc
.
x
*
invRedMass
;
relVel
.
y
+=
relFrc
.
y
*
scale
*
invRedMass
;
relVel
.
y
+=
relFrc
.
y
*
invRedMass
;
relVel
.
z
+=
relFrc
.
z
*
scale
*
invRedMass
;
relVel
.
z
+=
relFrc
.
z
*
invRedMass
;
if
(
v1
.
w
!=
0.0
f
)
{
if
(
v1
.
w
!=
0.0
f
)
{
mixed4
deltaXconstrained
=
posDelta
[
atom1
];
mixed4
deltaXconstrained
=
posDelta
[
atom1
];
v1
.
x
=
comVel
.
x
-
relVel
.
x
*
mass2fract
+
(
deltaXconstrained
.
x
-
v1
.
x
*
stepSize
.
y
)
*
oneOverDt
;
v1
.
x
=
comVel
.
x
-
relVel
.
x
*
mass2fract
+
(
deltaXconstrained
.
x
-
v1
.
x
*
stepSize
.
y
)
*
oneOverDt
;
v1
.
y
=
comVel
.
y
-
relVel
.
y
*
mass2fract
+
(
deltaXconstrained
.
y
-
v1
.
y
*
stepSize
.
y
)
*
oneOverDt
;
v1
.
y
=
comVel
.
y
-
relVel
.
y
*
mass2fract
+
(
deltaXconstrained
.
y
-
v1
.
y
*
stepSize
.
y
)
*
oneOverDt
;
v1
.
z
=
comVel
.
z
-
relVel
.
z
*
mass2fract
+
(
deltaXconstrained
.
z
-
v1
.
z
*
stepSize
.
y
)
*
oneOverDt
;
v1
.
z
=
comVel
.
z
-
relVel
.
z
*
mass2fract
+
(
deltaXconstrained
.
z
-
v1
.
z
*
stepSize
.
y
)
*
oneOverDt
;
#if
__CUDA_ARCH__ < 130
#if
ndef SUPPORTS_DOUBLE_PRECISION
v1
.
x
+=
(
deltaXconstrained
.
x
-
v1
.
x
*
stepSize
.
y
)
*
correction
;
v1
.
x
+=
(
deltaXconstrained
.
x
-
v1
.
x
*
stepSize
.
y
)
*
correction
;
v1
.
y
+=
(
deltaXconstrained
.
y
-
v1
.
y
*
stepSize
.
y
)
*
correction
;
v1
.
y
+=
(
deltaXconstrained
.
y
-
v1
.
y
*
stepSize
.
y
)
*
correction
;
v1
.
z
+=
(
deltaXconstrained
.
z
-
v1
.
z
*
stepSize
.
y
)
*
correction
;
v1
.
z
+=
(
deltaXconstrained
.
z
-
v1
.
z
*
stepSize
.
y
)
*
correction
;
...
@@ -226,58 +249,57 @@ extern "C" __global__ void integrateVelocityVerletPart3(int numAtoms, int numPai
...
@@ -226,58 +249,57 @@ extern "C" __global__ void integrateVelocityVerletPart3(int numAtoms, int numPai
v2
.
x
=
comVel
.
x
+
relVel
.
x
*
mass1fract
+
(
deltaXconstrained
.
x
-
v2
.
x
*
stepSize
.
y
)
*
oneOverDt
;
v2
.
x
=
comVel
.
x
+
relVel
.
x
*
mass1fract
+
(
deltaXconstrained
.
x
-
v2
.
x
*
stepSize
.
y
)
*
oneOverDt
;
v2
.
y
=
comVel
.
y
+
relVel
.
y
*
mass1fract
+
(
deltaXconstrained
.
y
-
v2
.
y
*
stepSize
.
y
)
*
oneOverDt
;
v2
.
y
=
comVel
.
y
+
relVel
.
y
*
mass1fract
+
(
deltaXconstrained
.
y
-
v2
.
y
*
stepSize
.
y
)
*
oneOverDt
;
v2
.
z
=
comVel
.
z
+
relVel
.
z
*
mass1fract
+
(
deltaXconstrained
.
z
-
v2
.
z
*
stepSize
.
y
)
*
oneOverDt
;
v2
.
z
=
comVel
.
z
+
relVel
.
z
*
mass1fract
+
(
deltaXconstrained
.
z
-
v2
.
z
*
stepSize
.
y
)
*
oneOverDt
;
#if
__CUDA_ARCH__ < 130
#if
ndef SUPPORTS_DOUBLE_PRECISION
v2
.
x
+=
(
deltaXconstrained
.
x
-
v2
.
x
*
stepSize
.
y
)
*
correction
;
v2
.
x
+=
(
deltaXconstrained
.
x
-
v2
.
x
*
stepSize
.
y
)
*
correction
;
v2
.
y
+=
(
deltaXconstrained
.
y
-
v2
.
y
*
stepSize
.
y
)
*
correction
;
v2
.
y
+=
(
deltaXconstrained
.
y
-
v2
.
y
*
stepSize
.
y
)
*
correction
;
v2
.
z
+=
(
deltaXconstrained
.
z
-
v2
.
z
*
stepSize
.
y
)
*
correction
;
v2
.
z
+=
(
deltaXconstrained
.
z
-
v2
.
z
*
stepSize
.
y
)
*
correction
;
#endif
#endif
velm
[
atom2
]
=
v2
;
velm
[
atom2
]
=
v2
;
}
}
index
+=
GLOBAL_SIZE
;
}
}
}
}
KERNEL
void
integrateVelocityVerletHardWall
(
int
numPairs
,
GLOBAL
const
float
*
RESTRICT
maxPairDistance
,
GLOBAL
mixed2
*
RESTRICT
dt
,
GLOBAL
real4
*
RESTRICT
posq
,
GLOBAL
mixed4
*
RESTRICT
velm
,
GLOBAL
const
int2
*
RESTRICT
pairList
,
GLOBAL
const
float
*
RESTRICT
pairTemperature
#ifdef USE_MIXED_PRECISION
,
GLOBAL
real4
*
RESTRICT
posqCorrection
#endif
){
/**
* Apply the hard wall constraint
*/
extern
"C"
__global__
void
integrateVelocityVerletHardWall
(
int
numPairs
,
const
float
*
__restrict__
maxPairDistance
,
mixed2
*
__restrict__
dt
,
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
velm
,
const
int2
*
__restrict__
pairList
,
const
float
*
__restrict__
pairTemperature
)
{
mixed
dtPos
=
dt
[
0
].
y
;
mixed
dtPos
=
dt
[
0
].
y
;
mixed
maxDelta
=
(
mixed
)
maxPairDistance
[
0
];
mixed
maxDelta
=
(
mixed
)
maxPairDistance
[
0
];
// Apply hard wall constraints.
if
(
maxDelta
>
0
){
if
(
maxDelta
>
0
)
{
int
index
=
GLOBAL_ID
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numPairs
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
while
(
index
<
numPairs
)
{
const
mixed
hardWallScale
=
sqrt
(
((
mixed
)
pairTemperature
[
index
])
*
((
mixed
)
BOLTZ
));
const
mixed
hardWallScale
=
sqrt
(
((
mixed
)
pairTemperature
[
index
])
*
((
mixed
)
BOLTZ
));
int2
atom
=
make_int2
(
pairList
[
index
].
x
,
pairList
[
index
].
y
);
int
atom1
=
pairList
[
index
].
x
;
int
atom2
=
pairList
[
index
].
y
;
#ifdef USE_MIXED_PRECISION
#ifdef USE_MIXED_PRECISION
real4
posv1
=
posq
[
atom
.
x
];
real4
posv1
=
posq
[
atom
1
];
real4
posc1
=
posqCorrection
[
atom
.
x
];
real4
posc1
=
posqCorrection
[
atom
1
];
mixed4
pos1
=
make_mixed4
(
posv1
.
x
+
(
mixed
)
posc1
.
x
,
posv1
.
y
+
(
mixed
)
posc1
.
y
,
posv1
.
z
+
(
mixed
)
posc1
.
z
,
posv1
.
w
);
mixed4
pos1
=
make_mixed4
(
posv1
.
x
+
(
mixed
)
posc1
.
x
,
posv1
.
y
+
(
mixed
)
posc1
.
y
,
posv1
.
z
+
(
mixed
)
posc1
.
z
,
posv1
.
w
);
real4
posv2
=
posq
[
atom
.
y
];
real4
posv2
=
posq
[
atom
2
];
real4
posc2
=
posqCorrection
[
atom
.
y
];
real4
posc2
=
posqCorrection
[
atom
2
];
mixed4
pos2
=
make_mixed4
(
posv2
.
x
+
(
mixed
)
posc2
.
x
,
posv2
.
y
+
(
mixed
)
posc2
.
y
,
posv2
.
z
+
(
mixed
)
posc2
.
z
,
posv2
.
w
);
mixed4
pos2
=
make_mixed4
(
posv2
.
x
+
(
mixed
)
posc2
.
x
,
posv2
.
y
+
(
mixed
)
posc2
.
y
,
posv2
.
z
+
(
mixed
)
posc2
.
z
,
posv2
.
w
);
#else
#else
real4
pos1
=
posq
[
atom
.
x
];
real4
pos1
=
posq
[
atom
1
];
real4
pos2
=
posq
[
atom
.
y
];
real4
pos2
=
posq
[
atom
2
];
#endif
#endif
mixed3
delta
=
make_mixed3
(
mixed3
delta
=
make_mixed3
(
pos1
.
x
-
pos2
.
x
,
pos1
.
y
-
pos2
.
y
,
pos1
.
z
-
pos2
.
z
);
mixed
(
pos1
.
x
-
pos2
.
x
),
mixed
(
pos1
.
y
-
pos2
.
y
),
mixed
(
pos1
.
z
-
pos2
.
z
)
);
mixed
r
=
sqrt
(
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
);
mixed
r
=
sqrt
(
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
);
mixed
rInv
=
1
/
r
;
mixed
rInv
=
1
/
r
;
if
(
rInv
*
maxDelta
<
1.0
)
{
if
(
rInv
*
maxDelta
<
1.0
)
{
// The constraint has been violated, so make the inter-particle distance "bounce"
// The constraint has been violated, so make the inter-particle distance "bounce"
// off the hard wall.
// off the hard wall.
mixed3
bondDir
=
make_mixed3
(
delta
.
x
*
rInv
,
delta
.
y
*
rInv
,
delta
.
z
*
rInv
);
mixed3
bondDir
=
make_mixed3
(
delta
.
x
*
rInv
,
delta
.
y
*
rInv
,
delta
.
z
*
rInv
);
mixed3
vel1
=
make_mixed3
(
velm
[
atom
.
x
].
x
,
velm
[
atom
.
x
].
y
,
velm
[
atom
.
x
].
z
);
mixed3
vel1
=
make_mixed3
(
velm
[
atom
1
].
x
,
velm
[
atom
1
].
y
,
velm
[
atom
1
].
z
);
mixed3
vel2
=
make_mixed3
(
velm
[
atom
.
y
].
x
,
velm
[
atom
.
y
].
y
,
velm
[
atom
.
y
].
z
);
mixed3
vel2
=
make_mixed3
(
velm
[
atom
2
].
x
,
velm
[
atom
2
].
y
,
velm
[
atom
2
].
z
);
mixed
m1
=
velm
[
atom
.
x
].
w
!=
0.0
?
1.0
/
velm
[
atom
.
x
].
w
:
0.0
;
mixed
m1
=
velm
[
atom
1
].
w
!=
0.0
?
1.0
/
velm
[
atom
1
].
w
:
0.0
;
mixed
m2
=
velm
[
atom
.
y
].
w
!=
0.0
?
1.0
/
velm
[
atom
.
y
].
w
:
0.0
;
mixed
m2
=
velm
[
atom
2
].
w
!=
0.0
?
1.0
/
velm
[
atom
2
].
w
:
0.0
;
mixed
invTotMass
=
(
m1
+
m2
!=
0.0
)
?
1.0
/
(
m1
+
m2
)
:
0.0
;
mixed
invTotMass
=
(
m1
+
m2
!=
0.0
)
?
1.0
/
(
m1
+
m2
)
:
0.0
;
mixed
deltaR
=
r
-
maxDelta
;
mixed
deltaR
=
r
-
maxDelta
;
mixed
deltaT
=
dtPos
;
mixed
deltaT
=
dtPos
;
...
@@ -298,12 +320,12 @@ extern "C" __global__ void integrateVelocityVerletHardWall(int numPairs, const f
...
@@ -298,12 +320,12 @@ extern "C" __global__ void integrateVelocityVerletHardWall(int numPairs, const f
pos1
.
x
+=
bondDir
.
x
*
dr
;
pos1
.
x
+=
bondDir
.
x
*
dr
;
pos1
.
y
+=
bondDir
.
y
*
dr
;
pos1
.
y
+=
bondDir
.
y
*
dr
;
pos1
.
z
+=
bondDir
.
z
*
dr
;
pos1
.
z
+=
bondDir
.
z
*
dr
;
velm
[
atom
.
x
]
=
make_mixed4
(
vp1
.
x
+
bondDir
.
x
*
dotvr1
,
vp1
.
y
+
bondDir
.
y
*
dotvr1
,
vp1
.
z
+
bondDir
.
z
*
dotvr1
,
velm
[
atom
.
x
].
w
);
velm
[
atom
1
]
=
make_mixed4
(
vp1
.
x
+
bondDir
.
x
*
dotvr1
,
vp1
.
y
+
bondDir
.
y
*
dotvr1
,
vp1
.
z
+
bondDir
.
z
*
dotvr1
,
velm
[
atom
1
].
w
);
#ifdef USE_MIXED_PRECISION
#ifdef USE_MIXED_PRECISION
posq
[
atom
.
x
]
=
make_real4
((
real
)
pos1
.
x
,
(
real
)
pos1
.
y
,
(
real
)
pos1
.
z
,
(
real
)
pos1
.
w
);
posq
[
atom
1
]
=
make_real4
((
real
)
pos1
.
x
,
(
real
)
pos1
.
y
,
(
real
)
pos1
.
z
,
(
real
)
pos1
.
w
);
posqCorrection
[
atom
.
x
]
=
make_real4
(
pos1
.
x
-
(
real
)
pos1
.
x
,
pos1
.
y
-
(
real
)
pos1
.
y
,
pos1
.
z
-
(
real
)
pos1
.
z
,
0
);
posqCorrection
[
atom
1
]
=
make_real4
(
pos1
.
x
-
(
real
)
pos1
.
x
,
pos1
.
y
-
(
real
)
pos1
.
y
,
pos1
.
z
-
(
real
)
pos1
.
z
,
0
);
#else
#else
posq
[
atom
.
x
]
=
pos1
;
posq
[
atom
1
]
=
pos1
;
#endif
#endif
}
}
else
{
else
{
...
@@ -331,19 +353,21 @@ extern "C" __global__ void integrateVelocityVerletHardWall(int numPairs, const f
...
@@ -331,19 +353,21 @@ extern "C" __global__ void integrateVelocityVerletHardWall(int numPairs, const f
pos2
.
x
+=
bondDir
.
x
*
dr2
;
pos2
.
x
+=
bondDir
.
x
*
dr2
;
pos2
.
y
+=
bondDir
.
y
*
dr2
;
pos2
.
y
+=
bondDir
.
y
*
dr2
;
pos2
.
z
+=
bondDir
.
z
*
dr2
;
pos2
.
z
+=
bondDir
.
z
*
dr2
;
velm
[
atom
.
x
]
=
make_mixed4
(
vp1
.
x
+
bondDir
.
x
*
dotvr1
,
vp1
.
y
+
bondDir
.
y
*
dotvr1
,
vp1
.
z
+
bondDir
.
z
*
dotvr1
,
velm
[
atom
.
x
].
w
);
velm
[
atom
1
]
=
make_mixed4
(
vp1
.
x
+
bondDir
.
x
*
dotvr1
,
vp1
.
y
+
bondDir
.
y
*
dotvr1
,
vp1
.
z
+
bondDir
.
z
*
dotvr1
,
velm
[
atom
1
].
w
);
velm
[
atom
.
y
]
=
make_mixed4
(
vp2
.
x
+
bondDir
.
x
*
dotvr2
,
vp2
.
y
+
bondDir
.
y
*
dotvr2
,
vp2
.
z
+
bondDir
.
z
*
dotvr2
,
velm
[
atom
.
y
].
w
);
velm
[
atom
2
]
=
make_mixed4
(
vp2
.
x
+
bondDir
.
x
*
dotvr2
,
vp2
.
y
+
bondDir
.
y
*
dotvr2
,
vp2
.
z
+
bondDir
.
z
*
dotvr2
,
velm
[
atom
2
].
w
);
#ifdef USE_MIXED_PRECISION
#ifdef USE_MIXED_PRECISION
posq
[
atom
.
x
]
=
make_real4
((
real
)
pos1
.
x
,
(
real
)
pos1
.
y
,
(
real
)
pos1
.
z
,
(
real
)
pos1
.
w
);
posq
[
atom
1
]
=
make_real4
((
real
)
pos1
.
x
,
(
real
)
pos1
.
y
,
(
real
)
pos1
.
z
,
(
real
)
pos1
.
w
);
posq
[
atom
.
y
]
=
make_real4
((
real
)
pos2
.
x
,
(
real
)
pos2
.
y
,
(
real
)
pos2
.
z
,
(
real
)
pos2
.
w
);
posq
[
atom
2
]
=
make_real4
((
real
)
pos2
.
x
,
(
real
)
pos2
.
y
,
(
real
)
pos2
.
z
,
(
real
)
pos2
.
w
);
posqCorrection
[
atom
.
x
]
=
make_real4
(
pos1
.
x
-
(
real
)
pos1
.
x
,
pos1
.
y
-
(
real
)
pos1
.
y
,
pos1
.
z
-
(
real
)
pos1
.
z
,
0
);
posqCorrection
[
atom
1
]
=
make_real4
(
pos1
.
x
-
(
real
)
pos1
.
x
,
pos1
.
y
-
(
real
)
pos1
.
y
,
pos1
.
z
-
(
real
)
pos1
.
z
,
0
);
posqCorrection
[
atom
.
y
]
=
make_real4
(
pos2
.
x
-
(
real
)
pos2
.
x
,
pos2
.
y
-
(
real
)
pos2
.
y
,
pos2
.
z
-
(
real
)
pos2
.
z
,
0
);
posqCorrection
[
atom
2
]
=
make_real4
(
pos2
.
x
-
(
real
)
pos2
.
x
,
pos2
.
y
-
(
real
)
pos2
.
y
,
pos2
.
z
-
(
real
)
pos2
.
z
,
0
);
#else
#else
posq
[
atom
.
x
]
=
pos1
;
posq
[
atom
1
]
=
pos1
;
posq
[
atom
.
y
]
=
pos2
;
posq
[
atom
2
]
=
pos2
;
#endif
#endif
}
}
}
}
index
+=
GLOBAL_SIZE
;
}
}
}
/* end of hard wall constraint part */
}
}
}
platforms/cuda/include/CudaKernels.h
View file @
ca4c03c3
...
@@ -447,109 +447,6 @@ private:
...
@@ -447,109 +447,6 @@ private:
CUfunction
copyStateKernel
,
copyForcesKernel
,
addForcesKernel
;
CUfunction
copyStateKernel
,
copyForcesKernel
,
addForcesKernel
;
};
};
/*
* This kernel is invoked by NoseHooverIntegrator to take one time step.
*/
class
CudaIntegrateVelocityVerletStepKernel
:
public
IntegrateVelocityVerletStepKernel
{
public:
CudaIntegrateVelocityVerletStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
)
:
IntegrateVelocityVerletStepKernel
(
name
,
platform
),
cu
(
cu
)
{
}
~
CudaIntegrateVelocityVerletStepKernel
()
{}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param integrator the NoseHooverIntegrator this kernel will be used for
*/
void
initialize
(
const
System
&
system
,
const
NoseHooverIntegrator
&
integrator
);
/**
* Execute the kernel.
*
* @param context the context in which to execute this kernel
* @param integrator the VerletIntegrator this kernel is being used for
* @param forcesAreValid a reference to the parent integrator's boolean for keeping
* track of the validity of the current forces.
*/
void
execute
(
ContextImpl
&
context
,
const
NoseHooverIntegrator
&
integrator
,
bool
&
forcesAreValid
);
/**
* Compute the kinetic energy.
*
* @param context the context in which to execute this kernel
* @param integrator the NoseHooverIntegrator this kernel is being used for
*/
double
computeKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverIntegrator
&
integrator
);
private:
CudaContext
&
cu
;
float
prevMaxPairDistance
;
CudaArray
maxPairDistanceBuffer
,
pairListBuffer
,
atomListBuffer
,
pairTemperatureBuffer
;
CUfunction
kernel1
,
kernel2
,
kernel3
,
kernelHardWall
;
};
/**
* This kernel is invoked by NoseHooverChain at the start of each time step to adjust the thermostat
* and update the associated particle velocities.
*/
class
CudaNoseHooverChainKernel
:
public
NoseHooverChainKernel
{
public:
CudaNoseHooverChainKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
)
:
NoseHooverChainKernel
(
name
,
platform
),
cu
(
cu
)
{
}
~
CudaNoseHooverChainKernel
()
{}
/**
* Initialize the kernel.
*/
void
initialize
();
/**
* Execute the kernel that propagates the Nose Hoover chain and determines the velocity scale factor.
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the object describing the chain to be propagated.
* @param kineticEnergies the {absolute, relative} kineticEnergy of the particles being thermostated by this chain.
* @param timeStep the time step used by the integrator.
* @return the {absolute, relative} velocity scale factor to apply to the particles associated with this heat bath.
*/
std
::
pair
<
double
,
double
>
propagateChain
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
std
::
pair
<
double
,
double
>
kineticEnergies
,
double
timeStep
);
/**
* Execute the kernal that computes the total (kinetic + potential) heat bath energy.
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @return the total heat bath energy.
*/
double
computeHeatBathEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
);
/**
* Execute the kernel that computes the kinetic energy for a subset of atoms,
* or the relative kinetic energy of Drude particles with respect to their parent atoms
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @param downloadValue whether the computed value should be downloaded and returned.
*
*/
std
::
pair
<
double
,
double
>
computeMaskedKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
noseHooverChain
,
bool
downloadValue
);
/**
* Execute the kernel that scales the velocities of particles associated with a nose hoover chain
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @param scaleFactors the {absolute, relative} multiplicative factor by which velocities are scaled.
*/
void
scaleVelocities
(
ContextImpl
&
context
,
const
NoseHooverChain
&
noseHooverChain
,
std
::
pair
<
double
,
double
>
scaleFactors
);
private:
int
sumWorkGroupSize
;
CudaContext
&
cu
;
CudaArray
energyBuffer
,
scaleFactorBuffer
,
kineticEnergyBuffer
,
chainMasses
,
chainForces
,
heatBathEnergy
;
std
::
map
<
int
,
CudaArray
>
atomlists
,
pairlists
;
std
::
map
<
int
,
CUfunction
>
propagateKernels
;
CUfunction
reduceEnergyKernel
;
CUfunction
computeHeatBathEnergyKernel
;
CUfunction
computeAtomsKineticEnergyKernel
;
CUfunction
computePairsKineticEnergyKernel
;
CUfunction
scaleAtomsVelocitiesKernel
;
CUfunction
scalePairsVelocitiesKernel
;
};
/**
/**
* This kernel is invoked by MonteCarloBarostat to adjust the periodic box volume
* This kernel is invoked by MonteCarloBarostat to adjust the periodic box volume
*/
*/
...
...
platforms/cuda/src/CudaKernelFactory.cpp
View file @
ca4c03c3
...
@@ -134,9 +134,9 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
...
@@ -134,9 +134,9 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
if
(
name
==
ApplyAndersenThermostatKernel
::
Name
())
if
(
name
==
ApplyAndersenThermostatKernel
::
Name
())
return
new
CommonApplyAndersenThermostatKernel
(
name
,
platform
,
cu
);
return
new
CommonApplyAndersenThermostatKernel
(
name
,
platform
,
cu
);
if
(
name
==
NoseHooverChainKernel
::
Name
())
if
(
name
==
NoseHooverChainKernel
::
Name
())
return
new
C
uda
NoseHooverChainKernel
(
name
,
platform
,
cu
);
return
new
C
ommon
NoseHooverChainKernel
(
name
,
platform
,
cu
);
if
(
name
==
IntegrateVelocityVerletStepKernel
::
Name
())
if
(
name
==
IntegrateVelocityVerletStepKernel
::
Name
())
return
new
C
uda
IntegrateVelocityVerletStepKernel
(
name
,
platform
,
cu
);
return
new
C
ommon
IntegrateVelocityVerletStepKernel
(
name
,
platform
,
cu
);
if
(
name
==
ApplyMonteCarloBarostatKernel
::
Name
())
if
(
name
==
ApplyMonteCarloBarostatKernel
::
Name
())
return
new
CudaApplyMonteCarloBarostatKernel
(
name
,
platform
,
cu
);
return
new
CudaApplyMonteCarloBarostatKernel
(
name
,
platform
,
cu
);
if
(
name
==
RemoveCMMotionKernel
::
Name
())
if
(
name
==
RemoveCMMotionKernel
::
Name
())
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
ca4c03c3
...
@@ -1497,118 +1497,6 @@ void CudaCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, in
...
@@ -1497,118 +1497,6 @@ void CudaCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, in
nz
=
dispersionGridSizeZ
;
nz
=
dispersionGridSizeZ
;
}
}
}
}
void
CudaIntegrateVelocityVerletStepKernel
::
initialize
(
const
System
&
system
,
const
NoseHooverIntegrator
&
integrator
)
{
cu
.
getPlatformData
().
initializeContexts
(
system
);
cu
.
setAsCurrent
();
map
<
string
,
string
>
defines
;
defines
[
"BOLTZ"
]
=
cu
.
doubleToString
(
BOLTZ
);
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
velocityVerlet
,
defines
,
""
);
kernel1
=
cu
.
getKernel
(
module
,
"integrateVelocityVerletPart1"
);
kernel2
=
cu
.
getKernel
(
module
,
"integrateVelocityVerletPart2"
);
kernel3
=
cu
.
getKernel
(
module
,
"integrateVelocityVerletPart3"
);
kernelHardWall
=
cu
.
getKernel
(
module
,
"integrateVelocityVerletHardWall"
);
prevMaxPairDistance
=
-
1.0
;
maxPairDistanceBuffer
.
initialize
<
float
>
(
cu
,
1
,
"maxPairDistanceBuffer"
);
}
void
CudaIntegrateVelocityVerletStepKernel
::
execute
(
ContextImpl
&
context
,
const
NoseHooverIntegrator
&
integrator
,
bool
&
forcesAreValid
)
{
cu
.
setAsCurrent
();
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
int
paddedNumAtoms
=
cu
.
getPaddedNumAtoms
();
double
dt
=
integrator
.
getStepSize
();
cu
.
getIntegrationUtilities
().
setNextStepSize
(
dt
);
if
(
!
forcesAreValid
)
context
.
calcForcesAndEnergy
(
true
,
false
);
const
auto
&
atomList
=
integrator
.
getAllThermostatedIndividualParticles
();
const
auto
&
pairList
=
integrator
.
getAllThermostatedPairs
();
int
numAtoms
=
atomList
.
size
();
int
numPairs
=
pairList
.
size
();
int
numParticles
=
numAtoms
+
2
*
numPairs
;
float
maxPairDistance
=
integrator
.
getMaximumPairDistance
();
// Make sure atom and pair metadata is uploaded and has the correct dimensions
if
(
prevMaxPairDistance
!=
maxPairDistance
)
{
std
::
vector
<
float
>
tmp
(
1
,
maxPairDistance
);
maxPairDistanceBuffer
.
upload
(
tmp
);
prevMaxPairDistance
=
maxPairDistance
;
}
if
(
numAtoms
!=
0
&&
(
!
atomListBuffer
.
isInitialized
()
||
atomListBuffer
.
getSize
()
!=
numAtoms
))
{
if
(
atomListBuffer
.
isInitialized
())
atomListBuffer
.
resize
(
atomList
.
size
());
else
atomListBuffer
.
initialize
<
int
>
(
cu
,
atomList
.
size
(),
"atomListBuffer"
);
atomListBuffer
.
upload
(
atomList
);
}
if
(
numPairs
!=
0
&&
(
!
pairListBuffer
.
isInitialized
()
||
pairListBuffer
.
getSize
()
!=
numPairs
))
{
if
(
pairListBuffer
.
isInitialized
())
{
pairListBuffer
.
resize
(
pairList
.
size
());
pairTemperatureBuffer
.
resize
(
pairList
.
size
());
}
else
{
pairListBuffer
.
initialize
<
int2
>
(
cu
,
pairList
.
size
(),
"pairListBuffer"
);
pairTemperatureBuffer
.
initialize
<
float
>
(
cu
,
pairList
.
size
(),
"pairTemperatureBuffer"
);
}
std
::
vector
<
int2
>
tmp
;
std
::
vector
<
float
>
tmp2
;
for
(
const
auto
&
pair
:
pairList
)
{
tmp
.
push_back
(
make_int2
(
std
::
get
<
0
>
(
pair
),
std
::
get
<
1
>
(
pair
)));
tmp2
.
push_back
(
std
::
get
<
2
>
(
pair
));
}
pairListBuffer
.
upload
(
tmp
);
pairTemperatureBuffer
.
upload
(
tmp2
);
}
//// Call the first integration kernel.
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
void
*
args1
[]
=
{
&
numAtoms
,
&
numPairs
,
&
paddedNumAtoms
,
&
cu
.
getIntegrationUtilities
().
getStepSize
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
(),
&
atomListBuffer
.
getDevicePointer
(),
&
pairListBuffer
.
getDevicePointer
()};
cu
.
executeKernel
(
kernel1
,
args1
,
std
::
max
(
numAtoms
,
numPairs
),
128
);
//// Apply constraints.
integration
.
applyConstraints
(
integrator
.
getConstraintTolerance
());
//// Call the second integration kernel.
void
*
args2
[]
=
{
&
numParticles
,
&
cu
.
getIntegrationUtilities
().
getStepSize
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
()};
cu
.
executeKernel
(
kernel2
,
args2
,
numParticles
,
128
);
if
(
numPairs
>
0
)
{
//// Enforce hard wall constraint
void
*
argsHardWall
[]
=
{
&
numPairs
,
&
maxPairDistanceBuffer
.
getDevicePointer
(),
&
cu
.
getIntegrationUtilities
().
getStepSize
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
pairListBuffer
.
getDevicePointer
(),
&
pairTemperatureBuffer
.
getDevicePointer
()};
cu
.
executeKernel
(
kernelHardWall
,
argsHardWall
,
numPairs
,
128
);
}
integration
.
computeVirtualSites
();
//// Update forces
context
.
calcForcesAndEnergy
(
true
,
false
);
forcesAreValid
=
true
;
//// Call the third integration kernel.
void
*
args3
[]
=
{
&
numAtoms
,
&
numPairs
,
&
paddedNumAtoms
,
&
cu
.
getIntegrationUtilities
().
getStepSize
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
(),
&
atomListBuffer
.
getDevicePointer
(),
&
pairListBuffer
.
getDevicePointer
()};
cu
.
executeKernel
(
kernel3
,
args3
,
std
::
max
(
numAtoms
,
numPairs
),
128
);
// TODO: Figure out if this is really needed. The constraint velocities are accounted for
// in a finite difference sense in the step 3 kernel, when the velocities are updated.
integration
.
applyVelocityConstraints
(
integrator
.
getConstraintTolerance
());
//// Update the time and step count.
cu
.
setTime
(
cu
.
getTime
()
+
dt
);
cu
.
setStepCount
(
cu
.
getStepCount
()
+
1
);
cu
.
reorderAtoms
();
}
double
CudaIntegrateVelocityVerletStepKernel
::
computeKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverIntegrator
&
integrator
)
{
return
cu
.
getIntegrationUtilities
().
computeKineticEnergy
(
0
);
}
class
CudaCalcCustomCVForceKernel
::
ForceInfo
:
public
CudaForceInfo
{
class
CudaCalcCustomCVForceKernel
::
ForceInfo
:
public
CudaForceInfo
{
public:
public:
...
@@ -1820,357 +1708,6 @@ void CudaCalcCustomCVForceKernel::copyParametersToContext(ContextImpl& context,
...
@@ -1820,357 +1708,6 @@ void CudaCalcCustomCVForceKernel::copyParametersToContext(ContextImpl& context,
delete
function
.
second
;
delete
function
.
second
;
}
}
void
CudaNoseHooverChainKernel
::
initialize
()
{
cu
.
setAsCurrent
();
bool
useDouble
=
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
();
map
<
string
,
string
>
defines
;
sumWorkGroupSize
=
512
;
defines
[
"WORK_GROUP_SIZE"
]
=
cu
.
intToString
(
sumWorkGroupSize
);
defines
[
"MIXEDEXP"
]
=
useDouble
?
"exp"
:
"expf"
;
defines
[
"BEGIN_YS_LOOP"
]
=
"for(const real & ys : {1}) {"
;
defines
[
"END_YS_LOOP"
]
=
"}"
;
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
noseHooverChain
,
defines
,
"-std=c++11"
);
propagateKernels
[
1
]
=
cu
.
getKernel
(
module
,
"propagateNoseHooverChain"
);
defines
[
"BEGIN_YS_LOOP"
]
=
"for(const real & ys : {0.828981543588751, -0.657963087177502, 0.828981543588751}) {"
;
module
=
cu
.
createModule
(
CudaKernelSources
::
noseHooverChain
,
defines
,
"-std=c++11"
);
propagateKernels
[
3
]
=
cu
.
getKernel
(
module
,
"propagateNoseHooverChain"
);
defines
[
"BEGIN_YS_LOOP"
]
=
"for(const real & ys : {0.2967324292201065, 0.2967324292201065, -0.186929716880426, 0.2967324292201065, 0.2967324292201065}) {"
;
module
=
cu
.
createModule
(
CudaKernelSources
::
noseHooverChain
,
defines
,
"-std=c++11"
);
propagateKernels
[
5
]
=
cu
.
getKernel
(
module
,
"propagateNoseHooverChain"
);
module
=
cu
.
createModule
(
CudaKernelSources
::
noseHooverChain
,
defines
,
"-std=c++11"
);
reduceEnergyKernel
=
cu
.
getKernel
(
module
,
"reduceEnergyPair"
);
computeHeatBathEnergyKernel
=
cu
.
getKernel
(
module
,
"computeHeatBathEnergy"
);
computeAtomsKineticEnergyKernel
=
cu
.
getKernel
(
module
,
"computeAtomsKineticEnergy"
);
computePairsKineticEnergyKernel
=
cu
.
getKernel
(
module
,
"computePairsKineticEnergy"
);
scaleAtomsVelocitiesKernel
=
cu
.
getKernel
(
module
,
"scaleAtomsVelocities"
);
scalePairsVelocitiesKernel
=
cu
.
getKernel
(
module
,
"scalePairsVelocities"
);
int
energyBufferSize
=
cu
.
getEnergyBuffer
().
getSize
();
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
())
energyBuffer
.
initialize
<
double2
>
(
cu
,
energyBufferSize
,
"energyBuffer"
);
else
energyBuffer
.
initialize
<
float2
>
(
cu
,
energyBufferSize
,
"energyBuffer"
);
}
std
::
pair
<
double
,
double
>
CudaNoseHooverChainKernel
::
propagateChain
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
std
::
pair
<
double
,
double
>
kineticEnergies
,
double
timeStep
)
{
cu
.
setAsCurrent
();
bool
useDouble
=
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
();
int
chainID
=
nhc
.
getChainID
();
int
nAtoms
=
nhc
.
getThermostatedAtoms
().
size
();
int
nPairs
=
nhc
.
getThermostatedPairs
().
size
();
int
chainLength
=
nhc
.
getChainLength
();
int
numYS
=
nhc
.
getNumYoshidaSuzukiTimeSteps
();
int
numMTS
=
nhc
.
getNumMultiTimeSteps
();
int
numDOFs
=
nhc
.
getNumDegreesOfFreedom
();
double
temperature
=
nhc
.
getTemperature
();
double
frequency
=
nhc
.
getCollisionFrequency
();
double
relativeTemperature
=
nhc
.
getRelativeTemperature
();
double
relativeFrequency
=
nhc
.
getRelativeCollisionFrequency
();
if
(
numYS
!=
1
&&
numYS
!=
3
&&
numYS
!=
5
)
{
throw
OpenMMException
(
"Number of Yoshida Suzuki time steps has to be 1, 3, or 5."
);
}
auto
&
chainState
=
cu
.
getIntegrationUtilities
().
getNoseHooverChainState
();
if
(
!
scaleFactorBuffer
.
isInitialized
()
||
scaleFactorBuffer
.
getSize
()
==
0
)
{
if
(
useDouble
)
{
std
::
vector
<
double2
>
zeros
{{
0
,
0
}};
if
(
scaleFactorBuffer
.
isInitialized
())
scaleFactorBuffer
.
resize
(
1
);
else
scaleFactorBuffer
.
initialize
<
double2
>
(
cu
,
1
,
"scaleFactorBuffer"
);
scaleFactorBuffer
.
upload
(
zeros
);
}
else
{
std
::
vector
<
float2
>
zeros
{{
0
,
0
}};
if
(
scaleFactorBuffer
.
isInitialized
())
scaleFactorBuffer
.
resize
(
1
);
else
scaleFactorBuffer
.
initialize
<
float2
>
(
cu
,
1
,
"scaleFactorBuffer"
);
scaleFactorBuffer
.
upload
(
zeros
);
}
}
std
::
vector
<
double
>
zeros
(
chainLength
,
0
);
if
(
!
chainForces
.
isInitialized
()
||
!
chainMasses
.
isInitialized
())
{
if
(
useDouble
)
{
if
(
chainForces
.
isInitialized
())
{
chainMasses
.
resize
(
chainLength
);
chainForces
.
resize
(
chainLength
);
}
else
{
chainMasses
.
initialize
<
double
>
(
cu
,
chainLength
,
"chainMasses"
);
chainForces
.
initialize
<
double
>
(
cu
,
chainLength
,
"chainForces"
);
}
chainMasses
.
upload
(
zeros
);
chainForces
.
upload
(
zeros
);
}
else
{
if
(
chainForces
.
isInitialized
())
{
chainMasses
.
resize
(
chainLength
);
chainForces
.
resize
(
chainLength
);
}
else
{
chainMasses
.
initialize
<
float
>
(
cu
,
chainLength
,
"chainMasses"
);
chainForces
.
initialize
<
float
>
(
cu
,
chainLength
,
"chainForces"
);
}
chainMasses
.
upload
(
zeros
,
true
);
chainForces
.
upload
(
zeros
,
true
);
}
}
if
(
chainForces
.
getSize
()
<
chainLength
)
chainMasses
.
resize
(
chainLength
);
if
(
chainMasses
.
getSize
()
<
chainLength
)
chainMasses
.
resize
(
chainLength
);
float
timeStepFloat
=
(
float
)
timeStep
;
// N.B. We ignore the incoming kineticEnergy and grab it from the device buffer instead
if
(
nAtoms
)
{
if
(
!
chainState
.
count
(
2
*
chainID
))
chainState
[
2
*
chainID
]
=
ComputeArray
();
if
(
!
chainState
.
at
(
2
*
chainID
).
isInitialized
()
||
chainState
.
at
(
2
*
chainID
).
getSize
()
!=
chainLength
)
{
// We need to upload the CUDA array
if
(
useDouble
)
{
if
(
chainState
.
at
(
2
*
chainID
).
isInitialized
())
chainState
.
at
(
2
*
chainID
).
resize
(
chainLength
);
else
chainState
.
at
(
2
*
chainID
).
initialize
<
double2
>
(
cu
,
chainLength
,
"chainState"
+
std
::
to_string
(
2
*
chainID
));
std
::
vector
<
double2
>
zeros
(
chainLength
,
make_double2
(
0
,
0
));
chainState
.
at
(
2
*
chainID
).
upload
(
zeros
.
data
());
}
else
{
if
(
chainState
.
at
(
2
*
chainID
).
isInitialized
())
chainState
.
at
(
2
*
chainID
).
resize
(
chainLength
);
else
chainState
.
at
(
2
*
chainID
).
initialize
<
float2
>
(
cu
,
chainLength
,
"chainState"
+
std
::
to_string
(
2
*
chainID
));
std
::
vector
<
float2
>
zeros
(
chainLength
,
make_float2
(
0
,
0
));
chainState
.
at
(
2
*
chainID
).
upload
(
zeros
.
data
());
}
}
int
chainType
=
0
;
double
kT
=
BOLTZ
*
temperature
;
float
kTfloat
=
(
float
)
kT
;
float
frequencyFloat
=
(
float
)
frequency
;
void
*
args
[]
=
{
&
cu
.
unwrap
(
chainState
[
2
*
chainID
]).
getDevicePointer
(),
&
kineticEnergyBuffer
.
getDevicePointer
(),
&
scaleFactorBuffer
.
getDevicePointer
(),
&
chainMasses
.
getDevicePointer
(),
&
chainForces
.
getDevicePointer
(),
&
chainType
,
&
chainLength
,
&
numMTS
,
&
numDOFs
,
&
timeStepFloat
,
useDouble
?
(
void
*
)
&
kT
:
(
void
*
)
&
kTfloat
,
&
frequencyFloat
};
cu
.
executeKernel
(
propagateKernels
[
numYS
],
args
,
1
,
1
);
}
if
(
nPairs
)
{
if
(
!
chainState
.
count
(
2
*
chainID
+
1
))
chainState
[
2
*
chainID
+
1
]
=
ComputeArray
();
if
(
!
chainState
.
at
(
2
*
chainID
+
1
).
isInitialized
()
||
chainState
.
at
(
2
*
chainID
+
1
).
getSize
()
!=
chainLength
)
{
// We need to upload the CUDA array
if
(
useDouble
)
{
if
(
chainState
.
at
(
2
*
chainID
+
1
).
isInitialized
())
chainState
.
at
(
2
*
chainID
+
1
).
resize
(
chainLength
);
else
chainState
.
at
(
2
*
chainID
+
1
).
initialize
<
double2
>
(
cu
,
chainLength
,
"chainState"
+
std
::
to_string
(
2
*
chainID
+
1
));
std
::
vector
<
double2
>
zeros
(
chainLength
,
make_double2
(
0
,
0
));
chainState
.
at
(
2
*
chainID
+
1
).
upload
(
zeros
.
data
());
}
else
{
if
(
chainState
.
at
(
2
*
chainID
+
1
).
isInitialized
())
chainState
.
at
(
2
*
chainID
+
1
).
resize
(
chainLength
);
else
chainState
.
at
(
2
*
chainID
+
1
).
initialize
<
float2
>
(
cu
,
chainLength
,
"chainState"
+
std
::
to_string
(
2
*
chainID
+
1
));
std
::
vector
<
float2
>
zeros
(
chainLength
,
make_float2
(
0
,
0
));
chainState
.
at
(
2
*
chainID
+
1
).
upload
(
zeros
.
data
());
}
}
int
chainType
=
1
;
double
kT
=
BOLTZ
*
relativeTemperature
;
int
ndf
=
3
*
nPairs
;
float
kTfloat
=
(
float
)
kT
;
float
frequencyFloat
=
(
float
)
relativeFrequency
;
void
*
args
[]
=
{
&
cu
.
unwrap
(
chainState
[
2
*
chainID
+
1
]).
getDevicePointer
(),
&
kineticEnergyBuffer
.
getDevicePointer
(),
&
scaleFactorBuffer
.
getDevicePointer
(),
&
chainMasses
.
getDevicePointer
(),
&
chainForces
.
getDevicePointer
(),
&
chainType
,
&
chainLength
,
&
numMTS
,
&
ndf
,
&
timeStepFloat
,
useDouble
?
(
void
*
)
&
kT
:
(
void
*
)
&
kTfloat
,
&
frequencyFloat
};
cu
.
executeKernel
(
propagateKernels
[
numYS
],
args
,
1
,
1
);
}
return
{
0
,
0
};
}
double
CudaNoseHooverChainKernel
::
computeHeatBathEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
)
{
cu
.
setAsCurrent
();
bool
useDouble
=
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
();
int
chainID
=
nhc
.
getChainID
();
int
chainLength
=
nhc
.
getChainLength
();
auto
&
chainState
=
cu
.
getIntegrationUtilities
().
getNoseHooverChainState
();
bool
absChainIsValid
=
chainState
.
count
(
2
*
chainID
)
!=
0
&&
chainState
[
2
*
chainID
].
isInitialized
()
&&
chainState
[
2
*
chainID
].
getSize
()
==
chainLength
;
bool
relChainIsValid
=
chainState
.
count
(
2
*
chainID
+
1
)
!=
0
&&
chainState
[
2
*
chainID
+
1
].
isInitialized
()
&&
chainState
[
2
*
chainID
+
1
].
getSize
()
==
chainLength
;
if
(
!
absChainIsValid
&&
!
relChainIsValid
)
return
0.0
;
if
(
!
heatBathEnergy
.
isInitialized
()
||
heatBathEnergy
.
getSize
()
==
0
)
{
if
(
useDouble
)
{
std
::
vector
<
double
>
one
(
1
);
heatBathEnergy
.
initialize
<
double
>
(
cu
,
1
,
"heatBathEnergy"
);
heatBathEnergy
.
upload
(
one
);
}
else
{
std
::
vector
<
float
>
one
(
1
);
heatBathEnergy
.
initialize
<
float
>
(
cu
,
1
,
"heatBathEnergy"
);
heatBathEnergy
.
upload
(
one
);
}
}
cu
.
clearBuffer
(
heatBathEnergy
);
if
(
absChainIsValid
)
{
int
numDOFs
=
nhc
.
getNumDegreesOfFreedom
();
double
temperature
=
nhc
.
getTemperature
();
double
frequency
=
nhc
.
getCollisionFrequency
();
double
kT
=
BOLTZ
*
temperature
;
float
kTfloat
=
(
float
)
kT
;
float
frequencyFloat
=
(
float
)
frequency
;
void
*
args
[]
=
{
&
heatBathEnergy
.
getDevicePointer
(),
&
chainLength
,
&
numDOFs
,
useDouble
?
(
void
*
)
&
kT
:
(
void
*
)
&
kTfloat
,
&
frequencyFloat
,
&
cu
.
unwrap
(
chainState
[
2
*
chainID
]).
getDevicePointer
()};
cu
.
executeKernel
(
computeHeatBathEnergyKernel
,
args
,
1
,
1
);
}
if
(
relChainIsValid
)
{
int
numDOFs
=
3
*
nhc
.
getThermostatedPairs
().
size
();
double
temperature
=
nhc
.
getRelativeTemperature
();
double
frequency
=
nhc
.
getRelativeCollisionFrequency
();
double
kT
=
BOLTZ
*
temperature
;
float
kTfloat
=
(
float
)
kT
;
float
frequencyFloat
=
(
float
)
frequency
;
void
*
args
[]
=
{
&
heatBathEnergy
.
getDevicePointer
(),
&
chainLength
,
&
numDOFs
,
useDouble
?
(
void
*
)
&
kT
:
(
void
*
)
&
kTfloat
,
&
frequencyFloat
,
&
cu
.
unwrap
(
chainState
[
2
*
chainID
+
1
]).
getDevicePointer
()};
cu
.
executeKernel
(
computeHeatBathEnergyKernel
,
args
,
1
,
1
);
}
void
*
pinnedBuffer
=
cu
.
getPinnedBuffer
();
heatBathEnergy
.
download
(
pinnedBuffer
);
if
(
useDouble
)
return
*
((
double
*
)
pinnedBuffer
);
else
return
*
((
float
*
)
pinnedBuffer
);
}
std
::
pair
<
double
,
double
>
CudaNoseHooverChainKernel
::
computeMaskedKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
bool
downloadValue
)
{
cu
.
setAsCurrent
();
bool
useDouble
=
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
();
int
chainID
=
nhc
.
getChainID
();
const
auto
&
nhcAtoms
=
nhc
.
getThermostatedAtoms
();
const
auto
&
nhcPairs
=
nhc
.
getThermostatedPairs
();
auto
nAtoms
=
nhcAtoms
.
size
();
auto
nPairs
=
nhcPairs
.
size
();
if
(
nAtoms
)
{
if
(
!
atomlists
.
count
(
chainID
))
{
// We need to upload the CUDA array
atomlists
[
chainID
]
=
CudaArray
();
atomlists
[
chainID
].
initialize
<
int
>
(
cu
,
nAtoms
,
"atomlist"
+
std
::
to_string
(
chainID
));
atomlists
[
chainID
].
upload
(
nhcAtoms
);
}
if
(
atomlists
[
chainID
].
getSize
()
!=
nAtoms
)
{
throw
OpenMMException
(
"Number of atoms changed. Cannot be handled by the same Nose-Hoover thermostat."
);
}
}
if
(
nPairs
)
{
if
(
!
pairlists
.
count
(
chainID
))
{
// We need to upload the CUDA array
pairlists
[
chainID
]
=
CudaArray
();
pairlists
[
chainID
].
initialize
<
int2
>
(
cu
,
nPairs
,
"pairlist"
+
std
::
to_string
(
chainID
));
std
::
vector
<
int2
>
int2vec
;
for
(
const
auto
&
p
:
nhcPairs
)
int2vec
.
push_back
(
make_int2
(
p
.
first
,
p
.
second
));
pairlists
[
chainID
].
upload
(
int2vec
);
}
if
(
pairlists
[
chainID
].
getSize
()
!=
nPairs
)
{
throw
OpenMMException
(
"Number of thermostated pairs changed. Cannot be handled by the same Nose-Hoover thermostat."
);
}
}
if
(
!
kineticEnergyBuffer
.
isInitialized
()
||
kineticEnergyBuffer
.
getSize
()
==
0
)
{
if
(
useDouble
)
{
std
::
vector
<
double2
>
zeros
{{
0
,
0
}};
kineticEnergyBuffer
.
initialize
<
double2
>
(
cu
,
1
,
"kineticEnergyBuffer"
);
kineticEnergyBuffer
.
upload
(
zeros
);
}
else
{
std
::
vector
<
float2
>
zeros
{{
0
,
0
}};
kineticEnergyBuffer
.
initialize
<
float2
>
(
cu
,
1
,
"kineticEnergyBuffer"
);
kineticEnergyBuffer
.
upload
(
zeros
);
}
}
cu
.
clearBuffer
(
energyBuffer
);
if
(
nAtoms
)
{
void
*
args
[]
=
{
&
energyBuffer
.
getDevicePointer
(),
&
nAtoms
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
atomlists
[
chainID
].
getDevicePointer
()};
cu
.
executeKernel
(
computeAtomsKineticEnergyKernel
,
args
,
nAtoms
);
}
if
(
nPairs
)
{
void
*
args
[]
=
{
&
energyBuffer
.
getDevicePointer
(),
&
nPairs
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
pairlists
[
chainID
].
getDevicePointer
()};
cu
.
executeKernel
(
computePairsKineticEnergyKernel
,
args
,
nPairs
);
}
//taken from CudaContext::reduceEnergy(); the final kinetic energy will live in the kineticEnergy buffer
int
bufferSize
=
energyBuffer
.
getSize
();
void
*
args2
[]
=
{
&
energyBuffer
.
getDevicePointer
(),
&
kineticEnergyBuffer
.
getDevicePointer
(),
&
bufferSize
,
&
sumWorkGroupSize
};
cu
.
executeKernel
(
reduceEnergyKernel
,
args2
,
sumWorkGroupSize
,
sumWorkGroupSize
,
sumWorkGroupSize
*
energyBuffer
.
getElementSize
());
std
::
pair
<
double
,
double
>
KEs
=
{
0
,
0
};
if
(
downloadValue
)
{
void
*
pinnedBuffer
=
cu
.
getPinnedBuffer
();
kineticEnergyBuffer
.
download
(
pinnedBuffer
);
KEs
.
first
=
useDouble
?
*
((
double
*
)
pinnedBuffer
)
:
*
((
float
*
)
pinnedBuffer
);
KEs
.
second
=
useDouble
?
*
((
double
*
)
pinnedBuffer
+
1
)
:
*
((
float
*
)
pinnedBuffer
+
1
);
}
return
KEs
;
}
void
CudaNoseHooverChainKernel
::
scaleVelocities
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
std
::
pair
<
double
,
double
>
scaleFactor
)
{
// For now we assume that the atoms and pairs info is valid, because compute{Atoms|Pairs}KineticEnergy must have been
// called before this kernel. If that ever ceases to be true, some sanity checks are needed here.
cu
.
setAsCurrent
();
int
chainID
=
nhc
.
getChainID
();
auto
nAtoms
=
nhc
.
getThermostatedAtoms
().
size
();
auto
nPairs
=
nhc
.
getThermostatedPairs
().
size
();
if
(
nAtoms
)
{
void
*
args
[]
=
{
&
scaleFactorBuffer
.
getDevicePointer
(),
&
nAtoms
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
atomlists
[
chainID
].
getDevicePointer
()};
cu
.
executeKernel
(
scaleAtomsVelocitiesKernel
,
args
,
nAtoms
);
}
if
(
nPairs
)
{
void
*
args
[]
=
{
&
scaleFactorBuffer
.
getDevicePointer
(),
&
nPairs
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
pairlists
[
chainID
].
getDevicePointer
()};
cu
.
executeKernel
(
scalePairsVelocitiesKernel
,
args
,
nPairs
);
}
}
void
CudaApplyMonteCarloBarostatKernel
::
initialize
(
const
System
&
system
,
const
Force
&
thermostat
)
{
void
CudaApplyMonteCarloBarostatKernel
::
initialize
(
const
System
&
system
,
const
Force
&
thermostat
)
{
cu
.
setAsCurrent
();
cu
.
setAsCurrent
();
savedPositions
.
initialize
(
cu
,
cu
.
getPaddedNumAtoms
(),
cu
.
getUseDoublePrecision
()
?
sizeof
(
double4
)
:
sizeof
(
float4
),
"savedPositions"
);
savedPositions
.
initialize
(
cu
,
cu
.
getPaddedNumAtoms
(),
cu
.
getUseDoublePrecision
()
?
sizeof
(
double4
)
:
sizeof
(
float4
),
"savedPositions"
);
...
...
platforms/cuda/src/kernels/noseHooverChain.cu
deleted
100644 → 0
View file @
d169badc
#include <initializer_list>
extern
"C"
__global__
void
propagateNoseHooverChain
(
mixed2
*
__restrict__
chainData
,
const
mixed2
*
__restrict__
energySum
,
mixed2
*
__restrict__
scaleFactor
,
mixed
*
__restrict__
chainMasses
,
mixed
*
__restrict__
chainForces
,
int
chainType
,
int
chainLength
,
int
numMTS
,
int
numDOFs
,
float
timeStep
,
mixed
kT
,
float
frequency
){
const
mixed
&
kineticEnergy
=
chainType
?
energySum
[
0
].
y
:
energySum
[
0
].
x
;
mixed
&
scale
=
chainType
?
scaleFactor
[
0
].
y
:
scaleFactor
[
0
].
x
;
scale
=
(
mixed
)
1
;
if
(
kineticEnergy
<
1e-8
)
return
;
for
(
int
bead
=
0
;
bead
<
chainLength
;
++
bead
)
chainMasses
[
bead
]
=
kT
/
(
frequency
*
frequency
);
chainMasses
[
0
]
*=
numDOFs
;
mixed
KE2
=
2.0
f
*
kineticEnergy
;
mixed
timeOverMTS
=
timeStep
/
numMTS
;
chainForces
[
0
]
=
(
KE2
-
numDOFs
*
kT
)
/
chainMasses
[
0
];
for
(
int
bead
=
0
;
bead
<
chainLength
-
1
;
++
bead
)
{
chainForces
[
bead
+
1
]
=
(
chainMasses
[
bead
]
*
chainData
[
bead
].
y
*
chainData
[
bead
].
y
-
kT
)
/
chainMasses
[
bead
+
1
];
}
for
(
int
mts
=
0
;
mts
<
numMTS
;
++
mts
)
{
BEGIN_YS_LOOP
mixed
wdt
=
ys
*
timeOverMTS
;
chainData
[
chainLength
-
1
].
y
+=
0.25
f
*
wdt
*
chainForces
[
chainLength
-
1
];
for
(
int
bead
=
chainLength
-
2
;
bead
>=
0
;
--
bead
)
{
mixed
aa
=
MIXEDEXP
(
-
0.125
f
*
wdt
*
chainData
[
bead
+
1
].
y
);
chainData
[
bead
].
y
=
aa
*
(
chainData
[
bead
].
y
*
aa
+
0.25
f
*
wdt
*
chainForces
[
bead
]);
}
// update particle velocities
mixed
aa
=
MIXEDEXP
(
-
0.5
f
*
wdt
*
chainData
[
0
].
y
);
scale
*=
aa
;
// update the thermostat positions
for
(
int
bead
=
0
;
bead
<
chainLength
;
++
bead
)
{
chainData
[
bead
].
x
+=
0.5
f
*
chainData
[
bead
].
y
*
wdt
;
}
// update the forces
chainForces
[
0
]
=
(
scale
*
scale
*
KE2
-
numDOFs
*
kT
)
/
chainMasses
[
0
];
// update thermostat velocities
for
(
int
bead
=
0
;
bead
<
chainLength
-
1
;
++
bead
)
{
mixed
aa
=
MIXEDEXP
(
-
0.125
f
*
wdt
*
chainData
[
bead
+
1
].
y
);
chainData
[
bead
].
y
=
aa
*
(
aa
*
chainData
[
bead
].
y
+
0.25
f
*
wdt
*
chainForces
[
bead
]);
chainForces
[
bead
+
1
]
=
(
chainMasses
[
bead
]
*
chainData
[
bead
].
y
*
chainData
[
bead
].
y
-
kT
)
/
chainMasses
[
bead
+
1
];
}
chainData
[
chainLength
-
1
].
y
+=
0.25
f
*
wdt
*
chainForces
[
chainLength
-
1
];
END_YS_LOOP
}
// MTS loop
}
/**
* Compute total (potential + kinetic) energy of the Nose-Hoover beads
*/
extern
"C"
__global__
void
computeHeatBathEnergy
(
mixed
*
__restrict__
heatBathEnergy
,
int
chainLength
,
int
numDOFs
,
mixed
kT
,
float
frequency
,
const
mixed2
*
__restrict__
chainData
){
// Note that this is always incremented; make sure it's zeroed properly before the first call
mixed
&
energy
=
heatBathEnergy
[
0
];
for
(
int
i
=
0
;
i
<
chainLength
;
++
i
)
{
mixed
prefac
=
i
?
1
:
numDOFs
;
mixed
mass
=
prefac
*
kT
/
(
frequency
*
frequency
);
mixed
velocity
=
chainData
[
i
].
y
;
// The kinetic energy of this bead
energy
+=
0.5
f
*
mass
*
velocity
*
velocity
;
// The potential energy of this bead
mixed
position
=
chainData
[
i
].
x
;
energy
+=
prefac
*
kT
*
position
;
}
}
extern
"C"
__global__
void
computeAtomsKineticEnergy
(
mixed2
*
__restrict__
energyBuffer
,
int
numAtoms
,
const
mixed4
*
__restrict__
velm
,
const
int
*
__restrict__
atoms
){
mixed2
energy
=
make_mixed2
(
0
,
0
);
//energy = 1; return;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
atom
=
atoms
[
index
];
mixed4
v
=
velm
[
atom
];
mixed
mass
=
v
.
w
==
0
?
0
:
1
/
v
.
w
;
energy
.
x
+=
0.5
f
*
mass
*
(
v
.
x
*
v
.
x
+
v
.
y
*
v
.
y
+
v
.
z
*
v
.
z
);
}
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
=
energy
;
}
extern
"C"
__global__
void
computePairsKineticEnergy
(
mixed2
*
__restrict__
energyBuffer
,
int
numPairs
,
const
mixed4
*
__restrict__
velm
,
const
int2
*
__restrict__
pairs
){
mixed2
energy
=
make_mixed2
(
0
,
0
);
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numPairs
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int2
pair
=
pairs
[
index
];
int
atom1
=
pair
.
x
;
int
atom2
=
pair
.
y
;
mixed4
v1
=
velm
[
atom1
];
mixed4
v2
=
velm
[
atom2
];
mixed
m1
=
v1
.
w
==
0
?
0
:
1
/
v1
.
w
;
mixed
m2
=
v2
.
w
==
0
?
0
:
1
/
v2
.
w
;
mixed4
cv
;
cv
.
x
=
(
m1
*
v1
.
x
+
m2
*
v2
.
x
)
/
(
m1
+
m2
);
cv
.
y
=
(
m1
*
v1
.
y
+
m2
*
v2
.
y
)
/
(
m1
+
m2
);
cv
.
z
=
(
m1
*
v1
.
z
+
m2
*
v2
.
z
)
/
(
m1
+
m2
);
mixed4
rv
;
rv
.
x
=
v2
.
x
-
v1
.
x
;
rv
.
y
=
v2
.
y
-
v1
.
y
;
rv
.
z
=
v2
.
z
-
v1
.
z
;
energy
.
x
+=
0.5
f
*
(
m1
+
m2
)
*
(
cv
.
x
*
cv
.
x
+
cv
.
y
*
cv
.
y
+
cv
.
z
*
cv
.
z
);
energy
.
y
+=
0.5
f
*
(
m1
*
m2
/
(
m1
+
m2
))
*
(
rv
.
x
*
rv
.
x
+
rv
.
y
*
rv
.
y
+
rv
.
z
*
rv
.
z
);
}
// The atoms version of this has been called already, so accumulate instead of assigning here
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
].
x
+=
energy
.
x
;
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
].
y
+=
energy
.
y
;
}
extern
"C"
__global__
void
scaleAtomsVelocities
(
mixed2
*
__restrict__
scaleFactor
,
int
numAtoms
,
mixed4
*
__restrict__
velm
,
const
int
*
__restrict__
atoms
){
const
mixed
&
scale
=
scaleFactor
[
0
].
x
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
atom
=
atoms
[
index
];
mixed4
&
v
=
velm
[
atom
];
v
.
x
*=
scale
;
v
.
y
*=
scale
;
v
.
z
*=
scale
;
}
}
extern
"C"
__global__
void
scalePairsVelocities
(
mixed2
*
__restrict__
scaleFactor
,
int
numPairs
,
mixed4
*
__restrict__
velm
,
const
int2
*
__restrict__
pairs
){
const
mixed
&
absScale
=
scaleFactor
[
0
].
x
;
const
mixed
&
relScale
=
scaleFactor
[
0
].
y
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numPairs
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
atom1
=
pairs
[
index
].
x
;
int
atom2
=
pairs
[
index
].
y
;
mixed4
v1
=
velm
[
atom1
];
mixed4
v2
=
velm
[
atom2
];
mixed
m1
=
v1
.
w
==
0
?
0
:
1
/
v1
.
w
;
mixed
m2
=
v2
.
w
==
0
?
0
:
1
/
v2
.
w
;
mixed4
cv
;
cv
.
x
=
(
m1
*
v1
.
x
+
m2
*
v2
.
x
)
/
(
m1
+
m2
);
cv
.
y
=
(
m1
*
v1
.
y
+
m2
*
v2
.
y
)
/
(
m1
+
m2
);
cv
.
z
=
(
m1
*
v1
.
z
+
m2
*
v2
.
z
)
/
(
m1
+
m2
);
mixed4
rv
;
rv
.
x
=
v2
.
x
-
v1
.
x
;
rv
.
y
=
v2
.
y
-
v1
.
y
;
rv
.
z
=
v2
.
z
-
v1
.
z
;
v1
.
x
=
absScale
*
cv
.
x
-
relScale
*
rv
.
x
*
m2
/
(
m1
+
m2
);
v1
.
y
=
absScale
*
cv
.
y
-
relScale
*
rv
.
y
*
m2
/
(
m1
+
m2
);
v1
.
z
=
absScale
*
cv
.
z
-
relScale
*
rv
.
z
*
m2
/
(
m1
+
m2
);
v2
.
x
=
absScale
*
cv
.
x
+
relScale
*
rv
.
x
*
m1
/
(
m1
+
m2
);
v2
.
y
=
absScale
*
cv
.
y
+
relScale
*
rv
.
y
*
m1
/
(
m1
+
m2
);
v2
.
z
=
absScale
*
cv
.
z
+
relScale
*
rv
.
z
*
m1
/
(
m1
+
m2
);
velm
[
atom1
]
=
v1
;
velm
[
atom2
]
=
v2
;
}
}
/**
* Sum the energy buffer containing a pair of energies stored as mixed2. This is copied from utilities.cu with small modifications
*/
extern
"C"
__global__
void
reduceEnergyPair
(
const
mixed2
*
__restrict__
energyBuffer
,
mixed2
*
__restrict__
result
,
int
bufferSize
,
int
workGroupSize
)
{
__shared__
mixed2
tempBuffer
[
WORK_GROUP_SIZE
];
const
unsigned
int
thread
=
threadIdx
.
x
;
mixed2
sum
=
make_mixed2
(
0
,
0
);
for
(
unsigned
int
idx
=
thread
;
idx
<
bufferSize
;
idx
+=
blockDim
.
x
)
{
sum
.
x
+=
energyBuffer
[
idx
].
x
;
sum
.
y
+=
energyBuffer
[
idx
].
y
;
}
tempBuffer
[
thread
]
=
sum
;
for
(
int
i
=
1
;
i
<
workGroupSize
;
i
*=
2
)
{
__syncthreads
();
if
(
thread
%
(
i
*
2
)
==
0
&&
thread
+
i
<
workGroupSize
)
{
tempBuffer
[
thread
].
x
+=
tempBuffer
[
thread
+
i
].
x
;
tempBuffer
[
thread
].
y
+=
tempBuffer
[
thread
+
i
].
y
;
}
}
if
(
thread
==
0
)
*
result
=
tempBuffer
[
0
];
}
platforms/opencl/include/OpenCLKernels.h
View file @
ca4c03c3
...
@@ -428,109 +428,6 @@ private:
...
@@ -428,109 +428,6 @@ private:
cl
::
Kernel
copyStateKernel
,
copyForcesKernel
,
addForcesKernel
;
cl
::
Kernel
copyStateKernel
,
copyForcesKernel
,
addForcesKernel
;
};
};
/*
* This kernel is invoked by NoseHooverIntegrator to take one time step.
*/
class
OpenCLIntegrateVelocityVerletStepKernel
:
public
IntegrateVelocityVerletStepKernel
{
public:
OpenCLIntegrateVelocityVerletStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
IntegrateVelocityVerletStepKernel
(
name
,
platform
),
cl
(
cl
)
{
}
~
OpenCLIntegrateVelocityVerletStepKernel
()
{}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param integrator the NoseHooverIntegrator this kernel will be used for
*/
void
initialize
(
const
System
&
system
,
const
NoseHooverIntegrator
&
integrator
);
/**
* Execute the kernel.
*
* @param context the context in which to execute this kernel
* @param integrator the VerletIntegrator this kernel is being used for
* @param forcesAreValid a reference to the parent integrator's boolean for keeping
* track of the validity of the current forces.
*/
void
execute
(
ContextImpl
&
context
,
const
NoseHooverIntegrator
&
integrator
,
bool
&
forcesAreValid
);
/**
* Compute the kinetic energy.
*
* @param context the context in which to execute this kernel
* @param integrator the NoseHooverIntegrator this kernel is being used for
*/
double
computeKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverIntegrator
&
integrator
);
private:
OpenCLContext
&
cl
;
float
prevMaxPairDistance
;
OpenCLArray
maxPairDistanceBuffer
,
pairListBuffer
,
atomListBuffer
,
pairTemperatureBuffer
;
cl
::
Kernel
kernel1
,
kernel2
,
kernel3
,
kernelHardWall
;
};
/**
* This kernel is invoked by NoseHooverChain at the start of each time step to adjust the thermostat
* and update the associated particle velocities.
*/
class
OpenCLNoseHooverChainKernel
:
public
NoseHooverChainKernel
{
public:
OpenCLNoseHooverChainKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
NoseHooverChainKernel
(
name
,
platform
),
cl
(
cl
)
{
}
~
OpenCLNoseHooverChainKernel
()
{}
/**
* Initialize the kernel.
*/
void
initialize
();
/**
* Execute the kernel that propagates the Nose Hoover chain and determines the velocity scale factor.
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the object describing the chain to be propagated.
* @param kineticEnergies the {absolute, relative} kineticEnergy of the particles being thermostated by this chain.
* @param timeStep the time step used by the integrator.
* @return the {absolute, relative} velocity scale factor to apply to the particles associated with this heat bath.
*/
std
::
pair
<
double
,
double
>
propagateChain
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
std
::
pair
<
double
,
double
>
kineticEnergies
,
double
timeStep
);
/**
* Execute the kernal that computes the total (kinetic + potential) heat bath energy.
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @return the total heat bath energy.
*/
double
computeHeatBathEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
);
/**
* Execute the kernel that computes the kinetic energy for a subset of atoms,
* or the relative kinetic energy of Drude particles with respect to their parent atoms
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @param downloadValue whether the computed value should be downloaded and returned.
*
*/
std
::
pair
<
double
,
double
>
computeMaskedKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
noseHooverChain
,
bool
downloadValue
);
/**
* Execute the kernel that scales the velocities of particles associated with a nose hoover chain
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @param scaleFactors the {absolute, relative} multiplicative factor by which velocities are scaled.
*/
void
scaleVelocities
(
ContextImpl
&
context
,
const
NoseHooverChain
&
noseHooverChain
,
std
::
pair
<
double
,
double
>
scaleFactors
);
private:
int
sumWorkGroupSize
;
OpenCLContext
&
cl
;
OpenCLArray
energyBuffer
,
scaleFactorBuffer
,
kineticEnergyBuffer
,
chainMasses
,
chainForces
,
heatBathEnergy
;
std
::
map
<
int
,
OpenCLArray
>
atomlists
,
pairlists
;
std
::
map
<
int
,
cl
::
Kernel
>
propagateKernels
;
cl
::
Kernel
reduceEnergyKernel
;
cl
::
Kernel
computeHeatBathEnergyKernel
;
cl
::
Kernel
computeAtomsKineticEnergyKernel
;
cl
::
Kernel
computePairsKineticEnergyKernel
;
cl
::
Kernel
scaleAtomsVelocitiesKernel
;
cl
::
Kernel
scalePairsVelocitiesKernel
;
};
/**
/**
* This kernel is invoked by MonteCarloBarostat to adjust the periodic box volume
* This kernel is invoked by MonteCarloBarostat to adjust the periodic box volume
*/
*/
...
...
platforms/opencl/src/OpenCLKernelFactory.cpp
View file @
ca4c03c3
...
@@ -132,9 +132,9 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
...
@@ -132,9 +132,9 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
if
(
name
==
ApplyAndersenThermostatKernel
::
Name
())
if
(
name
==
ApplyAndersenThermostatKernel
::
Name
())
return
new
CommonApplyAndersenThermostatKernel
(
name
,
platform
,
cl
);
return
new
CommonApplyAndersenThermostatKernel
(
name
,
platform
,
cl
);
if
(
name
==
NoseHooverChainKernel
::
Name
())
if
(
name
==
NoseHooverChainKernel
::
Name
())
return
new
OpenCL
NoseHooverChainKernel
(
name
,
platform
,
cl
);
return
new
Common
NoseHooverChainKernel
(
name
,
platform
,
cl
);
if
(
name
==
IntegrateVelocityVerletStepKernel
::
Name
())
if
(
name
==
IntegrateVelocityVerletStepKernel
::
Name
())
return
new
OpenCL
IntegrateVelocityVerletStepKernel
(
name
,
platform
,
cl
);
return
new
Common
IntegrateVelocityVerletStepKernel
(
name
,
platform
,
cl
);
if
(
name
==
ApplyMonteCarloBarostatKernel
::
Name
())
if
(
name
==
ApplyMonteCarloBarostatKernel
::
Name
())
return
new
OpenCLApplyMonteCarloBarostatKernel
(
name
,
platform
,
cl
);
return
new
OpenCLApplyMonteCarloBarostatKernel
(
name
,
platform
,
cl
);
if
(
name
==
RemoveCMMotionKernel
::
Name
())
if
(
name
==
RemoveCMMotionKernel
::
Name
())
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
ca4c03c3
...
@@ -1667,151 +1667,6 @@ void OpenCLCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx,
...
@@ -1667,151 +1667,6 @@ void OpenCLCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx,
}
}
}
}
void
OpenCLIntegrateVelocityVerletStepKernel
::
initialize
(
const
System
&
system
,
const
NoseHooverIntegrator
&
integrator
)
{
cl
.
getPlatformData
().
initializeContexts
(
system
);
map
<
string
,
string
>
defines
;
defines
[
"BOLTZ"
]
=
cl
.
doubleToString
(
BOLTZ
);
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
velocityVerlet
,
defines
,
""
);
kernel1
=
cl
::
Kernel
(
program
,
"integrateVelocityVerletPart1"
);
kernel2
=
cl
::
Kernel
(
program
,
"integrateVelocityVerletPart2"
);
kernel3
=
cl
::
Kernel
(
program
,
"integrateVelocityVerletPart3"
);
kernelHardWall
=
cl
::
Kernel
(
program
,
"integrateVelocityVerletHardWall"
);
prevMaxPairDistance
=
(
cl_float
)
-
1.0
;
maxPairDistanceBuffer
.
initialize
<
cl_float
>
(
cl
,
1
,
"maxPairDistanceBuffer"
);
}
void
OpenCLIntegrateVelocityVerletStepKernel
::
execute
(
ContextImpl
&
context
,
const
NoseHooverIntegrator
&
integrator
,
bool
&
forcesAreValid
)
{
OpenCLIntegrationUtilities
&
integration
=
cl
.
getIntegrationUtilities
();
int
paddedNumAtoms
=
cl
.
getPaddedNumAtoms
();
double
dt
=
integrator
.
getStepSize
();
cl
.
getIntegrationUtilities
().
setNextStepSize
(
dt
);
if
(
!
forcesAreValid
)
context
.
calcForcesAndEnergy
(
true
,
false
);
const
auto
&
atomList
=
integrator
.
getAllThermostatedIndividualParticles
();
const
auto
&
pairList
=
integrator
.
getAllThermostatedPairs
();
int
numAtoms
=
atomList
.
size
();
int
numPairs
=
pairList
.
size
();
int
numParticles
=
numAtoms
+
2
*
numPairs
;
float
maxPairDistance
=
integrator
.
getMaximumPairDistance
();
// Make sure atom and pair metadata is uploaded and has the correct dimensions
if
(
prevMaxPairDistance
!=
maxPairDistance
)
{
std
::
vector
<
float
>
tmp
(
1
,
maxPairDistance
);
maxPairDistanceBuffer
.
upload
(
tmp
);
prevMaxPairDistance
=
maxPairDistance
;
}
if
(
numAtoms
!=
0
&&
(
!
atomListBuffer
.
isInitialized
()
||
atomListBuffer
.
getSize
()
!=
numAtoms
))
{
if
(
atomListBuffer
.
isInitialized
())
atomListBuffer
.
resize
(
atomList
.
size
());
else
atomListBuffer
.
initialize
<
cl_int
>
(
cl
,
atomList
.
size
(),
"atomListBuffer"
);
atomListBuffer
.
upload
(
atomList
);
}
if
(
numPairs
!=
0
&&
(
!
pairListBuffer
.
isInitialized
()
||
pairListBuffer
.
getSize
()
!=
numPairs
))
{
if
(
pairListBuffer
.
isInitialized
())
{
pairListBuffer
.
resize
(
pairList
.
size
());
pairTemperatureBuffer
.
resize
(
pairList
.
size
());
}
else
{
pairListBuffer
.
initialize
<
mm_int2
>
(
cl
,
pairList
.
size
(),
"pairListBuffer"
);
pairTemperatureBuffer
.
initialize
<
cl_float
>
(
cl
,
pairList
.
size
(),
"pairTemperatureBuffer"
);
}
std
::
vector
<
mm_int2
>
tmp
;
std
::
vector
<
float
>
tmp2
;
for
(
const
auto
&
pair
:
pairList
)
{
tmp
.
push_back
(
mm_int2
(
std
::
get
<
0
>
(
pair
),
std
::
get
<
1
>
(
pair
)));
tmp2
.
push_back
(
std
::
get
<
2
>
(
pair
));
}
pairListBuffer
.
upload
(
tmp
);
pairTemperatureBuffer
.
upload
(
tmp2
);
}
//// Call the first integration kernel.
kernel1
.
setArg
<
cl_int
>
(
0
,
numAtoms
);
kernel1
.
setArg
<
cl_int
>
(
1
,
numPairs
);
kernel1
.
setArg
<
cl_int
>
(
2
,
paddedNumAtoms
);
kernel1
.
setArg
<
cl
::
Buffer
>
(
3
,
cl
.
getIntegrationUtilities
().
getStepSize
().
getDeviceBuffer
());
kernel1
.
setArg
<
cl
::
Buffer
>
(
4
,
cl
.
getPosq
().
getDeviceBuffer
());
setPosqCorrectionArg
(
cl
,
kernel1
,
5
);
kernel1
.
setArg
<
cl
::
Buffer
>
(
6
,
cl
.
getVelm
().
getDeviceBuffer
());
kernel1
.
setArg
<
cl
::
Buffer
>
(
7
,
cl
.
getForce
().
getDeviceBuffer
());
kernel1
.
setArg
<
cl
::
Buffer
>
(
8
,
integration
.
getPosDelta
().
getDeviceBuffer
());
if
(
numAtoms
>
0
)
kernel1
.
setArg
<
cl
::
Buffer
>
(
9
,
atomListBuffer
.
getDeviceBuffer
());
else
kernel1
.
setArg
<
void
*>
(
9
,
NULL
);
if
(
numPairs
>
0
)
kernel1
.
setArg
<
cl
::
Buffer
>
(
10
,
pairListBuffer
.
getDeviceBuffer
());
else
kernel1
.
setArg
<
void
*>
(
10
,
NULL
);
cl
.
executeKernel
(
kernel1
,
std
::
max
(
numAtoms
,
numPairs
));
//// Apply constraints.
integration
.
applyConstraints
(
integrator
.
getConstraintTolerance
());
//// Call the second integration kernel.
kernel2
.
setArg
<
cl_int
>
(
0
,
numParticles
);
kernel2
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getIntegrationUtilities
().
getStepSize
().
getDeviceBuffer
());
kernel2
.
setArg
<
cl
::
Buffer
>
(
2
,
cl
.
getPosq
().
getDeviceBuffer
());
setPosqCorrectionArg
(
cl
,
kernel2
,
3
);
kernel2
.
setArg
<
cl
::
Buffer
>
(
4
,
cl
.
getVelm
().
getDeviceBuffer
());
kernel2
.
setArg
<
cl
::
Buffer
>
(
5
,
integration
.
getPosDelta
().
getDeviceBuffer
());
cl
.
executeKernel
(
kernel2
,
numParticles
);
if
(
numPairs
>
0
)
{
//// Enforce hard wall constraint
kernelHardWall
.
setArg
<
cl_int
>
(
0
,
numPairs
);
kernelHardWall
.
setArg
<
cl
::
Buffer
>
(
1
,
maxPairDistanceBuffer
.
getDeviceBuffer
());
kernelHardWall
.
setArg
<
cl
::
Buffer
>
(
2
,
cl
.
getIntegrationUtilities
().
getStepSize
().
getDeviceBuffer
());
kernelHardWall
.
setArg
<
cl
::
Buffer
>
(
3
,
cl
.
getPosq
().
getDeviceBuffer
());
setPosqCorrectionArg
(
cl
,
kernelHardWall
,
4
);
kernelHardWall
.
setArg
<
cl
::
Buffer
>
(
5
,
cl
.
getVelm
().
getDeviceBuffer
());
kernelHardWall
.
setArg
<
cl
::
Buffer
>
(
6
,
pairListBuffer
.
getDeviceBuffer
());
kernelHardWall
.
setArg
<
cl
::
Buffer
>
(
7
,
pairTemperatureBuffer
.
getDeviceBuffer
());
cl
.
executeKernel
(
kernelHardWall
,
numPairs
);
}
integration
.
computeVirtualSites
();
//// Update forces
context
.
calcForcesAndEnergy
(
true
,
false
);
forcesAreValid
=
true
;
//// Call the third integration kernel.
kernel3
.
setArg
<
cl_int
>
(
0
,
numAtoms
);
kernel3
.
setArg
<
cl_int
>
(
1
,
numPairs
);
kernel3
.
setArg
<
cl_int
>
(
2
,
paddedNumAtoms
);
kernel3
.
setArg
<
cl
::
Buffer
>
(
3
,
cl
.
getIntegrationUtilities
().
getStepSize
().
getDeviceBuffer
());
kernel3
.
setArg
<
cl
::
Buffer
>
(
4
,
cl
.
getPosq
().
getDeviceBuffer
());
setPosqCorrectionArg
(
cl
,
kernel3
,
5
);
kernel3
.
setArg
<
cl
::
Buffer
>
(
6
,
cl
.
getVelm
().
getDeviceBuffer
());
kernel3
.
setArg
<
cl
::
Buffer
>
(
7
,
cl
.
getForce
().
getDeviceBuffer
());
kernel3
.
setArg
<
cl
::
Buffer
>
(
8
,
integration
.
getPosDelta
().
getDeviceBuffer
());
if
(
numAtoms
>
0
)
kernel3
.
setArg
<
cl
::
Buffer
>
(
9
,
atomListBuffer
.
getDeviceBuffer
());
else
kernel3
.
setArg
<
void
*>
(
9
,
NULL
);
if
(
numPairs
>
0
)
kernel3
.
setArg
<
cl
::
Buffer
>
(
10
,
pairListBuffer
.
getDeviceBuffer
());
else
kernel3
.
setArg
<
void
*>
(
10
,
NULL
);
cl
.
executeKernel
(
kernel3
,
std
::
max
(
numAtoms
,
numPairs
));
integration
.
applyVelocityConstraints
(
integrator
.
getConstraintTolerance
());
//// Update the time and step count.
cl
.
setTime
(
cl
.
getTime
()
+
dt
);
cl
.
setStepCount
(
cl
.
getStepCount
()
+
1
);
cl
.
reorderAtoms
();
}
double
OpenCLIntegrateVelocityVerletStepKernel
::
computeKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverIntegrator
&
integrator
)
{
return
cl
.
getIntegrationUtilities
().
computeKineticEnergy
(
0
);
}
class
OpenCLCalcCustomCVForceKernel
::
ForceInfo
:
public
OpenCLForceInfo
{
class
OpenCLCalcCustomCVForceKernel
::
ForceInfo
:
public
OpenCLForceInfo
{
public:
public:
ForceInfo
(
ComputeForceInfo
&
force
)
:
OpenCLForceInfo
(
0
),
force
(
force
)
{
ForceInfo
(
ComputeForceInfo
&
force
)
:
OpenCLForceInfo
(
0
),
force
(
force
)
{
...
@@ -2037,390 +1892,6 @@ void OpenCLCalcCustomCVForceKernel::copyParametersToContext(ContextImpl& context
...
@@ -2037,390 +1892,6 @@ void OpenCLCalcCustomCVForceKernel::copyParametersToContext(ContextImpl& context
delete
function
.
second
;
delete
function
.
second
;
}
}
void
OpenCLNoseHooverChainKernel
::
initialize
()
{
bool
useDouble
=
cl
.
getUseDoublePrecision
()
||
cl
.
getUseMixedPrecision
();
map
<
string
,
string
>
defines
;
defines
[
"BEGIN_YS_LOOP"
]
=
"const real arr[1] = {1.0}; for(int i=0;i<1;++i) { const real ys = arr[i];"
;
defines
[
"END_YS_LOOP"
]
=
"}"
;
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
noseHooverChain
,
defines
);
propagateKernels
[
1
]
=
cl
::
Kernel
(
program
,
"propagateNoseHooverChain"
);
defines
[
"BEGIN_YS_LOOP"
]
=
"const real arr[3] = {0.828981543588751, -0.657963087177502, 0.828981543588751}; for(int i=0;i<3;++i) { const real ys = arr[i];"
;
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
noseHooverChain
,
defines
);
propagateKernels
[
3
]
=
cl
::
Kernel
(
program
,
"propagateNoseHooverChain"
);
defines
[
"BEGIN_YS_LOOP"
]
=
"const real arr[5] = {0.2967324292201065, 0.2967324292201065, -0.186929716880426, 0.2967324292201065, 0.2967324292201065}; for(int i=0;i<5;++i) { const real ys = arr[i];"
;
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
noseHooverChain
,
defines
);
propagateKernels
[
5
]
=
cl
::
Kernel
(
program
,
"propagateNoseHooverChain"
);
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
noseHooverChain
,
defines
);
reduceEnergyKernel
=
cl
::
Kernel
(
program
,
"reduceEnergyPair"
);
computeHeatBathEnergyKernel
=
cl
::
Kernel
(
program
,
"computeHeatBathEnergy"
);
computeAtomsKineticEnergyKernel
=
cl
::
Kernel
(
program
,
"computeAtomsKineticEnergy"
);
computePairsKineticEnergyKernel
=
cl
::
Kernel
(
program
,
"computePairsKineticEnergy"
);
scaleAtomsVelocitiesKernel
=
cl
::
Kernel
(
program
,
"scaleAtomsVelocities"
);
scalePairsVelocitiesKernel
=
cl
::
Kernel
(
program
,
"scalePairsVelocities"
);
int
energyBufferSize
=
cl
.
getEnergyBuffer
().
getSize
();
if
(
cl
.
getUseDoublePrecision
()
||
cl
.
getUseMixedPrecision
())
energyBuffer
.
initialize
<
mm_double2
>
(
cl
,
energyBufferSize
,
"energyBuffer"
);
else
energyBuffer
.
initialize
<
mm_float2
>
(
cl
,
energyBufferSize
,
"energyBuffer"
);
}
std
::
pair
<
double
,
double
>
OpenCLNoseHooverChainKernel
::
propagateChain
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
std
::
pair
<
double
,
double
>
kineticEnergies
,
double
timeStep
)
{
bool
useDouble
=
cl
.
getUseDoublePrecision
()
||
cl
.
getUseMixedPrecision
();
int
chainID
=
nhc
.
getChainID
();
int
nAtoms
=
nhc
.
getThermostatedAtoms
().
size
();
int
nPairs
=
nhc
.
getThermostatedPairs
().
size
();
int
chainLength
=
nhc
.
getChainLength
();
int
numYS
=
nhc
.
getNumYoshidaSuzukiTimeSteps
();
int
numMTS
=
nhc
.
getNumMultiTimeSteps
();
int
numDOFs
=
nhc
.
getNumDegreesOfFreedom
();
double
temperature
=
nhc
.
getTemperature
();
double
frequency
=
nhc
.
getCollisionFrequency
();
double
relativeTemperature
=
nhc
.
getRelativeTemperature
();
double
relativeFrequency
=
nhc
.
getRelativeCollisionFrequency
();
if
(
numYS
!=
1
&&
numYS
!=
3
&&
numYS
!=
5
)
{
throw
OpenMMException
(
"Number of Yoshida Suzuki time steps has to be 1, 3, or 5."
);
}
auto
&
chainState
=
cl
.
getIntegrationUtilities
().
getNoseHooverChainState
();
if
(
!
scaleFactorBuffer
.
isInitialized
()
||
scaleFactorBuffer
.
getSize
()
==
0
)
{
if
(
useDouble
)
{
std
::
vector
<
mm_double2
>
zeros
{{
0
,
0
}};
if
(
scaleFactorBuffer
.
isInitialized
())
scaleFactorBuffer
.
resize
(
1
);
else
scaleFactorBuffer
.
initialize
<
mm_double2
>
(
cl
,
1
,
"scaleFactorBuffer"
);
scaleFactorBuffer
.
upload
(
zeros
);
}
else
{
std
::
vector
<
mm_float2
>
zeros
{{
0
,
0
}};
if
(
scaleFactorBuffer
.
isInitialized
())
scaleFactorBuffer
.
resize
(
1
);
else
scaleFactorBuffer
.
initialize
<
mm_float2
>
(
cl
,
1
,
"scaleFactorBuffer"
);
scaleFactorBuffer
.
upload
(
zeros
);
}
}
if
(
!
chainForces
.
isInitialized
()
||
!
chainMasses
.
isInitialized
())
{
if
(
useDouble
)
{
std
::
vector
<
cl_double
>
zeros
(
chainLength
,
0
);
if
(
chainForces
.
isInitialized
())
{
chainMasses
.
resize
(
chainLength
);
chainForces
.
resize
(
chainLength
);
}
else
{
chainMasses
.
initialize
<
cl_double
>
(
cl
,
chainLength
,
"chainMasses"
);
chainForces
.
initialize
<
cl_double
>
(
cl
,
chainLength
,
"chainForces"
);
}
chainMasses
.
upload
(
zeros
);
chainForces
.
upload
(
zeros
);
}
else
{
std
::
vector
<
cl_float
>
zeros
(
chainLength
,
0
);
if
(
chainForces
.
isInitialized
())
{
chainMasses
.
resize
(
chainLength
);
chainForces
.
resize
(
chainLength
);
}
else
{
chainMasses
.
initialize
<
cl_float
>
(
cl
,
chainLength
,
"chainMasses"
);
chainForces
.
initialize
<
cl_float
>
(
cl
,
chainLength
,
"chainForces"
);
}
chainMasses
.
upload
(
zeros
);
chainForces
.
upload
(
zeros
);
}
}
if
(
chainForces
.
getSize
()
<
chainLength
)
chainMasses
.
resize
(
chainLength
);
if
(
chainMasses
.
getSize
()
<
chainLength
)
chainMasses
.
resize
(
chainLength
);
float
timeStepFloat
=
(
float
)
timeStep
;
// N.B. We ignore the incoming kineticEnergy and grab it from the device buffer instead
if
(
nAtoms
)
{
if
(
!
chainState
.
count
(
2
*
chainID
))
chainState
[
2
*
chainID
]
=
ComputeArray
();
if
(
!
chainState
.
at
(
2
*
chainID
).
isInitialized
()
||
chainState
.
at
(
2
*
chainID
).
getSize
()
!=
chainLength
)
{
// We need to upload the OpenCL array
if
(
useDouble
)
{
if
(
chainState
.
at
(
2
*
chainID
).
isInitialized
())
chainState
.
at
(
2
*
chainID
).
resize
(
chainLength
);
else
chainState
.
at
(
2
*
chainID
).
initialize
<
mm_double2
>
(
cl
,
chainLength
,
"chainState"
+
std
::
to_string
(
2
*
chainID
));
std
::
vector
<
mm_double2
>
zeros
(
chainLength
,
mm_double2
(
0.0
,
0.0
));
chainState
.
at
(
2
*
chainID
).
upload
(
zeros
.
data
());
}
else
{
if
(
chainState
.
at
(
2
*
chainID
).
isInitialized
())
chainState
.
at
(
2
*
chainID
).
resize
(
chainLength
);
else
chainState
.
at
(
2
*
chainID
).
initialize
<
mm_float2
>
(
cl
,
chainLength
,
"chainState"
+
std
::
to_string
(
2
*
chainID
));
std
::
vector
<
mm_float2
>
zeros
(
chainLength
,
mm_float2
(
0.0
f
,
0.0
f
));
chainState
.
at
(
2
*
chainID
).
upload
(
zeros
.
data
());
}
}
int
chainType
=
0
;
double
kT
=
BOLTZ
*
temperature
;
float
kTfloat
=
(
float
)
kT
;
float
frequencyFloat
=
(
float
)
frequency
;
propagateKernels
[
numYS
].
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
unwrap
(
chainState
[
2
*
chainID
]).
getDeviceBuffer
());
propagateKernels
[
numYS
].
setArg
<
cl
::
Buffer
>
(
1
,
kineticEnergyBuffer
.
getDeviceBuffer
());
propagateKernels
[
numYS
].
setArg
<
cl
::
Buffer
>
(
2
,
scaleFactorBuffer
.
getDeviceBuffer
());
propagateKernels
[
numYS
].
setArg
<
cl
::
Buffer
>
(
3
,
chainMasses
.
getDeviceBuffer
());
propagateKernels
[
numYS
].
setArg
<
cl
::
Buffer
>
(
4
,
chainForces
.
getDeviceBuffer
());
propagateKernels
[
numYS
].
setArg
<
cl_int
>
(
5
,
chainType
);
propagateKernels
[
numYS
].
setArg
<
cl_int
>
(
6
,
chainLength
);
propagateKernels
[
numYS
].
setArg
<
cl_int
>
(
7
,
numMTS
);
propagateKernels
[
numYS
].
setArg
<
cl_int
>
(
8
,
numDOFs
);
propagateKernels
[
numYS
].
setArg
<
cl_float
>
(
9
,
timeStepFloat
);
if
(
useDouble
)
propagateKernels
[
numYS
].
setArg
<
cl_double
>
(
10
,
kT
);
else
propagateKernels
[
numYS
].
setArg
<
cl_float
>
(
10
,
kTfloat
);
propagateKernels
[
numYS
].
setArg
<
cl_float
>
(
11
,
frequencyFloat
);
cl
.
executeKernel
(
propagateKernels
[
numYS
],
1
,
1
);
}
if
(
nPairs
)
{
if
(
!
chainState
.
count
(
2
*
chainID
+
1
))
chainState
[
2
*
chainID
+
1
]
=
ComputeArray
();
if
(
!
chainState
.
at
(
2
*
chainID
+
1
).
isInitialized
()
||
chainState
.
at
(
2
*
chainID
+
1
).
getSize
()
!=
chainLength
)
{
// We need to upload the OpenCL array
if
(
useDouble
)
{
if
(
chainState
.
at
(
2
*
chainID
+
1
).
isInitialized
())
chainState
.
at
(
2
*
chainID
+
1
).
resize
(
chainLength
);
else
chainState
.
at
(
2
*
chainID
+
1
).
initialize
<
mm_double2
>
(
cl
,
chainLength
,
"chainState"
+
std
::
to_string
(
2
*
chainID
+
1
));
std
::
vector
<
mm_double2
>
zeros
(
chainLength
,
mm_double2
(
0.0
,
0.0
));
chainState
.
at
(
2
*
chainID
+
1
).
upload
(
zeros
.
data
());
}
else
{
if
(
chainState
.
at
(
2
*
chainID
+
1
).
isInitialized
())
chainState
.
at
(
2
*
chainID
+
1
).
resize
(
chainLength
);
else
chainState
.
at
(
2
*
chainID
+
1
).
initialize
<
mm_float2
>
(
cl
,
chainLength
,
"chainState"
+
std
::
to_string
(
2
*
chainID
+
1
));
std
::
vector
<
mm_float2
>
zeros
(
chainLength
,
mm_float2
(
0.0
f
,
0.0
f
));
chainState
.
at
(
2
*
chainID
+
1
).
upload
(
zeros
.
data
());
}
}
int
chainType
=
1
;
double
kT
=
BOLTZ
*
relativeTemperature
;
int
ndf
=
3
*
nPairs
;
float
kTfloat
=
(
float
)
kT
;
float
frequencyFloat
=
(
float
)
relativeFrequency
;
propagateKernels
[
numYS
].
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
unwrap
(
chainState
[
2
*
chainID
+
1
]).
getDeviceBuffer
());
propagateKernels
[
numYS
].
setArg
<
cl
::
Buffer
>
(
1
,
kineticEnergyBuffer
.
getDeviceBuffer
());
propagateKernels
[
numYS
].
setArg
<
cl
::
Buffer
>
(
2
,
scaleFactorBuffer
.
getDeviceBuffer
());
propagateKernels
[
numYS
].
setArg
<
cl
::
Buffer
>
(
3
,
chainMasses
.
getDeviceBuffer
());
propagateKernels
[
numYS
].
setArg
<
cl
::
Buffer
>
(
4
,
chainForces
.
getDeviceBuffer
());
propagateKernels
[
numYS
].
setArg
<
cl_int
>
(
5
,
chainType
);
propagateKernels
[
numYS
].
setArg
<
cl_int
>
(
6
,
chainLength
);
propagateKernels
[
numYS
].
setArg
<
cl_int
>
(
7
,
numMTS
);
propagateKernels
[
numYS
].
setArg
<
cl_int
>
(
8
,
ndf
);
propagateKernels
[
numYS
].
setArg
<
cl_float
>
(
9
,
timeStepFloat
);
if
(
useDouble
)
propagateKernels
[
numYS
].
setArg
<
cl_double
>
(
10
,
kT
);
else
propagateKernels
[
numYS
].
setArg
<
cl_float
>
(
10
,
kTfloat
);
propagateKernels
[
numYS
].
setArg
<
cl_float
>
(
11
,
frequencyFloat
);
cl
.
executeKernel
(
propagateKernels
[
numYS
],
1
,
1
);
}
return
{
0
,
0
};
}
double
OpenCLNoseHooverChainKernel
::
computeHeatBathEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
)
{
bool
useDouble
=
cl
.
getUseDoublePrecision
()
||
cl
.
getUseMixedPrecision
();
int
chainID
=
nhc
.
getChainID
();
int
chainLength
=
nhc
.
getChainLength
();
auto
&
chainState
=
cl
.
getIntegrationUtilities
().
getNoseHooverChainState
();
bool
absChainIsValid
=
chainState
.
count
(
2
*
chainID
)
!=
0
&&
chainState
[
2
*
chainID
].
isInitialized
()
&&
chainState
[
2
*
chainID
].
getSize
()
==
chainLength
;
bool
relChainIsValid
=
chainState
.
count
(
2
*
chainID
+
1
)
!=
0
&&
chainState
[
2
*
chainID
+
1
].
isInitialized
()
&&
chainState
[
2
*
chainID
+
1
].
getSize
()
==
chainLength
;
if
(
!
absChainIsValid
&&
!
relChainIsValid
)
return
0.0
;
if
(
!
heatBathEnergy
.
isInitialized
()
||
heatBathEnergy
.
getSize
()
==
0
)
{
if
(
useDouble
)
{
std
::
vector
<
cl_double
>
one
(
1
);
heatBathEnergy
.
initialize
<
cl_double
>
(
cl
,
1
,
"heatBathEnergy"
);
heatBathEnergy
.
upload
(
one
);
}
else
{
std
::
vector
<
cl_float
>
one
(
1
);
heatBathEnergy
.
initialize
<
cl_float
>
(
cl
,
1
,
"heatBathEnergy"
);
heatBathEnergy
.
upload
(
one
);
}
}
cl
.
clearBuffer
(
heatBathEnergy
);
if
(
absChainIsValid
)
{
int
numDOFs
=
nhc
.
getNumDegreesOfFreedom
();
double
temperature
=
nhc
.
getTemperature
();
double
frequency
=
nhc
.
getCollisionFrequency
();
double
kT
=
BOLTZ
*
temperature
;
float
kTfloat
=
(
float
)
kT
;
float
frequencyFloat
=
(
float
)
frequency
;
computeHeatBathEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
heatBathEnergy
.
getDeviceBuffer
());
computeHeatBathEnergyKernel
.
setArg
<
cl_int
>
(
1
,
chainLength
);
computeHeatBathEnergyKernel
.
setArg
<
cl_int
>
(
2
,
numDOFs
);
if
(
useDouble
)
computeHeatBathEnergyKernel
.
setArg
<
cl_double
>
(
3
,
kT
);
else
computeHeatBathEnergyKernel
.
setArg
<
cl_float
>
(
3
,
kTfloat
);
computeHeatBathEnergyKernel
.
setArg
<
cl_float
>
(
4
,
frequencyFloat
);
computeHeatBathEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
cl
.
unwrap
(
chainState
[
2
*
chainID
]).
getDeviceBuffer
());
cl
.
executeKernel
(
computeHeatBathEnergyKernel
,
1
,
1
);
}
if
(
relChainIsValid
)
{
int
numDOFs
=
3
*
nhc
.
getThermostatedPairs
().
size
();
double
temperature
=
nhc
.
getRelativeTemperature
();
double
frequency
=
nhc
.
getRelativeCollisionFrequency
();
double
kT
=
BOLTZ
*
temperature
;
float
kTfloat
=
(
float
)
kT
;
float
frequencyFloat
=
(
float
)
frequency
;
computeHeatBathEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
heatBathEnergy
.
getDeviceBuffer
());
computeHeatBathEnergyKernel
.
setArg
<
cl_int
>
(
1
,
chainLength
);
computeHeatBathEnergyKernel
.
setArg
<
cl_int
>
(
2
,
numDOFs
);
if
(
useDouble
)
computeHeatBathEnergyKernel
.
setArg
<
cl_double
>
(
3
,
kT
);
else
computeHeatBathEnergyKernel
.
setArg
<
cl_float
>
(
3
,
kTfloat
);
computeHeatBathEnergyKernel
.
setArg
<
cl_float
>
(
4
,
frequencyFloat
);
computeHeatBathEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
cl
.
unwrap
(
chainState
[
2
*
chainID
+
1
]).
getDeviceBuffer
());
cl
.
executeKernel
(
computeHeatBathEnergyKernel
,
1
,
1
);
}
void
*
pinnedBuffer
=
cl
.
getPinnedBuffer
();
heatBathEnergy
.
download
(
pinnedBuffer
);
if
(
useDouble
)
return
*
((
double
*
)
pinnedBuffer
);
else
return
*
((
float
*
)
pinnedBuffer
);
}
std
::
pair
<
double
,
double
>
OpenCLNoseHooverChainKernel
::
computeMaskedKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
bool
downloadValue
)
{
bool
useDouble
=
cl
.
getUseDoublePrecision
()
||
cl
.
getUseMixedPrecision
();
int
chainID
=
nhc
.
getChainID
();
const
auto
&
nhcAtoms
=
nhc
.
getThermostatedAtoms
();
const
auto
&
nhcPairs
=
nhc
.
getThermostatedPairs
();
auto
nAtoms
=
nhcAtoms
.
size
();
auto
nPairs
=
nhcPairs
.
size
();
if
(
nAtoms
)
{
if
(
!
atomlists
.
count
(
chainID
))
{
// We need to upload the OpenCL array
atomlists
[
chainID
]
=
OpenCLArray
();
atomlists
[
chainID
].
initialize
<
int
>
(
cl
,
nAtoms
,
"atomlist"
+
std
::
to_string
(
chainID
));
atomlists
[
chainID
].
upload
(
nhcAtoms
);
}
if
(
atomlists
[
chainID
].
getSize
()
!=
nAtoms
)
{
throw
OpenMMException
(
"Number of atoms changed. Cannot be handled by the same Nose-Hoover thermostat."
);
}
}
if
(
nPairs
)
{
if
(
!
pairlists
.
count
(
chainID
))
{
// We need to upload the OpenCL array
pairlists
[
chainID
]
=
OpenCLArray
();
pairlists
[
chainID
].
initialize
<
mm_int2
>
(
cl
,
nPairs
,
"pairlist"
+
std
::
to_string
(
chainID
));
std
::
vector
<
mm_int2
>
int2vec
;
for
(
const
auto
&
p
:
nhcPairs
)
int2vec
.
push_back
(
mm_int2
(
p
.
first
,
p
.
second
));
pairlists
[
chainID
].
upload
(
int2vec
);
}
if
(
pairlists
[
chainID
].
getSize
()
!=
nPairs
)
{
throw
OpenMMException
(
"Number of thermostated pairs changed. Cannot be handled by the same Nose-Hoover thermostat."
);
}
}
if
(
!
kineticEnergyBuffer
.
isInitialized
()
||
kineticEnergyBuffer
.
getSize
()
==
0
)
{
if
(
useDouble
)
{
std
::
vector
<
mm_double2
>
zeros
{{
0
,
0
}};
kineticEnergyBuffer
.
initialize
<
mm_double2
>
(
cl
,
1
,
"kineticEnergyBuffer"
);
kineticEnergyBuffer
.
upload
(
zeros
);
}
else
{
std
::
vector
<
mm_float2
>
zeros
{{
0
,
0
}};
kineticEnergyBuffer
.
initialize
<
mm_float2
>
(
cl
,
1
,
"kineticEnergyBuffer"
);
kineticEnergyBuffer
.
upload
(
zeros
);
}
}
cl
.
clearBuffer
(
cl
.
getEnergyBuffer
());
if
(
nAtoms
)
{
computeAtomsKineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
energyBuffer
.
getDeviceBuffer
());
computeAtomsKineticEnergyKernel
.
setArg
<
cl_int
>
(
1
,
nAtoms
);
computeAtomsKineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
cl
.
getVelm
().
getDeviceBuffer
());
computeAtomsKineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
atomlists
[
chainID
].
getDeviceBuffer
());
cl
.
executeKernel
(
computeAtomsKineticEnergyKernel
,
nAtoms
);
}
if
(
nPairs
)
{
computePairsKineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
energyBuffer
.
getDeviceBuffer
());
computePairsKineticEnergyKernel
.
setArg
<
cl_int
>
(
1
,
nPairs
);
computePairsKineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
cl
.
getVelm
().
getDeviceBuffer
());
computePairsKineticEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
pairlists
[
chainID
].
getDeviceBuffer
());
cl
.
executeKernel
(
computePairsKineticEnergyKernel
,
nPairs
);
}
int
bufferSize
=
energyBuffer
.
getSize
();
int
workGroupSize
=
cl
.
getDevice
().
getInfo
<
CL_DEVICE_MAX_WORK_GROUP_SIZE
>
();
if
(
workGroupSize
>
512
)
workGroupSize
=
512
;
reduceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
energyBuffer
.
getDeviceBuffer
());
reduceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
kineticEnergyBuffer
.
getDeviceBuffer
());
reduceEnergyKernel
.
setArg
<
cl_int
>
(
2
,
bufferSize
);
reduceEnergyKernel
.
setArg
<
cl_int
>
(
3
,
workGroupSize
);
reduceEnergyKernel
.
setArg
(
4
,
workGroupSize
*
energyBuffer
.
getElementSize
(),
NULL
);
cl
.
executeKernel
(
reduceEnergyKernel
,
workGroupSize
,
workGroupSize
);
std
::
pair
<
double
,
double
>
KEs
=
{
0
,
0
};
if
(
downloadValue
)
{
if
(
useDouble
)
{
mm_double2
tmp
;
kineticEnergyBuffer
.
download
(
&
tmp
);
KEs
.
first
=
tmp
.
x
;
KEs
.
second
=
tmp
.
y
;
}
else
{
mm_float2
tmp
;
kineticEnergyBuffer
.
download
(
&
tmp
);
KEs
.
first
=
tmp
.
x
;
KEs
.
second
=
tmp
.
y
;
}
}
return
KEs
;
}
void
OpenCLNoseHooverChainKernel
::
scaleVelocities
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
std
::
pair
<
double
,
double
>
scaleFactor
)
{
// For now we assume that the atoms and pairs info is valid, because compute{Atoms|Pairs}KineticEnergy must have been
// called before this kernel. If that ever ceases to be true, some sanity checks are needed here.
int
chainID
=
nhc
.
getChainID
();
auto
nAtoms
=
nhc
.
getThermostatedAtoms
().
size
();
auto
nPairs
=
nhc
.
getThermostatedPairs
().
size
();
if
(
nAtoms
)
{
scaleAtomsVelocitiesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
scaleFactorBuffer
.
getDeviceBuffer
());
scaleAtomsVelocitiesKernel
.
setArg
<
cl_int
>
(
1
,
nAtoms
);
scaleAtomsVelocitiesKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
cl
.
getVelm
().
getDeviceBuffer
());
scaleAtomsVelocitiesKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
atomlists
[
chainID
].
getDeviceBuffer
());
cl
.
executeKernel
(
scaleAtomsVelocitiesKernel
,
nAtoms
);
}
if
(
nPairs
)
{
scalePairsVelocitiesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
scaleFactorBuffer
.
getDeviceBuffer
());
scalePairsVelocitiesKernel
.
setArg
<
cl_int
>
(
1
,
nPairs
);
scalePairsVelocitiesKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
cl
.
getVelm
().
getDeviceBuffer
());
scalePairsVelocitiesKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
pairlists
[
chainID
].
getDeviceBuffer
());
cl
.
executeKernel
(
scalePairsVelocitiesKernel
,
nPairs
);
}
}
void
OpenCLApplyMonteCarloBarostatKernel
::
initialize
(
const
System
&
system
,
const
Force
&
thermostat
)
{
void
OpenCLApplyMonteCarloBarostatKernel
::
initialize
(
const
System
&
system
,
const
Force
&
thermostat
)
{
savedPositions
.
initialize
(
cl
,
cl
.
getPaddedNumAtoms
(),
cl
.
getUseDoublePrecision
()
?
sizeof
(
mm_double4
)
:
sizeof
(
mm_float4
),
"savedPositions"
);
savedPositions
.
initialize
(
cl
,
cl
.
getPaddedNumAtoms
(),
cl
.
getUseDoublePrecision
()
?
sizeof
(
mm_double4
)
:
sizeof
(
mm_float4
),
"savedPositions"
);
savedForces
.
initialize
(
cl
,
cl
.
getPaddedNumAtoms
(),
cl
.
getUseDoublePrecision
()
?
sizeof
(
mm_double4
)
:
sizeof
(
mm_float4
),
"savedForces"
);
savedForces
.
initialize
(
cl
,
cl
.
getPaddedNumAtoms
(),
cl
.
getUseDoublePrecision
()
?
sizeof
(
mm_double4
)
:
sizeof
(
mm_float4
),
"savedForces"
);
...
...
platforms/opencl/src/kernels/velocityVerlet.cl
deleted
100644 → 0
View file @
d169badc
/**
*
Perform
the
first
step
of
Velocity
Verlet
integration.
*/
__kernel
void
integrateVelocityVerletPart1
(
int
numAtoms,
int
numPairs,
int
paddedNumAtoms,
__global
const
mixed2*
restrict
dt,
__global
const
real4*
restrict
posq,
__global
const
real4*
restrict
posqCorrection,
__global
mixed4*
restrict
velm,
__global
const
real4*
restrict
force,
__global
mixed4*
restrict
posDelta,
__global
const
int*
restrict
atomList,
__global
const
int2*
restrict
pairList
)
{
const
mixed2
stepSize
=
dt[0]
;
const
mixed
dtPos
=
stepSize.y
;
const
mixed
dtVel
=
0.5f*
(
stepSize.x+stepSize.y
)
;
int
index
=
get_global_id
(
0
)
;
while
(
index
<
numAtoms
)
{
int
atom
=
atomList[index]
;
mixed4
velocity
=
velm[atom]
;
if
(
velocity.w
!=
0.0
)
{
#
ifdef
USE_MIXED_PRECISION
real4
pos1
=
posq[atom]
;
real4
pos2
=
posqCorrection[atom]
;
mixed4
pos
=
(
mixed4
)
(
pos1.x+
(
mixed
)
pos2.x,
pos1.y+
(
mixed
)
pos2.y,
pos1.z+
(
mixed
)
pos2.z,
pos1.w
)
;
#
else
real4
pos
=
posq[atom]
;
#
endif
velocity.x
+=
0.5f
*
dtVel*force[atom].x*velocity.w
;
velocity.y
+=
0.5f
*
dtVel*force[atom].y*velocity.w
;
velocity.z
+=
0.5f
*
dtVel*force[atom].z*velocity.w
;
pos.x
=
velocity.x*dtPos
;
pos.y
=
velocity.y*dtPos
;
pos.z
=
velocity.z*dtPos
;
posDelta[atom]
=
pos
;
velm[atom]
=
velocity
;
}
index
+=
get_global_size
(
0
)
;
}
index
=
get_global_id
(
0
)
;
while
(
index
<
numPairs
)
{
int
atom1
=
pairList[index].x
;
int
atom2
=
pairList[index].y
;
mixed4
v1
=
velm[atom1]
;
mixed4
v2
=
velm[atom2]
;
mixed
m1
=
v1.w
==
0.0f
?
0.0f
:
1.0f
/
v1.w
;
mixed
m2
=
v2.w
==
0.0f
?
0.0f
:
1.0f
/
v2.w
;
mixed
mass1fract
=
m1
/
(
m1
+
m2
)
;
mixed
mass2fract
=
m2
/
(
m1
+
m2
)
;
mixed
invRedMass
=
(
m1
*
m2
!=
0.0f
)
?
(
m1
+
m2
)
/
(
m1
*
m2
)
:
0.0f
;
mixed
invTotMass
=
(
m1
+
m2
!=
0.0f
)
?
1.0f
/
(
m1
+
m2
)
:
0.0f
;
mixed3
comVel
;
comVel.x=
v1.x*mass1fract
+
v2.x*mass2fract
;
comVel.y=
v1.y*mass1fract
+
v2.y*mass2fract
;
comVel.z=
v1.z*mass1fract
+
v2.z*mass2fract
;
mixed3
relVel
;
relVel.x=
v2.x
-
v1.x
;
relVel.y=
v2.y
-
v1.y
;
relVel.z=
v2.z
-
v1.z
;
mixed3
comFrc
;
comFrc.x
=
force[atom1].x
+
force[atom2].x
;
comFrc.y
=
force[atom1].y
+
force[atom2].y
;
comFrc.z
=
force[atom1].z
+
force[atom2].z
;
mixed3
relFrc
;
relFrc.x
=
mass1fract*force[atom2].x
-
mass2fract*force[atom1].x
;
relFrc.y
=
mass1fract*force[atom2].y
-
mass2fract*force[atom1].y
;
relFrc.z
=
mass1fract*force[atom2].z
-
mass2fract*force[atom1].z
;
comVel.x
+=
comFrc.x
*
0.5f
*
dtVel
*
invTotMass
;
comVel.y
+=
comFrc.y
*
0.5f
*
dtVel
*
invTotMass
;
comVel.z
+=
comFrc.z
*
0.5f
*
dtVel
*
invTotMass
;
relVel.x
+=
relFrc.x
*
0.5f
*
dtVel
*
invRedMass
;
relVel.y
+=
relFrc.y
*
0.5f
*
dtVel
*
invRedMass
;
relVel.z
+=
relFrc.z
*
0.5f
*
dtVel
*
invRedMass
;
#
ifdef
USE_MIXED_PRECISION
real4
posv1
=
posq[atom1]
;
real4
posv2
=
posq[atom2]
;
real4
posc1
=
posqCorrection[atom1]
;
real4
posc2
=
posqCorrection[atom2]
;
mixed4
pos1
=
(
mixed4
)
(
posv1.x+
(
mixed
)
posc1.x,
posv1.y+
(
mixed
)
posc1.y,
posv1.z+
(
mixed
)
posc1.z,
posv1.w
)
;
mixed4
pos2
=
(
mixed4
)
(
posv2.x+
(
mixed
)
posc2.x,
posv2.y+
(
mixed
)
posc2.y,
posv2.z+
(
mixed
)
posc2.z,
posv2.w
)
;
#
else
real4
pos1
=
posq[atom1]
;
real4
pos2
=
posq[atom2]
;
#
endif
if
(
v1.w
!=
0.0f
)
{
v1.x
=
comVel.x
-
relVel.x*mass2fract
;
v1.y
=
comVel.y
-
relVel.y*mass2fract
;
v1.z
=
comVel.z
-
relVel.z*mass2fract
;
pos1.x
=
v1.x*dtPos
;
pos1.y
=
v1.y*dtPos
;
pos1.z
=
v1.z*dtPos
;
posDelta[atom1]
=
pos1
;
velm[atom1]
=
v1
;
}
if
(
v2.w
!=
0.0f
)
{
v2.x
=
comVel.x
+
relVel.x*mass1fract
;
v2.y
=
comVel.y
+
relVel.y*mass1fract
;
v2.z
=
comVel.z
+
relVel.z*mass1fract
;
pos2.x
=
v2.x*dtPos
;
pos2.y
=
v2.y*dtPos
;
pos2.z
=
v2.z*dtPos
;
posDelta[atom2]
=
pos2
;
velm[atom2]
=
v2
;
}
index
+=
get_global_size
(
0
)
;
}
}
/**
*
Perform
the
second
step
of
Velocity
Verlet
integration.
*/
__kernel
void
integrateVelocityVerletPart2
(
int
numAtoms,
__global
mixed2*
restrict
dt,
__global
real4*
restrict
posq,
__global
real4*
restrict
posqCorrection,
__global
mixed4*
restrict
velm,
__global
const
mixed4*
restrict
posDelta
)
{
mixed2
stepSize
=
dt[0]
;
int
index
=
get_global_id
(
0
)
;
if
(
index
==
0
)
dt[0].x
=
stepSize.y
;
while
(
index
<
numAtoms
)
{
mixed4
velocity
=
velm[index]
;
if
(
velocity.w
!=
0.0
)
{
#
ifdef
USE_MIXED_PRECISION
real4
pos1
=
posq[index]
;
real4
pos2
=
posqCorrection[index]
;
mixed4
pos
=
(
mixed4
)
(
pos1.x+
(
mixed
)
pos2.x,
pos1.y+
(
mixed
)
pos2.y,
pos1.z+
(
mixed
)
pos2.z,
pos1.w
)
;
#
else
real4
pos
=
posq[index]
;
#
endif
mixed4
delta
=
posDelta[index]
;
pos.xyz
+=
delta.xyz
;
#
ifdef
USE_MIXED_PRECISION
posq[index]
=
(
real4
)
((
real
)
pos.x,
(
real
)
pos.y,
(
real
)
pos.z,
(
real
)
pos.w
)
;
posqCorrection[index]
=
(
real4
)
(
pos.x-
(
real
)
pos.x,
pos.y-
(
real
)
pos.y,
pos.z-
(
real
)
pos.z,
0
)
;
#
else
posq[index]
=
pos
;
#
endif
}
index
+=
get_global_size
(
0
)
;
}
}
/**
*
Perform
the
third
step
of
Velocity
Verlet
integration.
*/
__kernel
void
integrateVelocityVerletPart3
(
int
numAtoms,
int
numPairs,
int
paddedNumAtoms,
__global
mixed2*
restrict
dt,
__global
real4*
restrict
posq,
__global
real4*
restrict
posqCorrection,
__global
mixed4*
restrict
velm,
__global
const
real4*
restrict
force,
__global
const
mixed4*
restrict
posDelta,
__global
const
int*
restrict
atomList,
__global
const
int2*
__restrict__
pairList
)
{
mixed2
stepSize
=
dt[0]
;
#
ifndef
SUPPORTS_DOUBLE_PRECISION
double
oneOverDt
=
1.0/stepSize.y
;
#
else
float
oneOverDt
=
1.0f/stepSize.y
;
float
correction
=
(
1.0f-oneOverDt*stepSize.y
)
/stepSize.y
;
#
endif
const
mixed
dtVel
=
0.5f*
(
stepSize.x+stepSize.y
)
;
int
index
=
get_global_id
(
0
)
;
if
(
index
==
0
)
dt[0].x
=
stepSize.y
;
while
(
index
<
numAtoms
)
{
int
atom
=
atomList[index]
;
mixed4
velocity
=
velm[atom]
;
if
(
velocity.w
!=
0.0
)
{
mixed4
deltaXconstrained
=
posDelta[atom]
;
velocity.x
+=
0.5f
*
dtVel*force[atom].x*velocity.w
+
(
deltaXconstrained.x
-
velocity.x*stepSize.y
)
*oneOverDt
;
velocity.y
+=
0.5f
*
dtVel*force[atom].y*velocity.w
+
(
deltaXconstrained.y
-
velocity.y*stepSize.y
)
*oneOverDt
;
velocity.z
+=
0.5f
*
dtVel*force[atom].z*velocity.w
+
(
deltaXconstrained.z
-
velocity.z*stepSize.y
)
*oneOverDt
;
#
ifdef
SUPPORTS_DOUBLE_PRECISION
velocity.x
+=
(
deltaXconstrained.x
-
velocity.x*stepSize.y
)
*correction
;
velocity.y
+=
(
deltaXconstrained.y
-
velocity.y*stepSize.y
)
*correction
;
velocity.z
+=
(
deltaXconstrained.z
-
velocity.z*stepSize.y
)
*correction
;
#
endif
velm[atom]
=
velocity
;
}
index
+=
get_global_size
(
0
)
;
}
index
=
get_global_id
(
0
)
;
while
(
index
<
numPairs
)
{
int
atom1
=
pairList[index].x
;
int
atom2
=
pairList[index].y
;
mixed4
v1
=
velm[atom1]
;
mixed4
v2
=
velm[atom2]
;
mixed
m1
=
v1.w
==
0.0f
?
0.0f
:
1.0f
/
v1.w
;
mixed
m2
=
v2.w
==
0.0f
?
0.0f
:
1.0f
/
v2.w
;
mixed
mass1fract
=
m1
/
(
m1
+
m2
)
;
mixed
mass2fract
=
m2
/
(
m1
+
m2
)
;
mixed
invRedMass
=
(
m1
*
m2
!=
0.0f
)
?
(
m1
+
m2
)
/
(
m1
*
m2
)
:
0.0f
;
mixed
invTotMass
=
(
m1
+
m2
!=
0.0f
)
?
1.0f
/
(
m1
+
m2
)
:
0.0f
;
mixed3
comVel
;
comVel.x=
v1.x*mass1fract
+
v2.x*mass2fract
;
comVel.y=
v1.y*mass1fract
+
v2.y*mass2fract
;
comVel.z=
v1.z*mass1fract
+
v2.z*mass2fract
;
mixed3
relVel
;
relVel.x=
v2.x
-
v1.x
;
relVel.y=
v2.y
-
v1.y
;
relVel.z=
v2.z
-
v1.z
;
mixed3
comFrc
;
comFrc.x
=
force[atom1].x
+
force[atom2].x
;
comFrc.y
=
force[atom1].y
+
force[atom2].y
;
comFrc.z
=
force[atom1].z
+
force[atom2].z
;
mixed3
relFrc
;
relFrc.x
=
mass1fract*force[atom2].x
-
mass2fract*force[atom1].x
;
relFrc.y
=
mass1fract*force[atom2].y
-
mass2fract*force[atom1].y
;
relFrc.z
=
mass1fract*force[atom2].z
-
mass2fract*force[atom1].z
;
comVel.x
+=
comFrc.x
*
0.5f
*
dtVel
*
invTotMass
;
comVel.y
+=
comFrc.y
*
0.5f
*
dtVel
*
invTotMass
;
comVel.z
+=
comFrc.z
*
0.5f
*
dtVel
*
invTotMass
;
relVel.x
+=
relFrc.x
*
0.5f
*
dtVel
*
invRedMass
;
relVel.y
+=
relFrc.y
*
0.5f
*
dtVel
*
invRedMass
;
relVel.z
+=
relFrc.z
*
0.5f
*
dtVel
*
invRedMass
;
if
(
v1.w
!=
0.0f
)
{
mixed4
deltaXconstrained
=
posDelta[atom1]
;
v1.x
=
comVel.x
-
relVel.x*mass2fract
+
(
deltaXconstrained.x
-
v1.x*stepSize.y
)
*oneOverDt
;
v1.y
=
comVel.y
-
relVel.y*mass2fract
+
(
deltaXconstrained.y
-
v1.y*stepSize.y
)
*oneOverDt
;
v1.z
=
comVel.z
-
relVel.z*mass2fract
+
(
deltaXconstrained.z
-
v1.z*stepSize.y
)
*oneOverDt
;
#
ifdef
SUPPORTS_DOUBLE_PRECISION
v1.x
+=
(
deltaXconstrained.x
-
v1.x*stepSize.y
)
*correction
;
v1.y
+=
(
deltaXconstrained.y
-
v1.y*stepSize.y
)
*correction
;
v1.z
+=
(
deltaXconstrained.z
-
v1.z*stepSize.y
)
*correction
;
#
endif
velm[atom1]
=
v1
;
}
if
(
v2.w
!=
0.0f
)
{
mixed4
deltaXconstrained
=
posDelta[atom2]
;
v2.x
=
comVel.x
+
relVel.x*mass1fract
+
(
deltaXconstrained.x
-
v2.x*stepSize.y
)
*oneOverDt
;
v2.y
=
comVel.y
+
relVel.y*mass1fract
+
(
deltaXconstrained.y
-
v2.y*stepSize.y
)
*oneOverDt
;
v2.z
=
comVel.z
+
relVel.z*mass1fract
+
(
deltaXconstrained.z
-
v2.z*stepSize.y
)
*oneOverDt
;
#
ifdef
SUPPORTS_DOUBLE_PRECISION
v2.x
+=
(
deltaXconstrained.x
-
v2.x*stepSize.y
)
*correction
;
v2.y
+=
(
deltaXconstrained.y
-
v2.y*stepSize.y
)
*correction
;
v2.z
+=
(
deltaXconstrained.z
-
v2.z*stepSize.y
)
*correction
;
#
endif
velm[atom2]
=
v2
;
}
index
+=
get_global_size
(
0
)
;
}
}
__kernel
void
integrateVelocityVerletHardWall
(
int
numPairs,
__global
const
float*
restrict
maxPairDistance,
__global
mixed2*
restrict
dt,
__global
real4*
restrict
posq,
__global
real4*
restrict
posqCorrection,
__global
mixed4*
restrict
velm,
__global
const
int2*
restrict
pairList,
__global
const
float*
__restrict__
pairTemperature
)
{
mixed
dtPos
=
dt[0].y
;
mixed
maxDelta
=
(
mixed
)
maxPairDistance[0]
;
if
(
maxDelta
>
0
)
{
int
index
=
get_global_id
(
0
)
;
while
(
index
<
numPairs
)
{
const
mixed
hardWallScale
=
sqrt
(
((
mixed
)
pairTemperature[index]
)
*
((
mixed
)
BOLTZ
))
;
int2
atom
=
(
int2
)
(
pairList[index].x,
pairList[index].y
)
;
#
ifdef
USE_MIXED_PRECISION
real4
posv1
=
posq[atom.x]
;
real4
posc1
=
posqCorrection[atom.x]
;
mixed4
pos1
=
(
mixed4
)
(
posv1.x+
(
mixed
)
posc1.x,
posv1.y+
(
mixed
)
posc1.y,
posv1.z+
(
mixed
)
posc1.z,
posv1.w
)
;
real4
posv2
=
posq[atom.y]
;
real4
posc2
=
posqCorrection[atom.y]
;
mixed4
pos2
=
(
mixed4
)
(
posv2.x+
(
mixed
)
posc2.x,
posv2.y+
(
mixed
)
posc2.y,
posv2.z+
(
mixed
)
posc2.z,
posv2.w
)
;
#
else
real4
pos1
=
posq[atom.x]
;
real4
pos2
=
posq[atom.y]
;
#
endif
mixed3
delta
=
(
mixed3
)
(
(
mixed
)
(
pos1.x
-
pos2.x
)
,
(
mixed
)
(
pos1.y
-
pos2.y
)
,
(
mixed
)
(
pos1.z
-
pos2.z
)
)
;
mixed
r
=
sqrt
(
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
)
;
mixed
rInv
=
1/r
;
if
(
rInv*maxDelta
<
1.0
)
{
//
The
constraint
has
been
violated,
so
make
the
inter-particle
distance
"bounce"
//
off
the
hard
wall.
mixed3
bondDir
=
(
mixed3
)
(
delta.x
*
rInv,
delta.y
*
rInv,
delta.z
*
rInv
)
;
mixed3
vel1
=
(
mixed3
)
(
velm[atom.x].x,
velm[atom.x].y,
velm[atom.x].z
)
;
mixed3
vel2
=
(
mixed3
)
(
velm[atom.y].x,
velm[atom.y].y,
velm[atom.y].z
)
;
mixed
m1
=
velm[atom.x].w
!=
0.0
?
1.0/velm[atom.x].w
:
0.0
;
mixed
m2
=
velm[atom.y].w
!=
0.0
?
1.0/velm[atom.y].w
:
0.0
;
mixed
invTotMass
=
(
m1
+
m2
!=
0.0
)
?
1.0
/
(
m1
+
m2
)
:
0.0
;
mixed
deltaR
=
r-maxDelta
;
mixed
deltaT
=
dtPos
;
mixed
dt
=
dtPos
;
mixed
dotvr1
=
vel1.x*bondDir.x
+
vel1.y*bondDir.y
+
vel1.z*bondDir.z
;
mixed3
vb1
=
(
mixed3
)
(
bondDir.x*dotvr1,
bondDir.y*dotvr1,
bondDir.z*dotvr1
)
;
mixed3
vp1
=
(
mixed3
)
(
vel1.x-vb1.x,
vel1.y-vb1.y,
vel1.z-vb1.z
)
;
if
(
m2
==
0
)
{
//
The
parent
particle
is
massless,
so
move
only
the
Drude
particle.
if
(
dotvr1
!=
0.0
)
deltaT
=
deltaR/fabs
(
dotvr1
)
;
if
(
deltaT
>
dtPos
)
deltaT
=
dtPos
;
dotvr1
=
-dotvr1*hardWallScale/
(
fabs
(
dotvr1
)
*sqrt
(
m1
))
;
mixed
dr
=
-deltaR
+
deltaT*dotvr1
;
pos1.x
+=
bondDir.x*dr
;
pos1.y
+=
bondDir.y*dr
;
pos1.z
+=
bondDir.z*dr
;
velm[atom.x]
=
(
mixed4
)
(
vp1.x
+
bondDir.x*dotvr1,
vp1.y
+
bondDir.y*dotvr1,
vp1.z
+
bondDir.z*dotvr1,
velm[atom.x].w
)
;
#
ifdef
USE_MIXED_PRECISION
posq[atom.x]
=
(
real4
)
((
real
)
pos1.x,
(
real
)
pos1.y,
(
real
)
pos1.z,
(
real
)
pos1.w
)
;
posqCorrection[atom.x]
=
(
real4
)
(
pos1.x-
(
real
)
pos1.x,
pos1.y-
(
real
)
pos1.y,
pos1.z-
(
real
)
pos1.z,
0
)
;
#
else
posq[atom.x]
=
pos1
;
#
endif
}
else
{
//
Move
both
particles.
mixed
dotvr2
=
vel2.x*bondDir.x
+
vel2.y*bondDir.y
+
vel2.z*bondDir.z
;
mixed3
vb2
=
(
mixed3
)
(
bondDir.x*dotvr2,
bondDir.y*dotvr2,
bondDir.z*dotvr2
)
;
mixed3
vp2
=
(
mixed3
)
(
vel2.x-vb2.x,
vel2.y-vb2.y,
vel2.z-vb2.z
)
;
mixed
vbCMass
=
(
m1*dotvr1
+
m2*dotvr2
)
*invTotMass
;
dotvr1
-=
vbCMass
;
dotvr2
-=
vbCMass
;
if
(
dotvr1
!=
dotvr2
)
deltaT
=
deltaR/fabs
(
dotvr1-dotvr2
)
;
if
(
deltaT
>
dt
)
deltaT
=
dt
;
mixed
vBond
=
hardWallScale/sqrt
(
m1
)
;
dotvr1
=
-dotvr1*vBond*m2*invTotMass/fabs
(
dotvr1
)
;
dotvr2
=
-dotvr2*vBond*m1*invTotMass/fabs
(
dotvr2
)
;
mixed
dr1
=
-deltaR*m2*invTotMass
+
deltaT*dotvr1
;
mixed
dr2
=
deltaR*m1*invTotMass
+
deltaT*dotvr2
;
dotvr1
+=
vbCMass
;
dotvr2
+=
vbCMass
;
pos1.x
+=
bondDir.x*dr1
;
pos1.y
+=
bondDir.y*dr1
;
pos1.z
+=
bondDir.z*dr1
;
pos2.x
+=
bondDir.x*dr2
;
pos2.y
+=
bondDir.y*dr2
;
pos2.z
+=
bondDir.z*dr2
;
velm[atom.x]
=
(
mixed4
)
(
vp1.x
+
bondDir.x*dotvr1,
vp1.y
+
bondDir.y*dotvr1,
vp1.z
+
bondDir.z*dotvr1,
velm[atom.x].w
)
;
velm[atom.y]
=
(
mixed4
)
(
vp2.x
+
bondDir.x*dotvr2,
vp2.y
+
bondDir.y*dotvr2,
vp2.z
+
bondDir.z*dotvr2,
velm[atom.y].w
)
;
#
ifdef
USE_MIXED_PRECISION
posq[atom.x]
=
(
real4
)
((
real
)
pos1.x,
(
real
)
pos1.y,
(
real
)
pos1.z,
(
real
)
pos1.w
)
;
posq[atom.y]
=
(
real4
)
((
real
)
pos2.x,
(
real
)
pos2.y,
(
real
)
pos2.z,
(
real
)
pos2.w
)
;
posqCorrection[atom.x]
=
(
real4
)
(
pos1.x-
(
real
)
pos1.x,
pos1.y-
(
real
)
pos1.y,
pos1.z-
(
real
)
pos1.z,
0
)
;
posqCorrection[atom.y]
=
(
real4
)
(
pos2.x-
(
real
)
pos2.x,
pos2.y-
(
real
)
pos2.y,
pos2.z-
(
real
)
pos2.z,
0
)
;
#
else
posq[atom.x]
=
pos1
;
posq[atom.y]
=
pos2
;
#
endif
}
}
index
+=
get_global_size
(
0
)
;
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment