Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
5adf0871
"vscode:/vscode.git/clone" did not exist on "6ea84b2e54268b6b3983453d5fcccfbe778fbcc6"
Unverified
Commit
5adf0871
authored
Feb 27, 2020
by
Andy Simmonett
Browse files
Remove CUDA kernels
parent
a141f79e
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
0 additions
and
1087 deletions
+0
-1087
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+0
-103
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+0
-463
platforms/cuda/src/kernels/noseHooverChain.cu
platforms/cuda/src/kernels/noseHooverChain.cu
+0
-172
platforms/cuda/src/kernels/velocityVerlet.cu
platforms/cuda/src/kernels/velocityVerlet.cu
+0
-349
No files found.
platforms/cuda/include/CudaKernels.h
View file @
5adf0871
...
...
@@ -447,109 +447,6 @@ private:
CUfunction
copyStateKernel
,
copyForcesKernel
,
addForcesKernel
;
};
/*
* This kernel is invoked by NoseHooverIntegrator to take one time step.
*/
class
CudaIntegrateVelocityVerletStepKernel
:
public
IntegrateVelocityVerletStepKernel
{
public:
CudaIntegrateVelocityVerletStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
)
:
IntegrateVelocityVerletStepKernel
(
name
,
platform
),
cu
(
cu
)
{
}
~
CudaIntegrateVelocityVerletStepKernel
()
{}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param integrator the NoseHooverIntegrator this kernel will be used for
*/
void
initialize
(
const
System
&
system
,
const
NoseHooverIntegrator
&
integrator
);
/**
* Execute the kernel.
*
* @param context the context in which to execute this kernel
* @param integrator the VerletIntegrator this kernel is being used for
* @param forcesAreValid a reference to the parent integrator's boolean for keeping
* track of the validity of the current forces.
*/
void
execute
(
ContextImpl
&
context
,
const
NoseHooverIntegrator
&
integrator
,
bool
&
forcesAreValid
);
/**
* Compute the kinetic energy.
*
* @param context the context in which to execute this kernel
* @param integrator the NoseHooverIntegrator this kernel is being used for
*/
double
computeKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverIntegrator
&
integrator
);
private:
CudaContext
&
cu
;
float
prevMaxPairDistance
;
CudaArray
maxPairDistanceBuffer
,
pairListBuffer
,
atomListBuffer
,
pairTemperatureBuffer
;
CUfunction
kernel1
,
kernel2
,
kernel3
,
kernelHardWall
;
};
/**
* This kernel is invoked by NoseHooverChain at the start of each time step to adjust the thermostat
* and update the associated particle velocities.
*/
class
CudaNoseHooverChainKernel
:
public
NoseHooverChainKernel
{
public:
CudaNoseHooverChainKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
)
:
NoseHooverChainKernel
(
name
,
platform
),
cu
(
cu
)
{
}
~
CudaNoseHooverChainKernel
()
{}
/**
* Initialize the kernel.
*/
void
initialize
();
/**
* Execute the kernel that propagates the Nose Hoover chain and determines the velocity scale factor.
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the object describing the chain to be propagated.
* @param kineticEnergies the {absolute, relative} kineticEnergy of the particles being thermostated by this chain.
* @param timeStep the time step used by the integrator.
* @return the {absolute, relative} velocity scale factor to apply to the particles associated with this heat bath.
*/
std
::
pair
<
double
,
double
>
propagateChain
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
std
::
pair
<
double
,
double
>
kineticEnergies
,
double
timeStep
);
/**
* Execute the kernal that computes the total (kinetic + potential) heat bath energy.
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @return the total heat bath energy.
*/
double
computeHeatBathEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
);
/**
* Execute the kernel that computes the kinetic energy for a subset of atoms,
* or the relative kinetic energy of Drude particles with respect to their parent atoms
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @param downloadValue whether the computed value should be downloaded and returned.
*
*/
std
::
pair
<
double
,
double
>
computeMaskedKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
noseHooverChain
,
bool
downloadValue
);
/**
* Execute the kernel that scales the velocities of particles associated with a nose hoover chain
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @param scaleFactors the {absolute, relative} multiplicative factor by which velocities are scaled.
*/
void
scaleVelocities
(
ContextImpl
&
context
,
const
NoseHooverChain
&
noseHooverChain
,
std
::
pair
<
double
,
double
>
scaleFactors
);
private:
int
sumWorkGroupSize
;
CudaContext
&
cu
;
CudaArray
energyBuffer
,
scaleFactorBuffer
,
kineticEnergyBuffer
,
chainMasses
,
chainForces
,
heatBathEnergy
;
std
::
map
<
int
,
CudaArray
>
atomlists
,
pairlists
;
std
::
map
<
int
,
CUfunction
>
propagateKernels
;
CUfunction
reduceEnergyKernel
;
CUfunction
computeHeatBathEnergyKernel
;
CUfunction
computeAtomsKineticEnergyKernel
;
CUfunction
computePairsKineticEnergyKernel
;
CUfunction
scaleAtomsVelocitiesKernel
;
CUfunction
scalePairsVelocitiesKernel
;
};
/**
* This kernel is invoked by MonteCarloBarostat to adjust the periodic box volume
*/
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
5adf0871
...
...
@@ -1496,118 +1496,6 @@ void CudaCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, in
nz
=
dispersionGridSizeZ
;
}
}
void
CudaIntegrateVelocityVerletStepKernel
::
initialize
(
const
System
&
system
,
const
NoseHooverIntegrator
&
integrator
)
{
cu
.
getPlatformData
().
initializeContexts
(
system
);
cu
.
setAsCurrent
();
map
<
string
,
string
>
defines
;
defines
[
"BOLTZ"
]
=
cu
.
doubleToString
(
BOLTZ
);
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
velocityVerlet
,
defines
,
""
);
kernel1
=
cu
.
getKernel
(
module
,
"integrateVelocityVerletPart1"
);
kernel2
=
cu
.
getKernel
(
module
,
"integrateVelocityVerletPart2"
);
kernel3
=
cu
.
getKernel
(
module
,
"integrateVelocityVerletPart3"
);
kernelHardWall
=
cu
.
getKernel
(
module
,
"integrateVelocityVerletHardWall"
);
prevMaxPairDistance
=
-
1.0
;
maxPairDistanceBuffer
.
initialize
<
float
>
(
cu
,
1
,
"maxPairDistanceBuffer"
);
}
void
CudaIntegrateVelocityVerletStepKernel
::
execute
(
ContextImpl
&
context
,
const
NoseHooverIntegrator
&
integrator
,
bool
&
forcesAreValid
)
{
cu
.
setAsCurrent
();
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
int
paddedNumAtoms
=
cu
.
getPaddedNumAtoms
();
double
dt
=
integrator
.
getStepSize
();
cu
.
getIntegrationUtilities
().
setNextStepSize
(
dt
);
if
(
!
forcesAreValid
)
context
.
calcForcesAndEnergy
(
true
,
false
);
const
auto
&
atomList
=
integrator
.
getAllThermostatedIndividualParticles
();
const
auto
&
pairList
=
integrator
.
getAllThermostatedPairs
();
int
numAtoms
=
atomList
.
size
();
int
numPairs
=
pairList
.
size
();
int
numParticles
=
numAtoms
+
2
*
numPairs
;
float
maxPairDistance
=
integrator
.
getMaximumPairDistance
();
// Make sure atom and pair metadata is uploaded and has the correct dimensions
if
(
prevMaxPairDistance
!=
maxPairDistance
)
{
std
::
vector
<
float
>
tmp
(
1
,
maxPairDistance
);
maxPairDistanceBuffer
.
upload
(
tmp
);
prevMaxPairDistance
=
maxPairDistance
;
}
if
(
numAtoms
!=
0
&&
(
!
atomListBuffer
.
isInitialized
()
||
atomListBuffer
.
getSize
()
!=
numAtoms
))
{
if
(
atomListBuffer
.
isInitialized
())
atomListBuffer
.
resize
(
atomList
.
size
());
else
atomListBuffer
.
initialize
<
int
>
(
cu
,
atomList
.
size
(),
"atomListBuffer"
);
atomListBuffer
.
upload
(
atomList
);
}
if
(
numPairs
!=
0
&&
(
!
pairListBuffer
.
isInitialized
()
||
pairListBuffer
.
getSize
()
!=
numPairs
))
{
if
(
pairListBuffer
.
isInitialized
())
{
pairListBuffer
.
resize
(
pairList
.
size
());
pairTemperatureBuffer
.
resize
(
pairList
.
size
());
}
else
{
pairListBuffer
.
initialize
<
int2
>
(
cu
,
pairList
.
size
(),
"pairListBuffer"
);
pairTemperatureBuffer
.
initialize
<
float
>
(
cu
,
pairList
.
size
(),
"pairTemperatureBuffer"
);
}
std
::
vector
<
int2
>
tmp
;
std
::
vector
<
float
>
tmp2
;
for
(
const
auto
&
pair
:
pairList
)
{
tmp
.
push_back
(
make_int2
(
std
::
get
<
0
>
(
pair
),
std
::
get
<
1
>
(
pair
)));
tmp2
.
push_back
(
std
::
get
<
2
>
(
pair
));
}
pairListBuffer
.
upload
(
tmp
);
pairTemperatureBuffer
.
upload
(
tmp2
);
}
//// Call the first integration kernel.
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
void
*
args1
[]
=
{
&
numAtoms
,
&
numPairs
,
&
paddedNumAtoms
,
&
cu
.
getIntegrationUtilities
().
getStepSize
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
(),
&
atomListBuffer
.
getDevicePointer
(),
&
pairListBuffer
.
getDevicePointer
()};
cu
.
executeKernel
(
kernel1
,
args1
,
std
::
max
(
numAtoms
,
numPairs
),
128
);
//// Apply constraints.
integration
.
applyConstraints
(
integrator
.
getConstraintTolerance
());
//// Call the second integration kernel.
void
*
args2
[]
=
{
&
numParticles
,
&
cu
.
getIntegrationUtilities
().
getStepSize
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
()};
cu
.
executeKernel
(
kernel2
,
args2
,
numParticles
,
128
);
if
(
numPairs
>
0
)
{
//// Enforce hard wall constraint
void
*
argsHardWall
[]
=
{
&
numPairs
,
&
maxPairDistanceBuffer
.
getDevicePointer
(),
&
cu
.
getIntegrationUtilities
().
getStepSize
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
pairListBuffer
.
getDevicePointer
(),
&
pairTemperatureBuffer
.
getDevicePointer
()};
cu
.
executeKernel
(
kernelHardWall
,
argsHardWall
,
numPairs
,
128
);
}
integration
.
computeVirtualSites
();
//// Update forces
context
.
calcForcesAndEnergy
(
true
,
false
);
forcesAreValid
=
true
;
//// Call the third integration kernel.
void
*
args3
[]
=
{
&
numAtoms
,
&
numPairs
,
&
paddedNumAtoms
,
&
cu
.
getIntegrationUtilities
().
getStepSize
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
(),
&
atomListBuffer
.
getDevicePointer
(),
&
pairListBuffer
.
getDevicePointer
()};
cu
.
executeKernel
(
kernel3
,
args3
,
std
::
max
(
numAtoms
,
numPairs
),
128
);
// TODO: Figure out if this is really needed. The constraint velocities are accounted for
// in a finite difference sense in the step 3 kernel, when the velocities are updated.
integration
.
applyVelocityConstraints
(
integrator
.
getConstraintTolerance
());
//// Update the time and step count.
cu
.
setTime
(
cu
.
getTime
()
+
dt
);
cu
.
setStepCount
(
cu
.
getStepCount
()
+
1
);
cu
.
reorderAtoms
();
}
double
CudaIntegrateVelocityVerletStepKernel
::
computeKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverIntegrator
&
integrator
)
{
return
cu
.
getIntegrationUtilities
().
computeKineticEnergy
(
0
);
}
class
CudaCalcCustomCVForceKernel
::
ForceInfo
:
public
CudaForceInfo
{
public:
...
...
@@ -1819,357 +1707,6 @@ void CudaCalcCustomCVForceKernel::copyParametersToContext(ContextImpl& context,
delete
function
.
second
;
}
void
CudaNoseHooverChainKernel
::
initialize
()
{
cu
.
setAsCurrent
();
bool
useDouble
=
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
();
map
<
string
,
string
>
defines
;
sumWorkGroupSize
=
512
;
defines
[
"WORK_GROUP_SIZE"
]
=
cu
.
intToString
(
sumWorkGroupSize
);
defines
[
"MIXEDEXP"
]
=
useDouble
?
"exp"
:
"expf"
;
defines
[
"BEGIN_YS_LOOP"
]
=
"for(const real & ys : {1}) {"
;
defines
[
"END_YS_LOOP"
]
=
"}"
;
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
noseHooverChain
,
defines
,
"-std=c++11"
);
propagateKernels
[
1
]
=
cu
.
getKernel
(
module
,
"propagateNoseHooverChain"
);
defines
[
"BEGIN_YS_LOOP"
]
=
"for(const real & ys : {0.828981543588751, -0.657963087177502, 0.828981543588751}) {"
;
module
=
cu
.
createModule
(
CudaKernelSources
::
noseHooverChain
,
defines
,
"-std=c++11"
);
propagateKernels
[
3
]
=
cu
.
getKernel
(
module
,
"propagateNoseHooverChain"
);
defines
[
"BEGIN_YS_LOOP"
]
=
"for(const real & ys : {0.2967324292201065, 0.2967324292201065, -0.186929716880426, 0.2967324292201065, 0.2967324292201065}) {"
;
module
=
cu
.
createModule
(
CudaKernelSources
::
noseHooverChain
,
defines
,
"-std=c++11"
);
propagateKernels
[
5
]
=
cu
.
getKernel
(
module
,
"propagateNoseHooverChain"
);
module
=
cu
.
createModule
(
CudaKernelSources
::
noseHooverChain
,
defines
,
"-std=c++11"
);
reduceEnergyKernel
=
cu
.
getKernel
(
module
,
"reduceEnergyPair"
);
computeHeatBathEnergyKernel
=
cu
.
getKernel
(
module
,
"computeHeatBathEnergy"
);
computeAtomsKineticEnergyKernel
=
cu
.
getKernel
(
module
,
"computeAtomsKineticEnergy"
);
computePairsKineticEnergyKernel
=
cu
.
getKernel
(
module
,
"computePairsKineticEnergy"
);
scaleAtomsVelocitiesKernel
=
cu
.
getKernel
(
module
,
"scaleAtomsVelocities"
);
scalePairsVelocitiesKernel
=
cu
.
getKernel
(
module
,
"scalePairsVelocities"
);
int
energyBufferSize
=
cu
.
getEnergyBuffer
().
getSize
();
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
())
energyBuffer
.
initialize
<
double2
>
(
cu
,
energyBufferSize
,
"energyBuffer"
);
else
energyBuffer
.
initialize
<
float2
>
(
cu
,
energyBufferSize
,
"energyBuffer"
);
}
std
::
pair
<
double
,
double
>
CudaNoseHooverChainKernel
::
propagateChain
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
std
::
pair
<
double
,
double
>
kineticEnergies
,
double
timeStep
)
{
cu
.
setAsCurrent
();
bool
useDouble
=
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
();
int
chainID
=
nhc
.
getChainID
();
int
nAtoms
=
nhc
.
getThermostatedAtoms
().
size
();
int
nPairs
=
nhc
.
getThermostatedPairs
().
size
();
int
chainLength
=
nhc
.
getChainLength
();
int
numYS
=
nhc
.
getNumYoshidaSuzukiTimeSteps
();
int
numMTS
=
nhc
.
getNumMultiTimeSteps
();
int
numDOFs
=
nhc
.
getNumDegreesOfFreedom
();
double
temperature
=
nhc
.
getTemperature
();
double
frequency
=
nhc
.
getCollisionFrequency
();
double
relativeTemperature
=
nhc
.
getRelativeTemperature
();
double
relativeFrequency
=
nhc
.
getRelativeCollisionFrequency
();
if
(
numYS
!=
1
&&
numYS
!=
3
&&
numYS
!=
5
)
{
throw
OpenMMException
(
"Number of Yoshida Suzuki time steps has to be 1, 3, or 5."
);
}
auto
&
chainState
=
cu
.
getIntegrationUtilities
().
getNoseHooverChainState
();
if
(
!
scaleFactorBuffer
.
isInitialized
()
||
scaleFactorBuffer
.
getSize
()
==
0
)
{
if
(
useDouble
)
{
std
::
vector
<
double2
>
zeros
{{
0
,
0
}};
if
(
scaleFactorBuffer
.
isInitialized
())
scaleFactorBuffer
.
resize
(
1
);
else
scaleFactorBuffer
.
initialize
<
double2
>
(
cu
,
1
,
"scaleFactorBuffer"
);
scaleFactorBuffer
.
upload
(
zeros
);
}
else
{
std
::
vector
<
float2
>
zeros
{{
0
,
0
}};
if
(
scaleFactorBuffer
.
isInitialized
())
scaleFactorBuffer
.
resize
(
1
);
else
scaleFactorBuffer
.
initialize
<
float2
>
(
cu
,
1
,
"scaleFactorBuffer"
);
scaleFactorBuffer
.
upload
(
zeros
);
}
}
std
::
vector
<
double
>
zeros
(
chainLength
,
0
);
if
(
!
chainForces
.
isInitialized
()
||
!
chainMasses
.
isInitialized
())
{
if
(
useDouble
)
{
if
(
chainForces
.
isInitialized
())
{
chainMasses
.
resize
(
chainLength
);
chainForces
.
resize
(
chainLength
);
}
else
{
chainMasses
.
initialize
<
double
>
(
cu
,
chainLength
,
"chainMasses"
);
chainForces
.
initialize
<
double
>
(
cu
,
chainLength
,
"chainForces"
);
}
chainMasses
.
upload
(
zeros
);
chainForces
.
upload
(
zeros
);
}
else
{
if
(
chainForces
.
isInitialized
())
{
chainMasses
.
resize
(
chainLength
);
chainForces
.
resize
(
chainLength
);
}
else
{
chainMasses
.
initialize
<
float
>
(
cu
,
chainLength
,
"chainMasses"
);
chainForces
.
initialize
<
float
>
(
cu
,
chainLength
,
"chainForces"
);
}
chainMasses
.
upload
(
zeros
,
true
);
chainForces
.
upload
(
zeros
,
true
);
}
}
if
(
chainForces
.
getSize
()
<
chainLength
)
chainMasses
.
resize
(
chainLength
);
if
(
chainMasses
.
getSize
()
<
chainLength
)
chainMasses
.
resize
(
chainLength
);
float
timeStepFloat
=
(
float
)
timeStep
;
// N.B. We ignore the incoming kineticEnergy and grab it from the device buffer instead
if
(
nAtoms
)
{
if
(
!
chainState
.
count
(
2
*
chainID
))
chainState
[
2
*
chainID
]
=
ComputeArray
();
if
(
!
chainState
.
at
(
2
*
chainID
).
isInitialized
()
||
chainState
.
at
(
2
*
chainID
).
getSize
()
!=
chainLength
)
{
// We need to upload the CUDA array
if
(
useDouble
)
{
if
(
chainState
.
at
(
2
*
chainID
).
isInitialized
())
chainState
.
at
(
2
*
chainID
).
resize
(
chainLength
);
else
chainState
.
at
(
2
*
chainID
).
initialize
<
double2
>
(
cu
,
chainLength
,
"chainState"
+
std
::
to_string
(
2
*
chainID
));
std
::
vector
<
double2
>
zeros
(
chainLength
,
make_double2
(
0
,
0
));
chainState
.
at
(
2
*
chainID
).
upload
(
zeros
.
data
());
}
else
{
if
(
chainState
.
at
(
2
*
chainID
).
isInitialized
())
chainState
.
at
(
2
*
chainID
).
resize
(
chainLength
);
else
chainState
.
at
(
2
*
chainID
).
initialize
<
float2
>
(
cu
,
chainLength
,
"chainState"
+
std
::
to_string
(
2
*
chainID
));
std
::
vector
<
float2
>
zeros
(
chainLength
,
make_float2
(
0
,
0
));
chainState
.
at
(
2
*
chainID
).
upload
(
zeros
.
data
());
}
}
int
chainType
=
0
;
double
kT
=
BOLTZ
*
temperature
;
float
kTfloat
=
(
float
)
kT
;
float
frequencyFloat
=
(
float
)
frequency
;
void
*
args
[]
=
{
&
cu
.
unwrap
(
chainState
[
2
*
chainID
]).
getDevicePointer
(),
&
kineticEnergyBuffer
.
getDevicePointer
(),
&
scaleFactorBuffer
.
getDevicePointer
(),
&
chainMasses
.
getDevicePointer
(),
&
chainForces
.
getDevicePointer
(),
&
chainType
,
&
chainLength
,
&
numMTS
,
&
numDOFs
,
&
timeStepFloat
,
useDouble
?
(
void
*
)
&
kT
:
(
void
*
)
&
kTfloat
,
&
frequencyFloat
};
cu
.
executeKernel
(
propagateKernels
[
numYS
],
args
,
1
,
1
);
}
if
(
nPairs
)
{
if
(
!
chainState
.
count
(
2
*
chainID
+
1
))
chainState
[
2
*
chainID
+
1
]
=
ComputeArray
();
if
(
!
chainState
.
at
(
2
*
chainID
+
1
).
isInitialized
()
||
chainState
.
at
(
2
*
chainID
+
1
).
getSize
()
!=
chainLength
)
{
// We need to upload the CUDA array
if
(
useDouble
)
{
if
(
chainState
.
at
(
2
*
chainID
+
1
).
isInitialized
())
chainState
.
at
(
2
*
chainID
+
1
).
resize
(
chainLength
);
else
chainState
.
at
(
2
*
chainID
+
1
).
initialize
<
double2
>
(
cu
,
chainLength
,
"chainState"
+
std
::
to_string
(
2
*
chainID
+
1
));
std
::
vector
<
double2
>
zeros
(
chainLength
,
make_double2
(
0
,
0
));
chainState
.
at
(
2
*
chainID
+
1
).
upload
(
zeros
.
data
());
}
else
{
if
(
chainState
.
at
(
2
*
chainID
+
1
).
isInitialized
())
chainState
.
at
(
2
*
chainID
+
1
).
resize
(
chainLength
);
else
chainState
.
at
(
2
*
chainID
+
1
).
initialize
<
float2
>
(
cu
,
chainLength
,
"chainState"
+
std
::
to_string
(
2
*
chainID
+
1
));
std
::
vector
<
float2
>
zeros
(
chainLength
,
make_float2
(
0
,
0
));
chainState
.
at
(
2
*
chainID
+
1
).
upload
(
zeros
.
data
());
}
}
int
chainType
=
1
;
double
kT
=
BOLTZ
*
relativeTemperature
;
int
ndf
=
3
*
nPairs
;
float
kTfloat
=
(
float
)
kT
;
float
frequencyFloat
=
(
float
)
relativeFrequency
;
void
*
args
[]
=
{
&
cu
.
unwrap
(
chainState
[
2
*
chainID
+
1
]).
getDevicePointer
(),
&
kineticEnergyBuffer
.
getDevicePointer
(),
&
scaleFactorBuffer
.
getDevicePointer
(),
&
chainMasses
.
getDevicePointer
(),
&
chainForces
.
getDevicePointer
(),
&
chainType
,
&
chainLength
,
&
numMTS
,
&
ndf
,
&
timeStepFloat
,
useDouble
?
(
void
*
)
&
kT
:
(
void
*
)
&
kTfloat
,
&
frequencyFloat
};
cu
.
executeKernel
(
propagateKernels
[
numYS
],
args
,
1
,
1
);
}
return
{
0
,
0
};
}
double
CudaNoseHooverChainKernel
::
computeHeatBathEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
)
{
cu
.
setAsCurrent
();
bool
useDouble
=
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
();
int
chainID
=
nhc
.
getChainID
();
int
chainLength
=
nhc
.
getChainLength
();
auto
&
chainState
=
cu
.
getIntegrationUtilities
().
getNoseHooverChainState
();
bool
absChainIsValid
=
chainState
.
count
(
2
*
chainID
)
!=
0
&&
chainState
[
2
*
chainID
].
isInitialized
()
&&
chainState
[
2
*
chainID
].
getSize
()
==
chainLength
;
bool
relChainIsValid
=
chainState
.
count
(
2
*
chainID
+
1
)
!=
0
&&
chainState
[
2
*
chainID
+
1
].
isInitialized
()
&&
chainState
[
2
*
chainID
+
1
].
getSize
()
==
chainLength
;
if
(
!
absChainIsValid
&&
!
relChainIsValid
)
return
0.0
;
if
(
!
heatBathEnergy
.
isInitialized
()
||
heatBathEnergy
.
getSize
()
==
0
)
{
if
(
useDouble
)
{
std
::
vector
<
double
>
one
(
1
);
heatBathEnergy
.
initialize
<
double
>
(
cu
,
1
,
"heatBathEnergy"
);
heatBathEnergy
.
upload
(
one
);
}
else
{
std
::
vector
<
float
>
one
(
1
);
heatBathEnergy
.
initialize
<
float
>
(
cu
,
1
,
"heatBathEnergy"
);
heatBathEnergy
.
upload
(
one
);
}
}
cu
.
clearBuffer
(
heatBathEnergy
);
if
(
absChainIsValid
)
{
int
numDOFs
=
nhc
.
getNumDegreesOfFreedom
();
double
temperature
=
nhc
.
getTemperature
();
double
frequency
=
nhc
.
getCollisionFrequency
();
double
kT
=
BOLTZ
*
temperature
;
float
kTfloat
=
(
float
)
kT
;
float
frequencyFloat
=
(
float
)
frequency
;
void
*
args
[]
=
{
&
heatBathEnergy
.
getDevicePointer
(),
&
chainLength
,
&
numDOFs
,
useDouble
?
(
void
*
)
&
kT
:
(
void
*
)
&
kTfloat
,
&
frequencyFloat
,
&
cu
.
unwrap
(
chainState
[
2
*
chainID
]).
getDevicePointer
()};
cu
.
executeKernel
(
computeHeatBathEnergyKernel
,
args
,
1
,
1
);
}
if
(
relChainIsValid
)
{
int
numDOFs
=
3
*
nhc
.
getThermostatedPairs
().
size
();
double
temperature
=
nhc
.
getRelativeTemperature
();
double
frequency
=
nhc
.
getRelativeCollisionFrequency
();
double
kT
=
BOLTZ
*
temperature
;
float
kTfloat
=
(
float
)
kT
;
float
frequencyFloat
=
(
float
)
frequency
;
void
*
args
[]
=
{
&
heatBathEnergy
.
getDevicePointer
(),
&
chainLength
,
&
numDOFs
,
useDouble
?
(
void
*
)
&
kT
:
(
void
*
)
&
kTfloat
,
&
frequencyFloat
,
&
cu
.
unwrap
(
chainState
[
2
*
chainID
+
1
]).
getDevicePointer
()};
cu
.
executeKernel
(
computeHeatBathEnergyKernel
,
args
,
1
,
1
);
}
void
*
pinnedBuffer
=
cu
.
getPinnedBuffer
();
heatBathEnergy
.
download
(
pinnedBuffer
);
if
(
useDouble
)
return
*
((
double
*
)
pinnedBuffer
);
else
return
*
((
float
*
)
pinnedBuffer
);
}
std
::
pair
<
double
,
double
>
CudaNoseHooverChainKernel
::
computeMaskedKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
bool
downloadValue
)
{
cu
.
setAsCurrent
();
bool
useDouble
=
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
();
int
chainID
=
nhc
.
getChainID
();
const
auto
&
nhcAtoms
=
nhc
.
getThermostatedAtoms
();
const
auto
&
nhcPairs
=
nhc
.
getThermostatedPairs
();
auto
nAtoms
=
nhcAtoms
.
size
();
auto
nPairs
=
nhcPairs
.
size
();
if
(
nAtoms
)
{
if
(
!
atomlists
.
count
(
chainID
))
{
// We need to upload the CUDA array
atomlists
[
chainID
]
=
CudaArray
();
atomlists
[
chainID
].
initialize
<
int
>
(
cu
,
nAtoms
,
"atomlist"
+
std
::
to_string
(
chainID
));
atomlists
[
chainID
].
upload
(
nhcAtoms
);
}
if
(
atomlists
[
chainID
].
getSize
()
!=
nAtoms
)
{
throw
OpenMMException
(
"Number of atoms changed. Cannot be handled by the same Nose-Hoover thermostat."
);
}
}
if
(
nPairs
)
{
if
(
!
pairlists
.
count
(
chainID
))
{
// We need to upload the CUDA array
pairlists
[
chainID
]
=
CudaArray
();
pairlists
[
chainID
].
initialize
<
int2
>
(
cu
,
nPairs
,
"pairlist"
+
std
::
to_string
(
chainID
));
std
::
vector
<
int2
>
int2vec
;
for
(
const
auto
&
p
:
nhcPairs
)
int2vec
.
push_back
(
make_int2
(
p
.
first
,
p
.
second
));
pairlists
[
chainID
].
upload
(
int2vec
);
}
if
(
pairlists
[
chainID
].
getSize
()
!=
nPairs
)
{
throw
OpenMMException
(
"Number of thermostated pairs changed. Cannot be handled by the same Nose-Hoover thermostat."
);
}
}
if
(
!
kineticEnergyBuffer
.
isInitialized
()
||
kineticEnergyBuffer
.
getSize
()
==
0
)
{
if
(
useDouble
)
{
std
::
vector
<
double2
>
zeros
{{
0
,
0
}};
kineticEnergyBuffer
.
initialize
<
double2
>
(
cu
,
1
,
"kineticEnergyBuffer"
);
kineticEnergyBuffer
.
upload
(
zeros
);
}
else
{
std
::
vector
<
float2
>
zeros
{{
0
,
0
}};
kineticEnergyBuffer
.
initialize
<
float2
>
(
cu
,
1
,
"kineticEnergyBuffer"
);
kineticEnergyBuffer
.
upload
(
zeros
);
}
}
cu
.
clearBuffer
(
energyBuffer
);
if
(
nAtoms
)
{
void
*
args
[]
=
{
&
energyBuffer
.
getDevicePointer
(),
&
nAtoms
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
atomlists
[
chainID
].
getDevicePointer
()};
cu
.
executeKernel
(
computeAtomsKineticEnergyKernel
,
args
,
nAtoms
);
}
if
(
nPairs
)
{
void
*
args
[]
=
{
&
energyBuffer
.
getDevicePointer
(),
&
nPairs
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
pairlists
[
chainID
].
getDevicePointer
()};
cu
.
executeKernel
(
computePairsKineticEnergyKernel
,
args
,
nPairs
);
}
//taken from CudaContext::reduceEnergy(); the final kinetic energy will live in the kineticEnergy buffer
int
bufferSize
=
energyBuffer
.
getSize
();
void
*
args2
[]
=
{
&
energyBuffer
.
getDevicePointer
(),
&
kineticEnergyBuffer
.
getDevicePointer
(),
&
bufferSize
,
&
sumWorkGroupSize
};
cu
.
executeKernel
(
reduceEnergyKernel
,
args2
,
sumWorkGroupSize
,
sumWorkGroupSize
,
sumWorkGroupSize
*
energyBuffer
.
getElementSize
());
std
::
pair
<
double
,
double
>
KEs
=
{
0
,
0
};
if
(
downloadValue
)
{
void
*
pinnedBuffer
=
cu
.
getPinnedBuffer
();
kineticEnergyBuffer
.
download
(
pinnedBuffer
);
KEs
.
first
=
useDouble
?
*
((
double
*
)
pinnedBuffer
)
:
*
((
float
*
)
pinnedBuffer
);
KEs
.
second
=
useDouble
?
*
((
double
*
)
pinnedBuffer
+
1
)
:
*
((
float
*
)
pinnedBuffer
+
1
);
}
return
KEs
;
}
void
CudaNoseHooverChainKernel
::
scaleVelocities
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
std
::
pair
<
double
,
double
>
scaleFactor
)
{
// For now we assume that the atoms and pairs info is valid, because compute{Atoms|Pairs}KineticEnergy must have been
// called before this kernel. If that ever ceases to be true, some sanity checks are needed here.
cu
.
setAsCurrent
();
int
chainID
=
nhc
.
getChainID
();
auto
nAtoms
=
nhc
.
getThermostatedAtoms
().
size
();
auto
nPairs
=
nhc
.
getThermostatedPairs
().
size
();
if
(
nAtoms
)
{
void
*
args
[]
=
{
&
scaleFactorBuffer
.
getDevicePointer
(),
&
nAtoms
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
atomlists
[
chainID
].
getDevicePointer
()};
cu
.
executeKernel
(
scaleAtomsVelocitiesKernel
,
args
,
nAtoms
);
}
if
(
nPairs
)
{
void
*
args
[]
=
{
&
scaleFactorBuffer
.
getDevicePointer
(),
&
nPairs
,
&
cu
.
getVelm
().
getDevicePointer
(),
&
pairlists
[
chainID
].
getDevicePointer
()};
cu
.
executeKernel
(
scalePairsVelocitiesKernel
,
args
,
nPairs
);
}
}
void
CudaApplyMonteCarloBarostatKernel
::
initialize
(
const
System
&
system
,
const
Force
&
thermostat
)
{
cu
.
setAsCurrent
();
savedPositions
.
initialize
(
cu
,
cu
.
getPaddedNumAtoms
(),
cu
.
getUseDoublePrecision
()
?
sizeof
(
double4
)
:
sizeof
(
float4
),
"savedPositions"
);
...
...
platforms/cuda/src/kernels/noseHooverChain.cu
deleted
100644 → 0
View file @
a141f79e
#include <initializer_list>
extern
"C"
__global__
void
propagateNoseHooverChain
(
mixed2
*
__restrict__
chainData
,
const
mixed2
*
__restrict__
energySum
,
mixed2
*
__restrict__
scaleFactor
,
mixed
*
__restrict__
chainMasses
,
mixed
*
__restrict__
chainForces
,
int
chainType
,
int
chainLength
,
int
numMTS
,
int
numDOFs
,
float
timeStep
,
mixed
kT
,
float
frequency
){
const
mixed
&
kineticEnergy
=
chainType
?
energySum
[
0
].
y
:
energySum
[
0
].
x
;
mixed
&
scale
=
chainType
?
scaleFactor
[
0
].
y
:
scaleFactor
[
0
].
x
;
scale
=
(
mixed
)
1
;
if
(
kineticEnergy
<
1e-8
)
return
;
for
(
int
bead
=
0
;
bead
<
chainLength
;
++
bead
)
chainMasses
[
bead
]
=
kT
/
(
frequency
*
frequency
);
chainMasses
[
0
]
*=
numDOFs
;
mixed
KE2
=
2.0
f
*
kineticEnergy
;
mixed
timeOverMTS
=
timeStep
/
numMTS
;
chainForces
[
0
]
=
(
KE2
-
numDOFs
*
kT
)
/
chainMasses
[
0
];
for
(
int
bead
=
0
;
bead
<
chainLength
-
1
;
++
bead
)
{
chainForces
[
bead
+
1
]
=
(
chainMasses
[
bead
]
*
chainData
[
bead
].
y
*
chainData
[
bead
].
y
-
kT
)
/
chainMasses
[
bead
+
1
];
}
for
(
int
mts
=
0
;
mts
<
numMTS
;
++
mts
)
{
BEGIN_YS_LOOP
mixed
wdt
=
ys
*
timeOverMTS
;
chainData
[
chainLength
-
1
].
y
+=
0.25
f
*
wdt
*
chainForces
[
chainLength
-
1
];
for
(
int
bead
=
chainLength
-
2
;
bead
>=
0
;
--
bead
)
{
mixed
aa
=
MIXEDEXP
(
-
0.125
f
*
wdt
*
chainData
[
bead
+
1
].
y
);
chainData
[
bead
].
y
=
aa
*
(
chainData
[
bead
].
y
*
aa
+
0.25
f
*
wdt
*
chainForces
[
bead
]);
}
// update particle velocities
mixed
aa
=
MIXEDEXP
(
-
0.5
f
*
wdt
*
chainData
[
0
].
y
);
scale
*=
aa
;
// update the thermostat positions
for
(
int
bead
=
0
;
bead
<
chainLength
;
++
bead
)
{
chainData
[
bead
].
x
+=
0.5
f
*
chainData
[
bead
].
y
*
wdt
;
}
// update the forces
chainForces
[
0
]
=
(
scale
*
scale
*
KE2
-
numDOFs
*
kT
)
/
chainMasses
[
0
];
// update thermostat velocities
for
(
int
bead
=
0
;
bead
<
chainLength
-
1
;
++
bead
)
{
mixed
aa
=
MIXEDEXP
(
-
0.125
f
*
wdt
*
chainData
[
bead
+
1
].
y
);
chainData
[
bead
].
y
=
aa
*
(
aa
*
chainData
[
bead
].
y
+
0.25
f
*
wdt
*
chainForces
[
bead
]);
chainForces
[
bead
+
1
]
=
(
chainMasses
[
bead
]
*
chainData
[
bead
].
y
*
chainData
[
bead
].
y
-
kT
)
/
chainMasses
[
bead
+
1
];
}
chainData
[
chainLength
-
1
].
y
+=
0.25
f
*
wdt
*
chainForces
[
chainLength
-
1
];
END_YS_LOOP
}
// MTS loop
}
/**
* Compute total (potential + kinetic) energy of the Nose-Hoover beads
*/
extern
"C"
__global__
void
computeHeatBathEnergy
(
mixed
*
__restrict__
heatBathEnergy
,
int
chainLength
,
int
numDOFs
,
mixed
kT
,
float
frequency
,
const
mixed2
*
__restrict__
chainData
){
// Note that this is always incremented; make sure it's zeroed properly before the first call
mixed
&
energy
=
heatBathEnergy
[
0
];
for
(
int
i
=
0
;
i
<
chainLength
;
++
i
)
{
mixed
prefac
=
i
?
1
:
numDOFs
;
mixed
mass
=
prefac
*
kT
/
(
frequency
*
frequency
);
mixed
velocity
=
chainData
[
i
].
y
;
// The kinetic energy of this bead
energy
+=
0.5
f
*
mass
*
velocity
*
velocity
;
// The potential energy of this bead
mixed
position
=
chainData
[
i
].
x
;
energy
+=
prefac
*
kT
*
position
;
}
}
extern
"C"
__global__
void
computeAtomsKineticEnergy
(
mixed2
*
__restrict__
energyBuffer
,
int
numAtoms
,
const
mixed4
*
__restrict__
velm
,
const
int
*
__restrict__
atoms
){
mixed2
energy
=
make_mixed2
(
0
,
0
);
//energy = 1; return;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
atom
=
atoms
[
index
];
mixed4
v
=
velm
[
atom
];
mixed
mass
=
v
.
w
==
0
?
0
:
1
/
v
.
w
;
energy
.
x
+=
0.5
f
*
mass
*
(
v
.
x
*
v
.
x
+
v
.
y
*
v
.
y
+
v
.
z
*
v
.
z
);
}
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
=
energy
;
}
extern
"C"
__global__
void
computePairsKineticEnergy
(
mixed2
*
__restrict__
energyBuffer
,
int
numPairs
,
const
mixed4
*
__restrict__
velm
,
const
int2
*
__restrict__
pairs
){
mixed2
energy
=
make_mixed2
(
0
,
0
);
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numPairs
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int2
pair
=
pairs
[
index
];
int
atom1
=
pair
.
x
;
int
atom2
=
pair
.
y
;
mixed4
v1
=
velm
[
atom1
];
mixed4
v2
=
velm
[
atom2
];
mixed
m1
=
v1
.
w
==
0
?
0
:
1
/
v1
.
w
;
mixed
m2
=
v2
.
w
==
0
?
0
:
1
/
v2
.
w
;
mixed4
cv
;
cv
.
x
=
(
m1
*
v1
.
x
+
m2
*
v2
.
x
)
/
(
m1
+
m2
);
cv
.
y
=
(
m1
*
v1
.
y
+
m2
*
v2
.
y
)
/
(
m1
+
m2
);
cv
.
z
=
(
m1
*
v1
.
z
+
m2
*
v2
.
z
)
/
(
m1
+
m2
);
mixed4
rv
;
rv
.
x
=
v2
.
x
-
v1
.
x
;
rv
.
y
=
v2
.
y
-
v1
.
y
;
rv
.
z
=
v2
.
z
-
v1
.
z
;
energy
.
x
+=
0.5
f
*
(
m1
+
m2
)
*
(
cv
.
x
*
cv
.
x
+
cv
.
y
*
cv
.
y
+
cv
.
z
*
cv
.
z
);
energy
.
y
+=
0.5
f
*
(
m1
*
m2
/
(
m1
+
m2
))
*
(
rv
.
x
*
rv
.
x
+
rv
.
y
*
rv
.
y
+
rv
.
z
*
rv
.
z
);
}
// The atoms version of this has been called already, so accumulate instead of assigning here
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
].
x
+=
energy
.
x
;
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
].
y
+=
energy
.
y
;
}
extern
"C"
__global__
void
scaleAtomsVelocities
(
mixed2
*
__restrict__
scaleFactor
,
int
numAtoms
,
mixed4
*
__restrict__
velm
,
const
int
*
__restrict__
atoms
){
const
mixed
&
scale
=
scaleFactor
[
0
].
x
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
atom
=
atoms
[
index
];
mixed4
&
v
=
velm
[
atom
];
v
.
x
*=
scale
;
v
.
y
*=
scale
;
v
.
z
*=
scale
;
}
}
extern
"C"
__global__
void
scalePairsVelocities
(
mixed2
*
__restrict__
scaleFactor
,
int
numPairs
,
mixed4
*
__restrict__
velm
,
const
int2
*
__restrict__
pairs
){
const
mixed
&
absScale
=
scaleFactor
[
0
].
x
;
const
mixed
&
relScale
=
scaleFactor
[
0
].
y
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numPairs
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
atom1
=
pairs
[
index
].
x
;
int
atom2
=
pairs
[
index
].
y
;
mixed4
v1
=
velm
[
atom1
];
mixed4
v2
=
velm
[
atom2
];
mixed
m1
=
v1
.
w
==
0
?
0
:
1
/
v1
.
w
;
mixed
m2
=
v2
.
w
==
0
?
0
:
1
/
v2
.
w
;
mixed4
cv
;
cv
.
x
=
(
m1
*
v1
.
x
+
m2
*
v2
.
x
)
/
(
m1
+
m2
);
cv
.
y
=
(
m1
*
v1
.
y
+
m2
*
v2
.
y
)
/
(
m1
+
m2
);
cv
.
z
=
(
m1
*
v1
.
z
+
m2
*
v2
.
z
)
/
(
m1
+
m2
);
mixed4
rv
;
rv
.
x
=
v2
.
x
-
v1
.
x
;
rv
.
y
=
v2
.
y
-
v1
.
y
;
rv
.
z
=
v2
.
z
-
v1
.
z
;
v1
.
x
=
absScale
*
cv
.
x
-
relScale
*
rv
.
x
*
m2
/
(
m1
+
m2
);
v1
.
y
=
absScale
*
cv
.
y
-
relScale
*
rv
.
y
*
m2
/
(
m1
+
m2
);
v1
.
z
=
absScale
*
cv
.
z
-
relScale
*
rv
.
z
*
m2
/
(
m1
+
m2
);
v2
.
x
=
absScale
*
cv
.
x
+
relScale
*
rv
.
x
*
m1
/
(
m1
+
m2
);
v2
.
y
=
absScale
*
cv
.
y
+
relScale
*
rv
.
y
*
m1
/
(
m1
+
m2
);
v2
.
z
=
absScale
*
cv
.
z
+
relScale
*
rv
.
z
*
m1
/
(
m1
+
m2
);
velm
[
atom1
]
=
v1
;
velm
[
atom2
]
=
v2
;
}
}
/**
* Sum the energy buffer containing a pair of energies stored as mixed2. This is copied from utilities.cu with small modifications
*/
extern
"C"
__global__
void
reduceEnergyPair
(
const
mixed2
*
__restrict__
energyBuffer
,
mixed2
*
__restrict__
result
,
int
bufferSize
,
int
workGroupSize
)
{
__shared__
mixed2
tempBuffer
[
WORK_GROUP_SIZE
];
const
unsigned
int
thread
=
threadIdx
.
x
;
mixed2
sum
=
make_mixed2
(
0
,
0
);
for
(
unsigned
int
idx
=
thread
;
idx
<
bufferSize
;
idx
+=
blockDim
.
x
)
{
sum
.
x
+=
energyBuffer
[
idx
].
x
;
sum
.
y
+=
energyBuffer
[
idx
].
y
;
}
tempBuffer
[
thread
]
=
sum
;
for
(
int
i
=
1
;
i
<
workGroupSize
;
i
*=
2
)
{
__syncthreads
();
if
(
thread
%
(
i
*
2
)
==
0
&&
thread
+
i
<
workGroupSize
)
{
tempBuffer
[
thread
].
x
+=
tempBuffer
[
thread
+
i
].
x
;
tempBuffer
[
thread
].
y
+=
tempBuffer
[
thread
+
i
].
y
;
}
}
if
(
thread
==
0
)
*
result
=
tempBuffer
[
0
];
}
platforms/cuda/src/kernels/velocityVerlet.cu
deleted
100644 → 0
View file @
a141f79e
/**
* Perform the first step of Velocity Verlet integration.
*
* update displacements (posDelta) and velocities (velm)
*/
extern
"C"
__global__
void
integrateVelocityVerletPart1
(
int
numAtoms
,
int
numPairs
,
int
paddedNumAtoms
,
const
mixed2
*
__restrict__
dt
,
const
real4
*
__restrict__
posq
,
const
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
mixed4
*
__restrict__
posDelta
,
const
int
*
__restrict__
atomList
,
const
int2
*
__restrict__
pairList
)
{
const
mixed2
stepSize
=
dt
[
0
];
const
mixed
dtPos
=
stepSize
.
y
;
const
mixed
dtVel
=
0.5
f
*
(
stepSize
.
x
+
stepSize
.
y
);
const
mixed
scale
=
0.5
f
*
dtVel
/
(
mixed
)
0x100000000
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
atom
=
atomList
[
index
];
mixed4
velocity
=
velm
[
atom
];
if
(
velocity
.
w
!=
0.0
)
{
#ifdef USE_MIXED_PRECISION
real4
pos1
=
posq
[
atom
];
real4
pos2
=
posqCorrection
[
atom
];
mixed4
pos
=
make_mixed4
(
pos1
.
x
+
(
mixed
)
pos2
.
x
,
pos1
.
y
+
(
mixed
)
pos2
.
y
,
pos1
.
z
+
(
mixed
)
pos2
.
z
,
pos1
.
w
);
#else
real4
pos
=
posq
[
atom
];
#endif
velocity
.
x
+=
scale
*
force
[
atom
]
*
velocity
.
w
;
velocity
.
y
+=
scale
*
force
[
atom
+
paddedNumAtoms
]
*
velocity
.
w
;
velocity
.
z
+=
scale
*
force
[
atom
+
paddedNumAtoms
*
2
]
*
velocity
.
w
;
pos
.
x
=
velocity
.
x
*
dtPos
;
pos
.
y
=
velocity
.
y
*
dtPos
;
pos
.
z
=
velocity
.
z
*
dtPos
;
posDelta
[
atom
]
=
pos
;
velm
[
atom
]
=
velocity
;
}
}
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numPairs
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
atom1
=
pairList
[
index
].
x
;
int
atom2
=
pairList
[
index
].
y
;
mixed4
v1
=
velm
[
atom1
];
mixed4
v2
=
velm
[
atom2
];
mixed
m1
=
v1
.
w
==
0.0
f
?
0.0
f
:
1.0
f
/
v1
.
w
;
mixed
m2
=
v2
.
w
==
0.0
f
?
0.0
f
:
1.0
f
/
v2
.
w
;
mixed
mass1fract
=
m1
/
(
m1
+
m2
);
mixed
mass2fract
=
m2
/
(
m1
+
m2
);
mixed
invRedMass
=
(
m1
*
m2
!=
0.0
f
)
?
(
m1
+
m2
)
/
(
m1
*
m2
)
:
0.0
f
;
mixed
invTotMass
=
(
m1
+
m2
!=
0.0
f
)
?
1.0
f
/
(
m1
+
m2
)
:
0.0
f
;
mixed3
comVel
;
comVel
.
x
=
v1
.
x
*
mass1fract
+
v2
.
x
*
mass2fract
;
comVel
.
y
=
v1
.
y
*
mass1fract
+
v2
.
y
*
mass2fract
;
comVel
.
z
=
v1
.
z
*
mass1fract
+
v2
.
z
*
mass2fract
;
mixed3
relVel
;
relVel
.
x
=
v2
.
x
-
v1
.
x
;
relVel
.
y
=
v2
.
y
-
v1
.
y
;
relVel
.
z
=
v2
.
z
-
v1
.
z
;
//
mixed3
comFrc
;
comFrc
.
x
=
force
[
atom1
]
+
force
[
atom2
];
comFrc
.
y
=
force
[
atom1
+
paddedNumAtoms
]
+
force
[
atom2
+
paddedNumAtoms
];
comFrc
.
z
=
force
[
atom1
+
paddedNumAtoms
*
2
]
+
force
[
atom2
+
paddedNumAtoms
*
2
];
mixed3
relFrc
;
relFrc
.
x
=
mass1fract
*
force
[
atom2
]
-
mass2fract
*
force
[
atom1
];
relFrc
.
y
=
mass1fract
*
force
[
atom2
+
paddedNumAtoms
]
-
mass2fract
*
force
[
atom1
+
paddedNumAtoms
];
relFrc
.
z
=
mass1fract
*
force
[
atom2
+
paddedNumAtoms
*
2
]
-
mass2fract
*
force
[
atom1
+
paddedNumAtoms
*
2
];
comVel
.
x
+=
comFrc
.
x
*
scale
*
invTotMass
;
comVel
.
y
+=
comFrc
.
y
*
scale
*
invTotMass
;
comVel
.
z
+=
comFrc
.
z
*
scale
*
invTotMass
;
relVel
.
x
+=
relFrc
.
x
*
scale
*
invRedMass
;
relVel
.
y
+=
relFrc
.
y
*
scale
*
invRedMass
;
relVel
.
z
+=
relFrc
.
z
*
scale
*
invRedMass
;
#ifdef USE_MIXED_PRECISION
real4
posv1
=
posq
[
atom1
];
real4
posv2
=
posq
[
atom2
];
real4
posc1
=
posqCorrection
[
atom1
];
real4
posc2
=
posqCorrection
[
atom2
];
mixed4
pos1
=
make_mixed4
(
posv1
.
x
+
(
mixed
)
posc1
.
x
,
posv1
.
y
+
(
mixed
)
posc1
.
y
,
posv1
.
z
+
(
mixed
)
posc1
.
z
,
posv1
.
w
);
mixed4
pos2
=
make_mixed4
(
posv2
.
x
+
(
mixed
)
posc2
.
x
,
posv2
.
y
+
(
mixed
)
posc2
.
y
,
posv2
.
z
+
(
mixed
)
posc2
.
z
,
posv2
.
w
);
#else
real4
pos1
=
posq
[
atom1
];
real4
pos2
=
posq
[
atom2
];
#endif
if
(
v1
.
w
!=
0.0
f
)
{
v1
.
x
=
comVel
.
x
-
relVel
.
x
*
mass2fract
;
v1
.
y
=
comVel
.
y
-
relVel
.
y
*
mass2fract
;
v1
.
z
=
comVel
.
z
-
relVel
.
z
*
mass2fract
;
pos1
.
x
=
v1
.
x
*
dtPos
;
pos1
.
y
=
v1
.
y
*
dtPos
;
pos1
.
z
=
v1
.
z
*
dtPos
;
posDelta
[
atom1
]
=
pos1
;
velm
[
atom1
]
=
v1
;
}
if
(
v2
.
w
!=
0.0
f
)
{
v2
.
x
=
comVel
.
x
+
relVel
.
x
*
mass1fract
;
v2
.
y
=
comVel
.
y
+
relVel
.
y
*
mass1fract
;
v2
.
z
=
comVel
.
z
+
relVel
.
z
*
mass1fract
;
pos2
.
x
=
v2
.
x
*
dtPos
;
pos2
.
y
=
v2
.
y
*
dtPos
;
pos2
.
z
=
v2
.
z
*
dtPos
;
posDelta
[
atom2
]
=
pos2
;
velm
[
atom2
]
=
v2
;
}
}
}
/**
* Perform the second step of Velocity Verlet integration.
*
* apply displacements to positions (posq) after constraints have been enforced
*/
extern
"C"
__global__
void
integrateVelocityVerletPart2
(
int
numAtoms
,
mixed2
*
__restrict__
dt
,
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
velm
,
const
mixed4
*
__restrict__
posDelta
)
{
mixed2
stepSize
=
dt
[
0
];
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
index
==
0
)
dt
[
0
].
x
=
stepSize
.
y
;
for
(;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
mixed4
velocity
=
velm
[
index
];
if
(
velocity
.
w
!=
0.0
)
{
#ifdef USE_MIXED_PRECISION
real4
pos1
=
posq
[
index
];
real4
pos2
=
posqCorrection
[
index
];
mixed4
pos
=
make_mixed4
(
pos1
.
x
+
(
mixed
)
pos2
.
x
,
pos1
.
y
+
(
mixed
)
pos2
.
y
,
pos1
.
z
+
(
mixed
)
pos2
.
z
,
pos1
.
w
);
#else
real4
pos
=
posq
[
index
];
#endif
mixed4
delta
=
posDelta
[
index
];
pos
.
x
+=
delta
.
x
;
pos
.
y
+=
delta
.
y
;
pos
.
z
+=
delta
.
z
;
#ifdef USE_MIXED_PRECISION
posq
[
index
]
=
make_real4
((
real
)
pos
.
x
,
(
real
)
pos
.
y
,
(
real
)
pos
.
z
,
(
real
)
pos
.
w
);
posqCorrection
[
index
]
=
make_real4
(
pos
.
x
-
(
real
)
pos
.
x
,
pos
.
y
-
(
real
)
pos
.
y
,
pos
.
z
-
(
real
)
pos
.
z
,
0
);
#else
posq
[
index
]
=
pos
;
#endif
}
}
}
/**
* Perform the third step of Velocity Verlet integration.
*
* modify the velocities (velm) after the force update
*/
extern
"C"
__global__
void
integrateVelocityVerletPart3
(
int
numAtoms
,
int
numPairs
,
int
paddedNumAtoms
,
mixed2
*
__restrict__
dt
,
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
const
mixed4
*
__restrict__
posDelta
,
const
int
*
__restrict__
atomList
,
const
int2
*
__restrict__
pairList
)
{
mixed2
stepSize
=
dt
[
0
];
#if __CUDA_ARCH__ >= 130
double
oneOverDt
=
1.0
/
stepSize
.
y
;
#else
float
oneOverDt
=
1.0
f
/
stepSize
.
y
;
float
correction
=
(
1.0
f
-
oneOverDt
*
stepSize
.
y
)
/
stepSize
.
y
;
#endif
const
mixed
dtVel
=
0.5
f
*
(
stepSize
.
x
+
stepSize
.
y
);
const
mixed
scale
=
0.5
f
*
dtVel
/
(
mixed
)
0x100000000
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
index
==
0
)
dt
[
0
].
x
=
stepSize
.
y
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
atom
=
atomList
[
index
];
mixed4
velocity
=
velm
[
atom
];
if
(
velocity
.
w
!=
0.0
)
{
mixed4
deltaXconstrained
=
posDelta
[
atom
];
velocity
.
x
+=
scale
*
force
[
atom
]
*
velocity
.
w
+
(
deltaXconstrained
.
x
-
velocity
.
x
*
stepSize
.
y
)
*
oneOverDt
;
velocity
.
y
+=
scale
*
force
[
atom
+
paddedNumAtoms
]
*
velocity
.
w
+
(
deltaXconstrained
.
y
-
velocity
.
y
*
stepSize
.
y
)
*
oneOverDt
;
velocity
.
z
+=
scale
*
force
[
atom
+
paddedNumAtoms
*
2
]
*
velocity
.
w
+
(
deltaXconstrained
.
z
-
velocity
.
z
*
stepSize
.
y
)
*
oneOverDt
;
#if __CUDA_ARCH__ < 130
velocity
.
x
+=
(
deltaXconstrained
.
x
-
velocity
.
x
*
stepSize
.
y
)
*
correction
;
velocity
.
y
+=
(
deltaXconstrained
.
y
-
velocity
.
y
*
stepSize
.
y
)
*
correction
;
velocity
.
z
+=
(
deltaXconstrained
.
z
-
velocity
.
z
*
stepSize
.
y
)
*
correction
;
#endif
velm
[
atom
]
=
velocity
;
}
}
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numPairs
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
atom1
=
pairList
[
index
].
x
;
int
atom2
=
pairList
[
index
].
y
;
mixed4
v1
=
velm
[
atom1
];
mixed4
v2
=
velm
[
atom2
];
mixed
m1
=
v1
.
w
==
0.0
f
?
0.0
f
:
1.0
f
/
v1
.
w
;
mixed
m2
=
v2
.
w
==
0.0
f
?
0.0
f
:
1.0
f
/
v2
.
w
;
mixed
mass1fract
=
m1
/
(
m1
+
m2
);
mixed
mass2fract
=
m2
/
(
m1
+
m2
);
mixed
invRedMass
=
(
m1
*
m2
!=
0.0
f
)
?
(
m1
+
m2
)
/
(
m1
*
m2
)
:
0.0
f
;
mixed
invTotMass
=
(
m1
+
m2
!=
0.0
f
)
?
1.0
f
/
(
m1
+
m2
)
:
0.0
f
;
mixed3
comVel
;
comVel
.
x
=
v1
.
x
*
mass1fract
+
v2
.
x
*
mass2fract
;
comVel
.
y
=
v1
.
y
*
mass1fract
+
v2
.
y
*
mass2fract
;
comVel
.
z
=
v1
.
z
*
mass1fract
+
v2
.
z
*
mass2fract
;
mixed3
relVel
;
relVel
.
x
=
v2
.
x
-
v1
.
x
;
relVel
.
y
=
v2
.
y
-
v1
.
y
;
relVel
.
z
=
v2
.
z
-
v1
.
z
;
//
mixed3
comFrc
;
comFrc
.
x
=
force
[
atom1
]
+
force
[
atom2
];
comFrc
.
y
=
force
[
atom1
+
paddedNumAtoms
]
+
force
[
atom2
+
paddedNumAtoms
];
comFrc
.
z
=
force
[
atom1
+
paddedNumAtoms
*
2
]
+
force
[
atom2
+
paddedNumAtoms
*
2
];
mixed3
relFrc
;
relFrc
.
x
=
mass1fract
*
force
[
atom2
]
-
mass2fract
*
force
[
atom1
];
relFrc
.
y
=
mass1fract
*
force
[
atom2
+
paddedNumAtoms
]
-
mass2fract
*
force
[
atom1
+
paddedNumAtoms
];
relFrc
.
z
=
mass1fract
*
force
[
atom2
+
paddedNumAtoms
*
2
]
-
mass2fract
*
force
[
atom1
+
paddedNumAtoms
*
2
];
comVel
.
x
+=
comFrc
.
x
*
scale
*
invTotMass
;
comVel
.
y
+=
comFrc
.
y
*
scale
*
invTotMass
;
comVel
.
z
+=
comFrc
.
z
*
scale
*
invTotMass
;
relVel
.
x
+=
relFrc
.
x
*
scale
*
invRedMass
;
relVel
.
y
+=
relFrc
.
y
*
scale
*
invRedMass
;
relVel
.
z
+=
relFrc
.
z
*
scale
*
invRedMass
;
if
(
v1
.
w
!=
0.0
f
)
{
mixed4
deltaXconstrained
=
posDelta
[
atom1
];
v1
.
x
=
comVel
.
x
-
relVel
.
x
*
mass2fract
+
(
deltaXconstrained
.
x
-
v1
.
x
*
stepSize
.
y
)
*
oneOverDt
;
v1
.
y
=
comVel
.
y
-
relVel
.
y
*
mass2fract
+
(
deltaXconstrained
.
y
-
v1
.
y
*
stepSize
.
y
)
*
oneOverDt
;
v1
.
z
=
comVel
.
z
-
relVel
.
z
*
mass2fract
+
(
deltaXconstrained
.
z
-
v1
.
z
*
stepSize
.
y
)
*
oneOverDt
;
#if __CUDA_ARCH__ < 130
v1
.
x
+=
(
deltaXconstrained
.
x
-
v1
.
x
*
stepSize
.
y
)
*
correction
;
v1
.
y
+=
(
deltaXconstrained
.
y
-
v1
.
y
*
stepSize
.
y
)
*
correction
;
v1
.
z
+=
(
deltaXconstrained
.
z
-
v1
.
z
*
stepSize
.
y
)
*
correction
;
#endif
velm
[
atom1
]
=
v1
;
}
if
(
v2
.
w
!=
0.0
f
)
{
mixed4
deltaXconstrained
=
posDelta
[
atom2
];
v2
.
x
=
comVel
.
x
+
relVel
.
x
*
mass1fract
+
(
deltaXconstrained
.
x
-
v2
.
x
*
stepSize
.
y
)
*
oneOverDt
;
v2
.
y
=
comVel
.
y
+
relVel
.
y
*
mass1fract
+
(
deltaXconstrained
.
y
-
v2
.
y
*
stepSize
.
y
)
*
oneOverDt
;
v2
.
z
=
comVel
.
z
+
relVel
.
z
*
mass1fract
+
(
deltaXconstrained
.
z
-
v2
.
z
*
stepSize
.
y
)
*
oneOverDt
;
#if __CUDA_ARCH__ < 130
v2
.
x
+=
(
deltaXconstrained
.
x
-
v2
.
x
*
stepSize
.
y
)
*
correction
;
v2
.
y
+=
(
deltaXconstrained
.
y
-
v2
.
y
*
stepSize
.
y
)
*
correction
;
v2
.
z
+=
(
deltaXconstrained
.
z
-
v2
.
z
*
stepSize
.
y
)
*
correction
;
#endif
velm
[
atom2
]
=
v2
;
}
}
}
/**
* Apply the hard wall constraint
*/
extern
"C"
__global__
void
integrateVelocityVerletHardWall
(
int
numPairs
,
const
float
*
__restrict__
maxPairDistance
,
mixed2
*
__restrict__
dt
,
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
velm
,
const
int2
*
__restrict__
pairList
,
const
float
*
__restrict__
pairTemperature
)
{
mixed
dtPos
=
dt
[
0
].
y
;
mixed
maxDelta
=
(
mixed
)
maxPairDistance
[
0
];
// Apply hard wall constraints.
if
(
maxDelta
>
0
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numPairs
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
const
mixed
hardWallScale
=
sqrt
(
((
mixed
)
pairTemperature
[
index
])
*
((
mixed
)
BOLTZ
));
int2
atom
=
make_int2
(
pairList
[
index
].
x
,
pairList
[
index
].
y
);
#ifdef USE_MIXED_PRECISION
real4
posv1
=
posq
[
atom
.
x
];
real4
posc1
=
posqCorrection
[
atom
.
x
];
mixed4
pos1
=
make_mixed4
(
posv1
.
x
+
(
mixed
)
posc1
.
x
,
posv1
.
y
+
(
mixed
)
posc1
.
y
,
posv1
.
z
+
(
mixed
)
posc1
.
z
,
posv1
.
w
);
real4
posv2
=
posq
[
atom
.
y
];
real4
posc2
=
posqCorrection
[
atom
.
y
];
mixed4
pos2
=
make_mixed4
(
posv2
.
x
+
(
mixed
)
posc2
.
x
,
posv2
.
y
+
(
mixed
)
posc2
.
y
,
posv2
.
z
+
(
mixed
)
posc2
.
z
,
posv2
.
w
);
#else
real4
pos1
=
posq
[
atom
.
x
];
real4
pos2
=
posq
[
atom
.
y
];
#endif
mixed3
delta
=
make_mixed3
(
mixed
(
pos1
.
x
-
pos2
.
x
),
mixed
(
pos1
.
y
-
pos2
.
y
),
mixed
(
pos1
.
z
-
pos2
.
z
)
);
mixed
r
=
sqrt
(
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
);
mixed
rInv
=
1
/
r
;
if
(
rInv
*
maxDelta
<
1.0
)
{
// The constraint has been violated, so make the inter-particle distance "bounce"
// off the hard wall.
mixed3
bondDir
=
make_mixed3
(
delta
.
x
*
rInv
,
delta
.
y
*
rInv
,
delta
.
z
*
rInv
);
mixed3
vel1
=
make_mixed3
(
velm
[
atom
.
x
].
x
,
velm
[
atom
.
x
].
y
,
velm
[
atom
.
x
].
z
);
mixed3
vel2
=
make_mixed3
(
velm
[
atom
.
y
].
x
,
velm
[
atom
.
y
].
y
,
velm
[
atom
.
y
].
z
);
mixed
m1
=
velm
[
atom
.
x
].
w
!=
0.0
?
1.0
/
velm
[
atom
.
x
].
w
:
0.0
;
mixed
m2
=
velm
[
atom
.
y
].
w
!=
0.0
?
1.0
/
velm
[
atom
.
y
].
w
:
0.0
;
mixed
invTotMass
=
(
m1
+
m2
!=
0.0
)
?
1.0
/
(
m1
+
m2
)
:
0.0
;
mixed
deltaR
=
r
-
maxDelta
;
mixed
deltaT
=
dtPos
;
mixed
dt
=
dtPos
;
mixed
dotvr1
=
vel1
.
x
*
bondDir
.
x
+
vel1
.
y
*
bondDir
.
y
+
vel1
.
z
*
bondDir
.
z
;
mixed3
vb1
=
make_mixed3
(
bondDir
.
x
*
dotvr1
,
bondDir
.
y
*
dotvr1
,
bondDir
.
z
*
dotvr1
);
mixed3
vp1
=
make_mixed3
(
vel1
.
x
-
vb1
.
x
,
vel1
.
y
-
vb1
.
y
,
vel1
.
z
-
vb1
.
z
);
if
(
m2
==
0
)
{
// The parent particle is massless, so move only the Drude particle.
if
(
dotvr1
!=
0.0
)
deltaT
=
deltaR
/
fabs
(
dotvr1
);
if
(
deltaT
>
dtPos
)
deltaT
=
dtPos
;
dotvr1
=
-
dotvr1
*
hardWallScale
/
(
fabs
(
dotvr1
)
*
sqrt
(
m1
));
mixed
dr
=
-
deltaR
+
deltaT
*
dotvr1
;
pos1
.
x
+=
bondDir
.
x
*
dr
;
pos1
.
y
+=
bondDir
.
y
*
dr
;
pos1
.
z
+=
bondDir
.
z
*
dr
;
velm
[
atom
.
x
]
=
make_mixed4
(
vp1
.
x
+
bondDir
.
x
*
dotvr1
,
vp1
.
y
+
bondDir
.
y
*
dotvr1
,
vp1
.
z
+
bondDir
.
z
*
dotvr1
,
velm
[
atom
.
x
].
w
);
#ifdef USE_MIXED_PRECISION
posq
[
atom
.
x
]
=
make_real4
((
real
)
pos1
.
x
,
(
real
)
pos1
.
y
,
(
real
)
pos1
.
z
,
(
real
)
pos1
.
w
);
posqCorrection
[
atom
.
x
]
=
make_real4
(
pos1
.
x
-
(
real
)
pos1
.
x
,
pos1
.
y
-
(
real
)
pos1
.
y
,
pos1
.
z
-
(
real
)
pos1
.
z
,
0
);
#else
posq
[
atom
.
x
]
=
pos1
;
#endif
}
else
{
// Move both particles.
mixed
dotvr2
=
vel2
.
x
*
bondDir
.
x
+
vel2
.
y
*
bondDir
.
y
+
vel2
.
z
*
bondDir
.
z
;
mixed3
vb2
=
make_mixed3
(
bondDir
.
x
*
dotvr2
,
bondDir
.
y
*
dotvr2
,
bondDir
.
z
*
dotvr2
);
mixed3
vp2
=
make_mixed3
(
vel2
.
x
-
vb2
.
x
,
vel2
.
y
-
vb2
.
y
,
vel2
.
z
-
vb2
.
z
);
mixed
vbCMass
=
(
m1
*
dotvr1
+
m2
*
dotvr2
)
*
invTotMass
;
dotvr1
-=
vbCMass
;
dotvr2
-=
vbCMass
;
if
(
dotvr1
!=
dotvr2
)
deltaT
=
deltaR
/
fabs
(
dotvr1
-
dotvr2
);
if
(
deltaT
>
dt
)
deltaT
=
dt
;
mixed
vBond
=
hardWallScale
/
sqrt
(
m1
);
dotvr1
=
-
dotvr1
*
vBond
*
m2
*
invTotMass
/
fabs
(
dotvr1
);
dotvr2
=
-
dotvr2
*
vBond
*
m1
*
invTotMass
/
fabs
(
dotvr2
);
mixed
dr1
=
-
deltaR
*
m2
*
invTotMass
+
deltaT
*
dotvr1
;
mixed
dr2
=
deltaR
*
m1
*
invTotMass
+
deltaT
*
dotvr2
;
dotvr1
+=
vbCMass
;
dotvr2
+=
vbCMass
;
pos1
.
x
+=
bondDir
.
x
*
dr1
;
pos1
.
y
+=
bondDir
.
y
*
dr1
;
pos1
.
z
+=
bondDir
.
z
*
dr1
;
pos2
.
x
+=
bondDir
.
x
*
dr2
;
pos2
.
y
+=
bondDir
.
y
*
dr2
;
pos2
.
z
+=
bondDir
.
z
*
dr2
;
velm
[
atom
.
x
]
=
make_mixed4
(
vp1
.
x
+
bondDir
.
x
*
dotvr1
,
vp1
.
y
+
bondDir
.
y
*
dotvr1
,
vp1
.
z
+
bondDir
.
z
*
dotvr1
,
velm
[
atom
.
x
].
w
);
velm
[
atom
.
y
]
=
make_mixed4
(
vp2
.
x
+
bondDir
.
x
*
dotvr2
,
vp2
.
y
+
bondDir
.
y
*
dotvr2
,
vp2
.
z
+
bondDir
.
z
*
dotvr2
,
velm
[
atom
.
y
].
w
);
#ifdef USE_MIXED_PRECISION
posq
[
atom
.
x
]
=
make_real4
((
real
)
pos1
.
x
,
(
real
)
pos1
.
y
,
(
real
)
pos1
.
z
,
(
real
)
pos1
.
w
);
posq
[
atom
.
y
]
=
make_real4
((
real
)
pos2
.
x
,
(
real
)
pos2
.
y
,
(
real
)
pos2
.
z
,
(
real
)
pos2
.
w
);
posqCorrection
[
atom
.
x
]
=
make_real4
(
pos1
.
x
-
(
real
)
pos1
.
x
,
pos1
.
y
-
(
real
)
pos1
.
y
,
pos1
.
z
-
(
real
)
pos1
.
z
,
0
);
posqCorrection
[
atom
.
y
]
=
make_real4
(
pos2
.
x
-
(
real
)
pos2
.
x
,
pos2
.
y
-
(
real
)
pos2
.
y
,
pos2
.
z
-
(
real
)
pos2
.
z
,
0
);
#else
posq
[
atom
.
x
]
=
pos1
;
posq
[
atom
.
y
]
=
pos2
;
#endif
}
}
}
}
/* end of hard wall constraint part */
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment