Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
19d2885a
Commit
19d2885a
authored
Jan 23, 2014
by
Lee-Ping
Browse files
Merge github.com:SimTk/openmm
parents
99ef4344
57a6768e
Changes
310
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
4764 additions
and
611 deletions
+4764
-611
platforms/cpu/src/CpuBondForce.cpp
platforms/cpu/src/CpuBondForce.cpp
+217
-0
platforms/cpu/src/CpuGBSAOBCForce.cpp
platforms/cpu/src/CpuGBSAOBCForce.cpp
+419
-0
platforms/cpu/src/CpuKernelFactory.cpp
platforms/cpu/src/CpuKernelFactory.cpp
+12
-2
platforms/cpu/src/CpuKernels.cpp
platforms/cpu/src/CpuKernels.cpp
+403
-45
platforms/cpu/src/CpuLangevinDynamics.cpp
platforms/cpu/src/CpuLangevinDynamics.cpp
+126
-0
platforms/cpu/src/CpuNeighborList.cpp
platforms/cpu/src/CpuNeighborList.cpp
+162
-157
platforms/cpu/src/CpuNonbondedForce.cpp
platforms/cpu/src/CpuNonbondedForce.cpp
+115
-372
platforms/cpu/src/CpuNonbondedForceVec4.cpp
platforms/cpu/src/CpuNonbondedForceVec4.cpp
+301
-0
platforms/cpu/src/CpuNonbondedForceVec8.cpp
platforms/cpu/src/CpuNonbondedForceVec8.cpp
+333
-0
platforms/cpu/src/CpuPlatform.cpp
platforms/cpu/src/CpuPlatform.cpp
+40
-0
platforms/cpu/src/CpuRandom.cpp
platforms/cpu/src/CpuRandom.cpp
+84
-0
platforms/cpu/src/CpuSETTLE.cpp
platforms/cpu/src/CpuSETTLE.cpp
+104
-0
platforms/cpu/src/gmx_atomic.h
platforms/cpu/src/gmx_atomic.h
+1596
-0
platforms/cpu/tests/CMakeLists.txt
platforms/cpu/tests/CMakeLists.txt
+1
-0
platforms/cpu/tests/TestCpuGBSAOBCForce.cpp
platforms/cpu/tests/TestCpuGBSAOBCForce.cpp
+244
-0
platforms/cpu/tests/TestCpuLangevinIntegrator.cpp
platforms/cpu/tests/TestCpuLangevinIntegrator.cpp
+283
-0
platforms/cpu/tests/TestCpuNeighborList.cpp
platforms/cpu/tests/TestCpuNeighborList.cpp
+9
-5
platforms/cpu/tests/TestCpuNonbondedForce.cpp
platforms/cpu/tests/TestCpuNonbondedForce.cpp
+30
-30
platforms/cpu/tests/TestCpuPeriodicTorsionForce.cpp
platforms/cpu/tests/TestCpuPeriodicTorsionForce.cpp
+133
-0
platforms/cpu/tests/TestCpuRBTorsionForce.cpp
platforms/cpu/tests/TestCpuRBTorsionForce.cpp
+152
-0
No files found.
platforms/cpu/src/CpuBondForce.cpp
0 → 100644
View file @
19d2885a
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2014 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CpuBondForce.h"
#include "openmm/OpenMMException.h"
using
namespace
OpenMM
;
using
namespace
std
;
class
CpuBondForce
::
ComputeForceTask
:
public
ThreadPool
::
Task
{
public:
ComputeForceTask
(
CpuBondForce
&
owner
,
vector
<
RealVec
>&
atomCoordinates
,
RealOpenMM
**
parameters
,
vector
<
RealVec
>&
forces
,
vector
<
RealOpenMM
>&
threadEnergy
,
RealOpenMM
*
totalEnergy
,
ReferenceBondIxn
&
referenceBondIxn
)
:
owner
(
owner
),
atomCoordinates
(
atomCoordinates
),
parameters
(
parameters
),
forces
(
forces
),
threadEnergy
(
threadEnergy
),
totalEnergy
(
totalEnergy
),
referenceBondIxn
(
referenceBondIxn
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
RealOpenMM
*
energy
=
(
totalEnergy
==
NULL
?
NULL
:
&
threadEnergy
[
threadIndex
]);
owner
.
threadComputeForce
(
threads
,
threadIndex
,
atomCoordinates
,
parameters
,
forces
,
energy
,
referenceBondIxn
);
}
CpuBondForce
&
owner
;
vector
<
RealVec
>&
atomCoordinates
;
RealOpenMM
**
parameters
;
vector
<
RealVec
>&
forces
;
vector
<
RealOpenMM
>&
threadEnergy
;
RealOpenMM
*
totalEnergy
;
ReferenceBondIxn
&
referenceBondIxn
;
};
CpuBondForce
::
CpuBondForce
()
{
}
void
CpuBondForce
::
initialize
(
int
numAtoms
,
int
numBonds
,
int
numAtomsPerBond
,
int
**
bondAtoms
,
ThreadPool
&
threads
)
{
this
->
numBonds
=
numBonds
;
this
->
numAtomsPerBond
=
numAtomsPerBond
;
this
->
bondAtoms
=
bondAtoms
;
this
->
threads
=
&
threads
;
int
numThreads
=
threads
.
getNumThreads
();
int
targetBondsPerThread
=
numBonds
/
numThreads
;
// Record the bonds that include each atom.
vector
<
set
<
int
>
>
atomBonds
(
numAtoms
);
for
(
int
bond
=
0
;
bond
<
numBonds
;
bond
++
)
{
for
(
int
i
=
0
;
i
<
numAtomsPerBond
;
i
++
)
atomBonds
[
bondAtoms
[
bond
][
i
]].
insert
(
bond
);
}
// Divide bonds into groups.
vector
<
int
>
atomThread
(
numAtoms
,
-
1
);
vector
<
int
>
bondThread
(
numBonds
,
-
1
);
threadBonds
.
resize
(
numThreads
);
int
numProcessed
=
0
;
int
thread
=
0
;
list
<
int
>
candidateBonds
;
while
(
thread
<
numThreads
)
{
// Find the next unassigned bond.
while
(
numProcessed
<
numBonds
&&
bondThread
[
numProcessed
]
!=
-
1
)
numProcessed
++
;
if
(
numProcessed
==
numBonds
)
break
;
// We've gone through the whole list of bonds.
// See if this bond can be assigned to this thread.
if
(
!
canAssignBond
(
numProcessed
,
thread
,
atomThread
))
{
numProcessed
++
;
continue
;
}
// Assign this bond to the thread.
assignBond
(
numProcessed
++
,
thread
,
atomThread
,
bondThread
,
atomBonds
,
candidateBonds
);
// Assign additional bonds that have been identified as involving atoms assigned to this thread.
while
(
!
candidateBonds
.
empty
()
&&
threadBonds
[
thread
].
size
()
<
targetBondsPerThread
)
{
int
bond
=
*
candidateBonds
.
begin
();
if
(
bondThread
[
bond
]
==
-
1
&&
canAssignBond
(
bond
,
thread
,
atomThread
))
assignBond
(
bond
,
thread
,
atomThread
,
bondThread
,
atomBonds
,
candidateBonds
);
candidateBonds
.
pop_front
();
}
// If we have assigned enough bonds to this thread, move on to the next one.
if
(
threadBonds
[
thread
].
size
()
>=
targetBondsPerThread
)
{
candidateBonds
.
clear
();
thread
++
;
}
}
// Look through the remaining bonds and see whether any of them can be assigned.
candidateBonds
.
clear
();
for
(
int
bond
=
0
;
bond
<
numBonds
;
bond
++
)
{
if
(
bondThread
[
bond
]
==
-
1
)
{
// See whether this bond can be assigned to a thread.
bool
canAssign
=
true
;
int
assignment
=
-
1
;
for
(
int
i
=
0
;
i
<
numAtomsPerBond
;
i
++
)
{
int
thread
=
atomThread
[
bondAtoms
[
bond
][
i
]];
if
(
thread
==
-
1
||
thread
==
assignment
)
continue
;
if
(
assignment
==
-
1
)
assignment
=
thread
;
else
{
canAssign
=
false
;
break
;
}
}
if
(
canAssign
)
{
// Assign this bond to a thread.
if
(
assignment
==
-
1
)
assignment
=
numThreads
-
1
;
assignBond
(
bond
,
assignment
,
atomThread
,
bondThread
,
atomBonds
,
candidateBonds
);
}
else
{
// Add it to the list of "extra" bonds.
extraBonds
.
push_back
(
bond
);
}
}
}
}
bool
CpuBondForce
::
canAssignBond
(
int
bond
,
int
thread
,
vector
<
int
>&
atomThread
)
{
for
(
int
i
=
0
;
i
<
numAtomsPerBond
;
i
++
)
{
int
atom
=
bondAtoms
[
bond
][
i
];
if
(
atomThread
[
atom
]
!=
-
1
&&
atomThread
[
atom
]
!=
thread
)
return
false
;
}
return
true
;
}
void
CpuBondForce
::
assignBond
(
int
bond
,
int
thread
,
vector
<
int
>&
atomThread
,
vector
<
int
>&
bondThread
,
vector
<
set
<
int
>
>&
atomBonds
,
list
<
int
>&
candidateBonds
)
{
// Assign the bond to a thread.
bondThread
[
bond
]
=
thread
;
threadBonds
[
thread
].
push_back
(
bond
);
// Mark every atom in this bond as also belonging to the thread, and add all of their
// bonds to the list of candidates.
for
(
int
i
=
0
;
i
<
numAtomsPerBond
;
i
++
)
{
int
&
atom
=
atomThread
[
bondAtoms
[
bond
][
i
]];
if
(
atom
==
thread
)
continue
;
if
(
atom
!=
-
1
)
throw
OpenMMException
(
"CpuBondForce: Internal error: atoms assigned to threads incorrectly"
);
atom
=
thread
;
for
(
set
<
int
>::
const_iterator
iter
=
atomBonds
[
atom
].
begin
();
iter
!=
atomBonds
[
atom
].
end
();
++
iter
)
candidateBonds
.
push_back
(
*
iter
);
}
}
void
CpuBondForce
::
calculateForce
(
vector
<
RealVec
>&
atomCoordinates
,
RealOpenMM
**
parameters
,
vector
<
RealVec
>&
forces
,
RealOpenMM
*
totalEnergy
,
ReferenceBondIxn
&
referenceBondIxn
)
{
// Have the worker threads compute their forces.
vector
<
RealOpenMM
>
threadEnergy
(
threads
->
getNumThreads
(),
0
);
ComputeForceTask
task
(
*
this
,
atomCoordinates
,
parameters
,
forces
,
threadEnergy
,
totalEnergy
,
referenceBondIxn
);
threads
->
execute
(
task
);
threads
->
waitForThreads
();
// Compute any "extra" bonds.
for
(
int
i
=
0
;
i
<
extraBonds
.
size
();
i
++
)
{
int
bond
=
extraBonds
[
i
];
referenceBondIxn
.
calculateBondIxn
(
bondAtoms
[
bond
],
atomCoordinates
,
parameters
[
bond
],
forces
,
totalEnergy
);
}
// Compute the total energy.
if
(
totalEnergy
!=
NULL
)
for
(
int
i
=
0
;
i
<
threads
->
getNumThreads
();
i
++
)
*
totalEnergy
+=
threadEnergy
[
i
];
}
void
CpuBondForce
::
threadComputeForce
(
ThreadPool
&
threads
,
int
threadIndex
,
vector
<
RealVec
>&
atomCoordinates
,
RealOpenMM
**
parameters
,
vector
<
RealVec
>&
forces
,
RealOpenMM
*
totalEnergy
,
ReferenceBondIxn
&
referenceBondIxn
)
{
vector
<
int
>&
bonds
=
threadBonds
[
threadIndex
];
int
numBonds
=
bonds
.
size
();
for
(
int
i
=
0
;
i
<
numBonds
;
i
++
)
{
int
bond
=
bonds
[
i
];
referenceBondIxn
.
calculateBondIxn
(
bondAtoms
[
bond
],
atomCoordinates
,
parameters
[
bond
],
forces
,
totalEnergy
);
}
}
\ No newline at end of file
platforms/cpu/src/CpuGBSAOBCForce.cpp
0 → 100644
View file @
19d2885a
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "CpuGBSAOBCForce.h"
#include "SimTKOpenMMRealType.h"
#include "openmm/internal/vectorize.h"
#include "gmx_atomic.h"
#include <algorithm>
#include <cmath>
#include <cstdlib>
using
namespace
std
;
using
namespace
OpenMM
;
const
int
CpuGBSAOBCForce
::
NUM_TABLE_POINTS
=
4096
;
const
float
CpuGBSAOBCForce
::
TABLE_MIN
=
0.25
f
;
const
float
CpuGBSAOBCForce
::
TABLE_MAX
=
1.5
f
;
class
CpuGBSAOBCForce
::
ComputeTask
:
public
ThreadPool
::
Task
{
public:
ComputeTask
(
CpuGBSAOBCForce
&
owner
)
:
owner
(
owner
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
owner
.
threadComputeForce
(
threads
,
threadIndex
);
}
CpuGBSAOBCForce
&
owner
;
};
CpuGBSAOBCForce
::
CpuGBSAOBCForce
()
:
cutoff
(
false
),
periodic
(
false
)
{
logDX
=
(
TABLE_MAX
-
TABLE_MIN
)
/
NUM_TABLE_POINTS
;
logDXInv
=
1.0
f
/
logDX
;
logTable
.
resize
(
NUM_TABLE_POINTS
+
4
);
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
+
4
;
i
++
)
{
double
x
=
TABLE_MIN
+
i
*
logDX
;
logTable
[
i
]
=
log
(
x
);
}
}
void
CpuGBSAOBCForce
::
setUseCutoff
(
float
distance
)
{
cutoff
=
true
;
cutoffDistance
=
distance
;
}
void
CpuGBSAOBCForce
::
setPeriodic
(
float
*
periodicBoxSize
)
{
periodic
=
true
;
this
->
periodicBoxSize
[
0
]
=
periodicBoxSize
[
0
];
this
->
periodicBoxSize
[
1
]
=
periodicBoxSize
[
1
];
this
->
periodicBoxSize
[
2
]
=
periodicBoxSize
[
2
];
}
void
CpuGBSAOBCForce
::
setSoluteDielectric
(
float
dielectric
)
{
soluteDielectric
=
dielectric
;
}
void
CpuGBSAOBCForce
::
setSolventDielectric
(
float
dielectric
)
{
solventDielectric
=
dielectric
;
}
const
std
::
vector
<
std
::
pair
<
float
,
float
>
>&
CpuGBSAOBCForce
::
getParticleParameters
()
const
{
return
particleParams
;
}
void
CpuGBSAOBCForce
::
setParticleParameters
(
const
std
::
vector
<
std
::
pair
<
float
,
float
>
>&
params
)
{
particleParams
=
params
;
bornRadii
.
resize
(
params
.
size
()
+
3
);
obcChain
.
resize
(
params
.
size
()
+
3
);
}
void
CpuGBSAOBCForce
::
computeForce
(
const
AlignedArray
<
float
>&
posq
,
vector
<
AlignedArray
<
float
>
>&
threadForce
,
double
*
totalEnergy
,
ThreadPool
&
threads
)
{
// Record the parameters for the threads.
this
->
posq
=
&
posq
[
0
];
this
->
threadForce
=
&
threadForce
;
includeEnergy
=
(
totalEnergy
!=
NULL
);
int
numThreads
=
threads
.
getNumThreads
();
threadEnergy
.
resize
(
numThreads
);
threadBornForces
.
resize
(
numThreads
);
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
threadBornForces
[
i
].
resize
(
particleParams
.
size
()
+
3
);
gmx_atomic_t
counter
;
this
->
atomicCounter
=
&
counter
;
// Signal the threads to start running and wait for them to finish.
ComputeTask
task
(
*
this
);
gmx_atomic_set
(
&
counter
,
0
);
threads
.
execute
(
task
);
threads
.
waitForThreads
();
// Compute Born radii
gmx_atomic_set
(
&
counter
,
0
);
threads
.
resumeThreads
();
threads
.
waitForThreads
();
// Compute surface area term
gmx_atomic_set
(
&
counter
,
0
);
threads
.
resumeThreads
();
threads
.
waitForThreads
();
// First loop
gmx_atomic_set
(
&
counter
,
0
);
threads
.
resumeThreads
();
threads
.
waitForThreads
();
// Second loop
// Combine the energies from all the threads.
if
(
totalEnergy
!=
NULL
)
{
double
energy
=
0
;
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
energy
+=
threadEnergy
[
i
];
*
totalEnergy
+=
(
float
)
energy
;
}
}
void
CpuGBSAOBCForce
::
threadComputeForce
(
ThreadPool
&
threads
,
int
threadIndex
)
{
int
numParticles
=
particleParams
.
size
();
int
numThreads
=
threads
.
getNumThreads
();
const
float
dielectricOffset
=
0.009
;
const
float
alphaObc
=
1.0
f
;
const
float
betaObc
=
0.8
f
;
const
float
gammaObc
=
4.85
f
;
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
fvec4
invBoxSize
((
1
/
periodicBoxSize
[
0
]),
(
1
/
periodicBoxSize
[
1
]),
(
1
/
periodicBoxSize
[
2
]),
0
);
int
start
=
(
threadIndex
*
numParticles
)
/
numThreads
;
int
end
=
((
threadIndex
+
1
)
*
numParticles
)
/
numThreads
;
// Calculate Born radii
while
(
true
)
{
int
blockStart
=
gmx_atomic_fetch_add
(
reinterpret_cast
<
gmx_atomic_t
*>
(
atomicCounter
),
4
);
if
(
blockStart
>=
numParticles
)
break
;
int
numInBlock
=
min
(
4
,
numParticles
-
blockStart
);
ivec4
blockAtomIndex
(
blockStart
,
blockStart
+
1
,
blockStart
+
2
,
blockStart
+
3
);
float
atomRadius
[
4
],
atomx
[
4
],
atomy
[
4
],
atomz
[
4
];
int
blockMask
[
4
]
=
{
0
,
0
,
0
,
0
};
for
(
int
i
=
0
;
i
<
numInBlock
;
i
++
)
{
int
atomIndex
=
blockStart
+
i
;
atomRadius
[
i
]
=
particleParams
[
atomIndex
].
first
;
atomx
[
i
]
=
posq
[
4
*
atomIndex
];
atomy
[
i
]
=
posq
[
4
*
atomIndex
+
1
];
atomz
[
i
]
=
posq
[
4
*
atomIndex
+
2
];
blockMask
[
i
]
=
0xFFFFFFFF
;
}
fvec4
offsetRadiusI
(
atomRadius
);
fvec4
radiusIInverse
=
1.0
f
/
offsetRadiusI
;
fvec4
x
(
atomx
);
fvec4
y
(
atomy
);
fvec4
z
(
atomz
);
ivec4
mask
(
blockMask
);
float
sum
[
4
]
=
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
};
for
(
int
atomJ
=
0
;
atomJ
<
numParticles
;
atomJ
++
)
{
fvec4
posJ
(
posq
+
4
*
atomJ
);
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
posJ
,
x
,
y
,
z
,
dx
,
dy
,
dz
,
r2
,
periodic
,
boxSize
,
invBoxSize
);
ivec4
include
=
mask
&
(
blockAtomIndex
!=
ivec4
(
atomJ
));
if
(
cutoff
)
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
fvec4
r
=
sqrt
(
r2
);
float
scaledRadiusJ
=
particleParams
[
atomJ
].
second
;
float
scaledRadiusJ2
=
scaledRadiusJ
*
scaledRadiusJ
;
fvec4
rScaledRadiusJ
=
r
+
scaledRadiusJ
;
include
=
include
&
(
offsetRadiusI
<
rScaledRadiusJ
);
fvec4
l_ij
=
1.0
f
/
max
(
offsetRadiusI
,
abs
(
r
-
scaledRadiusJ
));
fvec4
u_ij
=
1.0
f
/
rScaledRadiusJ
;
fvec4
l_ij2
=
l_ij
*
l_ij
;
fvec4
u_ij2
=
u_ij
*
u_ij
;
fvec4
rInverse
=
1.0
f
/
r
;
fvec4
r2Inverse
=
rInverse
*
rInverse
;
fvec4
logRatio
=
fastLog
(
u_ij
/
l_ij
);
fvec4
term
=
l_ij
-
u_ij
+
0.25
f
*
r
*
(
u_ij2
-
l_ij2
)
+
(
0.5
f
*
rInverse
*
logRatio
)
+
(
0.25
f
*
scaledRadiusJ
*
scaledRadiusJ
*
rInverse
)
*
(
l_ij2
-
u_ij2
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
if
(
include
[
j
])
{
sum
[
j
]
+=
term
[
j
];
if
(
offsetRadiusI
[
j
]
<
scaledRadiusJ
-
r
[
j
])
sum
[
j
]
+=
2.0
f
*
(
radiusIInverse
[
j
]
-
l_ij
[
j
]);
}
}
}
for
(
int
i
=
0
;
i
<
numInBlock
;
i
++
)
{
int
atomIndex
=
blockStart
+
i
;
sum
[
i
]
*=
0.5
f
*
atomRadius
[
i
];
float
sum2
=
sum
[
i
]
*
sum
[
i
];
float
sum3
=
sum
[
i
]
*
sum2
;
float
tanhSum
=
tanh
(
alphaObc
*
sum
[
i
]
-
betaObc
*
sum2
+
gammaObc
*
sum3
);
float
radiusI
=
atomRadius
[
i
]
+
dielectricOffset
;
bornRadii
[
atomIndex
]
=
1.0
f
/
(
1.0
f
/
atomRadius
[
i
]
-
tanhSum
/
radiusI
);
obcChain
[
atomIndex
]
=
atomRadius
[
i
]
*
(
alphaObc
-
2.0
f
*
betaObc
*
sum
[
i
]
+
3.0
f
*
gammaObc
*
sum2
);
obcChain
[
atomIndex
]
=
(
1.0
f
-
tanhSum
*
tanhSum
)
*
obcChain
[
atomIndex
]
/
radiusI
;
}
}
threads
.
syncThreads
();
// Calculate ACE surface area term.
const
float
probeRadius
=
0.14
f
;
const
float
surfaceAreaFactor
=
28.3919551
;
double
energy
=
0.0
;
vector
<
float
>&
bornForces
=
threadBornForces
[
threadIndex
];
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
bornForces
[
i
]
=
0.0
f
;
while
(
true
)
{
int
atomI
=
gmx_atomic_fetch_add
(
reinterpret_cast
<
gmx_atomic_t
*>
(
atomicCounter
),
1
);
if
(
atomI
>=
numParticles
)
break
;
if
(
bornRadii
[
atomI
]
>
0
)
{
float
radiusI
=
particleParams
[
atomI
].
first
+
dielectricOffset
;
float
r
=
radiusI
+
probeRadius
;
float
ratio6
=
powf
(
radiusI
/
bornRadii
[
atomI
],
6.0
f
);
float
saTerm
=
surfaceAreaFactor
*
r
*
r
*
ratio6
;
energy
+=
saTerm
;
bornForces
[
atomI
]
=
-
6.0
f
*
saTerm
/
bornRadii
[
atomI
];
}
else
bornForces
[
atomI
]
=
0.0
f
;
}
threads
.
syncThreads
();
// First loop of Born energy computation.
float
*
forces
=
&
(
*
threadForce
)[
threadIndex
][
0
];
float
preFactor
;
if
(
soluteDielectric
!=
0.0
f
&&
solventDielectric
!=
0.0
f
)
preFactor
=
ONE_4PI_EPS0
*
((
1.0
f
/
solventDielectric
)
-
(
1.0
f
/
soluteDielectric
));
else
preFactor
=
0.0
f
;
while
(
true
)
{
int
blockStart
=
gmx_atomic_fetch_add
(
reinterpret_cast
<
gmx_atomic_t
*>
(
atomicCounter
),
4
);
if
(
blockStart
>=
numParticles
)
break
;
int
numInBlock
=
min
(
4
,
numParticles
-
blockStart
);
ivec4
blockAtomIndex
(
blockStart
,
blockStart
+
1
,
blockStart
+
2
,
blockStart
+
3
);
float
atomCharge
[
4
],
atomx
[
4
],
atomy
[
4
],
atomz
[
4
];
int
blockMask
[
4
]
=
{
0
,
0
,
0
,
0
};
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
),
blockAtomBornForce
(
0.0
f
);
for
(
int
i
=
0
;
i
<
numInBlock
;
i
++
)
{
int
atomIndex
=
blockStart
+
i
;
atomx
[
i
]
=
posq
[
4
*
atomIndex
];
atomy
[
i
]
=
posq
[
4
*
atomIndex
+
1
];
atomz
[
i
]
=
posq
[
4
*
atomIndex
+
2
];
atomCharge
[
i
]
=
preFactor
*
posq
[
4
*
atomIndex
+
3
];
blockMask
[
i
]
=
0xFFFFFFFF
;
}
fvec4
radii
(
&
bornRadii
[
blockStart
]);
fvec4
x
(
atomx
);
fvec4
y
(
atomy
);
fvec4
z
(
atomz
);
fvec4
partialChargeI
(
atomCharge
);
ivec4
mask
(
blockMask
);
for
(
int
atomJ
=
blockStart
;
atomJ
<
numParticles
;
atomJ
++
)
{
fvec4
posJ
(
posq
+
4
*
atomJ
);
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
posJ
,
x
,
y
,
z
,
dx
,
dy
,
dz
,
r2
,
periodic
,
boxSize
,
invBoxSize
);
ivec4
include
=
mask
&
(
blockAtomIndex
<=
ivec4
(
atomJ
));
if
(
cutoff
)
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
fvec4
r
=
sqrt
(
r2
);
fvec4
alpha2_ij
=
radii
*
bornRadii
[
atomJ
];
fvec4
D_ij
=
r2
/
(
4.0
f
*
alpha2_ij
);
fvec4
expTerm
(
expf
(
-
D_ij
[
0
]),
expf
(
-
D_ij
[
1
]),
expf
(
-
D_ij
[
2
]),
expf
(
-
D_ij
[
3
]));
fvec4
denominator2
=
r2
+
alpha2_ij
*
expTerm
;
fvec4
denominator
=
sqrt
(
denominator2
);
fvec4
Gpol
=
(
partialChargeI
*
posJ
[
3
])
/
denominator
;
fvec4
dGpol_dr
=
-
Gpol
*
(
1.0
f
-
0.25
f
*
expTerm
)
/
denominator2
;
fvec4
dGpol_dalpha2_ij
=
-
0.5
f
*
Gpol
*
expTerm
*
(
1.0
f
+
D_ij
)
/
denominator2
;
dGpol_dr
=
blend
(
0.0
f
,
dGpol_dr
,
include
);
dGpol_dalpha2_ij
=
blend
(
0.0
f
,
dGpol_dalpha2_ij
,
include
);
fvec4
fx
=
dx
*
dGpol_dr
;
fvec4
fy
=
dy
*
dGpol_dr
;
fvec4
fz
=
dz
*
dGpol_dr
;
blockAtomForceX
-=
fx
;
blockAtomForceY
-=
fy
;
blockAtomForceZ
-=
fz
;
blockAtomBornForce
+=
dGpol_dalpha2_ij
*
bornRadii
[
atomJ
];
float
*
atomForce
=
forces
+
4
*
atomJ
;
fvec4
one
(
1.0
f
);
atomForce
[
0
]
+=
dot4
(
fx
,
one
);
atomForce
[
1
]
+=
dot4
(
fy
,
one
);
atomForce
[
2
]
+=
dot4
(
fz
,
one
);
ivec4
atomJMask
=
include
&
(
blockAtomIndex
!=
ivec4
(
atomJ
));
fvec4
termEnergy
=
blend
(
0.0
f
,
Gpol
,
include
);
if
(
cutoff
)
termEnergy
-=
blend
(
0.0
f
,
partialChargeI
*
posJ
[
3
]
/
cutoffDistance
,
atomJMask
);
termEnergy
*=
blend
(
0.5
f
,
1.0
f
,
atomJMask
);
energy
+=
dot4
(
termEnergy
,
one
);
bornForces
[
atomJ
]
+=
dot4
(
blend
(
0.0
f
,
dGpol_dalpha2_ij
,
atomJMask
),
radii
);
}
fvec4
f
[
4
]
=
{
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
};
transpose
(
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
]);
for
(
int
i
=
0
;
i
<
numInBlock
;
i
++
)
{
int
atomIndex
=
blockStart
+
i
;
(
fvec4
(
forces
+
4
*
atomIndex
)
+
f
[
i
]).
store
(
forces
+
4
*
atomIndex
);
bornForces
[
atomIndex
]
+=
blockAtomBornForce
[
i
];
}
}
threads
.
syncThreads
();
// Second loop of Born energy computation.
while
(
true
)
{
int
blockStart
=
gmx_atomic_fetch_add
(
reinterpret_cast
<
gmx_atomic_t
*>
(
atomicCounter
),
4
);
if
(
blockStart
>=
numParticles
)
break
;
fvec4
bornForce
(
0.0
f
);
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
bornForce
+=
fvec4
(
&
threadBornForces
[
i
][
blockStart
]);
fvec4
radii
(
&
bornRadii
[
blockStart
]);
bornForce
*=
radii
*
radii
*
fvec4
(
&
obcChain
[
blockStart
]);
int
numInBlock
=
min
(
4
,
numParticles
-
blockStart
);
ivec4
blockAtomIndex
(
blockStart
,
blockStart
+
1
,
blockStart
+
2
,
blockStart
+
3
);
float
atomRadius
[
4
],
atomx
[
4
],
atomy
[
4
],
atomz
[
4
];
int
blockMask
[
4
]
=
{
0
,
0
,
0
,
0
};
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
for
(
int
i
=
0
;
i
<
numInBlock
;
i
++
)
{
int
atomIndex
=
blockStart
+
i
;
atomRadius
[
i
]
=
particleParams
[
atomIndex
].
first
;
atomx
[
i
]
=
posq
[
4
*
atomIndex
];
atomy
[
i
]
=
posq
[
4
*
atomIndex
+
1
];
atomz
[
i
]
=
posq
[
4
*
atomIndex
+
2
];
blockMask
[
i
]
=
0xFFFFFFFF
;
}
fvec4
offsetRadiusI
(
atomRadius
);
fvec4
x
(
atomx
);
fvec4
y
(
atomy
);
fvec4
z
(
atomz
);
ivec4
mask
(
blockMask
);
for
(
int
atomJ
=
0
;
atomJ
<
numParticles
;
atomJ
++
)
{
fvec4
posJ
(
posq
+
4
*
atomJ
);
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
posJ
,
x
,
y
,
z
,
dx
,
dy
,
dz
,
r2
,
periodic
,
boxSize
,
invBoxSize
);
ivec4
include
=
mask
&
(
blockAtomIndex
!=
ivec4
(
atomJ
));
if
(
cutoff
)
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
fvec4
r
=
sqrt
(
r2
);
float
scaledRadiusJ
=
particleParams
[
atomJ
].
second
;
float
scaledRadiusJ2
=
scaledRadiusJ
*
scaledRadiusJ
;
fvec4
rScaledRadiusJ
=
r
+
scaledRadiusJ
;
include
=
include
&
(
offsetRadiusI
<
rScaledRadiusJ
);
fvec4
l_ij
=
1.0
f
/
max
(
offsetRadiusI
,
abs
(
r
-
scaledRadiusJ
));
fvec4
u_ij
=
1.0
f
/
rScaledRadiusJ
;
fvec4
l_ij2
=
l_ij
*
l_ij
;
fvec4
u_ij2
=
u_ij
*
u_ij
;
fvec4
rInverse
=
1.0
f
/
r
;
fvec4
r2Inverse
=
rInverse
*
rInverse
;
fvec4
logRatio
=
fastLog
(
u_ij
/
l_ij
);
fvec4
t3
=
0.125
f
*
(
1.0
f
+
scaledRadiusJ2
*
r2Inverse
)
*
(
l_ij2
-
u_ij2
)
+
0.25
f
*
logRatio
*
r2Inverse
;
fvec4
de
=
bornForce
*
t3
*
rInverse
;
de
=
blend
(
0.0
f
,
de
,
include
);
fvec4
fx
=
dx
*
de
;
fvec4
fy
=
dy
*
de
;
fvec4
fz
=
dz
*
de
;
blockAtomForceX
+=
fx
;
blockAtomForceY
+=
fy
;
blockAtomForceZ
+=
fz
;
float
*
atomForce
=
forces
+
4
*
atomJ
;
fvec4
one
(
1.0
f
);
atomForce
[
0
]
-=
dot4
(
fx
,
one
);
atomForce
[
1
]
-=
dot4
(
fy
,
one
);
atomForce
[
2
]
-=
dot4
(
fz
,
one
);
}
fvec4
f
[
4
]
=
{
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
};
transpose
(
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
]);
for
(
int
i
=
0
;
i
<
numInBlock
;
i
++
)
{
int
atomIndex
=
blockStart
+
i
;
(
fvec4
(
forces
+
4
*
atomIndex
)
+
f
[
i
]).
store
(
forces
+
4
*
atomIndex
);
}
}
threadEnergy
[
threadIndex
]
=
energy
;
}
void
CpuGBSAOBCForce
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
dx
=
x
-
posI
[
0
];
dy
=
y
-
posI
[
1
];
dz
=
z
-
posI
[
2
];
if
(
periodic
)
{
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
}
fvec4
CpuGBSAOBCForce
::
fastLog
(
const
fvec4
&
x
)
{
// Evaluate log(x) using a lookup table for speed.
if
(
any
((
x
<
TABLE_MIN
)
|
(
x
>=
TABLE_MAX
)))
return
fvec4
(
logf
(
x
[
0
]),
logf
(
x
[
1
]),
logf
(
x
[
2
]),
logf
(
x
[
3
]));
fvec4
x1
=
(
x
-
TABLE_MIN
)
*
logDXInv
;
ivec4
index
=
floor
(
x1
);
fvec4
coeff2
=
x1
-
index
;
fvec4
coeff1
=
1.0
f
-
coeff2
;
fvec4
t1
(
&
logTable
[
index
[
0
]]);
fvec4
t2
(
&
logTable
[
index
[
1
]]);
fvec4
t3
(
&
logTable
[
index
[
2
]]);
fvec4
t4
(
&
logTable
[
index
[
3
]]);
transpose
(
t1
,
t2
,
t3
,
t4
);
return
coeff1
*
t1
+
coeff2
*
t2
;
}
platforms/cpu/src/CpuKernelFactory.cpp
View file @
19d2885a
...
...
@@ -38,8 +38,18 @@
using
namespace
OpenMM
;
KernelImpl
*
CpuKernelFactory
::
createKernelImpl
(
std
::
string
name
,
const
Platform
&
platform
,
ContextImpl
&
context
)
const
{
ReferencePlatform
::
PlatformData
&
data
=
*
static_cast
<
ReferencePlatform
::
PlatformData
*>
(
context
.
getPlatformData
());
CpuPlatform
::
PlatformData
&
data
=
CpuPlatform
::
getPlatformData
(
context
);
if
(
name
==
CalcForcesAndEnergyKernel
::
Name
())
return
new
CpuCalcForcesAndEnergyKernel
(
name
,
platform
,
data
,
context
);
if
(
name
==
CalcPeriodicTorsionForceKernel
::
Name
())
return
new
CpuCalcPeriodicTorsionForceKernel
(
name
,
platform
,
data
);
if
(
name
==
CalcRBTorsionForceKernel
::
Name
())
return
new
CpuCalcRBTorsionForceKernel
(
name
,
platform
,
data
);
if
(
name
==
CalcNonbondedForceKernel
::
Name
())
return
new
CpuCalcNonbondedForceKernel
(
name
,
platform
);
return
new
CpuCalcNonbondedForceKernel
(
name
,
platform
,
data
);
if
(
name
==
CalcGBSAOBCForceKernel
::
Name
())
return
new
CpuCalcGBSAOBCForceKernel
(
name
,
platform
,
data
);
if
(
name
==
IntegrateLangevinStepKernel
::
Name
())
return
new
CpuIntegrateLangevinStepKernel
(
name
,
platform
,
data
);
throw
OpenMMException
((
std
::
string
(
"Tried to create kernel with illegal kernel name '"
)
+
name
+
"'"
).
c_str
());
}
platforms/cpu/src/CpuKernels.cpp
View file @
19d2885a
...
...
@@ -31,11 +31,17 @@
#include "CpuKernels.h"
#include "ReferenceBondForce.h"
#include "ReferenceConstraints.h"
#include "ReferenceKernelFactory.h"
#include "ReferenceKernels.h"
#include "ReferenceLJCoulomb14.h"
#include "ReferenceProperDihedralBond.h"
#include "ReferenceRbDihedralBond.h"
#include "openmm/Context.h"
#include "openmm/OpenMMException.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/NonbondedForceImpl.h"
#include "openmm/internal/vectorize.h"
#include "RealVec.h"
using
namespace
OpenMM
;
...
...
@@ -61,6 +67,270 @@ static RealVec& extractBoxSize(ContextImpl& context) {
return
*
(
RealVec
*
)
data
->
periodicBoxSize
;
}
static
ReferenceConstraints
&
extractConstraints
(
ContextImpl
&
context
)
{
ReferencePlatform
::
PlatformData
*
data
=
reinterpret_cast
<
ReferencePlatform
::
PlatformData
*>
(
context
.
getPlatformData
());
return
*
(
ReferenceConstraints
*
)
data
->
constraints
;
}
/**
* Compute the kinetic energy of the system, possibly shifting the velocities in time to account
* for a leapfrog integrator.
*/
static
double
computeShiftedKineticEnergy
(
ContextImpl
&
context
,
vector
<
double
>&
masses
,
double
timeShift
)
{
vector
<
RealVec
>&
posData
=
extractPositions
(
context
);
vector
<
RealVec
>&
velData
=
extractVelocities
(
context
);
vector
<
RealVec
>&
forceData
=
extractForces
(
context
);
int
numParticles
=
context
.
getSystem
().
getNumParticles
();
// Compute the shifted velocities.
vector
<
RealVec
>
shiftedVel
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
if
(
masses
[
i
]
>
0
)
shiftedVel
[
i
]
=
velData
[
i
]
+
forceData
[
i
]
*
(
timeShift
/
masses
[
i
]);
else
shiftedVel
[
i
]
=
velData
[
i
];
}
// Apply constraints to them.
vector
<
double
>
inverseMasses
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
inverseMasses
[
i
]
=
(
masses
[
i
]
==
0
?
0
:
1
/
masses
[
i
]);
extractConstraints
(
context
).
applyToVelocities
(
posData
,
shiftedVel
,
inverseMasses
,
1e-4
);
// Compute the kinetic energy.
double
energy
=
0.0
;
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
if
(
masses
[
i
]
>
0
)
energy
+=
masses
[
i
]
*
(
shiftedVel
[
i
].
dot
(
shiftedVel
[
i
]));
return
0.5
*
energy
;
}
class
CpuCalcForcesAndEnergyKernel
::
SumForceTask
:
public
ThreadPool
::
Task
{
public:
SumForceTask
(
int
numParticles
,
vector
<
RealVec
>&
forceData
,
CpuPlatform
::
PlatformData
&
data
)
:
numParticles
(
numParticles
),
forceData
(
forceData
),
data
(
data
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
// Sum the contributions to forces that have been calculated by different threads.
int
numThreads
=
threads
.
getNumThreads
();
int
start
=
threadIndex
*
numParticles
/
numThreads
;
int
end
=
(
threadIndex
+
1
)
*
numParticles
/
numThreads
;
for
(
int
i
=
start
;
i
<
end
;
i
++
)
{
fvec4
f
(
0.0
f
);
for
(
int
j
=
0
;
j
<
numThreads
;
j
++
)
f
+=
fvec4
(
&
data
.
threadForce
[
j
][
4
*
i
]);
forceData
[
i
][
0
]
+=
f
[
0
];
forceData
[
i
][
1
]
+=
f
[
1
];
forceData
[
i
][
2
]
+=
f
[
2
];
}
}
int
numParticles
;
vector
<
RealVec
>&
forceData
;
CpuPlatform
::
PlatformData
&
data
;
};
class
CpuCalcForcesAndEnergyKernel
::
InitForceTask
:
public
ThreadPool
::
Task
{
public:
InitForceTask
(
int
numParticles
,
ContextImpl
&
context
,
CpuPlatform
::
PlatformData
&
data
)
:
numParticles
(
numParticles
),
context
(
context
),
data
(
data
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
// Convert the positions to single precision and apply periodic boundary conditions
AlignedArray
<
float
>&
posq
=
data
.
posq
;
vector
<
RealVec
>&
posData
=
extractPositions
(
context
);
RealVec
boxSize
=
extractBoxSize
(
context
);
double
invBoxSize
[
3
]
=
{
1
/
boxSize
[
0
],
1
/
boxSize
[
1
],
1
/
boxSize
[
2
]};
int
numParticles
=
context
.
getSystem
().
getNumParticles
();
int
numThreads
=
threads
.
getNumThreads
();
int
start
=
threadIndex
*
numParticles
/
numThreads
;
int
end
=
(
threadIndex
+
1
)
*
numParticles
/
numThreads
;
if
(
data
.
isPeriodic
)
for
(
int
i
=
start
;
i
<
end
;
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
{
RealOpenMM
x
=
posData
[
i
][
j
];
double
base
=
floor
(
x
*
invBoxSize
[
j
])
*
boxSize
[
j
];
posq
[
4
*
i
+
j
]
=
(
float
)
(
x
-
base
);
}
else
for
(
int
i
=
start
;
i
<
end
;
i
++
)
{
posq
[
4
*
i
]
=
(
float
)
posData
[
i
][
0
];
posq
[
4
*
i
+
1
]
=
(
float
)
posData
[
i
][
1
];
posq
[
4
*
i
+
2
]
=
(
float
)
posData
[
i
][
2
];
}
// Clear the forces.
fvec4
zero
(
0.0
f
);
for
(
int
j
=
0
;
j
<
numParticles
;
j
++
)
zero
.
store
(
&
data
.
threadForce
[
threadIndex
][
j
*
4
]);
}
int
numParticles
;
ContextImpl
&
context
;
CpuPlatform
::
PlatformData
&
data
;
};
CpuCalcForcesAndEnergyKernel
::
CpuCalcForcesAndEnergyKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
,
ContextImpl
&
context
)
:
CalcForcesAndEnergyKernel
(
name
,
platform
),
data
(
data
)
{
// Create a Reference platform version of this kernel.
ReferenceKernelFactory
referenceFactory
;
referenceKernel
=
Kernel
(
referenceFactory
.
createKernelImpl
(
name
,
platform
,
context
));
}
void
CpuCalcForcesAndEnergyKernel
::
initialize
(
const
System
&
system
)
{
referenceKernel
.
getAs
<
ReferenceCalcForcesAndEnergyKernel
>
().
initialize
(
system
);
}
void
CpuCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
)
{
referenceKernel
.
getAs
<
ReferenceCalcForcesAndEnergyKernel
>
().
beginComputation
(
context
,
includeForce
,
includeEnergy
,
groups
);
// Convert positions to single precision and clear the forces.
InitForceTask
task
(
context
.
getSystem
().
getNumParticles
(),
context
,
data
);
data
.
threads
.
execute
(
task
);
data
.
threads
.
waitForThreads
();
}
double
CpuCalcForcesAndEnergyKernel
::
finishComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
)
{
// Sum the forces from all the threads.
SumForceTask
task
(
context
.
getSystem
().
getNumParticles
(),
extractForces
(
context
),
data
);
data
.
threads
.
execute
(
task
);
data
.
threads
.
waitForThreads
();
return
referenceKernel
.
getAs
<
ReferenceCalcForcesAndEnergyKernel
>
().
finishComputation
(
context
,
includeForce
,
includeEnergy
,
groups
);
}
CpuCalcPeriodicTorsionForceKernel
::~
CpuCalcPeriodicTorsionForceKernel
()
{
if
(
torsionIndexArray
!=
NULL
)
{
for
(
int
i
=
0
;
i
<
numTorsions
;
i
++
)
{
delete
[]
torsionIndexArray
[
i
];
delete
[]
torsionParamArray
[
i
];
}
delete
[]
torsionIndexArray
;
delete
[]
torsionParamArray
;
}
}
void
CpuCalcPeriodicTorsionForceKernel
::
initialize
(
const
System
&
system
,
const
PeriodicTorsionForce
&
force
)
{
numTorsions
=
force
.
getNumTorsions
();
torsionIndexArray
=
new
int
*
[
numTorsions
];
for
(
int
i
=
0
;
i
<
numTorsions
;
i
++
)
torsionIndexArray
[
i
]
=
new
int
[
4
];
torsionParamArray
=
new
RealOpenMM
*
[
numTorsions
];
for
(
int
i
=
0
;
i
<
numTorsions
;
i
++
)
torsionParamArray
[
i
]
=
new
RealOpenMM
[
3
];
for
(
int
i
=
0
;
i
<
numTorsions
;
++
i
)
{
int
particle1
,
particle2
,
particle3
,
particle4
,
periodicity
;
double
phase
,
k
;
force
.
getTorsionParameters
(
i
,
particle1
,
particle2
,
particle3
,
particle4
,
periodicity
,
phase
,
k
);
torsionIndexArray
[
i
][
0
]
=
particle1
;
torsionIndexArray
[
i
][
1
]
=
particle2
;
torsionIndexArray
[
i
][
2
]
=
particle3
;
torsionIndexArray
[
i
][
3
]
=
particle4
;
torsionParamArray
[
i
][
0
]
=
(
RealOpenMM
)
k
;
torsionParamArray
[
i
][
1
]
=
(
RealOpenMM
)
phase
;
torsionParamArray
[
i
][
2
]
=
(
RealOpenMM
)
periodicity
;
}
bondForce
.
initialize
(
system
.
getNumParticles
(),
numTorsions
,
4
,
torsionIndexArray
,
data
.
threads
);
}
double
CpuCalcPeriodicTorsionForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
vector
<
RealVec
>&
posData
=
extractPositions
(
context
);
vector
<
RealVec
>&
forceData
=
extractForces
(
context
);
RealOpenMM
energy
=
0
;
ReferenceProperDihedralBond
periodicTorsionBond
;
bondForce
.
calculateForce
(
posData
,
torsionParamArray
,
forceData
,
includeEnergy
?
&
energy
:
NULL
,
periodicTorsionBond
);
return
energy
;
}
void
CpuCalcPeriodicTorsionForceKernel
::
copyParametersToContext
(
ContextImpl
&
context
,
const
PeriodicTorsionForce
&
force
)
{
if
(
numTorsions
!=
force
.
getNumTorsions
())
throw
OpenMMException
(
"updateParametersInContext: The number of torsions has changed"
);
// Record the values.
for
(
int
i
=
0
;
i
<
numTorsions
;
++
i
)
{
int
particle1
,
particle2
,
particle3
,
particle4
,
periodicity
;
double
phase
,
k
;
force
.
getTorsionParameters
(
i
,
particle1
,
particle2
,
particle3
,
particle4
,
periodicity
,
phase
,
k
);
if
(
particle1
!=
torsionIndexArray
[
i
][
0
]
||
particle2
!=
torsionIndexArray
[
i
][
1
]
||
particle3
!=
torsionIndexArray
[
i
][
2
]
||
particle4
!=
torsionIndexArray
[
i
][
3
])
throw
OpenMMException
(
"updateParametersInContext: The set of particles in a torsion has changed"
);
torsionParamArray
[
i
][
0
]
=
(
RealOpenMM
)
k
;
torsionParamArray
[
i
][
1
]
=
(
RealOpenMM
)
phase
;
torsionParamArray
[
i
][
2
]
=
(
RealOpenMM
)
periodicity
;
}
}
CpuCalcRBTorsionForceKernel
::~
CpuCalcRBTorsionForceKernel
()
{
if
(
torsionIndexArray
!=
NULL
)
{
for
(
int
i
=
0
;
i
<
numTorsions
;
i
++
)
{
delete
[]
torsionIndexArray
[
i
];
delete
[]
torsionParamArray
[
i
];
}
delete
[]
torsionIndexArray
;
delete
[]
torsionParamArray
;
}
}
void
CpuCalcRBTorsionForceKernel
::
initialize
(
const
System
&
system
,
const
RBTorsionForce
&
force
)
{
numTorsions
=
force
.
getNumTorsions
();
torsionIndexArray
=
new
int
*
[
numTorsions
];
for
(
int
i
=
0
;
i
<
numTorsions
;
i
++
)
torsionIndexArray
[
i
]
=
new
int
[
4
];
torsionParamArray
=
new
RealOpenMM
*
[
numTorsions
];
for
(
int
i
=
0
;
i
<
numTorsions
;
i
++
)
torsionParamArray
[
i
]
=
new
RealOpenMM
[
6
];
for
(
int
i
=
0
;
i
<
numTorsions
;
++
i
)
{
int
particle1
,
particle2
,
particle3
,
particle4
;
double
c0
,
c1
,
c2
,
c3
,
c4
,
c5
;
force
.
getTorsionParameters
(
i
,
particle1
,
particle2
,
particle3
,
particle4
,
c0
,
c1
,
c2
,
c3
,
c4
,
c5
);
torsionIndexArray
[
i
][
0
]
=
particle1
;
torsionIndexArray
[
i
][
1
]
=
particle2
;
torsionIndexArray
[
i
][
2
]
=
particle3
;
torsionIndexArray
[
i
][
3
]
=
particle4
;
torsionParamArray
[
i
][
0
]
=
(
RealOpenMM
)
c0
;
torsionParamArray
[
i
][
1
]
=
(
RealOpenMM
)
c1
;
torsionParamArray
[
i
][
2
]
=
(
RealOpenMM
)
c2
;
torsionParamArray
[
i
][
3
]
=
(
RealOpenMM
)
c3
;
torsionParamArray
[
i
][
4
]
=
(
RealOpenMM
)
c4
;
torsionParamArray
[
i
][
5
]
=
(
RealOpenMM
)
c5
;
}
bondForce
.
initialize
(
system
.
getNumParticles
(),
numTorsions
,
4
,
torsionIndexArray
,
data
.
threads
);
}
double
CpuCalcRBTorsionForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
vector
<
RealVec
>&
posData
=
extractPositions
(
context
);
vector
<
RealVec
>&
forceData
=
extractForces
(
context
);
RealOpenMM
energy
=
0
;
ReferenceRbDihedralBond
rbTorsionBond
;
bondForce
.
calculateForce
(
posData
,
torsionParamArray
,
forceData
,
includeEnergy
?
&
energy
:
NULL
,
rbTorsionBond
);
return
energy
;
}
void
CpuCalcRBTorsionForceKernel
::
copyParametersToContext
(
ContextImpl
&
context
,
const
RBTorsionForce
&
force
)
{
if
(
numTorsions
!=
force
.
getNumTorsions
())
throw
OpenMMException
(
"updateParametersInContext: The number of torsions has changed"
);
// Record the values.
for
(
int
i
=
0
;
i
<
numTorsions
;
++
i
)
{
int
particle1
,
particle2
,
particle3
,
particle4
;
double
c0
,
c1
,
c2
,
c3
,
c4
,
c5
;
force
.
getTorsionParameters
(
i
,
particle1
,
particle2
,
particle3
,
particle4
,
c0
,
c1
,
c2
,
c3
,
c4
,
c5
);
if
(
particle1
!=
torsionIndexArray
[
i
][
0
]
||
particle2
!=
torsionIndexArray
[
i
][
1
]
||
particle3
!=
torsionIndexArray
[
i
][
2
]
||
particle4
!=
torsionIndexArray
[
i
][
3
])
throw
OpenMMException
(
"updateParametersInContext: The set of particles in a torsion has changed"
);
torsionParamArray
[
i
][
0
]
=
(
RealOpenMM
)
c0
;
torsionParamArray
[
i
][
1
]
=
(
RealOpenMM
)
c1
;
torsionParamArray
[
i
][
2
]
=
(
RealOpenMM
)
c2
;
torsionParamArray
[
i
][
3
]
=
(
RealOpenMM
)
c3
;
torsionParamArray
[
i
][
4
]
=
(
RealOpenMM
)
c4
;
torsionParamArray
[
i
][
5
]
=
(
RealOpenMM
)
c5
;
}
}
class
CpuCalcNonbondedForceKernel
::
PmeIO
:
public
CalcPmeReciprocalForceKernel
::
IO
{
public:
PmeIO
(
float
*
posq
,
float
*
force
,
int
numParticles
)
:
posq
(
posq
),
force
(
force
),
numParticles
(
numParticles
)
{
...
...
@@ -81,15 +351,35 @@ private:
int
numParticles
;
};
bool
isVec8Supported
();
CpuNonbondedForce
*
createCpuNonbondedForceVec4
();
CpuNonbondedForce
*
createCpuNonbondedForceVec8
();
CpuCalcNonbondedForceKernel
::
CpuCalcNonbondedForceKernel
(
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
data
(
data
),
bonded14IndexArray
(
NULL
),
bonded14ParamArray
(
NULL
),
hasInitializedPme
(
false
),
neighborList
(
NULL
),
nonbonded
(
NULL
)
{
if
(
isVec8Supported
())
{
neighborList
=
new
CpuNeighborList
(
8
);
nonbonded
=
createCpuNonbondedForceVec8
();
}
else
{
neighborList
=
new
CpuNeighborList
(
4
);
nonbonded
=
createCpuNonbondedForceVec4
();
}
}
CpuCalcNonbondedForceKernel
::~
CpuCalcNonbondedForceKernel
()
{
if
(
bonded14ParamArray
!=
NULL
)
{
for
(
int
i
=
0
;
i
<
num14
;
i
++
)
{
delete
[]
bonded14IndexArray
[
i
];
delete
[]
bonded14ParamArray
[
i
];
}
delete
bonded14IndexArray
;
delete
bonded14ParamArray
;
delete
[]
bonded14IndexArray
;
delete
[]
bonded14ParamArray
;
}
if
(
nonbonded
!=
NULL
)
delete
nonbonded
;
if
(
neighborList
!=
NULL
)
delete
neighborList
;
}
void
CpuCalcNonbondedForceKernel
::
initialize
(
const
System
&
system
,
const
NonbondedForce
&
force
)
{
...
...
@@ -97,8 +387,6 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
// Identify which exceptions are 1-4 interactions.
numParticles
=
force
.
getNumParticles
();
posq
.
resize
(
4
*
numParticles
,
0
);
forces
.
resize
(
4
*
numParticles
,
0
);
exclusions
.
resize
(
numParticles
);
vector
<
int
>
nb14s
;
for
(
int
i
=
0
;
i
<
force
.
getNumExceptions
();
i
++
)
{
...
...
@@ -125,7 +413,7 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
double
charge
,
radius
,
depth
;
force
.
getParticleParameters
(
i
,
charge
,
radius
,
depth
);
posq
[
4
*
i
+
3
]
=
(
float
)
charge
;
data
.
posq
[
4
*
i
+
3
]
=
(
float
)
charge
;
particleParams
[
i
]
=
make_pair
((
float
)
(
0.5
*
radius
),
(
float
)
(
2.0
*
sqrt
(
depth
)));
sumSquaredCharges
+=
charge
*
charge
;
}
...
...
@@ -173,6 +461,7 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
else
dispersionCoefficient
=
0.0
;
lastPositions
.
resize
(
numParticles
,
Vec3
(
1e10
,
1e10
,
1e10
));
data
.
isPeriodic
=
(
nonbondedMethod
==
CutoffPeriodic
||
nonbondedMethod
==
Ewald
||
nonbondedMethod
==
PME
);
}
double
CpuCalcNonbondedForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
,
bool
includeDirect
,
bool
includeReciprocal
)
{
...
...
@@ -192,32 +481,14 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
}
}
}
AlignedArray
<
float
>&
posq
=
data
.
posq
;
vector
<
RealVec
>&
posData
=
extractPositions
(
context
);
vector
<
RealVec
>&
forceData
=
extractForces
(
context
);
RealVec
boxSize
=
extractBoxSize
(
context
);
float
floatBoxSize
[
3
]
=
{(
float
)
boxSize
[
0
],
(
float
)
boxSize
[
1
],
(
float
)
boxSize
[
2
]};
double
energy
=
ewaldSelfEnergy
;
bool
periodic
=
(
nonbondedMethod
==
CutoffPeriodic
);
double
energy
=
(
includeReciprocal
?
ewaldSelfEnergy
:
0.0
);
bool
ewald
=
(
nonbondedMethod
==
Ewald
);
bool
pme
=
(
nonbondedMethod
==
PME
);
// Convert the positions to single precision.
if
(
periodic
)
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
{
RealOpenMM
x
=
posData
[
i
][
j
];
double
base
=
floor
(
x
/
boxSize
[
j
]
+
0.5
)
*
boxSize
[
j
];
posq
[
4
*
i
+
j
]
=
(
float
)
(
x
-
base
);
}
else
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
posq
[
4
*
i
]
=
(
float
)
posData
[
i
][
0
];
posq
[
4
*
i
+
1
]
=
(
float
)
posData
[
i
][
1
];
posq
[
4
*
i
+
2
]
=
(
float
)
posData
[
i
][
2
];
}
for
(
int
i
=
0
;
i
<
4
*
numParticles
;
i
++
)
forces
[
i
]
=
0.0
f
;
if
(
nonbondedMethod
!=
NoCutoff
)
{
// Determine whether we need to recompute the neighbor list.
...
...
@@ -244,6 +515,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
int
numMoved
=
moved
.
size
();
double
cutoff2
=
nonbondedCutoff
*
nonbondedCutoff
;
double
paddedCutoff2
=
(
nonbondedCutoff
+
padding
)
*
(
nonbondedCutoff
+
padding
);
for
(
int
i
=
1
;
i
<
numMoved
&&
!
needRecompute
;
i
++
)
for
(
int
j
=
0
;
j
<
i
;
j
++
)
{
RealVec
delta
=
posData
[
moved
[
i
]]
-
posData
[
moved
[
j
]];
...
...
@@ -251,7 +523,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
// These particles should interact. See if they are in the neighbor list.
RealVec
oldDelta
=
lastPositions
[
moved
[
i
]]
-
lastPositions
[
moved
[
j
]];
if
(
oldDelta
.
dot
(
oldDelta
)
>
c
utoff2
)
{
if
(
oldDelta
.
dot
(
oldDelta
)
>
paddedC
utoff2
)
{
needRecompute
=
true
;
break
;
}
...
...
@@ -259,47 +531,42 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
}
}
if
(
needRecompute
)
{
neighborList
.
computeNeighborList
(
numParticles
,
posq
,
exclusions
,
floatBoxSize
,
periodic
||
ewald
||
pme
,
nonbondedCutoff
+
padding
);
neighborList
->
computeNeighborList
(
numParticles
,
posq
,
exclusions
,
floatBoxSize
,
data
.
isPeriodic
,
nonbondedCutoff
+
padding
,
data
.
threads
);
lastPositions
=
posData
;
}
nonbonded
.
setUseCutoff
(
nonbondedCutoff
,
neighborList
,
rfDielectric
);
nonbonded
->
setUseCutoff
(
nonbondedCutoff
,
*
neighborList
,
rfDielectric
);
}
if
(
periodic
||
ewald
||
pme
)
{
if
(
data
.
isPeriodic
)
{
double
minAllowedSize
=
1.999999
*
nonbondedCutoff
;
if
(
boxSize
[
0
]
<
minAllowedSize
||
boxSize
[
1
]
<
minAllowedSize
||
boxSize
[
2
]
<
minAllowedSize
)
throw
OpenMMException
(
"The periodic box size has decreased to less than twice the nonbonded cutoff."
);
nonbonded
.
setPeriodic
(
floatBoxSize
);
nonbonded
->
setPeriodic
(
floatBoxSize
);
}
if
(
ewald
)
nonbonded
.
setUseEwald
(
ewaldAlpha
,
kmax
[
0
],
kmax
[
1
],
kmax
[
2
]);
nonbonded
->
setUseEwald
(
ewaldAlpha
,
kmax
[
0
],
kmax
[
1
],
kmax
[
2
]);
if
(
pme
)
nonbonded
.
setUsePME
(
ewaldAlpha
,
gridSize
);
nonbonded
->
setUsePME
(
ewaldAlpha
,
gridSize
);
if
(
useSwitchingFunction
)
nonbonded
.
setUseSwitchingFunction
(
switchingDistance
);
float
nonbondedEnergy
=
0
;
nonbonded
->
setUseSwitchingFunction
(
switchingDistance
);
double
nonbondedEnergy
=
0
;
if
(
includeDirect
)
nonbonded
.
calculateDirectIxn
(
numParticles
,
&
posq
[
0
],
particleParams
,
exclusions
,
&
forces
[
0
]
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
);
nonbonded
->
calculateDirectIxn
(
numParticles
,
&
posq
[
0
],
posData
,
particleParams
,
exclusions
,
data
.
threadForce
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
,
data
.
threads
);
if
(
includeReciprocal
)
{
if
(
useOptimizedPme
)
{
PmeIO
io
(
&
posq
[
0
],
&
f
orce
s
[
0
],
numParticles
);
PmeIO
io
(
&
posq
[
0
],
&
data
.
threadF
orce
[
0
]
[
0
],
numParticles
);
Vec3
periodicBoxSize
(
boxSize
[
0
],
boxSize
[
1
],
boxSize
[
2
]);
optimizedPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
beginComputation
(
io
,
periodicBoxSize
,
includeEnergy
);
optimizedPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
finishComputation
(
io
);
nonbondedEnergy
+=
optimizedPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
finishComputation
(
io
);
}
else
nonbonded
.
calculateReciprocalIxn
(
numParticles
,
&
posq
[
0
],
posData
,
particleParams
,
exclusions
,
forceData
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
);
nonbonded
->
calculateReciprocalIxn
(
numParticles
,
&
posq
[
0
],
posData
,
particleParams
,
exclusions
,
forceData
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
);
}
energy
+=
nonbondedEnergy
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
forceData
[
i
][
0
]
+=
forces
[
4
*
i
];
forceData
[
i
][
1
]
+=
forces
[
4
*
i
+
1
];
forceData
[
i
][
2
]
+=
forces
[
4
*
i
+
2
];
}
if
(
includeDirect
)
{
ReferenceBondForce
refBondForce
;
ReferenceLJCoulomb14
nonbonded14
;
refBondForce
.
calculateForce
(
num14
,
bonded14IndexArray
,
posData
,
bonded14ParamArray
,
forceData
,
includeEnergy
?
&
energy
:
NULL
,
nonbonded14
);
if
(
periodic
||
ewald
||
pme
)
if
(
data
.
isPeriodic
)
energy
+=
dispersionCoefficient
/
(
boxSize
[
0
]
*
boxSize
[
1
]
*
boxSize
[
2
]);
}
return
energy
;
...
...
@@ -325,7 +592,7 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
double
charge
,
radius
,
depth
;
force
.
getParticleParameters
(
i
,
charge
,
radius
,
depth
);
posq
[
4
*
i
+
3
]
=
(
float
)
charge
;
data
.
posq
[
4
*
i
+
3
]
=
(
float
)
charge
;
particleParams
[
i
]
=
make_pair
((
float
)
(
0.5
*
radius
),
(
float
)
(
2.0
*
sqrt
(
depth
)));
sumSquaredCharges
+=
charge
*
charge
;
}
...
...
@@ -350,3 +617,94 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
if
(
force
.
getUseDispersionCorrection
()
&&
(
method
==
NonbondedForce
::
CutoffPeriodic
||
method
==
NonbondedForce
::
Ewald
||
method
==
NonbondedForce
::
PME
))
dispersionCoefficient
=
NonbondedForceImpl
::
calcDispersionCorrection
(
context
.
getSystem
(),
force
);
}
CpuCalcGBSAOBCForceKernel
::~
CpuCalcGBSAOBCForceKernel
()
{
}
void
CpuCalcGBSAOBCForceKernel
::
initialize
(
const
System
&
system
,
const
GBSAOBCForce
&
force
)
{
int
numParticles
=
system
.
getNumParticles
();
particleParams
.
resize
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
double
charge
,
radius
,
scalingFactor
;
force
.
getParticleParameters
(
i
,
charge
,
radius
,
scalingFactor
);
data
.
posq
[
4
*
i
+
3
]
=
(
float
)
charge
;
radius
-=
0.009
;
particleParams
[
i
]
=
make_pair
((
float
)
radius
,
(
float
)
(
scalingFactor
*
radius
));
}
obc
.
setParticleParameters
(
particleParams
);
obc
.
setSolventDielectric
((
float
)
force
.
getSolventDielectric
());
obc
.
setSoluteDielectric
((
float
)
force
.
getSoluteDielectric
());
if
(
force
.
getNonbondedMethod
()
!=
GBSAOBCForce
::
NoCutoff
)
obc
.
setUseCutoff
((
float
)
force
.
getCutoffDistance
());
data
.
isPeriodic
=
(
force
.
getNonbondedMethod
()
==
GBSAOBCForce
::
CutoffPeriodic
);
}
double
CpuCalcGBSAOBCForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
if
(
data
.
isPeriodic
)
{
RealVec
&
boxSize
=
extractBoxSize
(
context
);
float
floatBoxSize
[
3
]
=
{(
float
)
boxSize
[
0
],
(
float
)
boxSize
[
1
],
(
float
)
boxSize
[
2
]};
obc
.
setPeriodic
(
floatBoxSize
);
}
double
energy
=
0.0
;
obc
.
computeForce
(
data
.
posq
,
data
.
threadForce
,
includeEnergy
?
&
energy
:
NULL
,
data
.
threads
);
return
energy
;
}
void
CpuCalcGBSAOBCForceKernel
::
copyParametersToContext
(
ContextImpl
&
context
,
const
GBSAOBCForce
&
force
)
{
int
numParticles
=
force
.
getNumParticles
();
if
(
numParticles
!=
obc
.
getParticleParameters
().
size
())
throw
OpenMMException
(
"updateParametersInContext: The number of particles has changed"
);
// Record the values.
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
double
charge
,
radius
,
scalingFactor
;
force
.
getParticleParameters
(
i
,
charge
,
radius
,
scalingFactor
);
data
.
posq
[
4
*
i
+
3
]
=
(
float
)
charge
;
radius
-=
0.009
;
particleParams
[
i
]
=
make_pair
((
float
)
radius
,
(
float
)
(
scalingFactor
*
radius
));
}
obc
.
setParticleParameters
(
particleParams
);
}
CpuIntegrateLangevinStepKernel
::~
CpuIntegrateLangevinStepKernel
()
{
if
(
dynamics
)
delete
dynamics
;
}
void
CpuIntegrateLangevinStepKernel
::
initialize
(
const
System
&
system
,
const
LangevinIntegrator
&
integrator
)
{
int
numParticles
=
system
.
getNumParticles
();
masses
.
resize
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
masses
[
i
]
=
static_cast
<
RealOpenMM
>
(
system
.
getParticleMass
(
i
));
data
.
random
.
initialize
(
integrator
.
getRandomNumberSeed
(),
data
.
threads
.
getNumThreads
());
}
void
CpuIntegrateLangevinStepKernel
::
execute
(
ContextImpl
&
context
,
const
LangevinIntegrator
&
integrator
)
{
double
temperature
=
integrator
.
getTemperature
();
double
friction
=
integrator
.
getFriction
();
double
stepSize
=
integrator
.
getStepSize
();
vector
<
RealVec
>&
posData
=
extractPositions
(
context
);
vector
<
RealVec
>&
velData
=
extractVelocities
(
context
);
vector
<
RealVec
>&
forceData
=
extractForces
(
context
);
if
(
dynamics
==
0
||
temperature
!=
prevTemp
||
friction
!=
prevFriction
||
stepSize
!=
prevStepSize
)
{
// Recreate the computation objects with the new parameters.
if
(
dynamics
)
delete
dynamics
;
RealOpenMM
tau
=
(
friction
==
0.0
?
0.0
:
1.0
/
friction
);
dynamics
=
new
CpuLangevinDynamics
(
context
.
getSystem
().
getNumParticles
(),
stepSize
,
tau
,
temperature
,
data
.
threads
,
data
.
random
);
dynamics
->
setReferenceConstraintAlgorithm
(
&
extractConstraints
(
context
));
prevTemp
=
temperature
;
prevFriction
=
friction
;
prevStepSize
=
stepSize
;
}
dynamics
->
update
(
context
.
getSystem
(),
posData
,
velData
,
forceData
,
masses
,
integrator
.
getConstraintTolerance
());
ReferencePlatform
::
PlatformData
*
refData
=
reinterpret_cast
<
ReferencePlatform
::
PlatformData
*>
(
context
.
getPlatformData
());
refData
->
time
+=
stepSize
;
refData
->
stepCount
++
;
}
double
CpuIntegrateLangevinStepKernel
::
computeKineticEnergy
(
ContextImpl
&
context
,
const
LangevinIntegrator
&
integrator
)
{
return
computeShiftedKineticEnergy
(
context
,
masses
,
0.5
*
integrator
.
getStepSize
());
}
platforms/cpu/src/CpuLangevinDynamics.cpp
0 → 100644
View file @
19d2885a
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Authors: Peter Eastman
* Contributors:
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMLog.h"
#include "SimTKOpenMMUtilities.h"
#include "CpuLangevinDynamics.h"
using
namespace
OpenMM
;
using
namespace
std
;
class
CpuLangevinDynamics
::
Update1Task
:
public
ThreadPool
::
Task
{
public:
Update1Task
(
CpuLangevinDynamics
&
owner
)
:
owner
(
owner
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
owner
.
threadUpdate1
(
threadIndex
);
}
CpuLangevinDynamics
&
owner
;
};
class
CpuLangevinDynamics
::
Update2Task
:
public
ThreadPool
::
Task
{
public:
Update2Task
(
CpuLangevinDynamics
&
owner
)
:
owner
(
owner
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
owner
.
threadUpdate2
(
threadIndex
);
}
CpuLangevinDynamics
&
owner
;
};
CpuLangevinDynamics
::
CpuLangevinDynamics
(
int
numberOfAtoms
,
RealOpenMM
deltaT
,
RealOpenMM
tau
,
RealOpenMM
temperature
,
ThreadPool
&
threads
,
CpuRandom
&
random
)
:
ReferenceStochasticDynamics
(
numberOfAtoms
,
deltaT
,
tau
,
temperature
),
threads
(
threads
),
random
(
random
)
{
}
CpuLangevinDynamics
::~
CpuLangevinDynamics
()
{
}
void
CpuLangevinDynamics
::
updatePart1
(
int
numberOfAtoms
,
vector
<
RealVec
>&
atomCoordinates
,
vector
<
RealVec
>&
velocities
,
vector
<
RealVec
>&
forces
,
vector
<
RealOpenMM
>&
inverseMasses
,
vector
<
RealVec
>&
xPrime
)
{
// Record the parameters for the threads.
this
->
numberOfAtoms
=
numberOfAtoms
;
this
->
atomCoordinates
=
&
atomCoordinates
[
0
];
this
->
velocities
=
&
velocities
[
0
];
this
->
forces
=
&
forces
[
0
];
this
->
inverseMasses
=
&
inverseMasses
[
0
];
this
->
xPrime
=
&
xPrime
[
0
];
// Signal the threads to start running and wait for them to finish.
Update1Task
task
(
*
this
);
threads
.
execute
(
task
);
threads
.
waitForThreads
();
}
void
CpuLangevinDynamics
::
updatePart2
(
int
numberOfAtoms
,
vector
<
RealVec
>&
atomCoordinates
,
vector
<
RealVec
>&
velocities
,
vector
<
RealVec
>&
forces
,
vector
<
RealOpenMM
>&
inverseMasses
,
vector
<
RealVec
>&
xPrime
)
{
// Record the parameters for the threads.
this
->
numberOfAtoms
=
numberOfAtoms
;
this
->
atomCoordinates
=
&
atomCoordinates
[
0
];
this
->
velocities
=
&
velocities
[
0
];
this
->
forces
=
&
forces
[
0
];
this
->
inverseMasses
=
&
inverseMasses
[
0
];
this
->
xPrime
=
&
xPrime
[
0
];
// Signal the threads to start running and wait for them to finish.
Update2Task
task
(
*
this
);
threads
.
execute
(
task
);
threads
.
waitForThreads
();
}
void
CpuLangevinDynamics
::
threadUpdate1
(
int
threadIndex
)
{
const
RealOpenMM
tau
=
getTau
();
const
RealOpenMM
vscale
=
EXP
(
-
getDeltaT
()
/
tau
);
const
RealOpenMM
fscale
=
(
1
-
vscale
)
*
tau
;
const
RealOpenMM
kT
=
BOLTZ
*
getTemperature
();
const
RealOpenMM
noisescale
=
SQRT
(
2
*
kT
/
tau
)
*
SQRT
(
0.5
*
(
1
-
vscale
*
vscale
)
*
tau
);
int
start
=
threadIndex
*
numberOfAtoms
/
threads
.
getNumThreads
();
int
end
=
(
threadIndex
+
1
)
*
numberOfAtoms
/
threads
.
getNumThreads
();
for
(
int
i
=
start
;
i
<
end
;
i
++
)
{
if
(
inverseMasses
[
i
]
!=
0.0
)
{
RealOpenMM
sqrtInvMass
=
SQRT
(
inverseMasses
[
i
]);
RealVec
noise
(
random
.
getGaussianRandom
(
threadIndex
),
random
.
getGaussianRandom
(
threadIndex
),
random
.
getGaussianRandom
(
threadIndex
));
velocities
[
i
]
=
velocities
[
i
]
*
vscale
+
forces
[
i
]
*
(
fscale
*
inverseMasses
[
i
])
+
noise
*
(
noisescale
*
sqrtInvMass
);
}
}
}
void
CpuLangevinDynamics
::
threadUpdate2
(
int
threadIndex
)
{
const
RealOpenMM
dt
=
getDeltaT
();
int
start
=
threadIndex
*
numberOfAtoms
/
threads
.
getNumThreads
();
int
end
=
(
threadIndex
+
1
)
*
numberOfAtoms
/
threads
.
getNumThreads
();
for
(
int
i
=
start
;
i
<
end
;
i
++
)
{
if
(
inverseMasses
[
i
]
!=
0.0
)
{
RealOpenMM
sqrtInvMass
=
SQRT
(
inverseMasses
[
i
]);
xPrime
[
i
]
=
atomCoordinates
[
i
]
+
velocities
[
i
]
*
dt
;
}
}
}
platforms/cpu/src/CpuNeighborList.cpp
View file @
19d2885a
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CpuNeighborList.h"
#include "openmm/internal/hardware.h"
#include "openmm/internal/vectorize.h"
...
...
@@ -12,27 +43,25 @@ using namespace std;
namespace
OpenMM
{
const
int
CpuNeighborList
::
BlockSize
=
4
;
class
VoxelIndex
{
public:
VoxelIndex
()
:
x
(
0
),
y
(
0
)
{
}
VoxelIndex
(
int
x
,
int
y
)
:
x
(
x
),
y
(
y
)
{
}
int
x
;
int
y
;
};
typedef
pair
<
const
float
*
,
int
>
VoxelItem
;
/**
* This data structure organizes the particles spatially. It divides them into bins along the x and y axes,
* then sorts each bin along the z axis so ranges can be identified quickly with a binary search.
*/
class
CpuNeighborList
::
Voxels
{
public:
Voxels
(
float
vsx
,
float
vsy
,
float
minx
,
float
maxx
,
float
miny
,
float
maxy
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
)
:
voxelSizeX
(
vsx
),
voxelSizeY
(
vsy
),
minx
(
minx
),
maxx
(
maxx
),
miny
(
miny
),
maxy
(
maxy
),
periodicBoxSize
(
periodicBoxSize
),
usePeriodic
(
usePeriodic
)
{
Voxels
(
int
blockSize
,
float
vsx
,
float
vsy
,
float
minx
,
float
maxx
,
float
miny
,
float
maxy
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
)
:
blockSize
(
blockSize
),
voxelSizeX
(
vsx
),
voxelSizeY
(
vsy
),
minx
(
minx
),
maxx
(
maxx
),
miny
(
miny
),
maxy
(
maxy
),
periodicBoxSize
(
periodicBoxSize
),
usePeriodic
(
usePeriodic
)
{
if
(
usePeriodic
)
{
nx
=
(
int
)
floorf
(
periodicBoxSize
[
0
]
/
voxelSizeX
+
0.5
f
);
ny
=
(
int
)
floorf
(
periodicBoxSize
[
1
]
/
voxelSizeY
+
0.5
f
);
...
...
@@ -60,7 +89,7 @@ public:
*/
void
insert
(
const
int
&
atom
,
const
float
*
location
)
{
VoxelIndex
voxelIndex
=
getVoxelIndex
(
location
);
bins
[
voxelIndex
.
x
][
voxelIndex
.
y
].
push_back
(
make_pair
(
location
[
2
],
VoxelItem
(
location
,
atom
))
)
;
bins
[
voxelIndex
.
x
][
voxelIndex
.
y
].
push_back
(
make_pair
(
location
[
2
],
atom
));
}
/**
...
...
@@ -76,7 +105,7 @@ public:
* Find the index of the first particle in voxel (x,y) whose z coordinate in >= the specified value.
*/
int
findLowerBound
(
int
x
,
int
y
,
double
z
)
const
{
const
vector
<
pair
<
float
,
VoxelItem
>
>&
bin
=
bins
[
x
][
y
];
const
vector
<
pair
<
float
,
int
>
>&
bin
=
bins
[
x
][
y
];
int
lower
=
0
;
int
upper
=
bin
.
size
();
while
(
lower
<
upper
)
{
...
...
@@ -93,7 +122,7 @@ public:
* Find the index of the first particle in voxel (x,y) whose z coordinate in greater than the specified value.
*/
int
findUpperBound
(
int
x
,
int
y
,
double
z
)
const
{
const
vector
<
pair
<
float
,
VoxelItem
>
>&
bin
=
bins
[
x
][
y
];
const
vector
<
pair
<
float
,
int
>
>&
bin
=
bins
[
x
][
y
];
int
lower
=
0
;
int
upper
=
bin
.
size
();
while
(
lower
<
upper
)
{
...
...
@@ -125,7 +154,7 @@ public:
return
VoxelIndex
(
x
,
y
);
}
void
getNeighbors
(
vector
<
int
>&
neighbors
,
int
blockIndex
,
fvec4
blockCenter
,
fvec4
blockWidth
,
const
vector
<
int
>&
sortedAtoms
,
vector
<
char
>&
exclusions
,
float
maxDistance
,
const
vector
<
int
>
blockAtoms
,
const
float
*
atomLocations
)
const
{
void
getNeighbors
(
vector
<
int
>&
neighbors
,
int
blockIndex
,
const
fvec4
&
blockCenter
,
const
fvec4
&
blockWidth
,
const
vector
<
int
>&
sortedAtoms
,
vector
<
char
>&
exclusions
,
float
maxDistance
,
const
vector
<
int
>
&
blockAtoms
,
const
float
*
atomLocations
,
const
vector
<
VoxelIndex
>&
atomVoxelIndex
)
const
{
neighbors
.
resize
(
0
);
exclusions
.
resize
(
0
);
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
...
...
@@ -135,8 +164,12 @@ public:
float
refineCutoff
=
maxDistance
-
max
(
max
(
blockWidth
[
0
],
blockWidth
[
1
]),
blockWidth
[
2
]);
float
refineCutoffSquared
=
refineCutoff
*
refineCutoff
;
int
dIndexX
=
min
(
nx
/
2
,
int
((
maxDistance
+
blockWidth
[
0
])
/
voxelSizeX
)
+
1
);
// How may voxels away do we have to look?
int
dIndexY
=
min
(
ny
/
2
,
int
((
maxDistance
+
blockWidth
[
1
])
/
voxelSizeY
)
+
1
);
int
dIndexX
=
int
((
maxDistance
+
blockWidth
[
0
])
/
voxelSizeX
)
+
1
;
// How may voxels away do we have to look?
int
dIndexY
=
int
((
maxDistance
+
blockWidth
[
1
])
/
voxelSizeY
)
+
1
;
if
(
usePeriodic
)
{
dIndexX
=
min
(
nx
/
2
,
dIndexX
);
dIndexY
=
min
(
ny
/
2
,
dIndexY
);
}
float
centerPos
[
4
];
blockCenter
.
store
(
centerPos
);
VoxelIndex
centerVoxelIndex
=
getVoxelIndex
(
centerPos
);
...
...
@@ -148,63 +181,87 @@ public:
if
(
usePeriodic
)
{
endx
=
min
(
endx
,
centerVoxelIndex
.
x
-
dIndexX
+
nx
-
1
);
endy
=
min
(
endy
,
centerVoxelIndex
.
y
-
dIndexY
+
ny
-
1
);
numRanges
=
2
;
}
else
{
startx
=
max
(
startx
,
0
);
starty
=
max
(
starty
,
0
);
endx
=
min
(
endx
,
nx
-
1
);
endy
=
min
(
endy
,
ny
-
1
);
numRanges
=
1
;
}
int
lastSortedIndex
=
B
lockSize
*
(
blockIndex
+
1
);
int
lastSortedIndex
=
b
lockSize
*
(
blockIndex
+
1
);
VoxelIndex
voxelIndex
(
0
,
0
);
for
(
int
x
=
startx
;
x
<=
endx
;
++
x
)
{
voxelIndex
.
x
=
x
;
if
(
usePeriodic
)
voxelIndex
.
x
=
(
x
<
0
?
x
+
nx
:
(
x
>=
nx
?
x
-
nx
:
x
));
float
dx
=
max
(
0.0
f
,
voxelSizeX
*
max
(
0
,
abs
(
centerVoxelIndex
.
x
-
x
)
-
1
)
-
blockWidth
[
0
]);
for
(
int
y
=
starty
;
y
<=
endy
;
++
y
)
{
voxelIndex
.
y
=
y
;
if
(
usePeriodic
)
voxelIndex
.
y
=
(
y
<
0
?
y
+
ny
:
(
y
>=
ny
?
y
-
ny
:
y
));
float
dy
=
max
(
0.0
f
,
voxelSizeY
*
max
(
0
,
abs
(
centerVoxelIndex
.
y
-
y
)
-
1
)
-
blockWidth
[
1
]);
// Identify the range of atoms within this bin we need to search. When using periodic boundary
// conditions, there may be two separate ranges.
float
dz
=
maxDistance
+
blockWidth
[
2
];
dz
=
sqrtf
(
max
(
0.0
f
,
dz
*
dz
-
dx
*
dx
-
dy
*
dy
));
float
minz
=
centerPos
[
2
];
float
maxz
=
centerPos
[
2
];
fvec4
offset
(
voxelSizeX
*
x
+
(
usePeriodic
?
0.0
f
:
minx
),
voxelSizeY
*
y
+
(
usePeriodic
?
0.0
f
:
miny
),
0
,
0
);
for
(
int
k
=
0
;
k
<
(
int
)
blockAtoms
.
size
();
k
++
)
{
const
float
*
atomPos
=
&
atomLocations
[
4
*
blockAtoms
[
k
]];
fvec4
posVec
(
atomPos
);
fvec4
delta1
=
offset
-
posVec
;
fvec4
delta2
=
delta1
+
fvec4
(
voxelSizeX
,
voxelSizeY
,
0
,
0
);
if
(
usePeriodic
)
{
delta1
-=
round
(
delta1
*
invBoxSize
)
*
boxSize
;
delta2
-=
round
(
delta2
*
invBoxSize
)
*
boxSize
;
}
fvec4
delta
=
min
(
abs
(
delta1
),
abs
(
delta2
));
float
dx
=
(
x
==
atomVoxelIndex
[
k
].
x
?
0.0
f
:
delta
[
0
]);
float
dy
=
(
y
==
atomVoxelIndex
[
k
].
y
?
0.0
f
:
delta
[
1
]);
float
dist2
=
maxDistanceSquared
-
dx
*
dx
-
dy
*
dy
;
if
(
dist2
>
0
)
{
float
dist
=
sqrtf
(
dist2
);
minz
=
min
(
minz
,
atomPos
[
2
]
-
dist
);
maxz
=
max
(
maxz
,
atomPos
[
2
]
+
dist
);
}
}
if
(
minz
==
maxz
)
continue
;
bool
needPeriodic
=
(
centerPos
[
0
]
-
blockWidth
[
0
]
<
maxDistance
||
centerPos
[
0
]
+
blockWidth
[
0
]
>
periodicBoxSize
[
0
]
-
maxDistance
||
centerPos
[
1
]
-
blockWidth
[
1
]
<
maxDistance
||
centerPos
[
1
]
+
blockWidth
[
1
]
>
periodicBoxSize
[
1
]
-
maxDistance
||
minz
<
0.0
f
||
maxz
>
periodicBoxSize
[
2
]);
int
rangeStart
[
2
];
int
rangeEnd
[
2
];
rangeStart
[
0
]
=
findLowerBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
centerPos
[
2
]
-
dz
);
if
(
usePeriodic
)
{
rangeEnd
[
0
]
=
findUpperBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
centerPos
[
2
]
+
dz
);
rangeStart
[
0
]
=
findLowerBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
minz
);
if
(
needPeriodic
)
{
numRanges
=
2
;
rangeEnd
[
0
]
=
findUpperBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
maxz
);
if
(
rangeStart
[
0
]
>
0
)
{
rangeStart
[
1
]
=
0
;
rangeEnd
[
1
]
=
min
(
findUpperBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
centerPos
[
2
]
+
d
z
-
periodicBoxSize
[
2
]),
rangeStart
[
0
]);
rangeEnd
[
1
]
=
min
(
findUpperBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
max
z
-
periodicBoxSize
[
2
]),
rangeStart
[
0
]);
}
else
{
rangeStart
[
1
]
=
max
(
findLowerBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
centerPos
[
2
]
-
d
z
+
periodicBoxSize
[
2
]),
rangeEnd
[
0
]);
rangeStart
[
1
]
=
max
(
findLowerBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
min
z
+
periodicBoxSize
[
2
]),
rangeEnd
[
0
]);
rangeEnd
[
1
]
=
bins
[
voxelIndex
.
x
][
voxelIndex
.
y
].
size
();
}
}
else
rangeEnd
[
0
]
=
findUpperBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
centerPos
[
2
]
+
dz
);
else
{
numRanges
=
1
;
rangeEnd
[
0
]
=
findUpperBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
maxz
);
}
// Loop over atoms and check to see if they are neighbors of this block.
for
(
int
range
=
0
;
range
<
numRanges
;
range
++
)
{
for
(
int
item
=
rangeStart
[
range
];
item
<
rangeEnd
[
range
];
item
++
)
{
const
int
sortedIndex
=
bins
[
voxelIndex
.
x
][
voxelIndex
.
y
][
item
].
second
.
second
;
const
int
sortedIndex
=
bins
[
voxelIndex
.
x
][
voxelIndex
.
y
][
item
].
second
;
// Avoid duplicate entries.
if
(
sortedIndex
>=
lastSortedIndex
)
continue
;
fvec4
atomPos
(
bins
[
voxelIndex
.
x
][
voxelIndex
.
y
][
item
].
second
.
first
);
fvec4
atomPos
(
atomLocations
+
4
*
sortedAtoms
[
sortedIndex
]
);
fvec4
delta
=
atomPos
-
centerPos
;
if
(
use
Periodic
)
{
if
(
need
Periodic
)
{
fvec4
base
=
round
(
delta
*
invBoxSize
)
*
boxSize
;
delta
=
delta
-
base
;
}
...
...
@@ -221,7 +278,7 @@ public:
for
(
int
k
=
0
;
k
<
(
int
)
blockAtoms
.
size
();
k
++
)
{
fvec4
pos1
(
&
atomLocations
[
4
*
blockAtoms
[
k
]]);
delta
=
atomPos
-
pos1
;
if
(
use
Periodic
)
{
if
(
need
Periodic
)
{
fvec4
base
=
round
(
delta
*
invBoxSize
)
*
boxSize
;
delta
=
delta
-
base
;
}
...
...
@@ -238,10 +295,12 @@ public:
// Add this atom to the list of neighbors.
neighbors
.
push_back
(
sortedAtoms
[
sortedIndex
]);
if
(
sortedIndex
<
B
lockSize
*
blockIndex
)
if
(
sortedIndex
<
b
lockSize
*
blockIndex
)
exclusions
.
push_back
(
0
);
else
exclusions
.
push_back
(
0xF
&
(
0xF
<<
(
sortedIndex
-
BlockSize
*
blockIndex
)));
else
{
int
mask
=
(
1
<<
blockSize
)
-
1
;
exclusions
.
push_back
(
mask
&
(
mask
<<
(
sortedIndex
-
blockSize
*
blockIndex
)));
}
}
}
}
...
...
@@ -249,63 +308,31 @@ public:
}
private:
int
blockSize
;
float
voxelSizeX
,
voxelSizeY
;
float
minx
,
maxx
,
miny
,
maxy
;
int
nx
,
ny
;
const
float
*
periodicBoxSize
;
const
bool
usePeriodic
;
vector
<
vector
<
vector
<
pair
<
float
,
VoxelItem
>
>
>
>
bins
;
vector
<
vector
<
vector
<
pair
<
float
,
int
>
>
>
>
bins
;
};
class
CpuNeighborList
::
Thread
Data
{
class
CpuNeighborList
::
Thread
Task
:
public
ThreadPool
::
Task
{
public:
ThreadData
(
int
index
,
CpuNeighborList
&
owner
)
:
index
(
index
),
owner
(
owner
)
{
ThreadTask
(
CpuNeighborList
&
owner
)
:
owner
(
owner
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
owner
.
threadComputeNeighborList
(
threads
,
threadIndex
);
}
int
index
;
CpuNeighborList
&
owner
;
};
static
void
*
threadBody
(
void
*
args
)
{
CpuNeighborList
::
ThreadData
&
data
=
*
reinterpret_cast
<
CpuNeighborList
::
ThreadData
*>
(
args
);
data
.
owner
.
runThread
(
data
.
index
);
delete
&
data
;
return
0
;
CpuNeighborList
::
CpuNeighborList
(
int
blockSize
)
:
blockSize
(
blockSize
)
{
}
CpuNeighborList
::
CpuNeighborList
()
{
isDeleted
=
false
;
numThreads
=
getNumProcessors
();
pthread_cond_init
(
&
startCondition
,
NULL
);
pthread_cond_init
(
&
endCondition
,
NULL
);
pthread_mutex_init
(
&
lock
,
NULL
);
thread
.
resize
(
numThreads
);
pthread_mutex_lock
(
&
lock
);
waitCount
=
0
;
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
{
ThreadData
*
data
=
new
ThreadData
(
i
,
*
this
);
threadData
.
push_back
(
data
);
pthread_create
(
&
thread
[
i
],
NULL
,
threadBody
,
data
);
}
while
(
waitCount
<
numThreads
)
pthread_cond_wait
(
&
endCondition
,
&
lock
);
pthread_mutex_unlock
(
&
lock
);
}
CpuNeighborList
::~
CpuNeighborList
()
{
isDeleted
=
true
;
pthread_mutex_lock
(
&
lock
);
pthread_cond_broadcast
(
&
startCondition
);
pthread_mutex_unlock
(
&
lock
);
for
(
int
i
=
0
;
i
<
(
int
)
thread
.
size
();
i
++
)
pthread_join
(
thread
[
i
],
NULL
);
pthread_mutex_destroy
(
&
lock
);
pthread_cond_destroy
(
&
startCondition
);
pthread_cond_destroy
(
&
endCondition
);
}
void
CpuNeighborList
::
computeNeighborList
(
int
numAtoms
,
const
vector
<
float
>&
atomLocations
,
const
vector
<
set
<
int
>
>&
exclusions
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
)
{
int
numBlocks
=
(
numAtoms
+
BlockSize
-
1
)
/
BlockSize
;
void
CpuNeighborList
::
computeNeighborList
(
int
numAtoms
,
const
AlignedArray
<
float
>&
atomLocations
,
const
vector
<
set
<
int
>
>&
exclusions
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
,
ThreadPool
&
threads
)
{
int
numBlocks
=
(
numAtoms
+
blockSize
-
1
)
/
blockSize
;
blockNeighbors
.
resize
(
numBlocks
);
blockExclusions
.
resize
(
numBlocks
);
sortedAtoms
.
resize
(
numAtoms
);
...
...
@@ -338,8 +365,9 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const vector<float>& ato
// Sort the atoms based on a Hilbert curve.
atomBins
.
resize
(
numAtoms
);
pthread_mutex_lock
(
&
lock
);
advanceThreads
();
ThreadTask
task
(
*
this
);
threads
.
execute
(
task
);
threads
.
waitForThreads
();
sort
(
atomBins
.
begin
(),
atomBins
.
end
());
// Build the voxel hash.
...
...
@@ -348,10 +376,10 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const vector<float>& ato
if
(
!
usePeriodic
)
edgeSizeX
=
edgeSizeY
=
maxDistance
;
// TODO - adjust this as needed
else
{
edgeSizeX
=
0.
5
f
*
periodicBoxSize
[
0
]
/
floorf
(
periodicBoxSize
[
0
]
/
maxDistance
);
edgeSizeY
=
0.
5
f
*
periodicBoxSize
[
1
]
/
floorf
(
periodicBoxSize
[
1
]
/
maxDistance
);
edgeSizeX
=
0.
6
f
*
periodicBoxSize
[
0
]
/
floorf
(
periodicBoxSize
[
0
]
/
maxDistance
);
edgeSizeY
=
0.
6
f
*
periodicBoxSize
[
1
]
/
floorf
(
periodicBoxSize
[
1
]
/
maxDistance
);
}
Voxels
voxels
(
edgeSizeX
,
edgeSizeY
,
minx
,
maxx
,
miny
,
maxy
,
periodicBoxSize
,
usePeriodic
);
Voxels
voxels
(
blockSize
,
edgeSizeX
,
edgeSizeY
,
minx
,
maxx
,
miny
,
maxy
,
periodicBoxSize
,
usePeriodic
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
atomIndex
=
atomBins
[
i
].
second
;
sortedAtoms
[
i
]
=
atomIndex
;
...
...
@@ -362,14 +390,14 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const vector<float>& ato
// Signal the threads to start running and wait for them to finish.
advanc
eThreads
();
p
thread
_mutex_unlock
(
&
lock
);
threads
.
resum
eThreads
();
thread
s
.
waitForThreads
(
);
// Add padding atoms to fill up the last block.
int
numPadding
=
numBlocks
*
B
lockSize
-
numAtoms
;
int
numPadding
=
numBlocks
*
b
lockSize
-
numAtoms
;
if
(
numPadding
>
0
)
{
char
mask
=
(
0xF
0
>>
numPadding
)
&
0xF
;
char
mask
=
(
(
0xF
FFF
-
(
1
<<
blockSize
)
+
1
)
>>
numPadding
);
for
(
int
i
=
0
;
i
<
numPadding
;
i
++
)
sortedAtoms
.
push_back
(
0
);
vector
<
char
>&
exc
=
blockExclusions
[
blockExclusions
.
size
()
-
1
];
...
...
@@ -379,7 +407,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const vector<float>& ato
}
int
CpuNeighborList
::
getNumBlocks
()
const
{
return
sortedAtoms
.
size
()
/
B
lockSize
;
return
sortedAtoms
.
size
()
/
b
lockSize
;
}
const
std
::
vector
<
int
>&
CpuNeighborList
::
getSortedAtoms
()
const
{
...
...
@@ -395,82 +423,59 @@ const std::vector<char>& CpuNeighborList::getBlockExclusions(int blockIndex) con
}
void
CpuNeighborList
::
runThread
(
int
index
)
{
while
(
true
)
{
// Wait for the signal to start running.
threadWait
();
if
(
isDeleted
)
break
;
// Compute the positions of atoms along the Hilbert curve.
void
CpuNeighborList
::
threadComputeNeighborList
(
ThreadPool
&
threads
,
int
threadIndex
)
{
// Compute the positions of atoms along the Hilbert curve.
float
binWidth
=
max
(
max
(
maxx
-
minx
,
maxy
-
miny
),
maxz
-
minz
)
/
255.0
f
;
float
invBinWidth
=
1.0
f
/
binWidth
;
bitmask_t
coords
[
3
];
for
(
int
i
=
index
;
i
<
numAtoms
;
i
+=
numThreads
)
{
const
float
*
pos
=
&
atomLocations
[
4
*
i
];
coords
[
0
]
=
(
bitmask_t
)
((
pos
[
0
]
-
minx
)
*
invBinWidth
);
coords
[
1
]
=
(
bitmask_t
)
((
pos
[
1
]
-
miny
)
*
invBinWidth
);
coords
[
2
]
=
(
bitmask_t
)
((
pos
[
2
]
-
minz
)
*
invBinWidth
);
int
bin
=
(
int
)
hilbert_c2i
(
3
,
8
,
coords
);
atomBins
[
i
]
=
pair
<
int
,
int
>
(
bin
,
i
);
}
threadWait
();
// Compute this thread's subset of neighbors.
int
numBlocks
=
blockNeighbors
.
size
();
vector
<
int
>
blockAtoms
;
for
(
int
i
=
index
;
i
<
numBlocks
;
i
+=
numThreads
)
{
{
int
firstIndex
=
BlockSize
*
i
;
int
atomsInBlock
=
min
(
BlockSize
,
numAtoms
-
firstIndex
);
blockAtoms
.
resize
(
atomsInBlock
);
for
(
int
j
=
0
;
j
<
atomsInBlock
;
j
++
)
blockAtoms
[
j
]
=
sortedAtoms
[
firstIndex
+
j
];
}
float
binWidth
=
max
(
max
(
maxx
-
minx
,
maxy
-
miny
),
maxz
-
minz
)
/
255.0
f
;
float
invBinWidth
=
1.0
f
/
binWidth
;
bitmask_t
coords
[
3
];
int
numThreads
=
threads
.
getNumThreads
();
for
(
int
i
=
threadIndex
;
i
<
numAtoms
;
i
+=
numThreads
)
{
const
float
*
pos
=
&
atomLocations
[
4
*
i
];
coords
[
0
]
=
(
bitmask_t
)
((
pos
[
0
]
-
minx
)
*
invBinWidth
);
coords
[
1
]
=
(
bitmask_t
)
((
pos
[
1
]
-
miny
)
*
invBinWidth
);
coords
[
2
]
=
(
bitmask_t
)
((
pos
[
2
]
-
minz
)
*
invBinWidth
);
int
bin
=
(
int
)
hilbert_c2i
(
3
,
8
,
coords
);
atomBins
[
i
]
=
pair
<
int
,
int
>
(
bin
,
i
);
}
threads
.
syncThreads
();
int
firstIndex
=
BlockSize
*
i
;
fvec4
minPos
(
&
atomLocations
[
4
*
sortedAtoms
[
firstIndex
]]);
fvec4
maxPos
=
minPos
;
int
atomsInBlock
=
min
(
BlockSize
,
numAtoms
-
firstIndex
);
for
(
int
j
=
1
;
j
<
atomsInBlock
;
j
++
)
{
fvec4
pos
(
&
atomLocations
[
4
*
sortedAtoms
[
firstIndex
+
j
]]);
minPos
=
min
(
minPos
,
pos
);
maxPos
=
max
(
maxPos
,
pos
);
}
voxels
->
getNeighbors
(
blockNeighbors
[
i
],
i
,
(
maxPos
+
minPos
)
*
0.5
f
,
(
maxPos
-
minPos
)
*
0.5
f
,
sortedAtoms
,
blockExclusions
[
i
],
maxDistance
,
blockAtoms
,
atomLocations
);
// Record the exclusions for this block.
for
(
int
j
=
0
;
j
<
atomsInBlock
;
j
++
)
{
const
set
<
int
>&
atomExclusions
=
(
*
exclusions
)[
sortedAtoms
[
firstIndex
+
j
]];
char
mask
=
1
<<
j
;
for
(
int
k
=
0
;
k
<
(
int
)
blockNeighbors
[
i
].
size
();
k
++
)
{
int
atomIndex
=
blockNeighbors
[
i
][
k
];
if
(
atomExclusions
.
find
(
atomIndex
)
!=
atomExclusions
.
end
())
blockExclusions
[
i
][
k
]
|=
mask
;
}
}
// Compute this thread's subset of neighbors.
int
numBlocks
=
blockNeighbors
.
size
();
vector
<
int
>
blockAtoms
;
vector
<
VoxelIndex
>
atomVoxelIndex
;
for
(
int
i
=
threadIndex
;
i
<
numBlocks
;
i
+=
numThreads
)
{
// Find the atoms in this block and compute their bounding box.
int
firstIndex
=
blockSize
*
i
;
int
atomsInBlock
=
min
(
blockSize
,
numAtoms
-
firstIndex
);
blockAtoms
.
resize
(
atomsInBlock
);
atomVoxelIndex
.
resize
(
atomsInBlock
);
for
(
int
j
=
0
;
j
<
atomsInBlock
;
j
++
)
{
blockAtoms
[
j
]
=
sortedAtoms
[
firstIndex
+
j
];
atomVoxelIndex
[
j
]
=
voxels
->
getVoxelIndex
(
&
atomLocations
[
4
*
blockAtoms
[
j
]]);
}
}
}
fvec4
minPos
(
&
atomLocations
[
4
*
sortedAtoms
[
firstIndex
]]);
fvec4
maxPos
=
minPos
;
for
(
int
j
=
1
;
j
<
atomsInBlock
;
j
++
)
{
fvec4
pos
(
&
atomLocations
[
4
*
sortedAtoms
[
firstIndex
+
j
]]);
minPos
=
min
(
minPos
,
pos
);
maxPos
=
max
(
maxPos
,
pos
);
}
voxels
->
getNeighbors
(
blockNeighbors
[
i
],
i
,
(
maxPos
+
minPos
)
*
0.5
f
,
(
maxPos
-
minPos
)
*
0.5
f
,
sortedAtoms
,
blockExclusions
[
i
],
maxDistance
,
blockAtoms
,
atomLocations
,
atomVoxelIndex
);
void
CpuNeighborList
::
threadWait
()
{
pthread_mutex_lock
(
&
lock
);
waitCount
++
;
pthread_cond_signal
(
&
endCondition
);
pthread_cond_wait
(
&
startCondition
,
&
lock
);
pthread_mutex_unlock
(
&
lock
);
}
// Record the exclusions for this block.
void
CpuNeighborList
::
advanceThreads
()
{
waitCount
=
0
;
pthread_cond_broadcast
(
&
startCondition
);
while
(
waitCount
<
numThreads
)
{
pthread_cond_wait
(
&
endCondition
,
&
lock
);
for
(
int
j
=
0
;
j
<
atomsInBlock
;
j
++
)
{
const
set
<
int
>&
atomExclusions
=
(
*
exclusions
)[
sortedAtoms
[
firstIndex
+
j
]];
char
mask
=
1
<<
j
;
for
(
int
k
=
0
;
k
<
(
int
)
blockNeighbors
[
i
].
size
();
k
++
)
{
int
atomIndex
=
blockNeighbors
[
i
][
k
];
if
(
atomExclusions
.
find
(
atomIndex
)
!=
atomExclusions
.
end
())
blockExclusions
[
i
][
k
]
|=
mask
;
}
}
}
}
...
...
platforms/cpu/src/CpuNonbondedForce.cpp
View file @
19d2885a
...
...
@@ -22,7 +22,6 @@
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <string.h>
#include <complex>
#include "SimTKOpenMMCommon.h"
...
...
@@ -30,9 +29,8 @@
#include "CpuNonbondedForce.h"
#include "ReferenceForce.h"
#include "ReferencePME.h"
#include "openmm/internal/hardware.h"
#include "openmm/internal/SplineFitter.h"
#include "openmm/internal/vectorize.h"
#include "gmx_atomic.h"
#include <algorithm>
// In case we're using some primitive version of Visual Studio this will
// make sure that erf() and erfc() are defined.
...
...
@@ -42,80 +40,43 @@ using namespace std;
using
namespace
OpenMM
;
const
float
CpuNonbondedForce
::
TWO_OVER_SQRT_PI
=
(
float
)
(
2
/
sqrt
(
PI_M
));
const
int
CpuNonbondedForce
::
NUM_TABLE_POINTS
=
1025
;
const
int
CpuNonbondedForce
::
NUM_TABLE_POINTS
=
2048
;
class
CpuNonbondedForce
::
ThreadData
{
class
CpuNonbondedForce
::
ComputeDirectTask
:
public
ThreadPool
::
Task
{
public:
ThreadData
(
int
index
,
CpuNonbondedForce
&
owner
)
:
index
(
index
),
owner
(
owner
)
{
ComputeDirectTask
(
CpuNonbondedForce
&
owner
)
:
owner
(
owner
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
owner
.
threadComputeDirect
(
threads
,
threadIndex
);
}
int
index
;
CpuNonbondedForce
&
owner
;
vector
<
float
>
threadForce
;
double
threadEnergy
;
};
static
void
*
threadBody
(
void
*
args
)
{
CpuNonbondedForce
::
ThreadData
&
data
=
*
reinterpret_cast
<
CpuNonbondedForce
::
ThreadData
*>
(
args
);
data
.
owner
.
runThread
(
data
.
index
,
data
.
threadForce
,
data
.
threadEnergy
);
delete
&
data
;
return
0
;
}
/**---------------------------------------------------------------------------------------
CpuNonbondedForce constructor
--------------------------------------------------------------------------------------- */
CpuNonbondedForce
::
CpuNonbondedForce
()
:
cutoff
(
false
),
useSwitch
(
false
),
periodic
(
false
),
ewald
(
false
),
pme
(
false
)
{
isDeleted
=
false
;
numThreads
=
getNumProcessors
();
pthread_cond_init
(
&
startCondition
,
NULL
);
pthread_cond_init
(
&
endCondition
,
NULL
);
pthread_mutex_init
(
&
lock
,
NULL
);
thread
.
resize
(
numThreads
);
pthread_mutex_lock
(
&
lock
);
waitCount
=
0
;
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
{
ThreadData
*
data
=
new
ThreadData
(
i
,
*
this
);
threadData
.
push_back
(
data
);
pthread_create
(
&
thread
[
i
],
NULL
,
threadBody
,
data
);
}
while
(
waitCount
<
numThreads
)
pthread_cond_wait
(
&
endCondition
,
&
lock
);
pthread_mutex_unlock
(
&
lock
);
CpuNonbondedForce
::
CpuNonbondedForce
()
:
cutoff
(
false
),
useSwitch
(
false
),
periodic
(
false
),
ewald
(
false
),
pme
(
false
),
tableIsValid
(
false
),
cutoffDistance
(
0.0
f
),
alphaEwald
(
0.0
f
)
{
}
/**---------------------------------------------------------------------------------------
CpuNonbondedForce destructor
--------------------------------------------------------------------------------------- */
CpuNonbondedForce
::~
CpuNonbondedForce
(){
isDeleted
=
true
;
pthread_mutex_lock
(
&
lock
);
pthread_cond_broadcast
(
&
startCondition
);
pthread_mutex_unlock
(
&
lock
);
for
(
int
i
=
0
;
i
<
(
int
)
thread
.
size
();
i
++
)
pthread_join
(
thread
[
i
],
NULL
);
pthread_mutex_destroy
(
&
lock
);
pthread_cond_destroy
(
&
startCondition
);
pthread_cond_destroy
(
&
endCondition
);
CpuNonbondedForce
::~
CpuNonbondedForce
()
{
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use a cutoff.
Set the force to use a cutoff.
@param distance the cutoff distance
@param neighbors the neighbor list to use
@param solventDielectric the dielectric constant of the bulk solvent
@param distance the cutoff distance
@param neighbors the neighbor list to use
@param solventDielectric the dielectric constant of the bulk solvent
--------------------------------------------------------------------------------------- */
void
CpuNonbondedForce
::
setUseCutoff
(
float
distance
,
const
CpuNeighborList
&
neighbors
,
float
solventDielectric
)
{
void
CpuNonbondedForce
::
setUseCutoff
(
float
distance
,
const
CpuNeighborList
&
neighbors
,
float
solventDielectric
)
{
if
(
distance
!=
cutoffDistance
)
tableIsValid
=
false
;
cutoff
=
true
;
cutoffDistance
=
distance
;
neighborList
=
&
neighbors
;
...
...
@@ -170,6 +131,8 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */
void
CpuNonbondedForce
::
setUseEwald
(
float
alpha
,
int
kmaxx
,
int
kmaxy
,
int
kmaxz
)
{
if
(
alpha
!=
alphaEwald
)
tableIsValid
=
false
;
alphaEwald
=
alpha
;
numRx
=
kmaxx
;
numRy
=
kmaxy
;
...
...
@@ -188,6 +151,8 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */
void
CpuNonbondedForce
::
setUsePME
(
float
alpha
,
int
meshSize
[
3
])
{
if
(
alpha
!=
alphaEwald
)
tableIsValid
=
false
;
alphaEwald
=
alpha
;
meshDim
[
0
]
=
meshSize
[
0
];
meshDim
[
1
]
=
meshSize
[
1
];
...
...
@@ -198,35 +163,27 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
void
CpuNonbondedForce
::
tabulateEwaldScaleFactor
()
{
ewaldDX
=
cutoffDistance
/
(
NUM_TABLE_POINTS
-
2
);
if
(
tableIsValid
)
return
;
tableIsValid
=
true
;
ewaldDX
=
cutoffDistance
/
NUM_TABLE_POINTS
;
ewaldDXInv
=
1.0
f
/
ewaldDX
;
vector
<
double
>
x
(
NUM_TABLE_POINTS
+
1
);
vector
<
double
>
y
(
NUM_TABLE_POINTS
+
1
);
vector
<
double
>
deriv
;
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
+
1
;
i
++
)
{
double
r
=
i
*
cutoffDistance
/
(
NUM_TABLE_POINTS
-
2
);
ewaldScaleTable
.
resize
(
NUM_TABLE_POINTS
+
4
);
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
+
4
;
i
++
)
{
double
r
=
i
*
ewaldDX
;
double
alphaR
=
alphaEwald
*
r
;
x
[
i
]
=
r
;
y
[
i
]
=
erfc
(
alphaR
)
+
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
);
}
SplineFitter
::
createNaturalSpline
(
x
,
y
,
deriv
);
ewaldScaleTable
.
resize
(
4
*
NUM_TABLE_POINTS
);
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
;
i
++
)
{
ewaldScaleTable
[
4
*
i
]
=
(
float
)
y
[
i
];
ewaldScaleTable
[
4
*
i
+
1
]
=
(
float
)
y
[
i
+
1
];
ewaldScaleTable
[
4
*
i
+
2
]
=
(
float
)
(
deriv
[
i
]
*
ewaldDX
*
ewaldDX
/
6
);
ewaldScaleTable
[
4
*
i
+
3
]
=
(
float
)
(
deriv
[
i
+
1
]
*
ewaldDX
*
ewaldDX
/
6
);
ewaldScaleTable
[
i
]
=
erfc
(
alphaR
)
+
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
);
}
}
void
CpuNonbondedForce
::
calculateReciprocalIxn
(
int
numberOfAtoms
,
float
*
posq
,
vector
<
RealVec
>&
atomCoordinates
,
void
CpuNonbondedForce
::
calculateReciprocalIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
vector
<
RealVec
>&
atomCoordinates
,
const
vector
<
pair
<
float
,
float
>
>&
atomParameters
,
const
vector
<
set
<
int
>
>&
exclusions
,
vector
<
RealVec
>&
forces
,
float
*
totalEnergy
)
const
{
vector
<
RealVec
>&
forces
,
double
*
totalEnergy
)
const
{
typedef
std
::
complex
<
float
>
d_complex
;
static
const
float
epsilon
=
1.0
;
int
kmax
=
(
ewald
?
std
::
max
(
numRx
,
std
::
max
(
numRy
,
numRz
))
:
0
);
int
kmax
=
(
ewald
?
max
(
numRx
,
max
(
numRy
,
numRz
))
:
0
);
float
factorEwald
=
-
1
/
(
4
*
alphaEwald
*
alphaEwald
);
float
TWO_PI
=
2.0
*
PI_M
;
float
recipCoeff
=
(
float
)(
ONE_4PI_EPS0
*
4
*
PI_M
/
(
periodicBoxSize
[
0
]
*
periodicBoxSize
[
1
]
*
periodicBoxSize
[
2
])
/
epsilon
);
...
...
@@ -333,111 +290,107 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, v
}
void
CpuNonbondedForce
::
calculateDirectIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
vector
<
pair
<
float
,
float
>
>&
atomParameters
,
const
vector
<
set
<
int
>
>&
exclusions
,
float
*
f
orce
s
,
float
*
totalEnergy
)
{
void
CpuNonbondedForce
::
calculateDirectIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
vector
<
RealVec
>&
atomCoordinates
,
const
vector
<
pair
<
float
,
float
>
>&
atomParameters
,
const
vector
<
set
<
int
>
>&
exclusions
,
vector
<
AlignedArray
<
float
>
>&
threadF
orce
,
double
*
totalEnergy
,
ThreadPool
&
threads
)
{
// Record the parameters for the threads.
this
->
numberOfAtoms
=
numberOfAtoms
;
this
->
posq
=
posq
;
this
->
atomCoordinates
=
&
atomCoordinates
[
0
];
this
->
atomParameters
=
&
atomParameters
[
0
];
this
->
exclusions
=
&
exclusions
[
0
];
this
->
threadForce
=
&
threadForce
;
includeEnergy
=
(
totalEnergy
!=
NULL
);
threadEnergy
.
resize
(
threads
.
getNumThreads
());
gmx_atomic_t
counter
;
gmx_atomic_set
(
&
counter
,
0
);
this
->
atomicCounter
=
&
counter
;
// Signal the threads to start running and wait for them to finish.
pthread_mutex_lock
(
&
lock
);
waitCount
=
0
;
pthread_cond_broadcast
(
&
startCondition
);
while
(
waitCount
<
numThreads
)
pthread_cond_wait
(
&
endCondition
,
&
lock
);
pthread_mutex_unlock
(
&
lock
);
ComputeDirectTask
task
(
*
this
);
threads
.
execute
(
task
);
threads
.
waitForThreads
();
// Combine the
result
s from all the threads.
// Combine the
energie
s from all the threads.
double
directEnergy
=
0
;
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
directEnergy
+=
threadData
[
i
]
->
threadEnergy
;
for
(
int
i
=
0
;
i
<
numberOfAtoms
;
i
++
)
{
fvec4
f
(
forces
+
4
*
i
);
for
(
int
j
=
0
;
j
<
numThreads
;
j
++
)
f
+=
fvec4
(
&
threadData
[
j
]
->
threadForce
[
4
*
i
]);
f
.
store
(
forces
+
4
*
i
);
if
(
totalEnergy
!=
NULL
)
{
double
directEnergy
=
0
;
int
numThreads
=
threads
.
getNumThreads
();
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
directEnergy
+=
threadEnergy
[
i
];
*
totalEnergy
+=
directEnergy
;
}
if
(
totalEnergy
!=
NULL
)
*
totalEnergy
+=
(
float
)
directEnergy
;
}
void
CpuNonbondedForce
::
threadComputeDirect
(
ThreadPool
&
threads
,
int
threadIndex
)
{
// Compute this thread's subset of interactions.
int
numThreads
=
threads
.
getNumThreads
();
threadEnergy
[
threadIndex
]
=
0
;
double
*
energyPtr
=
(
includeEnergy
?
&
threadEnergy
[
threadIndex
]
:
NULL
);
float
*
forces
=
&
(
*
threadForce
)[
threadIndex
][
0
];
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
fvec4
invBoxSize
((
1
/
periodicBoxSize
[
0
]),
(
1
/
periodicBoxSize
[
1
]),
(
1
/
periodicBoxSize
[
2
]),
0
);
if
(
ewald
||
pme
)
{
// Compute the interactions from the neighbor list.
while
(
true
)
{
int
nextBlock
=
gmx_atomic_fetch_add
(
reinterpret_cast
<
gmx_atomic_t
*>
(
atomicCounter
),
1
);
if
(
nextBlock
>=
neighborList
->
getNumBlocks
())
break
;
calculateBlockEwaldIxn
(
nextBlock
,
forces
,
energyPtr
,
boxSize
,
invBoxSize
);
}
// Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum.
void
CpuNonbondedForce
::
runThread
(
int
index
,
vector
<
float
>&
threadForce
,
double
&
threadEnergy
)
{
while
(
true
)
{
// Wait for the signal to start running.
pthread_mutex_lock
(
&
lock
);
waitCount
++
;
pthread_cond_signal
(
&
endCondition
);
pthread_cond_wait
(
&
startCondition
,
&
lock
);
pthread_mutex_unlock
(
&
lock
);
if
(
isDeleted
)
break
;
// Compute this thread's subset of interactions.
threadEnergy
=
0
;
double
*
energyPtr
=
(
includeEnergy
?
&
threadEnergy
:
NULL
);
threadForce
.
resize
(
4
*
numberOfAtoms
,
0.0
f
);
for
(
int
i
=
0
;
i
<
4
*
numberOfAtoms
;
i
++
)
threadForce
[
i
]
=
0.0
f
;
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
fvec4
invBoxSize
((
1
/
periodicBoxSize
[
0
]),
(
1
/
periodicBoxSize
[
1
]),
(
1
/
periodicBoxSize
[
2
]),
0
);
if
(
ewald
||
pme
)
{
// Compute the interactions from the neighbor list.
for
(
int
i
=
index
;
i
<
neighborList
->
getNumBlocks
();
i
+=
numThreads
)
calculateBlockEwaldIxn
(
i
,
&
threadForce
[
0
],
energyPtr
,
boxSize
,
invBoxSize
);
// Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum.
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
fvec4
invBoxSize
((
1
/
periodicBoxSize
[
0
]),
(
1
/
periodicBoxSize
[
1
]),
(
1
/
periodicBoxSize
[
2
]),
0
);
for
(
int
i
=
index
;
i
<
numberOfAtoms
;
i
+=
numThreads
)
for
(
set
<
int
>::
const_iterator
iter
=
exclusions
[
i
].
begin
();
iter
!=
exclusions
[
i
].
end
();
++
iter
)
{
if
(
*
iter
>
i
)
{
int
j
=
*
iter
;
fvec4
deltaR
;
fvec4
posI
(
posq
+
4
*
i
);
fvec4
posJ
(
posq
+
4
*
j
);
float
r2
;
getDeltaR
(
posJ
,
posI
,
deltaR
,
r2
,
false
,
boxSize
,
invBoxSize
);
float
r
=
sqrtf
(
r2
);
float
inverseR
=
1
/
r
;
float
chargeProd
=
ONE_4PI_EPS0
*
posq
[
4
*
i
+
3
]
*
posq
[
4
*
j
+
3
];
float
alphaR
=
alphaEwald
*
r
;
float
erfcAlphaR
=
erfcApprox
(
alphaR
)[
0
];
float
dEdR
=
(
float
)
(
chargeProd
*
inverseR
*
inverseR
*
inverseR
);
dEdR
=
(
float
)
(
dEdR
*
(
1.0
f
-
erfcAlphaR
-
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)));
fvec4
result
=
deltaR
*
dEdR
;
(
fvec4
(
&
threadForce
[
4
*
i
])
-
result
).
store
(
&
threadForce
[
4
*
i
]);
(
fvec4
(
&
threadForce
[
4
*
j
])
+
result
).
store
(
&
threadForce
[
4
*
j
]);
if
(
includeEnergy
)
threadEnergy
-=
chargeProd
*
inverseR
*
(
1.0
f
-
erfcAlphaR
);
}
for
(
int
i
=
threadIndex
;
i
<
numberOfAtoms
;
i
+=
numThreads
)
{
fvec4
posI
((
float
)
atomCoordinates
[
i
][
0
],
(
float
)
atomCoordinates
[
i
][
1
],
(
float
)
atomCoordinates
[
i
][
2
],
0.0
f
);
for
(
set
<
int
>::
const_iterator
iter
=
exclusions
[
i
].
begin
();
iter
!=
exclusions
[
i
].
end
();
++
iter
)
{
if
(
*
iter
>
i
)
{
int
j
=
*
iter
;
fvec4
deltaR
;
fvec4
posJ
((
float
)
atomCoordinates
[
j
][
0
],
(
float
)
atomCoordinates
[
j
][
1
],
(
float
)
atomCoordinates
[
j
][
2
],
0.0
f
);
float
r2
;
getDeltaR
(
posJ
,
posI
,
deltaR
,
r2
,
false
,
boxSize
,
invBoxSize
);
float
r
=
sqrtf
(
r2
);
float
inverseR
=
1
/
r
;
float
chargeProd
=
ONE_4PI_EPS0
*
posq
[
4
*
i
+
3
]
*
posq
[
4
*
j
+
3
];
float
alphaR
=
alphaEwald
*
r
;
float
erfcAlphaR
=
erfcApprox
(
alphaR
);
float
dEdR
=
(
float
)
(
chargeProd
*
inverseR
*
inverseR
*
inverseR
);
dEdR
=
(
float
)
(
dEdR
*
(
1.0
f
-
erfcAlphaR
-
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)));
fvec4
result
=
deltaR
*
dEdR
;
(
fvec4
(
forces
+
4
*
i
)
-
result
).
store
(
forces
+
4
*
i
);
(
fvec4
(
forces
+
4
*
j
)
+
result
).
store
(
forces
+
4
*
j
);
if
(
includeEnergy
)
threadEnergy
[
threadIndex
]
-=
chargeProd
*
inverseR
*
(
1.0
f
-
erfcAlphaR
);
}
}
}
else
if
(
cutoff
)
{
// Compute the interactions from the neighbor list.
for
(
int
i
=
index
;
i
<
neighborList
->
getNumBlocks
();
i
+=
numThreads
)
calculateBlockIxn
(
i
,
&
threadForce
[
0
],
energyPtr
,
boxSize
,
invBoxSize
);
}
else
if
(
cutoff
)
{
// Compute the interactions from the neighbor list.
while
(
true
)
{
int
nextBlock
=
gmx_atomic_fetch_add
(
reinterpret_cast
<
gmx_atomic_t
*>
(
atomicCounter
),
1
);
if
(
nextBlock
>=
neighborList
->
getNumBlocks
())
break
;
calculateBlockIxn
(
nextBlock
,
forces
,
energyPtr
,
boxSize
,
invBoxSize
);
}
else
{
// Loop over all atom pairs
for
(
int
i
=
index
;
i
<
numberOfAtoms
;
i
+=
numThreads
){
for
(
int
j
=
i
+
1
;
j
<
numberOfAtoms
;
j
++
)
if
(
exclusions
[
j
].
find
(
i
)
==
exclusions
[
j
].
end
())
calculateOneIxn
(
i
,
j
,
&
threadForce
[
0
],
energyPtr
,
boxSize
,
invBoxSize
);
}
}
else
{
// Loop over all atom pairs
while
(
true
)
{
int
i
=
gmx_atomic_fetch_add
(
reinterpret_cast
<
gmx_atomic_t
*>
(
atomicCounter
),
1
);
if
(
i
>=
numberOfAtoms
)
break
;
for
(
int
j
=
i
+
1
;
j
<
numberOfAtoms
;
j
++
)
if
(
exclusions
[
j
].
find
(
i
)
==
exclusions
[
j
].
end
())
calculateOneIxn
(
i
,
j
,
forces
,
energyPtr
,
boxSize
,
invBoxSize
);
}
}
}
...
...
@@ -496,198 +449,6 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
(
fvec4
(
forces
+
4
*
jj
)
-
result
).
store
(
forces
+
4
*
jj
);
}
void
CpuNonbondedForce
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForce
[
4
];
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomForce
[
i
]
=
fvec4
(
0.0
f
);
}
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
float
blockAtomR2
[
4
];
bool
include
[
4
];
fvec4
blockAtomDelta
[
4
];
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
fvec4
atomPosq
(
posq
+
4
*
atom
);
// Compute the distances to the block atoms.
bool
any
=
false
;
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
getDeltaR
(
atomPosq
,
blockAtomPosq
[
j
],
blockAtomDelta
[
j
],
blockAtomR2
[
j
],
periodic
,
boxSize
,
invBoxSize
);
include
[
j
]
=
(((
exclusions
[
i
]
>>
j
)
&
1
)
==
0
&&
(
!
cutoff
||
blockAtomR2
[
j
]
<
cutoffDistance
*
cutoffDistance
));
any
|=
include
[
j
];
}
if
(
!
any
)
continue
;
// No interactions to compute.
// Compute the interactions.
fvec4
r2
(
blockAtomR2
);
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
switchValue
(
1.0
f
),
switchDeriv
(
0.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
/
(
cutoffDistance
-
switchingDistance
));
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
/
(
cutoffDistance
-
switchingDistance
);
}
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
eps
=
blockAtomEpsilon
*
atomParameters
[
atom
].
second
;
fvec4
dEdR
=
switchValue
*
eps
*
(
12.0
f
*
sig6
-
6.0
f
)
*
sig6
;
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
if
(
cutoff
)
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
else
dEdR
+=
chargeProd
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
fvec4
energy
=
eps
*
(
sig6
-
1.0
f
)
*
sig6
;
if
(
useSwitch
)
{
dEdR
-=
energy
*
switchDeriv
*
inverseR
;
energy
*=
switchValue
;
}
// Accumulate energies.
if
(
totalEnergy
)
{
if
(
cutoff
)
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
else
energy
+=
chargeProd
*
inverseR
;
for
(
int
j
=
0
;
j
<
4
;
j
++
)
if
(
include
[
j
])
*
totalEnergy
+=
energy
[
j
];
}
// Accumulate forces.
fvec4
atomForce
(
forces
+
4
*
atom
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
if
(
include
[
j
])
{
fvec4
result
=
blockAtomDelta
[
j
]
*
dEdR
[
j
];
blockAtomForce
[
j
]
+=
result
;
atomForce
-=
result
;
}
}
atomForce
.
store
(
forces
+
4
*
atom
);
}
// Record the forces on the block atoms.
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
blockAtomForce
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForce
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForce
[
4
];
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomForce
[
i
]
=
fvec4
(
0.0
f
);
}
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
float
blockAtomR2
[
4
];
bool
include
[
4
];
fvec4
blockAtomDelta
[
4
];
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
fvec4
atomPosq
(
posq
+
4
*
atom
);
// Compute the distances to the block atoms.
bool
any
=
false
;
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
getDeltaR
(
atomPosq
,
blockAtomPosq
[
j
],
blockAtomDelta
[
j
],
blockAtomR2
[
j
],
periodic
,
boxSize
,
invBoxSize
);
include
[
j
]
=
(((
exclusions
[
i
]
>>
j
)
&
1
)
==
0
&&
blockAtomR2
[
j
]
<
cutoffDistance
*
cutoffDistance
);
any
|=
include
[
j
];
}
if
(
!
any
)
continue
;
// No interactions to compute.
// Compute the interactions.
fvec4
r2
(
blockAtomR2
);
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
switchValue
(
1.0
f
),
switchDeriv
(
0.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
/
(
cutoffDistance
-
switchingDistance
));
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
/
(
cutoffDistance
-
switchingDistance
);
}
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
fvec4
dEdR
=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
eps
=
blockAtomEpsilon
*
atomParameters
[
atom
].
second
;
dEdR
+=
switchValue
*
eps
*
(
12.0
f
*
sig6
-
6.0
f
)
*
sig6
;
dEdR
*=
inverseR
*
inverseR
;
fvec4
energy
=
eps
*
(
sig6
-
1.0
f
)
*
sig6
;
if
(
useSwitch
)
{
dEdR
-=
energy
*
switchDeriv
*
inverseR
;
energy
*=
switchValue
;
}
// Accumulate energies.
if
(
totalEnergy
)
{
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
if
(
include
[
j
])
*
totalEnergy
+=
energy
[
j
];
}
// Accumulate forces.
fvec4
atomForce
(
forces
+
4
*
atom
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
if
(
include
[
j
])
{
fvec4
result
=
blockAtomDelta
[
j
]
*
dEdR
[
j
];
blockAtomForce
[
j
]
+=
result
;
atomForce
-=
result
;
}
}
atomForce
.
store
(
forces
+
4
*
atom
);
}
// Record the forces on the block atoms.
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
blockAtomForce
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForce
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
deltaR
=
posJ
-
posI
;
if
(
periodic
)
{
...
...
@@ -697,33 +458,15 @@ void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& d
r2
=
dot3
(
deltaR
,
deltaR
);
}
f
vec4
CpuNonbondedForce
::
erfcApprox
(
f
vec4
x
)
{
f
loat
CpuNonbondedForce
::
erfcApprox
(
f
loat
x
)
{
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 3e-7.
f
vec4
t
=
1.0
f
+
(
0.0705230784
f
+
(
0.0422820123
f
+
(
0.0092705272
f
+
(
0.0001520143
f
+
(
0.0002765672
f
+
0.0000430638
f
*
x
)
*
x
)
*
x
)
*
x
)
*
x
)
*
x
;
f
loat
t
=
1.0
f
+
(
0.0705230784
f
+
(
0.0422820123
f
+
(
0.0092705272
f
+
(
0.0001520143
f
+
(
0.0002765672
f
+
0.0000430638
f
*
x
)
*
x
)
*
x
)
*
x
)
*
x
)
*
x
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
return
1.0
f
/
(
t
*
t
);
}
fvec4
CpuNonbondedForce
::
ewaldScaleFunction
(
fvec4
x
)
{
// Compute the tabulated Ewald scale factor: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
float
y
[
4
];
fvec4
x1
=
x
*
ewaldDXInv
;
ivec4
index
=
floor
(
x1
);
fvec4
coeff
[
4
];
coeff
[
1
]
=
x1
-
index
;
coeff
[
0
]
=
1.0
f
-
coeff
[
1
];
coeff
[
2
]
=
coeff
[
0
]
*
coeff
[
0
]
*
coeff
[
0
]
-
coeff
[
0
];
coeff
[
3
]
=
coeff
[
1
]
*
coeff
[
1
]
*
coeff
[
1
]
-
coeff
[
1
];
_MM_TRANSPOSE4_PS
(
coeff
[
0
],
coeff
[
1
],
coeff
[
2
],
coeff
[
3
]);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
if
(
index
[
i
]
<
NUM_TABLE_POINTS
)
y
[
i
]
=
dot4
(
coeff
[
i
],
fvec4
(
&
ewaldScaleTable
[
4
*
index
[
i
]]));
}
return
fvec4
(
y
);
}
platforms/cpu/src/CpuNonbondedForceVec4.cpp
0 → 100644
View file @
19d2885a
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForceVec4.h"
using
namespace
std
;
using
namespace
OpenMM
;
/**
* Factory method to create a CpuNonbondedForceVec4.
*/
CpuNonbondedForce
*
createCpuNonbondedForceVec4
()
{
return
new
CpuNonbondedForceVec4
();
}
/**---------------------------------------------------------------------------------------
CpuNonbondedForceVec4 constructor
--------------------------------------------------------------------------------------- */
CpuNonbondedForceVec4
::
CpuNonbondedForceVec4
()
{
}
void
CpuNonbondedForceVec4
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
}
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
// Compute the distances to the block atoms.
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
posq
+
4
*
atom
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec4
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
include
=
-
1
;
else
include
=
ivec4
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
if
(
cutoff
)
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
else
dEdR
+=
chargeProd
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
fvec4
one
(
1.0
f
);
if
(
totalEnergy
)
{
if
(
cutoff
)
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
else
energy
+=
chargeProd
*
inverseR
;
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot4
(
energy
,
one
);
}
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec4
fx
=
dx
*
dEdR
;
fvec4
fy
=
dy
*
dEdR
;
fvec4
fz
=
dz
*
dEdR
;
blockAtomForceX
+=
fx
;
blockAtomForceY
+=
fy
;
blockAtomForceZ
+=
fz
;
float
*
atomForce
=
forces
+
4
*
atom
;
atomForce
[
0
]
-=
dot4
(
fx
,
one
);
atomForce
[
1
]
-=
dot4
(
fy
,
one
);
atomForce
[
2
]
-=
dot4
(
fz
,
one
);
}
// Record the forces on the block atoms.
fvec4
f
[
4
]
=
{
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
};
transpose
(
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
]);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForceVec4
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
}
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
// Compute the distances to the block atoms.
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
posq
+
4
*
atom
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec4
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
include
=
-
1
;
else
include
=
ivec4
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
dEdR
+=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
fvec4
one
(
1.0
f
);
if
(
totalEnergy
)
{
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot4
(
energy
,
one
);
}
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec4
fx
=
dx
*
dEdR
;
fvec4
fy
=
dy
*
dEdR
;
fvec4
fz
=
dz
*
dEdR
;
blockAtomForceX
+=
fx
;
blockAtomForceY
+=
fy
;
blockAtomForceZ
+=
fz
;
float
*
atomForce
=
forces
+
4
*
atom
;
atomForce
[
0
]
-=
dot4
(
fx
,
one
);
atomForce
[
1
]
-=
dot4
(
fy
,
one
);
atomForce
[
2
]
-=
dot4
(
fz
,
one
);
}
// Record the forces on the block atoms.
fvec4
f
[
4
]
=
{
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
};
transpose
(
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
]);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForceVec4
::
getDeltaR
(
const
float
*
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
dx
=
x
-
posI
[
0
];
dy
=
y
-
posI
[
1
];
dz
=
z
-
posI
[
2
];
if
(
periodic
)
{
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
}
fvec4
CpuNonbondedForceVec4
::
erfcApprox
(
const
fvec4
&
x
)
{
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 3e-7.
fvec4
t
=
1.0
f
+
(
0.0705230784
f
+
(
0.0422820123
f
+
(
0.0092705272
f
+
(
0.0001520143
f
+
(
0.0002765672
f
+
0.0000430638
f
*
x
)
*
x
)
*
x
)
*
x
)
*
x
)
*
x
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
return
1.0
f
/
(
t
*
t
);
}
fvec4
CpuNonbondedForceVec4
::
ewaldScaleFunction
(
const
fvec4
&
x
)
{
// Compute the tabulated Ewald scale factor: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
fvec4
x1
=
x
*
ewaldDXInv
;
ivec4
index
=
min
(
floor
(
x1
),
NUM_TABLE_POINTS
);
fvec4
coeff2
=
x1
-
index
;
fvec4
coeff1
=
1.0
f
-
coeff2
;
fvec4
t1
(
&
ewaldScaleTable
[
index
[
0
]]);
fvec4
t2
(
&
ewaldScaleTable
[
index
[
1
]]);
fvec4
t3
(
&
ewaldScaleTable
[
index
[
2
]]);
fvec4
t4
(
&
ewaldScaleTable
[
index
[
3
]]);
transpose
(
t1
,
t2
,
t3
,
t4
);
return
coeff1
*
t1
+
coeff2
*
t2
;
}
platforms/cpu/src/CpuNonbondedForceVec8.cpp
0 → 100644
View file @
19d2885a
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForceVec8.h"
#include "openmm/OpenMMException.h"
#include "openmm/internal/hardware.h"
using
namespace
std
;
using
namespace
OpenMM
;
#ifndef __AVX__
bool
isVec8Supported
()
{
return
false
;
}
CpuNonbondedForce
*
createCpuNonbondedForceVec8
()
{
throw
OpenMMException
(
"Internal error: OpenMM was compiled without AVX support"
);
}
#else
/**
* Check whether 8 component vectors are supported with the current CPU.
*/
bool
isVec8Supported
()
{
// Make sure the CPU supports AVX.
int
cpuInfo
[
4
];
cpuid
(
cpuInfo
,
0
);
if
(
cpuInfo
[
0
]
>=
1
)
{
cpuid
(
cpuInfo
,
1
);
return
((
cpuInfo
[
2
]
&
((
int
)
1
<<
28
))
!=
0
);
}
return
false
;
}
/**
* Factory method to create a CpuNonbondedForceVec8.
*/
CpuNonbondedForce
*
createCpuNonbondedForceVec8
()
{
return
new
CpuNonbondedForceVec8
();
}
/**---------------------------------------------------------------------------------------
CpuNonbondedForceVec8 constructor
--------------------------------------------------------------------------------------- */
CpuNonbondedForceVec8
::
CpuNonbondedForceVec8
()
{
}
void
CpuNonbondedForceVec8
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
8
];
fvec4
blockAtomPosq
[
8
];
fvec8
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec8
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
}
transpose
(
blockAtomPosq
[
0
],
blockAtomPosq
[
1
],
blockAtomPosq
[
2
],
blockAtomPosq
[
3
],
blockAtomPosq
[
4
],
blockAtomPosq
[
5
],
blockAtomPosq
[
6
],
blockAtomPosq
[
7
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
);
blockAtomCharge
*=
ONE_4PI_EPS0
;
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
// Compute the distances to the block atoms.
fvec8
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
&
posq
[
4
*
atom
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec8
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
include
=
-
1
;
else
include
=
ivec8
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
,
excl
&
16
?
0
:
-
1
,
excl
&
32
?
0
:
-
1
,
excl
&
64
?
0
:
-
1
,
excl
&
128
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
// Compute the interactions.
fvec8
r
=
sqrt
(
r2
);
fvec8
inverseR
=
fvec8
(
1.0
f
)
/
r
;
fvec8
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
fvec8
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec8
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec8
sig6
=
sig2
*
sig2
*
sig2
;
fvec8
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec8
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec8
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec8
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
fvec8
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
if
(
cutoff
)
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
else
dEdR
+=
chargeProd
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
fvec8
one
(
1.0
f
);
if
(
totalEnergy
)
{
if
(
cutoff
)
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
else
energy
+=
chargeProd
*
inverseR
;
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot8
(
energy
,
one
);
}
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec8
fx
=
dx
*
dEdR
;
fvec8
fy
=
dy
*
dEdR
;
fvec8
fz
=
dz
*
dEdR
;
blockAtomForceX
+=
fx
;
blockAtomForceY
+=
fy
;
blockAtomForceZ
+=
fz
;
float
*
atomForce
=
forces
+
4
*
atom
;
atomForce
[
0
]
-=
dot8
(
fx
,
one
);
atomForce
[
1
]
-=
dot8
(
fy
,
one
);
atomForce
[
2
]
-=
dot8
(
fz
,
one
);
}
// Record the forces on the block atoms.
fvec4
f
[
8
];
transpose
(
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
,
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
],
f
[
4
],
f
[
5
],
f
[
6
],
f
[
7
]);
for
(
int
j
=
0
;
j
<
8
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForceVec8
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
8
];
fvec4
blockAtomPosq
[
8
];
fvec8
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec8
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
}
transpose
(
blockAtomPosq
[
0
],
blockAtomPosq
[
1
],
blockAtomPosq
[
2
],
blockAtomPosq
[
3
],
blockAtomPosq
[
4
],
blockAtomPosq
[
5
],
blockAtomPosq
[
6
],
blockAtomPosq
[
7
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
);
blockAtomCharge
*=
ONE_4PI_EPS0
;
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
// Compute the distances to the block atoms.
fvec8
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
&
posq
[
4
*
atom
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec8
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
include
=
-
1
;
else
include
=
ivec8
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
,
excl
&
16
?
0
:
-
1
,
excl
&
32
?
0
:
-
1
,
excl
&
64
?
0
:
-
1
,
excl
&
128
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
// Compute the interactions.
fvec8
r
=
sqrt
(
r2
);
fvec8
inverseR
=
fvec8
(
1.0
f
)
/
r
;
fvec8
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
fvec8
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec8
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec8
sig6
=
sig2
*
sig2
*
sig2
;
fvec8
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec8
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec8
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec8
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
fvec8
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
dEdR
+=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
fvec8
one
(
1.0
f
);
if
(
totalEnergy
)
{
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot8
(
energy
,
one
);
}
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec8
fx
=
dx
*
dEdR
;
fvec8
fy
=
dy
*
dEdR
;
fvec8
fz
=
dz
*
dEdR
;
blockAtomForceX
+=
fx
;
blockAtomForceY
+=
fy
;
blockAtomForceZ
+=
fz
;
float
*
atomForce
=
forces
+
4
*
atom
;
atomForce
[
0
]
-=
dot8
(
fx
,
one
);
atomForce
[
1
]
-=
dot8
(
fy
,
one
);
atomForce
[
2
]
-=
dot8
(
fz
,
one
);
}
// Record the forces on the block atoms.
fvec4
f
[
8
];
transpose
(
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
,
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
],
f
[
4
],
f
[
5
],
f
[
6
],
f
[
7
]);
for
(
int
j
=
0
;
j
<
8
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForceVec8
::
getDeltaR
(
const
float
*
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
dx
=
x
-
posI
[
0
];
dy
=
y
-
posI
[
1
];
dz
=
z
-
posI
[
2
];
if
(
periodic
)
{
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
}
fvec8
CpuNonbondedForceVec8
::
erfcApprox
(
const
fvec8
&
x
)
{
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 3e-7.
fvec8
t
=
1.0
f
+
(
0.0705230784
f
+
(
0.0422820123
f
+
(
0.0092705272
f
+
(
0.0001520143
f
+
(
0.0002765672
f
+
0.0000430638
f
*
x
)
*
x
)
*
x
)
*
x
)
*
x
)
*
x
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
return
1.0
f
/
(
t
*
t
);
}
fvec8
CpuNonbondedForceVec8
::
ewaldScaleFunction
(
const
fvec8
&
x
)
{
// Compute the tabulated Ewald scale factor: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
fvec8
x1
=
x
*
ewaldDXInv
;
ivec8
index
=
min
(
floor
(
x1
),
NUM_TABLE_POINTS
);
fvec8
coeff2
=
x1
-
index
;
fvec8
coeff1
=
1.0
f
-
coeff2
;
ivec4
indexLower
=
index
.
lowerVec
();
ivec4
indexUpper
=
index
.
upperVec
();
fvec4
t1
(
&
ewaldScaleTable
[
indexLower
[
0
]]);
fvec4
t2
(
&
ewaldScaleTable
[
indexLower
[
1
]]);
fvec4
t3
(
&
ewaldScaleTable
[
indexLower
[
2
]]);
fvec4
t4
(
&
ewaldScaleTable
[
indexLower
[
3
]]);
fvec4
t5
(
&
ewaldScaleTable
[
indexUpper
[
0
]]);
fvec4
t6
(
&
ewaldScaleTable
[
indexUpper
[
1
]]);
fvec4
t7
(
&
ewaldScaleTable
[
indexUpper
[
2
]]);
fvec4
t8
(
&
ewaldScaleTable
[
indexUpper
[
3
]]);
fvec8
s1
,
s2
,
s3
,
s4
;
transpose
(
t1
,
t2
,
t3
,
t4
,
t5
,
t6
,
t7
,
t8
,
s1
,
s2
,
s3
,
s4
);
return
coeff1
*
s1
+
coeff2
*
s2
;
}
#endif
platforms/cpu/src/CpuPlatform.cpp
View file @
19d2885a
...
...
@@ -32,9 +32,12 @@
#include "CpuPlatform.h"
#include "CpuKernelFactory.h"
#include "CpuKernels.h"
#include "CpuSETTLE.h"
#include "ReferenceConstraints.h"
#include "openmm/internal/hardware.h"
using
namespace
OpenMM
;
using
namespace
std
;
extern
"C"
OPENMM_EXPORT_CPU
void
registerPlatforms
()
{
// Only register this platform if the CPU supports SSE 4.1.
...
...
@@ -43,9 +46,16 @@ extern "C" OPENMM_EXPORT_CPU void registerPlatforms() {
Platform
::
registerPlatform
(
new
CpuPlatform
());
}
map
<
ContextImpl
*
,
CpuPlatform
::
PlatformData
*>
CpuPlatform
::
contextData
;
CpuPlatform
::
CpuPlatform
()
{
CpuKernelFactory
*
factory
=
new
CpuKernelFactory
();
registerKernelFactory
(
CalcForcesAndEnergyKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcPeriodicTorsionForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcRBTorsionForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcNonbondedForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcGBSAOBCForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateLangevinStepKernel
::
Name
(),
factory
);
}
double
CpuPlatform
::
getSpeed
()
const
{
...
...
@@ -67,3 +77,33 @@ bool CpuPlatform::isProcessorSupported() {
}
return
false
;
}
void
CpuPlatform
::
contextCreated
(
ContextImpl
&
context
,
const
map
<
string
,
string
>&
properties
)
const
{
ReferencePlatform
::
contextCreated
(
context
,
properties
);
PlatformData
*
data
=
new
PlatformData
(
context
.
getSystem
().
getNumParticles
());
contextData
[
&
context
]
=
data
;
ReferenceConstraints
&
constraints
=
*
(
ReferenceConstraints
*
)
reinterpret_cast
<
ReferencePlatform
::
PlatformData
*>
(
context
.
getPlatformData
())
->
constraints
;
if
(
constraints
.
settle
!=
NULL
)
{
CpuSETTLE
*
parallelSettle
=
new
CpuSETTLE
(
context
.
getSystem
(),
*
(
ReferenceSETTLEAlgorithm
*
)
constraints
.
settle
,
data
->
threads
);
delete
constraints
.
settle
;
constraints
.
settle
=
parallelSettle
;
}
}
void
CpuPlatform
::
contextDestroyed
(
ContextImpl
&
context
)
const
{
PlatformData
*
data
=
contextData
[
&
context
];
delete
data
;
contextData
.
erase
(
&
context
);
}
CpuPlatform
::
PlatformData
&
CpuPlatform
::
getPlatformData
(
ContextImpl
&
context
)
{
return
*
contextData
[
&
context
];
}
CpuPlatform
::
PlatformData
::
PlatformData
(
int
numParticles
)
:
posq
(
4
*
numParticles
)
{
int
numThreads
=
threads
.
getNumThreads
();
threadForce
.
resize
(
numThreads
);
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
threadForce
[
i
].
resize
(
4
*
numParticles
);
isPeriodic
=
false
;
}
platforms/cpu/src/CpuRandom.cpp
0 → 100644
View file @
19d2885a
/* Portions copyright (c) 2013 Stanford University and Simbios.
* Authors: Peter Eastman
* Contributors:
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "CpuRandom.h"
#include "openmm/OpenMMException.h"
#include <cmath>
using
namespace
std
;
using
namespace
OpenMM
;
CpuRandom
::
CpuRandom
()
:
hasInitialized
(
false
)
{
}
CpuRandom
::~
CpuRandom
()
{
for
(
int
i
=
0
;
i
<
threadRandom
.
size
();
i
++
)
delete
threadRandom
[
i
];
}
void
CpuRandom
::
initialize
(
int
seed
,
int
numThreads
)
{
if
(
hasInitialized
)
{
if
(
seed
==
randomSeed
)
return
;
// Already initialized with the same seed.
throw
OpenMMException
(
"Random number generator initialized twice with different seeds"
);
}
randomSeed
=
seed
;
hasInitialized
=
true
;
threadRandom
.
resize
(
numThreads
);
nextGaussian
.
resize
(
numThreads
);
nextGaussianIsValid
.
resize
(
numThreads
,
false
);
// Use a quick and dirty RNG to pick seeds for the real random number generator.
unsigned
int
r
=
(
unsigned
int
)
seed
;
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
{
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
threadRandom
[
i
]
=
new
OpenMM_SFMT
::
SFMT
();
init_gen_rand
(
r
,
*
threadRandom
[
i
]);
}
}
float
CpuRandom
::
getGaussianRandom
(
int
threadIndex
)
{
if
(
nextGaussianIsValid
[
threadIndex
])
{
nextGaussianIsValid
[
threadIndex
]
=
false
;
return
nextGaussian
[
threadIndex
];
}
// Use the polar form of the Box-Muller transformation to generate two Gaussian random numbers.
float
x
,
y
,
r2
;
do
{
x
=
2.0
f
*
(
float
)
genrand_real2
(
*
threadRandom
[
threadIndex
])
-
1.0
f
;
y
=
2.0
f
*
(
float
)
genrand_real2
(
*
threadRandom
[
threadIndex
])
-
1.0
f
;
r2
=
x
*
x
+
y
*
y
;
}
while
(
r2
>=
1.0
f
||
r2
==
0.0
f
);
float
multiplier
=
sqrtf
((
-
2.0
f
*
logf
(
r2
))
/
r2
);
nextGaussian
[
threadIndex
]
=
y
*
multiplier
;
nextGaussianIsValid
[
threadIndex
]
=
true
;
return
x
*
multiplier
;
}
float
CpuRandom
::
getUniformRandom
(
int
threadIndex
)
{
return
genrand_real2
(
*
threadRandom
[
threadIndex
]);
}
platforms/cpu/src/CpuSETTLE.cpp
0 → 100644
View file @
19d2885a
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CpuSETTLE.h"
using
namespace
OpenMM
;
using
namespace
std
;
class
CpuSETTLE
::
ApplyToPositionsTask
:
public
ThreadPool
::
Task
{
public:
ApplyToPositionsTask
(
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
,
vector
<
OpenMM
::
RealVec
>&
atomCoordinatesP
,
vector
<
RealOpenMM
>&
inverseMasses
,
RealOpenMM
tolerance
,
vector
<
ReferenceSETTLEAlgorithm
*>&
threadSettle
)
:
atomCoordinates
(
atomCoordinates
),
atomCoordinatesP
(
atomCoordinatesP
),
inverseMasses
(
inverseMasses
),
tolerance
(
tolerance
),
threadSettle
(
threadSettle
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
threadSettle
[
threadIndex
]
->
apply
(
atomCoordinates
,
atomCoordinatesP
,
inverseMasses
,
tolerance
);
}
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
;
vector
<
OpenMM
::
RealVec
>&
atomCoordinatesP
;
vector
<
RealOpenMM
>&
inverseMasses
;
RealOpenMM
tolerance
;
vector
<
ReferenceSETTLEAlgorithm
*>&
threadSettle
;
};
class
CpuSETTLE
::
ApplyToVelocitiesTask
:
public
ThreadPool
::
Task
{
public:
ApplyToVelocitiesTask
(
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
,
vector
<
OpenMM
::
RealVec
>&
velocities
,
vector
<
RealOpenMM
>&
inverseMasses
,
RealOpenMM
tolerance
,
vector
<
ReferenceSETTLEAlgorithm
*>&
threadSettle
)
:
atomCoordinates
(
atomCoordinates
),
velocities
(
velocities
),
inverseMasses
(
inverseMasses
),
tolerance
(
tolerance
),
threadSettle
(
threadSettle
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
threadSettle
[
threadIndex
]
->
applyToVelocities
(
atomCoordinates
,
velocities
,
inverseMasses
,
tolerance
);
}
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
;
vector
<
OpenMM
::
RealVec
>&
velocities
;
vector
<
RealOpenMM
>&
inverseMasses
;
RealOpenMM
tolerance
;
vector
<
ReferenceSETTLEAlgorithm
*>&
threadSettle
;
};
CpuSETTLE
::
CpuSETTLE
(
const
System
&
system
,
const
ReferenceSETTLEAlgorithm
&
settle
,
ThreadPool
&
threads
)
:
threads
(
threads
)
{
int
numThreads
=
threads
.
getNumThreads
();
int
numClusters
=
settle
.
getNumClusters
();
vector
<
RealOpenMM
>
mass
(
system
.
getNumParticles
());
for
(
int
i
=
0
;
i
<
system
.
getNumParticles
();
i
++
)
mass
[
i
]
=
system
.
getParticleMass
(
i
);
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
{
int
start
=
i
*
numClusters
/
numThreads
;
int
end
=
(
i
+
1
)
*
numClusters
/
numThreads
;
if
(
start
!=
end
)
{
int
numThreadClusters
=
end
-
start
;
vector
<
int
>
atom1
(
numThreadClusters
),
atom2
(
numThreadClusters
),
atom3
(
numThreadClusters
);
vector
<
RealOpenMM
>
distance1
(
numThreadClusters
),
distance2
(
numThreadClusters
);
for
(
int
j
=
0
;
j
<
numThreadClusters
;
j
++
)
settle
.
getClusterParameters
(
start
+
j
,
atom1
[
j
],
atom2
[
j
],
atom3
[
j
],
distance1
[
j
],
distance2
[
j
]);
threadSettle
.
push_back
(
new
ReferenceSETTLEAlgorithm
(
atom1
,
atom2
,
atom3
,
distance1
,
distance2
,
mass
));
}
}
}
CpuSETTLE
::~
CpuSETTLE
()
{
for
(
int
i
=
0
;
i
<
(
int
)
threadSettle
.
size
();
i
++
)
delete
threadSettle
[
i
];
}
void
CpuSETTLE
::
apply
(
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
,
vector
<
OpenMM
::
RealVec
>&
atomCoordinatesP
,
vector
<
RealOpenMM
>&
inverseMasses
,
RealOpenMM
tolerance
)
{
ApplyToPositionsTask
task
(
atomCoordinates
,
atomCoordinatesP
,
inverseMasses
,
tolerance
,
threadSettle
);
threads
.
execute
(
task
);
threads
.
waitForThreads
();
}
void
CpuSETTLE
::
applyToVelocities
(
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
,
vector
<
OpenMM
::
RealVec
>&
velocities
,
vector
<
RealOpenMM
>&
inverseMasses
,
RealOpenMM
tolerance
)
{
ApplyToVelocitiesTask
task
(
atomCoordinates
,
velocities
,
inverseMasses
,
tolerance
,
threadSettle
);
threads
.
execute
(
task
);
threads
.
waitForThreads
();
}
platforms/cpu/src/gmx_atomic.h
0 → 100644
View file @
19d2885a
/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* Copyright (c) 2004-2008, Erik Lindahl <lindahl@cbr.su.se>
*
* Unfortunately, some of the constructs in this file are _very_ sensitive
* to compiler optimizations and architecture changes. If you find any such
* errors, please send a message to lindahl@cbr.su.se to help us fix the
* upstream version too.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
* And Hey:
* Gnomes, ROck Monsters And Chili Sauce
*/
#ifndef _GMX_ATOMIC_H_
#define _GMX_ATOMIC_H_
/*! \file gmx_atomic.h
*
* @brief Atomic operations for fast SMP synchronization
*
* This file defines atomic integer operations and spinlocks for
* fast synchronization in performance-critical regions of gromacs.
*
* In general, the best option is to use functions without explicit
* locking, e.g. gmx_atomic_fetch_add() or gmx_atomic_cmpxchg().
*
* Not all architecture support atomic operations though inline assembly,
* and even if they do it might not be implemented here. In that case
* we use a fallback mutex implementation, so you can always count on
* the function interfaces working in Gromacs.
*
* Don't use spinlocks in non-performance-critical regions like file I/O.
* Since they always spin busy they would waste CPU cycles instead of
* properly yielding to a computation thread while waiting for the disk.
*
* Finally, note that all our spinlock operations are defined to return
* 0 if initialization or locking completes successfully.
* This is the opposite of some other implementations, but the same standard
* as used for pthread mutexes. So, if e.g. are trying to lock a spinlock,
* you will have gotten the lock if the return value is 0.
*
* gmx_spinlock_islocked(x) obviously still returns 1 if the lock is locked,
* and 0 if it is available, though...
*/
#include <stdio.h>
#include <pthread.h>
#ifdef __cplusplus
extern
"C"
{
#endif
#if 0
} /* Avoids screwing up auto-indentation */
#endif
#if ( ( (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__)) && \
(defined(i386) || defined(__x86_64__)) ) \
|| defined (DOXYGEN) )
/* This code is executed for x86 and x86-64, with these compilers:
* GNU
* Intel
* Pathscale
* All these support GCC-style inline assembly.
* We also use this section for the documentation.
*/
/*! \brief Memory barrier operation
*
* Modern CPUs rely heavily on out-of-order execution, and one common feature
* is that load/stores might be reordered. Also, when using inline assembly
* the compiler might already have loaded the variable we are changing into
* a register, so any update to memory won't be visible.
*
* This command creates a memory barrier, i.e. all memory results before
* it in the code should be visible to all memory operations after it - the
* CPU cannot propagate load/stores across it.
*/
#define gmx_atomic_memory_barrier() __asm__ __volatile__("": : :"memory")
/* Only gcc and Intel support this check, otherwise set it to true (skip doc) */
#if (!defined(__GNUC__) && !defined(__INTEL_COMPILER) && !defined DOXYGEN)
#define __builtin_constant_p(i) (1)
#endif
/*! \brief Gromacs atomic operations datatype
*
* Portable synchronization primitives like mutexes are effective for
* many purposes, but usually not very high performance.
* One of the problem is that you have the overhead of a function call,
* and another is that Mutexes often have extra overhead to make the
* scheduling fair. Finally, if performance is important we don't want
* to suspend the thread if we cannot lock a mutex, but spin-lock at 100%
* CPU usage until the resources is available (e.g. increment a counter).
*
* These things can often be implemented with inline-assembly or other
* system-dependent functions, and we provide such functionality for the
* most common platforms. For portability we also have a fallback
* implementation using a mutex for locking.
*
* Performance-wise, the fastest solution is always to avoid locking
* completely (obvious, but remember it!). If you cannot do that, the
* next best thing is to use atomic operations that e.g. increment a
* counter without explicit locking. Spinlocks are useful to lock an
* entire region, but leads to more overhead and can be difficult to
* debug - it is up to you to make sure that only the thread owning the
* lock unlocks it!
*
* You should normally NOT use atomic operations for things like
* I/O threads. These should yield to other threads while waiting for
* the disk instead of spinning at 100% CPU usage.
*
* It is imperative that you use the provided routines for reading
* and writing, since some implementations require memory barriers before
* the CPU or memory sees an updated result. The structure contents is
* only visible here so it can be inlined for performance - it might
* change without further notice.
*
* \note No initialization is required for atomic variables.
*
* Currently, we have (real) atomic operations for:
*
* - x86 or x86_64, using GNU compilers
* - x86 or x86_64, using Intel compilers
* - x86 or x86_64, using Pathscale compilers
* - Itanium, using GNU compilers
* - Itanium, using Intel compilers
* - Itanium, using HP compilers
* - PowerPC, using GNU compilers
* - PowerPC, using IBM AIX compilers
* - PowerPC, using IBM compilers >=7.0 under Linux or Mac OS X.
*/
typedef
struct
gmx_atomic
{
volatile
int
value
;
/*!< Volatile, to avoid compiler aliasing */
}
gmx_atomic_t
;
/*! \brief Gromacs spinlock
*
* Spinlocks provide a faster synchronization than mutexes,
* although they consume CPU-cycles while waiting. They are implemented
* with atomic operations and inline assembly whenever possible, and
* otherwise we use a fallback implementation where a spinlock is identical
* to a mutex (this is one of the reasons why you have to initialize them).
*
* There are no guarantees whatsoever about fair scheduling or
* debugging if you make a mistake and unlock a variable somebody
* else has locked - performance is the primary goal of spinlocks.
*
*/
typedef
struct
gmx_spinlock
{
volatile
unsigned
int
lock
;
/*!< Volatile, to avoid compiler aliasing */
}
gmx_spinlock_t
;
/*! \brief Spinlock static initializer
*
* This is used for static spinlock initialization, and has the same
* properties as GMX_THREAD_MUTEX_INITIALIZER has for mutexes.
* This is only for inlining in the gmx_thread.h header file. Whether
* it is 0, 1, or something else when unlocked depends on the platform.
* Don't assume anything about it. It might even be a mutex when using the
* fallback implementation!
*/
#define GMX_SPINLOCK_INITIALIZER { 1 }
/*! \brief Return value of an atomic integer
*
* Also implements proper memory barriers when necessary.
* The actual implementation is system-dependent.
*
* \param a Atomic variable to read
* \return Integer value of the atomic variable
*/
#define gmx_atomic_read(a) ((a)->value)
/*! \brief Write value to an atomic integer
*
* Also implements proper memory barriers when necessary.
* The actual implementation is system-dependent.
*
* \param a Atomic variable
* \param i Integer to set the atomic variable to.
*/
#define gmx_atomic_set(a,i) (((a)->value) = (i))
/*! \brief Add integer to atomic variable
*
* Also implements proper memory barriers when necessary.
* The actual implementation is system-dependent.
*
* \param a atomic datatype to modify
* \param i integer to increment with. Use i<0 to subtract atomically.
*
* \return The new value (after summation).
*/
static
inline
int
gmx_atomic_add_return
(
gmx_atomic_t
*
a
,
volatile
int
i
)
{
int
__i
;
__i
=
i
;
__asm__
__volatile__
(
"lock ; xaddl %0, %1;"
:
"=r"
(
i
)
:
"m"
(
a
->
value
),
"0"
(
i
));
return
i
+
__i
;
}
/*! \brief Add to variable, return the old value.
*
* This operation is quite useful for synchronization counters.
* By performing a fetchadd with N, a thread can e.g. reserve a chunk
* with the next N iterations, and the return value is the index
* of the first element to treat.
*
* Also implements proper memory barriers when necessary.
* The actual implementation is system-dependent.
*
* \param a atomic datatype to modify
* \param i integer to increment with. Use i<0 to subtract atomically.
*
* \return The value of the atomic variable before addition.
*/
static
inline
int
gmx_atomic_fetch_add
(
gmx_atomic_t
*
a
,
volatile
int
i
)
{
int
__i
;
__i
=
i
;
__asm__
__volatile__
(
"lock ; xaddl %0, %1;"
:
"=r"
(
i
)
:
"m"
(
a
->
value
),
"0"
(
i
));
return
i
;
}
/*! \brief Atomic compare-exchange operation
*
* The \a old value is compared with the memory value in the atomic datatype.
* If the are identical, the atomic type is updated to the new value,
* and otherwise left unchanged.
*
* This is a very useful synchronization primitive: You can start by reading
* a value (without locking anything), perform some calculations, and then
* atomically try to update it in memory unless it has changed. If it has
* changed you will get an error return code - reread the new value
* an repeat the calculations in that case.
*
* \param a Atomic datatype ('memory' value)
* \param oldval Integer value read from the atomic type at an earlier point
* \param newval New value to write to the atomic type if it currently is
* identical to the old value.
*
* \return The value of the atomic memory variable in memory when this
* instruction was executed. This, if the operation succeeded the
* return value was identical to the \a old parameter, and if not
* it returns the updated value in memory so you can repeat your
* operations on it.
*
* \note The exchange occured if the return value is identical to \a old.
*/
static
inline
int
gmx_atomic_cmpxchg
(
gmx_atomic_t
*
a
,
int
oldval
,
int
newval
)
{
volatile
unsigned
long
prev
;
__asm__
__volatile__
(
"lock ; cmpxchgl %1,%2"
:
"=a"
(
prev
)
:
"q"
(
newval
),
"m"
(
a
->
value
),
"0"
(
oldval
)
:
"memory"
);
return
prev
;
}
/*! \brief Initialize spinlock
*
* In theory you can call this from multiple threads, but remember
* that we don't check for errors. If the first thread proceeded to
* lock the spinlock after initialization, the second will happily
* overwrite the contents and unlock it without warning you.
*
* \param x Gromacs spinlock pointer.
*/
static
inline
void
gmx_spinlock_init
(
gmx_spinlock_t
*
x
)
{
x
->
lock
=
1
;
}
/*! \brief Acquire spinlock
*
* This routine blocks until the spinlock is available, and
* the locks it again before returning.
*
* \param x Gromacs spinlock pointer
*/
static
inline
void
gmx_spinlock_lock
(
gmx_spinlock_t
*
x
)
{
__asm__
__volatile__
(
"
\n
1:
\t
"
"lock ; decb %0
\n\t
"
"jns 3f
\n
"
"2:
\t
"
"rep;nop
\n\t
"
"cmpb $0,%0
\n\t
"
"jle 2b
\n\t
"
"jmp 1b
\n
"
"3:
\n\t
"
:
"=m"
(
x
->
lock
)
:
:
"memory"
);
}
/*! \brief Attempt to acquire spinlock
*
* This routine acquires the spinlock if possible, but if
* already locked it return an error code immediately.
*
* \param x Gromacs spinlock pointer
*
* \return 0 if the mutex was available so we could lock it,
* otherwise a non-zero integer (1) if the lock is busy.
*/
static
inline
int
gmx_spinlock_trylock
(
gmx_spinlock_t
*
x
)
{
char
old_value
;
__asm__
__volatile__
(
"xchgb %b0,%1"
:
"=q"
(
old_value
),
"=m"
(
x
->
lock
)
:
"0"
(
0
)
:
"memory"
);
return
(
old_value
<=
0
);
}
/*! \brief Release spinlock
*
* \param x Gromacs spinlock pointer
*
* Unlocks the spinlock, regardless if which thread locked it.
*/
static
inline
void
gmx_spinlock_unlock
(
gmx_spinlock_t
*
x
)
{
char
old_value
=
1
;
__asm__
__volatile__
(
"xchgb %b0, %1"
:
"=q"
(
old_value
),
"=m"
(
x
->
lock
)
:
"0"
(
old_value
)
:
"memory"
);
}
/*! \brief Check if spinlock is locked
*
* This routine returns immediately with the lock status.
*
* \param x Gromacs spinlock pointer
*
* \return 1 if the spinlock is locked, 0 otherwise.
*/
static
inline
int
gmx_spinlock_islocked
(
gmx_spinlock_t
*
x
)
{
return
(
*
(
volatile
signed
char
*
)(
&
(
x
)
->
lock
)
<=
0
);
}
/*! \brief Wait for a spinlock to become available
*
* This routine blocks until the spinlock is unlocked,
* but in contrast to gmx_spinlock_lock() it returns without
* trying to lock the spinlock.
*
* \param x Gromacs spinlock pointer
*/
static
inline
void
gmx_spinlock_wait
(
gmx_spinlock_t
*
x
)
{
do
{
gmx_atomic_memory_barrier
();
}
while
(
gmx_spinlock_islocked
(
x
));
}
#elif ( defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__)))
/* PowerPC using proper GCC inline assembly.
* Recent versions of xlC (>=7.0) _partially_ support this, but since it is
* not 100% compatible we provide a separate implementation for xlC in
* the next section.
*/
/* Compiler-dependent stuff: GCC memory barrier */
#define gmx_atomic_memory_barrier() __asm__ __volatile__("": : :"memory")
typedef
struct
gmx_atomic
{
volatile
int
value
;
/*!< Volatile, to avoid compiler aliasing */
}
gmx_atomic_t
;
typedef
struct
gmx_spinlock
{
volatile
unsigned
int
lock
;
/*!< Volatile, to avoid compiler aliasing */
}
gmx_spinlock_t
;
#define GMX_SPINLOCK_INITIALIZER { 0 }
#define gmx_atomic_read(a) ((a)->value)
#define gmx_atomic_set(a,i) (((a)->value) = (i))
static
inline
int
gmx_atomic_add_return
(
gmx_atomic_t
*
a
,
int
i
)
{
int
t
;
__asm__
__volatile__
(
"1: lwarx %0,0,%2
\n
"
"
\t
add %0,%1,%0
\n
"
"
\t
stwcx. %0,0,%2
\n
"
"
\t
bne- 1b"
"
\t
isync
\n
"
:
"=&r"
(
t
)
:
"r"
(
i
),
"r"
(
&
a
->
value
)
:
"cc"
,
"memory"
);
return
t
;
}
static
inline
int
gmx_atomic_fetch_add
(
gmx_atomic_t
*
a
,
int
i
)
{
int
t
;
__asm__
__volatile__
(
"
\t
eieio
\n
"
"1: lwarx %0,0,%2
\n
"
"
\t
add %0,%1,%0
\n
"
"
\t
stwcx. %0,0,%2
\n
"
"
\t
bne- 1b
\n
"
"
\t
isync
\n
"
:
"=&r"
(
t
)
:
"r"
(
i
),
"r"
(
&
a
->
value
)
:
"cc"
,
"memory"
);
return
(
t
-
i
);
}
static
inline
int
gmx_atomic_cmpxchg
(
gmx_atomic_t
*
a
,
int
oldval
,
int
newval
)
{
int
prev
;
__asm__
__volatile__
(
"1: lwarx %0,0,%2
\n
"
"
\t
cmpw 0,%0,%3
\n
"
"
\t
bne 2f
\n
"
"
\t
stwcx. %4,0,%2
\n
"
"bne- 1b
\n
"
"
\t
sync
\n
"
"2:
\n
"
:
"=&r"
(
prev
),
"=m"
(
a
->
value
)
:
"r"
(
&
a
->
value
),
"r"
(
oldval
),
"r"
(
newval
),
"m"
(
a
->
value
)
:
"cc"
,
"memory"
);
return
prev
;
}
static
inline
void
gmx_spinlock_init
(
gmx_spinlock_t
*
x
)
{
x
->
lock
=
0
;
}
static
inline
void
gmx_spinlock_lock
(
gmx_spinlock_t
*
x
)
{
unsigned
int
tmp
;
__asm__
__volatile__
(
"
\t
b 1f
\n
"
"2: lwzx %0,0,%1
\n
"
"
\t
cmpwi 0,%0,0
\n
"
"
\t
bne+ 2b
\n
"
"1: lwarx %0,0,%1
\n
"
"
\t
cmpwi 0,%0,0
\n
"
"
\t
bne- 2b
\n
"
"
\t
stwcx. %2,0,%1
\n
"
"bne- 2b
\n
"
"
\t
isync
\n
"
:
"=&r"
(
tmp
)
:
"r"
(
&
x
->
lock
),
"r"
(
1
)
:
"cr0"
,
"memory"
);
}
static
inline
int
gmx_spinlock_trylock
(
gmx_spinlock_t
*
x
)
{
unsigned
int
old
,
t
;
unsigned
int
mask
=
1
;
volatile
unsigned
int
*
p
=
&
x
->
lock
;
__asm__
__volatile__
(
"
\t
eieio
\n
"
"1: lwarx %0,0,%4
\n
"
"
\t
or %1,%0,%3
\n
"
"
\t
stwcx. %1,0,%4
\n
"
"
\t
bne 1b
\n
"
"
\t
sync
\n
"
:
"=&r"
(
old
),
"=&r"
(
t
),
"=m"
(
*
p
)
:
"r"
(
mask
),
"r"
(
p
),
"m"
(
*
p
)
:
"cc"
,
"memory"
);
return
((
old
&
mask
)
!=
0
);
}
static
inline
void
gmx_spinlock_unlock
(
gmx_spinlock_t
*
x
)
{
__asm__
__volatile__
(
"
\t
eieio
\n
"
:
:
:
"memory"
);
x
->
lock
=
0
;
}
static
inline
int
gmx_spinlock_islocked
(
gmx_spinlock_t
*
x
)
{
return
(
x
->
lock
!=
0
);
}
static
inline
void
gmx_spinlock_wait
(
gmx_spinlock_t
*
x
)
{
do
{
gmx_atomic_memory_barrier
();
}
while
(
gmx_spinlock_islocked
(
x
));
}
#elif ( (defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM)) && \
(defined(__powerpc__) || defined(__ppc__)))
/* PowerPC using xlC inline assembly.
* Recent versions of xlC (>=7.0) _partially_ support GCC inline assembly
* if you use the option -qasm=gcc but we have had to hack things a bit, in
* particular when it comes to clobbered variables. Since this implementation
* _could_ be buggy, we have separated it from the known-to-be-working gcc
* one above.
*/
/* memory barrier - no idea how to create one with xlc! */
#define gmx_atomic_memory_barrier()
typedef
struct
gmx_atomic
{
volatile
int
value
;
/*!< Volatile, to avoid compiler aliasing */
}
gmx_atomic_t
;
typedef
struct
gmx_spinlock
{
volatile
unsigned
int
lock
;
/*!< Volatile, to avoid compiler aliasing */
}
gmx_spinlock_t
;
#define GMX_SPINLOCK_INITIALIZER { 0 }
#define gmx_atomic_read(a) ((a)->value)
#define gmx_atomic_set(a,i) (((a)->value) = (i))
static
inline
int
gmx_atomic_add_return
(
gmx_atomic_t
*
a
,
int
i
)
{
int
t
;
__asm__
__volatile__
(
"1: lwarx %0,0,%2
\n
"
"
\t
add %0,%1,%0
\n
"
"
\t
stwcx. %0,0,%2
\n
"
"
\t
bne- 1b
\n
"
"
\t
isync
\n
"
:
"=&r"
(
t
)
:
"r"
(
i
),
"r"
(
&
a
->
value
)
);
return
t
;
}
static
inline
int
gmx_atomic_fetch_add
(
gmx_atomic_t
*
a
,
int
i
)
{
int
t
;
__asm__
__volatile__
(
"
\t
eieio
\n
"
"1: lwarx %0,0,%2
\n
"
"
\t
add %0,%1,%0
\n
"
"
\t
stwcx. %0,0,%2
\n
"
"
\t
bne- 1b
\n
"
"
\t
isync
\n
"
:
"=&r"
(
t
)
:
"r"
(
i
),
"r"
(
&
a
->
value
));
return
(
t
-
i
);
}
static
inline
int
gmx_atomic_cmpxchg
(
gmx_atomic_t
*
a
,
int
oldval
,
int
newval
)
{
int
prev
;
__asm__
__volatile__
(
"1: lwarx %0,0,%2
\n
"
"
\t
cmpw 0,%0,%3
\n
"
"
\t
bne 2f
\n
"
"
\t
stwcx. %4,0,%2
\n
"
"
\t
bne- 1b
\n
"
"
\t
sync
\n
"
"2:
\n
"
:
"=&r"
(
prev
),
"=m"
(
a
->
value
)
:
"r"
(
&
a
->
value
),
"r"
(
oldval
),
"r"
(
newval
),
"m"
(
a
->
value
));
return
prev
;
}
static
inline
void
gmx_spinlock_init
(
gmx_spinlock_t
*
x
)
{
x
->
lock
=
0
;
}
static
inline
void
gmx_spinlock_lock
(
gmx_spinlock_t
*
x
)
{
unsigned
int
tmp
;
__asm__
__volatile__
(
"
\t
b 1f
\n
"
"2: lwzx %0,0,%1
\n
"
"
\t
cmpwi 0,%0,0
\n
"
"
\t
bne+ 2b
\n
"
"1: lwarx %0,0,%1
\n
"
"
\t
cmpwi 0,%0,0
\n
"
"
\t
bne- 2b
\n
"
"
\t
stwcx. %2,0,%1
\n
"
"
\t
bne- 2b
\n
"
"
\t
isync
\n
"
:
"=&r"
(
tmp
)
:
"r"
(
&
x
->
lock
),
"r"
(
1
));
}
static
inline
int
gmx_spinlock_trylock
(
gmx_spinlock_t
*
x
)
{
unsigned
int
old
,
t
;
unsigned
int
mask
=
1
;
volatile
unsigned
int
*
p
=
&
x
->
lock
;
__asm__
__volatile__
(
"
\t
eieio
\n
"
"1: lwarx %0,0,%4
\n
"
"
\t
or %1,%0,%3
\n
"
"
\t
stwcx. %1,0,%4
\n
"
"
\t
bne 1b
\n
"
"
\t
sync
\n
"
:
"=&r"
(
old
),
"=&r"
(
t
),
"=m"
(
*
p
)
:
"r"
(
mask
),
"r"
(
p
),
"m"
(
*
p
));
return
((
old
&
mask
)
!=
0
);
}
static
inline
void
gmx_spinlock_unlock
(
gmx_spinlock_t
*
x
)
{
__asm__
__volatile__
(
"
\t
eieio
\n
"
);
x
->
lock
=
0
;
}
static
inline
void
gmx_spinlock_islocked
(
gmx_spinlock_t
*
x
)
{
return
(
x
->
lock
!=
0
);
}
static
inline
void
gmx_spinlock_wait
(
gmx_spinlock_t
*
x
)
{
do
{
gmx_atomic_memory_barrier
();
}
while
(
gmx_spinlock_islocked
(
x
));
}
#elif (defined(__ia64__) && (defined(__GNUC__) || defined(__INTEL_COMPILER)))
/* ia64 with GCC or Intel compilers. Since we need to define everything through
* cmpxchg and fetchadd on ia64, we merge the different compilers and only provide
* different implementations for that single function.
* Documentation? Check the gcc/x86 section.
*/
typedef
struct
gmx_atomic
{
volatile
int
value
;
/*!< Volatile, to avoid compiler aliasing */
}
gmx_atomic_t
;
typedef
struct
gmx_spinlock
{
volatile
unsigned
int
lock
;
/*!< Volatile, to avoid compiler aliasing */
}
gmx_spinlock_t
;
#define GMX_SPINLOCK_INITIALIZER { 0 }
#define gmx_atomic_read(a) ((a)->value)
#define gmx_atomic_set(a,i) (((a)->value) = (i))
/* Compiler thingies */
#ifdef __INTEL_COMPILER
void
__memory_barrier
(
void
);
int
_InterlockedCompareExchange
(
volatile
int
*
dest
,
int
xchg
,
int
comp
);
unsigned
__int64
__fetchadd4_rel
(
unsigned
int
*
addend
,
const
int
increment
);
/* ia64 memory barrier */
# define gmx_atomic_memory_barrier() __memory_barrier()
/* ia64 cmpxchg */
# define gmx_atomic_cmpxchg(a, oldval, newval) _InterlockedCompareExchange(&a->value,newval,oldval)
/* ia64 fetchadd, but it only works with increments +/- 1,4,8,16 */
# define gmx_ia64_fetchadd(a, inc) __fetchadd4_rel(a, inc)
#elif defined __GNUC__
/* ia64 memory barrier */
# define gmx_atomic_memory_barrier() asm volatile ("":::"memory")
/* ia64 cmpxchg */
static
inline
int
gmx_atomic_cmpxchg
(
gmx_atomic_t
*
a
,
int
oldval
,
int
newval
)
{
volatile
int
res
;
asm
volatile
(
"mov ar.ccv=%0;;"
::
"rO"
(
oldval
));
asm
volatile
(
"cmpxchg4.acq %0=[%1],%2,ar.ccv"
:
"=r"
(
res
)
:
"r"
(
&
a
->
value
),
"r"
(
newval
)
:
"memory"
);
return
res
;
}
/* fetchadd, but on ia64 it only works with increments +/- 1,4,8,16 */
#define gmx_ia64_fetchadd(a, inc) \
({ unsigned long res; \
asm volatile ("fetchadd4.rel %0=[%1],%2" \
: "=r"(res) : "r"(a), "i" (inc) : "memory"); \
res; \
})
#else
/* Unknown compiler */
# error Unknown ia64 compiler (not GCC or ICC) - modify gmx_thread.h!
#endif
static
inline
int
gmx_atomic_add_return
(
gmx_atomic_t
*
a
,
volatile
int
i
)
{
volatile
int
oldval
,
newval
;
volatile
int
__i
=
i
;
/* Use fetchadd if, and only if, the increment value can be determined
* at compile time (otherwise this check is optimized away) and it is
* a value supported by fetchadd (1,4,8,16,-1,-4,-8,-16).
*/
if
(
__builtin_constant_p
(
i
)
&&
(
(
__i
==
1
)
||
(
__i
==
4
)
||
(
__i
==
8
)
||
(
__i
==
16
)
||
(
__i
==
-
1
)
||
(
__i
==
-
4
)
||
(
__i
==
-
8
)
||
(
__i
==
-
16
)
)
)
{
oldval
=
gmx_ia64_fetchadd
(
a
,
__i
);
newval
=
oldval
+
i
;
}
else
{
/* Use compare-exchange addition that works with any value */
do
{
oldval
=
gmx_atomic_read
(
a
);
newval
=
oldval
+
i
;
}
while
(
gmx_atomic_cmpxchg
(
a
,
oldval
,
newval
)
!=
oldval
);
}
return
newval
;
}
static
inline
int
gmx_atomic_fetch_add
(
gmx_atomic_t
*
a
,
volatile
int
i
)
{
volatile
int
oldval
,
newval
;
volatile
int
__i
=
i
;
/* Use ia64 fetchadd if, and only if, the increment value can be determined
* at compile time (otherwise this check is optimized away) and it is
* a value supported by fetchadd (1,4,8,16,-1,-4,-8,-16).
*/
if
(
__builtin_constant_p
(
i
)
&&
(
(
__i
==
1
)
||
(
__i
==
4
)
||
(
__i
==
8
)
||
(
__i
==
16
)
||
(
__i
==
-
1
)
||
(
__i
==
-
4
)
||
(
__i
==
-
8
)
||
(
__i
==
-
16
)
)
)
{
oldval
=
gmx_ia64_fetchadd
(
a
,
__i
);
newval
=
oldval
+
i
;
}
else
{
/* Use compare-exchange addition that works with any value */
do
{
oldval
=
gmx_atomic_read
(
a
);
newval
=
oldval
+
i
;
}
while
(
gmx_atomic_cmpxchg
(
a
,
oldval
,
newval
)
!=
oldval
);
}
return
oldval
;
}
static
inline
void
gmx_spinlock_init
(
gmx_spinlock_t
*
x
)
{
x
->
lock
=
0
;
}
static
inline
void
gmx_spinlock_lock
(
gmx_spinlock_t
*
x
)
{
gmx_atomic_t
*
a
=
(
gmx_atomic_t
*
)
x
;
unsigned
long
value
;
value
=
gmx_atomic_cmpxchg
(
a
,
0
,
1
);
if
(
value
)
{
do
{
while
(
a
->
value
!=
0
)
{
gmx_atomic_memory_barrier
();
}
value
=
gmx_atomic_cmpxchg
(
a
,
0
,
1
);
}
while
(
value
);
}
}
static
inline
int
gmx_spinlock_trylock
(
gmx_spinlock_t
*
x
)
{
return
(
gmx_atomic_cmpxchg
((
gmx_atomic_t
*
)
x
,
0
,
1
)
!=
0
);
}
static
inline
void
gmx_spinlock_unlock
(
gmx_spinlock_t
*
x
)
{
do
{
gmx_atomic_memory_barrier
();
x
->
lock
=
0
;
}
while
(
0
);
}
static
inline
int
gmx_spinlock_islocked
(
gmx_spinlock_t
*
x
)
{
return
(
x
->
lock
!=
0
);
}
static
inline
void
gmx_spinlock_wait
(
gmx_spinlock_t
*
x
)
{
do
{
gmx_atomic_memory_barrier
();
}
while
(
gmx_spinlock_islocked
(
x
));
}
#undef gmx_ia64_fetchadd
#elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
/* HP compiler on ia64 */
#include <machine/sys/inline.h>
#define gmx_atomic_memory_barrier() _Asm_mf()
#define gmx_hpia64_fetchadd(a, i) \
_Asm_fetchadd((_Asm_fasz)_FASZ_W,(_Asm_sem)_SEM_REL, \
(UInt32*)a,(unsigned int) i, \
(_Asm_ldhint)LDHINT_NONE)
typedef
struct
gmx_atomic
{
volatile
int
value
;
/*!< Volatile, to avoid compiler aliasing */
}
gmx_atomic_t
;
typedef
struct
gmx_spinlock
{
volatile
unsigned
int
lock
;
/*!< Volatile, to avoid compiler aliasing */
}
gmx_spinlock_t
;
static
inline
int
gmx_atomic_cmpxchg
(
gmx_atomic_t
*
a
,
int
oldval
,
int
newval
)
{
int
ret
;
_Asm_mov_to_ar
((
_Asm_app_reg
)
_AREG_CCV
,(
Uint32
)
oldval
,
(
_Asm_fence
)(
_UP_CALL_FENCE
|
_UP_SYS_FENCE
|
_DOWN_CALL_FENCE
|
_DOWN_SYS_FENCE
));
ret
=
_Asm_cmpxchg
((
_Asm_sz
)
SZ_W
,(
_Asm_sem
)
_SEM_ACQ
,(
Uint32
*
)
a
,
(
Uint32
)
newval
,(
_Asm_ldhint
)
_LDHINT_NONE
);
return
ret
;
}
#define GMX_SPINLOCK_INITIALIZER { 0 }
#define gmx_atomic_read(a) ((a)->value)
#define gmx_atomic_set(a,i) (((a)->value) = (i))
static
inline
void
gmx_atomic_add_return
(
gmx_atomic_t
*
a
,
int
i
)
{
int
old
,
new
;
int
__i
=
i
;
/* On HP-UX we don't know any macro to determine whether the increment
* is known at compile time, but hopefully the call uses something simple
* like a constant, and then the optimizer should be able to do the job.
*/
if
(
(
__i
==
1
)
||
(
__i
==
4
)
||
(
__i
==
8
)
||
(
__i
==
16
)
||
(
__i
==
-
1
)
||
(
__i
==
-
4
)
||
(
__i
==
-
8
)
||
(
__i
==
-
16
)
)
{
oldval
=
gmx_hpia64_fetchadd
(
a
,
__i
);
newval
=
oldval
+
i
;
}
else
{
/* Use compare-exchange addition that works with any value */
do
{
oldval
=
gmx_atomic_read
(
a
);
newval
=
oldval
+
i
;
}
while
(
gmx_atomic_cmpxchg
(
a
,
oldval
,
newval
)
!=
oldval
);
}
return
newval
;
}
static
inline
int
gmx_atomic_fetch_add
(
gmx_atomic_t
*
a
,
int
i
)
{
int
oldval
,
newval
;
int
__i
=
i
;
/* On HP-UX we don't know any macro to determine whether the increment
* is known at compile time, but hopefully the call uses something simple
* like a constant, and then the optimizer should be able to do the job.
*/
if
(
(
__i
==
1
)
||
(
__i
==
4
)
||
(
__i
==
8
)
||
(
__i
==
16
)
||
(
__i
==
-
1
)
||
(
__i
==
-
4
)
||
(
__i
==
-
8
)
||
(
__i
==
-
16
)
)
{
oldval
=
gmx_hpia64_fetchadd
(
a
,
__i
);
newval
=
oldval
+
i
;
}
else
{
/* Use compare-exchange addition that works with any value */
do
{
oldval
=
gmx_atomic_read
(
a
);
newval
=
oldval
+
i
;
}
while
(
gmx_atomic_cmpxchg
(
a
,
oldval
,
newval
)
!=
oldval
);
}
return
oldval
;
}
static
inline
void
gmx_spinlock_init
(
gmx_spinlock_t
*
x
)
{
x
->
lock
=
0
;
}
static
inline
void
gmx_spinlock_trylock
(
gmx_spinlock_t
*
x
)
{
int
rc
;
rc
=
_Asm_xchg
((
_Asm_sz
)
_SZ_W
,
(
unsigned
int
*
)
x
,
1
(
_Asm_ldhit
)
_LDHINT_NONE
);
return
(
(
rc
>
0
)
?
1
:
0
);
}
static
inline
void
gmx_spinlock_lock
(
gmx_spinlock_t
*
x
)
{
int
status
=
1
;
do
{
if
(
*
((
unsigned
int
*
)
x
->
lock
)
==
0
)
{
status
=
gmx_spinlock_trylock
(
x
);
}
}
while
(
status
!=
0
);
}
static
inline
void
gmx_spinlock_unlock
(
gmx_spinlock_t
*
x
)
{
_Asm_fetchadd
((
_Asm_fasz
)
_SZ_W
,(
_Asm_sem
)
_SEM_REL
,
(
unsigned
int
*
)
x
,
-
1
,(
_Asm_ldhint
)
_LDHINT_NONE
);
}
static
inline
void
gmx_spinlock_islocked
(
gmx_spinlock_t
*
x
)
{
return
(
x
->
lock
!=
0
);
}
static
inline
void
gmx_spinlock_wait
(
gmx_spinlock_t
*
x
)
{
do
{
gmx_atomic_memory_barrier
();
}
while
(
gmx_spinlock_islocked
(
x
));
}
#undef gmx_hpia64_fetchadd
#elif (defined(_MSC_VER) && (_MSC_VER >= 1200))
/* Microsoft Visual C on x86, define taken from FFTW who got it from Morten Nissov */
#include <windows.h>
#define gmx_atomic_memory_barrier()
typedef
struct
gmx_atomic
{
LONG
volatile
value
;
/*!< Volatile, to avoid compiler aliasing */
}
gmx_atomic_t
;
typedef
struct
gmx_spinlock
{
LONG
volatile
lock
;
/*!< Volatile, to avoid compiler aliasing */
}
gmx_spinlock_t
;
#define GMX_SPINLOCK_INITIALIZER { 0 }
#define gmx_atomic_read(a) ((a)->value)
#define gmx_atomic_set(a,i) (((a)->value) = (i))
#define gmx_atomic_fetch_add(a, i) \
InterlockedExchangeAdd((LONG volatile *)a, (LONG) i)
#define gmx_atomic_add_return(a, i) \
( i + InterlockedExchangeAdd((LONG volatile *)a, (LONG) i) )
#define gmx_atomic_cmpxchg(a, oldval, newval) \
InterlockedCompareExchange((LONG volatile *)a, (LONG) newval, (LONG) oldval)
# define gmx_spinlock_lock(x) \
while((InterlockedCompareExchange((LONG volatile *)&x, 1, 0))!=0)
#define gmx_spinlock_trylock(x) \
InterlockedCompareExchange((LONG volatile *)&x, 1, 0)
static
inline
void
gmx_spinlock_unlock
(
gmx_spinlock_t
*
x
)
{
x
->
lock
=
0
;
}
static
inline
int
gmx_spinlock_islocked
(
gmx_spinlock_t
*
x
)
{
return
(
*
(
volatile
signed
char
*
)(
&
(
x
)
->
lock
)
!=
0
);
}
static
inline
void
gmx_spinlock_wait
(
gmx_spinlock_t
*
x
)
{
while
(
gmx_spinlock_islocked
(
x
))
{
Sleep
(
0
);
}
}
#elif defined(__xlC__) && defined (_AIX)
/* IBM xlC compiler on AIX */
#include <sys/atomic_op.h>
#define gmx_atomic_memory_barrier()
typedef
struct
gmx_atomic
{
volatile
int
value
;
/*!< Volatile, to avoid compiler aliasing */
}
gmx_atomic_t
;
typedef
struct
gmx_spinlock
{
volatile
unsigned
int
lock
;
/*!< Volatile, to avoid compiler aliasing */
}
gmx_spinlock_t
;
static
inline
int
gmx_atomic_cmpxchg
(
gmx_atomic_t
*
a
,
int
oldval
,
int
newval
)
{
int
t
;
if
(
__check_lock
((
atomic_p
)
&
a
->
value
,
oldval
,
newval
))
{
/* Not successful - value had changed in memory. Reload value. */
t
=
a
->
value
;
}
else
{
/* replacement suceeded */
t
=
oldval
;
}
return
t
;
}
static
inline
void
gmx_atomic_add_return
(
gmx_atomic_t
*
a
,
int
i
)
{
int
oldval
,
newval
;
do
{
oldval
=
gmx_atomic_read
(
a
);
newval
=
oldval
+
i
;
}
while
(
__check_lock
((
atomic_p
)
&
a
->
value
,
oldval
,
newval
));
return
newval
;
}
static
inline
void
gmx_atomic_fetch_add
(
gmx_atomic_t
*
a
,
int
i
)
{
int
oldval
,
newval
;
do
{
oldval
=
gmx_atomic_read
(
a
);
newval
=
oldval
+
i
;
}
while
(
__check_lock
((
atomic_p
)
&
a
->
value
,
oldval
,
newval
));
return
oldval
;
}
static
inline
void
gmx_spinlock_init
(
gmx_spinlock_t
*
x
)
{
__clear_lock
((
atomic_p
)
x
,
0
);
}
static
inline
void
gmx_spinlock_lock
(
gmx_spinlock_t
*
x
)
{
do
{
;
}
while
(
__check_lock
((
atomic_p
)
x
,
0
,
1
));
}
static
inline
void
gmx_spinlock_trylock
(
gmx_spinlock_t
*
x
)
{
/* Return 0 if we got the lock */
return
(
__check_lock
((
atomic_p
)
x
,
0
,
1
)
!=
0
)
}
static
inline
void
gmx_spinlock_unlock
(
gmx_spinlock_t
*
x
)
{
__clear_lock
((
atomic_p
)
x
,
0
);
}
static
inline
void
gmx_spinlock_islocked
(
gmx_spinlock_t
*
x
)
{
return
(
*
((
atomic_p
)
x
)
!=
0
);
}
static
inline
void
gmx_spinlock_wait
(
gmx_spinlock_t
*
x
)
{
while
(
gmx_spinlock_islocked
(
x
))
{
;
}
}
#else
/* No atomic operations, use mutex fallback. Documentation is in x86 section */
#define gmx_atomic_memory_barrier()
/* System mutex used for locking to guarantee atomicity */
static
pthread_mutex_t
gmx_atomic_mutex
=
PTHREAD_MUTEX_INITIALIZER
;
typedef
struct
gmx_atomic
{
int
value
;
}
gmx_atomic_t
;
#define gmx_spinlock_t pthread_mutex_t
# define GMX_SPINLOCK_INITIALIZER PTHREAD_MUTEX_INITIALIZER
/* Since mutexes guarantee memory barriers this works fine */
#define gmx_atomic_read(a) ((a)->value)
static
inline
void
gmx_atomic_set
(
gmx_atomic_t
*
a
,
int
i
)
{
/* Mutexes here are necessary to guarantee memory visibility */
pthread_mutex_lock
(
&
gmx_atomic_mutex
);
a
->
value
=
i
;
pthread_mutex_unlock
(
&
gmx_atomic_mutex
);
}
static
inline
int
gmx_atomic_add_return
(
gmx_atomic_t
*
a
,
int
i
)
{
int
t
;
pthread_mutex_lock
(
&
gmx_atomic_mutex
);
t
=
a
->
value
+
i
;
a
->
value
=
t
;
pthread_mutex_unlock
(
&
gmx_atomic_mutex
);
return
t
;
}
static
inline
int
gmx_atomic_fetch_add
(
gmx_atomic_t
*
a
,
int
i
)
{
int
old_value
;
pthread_mutex_lock
(
&
gmx_atomic_mutex
);
old_value
=
a
->
value
;
a
->
value
=
old_value
+
i
;
pthread_mutex_unlock
(
&
gmx_atomic_mutex
);
return
old_value
;
}
static
inline
int
gmx_atomic_cmpxchg
(
gmx_atomic_t
*
a
,
int
oldv
,
int
newv
)
{
int
t
;
pthread_mutex_lock
(
&
gmx_atomic_mutex
);
t
=
a
->
value
;
if
(
t
==
oldv
)
{
a
->
value
=
newv
;
}
pthread_mutex_unlock
(
&
gmx_atomic_mutex
);
return
t
;
}
#define gmx_spinlock_init(lock) pthread_mutex_init(lock)
#define gmx_spinlock_lock(lock) pthread_mutex_lock(lock)
#define gmx_spinlock_trylock(lock) pthread_mutex_trylock(lock)
#define gmx_spinlock_unlock(lock) pthread_mutex_unlock(lock)
static
inline
int
gmx_spinlock_islocked
(
gmx_spinlock_t
*
x
)
{
int
rc
;
if
(
gmx_spinlock_trylock
(
x
)
!=
0
)
{
/* It was locked */
return
1
;
}
else
{
/* We just locked it */
gmx_spinlock_unlock
(
x
);
return
0
;
}
}
static
inline
void
gmx_spinlock_wait
(
gmx_spinlock_t
*
x
)
{
int
rc
;
gmx_spinlock_lock
(
x
);
/* Got the lock now, so the waiting is over */
gmx_spinlock_unlock
(
x
);
}
#endif
/*! \brief Spinlock-based barrier type
*
* This barrier has the same functionality as the standard
* gmx_thread_barrier_t, but since it is based on spinlocks
* it provides faster synchronization at the cost of busy-waiting.
*
* Variables of this type should be initialized by calling
* gmx_spinlock_barrier_init() to set the number of threads
* that should be synchronized.
*/
typedef
struct
gmx_spinlock_barrier
{
gmx_atomic_t
count
;
/*!< Number of threads remaining */
int
threshold
;
/*!< Total number of threads */
volatile
int
cycle
;
/*!< Current cycle (alternating 0/1) */
}
gmx_spinlock_barrier_t
;
/*! \brief Initialize spinlock-based barrier
*
* \param barrier Pointer to _spinlock_ barrier. Note that this is not
* the same datatype as the full, thread based, barrier.
* \param count Number of threads to synchronize. All threads
* will be released after \a count calls to
* gmx_spinlock_barrier_wait().
*/
static
inline
void
gmx_spinlock_barrier_init
(
gmx_spinlock_barrier_t
*
barrier
,
int
count
)
{
barrier
->
threshold
=
count
;
barrier
->
cycle
=
0
;
gmx_atomic_set
(
&
(
barrier
->
count
),
count
);
}
/*! \brief Perform busy-waiting barrier synchronization
*
* This routine blocks until it has been called N times,
* where N is the count value the barrier was initialized with.
* After N total calls all threads return. The barrier automatically
* cycles, and thus requires another N calls to unblock another time.
*
* Note that spinlock-based barriers are completely different from
* standard ones (using mutexes and condition variables), only the
* functionality and names are similar.
*
* \param barrier Pointer to previously create barrier.
*
* \return The last thread returns -1, all the others 0.
*/
static
inline
int
gmx_spinlock_barrier_wait
(
gmx_spinlock_barrier_t
*
barrier
)
{
int
cycle
;
int
status
;
/* We don't need to lock or use atomic ops here, since the cycle index
* cannot change until after the last thread has performed the check
* further down. Further, they cannot reach this point in the next
* barrier iteration until all of them have been released, and that
* happens after the cycle value has been updated.
*
* No synchronization == fast synchronization.
*/
cycle
=
barrier
->
cycle
;
/* Decrement the count atomically and check if it is zero.
* This will only be true for the last thread calling us.
*/
if
(
gmx_atomic_add_return
(
&
(
barrier
->
count
),
-
1
)
==
0
)
{
gmx_atomic_set
(
&
(
barrier
->
count
),
barrier
->
threshold
);
barrier
->
cycle
=
!
barrier
->
cycle
;
status
=
-
1
;
}
else
{
/* Wait until the last thread changes the cycle index.
* We are both using a memory barrier, and explicit
* volatile pointer cast to make sure the compiler
* doesn't try to be smart and cache the contents.
*/
do
{
gmx_atomic_memory_barrier
();
}
while
(
*
(
volatile
int
*
)(
&
(
barrier
->
cycle
))
==
cycle
);
status
=
0
;
}
return
status
;
}
#ifdef __cplusplus
}
#endif
#endif
/* _GMX_ATOMIC_H_ */
platforms/cpu/tests/CMakeLists.txt
View file @
19d2885a
...
...
@@ -20,6 +20,7 @@ FOREACH(TEST_PROG ${TEST_PROGS})
# Link with shared library
ADD_EXECUTABLE
(
${
TEST_ROOT
}
${
TEST_PROG
}
)
TARGET_LINK_LIBRARIES
(
${
TEST_ROOT
}
${
SHARED_TARGET
}
)
SET_TARGET_PROPERTIES
(
${
TEST_ROOT
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
)
ADD_TEST
(
${
TEST_ROOT
}
${
EXECUTABLE_OUTPUT_PATH
}
/
${
TEST_ROOT
}
single
)
ENDFOREACH
(
TEST_PROG
${
TEST_PROGS
}
)
platforms/cpu/tests/TestCpuGBSAOBCForce.cpp
0 → 100644
View file @
19d2885a
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CPU implementation of GBSAOBCForce.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CpuPlatform.h"
#include "openmm/GBSAOBCForce.h"
#include "openmm/System.h"
#include "openmm/LangevinIntegrator.h"
#include "openmm/NonbondedForce.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
using
namespace
OpenMM
;
using
namespace
std
;
const
double
TOL
=
1e-5
;
void
testSingleParticle
()
{
CpuPlatform
platform
;
System
system
;
system
.
addParticle
(
2.0
);
LangevinIntegrator
integrator
(
0
,
0.1
,
0.01
);
GBSAOBCForce
*
forceField
=
new
GBSAOBCForce
();
forceField
->
addParticle
(
0.5
,
0.15
,
1
);
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
1
);
positions
[
0
]
=
Vec3
(
0
,
0
,
0
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Energy
);
double
bornRadius
=
0.15
-
0.009
;
// dielectric offset
double
eps0
=
EPSILON0
;
double
bornEnergy
=
(
-
0.5
*
0.5
/
(
8
*
PI_M
*
eps0
))
*
(
1.0
/
forceField
->
getSoluteDielectric
()
-
1.0
/
forceField
->
getSolventDielectric
())
/
bornRadius
;
double
extendedRadius
=
0.15
+
0.14
;
// probe radius
double
nonpolarEnergy
=
CAL2JOULE
*
PI_M
*
0.0216
*
(
10
*
extendedRadius
)
*
(
10
*
extendedRadius
)
*
std
::
pow
(
0.15
/
bornRadius
,
6.0
);
// Where did this formula come from? Just copied it from CpuImplicitSolvent.cpp
ASSERT_EQUAL_TOL
((
bornEnergy
+
nonpolarEnergy
),
state
.
getPotentialEnergy
(),
0.01
);
// Change the parameters and see if it is still correct.
forceField
->
setParticleParameters
(
0
,
0.4
,
0.25
,
1
);
forceField
->
updateParametersInContext
(
context
);
state
=
context
.
getState
(
State
::
Energy
);
bornRadius
=
0.25
-
0.009
;
// dielectric offset
bornEnergy
=
(
-
0.4
*
0.4
/
(
8
*
PI_M
*
eps0
))
*
(
1.0
/
forceField
->
getSoluteDielectric
()
-
1.0
/
forceField
->
getSolventDielectric
())
/
bornRadius
;
extendedRadius
=
bornRadius
+
0.14
;
nonpolarEnergy
=
CAL2JOULE
*
PI_M
*
0.0216
*
(
10
*
extendedRadius
)
*
(
10
*
extendedRadius
)
*
std
::
pow
(
0.25
/
bornRadius
,
6.0
);
ASSERT_EQUAL_TOL
((
bornEnergy
+
nonpolarEnergy
),
state
.
getPotentialEnergy
(),
0.01
);
}
void
testCutoffAndPeriodic
()
{
CpuPlatform
platform
;
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
LangevinIntegrator
integrator
(
0
,
0.1
,
0.01
);
GBSAOBCForce
*
gbsa
=
new
GBSAOBCForce
();
NonbondedForce
*
nonbonded
=
new
NonbondedForce
();
gbsa
->
addParticle
(
-
1
,
0.15
,
1
);
nonbonded
->
addParticle
(
-
1
,
1
,
0
);
gbsa
->
addParticle
(
1
,
0.15
,
1
);
nonbonded
->
addParticle
(
1
,
1
,
0
);
const
double
cutoffDistance
=
3.0
;
const
double
boxSize
=
10.0
;
nonbonded
->
setCutoffDistance
(
cutoffDistance
);
gbsa
->
setCutoffDistance
(
cutoffDistance
);
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
system
.
addForce
(
gbsa
);
system
.
addForce
(
nonbonded
);
vector
<
Vec3
>
positions
(
2
);
positions
[
0
]
=
Vec3
(
0
,
0
,
0
);
positions
[
1
]
=
Vec3
(
2
,
0
,
0
);
// Calculate the forces for both cutoff and periodic with two different atom positions.
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
CutoffNonPeriodic
);
gbsa
->
setNonbondedMethod
(
GBSAOBCForce
::
CutoffNonPeriodic
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
State
state1
=
context
.
getState
(
State
::
Forces
);
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
CutoffPeriodic
);
gbsa
->
setNonbondedMethod
(
GBSAOBCForce
::
CutoffPeriodic
);
context
.
reinitialize
();
context
.
setPositions
(
positions
);
State
state2
=
context
.
getState
(
State
::
Forces
);
positions
[
1
][
0
]
+=
boxSize
;
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
CutoffNonPeriodic
);
gbsa
->
setNonbondedMethod
(
GBSAOBCForce
::
CutoffNonPeriodic
);
context
.
reinitialize
();
context
.
setPositions
(
positions
);
State
state3
=
context
.
getState
(
State
::
Forces
);
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
CutoffPeriodic
);
gbsa
->
setNonbondedMethod
(
GBSAOBCForce
::
CutoffPeriodic
);
context
.
reinitialize
();
context
.
setPositions
(
positions
);
State
state4
=
context
.
getState
(
State
::
Forces
);
// All forces should be identical, exception state3 which should be zero.
ASSERT_EQUAL_VEC
(
state1
.
getForces
()[
0
],
state2
.
getForces
()[
0
],
0.01
);
ASSERT_EQUAL_VEC
(
state1
.
getForces
()[
1
],
state2
.
getForces
()[
1
],
0.01
);
ASSERT_EQUAL_VEC
(
state1
.
getForces
()[
0
],
state4
.
getForces
()[
0
],
0.01
);
ASSERT_EQUAL_VEC
(
state1
.
getForces
()[
1
],
state4
.
getForces
()[
1
],
0.01
);
ASSERT_EQUAL_VEC
(
state3
.
getForces
()[
0
],
Vec3
(
0
,
0
,
0
),
0.01
);
ASSERT_EQUAL_VEC
(
state3
.
getForces
()[
1
],
Vec3
(
0
,
0
,
0
),
0.01
);
}
void
testForce
(
int
numParticles
,
NonbondedForce
::
NonbondedMethod
method
,
GBSAOBCForce
::
NonbondedMethod
method2
)
{
CpuPlatform
platform
;
ReferencePlatform
reference
;
System
system
;
GBSAOBCForce
*
gbsa
=
new
GBSAOBCForce
();
NonbondedForce
*
nonbonded
=
new
NonbondedForce
();
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
system
.
addParticle
(
1.0
);
double
charge
=
i
%
2
==
0
?
-
1
:
1
;
gbsa
->
addParticle
(
charge
,
0.15
,
1
);
nonbonded
->
addParticle
(
charge
,
1
,
0
);
}
nonbonded
->
setNonbondedMethod
(
method
);
gbsa
->
setNonbondedMethod
(
method2
);
nonbonded
->
setCutoffDistance
(
3.0
);
gbsa
->
setCutoffDistance
(
3.0
);
int
grid
=
(
int
)
floor
(
0.5
+
pow
(
numParticles
,
1.0
/
3.0
));
if
(
method
==
NonbondedForce
::
CutoffPeriodic
)
{
double
boxSize
=
(
grid
+
1
)
*
1.1
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
}
system
.
addForce
(
gbsa
);
system
.
addForce
(
nonbonded
);
LangevinIntegrator
integrator1
(
0
,
0.1
,
0.01
);
LangevinIntegrator
integrator2
(
0
,
0.1
,
0.01
);
Context
context
(
system
,
integrator1
,
platform
);
Context
refContext
(
system
,
integrator2
,
reference
);
// Set random (but uniformly distributed) positions for all the particles.
vector
<
Vec3
>
positions
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
grid
;
i
++
)
for
(
int
j
=
0
;
j
<
grid
;
j
++
)
for
(
int
k
=
0
;
k
<
grid
;
k
++
)
positions
[
i
*
grid
*
grid
+
j
*
grid
+
k
]
=
Vec3
(
i
*
1.1
,
j
*
1.1
,
k
*
1.1
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
positions
[
i
]
=
positions
[
i
]
+
Vec3
(
0.5
*
genrand_real2
(
sfmt
),
0.5
*
genrand_real2
(
sfmt
),
0.5
*
genrand_real2
(
sfmt
));
context
.
setPositions
(
positions
);
refContext
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
State
refState
=
refContext
.
getState
(
State
::
Forces
|
State
::
Energy
);
// Make sure the CPU and Reference platforms agree.
double
norm
=
0.0
;
double
diff
=
0.0
;
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
Vec3
f
=
state
.
getForces
()[
i
];
norm
+=
f
[
0
]
*
f
[
0
]
+
f
[
1
]
*
f
[
1
]
+
f
[
2
]
*
f
[
2
];
Vec3
delta
=
f
-
refState
.
getForces
()[
i
];
diff
+=
delta
[
0
]
*
delta
[
0
]
+
delta
[
1
]
*
delta
[
1
]
+
delta
[
2
]
*
delta
[
2
];
}
norm
=
std
::
sqrt
(
norm
);
diff
=
std
::
sqrt
(
diff
);
ASSERT_EQUAL_TOL
(
0.0
,
diff
,
0.001
*
norm
);
ASSERT_EQUAL_TOL
(
state
.
getPotentialEnergy
(),
refState
.
getPotentialEnergy
(),
1e-3
);
// Take a small step in the direction of the energy gradient and see whether the potential energy changes by the expected amount.
// (This doesn't work with cutoffs, since the energy changes discontinuously at the cutoff distance.)
if
(
method
==
NonbondedForce
::
NoCutoff
)
{
const
double
delta
=
0.3
;
double
step
=
0.5
*
delta
/
norm
;
vector
<
Vec3
>
positions2
(
numParticles
),
positions3
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
Vec3
p
=
positions
[
i
];
Vec3
f
=
state
.
getForces
()[
i
];
positions2
[
i
]
=
Vec3
(
p
[
0
]
-
f
[
0
]
*
step
,
p
[
1
]
-
f
[
1
]
*
step
,
p
[
2
]
-
f
[
2
]
*
step
);
positions3
[
i
]
=
Vec3
(
p
[
0
]
+
f
[
0
]
*
step
,
p
[
1
]
+
f
[
1
]
*
step
,
p
[
2
]
+
f
[
2
]
*
step
);
}
context
.
setPositions
(
positions2
);
State
state2
=
context
.
getState
(
State
::
Energy
);
context
.
setPositions
(
positions3
);
State
state3
=
context
.
getState
(
State
::
Energy
);
ASSERT_EQUAL_TOL
(
norm
,
(
state2
.
getPotentialEnergy
()
-
state3
.
getPotentialEnergy
())
/
delta
,
1e-2
)
}
}
int
main
()
{
try
{
if
(
!
CpuPlatform
::
isProcessorSupported
())
{
cout
<<
"CPU is not supported. Exiting."
<<
endl
;
return
0
;
}
testSingleParticle
();
testCutoffAndPeriodic
();
for
(
int
i
=
5
;
i
<
11
;
i
++
)
{
testForce
(
i
*
i
*
i
,
NonbondedForce
::
NoCutoff
,
GBSAOBCForce
::
NoCutoff
);
testForce
(
i
*
i
*
i
,
NonbondedForce
::
CutoffNonPeriodic
,
GBSAOBCForce
::
CutoffNonPeriodic
);
testForce
(
i
*
i
*
i
,
NonbondedForce
::
CutoffPeriodic
,
GBSAOBCForce
::
CutoffPeriodic
);
}
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
return
1
;
}
cout
<<
"Done"
<<
endl
;
return
0
;
}
platforms/cpu/tests/TestCpuLangevinIntegrator.cpp
0 → 100644
View file @
19d2885a
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the reference implementation of LangevinIntegrator.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CpuPlatform.h"
#include "openmm/HarmonicBondForce.h"
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/LangevinIntegrator.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
using
namespace
OpenMM
;
using
namespace
std
;
const
double
TOL
=
1e-5
;
void
testSingleBond
()
{
CpuPlatform
platform
;
System
system
;
system
.
addParticle
(
2.0
);
system
.
addParticle
(
2.0
);
LangevinIntegrator
integrator
(
0
,
0.1
,
0.01
);
HarmonicBondForce
*
forceField
=
new
HarmonicBondForce
();
forceField
->
addBond
(
0
,
1
,
1.5
,
1
);
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
2
);
positions
[
0
]
=
Vec3
(
-
1
,
0
,
0
);
positions
[
1
]
=
Vec3
(
1
,
0
,
0
);
context
.
setPositions
(
positions
);
// This is simply a damped harmonic oscillator, so compare it to the analytical solution.
double
freq
=
std
::
sqrt
(
1
-
0.05
*
0.05
);
for
(
int
i
=
0
;
i
<
1000
;
++
i
)
{
State
state
=
context
.
getState
(
State
::
Positions
|
State
::
Velocities
);
double
time
=
state
.
getTime
();
double
expectedDist
=
1.5
+
0.5
*
std
::
exp
(
-
0.05
*
time
)
*
std
::
cos
(
freq
*
time
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
0.5
*
expectedDist
,
0
,
0
),
state
.
getPositions
()[
0
],
0.02
);
ASSERT_EQUAL_VEC
(
Vec3
(
0.5
*
expectedDist
,
0
,
0
),
state
.
getPositions
()[
1
],
0.02
);
double
expectedSpeed
=
-
0.5
*
std
::
exp
(
-
0.05
*
time
)
*
(
0.05
*
std
::
cos
(
freq
*
time
)
+
freq
*
std
::
sin
(
freq
*
time
));
ASSERT_EQUAL_VEC
(
Vec3
(
-
0.5
*
expectedSpeed
,
0
,
0
),
state
.
getVelocities
()[
0
],
0.02
);
ASSERT_EQUAL_VEC
(
Vec3
(
0.5
*
expectedSpeed
,
0
,
0
),
state
.
getVelocities
()[
1
],
0.02
);
integrator
.
step
(
1
);
}
// Not set the friction to a tiny value and see if it conserves energy.
integrator
.
setFriction
(
5e-5
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Energy
);
double
initialEnergy
=
state
.
getKineticEnergy
()
+
state
.
getPotentialEnergy
();
for
(
int
i
=
0
;
i
<
1000
;
++
i
)
{
state
=
context
.
getState
(
State
::
Energy
);
double
energy
=
state
.
getKineticEnergy
()
+
state
.
getPotentialEnergy
();
ASSERT_EQUAL_TOL
(
initialEnergy
,
energy
,
0.01
);
integrator
.
step
(
1
);
}
}
void
testTemperature
()
{
const
int
numParticles
=
8
;
const
double
temp
=
100.0
;
CpuPlatform
platform
;
System
system
;
LangevinIntegrator
integrator
(
temp
,
2.0
,
0.01
);
NonbondedForce
*
forceField
=
new
NonbondedForce
();
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
system
.
addParticle
(
2.0
);
forceField
->
addParticle
((
i
%
2
==
0
?
1.0
:
-
1.0
),
1.0
,
5.0
);
}
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
positions
[
i
]
=
Vec3
((
i
%
2
==
0
?
2
:
-
2
),
(
i
%
4
<
2
?
2
:
-
2
),
(
i
<
4
?
2
:
-
2
));
context
.
setPositions
(
positions
);
// Let it equilibrate.
integrator
.
step
(
10000
);
// Now run it for a while and see if the temperature is correct.
double
ke
=
0.0
;
for
(
int
i
=
0
;
i
<
10000
;
++
i
)
{
State
state
=
context
.
getState
(
State
::
Energy
);
ke
+=
state
.
getKineticEnergy
();
integrator
.
step
(
1
);
}
ke
/=
10000
;
double
expected
=
0.5
*
numParticles
*
3
*
BOLTZ
*
temp
;
ASSERT_USUALLY_EQUAL_TOL
(
expected
,
ke
,
6
/
std
::
sqrt
(
10000.0
));
}
void
testConstraints
()
{
const
int
numParticles
=
8
;
const
double
temp
=
100.0
;
CpuPlatform
platform
;
System
system
;
LangevinIntegrator
integrator
(
temp
,
2.0
,
0.01
);
integrator
.
setConstraintTolerance
(
1e-5
);
NonbondedForce
*
forceField
=
new
NonbondedForce
();
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
system
.
addParticle
(
10.0
);
forceField
->
addParticle
((
i
%
2
==
0
?
0.2
:
-
0.2
),
0.5
,
5.0
);
}
for
(
int
i
=
0
;
i
<
numParticles
-
1
;
++
i
)
system
.
addConstraint
(
i
,
i
+
1
,
1.0
);
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
numParticles
);
vector
<
Vec3
>
velocities
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
positions
[
i
]
=
Vec3
(
i
/
2
,
(
i
+
1
)
/
2
,
0
);
velocities
[
i
]
=
Vec3
(
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
);
}
context
.
setPositions
(
positions
);
context
.
setVelocities
(
velocities
);
// Simulate it and see whether the constraints remain satisfied.
for
(
int
i
=
0
;
i
<
1000
;
++
i
)
{
State
state
=
context
.
getState
(
State
::
Positions
);
for
(
int
j
=
0
;
j
<
numParticles
-
1
;
++
j
)
{
Vec3
p1
=
state
.
getPositions
()[
j
];
Vec3
p2
=
state
.
getPositions
()[
j
+
1
];
double
dist
=
std
::
sqrt
((
p1
[
0
]
-
p2
[
0
])
*
(
p1
[
0
]
-
p2
[
0
])
+
(
p1
[
1
]
-
p2
[
1
])
*
(
p1
[
1
]
-
p2
[
1
])
+
(
p1
[
2
]
-
p2
[
2
])
*
(
p1
[
2
]
-
p2
[
2
]));
ASSERT_EQUAL_TOL
(
1.0
,
dist
,
2e-5
);
}
integrator
.
step
(
1
);
}
}
void
testConstrainedMasslessParticles
()
{
CpuPlatform
platform
;
System
system
;
system
.
addParticle
(
0.0
);
system
.
addParticle
(
1.0
);
system
.
addConstraint
(
0
,
1
,
1.5
);
vector
<
Vec3
>
positions
(
2
);
positions
[
0
]
=
Vec3
(
-
1
,
0
,
0
);
positions
[
1
]
=
Vec3
(
1
,
0
,
0
);
LangevinIntegrator
integrator
(
300.0
,
2.0
,
0.01
);
bool
failed
=
false
;
try
{
// This should throw an exception.
Context
context
(
system
,
integrator
,
platform
);
}
catch
(
exception
&
ex
)
{
failed
=
true
;
}
ASSERT
(
failed
);
// Now make both particles massless, which should work.
system
.
setParticleMass
(
1
,
0.0
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
context
.
setVelocitiesToTemperature
(
300.0
);
integrator
.
step
(
1
);
State
state
=
context
.
getState
(
State
::
Velocities
|
State
::
Positions
);
ASSERT_EQUAL
(
0.0
,
state
.
getVelocities
()[
0
][
0
]);
}
void
testRandomSeed
()
{
const
int
numParticles
=
8
;
const
double
temp
=
100.0
;
const
double
collisionFreq
=
10.0
;
CpuPlatform
platform
;
System
system
;
LangevinIntegrator
integrator
(
temp
,
2.0
,
0.01
);
NonbondedForce
*
forceField
=
new
NonbondedForce
();
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
system
.
addParticle
(
2.0
);
forceField
->
addParticle
((
i
%
2
==
0
?
1.0
:
-
1.0
),
1.0
,
5.0
);
}
system
.
addForce
(
forceField
);
vector
<
Vec3
>
positions
(
numParticles
);
vector
<
Vec3
>
velocities
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
positions
[
i
]
=
Vec3
((
i
%
2
==
0
?
2
:
-
2
),
(
i
%
4
<
2
?
2
:
-
2
),
(
i
<
4
?
2
:
-
2
));
velocities
[
i
]
=
Vec3
(
0
,
0
,
0
);
}
// Try twice with the same random seed.
integrator
.
setRandomNumberSeed
(
5
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
context
.
setVelocities
(
velocities
);
integrator
.
step
(
10
);
State
state1
=
context
.
getState
(
State
::
Positions
);
context
.
reinitialize
();
context
.
setPositions
(
positions
);
context
.
setVelocities
(
velocities
);
integrator
.
step
(
10
);
State
state2
=
context
.
getState
(
State
::
Positions
);
// Try twice with a different random seed.
integrator
.
setRandomNumberSeed
(
10
);
context
.
reinitialize
();
context
.
setPositions
(
positions
);
context
.
setVelocities
(
velocities
);
integrator
.
step
(
10
);
State
state3
=
context
.
getState
(
State
::
Positions
);
context
.
reinitialize
();
context
.
setPositions
(
positions
);
context
.
setVelocities
(
velocities
);
integrator
.
step
(
10
);
State
state4
=
context
.
getState
(
State
::
Positions
);
// Compare the results.
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
for
(
int
j
=
0
;
j
<
3
;
j
++
)
{
ASSERT_EQUAL_TOL
(
state1
.
getPositions
()[
i
][
j
],
state2
.
getPositions
()[
i
][
j
],
1e-5
);
ASSERT_EQUAL_TOL
(
state3
.
getPositions
()[
i
][
j
],
state4
.
getPositions
()[
i
][
j
],
1e-5
);
ASSERT
(
state1
.
getPositions
()[
i
][
j
]
!=
state3
.
getPositions
()[
i
][
j
]);
}
}
}
int
main
()
{
try
{
if
(
!
CpuPlatform
::
isProcessorSupported
())
{
cout
<<
"CPU is not supported. Exiting."
<<
endl
;
return
0
;
}
testSingleBond
();
testTemperature
();
testConstraints
();
testConstrainedMasslessParticles
();
testRandomSeed
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
return
1
;
}
cout
<<
"Done"
<<
endl
;
return
0
;
}
platforms/cpu/tests/TestCpuNeighborList.cpp
View file @
19d2885a
...
...
@@ -34,6 +34,8 @@
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/internal/ThreadPool.h"
#include "AlignedArray.h"
#include "CpuNeighborList.h"
#include "CpuPlatform.h"
#include "sfmt/SFMT.h"
...
...
@@ -49,9 +51,10 @@ void testNeighborList(bool periodic) {
const
int
numParticles
=
500
;
const
float
cutoff
=
2.0
f
;
const
float
boxSize
[
3
]
=
{
20.0
f
,
15.0
f
,
22.0
f
};
const
int
blockSize
=
8
;
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
vector
<
float
>
positions
(
4
*
numParticles
);
AlignedArray
<
float
>
positions
(
4
*
numParticles
);
for
(
int
i
=
0
;
i
<
4
*
numParticles
;
i
++
)
if
(
i
%
4
<
3
)
positions
[
i
]
=
boxSize
[
i
%
4
]
*
genrand_real2
(
sfmt
);
...
...
@@ -63,15 +66,16 @@ void testNeighborList(bool periodic) {
exclusions
[
i
-
j
].
insert
(
i
);
}
}
CpuNeighborList
neighborList
;
neighborList
.
computeNeighborList
(
numParticles
,
positions
,
exclusions
,
boxSize
,
periodic
,
cutoff
);
ThreadPool
threads
;
CpuNeighborList
neighborList
(
blockSize
);
neighborList
.
computeNeighborList
(
numParticles
,
positions
,
exclusions
,
boxSize
,
periodic
,
cutoff
,
threads
);
// Convert the neighbor list to a set for faster lookup.
set
<
pair
<
int
,
int
>
>
neighbors
;
for
(
int
i
=
0
;
i
<
(
int
)
neighborList
.
getSortedAtoms
().
size
();
i
++
)
{
int
blockIndex
=
i
/
CpuNeighborList
::
B
lockSize
;
int
indexInBlock
=
i
-
blockIndex
*
CpuNeighborList
::
B
lockSize
;
int
blockIndex
=
i
/
b
lockSize
;
int
indexInBlock
=
i
-
blockIndex
*
b
lockSize
;
char
mask
=
1
<<
indexInBlock
;
for
(
int
j
=
0
;
j
<
(
int
)
neighborList
.
getBlockExclusions
(
blockIndex
).
size
();
j
++
)
{
if
((
neighborList
.
getBlockExclusions
(
blockIndex
)[
j
]
&
mask
)
==
0
)
{
...
...
platforms/cpu/tests/TestCpuNonbondedForce.cpp
View file @
19d2885a
...
...
@@ -397,44 +397,44 @@ void testLargeSystem() {
system
.
addForce
(
bonds
);
VerletIntegrator
integrator1
(
0.01
);
VerletIntegrator
integrator2
(
0.01
);
Context
cuContext
(
system
,
integrator1
,
platform
);
Context
c
p
uContext
(
system
,
integrator1
,
platform
);
Context
referenceContext
(
system
,
integrator2
,
reference
);
cuContext
.
setPositions
(
positions
);
cuContext
.
setVelocities
(
velocities
);
c
p
uContext
.
setPositions
(
positions
);
c
p
uContext
.
setVelocities
(
velocities
);
referenceContext
.
setPositions
(
positions
);
referenceContext
.
setVelocities
(
velocities
);
State
cuState
=
cuContext
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Forces
|
State
::
Energy
);
State
c
p
uState
=
c
p
uContext
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Forces
|
State
::
Energy
);
State
referenceState
=
referenceContext
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Forces
|
State
::
Energy
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
ASSERT_EQUAL_VEC
(
cuState
.
getPositions
()[
i
],
referenceState
.
getPositions
()[
i
],
tol
);
ASSERT_EQUAL_VEC
(
cuState
.
getVelocities
()[
i
],
referenceState
.
getVelocities
()[
i
],
tol
);
ASSERT_EQUAL_VEC
(
cuState
.
getForces
()[
i
],
referenceState
.
getForces
()[
i
],
tol
);
ASSERT_EQUAL_VEC
(
c
p
uState
.
getPositions
()[
i
],
referenceState
.
getPositions
()[
i
],
tol
);
ASSERT_EQUAL_VEC
(
c
p
uState
.
getVelocities
()[
i
],
referenceState
.
getVelocities
()[
i
],
tol
);
ASSERT_EQUAL_VEC
(
c
p
uState
.
getForces
()[
i
],
referenceState
.
getForces
()[
i
],
tol
);
}
ASSERT_EQUAL_TOL
(
cuState
.
getPotentialEnergy
(),
referenceState
.
getPotentialEnergy
(),
tol
);
ASSERT_EQUAL_TOL
(
c
p
uState
.
getPotentialEnergy
(),
referenceState
.
getPotentialEnergy
(),
tol
);
// Now do the same thing with periodic boundary conditions.
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
CutoffPeriodic
);
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
cuContext
.
reinitialize
();
c
p
uContext
.
reinitialize
();
referenceContext
.
reinitialize
();
cuContext
.
setPositions
(
positions
);
cuContext
.
setVelocities
(
velocities
);
c
p
uContext
.
setPositions
(
positions
);
c
p
uContext
.
setVelocities
(
velocities
);
referenceContext
.
setPositions
(
positions
);
referenceContext
.
setVelocities
(
velocities
);
cuState
=
cuContext
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Forces
|
State
::
Energy
);
c
p
uState
=
c
p
uContext
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Forces
|
State
::
Energy
);
referenceState
=
referenceContext
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Forces
|
State
::
Energy
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
double
dx
=
cuState
.
getPositions
()[
i
][
0
]
-
referenceState
.
getPositions
()[
i
][
0
];
double
dy
=
cuState
.
getPositions
()[
i
][
1
]
-
referenceState
.
getPositions
()[
i
][
1
];
double
dz
=
cuState
.
getPositions
()[
i
][
2
]
-
referenceState
.
getPositions
()[
i
][
2
];
ASSERT_EQUAL_TOL
(
fmod
(
cuState
.
getPositions
()[
i
][
0
]
-
referenceState
.
getPositions
()[
i
][
0
],
boxSize
),
0
,
tol
);
ASSERT_EQUAL_TOL
(
fmod
(
cuState
.
getPositions
()[
i
][
1
]
-
referenceState
.
getPositions
()[
i
][
1
],
boxSize
),
0
,
tol
);
ASSERT_EQUAL_TOL
(
fmod
(
cuState
.
getPositions
()[
i
][
2
]
-
referenceState
.
getPositions
()[
i
][
2
],
boxSize
),
0
,
tol
);
ASSERT_EQUAL_VEC
(
cuState
.
getVelocities
()[
i
],
referenceState
.
getVelocities
()[
i
],
tol
);
ASSERT_EQUAL_VEC
(
cuState
.
getForces
()[
i
],
referenceState
.
getForces
()[
i
],
tol
);
double
dx
=
c
p
uState
.
getPositions
()[
i
][
0
]
-
referenceState
.
getPositions
()[
i
][
0
];
double
dy
=
c
p
uState
.
getPositions
()[
i
][
1
]
-
referenceState
.
getPositions
()[
i
][
1
];
double
dz
=
c
p
uState
.
getPositions
()[
i
][
2
]
-
referenceState
.
getPositions
()[
i
][
2
];
ASSERT_EQUAL_TOL
(
fmod
(
c
p
uState
.
getPositions
()[
i
][
0
]
-
referenceState
.
getPositions
()[
i
][
0
],
boxSize
),
0
,
tol
);
ASSERT_EQUAL_TOL
(
fmod
(
c
p
uState
.
getPositions
()[
i
][
1
]
-
referenceState
.
getPositions
()[
i
][
1
],
boxSize
),
0
,
tol
);
ASSERT_EQUAL_TOL
(
fmod
(
c
p
uState
.
getPositions
()[
i
][
2
]
-
referenceState
.
getPositions
()[
i
][
2
],
boxSize
),
0
,
tol
);
ASSERT_EQUAL_VEC
(
c
p
uState
.
getVelocities
()[
i
],
referenceState
.
getVelocities
()[
i
],
tol
);
ASSERT_EQUAL_VEC
(
c
p
uState
.
getForces
()[
i
],
referenceState
.
getForces
()[
i
],
tol
);
}
ASSERT_EQUAL_TOL
(
cuState
.
getPotentialEnergy
(),
referenceState
.
getPotentialEnergy
(),
tol
);
ASSERT_EQUAL_TOL
(
c
p
uState
.
getPotentialEnergy
(),
referenceState
.
getPotentialEnergy
(),
tol
);
}
void
testDispersionCorrection
()
{
...
...
@@ -542,15 +542,15 @@ void testChangingParameters() {
VerletIntegrator
integrator1
(
0.01
);
VerletIntegrator
integrator2
(
0.01
);
Context
cuContext
(
system
,
integrator1
,
platform
);
Context
c
p
uContext
(
system
,
integrator1
,
platform
);
Context
referenceContext
(
system
,
integrator2
,
reference
);
cuContext
.
setPositions
(
positions
);
c
p
uContext
.
setPositions
(
positions
);
referenceContext
.
setPositions
(
positions
);
State
cuState
=
cuContext
.
getState
(
State
::
Forces
|
State
::
Energy
);
State
c
p
uState
=
c
p
uContext
.
getState
(
State
::
Forces
|
State
::
Energy
);
State
referenceState
=
referenceContext
.
getState
(
State
::
Forces
|
State
::
Energy
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
ASSERT_EQUAL_VEC
(
cuState
.
getForces
()[
i
],
referenceState
.
getForces
()[
i
],
tol
);
ASSERT_EQUAL_TOL
(
cuState
.
getPotentialEnergy
(),
referenceState
.
getPotentialEnergy
(),
tol
);
ASSERT_EQUAL_VEC
(
c
p
uState
.
getForces
()[
i
],
referenceState
.
getForces
()[
i
],
tol
);
ASSERT_EQUAL_TOL
(
c
p
uState
.
getPotentialEnergy
(),
referenceState
.
getPotentialEnergy
(),
tol
);
// Now modify parameters and see if they still agree.
...
...
@@ -559,13 +559,13 @@ void testChangingParameters() {
nonbonded
->
getParticleParameters
(
i
,
charge
,
sigma
,
epsilon
);
nonbonded
->
setParticleParameters
(
i
,
1.5
*
charge
,
1.1
*
sigma
,
1.7
*
epsilon
);
}
nonbonded
->
updateParametersInContext
(
cuContext
);
nonbonded
->
updateParametersInContext
(
c
p
uContext
);
nonbonded
->
updateParametersInContext
(
referenceContext
);
cuState
=
cuContext
.
getState
(
State
::
Forces
|
State
::
Energy
);
c
p
uState
=
c
p
uContext
.
getState
(
State
::
Forces
|
State
::
Energy
);
referenceState
=
referenceContext
.
getState
(
State
::
Forces
|
State
::
Energy
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
ASSERT_EQUAL_VEC
(
cuState
.
getForces
()[
i
],
referenceState
.
getForces
()[
i
],
tol
);
ASSERT_EQUAL_TOL
(
cuState
.
getPotentialEnergy
(),
referenceState
.
getPotentialEnergy
(),
tol
);
ASSERT_EQUAL_VEC
(
c
p
uState
.
getForces
()[
i
],
referenceState
.
getForces
()[
i
],
tol
);
ASSERT_EQUAL_TOL
(
c
p
uState
.
getPotentialEnergy
(),
referenceState
.
getPotentialEnergy
(),
tol
);
}
void
testSwitchingFunction
(
NonbondedForce
::
NonbondedMethod
method
)
{
...
...
platforms/cpu/tests/TestCpuPeriodicTorsionForce.cpp
0 → 100644
View file @
19d2885a
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2014 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CPU implementation of PeriodicTorsionForce.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CpuPlatform.h"
#include "openmm/PeriodicTorsionForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "SimTKOpenMMRealType.h"
#include <iostream>
#include <vector>
using
namespace
OpenMM
;
using
namespace
std
;
CpuPlatform
platform
;
const
double
TOL
=
1e-5
;
void
testPeriodicTorsions
()
{
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
VerletIntegrator
integrator
(
0.01
);
PeriodicTorsionForce
*
forceField
=
new
PeriodicTorsionForce
();
forceField
->
addTorsion
(
0
,
1
,
2
,
3
,
2
,
PI_M
/
3
,
1.1
);
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
4
);
positions
[
0
]
=
Vec3
(
0
,
1
,
0
);
positions
[
1
]
=
Vec3
(
0
,
0
,
0
);
positions
[
2
]
=
Vec3
(
1
,
0
,
0
);
positions
[
3
]
=
Vec3
(
1
,
0
,
2
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
{
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
double
torque
=
-
2
*
1.1
*
std
::
sin
(
2
*
PI_M
/
3
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
torque
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0.5
*
torque
,
0
),
forces
[
3
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
forces
[
0
][
0
]
+
forces
[
1
][
0
]
+
forces
[
2
][
0
]
+
forces
[
3
][
0
],
forces
[
0
][
1
]
+
forces
[
1
][
1
]
+
forces
[
2
][
1
]
+
forces
[
3
][
1
],
forces
[
0
][
2
]
+
forces
[
1
][
2
]
+
forces
[
2
][
2
]
+
forces
[
3
][
2
]),
Vec3
(
0
,
0
,
0
),
TOL
);
ASSERT_EQUAL_TOL
(
1.1
*
(
1
+
std
::
cos
(
2
*
PI_M
/
3
)),
state
.
getPotentialEnergy
(),
TOL
);
}
// Try changing the torsion parameters and make sure it's still correct.
forceField
->
setTorsionParameters
(
0
,
0
,
1
,
2
,
3
,
3
,
PI_M
/
3.2
,
1.3
);
forceField
->
updateParametersInContext
(
context
);
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
{
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
double
dtheta
=
(
3
*
PI_M
/
2
)
-
(
PI_M
/
3.2
);
double
torque
=
-
3
*
1.3
*
std
::
sin
(
dtheta
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
torque
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0.5
*
torque
,
0
),
forces
[
3
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
forces
[
0
][
0
]
+
forces
[
1
][
0
]
+
forces
[
2
][
0
]
+
forces
[
3
][
0
],
forces
[
0
][
1
]
+
forces
[
1
][
1
]
+
forces
[
2
][
1
]
+
forces
[
3
][
1
],
forces
[
0
][
2
]
+
forces
[
1
][
2
]
+
forces
[
2
][
2
]
+
forces
[
3
][
2
]),
Vec3
(
0
,
0
,
0
),
TOL
);
ASSERT_EQUAL_TOL
(
1.3
*
(
1
+
std
::
cos
(
dtheta
)),
state
.
getPotentialEnergy
(),
TOL
);
}
}
void
testParallelComputation
()
{
System
system
;
const
int
numParticles
=
200
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
system
.
addParticle
(
1.0
);
PeriodicTorsionForce
*
force
=
new
PeriodicTorsionForce
();
for
(
int
i
=
3
;
i
<
numParticles
;
i
++
)
force
->
addTorsion
(
i
-
3
,
i
-
2
,
i
-
1
,
i
,
2
,
1.1
,
i
);
system
.
addForce
(
force
);
vector
<
Vec3
>
positions
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
positions
[
i
]
=
Vec3
(
i
,
i
%
2
,
i
%
3
);
VerletIntegrator
integrator1
(
0.01
);
ReferencePlatform
reference
;
Context
context1
(
system
,
integrator1
,
reference
);
context1
.
setPositions
(
positions
);
State
state1
=
context1
.
getState
(
State
::
Forces
|
State
::
Energy
);
VerletIntegrator
integrator2
(
0.01
);
Context
context2
(
system
,
integrator2
,
platform
);
context2
.
setPositions
(
positions
);
State
state2
=
context2
.
getState
(
State
::
Forces
|
State
::
Energy
);
ASSERT_EQUAL_TOL
(
state1
.
getPotentialEnergy
(),
state2
.
getPotentialEnergy
(),
1e-5
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
ASSERT_EQUAL_VEC
(
state1
.
getForces
()[
i
],
state2
.
getForces
()[
i
],
1e-5
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
try
{
testPeriodicTorsions
();
testParallelComputation
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
return
1
;
}
cout
<<
"Done"
<<
endl
;
return
0
;
}
platforms/cpu/tests/TestCpuRBTorsionForce.cpp
0 → 100644
View file @
19d2885a
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2014 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of RBTorsionForce.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CpuPlatform.h"
#include "openmm/RBTorsionForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "SimTKOpenMMRealType.h"
#include <iostream>
#include <vector>
using
namespace
OpenMM
;
using
namespace
std
;
CpuPlatform
platform
;
const
double
TOL
=
1e-5
;
void
testRBTorsions
()
{
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
VerletIntegrator
integrator
(
0.01
);
RBTorsionForce
*
forceField
=
new
RBTorsionForce
();
forceField
->
addTorsion
(
0
,
1
,
2
,
3
,
0.1
,
0.2
,
0.3
,
0.4
,
0.5
,
0.6
);
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
4
);
positions
[
0
]
=
Vec3
(
0
,
1
,
0
);
positions
[
1
]
=
Vec3
(
0
,
0
,
0
);
positions
[
2
]
=
Vec3
(
1
,
0
,
0
);
positions
[
3
]
=
Vec3
(
1
,
1
,
1
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
{
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
double
psi
=
0.25
*
PI_M
-
PI_M
;
double
torque
=
0.0
;
for
(
int
i
=
1
;
i
<
6
;
++
i
)
{
double
c
=
0.1
*
(
i
+
1
);
torque
+=
-
c
*
i
*
std
::
pow
(
std
::
cos
(
psi
),
i
-
1
)
*
std
::
sin
(
psi
);
}
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
torque
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0.5
*
torque
,
-
0.5
*
torque
),
forces
[
3
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
forces
[
0
][
0
]
+
forces
[
1
][
0
]
+
forces
[
2
][
0
]
+
forces
[
3
][
0
],
forces
[
0
][
1
]
+
forces
[
1
][
1
]
+
forces
[
2
][
1
]
+
forces
[
3
][
1
],
forces
[
0
][
2
]
+
forces
[
1
][
2
]
+
forces
[
2
][
2
]
+
forces
[
3
][
2
]),
Vec3
(
0
,
0
,
0
),
TOL
);
double
energy
=
0.0
;
for
(
int
i
=
0
;
i
<
6
;
++
i
)
{
double
c
=
0.1
*
(
i
+
1
);
energy
+=
c
*
std
::
pow
(
std
::
cos
(
psi
),
i
);
}
ASSERT_EQUAL_TOL
(
energy
,
state
.
getPotentialEnergy
(),
TOL
);
}
// Try changing the torsion parameters and make sure it's still correct.
forceField
->
setTorsionParameters
(
0
,
0
,
1
,
2
,
3
,
0.11
,
0.22
,
0.33
,
0.44
,
0.55
,
0.66
);
forceField
->
updateParametersInContext
(
context
);
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
{
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
double
psi
=
0.25
*
PI_M
-
PI_M
;
double
torque
=
0.0
;
for
(
int
i
=
1
;
i
<
6
;
++
i
)
{
double
c
=
0.11
*
(
i
+
1
);
torque
+=
-
c
*
i
*
std
::
pow
(
std
::
cos
(
psi
),
i
-
1
)
*
std
::
sin
(
psi
);
}
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
torque
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0.5
*
torque
,
-
0.5
*
torque
),
forces
[
3
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
forces
[
0
][
0
]
+
forces
[
1
][
0
]
+
forces
[
2
][
0
]
+
forces
[
3
][
0
],
forces
[
0
][
1
]
+
forces
[
1
][
1
]
+
forces
[
2
][
1
]
+
forces
[
3
][
1
],
forces
[
0
][
2
]
+
forces
[
1
][
2
]
+
forces
[
2
][
2
]
+
forces
[
3
][
2
]),
Vec3
(
0
,
0
,
0
),
TOL
);
double
energy
=
0.0
;
for
(
int
i
=
0
;
i
<
6
;
++
i
)
{
double
c
=
0.11
*
(
i
+
1
);
energy
+=
c
*
std
::
pow
(
std
::
cos
(
psi
),
i
);
}
ASSERT_EQUAL_TOL
(
energy
,
state
.
getPotentialEnergy
(),
TOL
);
}
}
void
testParallelComputation
()
{
System
system
;
const
int
numParticles
=
200
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
system
.
addParticle
(
1.0
);
RBTorsionForce
*
force
=
new
RBTorsionForce
();
for
(
int
i
=
3
;
i
<
numParticles
;
i
++
)
force
->
addTorsion
(
i
-
3
,
i
-
2
,
i
-
1
,
i
,
2
,
0.1
*
i
,
0.5
*
i
,
i
,
1
,
1
);
system
.
addForce
(
force
);
vector
<
Vec3
>
positions
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
positions
[
i
]
=
Vec3
(
i
,
i
%
2
,
i
%
3
);
VerletIntegrator
integrator1
(
0.01
);
ReferencePlatform
reference
;
Context
context1
(
system
,
integrator1
,
reference
);
context1
.
setPositions
(
positions
);
State
state1
=
context1
.
getState
(
State
::
Forces
|
State
::
Energy
);
VerletIntegrator
integrator2
(
0.01
);
Context
context2
(
system
,
integrator2
,
platform
);
context2
.
setPositions
(
positions
);
State
state2
=
context2
.
getState
(
State
::
Forces
|
State
::
Energy
);
ASSERT_EQUAL_TOL
(
state1
.
getPotentialEnergy
(),
state2
.
getPotentialEnergy
(),
1e-5
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
ASSERT_EQUAL_VEC
(
state1
.
getForces
()[
i
],
state2
.
getForces
()[
i
],
1e-5
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
try
{
testRBTorsions
();
testParallelComputation
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
return
1
;
}
cout
<<
"Done"
<<
endl
;
return
0
;
}
Prev
1
2
3
4
5
6
7
8
9
…
16
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment