Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
0b5d58d7
Commit
0b5d58d7
authored
May 27, 2020
by
Charlles Abreu
Browse files
Conflict resolution in TestSplineFilter.cpp
parents
9026dbe7
b0d13582
Changes
105
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
511 additions
and
1394 deletions
+511
-1394
platforms/common/src/kernels/noseHooverIntegrator.cc
platforms/common/src/kernels/noseHooverIntegrator.cc
+66
-179
platforms/cpu/include/CpuNeighborList.h
platforms/cpu/include/CpuNeighborList.h
+10
-2
platforms/cpu/include/CpuNonbondedForce.h
platforms/cpu/include/CpuNonbondedForce.h
+1
-1
platforms/cpu/include/CpuNonbondedForceFvec.h
platforms/cpu/include/CpuNonbondedForceFvec.h
+350
-0
platforms/cpu/include/CpuNonbondedForceVec4.h
platforms/cpu/include/CpuNonbondedForceVec4.h
+0
-116
platforms/cpu/include/CpuNonbondedForceVec8.h
platforms/cpu/include/CpuNonbondedForceVec8.h
+0
-118
platforms/cpu/src/CpuCustomGBForce.cpp
platforms/cpu/src/CpuCustomGBForce.cpp
+3
-3
platforms/cpu/src/CpuCustomManyParticleForce.cpp
platforms/cpu/src/CpuCustomManyParticleForce.cpp
+1
-1
platforms/cpu/src/CpuCustomNonbondedForce.cpp
platforms/cpu/src/CpuCustomNonbondedForce.cpp
+1
-1
platforms/cpu/src/CpuGayBerneForce.cpp
platforms/cpu/src/CpuGayBerneForce.cpp
+1
-1
platforms/cpu/src/CpuKernels.cpp
platforms/cpu/src/CpuKernels.cpp
+2
-7
platforms/cpu/src/CpuNeighborList.cpp
platforms/cpu/src/CpuNeighborList.cpp
+8
-8
platforms/cpu/src/CpuNonbondedForceFvec.cpp
platforms/cpu/src/CpuNonbondedForceFvec.cpp
+44
-0
platforms/cpu/src/CpuNonbondedForceVec4.cpp
platforms/cpu/src/CpuNonbondedForceVec4.cpp
+4
-433
platforms/cpu/src/CpuNonbondedForceVec8.cpp
platforms/cpu/src/CpuNonbondedForceVec8.cpp
+10
-476
platforms/cpu/src/CpuPlatform.cpp
platforms/cpu/src/CpuPlatform.cpp
+5
-2
platforms/cuda/src/CudaKernelFactory.cpp
platforms/cuda/src/CudaKernelFactory.cpp
+2
-4
platforms/cuda/src/CudaPlatform.cpp
platforms/cuda/src/CudaPlatform.cpp
+1
-2
platforms/cuda/tests/TestCudaNoseHooverThermostat.cpp
platforms/cuda/tests/TestCudaNoseHooverThermostat.cpp
+0
-36
platforms/opencl/src/OpenCLKernelFactory.cpp
platforms/opencl/src/OpenCLKernelFactory.cpp
+2
-4
No files found.
platforms/common/src/kernels/
velocityVerlet
.cc
→
platforms/common/src/kernels/
noseHooverIntegrator
.cc
View file @
0b5d58d7
/**
* Perform the first
step of Velocity Verlet integration
.
* Perform the first
part of integration: velocity step
.
*/
KERNEL
void
integrateVelocityVerletPart1
(
int
numAtoms
,
int
numPairs
,
int
paddedNumAtoms
,
GLOBAL
const
mixed2
*
RESTRICT
dt
,
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
mixed4
*
RESTRICT
velm
,
GLOBAL
const
mm_long
*
RESTRICT
force
,
GLOBAL
mixed4
*
RESTRICT
posDelta
,
GLOBAL
const
int
*
RESTRICT
atomList
,
GLOBAL
const
int2
*
RESTRICT
pairList
#ifdef USE_MIXED_PRECISION
,
GLOBAL
const
real4
*
RESTRICT
posqCorrection
#endif
){
const
mixed2
stepSize
=
dt
[
0
];
const
mixed
dtPos
=
stepSize
.
y
;
const
mixed
dtVel
=
0.5
f
*
(
stepSize
.
x
+
stepSize
.
y
);
const
mixed
scale
=
0.5
f
*
dtVel
/
(
mixed
)
0x100000000
;
KERNEL
void
integrateNoseHooverMiddlePart1
(
int
numAtoms
,
int
numPairs
,
int
paddedNumAtoms
,
GLOBAL
mixed4
*
RESTRICT
velm
,
GLOBAL
const
mm_long
*
RESTRICT
force
,
GLOBAL
const
mixed2
*
RESTRICT
dt
,
GLOBAL
const
int
*
RESTRICT
atomList
,
GLOBAL
const
int2
*
RESTRICT
pairList
)
{
mixed
fscale
=
dt
[
0
].
y
/
(
mixed
)
0x100000000
;
int
index
=
GLOBAL_ID
;
while
(
index
<
numAtoms
)
{
int
atom
=
atomList
[
index
];
mixed4
velocity
=
velm
[
atom
];
if
(
velocity
.
w
!=
0.0
)
{
#ifdef USE_MIXED_PRECISION
real4
pos1
=
posq
[
atom
];
real4
pos2
=
posqCorrection
[
atom
];
mixed4
pos
=
make_mixed4
(
pos1
.
x
+
(
mixed
)
pos2
.
x
,
pos1
.
y
+
(
mixed
)
pos2
.
y
,
pos1
.
z
+
(
mixed
)
pos2
.
z
,
pos1
.
w
);
#else
real4
pos
=
posq
[
atom
];
#endif
velocity
.
x
+=
scale
*
force
[
atom
]
*
velocity
.
w
;
velocity
.
y
+=
scale
*
force
[
atom
+
paddedNumAtoms
]
*
velocity
.
w
;
velocity
.
z
+=
scale
*
force
[
atom
+
paddedNumAtoms
*
2
]
*
velocity
.
w
;
pos
.
x
=
velocity
.
x
*
dtPos
;
pos
.
y
=
velocity
.
y
*
dtPos
;
pos
.
z
=
velocity
.
z
*
dtPos
;
posDelta
[
atom
]
=
pos
;
velocity
.
x
+=
fscale
*
force
[
atom
]
*
velocity
.
w
;
velocity
.
y
+=
fscale
*
force
[
atom
+
paddedNumAtoms
]
*
velocity
.
w
;
velocity
.
z
+=
fscale
*
force
[
atom
+
paddedNumAtoms
*
2
]
*
velocity
.
w
;
velm
[
atom
]
=
velocity
;
}
index
+=
GLOBAL_SIZE
;
...
...
@@ -58,12 +38,12 @@ KERNEL void integrateVelocityVerletPart1(int numAtoms, int numPairs, int paddedN
relVel
.
z
=
v2
.
z
-
v1
.
z
;
mixed3
comFrc
;
mixed
F1x
=
scale
*
force
[
atom1
];
mixed
F1y
=
scale
*
force
[
atom1
+
paddedNumAtoms
];
mixed
F1z
=
scale
*
force
[
atom1
+
paddedNumAtoms
*
2
];
mixed
F2x
=
scale
*
force
[
atom2
];
mixed
F2y
=
scale
*
force
[
atom2
+
paddedNumAtoms
];
mixed
F2z
=
scale
*
force
[
atom2
+
paddedNumAtoms
*
2
];
mixed
F1x
=
f
scale
*
force
[
atom1
];
mixed
F1y
=
f
scale
*
force
[
atom1
+
paddedNumAtoms
];
mixed
F1z
=
f
scale
*
force
[
atom1
+
paddedNumAtoms
*
2
];
mixed
F2x
=
f
scale
*
force
[
atom2
];
mixed
F2y
=
f
scale
*
force
[
atom2
+
paddedNumAtoms
];
mixed
F2z
=
f
scale
*
force
[
atom2
+
paddedNumAtoms
*
2
];
comFrc
.
x
=
F1x
+
F2x
;
comFrc
.
y
=
F1y
+
F2y
;
comFrc
.
z
=
F1z
+
F2z
;
...
...
@@ -77,35 +57,16 @@ KERNEL void integrateVelocityVerletPart1(int numAtoms, int numPairs, int paddedN
relVel
.
x
+=
relFrc
.
x
*
invRedMass
;
relVel
.
y
+=
relFrc
.
y
*
invRedMass
;
relVel
.
z
+=
relFrc
.
z
*
invRedMass
;
#ifdef USE_MIXED_PRECISION
real4
posv1
=
posq
[
atom1
];
real4
posv2
=
posq
[
atom2
];
real4
posc1
=
posqCorrection
[
atom1
];
real4
posc2
=
posqCorrection
[
atom2
];
mixed4
pos1
=
make_mixed4
(
posv1
.
x
+
(
mixed
)
posc1
.
x
,
posv1
.
y
+
(
mixed
)
posc1
.
y
,
posv1
.
z
+
(
mixed
)
posc1
.
z
,
posv1
.
w
);
mixed4
pos2
=
make_mixed4
(
posv2
.
x
+
(
mixed
)
posc2
.
x
,
posv2
.
y
+
(
mixed
)
posc2
.
y
,
posv2
.
z
+
(
mixed
)
posc2
.
z
,
posv2
.
w
);
#else
real4
pos1
=
posq
[
atom1
];
real4
pos2
=
posq
[
atom2
];
#endif
if
(
v1
.
w
!=
0.0
f
)
{
v1
.
x
=
comVel
.
x
-
relVel
.
x
*
mass2fract
;
v1
.
y
=
comVel
.
y
-
relVel
.
y
*
mass2fract
;
v1
.
z
=
comVel
.
z
-
relVel
.
z
*
mass2fract
;
pos1
.
x
=
v1
.
x
*
dtPos
;
pos1
.
y
=
v1
.
y
*
dtPos
;
pos1
.
z
=
v1
.
z
*
dtPos
;
posDelta
[
atom1
]
=
pos1
;
velm
[
atom1
]
=
v1
;
}
if
(
v2
.
w
!=
0.0
f
)
{
v2
.
x
=
comVel
.
x
+
relVel
.
x
*
mass1fract
;
v2
.
y
=
comVel
.
y
+
relVel
.
y
*
mass1fract
;
v2
.
z
=
comVel
.
z
+
relVel
.
z
*
mass1fract
;
pos2
.
x
=
v2
.
x
*
dtPos
;
pos2
.
y
=
v2
.
y
*
dtPos
;
pos2
.
z
=
v2
.
z
*
dtPos
;
posDelta
[
atom2
]
=
pos2
;
velm
[
atom2
]
=
v2
;
}
index
+=
GLOBAL_SIZE
;
...
...
@@ -113,22 +74,60 @@ KERNEL void integrateVelocityVerletPart1(int numAtoms, int numPairs, int paddedN
}
/**
 * Second part of the integration step: a half step in position.
 *
 * For every particle index in [0, numAtoms) whose velm[index].w is nonzero, the
 * displacement 0.5*dt[0].y*velocity is written to BOTH posDelta[index] and
 * oldDelta[index] (overwriting whatever was stored there). Particles whose
 * velm[index].w equals zero are left untouched.
 *
 * NOTE(review): velm.w is presumably the inverse mass, so w == 0 would mark an
 * immobile particle - confirm against the host code.
 *
 * @param numAtoms   number of entries of velm/posDelta/oldDelta to process
 * @param velm       per-particle velocity (x, y, z) plus the w component tested above
 * @param posDelta   output: displacement for this half step
 * @param oldDelta   output: copy of the same displacement
 * @param dt         step size buffer; only dt[0].y is read
 */
KERNEL void integrateNoseHooverMiddlePart2(int numAtoms, GLOBAL mixed4* RESTRICT velm, GLOBAL mixed4* RESTRICT posDelta,
        GLOBAL mixed4* RESTRICT oldDelta, GLOBAL const mixed2* RESTRICT dt) {
    mixed halfStep = 0.5f*dt[0].y;
    for (int index = GLOBAL_ID; index < numAtoms; index += GLOBAL_SIZE) {
        mixed4 vel = velm[index];
        if (vel.w == 0.0)
            continue; // nothing is written for this particle
        mixed4 shift = make_mixed4(halfStep*vel.x, halfStep*vel.y, halfStep*vel.z, 0);
        posDelta[index] = shift;
        oldDelta[index] = shift;
    }
}
/**
 * Third part of the integration step: another half step in position.
 *
 * For every particle index in [0, numAtoms) whose velm[index].w is nonzero, the
 * displacement 0.5*dt[0].y*velocity is ADDED to both posDelta[index] and
 * oldDelta[index]. Particles whose velm[index].w equals zero are left untouched.
 *
 * NOTE(review): velm.w is presumably the inverse mass, so w == 0 would mark an
 * immobile particle - confirm against the host code.
 *
 * @param numAtoms   number of entries of velm/posDelta/oldDelta to process
 * @param velm       per-particle velocity (x, y, z) plus the w component tested above
 * @param posDelta   in/out: accumulated displacement
 * @param oldDelta   in/out: accumulated displacement (receives the same increment)
 * @param dt         step size buffer; only dt[0].y is read
 */
KERNEL void integrateNoseHooverMiddlePart3(int numAtoms, GLOBAL mixed4* RESTRICT velm, GLOBAL mixed4* RESTRICT posDelta,
        GLOBAL mixed4* RESTRICT oldDelta, GLOBAL const mixed2* RESTRICT dt) {
    mixed halfStep = 0.5f*dt[0].y;
    for (int index = GLOBAL_ID; index < numAtoms; index += GLOBAL_SIZE) {
        mixed4 vel = velm[index];
        if (vel.w == 0.0)
            continue; // nothing is accumulated for this particle
        mixed4 shift = make_mixed4(halfStep*vel.x, halfStep*vel.y, halfStep*vel.z, 0);
        posDelta[index] += shift;
        oldDelta[index] += shift;
    }
}
KERNEL
void
integrateVelocityVerletPart2
(
int
numAtoms
,
GLOBAL
mixed2
*
RESTRICT
dt
,
GLOBAL
real4
*
RESTRICT
posq
,
GLOBAL
mixed4
*
RESTRICT
velm
,
GLOBAL
const
mixed4
*
RESTRICT
posDelta
/**
* Perform the fourth part of integration: apply constraint forces to velocities, then record
* the constrained positions.
*/
KERNEL
void
integrateNoseHooverMiddlePart4
(
int
numAtoms
,
GLOBAL
real4
*
RESTRICT
posq
,
GLOBAL
mixed4
*
RESTRICT
velm
,
GLOBAL
mixed4
*
RESTRICT
posDelta
,
GLOBAL
mixed4
*
RESTRICT
oldDelta
,
GLOBAL
const
mixed2
*
RESTRICT
dt
#ifdef USE_MIXED_PRECISION
,
GLOBAL
real4
*
RESTRICT
posqCorrection
,
GLOBAL
real4
*
RESTRICT
posqCorrection
#endif
){
mixed2
stepSize
=
dt
[
0
];
int
index
=
GLOBAL_ID
;
if
(
index
==
0
)
dt
[
0
].
x
=
stepSize
.
y
;
while
(
index
<
numAtoms
)
{
)
{
mixed
invDt
=
1
/
dt
[
0
].
y
;
for
(
int
index
=
GLOBAL_ID
;
index
<
numAtoms
;
index
+=
GLOBAL_SIZE
)
{
mixed4
velocity
=
velm
[
index
];
if
(
velocity
.
w
!=
0.0
)
{
mixed4
delta
=
posDelta
[
index
];
velocity
.
x
+=
(
delta
.
x
-
oldDelta
[
index
].
x
)
*
invDt
;
velocity
.
y
+=
(
delta
.
y
-
oldDelta
[
index
].
y
)
*
invDt
;
velocity
.
z
+=
(
delta
.
z
-
oldDelta
[
index
].
z
)
*
invDt
;
velm
[
index
]
=
velocity
;
#ifdef USE_MIXED_PRECISION
real4
pos1
=
posq
[
index
];
real4
pos2
=
posqCorrection
[
index
];
...
...
@@ -136,7 +135,6 @@ KERNEL void integrateVelocityVerletPart2(int numAtoms, GLOBAL mixed2* RESTRICT d
#else
real4
pos
=
posq
[
index
];
#endif
mixed4
delta
=
posDelta
[
index
];
pos
.
x
+=
delta
.
x
;
pos
.
y
+=
delta
.
y
;
pos
.
z
+=
delta
.
z
;
...
...
@@ -147,120 +145,10 @@ KERNEL void integrateVelocityVerletPart2(int numAtoms, GLOBAL mixed2* RESTRICT d
posq
[
index
]
=
pos
;
#endif
}
index
+=
GLOBAL_SIZE
;
}
}
/**
 * Perform the third step of Velocity Verlet integration.
 *
 * Two passes are made over the velocities:
 *   1. for each entry of atomList, the velocity receives a force kick plus a term
 *      derived from the difference between posDelta and velocity*dt;
 *   2. for each entry of pairList, the two velocities are split into a
 *      centre-of-mass part and a relative part, each part is kicked separately, and
 *      the recombined velocities receive the same posDelta-based term.
 * The work item whose GLOBAL_ID is 0 additionally copies dt[0].y into dt[0].x.
 *
 * NOTE(review): posq (and posqCorrection under USE_MIXED_PRECISION) are accepted but
 * never referenced in this body.
 * NOTE(review): velm.w is used as 1/m below, so it is presumably the inverse mass;
 * force values are divided by 2^32, presumably because they are stored as fixed-point
 * integers - confirm both against the host code.
 *
 * @param numAtoms        number of entries of atomList to process
 * @param numPairs        number of entries of pairList to process
 * @param paddedNumAtoms  stride between the x, y and z sections of the force buffer
 * @param dt              step size buffer; dt[0] is read, dt[0].x is overwritten with dt[0].y
 * @param velm            in/out: velocities (x, y, z) and the w component
 * @param force           per-component forces, indexed [atom + k*paddedNumAtoms]
 * @param posDelta        displacement for each atom (read only)
 * @param atomList        indices of the atoms handled individually
 * @param pairList        index pairs of the atoms handled as two-particle units
 */
KERNEL void integrateVelocityVerletPart3(int numAtoms, int numPairs, int paddedNumAtoms, GLOBAL mixed2* RESTRICT dt, GLOBAL real4* RESTRICT posq,
        GLOBAL mixed4* RESTRICT velm, GLOBAL const mm_long* RESTRICT force, GLOBAL const mixed4* RESTRICT posDelta,
        GLOBAL const int* RESTRICT atomList, GLOBAL const int2* RESTRICT pairList
#ifdef USE_MIXED_PRECISION
        , GLOBAL const real4* RESTRICT posqCorrection
#endif
        ){
    mixed2 stepSize = dt[0];
    // Without double precision, 1/dt is computed in float and "correction" holds the
    // residual (1 - oneOverDt*dt)/dt so that it can be applied as a second term below.
#ifdef SUPPORTS_DOUBLE_PRECISION
    double oneOverDt = 1.0/stepSize.y;
#else
    float oneOverDt = 1.0f/stepSize.y;
    float correction = (1.0f-oneOverDt*stepSize.y)/stepSize.y;
#endif
    // dtVel is the mean of the two stored step sizes; scale is half of it divided by 2^32.
    const mixed dtVel = 0.5f*(stepSize.x+stepSize.y);
    const mixed scale = 0.5f*dtVel/(mixed) 0x100000000;
    int index = GLOBAL_ID;
    // Exactly one work item records the step size just used into dt[0].x.
    if (index == 0)
        dt[0].x = stepSize.y;
    // Pass 1: atoms listed individually in atomList.
    while (index < numAtoms) {
        int atom = atomList[index];
        mixed4 velocity = velm[atom];
        if (velocity.w != 0.0) {
            mixed4 deltaXconstrained = posDelta[atom];
            // Force kick plus (posDelta - v*dt)/dt, evaluated with the velocity as loaded.
            velocity.x += scale*force[atom]*velocity.w + (deltaXconstrained.x - velocity.x*stepSize.y)*oneOverDt;
            velocity.y += scale*force[atom+paddedNumAtoms]*velocity.w + (deltaXconstrained.y - velocity.y*stepSize.y)*oneOverDt;
            velocity.z += scale*force[atom+paddedNumAtoms*2]*velocity.w + (deltaXconstrained.z - velocity.z*stepSize.y)*oneOverDt;
#ifndef SUPPORTS_DOUBLE_PRECISION
            // Single-precision residual term; note it uses the velocity already updated above.
            velocity.x += (deltaXconstrained.x - velocity.x*stepSize.y)*correction;
            velocity.y += (deltaXconstrained.y - velocity.y*stepSize.y)*correction;
            velocity.z += (deltaXconstrained.z - velocity.z*stepSize.y)*correction;
#endif
            velm[atom] = velocity;
        }
        index += GLOBAL_SIZE;
    }
    // Pass 2: atoms listed as pairs in pairList.
    index = GLOBAL_ID;
    while (index < numPairs) {
        int atom1 = pairList[index].x;
        int atom2 = pairList[index].y;
        mixed4 v1 = velm[atom1];
        mixed4 v2 = velm[atom2];
        // m = 1/w, with w == 0 mapped to m = 0.
        mixed m1 = v1.w == 0.0f ? 0.0f : 1.0f / v1.w;
        mixed m2 = v2.w == 0.0f ? 0.0f : 1.0f / v2.w;
        // NOTE(review): these two divisions are not guarded against m1 + m2 == 0,
        // unlike invRedMass / invTotMass just below.
        mixed mass1fract = m1 / (m1 + m2);
        mixed mass2fract = m2 / (m1 + m2);
        mixed invRedMass = (m1 * m2 != 0.0f) ? (m1 + m2)/(m1 * m2) : 0.0f;
        mixed invTotMass = (m1 + m2 != 0.0f) ? 1.0f /(m1 + m2) : 0.0f;
        // Mass-weighted mean velocity of the pair.
        mixed3 comVel;
        comVel.x = v1.x*mass1fract + v2.x*mass2fract;
        comVel.y = v1.y*mass1fract + v2.y*mass2fract;
        comVel.z = v1.z*mass1fract + v2.z*mass2fract;
        // Velocity of atom2 relative to atom1.
        mixed3 relVel;
        relVel.x = v2.x - v1.x;
        relVel.y = v2.y - v1.y;
        relVel.z = v2.z - v1.z;
        // Scaled forces on each atom, then their sum and mass-weighted difference.
        mixed3 comFrc;
        mixed F1x = scale * force[atom1];
        mixed F1y = scale * force[atom1 + paddedNumAtoms];
        mixed F1z = scale * force[atom1 + paddedNumAtoms*2];
        mixed F2x = scale * force[atom2];
        mixed F2y = scale * force[atom2 + paddedNumAtoms];
        mixed F2z = scale * force[atom2 + paddedNumAtoms*2];
        comFrc.x = F1x + F2x;
        comFrc.y = F1y + F2y;
        comFrc.z = F1z + F2z;
        mixed3 relFrc;
        relFrc.x = mass1fract*F2x - mass2fract*F1x;
        relFrc.y = mass1fract*F2y - mass2fract*F1y;
        relFrc.z = mass1fract*F2z - mass2fract*F1z;
        // Kick the centre-of-mass and relative velocities separately.
        comVel.x += comFrc.x * invTotMass;
        comVel.y += comFrc.y * invTotMass;
        comVel.z += comFrc.z * invTotMass;
        relVel.x += relFrc.x * invRedMass;
        relVel.y += relFrc.y * invRedMass;
        relVel.z += relFrc.z * invRedMass;
        if (v1.w != 0.0f) {
            mixed4 deltaXconstrained = posDelta[atom1];
            // Recombine, then add (posDelta - v*dt)/dt using the velocity as loaded.
            v1.x = comVel.x - relVel.x*mass2fract + (deltaXconstrained.x - v1.x*stepSize.y)*oneOverDt;
            v1.y = comVel.y - relVel.y*mass2fract + (deltaXconstrained.y - v1.y*stepSize.y)*oneOverDt;
            v1.z = comVel.z - relVel.z*mass2fract + (deltaXconstrained.z - v1.z*stepSize.y)*oneOverDt;
#ifndef SUPPORTS_DOUBLE_PRECISION
            // Single-precision residual term; uses the velocity already updated above.
            v1.x += (deltaXconstrained.x - v1.x*stepSize.y)*correction;
            v1.y += (deltaXconstrained.y - v1.y*stepSize.y)*correction;
            v1.z += (deltaXconstrained.z - v1.z*stepSize.y)*correction;
#endif
            velm[atom1] = v1;
        }
        if (v2.w != 0.0f) {
            mixed4 deltaXconstrained = posDelta[atom2];
            // Same as for atom1, with the opposite sign on the relative-velocity share.
            v2.x = comVel.x + relVel.x*mass1fract + (deltaXconstrained.x - v2.x*stepSize.y)*oneOverDt;
            v2.y = comVel.y + relVel.y*mass1fract + (deltaXconstrained.y - v2.y*stepSize.y)*oneOverDt;
            v2.z = comVel.z + relVel.z*mass1fract + (deltaXconstrained.z - v2.z*stepSize.y)*oneOverDt;
#ifndef SUPPORTS_DOUBLE_PRECISION
            v2.x += (deltaXconstrained.x - v2.x*stepSize.y)*correction;
            v2.y += (deltaXconstrained.y - v2.y*stepSize.y)*correction;
            v2.z += (deltaXconstrained.z - v2.z*stepSize.y)*correction;
#endif
            velm[atom2] = v2;
        }
        index += GLOBAL_SIZE;
    }
}
KERNEL
void
integrateVelocityVerletHardWall
(
int
numPairs
,
GLOBAL
const
float
*
RESTRICT
maxPairDistance
,
KERNEL
void
integrateNoseHooverHardWall
(
int
numPairs
,
GLOBAL
const
float
*
RESTRICT
maxPairDistance
,
GLOBAL
mixed2
*
RESTRICT
dt
,
GLOBAL
real4
*
RESTRICT
posq
,
GLOBAL
mixed4
*
RESTRICT
velm
,
GLOBAL
const
int2
*
RESTRICT
pairList
,
GLOBAL
const
float
*
RESTRICT
pairTemperature
...
...
@@ -370,4 +258,3 @@ KERNEL void integrateVelocityVerletHardWall(int numPairs, GLOBAL const float* RE
}
}
}
platforms/cpu/include/CpuNeighborList.h
View file @
0b5d58d7
...
...
@@ -53,7 +53,15 @@ public:
int
getBlockSize
()
const
;
const
std
::
vector
<
int
>&
getSortedAtoms
()
const
;
const
std
::
vector
<
int
>&
getBlockNeighbors
(
int
blockIndex
)
const
;
const
std
::
vector
<
char
>&
getBlockExclusions
(
int
blockIndex
)
const
;
/**
* Bitset for a single block, marking which indexes should be excluded. This data type needs to be big
* enough to store all the bits for any possible block size.
*/
using
BlockExclusionMask
=
int16_t
;
const
std
::
vector
<
BlockExclusionMask
>&
getBlockExclusions
(
int
blockIndex
)
const
;
/**
* This routine contains the code executed by each thread.
*/
...
...
@@ -64,7 +72,7 @@ private:
std
::
vector
<
int
>
sortedAtoms
;
std
::
vector
<
float
>
sortedPositions
;
std
::
vector
<
std
::
vector
<
int
>
>
blockNeighbors
;
std
::
vector
<
std
::
vector
<
char
>
>
blockExclusions
;
std
::
vector
<
std
::
vector
<
BlockExclusionMask
>
>
blockExclusions
;
// The following variables are used to make information accessible to the individual threads.
float
minx
,
maxx
,
miny
,
maxy
,
minz
,
maxz
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
atomBins
;
...
...
platforms/cpu/include/CpuNonbondedForce.h
View file @
0b5d58d7
platforms/cpu/include/CpuNonbondedForceFvec.h
0 → 100644
View file @
0b5d58d7
/* Portions copyright (c) 2006-2015 Stanford University and Simbios.
* Contributors: Daniel Towner
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef OPENMM_CPU_NONBONDED_FORCE_FVEC_H__
#define OPENMM_CPU_NONBONDED_FORCE_FVEC_H__
#include "CpuNonbondedForce.h"
#include "openmm/internal/vectorize.h"
#include "SimTKOpenMMUtilities.h"
#include <algorithm>
#include <vector>
namespace
OpenMM
{
enum
BlockType
{
EWALD
,
NON_EWALD
};
// :TODO: Better name for non-ewald.
enum
PeriodicType
{
NoPeriodic
,
PeriodicPerAtom
,
PeriodicPerInteraction
,
PeriodicTriclinic
};
/**
 * Generic SIMD implementation of CpuNonbondedForce.  The templating allows the same
 * basic code to be reused for any sort of SIMD type, including SSE, AVX, AVX2, or
 * AVX-512.
 *
 * @tparam FVEC  the floating-point SIMD vector type; its size in floats determines
 *               how many atoms are processed together as one block.
 */
template <typename FVEC>
class CpuNonbondedForceFvec : public CpuNonbondedForce {
public:
    /**
     * Number of atoms in each block, i.e. the number of float lanes in FVEC
     * (sizeof(FVEC) / sizeof(float)).
     */
    static constexpr int blockSize = sizeof(FVEC) / sizeof(float);

protected:
    /**---------------------------------------------------------------------------------------

       Calculate all the interactions for one atom block. These are part of the virtual function interface
       and consequently have names which explicitly call Ewald variant or not.
       They internally call into the generic handler function below.

       @param blockIndex       the index of the atom block
       @param forces           force array (forces added)
       @param totalEnergy      total energy (may be null, in which case no energy is accumulated)
       @param boxSize          the size of the periodic box
       @param invBoxSize       the inverse size of the periodic box

       ---------------------------------------------------------------------------------------
       @{
       */
    void calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize);
    void calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize);
    /** @} */

    /**---------------------------------------------------------------------------------------

       Calculate all the interactions for one atom block. Identical to function prototypes above but
       with an extra template parameter to choose whether to use Ewald processing or not.
       This is also where the kind of periodic boundary handling for the block is decided.

       --------------------------------------------------------------------------------------- */
    template <BlockType BLOCK_TYPE>
    void calculateBlockIxnHandler(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize);

    /**
     * Templatized implementation of calculateBlockIxn. It can handle both Ewald and non-ewald interactions
     * through a template parameter since the code is so similar for the two cases. Note also that the
     * floating-point SIMD type is also templated to allow any suitable type to be used.
     *
     * @tparam PERIODIC_TYPE  one of the PeriodicType values, selecting how displacements are wrapped
     * @tparam BLOCK_TYPE     EWALD or NON_EWALD
     * @param blockCenter     center of the block's bounding box, used for per-atom wrapping
     */
    template <int PERIODIC_TYPE, BlockType BLOCK_TYPE>
    void calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter);

    /**
     * Compute the displacement and squared distance between a collection of points, optionally using
     * periodic boundary conditions.
     *
     * @param posI          position of the single atom the block is compared against
     * @param x, y, z       coordinates of the block atoms, one atom per SIMD lane
     * @param dx, dy, dz    output: per-lane displacement (block atom minus posI)
     * @param r2            output: per-lane squared length of the displacement
     */
    template <int PERIODIC_TYPE>
    void getDeltaR(const fvec4& posI, const FVEC& x, const FVEC& y, const FVEC& z, FVEC& dx, FVEC& dy, FVEC& dz, FVEC& r2, const fvec4& boxSize, const fvec4& invBoxSize) const;

    /**
     * Compute an approximation of a function using a table lookup.
     *
     * @param table    tabulated function values
     * @param x        the points at which to evaluate the function, one per SIMD lane
     * @param inverse  factor that converts x into table coordinates
     **/
    FVEC approximateFunctionFromTable(const std::vector<float>& table, FVEC x, FVEC inverse) const;
};
/**
 * Approximate a tabulated function by linear interpolation between adjacent table entries.
 *
 * @param table    tabulated function values
 * @param x        the points at which to evaluate the function, one per SIMD lane
 * @param inverse  factor that converts x into table coordinates (x*inverse is the
 *                 fractional table position)
 * @return the interpolated value for each lane
 */
template <typename FVEC>
FVEC CpuNonbondedForceFvec<FVEC>::approximateFunctionFromTable(const std::vector<float>& table, const FVEC x, const FVEC inverse) const {
    // Work out, for every lane, the table slot to read: the integer part of the
    // table position, capped at NUM_TABLE_POINTS.
    const auto tablePos = x * inverse;
    const auto slot = min(floor(tablePos), float(NUM_TABLE_POINTS));

    // Fetch the pair of table values bracketing each lane's position.
    // NOTE(review): gatherVecPair presumably loads table[slot] and table[slot+1] - confirm.
    FVEC lower, upper;
    gatherVecPair(table.data(), slot, lower, upper);

    // Blend the two values using the fractional part of the table position.
    const auto upperWeight = tablePos - FVEC(slot);
    const auto lowerWeight = 1.0f - upperWeight;
    return lowerWeight * lower + upperWeight * upper;
}
/**
 * Compute the interactions for one atom block without Ewald processing.
 * Forwards all arguments unchanged to calculateBlockIxnHandler<BlockType::NON_EWALD>.
 *
 * @param blockIndex   the index of the atom block
 * @param forces       force array (forces added)
 * @param totalEnergy  total energy
 * @param boxSize      the size of the periodic box
 * @param invBoxSize   the inverse size of the periodic box
 */
template <typename FVEC>
void CpuNonbondedForceFvec<FVEC>::calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
    calculateBlockIxnHandler<BlockType::NON_EWALD>(blockIndex, forces, totalEnergy, boxSize, invBoxSize);
}
/**
 * Compute the interactions for one atom block using Ewald processing.
 * Forwards all arguments unchanged to calculateBlockIxnHandler<BlockType::EWALD>.
 *
 * @param blockIndex   the index of the atom block
 * @param forces       force array (forces added)
 * @param totalEnergy  total energy
 * @param boxSize      the size of the periodic box
 * @param invBoxSize   the inverse size of the periodic box
 */
template <typename FVEC>
void CpuNonbondedForceFvec<FVEC>::calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
    calculateBlockIxnHandler<BlockType::EWALD>(blockIndex, forces, totalEnergy, boxSize, invBoxSize);
}
/**
 * Decide how periodic boundary conditions must be handled for one atom block, then
 * run the matching specialization of calculateBlockIxnImpl.
 *
 * When the system is periodic, the axis-aligned bounding box of the block's atoms is
 * computed from posq and its center becomes blockCenter. The periodic mode is then:
 *   - NoPeriodic             if the bounding box stays at least cutoffDistance away
 *                            from every face of the box [0, boxSize];
 *   - PeriodicTriclinic      otherwise, if the box is triclinic;
 *   - PeriodicPerAtom        otherwise, if on every axis half of (box length minus
 *                            bounding-box extent) is at least cutoffDistance;
 *   - PeriodicPerInteraction in all remaining cases.
 * For a non-periodic system the mode is NoPeriodic and blockCenter is zero.
 *
 * @tparam BLOCK_TYPE  EWALD or NON_EWALD, passed through to the implementation
 * @param blockIndex   the index of the atom block
 * @param forces       force array (forces added)
 * @param totalEnergy  total energy
 * @param boxSize      the size of the periodic box
 * @param invBoxSize   the inverse size of the periodic box
 */
template <typename FVEC>
template <BlockType BLOCK_TYPE>
void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnHandler(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
    PeriodicType periodicType = NoPeriodic;
    fvec4 blockCenter;
    if (!periodic)
        blockCenter = 0.0f;
    else {
        // Bounding box of the block's atoms, one (lo, hi) pair per axis.
        const int* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex];
        float lo[3], hi[3];
        for (int axis = 0; axis < 3; axis++)
            lo[axis] = hi[axis] = posq[4*blockAtom[0]+axis];
        for (int i = 1; i < blockSize; i++) {
            for (int axis = 0; axis < 3; axis++) {
                lo[axis] = std::min(lo[axis], posq[4*blockAtom[i]+axis]);
                hi[axis] = std::max(hi[axis], posq[4*blockAtom[i]+axis]);
            }
        }
        blockCenter = fvec4(0.5f*(lo[0]+hi[0]), 0.5f*(lo[1]+hi[1]), 0.5f*(lo[2]+hi[2]), 0.0f);

        // nearBoundary: some atom lies within cutoffDistance of a box face.
        // spanFits: on every axis the block is small enough for per-atom wrapping.
        bool nearBoundary = false;
        bool spanFits = true;
        for (int axis = 0; axis < 3; axis++) {
            nearBoundary = nearBoundary || (lo[axis] < cutoffDistance) || (hi[axis] > boxSize[axis]-cutoffDistance);
            spanFits = spanFits && (0.5f*(boxSize[axis]-(hi[axis]-lo[axis])) >= cutoffDistance);
        }
        if (!nearBoundary)
            periodicType = NoPeriodic;
        else if (triclinic)
            periodicType = PeriodicTriclinic;
        else
            periodicType = spanFits ? PeriodicPerAtom : PeriodicPerInteraction;
    }

    // Dispatch to the specialization compiled for the selected periodic mode.
    switch (periodicType) {
        case NoPeriodic:
            calculateBlockIxnImpl<NoPeriodic, BLOCK_TYPE>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
            break;
        case PeriodicPerAtom:
            calculateBlockIxnImpl<PeriodicPerAtom, BLOCK_TYPE>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
            break;
        case PeriodicPerInteraction:
            calculateBlockIxnImpl<PeriodicPerInteraction, BLOCK_TYPE>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
            break;
        case PeriodicTriclinic:
            calculateBlockIxnImpl<PeriodicTriclinic, BLOCK_TYPE>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
            break;
    }
}
/**
 * Compute the nonbonded interactions between the atoms of one block and every atom
 * in that block's neighbor list.
 *
 * The block atoms occupy one SIMD lane each; neighbors are visited one at a time and
 * interact with all lanes at once. Lanes that are excluded, or whose squared distance
 * is not below the squared cutoff, contribute neither energy nor force.
 *
 * Data layout relied on here: posq holds four floats per atom (x, y, z, charge) and
 * forces holds four floats per atom.
 *
 * @tparam PERIODIC_TYPE  one of the PeriodicType values, selecting how displacements are wrapped
 * @tparam BLOCK_TYPE     EWALD or NON_EWALD
 * @param blockIndex      the index of the atom block
 * @param forces          force array; contributions are added for block atoms and subtracted
 *                        for neighbor atoms
 * @param totalEnergy     if non-null, the energy of the included interactions is added to it
 * @param boxSize         the size of the periodic box
 * @param invBoxSize      the inverse size of the periodic box
 * @param blockCenter     center of the block's bounding box, used for per-atom wrapping
 */
template <typename FVEC>
template <int PERIODIC_TYPE, BlockType BLOCK_TYPE>
void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter) {
    // Load the positions and parameters of the atoms in the block.

    const int* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex];
    fvec4 blockAtomPosq[blockSize];
    FVEC blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f);
    FVEC blockAtomX, blockAtomY, blockAtomZ, blockAtomCharge;
    for (int i = 0; i < blockSize; i++) {
        blockAtomPosq[i] = fvec4(posq+4*blockAtom[i]);
        if (PERIODIC_TYPE == PeriodicPerAtom)
            blockAtomPosq[i] -= floor((blockAtomPosq[i]-blockCenter)*invBoxSize+0.5f)*boxSize; // :TODO: Apply one to blockAtom?
    }
    // Convert from one fvec4 per atom to one FVEC per coordinate.
    transpose(blockAtomPosq, blockAtomX, blockAtomY, blockAtomZ, blockAtomCharge);
    blockAtomCharge *= ONE_4PI_EPS0;

    // Not the most efficient way to do this, but it works across all types we care about, and this isn't where
    // the cycles are spent anyway.
    FVEC blockAtomSigma = {};
    FVEC blockAtomEpsilon = {};
    for (int i = 0; i < blockSize; ++i) {
        ((float*)&blockAtomSigma)[i] = atomParameters[blockAtom[i]].first;
        ((float*)&blockAtomEpsilon)[i] = atomParameters[blockAtom[i]].second;
    }

    // Ewald needs C6 data gathered from a table. Unused variable for non-ewald.
    const FVEC C6s = (BLOCK_TYPE == BlockType::EWALD) ? FVEC(C6params, blockAtom) : FVEC();

    // Loop-invariant quantities, computed once for the whole neighbor loop.
    const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
    const FVEC cutoffDistanceSquared = cutoffDistance * cutoffDistance;

    // Loop over neighbors for this block.

    const auto& neighbors = neighborList->getBlockNeighbors(blockIndex);
    const auto& exclusions = neighborList->getBlockExclusions(blockIndex);
    for (int i = 0; i < (int) neighbors.size(); i++) {
        // Load the next neighbor.

        int atom = neighbors[i];

        // Compute the distances to the block atoms.

        FVEC dx, dy, dz, r2;
        fvec4 atomPos(posq+4*atom);
        if (PERIODIC_TYPE == PeriodicPerAtom)
            atomPos -= floor((atomPos-blockCenter)*invBoxSize+0.5f)*boxSize;
        getDeltaR<PERIODIC_TYPE>(atomPos, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, boxSize, invBoxSize);

        // A lane takes part only if it is inside the cutoff and its exclusion bit is clear.
        const auto exclNotMask = FVEC::expandBitsToMask(~exclusions[i]);
        const auto include = blendZero(r2 < cutoffDistanceSquared, exclNotMask);
        if (!any(include))
            continue; // No interactions to compute.

        // Compute the interactions.

        const auto inverseR = rsqrt(r2);
        const auto r = r2*inverseR;
        FVEC energy, dEdR;
        float atomEpsilon = atomParameters[atom].second;
        if (atomEpsilon != 0.0f) {
            // Lennard-Jones term: sigma parameters are added, epsilon parameters multiplied.
            const auto sig = blockAtomSigma+atomParameters[atom].first;
            const auto sig2 = (inverseR*sig) * (inverseR*sig);
            const auto sig6 = sig2*sig2*sig2;
            const auto eps = blockAtomEpsilon*atomEpsilon;
            const auto epsSig6 = eps*sig6;
            dEdR = epsSig6*(12.0f*sig6 - 6.0f);
            energy = epsSig6*(sig6-1.0f);
            if (useSwitch) {
                // t is zero below switchingDistance, so switchValue is 1 and switchDeriv is 0 there.
                const auto t = blendZero((r-switchingDistance)*invSwitchingInterval, r>switchingDistance);
                const auto switchValue = 1+t*t*t*(-10.0f+t*(15.0f-t*6.0f));
                const auto switchDeriv = t*t*(-30.0f+t*(60.0f-t*30.0f))*invSwitchingInterval;
                dEdR = switchValue*dEdR - energy*switchDeriv*r;
                energy *= switchValue;
            }
            if (BLOCK_TYPE == BlockType::EWALD && ljpme) {
                // LJ-PME: dispersion correction evaluated from the tabulated exponential terms.
                const auto C6ij = C6s*C6params[atom];
                const auto inverseR2 = inverseR*inverseR;
                const auto mysig2 = sig*sig;
                const auto mysig6 = mysig2*mysig2*mysig2;
                const auto emult = C6ij*inverseR2*inverseR2*inverseR2*approximateFunctionFromTable(exptermsTable, r, FVEC(exptermsDXInv));
                const auto potentialShift = eps*(1.0f-mysig6*inverseRcut6)*mysig6*inverseRcut6 - C6ij*inverseRcut6Expterm;
                dEdR += 6.0f*C6ij*inverseR2*inverseR2*inverseR2*approximateFunctionFromTable(dExptermsTable, r, FVEC(exptermsDXInv));
                energy += emult + potentialShift;
            }
        }
        else {
            energy = 0.0f;
            dEdR = 0.0f;
        }

        // Electrostatic term.
        const auto chargeProd = blockAtomCharge*posq[4*atom+3];
        if (BLOCK_TYPE == BlockType::EWALD) {
            dEdR += chargeProd*inverseR*approximateFunctionFromTable(ewaldScaleTable, r, FVEC(ewaldDXInv));
        }
        else {
            if (cutoff)
                dEdR += chargeProd*(inverseR-2.0f*krf*r2);
            else
                dEdR += chargeProd*inverseR;
        }
        dEdR *= inverseR*inverseR;

        // Accumulate energies.

        if (totalEnergy) {
            if (BLOCK_TYPE == BlockType::EWALD)
                energy += chargeProd*inverseR*approximateFunctionFromTable(erfcTable, alphaEwald*r, FVEC(erfcDXInv));
            else { // Non-ewald.
                if (cutoff)
                    energy += chargeProd*(inverseR+krf*r2-crf);
                else
                    energy += chargeProd*inverseR;
            }
            energy = blendZero(energy, include);
            *totalEnergy += reduceAdd(energy);
        }

        // Accumulate forces.

        dEdR = blendZero(dEdR, include);
        const auto fx = dx*dEdR;
        const auto fy = dy*dEdR;
        const auto fz = dz*dEdR;
        blockAtomForceX += fx;
        blockAtomForceY += fy;
        blockAtomForceZ += fz;

        // The neighbor atom receives the opposite of the summed per-lane forces.
        float* const atomForce = forces+4*atom;
        const fvec4 newAtomForce = fvec4(atomForce) - reduceToVec3(fx, fy, fz);
        newAtomForce.store(atomForce);
    }

    // Record the forces on the block atoms.

    fvec4 f[blockSize];
    transpose(blockAtomForceX, blockAtomForceY, blockAtomForceZ, 0.0f, f);
    for (int j = 0; j < blockSize; j++)
        (fvec4(forces+4*blockAtom[j])+f[j]).store(forces+4*blockAtom[j]);
}
/**
 * Compute, for every SIMD lane, the displacement from posI to the lane's point
 * (x, y, z) and the squared length of that displacement.
 *
 * Depending on PERIODIC_TYPE the displacement is additionally wrapped:
 *   - PeriodicTriclinic:      reduced along the third, second and first periodic box
 *                             vector, in that order;
 *   - PeriodicPerInteraction: each component reduced by the nearest multiple of the
 *                             rectangular box length;
 *   - any other value:        left as computed.
 *
 * @param posI          position of the single atom the lanes are compared against
 * @param x, y, z       coordinates of the points, one per lane
 * @param dx, dy, dz    output: per-lane displacement (point minus posI)
 * @param r2            output: per-lane squared distance
 * @param boxSize       the size of the periodic box
 * @param invBoxSize    the inverse size of the periodic box
 */
template <typename FVEC>
template <int PERIODIC_TYPE>
void CpuNonbondedForceFvec<FVEC>::getDeltaR(const fvec4& posI, const FVEC& x, const FVEC& y, const FVEC& z, FVEC& dx, FVEC& dy, FVEC& dz, FVEC& r2, const fvec4& boxSize, const fvec4& invBoxSize) const {
    dx = x-posI[0];
    dy = y-posI[1];
    dz = z-posI[2];

    switch (PERIODIC_TYPE) {
        case PeriodicTriclinic: {
            // The order matters: each reduction uses components already adjusted
            // by the previous one.
            const auto wrapC = floor(dz*recipBoxSize[2]+0.5f);
            dx -= wrapC*periodicBoxVectors[2][0];
            dy -= wrapC*periodicBoxVectors[2][1];
            dz -= wrapC*periodicBoxVectors[2][2];
            const auto wrapB = floor(dy*recipBoxSize[1]+0.5f);
            dx -= wrapB*periodicBoxVectors[1][0];
            dy -= wrapB*periodicBoxVectors[1][1];
            const auto wrapA = floor(dx*recipBoxSize[0]+0.5f);
            dx -= wrapA*periodicBoxVectors[0][0];
            break;
        }
        case PeriodicPerInteraction: {
            dx -= round(dx*invBoxSize[0])*boxSize[0];
            dy -= round(dy*invBoxSize[1])*boxSize[1];
            dz -= round(dz*invBoxSize[2])*boxSize[2];
            break;
        }
        default:
            // No wrapping required for the remaining periodic modes.
            break;
    }

    r2 = dx*dx + dy*dy + dz*dz;
}
}
// namespace OpenMM
#endif // OPENMM_CPU_NONBONDED_FORCE_FVEC_H__
platforms/cpu/include/CpuNonbondedForceVec4.h
deleted
100644 → 0
View file @
9026dbe7
/* Portions copyright (c) 2006-2015 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef OPENMM_CPU_NONBONDED_FORCE_VEC4_H__
#define OPENMM_CPU_NONBONDED_FORCE_VEC4_H__
#include "CpuNonbondedForce.h"
// ---------------------------------------------------------------------------------------
namespace
OpenMM
{
/**
 * CpuNonbondedForce subclass whose block kernels and math helpers operate on fvec4 values.
 */
class CpuNonbondedForceVec4 : public CpuNonbondedForce {
public:
    /**
     * Constructor.
     */
    CpuNonbondedForceVec4();

protected:
    /**
     * Calculate all the interactions for one atom block.
     *
     * @param blockIndex   the index of the atom block
     * @param forces       force array (forces added)
     * @param totalEnergy  total energy
     * @param boxSize      periodic box size
     * @param invBoxSize   presumably the component-wise reciprocal of boxSize -- TODO confirm against callers
     */
    void calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy,
                           const fvec4& boxSize, const fvec4& invBoxSize);

    /**
     * Implementation of calculateBlockIxn, templatized on the periodic boundary treatment.
     */
    template <int PERIODIC_TYPE>
    void calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy,
                               const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter);

    /**
     * Calculate all the interactions for one atom block (Ewald variant).
     *
     * @param blockIndex   the index of the atom block
     * @param forces       force array (forces added)
     * @param totalEnergy  total energy
     * @param boxSize      periodic box size
     * @param invBoxSize   presumably the component-wise reciprocal of boxSize -- TODO confirm against callers
     */
    void calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy,
                                const fvec4& boxSize, const fvec4& invBoxSize);

    /**
     * Implementation of calculateBlockEwaldIxn, templatized on the periodic boundary treatment.
     */
    template <int PERIODIC_TYPE>
    void calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy,
                                    const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter);

    /**
     * Compute the displacement and squared distance between a collection of points,
     * optionally using periodic boundary conditions.
     */
    template <int PERIODIC_TYPE>
    void getDeltaR(const fvec4& posI, const fvec4& x, const fvec4& y, const fvec4& z,
                   fvec4& dx, fvec4& dy, fvec4& dz, fvec4& r2,
                   bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const;

    /**
     * Fast approximation to erfc(x).
     */
    fvec4 erfcApprox(const fvec4& x);

    /**
     * Scale factor used with Ewald and PME:
     * erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
     */
    fvec4 ewaldScaleFunction(const fvec4& x);

    /**
     * Fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4)),
     * where dar = (dispersionAlpha * R). Needed for LJPME energies.
     */
    fvec4 exptermsApprox(const fvec4& R);

    /**
     * Fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4 + dar^6/6.0)),
     * where dar = (dispersionAlpha * R). Needed for LJPME forces.
     */
    fvec4 dExptermsApprox(const fvec4& R);
};
}
// namespace OpenMM
// ---------------------------------------------------------------------------------------
#endif // OPENMM_CPU_NONBONDED_FORCE_VEC4_H__
platforms/cpu/include/CpuNonbondedForceVec8.h
deleted
100644 → 0
View file @
9026dbe7
/* Portions copyright (c) 2006-2015 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef OPENMM_CPU_NONBONDED_FORCE_VEC8_H__
#define OPENMM_CPU_NONBONDED_FORCE_VEC8_H__
#include "CpuNonbondedForce.h"
#ifdef __AVX__
#include "openmm/internal/vectorize8.h"
// ---------------------------------------------------------------------------------------
namespace
OpenMM
{
/**
 * CpuNonbondedForce subclass whose block kernels and math helpers operate on fvec8 values.
 * Box dimensions and single-atom positions are still passed as fvec4.
 */
class CpuNonbondedForceVec8 : public CpuNonbondedForce {
public:
    /**
     * Constructor.
     */
    CpuNonbondedForceVec8();

protected:
    /**
     * Calculate all the interactions for one atom block.
     *
     * @param blockIndex   the index of the atom block
     * @param forces       force array (forces added)
     * @param totalEnergy  total energy
     * @param boxSize      periodic box size
     * @param invBoxSize   presumably the component-wise reciprocal of boxSize -- TODO confirm against callers
     */
    void calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy,
                           const fvec4& boxSize, const fvec4& invBoxSize);

    /**
     * Implementation of calculateBlockIxn, templatized on the periodic boundary treatment.
     */
    template <int PERIODIC_TYPE>
    void calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy,
                               const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter);

    /**
     * Calculate all the interactions for one atom block (Ewald variant).
     *
     * @param blockIndex   the index of the atom block
     * @param forces       force array (forces added)
     * @param totalEnergy  total energy
     * @param boxSize      periodic box size
     * @param invBoxSize   presumably the component-wise reciprocal of boxSize -- TODO confirm against callers
     */
    void calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy,
                                const fvec4& boxSize, const fvec4& invBoxSize);

    /**
     * Implementation of calculateBlockEwaldIxn, templatized on the periodic boundary treatment.
     */
    template <int PERIODIC_TYPE>
    void calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy,
                                    const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter);

    /**
     * Compute the displacement and squared distance between a collection of points,
     * optionally using periodic boundary conditions.
     */
    template <int PERIODIC_TYPE>
    void getDeltaR(const fvec4& posI, const fvec8& x, const fvec8& y, const fvec8& z,
                   fvec8& dx, fvec8& dy, fvec8& dz, fvec8& r2,
                   bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const;

    /**
     * Fast approximation to erfc(x).
     */
    fvec8 erfcApprox(const fvec8& x);

    /**
     * Scale factor used with Ewald and PME:
     * erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
     */
    fvec8 ewaldScaleFunction(const fvec8& x);

    /**
     * Fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4)),
     * where dar = (dispersionAlpha * R). Needed for LJPME energies.
     */
    fvec8 exptermsApprox(const fvec8& R);

    /**
     * Fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4 + dar^6/6.0)),
     * where dar = (dispersionAlpha * R). Needed for LJPME forces.
     */
    fvec8 dExptermsApprox(const fvec8& R);
};
}
// namespace OpenMM
// ---------------------------------------------------------------------------------------
#endif // __AVX__
#endif // OPENMM_CPU_NONBONDED_FORCE_VEC8_H__
platforms/cpu/src/CpuCustomGBForce.cpp
View file @
0b5d58d7
...
...
@@ -363,7 +363,7 @@ void CpuCustomGBForce::calculateParticlePairValue(int index, ThreadData& data, i
const
int
blockSize
=
neighborList
->
getBlockSize
();
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>
&
blockExclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
const
auto
&
blockExclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
int
first
=
neighbors
[
i
];
for
(
int
k
=
0
;
k
<
blockSize
;
k
++
)
{
...
...
@@ -458,7 +458,7 @@ void CpuCustomGBForce::calculateParticlePairEnergyTerm(int index, ThreadData& da
const
int
blockSize
=
neighborList
->
getBlockSize
();
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>
&
blockExclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
const
auto
&
blockExclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
int
first
=
neighbors
[
i
];
for
(
int
k
=
0
;
k
<
blockSize
;
k
++
)
{
...
...
@@ -545,7 +545,7 @@ void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms,
const
int
blockSize
=
neighborList
->
getBlockSize
();
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>
&
blockExclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
const
auto
&
blockExclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
int
first
=
neighbors
[
i
];
for
(
int
k
=
0
;
k
<
blockSize
;
k
++
)
{
...
...
platforms/cpu/src/CpuCustomManyParticleForce.cpp
View file @
0b5d58d7
...
...
@@ -110,7 +110,7 @@ void CpuCustomManyParticleForce::calculateIxn(AlignedArray<float>& posq, vector<
neighborList
->
computeNeighborList
(
numParticles
,
posq
,
exclusions
,
periodicBoxVectors
,
usePeriodic
,
cutoffDistance
,
threads
);
for
(
int
blockIndex
=
0
;
blockIndex
<
neighborList
->
getNumBlocks
();
blockIndex
++
)
{
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>
&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
const
auto
&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
int
numNeighbors
=
neighbors
.
size
();
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
int
p1
=
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
+
i
];
...
...
platforms/cpu/src/CpuCustomNonbondedForce.cpp
View file @
0b5d58d7
...
...
@@ -195,7 +195,7 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
const
int
blockSize
=
neighborList
->
getBlockSize
();
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>
&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
const
auto
&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
int
first
=
neighbors
[
i
];
for
(
int
j
=
0
;
j
<
(
int
)
paramNames
.
size
();
j
++
)
...
...
platforms/cpu/src/CpuGayBerneForce.cpp
View file @
0b5d58d7
...
...
@@ -183,7 +183,7 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex,
const
int
blockSize
=
neighborList
->
getBlockSize
();
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>
&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
const
auto
&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
int
first
=
neighbors
[
i
];
if
(
particles
[
first
].
sqrtEpsilon
==
0.0
f
)
...
...
platforms/cpu/src/CpuKernels.cpp
View file @
0b5d58d7
...
...
@@ -472,16 +472,11 @@ private:
int
numParticles
;
};
bool
isVec8Supported
();
CpuNonbondedForce
*
createCpuNonbondedForceVec4
();
CpuNonbondedForce
*
createCpuNonbondedForceVec8
();
CpuNonbondedForce
*
createCpuNonbondedForceVec
();
CpuCalcNonbondedForceKernel
::
CpuCalcNonbondedForceKernel
(
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
data
(
data
),
hasInitializedPme
(
false
),
hasInitializedDispersionPme
(
false
),
nonbonded
(
NULL
)
{
if
(
isVec8Supported
())
nonbonded
=
createCpuNonbondedForceVec8
();
else
nonbonded
=
createCpuNonbondedForceVec4
();
nonbonded
=
createCpuNonbondedForceVec
();
}
CpuCalcNonbondedForceKernel
::~
CpuCalcNonbondedForceKernel
()
{
...
...
platforms/cpu/src/CpuNeighborList.cpp
View file @
0b5d58d7
...
...
@@ -164,7 +164,7 @@ public:
return
VoxelIndex
(
y
,
z
);
}
void
getNeighbors
(
vector
<
int
>&
neighbors
,
int
blockIndex
,
const
fvec4
&
blockCenter
,
const
fvec4
&
blockWidth
,
const
vector
<
int
>&
sortedAtoms
,
vector
<
char
>&
exclusions
,
float
maxDistance
,
const
vector
<
int
>&
blockAtoms
,
const
vector
<
float
>&
blockAtomX
,
const
vector
<
float
>&
blockAtomY
,
const
vector
<
float
>&
blockAtomZ
,
const
vector
<
float
>&
sortedPositions
,
const
vector
<
VoxelIndex
>&
atomVoxelIndex
)
const
{
void
getNeighbors
(
vector
<
int
>&
neighbors
,
int
blockIndex
,
const
fvec4
&
blockCenter
,
const
fvec4
&
blockWidth
,
const
vector
<
int
>&
sortedAtoms
,
vector
<
CpuNeighborList
::
BlockExclusionMask
>&
exclusions
,
float
maxDistance
,
const
vector
<
int
>&
blockAtoms
,
const
vector
<
float
>&
blockAtomX
,
const
vector
<
float
>&
blockAtomY
,
const
vector
<
float
>&
blockAtomZ
,
const
vector
<
float
>&
sortedPositions
,
const
vector
<
VoxelIndex
>&
atomVoxelIndex
)
const
{
neighbors
.
resize
(
0
);
exclusions
.
resize
(
0
);
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
...
...
@@ -484,10 +484,10 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
int
numPadding
=
numBlocks
*
blockSize
-
numAtoms
;
if
(
numPadding
>
0
)
{
c
har
mask
=
((
0xFFFF
-
(
1
<<
blockSize
)
+
1
)
>>
numPadding
);
c
onst
BlockExclusionMask
mask
=
(
~
0
)
<<
(
blockSize
-
numPadding
);
for
(
int
i
=
0
;
i
<
numPadding
;
i
++
)
sortedAtoms
.
push_back
(
0
);
vector
<
char
>
&
exc
=
blockExclusions
[
blockExclusions
.
size
()
-
1
];
auto
&
exc
=
blockExclusions
[
blockExclusions
.
size
()
-
1
];
for
(
int
i
=
0
;
i
<
(
int
)
exc
.
size
();
i
++
)
exc
[
i
]
|=
mask
;
}
...
...
@@ -509,7 +509,7 @@ const std::vector<int>& CpuNeighborList::getBlockNeighbors(int blockIndex) const
return
blockNeighbors
[
blockIndex
];
}
const
std
::
vector
<
char
>&
CpuNeighborList
::
getBlockExclusions
(
int
blockIndex
)
const
{
const
std
::
vector
<
CpuNeighborList
::
BlockExclusionMask
>&
CpuNeighborList
::
getBlockExclusions
(
int
blockIndex
)
const
{
return
blockExclusions
[
blockIndex
];
}
...
...
@@ -573,12 +573,12 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
// Record the exclusions for this block.
map
<
int
,
char
>
atomFlags
;
map
<
int
,
BlockExclusionMask
>
atomFlags
;
for
(
int
j
=
0
;
j
<
atomsInBlock
;
j
++
)
{
const
set
<
int
>&
atomExclusions
=
(
*
exclusions
)[
sortedAtoms
[
firstIndex
+
j
]];
c
har
mask
=
1
<<
j
;
c
onst
BlockExclusionMask
mask
=
1
<<
j
;
for
(
int
exclusion
:
atomExclusions
)
{
map
<
int
,
char
>::
itera
to
r
thisAtomFlags
=
atomFlags
.
find
(
exclusion
);
const
au
to
thisAtomFlags
=
atomFlags
.
find
(
exclusion
);
if
(
thisAtomFlags
==
atomFlags
.
end
())
atomFlags
[
exclusion
]
=
mask
;
else
...
...
@@ -588,7 +588,7 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
int
numNeighbors
=
blockNeighbors
[
i
].
size
();
for
(
int
k
=
0
;
k
<
numNeighbors
;
k
++
)
{
int
atomIndex
=
blockNeighbors
[
i
][
k
];
map
<
int
,
char
>::
iter
ato
r
thisAtomFlags
=
atomFlags
.
find
(
atomIndex
);
a
u
to
thisAtomFlags
=
atomFlags
.
find
(
atomIndex
);
if
(
thisAtomFlags
!=
atomFlags
.
end
())
blockExclusions
[
i
][
k
]
|=
thisAtomFlags
->
second
;
}
...
...
platforms/cpu/src/CpuNonbondedForceFvec.cpp
0 → 100644
View file @
0b5d58d7
/* Portions copyright (c) 2006-2015 Stanford University and Simbios.
* Contributors: Daniel Towner
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "CpuNonbondedForceFvec.h"
OpenMM
::
CpuNonbondedForce
*
createCpuNonbondedForceVec4
();
OpenMM
::
CpuNonbondedForce
*
createCpuNonbondedForceVec8
();
bool
isVec8Supported
();
/**
 * Create a CpuNonbondedForce: the Vec8 factory is used when isVec8Supported() reports true,
 * and the Vec4 factory otherwise.
 */
OpenMM::CpuNonbondedForce* createCpuNonbondedForceVec() {
    return isVec8Supported() ? createCpuNonbondedForceVec8()
                             : createCpuNonbondedForceVec4();
}
/**
 * Report the block size that goes with the implementation selected by isVec8Supported():
 * 8 when it returns true, 4 otherwise.
 */
int getVecBlockSize() {
    const bool useVec8 = isVec8Supported();
    return useVec8 ? 8 : 4;
}
platforms/cpu/src/CpuNonbondedForceVec4.cpp
View file @
0b5d58d7
...
...
@@ -22,440 +22,11 @@
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForceVec4.h"
#include <algorithm>
#include <iostream>
#include "CpuNonbondedForceFvec.h"
using
namespace
std
;
using
namespace
OpenMM
;
// Very minimal file. It exists purely so that this code can be compiled as its own SIMD-4 translation unit.
/**
 * Factory function: allocates a new CpuNonbondedForceVec4 and hands it back
 * through a pointer to its CpuNonbondedForce base class.
 */
CpuNonbondedForce* createCpuNonbondedForceVec4() {
    CpuNonbondedForce* const instance = new CpuNonbondedForceVec4();
    return instance;
}
/**
 * CpuNonbondedForceVec4 constructor. The body is empty.
 */
CpuNonbondedForceVec4::CpuNonbondedForceVec4() {}

/**
 * Periodic boundary treatments a block kernel can be instantiated for.
 * The enumerators are used as the PERIODIC_TYPE template argument.
 */
enum PeriodicType {
    NoPeriodic = 0,
    PeriodicPerAtom = 1,
    PeriodicPerInteraction = 2,
    PeriodicTriclinic = 3
};
void
CpuNonbondedForceVec4
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Determine whether we need to apply periodic boundary conditions.
PeriodicType
periodicType
;
fvec4
blockCenter
;
if
(
!
periodic
)
{
periodicType
=
NoPeriodic
;
blockCenter
=
0.0
f
;
}
else
{
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
float
minx
,
maxx
,
miny
,
maxy
,
minz
,
maxz
;
minx
=
maxx
=
posq
[
4
*
blockAtom
[
0
]];
miny
=
maxy
=
posq
[
4
*
blockAtom
[
0
]
+
1
];
minz
=
maxz
=
posq
[
4
*
blockAtom
[
0
]
+
2
];
for
(
int
i
=
1
;
i
<
4
;
i
++
)
{
minx
=
min
(
minx
,
posq
[
4
*
blockAtom
[
i
]]);
maxx
=
max
(
maxx
,
posq
[
4
*
blockAtom
[
i
]]);
miny
=
min
(
miny
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
maxy
=
max
(
maxy
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
minz
=
min
(
minz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
maxz
=
max
(
maxz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
}
blockCenter
=
fvec4
(
0.5
f
*
(
minx
+
maxx
),
0.5
f
*
(
miny
+
maxy
),
0.5
f
*
(
minz
+
maxz
),
0.0
f
);
if
(
!
(
minx
<
cutoffDistance
||
miny
<
cutoffDistance
||
minz
<
cutoffDistance
||
maxx
>
boxSize
[
0
]
-
cutoffDistance
||
maxy
>
boxSize
[
1
]
-
cutoffDistance
||
maxz
>
boxSize
[
2
]
-
cutoffDistance
))
periodicType
=
NoPeriodic
;
else
if
(
triclinic
)
periodicType
=
PeriodicTriclinic
;
else
if
(
0.5
f
*
(
boxSize
[
0
]
-
(
maxx
-
minx
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
1
]
-
(
maxy
-
miny
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
2
]
-
(
maxz
-
minz
))
>=
cutoffDistance
)
periodicType
=
PeriodicPerAtom
;
else
periodicType
=
PeriodicPerInteraction
;
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if
(
periodicType
==
NoPeriodic
)
calculateBlockIxnImpl
<
NoPeriodic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerAtom
)
calculateBlockIxnImpl
<
PeriodicPerAtom
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerInteraction
)
calculateBlockIxnImpl
<
PeriodicPerInteraction
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicTriclinic
)
calculateBlockIxnImpl
<
PeriodicTriclinic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
}
template
<
int
PERIODIC_TYPE
>
void
CpuNonbondedForceVec4
::
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
)
{
// Load the positions and parameters of the atoms in the block.
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
blockAtomPosq
[
i
]
-=
floor
((
blockAtomPosq
[
i
]
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
}
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
const
bool
needPeriodic
=
(
PERIODIC_TYPE
==
PeriodicPerInteraction
||
PERIODIC_TYPE
==
PeriodicTriclinic
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
// Compute the distances to the block atoms.
fvec4
dx
,
dy
,
dz
,
r2
;
fvec4
atomPos
(
posq
+
4
*
atom
);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
atomPos
-=
floor
((
atomPos
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
getDeltaR
<
PERIODIC_TYPE
>
(
atomPos
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec4
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
include
=
-
1
;
else
include
=
ivec4
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
// Compute the interactions.
fvec4
inverseR
=
rsqrt
(
r2
);
fvec4
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec4
r
=
r2
*
inverseR
;
fvec4
t
=
blend
(
0.0
f
,
(
r
-
switchingDistance
)
*
invSwitchingInterval
,
r
>
switchingDistance
);
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
if
(
cutoff
)
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
else
dEdR
+=
chargeProd
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
fvec4
one
(
1.0
f
);
if
(
totalEnergy
)
{
if
(
cutoff
)
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
else
energy
+=
chargeProd
*
inverseR
;
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot4
(
energy
,
one
);
}
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec4
fx
=
dx
*
dEdR
;
fvec4
fy
=
dy
*
dEdR
;
fvec4
fz
=
dz
*
dEdR
;
blockAtomForceX
+=
fx
;
blockAtomForceY
+=
fy
;
blockAtomForceZ
+=
fz
;
float
*
atomForce
=
forces
+
4
*
atom
;
atomForce
[
0
]
-=
dot4
(
fx
,
one
);
atomForce
[
1
]
-=
dot4
(
fy
,
one
);
atomForce
[
2
]
-=
dot4
(
fz
,
one
);
}
// Record the forces on the block atoms.
fvec4
f
[
4
]
=
{
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
};
transpose
(
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
]);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForceVec4
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Determine whether we need to apply periodic boundary conditions.
PeriodicType
periodicType
;
fvec4
blockCenter
;
if
(
!
periodic
)
{
periodicType
=
NoPeriodic
;
blockCenter
=
0.0
f
;
}
else
{
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
float
minx
,
maxx
,
miny
,
maxy
,
minz
,
maxz
;
minx
=
maxx
=
posq
[
4
*
blockAtom
[
0
]];
miny
=
maxy
=
posq
[
4
*
blockAtom
[
0
]
+
1
];
minz
=
maxz
=
posq
[
4
*
blockAtom
[
0
]
+
2
];
for
(
int
i
=
1
;
i
<
4
;
i
++
)
{
minx
=
min
(
minx
,
posq
[
4
*
blockAtom
[
i
]]);
maxx
=
max
(
maxx
,
posq
[
4
*
blockAtom
[
i
]]);
miny
=
min
(
miny
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
maxy
=
max
(
maxy
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
minz
=
min
(
minz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
maxz
=
max
(
maxz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
}
blockCenter
=
fvec4
(
0.5
f
*
(
minx
+
maxx
),
0.5
f
*
(
miny
+
maxy
),
0.5
f
*
(
minz
+
maxz
),
0.0
f
);
if
(
!
(
minx
<
cutoffDistance
||
miny
<
cutoffDistance
||
minz
<
cutoffDistance
||
maxx
>
boxSize
[
0
]
-
cutoffDistance
||
maxy
>
boxSize
[
1
]
-
cutoffDistance
||
maxz
>
boxSize
[
2
]
-
cutoffDistance
))
periodicType
=
NoPeriodic
;
else
if
(
triclinic
)
periodicType
=
PeriodicTriclinic
;
else
if
(
0.5
f
*
(
boxSize
[
0
]
-
(
maxx
-
minx
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
1
]
-
(
maxy
-
miny
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
2
]
-
(
maxz
-
minz
))
>=
cutoffDistance
)
periodicType
=
PeriodicPerAtom
;
else
periodicType
=
PeriodicPerInteraction
;
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if
(
periodicType
==
NoPeriodic
)
calculateBlockEwaldIxnImpl
<
NoPeriodic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerAtom
)
calculateBlockEwaldIxnImpl
<
PeriodicPerAtom
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerInteraction
)
calculateBlockEwaldIxnImpl
<
PeriodicPerInteraction
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicTriclinic
)
calculateBlockEwaldIxnImpl
<
PeriodicTriclinic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
}
template
<
int
PERIODIC_TYPE
>
void
CpuNonbondedForceVec4
::
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
)
{
// Load the positions and parameters of the atoms in the block.
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
blockAtomPosq
[
i
]
-=
floor
((
blockAtomPosq
[
i
]
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
}
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
fvec4
C6s
(
C6params
[
blockAtom
[
0
]],
C6params
[
blockAtom
[
1
]],
C6params
[
blockAtom
[
2
]],
C6params
[
blockAtom
[
3
]]);
const
bool
needPeriodic
=
(
PERIODIC_TYPE
==
PeriodicPerInteraction
||
PERIODIC_TYPE
==
PeriodicTriclinic
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
// Compute the distances to the block atoms.
fvec4
dx
,
dy
,
dz
,
r2
;
fvec4
atomPos
(
posq
+
4
*
atom
);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
atomPos
-=
floor
((
atomPos
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
getDeltaR
<
PERIODIC_TYPE
>
(
atomPos
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec4
include
;
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
include
=
-
1
;
else
include
=
ivec4
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
// No interactions to compute.
// Compute the interactions.
fvec4
inverseR
=
rsqrt
(
r2
);
fvec4
r
=
r2
*
inverseR
;
fvec4
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
eps
=
blockAtomEpsilon
*
atomEpsilon
;
fvec4
epsSig6
=
eps
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
blend
(
0.0
f
,
(
r
-
switchingDistance
)
*
invSwitchingInterval
,
r
>
switchingDistance
);
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
}
if
(
ljpme
)
{
fvec4
C6ij
=
C6s
*
C6params
[
atom
];
fvec4
inverseR2
=
inverseR
*
inverseR
;
fvec4
mysig2
=
sig
*
sig
;
fvec4
mysig6
=
mysig2
*
mysig2
*
mysig2
;
fvec4
emult
=
C6ij
*
inverseR2
*
inverseR2
*
inverseR2
*
exptermsApprox
(
r
);
fvec4
potentialShift
=
eps
*
(
1.0
f
-
mysig6
*
inverseRcut6
)
*
mysig6
*
inverseRcut6
-
C6ij
*
inverseRcut6Expterm
;
dEdR
+=
6.0
f
*
C6ij
*
inverseR2
*
inverseR2
*
inverseR2
*
dExptermsApprox
(
r
);
energy
+=
emult
+
potentialShift
;
}
}
else
{
energy
=
0.0
f
;
dEdR
=
0.0
f
;
}
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
dEdR
+=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
fvec4
one
(
1.0
f
);
if
(
totalEnergy
)
{
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot4
(
energy
,
one
);
}
// Accumulate forces.
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
fvec4
fx
=
dx
*
dEdR
;
fvec4
fy
=
dy
*
dEdR
;
fvec4
fz
=
dz
*
dEdR
;
blockAtomForceX
+=
fx
;
blockAtomForceY
+=
fy
;
blockAtomForceZ
+=
fz
;
float
*
atomForce
=
forces
+
4
*
atom
;
atomForce
[
0
]
-=
dot4
(
fx
,
one
);
atomForce
[
1
]
-=
dot4
(
fy
,
one
);
atomForce
[
2
]
-=
dot4
(
fz
,
one
);
}
// Record the forces on the block atoms.
fvec4
f
[
4
]
=
{
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
};
transpose
(
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
]);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
template
<
int
PERIODIC_TYPE
>
void
CpuNonbondedForceVec4
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
dx
=
x
-
posI
[
0
];
dy
=
y
-
posI
[
1
];
dz
=
z
-
posI
[
2
];
if
(
PERIODIC_TYPE
==
PeriodicTriclinic
)
{
fvec4
scale3
=
floor
(
dz
*
recipBoxSize
[
2
]
+
0.5
f
);
dx
-=
scale3
*
periodicBoxVectors
[
2
][
0
];
dy
-=
scale3
*
periodicBoxVectors
[
2
][
1
];
dz
-=
scale3
*
periodicBoxVectors
[
2
][
2
];
fvec4
scale2
=
floor
(
dy
*
recipBoxSize
[
1
]
+
0.5
f
);
dx
-=
scale2
*
periodicBoxVectors
[
1
][
0
];
dy
-=
scale2
*
periodicBoxVectors
[
1
][
1
];
fvec4
scale1
=
floor
(
dx
*
recipBoxSize
[
0
]
+
0.5
f
);
dx
-=
scale1
*
periodicBoxVectors
[
0
][
0
];
}
else
if
(
PERIODIC_TYPE
==
PeriodicPerInteraction
)
{
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
}
/**
 * Evaluate the tabulated function stored in erfcTable for four arguments at
 * once, using linear interpolation between neighbouring table entries.
 *
 * @param x  the four arguments
 * @return the four interpolated table values
 */
fvec4 CpuNonbondedForceVec4::erfcApprox(const fvec4& x) {
    // Convert the arguments to table coordinates; the integer slot is capped at NUM_TABLE_POINTS.
    fvec4 tablePos = x*erfcDXInv;
    ivec4 slot = min(floor(tablePos), NUM_TABLE_POINTS);

    // Interpolation weights: the offset past the slot weights the upper entry.
    fvec4 upperWeight = tablePos-slot;
    fvec4 lowerWeight = 1.0f-upperWeight;

    // Load a run of table entries starting at each lane's slot, then transpose so
    // that one vector holds every lane's lower entry and the next its upper entry.
    fvec4 lowerEntry(&erfcTable[slot[0]]);
    fvec4 upperEntry(&erfcTable[slot[1]]);
    fvec4 unused2(&erfcTable[slot[2]]);
    fvec4 unused3(&erfcTable[slot[3]]);
    transpose(lowerEntry, upperEntry, unused2, unused3);

    return lowerWeight*lowerEntry + upperWeight*upperEntry;
}
/**
 * Compute the tabulated Ewald scale factor,
 * erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI),
 * for four arguments at once by linear interpolation in ewaldScaleTable.
 *
 * @param x  the four arguments
 * @return the four interpolated table values
 */
fvec4 CpuNonbondedForceVec4::ewaldScaleFunction(const fvec4& x) {
    // Convert the arguments to table coordinates; the integer slot is capped at NUM_TABLE_POINTS.
    fvec4 tablePos = x*ewaldDXInv;
    ivec4 slot = min(floor(tablePos), NUM_TABLE_POINTS);

    // Interpolation weights: the offset past the slot weights the upper entry.
    fvec4 upperWeight = tablePos-slot;
    fvec4 lowerWeight = 1.0f-upperWeight;

    // Load a run of table entries starting at each lane's slot, then transpose so
    // that one vector holds every lane's lower entry and the next its upper entry.
    fvec4 lowerEntry(&ewaldScaleTable[slot[0]]);
    fvec4 upperEntry(&ewaldScaleTable[slot[1]]);
    fvec4 unused2(&ewaldScaleTable[slot[2]]);
    fvec4 unused3(&ewaldScaleTable[slot[3]]);
    transpose(lowerEntry, upperEntry, unused2, unused3);

    return lowerWeight*lowerEntry + upperWeight*upperEntry;
}
/**
 * Evaluate the tabulated function stored in exptermsTable for four distances
 * at once, using linear interpolation between neighbouring table entries.
 *
 * @param r  the four distances
 * @return the four interpolated table values
 */
fvec4 CpuNonbondedForceVec4::exptermsApprox(const fvec4& r) {
    // Convert the distances to table coordinates; the integer slot is capped at NUM_TABLE_POINTS.
    fvec4 tablePos = r*exptermsDXInv;
    ivec4 slot = min(floor(tablePos), NUM_TABLE_POINTS);

    // Interpolation weights: the offset past the slot weights the upper entry.
    fvec4 upperWeight = tablePos-slot;
    fvec4 lowerWeight = 1.0f-upperWeight;

    // Load a run of table entries starting at each lane's slot, then transpose so
    // that one vector holds every lane's lower entry and the next its upper entry.
    fvec4 lowerEntry(&exptermsTable[slot[0]]);
    fvec4 upperEntry(&exptermsTable[slot[1]]);
    fvec4 unused2(&exptermsTable[slot[2]]);
    fvec4 unused3(&exptermsTable[slot[3]]);
    transpose(lowerEntry, upperEntry, unused2, unused3);

    return lowerWeight*lowerEntry + upperWeight*upperEntry;
}
/**
 * Evaluate the tabulated function stored in dExptermsTable for four distances
 * at once, using linear interpolation between neighbouring table entries.
 * The table coordinate uses the same spacing (exptermsDXInv) as exptermsApprox().
 *
 * @param r  the four distances
 * @return the four interpolated table values
 */
fvec4 CpuNonbondedForceVec4::dExptermsApprox(const fvec4& r) {
    // Convert the distances to table coordinates; the integer slot is capped at NUM_TABLE_POINTS.
    fvec4 tablePos = r*exptermsDXInv;
    ivec4 slot = min(floor(tablePos), NUM_TABLE_POINTS);

    // Interpolation weights: the offset past the slot weights the upper entry.
    fvec4 upperWeight = tablePos-slot;
    fvec4 lowerWeight = 1.0f-upperWeight;

    // Load a run of table entries starting at each lane's slot, then transpose so
    // that one vector holds every lane's lower entry and the next its upper entry.
    fvec4 lowerEntry(&dExptermsTable[slot[0]]);
    fvec4 upperEntry(&dExptermsTable[slot[1]]);
    fvec4 unused2(&dExptermsTable[slot[2]]);
    fvec4 unused3(&dExptermsTable[slot[3]]);
    transpose(lowerEntry, upperEntry, unused2, unused3);

    return lowerWeight*lowerEntry + upperWeight*upperEntry;
}
/**
 * Factory for the fvec4 instantiation of the vectorized CPU nonbonded force.
 *
 * @return a newly heap-allocated CpuNonbondedForceFvec<fvec4>, handed back
 *         through a CpuNonbondedForce pointer. This function keeps no reference
 *         to it, so the caller is presumably responsible for deleting it.
 */
OpenMM::CpuNonbondedForce* createCpuNonbondedForceVec4() {
    OpenMM::CpuNonbondedForce* force = new OpenMM::CpuNonbondedForceFvec<fvec4>();
    return force;
}
platforms/cpu/src/CpuNonbondedForceVec8.cpp
View file @
0b5d58d7
This diff is collapsed.
Click to expand it.
platforms/cpu/src/CpuPlatform.cpp
View file @
0b5d58d7
...
...
@@ -165,11 +165,14 @@ CpuPlatform::PlatformData::~PlatformData() {
delete
neighborList
;
}
bool
isVec8Supported
();
/**
* Return how much vectorisation is supported for the host platform.
*/
int
getVecBlockSize
();
void
CpuPlatform
::
PlatformData
::
requestNeighborList
(
double
cutoffDistance
,
double
padding
,
bool
useExclusions
,
const
vector
<
set
<
int
>
>&
exclusionList
)
{
if
(
neighborList
==
NULL
)
neighborList
=
new
CpuNeighborList
(
isVec8Supported
()
?
8
:
4
);
neighborList
=
new
CpuNeighborList
(
getVecBlockSize
()
);
if
(
cutoffDistance
>
cutoff
)
cutoff
=
cutoffDistance
;
if
(
cutoffDistance
+
padding
>
paddedCutoff
)
...
...
platforms/cuda/src/CudaKernelFactory.cpp
View file @
0b5d58d7
...
...
@@ -133,10 +133,8 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
return
new
CommonIntegrateCustomStepKernel
(
name
,
platform
,
cu
);
if
(
name
==
ApplyAndersenThermostatKernel
::
Name
())
return
new
CommonApplyAndersenThermostatKernel
(
name
,
platform
,
cu
);
if
(
name
==
NoseHooverChainKernel
::
Name
())
return
new
CommonNoseHooverChainKernel
(
name
,
platform
,
cu
);
if
(
name
==
IntegrateVelocityVerletStepKernel
::
Name
())
return
new
CommonIntegrateVelocityVerletStepKernel
(
name
,
platform
,
cu
);
if
(
name
==
IntegrateNoseHooverStepKernel
::
Name
())
return
new
CommonIntegrateNoseHooverStepKernel
(
name
,
platform
,
cu
);
if
(
name
==
ApplyMonteCarloBarostatKernel
::
Name
())
return
new
CudaApplyMonteCarloBarostatKernel
(
name
,
platform
,
cu
);
if
(
name
==
RemoveCMMotionKernel
::
Name
())
...
...
platforms/cuda/src/CudaPlatform.cpp
View file @
0b5d58d7
...
...
@@ -96,7 +96,7 @@ CudaPlatform::CudaPlatform() {
registerKernelFactory
(
CalcCustomManyParticleForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcGayBerneForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVerletStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
Integrate
VelocityVerlet
StepKernel
::
Name
(),
factory
);
registerKernelFactory
(
Integrate
NoseHoover
StepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateLangevinStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateLangevinMiddleStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateBrownianStepKernel
::
Name
(),
factory
);
...
...
@@ -104,7 +104,6 @@ CudaPlatform::CudaPlatform() {
registerKernelFactory
(
IntegrateVariableLangevinStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateCustomStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
ApplyAndersenThermostatKernel
::
Name
(),
factory
);
registerKernelFactory
(
NoseHooverChainKernel
::
Name
(),
factory
);
registerKernelFactory
(
ApplyMonteCarloBarostatKernel
::
Name
(),
factory
);
registerKernelFactory
(
RemoveCMMotionKernel
::
Name
(),
factory
);
platformProperties
.
push_back
(
CudaDeviceIndex
());
...
...
platforms/cuda/tests/TestCudaNoseHooverThermostat.cpp
deleted
100644 → 0
View file @
9026dbe7
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019 Stanford University and the Authors. *
* Authors: Andreas Krämer and Andrew C. Simmonett *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CudaTests.h"
#include "TestNoseHooverThermostat.h"
/**
 * Platform-specific test hook. This file defines no checks of its own, so the
 * body is intentionally empty.
 * NOTE(review): presumably invoked by the shared driver pulled in through
 * TestNoseHooverThermostat.h - confirm against that header.
 */
void runPlatformTests() {
    // Nothing platform-specific to run.
}
platforms/opencl/src/OpenCLKernelFactory.cpp
View file @
0b5d58d7
...
...
@@ -131,10 +131,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
return
new
CommonIntegrateCustomStepKernel
(
name
,
platform
,
cl
);
if
(
name
==
ApplyAndersenThermostatKernel
::
Name
())
return
new
CommonApplyAndersenThermostatKernel
(
name
,
platform
,
cl
);
if
(
name
==
NoseHooverChainKernel
::
Name
())
return
new
CommonNoseHooverChainKernel
(
name
,
platform
,
cl
);
if
(
name
==
IntegrateVelocityVerletStepKernel
::
Name
())
return
new
CommonIntegrateVelocityVerletStepKernel
(
name
,
platform
,
cl
);
if
(
name
==
IntegrateNoseHooverStepKernel
::
Name
())
return
new
CommonIntegrateNoseHooverStepKernel
(
name
,
platform
,
cl
);
if
(
name
==
ApplyMonteCarloBarostatKernel
::
Name
())
return
new
OpenCLApplyMonteCarloBarostatKernel
(
name
,
platform
,
cl
);
if
(
name
==
RemoveCMMotionKernel
::
Name
())
...
...
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment