Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
6b10b909
Commit
6b10b909
authored
Oct 11, 2013
by
peastman
Browse files
Continuing to optimize CPU nonbonded routines
parent
9bc194b9
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
113 additions
and
132 deletions
+113
-132
platforms/cpu/include/CpuNonbondedForce.h
platforms/cpu/include/CpuNonbondedForce.h
+5
-4
platforms/cpu/src/CpuNonbondedForce.cpp
platforms/cpu/src/CpuNonbondedForce.cpp
+82
-102
platforms/cpu/tests/TestCpuEwald.cpp
platforms/cpu/tests/TestCpuEwald.cpp
+26
-26
No files found.
platforms/cpu/include/CpuNonbondedForce.h
View file @
6b10b909
...
...
@@ -29,6 +29,7 @@
#include <set>
#include <utility>
#include <vector>
#include <smmintrin.h>
// ---------------------------------------------------------------------------------------
class
CpuNonbondedForce
{
...
...
@@ -47,6 +48,7 @@ class CpuNonbondedForce {
float
alphaEwald
;
int
numRx
,
numRy
,
numRz
;
int
meshDim
[
3
];
__m128
boxSize
,
invBoxSize
,
half
;
// parameter indices
...
...
@@ -69,7 +71,7 @@ class CpuNonbondedForce {
void
calculateOneIxn
(
int
atom1
,
int
atom2
,
float
*
atomCoordinates
,
float
**
atomParameters
,
float
*
forces
,
float
*
totalEnergy
)
const
;
double
*
totalEnergy
)
const
;
public:
...
...
@@ -122,7 +124,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
void
setPeriodic
(
float
*
b
oxSize
);
void
setPeriodic
(
float
*
periodicB
oxSize
);
/**---------------------------------------------------------------------------------------
...
...
@@ -194,8 +196,7 @@ private:
float
*
fixedParameters
,
float
*
forces
,
float
*
totalEnergy
,
bool
includeDirect
,
bool
includeReciprocal
)
const
;
void
getDeltaR
(
const
float
*
atomCoordinatesI
,
const
float
*
atomCoordinatesJ
,
const
float
*
boxSize
,
float
*
deltaR
,
bool
periodic
)
const
;
void
getDeltaR
(
const
__m128
&
posI
,
const
__m128
&
posJ
,
__m128
&
deltaR
,
float
&
r2
,
bool
periodic
)
const
;
};
...
...
platforms/cpu/src/CpuNonbondedForce.cpp
View file @
6b10b909
...
...
@@ -113,16 +113,19 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */
void
CpuNonbondedForce
::
setPeriodic
(
float
*
b
oxSize
)
{
void
CpuNonbondedForce
::
setPeriodic
(
float
*
periodicB
oxSize
)
{
assert
(
cutoff
);
assert
(
b
oxSize
[
0
]
>=
2
*
cutoffDistance
);
assert
(
b
oxSize
[
1
]
>=
2
*
cutoffDistance
);
assert
(
b
oxSize
[
2
]
>=
2
*
cutoffDistance
);
assert
(
periodicB
oxSize
[
0
]
>=
2
*
cutoffDistance
);
assert
(
periodicB
oxSize
[
1
]
>=
2
*
cutoffDistance
);
assert
(
periodicB
oxSize
[
2
]
>=
2
*
cutoffDistance
);
periodic
=
true
;
periodicBoxSize
[
0
]
=
boxSize
[
0
];
periodicBoxSize
[
1
]
=
boxSize
[
1
];
periodicBoxSize
[
2
]
=
boxSize
[
2
];
this
->
periodicBoxSize
[
0
]
=
periodicBoxSize
[
0
];
this
->
periodicBoxSize
[
1
]
=
periodicBoxSize
[
1
];
this
->
periodicBoxSize
[
2
]
=
periodicBoxSize
[
2
];
boxSize
=
_mm_set_ps
(
0
,
periodicBoxSize
[
2
],
periodicBoxSize
[
1
],
periodicBoxSize
[
0
]);
invBoxSize
=
_mm_set_ps
(
0
,
(
1
/
periodicBoxSize
[
2
]),
(
1
/
periodicBoxSize
[
1
]),
(
1
/
periodicBoxSize
[
0
]));
half
=
_mm_set1_ps
(
0.5
);
}
/**---------------------------------------------------------------------------------------
...
...
@@ -198,7 +201,6 @@ void CpuNonbondedForce::calculateEwaldIxn(int numberOfAtoms, float* atomCoordina
float
totalSelfEwaldEnergy
=
0.0
;
float
realSpaceEwaldEnergy
=
0.0
;
float
recipEnergy
=
0.0
;
float
totalRecipEnergy
=
0.0
;
float
vdwEnergy
=
0.0
;
// **************************************************************************************
...
...
@@ -207,7 +209,7 @@ void CpuNonbondedForce::calculateEwaldIxn(int numberOfAtoms, float* atomCoordina
if
(
includeReciprocal
)
{
for
(
int
atomID
=
0
;
atomID
<
numberOfAtoms
;
atomID
++
){
float
selfEwaldEnergy
=
(
float
)
(
ONE_4PI_EPS0
*
atom
Parame
te
r
s
[
atomID
][
QIndex
]
*
atomParame
te
r
s
[
atomID
][
QIndex
]
*
alphaEwald
/
SQRT_PI
);
float
selfEwaldEnergy
=
(
float
)
(
ONE_4PI_EPS0
*
atom
Coordina
tes
[
4
*
atomID
+
3
]
*
atomCoordina
tes
[
4
*
atomID
+
3
]
*
alphaEwald
/
SQRT_PI
);
totalSelfEwaldEnergy
-=
selfEwaldEnergy
;
}
}
...
...
@@ -326,11 +328,8 @@ void CpuNonbondedForce::calculateEwaldIxn(int numberOfAtoms, float* atomCoordina
f
[
2
]
+=
2
*
recipCoeff
*
force
*
kz
;
}
recipEnergy
=
recipCoeff
*
ak
*
(
cs
*
cs
+
ss
*
ss
);
totalRecipEnergy
+=
recipEnergy
;
if
(
totalEnergy
)
*
totalEnergy
+=
recip
Energy
;
*
totalEnergy
+=
recip
Coeff
*
ak
*
(
cs
*
cs
+
ss
*
ss
)
;
lowrz
=
1
-
numRz
;
}
...
...
@@ -345,18 +344,21 @@ void CpuNonbondedForce::calculateEwaldIxn(int numberOfAtoms, float* atomCoordina
if
(
!
includeDirect
)
return
;
float
totalVdwEnergy
=
0.0
f
;
float
totalRealSpaceEwaldEnergy
=
0.0
f
;
double
totalVdwEnergy
=
0.0
f
;
double
totalRealSpaceEwaldEnergy
=
0.0
f
;
for
(
int
i
=
0
;
i
<
(
int
)
neighborList
->
size
();
i
++
)
{
pair
<
int
,
int
>
pair
=
(
*
neighborList
)[
i
];
int
ii
=
pair
.
first
;
int
jj
=
pair
.
second
;
float
deltaR
[
2
][
ReferenceForce
::
LastDeltaRIndex
];
getDeltaR
(
atomCoordinates
+
4
*
jj
,
atomCoordinates
+
4
*
ii
,
periodicBoxSize
,
deltaR
[
0
],
true
);
float
r
=
deltaR
[
0
][
ReferenceForce
::
RIndex
];
float
inverseR
=
one
/
(
deltaR
[
0
][
ReferenceForce
::
RIndex
]);
__m128
deltaR
;
__m128
posI
=
_mm_loadu_ps
(
atomCoordinates
+
4
*
ii
);
__m128
posJ
=
_mm_loadu_ps
(
atomCoordinates
+
4
*
jj
);
float
r2
;
getDeltaR
(
posJ
,
posI
,
deltaR
,
r2
,
true
);
float
r
=
sqrtf
(
r2
);
float
inverseR
=
1
/
r
;
float
switchValue
=
1
,
switchDeriv
=
0
;
if
(
useSwitch
&&
r
>
switchingDistance
)
{
float
t
=
(
r
-
switchingDistance
)
/
(
cutoffDistance
-
switchingDistance
);
...
...
@@ -366,8 +368,9 @@ void CpuNonbondedForce::calculateEwaldIxn(int numberOfAtoms, float* atomCoordina
float
alphaR
=
alphaEwald
*
r
;
float
dEdR
=
(
float
)
(
ONE_4PI_EPS0
*
atomParameters
[
ii
][
QIndex
]
*
atomParameters
[
jj
][
QIndex
]
*
inverseR
*
inverseR
*
inverseR
);
dEdR
=
(
float
)
(
dEdR
*
(
erfc
(
alphaR
)
+
2
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)
/
SQRT_PI
));
float
chargeProd
=
ONE_4PI_EPS0
*
atomCoordinates
[
4
*
ii
+
3
]
*
atomCoordinates
[
4
*
jj
+
3
];
float
dEdR
=
(
float
)
(
chargeProd
*
inverseR
*
inverseR
*
inverseR
);
dEdR
=
(
float
)
(
dEdR
*
(
erfc
(
alphaR
)
+
2
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)
/
SQRT_PI
));
float
sig
=
atomParameters
[
ii
][
SigIndex
]
+
atomParameters
[
jj
][
SigIndex
];
float
sig2
=
inverseR
*
sig
;
...
...
@@ -383,15 +386,13 @@ void CpuNonbondedForce::calculateEwaldIxn(int numberOfAtoms, float* atomCoordina
// accumulate forces
for
(
int
kk
=
0
;
kk
<
3
;
kk
++
){
float
force
=
dEdR
*
deltaR
[
0
][
kk
];
forces
[
4
*
ii
+
kk
]
+=
force
;
forces
[
4
*
jj
+
kk
]
-=
force
;
}
__m128
result
=
_mm_mul_ps
(
deltaR
,
_mm_set1_ps
(
dEdR
));
_mm_storeu_ps
(
forces
+
4
*
ii
,
_mm_add_ps
(
_mm_loadu_ps
(
forces
+
4
*
ii
),
result
));
_mm_storeu_ps
(
forces
+
4
*
jj
,
_mm_sub_ps
(
_mm_loadu_ps
(
forces
+
4
*
jj
),
result
));
// accumulate energies
realSpaceEwaldEnergy
=
(
float
)
(
ONE_4PI_EPS0
*
atomParameters
[
ii
][
QIndex
]
*
atomParameters
[
jj
][
QIndex
]
*
inverseR
*
erfc
(
alphaR
));
realSpaceEwaldEnergy
=
(
float
)
(
chargeProd
*
inverseR
*
erfc
(
alphaR
));
totalVdwEnergy
+=
vdwEnergy
;
totalRealSpaceEwaldEnergy
+=
realSpaceEwaldEnergy
;
...
...
@@ -410,26 +411,28 @@ void CpuNonbondedForce::calculateEwaldIxn(int numberOfAtoms, float* atomCoordina
int
ii
=
i
;
int
jj
=
*
iter
;
float
deltaR
[
2
][
ReferenceForce
::
LastDeltaRIndex
];
getDeltaR
(
atomCoordinates
+
4
*
jj
,
atomCoordinates
+
4
*
ii
,
periodicBoxSize
,
deltaR
[
0
],
false
);
float
r
=
deltaR
[
0
][
ReferenceForce
::
RIndex
];
float
inverseR
=
one
/
(
deltaR
[
0
][
ReferenceForce
::
RIndex
]);
__m128
deltaR
;
__m128
posI
=
_mm_loadu_ps
(
atomCoordinates
+
4
*
ii
);
__m128
posJ
=
_mm_loadu_ps
(
atomCoordinates
+
4
*
jj
);
float
r2
;
getDeltaR
(
posJ
,
posI
,
deltaR
,
r2
,
false
);
float
r
=
sqrtf
(
r2
);
float
inverseR
=
1
/
r
;
float
alphaR
=
alphaEwald
*
r
;
if
(
erf
(
alphaR
)
>
1e-6
)
{
float
dEdR
=
(
float
)
(
ONE_4PI_EPS0
*
atomParameters
[
ii
][
QIndex
]
*
atomParameters
[
jj
][
QIndex
]
*
inverseR
*
inverseR
*
inverseR
);
dEdR
=
(
float
)
(
dEdR
*
(
erf
(
alphaR
)
-
2
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)
/
SQRT_PI
));
float
chargeProd
=
ONE_4PI_EPS0
*
atomCoordinates
[
4
*
ii
+
3
]
*
atomCoordinates
[
4
*
jj
+
3
];
float
dEdR
=
(
float
)
(
chargeProd
*
inverseR
*
inverseR
*
inverseR
);
dEdR
=
(
float
)
(
dEdR
*
(
erf
(
alphaR
)
-
2
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)
/
SQRT_PI
));
// accumulate forces
for
(
int
kk
=
0
;
kk
<
3
;
kk
++
){
float
force
=
dEdR
*
deltaR
[
0
][
kk
];
forces
[
4
*
ii
+
kk
]
-=
force
;
forces
[
4
*
jj
+
kk
]
+=
force
;
}
__m128
result
=
_mm_mul_ps
(
deltaR
,
_mm_set1_ps
(
dEdR
));
_mm_storeu_ps
(
forces
+
4
*
ii
,
_mm_add_ps
(
_mm_loadu_ps
(
forces
+
4
*
ii
),
result
));
_mm_storeu_ps
(
forces
+
4
*
jj
,
_mm_sub_ps
(
_mm_loadu_ps
(
forces
+
4
*
jj
),
result
));
// accumulate energies
realSpaceEwaldEnergy
=
(
float
)
(
ONE_4PI_EPS0
*
atomParameters
[
ii
][
QIndex
]
*
atomParameters
[
jj
][
QIndex
]
*
inverseR
*
erf
(
alphaR
));
realSpaceEwaldEnergy
=
(
float
)
(
chargeProd
*
inverseR
*
erf
(
alphaR
));
totalExclusionEnergy
+=
realSpaceEwaldEnergy
;
}
...
...
@@ -470,10 +473,12 @@ void CpuNonbondedForce::calculatePairIxn(int numberOfAtoms, float* atomCoordinat
}
if
(
!
includeDirect
)
return
;
double
directEnergy
=
0
;
double
*
energyPtr
=
(
totalEnergy
==
NULL
?
NULL
:
&
directEnergy
);
if
(
cutoff
)
{
for
(
int
i
=
0
;
i
<
(
int
)
neighborList
->
size
();
i
++
)
{
pair
<
int
,
int
>
pair
=
(
*
neighborList
)[
i
];
calculateOneIxn
(
pair
.
first
,
pair
.
second
,
atomCoordinates
,
atomParameters
,
forces
,
totalE
nergy
);
calculateOneIxn
(
pair
.
first
,
pair
.
second
,
atomCoordinates
,
atomParameters
,
forces
,
e
nergy
Ptr
);
}
}
else
{
...
...
@@ -482,9 +487,11 @@ void CpuNonbondedForce::calculatePairIxn(int numberOfAtoms, float* atomCoordinat
for
(
int
jj
=
ii
+
1
;
jj
<
numberOfAtoms
;
jj
++
)
if
(
exclusions
[
jj
].
find
(
ii
)
==
exclusions
[
jj
].
end
())
calculateOneIxn
(
ii
,
jj
,
atomCoordinates
,
atomParameters
,
forces
,
totalE
nergy
);
calculateOneIxn
(
ii
,
jj
,
atomCoordinates
,
atomParameters
,
forces
,
e
nergy
Ptr
);
}
}
if
(
totalEnergy
!=
NULL
)
*
totalEnergy
+=
(
float
)
directEnergy
;
}
/**---------------------------------------------------------------------------------------
...
...
@@ -502,90 +509,63 @@ void CpuNonbondedForce::calculatePairIxn(int numberOfAtoms, float* atomCoordinat
void
CpuNonbondedForce
::
calculateOneIxn
(
int
ii
,
int
jj
,
float
*
atomCoordinates
,
float
**
atomParameters
,
float
*
forces
,
float
*
totalEnergy
)
const
{
// ---------------------------------------------------------------------------------------
static
const
std
::
string
methodName
=
"
\n
CpuNonbondedForce::calculateOneIxn"
;
// ---------------------------------------------------------------------------------------
// constants -- reduce Visual Studio warnings regarding conversions between float & double
static
const
float
zero
=
0.0
;
static
const
float
one
=
1.0
;
static
const
float
two
=
2.0
;
static
const
float
three
=
3.0
;
static
const
float
six
=
6.0
;
static
const
float
twelve
=
12.0
;
static
const
float
oneM
=
-
1.0
;
static
const
int
threeI
=
3
;
static
const
int
LastAtomIndex
=
2
;
float
deltaR
[
2
][
ReferenceForce
::
LastDeltaRIndex
];
double
*
totalEnergy
)
const
{
// get deltaR, R2, and R between 2 atoms
getDeltaR
(
atomCoordinates
+
4
*
jj
,
atomCoordinates
+
4
*
ii
,
periodicBoxSize
,
deltaR
[
0
],
periodic
);
float
r2
=
deltaR
[
0
][
ReferenceForce
::
R2Index
];
float
inverseR
=
one
/
(
deltaR
[
0
][
ReferenceForce
::
RIndex
]);
__m128
deltaR
;
__m128
posI
=
_mm_loadu_ps
(
atomCoordinates
+
4
*
ii
);
__m128
posJ
=
_mm_loadu_ps
(
atomCoordinates
+
4
*
jj
);
float
r2
;
getDeltaR
(
posJ
,
posI
,
deltaR
,
r2
,
periodic
);
float
r
=
sqrtf
(
r2
);
float
inverseR
=
1
/
r
;
float
switchValue
=
1
,
switchDeriv
=
0
;
if
(
useSwitch
)
{
float
r
=
deltaR
[
0
][
ReferenceForce
::
RIndex
];
if
(
r
>
switchingDistance
)
{
float
t
=
(
r
-
switchingDistance
)
/
(
cutoffDistance
-
switchingDistance
);
switchValue
=
1
+
t
*
t
*
t
*
(
-
10
+
t
*
(
15
-
t
*
6
));
switchDeriv
=
t
*
t
*
(
-
30
+
t
*
(
60
-
t
*
30
))
/
(
cutoffDistance
-
switchingDistance
);
}
if
(
useSwitch
&&
r
>
switchingDistance
)
{
float
t
=
(
r
-
switchingDistance
)
/
(
cutoffDistance
-
switchingDistance
);
switchValue
=
1
+
t
*
t
*
t
*
(
-
10
+
t
*
(
15
-
t
*
6
));
switchDeriv
=
t
*
t
*
(
-
30
+
t
*
(
60
-
t
*
30
))
/
(
cutoffDistance
-
switchingDistance
);
}
float
sig
=
atomParameters
[
ii
][
SigIndex
]
+
atomParameters
[
jj
][
SigIndex
];
float
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
float
eps
=
atomParameters
[
ii
][
EpsIndex
]
*
atomParameters
[
jj
][
EpsIndex
];
float
dEdR
=
switchValue
*
eps
*
(
twelve
*
sig6
-
six
)
*
sig6
;
float
dEdR
=
switchValue
*
eps
*
(
12.0
f
*
sig6
-
6.0
f
)
*
sig6
;
float
chargeProd
=
ONE_4PI_EPS0
*
atomCoordinates
[
4
*
ii
+
3
]
*
atomCoordinates
[
4
*
jj
+
3
];
if
(
cutoff
)
dEdR
+=
(
float
)
(
ONE_4PI_EPS0
*
atomParameters
[
ii
][
QIndex
]
*
atomParameters
[
jj
][
QIndex
]
*
(
inverseR
-
2.0
f
*
krf
*
r2
));
dEdR
+=
(
float
)
(
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
));
else
dEdR
+=
(
float
)
(
ONE_4PI_EPS0
*
atomParameters
[
ii
][
QIndex
]
*
atomParameters
[
jj
][
QIndex
]
*
inverseR
);
dEdR
+=
(
float
)
(
chargeProd
*
inverseR
);
dEdR
*=
inverseR
*
inverseR
;
float
energy
=
eps
*
(
sig6
-
one
)
*
sig6
;
float
energy
=
eps
*
(
sig6
-
1.0
f
)
*
sig6
;
if
(
useSwitch
)
{
dEdR
-=
energy
*
switchDeriv
*
inverseR
;
energy
*=
switchValue
;
}
if
(
cutoff
)
energy
+=
(
float
)
(
ONE_4PI_EPS0
*
atomParameters
[
ii
][
QIndex
]
*
atomParameters
[
jj
][
QIndex
]
*
(
inverseR
+
krf
*
r2
-
crf
));
else
energy
+=
(
float
)
(
ONE_4PI_EPS0
*
atomParameters
[
ii
][
QIndex
]
*
atomParameters
[
jj
][
QIndex
]
*
inverseR
);
// accumulate
forc
es
// accumulate
energi
es
for
(
int
kk
=
0
;
kk
<
3
;
kk
++
){
float
force
=
dEdR
*
deltaR
[
0
][
kk
];
forces
[
4
*
ii
+
kk
]
+=
force
;
forces
[
4
*
jj
+
kk
]
-=
force
;
if
(
totalEnergy
)
{
if
(
cutoff
)
energy
+=
(
float
)
(
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
));
else
energy
+=
(
float
)
(
chargeProd
*
inverseR
);
*
totalEnergy
+=
energy
;
}
// accumulate
energi
es
// accumulate
forc
es
if
(
totalEnergy
)
*
totalEnergy
+=
energy
;
__m128
result
=
_mm_mul_ps
(
deltaR
,
_mm_set1_ps
(
dEdR
));
_mm_storeu_ps
(
forces
+
4
*
ii
,
_mm_add_ps
(
_mm_loadu_ps
(
forces
+
4
*
ii
),
result
));
_mm_storeu_ps
(
forces
+
4
*
jj
,
_mm_sub_ps
(
_mm_loadu_ps
(
forces
+
4
*
jj
),
result
));
}
void
CpuNonbondedForce
::
getDeltaR
(
const
float
*
atomCoordinatesI
,
const
float
*
atomCoordinatesJ
,
const
float
*
boxSize
,
float
*
deltaR
,
bool
periodic
)
const
{
deltaR
[
ReferenceForce
::
XIndex
]
=
atomCoordinatesJ
[
0
]
-
atomCoordinatesI
[
0
];
deltaR
[
ReferenceForce
::
YIndex
]
=
atomCoordinatesJ
[
1
]
-
atomCoordinatesI
[
1
];
deltaR
[
ReferenceForce
::
ZIndex
]
=
atomCoordinatesJ
[
2
]
-
atomCoordinatesI
[
2
];
void
CpuNonbondedForce
::
getDeltaR
(
const
__m128
&
posI
,
const
__m128
&
posJ
,
__m128
&
deltaR
,
float
&
r2
,
bool
periodic
)
const
{
deltaR
=
_mm_sub_ps
(
posJ
,
posI
);
if
(
periodic
)
{
deltaR
[
ReferenceForce
::
XIndex
]
-=
(
float
)
(
floor
(
deltaR
[
ReferenceForce
::
XIndex
]
/
boxSize
[
0
]
+
0.5
)
*
boxSize
[
0
]);
deltaR
[
ReferenceForce
::
YIndex
]
-=
(
float
)
(
floor
(
deltaR
[
ReferenceForce
::
YIndex
]
/
boxSize
[
1
]
+
0.5
)
*
boxSize
[
1
]);
deltaR
[
ReferenceForce
::
ZIndex
]
-=
(
float
)
(
floor
(
deltaR
[
ReferenceForce
::
ZIndex
]
/
boxSize
[
2
]
+
0.5
)
*
boxSize
[
2
]);
__m128
base
=
_mm_mul_ps
(
_mm_floor_ps
(
_mm_add_ps
(
_mm_mul_ps
(
deltaR
,
invBoxSize
),
half
)),
boxSize
);
deltaR
=
_mm_sub_ps
(
deltaR
,
base
);
}
deltaR
[
ReferenceForce
::
R2Index
]
=
DOT3
(
deltaR
,
deltaR
);
deltaR
[
ReferenceForce
::
RIndex
]
=
(
float
)
SQRT
(
deltaR
[
ReferenceForce
::
R2Index
]);
r2
=
_mm_cvtss_f32
(
_mm_dp_ps
(
deltaR
,
deltaR
,
0x71
));
}
platforms/cpu/tests/TestCpuEwald.cpp
View file @
6b10b909
...
...
@@ -93,28 +93,28 @@ void testEwaldPME(bool includeExceptions) {
}
}
// (1) Check whether the Reference and CU
DA
platforms agree when using Ewald Method
// (1) Check whether the Reference and C
P
U platforms agree when using Ewald Method
VerletIntegrator
integrator1
(
0.01
);
VerletIntegrator
integrator2
(
0.01
);
Context
cuContext
(
system
,
integrator1
,
platform
);
Context
c
p
uContext
(
system
,
integrator1
,
platform
);
Context
referenceContext
(
system
,
integrator2
,
reference
);
cuContext
.
setPositions
(
positions
);
c
p
uContext
.
setPositions
(
positions
);
referenceContext
.
setPositions
(
positions
);
State
cuState
=
cuContext
.
getState
(
State
::
Forces
|
State
::
Energy
);
State
c
p
uState
=
c
p
uContext
.
getState
(
State
::
Forces
|
State
::
Energy
);
State
referenceState
=
referenceContext
.
getState
(
State
::
Forces
|
State
::
Energy
);
tol
=
1e-2
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
ASSERT_EQUAL_VEC
(
referenceState
.
getForces
()[
i
],
cuState
.
getForces
()[
i
],
tol
);
ASSERT_EQUAL_VEC
(
referenceState
.
getForces
()[
i
],
c
p
uState
.
getForces
()[
i
],
tol
);
}
tol
=
1e-5
;
ASSERT_EQUAL_TOL
(
referenceState
.
getPotentialEnergy
(),
cuState
.
getPotentialEnergy
(),
tol
);
ASSERT_EQUAL_TOL
(
referenceState
.
getPotentialEnergy
(),
c
p
uState
.
getPotentialEnergy
(),
tol
);
// (2) Check whether Ewald method in CU
DA
is self-consistent
// (2) Check whether Ewald method in C
P
U is self-consistent
double
norm
=
0.0
;
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
Vec3
f
=
cuState
.
getForces
()[
i
];
Vec3
f
=
c
p
uState
.
getForces
()[
i
];
norm
+=
f
[
0
]
*
f
[
0
]
+
f
[
1
]
*
f
[
1
]
+
f
[
2
]
*
f
[
2
];
}
...
...
@@ -123,38 +123,38 @@ void testEwaldPME(bool includeExceptions) {
double
step
=
delta
/
norm
;
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
Vec3
p
=
positions
[
i
];
Vec3
f
=
cuState
.
getForces
()[
i
];
Vec3
f
=
c
p
uState
.
getForces
()[
i
];
positions
[
i
]
=
Vec3
(
p
[
0
]
-
f
[
0
]
*
step
,
p
[
1
]
-
f
[
1
]
*
step
,
p
[
2
]
-
f
[
2
]
*
step
);
}
VerletIntegrator
integrator3
(
0.01
);
Context
cuContext2
(
system
,
integrator3
,
platform
);
cuContext2
.
setPositions
(
positions
);
Context
c
p
uContext2
(
system
,
integrator3
,
platform
);
c
p
uContext2
.
setPositions
(
positions
);
tol
=
1e-2
;
State
cuState2
=
cuContext2
.
getState
(
State
::
Energy
);
ASSERT_EQUAL_TOL
(
norm
,
(
cuState2
.
getPotentialEnergy
()
-
cuState
.
getPotentialEnergy
())
/
delta
,
tol
)
State
c
p
uState2
=
c
p
uContext2
.
getState
(
State
::
Energy
);
ASSERT_EQUAL_TOL
(
norm
,
(
c
p
uState2
.
getPotentialEnergy
()
-
c
p
uState
.
getPotentialEnergy
())
/
delta
,
tol
)
// (3) Check whether the Reference and CU
DA
platforms agree when using PME
// (3) Check whether the Reference and C
P
U platforms agree when using PME
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
PME
);
cuContext
.
reinitialize
();
c
p
uContext
.
reinitialize
();
referenceContext
.
reinitialize
();
cuContext
.
setPositions
(
positions
);
c
p
uContext
.
setPositions
(
positions
);
referenceContext
.
setPositions
(
positions
);
cuState
=
cuContext
.
getState
(
State
::
Forces
|
State
::
Energy
);
c
p
uState
=
c
p
uContext
.
getState
(
State
::
Forces
|
State
::
Energy
);
referenceState
=
referenceContext
.
getState
(
State
::
Forces
|
State
::
Energy
);
tol
=
1e-2
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
ASSERT_EQUAL_VEC
(
referenceState
.
getForces
()[
i
],
cuState
.
getForces
()[
i
],
tol
);
ASSERT_EQUAL_VEC
(
referenceState
.
getForces
()[
i
],
c
p
uState
.
getForces
()[
i
],
tol
);
}
tol
=
1e-5
;
ASSERT_EQUAL_TOL
(
referenceState
.
getPotentialEnergy
(),
cuState
.
getPotentialEnergy
(),
tol
);
ASSERT_EQUAL_TOL
(
referenceState
.
getPotentialEnergy
(),
c
p
uState
.
getPotentialEnergy
(),
tol
);
// (4) Check whether PME method in CU
DA
is self-consistent
// (4) Check whether PME method in C
P
U is self-consistent
norm
=
0.0
;
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
Vec3
f
=
cuState
.
getForces
()[
i
];
Vec3
f
=
c
p
uState
.
getForces
()[
i
];
norm
+=
f
[
0
]
*
f
[
0
]
+
f
[
1
]
*
f
[
1
]
+
f
[
2
]
*
f
[
2
];
}
...
...
@@ -162,16 +162,16 @@ void testEwaldPME(bool includeExceptions) {
step
=
delta
/
norm
;
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
Vec3
p
=
positions
[
i
];
Vec3
f
=
cuState
.
getForces
()[
i
];
Vec3
f
=
c
p
uState
.
getForces
()[
i
];
positions
[
i
]
=
Vec3
(
p
[
0
]
-
f
[
0
]
*
step
,
p
[
1
]
-
f
[
1
]
*
step
,
p
[
2
]
-
f
[
2
]
*
step
);
}
VerletIntegrator
integrator4
(
0.01
);
Context
cuContext3
(
system
,
integrator4
,
platform
);
cuContext3
.
setPositions
(
positions
);
Context
c
p
uContext3
(
system
,
integrator4
,
platform
);
c
p
uContext3
.
setPositions
(
positions
);
tol
=
1e-2
;
State
cuState3
=
cuContext3
.
getState
(
State
::
Energy
);
ASSERT_EQUAL_TOL
(
norm
,
(
cuState3
.
getPotentialEnergy
()
-
cuState
.
getPotentialEnergy
())
/
delta
,
tol
)
State
c
p
uState3
=
c
p
uContext3
.
getState
(
State
::
Energy
);
ASSERT_EQUAL_TOL
(
norm
,
(
c
p
uState3
.
getPotentialEnergy
()
-
c
p
uState
.
getPotentialEnergy
())
/
delta
,
tol
)
}
void
testEwald2Ions
()
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment