Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
563914a7
Commit
563914a7
authored
Nov 06, 2013
by
peastman
Browse files
Optimizations to CPU version of GBSA
parent
c6b335e7
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
211 additions
and
113 deletions
+211
-113
openmmapi/include/openmm/internal/vectorize.h
openmmapi/include/openmm/internal/vectorize.h
+19
-0
platforms/cpu/include/CpuGBSAOBCForce.h
platforms/cpu/include/CpuGBSAOBCForce.h
+5
-5
platforms/cpu/src/CpuGBSAOBCForce.cpp
platforms/cpu/src/CpuGBSAOBCForce.cpp
+187
-107
platforms/cpu/tests/TestCpuGBSAOBCForce.cpp
platforms/cpu/tests/TestCpuGBSAOBCForce.cpp
+0
-1
No files found.
openmmapi/include/openmm/internal/vectorize.h
View file @
563914a7
...
...
@@ -160,6 +160,21 @@ public:
ivec4
operator
==
(
ivec4
other
)
const
{
return
_mm_cmpeq_epi32
(
val
,
other
);
}
ivec4
operator
!=
(
ivec4
other
)
const
{
return
_mm_xor_si128
(
val
==
other
,
_mm_set1_epi32
(
0xFFFFFFFF
));
}
ivec4
operator
>
(
ivec4
other
)
const
{
return
_mm_cmpgt_epi32
(
val
,
other
);
}
ivec4
operator
<
(
ivec4
other
)
const
{
return
_mm_cmplt_epi32
(
val
,
other
);
}
ivec4
operator
>=
(
ivec4
other
)
const
{
return
_mm_xor_si128
(
_mm_cmplt_epi32
(
val
,
other
),
_mm_set1_epi32
(
0xFFFFFFFF
));
}
ivec4
operator
<=
(
ivec4
other
)
const
{
return
_mm_xor_si128
(
_mm_cmpgt_epi32
(
val
,
other
),
_mm_set1_epi32
(
0xFFFFFFFF
));
}
operator
fvec4
()
const
;
};
...
...
@@ -230,6 +245,10 @@ static inline ivec4 abs(ivec4 v) {
return
ivec4
(
_mm_abs_epi32
(
v
.
val
));
}
static
inline
bool
any
(
ivec4
v
)
{
return
!
_mm_test_all_zeros
(
v
,
_mm_set1_epi32
(
0xFFFFFFFF
));
}
// Mathematical operators involving a scalar and a vector.
static
inline
fvec4
operator
+
(
float
v1
,
fvec4
v2
)
{
...
...
platforms/cpu/include/CpuGBSAOBCForce.h
View file @
563914a7
...
...
@@ -106,11 +106,11 @@ private:
std
::
vector
<
std
::
vector
<
float
>
>*
threadForce
;
bool
includeEnergy
;
/**
* Compute the displacement and squared distance between
two
points, optionally using
* periodic boundary conditions.
*/
void
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
* Compute the displacement and squared distance between
a collection of
points, optionally using
* periodic boundary conditions.
*/
void
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
};
}
// namespace OpenMM
...
...
platforms/cpu/src/CpuGBSAOBCForce.cpp
View file @
563914a7
...
...
@@ -69,8 +69,8 @@ const std::vector<std::pair<float, float> >& CpuGBSAOBCForce::getParticleParamet
void
CpuGBSAOBCForce
::
setParticleParameters
(
const
std
::
vector
<
std
::
pair
<
float
,
float
>
>&
params
)
{
particleParams
=
params
;
bornRadii
.
resize
(
params
.
size
());
obcChain
.
resize
(
params
.
size
());
bornRadii
.
resize
(
params
.
size
()
+
3
);
obcChain
.
resize
(
params
.
size
()
+
3
);
}
void
CpuGBSAOBCForce
::
computeForce
(
const
std
::
vector
<
float
>&
posq
,
vector
<
vector
<
float
>
>&
threadForce
,
double
*
totalEnergy
,
ThreadPool
&
threads
)
{
...
...
@@ -83,7 +83,7 @@ void CpuGBSAOBCForce::computeForce(const std::vector<float>& posq, vector<vector
threadEnergy
.
resize
(
numThreads
);
threadBornForces
.
resize
(
numThreads
);
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
threadBornForces
[
i
].
resize
(
particleParams
.
size
());
threadBornForces
[
i
].
resize
(
particleParams
.
size
()
+
3
);
// Signal the threads to start running and wait for them to finish.
...
...
@@ -121,45 +121,68 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
// Calculate Born radii
for
(
int
atomI
=
start
;
atomI
<
end
;
atomI
++
)
{
fvec4
posI
(
posq
+
4
*
atomI
);
float
offsetRadiusI
=
particleParams
[
atomI
].
first
;
float
radiusIInverse
=
1.0
f
/
offsetRadiusI
;
float
sum
=
0.0
f
;
for
(
int
blockStart
=
start
;
blockStart
<
end
;
blockStart
+=
4
)
{
int
numInBlock
=
min
(
4
,
end
-
blockStart
);
ivec4
blockAtomIndex
(
blockStart
,
blockStart
+
1
,
blockStart
+
2
,
blockStart
+
3
);
float
atomRadius
[
4
],
atomx
[
4
],
atomy
[
4
],
atomz
[
4
];
int
blockMask
[
4
]
=
{
0
,
0
,
0
,
0
};
for
(
int
i
=
0
;
i
<
numInBlock
;
i
++
)
{
int
atomIndex
=
blockStart
+
i
;
atomRadius
[
i
]
=
particleParams
[
atomIndex
].
first
;
atomx
[
i
]
=
posq
[
4
*
atomIndex
];
atomy
[
i
]
=
posq
[
4
*
atomIndex
+
1
];
atomz
[
i
]
=
posq
[
4
*
atomIndex
+
2
];
blockMask
[
i
]
=
0xFFFFFFFF
;
}
fvec4
offsetRadiusI
(
atomRadius
);
fvec4
radiusIInverse
=
1.0
f
/
offsetRadiusI
;
fvec4
x
(
atomx
);
fvec4
y
(
atomy
);
fvec4
z
(
atomz
);
ivec4
mask
(
blockMask
);
float
sum
[
4
]
=
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
};
for
(
int
atomJ
=
0
;
atomJ
<
numParticles
;
atomJ
++
)
{
if
(
atomJ
!=
atomI
)
{
fvec4
posJ
(
posq
+
4
*
atomJ
);
fvec4
deltaR
;
float
r2
;
getDeltaR
(
posI
,
posJ
,
deltaR
,
r2
,
periodic
,
boxSize
,
invBoxSize
);
if
(
cutoff
&&
r2
>=
cutoffDistance
*
cutoffDistance
)
continue
;
float
r
=
sqrtf
(
r2
);
float
scaledRadiusJ
=
particleParams
[
atomJ
].
second
;
float
rScaledRadiusJ
=
r
+
scaledRadiusJ
;
if
(
offsetRadiusI
<
rScaledRadiusJ
)
{
float
rInverse
=
1.0
f
/
r
;
float
l_ij
=
1.0
f
/
(
offsetRadiusI
>
fabs
(
r
-
scaledRadiusJ
)
?
offsetRadiusI
:
fabs
(
r
-
scaledRadiusJ
));
float
u_ij
=
1.0
f
/
rScaledRadiusJ
;
float
l_ij2
=
l_ij
*
l_ij
;
float
u_ij2
=
u_ij
*
u_ij
;
float
ratio
=
log
((
u_ij
/
l_ij
));
float
term
=
l_ij
-
u_ij
+
0.25
f
*
r
*
(
u_ij2
-
l_ij2
)
+
(
0.5
f
*
rInverse
*
ratio
)
+
(
0.25
f
*
scaledRadiusJ
*
scaledRadiusJ
*
rInverse
)
*
(
l_ij2
-
u_ij2
);
if
(
offsetRadiusI
<
(
scaledRadiusJ
-
r
))
term
+=
2.0
f
*
(
radiusIInverse
-
l_ij
);
sum
+=
term
;
fvec4
posJ
(
posq
+
4
*
atomJ
);
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
posJ
,
x
,
y
,
z
,
dx
,
dy
,
dz
,
r2
,
periodic
,
boxSize
,
invBoxSize
);
ivec4
include
=
mask
&
(
blockAtomIndex
!=
ivec4
(
atomJ
));
if
(
cutoff
)
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
fvec4
r
=
sqrt
(
r2
);
float
scaledRadiusJ
=
particleParams
[
atomJ
].
second
;
float
scaledRadiusJ2
=
scaledRadiusJ
*
scaledRadiusJ
;
fvec4
rScaledRadiusJ
=
r
+
scaledRadiusJ
;
include
=
include
&
(
offsetRadiusI
<
rScaledRadiusJ
);
fvec4
l_ij
=
1.0
f
/
max
(
offsetRadiusI
,
abs
(
r
-
scaledRadiusJ
));
fvec4
u_ij
=
1.0
f
/
rScaledRadiusJ
;
fvec4
l_ij2
=
l_ij
*
l_ij
;
fvec4
u_ij2
=
u_ij
*
u_ij
;
fvec4
rInverse
=
1.0
f
/
r
;
fvec4
r2Inverse
=
rInverse
*
rInverse
;
fvec4
ratio
=
u_ij
/
l_ij
;
fvec4
logRatio
(
logf
(
ratio
[
0
]),
logf
(
ratio
[
1
]),
logf
(
ratio
[
2
]),
logf
(
ratio
[
3
]));
fvec4
term
=
l_ij
-
u_ij
+
0.25
f
*
r
*
(
u_ij2
-
l_ij2
)
+
(
0.5
f
*
rInverse
*
logRatio
)
+
(
0.25
f
*
scaledRadiusJ
*
scaledRadiusJ
*
rInverse
)
*
(
l_ij2
-
u_ij2
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
if
(
include
[
j
])
{
sum
[
j
]
+=
term
[
j
];
if
(
offsetRadiusI
[
j
]
<
scaledRadiusJ
-
r
[
j
])
sum
[
j
]
+=
2.0
f
*
(
radiusIInverse
[
j
]
-
l_ij
[
j
]);
}
}
}
sum
*=
0.5
f
*
offsetRadiusI
;
float
sum2
=
sum
*
sum
;
float
sum3
=
sum
*
sum2
;
float
tanhSum
=
tanh
(
alphaObc
*
sum
-
betaObc
*
sum2
+
gammaObc
*
sum3
);
float
radiusI
=
offsetRadiusI
+
dielectricOffset
;
bornRadii
[
atomI
]
=
1.0
f
/
(
1.0
f
/
offsetRadiusI
-
tanhSum
/
radiusI
);
obcChain
[
atomI
]
=
offsetRadiusI
*
(
alphaObc
-
2.0
f
*
betaObc
*
sum
+
3.0
f
*
gammaObc
*
sum2
);
obcChain
[
atomI
]
=
(
1.0
f
-
tanhSum
*
tanhSum
)
*
obcChain
[
atomI
]
/
radiusI
;
for
(
int
i
=
0
;
i
<
numInBlock
;
i
++
)
{
int
atomIndex
=
blockStart
+
i
;
sum
[
i
]
*=
0.5
f
*
offsetRadiusI
[
i
];
float
sum2
=
sum
[
i
]
*
sum
[
i
];
float
sum3
=
sum
[
i
]
*
sum2
;
float
tanhSum
=
tanh
(
alphaObc
*
sum
[
i
]
-
betaObc
*
sum2
+
gammaObc
*
sum3
);
float
radiusI
=
offsetRadiusI
[
i
]
+
dielectricOffset
;
bornRadii
[
atomIndex
]
=
1.0
f
/
(
1.0
f
/
offsetRadiusI
[
i
]
-
tanhSum
/
radiusI
);
obcChain
[
atomIndex
]
=
offsetRadiusI
[
i
]
*
(
alphaObc
-
2.0
f
*
betaObc
*
sum
[
i
]
+
3.0
f
*
gammaObc
*
sum2
);
obcChain
[
atomIndex
]
=
(
1.0
f
-
tanhSum
*
tanhSum
)
*
obcChain
[
atomIndex
]
/
radiusI
;
}
}
threads
.
syncThreads
();
...
...
@@ -193,93 +216,150 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
preFactor
=
ONE_4PI_EPS0
*
((
1.0
f
/
solventDielectric
)
-
(
1.0
f
/
soluteDielectric
));
else
preFactor
=
0.0
f
;
for
(
int
atomI
=
start
;
atomI
<
end
;
atomI
++
)
{
fvec4
posI
(
posq
+
4
*
atomI
);
fvec4
forceI
(
0.0
f
);
float
partialChargeI
=
preFactor
*
posI
[
3
];
for
(
int
atomJ
=
atomI
;
atomJ
<
numParticles
;
atomJ
++
)
{
for
(
int
blockStart
=
start
;
blockStart
<
end
;
blockStart
+=
4
)
{
int
numInBlock
=
min
(
4
,
end
-
blockStart
);
ivec4
blockAtomIndex
(
blockStart
,
blockStart
+
1
,
blockStart
+
2
,
blockStart
+
3
);
float
atomCharge
[
4
],
atomx
[
4
],
atomy
[
4
],
atomz
[
4
];
int
blockMask
[
4
]
=
{
0
,
0
,
0
,
0
};
fvec4
blockAtomForce
[
4
];
for
(
int
i
=
0
;
i
<
numInBlock
;
i
++
)
{
int
atomIndex
=
blockStart
+
i
;
atomx
[
i
]
=
posq
[
4
*
atomIndex
];
atomy
[
i
]
=
posq
[
4
*
atomIndex
+
1
];
atomz
[
i
]
=
posq
[
4
*
atomIndex
+
2
];
atomCharge
[
i
]
=
preFactor
*
posq
[
4
*
atomIndex
+
3
];
blockMask
[
i
]
=
0xFFFFFFFF
;
blockAtomForce
[
i
]
=
0.0
f
;
}
fvec4
radii
(
&
bornRadii
[
blockStart
]);
fvec4
x
(
atomx
);
fvec4
y
(
atomy
);
fvec4
z
(
atomz
);
fvec4
partialChargeI
(
atomCharge
);
ivec4
mask
(
blockMask
);
for
(
int
atomJ
=
blockStart
;
atomJ
<
numParticles
;
atomJ
++
)
{
fvec4
posJ
(
posq
+
4
*
atomJ
);
fvec4
deltaR
;
float
r2
;
getDeltaR
(
posI
,
posJ
,
deltaR
,
r2
,
periodic
,
boxSize
,
invBoxSize
);
if
(
cutoff
&&
r2
>=
cutoffDistance
*
cutoffDistance
)
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
posJ
,
x
,
y
,
z
,
dx
,
dy
,
dz
,
r2
,
periodic
,
boxSize
,
invBoxSize
);
ivec4
include
=
mask
&
(
blockAtomIndex
<=
ivec4
(
atomJ
));
if
(
cutoff
)
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
float
r
=
sqrtf
(
r2
);
float
alpha2_ij
=
bornRadii
[
atomI
]
*
bornRadii
[
atomJ
];
float
D_ij
=
r2
/
(
4.0
f
*
alpha2_ij
);
float
expTerm
=
exp
(
-
D_ij
);
float
denominator2
=
r2
+
alpha2_ij
*
expTerm
;
float
denominator
=
sqrt
(
denominator2
);
float
Gpol
=
(
partialChargeI
*
posJ
[
3
])
/
denominator
;
float
dGpol_dr
=
-
Gpol
*
(
1.0
f
-
0.25
f
*
expTerm
)
/
denominator2
;
float
dGpol_dalpha2_ij
=
-
0.5
f
*
Gpol
*
expTerm
*
(
1.0
f
+
D_ij
)
/
denominator2
;
float
termEnergy
=
Gpol
;
if
(
atomI
!=
atomJ
)
{
if
(
cutoff
)
termEnergy
-=
partialChargeI
*
posJ
[
3
]
/
cutoffDistance
;
bornForces
[
atomJ
]
+=
dGpol_dalpha2_ij
*
bornRadii
[
atomI
];
fvec4
result
=
deltaR
*
dGpol_dr
;
forceI
+=
result
;
(
fvec4
(
forces
+
4
*
atomJ
)
-
result
).
store
(
forces
+
4
*
atomJ
);
fvec4
r
=
sqrt
(
r2
);
fvec4
alpha2_ij
=
radii
*
bornRadii
[
atomJ
];
fvec4
D_ij
=
r2
/
(
4.0
f
*
alpha2_ij
);
fvec4
expTerm
(
exp
(
-
D_ij
[
0
]),
exp
(
-
D_ij
[
1
]),
exp
(
-
D_ij
[
2
]),
exp
(
-
D_ij
[
3
]));
fvec4
denominator2
=
r2
+
alpha2_ij
*
expTerm
;
fvec4
denominator
=
sqrt
(
denominator2
);
fvec4
Gpol
=
(
partialChargeI
*
posJ
[
3
])
/
denominator
;
fvec4
dGpol_dr
=
-
Gpol
*
(
1.0
f
-
0.25
f
*
expTerm
)
/
denominator2
;
fvec4
dGpol_dalpha2_ij
=
-
0.5
f
*
Gpol
*
expTerm
*
(
1.0
f
+
D_ij
)
/
denominator2
;
fvec4
result
[
4
]
=
{
dx
*
dGpol_dr
,
dy
*
dGpol_dr
,
dz
*
dGpol_dr
,
0.0
f
};
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
fvec4
atomForce
(
forces
+
4
*
atomJ
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
if
(
include
[
j
])
{
float
termEnergy
=
Gpol
[
j
];
if
(
blockStart
+
j
!=
atomJ
)
{
if
(
cutoff
)
termEnergy
-=
partialChargeI
[
j
]
*
posJ
[
3
]
/
cutoffDistance
;
bornForces
[
atomJ
]
+=
dGpol_dalpha2_ij
[
j
]
*
radii
[
j
];
blockAtomForce
[
j
]
-=
result
[
j
];
(
fvec4
(
forces
+
4
*
atomJ
)
+
result
[
j
]).
store
(
forces
+
4
*
atomJ
);
}
else
termEnergy
*=
0.5
f
;
energy
+=
termEnergy
;
bornForces
[
blockStart
+
j
]
+=
dGpol_dalpha2_ij
[
j
]
*
bornRadii
[
atomJ
];
}
}
else
termEnergy
*=
0.5
f
;
energy
+=
termEnergy
;
bornForces
[
atomI
]
+=
dGpol_dalpha2_ij
*
bornRadii
[
atomJ
];
}
(
fvec4
(
forces
+
4
*
atomI
)
+
forceI
).
store
(
forces
+
4
*
atomI
);
for
(
int
i
=
0
;
i
<
numInBlock
;
i
++
)
{
int
atomIndex
=
blockStart
+
i
;
(
fvec4
(
forces
+
4
*
atomIndex
)
+
blockAtomForce
[
i
]).
store
(
forces
+
4
*
atomIndex
);
}
}
threads
.
syncThreads
();
// Second loop of Born energy computation.
for
(
int
atomI
=
start
;
atomI
<
end
;
atomI
++
)
{
f
loat
bornForce
=
0
;
for
(
int
blockStart
=
start
;
blockStart
<
end
;
blockStart
+=
4
)
{
f
vec4
bornForce
(
0.0
f
)
;
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
bornForce
+=
threadBornForces
[
i
][
atomI
];
bornForce
*=
bornRadii
[
atomI
]
*
bornRadii
[
atomI
]
*
obcChain
[
atomI
];
float
offsetRadiusI
=
particleParams
[
atomI
].
first
;
fvec4
posI
(
posq
+
4
*
atomI
);
fvec4
forceI
(
0.0
f
);
bornForce
+=
fvec4
(
&
threadBornForces
[
i
][
blockStart
]);
fvec4
radii
(
&
bornRadii
[
blockStart
]);
bornForce
*=
radii
*
radii
*
fvec4
(
&
obcChain
[
blockStart
]);
int
numInBlock
=
min
(
4
,
end
-
blockStart
);
ivec4
blockAtomIndex
(
blockStart
,
blockStart
+
1
,
blockStart
+
2
,
blockStart
+
3
);
float
atomRadius
[
4
],
atomx
[
4
],
atomy
[
4
],
atomz
[
4
];
int
blockMask
[
4
]
=
{
0
,
0
,
0
,
0
};
fvec4
blockAtomForce
[
4
];
for
(
int
i
=
0
;
i
<
numInBlock
;
i
++
)
{
int
atomIndex
=
blockStart
+
i
;
atomRadius
[
i
]
=
particleParams
[
atomIndex
].
first
;
atomx
[
i
]
=
posq
[
4
*
atomIndex
];
atomy
[
i
]
=
posq
[
4
*
atomIndex
+
1
];
atomz
[
i
]
=
posq
[
4
*
atomIndex
+
2
];
blockMask
[
i
]
=
0xFFFFFFFF
;
blockAtomForce
[
i
]
=
0.0
f
;
}
fvec4
offsetRadiusI
(
atomRadius
);
fvec4
x
(
atomx
);
fvec4
y
(
atomy
);
fvec4
z
(
atomz
);
ivec4
mask
(
blockMask
);
for
(
int
atomJ
=
0
;
atomJ
<
numParticles
;
atomJ
++
)
{
if
(
atomJ
!=
atomI
)
{
fvec4
posJ
(
posq
+
4
*
atomJ
);
fvec4
deltaR
;
float
r2
;
getDeltaR
(
posI
,
posJ
,
deltaR
,
r2
,
periodic
,
boxSize
,
invBoxSize
);
if
(
cutoff
&&
r2
>=
cutoffDistance
*
cutoffDistance
)
continue
;
float
r
=
sqrtf
(
r2
);
float
scaledRadiusJ
=
particleParams
[
atomJ
].
second
;
float
scaledRadiusJ2
=
scaledRadiusJ
*
scaledRadiusJ
;
float
rScaledRadiusJ
=
r
+
scaledRadiusJ
;
if
(
offsetRadiusI
<
rScaledRadiusJ
)
{
float
l_ij
=
1.0
f
/
(
offsetRadiusI
>
fabs
(
r
-
scaledRadiusJ
)
?
offsetRadiusI
:
fabs
(
r
-
scaledRadiusJ
));
float
u_ij
=
1.0
f
/
rScaledRadiusJ
;
float
l_ij2
=
l_ij
*
l_ij
;
float
u_ij2
=
u_ij
*
u_ij
;
float
rInverse
=
1.0
f
/
r
;
float
r2Inverse
=
rInverse
*
rInverse
;
float
t3
=
0.125
f
*
(
1.0
f
+
scaledRadiusJ2
*
r2Inverse
)
*
(
l_ij2
-
u_ij2
)
+
0.25
f
*
log
(
u_ij
/
l_ij
)
*
r2Inverse
;
float
de
=
bornForce
*
t3
*
rInverse
;
fvec4
result
=
deltaR
*
de
;
forceI
-=
result
;
(
fvec4
(
forces
+
4
*
atomJ
)
+
result
).
store
(
forces
+
4
*
atomJ
);
fvec4
posJ
(
posq
+
4
*
atomJ
);
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
posJ
,
x
,
y
,
z
,
dx
,
dy
,
dz
,
r2
,
periodic
,
boxSize
,
invBoxSize
);
ivec4
include
=
mask
&
(
blockAtomIndex
!=
ivec4
(
atomJ
));
if
(
cutoff
)
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
if
(
!
any
(
include
))
continue
;
fvec4
r
=
sqrt
(
r2
);
float
scaledRadiusJ
=
particleParams
[
atomJ
].
second
;
float
scaledRadiusJ2
=
scaledRadiusJ
*
scaledRadiusJ
;
fvec4
rScaledRadiusJ
=
r
+
scaledRadiusJ
;
include
=
include
&
(
offsetRadiusI
<
rScaledRadiusJ
);
fvec4
l_ij
=
1.0
f
/
max
(
offsetRadiusI
,
abs
(
r
-
scaledRadiusJ
));
fvec4
u_ij
=
1.0
f
/
rScaledRadiusJ
;
fvec4
l_ij2
=
l_ij
*
l_ij
;
fvec4
u_ij2
=
u_ij
*
u_ij
;
fvec4
rInverse
=
1.0
f
/
r
;
fvec4
r2Inverse
=
rInverse
*
rInverse
;
fvec4
ratio
=
u_ij
/
l_ij
;
fvec4
logRatio
(
logf
(
ratio
[
0
]),
logf
(
ratio
[
1
]),
logf
(
ratio
[
2
]),
logf
(
ratio
[
3
]));
fvec4
t3
=
0.125
f
*
(
1.0
f
+
scaledRadiusJ2
*
r2Inverse
)
*
(
l_ij2
-
u_ij2
)
+
0.25
f
*
logRatio
*
r2Inverse
;
fvec4
de
=
bornForce
*
t3
*
rInverse
;
fvec4
result
[
4
]
=
{
dx
*
de
,
dy
*
de
,
dz
*
de
,
0.0
f
};
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
fvec4
atomForce
(
forces
+
4
*
atomJ
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
if
(
include
[
j
])
{
blockAtomForce
[
j
]
+=
result
[
j
];
atomForce
-=
result
[
j
];
}
}
atomForce
.
store
(
forces
+
4
*
atomJ
);
}
for
(
int
i
=
0
;
i
<
numInBlock
;
i
++
)
{
int
atomIndex
=
blockStart
+
i
;
(
fvec4
(
forces
+
4
*
atomIndex
)
+
blockAtomForce
[
i
]).
store
(
forces
+
4
*
atomIndex
);
}
(
fvec4
(
forces
+
4
*
atomI
)
+
forceI
).
store
(
forces
+
4
*
atomI
);
}
threadEnergy
[
threadIndex
]
=
energy
;
}
void
CpuGBSAOBCForce
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
deltaR
=
posJ
-
posI
;
void
CpuGBSAOBCForce
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
dx
=
x
-
posI
[
0
];
dy
=
y
-
posI
[
1
];
dz
=
z
-
posI
[
2
];
if
(
periodic
)
{
fvec4
base
=
round
(
deltaR
*
invBoxSize
)
*
boxSize
;
deltaR
=
deltaR
-
base
;
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
r2
=
d
ot3
(
deltaR
,
deltaR
)
;
r2
=
d
x
*
dx
+
dy
*
dy
+
dz
*
dz
;
}
platforms/cpu/tests/TestCpuGBSAOBCForce.cpp
View file @
563914a7
...
...
@@ -223,7 +223,6 @@ void testForce(int numParticles, NonbondedForce::NonbondedMethod method, GBSAOBC
int
main
()
{
try
{
testForce
(
729
,
NonbondedForce
::
CutoffNonPeriodic
,
GBSAOBCForce
::
CutoffNonPeriodic
);
testSingleParticle
();
testCutoffAndPeriodic
();
for
(
int
i
=
5
;
i
<
11
;
i
++
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment