Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
4b5f48ee
Commit
4b5f48ee
authored
Nov 26, 2014
by
peastman
Browse files
Workaround for compiler bug in Visual Studio
parent
59484a6f
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
335 additions
and
329 deletions
+335
-329
platforms/cpu/src/CpuNonbondedForceVec8.cpp
platforms/cpu/src/CpuNonbondedForceVec8.cpp
+335
-329
No files found.
platforms/cpu/src/CpuNonbondedForceVec8.cpp
View file @
4b5f48ee
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Contributors: Pande Group
* Contributors: Pande Group
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
* to the following conditions:
*
*
* The above copyright notice and this permission notice shall be included
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
* in all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
*/
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMUtilities.h"
#include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForceVec8.h"
#include "CpuNonbondedForceVec8.h"
#include "openmm/OpenMMException.h"
#include "openmm/OpenMMException.h"
#include "openmm/internal/hardware.h"
#include "openmm/internal/hardware.h"
using
namespace
std
;
using
namespace
std
;
using
namespace
OpenMM
;
using
namespace
OpenMM
;
#ifndef __AVX__
#ifdef _MSC_VER
bool
isVec8Supported
()
{
// Workaround for a compiler bug in Visual Studio 10. Hopefully we can remove this
return
false
;
// once we move to a later version.
}
#undef __AVX__
#endif
CpuNonbondedForce
*
createCpuNonbondedForceVec8
()
{
throw
OpenMMException
(
"Internal error: OpenMM was compiled without AVX support"
);
#ifndef __AVX__
}
bool
isVec8Supported
()
{
#else
return
false
;
/**
}
* Check whether 8 component vectors are supported with the current CPU.
*/
CpuNonbondedForce
*
createCpuNonbondedForceVec8
()
{
bool
isVec8Supported
()
{
throw
OpenMMException
(
"Internal error: OpenMM was compiled without AVX support"
);
// Make sure the CPU supports AVX.
}
#else
int
cpuInfo
[
4
];
/**
cpuid
(
cpuInfo
,
0
);
* Check whether 8 component vectors are supported with the current CPU.
if
(
cpuInfo
[
0
]
>=
1
)
{
*/
cpuid
(
cpuInfo
,
1
);
bool
isVec8Supported
()
{
return
((
cpuInfo
[
2
]
&
((
int
)
1
<<
28
))
!=
0
);
// Make sure the CPU supports AVX.
}
return
false
;
int
cpuInfo
[
4
];
}
cpuid
(
cpuInfo
,
0
);
if
(
cpuInfo
[
0
]
>=
1
)
{
/**
cpuid
(
cpuInfo
,
1
);
* Factory method to create a CpuNonbondedForceVec8.
return
((
cpuInfo
[
2
]
&
((
int
)
1
<<
28
))
!=
0
);
*/
}
CpuNonbondedForce
*
createCpuNonbondedForceVec8
()
{
return
false
;
return
new
CpuNonbondedForceVec8
();
}
}
/**
/**---------------------------------------------------------------------------------------
* Factory method to create a CpuNonbondedForceVec8.
*/
CpuNonbondedForceVec8 constructor
CpuNonbondedForce
*
createCpuNonbondedForceVec8
()
{
return
new
CpuNonbondedForceVec8
();
--------------------------------------------------------------------------------------- */
}
CpuNonbondedForceVec8
::
CpuNonbondedForceVec8
()
{
/**---------------------------------------------------------------------------------------
}
CpuNonbondedForceVec8 constructor
void
CpuNonbondedForceVec8
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
--------------------------------------------------------------------------------------- */
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
];
CpuNonbondedForceVec8
::
CpuNonbondedForceVec8
()
{
fvec4
blockAtomPosq
[
8
];
}
fvec8
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec8
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
void
CpuNonbondedForceVec8
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
for
(
int
i
=
0
;
i
<
8
;
i
++
)
// Load the positions and parameters of the atoms in the block.
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
transpose
(
blockAtomPosq
[
0
],
blockAtomPosq
[
1
],
blockAtomPosq
[
2
],
blockAtomPosq
[
3
],
blockAtomPosq
[
4
],
blockAtomPosq
[
5
],
blockAtomPosq
[
6
],
blockAtomPosq
[
7
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
);
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
];
blockAtomCharge
*=
ONE_4PI_EPS0
;
fvec4
blockAtomPosq
[
8
];
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
fvec8
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
for
(
int
i
=
0
;
i
<
8
;
i
++
)
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
transpose
(
blockAtomPosq
[
0
],
blockAtomPosq
[
1
],
blockAtomPosq
[
2
],
blockAtomPosq
[
3
],
blockAtomPosq
[
4
],
blockAtomPosq
[
5
],
blockAtomPosq
[
6
],
blockAtomPosq
[
7
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
);
blockAtomCharge
*=
ONE_4PI_EPS0
;
// Loop over neighbors for this block.
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Load the next neighbor.
// Loop over neighbors for this block.
int
atom
=
neighbors
[
i
];
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
// Compute the distances to the block atoms.
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
fvec8
dx
,
dy
,
dz
,
r2
;
// Load the next neighbor.
getDeltaR
(
&
posq
[
4
*
atom
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec8
include
;
int
atom
=
neighbors
[
i
];
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
// Compute the distances to the block atoms.
include
=
-
1
;
else
fvec8
dx
,
dy
,
dz
,
r2
;
include
=
ivec8
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
,
excl
&
16
?
0
:
-
1
,
excl
&
32
?
0
:
-
1
,
excl
&
64
?
0
:
-
1
,
excl
&
128
?
0
:
-
1
);
getDeltaR
(
&
posq
[
4
*
atom
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
ivec8
include
;
if
(
!
any
(
include
))
char
excl
=
exclusions
[
i
];
continue
;
// No interactions to compute.
if
(
excl
==
0
)
include
=
-
1
;
// Compute the interactions.
else
include
=
ivec8
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
,
excl
&
16
?
0
:
-
1
,
excl
&
32
?
0
:
-
1
,
excl
&
64
?
0
:
-
1
,
excl
&
128
?
0
:
-
1
);
fvec8
r
=
sqrt
(
r2
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
fvec8
inverseR
=
fvec8
(
1.0
f
)
/
r
;
if
(
!
any
(
include
))
fvec8
energy
,
dEdR
;
continue
;
// No interactions to compute.
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
// Compute the interactions.
fvec8
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec8
sig2
=
inverseR
*
sig
;
fvec8
r
=
sqrt
(
r2
);
sig2
*=
sig2
;
fvec8
inverseR
=
fvec8
(
1.0
f
)
/
r
;
fvec8
sig6
=
sig2
*
sig2
*
sig2
;
fvec8
energy
,
dEdR
;
fvec8
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
if
(
atomEpsilon
!=
0.0
f
)
{
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
fvec8
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
if
(
useSwitch
)
{
fvec8
sig2
=
inverseR
*
sig
;
fvec8
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
sig2
*=
sig2
;
fvec8
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec8
sig6
=
sig2
*
sig2
*
sig2
;
fvec8
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
fvec8
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
*=
switchValue
;
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
}
if
(
useSwitch
)
{
}
fvec8
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
else
{
fvec8
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
energy
=
0.0
f
;
fvec8
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
0.0
f
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
}
energy
*=
switchValue
;
fvec8
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
}
if
(
cutoff
)
}
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
else
{
else
energy
=
0.0
f
;
dEdR
+=
chargeProd
*
inverseR
;
dEdR
=
0.0
f
;
dEdR
*=
inverseR
*
inverseR
;
}
fvec8
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
// Accumulate energies.
if
(
cutoff
)
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
fvec8
one
(
1.0
f
);
else
if
(
totalEnergy
)
{
dEdR
+=
chargeProd
*
inverseR
;
if
(
cutoff
)
dEdR
*=
inverseR
*
inverseR
;
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
else
// Accumulate energies.
energy
+=
chargeProd
*
inverseR
;
energy
=
blend
(
0.0
f
,
energy
,
include
);
fvec8
one
(
1.0
f
);
*
totalEnergy
+=
dot8
(
energy
,
one
);
if
(
totalEnergy
)
{
}
if
(
cutoff
)
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
// Accumulate forces.
else
energy
+=
chargeProd
*
inverseR
;
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
energy
=
blend
(
0.0
f
,
energy
,
include
);
fvec8
fx
=
dx
*
dEdR
;
*
totalEnergy
+=
dot8
(
energy
,
one
);
fvec8
fy
=
dy
*
dEdR
;
}
fvec8
fz
=
dz
*
dEdR
;
blockAtomForceX
+=
fx
;
// Accumulate forces.
blockAtomForceY
+=
fy
;
blockAtomForceZ
+=
fz
;
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
float
*
atomForce
=
forces
+
4
*
atom
;
fvec8
fx
=
dx
*
dEdR
;
atomForce
[
0
]
-=
dot8
(
fx
,
one
);
fvec8
fy
=
dy
*
dEdR
;
atomForce
[
1
]
-=
dot8
(
fy
,
one
);
fvec8
fz
=
dz
*
dEdR
;
atomForce
[
2
]
-=
dot8
(
fz
,
one
);
blockAtomForceX
+=
fx
;
}
blockAtomForceY
+=
fy
;
blockAtomForceZ
+=
fz
;
// Record the forces on the block atoms.
float
*
atomForce
=
forces
+
4
*
atom
;
atomForce
[
0
]
-=
dot8
(
fx
,
one
);
fvec4
f
[
8
];
atomForce
[
1
]
-=
dot8
(
fy
,
one
);
transpose
(
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
,
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
],
f
[
4
],
f
[
5
],
f
[
6
],
f
[
7
]);
atomForce
[
2
]
-=
dot8
(
fz
,
one
);
for
(
int
j
=
0
;
j
<
8
;
j
++
)
}
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
// Record the forces on the block atoms.
void
CpuNonbondedForceVec8
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
fvec4
f
[
8
];
// Load the positions and parameters of the atoms in the block.
transpose
(
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
,
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
],
f
[
4
],
f
[
5
],
f
[
6
],
f
[
7
]);
for
(
int
j
=
0
;
j
<
8
;
j
++
)
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
];
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
fvec4
blockAtomPosq
[
8
];
}
fvec8
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec8
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
void
CpuNonbondedForceVec8
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
for
(
int
i
=
0
;
i
<
8
;
i
++
)
// Load the positions and parameters of the atoms in the block.
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
transpose
(
blockAtomPosq
[
0
],
blockAtomPosq
[
1
],
blockAtomPosq
[
2
],
blockAtomPosq
[
3
],
blockAtomPosq
[
4
],
blockAtomPosq
[
5
],
blockAtomPosq
[
6
],
blockAtomPosq
[
7
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
);
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
];
blockAtomCharge
*=
ONE_4PI_EPS0
;
fvec4
blockAtomPosq
[
8
];
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
fvec8
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
for
(
int
i
=
0
;
i
<
8
;
i
++
)
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
transpose
(
blockAtomPosq
[
0
],
blockAtomPosq
[
1
],
blockAtomPosq
[
2
],
blockAtomPosq
[
3
],
blockAtomPosq
[
4
],
blockAtomPosq
[
5
],
blockAtomPosq
[
6
],
blockAtomPosq
[
7
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
);
blockAtomCharge
*=
ONE_4PI_EPS0
;
// Loop over neighbors for this block.
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Load the next neighbor.
// Loop over neighbors for this block.
int
atom
=
neighbors
[
i
];
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
// Compute the distances to the block atoms.
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
fvec8
dx
,
dy
,
dz
,
r2
;
// Load the next neighbor.
getDeltaR
(
&
posq
[
4
*
atom
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec8
include
;
int
atom
=
neighbors
[
i
];
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
// Compute the distances to the block atoms.
include
=
-
1
;
else
fvec8
dx
,
dy
,
dz
,
r2
;
include
=
ivec8
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
,
excl
&
16
?
0
:
-
1
,
excl
&
32
?
0
:
-
1
,
excl
&
64
?
0
:
-
1
,
excl
&
128
?
0
:
-
1
);
getDeltaR
(
&
posq
[
4
*
atom
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
ivec8
include
;
if
(
!
any
(
include
))
char
excl
=
exclusions
[
i
];
continue
;
// No interactions to compute.
if
(
excl
==
0
)
include
=
-
1
;
// Compute the interactions.
else
include
=
ivec8
(
excl
&
1
?
0
:
-
1
,
excl
&
2
?
0
:
-
1
,
excl
&
4
?
0
:
-
1
,
excl
&
8
?
0
:
-
1
,
excl
&
16
?
0
:
-
1
,
excl
&
32
?
0
:
-
1
,
excl
&
64
?
0
:
-
1
,
excl
&
128
?
0
:
-
1
);
fvec8
r
=
sqrt
(
r2
);
include
=
include
&
(
r2
<
cutoffDistance
*
cutoffDistance
);
fvec8
inverseR
=
fvec8
(
1.0
f
)
/
r
;
if
(
!
any
(
include
))
fvec8
energy
,
dEdR
;
continue
;
// No interactions to compute.
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
// Compute the interactions.
fvec8
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec8
sig2
=
inverseR
*
sig
;
fvec8
r
=
sqrt
(
r2
);
sig2
*=
sig2
;
fvec8
inverseR
=
fvec8
(
1.0
f
)
/
r
;
fvec8
sig6
=
sig2
*
sig2
*
sig2
;
fvec8
energy
,
dEdR
;
fvec8
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
if
(
atomEpsilon
!=
0.0
f
)
{
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
fvec8
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
if
(
useSwitch
)
{
fvec8
sig2
=
inverseR
*
sig
;
fvec8
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
sig2
*=
sig2
;
fvec8
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec8
sig6
=
sig2
*
sig2
*
sig2
;
fvec8
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
fvec8
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
*=
switchValue
;
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
}
if
(
useSwitch
)
{
}
fvec8
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
else
{
fvec8
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
energy
=
0.0
f
;
fvec8
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
0.0
f
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
}
energy
*=
switchValue
;
fvec8
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
}
dEdR
+=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
}
dEdR
*=
inverseR
*
inverseR
;
else
{
energy
=
0.0
f
;
// Accumulate energies.
dEdR
=
0.0
f
;
}
fvec8
one
(
1.0
f
);
fvec8
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
if
(
totalEnergy
)
{
dEdR
+=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
dEdR
*=
inverseR
*
inverseR
;
energy
=
blend
(
0.0
f
,
energy
,
include
);
*
totalEnergy
+=
dot8
(
energy
,
one
);
// Accumulate energies.
}
fvec8
one
(
1.0
f
);
// Accumulate forces.
if
(
totalEnergy
)
{
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
energy
=
blend
(
0.0
f
,
energy
,
include
);
fvec8
fx
=
dx
*
dEdR
;
*
totalEnergy
+=
dot8
(
energy
,
one
);
fvec8
fy
=
dy
*
dEdR
;
}
fvec8
fz
=
dz
*
dEdR
;
blockAtomForceX
+=
fx
;
// Accumulate forces.
blockAtomForceY
+=
fy
;
blockAtomForceZ
+=
fz
;
dEdR
=
blend
(
0.0
f
,
dEdR
,
include
);
float
*
atomForce
=
forces
+
4
*
atom
;
fvec8
fx
=
dx
*
dEdR
;
atomForce
[
0
]
-=
dot8
(
fx
,
one
);
fvec8
fy
=
dy
*
dEdR
;
atomForce
[
1
]
-=
dot8
(
fy
,
one
);
fvec8
fz
=
dz
*
dEdR
;
atomForce
[
2
]
-=
dot8
(
fz
,
one
);
blockAtomForceX
+=
fx
;
}
blockAtomForceY
+=
fy
;
blockAtomForceZ
+=
fz
;
// Record the forces on the block atoms.
float
*
atomForce
=
forces
+
4
*
atom
;
atomForce
[
0
]
-=
dot8
(
fx
,
one
);
fvec4
f
[
8
];
atomForce
[
1
]
-=
dot8
(
fy
,
one
);
transpose
(
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
,
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
],
f
[
4
],
f
[
5
],
f
[
6
],
f
[
7
]);
atomForce
[
2
]
-=
dot8
(
fz
,
one
);
for
(
int
j
=
0
;
j
<
8
;
j
++
)
}
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
// Record the forces on the block atoms.
void
CpuNonbondedForceVec8
::
getDeltaR
(
const
float
*
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
fvec4
f
[
8
];
dx
=
x
-
posI
[
0
];
transpose
(
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
,
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
],
f
[
4
],
f
[
5
],
f
[
6
],
f
[
7
]);
dy
=
y
-
posI
[
1
];
for
(
int
j
=
0
;
j
<
8
;
j
++
)
dz
=
z
-
posI
[
2
];
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
if
(
periodic
)
{
}
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
void
CpuNonbondedForceVec8
::
getDeltaR
(
const
float
*
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
dx
=
x
-
posI
[
0
];
}
dy
=
y
-
posI
[
1
];
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
dz
=
z
-
posI
[
2
];
}
if
(
periodic
)
{
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
fvec8
CpuNonbondedForceVec8
::
erfcApprox
(
const
fvec8
&
x
)
{
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
}
// error of 3e-7.
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
}
fvec8
t
=
1.0
f
+
(
0.0705230784
f
+
(
0.0422820123
f
+
(
0.0092705272
f
+
(
0.0001520143
f
+
(
0.0002765672
f
+
0.0000430638
f
*
x
)
*
x
)
*
x
)
*
x
)
*
x
)
*
x
;
t
*=
t
;
fvec8
CpuNonbondedForceVec8
::
erfcApprox
(
const
fvec8
&
x
)
{
t
*=
t
;
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
t
*=
t
;
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
return
1.0
f
/
(
t
*
t
);
// error of 3e-7.
}
fvec8
t
=
1.0
f
+
(
0.0705230784
f
+
(
0.0422820123
f
+
(
0.0092705272
f
+
(
0.0001520143
f
+
(
0.0002765672
f
+
0.0000430638
f
*
x
)
*
x
)
*
x
)
*
x
)
*
x
)
*
x
;
fvec8
CpuNonbondedForceVec8
::
ewaldScaleFunction
(
const
fvec8
&
x
)
{
t
*=
t
;
// Compute the tabulated Ewald scale factor: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
t
*=
t
;
t
*=
t
;
fvec8
x1
=
x
*
ewaldDXInv
;
return
1.0
f
/
(
t
*
t
);
ivec8
index
=
min
(
floor
(
x1
),
NUM_TABLE_POINTS
);
}
fvec8
coeff2
=
x1
-
index
;
fvec8
coeff1
=
1.0
f
-
coeff2
;
fvec8
CpuNonbondedForceVec8
::
ewaldScaleFunction
(
const
fvec8
&
x
)
{
ivec4
indexLower
=
index
.
lowerVec
();
// Compute the tabulated Ewald scale factor: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
ivec4
indexUpper
=
index
.
upperVec
();
fvec4
t1
(
&
ewaldScaleTable
[
indexLower
[
0
]]);
fvec8
x1
=
x
*
ewaldDXInv
;
fvec4
t2
(
&
ewaldScaleTable
[
indexLower
[
1
]]);
ivec8
index
=
min
(
floor
(
x1
),
NUM_TABLE_POINTS
);
fvec4
t3
(
&
ewaldScaleTable
[
indexLower
[
2
]]);
fvec8
coeff2
=
x1
-
index
;
fvec4
t4
(
&
ewaldScaleTable
[
indexLower
[
3
]]);
fvec8
coeff1
=
1.0
f
-
coeff2
;
fvec4
t5
(
&
ewaldScaleTable
[
indexUpper
[
0
]]);
ivec4
indexLower
=
index
.
lowerVec
();
fvec4
t6
(
&
ewaldScaleTable
[
indexUpper
[
1
]]);
ivec4
indexUpper
=
index
.
upperVec
();
fvec4
t7
(
&
ewaldScaleTable
[
indexUpper
[
2
]]);
fvec4
t1
(
&
ewaldScaleTable
[
indexLower
[
0
]]);
fvec4
t8
(
&
ewaldScaleTable
[
indexUpper
[
3
]]);
fvec4
t2
(
&
ewaldScaleTable
[
indexLower
[
1
]]);
fvec8
s1
,
s2
,
s3
,
s4
;
fvec4
t3
(
&
ewaldScaleTable
[
indexLower
[
2
]]);
transpose
(
t1
,
t2
,
t3
,
t4
,
t5
,
t6
,
t7
,
t8
,
s1
,
s2
,
s3
,
s4
);
fvec4
t4
(
&
ewaldScaleTable
[
indexLower
[
3
]]);
return
coeff1
*
s1
+
coeff2
*
s2
;
fvec4
t5
(
&
ewaldScaleTable
[
indexUpper
[
0
]]);
}
fvec4
t6
(
&
ewaldScaleTable
[
indexUpper
[
1
]]);
#endif
fvec4
t7
(
&
ewaldScaleTable
[
indexUpper
[
2
]]);
fvec4
t8
(
&
ewaldScaleTable
[
indexUpper
[
3
]]);
fvec8
s1
,
s2
,
s3
,
s4
;
transpose
(
t1
,
t2
,
t3
,
t4
,
t5
,
t6
,
t7
,
t8
,
s1
,
s2
,
s3
,
s4
);
return
coeff1
*
s1
+
coeff2
*
s2
;
}
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment