Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
0b5d58d7
Commit
0b5d58d7
authored
May 27, 2020
by
Charlles Abreu
Browse files
Conflict resolution in TestSplineFilter.cpp
parents
9026dbe7
b0d13582
Changes
105
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
811 additions
and
244 deletions
+811
-244
openmmapi/include/openmm/internal/vectorize.h
openmmapi/include/openmm/internal/vectorize.h
+2
-2
openmmapi/include/openmm/internal/vectorize8.h
openmmapi/include/openmm/internal/vectorize8.h
+145
-4
openmmapi/include/openmm/internal/vectorize_neon.h
openmmapi/include/openmm/internal/vectorize_neon.h
+105
-1
openmmapi/include/openmm/internal/vectorize_pnacl.h
openmmapi/include/openmm/internal/vectorize_pnacl.h
+93
-1
openmmapi/include/openmm/internal/vectorize_ppc.h
openmmapi/include/openmm/internal/vectorize_ppc.h
+95
-3
openmmapi/include/openmm/internal/vectorize_sse.h
openmmapi/include/openmm/internal/vectorize_sse.h
+104
-4
openmmapi/src/CompoundIntegrator.cpp
openmmapi/src/CompoundIntegrator.cpp
+13
-1
openmmapi/src/ContextImpl.cpp
openmmapi/src/ContextImpl.cpp
+2
-0
openmmapi/src/CustomIntegrator.cpp
openmmapi/src/CustomIntegrator.cpp
+25
-0
openmmapi/src/NoseHooverChain.cpp
openmmapi/src/NoseHooverChain.cpp
+4
-1
openmmapi/src/NoseHooverIntegrator.cpp
openmmapi/src/NoseHooverIntegrator.cpp
+17
-28
openmmapi/src/SplineFitter.cpp
openmmapi/src/SplineFitter.cpp
+6
-8
platforms/common/include/openmm/common/CommonKernels.h
platforms/common/include/openmm/common/CommonKernels.h
+33
-45
platforms/common/include/openmm/common/IntegrationUtilities.h
...forms/common/include/openmm/common/IntegrationUtilities.h
+0
-7
platforms/common/src/CommonKernels.cpp
platforms/common/src/CommonKernels.cpp
+151
-80
platforms/common/src/IntegrationUtilities.cpp
platforms/common/src/IntegrationUtilities.cpp
+0
-42
platforms/common/src/kernels/cmapTorsionForce.cc
platforms/common/src/kernels/cmapTorsionForce.cc
+2
-2
platforms/common/src/kernels/customIntegratorPerDof.cc
platforms/common/src/kernels/customIntegratorPerDof.cc
+4
-4
platforms/common/src/kernels/langevinMiddle.cc
platforms/common/src/kernels/langevinMiddle.cc
+1
-1
platforms/common/src/kernels/noseHooverChain.cc
platforms/common/src/kernels/noseHooverChain.cc
+9
-10
No files found.
openmmapi/include/openmm/internal/vectorize.h
View file @
0b5d58d7
...
...
@@ -32,7 +32,7 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#if defined(__ARM__)
#if defined(__ARM__)
|| defined(__ARM64__)
#include "vectorize_neon.h"
#elif defined(__PPC__)
#include "vectorize_ppc.h"
...
...
openmmapi/include/openmm/internal/vectorize8.h
View file @
0b5d58d7
...
...
@@ -46,11 +46,23 @@ class fvec8 {
public:
__m256
val
;
fvec8
()
{}
fvec8
()
=
default
;
fvec8
(
float
v
)
:
val
(
_mm256_set1_ps
(
v
))
{}
fvec8
(
float
v1
,
float
v2
,
float
v3
,
float
v4
,
float
v5
,
float
v6
,
float
v7
,
float
v8
)
:
val
(
_mm256_set_ps
(
v8
,
v7
,
v6
,
v5
,
v4
,
v3
,
v2
,
v1
))
{}
fvec8
(
__m256
v
)
:
val
(
v
)
{}
fvec8
(
const
float
*
v
)
:
val
(
_mm256_loadu_ps
(
v
))
{}
/** Create a vector by gathering individual indexes of data from a table. Element i of the vector will
* be loaded from table[idx[i]].
* @param table The table from which to do a lookup.
* @param indexes The indexes to gather.
*/
fvec8
(
const
float
*
table
,
const
int
idx
[
8
])
{
// :TODO: Using int32_t explicitly as the index type could allow the real gather instruction to be used.
// Use gather and static assert? Conditional code?
val
=
_mm256_setr_ps
(
table
[
idx
[
0
]],
table
[
idx
[
1
]],
table
[
idx
[
2
]],
table
[
idx
[
3
]],
table
[
idx
[
4
]],
table
[
idx
[
5
]],
table
[
idx
[
6
]],
table
[
idx
[
7
]]);
}
operator
__m256
()
const
{
return
val
;
}
...
...
@@ -115,6 +127,12 @@ public:
return
_mm256_cmp_ps
(
val
,
other
,
_CMP_LE_OQ
);
}
operator
ivec8
()
const
;
/**
* Convert an integer bitmask into a full vector of elements which can be used
* by the blend function.
*/
static
fvec8
expandBitsToMask
(
int
bitmask
);
};
/**
...
...
@@ -160,6 +178,19 @@ inline ivec8::operator fvec8() const {
return
_mm256_cvtepi32_ps
(
val
);
}
inline
fvec8
fvec8
::
expandBitsToMask
(
int
bitmask
)
{
// Put a copy of bit 0 in the first element, bit 1 in the second, and so on.
const
auto
expandedBits
=
_mm256_and_ps
(
_mm256_castsi256_ps
(
_mm256_set1_epi8
(
bitmask
)),
_mm256_castsi256_ps
(
_mm256_setr_epi32
(
1
,
2
,
4
,
8
,
16
,
32
,
64
,
128
)));
// The individual bits are essentially extremely small floating-point values. By comparing against zero
// (even a floating-point zero), the individual bits are turned into a complete element mask.
const
auto
elementMask
=
_mm256_cmp_ps
(
expandedBits
,
__m256
(),
_CMP_NEQ_OQ
);
return
elementMask
;
}
// Functions that operate on fvec8s.
static
inline
fvec8
floor
(
const
fvec8
&
v
)
{
...
...
@@ -208,6 +239,11 @@ static inline float dot8(const fvec8& v1, const fvec8& v2) {
return
_mm_cvtss_f32
(
result
.
lowerVec
())
+
_mm_cvtss_f32
(
result
.
upperVec
());
}
static
inline
float
reduceAdd
(
const
fvec8
v
)
{
// :TODO: There are more efficient ways to do this.
return
dot8
(
v
,
fvec8
(
1.0
f
));
}
static
inline
void
transpose
(
const
fvec4
&
in1
,
const
fvec4
&
in2
,
const
fvec4
&
in3
,
const
fvec4
&
in4
,
const
fvec4
&
in5
,
const
fvec4
&
in6
,
const
fvec4
&
in7
,
const
fvec4
&
in8
,
fvec8
&
out1
,
fvec8
&
out2
,
fvec8
&
out3
,
fvec8
&
out4
)
{
fvec4
i1
=
in1
,
i2
=
in2
,
i3
=
in3
,
i4
=
in4
;
fvec4
i5
=
in5
,
i6
=
in6
,
i7
=
in7
,
i8
=
in8
;
...
...
@@ -231,6 +267,14 @@ static inline void transpose(const fvec4& in1, const fvec4& in2, const fvec4& in
out4
=
_mm256_insertf128_ps
(
out4
,
i8
,
1
);
}
/** Given a vec4[8] input array, generate 4 vec8 outputs. The first output contains all the first elements
* the second output the second elements, and so on. Note that the prototype is essentially differing only
* in output type so it can be overloaded in other SIMD fvec types.
*/
static
inline
void
transpose
(
const
fvec4
in
[
8
],
fvec8
&
out1
,
fvec8
&
out2
,
fvec8
&
out3
,
fvec8
&
out4
)
{
transpose
(
in
[
0
],
in
[
1
],
in
[
2
],
in
[
3
],
in
[
4
],
in
[
5
],
in
[
6
],
in
[
7
],
out1
,
out2
,
out3
,
out4
);
}
static
inline
void
transpose
(
const
fvec8
&
in1
,
const
fvec8
&
in2
,
const
fvec8
&
in3
,
const
fvec8
&
in4
,
fvec4
&
out1
,
fvec4
&
out2
,
fvec4
&
out3
,
fvec4
&
out4
,
fvec4
&
out5
,
fvec4
&
out6
,
fvec4
&
out7
,
fvec4
&
out8
)
{
out1
=
in1
.
lowerVec
();
out2
=
in2
.
lowerVec
();
...
...
@@ -244,6 +288,13 @@ static inline void transpose(const fvec8& in1, const fvec8& in2, const fvec8& in
_MM_TRANSPOSE4_PS
(
out5
,
out6
,
out7
,
out8
);
}
/**
* Given 4 input vectors of 8 elements, transpose them to form 8 output vectors of 4 elements.
*/
static
inline
void
transpose
(
const
fvec8
&
in1
,
const
fvec8
&
in2
,
const
fvec8
&
in3
,
const
fvec8
&
in4
,
fvec4
out
[
8
])
{
transpose
(
in1
,
in2
,
in3
,
in4
,
out
[
0
],
out
[
1
],
out
[
2
],
out
[
3
],
out
[
4
],
out
[
5
],
out
[
6
],
out
[
7
]);
}
// Functions that operate on ivec8s.
static
inline
bool
any
(
const
ivec8
&
v
)
{
...
...
@@ -268,10 +319,100 @@ static inline fvec8 operator/(float v1, const fvec8& v2) {
return
fvec8
(
v1
)
/
v2
;
}
// Operations for blending fvec8s based on an ivec8.
// Operation for blending fvec8 from a full bitmask.
static
inline
fvec8
blend
(
const
fvec8
&
v1
,
const
fvec8
&
v2
,
const
fvec8
&
mask
)
{
return
fvec8
(
_mm256_blendv_ps
(
v1
.
val
,
v2
.
val
,
mask
.
val
));
}
static
inline
fvec8
blendZero
(
const
fvec8
v
,
const
fvec8
mask
)
{
return
blend
(
0.0
f
,
v
,
mask
);
}
static
inline
fvec8
blend
(
const
fvec8
&
v1
,
const
fvec8
&
v2
,
const
ivec8
&
mask
)
{
return
fvec8
(
_mm256_blendv_ps
(
v1
.
val
,
v2
.
val
,
_mm256_castsi256_ps
(
mask
.
val
)));
/**
* Given a table of floating-point values and a set of indexes, perform a gather read into a pair
* of vectors. The first result vector contains the values at the given indexes, and the second
* result vector contains the values from each respective index+1.
*/
static
inline
void
gatherVecPair
(
const
float
*
table
,
const
ivec8
index
,
fvec8
&
out0
,
fvec8
&
out1
)
{
const
auto
lower
=
index
.
lowerVec
();
const
auto
upper
=
index
.
upperVec
();
// Gather all the separate memory data together. Each vector will have two values
// which get used, and two which are ultimately discarded.
fvec4
t0
(
table
+
lower
[
0
]);
fvec4
t1
(
table
+
lower
[
1
]);
fvec4
t2
(
table
+
lower
[
2
]);
fvec4
t3
(
table
+
lower
[
3
]);
fvec4
t4
(
table
+
upper
[
0
]);
fvec4
t5
(
table
+
upper
[
1
]);
fvec4
t6
(
table
+
upper
[
2
]);
fvec4
t7
(
table
+
upper
[
3
]);
// Tranposing the 8 vectors above will put all the first elements into one output
// vector, all the second elements into the next vector and so on.
fvec8
discard0
,
discard1
;
transpose
(
t0
,
t1
,
t2
,
t3
,
t4
,
t5
,
t6
,
t7
,
out0
,
out1
,
discard0
,
discard1
);
}
/**
* Given 3 vectors of floating-point data, reduce them to a single 3-element position
* value by adding all the elements in each vector. Given inputs of:
* X0 X1 X2 X3 X4 X5 X6 X7
* Y0 Y1 Y2 Y3 Y4 Y5 Y6 Y7
* Z0 Z1 Z2 Z3 Z4 Z5 Z6 Z7
* Each vector of values needs to be summed into a single value, and then stored into
* the output vector:
* output[0] = (X0 + X1 + X2 + ...)
* output[1] = (Y0 + Y1 + Y2 + ...)
* output[2] = (Z0 + Z1 + Z2 + ...)
* output[3] = undefined
*/
static
inline
fvec4
reduceToVec3
(
const
fvec8
x
,
const
fvec8
y
,
const
fvec8
z
)
{
// The general strategy for a vector reduce-add operation is to take values from
// different parts of the vector and overlap them a different part of the vector and then
// add together. Repeat this several times until all values have been summed. Initially 8
// values can be reduced to 4, 4 to 2, and 2 to 1. The following code essentially does this
// but exploits two things:
// - having multiple inputs means that some vectors can be combined together to amortise the
// cost of shuffling.
// - the output destinations are part of anther vector, so accumulate into the correct
// offsets to start with, instead of reducing to position 0 and re-inserting to the correct
// output location.
//
// As far as possible, accumulate x, y and z into their output positions in both the top and
// bottom 128-bits to exploit in-lane permutes as much as possible early on.
// Shuffle X and Z together to form one reduced vector.
// X2 X3 Z0 Z1 X6 X7 Z4 Z5
const
auto
xzshuf
=
_mm256_shuffle_ps
(
x
,
z
,
0b01001110
);
// Blend X and Z together to form another reduced vector, overlapping the previous.
// X0 X1 Z2 Z3 X4 X5 Z6 Z7
const
auto
xzblend
=
_mm256_blend_ps
(
x
,
z
,
0b11001100
);
// Add them together to form:
// (X0 + X2) (X1 + X3) (Z0 + Z2) (Z1 + Z3) etc.
const
auto
xz0
=
_mm256_add_ps
(
xzshuf
,
xzblend
);
// Now there's only one vector containing all values. Shuffle again to form another overlap,
// and then add.
const
auto
xz1
=
_mm256_permute_ps
(
xz0
,
0b00110001
);
const
auto
xz2
=
_mm256_add_ps
(
xz0
,
xz1
);
// Work on Z on its own as there's nothing else to work with. Start by permuting it to
// form some overlaps, and then add:
// (Y0 + Y2) (Y1 + Y3) - - (Y4 + Y6) (Y5 + Y7) - -
const
auto
yshuf
=
_mm256_permute_ps
(
y
,
0b11101110
);
const
auto
y0
=
_mm256_add_ps
(
yshuf
,
y
);
// Shift the bottom float of each pair to the right, into the correct Y location.
const
auto
y1
=
_mm256_permute_ps
(
y0
,
0b00000000
);
const
auto
y2
=
_mm256_add_ps
(
y0
,
y1
);
// Blend the results together to give a complete set of XYZ in the correct respective positions
// of both top and bottom 128-bit lanes.
const
auto
laneResult
=
fvec8
(
_mm256_blend_ps
(
xz2
,
y2
,
0b00100010
));
return
laneResult
.
lowerVec
()
+
laneResult
.
upperVec
();
}
#endif
/*OPENMM_VECTORIZE8_H_*/
openmmapi/include/openmm/internal/vectorize_neon.h
View file @
0b5d58d7
...
...
@@ -32,7 +32,12 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#ifdef __ANDROID__
#include <cpu-features.h>
#else
#include <sys/auxv.h>
#include <asm/hwcap.h>
#endif
#include <arm_neon.h>
#include <cmath>
...
...
@@ -48,8 +53,16 @@ float32x4_t log_ps(float32x4_t);
* Determine whether ivec4 and fvec4 are supported on this processor.
*/
static
bool
isVec4Supported
()
{
#ifdef __ANDROID__
uint64_t
features
=
android_getCpuFeatures
();
return
(
features
&
ANDROID_CPU_ARM_FEATURE_NEON
)
!=
0
;
#elif defined(__ARM__)
unsigned
long
features
=
getauxval
(
AT_HWCAP
);
return
(
features
&
HWCAP_NEON
)
!=
0
;
#else
unsigned
long
features
=
getauxval
(
AT_HWCAP
);
return
(
features
&
HWCAP_ASIMD
)
!=
0
;
#endif
}
class
ivec4
;
...
...
@@ -61,7 +74,7 @@ class fvec4 {
public:
float32x4_t
val
;
fvec4
()
{}
fvec4
()
=
default
;
fvec4
(
float
v
)
:
val
(
vdupq_n_f32
(
v
))
{}
fvec4
(
float
v1
,
float
v2
,
float
v3
,
float
v4
)
{
float
v
[]
=
{
v1
,
v2
,
v3
,
v4
};
...
...
@@ -72,6 +85,16 @@ public:
operator
float32x4_t
()
const
{
return
val
;
}
/**
* Create a vector by gathering individual indexes of data from a table. Element i of the vector will
* be loaded from table[idx[i]].
* @param table The table from which to do a lookup.
* @param indexes The indexes to gather.
*/
fvec4
(
const
float
*
table
,
const
int
idx
[
4
])
:
fvec4
(
table
[
idx
[
0
]],
table
[
idx
[
1
]],
table
[
idx
[
2
]],
table
[
idx
[
3
]])
{
}
float
operator
[](
int
i
)
const
{
switch
(
i
)
{
case
0
:
...
...
@@ -88,6 +111,16 @@ public:
void
store
(
float
*
v
)
const
{
vst1q_f32
(
v
,
val
);
}
/**
* Store only the lower three elements of the vector.
*/
void
storeVec3
(
float
*
v
)
const
{
v
[
0
]
=
vgetq_lane_f32
(
val
,
0
);
v
[
1
]
=
vgetq_lane_f32
(
val
,
1
);
v
[
2
]
=
vgetq_lane_f32
(
val
,
2
);
}
fvec4
operator
+
(
const
fvec4
&
other
)
const
{
return
vaddq_f32
(
val
,
other
);
}
...
...
@@ -146,6 +179,13 @@ public:
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vcleq_f32
(
val
,
other
)));
}
operator
ivec4
()
const
;
/**
* Convert an integer bitmask into a full vector of elements which can be used
* by the blend function.
*/
static
ivec4
expandBitsToMask
(
int
bitmask
);
};
/**
...
...
@@ -241,6 +281,12 @@ inline ivec4::operator fvec4() const {
return
fvec4
(
vcvtq_f32_s32
(
val
));
}
inline
ivec4
fvec4
::
expandBitsToMask
(
int
bitmask
)
{
return
ivec4
(
bitmask
&
1
?
-
1
:
0
,
bitmask
&
2
?
-
1
:
0
,
bitmask
&
4
?
-
1
:
0
,
bitmask
&
8
?
-
1
:
0
);
}
// Functions that operate on fvec4s.
static
inline
fvec4
min
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
...
...
@@ -284,6 +330,10 @@ static inline float dot4(const fvec4& v1, const fvec4& v2) {
return
vgetq_lane_f32
(
result
,
0
)
+
vgetq_lane_f32
(
result
,
1
)
+
vgetq_lane_f32
(
result
,
2
)
+
vgetq_lane_f32
(
result
,
3
);
}
static
inline
float
reduceAdd
(
const
fvec4
v
)
{
return
dot4
(
v
,
fvec4
(
1.0
f
));
}
static
inline
fvec4
cross
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
[
1
]
*
v2
[
2
]
-
v1
[
2
]
*
v2
[
1
],
v1
[
2
]
*
v2
[
0
]
-
v1
[
0
]
*
v2
[
2
],
...
...
@@ -301,6 +351,22 @@ static inline void transpose(fvec4& v1, fvec4& v2, fvec4& v3, fvec4& v4) {
v4
=
t4
.
val
[
1
];
}
/**
* Out-of-place transpose from an array into named variables.
*/
static
inline
void
transpose
(
const
fvec4
in
[
4
],
fvec4
&
v0
,
fvec4
&
v1
,
fvec4
&
v2
,
fvec4
&
v3
)
{
v0
=
in
[
0
];
v1
=
in
[
1
];
v2
=
in
[
2
];
v3
=
in
[
3
];
transpose
(
v0
,
v1
,
v2
,
v3
);
}
/**
* Out-of-place transpose from named variables into an array.
*/
static
inline
void
transpose
(
const
fvec4
v0
,
const
fvec4
v1
,
const
fvec4
v2
,
const
fvec4
v3
,
fvec4
out
[
4
])
{
out
[
0
]
=
v0
;
out
[
1
]
=
v1
;
out
[
2
]
=
v2
;
out
[
3
]
=
v3
;
transpose
(
out
[
0
],
out
[
1
],
out
[
2
],
out
[
3
]);
}
// Functions that operate on ivec4s.
static
inline
ivec4
min
(
const
ivec4
&
v1
,
const
ivec4
&
v2
)
{
...
...
@@ -343,6 +409,10 @@ static inline fvec4 blend(const fvec4& v1, const fvec4& v2, const ivec4& mask) {
return
vbslq_f32
(
vreinterpretq_u32_s32
(
mask
),
v2
,
v1
);
}
static
inline
fvec4
blendZero
(
const
fvec4
v
,
const
ivec4
mask
)
{
return
blend
(
0.0
f
,
v
,
mask
);
}
// These are at the end since they involve other functions defined above.
static
inline
fvec4
round
(
const
fvec4
&
v
)
{
...
...
@@ -361,4 +431,38 @@ static inline fvec4 ceil(const fvec4& v) {
return
rounded
+
blend
(
0.0
f
,
1.0
f
,
rounded
<
v
);
}
/* Given a table of floating-point values and a set of indexes, perform a gather read into a pair
* of vectors. The first result vector contains the values at the given indexes, and the second
* result vector contains the values from each respective index+1.
*/
static
inline
void
gatherVecPair
(
const
float
*
table
,
const
ivec4
index
,
fvec4
&
out0
,
fvec4
&
out1
)
{
fvec4
t0
(
table
+
index
[
0
]);
fvec4
t1
(
table
+
index
[
1
]);
fvec4
t2
(
table
+
index
[
2
]);
fvec4
t3
(
table
+
index
[
3
]);
transpose
(
t0
,
t1
,
t2
,
t3
);
out0
=
t0
;
out1
=
t1
;
}
/**
* Given 3 vectors of floating-point data, reduce them to a single 3-element position
* value by adding all the elements in each vector. Given inputs of:
* X0 X1 X2 X3
* Y0 Y1 Y2 Y3
* Z0 Z1 Z2 Z3
* Each vector of values needs to be summed into a single value, and then stored into
* the output vector:
* output[0] = (X0 + X1 + X2 + X3)
* output[1] = (Y0 + Y1 + Y2 + Y3)
* output[2] = (Z0 + Z1 + Z2 + Z3)
* output[3] = undefined
*/
static
inline
fvec4
reduceToVec3
(
const
fvec4
x
,
const
fvec4
y
,
const
fvec4
z
)
{
const
auto
nx
=
reduceAdd
(
x
);
const
auto
ny
=
reduceAdd
(
y
);
const
auto
nz
=
reduceAdd
(
z
);
return
fvec4
(
nx
,
ny
,
nz
,
0.0
);
}
#endif
/*OPENMM_VECTORIZE_NEON_H_*/
openmmapi/include/openmm/internal/vectorize_pnacl.h
View file @
0b5d58d7
...
...
@@ -56,7 +56,7 @@ class fvec4 {
public:
__m128
val
;
fvec4
()
{}
fvec4
()
=
default
;
fvec4
(
float
v
)
{
val
=
{
v
,
v
,
v
,
v
};
}
...
...
@@ -67,6 +67,16 @@ public:
fvec4
(
const
float
*
v
)
{
val
=
*
((
__m128
*
)
v
);
}
/**
* Create a vector by gathering individual indexes of data from a table. Element i of the vector will
* be loaded from table[idx[i]].
* @param table The table from which to do a lookup.
* @param indexes The indexes to gather.
*/
fvec4
(
const
float
*
table
,
const
int
idx
[
4
])
:
fvec4
(
table
[
idx
[
0
]],
table
[
idx
[
1
]],
table
[
idx
[
2
]],
table
[
idx
[
3
]])
{
}
operator
__m128
()
const
{
return
val
;
}
...
...
@@ -76,6 +86,15 @@ public:
void
store
(
float
*
v
)
const
{
*
((
__m128
*
)
v
)
=
val
;
}
/**
* Store only the lower three elements of the vector.
*/
void
storeVec3
(
float
*
v
)
const
{
v
[
0
]
=
val
[
0
];
v
[
1
]
=
val
[
1
];
v
[
2
]
=
val
[
2
];
}
fvec4
operator
+
(
const
fvec4
&
other
)
const
{
return
val
+
other
;
}
...
...
@@ -116,6 +135,13 @@ public:
ivec4
operator
>=
(
const
fvec4
&
other
)
const
;
ivec4
operator
<=
(
const
fvec4
&
other
)
const
;
operator
ivec4
()
const
;
/**
* Convert an integer bitmask into a full vector of elements which can be used
* by the blend function.
*/
static
ivec4
expandBitsToMask
(
int
bitmask
);
};
/**
...
...
@@ -227,6 +253,13 @@ inline ivec4::operator fvec4() const {
return
__builtin_convertvector
(
val
,
__m128
);
}
inline
ivec4
fvec4
::
expandBitsToMask
(
int
bitmask
)
{
return
ivec4
(
bitmask
&
1
?
-
1
:
0
,
bitmask
&
2
?
-
1
:
0
,
bitmask
&
4
?
-
1
:
0
,
bitmask
&
8
?
-
1
:
0
);
}
// Functions that operate on fvec4s.
static
inline
fvec4
abs
(
const
fvec4
&
v
)
{
...
...
@@ -252,6 +285,10 @@ static inline float dot4(const fvec4& v1, const fvec4& v2) {
return
temp
[
0
]
+
temp
[
1
];
}
static
inline
float
reduceAdd
(
const
fvec4
v
)
{
return
dot4
(
v
,
fvec4
(
1.0
f
));
}
static
inline
fvec4
cross
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
__m128
temp
=
v2
.
val
*
__builtin_shufflevector
(
v1
.
val
,
v1
.
val
,
2
,
0
,
1
,
3
)
-
v1
.
val
*
__builtin_shufflevector
(
v2
.
val
,
v2
.
val
,
2
,
0
,
1
,
3
);
...
...
@@ -269,6 +306,22 @@ static inline void transpose(fvec4& v1, fvec4& v2, fvec4& v3, fvec4& v4) {
v4
=
__builtin_shufflevector
(
a2
,
a4
,
2
,
3
,
6
,
7
);
}
/**
* Out-of-place transpose from an array into named variables.
*/
static
inline
void
transpose
(
const
fvec4
in
[
4
],
fvec4
&
v0
,
fvec4
&
v1
,
fvec4
&
v2
,
fvec4
&
v3
)
{
v0
=
in
[
0
];
v1
=
in
[
1
];
v2
=
in
[
2
];
v3
=
in
[
3
];
transpose
(
v0
,
v1
,
v2
,
v3
);
}
/**
* Out-of-place transpose from named variables into an array.
*/
static
inline
void
transpose
(
const
fvec4
v0
,
const
fvec4
v1
,
const
fvec4
v2
,
const
fvec4
v3
,
fvec4
out
[
4
])
{
out
[
0
]
=
v0
;
out
[
1
]
=
v1
;
out
[
2
]
=
v2
;
out
[
3
]
=
v3
;
transpose
(
out
[
0
],
out
[
1
],
out
[
2
],
out
[
3
]);
}
// Functions that operate on ivec4s.
static
inline
ivec4
min
(
const
ivec4
&
v1
,
const
ivec4
&
v2
)
{
...
...
@@ -312,6 +365,10 @@ static inline fvec4 blend(const fvec4& v1, const fvec4& v2, const __m128i& mask)
return
(
__m128
)
((
mask
&
(
__m128i
)
v2
)
+
((
ivec4
(
0xFFFFFFFF
)
-
ivec4
(
mask
))
&
(
__m128i
)
v1
));
}
static
inline
fvec4
blendZero
(
const
fvec4
v
,
const
ivec4
mask
)
{
return
blend
(
0.0
f
,
v
,
mask
);
}
// These are at the end since they involve other functions defined above.
static
inline
fvec4
min
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
...
...
@@ -358,5 +415,40 @@ static inline fvec4 sqrt(const fvec4& v) {
return
rsqrt
(
v
)
*
v
;
}
/**
* Given a table of floating-point values and a set of indexes, perform a gather read into a pair
* of vectors. The first result vector contains the values at the given indexes, and the second
* result vector contains the values from each respective index+1.
*/
static
inline
void
gatherVecPair
(
const
float
*
table
,
const
ivec4
index
,
fvec4
&
out0
,
fvec4
&
out1
)
{
fvec4
t0
(
table
+
index
[
0
]);
fvec4
t1
(
table
+
index
[
1
]);
fvec4
t2
(
table
+
index
[
2
]);
fvec4
t3
(
table
+
index
[
3
]);
transpose
(
t0
,
t1
,
t2
,
t3
);
out0
=
t0
;
out1
=
t1
;
}
/**
* Given 3 vectors of floating-point data, reduce them to a single 3-element position
* value by adding all the elements in each vector. Given inputs of:
* X0 X1 X2 X3
* Y0 Y1 Y2 Y3
* Z0 Z1 Z2 Z3
* Each vector of values needs to be summed into a single value, and then stored into
* the output vector:
* output[0] = (X0 + X1 + X2 + X3)
* output[1] = (Y0 + Y1 + Y2 + Y3)
* output[2] = (Z0 + Z1 + Z2 + Z3)
* output[3] = undefined
*/
static
inline
fvec4
reduceToVec3
(
const
fvec4
x
,
const
fvec4
y
,
const
fvec4
z
)
{
const
auto
nx
=
reduceAdd
(
x
);
const
auto
ny
=
reduceAdd
(
y
);
const
auto
nz
=
reduceAdd
(
z
);
return
fvec4
(
nx
,
ny
,
nz
,
0.0
);
}
#endif
/*OPENMM_VECTORIZE_PNACL_H_*/
openmmapi/include/openmm/internal/vectorize_ppc.h
View file @
0b5d58d7
...
...
@@ -57,7 +57,7 @@ class fvec4 {
public:
__m128
val
;
fvec4
()
{}
fvec4
()
=
default
;
fvec4
(
float
v
)
{
val
=
(
__m128
)
{
v
,
v
,
v
,
v
};
}
...
...
@@ -68,6 +68,16 @@ public:
fvec4
(
const
float
*
v
)
{
val
=
*
((
__m128
*
)
v
);
}
/**
* Create a vector by gathering individual indexes of data from a table. Element i of the vector will
* be loaded from table[idx[i]].
* @param table The table from which to do a lookup.
* @param indexes The indexes to gather.
*/
fvec4
(
const
float
*
table
,
const
int
idx
[
4
])
:
fvec4
(
table
[
idx
[
0
]],
table
[
idx
[
1
]],
table
[
idx
[
2
]],
table
[
idx
[
3
]])
{
}
operator
__m128
()
const
{
return
val
;
}
...
...
@@ -77,6 +87,16 @@ public:
void
store
(
float
*
v
)
const
{
*
((
__m128
*
)
v
)
=
val
;
}
/**
* Store only the lower three elements of the vector.
*/
void
storeVec3
(
float
*
v
)
const
{
v
[
0
]
=
val
[
0
];
v
[
1
]
=
val
[
1
];
v
[
2
]
=
val
[
2
];
}
fvec4
operator
+
(
const
fvec4
&
other
)
const
{
return
vec_add
(
val
,
other
.
val
);
}
...
...
@@ -117,6 +137,13 @@ public:
ivec4
operator
>=
(
const
fvec4
&
other
)
const
;
ivec4
operator
<=
(
const
fvec4
&
other
)
const
;
operator
ivec4
()
const
;
/***
* Convert an integer bitmask into a full vector of elements which can be used
* by the blend function.
*/
static
ivec4
expandBitsToMask
(
int
bitmask
);
};
/**
...
...
@@ -228,6 +255,13 @@ inline ivec4::operator fvec4() const {
return
(
__m128
)
{(
float
)
val
[
0
],
(
float
)
val
[
1
],
(
float
)
val
[
2
],
(
float
)
val
[
3
]};
}
inline
ivec4
fvec4
::
expandBitsToMask
(
int
bitmask
)
{
return
ivec4
(
bitmask
&
1
?
-
1
:
0
,
bitmask
&
2
?
-
1
:
0
,
bitmask
&
4
?
-
1
:
0
,
bitmask
&
8
?
-
1
:
0
);
}
// Functions that operate on fvec4s.
static
inline
fvec4
abs
(
const
fvec4
&
v
)
{
...
...
@@ -253,6 +287,10 @@ static inline float dot4(const fvec4& v1, const fvec4& v2) {
return
temp
[
0
]
+
temp
[
1
];
}
static
inline
float
reduceAdd
(
const
fvec4
v
)
{
return
dot4
(
v
,
fvec4
(
1.0
f
));
}
static
inline
fvec4
cross
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
vector
unsigned
char
perm
=
(
vector
unsigned
char
)
{
8
,
9
,
10
,
11
,
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
12
,
13
,
14
,
15
};
__m128
temp
=
v2
.
val
*
vec_perm
(
v1
.
val
,
v1
.
val
,
perm
)
-
...
...
@@ -275,6 +313,22 @@ static inline void transpose(fvec4& v1, fvec4& v2, fvec4& v3, fvec4& v4) {
v4
=
vec_perm
(
a2
,
a4
,
perm4
);
}
/**
* Out-of-place transpose from an array into named variables.
*/
static
inline
void
transpose
(
const
fvec4
in
[
4
],
fvec4
&
v0
,
fvec4
&
v1
,
fvec4
&
v2
,
fvec4
&
v3
)
{
v0
=
in
[
0
];
v1
=
in
[
1
];
v2
=
in
[
2
];
v3
=
in
[
3
];
transpose
(
v0
,
v1
,
v2
,
v3
);
}
/**
* Out-of-place transpose from named variables into an array.
*/
static
inline
void
transpose
(
const
fvec4
v0
,
const
fvec4
v1
,
const
fvec4
v2
,
const
fvec4
v3
,
fvec4
out
[
4
])
{
out
[
0
]
=
v0
;
out
[
1
]
=
v1
;
out
[
2
]
=
v2
;
out
[
3
]
=
v3
;
transpose
(
out
[
0
],
out
[
1
],
out
[
2
],
out
[
3
]);
}
// Functions that operate on ivec4s.
static
inline
ivec4
min
(
const
ivec4
&
v1
,
const
ivec4
&
v2
)
{
...
...
@@ -289,8 +343,8 @@ static inline ivec4 abs(const ivec4& v) {
return
vec_abs
(
v
.
val
);
}
static
inline
bool
any
(
const
__m128i
&
v
)
{
return
!
vec_all_eq
(
v
,
ivec4
(
0
).
val
);
static
inline
bool
any
(
const
ivec4
v
)
{
return
!
vec_all_eq
(
v
.
val
,
ivec4
(
0
).
val
);
}
// Mathematical operators involving a scalar and a vector.
...
...
@@ -317,6 +371,10 @@ static inline fvec4 blend(const fvec4& v1, const fvec4& v2, const __m128i& mask)
return
(
__m128
)
((
mask
&
(
__m128i
)
v2
.
val
)
+
((
ivec4
(
0xFFFFFFFF
)
-
ivec4
(
mask
))
&
(
__m128i
)
v1
.
val
).
val
);
}
static
inline
fvec4
blendZero
(
const
fvec4
v
,
const
ivec4
mask
)
{
return
blend
(
0.0
f
,
v
,
mask
);
}
// These are at the end since they involve other functions defined above.
static
inline
fvec4
min
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
...
...
@@ -355,5 +413,39 @@ static inline fvec4 sqrt(const fvec4& v) {
return
vec_sqrt
(
v
.
val
);
}
/* Given a table of floating-point values and a set of indexes, perform a gather read into a pair
* of vectors. The first result vector contains the values at the given indexes, and the second
* result vector contains the values from each respective index+1.
*/
static
inline
void
gatherVecPair
(
const
float
*
table
,
const
ivec4
index
,
fvec4
&
out0
,
fvec4
&
out1
)
{
fvec4
t0
(
table
+
index
[
0
]);
fvec4
t1
(
table
+
index
[
1
]);
fvec4
t2
(
table
+
index
[
2
]);
fvec4
t3
(
table
+
index
[
3
]);
transpose
(
t0
,
t1
,
t2
,
t3
);
out0
=
t0
;
out1
=
t1
;
}
/**
* Given 3 vectors of floating-point data, reduce them to a single 3-element position
* value by adding all the elements in each vector. Given inputs of:
* X0 X1 X2 X3
* Y0 Y1 Y2 Y3
* Z0 Z1 Z2 Z3
* Each vector of values needs to be summed into a single value, and then stored into
* the output vector:
* output[0] = (X0 + X1 + X2 + X3)
* output[1] = (Y0 + Y1 + Y2 + Y3)
* output[2] = (Z0 + Z1 + Z2 + Z3)
* output[3] = undefined
*/
static
inline
fvec4
reduceToVec3
(
const
fvec4
x
,
const
fvec4
y
,
const
fvec4
z
)
{
const
auto
nx
=
reduceAdd
(
x
);
const
auto
ny
=
reduceAdd
(
y
);
const
auto
nz
=
reduceAdd
(
z
);
return
fvec4
(
nx
,
ny
,
nz
,
0.0
);
}
#endif
/*OPENMM_VECTORIZE_PPC_H_*/
openmmapi/include/openmm/internal/vectorize_sse.h
View file @
0b5d58d7
...
...
@@ -32,7 +32,12 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#ifdef __AVX__
#include <immintrin.h>
#else
#include <smmintrin.h>
#endif
#include "hardware.h"
// This file defines classes and functions to simplify vectorizing code with SSE.
...
...
@@ -63,11 +68,21 @@ class fvec4 {
public:
__m128
val
;
fvec4
()
{}
fvec4
()
=
default
;
fvec4
(
float
v
)
:
val
(
_mm_set1_ps
(
v
))
{}
fvec4
(
float
v1
,
float
v2
,
float
v3
,
float
v4
)
:
val
(
_mm_set_ps
(
v4
,
v3
,
v2
,
v1
))
{}
fvec4
(
__m128
v
)
:
val
(
v
)
{}
fvec4
(
const
float
*
v
)
:
val
(
_mm_loadu_ps
(
v
))
{}
/**
* Create a vector by gathering individual indexes of data from a table. Element i of the vector will
* be loaded from table[idx[i]].
* @param table The table from which to do a lookup.
* @param indexes The indexes to gather.
*/
fvec4
(
const
float
*
table
,
const
int
idx
[
4
])
:
fvec4
(
table
[
idx
[
0
]],
table
[
idx
[
1
]],
table
[
idx
[
2
]],
table
[
idx
[
3
]])
{
}
operator
__m128
()
const
{
return
val
;
}
...
...
@@ -79,6 +94,20 @@ public:
void
store
(
float
*
v
)
const
{
_mm_storeu_ps
(
v
,
val
);
}
/**
* Store only the lower three elements of the vector.
*/
void
storeVec3
(
float
*
v
)
const
{
// This code could be called from objects compiled for better SIMD domains (e.g., AVX) so conditionally
// compile in the most efficient variant of the instruction.
#ifdef __AVX__
_mm_maskstore_ps
(
v
,
_mm_setr_epi32
(
-
1
,
-
1
,
-
1
,
0
),
val
);
#else
_mm_maskmoveu_si128
(
_mm_castps_si128
(
val
),
_mm_setr_epi32
(
-
1
,
-
1
,
-
1
,
0
),
(
char
*
)
v
);
#endif
}
fvec4
operator
+
(
const
fvec4
&
other
)
const
{
return
_mm_add_ps
(
val
,
other
);
}
...
...
@@ -131,6 +160,12 @@ public:
return
_mm_cmple_ps
(
val
,
other
);
}
operator
ivec4
()
const
;
/**
* Convert an integer bitmask into a full vector of elements which can be used
* by the blend function.
*/
static
fvec4
expandBitsToMask
(
int
bitmask
);
};
/**
...
...
@@ -214,6 +249,13 @@ inline ivec4::operator fvec4() const {
return
_mm_cvtepi32_ps
(
val
);
}
inline
fvec4
fvec4
::
expandBitsToMask
(
int
bitmask
)
{
// Not optimal for SSE (see AVX implementation for better version)
// but useful as an example for other SIMD architectures.
const
auto
values
=
fvec4
(
bitmask
&
1
,
bitmask
&
2
,
bitmask
&
4
,
bitmask
&
8
);
return
values
!=
fvec4
(
0.0
f
);
}
// Functions that operate on fvec4s.
static
inline
fvec4
floor
(
const
fvec4
&
v
)
{
...
...
@@ -273,6 +315,10 @@ static inline float dot4(const fvec4& v1, const fvec4& v2) {
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0xF1
));
}
static
inline
float
reduceAdd
(
const
fvec4
v
)
{
return
dot4
(
v
,
fvec4
(
1.0
f
));
}
static
inline
fvec4
cross
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
fvec4
temp
=
fvec4
(
_mm_mul_ps
(
v1
,
_mm_shuffle_ps
(
v2
,
v2
,
_MM_SHUFFLE
(
3
,
0
,
2
,
1
))))
-
fvec4
(
_mm_mul_ps
(
v2
,
_mm_shuffle_ps
(
v1
,
v1
,
_MM_SHUFFLE
(
3
,
0
,
2
,
1
))));
...
...
@@ -283,6 +329,22 @@ static inline void transpose(fvec4& v1, fvec4& v2, fvec4& v3, fvec4& v4) {
_MM_TRANSPOSE4_PS
(
v1
,
v2
,
v3
,
v4
);
}
/**
* Out-of-place transpose from an array into named variables.
*/
static
inline
void
transpose
(
const
fvec4
in
[
4
],
fvec4
&
v0
,
fvec4
&
v1
,
fvec4
&
v2
,
fvec4
&
v3
)
{
v0
=
in
[
0
];
v1
=
in
[
1
];
v2
=
in
[
2
];
v3
=
in
[
3
];
transpose
(
v0
,
v1
,
v2
,
v3
);
}
/**
* Out-of-place transpose from named variables into an array.
*/
static
inline
void
transpose
(
const
fvec4
v0
,
const
fvec4
v1
,
const
fvec4
v2
,
const
fvec4
v3
,
fvec4
out
[
4
])
{
out
[
0
]
=
v0
;
out
[
1
]
=
v1
;
out
[
2
]
=
v2
;
out
[
3
]
=
v3
;
transpose
(
out
[
0
],
out
[
1
],
out
[
2
],
out
[
3
]);
}
// Functions that operate on ivec4s.
static
inline
ivec4
min
(
const
ivec4
&
v1
,
const
ivec4
&
v2
)
{
...
...
@@ -319,10 +381,48 @@ static inline fvec4 operator/(float v1, const fvec4& v2) {
return
fvec4
(
v1
)
/
v2
;
}
// Operations for blending fvec4s based on an ivec4.
// Operations for blending fvec4
static
inline
fvec4
blend
(
const
fvec4
&
v1
,
const
fvec4
&
v2
,
const
fvec4
&
mask
)
{
return
fvec4
(
_mm_blendv_ps
(
v1
.
val
,
v2
.
val
,
mask
.
val
));
}
static
inline
fvec4
blend
(
const
fvec4
&
v1
,
const
fvec4
&
v2
,
const
ivec4
&
mask
)
{
return
fvec4
(
_mm_blendv_ps
(
v1
.
val
,
v2
.
val
,
_mm_castsi128_ps
(
mask
.
val
)));
static
inline
fvec4
blendZero
(
const
fvec4
v
,
const
fvec4
mask
)
{
return
blend
(
0.0
f
,
v
,
mask
);
}
/* Given a table of floating-point values and a set of indexes, perform a gather read into a pair
* of vectors. The first result vector contains the values at the given indexes, and the second
* result vector contains the values from each respective index+1.
*/
static
inline
void
gatherVecPair
(
const
float
*
table
,
const
ivec4
index
,
fvec4
&
out0
,
fvec4
&
out1
)
{
fvec4
t0
(
table
+
index
[
0
]);
fvec4
t1
(
table
+
index
[
1
]);
fvec4
t2
(
table
+
index
[
2
]);
fvec4
t3
(
table
+
index
[
3
]);
transpose
(
t0
,
t1
,
t2
,
t3
);
out0
=
t0
;
out1
=
t1
;
}
/**
* Given 3 vectors of floating-point data, reduce them to a single 3-element position
* value by adding all the elements in each vector. Given inputs of:
* X0 X1 X2 X3
* Y0 Y1 Y2 Y3
* Z0 Z1 Z2 Z3
* Each vector of values needs to be summed into a single value, and then stored into
* the output vector:
* output[0] = (X0 + X1 + X2 + X3)
* output[1] = (Y0 + Y1 + Y2 + Y3)
* output[2] = (Z0 + Z1 + Z2 + Z3)
* output[3] = undefined
*/
static
inline
fvec4
reduceToVec3
(
const
fvec4
x
,
const
fvec4
y
,
const
fvec4
z
)
{
// :TODO: Could be made more efficient.
const
auto
nx
=
reduceAdd
(
x
);
const
auto
ny
=
reduceAdd
(
y
);
const
auto
nz
=
reduceAdd
(
z
);
return
fvec4
(
nx
,
ny
,
nz
,
0.0
);
}
#endif
/*OPENMM_VECTORIZE_SSE_H_*/
...
...
openmmapi/src/CompoundIntegrator.cpp
View file @
0b5d58d7
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2015-20
16
Stanford University and the Authors. *
* Portions copyright (c) 2015-20
20
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -128,3 +128,15 @@ void CompoundIntegrator::stateChanged(State::DataType changed) {
double
CompoundIntegrator
::
computeKineticEnergy
()
{
return
integrators
[
currentIntegrator
]
->
computeKineticEnergy
();
}
void
CompoundIntegrator
::
createCheckpoint
(
std
::
ostream
&
stream
)
const
{
stream
.
write
((
char
*
)
&
currentIntegrator
,
sizeof
(
int
));
for
(
int
i
=
0
;
i
<
integrators
.
size
();
i
++
)
integrators
[
i
]
->
createCheckpoint
(
stream
);
}
void
CompoundIntegrator
::
loadCheckpoint
(
std
::
istream
&
stream
)
{
stream
.
read
((
char
*
)
&
currentIntegrator
,
sizeof
(
int
));
for
(
int
i
=
0
;
i
<
integrators
.
size
();
i
++
)
integrators
[
i
]
->
loadCheckpoint
(
stream
);
}
openmmapi/src/ContextImpl.cpp
View file @
0b5d58d7
...
...
@@ -454,6 +454,7 @@ void ContextImpl::createCheckpoint(ostream& stream) {
stream
.
write
((
char
*
)
&
param
.
second
,
sizeof
(
double
));
}
updateStateDataKernel
.
getAs
<
UpdateStateDataKernel
>
().
createCheckpoint
(
*
this
,
stream
);
integrator
.
createCheckpoint
(
stream
);
stream
.
flush
();
}
...
...
@@ -480,6 +481,7 @@ void ContextImpl::loadCheckpoint(istream& stream) {
parameters
[
name
]
=
value
;
}
updateStateDataKernel
.
getAs
<
UpdateStateDataKernel
>
().
loadCheckpoint
(
*
this
,
stream
);
integrator
.
loadCheckpoint
(
stream
);
hasSetPositions
=
true
;
integrator
.
stateChanged
(
State
::
Positions
);
integrator
.
stateChanged
(
State
::
Velocities
);
...
...
openmmapi/src/CustomIntegrator.cpp
View file @
0b5d58d7
...
...
@@ -114,6 +114,31 @@ bool CustomIntegrator::kineticEnergyRequiresForce() const {
return
keNeedsForce
;
}
void
CustomIntegrator
::
createCheckpoint
(
std
::
ostream
&
stream
)
const
{
for
(
int
i
=
0
;
i
<
getNumGlobalVariables
();
i
++
)
{
double
value
=
getGlobalVariable
(
i
);
stream
.
write
((
char
*
)
&
value
,
sizeof
(
double
));
}
vector
<
Vec3
>
values
;
for
(
int
i
=
0
;
i
<
getNumPerDofVariables
();
i
++
)
{
getPerDofVariable
(
i
,
values
);
stream
.
write
((
char
*
)
values
.
data
(),
sizeof
(
Vec3
)
*
values
.
size
());
}
}
void
CustomIntegrator
::
loadCheckpoint
(
std
::
istream
&
stream
)
{
double
value
;
for
(
int
i
=
0
;
i
<
getNumGlobalVariables
();
i
++
)
{
stream
.
read
((
char
*
)
&
value
,
sizeof
(
double
));
setGlobalVariable
(
i
,
value
);
}
vector
<
Vec3
>
values
(
context
->
getSystem
().
getNumParticles
());
for
(
int
i
=
0
;
i
<
getNumPerDofVariables
();
i
++
)
{
stream
.
read
((
char
*
)
values
.
data
(),
sizeof
(
Vec3
)
*
values
.
size
());
setPerDofVariable
(
i
,
values
);
}
}
void
CustomIntegrator
::
step
(
int
steps
)
{
if
(
context
==
NULL
)
throw
OpenMMException
(
"This Integrator is not bound to a context!"
);
...
...
openmmapi/src/NoseHooverChain.cpp
View file @
0b5d58d7
...
...
@@ -55,7 +55,10 @@ std::vector<double> NoseHooverChain::getYoshidaSuzukiWeights() const {
case
5
:
return
{
0.2967324292201065
,
0.2967324292201065
,
-
0.186929716880426
,
0.2967324292201065
,
0.2967324292201065
};
case
7
:
return
{
0.784513610477560
,
0.235573213359357
,
-
1.17767998417887
,
1.31518632068391
,
-
1.17767998417887
,
0.235573213359357
,
0.784513610477560
};
default:
throw
OpenMMException
(
"The number of Yoshida-Suzuki weights must be 1,3, or
5
."
);
throw
OpenMMException
(
"The number of Yoshida-Suzuki weights must be 1,3,
5,
or
7
."
);
}
}
openmmapi/src/NoseHooverIntegrator.cpp
View file @
0b5d58d7
...
...
@@ -68,11 +68,6 @@ NoseHooverIntegrator::NoseHooverIntegrator(double temperature, double collisionF
NoseHooverIntegrator
::~
NoseHooverIntegrator
()
{}
std
::
pair
<
double
,
double
>
NoseHooverIntegrator
::
propagateChain
(
std
::
pair
<
double
,
double
>
kineticEnergy
,
int
chainID
)
{
return
nhcKernel
.
getAs
<
NoseHooverChainKernel
>
().
propagateChain
(
*
context
,
noseHooverChains
.
at
(
chainID
),
kineticEnergy
,
getStepSize
());
}
int
NoseHooverIntegrator
::
addThermostat
(
double
temperature
,
double
collisionFrequency
,
int
chainLength
,
int
numMTS
,
int
numYoshidaSuzuki
)
{
hasSubsystemThermostats_
=
false
;
...
...
@@ -271,10 +266,10 @@ double NoseHooverIntegrator::computeKineticEnergy() {
double
kE
=
0.0
;
if
(
noseHooverChains
.
size
()
>
0
)
{
for
(
const
auto
&
nhc
:
noseHooverChains
){
kE
+=
nhcK
ernel
.
getAs
<
NoseHoover
Chain
Kernel
>
().
computeMaskedKineticEnergy
(
*
context
,
nhc
,
true
).
first
;
kE
+=
k
ernel
.
getAs
<
Integrate
NoseHoover
Step
Kernel
>
().
computeMaskedKineticEnergy
(
*
context
,
nhc
,
true
).
first
;
}
}
else
{
kE
=
vvK
ernel
.
getAs
<
Integrate
VelocityVerlet
StepKernel
>
().
computeKineticEnergy
(
*
context
,
*
this
);
kE
=
k
ernel
.
getAs
<
Integrate
NoseHoover
StepKernel
>
().
computeKineticEnergy
(
*
context
,
*
this
);
}
return
kE
;
}
...
...
@@ -287,7 +282,7 @@ double NoseHooverIntegrator::computeHeatBathEnergy() {
double
energy
=
0
;
for
(
auto
&
nhc
:
noseHooverChains
)
{
if
(
context
&&
(
nhc
.
getNumDegreesOfFreedom
()
>
0
))
{
energy
+=
nhcK
ernel
.
getAs
<
NoseHoover
Chain
Kernel
>
().
computeHeatBathEnergy
(
*
context
,
nhc
);
energy
+=
k
ernel
.
getAs
<
Integrate
NoseHoover
Step
Kernel
>
().
computeHeatBathEnergy
(
*
context
,
nhc
);
}
}
return
energy
;
...
...
@@ -300,10 +295,8 @@ void NoseHooverIntegrator::initialize(ContextImpl& contextRef) {
context
=
&
contextRef
;
const
System
&
system
=
context
->
getSystem
();
owner
=
&
contextRef
.
getOwner
();
vvKernel
=
context
->
getPlatform
().
createKernel
(
IntegrateVelocityVerletStepKernel
::
Name
(),
contextRef
);
vvKernel
.
getAs
<
IntegrateVelocityVerletStepKernel
>
().
initialize
(
contextRef
.
getSystem
(),
*
this
);
nhcKernel
=
context
->
getPlatform
().
createKernel
(
NoseHooverChainKernel
::
Name
(),
contextRef
);
nhcKernel
.
getAs
<
NoseHooverChainKernel
>
().
initialize
();
kernel
=
context
->
getPlatform
().
createKernel
(
IntegrateNoseHooverStepKernel
::
Name
(),
contextRef
);
kernel
.
getAs
<
IntegrateNoseHooverStepKernel
>
().
initialize
(
contextRef
.
getSystem
(),
*
this
);
forcesAreValid
=
false
;
// check for drude particles and build the Nose-Hoover Chains
...
...
@@ -329,34 +322,30 @@ void NoseHooverIntegrator::initialize(ContextImpl& contextRef) {
}
void
NoseHooverIntegrator
::
cleanup
()
{
vvKernel
=
Kernel
();
nhcKernel
=
Kernel
();
kernel
=
Kernel
();
}
vector
<
string
>
NoseHooverIntegrator
::
getKernelNames
()
{
std
::
vector
<
std
::
string
>
names
;
names
.
push_back
(
NoseHooverChainKernel
::
Name
());
names
.
push_back
(
IntegrateVelocityVerletStepKernel
::
Name
());
names
.
push_back
(
IntegrateNoseHooverStepKernel
::
Name
());
return
names
;
}
void
NoseHooverIntegrator
::
step
(
int
steps
)
{
if
(
context
==
NULL
)
throw
OpenMMException
(
"This Integrator is not bound to a context!"
);
std
::
pair
<
double
,
double
>
scale
,
kineticEnergy
;
for
(
int
i
=
0
;
i
<
steps
;
++
i
)
{
if
(
context
->
updateContextState
())
forcesAreValid
=
false
;
for
(
auto
&
nhc
:
noseHooverChains
)
{
kineticEnergy
=
nhcKernel
.
getAs
<
NoseHooverChainKernel
>
().
computeMaskedKineticEnergy
(
*
context
,
nhc
,
false
);
scale
=
nhcKernel
.
getAs
<
NoseHooverChainKernel
>
().
propagateChain
(
*
context
,
nhc
,
kineticEnergy
,
getStepSize
());
nhcKernel
.
getAs
<
NoseHooverChainKernel
>
().
scaleVelocities
(
*
context
,
nhc
,
scale
);
}
vvKernel
.
getAs
<
IntegrateVelocityVerletStepKernel
>
().
execute
(
*
context
,
*
this
,
forcesAreValid
);
for
(
auto
&
nhc
:
noseHooverChains
)
{
kineticEnergy
=
nhcKernel
.
getAs
<
NoseHooverChainKernel
>
().
computeMaskedKineticEnergy
(
*
context
,
nhc
,
false
);
scale
=
nhcKernel
.
getAs
<
NoseHooverChainKernel
>
().
propagateChain
(
*
context
,
nhc
,
kineticEnergy
,
getStepSize
());
nhcKernel
.
getAs
<
NoseHooverChainKernel
>
().
scaleVelocities
(
*
context
,
nhc
,
scale
);
}
context
->
calcForcesAndEnergy
(
true
,
false
);
kernel
.
getAs
<
IntegrateNoseHooverStepKernel
>
().
execute
(
*
context
,
*
this
,
forcesAreValid
);
}
}
void
NoseHooverIntegrator
::
createCheckpoint
(
std
::
ostream
&
stream
)
const
{
kernel
.
getAs
<
IntegrateNoseHooverStepKernel
>
().
createCheckpoint
(
*
context
,
stream
);
}
void
NoseHooverIntegrator
::
loadCheckpoint
(
std
::
istream
&
stream
)
{
kernel
.
getAs
<
IntegrateNoseHooverStepKernel
>
().
loadCheckpoint
(
*
context
,
stream
);
}
\ No newline at end of file
openmmapi/src/SplineFitter.cpp
View file @
0b5d58d7
...
...
@@ -86,8 +86,8 @@ void SplineFitter::createPeriodicSpline(const vector<double>& x, const vector<do
// Create the system of equations to solve.
vector
<
double
>
a
(
n
),
b
(
n
),
c
(
n
),
rhs
(
n
);
a
[
0
]
=
0.0
;
vector
<
double
>
a
(
n
-
1
),
b
(
n
-
1
),
c
(
n
-
1
),
rhs
(
n
-
1
);
a
[
0
]
=
x
[
n
-
1
]
-
x
[
n
-
2
]
;
b
[
0
]
=
2.0
*
(
x
[
1
]
-
x
[
0
]
+
x
[
n
-
1
]
-
x
[
n
-
2
]);
c
[
0
]
=
x
[
1
]
-
x
[
0
];
rhs
[
0
]
=
6.0
*
((
y
[
1
]
-
y
[
0
])
/
(
x
[
1
]
-
x
[
0
])
-
(
y
[
n
-
1
]
-
y
[
n
-
2
])
/
(
x
[
n
-
1
]
-
x
[
n
-
2
]));
...
...
@@ -97,17 +97,14 @@ void SplineFitter::createPeriodicSpline(const vector<double>& x, const vector<do
c
[
i
]
=
x
[
i
+
1
]
-
x
[
i
];
rhs
[
i
]
=
6.0
*
((
y
[
i
+
1
]
-
y
[
i
])
/
(
x
[
i
+
1
]
-
x
[
i
])
-
(
y
[
i
]
-
y
[
i
-
1
])
/
(
x
[
i
]
-
x
[
i
-
1
]));
}
a
[
n
-
1
]
=
0.0
;
b
[
n
-
1
]
=
1.0
;
c
[
n
-
1
]
=
0.0
;
rhs
[
n
-
1
]
=
0.0
;
double
beta
=
x
[
n
-
1
]
-
x
[
n
-
2
];
double
alpha
=
-
1.0
;
double
beta
=
a
[
0
];
double
alpha
=
c
[
n
-
2
];
double
gamma
=
-
b
[
0
];
// This is a cyclic tridiagonal matrix. We solve it using the Sherman-Morrison method,
// which involves solving two tridiagonal systems.
n
--
;
b
[
0
]
-=
gamma
;
b
[
n
-
1
]
-=
alpha
*
beta
/
gamma
;
solveTridiagonalMatrix
(
a
,
b
,
c
,
rhs
,
deriv
);
...
...
@@ -118,6 +115,7 @@ void SplineFitter::createPeriodicSpline(const vector<double>& x, const vector<do
double
scale
=
(
deriv
[
0
]
+
beta
*
deriv
[
n
-
1
]
/
gamma
)
/
(
1.0
+
z
[
0
]
+
beta
*
z
[
n
-
1
]
/
gamma
);
for
(
int
i
=
0
;
i
<
n
;
i
++
)
deriv
[
i
]
-=
scale
*
z
[
i
];
deriv
[
n
]
=
deriv
[
0
];
}
double
SplineFitter
::
evaluateSpline
(
const
vector
<
double
>&
x
,
const
vector
<
double
>&
y
,
const
vector
<
double
>&
deriv
,
double
t
)
{
...
...
platforms/common/include/openmm/common/CommonKernels.h
View file @
0b5d58d7
...
...
@@ -944,11 +944,13 @@ private:
/*
* This kernel is invoked by NoseHooverIntegrator to take one time step.
*/
class
CommonIntegrate
VelocityVerlet
StepKernel
:
public
Integrate
VelocityVerlet
StepKernel
{
class
CommonIntegrate
NoseHoover
StepKernel
:
public
Integrate
NoseHoover
StepKernel
{
public:
CommonIntegrateVelocityVerletStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
ComputeContext
&
cc
)
:
IntegrateVelocityVerletStepKernel
(
name
,
platform
),
cc
(
cc
),
hasInitializedKernels
(
false
)
{
}
~
CommonIntegrateVelocityVerletStepKernel
()
{}
CommonIntegrateNoseHooverStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
ComputeContext
&
cc
)
:
IntegrateNoseHooverStepKernel
(
name
,
platform
),
cc
(
cc
),
hasInitializedKernels
(
false
),
hasInitializedKineticEnergyKernel
(
false
),
hasInitializedHeatBathEnergyKernel
(
false
),
hasInitializedScaleVelocitiesKernel
(
false
),
hasInitializedPropagateKernel
(
false
)
{}
~
CommonIntegrateNoseHooverStepKernel
()
{}
/**
* Initialize the kernel.
*
...
...
@@ -972,39 +974,16 @@ public:
* @param integrator the NoseHooverIntegrator this kernel is being used for
*/
double
computeKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverIntegrator
&
integrator
);
private:
ComputeContext
&
cc
;
float
prevMaxPairDistance
;
ComputeArray
maxPairDistanceBuffer
,
pairListBuffer
,
atomListBuffer
,
pairTemperatureBuffer
;
ComputeKernel
kernel1
,
kernel2
,
kernel3
,
kernelHardWall
;
bool
hasInitializedKernels
;
};
/**
* This kernel is invoked by NoseHooverChain at the start of each time step to adjust the thermostat
* and update the associated particle velocities.
*/
class
CommonNoseHooverChainKernel
:
public
NoseHooverChainKernel
{
public:
CommonNoseHooverChainKernel
(
std
::
string
name
,
const
Platform
&
platform
,
ComputeContext
&
cc
)
:
NoseHooverChainKernel
(
name
,
platform
),
cc
(
cc
),
hasInitializedPropagateKernel
(
false
),
hasInitializedKineticEnergyKernel
(
false
),
hasInitializedHeatBathEnergyKernel
(
false
),
hasInitializedScaleVelocitiesKernel
(
false
)
{}
~
CommonNoseHooverChainKernel
()
{}
/**
* Initialize the kernel.
*/
void
initialize
();
/**
* Execute the kernel that propagates the Nose Hoover chain and determines the velocity scale factor.
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the object describing the chain to be propagated.
* @param kineticEnerg
ies
the {
absolute
, relative} kineticEnerg
y
of the particles being thermostated by this chain.
* @param kineticEnerg
y
the {
center of mass
, relative} kineticEnerg
ies
of the particles being thermostated by this chain.
* @param timeStep the time step used by the integrator.
* @return the
{absolute, relative}
velocity scale factor to apply to the particles associated with this heat bath.
* @return the velocity scale factor to apply to the particles associated with this heat bath.
*/
std
::
pair
<
double
,
double
>
propagateChain
(
ContextImpl
&
context
,
const
NoseHooverChain
&
n
hc
,
std
::
pair
<
double
,
double
>
kineticEnerg
ies
,
double
timeStep
);
std
::
pair
<
double
,
double
>
propagateChain
(
ContextImpl
&
context
,
const
NoseHooverChain
&
n
oseHooverChain
,
std
::
pair
<
double
,
double
>
kineticEnerg
y
,
double
timeStep
);
/**
* Execute the kernal that computes the total (kinetic + potential) heat bath energy.
*
...
...
@@ -1012,7 +991,7 @@ public:
* @param noseHooverChain the chain whose energy is to be determined.
* @return the total heat bath energy.
*/
double
computeHeatBathEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
n
hc
);
double
computeHeatBathEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
n
oseHooverChain
);
/**
* Execute the kernel that computes the kinetic energy for a subset of atoms,
* or the relative kinetic energy of Drude particles with respect to their parent atoms
...
...
@@ -1020,22 +999,37 @@ public:
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @param downloadValue whether the computed value should be downloaded and returned.
*
*/
std
::
pair
<
double
,
double
>
computeMaskedKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
noseHooverChain
,
bool
downloadValue
);
std
::
pair
<
double
,
double
>
computeMaskedKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
noseHooverChain
,
bool
downloadValue
);
/**
* Execute the kernel that scales the velocities of particles associated with a nose hoover chain
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @param scaleFactor
s
the
{absolute, relative}
multiplicative factor by which velocities are scaled.
* @param scaleFactor the multiplicative factor by which
{absolute, relative}
velocities are scaled.
*/
void
scaleVelocities
(
ContextImpl
&
context
,
const
NoseHooverChain
&
noseHooverChain
,
std
::
pair
<
double
,
double
>
scaleFactors
);
void
scaleVelocities
(
ContextImpl
&
context
,
const
NoseHooverChain
&
noseHooverChain
,
std
::
pair
<
double
,
double
>
scaleFactor
);
/**
* Write the chain states to a checkpoint.
*/
void
createCheckpoint
(
ContextImpl
&
context
,
std
::
ostream
&
stream
)
const
;
/**
* Load the chain states from a checkpoint.
*/
void
loadCheckpoint
(
ContextImpl
&
context
,
std
::
istream
&
stream
);
private:
int
sumWorkGroupSize
;
ComputeContext
&
cc
;
float
prevMaxPairDistance
;
ComputeArray
maxPairDistanceBuffer
,
pairListBuffer
,
atomListBuffer
,
pairTemperatureBuffer
,
oldDelta
;
std
::
map
<
int
,
ComputeArray
>
chainState
;
ComputeKernel
kernel1
,
kernel2
,
kernel3
,
kernel4
,
kernelHardWall
;
bool
hasInitializedKernels
;
ComputeKernel
reduceEnergyKernel
;
ComputeKernel
computeHeatBathEnergyKernel
;
ComputeKernel
computeAtomsKineticEnergyKernel
;
ComputeKernel
computePairsKineticEnergyKernel
;
ComputeKernel
scaleAtomsVelocitiesKernel
;
ComputeKernel
scalePairsVelocitiesKernel
;
ComputeArray
energyBuffer
,
scaleFactorBuffer
,
kineticEnergyBuffer
,
chainMasses
,
chainForces
,
heatBathEnergy
;
std
::
map
<
int
,
ComputeArray
>
atomlists
,
pairlists
;
std
::
map
<
int
,
ComputeKernel
>
propagateKernels
;
...
...
@@ -1043,12 +1037,6 @@ private:
bool
hasInitializedKineticEnergyKernel
;
bool
hasInitializedHeatBathEnergyKernel
;
bool
hasInitializedScaleVelocitiesKernel
;
ComputeKernel
reduceEnergyKernel
;
ComputeKernel
computeHeatBathEnergyKernel
;
ComputeKernel
computeAtomsKineticEnergyKernel
;
ComputeKernel
computePairsKineticEnergyKernel
;
ComputeKernel
scaleAtomsVelocitiesKernel
;
ComputeKernel
scalePairsVelocitiesKernel
;
};
/**
...
...
platforms/common/include/openmm/common/IntegrationUtilities.h
View file @
0b5d58d7
...
...
@@ -130,12 +130,6 @@ public:
* @param timeShift the amount by which to shift the velocities in time
*/
double
computeKineticEnergy
(
double
timeShift
);
/**
* Get the data structure that holds the state of all Nose-Hoover chains
*/
std
::
map
<
int
,
ComputeArray
>&
getNoseHooverChainState
()
{
return
noseHooverChainState
;
}
protected:
virtual
void
applyConstraintsImpl
(
bool
constrainVelocities
,
double
tol
)
=
0
;
ComputeContext
&
context
;
...
...
@@ -174,7 +168,6 @@ protected:
ComputeArray
vsiteLocalCoordsWeights
;
ComputeArray
vsiteLocalCoordsPos
;
ComputeArray
vsiteLocalCoordsStartIndex
;
std
::
map
<
int
,
ComputeArray
>
noseHooverChainState
;
int
randomPos
,
lastSeed
,
numVsites
;
bool
hasOverlappingVsites
;
mm_double2
lastStepSize
;
...
...
platforms/common/src/CommonKernels.cpp
View file @
0b5d58d7
...
...
@@ -5640,20 +5640,68 @@ double CommonIntegrateLangevinMiddleStepKernel::computeKineticEnergy(ContextImpl
return
cc
.
getIntegrationUtilities
().
computeKineticEnergy
(
0.0
);
}
void
CommonIntegrate
VelocityVerlet
StepKernel
::
initialize
(
const
System
&
system
,
const
NoseHooverIntegrator
&
integrator
)
{
void
CommonIntegrate
NoseHoover
StepKernel
::
initialize
(
const
System
&
system
,
const
NoseHooverIntegrator
&
integrator
)
{
cc
.
initializeContexts
();
bool
useDouble
=
cc
.
getUseDoublePrecision
()
||
cc
.
getUseMixedPrecision
();
map
<
string
,
string
>
defines
;
defines
[
"BOLTZ"
]
=
cc
.
doubleToString
(
BOLTZ
);
ComputeProgram
program
=
cc
.
compileProgram
(
CommonKernelSources
::
velocityVerlet
,
defines
);
kernel1
=
program
->
createKernel
(
"integrateVelocityVerletPart1"
);
kernel2
=
program
->
createKernel
(
"integrateVelocityVerletPart2"
);
kernel3
=
program
->
createKernel
(
"integrateVelocityVerletPart3"
);
kernelHardWall
=
program
->
createKernel
(
"integrateVelocityVerletHardWall"
);
ComputeProgram
program
=
cc
.
compileProgram
(
CommonKernelSources
::
noseHooverIntegrator
,
defines
);
kernel1
=
program
->
createKernel
(
"integrateNoseHooverMiddlePart1"
);
kernel2
=
program
->
createKernel
(
"integrateNoseHooverMiddlePart2"
);
kernel3
=
program
->
createKernel
(
"integrateNoseHooverMiddlePart3"
);
kernel4
=
program
->
createKernel
(
"integrateNoseHooverMiddlePart4"
);
if
(
useDouble
)
{
oldDelta
.
initialize
<
mm_double4
>
(
cc
,
cc
.
getPaddedNumAtoms
(),
"oldDelta"
);
}
else
{
oldDelta
.
initialize
<
mm_float4
>
(
cc
,
cc
.
getPaddedNumAtoms
(),
"oldDelta"
);
}
kernelHardWall
=
program
->
createKernel
(
"integrateNoseHooverHardWall"
);
prevMaxPairDistance
=
-
1.0
f
;
maxPairDistanceBuffer
.
initialize
<
float
>
(
cc
,
1
,
"maxPairDistanceBuffer"
);
int
workGroupSize
=
std
::
min
(
cc
.
getMaxThreadBlockSize
(),
512
);
defines
[
"WORK_GROUP_SIZE"
]
=
std
::
to_string
(
workGroupSize
);
defines
[
"BEGIN_YS_LOOP"
]
=
"const real arr[1] = {1.0};"
"for(int i=0;i<1;++i) {"
"const real ys = arr[i];"
;
defines
[
"END_YS_LOOP"
]
=
"}"
;
program
=
cc
.
compileProgram
(
CommonKernelSources
::
noseHooverChain
,
defines
);
propagateKernels
[
1
]
=
program
->
createKernel
(
"propagateNoseHooverChain"
);
defines
[
"BEGIN_YS_LOOP"
]
=
"const real arr[3] = {0.828981543588751, -0.657963087177502, 0.828981543588751};"
"for(int i=0;i<3;++i) {"
"const real ys = arr[i];"
;
program
=
cc
.
compileProgram
(
CommonKernelSources
::
noseHooverChain
,
defines
);
propagateKernels
[
3
]
=
program
->
createKernel
(
"propagateNoseHooverChain"
);
defines
[
"BEGIN_YS_LOOP"
]
=
"const real arr[5] = {0.2967324292201065, 0.2967324292201065, -0.186929716880426, 0.2967324292201065, 0.2967324292201065};"
"for(int i=0;i<5;++i) {"
"const real ys = arr[i];"
;
program
=
cc
.
compileProgram
(
CommonKernelSources
::
noseHooverChain
,
defines
);
propagateKernels
[
5
]
=
program
->
createKernel
(
"propagateNoseHooverChain"
);
defines
[
"BEGIN_YS_LOOP"
]
=
"const real arr[7] = {0.784513610477560, 0.235573213359357, -1.17767998417887, 1.31518632068391,-1.17767998417887, 0.235573213359357, 0.784513610477560};"
"for(int i=0;i<7;++i) {"
"const real ys = arr[i];"
;
program
=
cc
.
compileProgram
(
CommonKernelSources
::
noseHooverChain
,
defines
);
propagateKernels
[
7
]
=
program
->
createKernel
(
"propagateNoseHooverChain"
);
program
=
cc
.
compileProgram
(
CommonKernelSources
::
noseHooverChain
,
defines
);
reduceEnergyKernel
=
program
->
createKernel
(
"reduceEnergyPair"
);
computeHeatBathEnergyKernel
=
program
->
createKernel
(
"computeHeatBathEnergy"
);
computeAtomsKineticEnergyKernel
=
program
->
createKernel
(
"computeAtomsKineticEnergy"
);
computePairsKineticEnergyKernel
=
program
->
createKernel
(
"computePairsKineticEnergy"
);
scaleAtomsVelocitiesKernel
=
program
->
createKernel
(
"scaleAtomsVelocities"
);
scalePairsVelocitiesKernel
=
program
->
createKernel
(
"scalePairsVelocities"
);
int
energyBufferSize
=
cc
.
getEnergyBuffer
().
getSize
();
if
(
cc
.
getUseDoublePrecision
()
||
cc
.
getUseMixedPrecision
())
energyBuffer
.
initialize
<
mm_double2
>
(
cc
,
energyBufferSize
,
"energyBuffer"
);
else
energyBuffer
.
initialize
<
mm_float2
>
(
cc
,
energyBufferSize
,
"energyBuffer"
);
}
void
CommonIntegrate
VelocityVerlet
StepKernel
::
execute
(
ContextImpl
&
context
,
const
NoseHooverIntegrator
&
integrator
,
bool
&
forcesAreValid
)
{
void
CommonIntegrate
NoseHoover
StepKernel
::
execute
(
ContextImpl
&
context
,
const
NoseHooverIntegrator
&
integrator
,
bool
&
forcesAreValid
)
{
IntegrationUtilities
&
integration
=
cc
.
getIntegrationUtilities
();
int
paddedNumAtoms
=
cc
.
getPaddedNumAtoms
();
double
dt
=
integrator
.
getStepSize
();
...
...
@@ -5700,32 +5748,39 @@ void CommonIntegrateVelocityVerletStepKernel::execute(ContextImpl& context, cons
pairListBuffer
.
upload
(
tmp
);
pairTemperatureBuffer
.
upload
(
tmp2
);
}
int
totalAtoms
=
cc
.
getNumAtoms
();
if
(
!
hasInitializedKernels
)
{
hasInitializedKernels
=
true
;
kernel1
->
addArg
(
numAtoms
);
kernel1
->
addArg
(
numPairs
);
kernel1
->
addArg
(
paddedNumAtoms
);
kernel1
->
addArg
(
cc
.
getIntegrationUtilities
().
getStepSize
());
kernel1
->
addArg
(
cc
.
getPosq
());
kernel1
->
addArg
(
cc
.
getVelm
());
kernel1
->
addArg
(
cc
.
getLongForceBuffer
());
kernel1
->
addArg
(
integration
.
get
PosDelta
());
kernel1
->
addArg
(
integration
.
get
StepSize
());
kernel1
->
addArg
(
numAtoms
>
0
?
atomListBuffer
:
cc
.
getEnergyBuffer
());
// The array is not used if num == 0
kernel1
->
addArg
(
numPairs
>
0
?
pairListBuffer
:
cc
.
getEnergyBuffer
());
// The array is not used if num == 0
if
(
cc
.
getUseMixedPrecision
())
kernel1
->
addArg
(
cc
.
getPosqCorrection
());
kernel2
->
addArg
(
numParticles
);
kernel2
->
addArg
(
cc
.
getIntegrationUtilities
().
getStepSize
());
kernel2
->
addArg
(
cc
.
getPosq
());
kernel2
->
addArg
(
totalAtoms
);
kernel2
->
addArg
(
cc
.
getVelm
());
kernel2
->
addArg
(
integration
.
getPosDelta
());
kernel2
->
addArg
(
oldDelta
);
kernel2
->
addArg
(
integration
.
getStepSize
());
kernel3
->
addArg
(
totalAtoms
);
kernel3
->
addArg
(
cc
.
getVelm
());
kernel3
->
addArg
(
integration
.
getPosDelta
());
kernel3
->
addArg
(
oldDelta
);
kernel3
->
addArg
(
integration
.
getStepSize
());
kernel4
->
addArg
(
totalAtoms
);
kernel4
->
addArg
(
cc
.
getPosq
());
kernel4
->
addArg
(
cc
.
getVelm
());
kernel4
->
addArg
(
integration
.
getPosDelta
());
kernel4
->
addArg
(
oldDelta
);
kernel4
->
addArg
(
integration
.
getStepSize
());
if
(
cc
.
getUseMixedPrecision
())
kernel
2
->
addArg
(
cc
.
getPosqCorrection
());
kernel
4
->
addArg
(
cc
.
getPosqCorrection
());
if
(
numPairs
>
0
)
{
kernelHardWall
->
addArg
(
numPairs
);
kernelHardWall
->
addArg
(
maxPairDistanceBuffer
);
kernelHardWall
->
addArg
(
cc
.
getI
ntegration
Utilities
()
.
getStepSize
());
kernelHardWall
->
addArg
(
i
ntegration
.
getStepSize
());
kernelHardWall
->
addArg
(
cc
.
getPosq
());
kernelHardWall
->
addArg
(
cc
.
getVelm
());
kernelHardWall
->
addArg
(
pairListBuffer
);
...
...
@@ -5733,78 +5788,50 @@ void CommonIntegrateVelocityVerletStepKernel::execute(ContextImpl& context, cons
if
(
cc
.
getUseMixedPrecision
())
kernelHardWall
->
addArg
(
cc
.
getPosqCorrection
());
}
kernel3
->
addArg
(
numAtoms
);
kernel3
->
addArg
(
numPairs
);
kernel3
->
addArg
(
paddedNumAtoms
);
kernel3
->
addArg
(
cc
.
getIntegrationUtilities
().
getStepSize
());
kernel3
->
addArg
(
cc
.
getPosq
());
kernel3
->
addArg
(
cc
.
getVelm
());
kernel3
->
addArg
(
cc
.
getLongForceBuffer
());
kernel3
->
addArg
(
integration
.
getPosDelta
());
kernel3
->
addArg
(
numAtoms
>
0
?
atomListBuffer
:
cc
.
getEnergyBuffer
());
// The array is not used if num == 0
kernel3
->
addArg
(
numPairs
>
0
?
pairListBuffer
:
cc
.
getEnergyBuffer
());
// The array is not used if num == 0
if
(
cc
.
getUseMixedPrecision
())
kernel3
->
addArg
(
cc
.
getPosqCorrection
());
}
/*
* Carry out the integration
* Carry out the
LF-middle
integration
(c.f. J. Phys. Chem. A 2019, 123, 6056−6079)
*/
// Advance the velocities a half step
// Velocity update
kernel1
->
execute
(
std
::
max
(
numAtoms
,
numPairs
));
integration
.
applyConstraints
(
integrator
.
getConstraintTolerance
());
//
Advance particle positions a full s
te
p
integration
.
apply
Velocity
Constraints
(
integrator
.
getConstraintTolerance
());
//
Position upda
te
kernel2
->
execute
(
numParticles
);
// Apply the thermostat
int
numChains
=
integrator
.
getNumThermostats
();
for
(
int
chain
=
0
;
chain
<
numChains
;
++
chain
)
{
const
auto
&
thermostatChain
=
integrator
.
getThermostat
(
chain
);
auto
KEs
=
computeMaskedKineticEnergy
(
context
,
thermostatChain
,
false
);
auto
scaleFactors
=
propagateChain
(
context
,
thermostatChain
,
KEs
,
dt
);
scaleVelocities
(
context
,
thermostatChain
,
scaleFactors
);
}
// Position update
kernel3
->
execute
(
numParticles
);
integration
.
applyConstraints
(
integrator
.
getConstraintTolerance
());
// Apply constraint forces
kernel4
->
execute
(
numAtoms
);
// Make sure any Drude-like particles have not wandered too far from home
if
(
numPairs
>
0
)
kernelHardWall
->
execute
(
numPairs
);
integration
.
computeVirtualSites
();
context
.
calcForcesAndEnergy
(
true
,
false
);
forcesAreValid
=
true
;
// Update velocities another half step
kernel3
->
execute
(
std
::
max
(
numAtoms
,
numPairs
));
integration
.
applyVelocityConstraints
(
integrator
.
getConstraintTolerance
());
// Update the time and step count.
cc
.
setTime
(
cc
.
getTime
()
+
dt
);
cc
.
setStepCount
(
cc
.
getStepCount
()
+
1
);
cc
.
reorderAtoms
();
// Reduce UI lag.
#ifdef WIN32
cc
.
flushQueue
();
#endif
}
double
CommonIntegrate
VelocityVerlet
StepKernel
::
computeKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverIntegrator
&
integrator
)
{
double
CommonIntegrate
NoseHoover
StepKernel
::
computeKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverIntegrator
&
integrator
)
{
return
cc
.
getIntegrationUtilities
().
computeKineticEnergy
(
0
);
}
void
CommonNoseHooverChainKernel
::
initialize
()
{
bool
useDouble
=
cc
.
getUseDoublePrecision
()
||
cc
.
getUseMixedPrecision
();
map
<
string
,
string
>
defines
;
int
workGroupSize
=
std
::
min
(
cc
.
getMaxThreadBlockSize
(),
512
);
defines
[
"WORK_GROUP_SIZE"
]
=
std
::
to_string
(
workGroupSize
);
defines
[
"BEGIN_YS_LOOP"
]
=
"const real arr[1] = {1.0}; for(int i=0;i<1;++i) { const real ys = arr[i];"
;
defines
[
"END_YS_LOOP"
]
=
"}"
;
ComputeProgram
program
=
cc
.
compileProgram
(
CommonKernelSources
::
noseHooverChain
,
defines
);
propagateKernels
[
1
]
=
program
->
createKernel
(
"propagateNoseHooverChain"
);
defines
[
"BEGIN_YS_LOOP"
]
=
"const real arr[3] = {0.828981543588751, -0.657963087177502, 0.828981543588751}; for(int i=0;i<3;++i) { const real ys = arr[i];"
;
program
=
cc
.
compileProgram
(
CommonKernelSources
::
noseHooverChain
,
defines
);
propagateKernels
[
3
]
=
program
->
createKernel
(
"propagateNoseHooverChain"
);
defines
[
"BEGIN_YS_LOOP"
]
=
"const real arr[5] = {0.2967324292201065, 0.2967324292201065, -0.186929716880426, 0.2967324292201065, 0.2967324292201065}; for(int i=0;i<5;++i) { const real ys = arr[i];"
;
program
=
cc
.
compileProgram
(
CommonKernelSources
::
noseHooverChain
,
defines
);
propagateKernels
[
5
]
=
program
->
createKernel
(
"propagateNoseHooverChain"
);
program
=
cc
.
compileProgram
(
CommonKernelSources
::
noseHooverChain
,
defines
);
reduceEnergyKernel
=
program
->
createKernel
(
"reduceEnergyPair"
);
computeHeatBathEnergyKernel
=
program
->
createKernel
(
"computeHeatBathEnergy"
);
computeAtomsKineticEnergyKernel
=
program
->
createKernel
(
"computeAtomsKineticEnergy"
);
computePairsKineticEnergyKernel
=
program
->
createKernel
(
"computePairsKineticEnergy"
);
scaleAtomsVelocitiesKernel
=
program
->
createKernel
(
"scaleAtomsVelocities"
);
scalePairsVelocitiesKernel
=
program
->
createKernel
(
"scalePairsVelocities"
);
int
energyBufferSize
=
cc
.
getEnergyBuffer
().
getSize
();
if
(
cc
.
getUseDoublePrecision
()
||
cc
.
getUseMixedPrecision
())
energyBuffer
.
initialize
<
mm_double2
>
(
cc
,
energyBufferSize
,
"energyBuffer"
);
else
energyBuffer
.
initialize
<
mm_float2
>
(
cc
,
energyBufferSize
,
"energyBuffer"
);
}
std
::
pair
<
double
,
double
>
CommonNoseHoover
Chain
Kernel
::
propagateChain
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
std
::
pair
<
double
,
double
>
kineticEnergies
,
double
timeStep
)
{
std
::
pair
<
double
,
double
>
Common
Integrate
NoseHoover
Step
Kernel
::
propagateChain
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
std
::
pair
<
double
,
double
>
kineticEnergies
,
double
timeStep
)
{
bool
useDouble
=
cc
.
getUseDoublePrecision
()
||
cc
.
getUseMixedPrecision
();
int
chainID
=
nhc
.
getChainID
();
int
nAtoms
=
nhc
.
getThermostatedAtoms
().
size
();
...
...
@@ -5813,12 +5840,10 @@ std::pair<double, double> CommonNoseHooverChainKernel::propagateChain(ContextImp
int
numYS
=
nhc
.
getNumYoshidaSuzukiTimeSteps
();
int
numMTS
=
nhc
.
getNumMultiTimeSteps
();
if
(
numYS
!=
1
&&
numYS
!=
3
&&
numYS
!=
5
)
{
throw
OpenMMException
(
"Number of Yoshida Suzuki time steps has to be 1, 3, or
5
."
);
if
(
numYS
!=
1
&&
numYS
!=
3
&&
numYS
!=
5
&&
numYS
!=
7
)
{
throw
OpenMMException
(
"Number of Yoshida Suzuki time steps has to be 1, 3,
5,
or
7
."
);
}
auto
&
chainState
=
cc
.
getIntegrationUtilities
().
getNoseHooverChainState
();
if
(
!
scaleFactorBuffer
.
isInitialized
()
||
scaleFactorBuffer
.
getSize
()
==
0
)
{
if
(
useDouble
)
{
std
::
vector
<
mm_double2
>
zeros
{{
0
,
0
}};
...
...
@@ -5973,15 +5998,13 @@ std::pair<double, double> CommonNoseHooverChainKernel::propagateChain(ContextImp
return
{
0
,
0
};
}
double
CommonNoseHoover
Chain
Kernel
::
computeHeatBathEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
)
{
double
Common
Integrate
NoseHoover
Step
Kernel
::
computeHeatBathEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
)
{
bool
useDouble
=
cc
.
getUseDoublePrecision
()
||
cc
.
getUseMixedPrecision
();
int
chainID
=
nhc
.
getChainID
();
int
chainLength
=
nhc
.
getChainLength
();
auto
&
chainState
=
cc
.
getIntegrationUtilities
().
getNoseHooverChainState
();
bool
absChainIsValid
=
chainState
.
count
(
2
*
chainID
)
!=
0
&&
chainState
[
2
*
chainID
].
isInitialized
()
&&
chainState
[
2
*
chainID
].
getSize
()
==
chainLength
;
...
...
@@ -6058,7 +6081,7 @@ double CommonNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context,
return
*
((
float
*
)
pinnedBuffer
);
}
std
::
pair
<
double
,
double
>
CommonNoseHoover
Chain
Kernel
::
computeMaskedKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
bool
downloadValue
)
{
std
::
pair
<
double
,
double
>
Common
Integrate
NoseHoover
Step
Kernel
::
computeMaskedKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
bool
downloadValue
)
{
bool
useDouble
=
cc
.
getUseDoublePrecision
()
||
cc
.
getUseMixedPrecision
();
...
...
@@ -6153,7 +6176,7 @@ std::pair<double, double> CommonNoseHooverChainKernel::computeMaskedKineticEnerg
return
KEs
;
}
void
CommonNoseHoover
Chain
Kernel
::
scaleVelocities
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
std
::
pair
<
double
,
double
>
scaleFactor
)
{
void
Common
Integrate
NoseHoover
Step
Kernel
::
scaleVelocities
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
std
::
pair
<
double
,
double
>
scaleFactor
)
{
// For now we assume that the atoms and pairs info is valid, because compute{Atoms|Pairs}KineticEnergy must have been
// called before this kernel. If that ever ceases to be true, some sanity checks are needed here.
...
...
@@ -6184,6 +6207,54 @@ void CommonNoseHooverChainKernel::scaleVelocities(ContextImpl& context, const No
}
}
void
CommonIntegrateNoseHooverStepKernel
::
createCheckpoint
(
ContextImpl
&
context
,
ostream
&
stream
)
const
{
int
numChains
=
chainState
.
size
();
bool
useDouble
=
cc
.
getUseDoublePrecision
()
||
cc
.
getUseMixedPrecision
();
stream
.
write
((
char
*
)
&
numChains
,
sizeof
(
int
));
for
(
auto
&
state
:
chainState
){
int
chainID
=
state
.
first
;
int
chainLength
=
state
.
second
.
getSize
();
stream
.
write
((
char
*
)
&
chainID
,
sizeof
(
int
));
stream
.
write
((
char
*
)
&
chainLength
,
sizeof
(
int
));
if
(
useDouble
)
{
vector
<
mm_double2
>
stateVec
;
state
.
second
.
download
(
stateVec
);
stream
.
write
((
char
*
)
stateVec
.
data
(),
sizeof
(
mm_double2
)
*
chainLength
);
}
else
{
vector
<
mm_float2
>
stateVec
;
state
.
second
.
download
(
stateVec
);
stream
.
write
((
char
*
)
stateVec
.
data
(),
sizeof
(
mm_float2
)
*
chainLength
);
}
}
}
void
CommonIntegrateNoseHooverStepKernel
::
loadCheckpoint
(
ContextImpl
&
context
,
istream
&
stream
)
{
int
numChains
;
bool
useDouble
=
cc
.
getUseDoublePrecision
()
||
cc
.
getUseMixedPrecision
();
stream
.
read
((
char
*
)
&
numChains
,
sizeof
(
int
));
chainState
.
clear
();
for
(
int
i
=
0
;
i
<
numChains
;
i
++
)
{
int
chainID
,
chainLength
;
stream
.
read
((
char
*
)
&
chainID
,
sizeof
(
int
));
stream
.
read
((
char
*
)
&
chainLength
,
sizeof
(
int
));
if
(
useDouble
)
{
chainState
[
chainID
]
=
ComputeArray
();
chainState
[
chainID
].
initialize
<
mm_double2
>
(
cc
,
chainLength
,
"chainState"
+
to_string
(
chainID
));
vector
<
mm_double2
>
stateVec
(
chainLength
);
stream
.
read
((
char
*
)
&
stateVec
[
0
],
sizeof
(
mm_double2
)
*
chainLength
);
chainState
[
chainID
].
upload
(
stateVec
);
}
else
{
chainState
[
chainID
]
=
ComputeArray
();
chainState
[
chainID
].
initialize
<
mm_float2
>
(
cc
,
chainLength
,
"chainState"
+
to_string
(
chainID
));
vector
<
mm_float2
>
stateVec
(
chainLength
);
stream
.
read
((
char
*
)
&
stateVec
[
0
],
sizeof
(
mm_float2
)
*
chainLength
);
chainState
[
chainID
].
upload
(
stateVec
);
}
}
}
void
CommonIntegrateBrownianStepKernel
::
initialize
(
const
System
&
system
,
const
BrownianIntegrator
&
integrator
)
{
cc
.
initializeContexts
();
cc
.
setAsCurrent
();
...
...
platforms/common/src/IntegrationUtilities.cpp
View file @
0b5d58d7
...
...
@@ -753,25 +753,6 @@ int IntegrationUtilities::prepareRandomNumbers(int numValues) {
}
void
IntegrationUtilities
::
createCheckpoint
(
ostream
&
stream
)
{
int
numChains
=
noseHooverChainState
.
size
();
bool
useDouble
=
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
();
stream
.
write
((
char
*
)
&
numChains
,
sizeof
(
int
));
for
(
auto
&
chainState
:
noseHooverChainState
){
int
chainID
=
chainState
.
first
;
int
chainLength
=
chainState
.
second
.
getSize
();
stream
.
write
((
char
*
)
&
chainID
,
sizeof
(
int
));
stream
.
write
((
char
*
)
&
chainLength
,
sizeof
(
int
));
if
(
useDouble
)
{
vector
<
mm_double2
>
stateVec
;
chainState
.
second
.
download
(
stateVec
);
stream
.
write
((
char
*
)
stateVec
.
data
(),
sizeof
(
mm_double2
)
*
chainLength
);
}
else
{
vector
<
mm_float2
>
stateVec
;
chainState
.
second
.
download
(
stateVec
);
stream
.
write
((
char
*
)
stateVec
.
data
(),
sizeof
(
mm_float2
)
*
chainLength
);
}
}
if
(
!
random
.
isInitialized
())
return
;
stream
.
write
((
char
*
)
&
randomPos
,
sizeof
(
int
));
...
...
@@ -784,29 +765,6 @@ void IntegrationUtilities::createCheckpoint(ostream& stream) {
}
void
IntegrationUtilities
::
loadCheckpoint
(
istream
&
stream
)
{
int
numChains
;
bool
useDouble
=
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
();
stream
.
read
((
char
*
)
&
numChains
,
sizeof
(
int
));
noseHooverChainState
.
clear
();
for
(
int
i
=
0
;
i
<
numChains
;
i
++
)
{
int
chainID
,
chainLength
;
stream
.
read
((
char
*
)
&
chainID
,
sizeof
(
int
));
stream
.
read
((
char
*
)
&
chainLength
,
sizeof
(
int
));
if
(
useDouble
)
{
noseHooverChainState
[
chainID
]
=
ComputeArray
();
noseHooverChainState
[
chainID
].
initialize
<
mm_double2
>
(
context
,
chainLength
,
"chainState"
+
to_string
(
chainID
));
vector
<
mm_double2
>
stateVec
(
chainLength
);
stream
.
read
((
char
*
)
&
stateVec
[
0
],
sizeof
(
mm_double2
)
*
chainLength
);
noseHooverChainState
[
chainID
].
upload
(
stateVec
);
}
else
{
noseHooverChainState
[
chainID
]
=
ComputeArray
();
noseHooverChainState
[
chainID
].
initialize
<
mm_float2
>
(
context
,
chainLength
,
"chainState"
+
to_string
(
chainID
));
vector
<
mm_float2
>
stateVec
(
chainLength
);
stream
.
read
((
char
*
)
&
stateVec
[
0
],
sizeof
(
mm_float2
)
*
chainLength
);
noseHooverChainState
[
chainID
].
upload
(
stateVec
);
}
}
if
(
!
random
.
isInitialized
())
return
;
stream
.
read
((
char
*
)
&
randomPos
,
sizeof
(
int
));
...
...
platforms/common/src/kernels/cmapTorsionForce.cc
View file @
0b5d58d7
...
...
@@ -61,8 +61,8 @@ angleB = fmod(angleB+2.0f*PI, 2.0f*PI);
int2
pos
=
MAP_POS
[
MAPS
[
index
]];
int
size
=
pos
.
y
;
real
delta
=
2
*
PI
/
size
;
int
s
=
(
int
)
(
angleA
/
delta
);
int
t
=
(
int
)
(
angleB
/
delta
);
int
s
=
(
int
)
fmin
(
angleA
/
delta
,
size
-
1
);
int
t
=
(
int
)
fmin
(
angleB
/
delta
,
size
-
1
);
float4
c
[
4
];
int
coeffIndex
=
pos
.
x
+
4
*
(
s
+
size
*
t
);
c
[
0
]
=
COEFF
[
coeffIndex
];
...
...
platforms/common/src/kernels/customIntegratorPerDof.cc
View file @
0b5d58d7
...
...
@@ -3,8 +3,8 @@ typedef double TempType;
typedef
double3
TempType3
;
typedef
double4
TempType4
;
#define make_TempType3(
a
...) make_double3(
a
)
#define make_TempType4(
a
...) make_double4(
a
)
#define make_TempType3(...) make_double3(
__VA_ARGS__
)
#define make_TempType4(...) make_double4(
__VA_ARGS__
)
#define convertToTempType3(a) make_double3((a).x, (a).y, (a).z)
#define convertToTempType4(a) make_double4((a).x, (a).y, (a).z, (a).w)
...
...
@@ -16,8 +16,8 @@ typedef float TempType;
typedef
float3
TempType3
;
typedef
float4
TempType4
;
#define make_TempType3(
a
...) make_float3(
a
)
#define make_TempType4(
a
...) make_float4(
a
)
#define make_TempType3(...) make_float3(
__VA_ARGS__
)
#define make_TempType4(...) make_float4(
__VA_ARGS__
)
#define convertToTempType3(a) make_float3((a).x, (a).y, (a).z)
#define convertToTempType4(a) make_float4((a).x, (a).y, (a).z, (a).w)
#endif
...
...
platforms/common/src/kernels/langevinMiddle.cc
View file @
0b5d58d7
...
...
@@ -28,7 +28,7 @@ KERNEL void integrateLangevinMiddlePart2(int numAtoms, GLOBAL mixed4* RESTRICT v
)
{
mixed
vscale
=
paramBuffer
[
VelScale
];
mixed
noisescale
=
paramBuffer
[
NoiseScale
];
mixed
halfdt
=
0.5
*
dt
[
0
].
y
;
mixed
halfdt
=
0.5
f
*
dt
[
0
].
y
;
int
index
=
GLOBAL_ID
;
randomIndex
+=
index
;
while
(
index
<
numAtoms
)
{
...
...
platforms/common/src/kernels/noseHooverChain.cc
View file @
0b5d58d7
// Propagates a Nose Hoover chain a full timestep
KERNEL
void
propagateNoseHooverChain
(
GLOBAL
mixed2
*
RESTRICT
chainData
,
GLOBAL
const
mixed2
*
RESTRICT
energySum
,
GLOBAL
mixed2
*
RESTRICT
scaleFactor
,
GLOBAL
mixed
*
RESTRICT
chainMasses
,
GLOBAL
mixed
*
RESTRICT
chainForces
,
int
chainType
,
int
chainLength
,
int
numMTS
,
int
numDOFs
,
float
timeStep
,
mixed
kT
,
float
frequency
){
...
...
@@ -15,30 +16,28 @@ KERNEL void propagateNoseHooverChain(GLOBAL mixed2* RESTRICT chainData, GLOBAL c
for
(
int
mts
=
0
;
mts
<
numMTS
;
++
mts
)
{
BEGIN_YS_LOOP
mixed
wdt
=
ys
*
timeOverMTS
;
chainData
[
chainLength
-
1
].
y
+=
0.
2
5
f
*
wdt
*
chainForces
[
chainLength
-
1
];
chainData
[
chainLength
-
1
].
y
+=
0.5
f
*
wdt
*
chainForces
[
chainLength
-
1
];
for
(
int
bead
=
chainLength
-
2
;
bead
>=
0
;
--
bead
)
{
mixed
aa
=
EXP
(
-
0.
1
25
f
*
wdt
*
chainData
[
bead
+
1
].
y
);
chainData
[
bead
].
y
=
aa
*
(
chainData
[
bead
].
y
*
aa
+
0.
2
5
f
*
wdt
*
chainForces
[
bead
]);
mixed
aa
=
exp
(
-
0.25
f
*
wdt
*
chainData
[
bead
+
1
].
y
);
chainData
[
bead
].
y
=
aa
*
(
chainData
[
bead
].
y
*
aa
+
0.5
f
*
wdt
*
chainForces
[
bead
]);
}
// update particle velocities
mixed
aa
=
EXP
(
-
0.5
f
*
wdt
*
chainData
[
0
].
y
);
scale
*=
aa
;
scale
*=
(
mixed
)
exp
(
-
wdt
*
chainData
[
0
].
y
);;
// update the thermostat positions
for
(
int
bead
=
0
;
bead
<
chainLength
;
++
bead
)
{
chainData
[
bead
].
x
+=
0.5
f
*
chainData
[
bead
].
y
*
wdt
;
chainData
[
bead
].
x
+=
chainData
[
bead
].
y
*
wdt
;
}
// update the forces
chainForces
[
0
]
=
(
scale
*
scale
*
KE2
-
numDOFs
*
kT
)
/
chainMasses
[
0
];
// update thermostat velocities
for
(
int
bead
=
0
;
bead
<
chainLength
-
1
;
++
bead
)
{
mixed
aa
=
EXP
(
-
0.
1
25
f
*
wdt
*
chainData
[
bead
+
1
].
y
);
chainData
[
bead
].
y
=
aa
*
(
aa
*
chainData
[
bead
].
y
+
0.
2
5
f
*
wdt
*
chainForces
[
bead
]);
mixed
aa
=
exp
(
-
0.25
f
*
wdt
*
chainData
[
bead
+
1
].
y
);
chainData
[
bead
].
y
=
aa
*
(
aa
*
chainData
[
bead
].
y
+
0.5
f
*
wdt
*
chainForces
[
bead
]);
chainForces
[
bead
+
1
]
=
(
chainMasses
[
bead
]
*
chainData
[
bead
].
y
*
chainData
[
bead
].
y
-
kT
)
/
chainMasses
[
bead
+
1
];
}
chainData
[
chainLength
-
1
].
y
+=
0.
2
5
f
*
wdt
*
chainForces
[
chainLength
-
1
];
chainData
[
chainLength
-
1
].
y
+=
0.5
f
*
wdt
*
chainForces
[
chainLength
-
1
];
END_YS_LOOP
}
// MTS loop
if
(
chainType
==
0
)
{
scaleFactor
[
0
].
x
=
scale
;
}
else
{
...
...
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment