Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
454caac9
Commit
454caac9
authored
May 14, 2014
by
peastman
Browse files
Preliminary support for ARM/Android.
parent
51828eaa
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
722 additions
and
316 deletions
+722
-316
openmmapi/include/openmm/internal/hardware.h
openmmapi/include/openmm/internal/hardware.h
+41
-31
openmmapi/include/openmm/internal/vectorize.h
openmmapi/include/openmm/internal/vectorize.h
+7
-252
openmmapi/include/openmm/internal/vectorize_neon.h
openmmapi/include/openmm/internal/vectorize_neon.h
+337
-0
openmmapi/include/openmm/internal/vectorize_sse.h
openmmapi/include/openmm/internal/vectorize_sse.h
+286
-0
platforms/cpu/sharedTarget/CMakeLists.txt
platforms/cpu/sharedTarget/CMakeLists.txt
+11
-7
platforms/cpu/src/CpuNeighborList.cpp
platforms/cpu/src/CpuNeighborList.cpp
+0
-1
platforms/cpu/src/CpuNonbondedForceVec4.cpp
platforms/cpu/src/CpuNonbondedForceVec4.cpp
+2
-2
platforms/cpu/src/CpuPlatform.cpp
platforms/cpu/src/CpuPlatform.cpp
+15
-9
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+2
-2
platforms/opencl/src/OpenCLPlatform.cpp
platforms/opencl/src/OpenCLPlatform.cpp
+2
-4
plugins/cpupme/CMakeLists.txt
plugins/cpupme/CMakeLists.txt
+5
-1
plugins/cpupme/src/CpuPmeKernels.cpp
plugins/cpupme/src/CpuPmeKernels.cpp
+14
-7
No files found.
openmmapi/include/openmm/internal/hardware.h
View file @
454caac9
...
@@ -47,8 +47,12 @@
...
@@ -47,8 +47,12 @@
#define NOMINMAX
#define NOMINMAX
#include <windows.h>
#include <windows.h>
#else
#else
#include <dlfcn.h>
#ifdef __ANDROID__
#include <unistd.h>
#include <cpu-features.h>
#else
#include <dlfcn.h>
#include <unistd.h>
#endif
#endif
#endif
#endif
#endif
...
@@ -70,11 +74,15 @@ static int getNumProcessors() {
...
@@ -70,11 +74,15 @@ static int getNumProcessors() {
ncpu
=
1
;
ncpu
=
1
;
return
ncpu
;
return
ncpu
;
#else
#else
long
nProcessorsOnline
=
sysconf
(
_SC_NPROCESSORS_ONLN
);
#ifdef __ANDROID__
if
(
nProcessorsOnline
==
-
1
)
return
android_getCpuCount
();
return
1
;
#else
else
long
nProcessorsOnline
=
sysconf
(
_SC_NPROCESSORS_ONLN
);
return
(
int
)
nProcessorsOnline
;
if
(
nProcessorsOnline
==
-
1
)
return
1
;
else
return
(
int
)
nProcessorsOnline
;
#endif
#endif
#endif
#endif
#endif
}
}
...
@@ -85,30 +93,32 @@ static int getNumProcessors() {
...
@@ -85,30 +93,32 @@ static int getNumProcessors() {
#ifdef _WIN32
#ifdef _WIN32
#define cpuid __cpuid
#define cpuid __cpuid
#else
#else
static
void
cpuid
(
int
cpuInfo
[
4
],
int
infoType
){
#ifndef __ANDROID__
#ifdef __LP64__
static
void
cpuid
(
int
cpuInfo
[
4
],
int
infoType
){
__asm__
__volatile__
(
#ifdef __LP64__
"cpuid"
:
__asm__
__volatile__
(
"=a"
(
cpuInfo
[
0
]),
"cpuid"
:
"=b"
(
cpuInfo
[
1
]),
"=a"
(
cpuInfo
[
0
]),
"=c"
(
cpuInfo
[
2
]),
"=b"
(
cpuInfo
[
1
]),
"=d"
(
cpuInfo
[
3
])
:
"=c"
(
cpuInfo
[
2
]),
"a"
(
infoType
)
"=d"
(
cpuInfo
[
3
])
:
);
"a"
(
infoType
)
#else
);
__asm__
__volatile__
(
#else
"pushl %%ebx
\n
"
__asm__
__volatile__
(
"cpuid
\n
"
"pushl %%ebx
\n
"
"movl %%ebx, %1
\n
"
"cpuid
\n
"
"popl %%ebx
\n
"
:
"movl %%ebx, %1
\n
"
"=a"
(
cpuInfo
[
0
]),
"popl %%ebx
\n
"
:
"=r"
(
cpuInfo
[
1
]),
"=a"
(
cpuInfo
[
0
]),
"=c"
(
cpuInfo
[
2
]),
"=r"
(
cpuInfo
[
1
]),
"=d"
(
cpuInfo
[
3
])
:
"=c"
(
cpuInfo
[
2
]),
"a"
(
infoType
)
"=d"
(
cpuInfo
[
3
])
:
);
"a"
(
infoType
)
#endif
);
}
#endif
}
#endif
#endif
#endif
#endif // OPENMM_HARDWARE_H_
#endif // OPENMM_HARDWARE_H_
openmmapi/include/openmm/internal/vectorize.h
View file @
454caac9
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 201
3
Stanford University and the Authors. *
* Portions copyright (c) 201
4
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -31,256 +31,11 @@
...
@@ -31,256 +31,11 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
* -------------------------------------------------------------------------- */
#include <smmintrin.h>
#if defined(__ANDROID__)
#include "vectorize_neon.h"
#else
// This file defines classes and functions to simplify vectorizing code with SSE.
#include "vectorize_sse.h"
#endif
class
ivec4
;
/**
* A four element vector of floats.
*/
class
fvec4
{
public:
__m128
val
;
fvec4
()
{}
fvec4
(
float
v
)
:
val
(
_mm_set1_ps
(
v
))
{}
fvec4
(
float
v1
,
float
v2
,
float
v3
,
float
v4
)
:
val
(
_mm_set_ps
(
v4
,
v3
,
v2
,
v1
))
{}
fvec4
(
__m128
v
)
:
val
(
v
)
{}
fvec4
(
const
float
*
v
)
:
val
(
_mm_loadu_ps
(
v
))
{}
operator
__m128
()
const
{
return
val
;
}
float
operator
[](
int
i
)
const
{
float
result
[
4
];
store
(
result
);
return
result
[
i
];
}
void
store
(
float
*
v
)
const
{
_mm_storeu_ps
(
v
,
val
);
}
fvec4
operator
+
(
const
fvec4
&
other
)
const
{
return
_mm_add_ps
(
val
,
other
);
}
fvec4
operator
-
(
const
fvec4
&
other
)
const
{
return
_mm_sub_ps
(
val
,
other
);
}
fvec4
operator
*
(
const
fvec4
&
other
)
const
{
return
_mm_mul_ps
(
val
,
other
);
}
fvec4
operator
/
(
const
fvec4
&
other
)
const
{
return
_mm_div_ps
(
val
,
other
);
}
void
operator
+=
(
const
fvec4
&
other
)
{
val
=
_mm_add_ps
(
val
,
other
);
}
void
operator
-=
(
const
fvec4
&
other
)
{
val
=
_mm_sub_ps
(
val
,
other
);
}
void
operator
*=
(
const
fvec4
&
other
)
{
val
=
_mm_mul_ps
(
val
,
other
);
}
void
operator
/=
(
const
fvec4
&
other
)
{
val
=
_mm_div_ps
(
val
,
other
);
}
fvec4
operator
-
()
const
{
return
_mm_sub_ps
(
_mm_set1_ps
(
0.0
f
),
val
);
}
fvec4
operator
&
(
const
fvec4
&
other
)
const
{
return
_mm_and_ps
(
val
,
other
);
}
fvec4
operator
|
(
const
fvec4
&
other
)
const
{
return
_mm_or_ps
(
val
,
other
);
}
fvec4
operator
==
(
const
fvec4
&
other
)
const
{
return
_mm_cmpeq_ps
(
val
,
other
);
}
fvec4
operator
!=
(
const
fvec4
&
other
)
const
{
return
_mm_cmpneq_ps
(
val
,
other
);
}
fvec4
operator
>
(
const
fvec4
&
other
)
const
{
return
_mm_cmpgt_ps
(
val
,
other
);
}
fvec4
operator
<
(
const
fvec4
&
other
)
const
{
return
_mm_cmplt_ps
(
val
,
other
);
}
fvec4
operator
>=
(
const
fvec4
&
other
)
const
{
return
_mm_cmpge_ps
(
val
,
other
);
}
fvec4
operator
<=
(
const
fvec4
&
other
)
const
{
return
_mm_cmple_ps
(
val
,
other
);
}
operator
ivec4
()
const
;
};
/**
* A four element vector of ints.
*/
class
ivec4
{
public:
__m128i
val
;
ivec4
()
{}
ivec4
(
int
v
)
:
val
(
_mm_set1_epi32
(
v
))
{}
ivec4
(
int
v1
,
int
v2
,
int
v3
,
int
v4
)
:
val
(
_mm_set_epi32
(
v4
,
v3
,
v2
,
v1
))
{}
ivec4
(
__m128i
v
)
:
val
(
v
)
{}
ivec4
(
const
int
*
v
)
:
val
(
_mm_loadu_si128
((
const
__m128i
*
)
v
))
{}
operator
__m128i
()
const
{
return
val
;
}
int
operator
[](
int
i
)
const
{
int
result
[
4
];
store
(
result
);
return
result
[
i
];
}
void
store
(
int
*
v
)
const
{
_mm_storeu_si128
((
__m128i
*
)
v
,
val
);
}
ivec4
operator
+
(
const
ivec4
&
other
)
const
{
return
_mm_add_epi32
(
val
,
other
);
}
ivec4
operator
-
(
const
ivec4
&
other
)
const
{
return
_mm_sub_epi32
(
val
,
other
);
}
ivec4
operator
*
(
const
ivec4
&
other
)
const
{
return
_mm_mul_epi32
(
val
,
other
);
}
void
operator
+=
(
const
ivec4
&
other
)
{
val
=
_mm_add_epi32
(
val
,
other
);
}
void
operator
-=
(
const
ivec4
&
other
)
{
val
=
_mm_sub_epi32
(
val
,
other
);
}
void
operator
*=
(
const
ivec4
&
other
)
{
val
=
_mm_mul_epi32
(
val
,
other
);
}
ivec4
operator
-
()
const
{
return
_mm_sub_epi32
(
_mm_set1_epi32
(
0
),
val
);
}
ivec4
operator
&
(
const
ivec4
&
other
)
const
{
return
_mm_and_si128
(
val
,
other
);
}
ivec4
operator
|
(
const
ivec4
&
other
)
const
{
return
_mm_or_si128
(
val
,
other
);
}
ivec4
operator
==
(
const
ivec4
&
other
)
const
{
return
_mm_cmpeq_epi32
(
val
,
other
);
}
ivec4
operator
!=
(
const
ivec4
&
other
)
const
{
return
_mm_xor_si128
(
*
this
==
other
,
_mm_set1_epi32
(
0xFFFFFFFF
));
}
ivec4
operator
>
(
const
ivec4
&
other
)
const
{
return
_mm_cmpgt_epi32
(
val
,
other
);
}
ivec4
operator
<
(
const
ivec4
&
other
)
const
{
return
_mm_cmplt_epi32
(
val
,
other
);
}
ivec4
operator
>=
(
const
ivec4
&
other
)
const
{
return
_mm_xor_si128
(
_mm_cmplt_epi32
(
val
,
other
),
_mm_set1_epi32
(
0xFFFFFFFF
));
}
ivec4
operator
<=
(
const
ivec4
&
other
)
const
{
return
_mm_xor_si128
(
_mm_cmpgt_epi32
(
val
,
other
),
_mm_set1_epi32
(
0xFFFFFFFF
));
}
operator
fvec4
()
const
;
};
// Conversion operators.
inline
fvec4
::
operator
ivec4
()
const
{
return
_mm_cvttps_epi32
(
val
);
}
inline
ivec4
::
operator
fvec4
()
const
{
return
_mm_cvtepi32_ps
(
val
);
}
// Functions that operate on fvec4s.
static
inline
fvec4
floor
(
const
fvec4
&
v
)
{
return
fvec4
(
_mm_floor_ps
(
v
.
val
));
}
static
inline
fvec4
ceil
(
const
fvec4
&
v
)
{
return
fvec4
(
_mm_ceil_ps
(
v
.
val
));
}
static
inline
fvec4
round
(
const
fvec4
&
v
)
{
return
fvec4
(
_mm_round_ps
(
v
.
val
,
_MM_FROUND_TO_NEAREST_INT
));
}
static
inline
fvec4
min
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
_mm_min_ps
(
v1
.
val
,
v2
.
val
));
}
static
inline
fvec4
max
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
_mm_max_ps
(
v1
.
val
,
v2
.
val
));
}
static
inline
fvec4
abs
(
const
fvec4
&
v
)
{
static
const
__m128
mask
=
_mm_castsi128_ps
(
_mm_set1_epi32
(
0x7FFFFFFF
));
return
fvec4
(
_mm_and_ps
(
v
.
val
,
mask
));
}
static
inline
fvec4
sqrt
(
const
fvec4
&
v
)
{
return
fvec4
(
_mm_sqrt_ps
(
v
.
val
));
}
static
inline
float
dot3
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0x71
));
}
static
inline
float
dot4
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0xF1
));
}
static
inline
void
transpose
(
fvec4
&
v1
,
fvec4
&
v2
,
fvec4
&
v3
,
fvec4
&
v4
)
{
_MM_TRANSPOSE4_PS
(
v1
,
v2
,
v3
,
v4
);
}
// Functions that operate on ivec4s.
static
inline
ivec4
min
(
const
ivec4
&
v1
,
const
ivec4
&
v2
)
{
return
ivec4
(
_mm_min_epi32
(
v1
.
val
,
v2
.
val
));
}
static
inline
ivec4
max
(
const
ivec4
&
v1
,
const
ivec4
&
v2
)
{
return
ivec4
(
_mm_max_epi32
(
v1
.
val
,
v2
.
val
));
}
static
inline
ivec4
abs
(
const
ivec4
&
v
)
{
return
ivec4
(
_mm_abs_epi32
(
v
.
val
));
}
static
inline
bool
any
(
const
ivec4
&
v
)
{
return
!
_mm_test_all_zeros
(
v
,
_mm_set1_epi32
(
0xFFFFFFFF
));
}
// Mathematical operators involving a scalar and a vector.
static
inline
fvec4
operator
+
(
float
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
)
+
v2
;
}
static
inline
fvec4
operator
-
(
float
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
)
-
v2
;
}
static
inline
fvec4
operator
*
(
float
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
)
*
v2
;
}
static
inline
fvec4
operator
/
(
float
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
)
/
v2
;
}
// Operations for blending fvec4s based on an ivec4.
static
inline
fvec4
blend
(
const
fvec4
&
v1
,
const
fvec4
&
v2
,
const
ivec4
&
mask
)
{
return
fvec4
(
_mm_blendv_ps
(
v1
.
val
,
v2
.
val
,
_mm_castsi128_ps
(
mask
.
val
)));
}
#endif
/*OPENMM_VECTORIZE_H_*/
#endif
/*OPENMM_VECTORIZE_H_*/
openmmapi/include/openmm/internal/vectorize_neon.h
0 → 100644
View file @
454caac9
#ifndef OPENMM_VECTORIZE_NEON_H_
#define OPENMM_VECTORIZE_NEON_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2014 Stanford University and the Authors. *
* Authors: Mateus Lima, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include <cpu-features.h>
#include <arm_neon.h>
#include <cmath>
typedef
int
int32_t
;
// This file defines classes and functions to simplify vectorizing code with NEON.
class
ivec4
;
/**
* A four element vector of floats.
*/
class
fvec4
{
public:
float32x4_t
val
;
fvec4
()
{}
fvec4
(
float
v
)
:
val
(
vdupq_n_f32
(
v
))
{}
fvec4
(
float
v1
,
float
v2
,
float
v3
,
float
v4
)
{
float
v
[]
=
{
v1
,
v2
,
v3
,
v4
};
val
=
vld1q_f32
(
v
);
}
fvec4
(
float32x4_t
v
)
:
val
(
v
)
{}
fvec4
(
const
float
*
v
)
:
val
(
vld1q_f32
(
v
))
{}
operator
float32x4_t
()
const
{
return
val
;
}
float
operator
[](
int
i
)
const
{
float
result
[
4
];
store
(
result
);
return
result
[
i
];
}
void
store
(
float
*
v
)
const
{
vst1q_f32
(
v
,
val
);
}
fvec4
operator
+
(
const
fvec4
&
other
)
const
{
// Tested OK
return
vaddq_f32
(
val
,
other
);
}
fvec4
operator
-
(
const
fvec4
&
other
)
const
{
// Tested OK
return
vsubq_f32
(
val
,
other
);
}
fvec4
operator
*
(
const
fvec4
&
other
)
const
{
// Tested OK
return
vmulq_f32
(
val
,
other
);
}
fvec4
operator
/
(
const
fvec4
&
other
)
const
{
// Tested OK
// NEON does not have a divide float-point operator, so we get the reciprocal and multiply.
float32x4_t
reciprocal
=
vrecpeq_f32
(
other
);
reciprocal
=
vmulq_f32
(
vrecpsq_f32
(
other
,
reciprocal
),
reciprocal
);
reciprocal
=
vmulq_f32
(
vrecpsq_f32
(
other
,
reciprocal
),
reciprocal
);
fvec4
result
=
vmulq_f32
(
val
,
reciprocal
);
return
result
;
}
void
operator
+=
(
const
fvec4
&
other
)
{
val
=
vaddq_f32
(
val
,
other
);
}
void
operator
-=
(
const
fvec4
&
other
)
{
val
=
vsubq_f32
(
val
,
other
);
}
void
operator
*=
(
const
fvec4
&
other
)
{
val
=
vmulq_f32
(
val
,
other
);
}
void
operator
/=
(
const
fvec4
&
other
)
{
val
=
val
/
other
.
val
;
}
fvec4
operator
-
()
const
{
return
vnegq_f32
(
val
);
}
fvec4
operator
&
(
const
fvec4
&
other
)
const
{
return
vreinterpretq_f32_u32
(
vandq_u32
(
vreinterpretq_u32_f32
(
val
),
vreinterpretq_u32_f32
(
other
)));
}
fvec4
operator
|
(
const
fvec4
&
other
)
const
{
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vorrq_u32
(
vcvtq_u32_f32
(
val
),
vcvtq_u32_f32
(
other
))));
}
fvec4
operator
==
(
const
fvec4
&
other
)
const
{
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vceqq_f32
(
val
,
other
)));
}
fvec4
operator
!=
(
const
fvec4
&
other
)
const
{
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vmvnq_u32
(
vceqq_f32
(
val
,
other
))));
// not(equals(val, other))
}
fvec4
operator
>
(
const
fvec4
&
other
)
const
{
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vcgtq_f32
(
val
,
other
)));
}
fvec4
operator
<
(
const
fvec4
&
other
)
const
{
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vcltq_f32
(
val
,
other
)));
}
fvec4
operator
>=
(
const
fvec4
&
other
)
const
{
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vcgeq_f32
(
val
,
other
)));
}
fvec4
operator
<=
(
const
fvec4
&
other
)
const
{
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vcleq_f32
(
val
,
other
)));
}
operator
ivec4
()
const
;
};
/**
* A four element vector of ints.
*/
class
ivec4
{
public:
int32x4_t
val
;
ivec4
()
{}
ivec4
(
int
v
)
:
val
(
vdupq_n_s32
(
v
))
{}
ivec4
(
int
v1
,
int
v2
,
int
v3
,
int
v4
)
{
int
v
[]
=
{
v1
,
v2
,
v3
,
v4
};
val
=
vld1q_s32
(
v
);
}
ivec4
(
int32x4_t
v
)
:
val
(
v
)
{}
ivec4
(
const
int
*
v
)
:
val
(
vld1q_s32
(
v
))
{}
operator
int32x4_t
()
const
{
return
val
;
}
int
operator
[](
int
i
)
const
{
int
result
[
4
];
store
(
result
);
return
result
[
i
];
}
void
store
(
int
*
v
)
const
{
vst1q_s32
(
v
,
val
);
}
ivec4
operator
+
(
const
ivec4
&
other
)
const
{
return
vaddq_s32
(
val
,
other
);
}
ivec4
operator
-
(
const
ivec4
&
other
)
const
{
return
vsubq_s32
(
val
,
other
);
}
ivec4
operator
*
(
const
ivec4
&
other
)
const
{
return
vmulq_s32
(
val
,
other
);
}
void
operator
+=
(
const
ivec4
&
other
)
{
val
=
vaddq_s32
(
val
,
other
);
}
void
operator
-=
(
const
ivec4
&
other
)
{
val
=
vsubq_s32
(
val
,
other
);
}
void
operator
*=
(
const
ivec4
&
other
)
{
val
=
vmulq_s32
(
val
,
other
);
}
ivec4
operator
-
()
const
{
return
vnegq_s32
(
val
);
}
ivec4
operator
&
(
const
ivec4
&
other
)
const
{
// Tested OK
return
ivec4
(
vandq_s32
(
val
,
other
));
}
ivec4
operator
|
(
const
ivec4
&
other
)
const
{
return
ivec4
(
vorrq_s32
(
val
,
other
));
}
ivec4
operator
==
(
const
ivec4
&
other
)
const
{
return
ivec4
(
vreinterpretq_s32_u32
(
vceqq_s32
(
val
,
other
)));
}
ivec4
operator
!=
(
const
ivec4
&
other
)
const
{
// OK
return
ivec4
(
vreinterpretq_s32_u32
(
vmvnq_u32
(
vceqq_s32
(
val
,
other
))));
// not(equal(val, other))
}
ivec4
operator
>
(
const
ivec4
&
other
)
const
{
return
ivec4
(
vreinterpretq_s32_u32
(
vcgtq_s32
(
val
,
other
)));
}
ivec4
operator
<
(
const
ivec4
&
other
)
const
{
return
ivec4
(
vreinterpretq_s32_u32
(
vcltq_s32
(
val
,
other
)));
}
ivec4
operator
>=
(
const
ivec4
&
other
)
const
{
return
ivec4
(
vreinterpretq_s32_u32
(
vcgeq_s32
(
val
,
other
)));
}
ivec4
operator
<=
(
const
ivec4
&
other
)
const
{
// OK
return
ivec4
(
vreinterpretq_s32_u32
(
vcleq_s32
(
val
,
other
)));
}
operator
fvec4
()
const
;
};
// Conversion operators.
inline
fvec4
::
operator
ivec4
()
const
{
return
ivec4
(
vcvtq_s32_f32
(
val
));
}
inline
ivec4
::
operator
fvec4
()
const
{
return
fvec4
(
vcvtq_f32_s32
(
val
));
}
// Functions that operate on fvec4s.
static
inline
fvec4
floor
(
const
fvec4
&
v
)
{
// Tested: OK
fvec4
result
=
v
+
fvec4
(
0.5
f
);
result
=
(
fvec4
)
((
ivec4
)
result
);
return
result
;
}
static
inline
float
roundToNearest
(
float
num
)
{
return
(
num
>
0.0
f
)
?
std
::
floor
(
num
+
0.5
f
)
:
std
::
ceil
(
num
-
0.5
f
);
}
static
inline
fvec4
round
(
const
fvec4
&
v
)
{
// Tested: OK - Needs optimization
float
aux
[
4
];
vst1q_f32
(
aux
,
v
);
return
fvec4
(
roundToNearest
(
aux
[
0
]),
roundToNearest
(
aux
[
1
]),
roundToNearest
(
aux
[
2
]),
roundToNearest
(
aux
[
3
]));
}
static
inline
fvec4
min
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
// Tested OK
return
fvec4
(
vminq_f32
(
v1
.
val
,
v2
.
val
));
}
static
inline
fvec4
max
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
// Tested OK
return
fvec4
(
vmaxq_f32
(
v1
.
val
,
v2
.
val
));
}
static
inline
fvec4
abs
(
const
fvec4
&
v
)
{
// Tested OK
return
fvec4
(
vabdq_f32
(
v
.
val
,
fvec4
(
0.0
)));
}
static
inline
fvec4
ceil
(
const
fvec4
&
v
)
{
// Tested OK
ivec4
intVersion
=
(
ivec4
)
v
;
fvec4
result
=
min
((
fvec4
)
(
v
>
intVersion
),
fvec4
(
1.0
f
));
result
+=
intVersion
;
return
result
;
}
static
inline
fvec4
sqrt
(
const
fvec4
&
v
)
{
float32x4_t
recipSqrt
=
vrsqrteq_f32
(
v
);
recipSqrt
=
vmulq_f32
(
recipSqrt
,
vrsqrtsq_f32
(
vmulq_f32
(
recipSqrt
,
v
),
recipSqrt
));
recipSqrt
=
vmulq_f32
(
recipSqrt
,
vrsqrtsq_f32
(
vmulq_f32
(
recipSqrt
,
v
),
recipSqrt
));
return
vmulq_f32
(
v
,
recipSqrt
);
}
static
inline
float
dot3
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
// Tested: OK
fvec4
result
=
v1
*
v2
;
float
aux
[
4
];
vst1q_f32
(
aux
,
result
);
return
aux
[
0
]
+
aux
[
1
]
+
aux
[
2
];
// Ignore w component
}
static
inline
float
dot4
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
// Tested: OK
fvec4
result
=
v1
*
v2
;
float
aux
[
4
];
vst1q_f32
(
aux
,
result
);
return
aux
[
0
]
+
aux
[
1
]
+
aux
[
2
]
+
aux
[
3
];
}
static
inline
void
transpose
(
fvec4
&
v1
,
fvec4
&
v2
,
fvec4
&
v3
,
fvec4
&
v4
)
{
// Tested: OK
float
aux1
[
4
];
float
aux2
[
4
];
float
aux3
[
4
];
float
aux4
[
4
];
vst1q_f32
(
aux1
,
v1
);
vst1q_f32
(
aux2
,
v2
);
vst1q_f32
(
aux3
,
v3
);
vst1q_f32
(
aux4
,
v4
);
v1
=
fvec4
(
aux1
[
0
],
aux2
[
0
],
aux3
[
0
],
aux4
[
0
]);
v2
=
fvec4
(
aux1
[
1
],
aux2
[
1
],
aux3
[
1
],
aux4
[
1
]);
v3
=
fvec4
(
aux1
[
2
],
aux2
[
2
],
aux3
[
2
],
aux4
[
2
]);
v4
=
fvec4
(
aux1
[
3
],
aux2
[
3
],
aux3
[
3
],
aux4
[
3
]);
}
// Functions that operate on ivec4s.
static
inline
ivec4
min
(
const
ivec4
&
v1
,
const
ivec4
&
v2
)
{
// Tested: not tested
ivec4
res
=
ivec4
(
vminq_s32
(
v1
.
val
,
v2
.
val
));
return
res
;
}
static
inline
ivec4
max
(
const
ivec4
&
v1
,
const
ivec4
&
v2
)
{
// Tested: not tested
ivec4
res
=
ivec4
(
vmaxq_s32
(
v1
.
val
,
v2
.
val
));
return
res
;
}
static
inline
ivec4
abs
(
const
ivec4
&
v
)
{
// Tested: Not tested
ivec4
res
=
ivec4
(
vabdq_s32
(
v
.
val
,
ivec4
(
0
)));
return
res
;
}
static
inline
bool
any
(
const
ivec4
&
v
)
{
// Tested: OK
int
result
[
4
];
vst1q_s32
(
result
,
v
);
return
result
[
0
]
!=
0
||
result
[
1
]
!=
0
||
result
[
2
]
!=
0
||
result
[
3
]
!=
0
;
}
// Mathematical operators involving a scalar and a vector.
static
inline
fvec4
operator
+
(
float
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
)
+
v2
;
}
static
inline
fvec4
operator
-
(
float
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
)
-
v2
;
}
static
inline
fvec4
operator
*
(
float
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
)
*
v2
;
}
static
inline
fvec4
operator
/
(
float
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
)
/
v2
;
}
// Operations for blending fvec4s based on an ivec4.
static
inline
fvec4
blend
(
const
fvec4
&
v1
,
const
fvec4
&
v2
,
const
ivec4
&
mask
)
{
// Tested OK
return
fvec4
(
vbslq_f32
(
vreinterpretq_u32_s32
(
mask
.
val
),
v2
,
v1
));
}
#endif
/*OPENMM_VECTORIZE_NEON_H_*/
openmmapi/include/openmm/internal/vectorize_sse.h
0 → 100644
View file @
454caac9
#ifndef OPENMM_VECTORIZE_SSE_H_
#define OPENMM_VECTORIZE_SSE_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include <smmintrin.h>
// This file defines classes and functions to simplify vectorizing code with SSE.
class
ivec4
;
/**
* A four element vector of floats.
*/
class
fvec4
{
public:
__m128
val
;
fvec4
()
{}
fvec4
(
float
v
)
:
val
(
_mm_set1_ps
(
v
))
{}
fvec4
(
float
v1
,
float
v2
,
float
v3
,
float
v4
)
:
val
(
_mm_set_ps
(
v4
,
v3
,
v2
,
v1
))
{}
fvec4
(
__m128
v
)
:
val
(
v
)
{}
fvec4
(
const
float
*
v
)
:
val
(
_mm_loadu_ps
(
v
))
{}
operator
__m128
()
const
{
return
val
;
}
float
operator
[](
int
i
)
const
{
float
result
[
4
];
store
(
result
);
return
result
[
i
];
}
void
store
(
float
*
v
)
const
{
_mm_storeu_ps
(
v
,
val
);
}
fvec4
operator
+
(
const
fvec4
&
other
)
const
{
return
_mm_add_ps
(
val
,
other
);
}
fvec4
operator
-
(
const
fvec4
&
other
)
const
{
return
_mm_sub_ps
(
val
,
other
);
}
fvec4
operator
*
(
const
fvec4
&
other
)
const
{
return
_mm_mul_ps
(
val
,
other
);
}
fvec4
operator
/
(
const
fvec4
&
other
)
const
{
return
_mm_div_ps
(
val
,
other
);
}
void
operator
+=
(
const
fvec4
&
other
)
{
val
=
_mm_add_ps
(
val
,
other
);
}
void
operator
-=
(
const
fvec4
&
other
)
{
val
=
_mm_sub_ps
(
val
,
other
);
}
void
operator
*=
(
const
fvec4
&
other
)
{
val
=
_mm_mul_ps
(
val
,
other
);
}
void
operator
/=
(
const
fvec4
&
other
)
{
val
=
_mm_div_ps
(
val
,
other
);
}
fvec4
operator
-
()
const
{
return
_mm_sub_ps
(
_mm_set1_ps
(
0.0
f
),
val
);
}
fvec4
operator
&
(
const
fvec4
&
other
)
const
{
return
_mm_and_ps
(
val
,
other
);
}
fvec4
operator
|
(
const
fvec4
&
other
)
const
{
return
_mm_or_ps
(
val
,
other
);
}
fvec4
operator
==
(
const
fvec4
&
other
)
const
{
return
_mm_cmpeq_ps
(
val
,
other
);
}
fvec4
operator
!=
(
const
fvec4
&
other
)
const
{
return
_mm_cmpneq_ps
(
val
,
other
);
}
fvec4
operator
>
(
const
fvec4
&
other
)
const
{
return
_mm_cmpgt_ps
(
val
,
other
);
}
fvec4
operator
<
(
const
fvec4
&
other
)
const
{
return
_mm_cmplt_ps
(
val
,
other
);
}
fvec4
operator
>=
(
const
fvec4
&
other
)
const
{
return
_mm_cmpge_ps
(
val
,
other
);
}
fvec4
operator
<=
(
const
fvec4
&
other
)
const
{
return
_mm_cmple_ps
(
val
,
other
);
}
operator
ivec4
()
const
;
};
/**
* A four element vector of ints.
*/
class
ivec4
{
public:
__m128i
val
;
ivec4
()
{}
ivec4
(
int
v
)
:
val
(
_mm_set1_epi32
(
v
))
{}
ivec4
(
int
v1
,
int
v2
,
int
v3
,
int
v4
)
:
val
(
_mm_set_epi32
(
v4
,
v3
,
v2
,
v1
))
{}
ivec4
(
__m128i
v
)
:
val
(
v
)
{}
ivec4
(
const
int
*
v
)
:
val
(
_mm_loadu_si128
((
const
__m128i
*
)
v
))
{}
operator
__m128i
()
const
{
return
val
;
}
int
operator
[](
int
i
)
const
{
int
result
[
4
];
store
(
result
);
return
result
[
i
];
}
void
store
(
int
*
v
)
const
{
_mm_storeu_si128
((
__m128i
*
)
v
,
val
);
}
ivec4
operator
+
(
const
ivec4
&
other
)
const
{
return
_mm_add_epi32
(
val
,
other
);
}
ivec4
operator
-
(
const
ivec4
&
other
)
const
{
return
_mm_sub_epi32
(
val
,
other
);
}
ivec4
operator
*
(
const
ivec4
&
other
)
const
{
return
_mm_mul_epi32
(
val
,
other
);
}
void
operator
+=
(
const
ivec4
&
other
)
{
val
=
_mm_add_epi32
(
val
,
other
);
}
void
operator
-=
(
const
ivec4
&
other
)
{
val
=
_mm_sub_epi32
(
val
,
other
);
}
void
operator
*=
(
const
ivec4
&
other
)
{
val
=
_mm_mul_epi32
(
val
,
other
);
}
ivec4
operator
-
()
const
{
return
_mm_sub_epi32
(
_mm_set1_epi32
(
0
),
val
);
}
ivec4
operator
&
(
const
ivec4
&
other
)
const
{
return
_mm_and_si128
(
val
,
other
);
}
ivec4
operator
|
(
const
ivec4
&
other
)
const
{
return
_mm_or_si128
(
val
,
other
);
}
ivec4
operator
==
(
const
ivec4
&
other
)
const
{
return
_mm_cmpeq_epi32
(
val
,
other
);
}
ivec4
operator
!=
(
const
ivec4
&
other
)
const
{
return
_mm_xor_si128
(
*
this
==
other
,
_mm_set1_epi32
(
0xFFFFFFFF
));
}
ivec4
operator
>
(
const
ivec4
&
other
)
const
{
return
_mm_cmpgt_epi32
(
val
,
other
);
}
ivec4
operator
<
(
const
ivec4
&
other
)
const
{
return
_mm_cmplt_epi32
(
val
,
other
);
}
ivec4
operator
>=
(
const
ivec4
&
other
)
const
{
return
_mm_xor_si128
(
_mm_cmplt_epi32
(
val
,
other
),
_mm_set1_epi32
(
0xFFFFFFFF
));
}
ivec4
operator
<=
(
const
ivec4
&
other
)
const
{
return
_mm_xor_si128
(
_mm_cmpgt_epi32
(
val
,
other
),
_mm_set1_epi32
(
0xFFFFFFFF
));
}
operator
fvec4
()
const
;
};
// Conversion operators.
inline
fvec4
::
operator
ivec4
()
const
{
return
_mm_cvttps_epi32
(
val
);
}
inline
ivec4
::
operator
fvec4
()
const
{
return
_mm_cvtepi32_ps
(
val
);
}
// Functions that operate on fvec4s.
static
inline
fvec4
floor
(
const
fvec4
&
v
)
{
return
fvec4
(
_mm_floor_ps
(
v
.
val
));
}
static
inline
fvec4
ceil
(
const
fvec4
&
v
)
{
return
fvec4
(
_mm_ceil_ps
(
v
.
val
));
}
static
inline
fvec4
round
(
const
fvec4
&
v
)
{
return
fvec4
(
_mm_round_ps
(
v
.
val
,
_MM_FROUND_TO_NEAREST_INT
));
}
static
inline
fvec4
min
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
_mm_min_ps
(
v1
.
val
,
v2
.
val
));
}
static
inline
fvec4
max
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
_mm_max_ps
(
v1
.
val
,
v2
.
val
));
}
static
inline
fvec4
abs
(
const
fvec4
&
v
)
{
static
const
__m128
mask
=
_mm_castsi128_ps
(
_mm_set1_epi32
(
0x7FFFFFFF
));
return
fvec4
(
_mm_and_ps
(
v
.
val
,
mask
));
}
static
inline
fvec4
sqrt
(
const
fvec4
&
v
)
{
return
fvec4
(
_mm_sqrt_ps
(
v
.
val
));
}
static
inline
float
dot3
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0x71
));
}
static
inline
float
dot4
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0xF1
));
}
static
inline
void
transpose
(
fvec4
&
v1
,
fvec4
&
v2
,
fvec4
&
v3
,
fvec4
&
v4
)
{
_MM_TRANSPOSE4_PS
(
v1
,
v2
,
v3
,
v4
);
}
// Functions that operate on ivec4s.
static
inline
ivec4
min
(
const
ivec4
&
v1
,
const
ivec4
&
v2
)
{
return
ivec4
(
_mm_min_epi32
(
v1
.
val
,
v2
.
val
));
}
static
inline
ivec4
max
(
const
ivec4
&
v1
,
const
ivec4
&
v2
)
{
return
ivec4
(
_mm_max_epi32
(
v1
.
val
,
v2
.
val
));
}
static
inline
ivec4
abs
(
const
ivec4
&
v
)
{
return
ivec4
(
_mm_abs_epi32
(
v
.
val
));
}
static
inline
bool
any
(
const
ivec4
&
v
)
{
return
!
_mm_test_all_zeros
(
v
,
_mm_set1_epi32
(
0xFFFFFFFF
));
}
// Mathematical operators involving a scalar and a vector.
static
inline
fvec4
operator
+
(
float
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
)
+
v2
;
}
static
inline
fvec4
operator
-
(
float
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
)
-
v2
;
}
static
inline
fvec4
operator
*
(
float
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
)
*
v2
;
}
static
inline
fvec4
operator
/
(
float
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
)
/
v2
;
}
// Operations for blending fvec4s based on an ivec4.
static
inline
fvec4
blend
(
const
fvec4
&
v1
,
const
fvec4
&
v2
,
const
ivec4
&
mask
)
{
return
fvec4
(
_mm_blendv_ps
(
v1
.
val
,
v2
.
val
,
_mm_castsi128_ps
(
mask
.
val
)));
}
#endif
/*OPENMM_VECTORIZE_SSE_H_*/
platforms/cpu/sharedTarget/CMakeLists.txt
View file @
454caac9
FOREACH
(
file
${
SOURCE_FILES
}
)
FOREACH
(
file
${
SOURCE_FILES
}
)
IF
(
file MATCHES
".*Vec8.*"
)
IF
(
file MATCHES
".*Vec8.*"
)
IF
(
MSVC
)
IF
(
MSVC
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
/arch:AVX /D__AVX__"
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
/arch:AVX /D__AVX__"
)
ELSE
(
MSVC
)
ELSE
(
MSVC
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1 -mavx"
)
IF
(
NOT ANDROID
)
ENDIF
(
MSVC
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1 -mavx"
)
ENDIF
(
NOT ANDROID
)
ENDIF
(
MSVC
)
ELSE
(
file MATCHES
".*Vec8.*"
)
ELSE
(
file MATCHES
".*Vec8.*"
)
IF
(
NOT MSVC
)
IF
(
NOT MSVC
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1"
)
IF
(
NOT ANDROID
)
ENDIF
(
NOT MSVC
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1"
)
ENDIF
(
NOT ANDROID
)
ENDIF
(
NOT MSVC
)
ENDIF
(
file MATCHES
".*Vec8.*"
)
ENDIF
(
file MATCHES
".*Vec8.*"
)
ENDFOREACH
(
file
)
ENDFOREACH
(
file
)
ADD_LIBRARY
(
${
SHARED_TARGET
}
SHARED
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
ADD_LIBRARY
(
${
SHARED_TARGET
}
SHARED
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
...
...
platforms/cpu/src/CpuNeighborList.cpp
View file @
454caac9
...
@@ -37,7 +37,6 @@
...
@@ -37,7 +37,6 @@
#include <set>
#include <set>
#include <map>
#include <map>
#include <cmath>
#include <cmath>
#include <smmintrin.h>
using
namespace
std
;
using
namespace
std
;
...
...
platforms/cpu/src/CpuNonbondedForceVec4.cpp
View file @
454caac9
...
@@ -103,7 +103,7 @@ void CpuNonbondedForceVec4::calculateBlockIxn(int blockIndex, float* forces, dou
...
@@ -103,7 +103,7 @@ void CpuNonbondedForceVec4::calculateBlockIxn(int blockIndex, float* forces, dou
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
(
(
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec4
t
=
blend
(
0.0
f
,
(
r
-
switchingDistance
)
*
invSwitchingInterval
,
r
>
switchingDistance
);
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
...
@@ -214,7 +214,7 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces
...
@@ -214,7 +214,7 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
(
(
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec4
t
=
blend
(
0.0
f
,
(
r
-
switchingDistance
)
*
invSwitchingInterval
,
r
>
switchingDistance
);
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
...
...
platforms/cpu/src/CpuPlatform.cpp
View file @
454caac9
...
@@ -36,6 +36,7 @@
...
@@ -36,6 +36,7 @@
#include "ReferenceConstraints.h"
#include "ReferenceConstraints.h"
#include "openmm/internal/hardware.h"
#include "openmm/internal/hardware.h"
#include <sstream>
#include <sstream>
#include <stdlib.h>
using
namespace
OpenMM
;
using
namespace
OpenMM
;
using
namespace
std
;
using
namespace
std
;
...
@@ -93,15 +94,20 @@ bool CpuPlatform::supportsDoublePrecision() const {
...
@@ -93,15 +94,20 @@ bool CpuPlatform::supportsDoublePrecision() const {
}
}
bool
CpuPlatform
::
isProcessorSupported
()
{
bool
CpuPlatform
::
isProcessorSupported
()
{
// Make sure the CPU supports SSE 4.1.
// Make sure the CPU supports SSE 4.1 or NEON.
int
cpuInfo
[
4
];
#ifdef __ANDROID__
cpuid
(
cpuInfo
,
0
);
uint64_t
features
=
android_getCpuFeatures
();
if
(
cpuInfo
[
0
]
>=
1
)
{
return
(
features
&
ANDROID_CPU_ARM_FEATURE_NEON
)
!=
0
;
cpuid
(
cpuInfo
,
1
);
#else
return
((
cpuInfo
[
2
]
&
((
int
)
1
<<
19
))
!=
0
);
int
cpuInfo
[
4
];
}
cpuid
(
cpuInfo
,
0
);
return
false
;
if
(
cpuInfo
[
0
]
>=
1
)
{
cpuid
(
cpuInfo
,
1
);
return
((
cpuInfo
[
2
]
&
((
int
)
1
<<
19
))
!=
0
);
}
return
false
;
#endif
}
}
void
CpuPlatform
::
contextCreated
(
ContextImpl
&
context
,
const
map
<
string
,
string
>&
properties
)
const
{
void
CpuPlatform
::
contextCreated
(
ContextImpl
&
context
,
const
map
<
string
,
string
>&
properties
)
const
{
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
454caac9
...
@@ -4816,7 +4816,7 @@ void OpenCLIntegrateVariableVerletStepKernel::initialize(const System& system, c
...
@@ -4816,7 +4816,7 @@ void OpenCLIntegrateVariableVerletStepKernel::initialize(const System& system, c
kernel1
=
cl
::
Kernel
(
program
,
"integrateVerletPart1"
);
kernel1
=
cl
::
Kernel
(
program
,
"integrateVerletPart1"
);
kernel2
=
cl
::
Kernel
(
program
,
"integrateVerletPart2"
);
kernel2
=
cl
::
Kernel
(
program
,
"integrateVerletPart2"
);
selectSizeKernel
=
cl
::
Kernel
(
program
,
"selectVerletStepSize"
);
selectSizeKernel
=
cl
::
Kernel
(
program
,
"selectVerletStepSize"
);
blockSize
=
min
(
min
(
256
,
system
.
getNumParticles
()),
(
int
)
selectSizeKernel
.
getInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
());
blockSize
=
min
(
min
(
256
,
system
.
getNumParticles
()),
(
int
)
selectSizeKernel
.
get
WorkGroup
Info
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
cl
.
getDevice
()
));
}
}
double
OpenCLIntegrateVariableVerletStepKernel
::
execute
(
ContextImpl
&
context
,
const
VariableVerletIntegrator
&
integrator
,
double
maxTime
)
{
double
OpenCLIntegrateVariableVerletStepKernel
::
execute
(
ContextImpl
&
context
,
const
VariableVerletIntegrator
&
integrator
,
double
maxTime
)
{
...
@@ -4926,7 +4926,7 @@ void OpenCLIntegrateVariableLangevinStepKernel::initialize(const System& system,
...
@@ -4926,7 +4926,7 @@ void OpenCLIntegrateVariableLangevinStepKernel::initialize(const System& system,
params
=
new
OpenCLArray
(
cl
,
3
,
cl
.
getUseDoublePrecision
()
||
cl
.
getUseMixedPrecision
()
?
sizeof
(
cl_double
)
:
sizeof
(
cl_float
),
"langevinParams"
);
params
=
new
OpenCLArray
(
cl
,
3
,
cl
.
getUseDoublePrecision
()
||
cl
.
getUseMixedPrecision
()
?
sizeof
(
cl_double
)
:
sizeof
(
cl_float
),
"langevinParams"
);
blockSize
=
min
(
256
,
system
.
getNumParticles
());
blockSize
=
min
(
256
,
system
.
getNumParticles
());
blockSize
=
max
(
blockSize
,
params
->
getSize
());
blockSize
=
max
(
blockSize
,
params
->
getSize
());
blockSize
=
min
(
blockSize
,
(
int
)
selectSizeKernel
.
getInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
());
blockSize
=
min
(
blockSize
,
(
int
)
selectSizeKernel
.
get
WorkGroup
Info
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
cl
.
getDevice
()
));
}
}
double
OpenCLIntegrateVariableLangevinStepKernel
::
execute
(
ContextImpl
&
context
,
const
VariableLangevinIntegrator
&
integrator
,
double
maxTime
)
{
double
OpenCLIntegrateVariableLangevinStepKernel
::
execute
(
ContextImpl
&
context
,
const
VariableLangevinIntegrator
&
integrator
,
double
maxTime
)
{
...
...
platforms/opencl/src/OpenCLPlatform.cpp
View file @
454caac9
...
@@ -32,6 +32,7 @@
...
@@ -32,6 +32,7 @@
#include "openmm/Context.h"
#include "openmm/Context.h"
#include "openmm/System.h"
#include "openmm/System.h"
#include <algorithm>
#include <algorithm>
#include <cctype>
#include <sstream>
#include <sstream>
#ifdef __APPLE__
#ifdef __APPLE__
#include "sys/sysctl.h"
#include "sys/sysctl.h"
...
@@ -39,10 +40,7 @@
...
@@ -39,10 +40,7 @@
using
namespace
OpenMM
;
using
namespace
OpenMM
;
using
std
::
map
;
using
namespace
std
;
using
std
::
string
;
using
std
::
stringstream
;
using
std
::
vector
;
#ifdef OPENMM_OPENCL_BUILDING_STATIC_LIBRARY
#ifdef OPENMM_OPENCL_BUILDING_STATIC_LIBRARY
extern
"C"
void
registerOpenCLPlatform
()
{
extern
"C"
void
registerOpenCLPlatform
()
{
...
...
plugins/cpupme/CMakeLists.txt
View file @
454caac9
...
@@ -57,7 +57,11 @@ ENDFOREACH(subdir)
...
@@ -57,7 +57,11 @@ ENDFOREACH(subdir)
INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_CURRENT_SOURCE_DIR
}
/src
)
INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_CURRENT_SOURCE_DIR
}
/src
)
IF
(
NOT MSVC
)
IF
(
NOT MSVC
)
SET_SOURCE_FILES_PROPERTIES
(
${
SOURCE_FILES
}
PROPERTIES COMPILE_FLAGS
"-msse4.1"
)
IF
(
ANDROID
)
SET_SOURCE_FILES_PROPERTIES
(
${
SOURCE_FILES
}
PROPERTIES COMPILE_FLAGS
""
)
ELSE
(
ANDROID
)
SET_SOURCE_FILES_PROPERTIES
(
${
SOURCE_FILES
}
PROPERTIES COMPILE_FLAGS
"-msse4.1"
)
ENDIF
(
ANDROID
)
ENDIF
(
NOT MSVC
)
ENDIF
(
NOT MSVC
)
# Include FFTW related files.
# Include FFTW related files.
...
...
plugins/cpupme/src/CpuPmeKernels.cpp
View file @
454caac9
...
@@ -569,13 +569,20 @@ double CpuCalcPmeReciprocalForceKernel::finishComputation(IO& io) {
...
@@ -569,13 +569,20 @@ double CpuCalcPmeReciprocalForceKernel::finishComputation(IO& io) {
}
}
bool
CpuCalcPmeReciprocalForceKernel
::
isProcessorSupported
()
{
bool
CpuCalcPmeReciprocalForceKernel
::
isProcessorSupported
()
{
int
cpuInfo
[
4
];
// Make sure the CPU supports SSE 4.1 or NEON.
cpuid
(
cpuInfo
,
0
);
if
(
cpuInfo
[
0
]
>=
1
)
{
#ifdef __ANDROID__
cpuid
(
cpuInfo
,
1
);
uint64_t
features
=
android_getCpuFeatures
();
return
((
cpuInfo
[
2
]
&
((
int
)
1
<<
19
))
!=
0
);
// Require SSE 4.1
return
(
features
&
ANDROID_CPU_ARM_FEATURE_NEON
)
!=
0
;
}
#else
return
false
;
int
cpuInfo
[
4
];
cpuid
(
cpuInfo
,
0
);
if
(
cpuInfo
[
0
]
>=
1
)
{
cpuid
(
cpuInfo
,
1
);
return
((
cpuInfo
[
2
]
&
((
int
)
1
<<
19
))
!=
0
);
}
return
false
;
#endif
}
}
int
CpuCalcPmeReciprocalForceKernel
::
findFFTDimension
(
int
minimum
)
{
int
CpuCalcPmeReciprocalForceKernel
::
findFFTDimension
(
int
minimum
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment