Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
454caac9
Commit
454caac9
authored
May 14, 2014
by
peastman
Browse files
Preliminary support for ARM/Android.
parent
51828eaa
Changes
12
Show whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
722 additions
and
316 deletions
+722
-316
openmmapi/include/openmm/internal/hardware.h
openmmapi/include/openmm/internal/hardware.h
+41
-31
openmmapi/include/openmm/internal/vectorize.h
openmmapi/include/openmm/internal/vectorize.h
+7
-252
openmmapi/include/openmm/internal/vectorize_neon.h
openmmapi/include/openmm/internal/vectorize_neon.h
+337
-0
openmmapi/include/openmm/internal/vectorize_sse.h
openmmapi/include/openmm/internal/vectorize_sse.h
+286
-0
platforms/cpu/sharedTarget/CMakeLists.txt
platforms/cpu/sharedTarget/CMakeLists.txt
+11
-7
platforms/cpu/src/CpuNeighborList.cpp
platforms/cpu/src/CpuNeighborList.cpp
+0
-1
platforms/cpu/src/CpuNonbondedForceVec4.cpp
platforms/cpu/src/CpuNonbondedForceVec4.cpp
+2
-2
platforms/cpu/src/CpuPlatform.cpp
platforms/cpu/src/CpuPlatform.cpp
+15
-9
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+2
-2
platforms/opencl/src/OpenCLPlatform.cpp
platforms/opencl/src/OpenCLPlatform.cpp
+2
-4
plugins/cpupme/CMakeLists.txt
plugins/cpupme/CMakeLists.txt
+5
-1
plugins/cpupme/src/CpuPmeKernels.cpp
plugins/cpupme/src/CpuPmeKernels.cpp
+14
-7
No files found.
openmmapi/include/openmm/internal/hardware.h
View file @
454caac9
...
...
@@ -46,10 +46,14 @@
#ifdef WIN32
#define NOMINMAX
#include <windows.h>
#else
#ifdef __ANDROID__
#include <cpu-features.h>
#else
#include <dlfcn.h>
#include <unistd.h>
#endif
#endif
#endif
static
int
getNumProcessors
()
{
...
...
@@ -70,11 +74,15 @@ static int getNumProcessors() {
ncpu
=
1
;
return
ncpu
;
#else
#ifdef __ANDROID__
return
android_getCpuCount
();
#else
long
nProcessorsOnline
=
sysconf
(
_SC_NPROCESSORS_ONLN
);
if
(
nProcessorsOnline
==
-
1
)
return
1
;
else
return
(
int
)
nProcessorsOnline
;
#endif
#endif
#endif
}
...
...
@@ -85,8 +93,9 @@ static int getNumProcessors() {
#ifdef _WIN32
#define cpuid __cpuid
#else
static
void
cpuid
(
int
cpuInfo
[
4
],
int
infoType
){
#ifdef __LP64__
#ifndef __ANDROID__
static
void
cpuid
(
int
cpuInfo
[
4
],
int
infoType
){
#ifdef __LP64__
__asm__
__volatile__
(
"cpuid"
:
"=a"
(
cpuInfo
[
0
]),
...
...
@@ -95,7 +104,7 @@ static void cpuid(int cpuInfo[4], int infoType){
"=d"
(
cpuInfo
[
3
])
:
"a"
(
infoType
)
);
#else
#else
__asm__
__volatile__
(
"pushl %%ebx
\n
"
"cpuid
\n
"
...
...
@@ -107,8 +116,9 @@ static void cpuid(int cpuInfo[4], int infoType){
"=d"
(
cpuInfo
[
3
])
:
"a"
(
infoType
)
);
#endif
}
#endif
}
#endif
#endif
#endif // OPENMM_HARDWARE_H_
openmmapi/include/openmm/internal/vectorize.h
View file @
454caac9
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 201
3
Stanford University and the Authors. *
* Portions copyright (c) 201
4
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -32,255 +32,10 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include <smmintrin.h>
// This file defines classes and functions to simplify vectorizing code with SSE.
class
ivec4
;
/**
* A four element vector of floats.
*/
class
fvec4
{
public:
__m128
val
;
fvec4
()
{}
fvec4
(
float
v
)
:
val
(
_mm_set1_ps
(
v
))
{}
fvec4
(
float
v1
,
float
v2
,
float
v3
,
float
v4
)
:
val
(
_mm_set_ps
(
v4
,
v3
,
v2
,
v1
))
{}
fvec4
(
__m128
v
)
:
val
(
v
)
{}
fvec4
(
const
float
*
v
)
:
val
(
_mm_loadu_ps
(
v
))
{}
operator
__m128
()
const
{
return
val
;
}
float
operator
[](
int
i
)
const
{
float
result
[
4
];
store
(
result
);
return
result
[
i
];
}
void
store
(
float
*
v
)
const
{
_mm_storeu_ps
(
v
,
val
);
}
fvec4
operator
+
(
const
fvec4
&
other
)
const
{
return
_mm_add_ps
(
val
,
other
);
}
fvec4
operator
-
(
const
fvec4
&
other
)
const
{
return
_mm_sub_ps
(
val
,
other
);
}
fvec4
operator
*
(
const
fvec4
&
other
)
const
{
return
_mm_mul_ps
(
val
,
other
);
}
fvec4
operator
/
(
const
fvec4
&
other
)
const
{
return
_mm_div_ps
(
val
,
other
);
}
void
operator
+=
(
const
fvec4
&
other
)
{
val
=
_mm_add_ps
(
val
,
other
);
}
void
operator
-=
(
const
fvec4
&
other
)
{
val
=
_mm_sub_ps
(
val
,
other
);
}
void
operator
*=
(
const
fvec4
&
other
)
{
val
=
_mm_mul_ps
(
val
,
other
);
}
void
operator
/=
(
const
fvec4
&
other
)
{
val
=
_mm_div_ps
(
val
,
other
);
}
fvec4
operator
-
()
const
{
return
_mm_sub_ps
(
_mm_set1_ps
(
0.0
f
),
val
);
}
fvec4
operator
&
(
const
fvec4
&
other
)
const
{
return
_mm_and_ps
(
val
,
other
);
}
fvec4
operator
|
(
const
fvec4
&
other
)
const
{
return
_mm_or_ps
(
val
,
other
);
}
fvec4
operator
==
(
const
fvec4
&
other
)
const
{
return
_mm_cmpeq_ps
(
val
,
other
);
}
fvec4
operator
!=
(
const
fvec4
&
other
)
const
{
return
_mm_cmpneq_ps
(
val
,
other
);
}
fvec4
operator
>
(
const
fvec4
&
other
)
const
{
return
_mm_cmpgt_ps
(
val
,
other
);
}
fvec4
operator
<
(
const
fvec4
&
other
)
const
{
return
_mm_cmplt_ps
(
val
,
other
);
}
fvec4
operator
>=
(
const
fvec4
&
other
)
const
{
return
_mm_cmpge_ps
(
val
,
other
);
}
fvec4
operator
<=
(
const
fvec4
&
other
)
const
{
return
_mm_cmple_ps
(
val
,
other
);
}
operator
ivec4
()
const
;
};
/**
* A four element vector of ints.
*/
class
ivec4
{
public:
__m128i
val
;
ivec4
()
{}
ivec4
(
int
v
)
:
val
(
_mm_set1_epi32
(
v
))
{}
ivec4
(
int
v1
,
int
v2
,
int
v3
,
int
v4
)
:
val
(
_mm_set_epi32
(
v4
,
v3
,
v2
,
v1
))
{}
ivec4
(
__m128i
v
)
:
val
(
v
)
{}
ivec4
(
const
int
*
v
)
:
val
(
_mm_loadu_si128
((
const
__m128i
*
)
v
))
{}
operator
__m128i
()
const
{
return
val
;
}
int
operator
[](
int
i
)
const
{
int
result
[
4
];
store
(
result
);
return
result
[
i
];
}
void
store
(
int
*
v
)
const
{
_mm_storeu_si128
((
__m128i
*
)
v
,
val
);
}
ivec4
operator
+
(
const
ivec4
&
other
)
const
{
return
_mm_add_epi32
(
val
,
other
);
}
ivec4
operator
-
(
const
ivec4
&
other
)
const
{
return
_mm_sub_epi32
(
val
,
other
);
}
ivec4
operator
*
(
const
ivec4
&
other
)
const
{
return
_mm_mul_epi32
(
val
,
other
);
}
void
operator
+=
(
const
ivec4
&
other
)
{
val
=
_mm_add_epi32
(
val
,
other
);
}
void
operator
-=
(
const
ivec4
&
other
)
{
val
=
_mm_sub_epi32
(
val
,
other
);
}
void
operator
*=
(
const
ivec4
&
other
)
{
val
=
_mm_mul_epi32
(
val
,
other
);
}
ivec4
operator
-
()
const
{
return
_mm_sub_epi32
(
_mm_set1_epi32
(
0
),
val
);
}
ivec4
operator
&
(
const
ivec4
&
other
)
const
{
return
_mm_and_si128
(
val
,
other
);
}
ivec4
operator
|
(
const
ivec4
&
other
)
const
{
return
_mm_or_si128
(
val
,
other
);
}
ivec4
operator
==
(
const
ivec4
&
other
)
const
{
return
_mm_cmpeq_epi32
(
val
,
other
);
}
ivec4
operator
!=
(
const
ivec4
&
other
)
const
{
return
_mm_xor_si128
(
*
this
==
other
,
_mm_set1_epi32
(
0xFFFFFFFF
));
}
ivec4
operator
>
(
const
ivec4
&
other
)
const
{
return
_mm_cmpgt_epi32
(
val
,
other
);
}
ivec4
operator
<
(
const
ivec4
&
other
)
const
{
return
_mm_cmplt_epi32
(
val
,
other
);
}
ivec4
operator
>=
(
const
ivec4
&
other
)
const
{
return
_mm_xor_si128
(
_mm_cmplt_epi32
(
val
,
other
),
_mm_set1_epi32
(
0xFFFFFFFF
));
}
ivec4
operator
<=
(
const
ivec4
&
other
)
const
{
return
_mm_xor_si128
(
_mm_cmpgt_epi32
(
val
,
other
),
_mm_set1_epi32
(
0xFFFFFFFF
));
}
operator
fvec4
()
const
;
};
// Conversion operators.
inline
fvec4
::
operator
ivec4
()
const
{
return
_mm_cvttps_epi32
(
val
);
}
inline
ivec4
::
operator
fvec4
()
const
{
return
_mm_cvtepi32_ps
(
val
);
}
// Functions that operate on fvec4s.
static
inline
fvec4
floor
(
const
fvec4
&
v
)
{
return
fvec4
(
_mm_floor_ps
(
v
.
val
));
}
static
inline
fvec4
ceil
(
const
fvec4
&
v
)
{
return
fvec4
(
_mm_ceil_ps
(
v
.
val
));
}
static
inline
fvec4
round
(
const
fvec4
&
v
)
{
return
fvec4
(
_mm_round_ps
(
v
.
val
,
_MM_FROUND_TO_NEAREST_INT
));
}
static
inline
fvec4
min
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
_mm_min_ps
(
v1
.
val
,
v2
.
val
));
}
static
inline
fvec4
max
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
_mm_max_ps
(
v1
.
val
,
v2
.
val
));
}
static
inline
fvec4
abs
(
const
fvec4
&
v
)
{
static
const
__m128
mask
=
_mm_castsi128_ps
(
_mm_set1_epi32
(
0x7FFFFFFF
));
return
fvec4
(
_mm_and_ps
(
v
.
val
,
mask
));
}
static
inline
fvec4
sqrt
(
const
fvec4
&
v
)
{
return
fvec4
(
_mm_sqrt_ps
(
v
.
val
));
}
static
inline
float
dot3
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0x71
));
}
static
inline
float
dot4
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0xF1
));
}
static
inline
void
transpose
(
fvec4
&
v1
,
fvec4
&
v2
,
fvec4
&
v3
,
fvec4
&
v4
)
{
_MM_TRANSPOSE4_PS
(
v1
,
v2
,
v3
,
v4
);
}
// Functions that operate on ivec4s.
static
inline
ivec4
min
(
const
ivec4
&
v1
,
const
ivec4
&
v2
)
{
return
ivec4
(
_mm_min_epi32
(
v1
.
val
,
v2
.
val
));
}
static
inline
ivec4
max
(
const
ivec4
&
v1
,
const
ivec4
&
v2
)
{
return
ivec4
(
_mm_max_epi32
(
v1
.
val
,
v2
.
val
));
}
static
inline
ivec4
abs
(
const
ivec4
&
v
)
{
return
ivec4
(
_mm_abs_epi32
(
v
.
val
));
}
static
inline
bool
any
(
const
ivec4
&
v
)
{
return
!
_mm_test_all_zeros
(
v
,
_mm_set1_epi32
(
0xFFFFFFFF
));
}
// Mathematical operators involving a scalar and a vector.
static
inline
fvec4
operator
+
(
float
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
)
+
v2
;
}
static
inline
fvec4
operator
-
(
float
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
)
-
v2
;
}
static
inline
fvec4
operator
*
(
float
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
)
*
v2
;
}
static
inline
fvec4
operator
/
(
float
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
)
/
v2
;
}
// Operations for blending fvec4s based on an ivec4.
static
inline
fvec4
blend
(
const
fvec4
&
v1
,
const
fvec4
&
v2
,
const
ivec4
&
mask
)
{
return
fvec4
(
_mm_blendv_ps
(
v1
.
val
,
v2
.
val
,
_mm_castsi128_ps
(
mask
.
val
)));
}
#if defined(__ANDROID__)
#include "vectorize_neon.h"
#else
#include "vectorize_sse.h"
#endif
#endif
/*OPENMM_VECTORIZE_H_*/
openmmapi/include/openmm/internal/vectorize_neon.h
0 → 100644
View file @
454caac9
#ifndef OPENMM_VECTORIZE_NEON_H_
#define OPENMM_VECTORIZE_NEON_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2014 Stanford University and the Authors. *
* Authors: Mateus Lima, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include <cpu-features.h>
#include <arm_neon.h>
#include <cmath>
typedef
int
int32_t
;
// This file defines classes and functions to simplify vectorizing code with NEON.
class
ivec4
;
/**
* A four element vector of floats.
*/
class
fvec4
{
public:
float32x4_t
val
;
fvec4
()
{}
fvec4
(
float
v
)
:
val
(
vdupq_n_f32
(
v
))
{}
fvec4
(
float
v1
,
float
v2
,
float
v3
,
float
v4
)
{
float
v
[]
=
{
v1
,
v2
,
v3
,
v4
};
val
=
vld1q_f32
(
v
);
}
fvec4
(
float32x4_t
v
)
:
val
(
v
)
{}
fvec4
(
const
float
*
v
)
:
val
(
vld1q_f32
(
v
))
{}
operator
float32x4_t
()
const
{
return
val
;
}
float
operator
[](
int
i
)
const
{
float
result
[
4
];
store
(
result
);
return
result
[
i
];
}
void
store
(
float
*
v
)
const
{
vst1q_f32
(
v
,
val
);
}
fvec4
operator
+
(
const
fvec4
&
other
)
const
{
// Tested OK
return
vaddq_f32
(
val
,
other
);
}
fvec4
operator
-
(
const
fvec4
&
other
)
const
{
// Tested OK
return
vsubq_f32
(
val
,
other
);
}
fvec4
operator
*
(
const
fvec4
&
other
)
const
{
// Tested OK
return
vmulq_f32
(
val
,
other
);
}
fvec4
operator
/
(
const
fvec4
&
other
)
const
{
// Tested OK
// NEON does not have a divide float-point operator, so we get the reciprocal and multiply.
float32x4_t
reciprocal
=
vrecpeq_f32
(
other
);
reciprocal
=
vmulq_f32
(
vrecpsq_f32
(
other
,
reciprocal
),
reciprocal
);
reciprocal
=
vmulq_f32
(
vrecpsq_f32
(
other
,
reciprocal
),
reciprocal
);
fvec4
result
=
vmulq_f32
(
val
,
reciprocal
);
return
result
;
}
void
operator
+=
(
const
fvec4
&
other
)
{
val
=
vaddq_f32
(
val
,
other
);
}
void
operator
-=
(
const
fvec4
&
other
)
{
val
=
vsubq_f32
(
val
,
other
);
}
void
operator
*=
(
const
fvec4
&
other
)
{
val
=
vmulq_f32
(
val
,
other
);
}
void
operator
/=
(
const
fvec4
&
other
)
{
val
=
val
/
other
.
val
;
}
fvec4
operator
-
()
const
{
return
vnegq_f32
(
val
);
}
fvec4
operator
&
(
const
fvec4
&
other
)
const
{
return
vreinterpretq_f32_u32
(
vandq_u32
(
vreinterpretq_u32_f32
(
val
),
vreinterpretq_u32_f32
(
other
)));
}
fvec4
operator
|
(
const
fvec4
&
other
)
const
{
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vorrq_u32
(
vcvtq_u32_f32
(
val
),
vcvtq_u32_f32
(
other
))));
}
fvec4
operator
==
(
const
fvec4
&
other
)
const
{
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vceqq_f32
(
val
,
other
)));
}
fvec4
operator
!=
(
const
fvec4
&
other
)
const
{
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vmvnq_u32
(
vceqq_f32
(
val
,
other
))));
// not(equals(val, other))
}
fvec4
operator
>
(
const
fvec4
&
other
)
const
{
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vcgtq_f32
(
val
,
other
)));
}
fvec4
operator
<
(
const
fvec4
&
other
)
const
{
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vcltq_f32
(
val
,
other
)));
}
fvec4
operator
>=
(
const
fvec4
&
other
)
const
{
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vcgeq_f32
(
val
,
other
)));
}
fvec4
operator
<=
(
const
fvec4
&
other
)
const
{
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vcleq_f32
(
val
,
other
)));
}
operator
ivec4
()
const
;
};
/**
* A four element vector of ints.
*/
class
ivec4
{
public:
int32x4_t
val
;
ivec4
()
{}
ivec4
(
int
v
)
:
val
(
vdupq_n_s32
(
v
))
{}
ivec4
(
int
v1
,
int
v2
,
int
v3
,
int
v4
)
{
int
v
[]
=
{
v1
,
v2
,
v3
,
v4
};
val
=
vld1q_s32
(
v
);
}
ivec4
(
int32x4_t
v
)
:
val
(
v
)
{}
ivec4
(
const
int
*
v
)
:
val
(
vld1q_s32
(
v
))
{}
operator
int32x4_t
()
const
{
return
val
;
}
int
operator
[](
int
i
)
const
{
int
result
[
4
];
store
(
result
);
return
result
[
i
];
}
void
store
(
int
*
v
)
const
{
vst1q_s32
(
v
,
val
);
}
ivec4
operator
+
(
const
ivec4
&
other
)
const
{
return
vaddq_s32
(
val
,
other
);
}
ivec4
operator
-
(
const
ivec4
&
other
)
const
{
return
vsubq_s32
(
val
,
other
);
}
ivec4
operator
*
(
const
ivec4
&
other
)
const
{
return
vmulq_s32
(
val
,
other
);
}
void
operator
+=
(
const
ivec4
&
other
)
{
val
=
vaddq_s32
(
val
,
other
);
}
void
operator
-=
(
const
ivec4
&
other
)
{
val
=
vsubq_s32
(
val
,
other
);
}
void
operator
*=
(
const
ivec4
&
other
)
{
val
=
vmulq_s32
(
val
,
other
);
}
ivec4
operator
-
()
const
{
return
vnegq_s32
(
val
);
}
ivec4
operator
&
(
const
ivec4
&
other
)
const
{
// Tested OK
return
ivec4
(
vandq_s32
(
val
,
other
));
}
ivec4
operator
|
(
const
ivec4
&
other
)
const
{
return
ivec4
(
vorrq_s32
(
val
,
other
));
}
ivec4
operator
==
(
const
ivec4
&
other
)
const
{
return
ivec4
(
vreinterpretq_s32_u32
(
vceqq_s32
(
val
,
other
)));
}
ivec4
operator
!=
(
const
ivec4
&
other
)
const
{
// OK
return
ivec4
(
vreinterpretq_s32_u32
(
vmvnq_u32
(
vceqq_s32
(
val
,
other
))));
// not(equal(val, other))
}
ivec4
operator
>
(
const
ivec4
&
other
)
const
{
return
ivec4
(
vreinterpretq_s32_u32
(
vcgtq_s32
(
val
,
other
)));
}
ivec4
operator
<
(
const
ivec4
&
other
)
const
{
return
ivec4
(
vreinterpretq_s32_u32
(
vcltq_s32
(
val
,
other
)));
}
ivec4
operator
>=
(
const
ivec4
&
other
)
const
{
return
ivec4
(
vreinterpretq_s32_u32
(
vcgeq_s32
(
val
,
other
)));
}
ivec4
operator
<=
(
const
ivec4
&
other
)
const
{
// OK
return
ivec4
(
vreinterpretq_s32_u32
(
vcleq_s32
(
val
,
other
)));
}
operator
fvec4
()
const
;
};
// Conversion operators.
inline
fvec4
::
operator
ivec4
()
const
{
return
ivec4
(
vcvtq_s32_f32
(
val
));
}
inline
ivec4
::
operator
fvec4
()
const
{
return
fvec4
(
vcvtq_f32_s32
(
val
));
}
// Functions that operate on fvec4s.
static
inline
fvec4
floor
(
const
fvec4
&
v
)
{
// Tested: OK
fvec4
result
=
v
+
fvec4
(
0.5
f
);
result
=
(
fvec4
)
((
ivec4
)
result
);
return
result
;
}
static
inline
float
roundToNearest
(
float
num
)
{
return
(
num
>
0.0
f
)
?
std
::
floor
(
num
+
0.5
f
)
:
std
::
ceil
(
num
-
0.5
f
);
}
static
inline
fvec4
round
(
const
fvec4
&
v
)
{
// Tested: OK - Needs optimization
float
aux
[
4
];
vst1q_f32
(
aux
,
v
);
return
fvec4
(
roundToNearest
(
aux
[
0
]),
roundToNearest
(
aux
[
1
]),
roundToNearest
(
aux
[
2
]),
roundToNearest
(
aux
[
3
]));
}
static
inline
fvec4
min
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
// Tested OK
return
fvec4
(
vminq_f32
(
v1
.
val
,
v2
.
val
));
}
static
inline
fvec4
max
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
// Tested OK
return
fvec4
(
vmaxq_f32
(
v1
.
val
,
v2
.
val
));
}
static
inline
fvec4
abs
(
const
fvec4
&
v
)
{
// Tested OK
return
fvec4
(
vabdq_f32
(
v
.
val
,
fvec4
(
0.0
)));
}
static
inline
fvec4
ceil
(
const
fvec4
&
v
)
{
// Tested OK
ivec4
intVersion
=
(
ivec4
)
v
;
fvec4
result
=
min
((
fvec4
)
(
v
>
intVersion
),
fvec4
(
1.0
f
));
result
+=
intVersion
;
return
result
;
}
static
inline
fvec4
sqrt
(
const
fvec4
&
v
)
{
float32x4_t
recipSqrt
=
vrsqrteq_f32
(
v
);
recipSqrt
=
vmulq_f32
(
recipSqrt
,
vrsqrtsq_f32
(
vmulq_f32
(
recipSqrt
,
v
),
recipSqrt
));
recipSqrt
=
vmulq_f32
(
recipSqrt
,
vrsqrtsq_f32
(
vmulq_f32
(
recipSqrt
,
v
),
recipSqrt
));
return
vmulq_f32
(
v
,
recipSqrt
);
}
static
inline
float
dot3
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
// Tested: OK
fvec4
result
=
v1
*
v2
;
float
aux
[
4
];
vst1q_f32
(
aux
,
result
);
return
aux
[
0
]
+
aux
[
1
]
+
aux
[
2
];
// Ignore w component
}
static
inline
float
dot4
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
// Tested: OK
fvec4
result
=
v1
*
v2
;
float
aux
[
4
];
vst1q_f32
(
aux
,
result
);
return
aux
[
0
]
+
aux
[
1
]
+
aux
[
2
]
+
aux
[
3
];
}
static
inline
void
transpose
(
fvec4
&
v1
,
fvec4
&
v2
,
fvec4
&
v3
,
fvec4
&
v4
)
{
// Tested: OK
float
aux1
[
4
];
float
aux2
[
4
];
float
aux3
[
4
];
float
aux4
[
4
];
vst1q_f32
(
aux1
,
v1
);
vst1q_f32
(
aux2
,
v2
);
vst1q_f32
(
aux3
,
v3
);
vst1q_f32
(
aux4
,
v4
);
v1
=
fvec4
(
aux1
[
0
],
aux2
[
0
],
aux3
[
0
],
aux4
[
0
]);
v2
=
fvec4
(
aux1
[
1
],
aux2
[
1
],
aux3
[
1
],
aux4
[
1
]);
v3
=
fvec4
(
aux1
[
2
],
aux2
[
2
],
aux3
[
2
],
aux4
[
2
]);
v4
=
fvec4
(
aux1
[
3
],
aux2
[
3
],
aux3
[
3
],
aux4
[
3
]);
}
// Functions that operate on ivec4s.
static
inline
ivec4
min
(
const
ivec4
&
v1
,
const
ivec4
&
v2
)
{
// Tested: not tested
ivec4
res
=
ivec4
(
vminq_s32
(
v1
.
val
,
v2
.
val
));
return
res
;
}
static
inline
ivec4
max
(
const
ivec4
&
v1
,
const
ivec4
&
v2
)
{
// Tested: not tested
ivec4
res
=
ivec4
(
vmaxq_s32
(
v1
.
val
,
v2
.
val
));
return
res
;
}
static
inline
ivec4
abs
(
const
ivec4
&
v
)
{
// Tested: Not tested
ivec4
res
=
ivec4
(
vabdq_s32
(
v
.
val
,
ivec4
(
0
)));
return
res
;
}
static
inline
bool
any
(
const
ivec4
&
v
)
{
// Tested: OK
int
result
[
4
];
vst1q_s32
(
result
,
v
);
return
result
[
0
]
!=
0
||
result
[
1
]
!=
0
||
result
[
2
]
!=
0
||
result
[
3
]
!=
0
;
}
// Mathematical operators involving a scalar and a vector.
static
inline
fvec4
operator
+
(
float
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
)
+
v2
;
}
static
inline
fvec4
operator
-
(
float
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
)
-
v2
;
}
static
inline
fvec4
operator
*
(
float
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
)
*
v2
;
}
static
inline
fvec4
operator
/
(
float
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
)
/
v2
;
}
// Operations for blending fvec4s based on an ivec4.
static
inline
fvec4
blend
(
const
fvec4
&
v1
,
const
fvec4
&
v2
,
const
ivec4
&
mask
)
{
// Tested OK
return
fvec4
(
vbslq_f32
(
vreinterpretq_u32_s32
(
mask
.
val
),
v2
,
v1
));
}
#endif
/*OPENMM_VECTORIZE_NEON_H_*/
openmmapi/include/openmm/internal/vectorize_sse.h
0 → 100644
View file @
454caac9
#ifndef OPENMM_VECTORIZE_SSE_H_
#define OPENMM_VECTORIZE_SSE_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include <smmintrin.h>
// This file defines classes and functions to simplify vectorizing code with SSE.
class
ivec4
;
/**
* A four element vector of floats.
*/
class
fvec4
{
public:
__m128
val
;
fvec4
()
{}
fvec4
(
float
v
)
:
val
(
_mm_set1_ps
(
v
))
{}
fvec4
(
float
v1
,
float
v2
,
float
v3
,
float
v4
)
:
val
(
_mm_set_ps
(
v4
,
v3
,
v2
,
v1
))
{}
fvec4
(
__m128
v
)
:
val
(
v
)
{}
fvec4
(
const
float
*
v
)
:
val
(
_mm_loadu_ps
(
v
))
{}
operator
__m128
()
const
{
return
val
;
}
float
operator
[](
int
i
)
const
{
float
result
[
4
];
store
(
result
);
return
result
[
i
];
}
void
store
(
float
*
v
)
const
{
_mm_storeu_ps
(
v
,
val
);
}
fvec4
operator
+
(
const
fvec4
&
other
)
const
{
return
_mm_add_ps
(
val
,
other
);
}
fvec4
operator
-
(
const
fvec4
&
other
)
const
{
return
_mm_sub_ps
(
val
,
other
);
}
fvec4
operator
*
(
const
fvec4
&
other
)
const
{
return
_mm_mul_ps
(
val
,
other
);
}
fvec4
operator
/
(
const
fvec4
&
other
)
const
{
return
_mm_div_ps
(
val
,
other
);
}
void
operator
+=
(
const
fvec4
&
other
)
{
val
=
_mm_add_ps
(
val
,
other
);
}
void
operator
-=
(
const
fvec4
&
other
)
{
val
=
_mm_sub_ps
(
val
,
other
);
}
void
operator
*=
(
const
fvec4
&
other
)
{
val
=
_mm_mul_ps
(
val
,
other
);
}
void
operator
/=
(
const
fvec4
&
other
)
{
val
=
_mm_div_ps
(
val
,
other
);
}
fvec4
operator
-
()
const
{
return
_mm_sub_ps
(
_mm_set1_ps
(
0.0
f
),
val
);
}
fvec4
operator
&
(
const
fvec4
&
other
)
const
{
return
_mm_and_ps
(
val
,
other
);
}
fvec4
operator
|
(
const
fvec4
&
other
)
const
{
return
_mm_or_ps
(
val
,
other
);
}
fvec4
operator
==
(
const
fvec4
&
other
)
const
{
return
_mm_cmpeq_ps
(
val
,
other
);
}
fvec4
operator
!=
(
const
fvec4
&
other
)
const
{
return
_mm_cmpneq_ps
(
val
,
other
);
}
fvec4
operator
>
(
const
fvec4
&
other
)
const
{
return
_mm_cmpgt_ps
(
val
,
other
);
}
fvec4
operator
<
(
const
fvec4
&
other
)
const
{
return
_mm_cmplt_ps
(
val
,
other
);
}
fvec4
operator
>=
(
const
fvec4
&
other
)
const
{
return
_mm_cmpge_ps
(
val
,
other
);
}
fvec4
operator
<=
(
const
fvec4
&
other
)
const
{
return
_mm_cmple_ps
(
val
,
other
);
}
operator
ivec4
()
const
;
};
/**
* A four element vector of ints.
*/
class
ivec4
{
public:
__m128i
val
;
ivec4
()
{}
ivec4
(
int
v
)
:
val
(
_mm_set1_epi32
(
v
))
{}
ivec4
(
int
v1
,
int
v2
,
int
v3
,
int
v4
)
:
val
(
_mm_set_epi32
(
v4
,
v3
,
v2
,
v1
))
{}
ivec4
(
__m128i
v
)
:
val
(
v
)
{}
ivec4
(
const
int
*
v
)
:
val
(
_mm_loadu_si128
((
const
__m128i
*
)
v
))
{}
operator
__m128i
()
const
{
return
val
;
}
int
operator
[](
int
i
)
const
{
int
result
[
4
];
store
(
result
);
return
result
[
i
];
}
void
store
(
int
*
v
)
const
{
_mm_storeu_si128
((
__m128i
*
)
v
,
val
);
}
ivec4
operator
+
(
const
ivec4
&
other
)
const
{
return
_mm_add_epi32
(
val
,
other
);
}
ivec4
operator
-
(
const
ivec4
&
other
)
const
{
return
_mm_sub_epi32
(
val
,
other
);
}
ivec4
operator
*
(
const
ivec4
&
other
)
const
{
return
_mm_mul_epi32
(
val
,
other
);
}
void
operator
+=
(
const
ivec4
&
other
)
{
val
=
_mm_add_epi32
(
val
,
other
);
}
void
operator
-=
(
const
ivec4
&
other
)
{
val
=
_mm_sub_epi32
(
val
,
other
);
}
void
operator
*=
(
const
ivec4
&
other
)
{
val
=
_mm_mul_epi32
(
val
,
other
);
}
ivec4
operator
-
()
const
{
return
_mm_sub_epi32
(
_mm_set1_epi32
(
0
),
val
);
}
ivec4
operator
&
(
const
ivec4
&
other
)
const
{
return
_mm_and_si128
(
val
,
other
);
}
ivec4
operator
|
(
const
ivec4
&
other
)
const
{
return
_mm_or_si128
(
val
,
other
);
}
ivec4
operator
==
(
const
ivec4
&
other
)
const
{
return
_mm_cmpeq_epi32
(
val
,
other
);
}
ivec4
operator
!=
(
const
ivec4
&
other
)
const
{
return
_mm_xor_si128
(
*
this
==
other
,
_mm_set1_epi32
(
0xFFFFFFFF
));
}
ivec4
operator
>
(
const
ivec4
&
other
)
const
{
return
_mm_cmpgt_epi32
(
val
,
other
);
}
ivec4
operator
<
(
const
ivec4
&
other
)
const
{
return
_mm_cmplt_epi32
(
val
,
other
);
}
ivec4
operator
>=
(
const
ivec4
&
other
)
const
{
return
_mm_xor_si128
(
_mm_cmplt_epi32
(
val
,
other
),
_mm_set1_epi32
(
0xFFFFFFFF
));
}
ivec4
operator
<=
(
const
ivec4
&
other
)
const
{
return
_mm_xor_si128
(
_mm_cmpgt_epi32
(
val
,
other
),
_mm_set1_epi32
(
0xFFFFFFFF
));
}
operator
fvec4
()
const
;
};
// Conversion operators.
inline
fvec4
::
operator
ivec4
()
const
{
return
_mm_cvttps_epi32
(
val
);
}
inline
ivec4
::
operator
fvec4
()
const
{
return
_mm_cvtepi32_ps
(
val
);
}
// Functions that operate on fvec4s.
static
inline
fvec4
floor
(
const
fvec4
&
v
)
{
return
fvec4
(
_mm_floor_ps
(
v
.
val
));
}
static
inline
fvec4
ceil
(
const
fvec4
&
v
)
{
return
fvec4
(
_mm_ceil_ps
(
v
.
val
));
}
static
inline
fvec4
round
(
const
fvec4
&
v
)
{
return
fvec4
(
_mm_round_ps
(
v
.
val
,
_MM_FROUND_TO_NEAREST_INT
));
}
static
inline
fvec4
min
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
_mm_min_ps
(
v1
.
val
,
v2
.
val
));
}
static
inline
fvec4
max
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
_mm_max_ps
(
v1
.
val
,
v2
.
val
));
}
static
inline
fvec4
abs
(
const
fvec4
&
v
)
{
static
const
__m128
mask
=
_mm_castsi128_ps
(
_mm_set1_epi32
(
0x7FFFFFFF
));
return
fvec4
(
_mm_and_ps
(
v
.
val
,
mask
));
}
static
inline
fvec4
sqrt
(
const
fvec4
&
v
)
{
return
fvec4
(
_mm_sqrt_ps
(
v
.
val
));
}
static
inline
float
dot3
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0x71
));
}
static
inline
float
dot4
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0xF1
));
}
static
inline
void
transpose
(
fvec4
&
v1
,
fvec4
&
v2
,
fvec4
&
v3
,
fvec4
&
v4
)
{
_MM_TRANSPOSE4_PS
(
v1
,
v2
,
v3
,
v4
);
}
// Functions that operate on ivec4s.
static
inline
ivec4
min
(
const
ivec4
&
v1
,
const
ivec4
&
v2
)
{
return
ivec4
(
_mm_min_epi32
(
v1
.
val
,
v2
.
val
));
}
static
inline
ivec4
max
(
const
ivec4
&
v1
,
const
ivec4
&
v2
)
{
return
ivec4
(
_mm_max_epi32
(
v1
.
val
,
v2
.
val
));
}
static
inline
ivec4
abs
(
const
ivec4
&
v
)
{
return
ivec4
(
_mm_abs_epi32
(
v
.
val
));
}
static
inline
bool
any
(
const
ivec4
&
v
)
{
return
!
_mm_test_all_zeros
(
v
,
_mm_set1_epi32
(
0xFFFFFFFF
));
}
// Mathematical operators involving a scalar and a vector.
static
inline
fvec4
operator
+
(
float
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
)
+
v2
;
}
static
inline
fvec4
operator
-
(
float
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
)
-
v2
;
}
static
inline
fvec4
operator
*
(
float
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
)
*
v2
;
}
static
inline
fvec4
operator
/
(
float
v1
,
const
fvec4
&
v2
)
{
return
fvec4
(
v1
)
/
v2
;
}
// Operations for blending fvec4s based on an ivec4.
static
inline
fvec4
blend
(
const
fvec4
&
v1
,
const
fvec4
&
v2
,
const
ivec4
&
mask
)
{
return
fvec4
(
_mm_blendv_ps
(
v1
.
val
,
v2
.
val
,
_mm_castsi128_ps
(
mask
.
val
)));
}
#endif
/*OPENMM_VECTORIZE_SSE_H_*/
platforms/cpu/sharedTarget/CMakeLists.txt
View file @
454caac9
...
...
@@ -3,11 +3,15 @@ FOREACH(file ${SOURCE_FILES})
IF
(
MSVC
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
/arch:AVX /D__AVX__"
)
ELSE
(
MSVC
)
IF
(
NOT ANDROID
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1 -mavx"
)
ENDIF
(
NOT ANDROID
)
ENDIF
(
MSVC
)
ELSE
(
file MATCHES
".*Vec8.*"
)
IF
(
NOT MSVC
)
IF
(
NOT ANDROID
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1"
)
ENDIF
(
NOT ANDROID
)
ENDIF
(
NOT MSVC
)
ENDIF
(
file MATCHES
".*Vec8.*"
)
ENDFOREACH
(
file
)
...
...
platforms/cpu/src/CpuNeighborList.cpp
View file @
454caac9
...
...
@@ -37,7 +37,6 @@
#include <set>
#include <map>
#include <cmath>
#include <smmintrin.h>
using
namespace
std
;
...
...
platforms/cpu/src/CpuNonbondedForceVec4.cpp
View file @
454caac9
...
...
@@ -103,7 +103,7 @@ void CpuNonbondedForceVec4::calculateBlockIxn(int blockIndex, float* forces, dou
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
(
(
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec4
t
=
blend
(
0.0
f
,
(
r
-
switchingDistance
)
*
invSwitchingInterval
,
r
>
switchingDistance
);
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
...
...
@@ -214,7 +214,7 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
(
(
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec4
t
=
blend
(
0.0
f
,
(
r
-
switchingDistance
)
*
invSwitchingInterval
,
r
>
switchingDistance
);
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
...
...
platforms/cpu/src/CpuPlatform.cpp
View file @
454caac9
...
...
@@ -36,6 +36,7 @@
#include "ReferenceConstraints.h"
#include "openmm/internal/hardware.h"
#include <sstream>
#include <stdlib.h>
using
namespace
OpenMM
;
using
namespace
std
;
...
...
@@ -93,8 +94,12 @@ bool CpuPlatform::supportsDoublePrecision() const {
}
bool
CpuPlatform
::
isProcessorSupported
()
{
// Make sure the CPU supports SSE 4.1.
// Make sure the CPU supports SSE 4.1
or NEON
.
#ifdef __ANDROID__
uint64_t
features
=
android_getCpuFeatures
();
return
(
features
&
ANDROID_CPU_ARM_FEATURE_NEON
)
!=
0
;
#else
int
cpuInfo
[
4
];
cpuid
(
cpuInfo
,
0
);
if
(
cpuInfo
[
0
]
>=
1
)
{
...
...
@@ -102,6 +107,7 @@ bool CpuPlatform::isProcessorSupported() {
return
((
cpuInfo
[
2
]
&
((
int
)
1
<<
19
))
!=
0
);
}
return
false
;
#endif
}
void
CpuPlatform
::
contextCreated
(
ContextImpl
&
context
,
const
map
<
string
,
string
>&
properties
)
const
{
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
454caac9
...
...
@@ -4816,7 +4816,7 @@ void OpenCLIntegrateVariableVerletStepKernel::initialize(const System& system, c
kernel1 = cl::Kernel(program, "integrateVerletPart1");
kernel2 = cl::Kernel(program, "integrateVerletPart2");
selectSizeKernel = cl::Kernel(program, "selectVerletStepSize");
blockSize
=
min
(
min
(
256
,
system
.
getNumParticles
()),
(
int
)
selectSizeKernel
.
getInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
());
blockSize = min(min(256, system.getNumParticles()), (int) selectSizeKernel.get
WorkGroup
Info<CL_KERNEL_WORK_GROUP_SIZE>(
cl.getDevice()
));
}
double OpenCLIntegrateVariableVerletStepKernel::execute(ContextImpl& context, const VariableVerletIntegrator& integrator, double maxTime) {
...
...
@@ -4926,7 +4926,7 @@ void OpenCLIntegrateVariableLangevinStepKernel::initialize(const System& system,
params = new OpenCLArray(cl, 3, cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(cl_double) : sizeof(cl_float), "langevinParams");
blockSize = min(256, system.getNumParticles());
blockSize = max(blockSize, params->getSize());
blockSize
=
min
(
blockSize
,
(
int
)
selectSizeKernel
.
getInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
());
blockSize = min(blockSize, (int) selectSizeKernel.get
WorkGroup
Info<CL_KERNEL_WORK_GROUP_SIZE>(
cl.getDevice()
));
}
double OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime) {
...
...
platforms/opencl/src/OpenCLPlatform.cpp
View file @
454caac9
...
...
@@ -32,6 +32,7 @@
#include "openmm/Context.h"
#include "openmm/System.h"
#include <algorithm>
#include <cctype>
#include <sstream>
#ifdef __APPLE__
#include "sys/sysctl.h"
...
...
@@ -39,10 +40,7 @@
using
namespace
OpenMM
;
using
std
::
map
;
using
std
::
string
;
using
std
::
stringstream
;
using
std
::
vector
;
using
namespace
std
;
#ifdef OPENMM_OPENCL_BUILDING_STATIC_LIBRARY
extern
"C"
void
registerOpenCLPlatform
()
{
...
...
plugins/cpupme/CMakeLists.txt
View file @
454caac9
...
...
@@ -57,7 +57,11 @@ ENDFOREACH(subdir)
INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_CURRENT_SOURCE_DIR
}
/src
)
IF
(
NOT MSVC
)
IF
(
ANDROID
)
SET_SOURCE_FILES_PROPERTIES
(
${
SOURCE_FILES
}
PROPERTIES COMPILE_FLAGS
""
)
ELSE
(
ANDROID
)
SET_SOURCE_FILES_PROPERTIES
(
${
SOURCE_FILES
}
PROPERTIES COMPILE_FLAGS
"-msse4.1"
)
ENDIF
(
ANDROID
)
ENDIF
(
NOT MSVC
)
# Include FFTW related files.
...
...
plugins/cpupme/src/CpuPmeKernels.cpp
View file @
454caac9
...
...
@@ -569,13 +569,20 @@ double CpuCalcPmeReciprocalForceKernel::finishComputation(IO& io) {
}
bool
CpuCalcPmeReciprocalForceKernel
::
isProcessorSupported
()
{
// Make sure the CPU supports SSE 4.1 or NEON.
#ifdef __ANDROID__
uint64_t
features
=
android_getCpuFeatures
();
return
(
features
&
ANDROID_CPU_ARM_FEATURE_NEON
)
!=
0
;
#else
int
cpuInfo
[
4
];
cpuid
(
cpuInfo
,
0
);
if
(
cpuInfo
[
0
]
>=
1
)
{
cpuid
(
cpuInfo
,
1
);
return
((
cpuInfo
[
2
]
&
((
int
)
1
<<
19
))
!=
0
);
// Require SSE 4.1
return
((
cpuInfo
[
2
]
&
((
int
)
1
<<
19
))
!=
0
);
}
return
false
;
#endif
}
int
CpuCalcPmeReciprocalForceKernel
::
findFFTDimension
(
int
minimum
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment