Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
170ac691
Commit
170ac691
authored
Jun 09, 2020
by
Daniel Towner
Browse files
CPU: Added support for AVX2.
parent
8c43e37a
Changes
19
Hide whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
269 additions
and
47 deletions
+269
-47
openmmapi/include/openmm/internal/vectorizeAvx.h
openmmapi/include/openmm/internal/vectorizeAvx.h
+4
-6
openmmapi/include/openmm/internal/vectorizeAvx2.h
openmmapi/include/openmm/internal/vectorizeAvx2.h
+104
-0
openmmapi/include/openmm/internal/vectorize_neon.h
openmmapi/include/openmm/internal/vectorize_neon.h
+1
-1
openmmapi/include/openmm/internal/vectorize_pnacl.h
openmmapi/include/openmm/internal/vectorize_pnacl.h
+1
-1
openmmapi/include/openmm/internal/vectorize_sse.h
openmmapi/include/openmm/internal/vectorize_sse.h
+1
-1
platforms/cpu/include/CpuNeighborList.h
platforms/cpu/include/CpuNeighborList.h
+1
-1
platforms/cpu/include/CpuNonbondedForceFvec.h
platforms/cpu/include/CpuNonbondedForceFvec.h
+2
-2
platforms/cpu/sharedTarget/CMakeLists.txt
platforms/cpu/sharedTarget/CMakeLists.txt
+12
-12
platforms/cpu/src/CpuCustomGBForce.cpp
platforms/cpu/src/CpuCustomGBForce.cpp
+3
-3
platforms/cpu/src/CpuCustomNonbondedForce.cpp
platforms/cpu/src/CpuCustomNonbondedForce.cpp
+1
-1
platforms/cpu/src/CpuGayBerneForce.cpp
platforms/cpu/src/CpuGayBerneForce.cpp
+1
-1
platforms/cpu/src/CpuNeighborList.cpp
platforms/cpu/src/CpuNeighborList.cpp
+1
-1
platforms/cpu/src/CpuNonbondedForceAvx.cpp
platforms/cpu/src/CpuNonbondedForceAvx.cpp
+5
-5
platforms/cpu/src/CpuNonbondedForceAvx2.cpp
platforms/cpu/src/CpuNonbondedForceAvx2.cpp
+44
-0
platforms/cpu/src/CpuNonbondedForceFvec.cpp
platforms/cpu/src/CpuNonbondedForceFvec.cpp
+11
-5
tests/CMakeLists.txt
tests/CMakeLists.txt
+5
-2
tests/TestVectorizeAvx.cpp
tests/TestVectorizeAvx.cpp
+1
-1
tests/TestVectorizeAvx2.cpp
tests/TestVectorizeAvx2.cpp
+69
-0
tests/TestVectorizeGeneric.h
tests/TestVectorizeGeneric.h
+2
-4
No files found.
openmmapi/include/openmm/internal/vectorize
8
.h
→
openmmapi/include/openmm/internal/vectorize
Avx
.h
View file @
170ac691
#ifndef OPENMM_VECTORIZE
8
_H_
#define OPENMM_VECTORIZE
8
_H_
#ifndef OPENMM_VECTORIZE
AVX
_H_
#define OPENMM_VECTORIZE
AVX
_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
...
...
@@ -57,9 +57,7 @@ public:
* @param table The table from which to do a lookup.
* @param indexes The indexes to gather.
*/
fvec8
(
const
float
*
table
,
const
int
idx
[
8
])
{
// :TODO: Using int32_t explicitly as the index type could allow the real gather instruction to be used.
// Use gather and static assert? Conditional code?
fvec8
(
const
float
*
table
,
const
int32_t
idx
[
8
])
{
val
=
_mm256_setr_ps
(
table
[
idx
[
0
]],
table
[
idx
[
1
]],
table
[
idx
[
2
]],
table
[
idx
[
3
]],
table
[
idx
[
4
]],
table
[
idx
[
5
]],
table
[
idx
[
6
]],
table
[
idx
[
7
]]);
}
...
...
@@ -415,4 +413,4 @@ static inline fvec4 reduceToVec3(fvec8 x, fvec8 y, fvec8 z) {
return
laneResult
.
lowerVec
()
+
laneResult
.
upperVec
();
}
#endif
/*OPENMM_VECTORIZE
8
_H_*/
#endif
/*OPENMM_VECTORIZE
AVX
_H_*/
openmmapi/include/openmm/internal/vectorizeAvx2.h
0 → 100644
View file @
170ac691
#ifndef OPENMM_VECTORIZE_AVX2_H_
#define OPENMM_VECTORIZE_AVX2_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2014 Stanford University and the Authors. *
* Authors: Daniel Towner *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "vectorizeAvx.h"
#include <immintrin.h>
// This file defines classes and functions to simplify vectorizing code with AVX2.
/**
 * Report at runtime whether this CPU can execute the code built for the AVX2 path.
 *
 * The AVX2 sources are compiled with FMA code generation enabled in addition to
 * AVX2, so the compiler is free to emit FMA instructions in them. Both feature
 * bits must therefore be reported by CPUID before the AVX2 path may be selected.
 *
 * NOTE(review): this only inspects CPUID feature bits; it does not itself check
 * that the OS has enabled YMM state (OSXSAVE/XGETBV) - confirm whether that is
 * guaranteed elsewhere.
 *
 * @return true if CPUID reports both AVX2 and FMA, false otherwise.
 */
bool isAvx2Supported() {
    // Provide an alternative implementation of CPUID to support AVX2. On older
    // non-Windows OSes the hardware.h support for CPUID doesn't set the CX register
    // properly and gives the wrong answer when detecting AVX2 and beyond. On Windows
    // the cpuid seems to work as expected so can be used.
#if !(defined(_WIN32) || defined(WIN32))
    auto cpuid = [](int output[4], int functionnumber) {
        int a, b, c, d;
        // ECX is explicitly zeroed so that sub-leaf 0 is always the one queried.
        __asm("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "a"(functionnumber), "c"(0) : );
        output[0] = a;
        output[1] = b;
        output[2] = c;
        output[3] = d;
    };
#endif

    int cpuInfo[4];

    // Leaf 0 returns the highest supported standard leaf in EAX; leaf 7 is required.
    cpuid(cpuInfo, 0);
    if (cpuInfo[0] < 7)
        return false;

    // Leaf 1, ECX bit 12: FMA. Needed because the AVX2 code is built with FMA enabled.
    cpuid(cpuInfo, 1);
    const bool hasFma = (cpuInfo[2] & (1 << 12)) != 0;

    // Leaf 7 (sub-leaf 0), EBX bit 5: AVX2. The ECX slot is cleared beforehand, as in
    // the original code (presumably for cpuid implementations that read it - confirm).
    cpuInfo[2] = 0;
    cpuid(cpuInfo, 7);
    const bool hasAvx2 = (cpuInfo[1] & (1 << 5)) != 0;

    return hasAvx2 && hasFma;
}
/**
 * Eight-element float vector for CPUs with AVX2.
 *
 * Derive from fvec8 so that default implementations of everything are provided,
 * but can be overridden with AVX2-specific variants where possible.
 */
class fvecAvx2 : public fvec8 {
public:
    fvecAvx2() = default;

    /** Construct from an existing fvec8. */
    fvecAvx2(fvec8 v) : fvec8(v) {}

    /** Forward a single float to the matching fvec8 constructor. */
    fvecAvx2(float v) : fvec8(v) {}

    /**
     * Forward eight floats to the matching fvec8 constructor.
     *
     * NOTE(review): the arguments are passed to fvec8 in reverse order (v8 first).
     * Confirm this matches the element order fvec8's eight-float constructor expects.
     */
    fvecAvx2(float v1, float v2, float v3, float v4, float v5, float v6, float v7, float v8) :
        fvec8(v8, v7, v6, v5, v4, v3, v2, v1) {}

    /** Construct from a raw AVX register value. */
    fvecAvx2(__m256 v) : fvec8(v) {}

    /** Forward a float pointer to the matching fvec8 constructor. */
    fvecAvx2(const float* v) : fvec8(v) {}

    /** Create a vector by gathering individual indexes of data from a table. Element i of the vector will
     * be loaded from table[idx[i]].
     *
     * The indexes are explicitly 32 bits wide, matching the other gather constructors, because
     * all eight are read with one unaligned 256-bit load and used as 32-bit gather offsets
     * scaled by 4 bytes (one float).
     *
     * @param table The table from which to do a lookup.
     * @param idx   The eight indexes to gather.
     */
    fvecAvx2(const float* table, const int32_t idx[8]) :
        fvec8(_mm256_i32gather_ps(table, _mm256_loadu_si256(reinterpret_cast<const __m256i*>(idx)), 4)) {}

    /**
     * Expand the low eight bits of an integer into a per-element mask vector.
     *
     * @param bitmask Bits to expand; bit i controls the most significant bit of element i.
     * @return A vector usable as a mask where only each element's most significant bit is meaningful.
     */
    static fvecAvx2 expandBitsToMask(int bitmask);
};
/**
 * Expand the low eight bits of bitmask into a per-element mask.
 *
 * Bit i of bitmask ends up as the most significant bit of element i. For masking
 * purposes only that bit matters; every other bit of each element is arbitrary.
 *
 * @param bitmask The bits to expand (only the low eight are used).
 * @return The mask, reinterpreted as a float vector.
 */
inline fvecAvx2 fvecAvx2::expandBitsToMask(int bitmask) {
    // Copy the low byte of the mask into every byte, so each 32-bit element
    // carries all eight mask bits in its top byte (bits 24..31).
    const __m256i replicated = _mm256_set1_epi8(bitmask);

    // Element i is shifted left by (7 - i), which moves mask bit i from bit
    // position (24 + i) up to bit 31.
    const __m256i shiftCounts = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
    const __m256i signBits = _mm256_sllv_epi32(replicated, shiftCounts);

    return _mm256_castsi256_ps(signBits);
}
#endif
/*OPENMM_VECTORIZE_AVX2_H_*/
openmmapi/include/openmm/internal/vectorize_neon.h
View file @
170ac691
...
...
@@ -89,7 +89,7 @@ public:
* @param table The table from which to do a lookup.
* @param indexes The indexes to gather.
*/
fvec4
(
const
float
*
table
,
const
int
idx
[
4
])
fvec4
(
const
float
*
table
,
const
int
32_t
idx
[
4
])
:
fvec4
(
table
[
idx
[
0
]],
table
[
idx
[
1
]],
table
[
idx
[
2
]],
table
[
idx
[
3
]])
{
}
float
operator
[](
int
i
)
const
{
...
...
openmmapi/include/openmm/internal/vectorize_pnacl.h
View file @
170ac691
...
...
@@ -74,7 +74,7 @@ public:
* @param table The table from which to do a lookup.
* @param indexes The indexes to gather.
*/
fvec4
(
const
float
*
table
,
const
int
idx
[
4
])
fvec4
(
const
float
*
table
,
const
int
32_t
idx
[
4
])
:
fvec4
(
table
[
idx
[
0
]],
table
[
idx
[
1
]],
table
[
idx
[
2
]],
table
[
idx
[
3
]])
{
}
operator
__m128
()
const
{
...
...
openmmapi/include/openmm/internal/vectorize_sse.h
View file @
170ac691
...
...
@@ -80,7 +80,7 @@ public:
* @param table The table from which to do a lookup.
* @param indexes The indexes to gather.
*/
fvec4
(
const
float
*
table
,
const
int
idx
[
4
])
fvec4
(
const
float
*
table
,
const
int
32_t
idx
[
4
])
:
fvec4
(
table
[
idx
[
0
]],
table
[
idx
[
1
]],
table
[
idx
[
2
]],
table
[
idx
[
3
]])
{
}
operator
__m128
()
const
{
...
...
platforms/cpu/include/CpuNeighborList.h
View file @
170ac691
...
...
@@ -51,7 +51,7 @@ public:
const
Vec3
*
periodicBoxVectors
,
bool
usePeriodic
,
float
maxDistance
,
ThreadPool
&
threads
);
int
getNumBlocks
()
const
;
int
getBlockSize
()
const
;
const
std
::
vector
<
int
>&
getSortedAtoms
()
const
;
const
std
::
vector
<
int
32_t
>&
getSortedAtoms
()
const
;
const
std
::
vector
<
int
>&
getBlockNeighbors
(
int
blockIndex
)
const
;
/**
...
...
platforms/cpu/include/CpuNonbondedForceFvec.h
View file @
170ac691
...
...
@@ -140,7 +140,7 @@ void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnHandler(int blockIndex, float
using
std
::
min
;
using
std
::
max
;
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
int
32_t
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
float
minx
,
maxx
,
miny
,
maxy
,
minz
,
maxz
;
minx
=
maxx
=
posq
[
4
*
blockAtom
[
0
]];
miny
=
maxy
=
posq
[
4
*
blockAtom
[
0
]
+
1
];
...
...
@@ -183,7 +183,7 @@ template <int PERIODIC_TYPE, BlockType BLOCK_TYPE>
void
CpuNonbondedForceFvec
<
FVEC
>::
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
)
{
// Load the positions and parameters of the atoms in the block.
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
int
32_t
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
fvec4
blockAtomPosq
[
blockSize
];
FVEC
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
FVEC
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
...
...
platforms/cpu/sharedTarget/CMakeLists.txt
View file @
170ac691
FOREACH
(
file
${
SOURCE_FILES
}
)
IF
(
file MATCHES
".*Vec8.*"
)
IF
(
MSVC
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
/arch:AVX /D__AVX__"
)
ELSEIF
(
X86
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1 -mavx"
)
ELSE
()
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
)
ENDIF
()
ELSE
()
IF
(
X86 AND NOT MSVC
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1"
)
ENDIF
()
IF
(
X86 AND NOT MSVC
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1"
)
ENDIF
()
ENDFOREACH
(
file
)
# Override some sources files with platform specific flags.
IF
(
MSVC
)
SET_SOURCE_FILES_PROPERTIES
(
${
CMAKE_SOURCE_DIR
}
/platforms/cpu/src/CpuNonbondedForceAvx.cpp PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
/arch:AVX /D__AVX__"
)
SET_SOURCE_FILES_PROPERTIES
(
${
CMAKE_SOURCE_DIR
}
/platforms/cpu/src/CpuNonbondedForceAvx2.cpp PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
/arch:AVX2 /D__AVX2__"
)
ELSEIF
(
X86
)
SET_SOURCE_FILES_PROPERTIES
(
${
CMAKE_SOURCE_DIR
}
/platforms/cpu/src/CpuNonbondedForceAvx.cpp PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-mavx"
)
SET_SOURCE_FILES_PROPERTIES
(
${
CMAKE_SOURCE_DIR
}
/platforms/cpu/src/CpuNonbondedForceAvx2.cpp PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-mavx2 -mfma"
)
ENDIF
()
ADD_LIBRARY
(
${
SHARED_TARGET
}
SHARED
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
OPENMM_LIBRARY_NAME
}
${
PTHREADS_LIB
}
)
...
...
platforms/cpu/src/CpuCustomGBForce.cpp
View file @
170ac691
...
...
@@ -361,7 +361,7 @@ void CpuCustomGBForce::calculateParticlePairValue(int index, ThreadData& data, i
if
(
blockIndex
>=
neighborList
->
getNumBlocks
())
break
;
const
int
blockSize
=
neighborList
->
getBlockSize
();
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
int
32_t
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
auto
&
blockExclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
...
...
@@ -456,7 +456,7 @@ void CpuCustomGBForce::calculateParticlePairEnergyTerm(int index, ThreadData& da
if
(
blockIndex
>=
neighborList
->
getNumBlocks
())
break
;
const
int
blockSize
=
neighborList
->
getBlockSize
();
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
int
32_t
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
auto
&
blockExclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
...
...
@@ -543,7 +543,7 @@ void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms,
if
(
blockIndex
>=
neighborList
->
getNumBlocks
())
break
;
const
int
blockSize
=
neighborList
->
getBlockSize
();
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
int
32_t
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
auto
&
blockExclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
...
...
platforms/cpu/src/CpuCustomNonbondedForce.cpp
View file @
170ac691
...
...
@@ -193,7 +193,7 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
if
(
blockIndex
>=
neighborList
->
getNumBlocks
())
break
;
const
int
blockSize
=
neighborList
->
getBlockSize
();
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
int
32_t
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
auto
&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
...
...
platforms/cpu/src/CpuGayBerneForce.cpp
View file @
170ac691
...
...
@@ -181,7 +181,7 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex,
if
(
blockIndex
>=
neighborList
->
getNumBlocks
())
break
;
const
int
blockSize
=
neighborList
->
getBlockSize
();
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
int
32_t
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
auto
&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
...
...
platforms/cpu/src/CpuNeighborList.cpp
View file @
170ac691
...
...
@@ -501,7 +501,7 @@ int CpuNeighborList::getBlockSize() const {
return
blockSize
;
}
const
std
::
vector
<
int
>&
CpuNeighborList
::
getSortedAtoms
()
const
{
const
std
::
vector
<
int
32_t
>&
CpuNeighborList
::
getSortedAtoms
()
const
{
return
sortedAtoms
;
}
...
...
platforms/cpu/src/CpuNonbondedForce
Vec8
.cpp
→
platforms/cpu/src/CpuNonbondedForce
Avx
.cpp
View file @
170ac691
...
...
@@ -27,9 +27,9 @@
#ifdef __AVX__
#include "openmm/internal/vectorize
8
.h"
#include "openmm/internal/vectorize
Avx
.h"
bool
is
Vec8
Supported
()
{
bool
is
Avx
Supported
()
{
// Make sure the CPU supports AVX.
int
cpuInfo
[
4
];
cpuid
(
cpuInfo
,
0
);
...
...
@@ -40,16 +40,16 @@ bool isVec8Supported() {
return
false
;
}
OpenMM
::
CpuNonbondedForce
*
createCpuNonbondedForce
Vec8
()
{
OpenMM
::
CpuNonbondedForce
*
createCpuNonbondedForce
Avx
()
{
return
new
OpenMM
::
CpuNonbondedForceFvec
<
fvec8
>
();
}
#else
bool
is
Vec8
Supported
()
{
bool
is
Avx
Supported
()
{
return
false
;
}
OpenMM
::
CpuNonbondedForce
*
createCpuNonbondedForce
Vec8
()
{
OpenMM
::
CpuNonbondedForce
*
createCpuNonbondedForce
Avx
()
{
throw
OpenMM
::
OpenMMException
(
"Internal error: OpenMM was compiled without AVX support"
);
}
#endif
platforms/cpu/src/CpuNonbondedForceAvx2.cpp
0 → 100644
View file @
170ac691
/* Portions copyright (c) 2006-2015 Stanford University and Simbios.
* Contributors: Daniel Towner
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "CpuNonbondedForceFvec.h"
#include "openmm/OpenMMException.h"
#ifdef __AVX2__
#include "openmm/internal/vectorizeAvx2.h"
/**
 * Factory for the AVX2 build of the vectorized nonbonded force.
 *
 * @return a newly allocated CpuNonbondedForceFvec specialised on fvecAvx2. The object
 *         is created with plain `new`; presumably the caller takes ownership - confirm.
 */
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx2() {
    using Avx2NonbondedForce = OpenMM::CpuNonbondedForceFvec<fvecAvx2>;
    return new Avx2NonbondedForce();
}
#else
/**
 * Stub used when this file is compiled without AVX2 enabled.
 *
 * @return always false, since no AVX2 implementation was built.
 */
bool isAvx2Supported() { return false; }
/**
 * Fallback factory used when this file is compiled without AVX2 enabled.
 *
 * Never returns: there is no AVX2 implementation to construct in this build.
 *
 * @throws OpenMM::OpenMMException always.
 */
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx2() {
    const char* const message = "Internal error: OpenMM was compiled without AVX2 support";
    throw OpenMM::OpenMMException(message);
}
#endif
platforms/cpu/src/CpuNonbondedForceFvec.cpp
View file @
170ac691
...
...
@@ -25,19 +25,25 @@
#include "CpuNonbondedForceFvec.h"
OpenMM
::
CpuNonbondedForce
*
createCpuNonbondedForceVec4
();
OpenMM
::
CpuNonbondedForce
*
createCpuNonbondedForceVec8
();
OpenMM
::
CpuNonbondedForce
*
createCpuNonbondedForceAvx
();
OpenMM
::
CpuNonbondedForce
*
createCpuNonbondedForceAvx2
();
bool
isVec8Supported
();
bool
isAvxSupported
();
bool
isAvx2Supported
();
#include <iostream>
OpenMM
::
CpuNonbondedForce
*
createCpuNonbondedForceVec
()
{
if
(
isVec8Supported
())
return
createCpuNonbondedForceVec8
();
if
(
isAvx2Supported
())
return
createCpuNonbondedForceAvx2
();
else
if
(
isAvxSupported
())
return
createCpuNonbondedForceAvx
();
else
return
createCpuNonbondedForceVec4
();
}
int
getVecBlockSize
()
{
if
(
is
Vec8
Supported
())
if
(
is
Avx2Supported
()
||
isAvx
Supported
())
return
8
;
else
return
4
;
...
...
tests/CMakeLists.txt
View file @
170ac691
...
...
@@ -8,7 +8,7 @@ ENABLE_TESTING()
FILE
(
GLOB TEST_PROGS
"*Test*.cpp"
)
FOREACH
(
TEST_PROG
${
TEST_PROGS
}
)
GET_FILENAME_COMPONENT
(
TEST_ROOT
${
TEST_PROG
}
NAME_WE
)
IF
((
${
TEST_ROOT
}
MATCHES TestVectorize
8
)
AND NOT X86
)
IF
((
${
TEST_ROOT
}
MATCHES TestVectorize
Avx*
)
AND NOT X86
)
CONTINUE
()
ENDIF
()
ADD_EXECUTABLE
(
${
TEST_ROOT
}
${
TEST_PROG
}
)
...
...
@@ -21,9 +21,12 @@ FOREACH(TEST_PROG ${TEST_PROGS})
IF
((
${
TEST_ROOT
}
MATCHES TestVectorize
)
AND X86 AND NOT MSVC
)
SET
(
EXTRA_TEST_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1"
)
ENDIF
()
IF
((
${
TEST_ROOT
}
MATCHES TestVectorize
8
)
AND X86 AND NOT MSVC
)
IF
((
${
TEST_ROOT
}
MATCHES TestVectorize
Avx
)
AND X86 AND NOT MSVC
)
SET
(
EXTRA_TEST_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-mavx"
)
ENDIF
()
IF
((
${
TEST_ROOT
}
MATCHES TestVectorizeAvx2
)
AND X86 AND NOT MSVC
)
SET
(
EXTRA_TEST_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-mfma -mavx2"
)
ENDIF
()
SET_TARGET_PROPERTIES
(
${
TEST_ROOT
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_LINK_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_TEST_FLAGS
}
"
)
ADD_TEST
(
${
TEST_ROOT
}
${
EXECUTABLE_OUTPUT_PATH
}
/
${
TEST_ROOT
}
)
ENDFOREACH
(
TEST_PROG
${
TEST_PROGS
}
)
...
...
tests/TestVectorize
8
.cpp
→
tests/TestVectorize
Avx
.cpp
View file @
170ac691
...
...
@@ -34,7 +34,7 @@
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/internal/vectorize
8
.h"
#include "openmm/internal/vectorize
Avx
.h"
#include <iostream>
#include "TestVectorizeGeneric.h"
...
...
tests/TestVectorizeAvx2.cpp
0 → 100644
View file @
170ac691
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2014-2015 Stanford University and the Authors. *
* Authors: Daniel Towner *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests vectorized operations.
*/
#include "openmm/internal/AssertionUtilities.h"
#include <iostream>
#ifndef __AVX2__
/**
 * Entry point used when the test is compiled without AVX2 enabled: there is
 * nothing to test, so print a notice and report success.
 */
int main() {
    const char* const notice = "AVX2 CPU is not supported. Exiting.";
    std::cout << notice << std::endl;
    return 0;
}
#else
#include "openmm/internal/vectorizeAvx2.h"
#include "TestVectorizeGeneric.h"
using
namespace
OpenMM
;
/**
 * Entry point for the AVX2 vectorization tests.
 *
 * Runs the generic fvec test suite against fvecAvx2 when isAvx2Supported()
 * reports the CPU can run it; otherwise prints a notice and exits successfully.
 *
 * @return 0 on success or when the CPU is unsupported, 1 if a std::exception was caught.
 */
int main(int argc, char* argv[]) {
    try {
        if (isAvx2Supported()) {
            TestFvec<fvecAvx2>::testAll();
        }
        else {
            // Not a failure: the host simply cannot run this test.
            std::cout << "CPU is not supported. Exiting." << std::endl;
            return 0;
        }
    }
    catch (const std::exception& e) {
        std::cout << "exception: " << e.what() << std::endl;
        return 1;
    }

    std::cout << "Done" << std::endl;
    return 0;
}
#endif
\ No newline at end of file
tests/TestVectorizeGeneric.h
View file @
170ac691
...
...
@@ -338,12 +338,10 @@ void TestFvec<FVEC>::testBinaryOps() const {
ASSERT_VEC_ALMOST_EQUAL
(
f
/
v0
,
applyBinaryFn
(
fdup
,
v0
,
std
::
divides
<
float
>
()));
// Binary functions.
using
std
::
min
;
using
std
::
max
;
ASSERT_VEC_EQUAL
(
min
(
v0
,
v1
),
applyBinaryFn
(
v0
,
v1
,
[](
float
x
,
float
y
)
{
return
min
(
x
,
y
);
}));
applyBinaryFn
(
v0
,
v1
,
[](
float
x
,
float
y
)
{
return
std
::
min
<
float
>
(
x
,
y
);
}));
ASSERT_VEC_EQUAL
(
max
(
v0
,
v1
),
applyBinaryFn
(
v0
,
v1
,
[](
float
x
,
float
y
)
{
return
max
(
x
,
y
);
}));
applyBinaryFn
(
v0
,
v1
,
[](
float
x
,
float
y
)
{
return
std
::
max
<
float
>
(
x
,
y
);
}));
}
template
<
typename
FVEC
>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment