Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
e4c1f73c
Unverified
Commit
e4c1f73c
authored
Jun 09, 2020
by
peastman
Committed by
GitHub
Jun 09, 2020
Browse files
Merge pull request #2724 from dwtowner/cpu_avx2_support
CPU: Added support for AVX2.
parents
8c43e37a
170ac691
Changes
19
Show whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
269 additions
and
47 deletions
+269
-47
openmmapi/include/openmm/internal/vectorizeAvx.h
openmmapi/include/openmm/internal/vectorizeAvx.h
+4
-6
openmmapi/include/openmm/internal/vectorizeAvx2.h
openmmapi/include/openmm/internal/vectorizeAvx2.h
+104
-0
openmmapi/include/openmm/internal/vectorize_neon.h
openmmapi/include/openmm/internal/vectorize_neon.h
+1
-1
openmmapi/include/openmm/internal/vectorize_pnacl.h
openmmapi/include/openmm/internal/vectorize_pnacl.h
+1
-1
openmmapi/include/openmm/internal/vectorize_sse.h
openmmapi/include/openmm/internal/vectorize_sse.h
+1
-1
platforms/cpu/include/CpuNeighborList.h
platforms/cpu/include/CpuNeighborList.h
+1
-1
platforms/cpu/include/CpuNonbondedForceFvec.h
platforms/cpu/include/CpuNonbondedForceFvec.h
+2
-2
platforms/cpu/sharedTarget/CMakeLists.txt
platforms/cpu/sharedTarget/CMakeLists.txt
+12
-12
platforms/cpu/src/CpuCustomGBForce.cpp
platforms/cpu/src/CpuCustomGBForce.cpp
+3
-3
platforms/cpu/src/CpuCustomNonbondedForce.cpp
platforms/cpu/src/CpuCustomNonbondedForce.cpp
+1
-1
platforms/cpu/src/CpuGayBerneForce.cpp
platforms/cpu/src/CpuGayBerneForce.cpp
+1
-1
platforms/cpu/src/CpuNeighborList.cpp
platforms/cpu/src/CpuNeighborList.cpp
+1
-1
platforms/cpu/src/CpuNonbondedForceAvx.cpp
platforms/cpu/src/CpuNonbondedForceAvx.cpp
+5
-5
platforms/cpu/src/CpuNonbondedForceAvx2.cpp
platforms/cpu/src/CpuNonbondedForceAvx2.cpp
+44
-0
platforms/cpu/src/CpuNonbondedForceFvec.cpp
platforms/cpu/src/CpuNonbondedForceFvec.cpp
+11
-5
tests/CMakeLists.txt
tests/CMakeLists.txt
+5
-2
tests/TestVectorizeAvx.cpp
tests/TestVectorizeAvx.cpp
+1
-1
tests/TestVectorizeAvx2.cpp
tests/TestVectorizeAvx2.cpp
+69
-0
tests/TestVectorizeGeneric.h
tests/TestVectorizeGeneric.h
+2
-4
No files found.
openmmapi/include/openmm/internal/vectorize
8
.h
→
openmmapi/include/openmm/internal/vectorize
Avx
.h
View file @
e4c1f73c
#ifndef OPENMM_VECTORIZE
8
_H_
#ifndef OPENMM_VECTORIZE
AVX
_H_
#define OPENMM_VECTORIZE
8
_H_
#define OPENMM_VECTORIZE
AVX
_H_
/* -------------------------------------------------------------------------- *
/* -------------------------------------------------------------------------- *
* OpenMM *
* OpenMM *
...
@@ -57,9 +57,7 @@ public:
...
@@ -57,9 +57,7 @@ public:
* @param table The table from which to do a lookup.
* @param table The table from which to do a lookup.
* @param indexes The indexes to gather.
* @param indexes The indexes to gather.
*/
*/
fvec8
(
const
float
*
table
,
const
int
idx
[
8
])
{
fvec8
(
const
float
*
table
,
const
int32_t
idx
[
8
])
{
// :TODO: Using int32_t explicitly as the index type could allow the real gather instruction to be used.
// Use gather and static assert? Conditional code?
val
=
_mm256_setr_ps
(
table
[
idx
[
0
]],
table
[
idx
[
1
]],
table
[
idx
[
2
]],
table
[
idx
[
3
]],
table
[
idx
[
4
]],
table
[
idx
[
5
]],
table
[
idx
[
6
]],
table
[
idx
[
7
]]);
val
=
_mm256_setr_ps
(
table
[
idx
[
0
]],
table
[
idx
[
1
]],
table
[
idx
[
2
]],
table
[
idx
[
3
]],
table
[
idx
[
4
]],
table
[
idx
[
5
]],
table
[
idx
[
6
]],
table
[
idx
[
7
]]);
}
}
...
@@ -415,4 +413,4 @@ static inline fvec4 reduceToVec3(fvec8 x, fvec8 y, fvec8 z) {
...
@@ -415,4 +413,4 @@ static inline fvec4 reduceToVec3(fvec8 x, fvec8 y, fvec8 z) {
return
laneResult
.
lowerVec
()
+
laneResult
.
upperVec
();
return
laneResult
.
lowerVec
()
+
laneResult
.
upperVec
();
}
}
#endif
/*OPENMM_VECTORIZE
8
_H_*/
#endif
/*OPENMM_VECTORIZE
AVX
_H_*/
openmmapi/include/openmm/internal/vectorizeAvx2.h
0 → 100644
View file @
e4c1f73c
#ifndef OPENMM_VECTORIZE_AVX2_H_
#define OPENMM_VECTORIZE_AVX2_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2014 Stanford University and the Authors. *
* Authors: Daniel Towner *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "vectorizeAvx.h"
#include <immintrin.h>
// This file defines classes and functions to simplify vectorizing code with AVX.
bool
isAvx2Supported
()
{
// Provide an alternative implementation of CPUID to support AVX2. On older
// non-Windows OSes the hardware.h support for CPUID doesn't set the CX register
// properly and gives the wrong answer when detecting AVX2 and beyond. On Windows
// the cpuid seems to work as expected so can be used.
#if !(defined(_WIN32) || defined(WIN32))
auto
cpuid
=
[](
int
output
[
4
],
int
functionnumber
)
{
int
a
,
b
,
c
,
d
;
__asm
(
"cpuid"
:
"=a"
(
a
),
"=b"
(
b
),
"=c"
(
c
),
"=d"
(
d
)
:
"a"
(
functionnumber
),
"c"
(
0
)
:
);
output
[
0
]
=
a
;
output
[
1
]
=
b
;
output
[
2
]
=
c
;
output
[
3
]
=
d
;
};
#endif
int
cpuInfo
[
4
];
cpuid
(
cpuInfo
,
0
);
if
(
cpuInfo
[
0
]
>=
7
)
{
cpuInfo
[
2
]
=
0
;
cpuid
(
cpuInfo
,
7
);
return
((
cpuInfo
[
1
]
&
((
int
)
1
<<
5
))
!=
0
);
}
return
false
;
}
/**
* Derive from fvec8 so that default implementations of everything are provided,
* but can be overriden with AVX2-specific variants where possible.
*/
class
fvecAvx2
:
public
fvec8
{
public:
fvecAvx2
()
=
default
;
fvecAvx2
(
fvec8
v
)
:
fvec8
(
v
)
{}
fvecAvx2
(
float
v
)
:
fvec8
(
v
)
{}
fvecAvx2
(
float
v1
,
float
v2
,
float
v3
,
float
v4
,
float
v5
,
float
v6
,
float
v7
,
float
v8
)
:
fvec8
(
v8
,
v7
,
v6
,
v5
,
v4
,
v3
,
v2
,
v1
)
{}
fvecAvx2
(
__m256
v
)
:
fvec8
(
v
)
{}
fvecAvx2
(
const
float
*
v
)
:
fvec8
(
v
)
{}
/** Create a vector by gathering individual indexes of data from a table. Element i of the vector will
* be loaded from table[idx[i]].
* @param table The table from which to do a lookup.
* @param indexes The indexes to gather.
*/
fvecAvx2
(
const
float
*
table
,
const
int
idx
[
8
])
:
fvec8
(
_mm256_i32gather_ps
(
table
,
_mm256_loadu_si256
((
const
__m256i
*
)
idx
),
4
))
{}
static
fvecAvx2
expandBitsToMask
(
int
bitmask
);
};
inline
fvecAvx2
fvecAvx2
::
expandBitsToMask
(
int
bitmask
)
{
// Put a copy of all bits into each vector element and then shift so that the
// appropriate sub-bit becomes the MSB. For masking purposes, only the MSB matters and
// the other bits can be completely arbitrary.
const
auto
msb
=
_mm256_sllv_epi32
(
_mm256_set1_epi8
(
bitmask
),
_mm256_setr_epi32
(
7
,
6
,
5
,
4
,
3
,
2
,
1
,
0
));
return
_mm256_castsi256_ps
(
msb
);
}
#endif
/*OPENMM_VECTORIZE_AVX2_H_*/
openmmapi/include/openmm/internal/vectorize_neon.h
View file @
e4c1f73c
...
@@ -89,7 +89,7 @@ public:
...
@@ -89,7 +89,7 @@ public:
* @param table The table from which to do a lookup.
* @param table The table from which to do a lookup.
* @param indexes The indexes to gather.
* @param indexes The indexes to gather.
*/
*/
fvec4
(
const
float
*
table
,
const
int
idx
[
4
])
fvec4
(
const
float
*
table
,
const
int
32_t
idx
[
4
])
:
fvec4
(
table
[
idx
[
0
]],
table
[
idx
[
1
]],
table
[
idx
[
2
]],
table
[
idx
[
3
]])
{
}
:
fvec4
(
table
[
idx
[
0
]],
table
[
idx
[
1
]],
table
[
idx
[
2
]],
table
[
idx
[
3
]])
{
}
float
operator
[](
int
i
)
const
{
float
operator
[](
int
i
)
const
{
...
...
openmmapi/include/openmm/internal/vectorize_pnacl.h
View file @
e4c1f73c
...
@@ -74,7 +74,7 @@ public:
...
@@ -74,7 +74,7 @@ public:
* @param table The table from which to do a lookup.
* @param table The table from which to do a lookup.
* @param indexes The indexes to gather.
* @param indexes The indexes to gather.
*/
*/
fvec4
(
const
float
*
table
,
const
int
idx
[
4
])
fvec4
(
const
float
*
table
,
const
int
32_t
idx
[
4
])
:
fvec4
(
table
[
idx
[
0
]],
table
[
idx
[
1
]],
table
[
idx
[
2
]],
table
[
idx
[
3
]])
{
}
:
fvec4
(
table
[
idx
[
0
]],
table
[
idx
[
1
]],
table
[
idx
[
2
]],
table
[
idx
[
3
]])
{
}
operator
__m128
()
const
{
operator
__m128
()
const
{
...
...
openmmapi/include/openmm/internal/vectorize_sse.h
View file @
e4c1f73c
...
@@ -80,7 +80,7 @@ public:
...
@@ -80,7 +80,7 @@ public:
* @param table The table from which to do a lookup.
* @param table The table from which to do a lookup.
* @param indexes The indexes to gather.
* @param indexes The indexes to gather.
*/
*/
fvec4
(
const
float
*
table
,
const
int
idx
[
4
])
fvec4
(
const
float
*
table
,
const
int
32_t
idx
[
4
])
:
fvec4
(
table
[
idx
[
0
]],
table
[
idx
[
1
]],
table
[
idx
[
2
]],
table
[
idx
[
3
]])
{
}
:
fvec4
(
table
[
idx
[
0
]],
table
[
idx
[
1
]],
table
[
idx
[
2
]],
table
[
idx
[
3
]])
{
}
operator
__m128
()
const
{
operator
__m128
()
const
{
...
...
platforms/cpu/include/CpuNeighborList.h
View file @
e4c1f73c
...
@@ -51,7 +51,7 @@ public:
...
@@ -51,7 +51,7 @@ public:
const
Vec3
*
periodicBoxVectors
,
bool
usePeriodic
,
float
maxDistance
,
ThreadPool
&
threads
);
const
Vec3
*
periodicBoxVectors
,
bool
usePeriodic
,
float
maxDistance
,
ThreadPool
&
threads
);
int
getNumBlocks
()
const
;
int
getNumBlocks
()
const
;
int
getBlockSize
()
const
;
int
getBlockSize
()
const
;
const
std
::
vector
<
int
>&
getSortedAtoms
()
const
;
const
std
::
vector
<
int
32_t
>&
getSortedAtoms
()
const
;
const
std
::
vector
<
int
>&
getBlockNeighbors
(
int
blockIndex
)
const
;
const
std
::
vector
<
int
>&
getBlockNeighbors
(
int
blockIndex
)
const
;
/**
/**
...
...
platforms/cpu/include/CpuNonbondedForceFvec.h
View file @
e4c1f73c
...
@@ -140,7 +140,7 @@ void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnHandler(int blockIndex, float
...
@@ -140,7 +140,7 @@ void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnHandler(int blockIndex, float
using
std
::
min
;
using
std
::
min
;
using
std
::
max
;
using
std
::
max
;
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
int
32_t
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
float
minx
,
maxx
,
miny
,
maxy
,
minz
,
maxz
;
float
minx
,
maxx
,
miny
,
maxy
,
minz
,
maxz
;
minx
=
maxx
=
posq
[
4
*
blockAtom
[
0
]];
minx
=
maxx
=
posq
[
4
*
blockAtom
[
0
]];
miny
=
maxy
=
posq
[
4
*
blockAtom
[
0
]
+
1
];
miny
=
maxy
=
posq
[
4
*
blockAtom
[
0
]
+
1
];
...
@@ -183,7 +183,7 @@ template <int PERIODIC_TYPE, BlockType BLOCK_TYPE>
...
@@ -183,7 +183,7 @@ template <int PERIODIC_TYPE, BlockType BLOCK_TYPE>
void
CpuNonbondedForceFvec
<
FVEC
>::
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
)
{
void
CpuNonbondedForceFvec
<
FVEC
>::
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
)
{
// Load the positions and parameters of the atoms in the block.
// Load the positions and parameters of the atoms in the block.
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
int
32_t
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
fvec4
blockAtomPosq
[
blockSize
];
fvec4
blockAtomPosq
[
blockSize
];
FVEC
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
FVEC
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
FVEC
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
FVEC
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
...
...
platforms/cpu/sharedTarget/CMakeLists.txt
View file @
e4c1f73c
FOREACH
(
file
${
SOURCE_FILES
}
)
FOREACH
(
file
${
SOURCE_FILES
}
)
IF
(
file MATCHES
".*Vec8.*"
)
IF
(
MSVC
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
/arch:AVX /D__AVX__"
)
ELSEIF
(
X86
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1 -mavx"
)
ELSE
()
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
)
ENDIF
()
ELSE
()
IF
(
X86 AND NOT MSVC
)
IF
(
X86 AND NOT MSVC
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1"
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1"
)
ENDIF
()
ENDIF
()
ENDIF
()
ENDFOREACH
(
file
)
ENDFOREACH
(
file
)
# Override some sources files with platform specific flags.
IF
(
MSVC
)
SET_SOURCE_FILES_PROPERTIES
(
${
CMAKE_SOURCE_DIR
}
/platforms/cpu/src/CpuNonbondedForceAvx.cpp PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
/arch:AVX /D__AVX__"
)
SET_SOURCE_FILES_PROPERTIES
(
${
CMAKE_SOURCE_DIR
}
/platforms/cpu/src/CpuNonbondedForceAvx2.cpp PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
/arch:AVX2 /D__AVX2__"
)
ELSEIF
(
X86
)
SET_SOURCE_FILES_PROPERTIES
(
${
CMAKE_SOURCE_DIR
}
/platforms/cpu/src/CpuNonbondedForceAvx.cpp PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-mavx"
)
SET_SOURCE_FILES_PROPERTIES
(
${
CMAKE_SOURCE_DIR
}
/platforms/cpu/src/CpuNonbondedForceAvx2.cpp PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-mavx2 -mfma"
)
ENDIF
()
ADD_LIBRARY
(
${
SHARED_TARGET
}
SHARED
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
ADD_LIBRARY
(
${
SHARED_TARGET
}
SHARED
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
OPENMM_LIBRARY_NAME
}
${
PTHREADS_LIB
}
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
OPENMM_LIBRARY_NAME
}
${
PTHREADS_LIB
}
)
...
...
platforms/cpu/src/CpuCustomGBForce.cpp
View file @
e4c1f73c
...
@@ -361,7 +361,7 @@ void CpuCustomGBForce::calculateParticlePairValue(int index, ThreadData& data, i
...
@@ -361,7 +361,7 @@ void CpuCustomGBForce::calculateParticlePairValue(int index, ThreadData& data, i
if
(
blockIndex
>=
neighborList
->
getNumBlocks
())
if
(
blockIndex
>=
neighborList
->
getNumBlocks
())
break
;
break
;
const
int
blockSize
=
neighborList
->
getBlockSize
();
const
int
blockSize
=
neighborList
->
getBlockSize
();
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
int
32_t
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
auto
&
blockExclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
const
auto
&
blockExclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
...
@@ -456,7 +456,7 @@ void CpuCustomGBForce::calculateParticlePairEnergyTerm(int index, ThreadData& da
...
@@ -456,7 +456,7 @@ void CpuCustomGBForce::calculateParticlePairEnergyTerm(int index, ThreadData& da
if
(
blockIndex
>=
neighborList
->
getNumBlocks
())
if
(
blockIndex
>=
neighborList
->
getNumBlocks
())
break
;
break
;
const
int
blockSize
=
neighborList
->
getBlockSize
();
const
int
blockSize
=
neighborList
->
getBlockSize
();
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
int
32_t
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
auto
&
blockExclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
const
auto
&
blockExclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
...
@@ -543,7 +543,7 @@ void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms,
...
@@ -543,7 +543,7 @@ void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms,
if
(
blockIndex
>=
neighborList
->
getNumBlocks
())
if
(
blockIndex
>=
neighborList
->
getNumBlocks
())
break
;
break
;
const
int
blockSize
=
neighborList
->
getBlockSize
();
const
int
blockSize
=
neighborList
->
getBlockSize
();
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
int
32_t
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
auto
&
blockExclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
const
auto
&
blockExclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
...
...
platforms/cpu/src/CpuCustomNonbondedForce.cpp
View file @
e4c1f73c
...
@@ -193,7 +193,7 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
...
@@ -193,7 +193,7 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
if
(
blockIndex
>=
neighborList
->
getNumBlocks
())
if
(
blockIndex
>=
neighborList
->
getNumBlocks
())
break
;
break
;
const
int
blockSize
=
neighborList
->
getBlockSize
();
const
int
blockSize
=
neighborList
->
getBlockSize
();
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
int
32_t
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
auto
&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
const
auto
&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
...
...
platforms/cpu/src/CpuGayBerneForce.cpp
View file @
e4c1f73c
...
@@ -181,7 +181,7 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex,
...
@@ -181,7 +181,7 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex,
if
(
blockIndex
>=
neighborList
->
getNumBlocks
())
if
(
blockIndex
>=
neighborList
->
getNumBlocks
())
break
;
break
;
const
int
blockSize
=
neighborList
->
getBlockSize
();
const
int
blockSize
=
neighborList
->
getBlockSize
();
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
int
32_t
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
auto
&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
const
auto
&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
...
...
platforms/cpu/src/CpuNeighborList.cpp
View file @
e4c1f73c
...
@@ -501,7 +501,7 @@ int CpuNeighborList::getBlockSize() const {
...
@@ -501,7 +501,7 @@ int CpuNeighborList::getBlockSize() const {
return
blockSize
;
return
blockSize
;
}
}
const
std
::
vector
<
int
>&
CpuNeighborList
::
getSortedAtoms
()
const
{
const
std
::
vector
<
int
32_t
>&
CpuNeighborList
::
getSortedAtoms
()
const
{
return
sortedAtoms
;
return
sortedAtoms
;
}
}
...
...
platforms/cpu/src/CpuNonbondedForce
Vec8
.cpp
→
platforms/cpu/src/CpuNonbondedForce
Avx
.cpp
View file @
e4c1f73c
...
@@ -27,9 +27,9 @@
...
@@ -27,9 +27,9 @@
#ifdef __AVX__
#ifdef __AVX__
#include "openmm/internal/vectorize
8
.h"
#include "openmm/internal/vectorize
Avx
.h"
bool
is
Vec8
Supported
()
{
bool
is
Avx
Supported
()
{
// Make sure the CPU supports AVX.
// Make sure the CPU supports AVX.
int
cpuInfo
[
4
];
int
cpuInfo
[
4
];
cpuid
(
cpuInfo
,
0
);
cpuid
(
cpuInfo
,
0
);
...
@@ -40,16 +40,16 @@ bool isVec8Supported() {
...
@@ -40,16 +40,16 @@ bool isVec8Supported() {
return
false
;
return
false
;
}
}
OpenMM
::
CpuNonbondedForce
*
createCpuNonbondedForce
Vec8
()
{
OpenMM
::
CpuNonbondedForce
*
createCpuNonbondedForce
Avx
()
{
return
new
OpenMM
::
CpuNonbondedForceFvec
<
fvec8
>
();
return
new
OpenMM
::
CpuNonbondedForceFvec
<
fvec8
>
();
}
}
#else
#else
bool
is
Vec8
Supported
()
{
bool
is
Avx
Supported
()
{
return
false
;
return
false
;
}
}
OpenMM
::
CpuNonbondedForce
*
createCpuNonbondedForce
Vec8
()
{
OpenMM
::
CpuNonbondedForce
*
createCpuNonbondedForce
Avx
()
{
throw
OpenMM
::
OpenMMException
(
"Internal error: OpenMM was compiled without AVX support"
);
throw
OpenMM
::
OpenMMException
(
"Internal error: OpenMM was compiled without AVX support"
);
}
}
#endif
#endif
platforms/cpu/src/CpuNonbondedForceAvx2.cpp
0 → 100644
View file @
e4c1f73c
/* Portions copyright (c) 2006-2015 Stanford University and Simbios.
* Contributors: Daniel Towner
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "CpuNonbondedForceFvec.h"
#include "openmm/OpenMMException.h"
#ifdef __AVX2__
#include "openmm/internal/vectorizeAvx2.h"
OpenMM
::
CpuNonbondedForce
*
createCpuNonbondedForceAvx2
()
{
return
new
OpenMM
::
CpuNonbondedForceFvec
<
fvecAvx2
>
();
}
#else
bool
isAvx2Supported
()
{
return
false
;
}
OpenMM
::
CpuNonbondedForce
*
createCpuNonbondedForceAvx2
()
{
throw
OpenMM
::
OpenMMException
(
"Internal error: OpenMM was compiled without AVX2 support"
);
}
#endif
platforms/cpu/src/CpuNonbondedForceFvec.cpp
View file @
e4c1f73c
...
@@ -25,19 +25,25 @@
...
@@ -25,19 +25,25 @@
#include "CpuNonbondedForceFvec.h"
#include "CpuNonbondedForceFvec.h"
OpenMM
::
CpuNonbondedForce
*
createCpuNonbondedForceVec4
();
OpenMM
::
CpuNonbondedForce
*
createCpuNonbondedForceVec4
();
OpenMM
::
CpuNonbondedForce
*
createCpuNonbondedForceVec8
();
OpenMM
::
CpuNonbondedForce
*
createCpuNonbondedForceAvx
();
OpenMM
::
CpuNonbondedForce
*
createCpuNonbondedForceAvx2
();
bool
isVec8Supported
();
bool
isAvxSupported
();
bool
isAvx2Supported
();
#include <iostream>
OpenMM
::
CpuNonbondedForce
*
createCpuNonbondedForceVec
()
{
OpenMM
::
CpuNonbondedForce
*
createCpuNonbondedForceVec
()
{
if
(
isVec8Supported
())
if
(
isAvx2Supported
())
return
createCpuNonbondedForceVec8
();
return
createCpuNonbondedForceAvx2
();
else
if
(
isAvxSupported
())
return
createCpuNonbondedForceAvx
();
else
else
return
createCpuNonbondedForceVec4
();
return
createCpuNonbondedForceVec4
();
}
}
int
getVecBlockSize
()
{
int
getVecBlockSize
()
{
if
(
is
Vec8
Supported
())
if
(
is
Avx2Supported
()
||
isAvx
Supported
())
return
8
;
return
8
;
else
else
return
4
;
return
4
;
...
...
tests/CMakeLists.txt
View file @
e4c1f73c
...
@@ -8,7 +8,7 @@ ENABLE_TESTING()
...
@@ -8,7 +8,7 @@ ENABLE_TESTING()
FILE
(
GLOB TEST_PROGS
"*Test*.cpp"
)
FILE
(
GLOB TEST_PROGS
"*Test*.cpp"
)
FOREACH
(
TEST_PROG
${
TEST_PROGS
}
)
FOREACH
(
TEST_PROG
${
TEST_PROGS
}
)
GET_FILENAME_COMPONENT
(
TEST_ROOT
${
TEST_PROG
}
NAME_WE
)
GET_FILENAME_COMPONENT
(
TEST_ROOT
${
TEST_PROG
}
NAME_WE
)
IF
((
${
TEST_ROOT
}
MATCHES TestVectorize
8
)
AND NOT X86
)
IF
((
${
TEST_ROOT
}
MATCHES TestVectorize
Avx*
)
AND NOT X86
)
CONTINUE
()
CONTINUE
()
ENDIF
()
ENDIF
()
ADD_EXECUTABLE
(
${
TEST_ROOT
}
${
TEST_PROG
}
)
ADD_EXECUTABLE
(
${
TEST_ROOT
}
${
TEST_PROG
}
)
...
@@ -21,9 +21,12 @@ FOREACH(TEST_PROG ${TEST_PROGS})
...
@@ -21,9 +21,12 @@ FOREACH(TEST_PROG ${TEST_PROGS})
IF
((
${
TEST_ROOT
}
MATCHES TestVectorize
)
AND X86 AND NOT MSVC
)
IF
((
${
TEST_ROOT
}
MATCHES TestVectorize
)
AND X86 AND NOT MSVC
)
SET
(
EXTRA_TEST_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1"
)
SET
(
EXTRA_TEST_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1"
)
ENDIF
()
ENDIF
()
IF
((
${
TEST_ROOT
}
MATCHES TestVectorize
8
)
AND X86 AND NOT MSVC
)
IF
((
${
TEST_ROOT
}
MATCHES TestVectorize
Avx
)
AND X86 AND NOT MSVC
)
SET
(
EXTRA_TEST_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-mavx"
)
SET
(
EXTRA_TEST_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-mavx"
)
ENDIF
()
ENDIF
()
IF
((
${
TEST_ROOT
}
MATCHES TestVectorizeAvx2
)
AND X86 AND NOT MSVC
)
SET
(
EXTRA_TEST_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-mfma -mavx2"
)
ENDIF
()
SET_TARGET_PROPERTIES
(
${
TEST_ROOT
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_LINK_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_TEST_FLAGS
}
"
)
SET_TARGET_PROPERTIES
(
${
TEST_ROOT
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_LINK_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_TEST_FLAGS
}
"
)
ADD_TEST
(
${
TEST_ROOT
}
${
EXECUTABLE_OUTPUT_PATH
}
/
${
TEST_ROOT
}
)
ADD_TEST
(
${
TEST_ROOT
}
${
EXECUTABLE_OUTPUT_PATH
}
/
${
TEST_ROOT
}
)
ENDFOREACH
(
TEST_PROG
${
TEST_PROGS
}
)
ENDFOREACH
(
TEST_PROG
${
TEST_PROGS
}
)
...
...
tests/TestVectorize
8
.cpp
→
tests/TestVectorize
Avx
.cpp
View file @
e4c1f73c
...
@@ -34,7 +34,7 @@
...
@@ -34,7 +34,7 @@
*/
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/internal/vectorize
8
.h"
#include "openmm/internal/vectorize
Avx
.h"
#include <iostream>
#include <iostream>
#include "TestVectorizeGeneric.h"
#include "TestVectorizeGeneric.h"
...
...
tests/TestVectorizeAvx2.cpp
0 → 100644
View file @
e4c1f73c
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2014-2015 Stanford University and the Authors. *
* Authors: Daniel Towner *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests vectorized operations.
*/
#include "openmm/internal/AssertionUtilities.h"
#include <iostream>
#ifndef __AVX2__
int
main
()
{
std
::
cout
<<
"AVX2 CPU is not supported. Exiting."
<<
std
::
endl
;
return
0
;
}
#else
#include "openmm/internal/vectorizeAvx2.h"
#include "TestVectorizeGeneric.h"
using
namespace
OpenMM
;
int
main
(
int
argc
,
char
*
argv
[])
{
try
{
if
(
!
isAvx2Supported
())
{
std
::
cout
<<
"CPU is not supported. Exiting."
<<
std
::
endl
;
return
0
;
}
TestFvec
<
fvecAvx2
>::
testAll
();
}
catch
(
const
std
::
exception
&
e
)
{
std
::
cout
<<
"exception: "
<<
e
.
what
()
<<
std
::
endl
;
return
1
;
}
std
::
cout
<<
"Done"
<<
std
::
endl
;
return
0
;
}
#endif
\ No newline at end of file
tests/TestVectorizeGeneric.h
View file @
e4c1f73c
...
@@ -338,12 +338,10 @@ void TestFvec<FVEC>::testBinaryOps() const {
...
@@ -338,12 +338,10 @@ void TestFvec<FVEC>::testBinaryOps() const {
ASSERT_VEC_ALMOST_EQUAL
(
f
/
v0
,
applyBinaryFn
(
fdup
,
v0
,
std
::
divides
<
float
>
()));
ASSERT_VEC_ALMOST_EQUAL
(
f
/
v0
,
applyBinaryFn
(
fdup
,
v0
,
std
::
divides
<
float
>
()));
// Binary functions.
// Binary functions.
using
std
::
min
;
using
std
::
max
;
ASSERT_VEC_EQUAL
(
min
(
v0
,
v1
),
ASSERT_VEC_EQUAL
(
min
(
v0
,
v1
),
applyBinaryFn
(
v0
,
v1
,
[](
float
x
,
float
y
)
{
return
min
(
x
,
y
);
}));
applyBinaryFn
(
v0
,
v1
,
[](
float
x
,
float
y
)
{
return
std
::
min
<
float
>
(
x
,
y
);
}));
ASSERT_VEC_EQUAL
(
max
(
v0
,
v1
),
ASSERT_VEC_EQUAL
(
max
(
v0
,
v1
),
applyBinaryFn
(
v0
,
v1
,
[](
float
x
,
float
y
)
{
return
max
(
x
,
y
);
}));
applyBinaryFn
(
v0
,
v1
,
[](
float
x
,
float
y
)
{
return
std
::
max
<
float
>
(
x
,
y
);
}));
}
}
template
<
typename
FVEC
>
template
<
typename
FVEC
>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment