Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
43b669e9
Commit
43b669e9
authored
Jul 24, 2014
by
peastman
Browse files
Created PNaCl implementation of vector instructions (not yet optimized!)
parent
86a8c924
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
516 additions
and
8 deletions
+516
-8
CMakeLists.txt
CMakeLists.txt
+6
-4
openmmapi/include/openmm/internal/vectorize.h
openmmapi/include/openmm/internal/vectorize.h
+5
-1
openmmapi/include/openmm/internal/vectorize_pnacl.h
openmmapi/include/openmm/internal/vectorize_pnacl.h
+311
-0
openmmapi/include/openmm/internal/vectorize_sse.h
openmmapi/include/openmm/internal/vectorize_sse.h
+2
-2
tests/CMakeLists.txt
tests/CMakeLists.txt
+5
-1
tests/TestVectorize.cpp
tests/TestVectorize.cpp
+187
-0
No files found.
CMakeLists.txt
View file @
43b669e9
...
@@ -115,7 +115,7 @@ ELSE( CMAKE_SIZEOF_VOID_P EQUAL 8 )
...
@@ -115,7 +115,7 @@ ELSE( CMAKE_SIZEOF_VOID_P EQUAL 8 )
SET
(
LIB64
)
SET
(
LIB64
)
ENDIF
(
CMAKE_SIZEOF_VOID_P EQUAL 8
)
ENDIF
(
CMAKE_SIZEOF_VOID_P EQUAL 8
)
IF
(
APPLE
)
IF
(
APPLE
AND
(
NOT PNACL
)
)
# Build universal binaries compatible with OS X 10.7
# Build universal binaries compatible with OS X 10.7
IF
(
NOT CMAKE_OSX_DEPLOYMENT_TARGET
)
IF
(
NOT CMAKE_OSX_DEPLOYMENT_TARGET
)
SET
(
CMAKE_OSX_DEPLOYMENT_TARGET
"10.7"
CACHE STRING
"The minimum version of OS X to support"
FORCE
)
SET
(
CMAKE_OSX_DEPLOYMENT_TARGET
"10.7"
CACHE STRING
"The minimum version of OS X to support"
FORCE
)
...
@@ -127,13 +127,13 @@ IF (APPLE)
...
@@ -127,13 +127,13 @@ IF (APPLE)
# Improve the linking behavior of Mac libraries
# Improve the linking behavior of Mac libraries
SET
(
CMAKE_INSTALL_NAME_DIR
"@rpath"
)
SET
(
CMAKE_INSTALL_NAME_DIR
"@rpath"
)
SET
(
EXTRA_COMPILE_FLAGS
"-msse2 -stdlib=libc++"
)
SET
(
EXTRA_COMPILE_FLAGS
"-msse2 -stdlib=libc++"
)
ELSE
(
APPLE
)
ELSE
(
APPLE
AND
(
NOT PNACL
)
)
IF
(
MSVC OR ANDROID OR PNACL
)
IF
(
MSVC OR ANDROID OR PNACL
)
SET
(
EXTRA_COMPILE_FLAGS
)
SET
(
EXTRA_COMPILE_FLAGS
)
ELSE
(
MSVC OR ANDROID OR PNACL
)
ELSE
(
MSVC OR ANDROID OR PNACL
)
SET
(
EXTRA_COMPILE_FLAGS
"-msse2"
)
SET
(
EXTRA_COMPILE_FLAGS
"-msse2"
)
ENDIF
(
MSVC OR ANDROID OR PNACL
)
ENDIF
(
MSVC OR ANDROID OR PNACL
)
ENDIF
(
APPLE
)
ENDIF
(
APPLE
AND
(
NOT PNACL
)
)
IF
(
UNIX AND NOT CMAKE_BUILD_TYPE
)
IF
(
UNIX AND NOT CMAKE_BUILD_TYPE
)
SET
(
CMAKE_BUILD_TYPE Release CACHE STRING
"Debug or Release build"
FORCE
)
SET
(
CMAKE_BUILD_TYPE Release CACHE STRING
"Debug or Release build"
FORCE
)
...
@@ -307,7 +307,9 @@ ENDIF(OPENMM_BUILD_C_AND_FORTRAN_WRAPPERS)
...
@@ -307,7 +307,9 @@ ENDIF(OPENMM_BUILD_C_AND_FORTRAN_WRAPPERS)
# On Linux need to link to libdl
# On Linux need to link to libdl
FIND_LIBRARY
(
DL_LIBRARY dl
)
FIND_LIBRARY
(
DL_LIBRARY dl
)
IF
(
DL_LIBRARY
)
IF
(
DL_LIBRARY
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
DL_LIBRARY
}
${
PTHREADS_LIB
}
)
IF
(
OPENMM_BUILD_SHARED_LIB
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
DL_LIBRARY
}
${
PTHREADS_LIB
}
)
ENDIF
(
OPENMM_BUILD_SHARED_LIB
)
IF
(
OPENMM_BUILD_STATIC_LIB
)
IF
(
OPENMM_BUILD_STATIC_LIB
)
TARGET_LINK_LIBRARIES
(
${
STATIC_TARGET
}
${
DL_LIBRARY
}
${
PTHREADS_LIB
}
)
TARGET_LINK_LIBRARIES
(
${
STATIC_TARGET
}
${
DL_LIBRARY
}
${
PTHREADS_LIB
}
)
ENDIF
(
OPENMM_BUILD_STATIC_LIB
)
ENDIF
(
OPENMM_BUILD_STATIC_LIB
)
...
...
openmmapi/include/openmm/internal/vectorize.h
View file @
43b669e9
...
@@ -35,7 +35,11 @@
...
@@ -35,7 +35,11 @@
#if defined(__ANDROID__)
#if defined(__ANDROID__)
#include "vectorize_neon.h"
#include "vectorize_neon.h"
#else
#else
#include "vectorize_sse.h"
#if defined(__PNACL__)
#include "vectorize_pnacl.h"
#else
#include "vectorize_sse.h"
#endif
#endif
#endif
#endif
/*OPENMM_VECTORIZE_H_*/
#endif
/*OPENMM_VECTORIZE_H_*/
openmmapi/include/openmm/internal/vectorize_pnacl.h
0 → 100644
View file @
43b669e9
#ifndef OPENMM_VECTORIZE_PNACL_H_
#define OPENMM_VECTORIZE_PNACL_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2014 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include <algorithm>
#include <cmath>
#include <cstdlib>
// This file defines classes and functions to simplify vectorizing code with portable SIMD vectors.
/**
 * Determine whether ivec4 and fvec4 are supported on this processor.
 *
 * This implementation unconditionally reports support.
 *
 * Declared inline, like the other free functions in this header, so that a
 * translation unit which includes the header without calling this function
 * does not get an unused-function warning.
 *
 * @return always true
 */
static inline bool isVec4Supported() {
    return true;
}
typedef
float
__m128
__attribute__
((
vector_size
(
16
)));
typedef
int
__m128i
__attribute__
((
vector_size
(
16
)));
class
ivec4
;
/**
* A four element vector of floats.
*/
class
fvec4
{
public:
__m128
val
;
fvec4
()
{}
fvec4
(
float
v
)
{
val
=
{
v
,
v
,
v
,
v
};
}
fvec4
(
float
v1
,
float
v2
,
float
v3
,
float
v4
)
{
val
=
{
v1
,
v2
,
v3
,
v4
};
}
fvec4
(
__m128
v
)
:
val
(
v
)
{}
fvec4
(
const
float
*
v
)
{
val
=
*
((
__m128
*
)
v
);
}
operator
__m128
()
const
{
return
val
;
}
float
operator
[](
int
i
)
const
{
return
val
[
i
];
}
void
store
(
float
*
v
)
const
{
*
((
__m128
*
)
v
)
=
val
;
}
fvec4
operator
+
(
const
fvec4
&
other
)
const
{
return
val
+
other
;
}
fvec4
operator
-
(
const
fvec4
&
other
)
const
{
return
val
-
other
;
}
fvec4
operator
*
(
const
fvec4
&
other
)
const
{
return
val
*
other
;
}
fvec4
operator
/
(
const
fvec4
&
other
)
const
{
return
val
/
other
;
}
void
operator
+=
(
const
fvec4
&
other
)
{
val
=
val
+
other
;
}
void
operator
-=
(
const
fvec4
&
other
)
{
val
=
val
-
other
;
}
void
operator
*=
(
const
fvec4
&
other
)
{
val
=
val
*
other
;
}
void
operator
/=
(
const
fvec4
&
other
)
{
val
=
val
/
other
;
}
fvec4
operator
-
()
const
{
return
-
val
;
}
fvec4
operator
&
(
const
fvec4
&
other
)
const
{
return
(
fvec4
)
(((
__m128i
)
val
)
&
((
__m128i
)
other
.
val
));
}
fvec4
operator
|
(
const
fvec4
&
other
)
const
{
return
(
fvec4
)
(((
__m128i
)
val
)
|
((
__m128i
)
other
.
val
));
}
fvec4
operator
==
(
const
fvec4
&
other
)
const
{
return
(
val
==
other
.
val
);
}
fvec4
operator
!=
(
const
fvec4
&
other
)
const
{
return
(
val
!=
other
.
val
);
}
fvec4
operator
>
(
const
fvec4
&
other
)
const
{
return
(
val
>
other
.
val
);
}
fvec4
operator
<
(
const
fvec4
&
other
)
const
{
return
(
val
<
other
.
val
);
}
fvec4
operator
>=
(
const
fvec4
&
other
)
const
{
return
(
val
>=
other
.
val
);
}
fvec4
operator
<=
(
const
fvec4
&
other
)
const
{
return
(
val
<=
other
.
val
);
}
operator
ivec4
()
const
;
};
/**
* A four element vector of ints.
*/
class
ivec4
{
public:
__m128i
val
;
ivec4
()
{}
ivec4
(
int
v
)
{
val
=
{
v
,
v
,
v
,
v
};
}
ivec4
(
int
v1
,
int
v2
,
int
v3
,
int
v4
)
{
val
=
{
v1
,
v2
,
v3
,
v4
};
}
ivec4
(
__m128i
v
)
:
val
(
v
)
{}
ivec4
(
const
int
*
v
)
{
val
=
*
((
__m128
*
)
v
);
}
operator
__m128i
()
const
{
return
val
;
}
int
operator
[](
int
i
)
const
{
return
val
[
i
];
}
void
store
(
int
*
v
)
const
{
*
((
__m128
*
)
v
)
=
val
;
}
ivec4
operator
+
(
const
ivec4
&
other
)
const
{
return
val
+
other
;
}
ivec4
operator
-
(
const
ivec4
&
other
)
const
{
return
val
-
other
;
}
ivec4
operator
*
(
const
ivec4
&
other
)
const
{
return
val
*
other
;
}
void
operator
+=
(
const
ivec4
&
other
)
{
val
=
val
+
other
;
}
void
operator
-=
(
const
ivec4
&
other
)
{
val
=
val
-
other
;
}
void
operator
*=
(
const
ivec4
&
other
)
{
val
=
val
*
other
;
}
ivec4
operator
-
()
const
{
return
-
val
;
}
ivec4
operator
&
(
const
ivec4
&
other
)
const
{
return
val
&
other
.
val
;
}
ivec4
operator
|
(
const
ivec4
&
other
)
const
{
return
val
|
other
.
val
;
}
ivec4
operator
==
(
const
ivec4
&
other
)
const
{
return
(
val
==
other
.
val
);
}
ivec4
operator
!=
(
const
ivec4
&
other
)
const
{
return
(
val
!=
other
.
val
);
}
ivec4
operator
>
(
const
ivec4
&
other
)
const
{
return
(
val
>
other
.
val
);
}
ivec4
operator
<
(
const
ivec4
&
other
)
const
{
return
(
val
<
other
.
val
);
}
ivec4
operator
>=
(
const
ivec4
&
other
)
const
{
return
(
val
>=
other
.
val
);
}
ivec4
operator
<=
(
const
ivec4
&
other
)
const
{
return
(
val
<=
other
.
val
);
}
operator
fvec4
()
const
;
};
// Conversion operators.

/**
 * Convert the four float elements to ints, element by element, using
 * __builtin_convertvector.
 */
inline fvec4::operator ivec4() const {
    const __m128i asInts = __builtin_convertvector(val, __m128i);
    return ivec4(asInts);
}
/**
 * Convert the four int elements to floats, element by element, using
 * __builtin_convertvector.
 */
inline ivec4::operator fvec4() const {
    const __m128 asFloats = __builtin_convertvector(val, __m128);
    return fvec4(asFloats);
}
// Functions that operate on fvec4s.

/**
 * Apply std::floor to each element of v.
 */
static inline fvec4 floor(const fvec4& v) {
    const float e0 = std::floor(v[0]);
    const float e1 = std::floor(v[1]);
    const float e2 = std::floor(v[2]);
    const float e3 = std::floor(v[3]);
    return fvec4(e0, e1, e2, e3);
}
/**
 * Apply std::ceil to each element of v.
 */
static inline fvec4 ceil(const fvec4& v) {
    const float e0 = std::ceil(v[0]);
    const float e1 = std::ceil(v[1]);
    const float e2 = std::ceil(v[2]);
    const float e3 = std::ceil(v[3]);
    return fvec4(e0, e1, e2, e3);
}
/**
 * Apply std::round to each element of v.
 */
static inline fvec4 round(const fvec4& v) {
    const float e0 = std::round(v[0]);
    const float e1 = std::round(v[1]);
    const float e2 = std::round(v[2]);
    const float e3 = std::round(v[3]);
    return fvec4(e0, e1, e2, e3);
}
/**
 * Compute the element-wise minimum of two float vectors with std::min.
 */
static inline fvec4 min(const fvec4& v1, const fvec4& v2) {
    const float e0 = std::min(v1[0], v2[0]);
    const float e1 = std::min(v1[1], v2[1]);
    const float e2 = std::min(v1[2], v2[2]);
    const float e3 = std::min(v1[3], v2[3]);
    return fvec4(e0, e1, e2, e3);
}
/**
 * Compute the element-wise maximum of two float vectors with std::max.
 */
static inline fvec4 max(const fvec4& v1, const fvec4& v2) {
    const float e0 = std::max(v1[0], v2[0]);
    const float e1 = std::max(v1[1], v2[1]);
    const float e2 = std::max(v1[2], v2[2]);
    const float e3 = std::max(v1[3], v2[3]);
    return fvec4(e0, e1, e2, e3);
}
/**
 * Apply std::abs to each element of a float vector.
 */
static inline fvec4 abs(const fvec4& v) {
    const float e0 = std::abs(v[0]);
    const float e1 = std::abs(v[1]);
    const float e2 = std::abs(v[2]);
    const float e3 = std::abs(v[3]);
    return fvec4(e0, e1, e2, e3);
}
/**
 * Apply std::sqrt to each element of v.
 */
static inline fvec4 sqrt(const fvec4& v) {
    const float e0 = std::sqrt(v[0]);
    const float e1 = std::sqrt(v[1]);
    const float e2 = std::sqrt(v[2]);
    const float e3 = std::sqrt(v[3]);
    return fvec4(e0, e1, e2, e3);
}
static
inline
float
dot3
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
fvec4
r
=
v1
*
v2
;
return
r
[
0
]
+
r
[
1
]
+
r
[
2
];
}
static
inline
float
dot4
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
fvec4
r
=
v1
*
v2
;
return
r
[
0
]
+
r
[
1
]
+
r
[
2
]
+
r
[
3
];
}
/**
 * Transpose a 4x4 matrix in place.
 *
 * v1..v4 are treated as the rows of the matrix; on return they hold its
 * columns (v1 holds the first element of each input, v2 the second, etc.).
 * In __builtin_shufflevector, indices 0-3 select from the first operand and
 * indices 4-7 select from the second.
 */
static inline void transpose(fvec4& v1, fvec4& v2, fvec4& v3, fvec4& v4) {
    // Stage 1: interleave the even-indexed and odd-indexed elements of each pair of rows.
    const __m128 even12 = __builtin_shufflevector(v1.val, v2.val, 0, 4, 2, 6);
    const __m128 odd12 = __builtin_shufflevector(v1.val, v2.val, 1, 5, 3, 7);
    const __m128 even34 = __builtin_shufflevector(v3.val, v4.val, 0, 4, 2, 6);
    const __m128 odd34 = __builtin_shufflevector(v3.val, v4.val, 1, 5, 3, 7);

    // Stage 2: join the matching halves of the two interleaved pairs to form each column.
    v1 = __builtin_shufflevector(even12, even34, 0, 1, 4, 5);
    v2 = __builtin_shufflevector(odd12, odd34, 0, 1, 4, 5);
    v3 = __builtin_shufflevector(even12, even34, 2, 3, 6, 7);
    v4 = __builtin_shufflevector(odd12, odd34, 2, 3, 6, 7);
}
// Functions that operate on ivec4s.

/**
 * Compute the element-wise minimum of two int vectors with std::min.
 */
static inline ivec4 min(const ivec4& v1, const ivec4& v2) {
    const int e0 = std::min(v1[0], v2[0]);
    const int e1 = std::min(v1[1], v2[1]);
    const int e2 = std::min(v1[2], v2[2]);
    const int e3 = std::min(v1[3], v2[3]);
    return ivec4(e0, e1, e2, e3);
}
/**
 * Compute the element-wise maximum of two int vectors with std::max.
 */
static inline ivec4 max(const ivec4& v1, const ivec4& v2) {
    const int e0 = std::max(v1[0], v2[0]);
    const int e1 = std::max(v1[1], v2[1]);
    const int e2 = std::max(v1[2], v2[2]);
    const int e3 = std::max(v1[3], v2[3]);
    return ivec4(e0, e1, e2, e3);
}
/**
 * Apply std::abs to each element of an int vector.
 */
static inline ivec4 abs(const ivec4& v) {
    const int e0 = std::abs(v[0]);
    const int e1 = std::abs(v[1]);
    const int e2 = std::abs(v[2]);
    const int e3 = std::abs(v[3]);
    return ivec4(e0, e1, e2, e3);
}
/**
 * Report whether at least one element of v is nonzero.
 */
static inline bool any(const ivec4& v) {
    for (int lane = 0; lane < 4; lane++) {
        if (v[lane] != 0)
            return true;
    }
    return false;
}
// Mathematical operators involving a scalar and a vector.

/**
 * Add a vector to a scalar: the scalar is copied into all four elements first.
 */
static inline fvec4 operator+(float v1, const fvec4& v2) {
    const fvec4 broadcast(v1);
    return broadcast + v2;
}
/**
 * Subtract a vector from a scalar: the scalar is copied into all four elements first.
 */
static inline fvec4 operator-(float v1, const fvec4& v2) {
    const fvec4 broadcast(v1);
    return broadcast - v2;
}
/**
 * Multiply a scalar by a vector: the scalar is copied into all four elements first.
 */
static inline fvec4 operator*(float v1, const fvec4& v2) {
    const fvec4 broadcast(v1);
    return broadcast * v2;
}
/**
 * Divide a scalar by a vector: the scalar is copied into all four elements first.
 */
static inline fvec4 operator/(float v1, const fvec4& v2) {
    const fvec4 broadcast(v1);
    return broadcast / v2;
}
// Operations for blending fvec4s based on an ivec4.

/**
 * Select elements from two float vectors according to a mask.
 *
 * For each element, the result takes the value from v2 where the
 * corresponding mask element is nonzero and from v1 where it is zero.
 */
static inline fvec4 blend(const fvec4& v1, const fvec4& v2, const ivec4& mask) {
    const float e0 = (mask[0] != 0) ? v2[0] : v1[0];
    const float e1 = (mask[1] != 0) ? v2[1] : v1[1];
    const float e2 = (mask[2] != 0) ? v2[2] : v1[2];
    const float e3 = (mask[3] != 0) ? v2[3] : v1[3];
    return fvec4(e0, e1, e2, e3);
}
#endif
/*OPENMM_VECTORIZE_PNACL_H_*/
openmmapi/include/openmm/internal/vectorize_sse.h
View file @
43b669e9
...
@@ -159,7 +159,7 @@ public:
...
@@ -159,7 +159,7 @@ public:
return
_mm_sub_epi32
(
val
,
other
);
return
_mm_sub_epi32
(
val
,
other
);
}
}
ivec4
operator
*
(
const
ivec4
&
other
)
const
{
ivec4
operator
*
(
const
ivec4
&
other
)
const
{
return
_mm_mul_epi32
(
val
,
other
);
return
_mm_mul
lo
_epi32
(
val
,
other
);
}
}
void
operator
+=
(
const
ivec4
&
other
)
{
void
operator
+=
(
const
ivec4
&
other
)
{
val
=
_mm_add_epi32
(
val
,
other
);
val
=
_mm_add_epi32
(
val
,
other
);
...
@@ -168,7 +168,7 @@ public:
...
@@ -168,7 +168,7 @@ public:
val
=
_mm_sub_epi32
(
val
,
other
);
val
=
_mm_sub_epi32
(
val
,
other
);
}
}
void
operator
*=
(
const
ivec4
&
other
)
{
void
operator
*=
(
const
ivec4
&
other
)
{
val
=
_mm_mul_epi32
(
val
,
other
);
val
=
_mm_mul
lo
_epi32
(
val
,
other
);
}
}
ivec4
operator
-
()
const
{
ivec4
operator
-
()
const
{
return
_mm_sub_epi32
(
_mm_set1_epi32
(
0
),
val
);
return
_mm_sub_epi32
(
_mm_set1_epi32
(
0
),
val
);
...
...
tests/CMakeLists.txt
View file @
43b669e9
...
@@ -14,7 +14,11 @@ FOREACH(TEST_PROG ${TEST_PROGS})
...
@@ -14,7 +14,11 @@ FOREACH(TEST_PROG ${TEST_PROGS})
ELSE
(
OPENMM_BUILD_SHARED_LIB
)
ELSE
(
OPENMM_BUILD_SHARED_LIB
)
TARGET_LINK_LIBRARIES
(
${
TEST_ROOT
}
${
STATIC_TARGET
}
)
TARGET_LINK_LIBRARIES
(
${
TEST_ROOT
}
${
STATIC_TARGET
}
)
ENDIF
(
OPENMM_BUILD_SHARED_LIB
)
ENDIF
(
OPENMM_BUILD_SHARED_LIB
)
SET_TARGET_PROPERTIES
(
${
TEST_ROOT
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
)
SET
(
EXTRA_TEST_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
)
IF
((
${
TEST_ROOT
}
MATCHES TestVectorize
)
AND
NOT
(
MSVC OR ANDROID OR PNACL
))
SET
(
EXTRA_TEST_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1"
)
ENDIF
((
${
TEST_ROOT
}
MATCHES TestVectorize
)
AND
NOT
(
MSVC OR ANDROID OR PNACL
))
SET_TARGET_PROPERTIES
(
${
TEST_ROOT
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_TEST_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_TEST_FLAGS
}
"
)
ADD_TEST
(
${
TEST_ROOT
}
${
EXECUTABLE_OUTPUT_PATH
}
/
${
TEST_ROOT
}
)
ADD_TEST
(
${
TEST_ROOT
}
${
EXECUTABLE_OUTPUT_PATH
}
/
${
TEST_ROOT
}
)
ENDFOREACH
(
TEST_PROG
${
TEST_PROGS
}
)
ENDFOREACH
(
TEST_PROG
${
TEST_PROGS
}
)
tests/TestVectorize.cpp
0 → 100644
View file @
43b669e9
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2014 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests vectorized operations.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/internal/vectorize.h"
#include <iostream>
using
namespace
OpenMM
;
using
namespace
std
;
/**
 * Assert that the four elements of a vector match the expected values to
 * within an absolute tolerance of 1e-6, calling throwException() with a
 * message listing the expected and found values otherwise.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement and can be used safely in an unbraced if/else; the caller supplies
 * the trailing semicolon.  Each element is tested as !(difference <= 1e-6)
 * so that a NaN element is reported as a mismatch instead of passing silently.
 *
 * Note that "found" is evaluated more than once.
 */
#define ASSERT_VEC4_EQUAL(found, expected0, expected1, expected2, expected3) \
    do { \
        if (!(std::abs((found)[0]-(expected0)) <= 1e-6) || \
            !(std::abs((found)[1]-(expected1)) <= 1e-6) || \
            !(std::abs((found)[2]-(expected2)) <= 1e-6) || \
            !(std::abs((found)[3]-(expected3)) <= 1e-6)) { \
            std::stringstream details; \
            details << " Expected ("<<(expected0)<<","<<(expected1)<<","<<(expected2)<<","<<(expected3)<<"), found ("<<(found)[0]<<","<<(found)[1]<<","<<(found)[2]<<","<<(found)[3]<<")"; \
            throwException(__FILE__, __LINE__, details.str()); \
        } \
    } while (0)
void
testLoadStore
()
{
fvec4
f1
(
2.0
);
ivec4
i1
(
3
);
ASSERT_VEC4_EQUAL
(
f1
,
2.0
,
2.0
,
2.0
,
2.0
);
ASSERT_VEC4_EQUAL
(
i1
,
3
,
3
,
3
,
3
);
fvec4
f2
(
2.5
,
3.0
,
3.5
,
4.0
);
ivec4
i2
(
2
,
3
,
4
,
5
);
ASSERT_VEC4_EQUAL
(
f2
,
2.5
,
3.0
,
3.5
,
4.0
);
ASSERT_VEC4_EQUAL
(
i2
,
2
,
3
,
4
,
5
);
float
farray
[
4
];
int
iarray
[
4
];
f2
.
store
(
farray
);
i2
.
store
(
iarray
);
fvec4
f3
(
farray
);
ivec4
i3
(
iarray
);
ASSERT_VEC4_EQUAL
(
f3
,
2.5
,
3.0
,
3.5
,
4.0
);
ASSERT_VEC4_EQUAL
(
i3
,
2
,
3
,
4
,
5
);
ASSERT_EQUAL
(
f3
[
0
],
2.5
);
ASSERT_EQUAL
(
f3
[
1
],
3.0
);
ASSERT_EQUAL
(
f3
[
2
],
3.5
);
ASSERT_EQUAL
(
f3
[
3
],
4.0
);
ASSERT_EQUAL
(
i3
[
0
],
2
);
ASSERT_EQUAL
(
i3
[
1
],
3
);
ASSERT_EQUAL
(
i3
[
2
],
4
);
ASSERT_EQUAL
(
i3
[
3
],
5
);
}
void
testArithmetic
()
{
fvec4
f1
(
0.5
,
1.0
,
1.5
,
2.0
);
ASSERT_VEC4_EQUAL
(
f1
+
fvec4
(
1
,
2
,
3
,
4
),
1.5
,
3
,
4.5
,
6
);
ASSERT_VEC4_EQUAL
(
f1
-
fvec4
(
1
,
2
,
3
,
4
),
-
0.5
,
-
1.0
,
-
1.5
,
-
2.0
);
ASSERT_VEC4_EQUAL
(
f1
*
fvec4
(
1
,
2
,
3
,
4
),
0.5
,
2.0
,
4.5
,
8.0
);
ASSERT_VEC4_EQUAL
(
f1
/
fvec4
(
1
,
2
,
3
,
4
),
0.5
,
0.5
,
0.5
,
0.5
);
ivec4
i1
(
1
,
2
,
3
,
4
);
ASSERT_VEC4_EQUAL
(
i1
+
ivec4
(
5
,
2
,
1
,
3
),
6
,
4
,
4
,
7
);
ASSERT_VEC4_EQUAL
(
i1
-
ivec4
(
5
,
2
,
1
,
3
),
-
4
,
0
,
2
,
1
);
ASSERT_VEC4_EQUAL
(
i1
*
ivec4
(
5
,
2
,
1
,
3
),
5
,
4
,
3
,
12
);
f1
=
fvec4
(
0.5
,
1.0
,
1.5
,
2.0
);
f1
+=
fvec4
(
1
,
2
,
3
,
4
);
ASSERT_VEC4_EQUAL
(
f1
,
1.5
,
3
,
4.5
,
6
);
f1
=
fvec4
(
0.5
,
1.0
,
1.5
,
2.0
);
f1
-=
fvec4
(
1
,
2
,
3
,
4
);
ASSERT_VEC4_EQUAL
(
f1
,
-
0.5
,
-
1.0
,
-
1.5
,
-
2.0
);
f1
=
fvec4
(
0.5
,
1.0
,
1.5
,
2.0
);
f1
*=
fvec4
(
1
,
2
,
3
,
4
);
ASSERT_VEC4_EQUAL
(
f1
,
0.5
,
2.0
,
4.5
,
8.0
);
f1
=
fvec4
(
0.5
,
1.0
,
1.5
,
2.0
);
f1
/=
fvec4
(
1
,
2
,
3
,
4
);
ASSERT_VEC4_EQUAL
(
f1
,
0.5
,
0.5
,
0.5
,
0.5
);
i1
=
ivec4
(
1
,
2
,
3
,
4
);
i1
+=
ivec4
(
5
,
2
,
1
,
3
);
ASSERT_VEC4_EQUAL
(
i1
,
6
,
4
,
4
,
7
);
i1
=
ivec4
(
1
,
2
,
3
,
4
);
i1
-=
ivec4
(
5
,
2
,
1
,
3
);
ASSERT_VEC4_EQUAL
(
i1
,
-
4
,
0
,
2
,
1
);
i1
=
ivec4
(
1
,
2
,
3
,
4
);
i1
*=
ivec4
(
5
,
2
,
1
,
3
);
ASSERT_VEC4_EQUAL
(
i1
,
5
,
4
,
3
,
12
);
}
void
testLogic
()
{
int
allBits
=
-
1
;
float
allBitsf
=
*
((
float
*
)
&
allBits
);
ivec4
mask
(
0
,
allBits
,
allBits
,
0
);
fvec4
fmask
(
0
,
allBitsf
,
allBitsf
,
0
);;
fvec4
f1
(
0.5
,
1.0
,
1.5
,
2.0
);
ivec4
i1
(
1
,
2
,
3
,
4
);
ASSERT_VEC4_EQUAL
(
f1
&
fmask
,
0
,
1.0
,
1.5
,
0
);
fvec4
temp
=
f1
|
fmask
;
ASSERT_EQUAL
(
0.5
,
temp
[
0
]);
ASSERT
(
temp
[
1
]
!=
temp
[
1
]);
// All bits set, which is nan
ASSERT
(
temp
[
2
]
!=
temp
[
2
]);
// All bits set, which is nan
ASSERT_EQUAL
(
2.0
,
temp
[
3
]);
ASSERT_VEC4_EQUAL
(
i1
&
mask
,
0
,
2
,
3
,
0
);
ASSERT_VEC4_EQUAL
(
i1
|
mask
,
1
,
allBits
,
allBits
,
4
);
}
void
testComparisons
()
{
fvec4
fmask
(
1.5
,
1.5
,
1.5
,
1.5
);
ASSERT_VEC4_EQUAL
((
fvec4
(
1.0
,
1.5
,
3.0
,
2.2
)
==
fvec4
(
1.1
,
1.5
,
3.0
,
2.1
))
&
fmask
,
0.0
,
1.5
,
1.5
,
0.0
);
ASSERT_VEC4_EQUAL
((
fvec4
(
1.0
,
1.5
,
3.0
,
2.2
)
!=
fvec4
(
1.1
,
1.5
,
3.0
,
2.1
))
&
fmask
,
1.5
,
0.0
,
0.0
,
1.5
);
ASSERT_VEC4_EQUAL
((
fvec4
(
1.0
,
1.5
,
3.0
,
2.2
)
<
fvec4
(
1.1
,
1.5
,
3.0
,
2.1
))
&
fmask
,
1.5
,
0.0
,
0.0
,
0.0
);
ASSERT_VEC4_EQUAL
((
fvec4
(
1.0
,
1.5
,
3.0
,
2.2
)
>
fvec4
(
1.1
,
1.5
,
3.0
,
2.1
))
&
fmask
,
0.0
,
0.0
,
0.0
,
1.5
);
ASSERT_VEC4_EQUAL
((
fvec4
(
1.0
,
1.5
,
3.0
,
2.2
)
<=
fvec4
(
1.1
,
1.5
,
3.0
,
2.1
))
&
fmask
,
1.5
,
1.5
,
1.5
,
0.0
);
ASSERT_VEC4_EQUAL
((
fvec4
(
1.0
,
1.5
,
3.0
,
2.2
)
>=
fvec4
(
1.1
,
1.5
,
3.0
,
2.1
))
&
fmask
,
0.0
,
1.5
,
1.5
,
1.5
);
fvec4
imask
(
3
,
3
,
3
,
3
);
ASSERT_VEC4_EQUAL
((
ivec4
(
1
,
3
,
7
,
5
)
==
ivec4
(
2
,
3
,
7
,
4
))
&
imask
,
0
,
3
,
3
,
0
);
ASSERT_VEC4_EQUAL
((
ivec4
(
1
,
3
,
7
,
5
)
!=
ivec4
(
2
,
3
,
7
,
4
))
&
imask
,
3
,
0
,
0
,
3
);
ASSERT_VEC4_EQUAL
((
ivec4
(
1
,
3
,
7
,
5
)
<
ivec4
(
2
,
3
,
7
,
4
))
&
imask
,
3
,
0
,
0
,
0
);
ASSERT_VEC4_EQUAL
((
ivec4
(
1
,
3
,
7
,
5
)
>
ivec4
(
2
,
3
,
7
,
4
))
&
imask
,
0
,
0
,
0
,
3
);
ASSERT_VEC4_EQUAL
((
ivec4
(
1
,
3
,
7
,
5
)
<=
ivec4
(
2
,
3
,
7
,
4
))
&
imask
,
3
,
3
,
3
,
0
);
ASSERT_VEC4_EQUAL
((
ivec4
(
1
,
3
,
7
,
5
)
>=
ivec4
(
2
,
3
,
7
,
4
))
&
imask
,
0
,
3
,
3
,
3
);
}
void
testMathFunctions
()
{
fvec4
f1
(
0.4
,
1.9
,
-
1.2
,
-
3.8
);
fvec4
f2
(
1.1
,
1.2
,
1.3
,
-
5.0
);
ASSERT_VEC4_EQUAL
(
floor
(
f1
),
0.0
,
1.0
,
-
2.0
,
-
4.0
);
ASSERT_VEC4_EQUAL
(
ceil
(
f1
),
1.0
,
2.0
,
-
1.0
,
-
3.0
);
ASSERT_VEC4_EQUAL
(
round
(
f1
),
0.0
,
2.0
,
-
1.0
,
-
4.0
);
ASSERT_VEC4_EQUAL
(
abs
(
f1
),
0.4
,
1.9
,
1.2
,
3.8
);
ASSERT_VEC4_EQUAL
(
min
(
f1
,
f2
),
0.4
,
1.2
,
-
1.2
,
-
5.0
);
ASSERT_VEC4_EQUAL
(
max
(
f1
,
f2
),
1.1
,
1.9
,
1.3
,
-
3.8
);
ASSERT_VEC4_EQUAL
(
sqrt
(
fvec4
(
1.5
,
3.1
,
4.0
,
15.0
)),
sqrt
(
1.5
),
sqrt
(
3.1
),
sqrt
(
4.0
),
sqrt
(
15.0
));
ASSERT_EQUAL_TOL
(
f1
[
0
]
*
f2
[
0
]
+
f1
[
1
]
*
f2
[
1
]
+
f1
[
2
]
*
f2
[
2
],
dot3
(
f1
,
f2
),
1e-6
);
ASSERT_EQUAL_TOL
(
f1
[
0
]
*
f2
[
0
]
+
f1
[
1
]
*
f2
[
1
]
+
f1
[
2
]
*
f2
[
2
]
+
f1
[
3
]
*
f2
[
3
],
dot4
(
f1
,
f2
),
1e-6
);
ASSERT
(
any
(
f1
>
0.5
));
ASSERT
(
!
any
(
f1
>
2.0
));
ASSERT_VEC4_EQUAL
(
blend
(
f1
,
f2
,
ivec4
(
-
1
,
0
,
-
1
,
0
)),
1.1
,
1.9
,
1.3
,
-
3.8
);
}
void
testTranspose
()
{
fvec4
f1
(
1.0
,
2.0
,
3.0
,
4.0
);
fvec4
f2
(
5.0
,
6.0
,
7.0
,
8.0
);
fvec4
f3
(
9.0
,
10.0
,
11.0
,
12.0
);
fvec4
f4
(
13.0
,
14.0
,
15.0
,
16.0
);
transpose
(
f1
,
f2
,
f3
,
f4
);
ASSERT_VEC4_EQUAL
(
f1
,
1.0
,
5.0
,
9.0
,
13.0
);
ASSERT_VEC4_EQUAL
(
f2
,
2.0
,
6.0
,
10.0
,
14.0
);
ASSERT_VEC4_EQUAL
(
f3
,
3.0
,
7.0
,
11.0
,
15.0
);
ASSERT_VEC4_EQUAL
(
f4
,
4.0
,
8.0
,
12.0
,
16.0
);
}
/**
 * Run every test case in order.
 *
 * Returns 0 when all tests pass or when vectors are not supported on this
 * CPU, and 1 if any test throws an exception.
 */
int main(int argc, char* argv[]) {
    typedef void (*TestCase)();
    const TestCase testCases[] = {
        testLoadStore,
        testArithmetic,
        testLogic,
        testComparisons,
        testMathFunctions,
        testTranspose
    };
    const int numTestCases = sizeof(testCases)/sizeof(testCases[0]);

    try {
        if (isVec4Supported()) {
            for (int i = 0; i < numTestCases; i++)
                testCases[i]();
        }
        else {
            // Nothing can be tested on this CPU; this is not treated as a failure.
            cout << "CPU is not supported. Exiting." << endl;
            return 0;
        }
    }
    catch (const exception& e) {
        cout << "exception: " << e.what() << endl;
        return 1;
    }
    cout << "Done" << endl;
    return 0;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment