Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
cdfc47e4
Commit
cdfc47e4
authored
Nov 07, 2018
by
peastman
Browse files
Initial support for PPC
parent
689e2bc9
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
600 additions
and
79 deletions
+600
-79
CMakeLists.txt
CMakeLists.txt
+37
-21
cmake_modules/TargetArch.cmake
cmake_modules/TargetArch.cmake
+159
-0
openmmapi/include/openmm/internal/vectorize.h
openmmapi/include/openmm/internal/vectorize.h
+5
-7
openmmapi/include/openmm/internal/vectorize_ppc.h
openmmapi/include/openmm/internal/vectorize_ppc.h
+347
-0
platforms/cpu/sharedTarget/CMakeLists.txt
platforms/cpu/sharedTarget/CMakeLists.txt
+12
-14
platforms/cpu/staticTarget/CMakeLists.txt
platforms/cpu/staticTarget/CMakeLists.txt
+10
-10
tests/CMakeLists.txt
tests/CMakeLists.txt
+30
-27
No files found.
CMakeLists.txt
View file @
cdfc47e4
...
...
@@ -33,6 +33,21 @@ MARK_AS_ADVANCED(DART_ROOT)
# We have custom cmake modules for FindOpenMM and running python tests
SET
(
CMAKE_MODULE_PATH
${
CMAKE_MODULE_PATH
}
"
${
CMAKE_SOURCE_DIR
}
/cmake_modules"
)
# Determine what architecture we are compiling for.
INCLUDE
(
TargetArch
)
target_architecture
(
TARGET_ARCH
)
if
(
"
${
TARGET_ARCH
}
"
MATCHES
"x86_64|i386"
)
set
(
X86 ON
)
endif
()
if
(
"
${
TARGET_ARCH
}
"
MATCHES
"arm"
)
set
(
ARM ON
)
add_compile_definitions
(
__ARM__=1
)
endif
()
if
(
"
${
TARGET_ARCH
}
"
MATCHES
"ppc"
)
set
(
PPC ON
)
add_compile_definitions
(
__PPC__=1
)
endif
()
# Where to install
IF
(
WIN32
)
IF
(
NOT OPENMM_INSTALL_PREFIX
)
...
...
@@ -69,7 +84,10 @@ ENDIF(${CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT})
# The source is organized into subdirectories, but we handle them all from
# this CMakeLists file rather than letting CMake visit them as SUBDIRS.
SET
(
OPENMM_SOURCE_SUBDIRS . openmmapi olla libraries/jama libraries/quern libraries/lepton libraries/sfmt libraries/lbfgs libraries/hilbert libraries/csha1 platforms/reference serialization libraries/irrxml libraries/vecmath
)
SET
(
OPENMM_SOURCE_SUBDIRS . openmmapi olla libraries/jama libraries/quern libraries/lepton libraries/sfmt libraries/lbfgs libraries/hilbert libraries/csha1 platforms/reference serialization libraries/irrxml
)
IF
(
X86 OR ARM
)
SET
(
OPENMM_SOURCE_SUBDIRS
${
OPENMM_SOURCE_SUBDIRS
}
libraries/vecmath
)
ENDIF
()
IF
(
WIN32
)
SET
(
OPENMM_SOURCE_SUBDIRS
${
OPENMM_SOURCE_SUBDIRS
}
libraries/pthreads
)
ELSE
(
WIN32
)
...
...
@@ -82,7 +100,7 @@ ENDIF(WIN32)
SET
(
CMAKE_CXX_STANDARD 11
)
IF
(
APPLE
AND
(
NOT PNACL
)
)
IF
(
APPLE
)
# Build 64 bit binaries compatible with OS X 10.7
IF
(
NOT CMAKE_OSX_DEPLOYMENT_TARGET
)
SET
(
CMAKE_OSX_DEPLOYMENT_TARGET
"10.7"
CACHE STRING
"The minimum version of OS X to support"
FORCE
)
...
...
@@ -102,19 +120,17 @@ IF (APPLE AND (NOT PNACL))
# Improve the linking behavior of Mac libraries
SET
(
CMAKE_INSTALL_NAME_DIR
"@rpath"
)
SET
(
EXTRA_COMPILE_FLAGS
"-msse2 -stdlib=libc++"
)
ELSE
(
APPLE
AND
(
NOT PNACL
))
IF
(
MSVC OR ANDROID OR PNACL
)
SET
(
EXTRA_COMPILE_FLAGS
)
IF
(
MSVC
)
# Use warning level 2, not whatever warning level CMake picked.
STRING
(
REGEX REPLACE
"/W[0-4]"
"/W2"
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
"
)
# Explicitly suppress warnings 4305 and 4244.
SET
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
/wd4305 /wd4244"
)
ENDIF
(
MSVC
)
ELSE
(
MSVC OR ANDROID OR PNACL
)
SET
(
EXTRA_COMPILE_FLAGS
"-msse2"
)
ENDIF
(
MSVC OR ANDROID OR PNACL
)
ENDIF
(
APPLE
AND
(
NOT PNACL
))
ELSEIF
(
MSVC
)
SET
(
EXTRA_COMPILE_FLAGS
)
# Use warning level 2, not whatever warning level CMake picked.
STRING
(
REGEX REPLACE
"/W[0-4]"
"/W2"
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
"
)
# Explicitly suppress warnings 4305 and 4244.
SET
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
/wd4305 /wd4244"
)
ELSEIF
(
X86
)
SET
(
EXTRA_COMPILE_FLAGS
"-msse2"
)
ELSE
()
SET
(
EXTRA_COMPILE_FLAGS
)
ENDIF
()
IF
(
UNIX AND NOT CMAKE_BUILD_TYPE
)
SET
(
CMAKE_BUILD_TYPE Release CACHE STRING
"Debug or Release build"
FORCE
)
...
...
@@ -219,19 +235,19 @@ FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
## OpenMM was previously installed there.
INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
subdir
}
/include
)
ENDFOREACH
(
subdir
)
IF
(
ANDROID OR PNACL
)
SET_SOURCE_FILES_PROPERTIES
(
${
CMAKE_SOURCE_DIR
}
/libraries/sfmt/src/SFMT.cpp PROPERTIES COMPILE_FLAGS
"-UHAVE_SSE2"
)
ELSE
(
ANDROID OR PNACL
)
IF
(
X86
)
SET_SOURCE_FILES_PROPERTIES
(
${
CMAKE_SOURCE_DIR
}
/libraries/sfmt/src/SFMT.cpp PROPERTIES COMPILE_FLAGS
"-DHAVE_SSE2=1"
)
ENDIF
(
ANDROID OR PNACL
)
IF
(
NOT
(
ANDROID OR PNACL
OR
(
WIN32 AND OPENMM_BUILD_STATIC_LIB
)))
ELSE
()
SET_SOURCE_FILES_PROPERTIES
(
${
CMAKE_SOURCE_DIR
}
/libraries/sfmt/src/SFMT.cpp PROPERTIES COMPILE_FLAGS
"-UHAVE_SSE2"
)
ENDIF
()
IF
(
X86 AND
NOT
(
WIN32 AND OPENMM_BUILD_STATIC_LIB
))
FILE
(
GLOB src_files
${
CMAKE_CURRENT_SOURCE_DIR
}
/libraries/asmjit/*/*.cpp
)
FILE
(
GLOB incl_files
${
CMAKE_CURRENT_SOURCE_DIR
}
/libraries/asmjit/*.h
)
SET
(
SOURCE_FILES
${
SOURCE_FILES
}
${
src_files
}
)
SET
(
SOURCE_INCLUDE_FILES
${
SOURCE_INCLUDE_FILES
}
${
incl_files
}
)
INCLUDE_DIRECTORIES
(
BEFORE
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/libraries/asmjit"
)
SET
(
EXTRA_COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DLEPTON_USE_JIT"
)
ENDIF
(
NOT
(
ANDROID OR PNACL
OR
(
WIN32 AND OPENMM_BUILD_STATIC_LIB
))
)
ENDIF
(
)
# If API wrappers are being generated, add them to the build.
SET
(
OPENMM_BUILD_C_AND_FORTRAN_WRAPPERS ON CACHE BOOL
"Build wrappers for C and Fortran"
)
...
...
cmake_modules/TargetArch.cmake
0 → 100644
View file @
cdfc47e4
# This is from Solar CMake (https://github.com/axr/solar-cmake).
#
# Copyright (c) 2012 Petroules Corporation. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# OF THE POSSIBILITY OF SUCH DAMAGE.
# Based on the Qt 5 processor detection code, so should be very accurate
# https://qt.gitorious.org/qt/qtbase/blobs/master/src/corelib/global/qprocessordetection.h
# Currently handles arm (v5, v6, v7), x86 (32/64), ia64, and ppc (32/64)
# Regarding POWER/PowerPC, just as is noted in the Qt source,
# "There are many more known variants/revisions that we do not handle/detect."
# C source used purely as a preprocessor probe: exactly one "#error cmake_ARCH <name>"
# directive is selected by the compiler's predefined macros, and target_architecture()
# parses <name> out of the resulting diagnostics. The "unknown" marker lives in the
# final #else so that it is only emitted when no known architecture matched.
set(archdetect_c_code "
#if defined(__arm__) || defined(__TARGET_ARCH_ARM)
    #if defined(__ARM_ARCH_7__) \\
        || defined(__ARM_ARCH_7A__) \\
        || defined(__ARM_ARCH_7R__) \\
        || defined(__ARM_ARCH_7M__) \\
        || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM-0 >= 7)
        #error cmake_ARCH armv7
    #elif defined(__ARM_ARCH_6__) \\
        || defined(__ARM_ARCH_6J__) \\
        || defined(__ARM_ARCH_6T2__) \\
        || defined(__ARM_ARCH_6Z__) \\
        || defined(__ARM_ARCH_6K__) \\
        || defined(__ARM_ARCH_6ZK__) \\
        || defined(__ARM_ARCH_6M__) \\
        || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM-0 >= 6)
        #error cmake_ARCH armv6
    #elif defined(__ARM_ARCH_5TEJ__) \\
        || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM-0 >= 5)
        #error cmake_ARCH armv5
    #else
        #error cmake_ARCH arm
    #endif
#elif defined(__i386) || defined(__i386__) || defined(_M_IX86)
    #error cmake_ARCH i386
#elif defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || defined(_M_X64)
    #error cmake_ARCH x86_64
#elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
    #error cmake_ARCH ia64
#elif defined(__ppc__) || defined(__ppc) || defined(__powerpc__) \\
      || defined(_ARCH_COM) || defined(_ARCH_PWR) || defined(_ARCH_PPC)  \\
      || defined(_M_MPPC) || defined(_M_PPC)
    #if defined(__ppc64__) || defined(__powerpc64__) || defined(__64BIT__)
        #error cmake_ARCH ppc64
    #else
        #error cmake_ARCH ppc
    #endif
#else
#error cmake_ARCH unknown
#endif
")
# target_architecture(<output_var>)
#
# Determines the architecture being compiled for and stores it in <output_var>
# in the caller's scope.
#
#   output_var - name of the variable that receives the result.
#
# When building on Apple with CMAKE_OSX_ARCHITECTURES set, the result is the list
# of requested architectures in the normalized order ppc, i386, x86_64, ppc64.
# Otherwise the result is the single name parsed from the compiler diagnostics
# produced by archdetect_c_code, or "unknown" if nothing could be parsed.
#
# Set ppc_support to TRUE before calling this function or ppc and ppc64
# will be treated as invalid architectures since they are no longer supported by Apple
function(target_architecture output_var)
    # A function body starts with a copy of the caller's variables, so every
    # local accumulator is reset to keep stale values out of the result.
    set(ARCH "")
    set(osx_arch_ppc FALSE)
    set(osx_arch_i386 FALSE)
    set(osx_arch_x86_64 FALSE)
    set(osx_arch_ppc64 FALSE)

    if(APPLE AND CMAKE_OSX_ARCHITECTURES)
        # On OS X we use CMAKE_OSX_ARCHITECTURES *if* it was set
        # First let's normalize the order of the values

        # Note that it's not possible to compile PowerPC applications if you are using
        # the OS X SDK version 10.6 or later - you'll need 10.4/10.5 for that, so we
        # disable it by default
        # See this page for more information:
        # http://stackoverflow.com/questions/5333490/how-can-we-restore-ppc-ppc64-as-well-as-full-10-4-10-5-sdk-support-to-xcode-4

        # Architecture defaults to i386 or ppc on OS X 10.5 and earlier, depending on the CPU type detected at runtime.
        # On OS X 10.6+ the default is x86_64 if the CPU supports it, i386 otherwise.

        foreach(osx_arch ${CMAKE_OSX_ARCHITECTURES})
            if("${osx_arch}" STREQUAL "ppc" AND ppc_support)
                set(osx_arch_ppc TRUE)
            elseif("${osx_arch}" STREQUAL "i386")
                set(osx_arch_i386 TRUE)
            elseif("${osx_arch}" STREQUAL "x86_64")
                set(osx_arch_x86_64 TRUE)
            elseif("${osx_arch}" STREQUAL "ppc64" AND ppc_support)
                set(osx_arch_ppc64 TRUE)
            else()
                message(FATAL_ERROR "Invalid OS X arch name: ${osx_arch}")
            endif()
        endforeach()

        # Now add all the architectures in our normalized order
        if(osx_arch_ppc)
            list(APPEND ARCH ppc)
        endif()

        if(osx_arch_i386)
            list(APPEND ARCH i386)
        endif()

        if(osx_arch_x86_64)
            list(APPEND ARCH x86_64)
        endif()

        if(osx_arch_ppc64)
            list(APPEND ARCH ppc64)
        endif()
    else()
        file(WRITE "${CMAKE_BINARY_DIR}/arch.c" "${archdetect_c_code}")

        enable_language(C)

        # Detect the architecture in a rather creative way...
        # This compiles a small C program which is a series of ifdefs that selects a
        # particular #error preprocessor directive whose message string contains the
        # target architecture. The program will always fail to compile (both because
        # the file is not a valid C program, and obviously because of the presence of
        # the #error preprocessor directives)... but by exploiting the preprocessor in
        # this way, we can detect the correct target architecture even when
        # cross-compiling, since the program itself never needs to be run (only the
        # compiler/preprocessor). For that reason try_compile is sufficient here.
        try_compile(
            compile_result_unused
            "${CMAKE_BINARY_DIR}"
            "${CMAKE_BINARY_DIR}/arch.c"
            CMAKE_FLAGS CMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES}
            OUTPUT_VARIABLE ARCH
        )

        # Parse the architecture name from the compiler output
        string(REGEX MATCH "cmake_ARCH ([a-zA-Z0-9_]+)" ARCH "${ARCH}")

        # Get rid of the value marker leaving just the architecture name
        string(REPLACE "cmake_ARCH " "" ARCH "${ARCH}")

        # If we are compiling with an unknown architecture this variable should
        # already be set to "unknown" but in the case that it's empty (i.e. due
        # to a typo in the code), then set it to unknown
        if(NOT ARCH)
            set(ARCH unknown)
        endif()
    endif()

    set(${output_var} "${ARCH}" PARENT_SCOPE)
endfunction()
openmmapi/include/openmm/internal/vectorize.h
View file @
cdfc47e4
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2014 Stanford University and the Authors.
*
* Portions copyright (c) 2014
-2018
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -32,14 +32,12 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#if defined(__A
NDROID
__)
#if defined(__A
RM
__)
#include "vectorize_neon.h"
#elif defined(__PPC__)
#include "vectorize_ppc.h"
#else
#if defined(__PNACL__)
#include "vectorize_pnacl.h"
#else
#include "vectorize_sse.h"
#endif
#include "vectorize_sse.h"
#endif
#endif
/*OPENMM_VECTORIZE_H_*/
openmmapi/include/openmm/internal/vectorize_ppc.h
0 → 100644
View file @
cdfc47e4
#ifndef OPENMM_VECTORIZE_PPC_H_
#define OPENMM_VECTORIZE_PPC_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Authors: Peter Eastman, Heng Ma *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include <cmath>
#include <cstdlib>
#include <altivec.h>
// This file defines classes and functions to simplify vectorizing code with AltiVec on PPC.
/**
 * Report whether the four element vector classes (ivec4 and fvec4) can be
 * used on this processor. This implementation always answers true.
 */
static bool isVec4Supported() {
    const bool supported = true;
    return supported;
}
// Aliases for the AltiVec vector-of-float and vector-of-int types that the
// fvec4 and ivec4 classes below wrap.
typedef vector float __m128;
typedef vector int __m128i;

// Forward declaration: fvec4 declares operators that return ivec4 before
// ivec4 itself is defined.
class ivec4;
/**
 * A four element vector of floats.
 *
 * Wraps a single AltiVec vector of floats and provides element-wise
 * arithmetic, bitwise and comparison operators. Comparisons produce an ivec4
 * and are defined after ivec4, as is the conversion to ivec4.
 */
class fvec4 {
public:
    __m128 val;

    /** Create a vector whose elements are left uninitialized. */
    fvec4() {}
    /** Create a vector with all four elements set to v. */
    fvec4(float v) {
        val = (__m128) {v, v, v, v};
    }
    /** Create a vector from four individual element values. */
    fvec4(float v1, float v2, float v3, float v4) {
        val = (__m128) {v1, v2, v3, v4};
    }
    /** Wrap an existing native vector. */
    fvec4(__m128 v) : val(v) {}
    /**
     * Load four consecutive floats starting at v.
     *
     * The data is copied byte-wise rather than by dereferencing a casted vector
     * pointer, so v does not need to be 16 byte aligned and its const
     * qualification is respected.
     */
    fvec4(const float* v) {
        __builtin_memcpy(&val, v, sizeof(val));
    }
    operator __m128() const {
        return val;
    }
    /** Read element i. */
    float operator[](int i) const {
        return val[i];
    }
    /**
     * Write the four elements to consecutive floats starting at v.
     * As with loading, v does not need to be 16 byte aligned.
     */
    void store(float* v) const {
        __builtin_memcpy(v, &val, sizeof(val));
    }
    fvec4 operator+(const fvec4& other) const {
        return vec_add(val, other.val);
    }
    fvec4 operator-(const fvec4& other) const {
        return vec_sub(val, other.val);
    }
    fvec4 operator*(const fvec4& other) const {
        return vec_mul(val, other.val);
    }
    fvec4 operator/(const fvec4& other) const {
        return vec_div(val, other.val);
    }
    void operator+=(const fvec4& other) {
        val = vec_add(val, other.val);
    }
    void operator-=(const fvec4& other) {
        val = vec_sub(val, other.val);
    }
    void operator*=(const fvec4& other) {
        val = vec_mul(val, other.val);
    }
    void operator/=(const fvec4& other) {
        val = vec_div(val, other.val);
    }
    fvec4 operator-() const {
        return -val;
    }
    /** Bitwise AND of the two vectors. */
    fvec4 operator&(const fvec4& other) const {
        return vec_and(val, other.val);
    }
    /** Bitwise OR of the two vectors. */
    fvec4 operator|(const fvec4& other) const {
        return vec_or(val, other.val);
    }
    // Element-wise comparisons; defined once ivec4 is a complete type.
    ivec4 operator==(const fvec4& other) const;
    ivec4 operator!=(const fvec4& other) const;
    ivec4 operator>(const fvec4& other) const;
    ivec4 operator<(const fvec4& other) const;
    ivec4 operator>=(const fvec4& other) const;
    ivec4 operator<=(const fvec4& other) const;
    operator ivec4() const;
};
/**
 * A four element vector of ints.
 *
 * Wraps a single AltiVec vector of ints and provides element-wise arithmetic,
 * bitwise and comparison operators. The conversion to fvec4 is defined after
 * the class.
 */
class ivec4 {
public:
    __m128i val;

    /** Create a vector whose elements are left uninitialized. */
    ivec4() {}
    /** Create a vector with all four elements set to v. */
    ivec4(int v) {
        val = (__m128i) {v, v, v, v};
    }
    /** Create a vector from four individual element values. */
    ivec4(int v1, int v2, int v3, int v4) {
        val = (__m128i) {v1, v2, v3, v4};
    }
    /** Wrap an existing native vector. */
    ivec4(__m128i v) : val(v) {}
    /**
     * Load four consecutive ints starting at v.
     *
     * The data is copied byte-wise rather than by dereferencing a casted vector
     * pointer, so v does not need to be 16 byte aligned and its const
     * qualification is respected.
     */
    ivec4(const int* v) {
        __builtin_memcpy(&val, v, sizeof(val));
    }
    operator __m128i() const {
        return val;
    }
    /** Read element i. */
    int operator[](int i) const {
        return val[i];
    }
    /**
     * Write the four elements to consecutive ints starting at v.
     * As with loading, v does not need to be 16 byte aligned.
     */
    void store(int* v) const {
        __builtin_memcpy(v, &val, sizeof(val));
    }
    ivec4 operator+(const ivec4& other) const {
        return vec_add(val, other.val);
    }
    ivec4 operator-(const ivec4& other) const {
        return vec_sub(val, other.val);
    }
    ivec4 operator*(const ivec4& other) const {
        return val*other.val;
    }
    void operator+=(const ivec4& other) {
        val = vec_add(val, other.val);
    }
    void operator-=(const ivec4& other) {
        val = vec_sub(val, other.val);
    }
    void operator*=(const ivec4& other) {
        val = val*other.val;
    }
    ivec4 operator-() const {
        return -val;
    }
    ivec4 operator&(const ivec4& other) const {
        return val&other.val;
    }
    ivec4 operator|(const ivec4& other) const {
        return val|other.val;
    }
    // Element-wise comparisons, evaluated with the compiler's vector comparison operators.
    ivec4 operator==(const ivec4& other) const {
        return (val==other.val);
    }
    ivec4 operator!=(const ivec4& other) const {
        return (val!=other.val);
    }
    ivec4 operator>(const ivec4& other) const {
        return (val>other.val);
    }
    ivec4 operator<(const ivec4& other) const {
        return (val<other.val);
    }
    ivec4 operator>=(const ivec4& other) const {
        return (val>=other.val);
    }
    ivec4 operator<=(const ivec4& other) const {
        return (val<=other.val);
    }
    operator fvec4() const;
};
// Conversion operators.
// Element-wise comparisons of two fvec4s. Each one wraps the result of the
// compiler's vector comparison operator in an ivec4.

inline ivec4 fvec4::operator==(const fvec4& other) const {
    return ivec4(val == other.val);
}

inline ivec4 fvec4::operator!=(const fvec4& other) const {
    return ivec4(val != other.val);
}

inline ivec4 fvec4::operator>(const fvec4& other) const {
    return ivec4(val > other.val);
}

inline ivec4 fvec4::operator<(const fvec4& other) const {
    return ivec4(val < other.val);
}

inline ivec4 fvec4::operator>=(const fvec4& other) const {
    return ivec4(val >= other.val);
}

inline ivec4 fvec4::operator<=(const fvec4& other) const {
    return ivec4(val <= other.val);
}
/**
 * Convert each float element to an int with a C-style cast.
 */
inline fvec4::operator ivec4() const {
    const int e0 = (int) val[0];
    const int e1 = (int) val[1];
    const int e2 = (int) val[2];
    const int e3 = (int) val[3];
    return (__m128i) {e0, e1, e2, e3};
}

/**
 * Convert each int element to a float with a C-style cast.
 */
inline ivec4::operator fvec4() const {
    const float e0 = (float) val[0];
    const float e1 = (float) val[1];
    const float e2 = (float) val[2];
    const float e3 = (float) val[3];
    return (__m128) {e0, e1, e2, e3};
}
// Functions that operate on fvec4s.
/**
 * Compute the absolute value of every element of v.
 *
 * Uses the AltiVec vec_abs operation on the whole vector instead of
 * extracting each element and calling the scalar fabs() on it.
 */
static inline fvec4 abs(const fvec4& v) {
    return vec_abs(v.val);
}
/**
 * Apply expf() to each of the four elements of v.
 */
static inline fvec4 exp(const fvec4& v) {
    float lanes[4];
    for (int i = 0; i < 4; i++)
        lanes[i] = expf(v[i]);
    return fvec4(lanes[0], lanes[1], lanes[2], lanes[3]);
}
/**
 * Apply logf() to each of the four elements of v.
 */
static inline fvec4 log(const fvec4& v) {
    float lanes[4];
    for (int i = 0; i < 4; i++)
        lanes[i] = logf(v[i]);
    return fvec4(lanes[0], lanes[1], lanes[2], lanes[3]);
}
static
inline
float
dot3
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
fvec4
r
=
v1
*
v2
;
return
r
[
0
]
+
r
[
1
]
+
r
[
2
];
}
static
inline
float
dot4
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
fvec4
r
=
v1
*
v2
;
fvec4
temp
=
__builtin_shuffle
(
r
.
val
,
r
.
val
,
(
__m128i
)
{
0
,
1
,
-
1
,
-
1
})
+
__builtin_shuffle
(
r
.
val
,
r
.
val
,
(
__m128i
)
{
2
,
3
,
-
1
,
-
1
});
return
temp
[
0
]
+
temp
[
1
];
}
/**
 * Cross product of the first three elements of v1 and v2.
 *
 * Both inputs are permuted to element order (2, 0, 1, 3), combined with two
 * multiplications and a subtraction, and the result is permuted the same way
 * once more to put the components back in x, y, z order.
 */
static inline fvec4 cross(const fvec4& v1, const fvec4& v2) {
    const __m128i rotate = (__m128i) {2, 0, 1, 3};
    __m128 temp = v2.val*__builtin_shuffle(v1.val, v1.val, rotate) -
                  v1.val*__builtin_shuffle(v2.val, v2.val, rotate);
    return __builtin_shuffle(temp, temp, rotate);
}
/**
 * Transpose, in place, the 4x4 matrix whose rows are v1, v2, v3 and v4.
 *
 * The first stage interleaves the even and the odd elements of each pair of
 * rows; the second stage joins the matching halves of those intermediates.
 */
static inline void transpose(fvec4& v1, fvec4& v2, fvec4& v3, fvec4& v4) {
    // Shuffle indices 0-3 select from the first operand, 4-7 from the second.
    const __m128i evenLanes = (__m128i) {0, 4, 2, 6};
    const __m128i oddLanes = (__m128i) {1, 5, 3, 7};
    const __m128i lowHalves = (__m128i) {0, 1, 4, 5};
    const __m128i highHalves = (__m128i) {2, 3, 6, 7};

    __m128 even12 = __builtin_shuffle(v1.val, v2.val, evenLanes);
    __m128 odd12 = __builtin_shuffle(v1.val, v2.val, oddLanes);
    __m128 even34 = __builtin_shuffle(v3.val, v4.val, evenLanes);
    __m128 odd34 = __builtin_shuffle(v3.val, v4.val, oddLanes);

    v1 = __builtin_shuffle(even12, even34, lowHalves);
    v2 = __builtin_shuffle(odd12, odd34, lowHalves);
    v3 = __builtin_shuffle(even12, even34, highHalves);
    v4 = __builtin_shuffle(odd12, odd34, highHalves);
}
// Functions that operate on ivec4s.
/**
 * Element-wise minimum of two ivec4s.
 */
static inline ivec4 min(const ivec4& v1, const ivec4& v2) {
    return ivec4(vec_min(v1.val, v2.val));
}

/**
 * Element-wise maximum of two ivec4s.
 */
static inline ivec4 max(const ivec4& v1, const ivec4& v2) {
    return ivec4(vec_max(v1.val, v2.val));
}
/**
 * Compute the absolute value of every element of v.
 *
 * Uses the AltiVec vec_abs operation on the whole vector instead of
 * extracting each element and calling the scalar abs() on it.
 */
static inline ivec4 abs(const ivec4& v) {
    return vec_abs(v.val);
}
static
inline
bool
any
(
const
__m128i
&
v
)
{
ivec4
temp
=
__builtin_shuffle
(
v
,
v
,
(
__m128i
)
{
0
,
1
,
-
1
,
-
1
})
|
__builtin_shuffle
(
v
,
v
,
(
__m128i
)
{
2
,
3
,
-
1
,
-
1
});
return
(
temp
[
0
]
||
temp
[
1
]);
}
// Mathematical operators involving a scalar and a vector.
// Each operator below broadcasts the scalar left operand into all four
// elements of an fvec4 and then applies the matching fvec4 operator.

static inline fvec4 operator+(float v1, const fvec4& v2) {
    const fvec4 broadcast(v1);
    return broadcast+v2;
}

static inline fvec4 operator-(float v1, const fvec4& v2) {
    const fvec4 broadcast(v1);
    return broadcast-v2;
}

static inline fvec4 operator*(float v1, const fvec4& v2) {
    const fvec4 broadcast(v1);
    return broadcast*v2;
}

static inline fvec4 operator/(float v1, const fvec4& v2) {
    const fvec4 broadcast(v1);
    return broadcast/v2;
}
// Operations for blending fvec4s based on an ivec4.
/**
 * Select bits from v1 or v2 according to mask: every bit that is set in mask
 * is taken from v2, every bit that is clear is taken from v1.
 *
 * Uses the AltiVec vec_sel operation rather than emulating the selection
 * with integer AND, subtraction and addition.
 */
static inline fvec4 blend(const fvec4& v1, const fvec4& v2, const __m128i& mask) {
    // vec_sel on float vectors takes an unsigned (or bool) int mask, so reinterpret the signed one.
    return vec_sel(v1.val, v2.val, (vector unsigned int) mask);
}
// These are at the end since they involve other functions defined above.
/**
 * Element-wise minimum of two fvec4s.
 */
static inline fvec4 min(const fvec4& v1, const fvec4& v2) {
    return fvec4(vec_min(v1.val, v2.val));
}

/**
 * Element-wise maximum of two fvec4s.
 */
static inline fvec4 max(const fvec4& v1, const fvec4& v2) {
    return fvec4(vec_max(v1.val, v2.val));
}
/**
 * Round every element of v using the AltiVec vec_round operation.
 */
static inline fvec4 round(const fvec4& v) {
    return fvec4(vec_round(v.val));
}

/**
 * Round every element of v down using the AltiVec vec_floor operation.
 */
static inline fvec4 floor(const fvec4& v) {
    return fvec4(vec_floor(v.val));
}

/**
 * Round every element of v up using the AltiVec vec_ceil operation.
 */
static inline fvec4 ceil(const fvec4& v) {
    return fvec4(vec_ceil(v.val));
}
/**
 * Compute the reciprocal square root of every element of v, one element at a
 * time with the scalar sqrt() function.
 */
static inline fvec4 rsqrt(const fvec4& v) {
    float lanes[4];
    for (int i = 0; i < 4; i++)
        lanes[i] = 1.0/sqrt(v[i]);
    return fvec4(lanes[0], lanes[1], lanes[2], lanes[3]);
}
/**
 * Compute the square root of every element of v using the vec_sqrt operation.
 */
static inline fvec4 sqrt(const fvec4& v) {
    return fvec4(vec_sqrt(v.val));
}
#endif
/*OPENMM_VECTORIZE_PPC_H_*/
platforms/cpu/sharedTarget/CMakeLists.txt
View file @
cdfc47e4
FOREACH
(
file
${
SOURCE_FILES
}
)
IF
(
file MATCHES
".*Vec8.*"
)
IF
(
MSVC
)
IF
(
file MATCHES
".*Vec8.*"
)
IF
(
MSVC
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
/arch:AVX /D__AVX__"
)
ELSE
(
MSVC
)
IF
(
NOT ANDROID
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1 -mavx"
)
ENDIF
(
NOT ANDROID
)
ENDIF
(
MSVC
)
ELSE
(
file MATCHES
".*Vec8.*"
)
IF
(
NOT MSVC
)
IF
(
NOT ANDROID
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1"
)
ENDIF
(
NOT ANDROID
)
ENDIF
(
NOT MSVC
)
ENDIF
(
file MATCHES
".*Vec8.*"
)
ELSEIF
(
X86
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1 -mavx"
)
ELSE
()
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
)
ENDIF
()
ELSE
()
IF
(
X86 AND NOT MSVC
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1"
)
ENDIF
()
ENDIF
()
ENDFOREACH
(
file
)
ADD_LIBRARY
(
${
SHARED_TARGET
}
SHARED
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
...
...
platforms/cpu/staticTarget/CMakeLists.txt
View file @
cdfc47e4
FOREACH
(
file
${
SOURCE_FILES
}
)
IF
(
file MATCHES
".*Vec8.*"
)
IF
(
MSVC
)
IF
(
file MATCHES
".*Vec8.*"
)
IF
(
MSVC
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
/arch:AVX /D__AVX__"
)
ELSEIF
(
PNACL
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
)
ELSE
(
MSVC
)
ELSEIF
(
X86
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1 -mavx"
)
ENDIF
(
MSVC
)
ELSE
(
file MATCHES
".*Vec8.*"
)
IF
(
NOT
(
MSVC OR ANDROID OR PNACL
))
ELSE
()
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
)
ENDIF
()
ELSE
()
IF
(
X86 AND NOT MSVC
)
SET_SOURCE_FILES_PROPERTIES
(
${
file
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1"
)
ENDIF
(
NOT
(
MSVC OR ANDROID OR PNACL
)
)
ENDIF
(
file MATCHES
".*Vec8.*"
)
ENDIF
(
)
ENDIF
(
)
ENDFOREACH
(
file
)
ADD_LIBRARY
(
${
STATIC_TARGET
}
STATIC
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
...
...
tests/CMakeLists.txt
View file @
cdfc47e4
#
# Testing
#
ENABLE_TESTING
()
# Automatically create tests using files named "Test*.cpp"
FILE
(
GLOB TEST_PROGS
"*Test*.cpp"
)
FOREACH
(
TEST_PROG
${
TEST_PROGS
}
)
GET_FILENAME_COMPONENT
(
TEST_ROOT
${
TEST_PROG
}
NAME_WE
)
ADD_EXECUTABLE
(
${
TEST_ROOT
}
${
TEST_PROG
}
)
IF
(
OPENMM_BUILD_SHARED_LIB
)
TARGET_LINK_LIBRARIES
(
${
TEST_ROOT
}
${
SHARED_TARGET
}
)
ELSE
(
OPENMM_BUILD_SHARED_LIB
)
TARGET_LINK_LIBRARIES
(
${
TEST_ROOT
}
${
STATIC_TARGET
}
)
ENDIF
(
OPENMM_BUILD_SHARED_LIB
)
SET
(
EXTRA_TEST_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
)
IF
((
${
TEST_ROOT
}
MATCHES TestVectorize
)
AND
NOT
(
MSVC OR ANDROID OR PNACL
))
SET
(
EXTRA_TEST_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-msse4.1"
)
ENDIF
((
${
TEST_ROOT
}
MATCHES TestVectorize
)
AND
NOT
(
MSVC OR ANDROID OR PNACL
))
IF
((
${
TEST_ROOT
}
MATCHES TestVectorize8
)
AND
NOT
(
MSVC OR ANDROID OR PNACL
))
SET
(
EXTRA_TEST_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-mavx"
)
ENDIF
((
${
TEST_ROOT
}
MATCHES TestVectorize8
)
AND
NOT
(
MSVC OR ANDROID OR PNACL
))
SET_TARGET_PROPERTIES
(
${
TEST_ROOT
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_LINK_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_TEST_FLAGS
}
"
)
ADD_TEST
(
${
TEST_ROOT
}
${
EXECUTABLE_OUTPUT_PATH
}
/
${
TEST_ROOT
}
)
ENDFOREACH
(
TEST_PROG
${
TEST_PROGS
}
)
#
# Testing
#

ENABLE_TESTING()

# Automatically create tests using files named "Test*.cpp"
FILE(GLOB TEST_PROGS "*Test*.cpp")
FOREACH(TEST_PROG ${TEST_PROGS})
    GET_FILENAME_COMPONENT(TEST_ROOT ${TEST_PROG} NAME_WE)

    # TestVectorize8 is built with -mavx below, so it is skipped entirely
    # when the target architecture is not x86.
    IF((${TEST_ROOT} MATCHES TestVectorize8) AND NOT X86)
        CONTINUE()
    ENDIF()

    # Link with shared library
    ADD_EXECUTABLE(${TEST_ROOT} ${TEST_PROG})
    IF(OPENMM_BUILD_SHARED_LIB)
        TARGET_LINK_LIBRARIES(${TEST_ROOT} ${SHARED_TARGET})
    ELSE(OPENMM_BUILD_SHARED_LIB)
        TARGET_LINK_LIBRARIES(${TEST_ROOT} ${STATIC_TARGET})
    ENDIF(OPENMM_BUILD_SHARED_LIB)

    # Start from the project-wide flags and append the instruction set flags
    # each vectorization test needs. The flags accumulate, so TestVectorize8
    # (which also matches TestVectorize) ends up with "-msse4.1 -mavx".
    SET(EXTRA_TEST_FLAGS "${EXTRA_COMPILE_FLAGS}")
    IF((${TEST_ROOT} MATCHES TestVectorize) AND X86 AND NOT MSVC)
        SET(EXTRA_TEST_FLAGS "${EXTRA_TEST_FLAGS} -msse4.1")
    ENDIF()
    IF((${TEST_ROOT} MATCHES TestVectorize8) AND X86 AND NOT MSVC)
        SET(EXTRA_TEST_FLAGS "${EXTRA_TEST_FLAGS} -mavx")
    ENDIF()
    SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_TEST_FLAGS}")
    ADD_TEST(${TEST_ROOT} ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT})
ENDFOREACH(TEST_PROG ${TEST_PROGS})
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment