Commit cdfc47e4 authored by peastman
Browse files

Initial support for PPC

parent 689e2bc9
......@@ -33,6 +33,21 @@ MARK_AS_ADVANCED(DART_ROOT)
# We have custom cmake modules for FindOpenMM and running python tests
SET(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake_modules")
# Work out which CPU family this build targets. target_architecture() is
# provided by the TargetArch module and stores the detected architecture
# name(s) in TARGET_ARCH.
include(TargetArch)
target_architecture(TARGET_ARCH)

# Each family is tested independently (no elseif), so a TARGET_ARCH value that
# matches more than one pattern switches on more than one flag.
if("${TARGET_ARCH}" MATCHES "x86_64|i386")
    set(X86 ON)
endif()

# ARM and PPC builds additionally get a preprocessor definition so that the
# sources can tell the families apart.
if("${TARGET_ARCH}" MATCHES "arm")
    set(ARM ON)
    add_compile_definitions(__ARM__=1)
endif()

if("${TARGET_ARCH}" MATCHES "ppc")
    set(PPC ON)
    add_compile_definitions(__PPC__=1)
endif()
# Where to install
IF(WIN32)
IF(NOT OPENMM_INSTALL_PREFIX)
......@@ -69,7 +84,10 @@ ENDIF(${CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT})
# The source is organized into subdirectories, but we handle them all from
# this CMakeLists file rather than letting CMake visit them as SUBDIRS.
SET(OPENMM_SOURCE_SUBDIRS . openmmapi olla libraries/jama libraries/quern libraries/lepton libraries/sfmt libraries/lbfgs libraries/hilbert libraries/csha1 platforms/reference serialization libraries/irrxml libraries/vecmath)
SET(OPENMM_SOURCE_SUBDIRS . openmmapi olla libraries/jama libraries/quern libraries/lepton libraries/sfmt libraries/lbfgs libraries/hilbert libraries/csha1 platforms/reference serialization libraries/irrxml)
IF(X86 OR ARM)
SET(OPENMM_SOURCE_SUBDIRS ${OPENMM_SOURCE_SUBDIRS} libraries/vecmath)
ENDIF()
IF(WIN32)
SET(OPENMM_SOURCE_SUBDIRS ${OPENMM_SOURCE_SUBDIRS} libraries/pthreads)
ELSE(WIN32)
......@@ -82,7 +100,7 @@ ENDIF(WIN32)
SET (CMAKE_CXX_STANDARD 11)
IF (APPLE AND (NOT PNACL))
IF(APPLE)
# Build 64 bit binaries compatible with OS X 10.7
IF (NOT CMAKE_OSX_DEPLOYMENT_TARGET)
SET (CMAKE_OSX_DEPLOYMENT_TARGET "10.7" CACHE STRING "The minimum version of OS X to support" FORCE)
......@@ -102,19 +120,17 @@ IF (APPLE AND (NOT PNACL))
# Improve the linking behavior of Mac libraries
SET (CMAKE_INSTALL_NAME_DIR "@rpath")
SET(EXTRA_COMPILE_FLAGS "-msse2 -stdlib=libc++")
ELSE (APPLE AND (NOT PNACL))
IF (MSVC OR ANDROID OR PNACL)
SET(EXTRA_COMPILE_FLAGS)
IF (MSVC)
# Use warning level 2, not whatever warning level CMake picked.
STRING(REGEX REPLACE "/W[0-4]" "/W2" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
# Explicitly suppress warnings 4305 and 4244.
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4305 /wd4244")
ENDIF (MSVC)
ELSE (MSVC OR ANDROID OR PNACL)
SET(EXTRA_COMPILE_FLAGS "-msse2")
ENDIF (MSVC OR ANDROID OR PNACL)
ENDIF (APPLE AND (NOT PNACL))
ELSEIF(MSVC)
SET(EXTRA_COMPILE_FLAGS)
# Use warning level 2, not whatever warning level CMake picked.
STRING(REGEX REPLACE "/W[0-4]" "/W2" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
# Explicitly suppress warnings 4305 and 4244.
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4305 /wd4244")
ELSEIF(X86)
SET(EXTRA_COMPILE_FLAGS "-msse2")
ELSE()
SET(EXTRA_COMPILE_FLAGS)
ENDIF()
IF(UNIX AND NOT CMAKE_BUILD_TYPE)
SET(CMAKE_BUILD_TYPE Release CACHE STRING "Debug or Release build" FORCE)
......@@ -219,19 +235,19 @@ FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
## OpenMM was previously installed there.
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include)
ENDFOREACH(subdir)
IF (ANDROID OR PNACL)
SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/libraries/sfmt/src/SFMT.cpp PROPERTIES COMPILE_FLAGS "-UHAVE_SSE2")
ELSE (ANDROID OR PNACL)
IF(X86)
SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/libraries/sfmt/src/SFMT.cpp PROPERTIES COMPILE_FLAGS "-DHAVE_SSE2=1")
ENDIF(ANDROID OR PNACL)
IF (NOT (ANDROID OR PNACL OR (WIN32 AND OPENMM_BUILD_STATIC_LIB)))
ELSE()
SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/libraries/sfmt/src/SFMT.cpp PROPERTIES COMPILE_FLAGS "-UHAVE_SSE2")
ENDIF()
IF(X86 AND NOT (WIN32 AND OPENMM_BUILD_STATIC_LIB))
FILE(GLOB src_files ${CMAKE_CURRENT_SOURCE_DIR}/libraries/asmjit/*/*.cpp)
FILE(GLOB incl_files ${CMAKE_CURRENT_SOURCE_DIR}/libraries/asmjit/*.h)
SET(SOURCE_FILES ${SOURCE_FILES} ${src_files})
SET(SOURCE_INCLUDE_FILES ${SOURCE_INCLUDE_FILES} ${incl_files})
INCLUDE_DIRECTORIES(BEFORE "${CMAKE_CURRENT_SOURCE_DIR}/libraries/asmjit")
SET(EXTRA_COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DLEPTON_USE_JIT")
ENDIF (NOT (ANDROID OR PNACL OR (WIN32 AND OPENMM_BUILD_STATIC_LIB)))
ENDIF()
# If API wrappers are being generated, and add them to the build.
SET(OPENMM_BUILD_C_AND_FORTRAN_WRAPPERS ON CACHE BOOL "Build wrappers for C and Fortran")
......
# This is from Solar CMake (https://github.com/axr/solar-cmake).
#
# Copyright (c) 2012 Petroules Corporation. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# OF THE POSSIBILITY OF SUCH DAMAGE.
# Based on the Qt 5 processor detection code, so should be very accurate
# https://qt.gitorious.org/qt/qtbase/blobs/master/src/corelib/global/qprocessordetection.h
# Currently handles arm (v5, v6, v7), x86 (32/64), ia64, and ppc (32/64)
# Regarding POWER/PowerPC, just as is noted in the Qt source,
# "There are many more known variants/revisions that we do not handle/detect."
# C source used purely for preprocessor-based architecture detection. The
# compiler's predefined macros select one "#error cmake_ARCH <name>" directive
# from the #if chain; the architecture name is meant to be parsed out of the
# resulting compiler output.
# The final "#error cmake_ARCH unknown" sits outside the #if chain, so it is
# present on every architecture; a parser that takes the first cmake_ARCH
# message is unaffected by it.
# "\\" is CMake's escape for a single backslash, i.e. a C line continuation.
set(archdetect_c_code "
#if defined(__arm__) || defined(__TARGET_ARCH_ARM)
#if defined(__ARM_ARCH_7__) \\
|| defined(__ARM_ARCH_7A__) \\
|| defined(__ARM_ARCH_7R__) \\
|| defined(__ARM_ARCH_7M__) \\
|| (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM-0 >= 7)
#error cmake_ARCH armv7
#elif defined(__ARM_ARCH_6__) \\
|| defined(__ARM_ARCH_6J__) \\
|| defined(__ARM_ARCH_6T2__) \\
|| defined(__ARM_ARCH_6Z__) \\
|| defined(__ARM_ARCH_6K__) \\
|| defined(__ARM_ARCH_6ZK__) \\
|| defined(__ARM_ARCH_6M__) \\
|| (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM-0 >= 6)
#error cmake_ARCH armv6
#elif defined(__ARM_ARCH_5TEJ__) \\
|| (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM-0 >= 5)
#error cmake_ARCH armv5
#else
#error cmake_ARCH arm
#endif
#elif defined(__i386) || defined(__i386__) || defined(_M_IX86)
#error cmake_ARCH i386
#elif defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || defined(_M_X64)
#error cmake_ARCH x86_64
#elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
#error cmake_ARCH ia64
#elif defined(__ppc__) || defined(__ppc) || defined(__powerpc__) \\
|| defined(_ARCH_COM) || defined(_ARCH_PWR) || defined(_ARCH_PPC) \\
|| defined(_M_MPPC) || defined(_M_PPC)
#if defined(__ppc64__) || defined(__powerpc64__) || defined(__64BIT__)
#error cmake_ARCH ppc64
#else
#error cmake_ARCH ppc
#endif
#endif
#error cmake_ARCH unknown
")
# target_architecture(<output_var>)
#
# Stores the name of the architecture being compiled for in <output_var> in
# the caller's scope.
#
# * On Apple platforms with CMAKE_OSX_ARCHITECTURES set, the result is the list
#   of requested architectures in the normalized order ppc, i386, x86_64,
#   ppc64. Any other name is a fatal error. ppc and ppc64 are only accepted
#   when the variable ppc_support is true at the time of the call, since they
#   are no longer supported by Apple.
# * Otherwise the result is a single name taken from the compiler output
#   produced for archdetect_c_code (armv7, armv6, armv5, arm, i386, x86_64,
#   ia64, ppc64, ppc or unknown).
function(target_architecture output_var)
    # A function can read the variables of its caller, so reset everything that
    # is accumulated below; otherwise a stray ARCH or osx_arch_* value from the
    # calling scope would end up in the result.
    set(ARCH "")
    set(osx_arch_ppc FALSE)
    set(osx_arch_i386 FALSE)
    set(osx_arch_x86_64 FALSE)
    set(osx_arch_ppc64 FALSE)

    if(APPLE AND CMAKE_OSX_ARCHITECTURES)
        # On OS X we use CMAKE_OSX_ARCHITECTURES *if* it was set.
        # Note that it's not possible to compile PowerPC applications with the
        # OS X SDK version 10.6 or later - you'll need 10.4/10.5 for that, so
        # PowerPC is disabled unless ppc_support is set. See:
        # http://stackoverflow.com/questions/5333490/how-can-we-restore-ppc-ppc64-as-well-as-full-10-4-10-5-sdk-support-to-xcode-4
        # Architecture defaults to i386 or ppc on OS X 10.5 and earlier,
        # depending on the CPU type detected at runtime. On OS X 10.6+ the
        # default is x86_64 if the CPU supports it, i386 otherwise.

        # First record which architectures were requested...
        foreach(osx_arch ${CMAKE_OSX_ARCHITECTURES})
            if("${osx_arch}" STREQUAL "ppc" AND ppc_support)
                set(osx_arch_ppc TRUE)
            elseif("${osx_arch}" STREQUAL "i386")
                set(osx_arch_i386 TRUE)
            elseif("${osx_arch}" STREQUAL "x86_64")
                set(osx_arch_x86_64 TRUE)
            elseif("${osx_arch}" STREQUAL "ppc64" AND ppc_support)
                set(osx_arch_ppc64 TRUE)
            else()
                message(FATAL_ERROR "Invalid OS X arch name: ${osx_arch}")
            endif()
        endforeach()

        # ...then emit them in our normalized order.
        if(osx_arch_ppc)
            list(APPEND ARCH ppc)
        endif()
        if(osx_arch_i386)
            list(APPEND ARCH i386)
        endif()
        if(osx_arch_x86_64)
            list(APPEND ARCH x86_64)
        endif()
        if(osx_arch_ppc64)
            list(APPEND ARCH ppc64)
        endif()
    else()
        file(WRITE "${CMAKE_BINARY_DIR}/arch.c" "${archdetect_c_code}")

        # NOTE(review): the CMake documentation describes enable_language() as
        # a file-scope command; it is kept here exactly as before - confirm
        # that calling it from inside a function is acceptable for this build.
        enable_language(C)

        # Detect the architecture in a rather creative way...
        # This compiles a small C program which is a series of ifdefs that
        # selects a particular #error preprocessor directive whose message
        # string contains the target architecture. The program always fails to
        # compile, but by exploiting the preprocessor in this way the target
        # architecture can be detected even when cross-compiling, since the
        # program itself never needs to be run (only the compiler/preprocessor).
        try_run(
            run_result_unused
            compile_result_unused
            "${CMAKE_BINARY_DIR}"
            "${CMAKE_BINARY_DIR}/arch.c"
            COMPILE_OUTPUT_VARIABLE ARCH
            CMAKE_FLAGS CMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES}
        )

        # Parse the architecture name from the compiler output (first match).
        string(REGEX MATCH "cmake_ARCH ([a-zA-Z0-9_]+)" ARCH "${ARCH}")

        # Get rid of the value marker leaving just the architecture name.
        string(REPLACE "cmake_ARCH " "" ARCH "${ARCH}")

        # If nothing could be parsed from the compiler output, fall back to
        # "unknown" rather than returning an empty value.
        if(NOT ARCH)
            set(ARCH unknown)
        endif()
    endif()

    set(${output_var} "${ARCH}" PARENT_SCOPE)
endfunction()
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2014 Stanford University and the Authors. *
* Portions copyright (c) 2014-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -32,14 +32,12 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#if defined(__ANDROID__)
#if defined(__ARM__)
#include "vectorize_neon.h"
#elif defined(__PPC__)
#include "vectorize_ppc.h"
#else
#if defined(__PNACL__)
#include "vectorize_pnacl.h"
#else
#include "vectorize_sse.h"
#endif
#include "vectorize_sse.h"
#endif
#endif /*OPENMM_VECTORIZE_H_*/
#ifndef OPENMM_VECTORIZE_PPC_H_
#define OPENMM_VECTORIZE_PPC_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Authors: Peter Eastman, Heng Ma *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include <cmath>
#include <cstdlib>
#include <altivec.h>
// This file defines classes and functions to simplify vectorizing code with AltiVec on PPC.
/**
 * Report whether the ivec4 and fvec4 classes can be used on this processor.
 * This implementation unconditionally answers yes.
 */
static bool isVec4Supported() {
    const bool supported = true;
    return supported;
}
typedef vector float __m128;
typedef vector int __m128i;
class ivec4;
/**
 * A four element vector of floats.
 *
 * Wraps a single __m128 value and forwards arithmetic to the AltiVec vec_*
 * functions or to the compiler's built-in vector operators.
 */
class fvec4 {
public:
    __m128 val;

    /** Create a vector whose elements are left uninitialized. */
    fvec4() {}
    /** Create a vector with all four elements set to v. */
    fvec4(float v) {
        val = (__m128) {v, v, v, v};
    }
    /** Create a vector from four individual elements. */
    fvec4(float v1, float v2, float v3, float v4) {
        val = (__m128) {v1, v2, v3, v4};
    }
    /** Wrap an existing native vector. */
    fvec4(__m128 v) : val(v) {}
    /**
     * Load four consecutive floats starting at v.
     */
    fvec4(const float* v) {
        // Copy the bytes instead of dereferencing v as a __m128*: the cast
        // would promise the compiler a 16 byte aligned address, which an
        // arbitrary float pointer does not guarantee.
        __builtin_memcpy(&val, v, sizeof(val));
    }
    /** Give access to the wrapped native vector. */
    operator __m128() const {
        return val;
    }
    /** Read element i. No range check is performed. */
    float operator[](int i) const {
        return val[i];
    }
    /**
     * Write the four elements to consecutive floats starting at v.
     */
    void store(float* v) const {
        // Byte copy for the same alignment reason as in the pointer constructor.
        __builtin_memcpy(v, &val, sizeof(val));
    }
    fvec4 operator+(const fvec4& other) const {
        return vec_add(val, other.val);
    }
    fvec4 operator-(const fvec4& other) const {
        return vec_sub(val, other.val);
    }
    fvec4 operator*(const fvec4& other) const {
        return vec_mul(val, other.val);
    }
    fvec4 operator/(const fvec4& other) const {
        return vec_div(val, other.val);
    }
    void operator+=(const fvec4& other) {
        val = vec_add(val, other.val);
    }
    void operator-=(const fvec4& other) {
        val = vec_sub(val, other.val);
    }
    void operator*=(const fvec4& other) {
        val = vec_mul(val, other.val);
    }
    void operator/=(const fvec4& other) {
        val = vec_div(val, other.val);
    }
    /** Element-wise negation. */
    fvec4 operator-() const {
        return -val;
    }
    /** Bitwise AND of the two vectors. */
    fvec4 operator&(const fvec4& other) const {
        return vec_and(val, other.val);
    }
    /** Bitwise OR of the two vectors. */
    fvec4 operator|(const fvec4& other) const {
        return vec_or(val, other.val);
    }
    // The comparisons and the conversion below return an ivec4, which is only
    // forward declared at this point, so they are declared here and must be
    // defined once ivec4 is complete.
    ivec4 operator==(const fvec4& other) const;
    ivec4 operator!=(const fvec4& other) const;
    ivec4 operator>(const fvec4& other) const;
    ivec4 operator<(const fvec4& other) const;
    ivec4 operator>=(const fvec4& other) const;
    ivec4 operator<=(const fvec4& other) const;
    operator ivec4() const;
};
/**
 * A four element vector of ints.
 *
 * Wraps a single __m128i value and forwards arithmetic to the AltiVec vec_*
 * functions or to the compiler's built-in vector operators.
 */
class ivec4 {
public:
    __m128i val;

    /** Create a vector whose elements are left uninitialized. */
    ivec4() {}
    /** Create a vector with all four elements set to v. */
    ivec4(int v) {
        val = (__m128i) {v, v, v, v};
    }
    /** Create a vector from four individual elements. */
    ivec4(int v1, int v2, int v3, int v4) {
        val = (__m128i) {v1, v2, v3, v4};
    }
    /** Wrap an existing native vector. */
    ivec4(__m128i v) : val(v) {}
    /**
     * Load four consecutive ints starting at v.
     */
    ivec4(const int* v) {
        // Copy the bytes instead of dereferencing v as a __m128i*: the cast
        // would promise the compiler a 16 byte aligned address, which an
        // arbitrary int pointer does not guarantee.
        __builtin_memcpy(&val, v, sizeof(val));
    }
    /** Give access to the wrapped native vector. */
    operator __m128i() const {
        return val;
    }
    /** Read element i. No range check is performed. */
    int operator[](int i) const {
        return val[i];
    }
    /**
     * Write the four elements to consecutive ints starting at v.
     */
    void store(int* v) const {
        // Byte copy for the same alignment reason as in the pointer constructor.
        __builtin_memcpy(v, &val, sizeof(val));
    }
    ivec4 operator+(const ivec4& other) const {
        return vec_add(val, other.val);
    }
    ivec4 operator-(const ivec4& other) const {
        return vec_sub(val, other.val);
    }
    ivec4 operator*(const ivec4& other) const {
        return val * other.val;
    }
    void operator+=(const ivec4& other) {
        val = vec_add(val, other.val);
    }
    void operator-=(const ivec4& other) {
        val = vec_sub(val, other.val);
    }
    void operator*=(const ivec4& other) {
        val = val*other.val;
    }
    /** Element-wise negation. */
    ivec4 operator-() const {
        return -val;
    }
    /** Bitwise AND of the two vectors. */
    ivec4 operator&(const ivec4& other) const {
        return val&other.val;
    }
    /** Bitwise OR of the two vectors. */
    ivec4 operator|(const ivec4& other) const {
        return val|other.val;
    }
    // Element-wise comparisons, evaluated with the compiler's built-in vector
    // comparison operators.
    ivec4 operator==(const ivec4& other) const {
        return (val==other.val);
    }
    ivec4 operator!=(const ivec4& other) const {
        return (val!=other.val);
    }
    ivec4 operator>(const ivec4& other) const {
        return (val>other.val);
    }
    ivec4 operator<(const ivec4& other) const {
        return (val<other.val);
    }
    ivec4 operator>=(const ivec4& other) const {
        return (val>=other.val);
    }
    ivec4 operator<=(const ivec4& other) const {
        return (val<=other.val);
    }
    /** Conversion to a vector of floats; declared here, defined out of line. */
    operator fvec4() const;
};
// Comparison and conversion operators that could only be declared inside the
// classes, because each of them mentions both fvec4 and ivec4.

/** Element-wise "equal" comparison of two float vectors. */
inline ivec4 fvec4::operator==(const fvec4& other) const {
    return val == other.val;
}

/** Element-wise "not equal" comparison of two float vectors. */
inline ivec4 fvec4::operator!=(const fvec4& other) const {
    return val != other.val;
}

/** Element-wise "greater than" comparison of two float vectors. */
inline ivec4 fvec4::operator>(const fvec4& other) const {
    return val > other.val;
}

/** Element-wise "less than" comparison of two float vectors. */
inline ivec4 fvec4::operator<(const fvec4& other) const {
    return val < other.val;
}

/** Element-wise "greater than or equal" comparison of two float vectors. */
inline ivec4 fvec4::operator>=(const fvec4& other) const {
    return val >= other.val;
}

/** Element-wise "less than or equal" comparison of two float vectors. */
inline ivec4 fvec4::operator<=(const fvec4& other) const {
    return val <= other.val;
}

/** Convert every element to int with a plain C++ float-to-int cast. */
inline fvec4::operator ivec4() const {
    return ivec4(static_cast<int>(val[0]),
                 static_cast<int>(val[1]),
                 static_cast<int>(val[2]),
                 static_cast<int>(val[3]));
}

/** Convert every element to float with a plain C++ int-to-float cast. */
inline ivec4::operator fvec4() const {
    return fvec4(static_cast<float>(val[0]),
                 static_cast<float>(val[1]),
                 static_cast<float>(val[2]),
                 static_cast<float>(val[3]));
}
// Free functions taking fvec4 arguments.

/** Element-wise absolute value, evaluated one element at a time with fabs(). */
static inline fvec4 abs(const fvec4& v) {
    return fvec4(fabs(v[0]),
                 fabs(v[1]),
                 fabs(v[2]),
                 fabs(v[3]));
}

/** Element-wise exponential, evaluated one element at a time with expf(). */
static inline fvec4 exp(const fvec4& v) {
    return fvec4(expf(v[0]),
                 expf(v[1]),
                 expf(v[2]),
                 expf(v[3]));
}

/** Element-wise natural logarithm, evaluated one element at a time with logf(). */
static inline fvec4 log(const fvec4& v) {
    return fvec4(logf(v[0]),
                 logf(v[1]),
                 logf(v[2]),
                 logf(v[3]));
}

/** Dot product of the first three elements; the fourth element is ignored. */
static inline float dot3(const fvec4& v1, const fvec4& v2) {
    const fvec4 products = v1*v2;
    return products[0]+products[1]+products[2];
}

/** Dot product of all four elements. */
static inline float dot4(const fvec4& v1, const fvec4& v2) {
    const fvec4 products = v1*v2;
    // Only elements 0 and 1 of the two shuffles are read afterwards; they hold
    // products 0,1 and products 2,3 respectively.
    const __m128 firstPair = __builtin_shuffle(products.val, products.val, (__m128i) {0, 1, -1, -1});
    const __m128 secondPair = __builtin_shuffle(products.val, products.val, (__m128i) {2, 3, -1, -1});
    const fvec4 pairSums = firstPair+secondPair;
    return pairSums[0]+pairSums[1];
}

/** Cross product of the first three elements of v1 and v2. */
static inline fvec4 cross(const fvec4& v1, const fvec4& v2) {
    // Rotate both inputs to (z, x, y, w), form the differences of products,
    // then apply the same rotation once more to put the components in order.
    const __m128 rotated1 = __builtin_shuffle(v1.val, v1.val, (__m128i) {2, 0, 1, 3});
    const __m128 rotated2 = __builtin_shuffle(v2.val, v2.val, (__m128i) {2, 0, 1, 3});
    const __m128 unordered = v2.val*rotated1 - v1.val*rotated2;
    return __builtin_shuffle(unordered, unordered, (__m128i) {2, 0, 1, 3});
}
/**
 * Transpose, in place, the 4x4 matrix whose rows are v1, v2, v3 and v4:
 * afterwards v1 holds the first elements of the original four vectors, v2 the
 * second elements, and so on.
 */
static inline void transpose(fvec4& v1, fvec4& v2, fvec4& v3, fvec4& v4) {
    // Step 1: interleave rows pairwise (shuffle indices 4-7 select from the
    // second operand).
    const __m128 even12 = __builtin_shuffle(v1.val, v2.val, (__m128i) {0, 4, 2, 6});
    const __m128 odd12 = __builtin_shuffle(v1.val, v2.val, (__m128i) {1, 5, 3, 7});
    const __m128 even34 = __builtin_shuffle(v3.val, v4.val, (__m128i) {0, 4, 2, 6});
    const __m128 odd34 = __builtin_shuffle(v3.val, v4.val, (__m128i) {1, 5, 3, 7});

    // Step 2: combine the halves of the interleaved pairs into the columns.
    v1 = __builtin_shuffle(even12, even34, (__m128i) {0, 1, 4, 5});
    v2 = __builtin_shuffle(odd12, odd34, (__m128i) {0, 1, 4, 5});
    v3 = __builtin_shuffle(even12, even34, (__m128i) {2, 3, 6, 7});
    v4 = __builtin_shuffle(odd12, odd34, (__m128i) {2, 3, 6, 7});
}
// Free functions taking ivec4 arguments.

/** Element-wise minimum of two int vectors. */
static inline ivec4 min(const ivec4& v1, const ivec4& v2) {
    return ivec4(vec_min(v1.val, v2.val));
}

/** Element-wise maximum of two int vectors. */
static inline ivec4 max(const ivec4& v1, const ivec4& v2) {
    return ivec4(vec_max(v1.val, v2.val));
}

/** Element-wise absolute value, evaluated one element at a time with abs(int). */
static inline ivec4 abs(const ivec4& v) {
    return ivec4(abs(v[0]),
                 abs(v[1]),
                 abs(v[2]),
                 abs(v[3]));
}

/** Return true if at least one of the four elements of v is nonzero. */
static inline bool any(const __m128i& v) {
    // OR elements 0,1 with elements 2,3; only the first two elements of the
    // combined vector are inspected.
    const __m128i firstPair = __builtin_shuffle(v, v, (__m128i) {0, 1, -1, -1});
    const __m128i secondPair = __builtin_shuffle(v, v, (__m128i) {2, 3, -1, -1});
    const ivec4 combined = firstPair | secondPair;
    return combined[0] || combined[1];
}
// Arithmetic with a scalar on the left and a vector on the right. The scalar
// is first broadcast to all four elements of an fvec4.

static inline fvec4 operator+(float v1, const fvec4& v2) {
    const fvec4 broadcast(v1);
    return broadcast+v2;
}

static inline fvec4 operator-(float v1, const fvec4& v2) {
    const fvec4 broadcast(v1);
    return broadcast-v2;
}

static inline fvec4 operator*(float v1, const fvec4& v2) {
    const fvec4 broadcast(v1);
    return broadcast*v2;
}

static inline fvec4 operator/(float v1, const fvec4& v2) {
    const fvec4 broadcast(v1);
    return broadcast/v2;
}
/**
 * Blend two float vectors bit by bit: where a bit of mask is set the result
 * takes the corresponding bit of v2, otherwise that of v1.
 */
static inline fvec4 blend(const fvec4& v1, const fvec4& v2, const __m128i& mask) {
    // Bits selected from v2.
    const __m128i fromSecond = mask & (__m128i) v2.val;
    // 0xFFFFFFFF minus mask flips every bit of mask, selecting the bits of v1.
    const ivec4 invertedMask = ivec4(0xFFFFFFFF)-ivec4(mask);
    const ivec4 fromFirst = invertedMask & (__m128i) v1.val;
    // The two selections never share a set bit, so adding them merges them.
    return (__m128) (fromSecond + fromFirst.val);
}
// The remaining fvec4 functions. Keep rsqrt() ahead of sqrt(const fvec4&): its
// body calls sqrt() on individual float elements.

/** Element-wise minimum of two float vectors. */
static inline fvec4 min(const fvec4& v1, const fvec4& v2) {
    return fvec4(vec_min(v1.val, v2.val));
}

/** Element-wise maximum of two float vectors. */
static inline fvec4 max(const fvec4& v1, const fvec4& v2) {
    return fvec4(vec_max(v1.val, v2.val));
}

/** Element-wise rounding, as performed by vec_round(). */
static inline fvec4 round(const fvec4& v) {
    return fvec4(vec_round(v.val));
}

/** Element-wise floor, as performed by vec_floor(). */
static inline fvec4 floor(const fvec4& v) {
    return fvec4(vec_floor(v.val));
}

/** Element-wise ceiling, as performed by vec_ceil(). */
static inline fvec4 ceil(const fvec4& v) {
    return fvec4(vec_ceil(v.val));
}

/** Element-wise reciprocal square root, evaluated one element at a time. */
static inline fvec4 rsqrt(const fvec4& v) {
    return fvec4(1.0/sqrt(v[0]),
                 1.0/sqrt(v[1]),
                 1.0/sqrt(v[2]),
                 1.0/sqrt(v[3]));
}

/** Element-wise square root, as performed by vec_sqrt(). */
static inline fvec4 sqrt(const fvec4& v) {
    return fvec4(vec_sqrt(v.val));
}
#endif /*OPENMM_VECTORIZE_PPC_H_*/
FOREACH(file ${SOURCE_FILES})
IF (file MATCHES ".*Vec8.*")
IF (MSVC)
IF(file MATCHES ".*Vec8.*")
IF(MSVC)
SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} /arch:AVX /D__AVX__")
ELSE (MSVC)
IF (NOT ANDROID)
SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -msse4.1 -mavx")
ENDIF (NOT ANDROID)
ENDIF (MSVC)
ELSE (file MATCHES ".*Vec8.*")
IF (NOT MSVC)
IF (NOT ANDROID)
SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -msse4.1")
ENDIF (NOT ANDROID)
ENDIF (NOT MSVC)
ENDIF (file MATCHES ".*Vec8.*")
ELSEIF(X86)
SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -msse4.1 -mavx")
ELSE()
SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS}")
ENDIF()
ELSE()
IF(X86 AND NOT MSVC)
SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -msse4.1")
ENDIF()
ENDIF()
ENDFOREACH(file)
ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
......
FOREACH(file ${SOURCE_FILES})
IF (file MATCHES ".*Vec8.*")
IF (MSVC)
IF(file MATCHES ".*Vec8.*")
IF(MSVC)
SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} /arch:AVX /D__AVX__")
ELSEIF (PNACL)
SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS}")
ELSE (MSVC)
ELSEIF(X86)
SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -msse4.1 -mavx")
ENDIF (MSVC)
ELSE (file MATCHES ".*Vec8.*")
IF (NOT (MSVC OR ANDROID OR PNACL))
ELSE()
SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS}")
ENDIF()
ELSE()
IF(X86 AND NOT MSVC)
SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -msse4.1")
ENDIF (NOT (MSVC OR ANDROID OR PNACL))
ENDIF (file MATCHES ".*Vec8.*")
ENDIF()
ENDIF()
ENDFOREACH(file)
ADD_LIBRARY(${STATIC_TARGET} STATIC ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
......
#
# Testing
#
enable_testing()

# Every file matching "*Test*.cpp" becomes one test executable, named after
# the file without its extension, and is registered as a test of that name.
file(GLOB TEST_PROGS "*Test*.cpp")
foreach(TEST_PROG ${TEST_PROGS})
    get_filename_component(TEST_ROOT ${TEST_PROG} NAME_WE)
    add_executable(${TEST_ROOT} ${TEST_PROG})

    # Link against the shared library when OPENMM_BUILD_SHARED_LIB is set,
    # otherwise against the static one.
    if(OPENMM_BUILD_SHARED_LIB)
        target_link_libraries(${TEST_ROOT} ${SHARED_TARGET})
    else()
        target_link_libraries(${TEST_ROOT} ${STATIC_TARGET})
    endif()

    # Start from the common flags. Unless building with MSVC, for Android or
    # for PNaCl, the vectorization tests get an extra instruction set flag.
    # "TestVectorize8" also matches "TestVectorize", so the second check
    # deliberately comes last and overrides the first.
    set(EXTRA_TEST_FLAGS "${EXTRA_COMPILE_FLAGS}")
    if((${TEST_ROOT} MATCHES TestVectorize) AND NOT (MSVC OR ANDROID OR PNACL))
        set(EXTRA_TEST_FLAGS "${EXTRA_COMPILE_FLAGS} -msse4.1")
    endif()
    if((${TEST_ROOT} MATCHES TestVectorize8) AND NOT (MSVC OR ANDROID OR PNACL))
        set(EXTRA_TEST_FLAGS "${EXTRA_COMPILE_FLAGS} -mavx")
    endif()

    set_target_properties(${TEST_ROOT} PROPERTIES
        LINK_FLAGS "${EXTRA_LINK_FLAGS}"
        COMPILE_FLAGS "${EXTRA_TEST_FLAGS}")
    add_test(${TEST_ROOT} ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT})
endforeach()
#
# Testing
#
enable_testing()

# Every file matching "*Test*.cpp" becomes one test executable, named after
# the file without its extension, and is registered as a test of that name.
file(GLOB TEST_PROGS "*Test*.cpp")
foreach(TEST_PROG ${TEST_PROGS})
    get_filename_component(TEST_ROOT ${TEST_PROG} NAME_WE)

    # Tests whose name matches TestVectorize8 are only built when X86 is set.
    if((${TEST_ROOT} MATCHES TestVectorize8) AND NOT X86)
        continue()
    endif()

    add_executable(${TEST_ROOT} ${TEST_PROG})

    # Link against the shared library when OPENMM_BUILD_SHARED_LIB is set,
    # otherwise against the static one.
    if(OPENMM_BUILD_SHARED_LIB)
        target_link_libraries(${TEST_ROOT} ${SHARED_TARGET})
    else()
        target_link_libraries(${TEST_ROOT} ${STATIC_TARGET})
    endif()

    # Start from the common flags. On x86 with a non-MSVC compiler the
    # vectorization tests get an extra instruction set flag. "TestVectorize8"
    # also matches "TestVectorize", so the more specific name is tested first.
    set(EXTRA_TEST_FLAGS "${EXTRA_COMPILE_FLAGS}")
    if(X86 AND NOT MSVC)
        if(${TEST_ROOT} MATCHES TestVectorize8)
            set(EXTRA_TEST_FLAGS "${EXTRA_COMPILE_FLAGS} -mavx")
        elseif(${TEST_ROOT} MATCHES TestVectorize)
            set(EXTRA_TEST_FLAGS "${EXTRA_COMPILE_FLAGS} -msse4.1")
        endif()
    endif()

    set_target_properties(${TEST_ROOT} PROPERTIES
        LINK_FLAGS "${EXTRA_LINK_FLAGS}"
        COMPILE_FLAGS "${EXTRA_TEST_FLAGS}")
    add_test(${TEST_ROOT} ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT})
endforeach()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment