Initial support for PPC

cdfc47e4 · peastman · 689e2bc9 · cdfc47e4 · cdfc47e4 · cdfc47e4
Commit cdfc47e4 authored Nov 07, 2018 by peastman
7 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -33,6 +33,21 @@ MARK_AS_ADVANCED(DART_ROOT)
 # We have custom cmake modules for FindOpenMM and running python tests
 SET(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake_modules")

+# Determine what architecture we are compiling for.  target_architecture() (from
+# cmake_modules/TargetArch.cmake) reports names such as "x86_64", "i386", "armv7",
+# "ppc64" or "unknown"; the X86 / ARM / PPC switches set here drive the
+# architecture specific choices in the rest of the build.
+INCLUDE(TargetArch)
+target_architecture(TARGET_ARCH)
+if ("${TARGET_ARCH}" MATCHES "x86_64|i386")
+    set(X86 ON)
+endif()
+# add_definitions() is used below rather than add_compile_definitions(): the
+# latter only exists in CMake 3.12 and newer, so it would abort configuration
+# with older CMake releases.  The resulting compiler flags are the same.
+if ("${TARGET_ARCH}" MATCHES "arm")
+    set(ARM ON)
+    # Makes vectorize.h select the NEON implementation.
+    add_definitions(-D__ARM__=1)
+endif()
+if ("${TARGET_ARCH}" MATCHES "ppc")
+    set(PPC ON)
+    # Makes vectorize.h select the AltiVec implementation.
+    add_definitions(-D__PPC__=1)
+endif()
+
 # Where to install
 IF(WIN32)
    IF(NOT OPENMM_INSTALL_PREFIX)
@@ -69,7 +84,10 @@ ENDIF(${CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT})

 # The source is organized into subdirectories, but we handle them all from
 # this CMakeLists file rather than letting CMake visit them as SUBDIRS.
-SET(OPENMM_SOURCE_SUBDIRS . openmmapi olla libraries/jama libraries/quern libraries/lepton libraries/sfmt libraries/lbfgs libraries/hilbert libraries/csha1 platforms/reference serialization libraries/irrxml libraries/vecmath)
+SET(OPENMM_SOURCE_SUBDIRS . openmmapi olla libraries/jama libraries/quern libraries/lepton libraries/sfmt libraries/lbfgs libraries/hilbert libraries/csha1 platforms/reference serialization libraries/irrxml)
+IF(X86 OR ARM)
+    SET(OPENMM_SOURCE_SUBDIRS ${OPENMM_SOURCE_SUBDIRS} libraries/vecmath)
+ENDIF()
 IF(WIN32)
    SET(OPENMM_SOURCE_SUBDIRS ${OPENMM_SOURCE_SUBDIRS} libraries/pthreads)
 ELSE(WIN32)
@@ -82,7 +100,7 @@ ENDIF(WIN32)

 SET (CMAKE_CXX_STANDARD 11)

-IF (APPLE AND (NOT PNACL))
+IF(APPLE)
    # Build 64 bit binaries compatible with OS X 10.7
    IF (NOT CMAKE_OSX_DEPLOYMENT_TARGET)
        SET (CMAKE_OSX_DEPLOYMENT_TARGET "10.7" CACHE STRING "The minimum version of OS X to support" FORCE)
@@ -102,19 +120,17 @@ IF (APPLE AND (NOT PNACL))
    # Improve the linking behavior of Mac libraries
    SET (CMAKE_INSTALL_NAME_DIR "@rpath")
    SET(EXTRA_COMPILE_FLAGS "-msse2 -stdlib=libc++")
-ELSE (APPLE AND (NOT PNACL))
-    IF (MSVC OR ANDROID OR PNACL)
-        SET(EXTRA_COMPILE_FLAGS)
-        IF (MSVC)
-            # Use warning level 2, not whatever warning level CMake picked.
-            STRING(REGEX REPLACE "/W[0-4]" "/W2" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
-            # Explicitly suppress warnings 4305 and 4244.
-            SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4305 /wd4244")
-        ENDIF (MSVC)
-    ELSE (MSVC OR ANDROID OR PNACL)
-        SET(EXTRA_COMPILE_FLAGS "-msse2")
-    ENDIF (MSVC OR ANDROID OR PNACL)
-ENDIF (APPLE AND (NOT PNACL))
+ELSEIF(MSVC)
+    SET(EXTRA_COMPILE_FLAGS)
+    # Use warning level 2, not whatever warning level CMake picked.
+    STRING(REGEX REPLACE "/W[0-4]" "/W2" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+    # Explicitly suppress warnings 4305 and 4244.
+    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4305 /wd4244")
+ELSEIF(X86)
+    SET(EXTRA_COMPILE_FLAGS "-msse2")
+ELSE()
+    SET(EXTRA_COMPILE_FLAGS)
+ENDIF()

 IF(UNIX AND NOT CMAKE_BUILD_TYPE)
    SET(CMAKE_BUILD_TYPE Release CACHE STRING "Debug or Release build" FORCE)
@@ -219,19 +235,19 @@ FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
    ## OpenMM was previously installed there.
    INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include)
 ENDFOREACH(subdir)
-IF (ANDROID OR PNACL)
-    SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/libraries/sfmt/src/SFMT.cpp PROPERTIES COMPILE_FLAGS "-UHAVE_SSE2")
-ELSE (ANDROID OR PNACL)
+IF(X86)
    SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/libraries/sfmt/src/SFMT.cpp PROPERTIES COMPILE_FLAGS "-DHAVE_SSE2=1")
-ENDIF(ANDROID OR PNACL)
-IF (NOT (ANDROID OR PNACL OR (WIN32 AND OPENMM_BUILD_STATIC_LIB)))
+ELSE()
+    SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/libraries/sfmt/src/SFMT.cpp PROPERTIES COMPILE_FLAGS "-UHAVE_SSE2")
+ENDIF()
+IF(X86 AND NOT (WIN32 AND OPENMM_BUILD_STATIC_LIB))
    FILE(GLOB src_files ${CMAKE_CURRENT_SOURCE_DIR}/libraries/asmjit/*/*.cpp)
    FILE(GLOB incl_files ${CMAKE_CURRENT_SOURCE_DIR}/libraries/asmjit/*.h)
    SET(SOURCE_FILES ${SOURCE_FILES} ${src_files})
    SET(SOURCE_INCLUDE_FILES ${SOURCE_INCLUDE_FILES} ${incl_files})
    INCLUDE_DIRECTORIES(BEFORE "${CMAKE_CURRENT_SOURCE_DIR}/libraries/asmjit")
    SET(EXTRA_COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DLEPTON_USE_JIT")
-ENDIF (NOT (ANDROID OR PNACL OR (WIN32 AND OPENMM_BUILD_STATIC_LIB)))
+ENDIF()

 # If API wrappers are being generated, and add them to the build.
 SET(OPENMM_BUILD_C_AND_FORTRAN_WRAPPERS ON CACHE BOOL "Build wrappers for C and Fortran")

--- a/cmake_modules/TargetArch.cmake
+++ b/cmake_modules/TargetArch.cmake
+# This is from Solar CMake (https://github.com/axr/solar-cmake).
+#
+# Copyright (c) 2012 Petroules Corporation. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+#    Redistributions of source code must retain the above copyright notice, this
+#    list of conditions and the following disclaimer.
+#
+#    Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+# OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Based on the Qt 5 processor detection code, so should be very accurate
+# https://qt.gitorious.org/qt/qtbase/blobs/master/src/corelib/global/qprocessordetection.h
+# Currently handles arm (v5, v6, v7), x86 (32/64), ia64, and ppc (32/64)
+
+# Regarding POWER/PowerPC, just as is noted in the Qt source,
+# "There are many more known variants/revisions that we do not handle/detect."
+
+# C source used purely as a preprocessor probe by target_architecture(): the
+# branch matching the compiler's predefined macros reaches an #error directive
+# whose message is "cmake_ARCH <name>", and the trailing "#error cmake_ARCH unknown"
+# acts as the fallback marker.  The file is never expected to compile.
+set(archdetect_c_code "
+#if defined(__arm__) || defined(__TARGET_ARCH_ARM)
+    #if defined(__ARM_ARCH_7__) \\
+        || defined(__ARM_ARCH_7A__) \\
+        || defined(__ARM_ARCH_7R__) \\
+        || defined(__ARM_ARCH_7M__) \\
+        || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM-0 >= 7)
+        #error cmake_ARCH armv7
+    #elif defined(__ARM_ARCH_6__) \\
+        || defined(__ARM_ARCH_6J__) \\
+        || defined(__ARM_ARCH_6T2__) \\
+        || defined(__ARM_ARCH_6Z__) \\
+        || defined(__ARM_ARCH_6K__) \\
+        || defined(__ARM_ARCH_6ZK__) \\
+        || defined(__ARM_ARCH_6M__) \\
+        || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM-0 >= 6)
+        #error cmake_ARCH armv6
+    #elif defined(__ARM_ARCH_5TEJ__) \\
+        || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM-0 >= 5)
+        #error cmake_ARCH armv5
+    #else
+        #error cmake_ARCH arm
+    #endif
+#elif defined(__i386) || defined(__i386__) || defined(_M_IX86)
+    #error cmake_ARCH i386
+#elif defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || defined(_M_X64)
+    #error cmake_ARCH x86_64
+#elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
+    #error cmake_ARCH ia64
+#elif defined(__ppc__) || defined(__ppc) || defined(__powerpc__) \\
+      || defined(_ARCH_COM) || defined(_ARCH_PWR) || defined(_ARCH_PPC)  \\
+      || defined(_M_MPPC) || defined(_M_PPC)
+    #if defined(__ppc64__) || defined(__powerpc64__) || defined(__64BIT__)
+        #error cmake_ARCH ppc64
+    #else
+        #error cmake_ARCH ppc
+    #endif
+#endif
+
+#error cmake_ARCH unknown
+")
+
+# Set ppc_support to TRUE before calling target_architecture(); otherwise "ppc" and
+# "ppc64" entries in CMAKE_OSX_ARCHITECTURES are rejected as invalid architectures,
+# since they are no longer supported by Apple.
+
+# target_architecture(<output_var>)
+#
+# Sets <output_var> in the caller's scope to the architecture being compiled for.
+#
+# * On OS X with CMAKE_OSX_ARCHITECTURES set, the result is the list of requested
+#   architectures in the fixed order ppc, i386, x86_64, ppc64.  Any other entry
+#   (including ppc/ppc64 when ppc_support is not true) is a fatal error.
+# * Otherwise the compiler is run on archdetect_c_code and the name following the
+#   first "cmake_ARCH " marker in its output is returned, or "unknown" when no
+#   marker is found.
+function(target_architecture output_var)
+    # A function starts out with a copy of the caller's variables.  Reset everything
+    # that is appended to or tested below so that a same-named variable in the
+    # caller cannot leak into the result.
+    set(ARCH "")
+    set(osx_arch_ppc FALSE)
+    set(osx_arch_i386 FALSE)
+    set(osx_arch_x86_64 FALSE)
+    set(osx_arch_ppc64 FALSE)
+
+    if(APPLE AND CMAKE_OSX_ARCHITECTURES)
+        # On OS X we use CMAKE_OSX_ARCHITECTURES *if* it was set
+        # First let's normalize the order of the values
+
+        # Note that it's not possible to compile PowerPC applications if you are using
+        # the OS X SDK version 10.6 or later - you'll need 10.4/10.5 for that, so we
+        # disable it by default
+        # See this page for more information:
+        # http://stackoverflow.com/questions/5333490/how-can-we-restore-ppc-ppc64-as-well-as-full-10-4-10-5-sdk-support-to-xcode-4
+
+        # Architecture defaults to i386 or ppc on OS X 10.5 and earlier, depending on the CPU type detected at runtime.
+        # On OS X 10.6+ the default is x86_64 if the CPU supports it, i386 otherwise.
+
+        foreach(osx_arch ${CMAKE_OSX_ARCHITECTURES})
+            if("${osx_arch}" STREQUAL "ppc" AND ppc_support)
+                set(osx_arch_ppc TRUE)
+            elseif("${osx_arch}" STREQUAL "i386")
+                set(osx_arch_i386 TRUE)
+            elseif("${osx_arch}" STREQUAL "x86_64")
+                set(osx_arch_x86_64 TRUE)
+            elseif("${osx_arch}" STREQUAL "ppc64" AND ppc_support)
+                set(osx_arch_ppc64 TRUE)
+            else()
+                message(FATAL_ERROR "Invalid OS X arch name: ${osx_arch}")
+            endif()
+        endforeach()
+
+        # Now add all the architectures in our normalized order
+        if(osx_arch_ppc)
+            list(APPEND ARCH ppc)
+        endif()
+
+        if(osx_arch_i386)
+            list(APPEND ARCH i386)
+        endif()
+
+        if(osx_arch_x86_64)
+            list(APPEND ARCH x86_64)
+        endif()
+
+        if(osx_arch_ppc64)
+            list(APPEND ARCH ppc64)
+        endif()
+    else()
+        file(WRITE "${CMAKE_BINARY_DIR}/arch.c" "${archdetect_c_code}")
+
+        # NOTE(review): the CMake documentation for enable_language() asks for it to
+        # be called at file scope rather than from inside a function.  It is kept
+        # here so that callers do not have to change -- confirm whether the C
+        # language can be enabled by the top-level project instead.
+        enable_language(C)
+
+        # Detect the architecture in a rather creative way...
+        # This compiles a small C program which is a series of ifdefs that selects a
+        # particular #error preprocessor directive whose message string contains the
+        # target architecture. The program will always fail to compile (both because
+        # the file is not a valid C program, and obviously because of the presence of
+        # the #error preprocessor directives), but by exploiting the preprocessor in
+        # this way, we can detect the correct target architecture even when
+        # cross-compiling, since the program itself never needs to be run (only the
+        # compiler/preprocessor).
+        #
+        # Entries after CMAKE_FLAGS have to use the -DVAR=VALUE form to be passed on
+        # to the generated test project.
+        try_run(
+            run_result_unused
+            compile_result_unused
+            "${CMAKE_BINARY_DIR}"
+            "${CMAKE_BINARY_DIR}/arch.c"
+            COMPILE_OUTPUT_VARIABLE ARCH
+            CMAKE_FLAGS "-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES}"
+        )
+
+        # Parse the architecture name from the compiler output
+        string(REGEX MATCH "cmake_ARCH ([a-zA-Z0-9_]+)" ARCH "${ARCH}")
+
+        # Get rid of the value marker leaving just the architecture name
+        string(REPLACE "cmake_ARCH " "" ARCH "${ARCH}")
+
+        # If we are compiling with an unknown architecture this variable should
+        # already be set to "unknown" but in the case that it's empty (i.e. due
+        # to a typo in the code), then set it to unknown
+        if (NOT ARCH)
+            set(ARCH unknown)
+        endif()
+    endif()
+
+    set(${output_var} "${ARCH}" PARENT_SCOPE)
+endfunction()
--- a/openmmapi/include/openmm/internal/vectorize.h
+++ b/openmmapi/include/openmm/internal/vectorize.h
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2014 Stanford University and the Authors.           *
+ * Portions copyright (c) 2014-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -32,14 +32,12 @@
 * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
 * -------------------------------------------------------------------------- */
        
-#if defined(__ANDROID__)
+#if defined(__ARM__)
    #include "vectorize_neon.h"
+#elif defined(__PPC__)
+    #include "vectorize_ppc.h"
 #else
-    #if defined(__PNACL__)
-        #include "vectorize_pnacl.h"
-    #else
-        #include "vectorize_sse.h"
-    #endif
+    #include "vectorize_sse.h"
 #endif

 #endif /*OPENMM_VECTORIZE_H_*/
--- a/openmmapi/include/openmm/internal/vectorize_ppc.h
+++ b/openmmapi/include/openmm/internal/vectorize_ppc.h
+#ifndef OPENMM_VECTORIZE_PPC_H_
+#define OPENMM_VECTORIZE_PPC_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2013-2018 Stanford University and the Authors.      *
+ * Authors: Peter Eastman, Heng Ma                                            *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include <cmath>
+#include <cstdlib>
+#include <altivec.h>
+
+// This file defines classes and functions to simplify vectorizing code with AltiVec on PPC.
+
+/**
+ * Determine whether ivec4 and fvec4 are supported on this processor.
+ *
+ * This implementation reports support unconditionally.  It is declared inline,
+ * like the other free functions in this header, so that translation units which
+ * include the header without calling it do not get an unused-function warning.
+ *
+ * @return always true
+ */
+static inline bool isVec4Supported() {
+    return true;
+}
+
+typedef vector float __m128;
+typedef vector int __m128i;
+
+class ivec4;
+
+/**
+ * A four element vector of floats.
+ */
+class fvec4 {
+public:
+    __m128 val;
+
+    /** Create a vector whose elements are left uninitialized. */
+    fvec4() {}
+    /** Create a vector with all four elements set to v. */
+    fvec4(float v) {
+        val = (__m128) {v, v, v, v};
+    }
+    /** Create a vector from four individual elements. */
+    fvec4(float v1, float v2, float v3, float v4) {
+        val = (__m128) {v1, v2, v3, v4};
+    }
+    /** Wrap an existing native vector. */
+    fvec4(__m128 v) : val(v) {}
+    /**
+     * Load four consecutive floats starting at v.  The load goes through
+     * vec_vsx_ld() rather than dereferencing a vector pointer, so v is not
+     * required to be 16-byte aligned.
+     */
+    fvec4(const float* v) {
+        val = vec_vsx_ld(0, v);
+    }
+    /** Give access to the underlying native vector. */
+    operator __m128() const {
+        return val;
+    }
+    /** Return element i of the vector. */
+    float operator[](int i) const {
+        return val[i];
+    }
+    /**
+     * Write the four elements to consecutive floats starting at v.  As with the
+     * pointer constructor, v does not need to be 16-byte aligned.
+     */
+    void store(float* v) const {
+        vec_vsx_st(val, 0, v);
+    }
+    // Element-wise arithmetic.
+    fvec4 operator+(const fvec4& other) const {
+        return vec_add(val, other.val);
+    }
+    fvec4 operator-(const fvec4& other) const {
+        return vec_sub(val, other.val);
+    }
+    fvec4 operator*(const fvec4& other) const {
+        return vec_mul(val, other.val);
+    }
+    fvec4 operator/(const fvec4& other) const {
+        return vec_div(val, other.val);
+    }
+    void operator+=(const fvec4& other) {
+        val = vec_add(val, other.val);
+    }
+    void operator-=(const fvec4& other) {
+        val = vec_sub(val, other.val);
+    }
+    void operator*=(const fvec4& other) {
+        val = vec_mul(val, other.val);
+    }
+    void operator/=(const fvec4& other) {
+        val = vec_div(val, other.val);
+    }
+    fvec4 operator-() const {
+        return -val;
+    }
+    // Bitwise operations on the raw element bits.
+    fvec4 operator&(const fvec4& other) const {
+        return vec_and(val, other.val);
+    }
+    fvec4 operator|(const fvec4& other) const {
+        return vec_or(val, other.val);
+    }
+    // Element-wise comparisons and the conversion to ivec4 are defined after
+    // ivec4, since they need its complete definition.
+    ivec4 operator==(const fvec4& other) const;
+    ivec4 operator!=(const fvec4& other) const;
+    ivec4 operator>(const fvec4& other) const;
+    ivec4 operator<(const fvec4& other) const;
+    ivec4 operator>=(const fvec4& other) const;
+    ivec4 operator<=(const fvec4& other) const;
+    operator ivec4() const;
+};
+
+/**
+ * A four element vector of ints.
+ */
+class ivec4 {
+public:
+    __m128i val;
+
+    /** Create a vector whose elements are left uninitialized. */
+    ivec4() {}
+    /** Create a vector with all four elements set to v. */
+    ivec4(int v) {
+        val = (__m128i) {v, v, v, v};
+    }
+    /** Create a vector from four individual elements. */
+    ivec4(int v1, int v2, int v3, int v4) {
+        val = (__m128i) {v1, v2, v3, v4};
+    }
+    /** Wrap an existing native vector. */
+    ivec4(__m128i v) : val(v) {}
+    /**
+     * Load four consecutive ints starting at v.  The load goes through
+     * vec_vsx_ld() rather than dereferencing a vector pointer, so v is not
+     * required to be 16-byte aligned.
+     */
+    ivec4(const int* v) {
+        val = vec_vsx_ld(0, v);
+    }
+    /** Give access to the underlying native vector. */
+    operator __m128i() const {
+        return val;
+    }
+    /** Return element i of the vector. */
+    int operator[](int i) const {
+        return val[i];
+    }
+    /**
+     * Write the four elements to consecutive ints starting at v.  As with the
+     * pointer constructor, v does not need to be 16-byte aligned.
+     */
+    void store(int* v) const {
+        vec_vsx_st(val, 0, v);
+    }
+    // Element-wise arithmetic.
+    ivec4 operator+(const ivec4& other) const {
+        return vec_add(val, other.val);
+    }
+    ivec4 operator-(const ivec4& other) const {
+        return vec_sub(val, other.val);
+    }
+    ivec4 operator*(const ivec4& other) const {
+        return val*other.val;
+    }
+    void operator+=(const ivec4& other) {
+        val = vec_add(val, other.val);
+    }
+    void operator-=(const ivec4& other) {
+        val = vec_sub(val, other.val);
+    }
+    void operator*=(const ivec4& other) {
+        val = val*other.val;
+    }
+    ivec4 operator-() const {
+        return -val;
+    }
+    // Bitwise operations.
+    ivec4 operator&(const ivec4& other) const {
+        return val&other.val;
+    }
+    ivec4 operator|(const ivec4& other) const {
+        return val|other.val;
+    }
+    // Element-wise comparisons, returned as a vector of per-element results.
+    ivec4 operator==(const ivec4& other) const {
+        return (val==other.val);
+    }
+    ivec4 operator!=(const ivec4& other) const {
+        return (val!=other.val);
+    }
+    ivec4 operator>(const ivec4& other) const {
+        return (val>other.val);
+    }
+    ivec4 operator<(const ivec4& other) const {
+        return (val<other.val);
+    }
+    ivec4 operator>=(const ivec4& other) const {
+        return (val>=other.val);
+    }
+    ivec4 operator<=(const ivec4& other) const {
+        return (val<=other.val);
+    }
+    // Conversion to fvec4 is defined after the class.
+    operator fvec4() const;
+};
+
+// Comparison and conversion operators.  These are defined out of line because
+// they need the complete definitions of both fvec4 and ivec4.
+
+// Element-wise comparisons of two fvec4s; the per-element results are returned
+// as an ivec4.
+// NOTE(review): blend() uses such results for bitwise selection, so each element
+// is presumably all ones where the comparison holds and zero otherwise -- confirm
+// against the compiler's vector extension documentation.
+inline ivec4 fvec4::operator==(const fvec4& other) const {
+    return  (val==other.val);
+}
+
+inline ivec4 fvec4::operator!=(const fvec4& other) const {
+    return  (val!=other.val);
+}
+
+inline ivec4 fvec4::operator>(const fvec4& other) const {
+    return  (val>other.val);
+}
+
+inline ivec4 fvec4::operator<(const fvec4& other) const {
+    return  (val<other.val);
+}
+
+inline ivec4 fvec4::operator>=(const fvec4& other) const {
+    return  (val>=other.val);
+}
+
+inline ivec4 fvec4::operator<=(const fvec4& other) const {
+    return  (val<=other.val);
+}
+
+// Convert each element to int with a C-style cast.
+inline fvec4::operator ivec4() const {
+    return (__m128i) {(int)val[0], (int)val[1], (int)val[2], (int)val[3]};
+}
+
+// Convert each element to float with a C-style cast.
+inline ivec4::operator fvec4() const {
+    return (__m128) {(float)val[0], (float)val[1], (float)val[2], (float)val[3]};
+}
+
+// Functions that operate on fvec4s.
+
+/**
+ * Element-wise absolute value.  vec_abs() handles all four elements at once
+ * instead of extracting each element and calling the scalar fabs().
+ */
+static inline fvec4 abs(const fvec4& v) {
+    return vec_abs(v.val);
+}
+
+// Element-wise exponential, evaluated one element at a time with expf().
+static inline fvec4 exp(const fvec4& v) {
+    const float e0 = expf(v[0]);
+    const float e1 = expf(v[1]);
+    const float e2 = expf(v[2]);
+    const float e3 = expf(v[3]);
+    return fvec4(e0, e1, e2, e3);
+}
+
+// Element-wise natural logarithm, evaluated one element at a time with logf().
+static inline fvec4 log(const fvec4& v) {
+    const float l0 = logf(v[0]);
+    const float l1 = logf(v[1]);
+    const float l2 = logf(v[2]);
+    const float l3 = logf(v[3]);
+    return fvec4(l0, l1, l2, l3);
+}
+
+// Dot product of the first three elements of v1 and v2; the fourth element of
+// each vector does not contribute.
+static inline float dot3(const fvec4& v1, const fvec4& v2) {
+    const fvec4 products = v1*v2;
+    float sum = products[0];
+    sum += products[1];
+    sum += products[2];
+    return sum;
+}
+
+// Dot product of all four elements of v1 and v2.
+static inline float dot4(const fvec4& v1, const fvec4& v2) {
+    const __m128 products = (v1*v2).val;
+    // Add elements 2 and 3 onto elements 0 and 1 (the upper two elements of the
+    // folded vector are not used), then add the two remaining partial sums.
+    const __m128 lowerPair = __builtin_shuffle(products, products, (__m128i) {0, 1, -1, -1});
+    const __m128 upperPair = __builtin_shuffle(products, products, (__m128i) {2, 3, -1, -1});
+    const fvec4 folded = lowerPair+upperPair;
+    return folded[0]+folded[1];
+}
+
+// Cross product of the first three elements of v1 and v2.
+static inline fvec4 cross(const fvec4& v1, const fvec4& v2) {
+    // The same element rotation (2, 0, 1, 3) is applied to both inputs and then
+    // once more to the combined result.
+    const __m128i rotation = (__m128i) {2, 0, 1, 3};
+    const __m128 rotated1 = __builtin_shuffle(v1.val, v1.val, rotation);
+    const __m128 rotated2 = __builtin_shuffle(v2.val, v2.val, rotation);
+    const __m128 combined = v2.val*rotated1 - v1.val*rotated2;
+    return __builtin_shuffle(combined, combined, rotation);
+}
+
+// Transpose, in place, the 4x4 matrix whose rows are v1, v2, v3 and v4.
+static inline void transpose(fvec4& v1, fvec4& v2, fvec4& v3, fvec4& v4) {
+    // Shuffle indices 0-3 pick from the first operand and 4-7 from the second.
+    const __m128i evenElements = (__m128i) {0, 4, 2, 6};
+    const __m128i oddElements = (__m128i) {1, 5, 3, 7};
+    const __m128i lowerHalves = (__m128i) {0, 1, 4, 5};
+    const __m128i upperHalves = (__m128i) {2, 3, 6, 7};
+
+    // First interleave the rows pairwise...
+    const __m128 even12 = __builtin_shuffle(v1.val, v2.val, evenElements);
+    const __m128 odd12 = __builtin_shuffle(v1.val, v2.val, oddElements);
+    const __m128 even34 = __builtin_shuffle(v3.val, v4.val, evenElements);
+    const __m128 odd34 = __builtin_shuffle(v3.val, v4.val, oddElements);
+
+    // ...then combine the halves of the two pairs into the final columns.
+    v1 = __builtin_shuffle(even12, even34, lowerHalves);
+    v2 = __builtin_shuffle(odd12, odd34, lowerHalves);
+    v3 = __builtin_shuffle(even12, even34, upperHalves);
+    v4 = __builtin_shuffle(odd12, odd34, upperHalves);
+}
+
+// Functions that operate on ivec4s.
+
+// Element-wise minimum of two ivec4s.
+static inline ivec4 min(const ivec4& v1, const ivec4& v2) {
+    const __m128i smaller = vec_min(v1.val, v2.val);
+    return ivec4(smaller);
+}
+
+// Element-wise maximum of two ivec4s.
+static inline ivec4 max(const ivec4& v1, const ivec4& v2) {
+    const __m128i larger = vec_max(v1.val, v2.val);
+    return ivec4(larger);
+}
+
+/**
+ * Element-wise absolute value.  vec_abs() handles all four elements at once
+ * instead of extracting each element and calling the scalar abs().
+ */
+static inline ivec4 abs(const ivec4& v) {
+    return vec_abs(v.val);
+}
+
+// Return true if at least one element of v is non-zero.
+static inline bool any(const __m128i& v) {
+    const __m128i zero = (__m128i) {0, 0, 0, 0};
+    return vec_any_ne(v, zero);
+}
+
+// Mathematical operators involving a scalar and a vector.
+
+// Each operator broadcasts the scalar on the left to all four elements and then
+// applies the corresponding fvec4 operator.
+static inline fvec4 operator+(float v1, const fvec4& v2) {
+    const fvec4 broadcast(v1);
+    return broadcast+v2;
+}
+
+static inline fvec4 operator-(float v1, const fvec4& v2) {
+    const fvec4 broadcast(v1);
+    return broadcast-v2;
+}
+
+static inline fvec4 operator*(float v1, const fvec4& v2) {
+    const fvec4 broadcast(v1);
+    return broadcast*v2;
+}
+
+static inline fvec4 operator/(float v1, const fvec4& v2) {
+    const fvec4 broadcast(v1);
+    return broadcast/v2;
+}
+
+// Operations for blending fvec4s based on an ivec4.
+
+/**
+ * Select between two vectors bit by bit: wherever a bit of mask is set the result
+ * takes the corresponding bit of v2, otherwise the bit of v1.  With a mask whose
+ * elements are each either all ones or all zeros this picks whole elements.
+ */
+static inline fvec4 blend(const fvec4& v1, const fvec4& v2, const __m128i& mask) {
+    // vec_sel performs the (mask & v2) | (~mask & v1) selection in one operation.
+    return vec_sel(v1.val, v2.val, (vector unsigned int) mask);
+}
+
+// These are at the end since they involve other functions defined above.
+
+// Element-wise minimum of two fvec4s.
+static inline fvec4 min(const fvec4& v1, const fvec4& v2) {
+    const __m128 smaller = vec_min(v1.val, v2.val);
+    return fvec4(smaller);
+}
+
+// Element-wise maximum of two fvec4s.
+static inline fvec4 max(const fvec4& v1, const fvec4& v2) {
+    const __m128 larger = vec_max(v1.val, v2.val);
+    return fvec4(larger);
+}
+
+// Apply vec_round() to the elements of v.
+static inline fvec4 round(const fvec4& v) {
+    const __m128 rounded = vec_round(v.val);
+    return fvec4(rounded);
+}
+
+// Apply vec_floor() to the elements of v.
+static inline fvec4 floor(const fvec4& v) {
+    const __m128 lowered = vec_floor(v.val);
+    return fvec4(lowered);
+}
+
+// Apply vec_ceil() to the elements of v.
+static inline fvec4 ceil(const fvec4& v) {
+    const __m128 raised = vec_ceil(v.val);
+    return fvec4(raised);
+}
+
+// Element-wise reciprocal square root, evaluated one element at a time.
+static inline fvec4 rsqrt(const fvec4& v) {
+    const float r0 = 1.0/sqrt(v[0]);
+    const float r1 = 1.0/sqrt(v[1]);
+    const float r2 = 1.0/sqrt(v[2]);
+    const float r3 = 1.0/sqrt(v[3]);
+    return fvec4(r0, r1, r2, r3);
+}
+
+// Element-wise square root.
+static inline fvec4 sqrt(const fvec4& v) {
+    const __m128 roots = vec_sqrt(v.val);
+    return fvec4(roots);
+}
+
+#endif /*OPENMM_VECTORIZE_PPC_H_*/
+
--- a/platforms/cpu/sharedTarget/CMakeLists.txt
+++ b/platforms/cpu/sharedTarget/CMakeLists.txt
 FOREACH(file ${SOURCE_FILES})
-    IF (file MATCHES ".*Vec8.*")
-        IF (MSVC)
+    IF(file MATCHES ".*Vec8.*")
+        IF(MSVC)
            SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} /arch:AVX /D__AVX__")
-        ELSE (MSVC)
-            IF (NOT ANDROID)
-                SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -msse4.1 -mavx")
-            ENDIF (NOT ANDROID)
-        ENDIF (MSVC)
-    ELSE (file MATCHES ".*Vec8.*")
-        IF (NOT MSVC)
-            IF (NOT ANDROID)
-                SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -msse4.1")
-            ENDIF (NOT ANDROID)
-        ENDIF (NOT MSVC)
-    ENDIF (file MATCHES ".*Vec8.*")
+        ELSEIF(X86)
+            SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -msse4.1 -mavx")
+        ELSE()
+            SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS}")
+        ENDIF()
+    ELSE()
+        IF(X86 AND NOT MSVC)
+            SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -msse4.1")
+        ENDIF()
+    ENDIF()
 ENDFOREACH(file)
 ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})


--- a/platforms/cpu/staticTarget/CMakeLists.txt
+++ b/platforms/cpu/staticTarget/CMakeLists.txt
 FOREACH(file ${SOURCE_FILES})
-    IF (file MATCHES ".*Vec8.*")
-		IF (MSVC)
+    IF(file MATCHES ".*Vec8.*")
+        IF(MSVC)
            SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} /arch:AVX /D__AVX__")
-        ELSEIF (PNACL)
-            SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS}")
-		ELSE (MSVC)
+        ELSEIF(X86)
            SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -msse4.1 -mavx")
-		ENDIF (MSVC)
-    ELSE (file MATCHES ".*Vec8.*")
-		IF (NOT (MSVC OR ANDROID OR PNACL))
+        ELSE()
+            SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS}")
+        ENDIF()
+    ELSE()
+        IF(X86 AND NOT MSVC)
            SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -msse4.1")
-		ENDIF (NOT (MSVC OR ANDROID OR PNACL))
-    ENDIF (file MATCHES ".*Vec8.*")
+        ENDIF()
+    ENDIF()
 ENDFOREACH(file)
 ADD_LIBRARY(${STATIC_TARGET} STATIC ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})


--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
-#
-# Testing
-#
-
-ENABLE_TESTING()
-
-# Automatically create tests using files named "Test*.cpp"
-FILE(GLOB TEST_PROGS "*Test*.cpp")
-FOREACH(TEST_PROG ${TEST_PROGS})
-    GET_FILENAME_COMPONENT(TEST_ROOT ${TEST_PROG} NAME_WE)
-    ADD_EXECUTABLE(${TEST_ROOT} ${TEST_PROG})
-    IF (OPENMM_BUILD_SHARED_LIB)
-        TARGET_LINK_LIBRARIES(${TEST_ROOT} ${SHARED_TARGET})
-    ELSE (OPENMM_BUILD_SHARED_LIB)
-        TARGET_LINK_LIBRARIES(${TEST_ROOT} ${STATIC_TARGET})
-    ENDIF (OPENMM_BUILD_SHARED_LIB)
-    SET(EXTRA_TEST_FLAGS "${EXTRA_COMPILE_FLAGS}")
-    IF ((${TEST_ROOT} MATCHES TestVectorize) AND NOT (MSVC OR ANDROID OR PNACL))
-        SET(EXTRA_TEST_FLAGS "${EXTRA_COMPILE_FLAGS} -msse4.1")
-    ENDIF ((${TEST_ROOT} MATCHES TestVectorize) AND NOT (MSVC OR ANDROID OR PNACL))
-    IF ((${TEST_ROOT} MATCHES TestVectorize8) AND NOT (MSVC OR ANDROID OR PNACL))
-        SET(EXTRA_TEST_FLAGS "${EXTRA_COMPILE_FLAGS} -mavx")
-    ENDIF ((${TEST_ROOT} MATCHES TestVectorize8) AND NOT (MSVC OR ANDROID OR PNACL))
-    SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_TEST_FLAGS}")
-    ADD_TEST(${TEST_ROOT} ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT})
-ENDFOREACH(TEST_PROG ${TEST_PROGS})
-
+#
+# Testing
+#
+
+ENABLE_TESTING()
+
+# Automatically create one test executable per source file named "*Test*.cpp".
+FILE(GLOB TEST_PROGS "*Test*.cpp")
+FOREACH(TEST_PROG ${TEST_PROGS})
+    GET_FILENAME_COMPONENT(TEST_ROOT ${TEST_PROG} NAME_WE)
+    # TEST_ROOT is referenced by name in the IF() conditions below (instead of
+    # being expanded with ${...}) so that its value is compared as-is and can
+    # never be re-interpreted as the name of another variable.
+
+    # TestVectorize8 is only built on x86; everywhere else it is skipped.
+    IF((TEST_ROOT MATCHES "TestVectorize8") AND NOT X86)
+        CONTINUE()
+    ENDIF()
+    ADD_EXECUTABLE(${TEST_ROOT} ${TEST_PROG})
+    IF(OPENMM_BUILD_SHARED_LIB)
+        TARGET_LINK_LIBRARIES(${TEST_ROOT} ${SHARED_TARGET})
+    ELSE()
+        TARGET_LINK_LIBRARIES(${TEST_ROOT} ${STATIC_TARGET})
+    ENDIF()
+    # The vectorization tests get extra instruction set flags, but only on x86
+    # and only for compilers other than MSVC.  TestVectorize8 also matches the
+    # first pattern; the second block then replaces its flags with -mavx.
+    SET(EXTRA_TEST_FLAGS "${EXTRA_COMPILE_FLAGS}")
+    IF((TEST_ROOT MATCHES "TestVectorize") AND X86 AND NOT MSVC)
+        SET(EXTRA_TEST_FLAGS "${EXTRA_COMPILE_FLAGS} -msse4.1")
+    ENDIF()
+    IF((TEST_ROOT MATCHES "TestVectorize8") AND X86 AND NOT MSVC)
+        SET(EXTRA_TEST_FLAGS "${EXTRA_COMPILE_FLAGS} -mavx")
+    ENDIF()
+    SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_TEST_FLAGS}")
+    ADD_TEST(${TEST_ROOT} ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT})
+ENDFOREACH()
+