Commit 19d2885a authored by Lee-Ping's avatar Lee-Ping
Browse files

Merge github.com:SimTk/openmm

parents 99ef4344 57a6768e
#ifndef OPENMM_THREAD_POOL_H_
#define OPENMM_THREAD_POOL_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#define NOMINMAX
#include "windowsExport.h"
#include <pthread.h>
#include <vector>
namespace OpenMM {
/**
* A ThreadPool creates a set of worker threads that can be used to execute tasks in parallel.
* After creating a ThreadPool, call execute() to start a task running then waitForThreads()
* to block until all threads have finished. You also can synchronize the threads in the middle
* of the task by having them call syncThreads(). In this case, the parent thread should call
* waitForThreads() an additional time; each call waits until all worker threads have reached the
* next syncThreads(), and the final call waits until they exit from the Task's execute() method.
* After calling waitForThreads() to block at a synchronization point, the parent thread should
* call resumeThreads() to instruct the worker threads to resume.
*/
class OPENMM_EXPORT ThreadPool {
public:
class Task;
class ThreadData;
/**
* Create a ThreadPool. The implementation starts one worker thread for each processor
* reported by getNumProcessors(), and does not return until every worker is waiting
* to be given a Task.
*/
ThreadPool();
/**
* Tell every worker thread to exit, wait for all of them to terminate, and release
* the mutex and condition variables owned by the pool.
*/
~ThreadPool();
/**
* Get the number of worker threads in the pool.
*/
int getNumThreads() const;
/**
* Execute a Task in parallel on the worker threads.
*/
void execute(Task& task);
/**
* This is called by the worker threads to block until all threads have reached the same point
* and the master thread instructs them to continue by calling resumeThreads().
*/
void syncThreads();
/**
* This is called by the master thread to wait until all threads have completed the Task. Alternatively,
* if the threads call syncThreads(), this blocks until all threads have reached the synchronization point.
*/
void waitForThreads();
/**
* Instruct the threads to resume running after blocking at a synchronization point.
*/
void resumeThreads();
private:
// NOTE(review): isDeleted is not read or written by the ThreadPool implementation; the
// worker threads test ThreadData::isDeleted instead. Confirm whether this member is needed.
bool isDeleted;
// numThreads is the number of worker threads. waitCount is the number of workers that have
// entered syncThreads() since the last resumeThreads(); it is only modified while holding lock.
int numThreads, waitCount;
// Handles of the worker threads, joined by the destructor.
std::vector<pthread_t> thread;
// One ThreadData per worker. Each worker deletes its own ThreadData when it exits.
std::vector<ThreadData*> threadData;
// startCondition is broadcast to release workers blocked in syncThreads(). endCondition is
// signaled by each worker entering syncThreads(), and is what waitForThreads() waits on.
pthread_cond_t startCondition, endCondition;
// Mutex used with both condition variables; it also protects waitCount.
// NOTE(review): the class owns pthread objects but is implicitly copyable; copying a
// ThreadPool looks unsafe - confirm that no caller copies it.
pthread_mutex_t lock;
};
/**
* This defines a task that can be executed in parallel by the worker threads.
*/
class OPENMM_EXPORT ThreadPool::Task {
public:
/**
* Execute the task on each thread.
*
* @param pool the ThreadPool being used to execute the task
* @param threadIndex the index of the thread invoking this method
*/
virtual void execute(ThreadPool& pool, int threadIndex) = 0;
};
} // namespace OpenMM
#endif // OPENMM_THREAD_POOL_H_
...@@ -44,6 +44,7 @@ ...@@ -44,6 +44,7 @@
#include <dlfcn.h> #include <dlfcn.h>
#else #else
#ifdef WIN32 #ifdef WIN32
#define NOMINMAX
#include <windows.h> #include <windows.h>
#else #else
#include <dlfcn.h> #include <dlfcn.h>
......
...@@ -55,58 +55,62 @@ public: ...@@ -55,58 +55,62 @@ public:
return val; return val;
} }
float operator[](int i) const { float operator[](int i) const {
int resultBits = _mm_extract_ps(val, i); float result[4];
return *((float*) &resultBits); store(result);
return result[i];
} }
void store(float* v) const { void store(float* v) const {
_mm_storeu_ps(v, val); _mm_storeu_ps(v, val);
} }
fvec4 operator+(fvec4 other) const { fvec4 operator+(const fvec4& other) const {
return _mm_add_ps(val, other); return _mm_add_ps(val, other);
} }
fvec4 operator-(fvec4 other) const { fvec4 operator-(const fvec4& other) const {
return _mm_sub_ps(val, other); return _mm_sub_ps(val, other);
} }
fvec4 operator*(fvec4 other) const { fvec4 operator*(const fvec4& other) const {
return _mm_mul_ps(val, other); return _mm_mul_ps(val, other);
} }
fvec4 operator/(fvec4 other) const { fvec4 operator/(const fvec4& other) const {
return _mm_div_ps(val, other); return _mm_div_ps(val, other);
} }
void operator+=(fvec4 other) { void operator+=(const fvec4& other) {
val = _mm_add_ps(val, other); val = _mm_add_ps(val, other);
} }
void operator-=(fvec4 other) { void operator-=(const fvec4& other) {
val = _mm_sub_ps(val, other); val = _mm_sub_ps(val, other);
} }
void operator*=(fvec4 other) { void operator*=(const fvec4& other) {
val = _mm_mul_ps(val, other); val = _mm_mul_ps(val, other);
} }
void operator/=(fvec4 other) { void operator/=(const fvec4& other) {
val = _mm_div_ps(val, other); val = _mm_div_ps(val, other);
} }
fvec4 operator-() const { fvec4 operator-() const {
return _mm_sub_ps(_mm_set1_ps(0.0f), val); return _mm_sub_ps(_mm_set1_ps(0.0f), val);
} }
fvec4 operator&(fvec4 other) const { fvec4 operator&(const fvec4& other) const {
return _mm_and_ps(val, other); return _mm_and_ps(val, other);
} }
fvec4 operator==(fvec4 other) const { fvec4 operator|(const fvec4& other) const {
return _mm_or_ps(val, other);
}
fvec4 operator==(const fvec4& other) const {
return _mm_cmpeq_ps(val, other); return _mm_cmpeq_ps(val, other);
} }
fvec4 operator!=(fvec4 other) const { fvec4 operator!=(const fvec4& other) const {
return _mm_cmpneq_ps(val, other); return _mm_cmpneq_ps(val, other);
} }
fvec4 operator>(fvec4 other) const { fvec4 operator>(const fvec4& other) const {
return _mm_cmpgt_ps(val, other); return _mm_cmpgt_ps(val, other);
} }
fvec4 operator<(fvec4 other) const { fvec4 operator<(const fvec4& other) const {
return _mm_cmplt_ps(val, other); return _mm_cmplt_ps(val, other);
} }
fvec4 operator>=(fvec4 other) const { fvec4 operator>=(const fvec4& other) const {
return _mm_cmpge_ps(val, other); return _mm_cmpge_ps(val, other);
} }
fvec4 operator<=(fvec4 other) const { fvec4 operator<=(const fvec4& other) const {
return _mm_cmple_ps(val, other); return _mm_cmple_ps(val, other);
} }
operator ivec4() const; operator ivec4() const;
...@@ -128,38 +132,58 @@ public: ...@@ -128,38 +132,58 @@ public:
return val; return val;
} }
int operator[](int i) const { int operator[](int i) const {
return _mm_extract_epi32(val, i); int result[4];
store(result);
return result[i];
} }
void store(int* v) const { void store(int* v) const {
_mm_storeu_si128((__m128i*) v, val); _mm_storeu_si128((__m128i*) v, val);
} }
ivec4 operator+(ivec4 other) const { ivec4 operator+(const ivec4& other) const {
return _mm_add_epi32(val, other); return _mm_add_epi32(val, other);
} }
ivec4 operator-(ivec4 other) const { ivec4 operator-(const ivec4& other) const {
return _mm_sub_epi32(val, other); return _mm_sub_epi32(val, other);
} }
ivec4 operator*(ivec4 other) const { ivec4 operator*(const ivec4& other) const {
return _mm_mul_epi32(val, other); return _mm_mul_epi32(val, other);
} }
void operator+=(ivec4 other) { void operator+=(const ivec4& other) {
val = _mm_add_epi32(val, other); val = _mm_add_epi32(val, other);
} }
void operator-=(ivec4 other) { void operator-=(const ivec4& other) {
val = _mm_sub_epi32(val, other); val = _mm_sub_epi32(val, other);
} }
void operator*=(ivec4 other) { void operator*=(const ivec4& other) {
val = _mm_mul_epi32(val, other); val = _mm_mul_epi32(val, other);
} }
ivec4 operator-() const { ivec4 operator-() const {
return _mm_sub_epi32(_mm_set1_epi32(0), val); return _mm_sub_epi32(_mm_set1_epi32(0), val);
} }
ivec4 operator&(ivec4 other) const { ivec4 operator&(const ivec4& other) const {
return _mm_and_si128(val, other); return _mm_and_si128(val, other);
} }
ivec4 operator==(ivec4 other) const { ivec4 operator|(const ivec4& other) const {
return _mm_or_si128(val, other);
}
ivec4 operator==(const ivec4& other) const {
return _mm_cmpeq_epi32(val, other); return _mm_cmpeq_epi32(val, other);
} }
ivec4 operator!=(const ivec4& other) const {
return _mm_xor_si128(*this==other, _mm_set1_epi32(0xFFFFFFFF));
}
ivec4 operator>(const ivec4& other) const {
return _mm_cmpgt_epi32(val, other);
}
ivec4 operator<(const ivec4& other) const {
return _mm_cmplt_epi32(val, other);
}
ivec4 operator>=(const ivec4& other) const {
return _mm_xor_si128(_mm_cmplt_epi32(val, other), _mm_set1_epi32(0xFFFFFFFF));
}
ivec4 operator<=(const ivec4& other) const {
return _mm_xor_si128(_mm_cmpgt_epi32(val, other), _mm_set1_epi32(0xFFFFFFFF));
}
operator fvec4() const; operator fvec4() const;
}; };
...@@ -175,74 +199,88 @@ inline ivec4::operator fvec4() const { ...@@ -175,74 +199,88 @@ inline ivec4::operator fvec4() const {
// Functions that operate on fvec4s. // Functions that operate on fvec4s.
static inline fvec4 floor(fvec4 v) { static inline fvec4 floor(const fvec4& v) {
return fvec4(_mm_floor_ps(v.val)); return fvec4(_mm_floor_ps(v.val));
} }
static inline fvec4 ceil(fvec4 v) { static inline fvec4 ceil(const fvec4& v) {
return fvec4(_mm_ceil_ps(v.val)); return fvec4(_mm_ceil_ps(v.val));
} }
static inline fvec4 round(fvec4 v) { static inline fvec4 round(const fvec4& v) {
return fvec4(_mm_round_ps(v.val, _MM_FROUND_TO_NEAREST_INT)); return fvec4(_mm_round_ps(v.val, _MM_FROUND_TO_NEAREST_INT));
} }
static inline fvec4 min(fvec4 v1, fvec4 v2) { static inline fvec4 min(const fvec4& v1, const fvec4& v2) {
return fvec4(_mm_min_ps(v1.val, v2.val)); return fvec4(_mm_min_ps(v1.val, v2.val));
} }
static inline fvec4 max(fvec4 v1, fvec4 v2) { static inline fvec4 max(const fvec4& v1, const fvec4& v2) {
return fvec4(_mm_max_ps(v1.val, v2.val)); return fvec4(_mm_max_ps(v1.val, v2.val));
} }
static inline fvec4 abs(fvec4 v) { static inline fvec4 abs(const fvec4& v) {
static const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF)); static const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF));
return fvec4(_mm_and_ps(v.val, mask)); return fvec4(_mm_and_ps(v.val, mask));
} }
static inline fvec4 sqrt(fvec4 v) { static inline fvec4 sqrt(const fvec4& v) {
return fvec4(_mm_sqrt_ps(v.val)); return fvec4(_mm_sqrt_ps(v.val));
} }
static inline float dot3(fvec4 v1, fvec4 v2) { static inline float dot3(const fvec4& v1, const fvec4& v2) {
return _mm_cvtss_f32(_mm_dp_ps(v1, v2, 0x71)); return _mm_cvtss_f32(_mm_dp_ps(v1, v2, 0x71));
} }
static inline float dot4(fvec4 v1, fvec4 v2) { static inline float dot4(const fvec4& v1, const fvec4& v2) {
return _mm_cvtss_f32(_mm_dp_ps(v1, v2, 0xF1)); return _mm_cvtss_f32(_mm_dp_ps(v1, v2, 0xF1));
} }
static inline void transpose(fvec4& v1, fvec4& v2, fvec4& v3, fvec4& v4) {
_MM_TRANSPOSE4_PS(v1, v2, v3, v4);
}
// Functions that operate on ivec4s. // Functions that operate on ivec4s.
static inline ivec4 min(ivec4 v1, ivec4 v2) { static inline ivec4 min(const ivec4& v1, const ivec4& v2) {
return ivec4(_mm_min_epi32(v1.val, v2.val)); return ivec4(_mm_min_epi32(v1.val, v2.val));
} }
static inline ivec4 max(ivec4 v1, ivec4 v2) { static inline ivec4 max(const ivec4& v1, const ivec4& v2) {
return ivec4(_mm_max_epi32(v1.val, v2.val)); return ivec4(_mm_max_epi32(v1.val, v2.val));
} }
static inline ivec4 abs(ivec4 v) { static inline ivec4 abs(const ivec4& v) {
return ivec4(_mm_abs_epi32(v.val)); return ivec4(_mm_abs_epi32(v.val));
} }
static inline bool any(const ivec4& v) {
return !_mm_test_all_zeros(v, _mm_set1_epi32(0xFFFFFFFF));
}
// Mathematical operators involving a scalar and a vector. // Mathematical operators involving a scalar and a vector.
static inline fvec4 operator+(float v1, fvec4 v2) { static inline fvec4 operator+(float v1, const fvec4& v2) {
return fvec4(v1)+v2; return fvec4(v1)+v2;
} }
static inline fvec4 operator-(float v1, fvec4 v2) { static inline fvec4 operator-(float v1, const fvec4& v2) {
return fvec4(v1)-v2; return fvec4(v1)-v2;
} }
static inline fvec4 operator*(float v1, fvec4 v2) { static inline fvec4 operator*(float v1, const fvec4& v2) {
return fvec4(v1)*v2; return fvec4(v1)*v2;
} }
static inline fvec4 operator/(float v1, fvec4 v2) { static inline fvec4 operator/(float v1, const fvec4& v2) {
return fvec4(v1)/v2; return fvec4(v1)/v2;
} }
// Operations for blending fvec4s based on an ivec4.
static inline fvec4 blend(const fvec4& v1, const fvec4& v2, const ivec4& mask) {
return fvec4(_mm_blendv_ps(v1.val, v2.val, _mm_castsi128_ps(mask.val)));
}
#endif /*OPENMM_VECTORIZE_H_*/ #endif /*OPENMM_VECTORIZE_H_*/
#ifndef OPENMM_VECTORIZE8_H_
#define OPENMM_VECTORIZE8_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2014 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "vectorize.h"
#include <immintrin.h>
// This file defines classes and functions to simplify vectorizing code with AVX.
class ivec8;
/**
 * An eight element vector of floats.
 *
 * This wraps a single __m256 value and overloads the arithmetic, bitwise and comparison
 * operators so that AVX code can be written with ordinary expression syntax. The comparison
 * operators do not return a bool: they return an fvec8 mask in which all bits of an element
 * are set when the comparison is true for that element, and cleared otherwise.
 */
class fvec8 {
public:
    __m256 val;

    /** Create a vector whose contents are not initialized. */
    fvec8() {}
    /** Create a vector with all eight elements set to v. */
    fvec8(float v) : val(_mm256_set1_ps(v)) {}
    /**
     * Create a vector from eight values. _mm256_set_ps() takes its arguments from the highest
     * element to the lowest, so they are passed in reverse: v1 becomes element 0, v8 element 7.
     */
    fvec8(float v1, float v2, float v3, float v4, float v5, float v6, float v7, float v8) : val(_mm256_set_ps(v8, v7, v6, v5, v4, v3, v2, v1)) {}
    /** Wrap an existing __m256 value. */
    fvec8(__m256 v) : val(v) {}
    /** Load eight consecutive floats starting at v, using an unaligned load. */
    fvec8(const float* v) : val(_mm256_loadu_ps(v)) {}
    /** Allow an fvec8 to be passed directly to intrinsics that expect a __m256. */
    operator __m256() const {
        return val;
    }
    /** Get elements 0-3 as an fvec4. */
    fvec4 lowerVec() const {
        return _mm256_castps256_ps128(val);
    }
    /** Get elements 4-7 as an fvec4. */
    fvec4 upperVec() const {
        return _mm256_extractf128_ps(val, 1);
    }
    /** Store the eight elements to consecutive floats starting at v, using an unaligned store. */
    void store(float* v) const {
        _mm256_storeu_ps(v, val);
    }

    // Element-wise arithmetic.

    fvec8 operator+(const fvec8& other) const {
        return _mm256_add_ps(val, other);
    }
    fvec8 operator-(const fvec8& other) const {
        return _mm256_sub_ps(val, other);
    }
    fvec8 operator*(const fvec8& other) const {
        return _mm256_mul_ps(val, other);
    }
    fvec8 operator/(const fvec8& other) const {
        return _mm256_div_ps(val, other);
    }
    void operator+=(const fvec8& other) {
        val = _mm256_add_ps(val, other);
    }
    void operator-=(const fvec8& other) {
        val = _mm256_sub_ps(val, other);
    }
    void operator*=(const fvec8& other) {
        val = _mm256_mul_ps(val, other);
    }
    void operator/=(const fvec8& other) {
        val = _mm256_div_ps(val, other);
    }
    /** Negate every element, computed as (0 - val). */
    fvec8 operator-() const {
        return _mm256_sub_ps(_mm256_set1_ps(0.0f), val);
    }

    // Bitwise operations on the raw bits of each element. These are mainly useful for
    // combining the masks produced by the comparison operators.

    fvec8 operator&(const fvec8& other) const {
        return _mm256_and_ps(val, other);
    }
    fvec8 operator|(const fvec8& other) const {
        return _mm256_or_ps(val, other);
    }

    // Element-wise comparisons. Each returns a mask (see the class description). The ordered
    // predicates used for ==, >, <, >= and <= are false for an element if either operand is NaN.

    fvec8 operator==(const fvec8& other) const {
        return _mm256_cmp_ps(val, other, _CMP_EQ_OQ);
    }
    /**
     * Element-wise "not equal" comparison. This uses the unordered predicate, so an element
     * compares as "not equal" when either operand is NaN. That agrees with the scalar !=
     * operator and with fvec4::operator!=, which is implemented with _mm_cmpneq_ps(). The
     * ordered predicate (_CMP_NEQ_OQ) would instead report NaN elements as equal.
     */
    fvec8 operator!=(const fvec8& other) const {
        return _mm256_cmp_ps(val, other, _CMP_NEQ_UQ);
    }
    fvec8 operator>(const fvec8& other) const {
        return _mm256_cmp_ps(val, other, _CMP_GT_OQ);
    }
    fvec8 operator<(const fvec8& other) const {
        return _mm256_cmp_ps(val, other, _CMP_LT_OQ);
    }
    fvec8 operator>=(const fvec8& other) const {
        return _mm256_cmp_ps(val, other, _CMP_GE_OQ);
    }
    fvec8 operator<=(const fvec8& other) const {
        return _mm256_cmp_ps(val, other, _CMP_LE_OQ);
    }
    /** Convert each element to an int; defined after ivec8 has been declared. */
    operator ivec8() const;
};
/**
 * An eight element vector of ints, stored in a single __m256i value.
 *
 * Only construction, load/store, splitting into halves, bitwise AND/OR and conversion to
 * fvec8 are provided.
 */
class ivec8 {
public:
    __m256i val;

    /** Create a vector whose contents are not initialized. */
    ivec8() {}
    /** Create a vector with all eight elements set to v. */
    ivec8(int v) : val(_mm256_set1_epi32(v)) {}
    /** Create a vector from eight values; v1 becomes element 0 and v8 becomes element 7. */
    ivec8(int v1, int v2, int v3, int v4, int v5, int v6, int v7, int v8) : val(_mm256_set_epi32(v8, v7, v6, v5, v4, v3, v2, v1)) {}
    /** Wrap an existing __m256i value. */
    ivec8(__m256i v) : val(v) {}
    /** Load eight consecutive ints starting at v, using an unaligned load. */
    ivec8(const int* v) : val(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(v))) {}
    /** Allow an ivec8 to be passed directly to intrinsics that expect a __m256i. */
    operator __m256i() const {
        return val;
    }
    /** Get elements 0-3 as an ivec4. */
    ivec4 lowerVec() const {
        const __m128i lowHalf = _mm256_castsi256_si128(val);
        return lowHalf;
    }
    /** Get elements 4-7 as an ivec4. */
    ivec4 upperVec() const {
        const __m128i highHalf = _mm256_extractf128_si256(val, 1);
        return highHalf;
    }
    /** Store the eight elements to consecutive ints starting at v, using an unaligned store. */
    void store(int* v) const {
        _mm256_storeu_si256(reinterpret_cast<__m256i*>(v), val);
    }
    /**
     * Bitwise AND. The operands are reinterpreted as float vectors so that the
     * floating point AND instruction can be used, then the result is reinterpreted back.
     */
    ivec8 operator&(const ivec8& other) const {
        const __m256 lhsBits = _mm256_castsi256_ps(val);
        const __m256 rhsBits = _mm256_castsi256_ps(other.val);
        return _mm256_castps_si256(_mm256_and_ps(lhsBits, rhsBits));
    }
    /**
     * Bitwise OR, implemented with the same reinterpretation as operator&.
     */
    ivec8 operator|(const ivec8& other) const {
        const __m256 lhsBits = _mm256_castsi256_ps(val);
        const __m256 rhsBits = _mm256_castsi256_ps(other.val);
        return _mm256_castps_si256(_mm256_or_ps(lhsBits, rhsBits));
    }
    /** Convert each element to a float; defined after the class. */
    operator fvec8() const;
};
// Conversion operators.
inline fvec8::operator ivec8() const {
    // _mm256_cvttps_epi32 is the truncating float -> int conversion.
    const __m256i truncated = _mm256_cvttps_epi32(val);
    return truncated;
}
inline ivec8::operator fvec8() const {
    // Convert each 32 bit int element to a float.
    const __m256 asFloats = _mm256_cvtepi32_ps(val);
    return asFloats;
}
// Functions that operate on fvec8s.
/**
 * Round every element of v toward negative infinity.
 */
static inline fvec8 floor(const fvec8& v) {
    // _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC is the rounding control value 0x09.
    const __m256 roundedDown = _mm256_round_ps(v.val, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
    return fvec8(roundedDown);
}
/**
 * Round every element of v toward positive infinity.
 */
static inline fvec8 ceil(const fvec8& v) {
    // _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC is the rounding control value 0x0A.
    const __m256 roundedUp = _mm256_round_ps(v.val, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
    return fvec8(roundedUp);
}
/**
 * Round every element of v to the nearest integer value.
 */
static inline fvec8 round(const fvec8& v) {
    const __m256 nearest = _mm256_round_ps(v.val, _MM_FROUND_TO_NEAREST_INT);
    return fvec8(nearest);
}
/**
 * Compute the element-wise minimum of two vectors.
 */
static inline fvec8 min(const fvec8& v1, const fvec8& v2) {
    const __m256 smaller = _mm256_min_ps(v1.val, v2.val);
    return fvec8(smaller);
}
/**
 * Compute the element-wise maximum of two vectors.
 */
static inline fvec8 max(const fvec8& v1, const fvec8& v2) {
    const __m256 larger = _mm256_max_ps(v1.val, v2.val);
    return fvec8(larger);
}
/**
 * Compute the absolute value of every element of v.
 */
static inline fvec8 abs(const fvec8& v) {
    // 0x7FFFFFFF has every bit set except the top one, so ANDing with it clears
    // the sign bit of each float and leaves the other bits unchanged.
    static const __m256 clearSignBit = _mm256_castsi256_ps(_mm256_set1_epi32(0x7FFFFFFF));
    const __m256 magnitude = _mm256_and_ps(v.val, clearSignBit);
    return fvec8(magnitude);
}
/**
 * Compute the square root of every element of v.
 */
static inline fvec8 sqrt(const fvec8& v) {
    const __m256 roots = _mm256_sqrt_ps(v.val);
    return fvec8(roots);
}
static inline float dot8(const fvec8& v1, const fvec8& v2) {
fvec8 result = _mm256_dp_ps(v1, v2, 0xF1);
return _mm_cvtss_f32(result.lowerVec())+_mm_cvtss_f32(result.upperVec());
}
static inline void transpose(const fvec4& in1, const fvec4& in2, const fvec4& in3, const fvec4& in4, const fvec4& in5, const fvec4& in6, const fvec4& in7, const fvec4& in8, fvec8& out1, fvec8& out2, fvec8& out3, fvec8& out4) {
fvec4 i1 = in1, i2 = in2, i3 = in3, i4 = in4;
fvec4 i5 = in5, i6 = in6, i7 = in7, i8 = in8;
_MM_TRANSPOSE4_PS(i1, i2, i3, i4);
_MM_TRANSPOSE4_PS(i5, i6, i7, i8);
#ifdef _MSC_VER
// Visual Studio has a bug in _mm256_castps128_ps256, so we have to use the more expensive _mm256_insertf128_ps.
out1 = _mm256_insertf128_ps(out1, i1, 0);
out2 = _mm256_insertf128_ps(out2, i2, 0);
out3 = _mm256_insertf128_ps(out3, i3, 0);
out4 = _mm256_insertf128_ps(out4, i4, 0);
#else
out1 = _mm256_castps128_ps256(i1);
out2 = _mm256_castps128_ps256(i2);
out3 = _mm256_castps128_ps256(i3);
out4 = _mm256_castps128_ps256(i4);
#endif
out1 = _mm256_insertf128_ps(out1, i5, 1);
out2 = _mm256_insertf128_ps(out2, i6, 1);
out3 = _mm256_insertf128_ps(out3, i7, 1);
out4 = _mm256_insertf128_ps(out4, i8, 1);
}
static inline void transpose(const fvec8& in1, const fvec8& in2, const fvec8& in3, const fvec8& in4, fvec4& out1, fvec4& out2, fvec4& out3, fvec4& out4, fvec4& out5, fvec4& out6, fvec4& out7, fvec4& out8) {
out1 = in1.lowerVec();
out2 = in2.lowerVec();
out3 = in3.lowerVec();
out4 = in4.lowerVec();
_MM_TRANSPOSE4_PS(out1, out2, out3, out4);
out5 = in1.upperVec();
out6 = in2.upperVec();
out7 = in3.upperVec();
out8 = in4.upperVec();
_MM_TRANSPOSE4_PS(out5, out6, out7, out8);
}
// Functions that operate on ivec8s.
/**
 * Determine whether any bit of any element of v is set.
 */
static inline bool any(const ivec8& v) {
    // _mm256_testz_si256 returns 1 when (v AND allBits) is entirely zero, i.e. when v is zero.
    const __m256i allBits = _mm256_set1_epi32(0xFFFFFFFF);
    const int everythingZero = _mm256_testz_si256(v, allBits);
    return everythingZero == 0;
}
// Mathematical operators involving a scalar and a vector.
/**
 * Add a scalar to every element of a vector.
 */
static inline fvec8 operator+(float v1, const fvec8& v2) {
    const fvec8 broadcast(v1);
    return broadcast+v2;
}
/**
 * Subtract every element of a vector from a scalar.
 */
static inline fvec8 operator-(float v1, const fvec8& v2) {
    const fvec8 broadcast(v1);
    return broadcast-v2;
}
/**
 * Multiply every element of a vector by a scalar.
 */
static inline fvec8 operator*(float v1, const fvec8& v2) {
    const fvec8 broadcast(v1);
    return broadcast*v2;
}
/**
 * Divide a scalar by every element of a vector.
 */
static inline fvec8 operator/(float v1, const fvec8& v2) {
    const fvec8 broadcast(v1);
    return broadcast/v2;
}
// Operations for blending fvec8s based on an ivec8.
/**
 * Select elements from two vectors based on a mask. The mask is reinterpreted as floats and
 * passed to _mm256_blendv_ps, which takes an element from v2 where the top bit of the
 * corresponding mask element is set and from v1 otherwise.
 */
static inline fvec8 blend(const fvec8& v1, const fvec8& v2, const ivec8& mask) {
    const __m256 selector = _mm256_castsi256_ps(mask.val);
    const __m256 chosen = _mm256_blendv_ps(v1.val, v2.val, selector);
    return fvec8(chosen);
}
#endif /*OPENMM_VECTORIZE8_H_*/
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include "openmm/State.h" #include "openmm/State.h"
#include "openmm/VirtualSite.h" #include "openmm/VirtualSite.h"
#include "openmm/Context.h" #include "openmm/Context.h"
#include <algorithm>
#include <iostream> #include <iostream>
#include <map> #include <map>
#include <utility> #include <utility>
...@@ -75,11 +76,27 @@ ContextImpl::ContextImpl(Context& owner, const System& system, Integrator& integ ...@@ -75,11 +76,27 @@ ContextImpl::ContextImpl(Context& owner, const System& system, Integrator& integ
throw OpenMMException("A constraint cannot involve a massless particle"); throw OpenMMException("A constraint cannot involve a massless particle");
} }
// Validate the list of properties.
const vector<string>& platformProperties = platform->getPropertyNames();
for (map<string, string>::const_iterator iter = properties.begin(); iter != properties.end(); ++iter) {
bool valid = false;
for (int i = 0; i < (int) platformProperties.size(); i++)
if (platformProperties[i] == iter->first) {
valid = true;
break;
}
if (!valid)
throw OpenMMException("Illegal property name: "+iter->first);
}
// Find the list of kernels required. // Find the list of kernels required.
vector<string> kernelNames; vector<string> kernelNames;
kernelNames.push_back(CalcForcesAndEnergyKernel::Name()); kernelNames.push_back(CalcForcesAndEnergyKernel::Name());
kernelNames.push_back(UpdateStateDataKernel::Name()); kernelNames.push_back(UpdateStateDataKernel::Name());
kernelNames.push_back(ApplyConstraintsKernel::Name());
kernelNames.push_back(VirtualSitesKernel::Name());
for (int i = 0; i < system.getNumForces(); ++i) { for (int i = 0; i < system.getNumForces(); ++i) {
forceImpls.push_back(system.getForce(i).createImpl()); forceImpls.push_back(system.getForce(i).createImpl());
map<string, double> forceParameters = forceImpls[forceImpls.size()-1]->getDefaultParameters(); map<string, double> forceParameters = forceImpls[forceImpls.size()-1]->getDefaultParameters();
...@@ -90,14 +107,40 @@ ContextImpl::ContextImpl(Context& owner, const System& system, Integrator& integ ...@@ -90,14 +107,40 @@ ContextImpl::ContextImpl(Context& owner, const System& system, Integrator& integ
hasInitializedForces = true; hasInitializedForces = true;
vector<string> integratorKernels = integrator.getKernelNames(); vector<string> integratorKernels = integrator.getKernelNames();
kernelNames.insert(kernelNames.begin(), integratorKernels.begin(), integratorKernels.end()); kernelNames.insert(kernelNames.begin(), integratorKernels.begin(), integratorKernels.end());
if (platform == 0)
this->platform = platform = &Platform::findPlatform(kernelNames); // Select a platform to use.
else if (!platform->supportsKernels(kernelNames))
vector<pair<double, Platform*> > candidatePlatforms;
if (platform == NULL) {
for (int i = 0; i < Platform::getNumPlatforms(); i++) {
Platform& p = Platform::getPlatform(i);
if (p.supportsKernels(kernelNames))
candidatePlatforms.push_back(make_pair(p.getSpeed(), &p));
}
if (candidatePlatforms.size() == 0)
throw OpenMMException("No Platform supports all the requested kernels");
sort(candidatePlatforms.begin(), candidatePlatforms.end());
}
else {
if (!platform->supportsKernels(kernelNames))
throw OpenMMException("Specified a Platform for a Context which does not support all required kernels"); throw OpenMMException("Specified a Platform for a Context which does not support all required kernels");
candidatePlatforms.push_back(make_pair(platform->getSpeed(), platform));
}
for (int i = candidatePlatforms.size()-1; i >= 0; i--) {
try {
this->platform = platform = candidatePlatforms[i].second;
platform->contextCreated(*this, properties);
break;
}
catch (...) {
if (i > 0)
continue;
throw;
}
}
// Create and initialize kernels and other objects. // Create and initialize kernels and other objects.
platform->contextCreated(*this, properties);
initializeForcesKernel = platform->createKernel(CalcForcesAndEnergyKernel::Name(), *this); initializeForcesKernel = platform->createKernel(CalcForcesAndEnergyKernel::Name(), *this);
initializeForcesKernel.getAs<CalcForcesAndEnergyKernel>().initialize(system); initializeForcesKernel.getAs<CalcForcesAndEnergyKernel>().initialize(system);
updateStateDataKernel = platform->createKernel(UpdateStateDataKernel::Name(), *this); updateStateDataKernel = platform->createKernel(UpdateStateDataKernel::Name(), *this);
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "openmm/internal/ThreadPool.h"
#include "openmm/internal/hardware.h"
using namespace std;
namespace OpenMM {
/**
 * The data passed to each worker thread: the pool it belongs to, its index within the pool,
 * a flag telling it to exit, and the Task it should run next.
 */
class ThreadPool::ThreadData {
public:
    /**
     * @param owner    the ThreadPool that created the thread
     * @param index    the index of the thread within the pool
     */
    ThreadData(ThreadPool& owner, int index) : owner(owner), index(index), isDeleted(false), currentTask(NULL) {
    }
    ThreadPool& owner;
    int index;
    // Set by the ThreadPool destructor to make the worker leave its loop.
    bool isDeleted;
    // Set by ThreadPool::execute() before the workers are resumed. It starts out as NULL
    // rather than an indeterminate pointer, since no Task has been assigned yet.
    Task* currentTask;
};
static void* threadBody(void* args) {
ThreadPool::ThreadData& data = *reinterpret_cast<ThreadPool::ThreadData*>(args);
while (true) {
// Wait for the signal to start running.
data.owner.syncThreads();
if (data.isDeleted)
break;
data.currentTask->execute(data.owner, data.index);
}
delete &data;
return 0;
}
/**
 * Create the pool. This starts one worker thread per processor reported by getNumProcessors(),
 * then blocks until every worker has made its first call to syncThreads(), so that all of them
 * are waiting for a task by the time the constructor returns.
 */
ThreadPool::ThreadPool() {
    // Give the isDeleted member a defined value; nothing else in this file assigns it.
    isDeleted = false;
    numThreads = getNumProcessors();
    pthread_cond_init(&startCondition, NULL);
    pthread_cond_init(&endCondition, NULL);
    pthread_mutex_init(&lock, NULL);
    thread.resize(numThreads);

    // Hold the lock while starting the workers. Each worker needs the lock to increment
    // waitCount, so none of them can signal endCondition before this thread waits on it.
    pthread_mutex_lock(&lock);
    waitCount = 0;
    for (int i = 0; i < numThreads; i++) {
        // The ThreadData constructor already clears the thread's isDeleted flag.
        ThreadData* data = new ThreadData(*this, i);
        threadData.push_back(data);
        // NOTE(review): the result of pthread_create() is not checked. If a thread failed to
        // start, waitCount would presumably never reach numThreads and the wait below would
        // not finish - confirm how that should be handled.
        pthread_create(&thread[i], NULL, threadBody, data);
    }
    while (waitCount < numThreads)
        pthread_cond_wait(&endCondition, &lock);
    pthread_mutex_unlock(&lock);
}
/**
 * Shut down the pool: flag every worker for deletion, wake them all, wait for them
 * to terminate, then destroy the mutex and condition variables.
 */
ThreadPool::~ThreadPool() {
    // Mark every worker so that it leaves its loop the next time it wakes up.
    for (std::vector<ThreadData*>::iterator data = threadData.begin(); data != threadData.end(); ++data)
        (*data)->isDeleted = true;

    // Wake the workers blocked in syncThreads().
    pthread_mutex_lock(&lock);
    pthread_cond_broadcast(&startCondition);
    pthread_mutex_unlock(&lock);

    // Wait for every worker to finish before releasing the objects they use.
    for (std::vector<pthread_t>::iterator worker = thread.begin(); worker != thread.end(); ++worker)
        pthread_join(*worker, NULL);
    pthread_mutex_destroy(&lock);
    pthread_cond_destroy(&startCondition);
    pthread_cond_destroy(&endCondition);
}
int ThreadPool::getNumThreads() const {
return numThreads;
}
/**
 * Start running a Task on all worker threads. This hands the task to every worker and
 * resumes them; it does not wait for the task to complete.
 */
void ThreadPool::execute(Task& task) {
    Task* const next = &task;
    for (std::vector<ThreadData*>::iterator data = threadData.begin(); data != threadData.end(); ++data)
        (*data)->currentTask = next;
    resumeThreads();
}
// Called by a worker thread: record that this worker has reached a synchronization point,
// notify the thread waiting on endCondition, then block until startCondition is signaled.
void ThreadPool::syncThreads() {
pthread_mutex_lock(&lock);
// Count this worker as waiting and wake the master thread so it can re-check waitCount.
waitCount++;
pthread_cond_signal(&endCondition);
// pthread_cond_wait() releases the lock while blocked and reacquires it before returning.
// NOTE(review): the wait is not wrapped in a loop that re-checks a predicate, so a spurious
// wakeup (which POSIX permits) would let this worker continue early. A fix appears to need
// extra shared state (e.g. a generation counter) in the class - confirm.
pthread_cond_wait(&startCondition, &lock);
pthread_mutex_unlock(&lock);
}
/**
 * Block the calling thread until every worker has called syncThreads(), which each worker
 * does both at explicit synchronization points and after finishing a task.
 */
void ThreadPool::waitForThreads() {
    pthread_mutex_lock(&lock);
    for (;;) {
        // Re-check the count after every wakeup, since endCondition is signaled once per worker.
        if (waitCount >= numThreads)
            break;
        pthread_cond_wait(&endCondition, &lock);
    }
    pthread_mutex_unlock(&lock);
}
// Release every worker that is blocked in syncThreads().
void ThreadPool::resumeThreads() {
pthread_mutex_lock(&lock);
// Reset the count of waiting workers before waking them, so the next waitForThreads()
// starts counting from zero.
waitCount = 0;
pthread_cond_broadcast(&startCondition);
pthread_mutex_unlock(&lock);
}
} // namespace OpenMM
...@@ -14,10 +14,6 @@ ...@@ -14,10 +14,6 @@
# libOpenMMCPU_static[_d].a # libOpenMMCPU_static[_d].a
#---------------------------------------------------- #----------------------------------------------------
IF (APPLE)
SET (CMAKE_OSX_DEPLOYMENT_TARGET "10.6")
ENDIF (APPLE)
SUBDIRS (tests) SUBDIRS (tests)
# The source is organized into subdirectories, but we handle them all from # The source is organized into subdirectories, but we handle them all from
...@@ -36,9 +32,9 @@ SET(STATIC_TARGET ${OPENMMCPU_LIBRARY_NAME}_static) ...@@ -36,9 +32,9 @@ SET(STATIC_TARGET ${OPENMMCPU_LIBRARY_NAME}_static)
# Ensure that debug libraries have "_d" appended to their names. # Ensure that debug libraries have "_d" appended to their names.
# CMake gets this right on Windows automatically with this definition. # CMake gets this right on Windows automatically with this definition.
IF (${CMAKE_GENERATOR} MATCHES "Visual Studio") IF (MSVC)
SET(CMAKE_DEBUG_POSTFIX "_d" CACHE INTERNAL "" FORCE) SET(CMAKE_DEBUG_POSTFIX "_d" CACHE INTERNAL "" FORCE)
ENDIF (${CMAKE_GENERATOR} MATCHES "Visual Studio") ENDIF (MSVC)
# But on Unix or Cygwin we have to add the suffix manually # But on Unix or Cygwin we have to add the suffix manually
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug) IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
......
#ifndef OPENMM_ALIGNEDARRAY_H_
#define OPENMM_ALIGNEDARRAY_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
namespace OpenMM {
/**
 * This class represents an array in memory whose starting point is guaranteed to
 * be aligned with a 16 byte boundary.  This can improve the performance of vectorized
 * code, since loads and stores are more efficient.
 *
 * The storage is raw memory obtained with new char[]: element constructors and
 * destructors are never run, so T should be a plain data type (float, int, ...).
 * Copying an AlignedArray makes a deep, byte-wise copy, so each object owns its
 * own buffer and the arrays can safely be stored in standard containers.
 */
template <class T>
class AlignedArray {
public:
    /**
     * Default constructor, to allow AlignedArrays to be used inside collections.
     * The array is empty and owns no memory.
     */
    AlignedArray() : dataSize(0), baseData(0), data(0) {
    }
    /**
     * Create an Aligned array that contains a specified number of elements.
     * The elements are left uninitialized.
     */
    AlignedArray(int size) : dataSize(0), baseData(0), data(0) {
        allocate(size);
    }
    /**
     * Copy constructor.  Allocates a new aligned buffer and copies the contents of
     * the other array into it.  Copying an array that owns no memory yields another
     * array that owns no memory.
     */
    AlignedArray(const AlignedArray& other) : dataSize(0), baseData(0), data(0) {
        if (other.baseData != 0) {
            allocate(other.dataSize);
            copyBytesFrom(other);
        }
    }
    /**
     * Copy assignment.  The previous contents of this array are released and replaced
     * by a deep copy of the other array.
     */
    AlignedArray& operator=(const AlignedArray& other) {
        if (this != &other) {
            // Build the copy first so this object is left untouched if allocation fails,
            // then trade buffers with it; the temporary frees the old storage.
            AlignedArray copy(other);
            swapWith(copy);
        }
        return *this;
    }
    ~AlignedArray() {
        delete[] baseData;
    }
    /**
     * Get the number of elements in the array.
     */
    int size() const {
        return dataSize;
    }
    /**
     * Change the size of the array.  This may cause all contents to be lost.
     * Nothing happens if the array already has the requested size.
     */
    void resize(int size) {
        if (dataSize == size)
            return;
        delete[] baseData;
        // Forget the freed buffer before allocating again, so that a failed allocation
        // cannot leave a dangling pointer for the destructor to delete a second time.
        baseData = 0;
        data = 0;
        dataSize = 0;
        allocate(size);
    }
    /**
     * Get a reference to an element of the array.  No bounds checking is performed.
     */
    T& operator[](int i) {
        return data[i];
    }
    /**
     * Get a const reference to an element of the array.  No bounds checking is performed.
     */
    const T& operator[](int i) const {
        return data[i];
    }
private:
    /**
     * Allocate storage for the given number of elements.  16 extra bytes are requested
     * so that the first element can be moved forward to the next 16 byte boundary.
     * Members are only updated once the allocation has succeeded.
     */
    void allocate(int size) {
        char* block = new char[size*sizeof(T)+16];
        // Step past the boundary, then round back down to a multiple of 16.
        char* offsetData = block+15;
        offsetData -= ((long long) offsetData)&0xF;
        baseData = block;
        data = (T*) offsetData;
        dataSize = size;
    }
    /**
     * Copy the raw bytes of another array's elements into this one.  Both arrays must
     * already have the same size.
     */
    void copyBytesFrom(const AlignedArray& other) {
        const unsigned char* source = (const unsigned char*) other.data;
        unsigned char* dest = (unsigned char*) data;
        const unsigned long long numBytes = ((unsigned long long) dataSize)*sizeof(T);
        for (unsigned long long i = 0; i < numBytes; i++)
            dest[i] = source[i];
    }
    /**
     * Exchange the storage of this array with another one.
     */
    void swapWith(AlignedArray& other) {
        const int otherSize = other.dataSize;
        char* const otherBase = other.baseData;
        T* const otherData = other.data;
        other.dataSize = dataSize;
        other.baseData = baseData;
        other.data = data;
        dataSize = otherSize;
        baseData = otherBase;
        data = otherData;
    }
    int dataSize;
    char* baseData;
    T* data;
};
} // namespace OpenMM
#endif /*OPENMM_ALIGNEDARRAY_H_*/
#ifndef OPENMM_CPUBONDFORCE_H_
#define OPENMM_CPUBONDFORCE_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2014 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "ReferenceBondIxn.h"
#include "windowsExportCpu.h"
#include "openmm/internal/ThreadPool.h"
#include <list>
#include <set>
#include <vector>
namespace OpenMM {
/**
* This class parallelizes the calculation of bonded forces.
*
* initialize() decides which bonds each thread computes; calculateForce() then computes the
* forces from all bonds, taking a ReferenceBondIxn supplied by the caller.
*/
class OPENMM_EXPORT_CPU CpuBondForce {
public:
// Task type declared here and defined elsewhere.
class ComputeForceTask;
CpuBondForce();
/**
* Analyze the set of bonds and decide which to compute with each thread.
*
* @param numAtoms the number of atoms (presumably in the whole system - TODO confirm)
* @param numBonds the number of bonds
* @param numAtomsPerBond the number of atoms involved in each bond
* @param bondAtoms the atom indices for each bond (presumably indexed [bond][atom] - TODO confirm)
* @param threads the thread pool to use
*/
void initialize(int numAtoms, int numBonds, int numAtomsPerBond, int** bondAtoms, ThreadPool& threads);
/**
* Compute the forces from all bonds.
*
* @param atomCoordinates atom coordinates
* @param parameters per-bond parameters (layout is defined by the caller - TODO confirm)
* @param forces force array
* @param totalEnergy total energy (NOTE(review): confirm whether NULL is accepted)
* @param referenceBondIxn the bond interaction passed through to each thread
*/
void calculateForce(std::vector<OpenMM::RealVec>& atomCoordinates, RealOpenMM** parameters, std::vector<OpenMM::RealVec>& forces,
RealOpenMM* totalEnergy, ReferenceBondIxn& referenceBondIxn);
/**
* This routine contains the code executed by each thread.
*
* @param threads the thread pool running this routine
* @param threadIndex the index of the thread executing the routine
*/
void threadComputeForce(ThreadPool& threads, int threadIndex, std::vector<OpenMM::RealVec>& atomCoordinates, RealOpenMM** parameters,
std::vector<OpenMM::RealVec>& forces, RealOpenMM* totalEnergy, ReferenceBondIxn& referenceBondIxn);
private:
// Helpers for distributing bonds among threads; their exact rules live in the implementation file.
bool canAssignBond(int bond, int thread, std::vector<int>& atomThread);
void assignBond(int bond, int thread, std::vector<int>& atomThread, std::vector<int>& bondThread, std::vector<std::set<int> >& atomBonds, std::list<int>& candidateBonds);
int numBonds, numAtomsPerBond;
// Not owned by anything visible in this header; presumably the caller's array passed to initialize() - TODO confirm.
int** bondAtoms;
ThreadPool* threads;
// Presumably threadBonds[t] lists the bonds assigned to thread t - TODO confirm.
std::vector<std::vector<int> > threadBonds;
// Presumably the bonds that could not be assigned to any single thread - TODO confirm.
std::vector<int> extraBonds;
};
} // namespace OpenMM
#endif /*OPENMM_CPUBONDFORCE_H_*/
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef OPENMM_CPU_GBSAOBC_FORCE_H__
#define OPENMM_CPU_GBSAOBC_FORCE_H__
#include "AlignedArray.h"
#include "openmm/internal/ThreadPool.h"
#include "openmm/internal/vectorize.h"
#include <set>
#include <utility>
#include <vector>
namespace OpenMM {
/**
* This class computes a force using a thread pool; judging by its name and parameters
* (solute/solvent dielectrics, per-particle offset and scaled radii) it implements the
* GBSA OBC implicit solvent model - TODO confirm against the implementation file.
*/
class CpuGBSAOBCForce {
public:
// Task type declared here and defined elsewhere.
class ComputeTask;
CpuGBSAOBCForce();
/**
* Set the force to use a cutoff.
*
* @param distance the cutoff distance
*/
void setUseCutoff(float distance);
/**
*
* Set the force to use periodic boundary conditions. This requires that a cutoff has
* already been set, and the smallest side of the periodic box is at least twice the cutoff
* distance.
*
* @param periodicBoxSize the X, Y, and Z widths of the periodic box
*/
void setPeriodic(float* periodicBoxSize);
/**
* Set the solute dielectric constant.
*/
void setSoluteDielectric(float dielectric);
/**
* Set the solvent dielectric constant.
*/
void setSolventDielectric(float dielectric);
/**
* Get the per-particle parameters (offset radius, scaled radius).
*/
const std::vector<std::pair<float, float> >& getParticleParameters() const;
/**
* Set the per-particle parameters (offset radius, scaled radius).
*/
void setParticleParameters(const std::vector<std::pair<float, float> >& params);
/**
*
* Calculate the force for this interaction.
*
* @param posq atom coordinates and charges
* @param threadForce force arrays (forces added); presumably one array per thread - TODO confirm
* @param totalEnergy total energy
* @param threads the thread pool to use
*/
void computeForce(const AlignedArray<float>& posq, std::vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads);
/**
* This routine contains the code executed by each thread.
*
* @param threads the thread pool running this routine
* @param threadIndex the index of the thread executing the routine
*/
void threadComputeForce(ThreadPool& threads, int threadIndex);
private:
bool cutoff;
bool periodic;
float periodicBoxSize[3];
float cutoffDistance, soluteDielectric, solventDielectric;
std::vector<std::pair<float, float> > particleParams;
std::vector<float> bornRadii;
std::vector<std::vector<float> > threadBornForces;
std::vector<float> obcChain;
std::vector<double> threadEnergy;
// Lookup table used by fastLog(); logDX and logDXInv are presumably the table spacing and its inverse - TODO confirm.
std::vector<float> logTable;
float logDX, logDXInv;
// The following variables are used to make information accessible to the individual threads.
float const* posq;
std::vector<AlignedArray<float> >* threadForce;
bool includeEnergy;
// Opaque pointer; presumably a shared counter used to hand out work to threads - TODO confirm.
void* atomicCounter;
static const int NUM_TABLE_POINTS;
static const float TABLE_MIN;
static const float TABLE_MAX;
/**
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
*/
void getDeltaR(const fvec4& posI, const fvec4& x, const fvec4& y, const fvec4& z, fvec4& dx, fvec4& dy, fvec4& dz, fvec4& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const;
/**
* Evaluate log(x) using a lookup table for speed.
*/
fvec4 fastLog(const fvec4& x);
};
} // namespace OpenMM
// ---------------------------------------------------------------------------------------
#endif // OPENMM_CPU_GBSAOBC_FORCE_H__
...@@ -32,22 +32,147 @@ ...@@ -32,22 +32,147 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. * * USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include "CpuPlatform.h" #include "CpuBondForce.h"
#include "CpuGBSAOBCForce.h"
#include "CpuLangevinDynamics.h"
#include "CpuNeighborList.h" #include "CpuNeighborList.h"
#include "CpuNonbondedForce.h" #include "CpuNonbondedForce.h"
#include "CpuPlatform.h"
#include "openmm/kernels.h" #include "openmm/kernels.h"
#include "openmm/System.h" #include "openmm/System.h"
namespace OpenMM { namespace OpenMM {
/**
* This kernel is invoked at the beginning and end of force and energy computations. It gives the
* Platform a chance to clear buffers and do other initialization at the beginning, and to do any
* necessary work at the end to determine the final results.
*/
class CpuCalcForcesAndEnergyKernel : public CalcForcesAndEnergyKernel {
public:
// Task types declared here and defined elsewhere.
class InitForceTask;
class SumForceTask;
/**
* Create the kernel.
*
* @param name the name of the kernel
* @param platform the Platform this kernel belongs to
* @param data the platform data shared by the CPU kernels (stored by reference)
* @param context the context the kernel is being created for
*/
CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context);
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
*/
void initialize(const System& system);
/**
* This is called at the beginning of each force/energy computation, before calcForcesAndEnergy() has been called on
* any ForceImpl.
*
* @param context the context in which to execute this kernel
* @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed
* @param groups a set of bit flags for which force groups to include
*/
void beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups);
/**
* This is called at the end of each force/energy computation, after calcForcesAndEnergy() has been called on
* every ForceImpl.
*
* @param context the context in which to execute this kernel
* @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed
* @param groups a set of bit flags for which force groups to include
* @return the potential energy of the system. This value is added to all values returned by ForceImpls'
* calcForcesAndEnergy() methods. That is, each force kernel may <i>either</i> return its contribution to the
* energy directly, <i>or</i> add it to an internal buffer so that it will be included here.
*/
double finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups);
private:
// Held by reference, so the PlatformData must outlive this kernel.
CpuPlatform::PlatformData& data;
// Presumably the corresponding Reference platform kernel that work is delegated to - TODO confirm.
Kernel referenceKernel;
};
/**
* This kernel is invoked by PeriodicTorsionForce to calculate the forces acting on the system and the energy of the system.
*/
class CpuCalcPeriodicTorsionForceKernel : public CalcPeriodicTorsionForceKernel {
public:
/**
* Create the kernel. The torsion index and parameter arrays start out as NULL.
*
* @param name the name of the kernel
* @param platform the Platform this kernel belongs to
* @param data the platform data shared by the CPU kernels (stored by reference)
*/
CpuCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data) :
CalcPeriodicTorsionForceKernel(name, platform), data(data), torsionIndexArray(NULL), torsionParamArray(NULL) {
}
~CpuCalcPeriodicTorsionForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the PeriodicTorsionForce this kernel will be used for
*/
void initialize(const System& system, const PeriodicTorsionForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the PeriodicTorsionForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const PeriodicTorsionForce& force);
private:
// Held by reference, so the PlatformData must outlive this kernel.
CpuPlatform::PlatformData& data;
int numTorsions;
// Raw arrays, NULL until set elsewhere; presumably allocated in initialize() and released by the
// destructor - TODO confirm. NOTE(review): the class declares no copy constructor or assignment.
int **torsionIndexArray;
RealOpenMM **torsionParamArray;
CpuBondForce bondForce;
};
/**
* This kernel is invoked by RBTorsionForce to calculate the forces acting on the system and the energy of the system.
*/
class CpuCalcRBTorsionForceKernel : public CalcRBTorsionForceKernel {
public:
/**
* Create the kernel. The torsion index and parameter arrays start out as NULL.
*
* @param name the name of the kernel
* @param platform the Platform this kernel belongs to
* @param data the platform data shared by the CPU kernels (stored by reference)
*/
CpuCalcRBTorsionForceKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data) :
CalcRBTorsionForceKernel(name, platform), data(data), torsionIndexArray(NULL), torsionParamArray(NULL) {
}
~CpuCalcRBTorsionForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the RBTorsionForce this kernel will be used for
*/
void initialize(const System& system, const RBTorsionForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the RBTorsionForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const RBTorsionForce& force);
private:
// Held by reference, so the PlatformData must outlive this kernel.
CpuPlatform::PlatformData& data;
int numTorsions;
// Raw arrays, NULL until set elsewhere; presumably allocated in initialize() and released by the
// destructor - TODO confirm. NOTE(review): the class declares no copy constructor or assignment.
int **torsionIndexArray;
RealOpenMM **torsionParamArray;
CpuBondForce bondForce;
};
/** /**
* This kernel is invoked by NonbondedForce to calculate the forces acting on the system. * This kernel is invoked by NonbondedForce to calculate the forces acting on the system.
*/ */
class CpuCalcNonbondedForceKernel : public CalcNonbondedForceKernel { class CpuCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public: public:
CpuCalcNonbondedForceKernel(std::string name, const Platform& platform) : CalcNonbondedForceKernel(name, platform), CpuCalcNonbondedForceKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data);
bonded14IndexArray(NULL), bonded14ParamArray(NULL), hasInitializedPme(false) {
}
~CpuCalcNonbondedForceKernel(); ~CpuCalcNonbondedForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
...@@ -76,6 +201,7 @@ public: ...@@ -76,6 +201,7 @@ public:
void copyParametersToContext(ContextImpl& context, const NonbondedForce& force); void copyParametersToContext(ContextImpl& context, const NonbondedForce& force);
private: private:
class PmeIO; class PmeIO;
CpuPlatform::PlatformData& data;
int numParticles, num14; int numParticles, num14;
int **bonded14IndexArray; int **bonded14IndexArray;
double **bonded14ParamArray; double **bonded14ParamArray;
...@@ -84,15 +210,88 @@ private: ...@@ -84,15 +210,88 @@ private:
bool useSwitchingFunction, useOptimizedPme, hasInitializedPme; bool useSwitchingFunction, useOptimizedPme, hasInitializedPme;
std::vector<std::set<int> > exclusions; std::vector<std::set<int> > exclusions;
std::vector<std::pair<float, float> > particleParams; std::vector<std::pair<float, float> > particleParams;
std::vector<float> posq;
std::vector<float> forces;
std::vector<RealVec> lastPositions; std::vector<RealVec> lastPositions;
NonbondedMethod nonbondedMethod; NonbondedMethod nonbondedMethod;
CpuNeighborList neighborList; CpuNeighborList* neighborList;
CpuNonbondedForce nonbonded; CpuNonbondedForce* nonbonded;
Kernel optimizedPme; Kernel optimizedPme;
}; };
/**
* This kernel is invoked by GBSAOBCForce to calculate the forces acting on the system.
*/
class CpuCalcGBSAOBCForceKernel : public CalcGBSAOBCForceKernel {
public:
/**
* Create the kernel.
*
* @param name the name of the kernel
* @param platform the Platform this kernel belongs to
* @param data the platform data shared by the CPU kernels (stored by reference)
*/
CpuCalcGBSAOBCForceKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data) : CalcGBSAOBCForceKernel(name, platform),
data(data) {
}
~CpuCalcGBSAOBCForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the GBSAOBCForce this kernel will be used for
*/
void initialize(const System& system, const GBSAOBCForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the GBSAOBCForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force);
private:
// Held by reference, so the PlatformData must outlive this kernel.
CpuPlatform::PlatformData& data;
// Presumably the same (offset radius, scaled radius) pairs that CpuGBSAOBCForce takes - TODO confirm.
std::vector<std::pair<float, float> > particleParams;
CpuGBSAOBCForce obc;
};
/**
* This kernel is invoked by LangevinIntegrator to take one time step.
*/
class CpuIntegrateLangevinStepKernel : public IntegrateLangevinStepKernel {
public:
/**
* Create the kernel. The dynamics pointer starts out as NULL.
*
* @param name the name of the kernel
* @param platform the Platform this kernel belongs to
* @param data the platform data shared by the CPU kernels (stored by reference)
*/
CpuIntegrateLangevinStepKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data) : IntegrateLangevinStepKernel(name, platform),
data(data), dynamics(NULL) {
}
~CpuIntegrateLangevinStepKernel();
/**
* Initialize the kernel, setting up the particle masses.
*
* @param system the System this kernel will be applied to
* @param integrator the LangevinIntegrator this kernel will be used for
*/
void initialize(const System& system, const LangevinIntegrator& integrator);
/**
* Execute the kernel.
*
* @param context the context in which to execute this kernel
* @param integrator the LangevinIntegrator this kernel is being used for
*/
void execute(ContextImpl& context, const LangevinIntegrator& integrator);
/**
* Compute the kinetic energy.
*
* @param context the context in which to execute this kernel
* @param integrator the LangevinIntegrator this kernel is being used for
*/
double computeKineticEnergy(ContextImpl& context, const LangevinIntegrator& integrator);
private:
// Held by reference, so the PlatformData must outlive this kernel.
CpuPlatform::PlatformData& data;
// NULL until created elsewhere; presumably built lazily in execute() and deleted by the destructor - TODO confirm.
CpuLangevinDynamics* dynamics;
std::vector<RealOpenMM> masses;
// NOTE(review): these are not initialized by the constructor above. Presumably they cache the
// integrator settings last used to configure dynamics - confirm they are set before being read.
double prevTemp, prevFriction, prevStepSize;
};
} // namespace OpenMM } // namespace OpenMM
#endif /*OPENMM_CPUKERNELS_H_*/ #endif /*OPENMM_CPUKERNELS_H_*/
......
/* Portions copyright (c) 2013 Stanford University and Simbios.
* Authors: Peter Eastman
* Contributors:
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef __CPU_LANGEVIN_DYNAMICS_H__
#define __CPU_LANGEVIN_DYNAMICS_H__
#include "ReferenceStochasticDynamics.h"
#include "CpuRandom.h"
#include "openmm/internal/ThreadPool.h"
#include "sfmt/SFMT.h"
// ---------------------------------------------------------------------------------------
/**
* A ReferenceStochasticDynamics subclass whose two update steps are given a thread pool
* and a random number generator at construction. Each update step has a matching
* per-thread routine (threadUpdate1/threadUpdate2).
*/
class CpuLangevinDynamics : public ReferenceStochasticDynamics {
public:
// Task types declared here and defined elsewhere.
class Update1Task;
class Update2Task;
/**
* Constructor.
*
* @param numberOfAtoms number of atoms
* @param deltaT delta t for dynamics
* @param tau viscosity
* @param temperature temperature
* @param threads thread pool for parallelizing computation
* @param random random number generator
*/
CpuLangevinDynamics(int numberOfAtoms, RealOpenMM deltaT, RealOpenMM tau, RealOpenMM temperature, OpenMM::ThreadPool& threads, OpenMM::CpuRandom& random);
/**
* Destructor.
*/
~CpuLangevinDynamics();
/**
* First update step.
*
* @param numberOfAtoms number of atoms
* @param atomCoordinates atom coordinates
* @param velocities velocities
* @param forces forces
* @param inverseMasses inverse atom masses
* @param xPrime xPrime
*/
void updatePart1(int numberOfAtoms, std::vector<OpenMM::RealVec>& atomCoordinates, std::vector<OpenMM::RealVec>& velocities,
std::vector<OpenMM::RealVec>& forces, std::vector<RealOpenMM>& inverseMasses, std::vector<OpenMM::RealVec>& xPrime);
/**
* Second update step.
*
* @param numberOfAtoms number of atoms
* @param atomCoordinates atom coordinates
* @param velocities velocities
* @param forces forces
* @param inverseMasses inverse atom masses
* @param xPrime xPrime
*/
void updatePart2(int numberOfAtoms, std::vector<OpenMM::RealVec>& atomCoordinates, std::vector<OpenMM::RealVec>& velocities,
std::vector<OpenMM::RealVec>& forces, std::vector<RealOpenMM>& inverseMasses, std::vector<OpenMM::RealVec>& xPrime);
private:
// Per-thread portions of the first and second update steps.
void threadUpdate1(int threadIndex);
void threadUpdate2(int threadIndex);
// Both are held by reference, so they must outlive this object.
OpenMM::ThreadPool& threads;
OpenMM::CpuRandom& random;
// Presumably one generator per thread - TODO confirm.
std::vector<OpenMM_SFMT::SFMT> threadRandom;
// The following variables are used to make information accessible to the individual threads.
// The pointers are raw and non-owning as far as this header shows; presumably they point into
// the vectors passed to updatePart1()/updatePart2() - TODO confirm.
int numberOfAtoms;
OpenMM::RealVec* atomCoordinates;
OpenMM::RealVec* velocities;
OpenMM::RealVec* forces;
RealOpenMM* inverseMasses;
OpenMM::RealVec* xPrime;
};
// ---------------------------------------------------------------------------------------
#endif // __CPU_LANGEVIN_DYNAMICS_H__
#ifndef OPENMM_CPU_NEIGHBORLIST_H_ #ifndef OPENMM_CPU_NEIGHBORLIST_H_
#define OPENMM_CPU_NEIGHBORLIST_H_ #define OPENMM_CPU_NEIGHBORLIST_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "AlignedArray.h"
#include "windowsExportCpu.h" #include "windowsExportCpu.h"
#include <pthread.h> #include "openmm/internal/ThreadPool.h"
#include <set> #include <set>
#include <utility> #include <utility>
#include <vector> #include <vector>
...@@ -11,13 +43,11 @@ namespace OpenMM { ...@@ -11,13 +43,11 @@ namespace OpenMM {
class OPENMM_EXPORT_CPU CpuNeighborList { class OPENMM_EXPORT_CPU CpuNeighborList {
public: public:
class ThreadData; class ThreadTask;
class Voxels; class Voxels;
static const int BlockSize; CpuNeighborList(int blockSize);
CpuNeighborList(); void computeNeighborList(int numAtoms, const AlignedArray<float>& atomLocations, const std::vector<std::set<int> >& exclusions,
~CpuNeighborList(); const float* periodicBoxSize, bool usePeriodic, float maxDistance, ThreadPool& threads);
void computeNeighborList(int numAtoms, const std::vector<float>& atomLocations, const std::vector<std::set<int> >& exclusions,
const float* periodicBoxSize, bool usePeriodic, float maxDistance);
int getNumBlocks() const; int getNumBlocks() const;
const std::vector<int>& getSortedAtoms() const; const std::vector<int>& getSortedAtoms() const;
const std::vector<int>& getBlockNeighbors(int blockIndex) const; const std::vector<int>& getBlockNeighbors(int blockIndex) const;
...@@ -25,25 +55,13 @@ public: ...@@ -25,25 +55,13 @@ public:
/** /**
* This routine contains the code executed by each thread. * This routine contains the code executed by each thread.
*/ */
void threadComputeNeighborList(ThreadPool& threads, int threadIndex);
void runThread(int index); void runThread(int index);
private: private:
/** int blockSize;
* This is called by the worker threads to wait until the master thread instructs them to advance.
*/
void threadWait();
/**
* This is called by the master thread to instruct all the worker threads to advance.
*/
void advanceThreads();
bool isDeleted;
int numThreads, waitCount;
std::vector<int> sortedAtoms; std::vector<int> sortedAtoms;
std::vector<std::vector<int> > blockNeighbors; std::vector<std::vector<int> > blockNeighbors;
std::vector<std::vector<char> > blockExclusions; std::vector<std::vector<char> > blockExclusions;
std::vector<pthread_t> thread;
std::vector<ThreadData*> threadData;
pthread_cond_t startCondition, endCondition;
pthread_mutex_t lock;
// The following variables are used to make information accessible to the individual threads. // The following variables are used to make information accessible to the individual threads.
float minx, maxx, miny, maxy, minz, maxz; float minx, maxx, miny, maxy, minz, maxz;
std::vector<std::pair<int, int> > atomBins; std::vector<std::pair<int, int> > atomBins;
...@@ -58,4 +76,4 @@ private: ...@@ -58,4 +76,4 @@ private:
} // namespace OpenMM } // namespace OpenMM
#endif // OPENMM_REFERENCE_NEIGHBORLIST_H_ #endif // OPENMM_CPU_NEIGHBORLIST_H_
...@@ -25,10 +25,11 @@ ...@@ -25,10 +25,11 @@
#ifndef OPENMM_CPU_NONBONDED_FORCE_H__ #ifndef OPENMM_CPU_NONBONDED_FORCE_H__
#define OPENMM_CPU_NONBONDED_FORCE_H__ #define OPENMM_CPU_NONBONDED_FORCE_H__
#include "AlignedArray.h"
#include "CpuNeighborList.h" #include "CpuNeighborList.h"
#include "ReferencePairIxn.h" #include "ReferencePairIxn.h"
#include "openmm/internal/ThreadPool.h"
#include "openmm/internal/vectorize.h" #include "openmm/internal/vectorize.h"
#include <pthread.h>
#include <set> #include <set>
#include <utility> #include <utility>
#include <vector> #include <vector>
...@@ -38,7 +39,7 @@ namespace OpenMM { ...@@ -38,7 +39,7 @@ namespace OpenMM {
class CpuNonbondedForce { class CpuNonbondedForce {
public: public:
class ThreadData; class ComputeDirectTask;
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -48,13 +49,11 @@ class CpuNonbondedForce { ...@@ -48,13 +49,11 @@ class CpuNonbondedForce {
CpuNonbondedForce(); CpuNonbondedForce();
/**--------------------------------------------------------------------------------------- /**
* Virtual destructor.
Destructor */
--------------------------------------------------------------------------------------- */
~CpuNonbondedForce(); virtual ~CpuNonbondedForce();
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -130,9 +129,9 @@ class CpuNonbondedForce { ...@@ -130,9 +129,9 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void calculateReciprocalIxn(int numberOfAtoms, float* posq, std::vector<RealVec>& atomCoordinates, void calculateReciprocalIxn(int numberOfAtoms, float* posq, const std::vector<RealVec>& atomCoordinates,
const std::vector<std::pair<float, float> >& atomParameters, const std::vector<std::set<int> >& exclusions, const std::vector<std::pair<float, float> >& atomParameters, const std::vector<std::set<int> >& exclusions,
std::vector<RealVec>& forces, float* totalEnergy) const; std::vector<RealVec>& forces, double* totalEnergy) const;
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -140,28 +139,31 @@ class CpuNonbondedForce { ...@@ -140,28 +139,31 @@ class CpuNonbondedForce {
@param numberOfAtoms number of atoms @param numberOfAtoms number of atoms
@param posq atom coordinates and charges @param posq atom coordinates and charges
@param atomCoordinates atom coordinates (periodic boundary conditions not applied)
@param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon)) @param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon))
@param exclusions atom exclusion indices @param exclusions atom exclusion indices
exclusions[atomIndex] contains the list of exclusions for that atom exclusions[atomIndex] contains the list of exclusions for that atom
@param forces force array (forces added) @param forces force array (forces added)
@param totalEnergy total energy @param totalEnergy total energy
@param threads the thread pool to use
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void calculateDirectIxn(int numberOfAtoms, float* posq, const std::vector<std::pair<float, float> >& atomParameters, void calculateDirectIxn(int numberOfAtoms, float* posq, const std::vector<RealVec>& atomCoordinates, const std::vector<std::pair<float, float> >& atomParameters,
const std::vector<std::set<int> >& exclusions, float* forces, float* totalEnergy); const std::vector<std::set<int> >& exclusions, std::vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads);
/** /**
* This routine contains the code executed by each thread. * This routine contains the code executed by each thread.
*/ */
void runThread(int index, std::vector<float>& threadForce, double& threadEnergy); void threadComputeDirect(ThreadPool& threads, int threadIndex);
private: protected:
bool cutoff; bool cutoff;
bool useSwitch; bool useSwitch;
bool periodic; bool periodic;
bool ewald; bool ewald;
bool pme; bool pme;
bool tableIsValid;
const CpuNeighborList* neighborList; const CpuNeighborList* neighborList;
float periodicBoxSize[3]; float periodicBoxSize[3];
float cutoffDistance, switchingDistance; float cutoffDistance, switchingDistance;
...@@ -171,18 +173,16 @@ private: ...@@ -171,18 +173,16 @@ private:
int meshDim[3]; int meshDim[3];
std::vector<float> ewaldScaleTable; std::vector<float> ewaldScaleTable;
float ewaldDX, ewaldDXInv; float ewaldDX, ewaldDXInv;
bool isDeleted; std::vector<double> threadEnergy;
int numThreads, waitCount;
std::vector<pthread_t> thread;
std::vector<ThreadData*> threadData;
pthread_cond_t startCondition, endCondition;
pthread_mutex_t lock;
// The following variables are used to make information accessible to the individual threads. // The following variables are used to make information accessible to the individual threads.
int numberOfAtoms; int numberOfAtoms;
float* posq; float* posq;
RealVec const* atomCoordinates;
std::pair<float, float> const* atomParameters; std::pair<float, float> const* atomParameters;
std::set<int> const* exclusions; std::set<int> const* exclusions;
std::vector<AlignedArray<float> >* threadForce;
bool includeEnergy; bool includeEnergy;
void* atomicCounter;
static const float TWO_OVER_SQRT_PI; static const float TWO_OVER_SQRT_PI;
static const int NUM_TABLE_POINTS; static const int NUM_TABLE_POINTS;
...@@ -210,7 +210,7 @@ private: ...@@ -210,7 +210,7 @@ private:
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize); virtual void calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) = 0;
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -222,7 +222,7 @@ private: ...@@ -222,7 +222,7 @@ private:
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize); virtual void calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) = 0;
/** /**
* Compute the displacement and squared distance between two points, optionally using * Compute the displacement and squared distance between two points, optionally using
...@@ -230,20 +230,15 @@ private: ...@@ -230,20 +230,15 @@ private:
*/ */
void getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const; void getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const;
/**
* Compute a fast approximation to erfc(x).
*/
static fvec4 erfcApprox(fvec4 x);
/** /**
* Create a lookup table for the scale factor used with Ewald and PME. * Create a lookup table for the scale factor used with Ewald and PME.
*/ */
void tabulateEwaldScaleFactor(); void tabulateEwaldScaleFactor();
/** /**
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI) * Compute a fast approximation to erfc(x).
*/ */
fvec4 ewaldScaleFunction(fvec4 x); static float erfcApprox(float x);
}; };
} // namespace OpenMM } // namespace OpenMM
......
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef OPENMM_CPU_NONBONDED_FORCE_VEC4_H__
#define OPENMM_CPU_NONBONDED_FORCE_VEC4_H__
#include "CpuNonbondedForce.h"
// ---------------------------------------------------------------------------------------
namespace OpenMM {
class CpuNonbondedForceVec4 : public CpuNonbondedForce {
public:
    /**
     * Constructor.
     */
    CpuNonbondedForceVec4();

protected:
    /**
     * Calculate all the interactions for one atom block.
     *
     * Implements the pure virtual function of the same signature declared in CpuNonbondedForce.
     *
     * @param blockIndex    the index of the atom block
     * @param forces        force array (forces added)
     * @param totalEnergy   total energy
     * @param boxSize       NOTE(review): not documented originally; presumably the periodic box dimensions -- confirm
     * @param invBoxSize    NOTE(review): not documented originally; presumably the reciprocal of boxSize -- confirm
     */
    void calculateBlockIxn(int blockIndex,
                           float* forces,
                           double* totalEnergy,
                           const fvec4& boxSize,
                           const fvec4& invBoxSize);

    /**
     * Calculate all the interactions for one atom block.
     *
     * Implements the pure virtual function of the same signature declared in CpuNonbondedForce.
     * NOTE(review): presumably the variant used when Ewald or PME is enabled -- confirm against
     * the implementation.
     *
     * @param blockIndex    the index of the atom block
     * @param forces        force array (forces added)
     * @param totalEnergy   total energy
     * @param boxSize       NOTE(review): not documented originally; presumably the periodic box dimensions -- confirm
     * @param invBoxSize    NOTE(review): not documented originally; presumably the reciprocal of boxSize -- confirm
     */
    void calculateBlockEwaldIxn(int blockIndex,
                                float* forces,
                                double* totalEnergy,
                                const fvec4& boxSize,
                                const fvec4& invBoxSize);

    /**
     * Compute the displacement and squared distance between a collection of points,
     * optionally using periodic boundary conditions.
     *
     * NOTE(review): dx, dy, dz and r2 are the only non-const reference parameters, so they are
     * presumably where the results are written -- confirm against the implementation.
     */
    void getDeltaR(const float* posI,
                   const fvec4& x,
                   const fvec4& y,
                   const fvec4& z,
                   fvec4& dx,
                   fvec4& dy,
                   fvec4& dz,
                   fvec4& r2,
                   bool periodic,
                   const fvec4& boxSize,
                   const fvec4& invBoxSize) const;

    /**
     * Compute a fast approximation to erfc(x).
     */
    static fvec4 erfcApprox(const fvec4& x);

    /**
     * Evaluate the scale factor used with Ewald and PME:
     * erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
     */
    fvec4 ewaldScaleFunction(const fvec4& x);
};
} // namespace OpenMM
// ---------------------------------------------------------------------------------------
#endif // OPENMM_CPU_NONBONDED_FORCE_VEC4_H__
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
SET_SOURCE_FILES_PROPERTIES(${SOURCE_FILES} PROPERTIES COMPILE_FLAGS "-msse4.1") FOREACH(file ${SOURCE_FILES})
IF (file MATCHES ".*Vec8.*")
IF (MSVC)
SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} /arch:AVX /D__AVX__")
ELSE (MSVC)
SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -msse4.1 -mavx")
ENDIF (MSVC)
ELSE (file MATCHES ".*Vec8.*")
IF (NOT MSVC)
SET_SOURCE_FILES_PROPERTIES(${file} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -msse4.1")
ENDIF (NOT MSVC)
ENDIF (file MATCHES ".*Vec8.*")
ENDFOREACH(file)
ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES}) ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug) IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
...@@ -7,6 +19,6 @@ ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug) ...@@ -7,6 +19,6 @@ ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME}) SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME})
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug) ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB} ${PTHREADS_LIB}) TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB} ${PTHREADS_LIB})
SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "-DOPENMM_CPU_BUILDING_SHARED_LIBRARY") SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_CPU_BUILDING_SHARED_LIBRARY")
INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET}) INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET})
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment