Commit 4b061bed authored by Peter Eastman's avatar Peter Eastman
Browse files

Created LocalEnergyMinimizer

parent 1b56ffd8
...@@ -75,7 +75,7 @@ ENDIF(${CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT}) ...@@ -75,7 +75,7 @@ ENDIF(${CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT})
# The source is organized into subdirectories, but we handle them all from # The source is organized into subdirectories, but we handle them all from
# this CMakeLists file rather than letting CMake visit them as SUBDIRS. # this CMakeLists file rather than letting CMake visit them as SUBDIRS.
SET(OPENMM_SOURCE_SUBDIRS . openmmapi olla libraries/jama libraries/quern libraries/lepton libraries/sfmt platforms/reference libraries/validate) SET(OPENMM_SOURCE_SUBDIRS . openmmapi olla libraries/jama libraries/quern libraries/lepton libraries/sfmt libraries/lbfgs platforms/reference libraries/validate)
# The build system will set ARCH64 for 64 bit builds, which require # The build system will set ARCH64 for 64 bit builds, which require
# use of the lib64/ library directories rather than lib/. # use of the lib64/ library directories rather than lib/.
......
This diff is collapsed.
/*
* ANSI C implementation of vector operations.
*
* Copyright (c) 2007-2010 Naoaki Okazaki
* All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/* $Id: arithmetic_ansi.h 65 2010-01-29 12:19:16Z naoaki $ */
#include <stdlib.h>
#include <memory.h>
#if LBFGS_FLOAT == 32 && LBFGS_IEEE_FLOAT
#define fsigndiff(x, y) (((*(uint32_t*)(x)) ^ (*(uint32_t*)(y))) & 0x80000000U)
#else
#define fsigndiff(x, y) (*(x) * (*(y) / fabs(*(y))) < 0.)
#endif/*LBFGS_IEEE_FLOAT*/
inline static void* vecalloc(size_t size)
{
void *memblock = malloc(size);
if (memblock) {
memset(memblock, 0, size);
}
return memblock;
}
inline static void vecfree(void *memblock)
{
free(memblock);
}
inline static void vecset(lbfgsfloatval_t *x, const lbfgsfloatval_t c, const int n)
{
int i;
for (i = 0;i < n;++i) {
x[i] = c;
}
}
inline static void veccpy(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n)
{
int i;
for (i = 0;i < n;++i) {
y[i] = x[i];
}
}
inline static void vecncpy(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n)
{
int i;
for (i = 0;i < n;++i) {
y[i] = -x[i];
}
}
inline static void vecadd(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const lbfgsfloatval_t c, const int n)
{
int i;
for (i = 0;i < n;++i) {
y[i] += c * x[i];
}
}
inline static void vecdiff(lbfgsfloatval_t *z, const lbfgsfloatval_t *x, const lbfgsfloatval_t *y, const int n)
{
int i;
for (i = 0;i < n;++i) {
z[i] = x[i] - y[i];
}
}
inline static void vecscale(lbfgsfloatval_t *y, const lbfgsfloatval_t c, const int n)
{
int i;
for (i = 0;i < n;++i) {
y[i] *= c;
}
}
inline static void vecmul(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n)
{
int i;
for (i = 0;i < n;++i) {
y[i] *= x[i];
}
}
inline static void vecdot(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const lbfgsfloatval_t *y, const int n)
{
int i;
*s = 0.;
for (i = 0;i < n;++i) {
*s += x[i] * y[i];
}
}
inline static void vec2norm(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const int n)
{
vecdot(s, x, x, n);
*s = (lbfgsfloatval_t)sqrt(*s);
}
inline static void vec2norminv(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const int n)
{
vec2norm(s, x, n);
*s = (lbfgsfloatval_t)(1.0 / *s);
}
/*
* SSE2 implementation of vector oprations (64bit double).
*
* Copyright (c) 2007-2010 Naoaki Okazaki
* All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/* $Id: arithmetic_sse_double.h 65 2010-01-29 12:19:16Z naoaki $ */
#include <stdlib.h>
#include <malloc.h>
#include <memory.h>
#if 1400 <= _MSC_VER
#include <intrin.h>
#endif/*1400 <= _MSC_VER*/
#if HAVE_EMMINTRIN_H
#include <emmintrin.h>
#endif/*HAVE_EMMINTRIN_H*/
inline static void* vecalloc(size_t size)
{
#ifdef _MSC_VER
void *memblock = _aligned_malloc(size, 16);
#else
void *memblock = memalign(16, size);
#endif
if (memblock != NULL) {
memset(memblock, 0, size);
}
return memblock;
}
inline static void vecfree(void *memblock)
{
#ifdef _MSC_VER
_aligned_free(memblock);
#else
free(memblock);
#endif
}
#define fsigndiff(x, y) \
((_mm_movemask_pd(_mm_set_pd(*(x), *(y))) + 1) & 0x002)
#define vecset(x, c, n) \
{ \
int i; \
__m128d XMM0 = _mm_set1_pd(c); \
for (i = 0;i < (n);i += 8) { \
_mm_store_pd((x)+i , XMM0); \
_mm_store_pd((x)+i+2, XMM0); \
_mm_store_pd((x)+i+4, XMM0); \
_mm_store_pd((x)+i+6, XMM0); \
} \
}
#define veccpy(y, x, n) \
{ \
int i; \
for (i = 0;i < (n);i += 8) { \
__m128d XMM0 = _mm_load_pd((x)+i ); \
__m128d XMM1 = _mm_load_pd((x)+i+2); \
__m128d XMM2 = _mm_load_pd((x)+i+4); \
__m128d XMM3 = _mm_load_pd((x)+i+6); \
_mm_store_pd((y)+i , XMM0); \
_mm_store_pd((y)+i+2, XMM1); \
_mm_store_pd((y)+i+4, XMM2); \
_mm_store_pd((y)+i+6, XMM3); \
} \
}
#define vecncpy(y, x, n) \
{ \
int i; \
for (i = 0;i < (n);i += 8) { \
__m128d XMM0 = _mm_setzero_pd(); \
__m128d XMM1 = _mm_setzero_pd(); \
__m128d XMM2 = _mm_setzero_pd(); \
__m128d XMM3 = _mm_setzero_pd(); \
__m128d XMM4 = _mm_load_pd((x)+i ); \
__m128d XMM5 = _mm_load_pd((x)+i+2); \
__m128d XMM6 = _mm_load_pd((x)+i+4); \
__m128d XMM7 = _mm_load_pd((x)+i+6); \
XMM0 = _mm_sub_pd(XMM0, XMM4); \
XMM1 = _mm_sub_pd(XMM1, XMM5); \
XMM2 = _mm_sub_pd(XMM2, XMM6); \
XMM3 = _mm_sub_pd(XMM3, XMM7); \
_mm_store_pd((y)+i , XMM0); \
_mm_store_pd((y)+i+2, XMM1); \
_mm_store_pd((y)+i+4, XMM2); \
_mm_store_pd((y)+i+6, XMM3); \
} \
}
#define vecadd(y, x, c, n) \
{ \
int i; \
__m128d XMM7 = _mm_set1_pd(c); \
for (i = 0;i < (n);i += 4) { \
__m128d XMM0 = _mm_load_pd((x)+i ); \
__m128d XMM1 = _mm_load_pd((x)+i+2); \
__m128d XMM2 = _mm_load_pd((y)+i ); \
__m128d XMM3 = _mm_load_pd((y)+i+2); \
XMM0 = _mm_mul_pd(XMM0, XMM7); \
XMM1 = _mm_mul_pd(XMM1, XMM7); \
XMM2 = _mm_add_pd(XMM2, XMM0); \
XMM3 = _mm_add_pd(XMM3, XMM1); \
_mm_store_pd((y)+i , XMM2); \
_mm_store_pd((y)+i+2, XMM3); \
} \
}
#define vecdiff(z, x, y, n) \
{ \
int i; \
for (i = 0;i < (n);i += 8) { \
__m128d XMM0 = _mm_load_pd((x)+i ); \
__m128d XMM1 = _mm_load_pd((x)+i+2); \
__m128d XMM2 = _mm_load_pd((x)+i+4); \
__m128d XMM3 = _mm_load_pd((x)+i+6); \
__m128d XMM4 = _mm_load_pd((y)+i ); \
__m128d XMM5 = _mm_load_pd((y)+i+2); \
__m128d XMM6 = _mm_load_pd((y)+i+4); \
__m128d XMM7 = _mm_load_pd((y)+i+6); \
XMM0 = _mm_sub_pd(XMM0, XMM4); \
XMM1 = _mm_sub_pd(XMM1, XMM5); \
XMM2 = _mm_sub_pd(XMM2, XMM6); \
XMM3 = _mm_sub_pd(XMM3, XMM7); \
_mm_store_pd((z)+i , XMM0); \
_mm_store_pd((z)+i+2, XMM1); \
_mm_store_pd((z)+i+4, XMM2); \
_mm_store_pd((z)+i+6, XMM3); \
} \
}
#define vecscale(y, c, n) \
{ \
int i; \
__m128d XMM7 = _mm_set1_pd(c); \
for (i = 0;i < (n);i += 4) { \
__m128d XMM0 = _mm_load_pd((y)+i ); \
__m128d XMM1 = _mm_load_pd((y)+i+2); \
XMM0 = _mm_mul_pd(XMM0, XMM7); \
XMM1 = _mm_mul_pd(XMM1, XMM7); \
_mm_store_pd((y)+i , XMM0); \
_mm_store_pd((y)+i+2, XMM1); \
} \
}
#define vecmul(y, x, n) \
{ \
int i; \
for (i = 0;i < (n);i += 8) { \
__m128d XMM0 = _mm_load_pd((x)+i ); \
__m128d XMM1 = _mm_load_pd((x)+i+2); \
__m128d XMM2 = _mm_load_pd((x)+i+4); \
__m128d XMM3 = _mm_load_pd((x)+i+6); \
__m128d XMM4 = _mm_load_pd((y)+i ); \
__m128d XMM5 = _mm_load_pd((y)+i+2); \
__m128d XMM6 = _mm_load_pd((y)+i+4); \
__m128d XMM7 = _mm_load_pd((y)+i+6); \
XMM4 = _mm_mul_pd(XMM4, XMM0); \
XMM5 = _mm_mul_pd(XMM5, XMM1); \
XMM6 = _mm_mul_pd(XMM6, XMM2); \
XMM7 = _mm_mul_pd(XMM7, XMM3); \
_mm_store_pd((y)+i , XMM4); \
_mm_store_pd((y)+i+2, XMM5); \
_mm_store_pd((y)+i+4, XMM6); \
_mm_store_pd((y)+i+6, XMM7); \
} \
}
#if 3 <= __SSE__
/*
Horizontal add with haddps SSE3 instruction. The work register (rw)
is unused.
*/
#define __horizontal_sum(r, rw) \
r = _mm_hadd_ps(r, r); \
r = _mm_hadd_ps(r, r);
#else
/*
Horizontal add with SSE instruction. The work register (rw) is used.
*/
#define __horizontal_sum(r, rw) \
rw = r; \
r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(1, 0, 3, 2)); \
r = _mm_add_ps(r, rw); \
rw = r; \
r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(2, 3, 0, 1)); \
r = _mm_add_ps(r, rw);
#endif
#define vecdot(s, x, y, n) \
{ \
int i; \
__m128d XMM0 = _mm_setzero_pd(); \
__m128d XMM1 = _mm_setzero_pd(); \
__m128d XMM2, XMM3, XMM4, XMM5; \
for (i = 0;i < (n);i += 4) { \
XMM2 = _mm_load_pd((x)+i ); \
XMM3 = _mm_load_pd((x)+i+2); \
XMM4 = _mm_load_pd((y)+i ); \
XMM5 = _mm_load_pd((y)+i+2); \
XMM2 = _mm_mul_pd(XMM2, XMM4); \
XMM3 = _mm_mul_pd(XMM3, XMM5); \
XMM0 = _mm_add_pd(XMM0, XMM2); \
XMM1 = _mm_add_pd(XMM1, XMM3); \
} \
XMM0 = _mm_add_pd(XMM0, XMM1); \
XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \
XMM0 = _mm_add_pd(XMM0, XMM1); \
_mm_store_sd((s), XMM0); \
}
#define vec2norm(s, x, n) \
{ \
int i; \
__m128d XMM0 = _mm_setzero_pd(); \
__m128d XMM1 = _mm_setzero_pd(); \
__m128d XMM2, XMM3, XMM4, XMM5; \
for (i = 0;i < (n);i += 4) { \
XMM2 = _mm_load_pd((x)+i ); \
XMM3 = _mm_load_pd((x)+i+2); \
XMM4 = XMM2; \
XMM5 = XMM3; \
XMM2 = _mm_mul_pd(XMM2, XMM4); \
XMM3 = _mm_mul_pd(XMM3, XMM5); \
XMM0 = _mm_add_pd(XMM0, XMM2); \
XMM1 = _mm_add_pd(XMM1, XMM3); \
} \
XMM0 = _mm_add_pd(XMM0, XMM1); \
XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \
XMM0 = _mm_add_pd(XMM0, XMM1); \
XMM0 = _mm_sqrt_pd(XMM0); \
_mm_store_sd((s), XMM0); \
}
#define vec2norminv(s, x, n) \
{ \
int i; \
__m128d XMM0 = _mm_setzero_pd(); \
__m128d XMM1 = _mm_setzero_pd(); \
__m128d XMM2, XMM3, XMM4, XMM5; \
for (i = 0;i < (n);i += 4) { \
XMM2 = _mm_load_pd((x)+i ); \
XMM3 = _mm_load_pd((x)+i+2); \
XMM4 = XMM2; \
XMM5 = XMM3; \
XMM2 = _mm_mul_pd(XMM2, XMM4); \
XMM3 = _mm_mul_pd(XMM3, XMM5); \
XMM0 = _mm_add_pd(XMM0, XMM2); \
XMM1 = _mm_add_pd(XMM1, XMM3); \
} \
XMM2 = _mm_set1_pd(1.0); \
XMM0 = _mm_add_pd(XMM0, XMM1); \
XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \
XMM0 = _mm_add_pd(XMM0, XMM1); \
XMM0 = _mm_sqrt_pd(XMM0); \
XMM2 = _mm_div_pd(XMM2, XMM0); \
_mm_store_sd((s), XMM2); \
}
/*
* SSE/SSE3 implementation of vector oprations (32bit float).
*
* Copyright (c) 2007-2010 Naoaki Okazaki
* All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/* $Id: arithmetic_sse_float.h 65 2010-01-29 12:19:16Z naoaki $ */
#include <stdlib.h>
#include <malloc.h>
#include <memory.h>
#if 1400 <= _MSC_VER
#include <intrin.h>
#endif/*_MSC_VER*/
#if HAVE_XMMINTRIN_H
#include <xmmintrin.h>
#endif/*HAVE_XMMINTRIN_H*/
#if LBFGS_FLOAT == 32 && LBFGS_IEEE_FLOAT
#define fsigndiff(x, y) (((*(uint32_t*)(x)) ^ (*(uint32_t*)(y))) & 0x80000000U)
#else
#define fsigndiff(x, y) (*(x) * (*(y) / fabs(*(y))) < 0.)
#endif/*LBFGS_IEEE_FLOAT*/
inline static void* vecalloc(size_t size)
{
void *memblock = _aligned_malloc(size, 16);
if (memblock != NULL) {
memset(memblock, 0, size);
}
return memblock;
}
inline static void vecfree(void *memblock)
{
_aligned_free(memblock);
}
#define vecset(x, c, n) \
{ \
int i; \
__m128 XMM0 = _mm_set_ps1(c); \
for (i = 0;i < (n);i += 16) { \
_mm_store_ps((x)+i , XMM0); \
_mm_store_ps((x)+i+ 4, XMM0); \
_mm_store_ps((x)+i+ 8, XMM0); \
_mm_store_ps((x)+i+12, XMM0); \
} \
}
#define veccpy(y, x, n) \
{ \
int i; \
for (i = 0;i < (n);i += 16) { \
__m128 XMM0 = _mm_load_ps((x)+i ); \
__m128 XMM1 = _mm_load_ps((x)+i+ 4); \
__m128 XMM2 = _mm_load_ps((x)+i+ 8); \
__m128 XMM3 = _mm_load_ps((x)+i+12); \
_mm_store_ps((y)+i , XMM0); \
_mm_store_ps((y)+i+ 4, XMM1); \
_mm_store_ps((y)+i+ 8, XMM2); \
_mm_store_ps((y)+i+12, XMM3); \
} \
}
#define vecncpy(y, x, n) \
{ \
int i; \
const uint32_t mask = 0x80000000; \
__m128 XMM4 = _mm_load_ps1((float*)&mask); \
for (i = 0;i < (n);i += 16) { \
__m128 XMM0 = _mm_load_ps((x)+i ); \
__m128 XMM1 = _mm_load_ps((x)+i+ 4); \
__m128 XMM2 = _mm_load_ps((x)+i+ 8); \
__m128 XMM3 = _mm_load_ps((x)+i+12); \
XMM0 = _mm_xor_ps(XMM0, XMM4); \
XMM1 = _mm_xor_ps(XMM1, XMM4); \
XMM2 = _mm_xor_ps(XMM2, XMM4); \
XMM3 = _mm_xor_ps(XMM3, XMM4); \
_mm_store_ps((y)+i , XMM0); \
_mm_store_ps((y)+i+ 4, XMM1); \
_mm_store_ps((y)+i+ 8, XMM2); \
_mm_store_ps((y)+i+12, XMM3); \
} \
}
#define vecadd(y, x, c, n) \
{ \
int i; \
__m128 XMM7 = _mm_set_ps1(c); \
for (i = 0;i < (n);i += 8) { \
__m128 XMM0 = _mm_load_ps((x)+i ); \
__m128 XMM1 = _mm_load_ps((x)+i+4); \
__m128 XMM2 = _mm_load_ps((y)+i ); \
__m128 XMM3 = _mm_load_ps((y)+i+4); \
XMM0 = _mm_mul_ps(XMM0, XMM7); \
XMM1 = _mm_mul_ps(XMM1, XMM7); \
XMM2 = _mm_add_ps(XMM2, XMM0); \
XMM3 = _mm_add_ps(XMM3, XMM1); \
_mm_store_ps((y)+i , XMM2); \
_mm_store_ps((y)+i+4, XMM3); \
} \
}
#define vecdiff(z, x, y, n) \
{ \
int i; \
for (i = 0;i < (n);i += 16) { \
__m128 XMM0 = _mm_load_ps((x)+i ); \
__m128 XMM1 = _mm_load_ps((x)+i+ 4); \
__m128 XMM2 = _mm_load_ps((x)+i+ 8); \
__m128 XMM3 = _mm_load_ps((x)+i+12); \
__m128 XMM4 = _mm_load_ps((y)+i ); \
__m128 XMM5 = _mm_load_ps((y)+i+ 4); \
__m128 XMM6 = _mm_load_ps((y)+i+ 8); \
__m128 XMM7 = _mm_load_ps((y)+i+12); \
XMM0 = _mm_sub_ps(XMM0, XMM4); \
XMM1 = _mm_sub_ps(XMM1, XMM5); \
XMM2 = _mm_sub_ps(XMM2, XMM6); \
XMM3 = _mm_sub_ps(XMM3, XMM7); \
_mm_store_ps((z)+i , XMM0); \
_mm_store_ps((z)+i+ 4, XMM1); \
_mm_store_ps((z)+i+ 8, XMM2); \
_mm_store_ps((z)+i+12, XMM3); \
} \
}
#define vecscale(y, c, n) \
{ \
int i; \
__m128 XMM7 = _mm_set_ps1(c); \
for (i = 0;i < (n);i += 8) { \
__m128 XMM0 = _mm_load_ps((y)+i ); \
__m128 XMM1 = _mm_load_ps((y)+i+4); \
XMM0 = _mm_mul_ps(XMM0, XMM7); \
XMM1 = _mm_mul_ps(XMM1, XMM7); \
_mm_store_ps((y)+i , XMM0); \
_mm_store_ps((y)+i+4, XMM1); \
} \
}
#define vecmul(y, x, n) \
{ \
int i; \
for (i = 0;i < (n);i += 16) { \
__m128 XMM0 = _mm_load_ps((x)+i ); \
__m128 XMM1 = _mm_load_ps((x)+i+ 4); \
__m128 XMM2 = _mm_load_ps((x)+i+ 8); \
__m128 XMM3 = _mm_load_ps((x)+i+12); \
__m128 XMM4 = _mm_load_ps((y)+i ); \
__m128 XMM5 = _mm_load_ps((y)+i+ 4); \
__m128 XMM6 = _mm_load_ps((y)+i+ 8); \
__m128 XMM7 = _mm_load_ps((y)+i+12); \
XMM4 = _mm_mul_ps(XMM4, XMM0); \
XMM5 = _mm_mul_ps(XMM5, XMM1); \
XMM6 = _mm_mul_ps(XMM6, XMM2); \
XMM7 = _mm_mul_ps(XMM7, XMM3); \
_mm_store_ps((y)+i , XMM4); \
_mm_store_ps((y)+i+ 4, XMM5); \
_mm_store_ps((y)+i+ 8, XMM6); \
_mm_store_ps((y)+i+12, XMM7); \
} \
}
#if 3 <= __SSE__
/*
Horizontal add with haddps SSE3 instruction. The work register (rw)
is unused.
*/
#define __horizontal_sum(r, rw) \
r = _mm_hadd_ps(r, r); \
r = _mm_hadd_ps(r, r);
#else
/*
Horizontal add with SSE instruction. The work register (rw) is used.
*/
#define __horizontal_sum(r, rw) \
rw = r; \
r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(1, 0, 3, 2)); \
r = _mm_add_ps(r, rw); \
rw = r; \
r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(2, 3, 0, 1)); \
r = _mm_add_ps(r, rw);
#endif
#define vecdot(s, x, y, n) \
{ \
int i; \
__m128 XMM0 = _mm_setzero_ps(); \
__m128 XMM1 = _mm_setzero_ps(); \
__m128 XMM2, XMM3, XMM4, XMM5; \
for (i = 0;i < (n);i += 8) { \
XMM2 = _mm_load_ps((x)+i ); \
XMM3 = _mm_load_ps((x)+i+4); \
XMM4 = _mm_load_ps((y)+i ); \
XMM5 = _mm_load_ps((y)+i+4); \
XMM2 = _mm_mul_ps(XMM2, XMM4); \
XMM3 = _mm_mul_ps(XMM3, XMM5); \
XMM0 = _mm_add_ps(XMM0, XMM2); \
XMM1 = _mm_add_ps(XMM1, XMM3); \
} \
XMM0 = _mm_add_ps(XMM0, XMM1); \
__horizontal_sum(XMM0, XMM1); \
_mm_store_ss((s), XMM0); \
}
#define vec2norm(s, x, n) \
{ \
int i; \
__m128 XMM0 = _mm_setzero_ps(); \
__m128 XMM1 = _mm_setzero_ps(); \
__m128 XMM2, XMM3; \
for (i = 0;i < (n);i += 8) { \
XMM2 = _mm_load_ps((x)+i ); \
XMM3 = _mm_load_ps((x)+i+4); \
XMM2 = _mm_mul_ps(XMM2, XMM2); \
XMM3 = _mm_mul_ps(XMM3, XMM3); \
XMM0 = _mm_add_ps(XMM0, XMM2); \
XMM1 = _mm_add_ps(XMM1, XMM3); \
} \
XMM0 = _mm_add_ps(XMM0, XMM1); \
__horizontal_sum(XMM0, XMM1); \
XMM2 = XMM0; \
XMM1 = _mm_rsqrt_ss(XMM0); \
XMM3 = XMM1; \
XMM1 = _mm_mul_ss(XMM1, XMM1); \
XMM1 = _mm_mul_ss(XMM1, XMM3); \
XMM1 = _mm_mul_ss(XMM1, XMM0); \
XMM1 = _mm_mul_ss(XMM1, _mm_set_ss(-0.5f)); \
XMM3 = _mm_mul_ss(XMM3, _mm_set_ss(1.5f)); \
XMM3 = _mm_add_ss(XMM3, XMM1); \
XMM3 = _mm_mul_ss(XMM3, XMM2); \
_mm_store_ss((s), XMM3); \
}
#define vec2norminv(s, x, n) \
{ \
int i; \
__m128 XMM0 = _mm_setzero_ps(); \
__m128 XMM1 = _mm_setzero_ps(); \
__m128 XMM2, XMM3; \
for (i = 0;i < (n);i += 16) { \
XMM2 = _mm_load_ps((x)+i ); \
XMM3 = _mm_load_ps((x)+i+4); \
XMM2 = _mm_mul_ps(XMM2, XMM2); \
XMM3 = _mm_mul_ps(XMM3, XMM3); \
XMM0 = _mm_add_ps(XMM0, XMM2); \
XMM1 = _mm_add_ps(XMM1, XMM3); \
} \
XMM0 = _mm_add_ps(XMM0, XMM1); \
__horizontal_sum(XMM0, XMM1); \
XMM2 = XMM0; \
XMM1 = _mm_rsqrt_ss(XMM0); \
XMM3 = XMM1; \
XMM1 = _mm_mul_ss(XMM1, XMM1); \
XMM1 = _mm_mul_ss(XMM1, XMM3); \
XMM1 = _mm_mul_ss(XMM1, XMM0); \
XMM1 = _mm_mul_ss(XMM1, _mm_set_ss(-0.5f)); \
XMM3 = _mm_mul_ss(XMM3, _mm_set_ss(1.5f)); \
XMM3 = _mm_add_ss(XMM3, XMM1); \
_mm_store_ss((s), XMM3); \
}
This diff is collapsed.
...@@ -49,6 +49,7 @@ ...@@ -49,6 +49,7 @@
#include "openmm/HarmonicBondForce.h" #include "openmm/HarmonicBondForce.h"
#include "openmm/Integrator.h" #include "openmm/Integrator.h"
#include "openmm/LangevinIntegrator.h" #include "openmm/LangevinIntegrator.h"
#include "openmm/LocalEnergyMinimizer.h"
#include "openmm/MonteCarloBarostat.h" #include "openmm/MonteCarloBarostat.h"
#include "openmm/NonbondedForce.h" #include "openmm/NonbondedForce.h"
#include "openmm/Context.h" #include "openmm/Context.h"
......
#ifndef OPENMM_LOCALENERGYMINIMIZER_H_
#define OPENMM_LOCALENERGYMINIMIZER_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2010 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "Context.h"
namespace OpenMM {
/**
* Given a Context, this class searches for a new set of particle positions that represent
* a local minimum of the potential energy. The search is performed with the L-BFGS algorithm.
*/
class OPENMM_EXPORT LocalEnergyMinimizer {
public:
/**
* Search for a new set of particle positions that represent a local potential energy minimum.
* On exit, the Context will have been updated with the new positions.
*
* @param context a Context specifying the System to minimize and the initial particle positions
* @param tolerance this specifies how precisely the energy minimum must be located. Minimization
* will be halted once the root-mean-square value of all force components reaches
* this tolerance. The default value is 1.
* @param maxIterations the maximum number of iterations to perform. If this is 0, minimation is continued
* until the results converge without regard to how many iterations it takes. The
* default value is 0.
*/
static void minimize(Context& context, double tolerance = 1, int maxIterations = 0);
};
} // namespace OpenMM
#endif /*OPENMM_LOCALENERGYMINIMIZER_H_*/
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2010 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "openmm/LocalEnergyMinimizer.h"
#include "openmm/OpenMMException.h"
#include "lbfgs.h"
#include "openmm/Platform.h"
#include <cmath>
#include <sstream>
#include <vector>
using namespace OpenMM;
using namespace std;
static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step) {
Context* context = reinterpret_cast<Context*>(instance);
int numParticles = context->getSystem().getNumParticles();
vector<Vec3> positions(numParticles);
for (int i = 0; i < numParticles; i++)
positions[i] = Vec3(x[3*i], x[3*i+1], x[3*i+2]);
context->setPositions(positions);
State state = context->getState(State::Forces | State::Energy);
const vector<Vec3>& forces = state.getForces();
for (int i = 0; i < numParticles; i++) {
g[3*i] = -forces[i][0];
g[3*i+1] = -forces[i][1];
g[3*i+2] = -forces[i][2];
}
return state.getPotentialEnergy();
}
void LocalEnergyMinimizer::minimize(Context& context, double tolerance, int maxIterations) {
int numParticles = context.getSystem().getNumParticles();
lbfgsfloatval_t *x = lbfgs_malloc(numParticles*3);
if (x == NULL)
throw OpenMMException("LocalEnergyMinimizer: Failed to allocate memory");
// Record the initial positions and determine a normalization constant for scaling the tolerance.
const vector<Vec3>& positions = context.getState(State::Positions).getPositions();
double norm = 0.0;
for (int i = 0; i < numParticles; i++) {
x[3*i] = positions[i][0];
x[3*i+1] = positions[i][1];
x[3*i+2] = positions[i][2];
norm += positions[i].dot(positions[i]);
}
norm /= numParticles;
norm = (norm < 1 ? 1 : sqrt(norm));
// Perform the minimization.
lbfgs_parameter_t param;
lbfgs_parameter_init(&param);
if (!context.getPlatform().supportsDoublePrecision())
param.xtol = 1e-7;
param.epsilon = tolerance/norm;
param.max_iterations = maxIterations;
param.linesearch = LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE;
lbfgsfloatval_t fx;
lbfgs(numParticles*3, x, &fx, evaluate, NULL, &context, &param);
lbfgs_free(x);
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2010 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "AssertionUtilities.h"
#include "openmm/Context.h"
#include "openmm/HarmonicBondForce.h"
#include "openmm/LocalEnergyMinimizer.h"
#include "openmm/NonbondedForce.h"
#include "openmm/VerletIntegrator.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
using namespace OpenMM;
using namespace std;
void testHarmonicBonds() {
const int numParticles = 10;
System system;
HarmonicBondForce* bonds = new HarmonicBondForce();
system.addForce(bonds);
// Create a chain of particles connected by harmonic bonds.
vector<Vec3> positions(numParticles);
for (int i = 0; i < numParticles; i++) {
system.addParticle(1.0);
positions[i] = Vec3(i, 0, 0);
if (i > 0)
bonds->addBond(i-1, i, 1+0.1*i, 1);
}
// Minimize it and check that all bonds are at their equilibrium distances.
VerletIntegrator integrator(0.01);
Context context(system, integrator);
context.setPositions(positions);
LocalEnergyMinimizer::minimize(context, 1e-5);
State state = context.getState(State::Positions);
for (int i = 1; i < numParticles; i++) {
Vec3 delta = state.getPositions()[i]-state.getPositions()[i-1];
ASSERT_EQUAL_TOL(1+0.1*i, sqrt(delta.dot(delta)), 1e-4);
}
}
void testLargeSystem() {
const int numParticles = 100;
const double cutoff = 2.0;
const double boxSize = 5.0;
const double tolerance = 5;
System system;
system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
NonbondedForce* nonbonded = new NonbondedForce();
nonbonded->setCutoffDistance(cutoff);
nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
system.addForce(nonbonded);
// Create a cloud of particles.
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
vector<Vec3> positions(numParticles);
for (int i = 0; i < numParticles; i++) {
system.addParticle(1.0);
nonbonded->addParticle(i%2 == 0 ? 1 : -1, 0.2, 0.2);
positions[i] = Vec3(boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt));
}
// Minimize it, verify that the energy has decreased, and check that the force magnitude satisfies the requested tolerance.
VerletIntegrator integrator(0.01);
Context context(system, integrator);
context.setPositions(positions);
State initialState = context.getState(State::Forces | State::Energy);
LocalEnergyMinimizer::minimize(context, tolerance);
State finalState = context.getState(State::Forces | State::Energy);
ASSERT(finalState.getPotentialEnergy() < initialState.getPotentialEnergy());
double initialNorm = 0.0;
double finalNorm = 0.0;
for (int i = 0; i < numParticles; i++) {
initialNorm += initialState.getForces()[i].dot(initialState.getForces()[i]);
finalNorm += finalState.getForces()[i].dot(finalState.getForces()[i]);
}
initialNorm = sqrt(initialNorm/numParticles);
finalNorm = sqrt(finalNorm/numParticles);
ASSERT(finalNorm < initialNorm);
ASSERT(finalNorm < 2*tolerance);
}
int main() {
try {
testHarmonicBonds();
testLargeSystem();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
return 1;
}
cout << "Done" << endl;
return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment