Unverified Commit ff3432fa authored by Peter Eastman's avatar Peter Eastman Committed by GitHub
Browse files

Enable SSE code for LBFGS (#4327)

* Updated to new version of libLBFGS

* Enable SSE code for LBFGS
parent f6e6b6ed
......@@ -244,6 +244,7 @@ FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
ENDFOREACH(subdir)
IF(X86)
SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/libraries/sfmt/src/SFMT.cpp PROPERTIES COMPILE_FLAGS "-DHAVE_SSE2=1")
SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/libraries/lbfgs/src/lbfgs.cpp PROPERTIES COMPILE_FLAGS "-DUSE_SSE=1 -DHAVE_EMMINTRIN_H=1")
ELSE()
SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/libraries/sfmt/src/SFMT.cpp PROPERTIES COMPILE_FLAGS "-UHAVE_SSE2")
ENDIF()
......
......@@ -24,7 +24,7 @@
* THE SOFTWARE.
*/
/* $Id: lbfgs.h 65 2010-01-29 12:19:16Z naoaki $ */
/* $Id$ */
#ifndef __LBFGS_H__
#define __LBFGS_H__
......@@ -243,7 +243,7 @@ typedef struct {
* (f' - f) / f < \ref delta,
* where f' is the objective value of \ref past iterations ago, and f is
* the objective value of the current iteration.
* The default value is \c 0.
* The default value is \c 1e-5.
*/
lbfgsfloatval_t delta;
......@@ -267,7 +267,7 @@ typedef struct {
/**
* The maximum number of trials for the line search.
* This parameter controls the number of function and gradients evaluations
* per iteration for the line search routine. The default value is \c 20.
* per iteration for the line search routine. The default value is \c 40.
*/
int max_linesearch;
......@@ -313,7 +313,7 @@ typedef struct {
* evaluations are inexpensive with respect to the cost of the
* iteration (which is sometimes the case when solving very large
* problems) it may be advantageous to set this parameter to a small
* value. A typical small value is \c 0.1. This parameter shuold be
* value. A typical small value is \c 0.1. This parameter should be
* greater than the \ref ftol parameter (\c 1e-4) and smaller than
* \c 1.0.
*/
......@@ -525,6 +525,13 @@ lbfgsfloatval_t WINDOWS_EXPORT * lbfgs_malloc(int n);
*/
void WINDOWS_EXPORT lbfgs_free(lbfgsfloatval_t *x);
/**
* Get string description of an lbfgs() return code.
*
* @param err A value returned by lbfgs().
*/
const char* lbfgs_strerror(int err);
/** @} */
#ifdef __cplusplus
......@@ -583,7 +590,7 @@ Among the various ports of L-BFGS, this library provides several features:
The library is thread-safe, which is the secondary gain from the callback
interface.
- <b>Cross platform.</b> The source code can be compiled on Microsoft Visual
Studio 2005, GNU C Compiler (gcc), etc.
Studio 2010, GNU C Compiler (gcc), etc.
- <b>Configurable precision</b>: A user can choose single-precision (float)
or double-precision (double) accuracy by changing ::LBFGS_FLOAT macro.
- <b>SSE/SSE2 optimization</b>:
......@@ -597,17 +604,28 @@ This library is used by:
- <a href="http://www.chokkan.org/software/classias/">Classias: A collection of machine-learning algorithms for classification</a>
- <a href="http://www.public.iastate.edu/~gdancik/mlegp/">mlegp: an R package for maximum likelihood estimates for Gaussian processes</a>
- <a href="http://infmath.uibk.ac.at/~matthiasf/imaging2/">imaging2: the imaging2 class library</a>
- <a href="http://search.cpan.org/~laye/Algorithm-LBFGS-0.16/">Algorithm::LBFGS - Perl extension for L-BFGS</a>
- <a href="http://www.cs.kuleuven.be/~bernd/yap-lbfgs/">YAP-LBFGS (an interface to call libLBFGS from YAP Prolog)</a>
@section download Download
- <a href="http://www.chokkan.org/software/dist/liblbfgs-1.9.tar.gz">Source code</a>
- <a href="https://github.com/downloads/chokkan/liblbfgs/liblbfgs-1.10.tar.gz">Source code</a>
- <a href="https://github.com/chokkan/liblbfgs">GitHub repository</a>
libLBFGS is distributed under the term of the
<a href="http://opensource.org/licenses/mit-license.php">MIT license</a>.
@section modules Third-party modules
- <a href="http://cran.r-project.org/web/packages/lbfgs/index.html">lbfgs: Limited-memory BFGS Optimization (a wrapper for R)</a> maintained by Antonio Coppola.
- <a href="http://search.cpan.org/~laye/Algorithm-LBFGS-0.16/">Algorithm::LBFGS - Perl extension for L-BFGS</a> maintained by Lei Sun.
- <a href="http://www.cs.kuleuven.be/~bernd/yap-lbfgs/">YAP-LBFGS (an interface to call libLBFGS from YAP Prolog)</a> maintained by Bernd Gutmann.
@section changelog History
- Version 1.10 (2010-12-22):
- Fixed compiling errors on Mac OS X; this patch was kindly submitted by
Nic Schraudolph.
- Reduced compiling warnings on Mac OS X; this patch was kindly submitted
by Tamas Nepusz.
- Replaced memalign() with posix_memalign().
- Updated solution and project files for Microsoft Visual Studio 2010.
- Version 1.9 (2010-01-29):
- Fixed a mistake in checking the validity of the parameters "ftol" and
"wolfe"; this was discovered by Kevin S. Van Horn.
......@@ -728,6 +746,7 @@ Special thanks go to:
- Yoshimasa Tsuruoka and Daisuke Okanohara for technical information about
OWL-QN
- Takashi Imamichi for the useful enhancements of the backtracking method
- Kevin S. Van Horn, Nic Schraudolph, and Tamas Nepusz for bug fixes
Finally I would like to thank the original author, Jorge Nocedal, who has been
distributing the effieicnt and explanatory implementation in an open source
......
......@@ -23,10 +23,10 @@
* THE SOFTWARE.
*/
/* $Id: arithmetic_ansi.h 65 2010-01-29 12:19:16Z naoaki $ */
/* $Id$ */
#include <stdlib.h>
#include <string.h>
#include <memory.h>
#if LBFGS_FLOAT == 32 && LBFGS_IEEE_FLOAT
#define fsigndiff(x, y) (((*(uint32_t*)(x)) ^ (*(uint32_t*)(y))) & 0x80000000U)
......
......@@ -23,10 +23,12 @@
* THE SOFTWARE.
*/
/* $Id: arithmetic_sse_double.h 65 2010-01-29 12:19:16Z naoaki $ */
/* $Id$ */
#include <stdlib.h>
#ifndef __APPLE__
#include <malloc.h>
#endif
#include <memory.h>
#if 1400 <= _MSC_VER
......@@ -39,10 +41,15 @@
inline static void* vecalloc(size_t size)
{
#ifdef _MSC_VER
#if defined(_WIN32)
void *memblock = _aligned_malloc(size, 16);
#elif defined(__APPLE__) /* OS X always aligns on 16-byte boundaries */
void *memblock = malloc(size);
#else
void *memblock = memalign(16, size);
void *memblock = NULL, *p = NULL;
if (posix_memalign(&p, 16, size) == 0) {
memblock = p;
}
#endif
if (memblock != NULL) {
memset(memblock, 0, size);
......@@ -192,7 +199,7 @@ inline static void vecfree(void *memblock)
#if 3 <= __SSE__
#if 3 <= __SSE__ || defined(__SSE3__)
/*
Horizontal add with haddps SSE3 instruction. The work register (rw)
is unused.
......
......@@ -23,10 +23,12 @@
* THE SOFTWARE.
*/
/* $Id: arithmetic_sse_float.h 65 2010-01-29 12:19:16Z naoaki $ */
/* $Id$ */
#include <stdlib.h>
#ifndef __APPLE__
#include <malloc.h>
#endif
#include <memory.h>
#if 1400 <= _MSC_VER
......@@ -45,7 +47,16 @@
inline static void* vecalloc(size_t size)
{
#if defined(_MSC_VER)
void *memblock = _aligned_malloc(size, 16);
#elif defined(__APPLE__) /* OS X always aligns on 16-byte boundaries */
void *memblock = malloc(size);
#else
void *memblock = NULL, *p = NULL;
if (posix_memalign(&p, 16, size) == 0) {
memblock = p;
}
#endif
if (memblock != NULL) {
memset(memblock, 0, size);
}
......@@ -54,7 +65,11 @@ inline static void* vecalloc(size_t size)
inline static void vecfree(void *memblock)
{
#ifdef _MSC_VER
_aligned_free(memblock);
#else
free(memblock);
#endif
}
#define vecset(x, c, n) \
......@@ -185,7 +200,7 @@ inline static void vecfree(void *memblock)
#if 3 <= __SSE__
#if 3 <= __SSE__ || defined(__SSE3__)
/*
Horizontal add with haddps SSE3 instruction. The work register (rw)
is unused.
......
......@@ -24,7 +24,7 @@
* THE SOFTWARE.
*/
/* $Id: lbfgs.c 65 2010-01-29 12:19:16Z naoaki $ */
/* $Id$ */
/*
This library is a C port of the FORTRAN implementation of Limited-memory
......@@ -65,6 +65,7 @@ licence.
#include <config.h>
#endif/*HAVE_CONFIG_H*/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
......@@ -73,7 +74,6 @@ licence.
#ifdef _MSC_VER
#define inline __inline
typedef unsigned int uint32_t;
#endif/*_MSC_VER*/
#if defined(USE_SSE) && defined(__SSE2__) && LBFGS_FLOAT == 64
......@@ -226,10 +226,14 @@ static int round_out_variables(int n)
lbfgsfloatval_t* lbfgs_malloc(int n)
{
if (n < 0) {
return NULL;
}
#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
n = round_out_variables(n);
#endif/*defined(USE_SSE)*/
return (lbfgsfloatval_t*)vecalloc(sizeof(lbfgsfloatval_t) * n);
return (lbfgsfloatval_t*)vecalloc(sizeof(lbfgsfloatval_t) * (size_t)n);
}
void lbfgs_free(lbfgsfloatval_t *x)
......@@ -290,7 +294,7 @@ int lbfgs(
if (n % 8 != 0) {
return LBFGSERR_INVALID_N_SSE;
}
if (((unsigned short)x & 0x000F) != 0) {
if ((uintptr_t)(const void*)x % 16 != 0) {
return LBFGSERR_INVALID_X_SSE;
}
#endif/*defined(USE_SSE)*/
......@@ -364,11 +368,11 @@ int lbfgs(
}
/* Allocate working space. */
xp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
g = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
gp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
d = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
w = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
xp = (lbfgsfloatval_t*)vecalloc((size_t)n * sizeof(lbfgsfloatval_t));
g = (lbfgsfloatval_t*)vecalloc((size_t)n * sizeof(lbfgsfloatval_t));
gp = (lbfgsfloatval_t*)vecalloc((size_t)n * sizeof(lbfgsfloatval_t));
d = (lbfgsfloatval_t*)vecalloc((size_t)n * sizeof(lbfgsfloatval_t));
w = (lbfgsfloatval_t*)vecalloc((size_t)n * sizeof(lbfgsfloatval_t));
if (xp == NULL || g == NULL || gp == NULL || d == NULL || w == NULL) {
ret = LBFGSERR_OUTOFMEMORY;
goto lbfgs_exit;
......@@ -376,7 +380,7 @@ int lbfgs(
if (param.orthantwise_c != 0.) {
/* Allocate working space for OW-LQN. */
pg = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
pg = (lbfgsfloatval_t*)vecalloc((size_t)n * sizeof(lbfgsfloatval_t));
if (pg == NULL) {
ret = LBFGSERR_OUTOFMEMORY;
goto lbfgs_exit;
......@@ -384,7 +388,7 @@ int lbfgs(
}
/* Allocate limited memory storage. */
lm = (iteration_data_t*)vecalloc(m * sizeof(iteration_data_t));
lm = (iteration_data_t*)vecalloc((size_t)m * sizeof(iteration_data_t));
if (lm == NULL) {
ret = LBFGSERR_OUTOFMEMORY;
goto lbfgs_exit;
......@@ -395,8 +399,8 @@ int lbfgs(
it = &lm[i];
it->alpha = 0;
it->ys = 0;
it->s = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
it->y = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
it->s = (lbfgsfloatval_t*)vecalloc((size_t)n * sizeof(lbfgsfloatval_t));
it->y = (lbfgsfloatval_t*)vecalloc((size_t)n * sizeof(lbfgsfloatval_t));
if (it->s == NULL || it->y == NULL) {
ret = LBFGSERR_OUTOFMEMORY;
goto lbfgs_exit;
......@@ -405,7 +409,7 @@ int lbfgs(
/* Allocate an array for storing previous values of the objective function. */
if (0 < param.past) {
pf = (lbfgsfloatval_t*)vecalloc(param.past * sizeof(lbfgsfloatval_t));
pf = (lbfgsfloatval_t*)vecalloc((size_t)param.past * sizeof(lbfgsfloatval_t));
}
try {
......@@ -491,7 +495,7 @@ int lbfgs(
/* Report the progress. */
if (cd.proc_progress) {
if (ret = cd.proc_progress(cd.instance, x, g, fx, xnorm, gnorm, step, cd.n, k, ls)) {
if ((ret = cd.proc_progress(cd.instance, x, g, fx, xnorm, gnorm, step, cd.n, k, ls))) {
goto lbfgs_exit;
}
}
......@@ -511,7 +515,7 @@ int lbfgs(
/*
Test for stopping criterion.
The criterion is given by the following formula:
(f(past_x) - f(x)) / f(x) < \delta
|(f(past_x) - f(x))| / f(x) < \delta
*/
if (pf != NULL) {
/* We don't test the stopping criterion while k < past. */
......@@ -520,7 +524,7 @@ int lbfgs(
rate = (pf[k % param.past] - fx) / fx;
/* The stopping criterion. */
if (rate < param.delta) {
if (fabs(rate) < param.delta) {
ret = LBFGS_STOP;
break;
}
......@@ -661,6 +665,119 @@ lbfgs_exit:
return ret;
}
const char* lbfgs_strerror(int err)
{
switch(err) {
case LBFGS_SUCCESS:
/* Also handles LBFGS_CONVERGENCE. */
return "Success: reached convergence (gtol).";
case LBFGS_STOP:
return "Success: met stopping criteria (ftol).";
case LBFGS_ALREADY_MINIMIZED:
return "The initial variables already minimize the objective function.";
case LBFGSERR_UNKNOWNERROR:
return "Unknown error.";
case LBFGSERR_LOGICERROR:
return "Logic error.";
case LBFGSERR_OUTOFMEMORY:
return "Insufficient memory.";
case LBFGSERR_CANCELED:
return "The minimization process has been canceled.";
case LBFGSERR_INVALID_N:
return "Invalid number of variables specified.";
case LBFGSERR_INVALID_N_SSE:
return "Invalid number of variables (for SSE) specified.";
case LBFGSERR_INVALID_X_SSE:
return "The array x must be aligned to 16 (for SSE).";
case LBFGSERR_INVALID_EPSILON:
return "Invalid parameter lbfgs_parameter_t::epsilon specified.";
case LBFGSERR_INVALID_TESTPERIOD:
return "Invalid parameter lbfgs_parameter_t::past specified.";
case LBFGSERR_INVALID_DELTA:
return "Invalid parameter lbfgs_parameter_t::delta specified.";
case LBFGSERR_INVALID_LINESEARCH:
return "Invalid parameter lbfgs_parameter_t::linesearch specified.";
case LBFGSERR_INVALID_MINSTEP:
return "Invalid parameter lbfgs_parameter_t::max_step specified.";
case LBFGSERR_INVALID_MAXSTEP:
return "Invalid parameter lbfgs_parameter_t::max_step specified.";
case LBFGSERR_INVALID_FTOL:
return "Invalid parameter lbfgs_parameter_t::ftol specified.";
case LBFGSERR_INVALID_WOLFE:
return "Invalid parameter lbfgs_parameter_t::wolfe specified.";
case LBFGSERR_INVALID_GTOL:
return "Invalid parameter lbfgs_parameter_t::gtol specified.";
case LBFGSERR_INVALID_XTOL:
return "Invalid parameter lbfgs_parameter_t::xtol specified.";
case LBFGSERR_INVALID_MAXLINESEARCH:
return "Invalid parameter lbfgs_parameter_t::max_linesearch specified.";
case LBFGSERR_INVALID_ORTHANTWISE:
return "Invalid parameter lbfgs_parameter_t::orthantwise_c specified.";
case LBFGSERR_INVALID_ORTHANTWISE_START:
return "Invalid parameter lbfgs_parameter_t::orthantwise_start specified.";
case LBFGSERR_INVALID_ORTHANTWISE_END:
return "Invalid parameter lbfgs_parameter_t::orthantwise_end specified.";
case LBFGSERR_OUTOFINTERVAL:
return "The line-search step went out of the interval of uncertainty.";
case LBFGSERR_INCORRECT_TMINMAX:
return "A logic error occurred; alternatively, the interval of uncertainty"
" became too small.";
case LBFGSERR_ROUNDING_ERROR:
return "A rounding error occurred; alternatively, no line-search step"
" satisfies the sufficient decrease and curvature conditions.";
case LBFGSERR_MINIMUMSTEP:
return "The line-search step became smaller than lbfgs_parameter_t::min_step.";
case LBFGSERR_MAXIMUMSTEP:
return "The line-search step became larger than lbfgs_parameter_t::max_step.";
case LBFGSERR_MAXIMUMLINESEARCH:
return "The line-search routine reaches the maximum number of evaluations.";
case LBFGSERR_MAXIMUMITERATION:
return "The algorithm routine reaches the maximum number of iterations.";
case LBFGSERR_WIDTHTOOSMALL:
return "Relative width of the interval of uncertainty is at most"
" lbfgs_parameter_t::xtol.";
case LBFGSERR_INVALIDPARAMETERS:
return "A logic error (negative line-search step) occurred.";
case LBFGSERR_INCREASEGRADIENT:
return "The current search direction increases the objective function value.";
default:
return "(unknown)";
}
}
static int line_search_backtracking(
......@@ -677,8 +794,11 @@ static int line_search_backtracking(
const lbfgs_parameter_t *param
)
{
int ret = 0, count = 0;
lbfgsfloatval_t width, dg, norm = 0.;
(void)gp;
(void)wp;
int count = 0;
lbfgsfloatval_t width, dg;
lbfgsfloatval_t finit, dginit = 0., dgtest;
const lbfgsfloatval_t dec = 0.5, inc = 2.1;
......@@ -770,7 +890,7 @@ static int line_search_backtracking_owlqn(
const lbfgs_parameter_t *param
)
{
int i, ret = 0, count = 0;
int i, count = 0;
lbfgsfloatval_t width = 0.5, norm = 0.;
lbfgsfloatval_t finit = *f, dgtest;
......@@ -844,6 +964,9 @@ static int line_search_morethuente(
const lbfgs_parameter_t *param
)
{
(void)gp;
(void)wa;
int count = 0;
int brackt, stage1, uinfo = 0;
lbfgsfloatval_t dg;
......@@ -1017,8 +1140,6 @@ static int line_search_morethuente(
width = fabs(sty - stx);
}
}
return LBFGSERR_LOGICERROR;
}
......@@ -1063,9 +1184,9 @@ static int line_search_morethuente(
* @param du The value of f'(u).
* @param v The value of another point, v.
* @param fv The value of f(v).
* @param du The value of f'(v).
* @param xmin The maximum value.
* @param dv The value of f'(v).
* @param xmin The minimum value.
* @param xmax The maximum value.
*/
#define CUBIC_MINIMIZER2(cm, u, fu, du, v, fv, dv, xmin, xmax) \
d = (v) - (u); \
......@@ -1083,7 +1204,7 @@ static int line_search_morethuente(
r = p / q; \
if (r < 0. && gamma != 0.) { \
(cm) = (v) - r * d; \
} else if (a < 0) { \
} else if (d > 0) { \
(cm) = (xmax); \
} else { \
(cm) = (xmin); \
......@@ -1098,7 +1219,7 @@ static int line_search_morethuente(
* @param v The value of another point, v.
* @param fv The value of f(v).
*/
#define QUARD_MINIMIZER(qm, u, fu, du, v, fv) \
#define QUAD_MINIMIZER(qm, u, fu, du, v, fv) \
a = (v) - (u); \
(qm) = (u) + (du) / (((fu) - (fv)) / a + (du)) / 2 * a;
......@@ -1110,7 +1231,7 @@ static int line_search_morethuente(
* @param v The value of another point, v.
* @param dv The value of f'(v).
*/
#define QUARD_MINIMIZER2(qm, u, du, v, dv) \
#define QUAD_MINIMIZER2(qm, u, du, v, dv) \
a = (u) - (v); \
(qm) = (v) + (dv) / ((dv) - (du)) * a;
......@@ -1163,7 +1284,7 @@ static int update_trial_interval(
lbfgsfloatval_t mc; /* minimizer of an interpolated cubic. */
lbfgsfloatval_t mq; /* minimizer of an interpolated quadratic. */
lbfgsfloatval_t newt; /* new trial value. */
USES_MINIMIZER; /* for CUBIC_MINIMIZER and QUARD_MINIMIZER. */
USES_MINIMIZER; /* for CUBIC_MINIMIZER and QUAD_MINIMIZER. */
/* Check the input parameters for errors. */
if (*brackt) {
......@@ -1194,7 +1315,7 @@ static int update_trial_interval(
*brackt = 1;
bound = 1;
CUBIC_MINIMIZER(mc, *x, *fx, *dx, *t, *ft, *dt);
QUARD_MINIMIZER(mq, *x, *fx, *dx, *t, *ft);
QUAD_MINIMIZER(mq, *x, *fx, *dx, *t, *ft);
if (fabs(mc - *x) < fabs(mq - *x)) {
newt = mc;
} else {
......@@ -1210,7 +1331,7 @@ static int update_trial_interval(
*brackt = 1;
bound = 0;
CUBIC_MINIMIZER(mc, *x, *fx, *dx, *t, *ft, *dt);
QUARD_MINIMIZER2(mq, *x, *dx, *t, *dt);
QUAD_MINIMIZER2(mq, *x, *dx, *t, *dt);
if (fabs(mc - *t) > fabs(mq - *t)) {
newt = mc;
} else {
......@@ -1230,7 +1351,7 @@ static int update_trial_interval(
*/
bound = 1;
CUBIC_MINIMIZER2(mc, *x, *fx, *dx, *t, *ft, *dt, tmin, tmax);
QUARD_MINIMIZER2(mq, *x, *dx, *t, *dt);
QUAD_MINIMIZER2(mq, *x, *dx, *t, *dt);
if (*brackt) {
if (fabs(*t - mc) < fabs(*t - mq)) {
newt = mc;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment