Commit e38c6907 authored by peastman's avatar peastman Committed by GitHub
Browse files

Merge pull request #1582 from peastman/diisopt

Optimizations to DIIS
parents d027c825 319018ed
...@@ -59,7 +59,7 @@ public: ...@@ -59,7 +59,7 @@ public:
Sparse_Matrix_CompRow(const Sparse_Matrix_CompRow &S); Sparse_Matrix_CompRow(const Sparse_Matrix_CompRow &S);
Sparse_Matrix_CompRow(int M, int N, int nz, const T *val, Sparse_Matrix_CompRow(int M, int N, int nz, const T *val,
const int *r, const int *c); int *r, int *c);
...@@ -93,7 +93,7 @@ public: ...@@ -93,7 +93,7 @@ public:
*/ */
template <class T> template <class T>
Sparse_Matrix_CompRow<T>::Sparse_Matrix_CompRow(int M, int N, int nz, Sparse_Matrix_CompRow<T>::Sparse_Matrix_CompRow(int M, int N, int nz,
const T *val, const int *r, const int *c) : val_(nz,val), const T *val, int *r, int *c) : val_(nz,val),
rowptr_(M, r), colind_(nz, c), dim1_(M), dim2_(N) {} rowptr_(M, r), colind_(nz, c), dim1_(M), dim2_(N) {}
......
...@@ -41,7 +41,7 @@ ...@@ -41,7 +41,7 @@
#include "CudaForceInfo.h" #include "CudaForceInfo.h"
#include "CudaKernelSources.h" #include "CudaKernelSources.h"
#include "CudaNonbondedUtilities.h" #include "CudaNonbondedUtilities.h"
#include "jama_svd.h" #include "jama_lu.h"
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
...@@ -1837,7 +1837,8 @@ bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) { ...@@ -1837,7 +1837,8 @@ bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
int numPrev = (iteration+1 < MaxPrevDIISDipoles ? iteration+1 : MaxPrevDIISDipoles); int numPrev = (iteration+1 < MaxPrevDIISDipoles ? iteration+1 : MaxPrevDIISDipoles);
void* buildMatrixArgs[] = {&prevErrors->getDevicePointer(), &iteration, &diisMatrix->getDevicePointer()}; void* buildMatrixArgs[] = {&prevErrors->getDevicePointer(), &iteration, &diisMatrix->getDevicePointer()};
int threadBlocks = min(numPrev, cu.getNumThreadBlocks()); int threadBlocks = min(numPrev, cu.getNumThreadBlocks());
cu.executeKernel(buildMatrixKernel, buildMatrixArgs, threadBlocks*128, 128, 128*elementSize); int blockSize = 512;
cu.executeKernel(buildMatrixKernel, buildMatrixArgs, threadBlocks*blockSize, blockSize, blockSize*elementSize);
vector<float> matrixf; vector<float> matrixf;
vector<double> matrix; vector<double> matrix;
if (cu.getUseDoublePrecision()) if (cu.getUseDoublePrecision())
...@@ -1877,20 +1878,16 @@ bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) { ...@@ -1877,20 +1878,16 @@ bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
b[i+1][j+1] = matrixf[i*MaxPrevDIISDipoles+j]; b[i+1][j+1] = matrixf[i*MaxPrevDIISDipoles+j];
} }
// Solve using SVD. Since the right hand side is (-1, 0, 0, 0, ...), this is simpler than the general case. // Solve using LU.
JAMA::SVD<double> svd(b); JAMA::LU<double> lu(b);
Array2D<double> u, v; Array1D<double> x(rank, 0.0);
svd.getU(u); x[0] = -1.0;
svd.getV(v); Array1D<double> coeff = lu.solve(x);
Array1D<double> s; coefficients[rank-1] = 1.0;
svd.getSingularValues(s); for (int i = 0; i < rank-1; i++) {
int effectiveRank = svd.rank(); coefficients[i] = coeff[i+1];
for (int i = 1; i < rank; i++) { coefficients[rank-1] -= coefficients[i];
double d = 0;
for (int j = 0; j < effectiveRank; j++)
d -= u[0][j]*v[i][j]/s[j];
coefficients[i-1] = d;
} }
} }
diisCoefficients->upload(coefficients, false); diisCoefficients->upload(coefficients, false);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment