Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
bbbc3160
"wrappers/python/src/vscode:/vscode.git/clone" did not exist on "743c4c876dd63727e23d9758a16712ea22f588d2"
Commit
bbbc3160
authored
Jul 21, 2014
by
peastman
Browse files
Optimization to building DIIS matrix
parent
a346a6db
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
37 additions
and
38 deletions
+37
-38
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+13
-9
plugins/amoeba/platforms/cuda/src/kernels/multipoleInducedField.cu
...moeba/platforms/cuda/src/kernels/multipoleInducedField.cu
+24
-29
No files found.
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
bbbc3160
...
...
@@ -978,7 +978,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
prevDipoles
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevDipoles"
);
prevDipolesPolar
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevDipolesPolar"
);
prevErrors
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevErrors"
);
diisMatrix
=
new
CudaArray
(
cu
,
(
MaxPrevDIISDipoles
+
1
)
*
(
MaxPrevDIISDipoles
+
1
)
,
elementSize
,
"diisMatrix"
);
diisMatrix
=
new
CudaArray
(
cu
,
MaxPrevDIISDipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"diisMatrix"
);
diisCoefficients
=
new
CudaArray
(
cu
,
MaxPrevDIISDipoles
+
1
,
sizeof
(
float
),
"diisMatrix"
);
cu
.
addAutoclearBuffer
(
*
field
);
cu
.
addAutoclearBuffer
(
*
fieldPolar
);
...
...
@@ -1625,33 +1625,37 @@ double CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
void
*
recordDIISDipolesGkArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
gkKernel
->
getField
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedField
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedFieldPolar
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedDipoles
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedDipolesPolar
()
->
getDevicePointer
(),
&
polarizability
->
getDevicePointer
(),
&
inducedDipoleErrors
->
getDevicePointer
(),
&
prevDipolesGk
->
getDevicePointer
(),
&
prevDipolesGkPolar
->
getDevicePointer
(),
&
prevErrors
->
getDevicePointer
(),
&
iteration
,
&
falseValue
};
&
prevDipolesGkPolar
->
getDevicePointer
(),
&
prevErrors
->
getDevicePointer
(),
&
iteration
,
&
falseValue
,
&
diisMatrix
->
getDevicePointer
()
};
cu
.
executeKernel
(
recordDIISDipolesKernel
,
recordDIISDipolesGkArgs
,
cu
.
getNumThreadBlocks
()
*
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
*
elementSize
*
2
);
}
void
*
recordDIISDipolesArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
npt
,
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
polarizability
->
getDevicePointer
(),
&
inducedDipoleErrors
->
getDevicePointer
(),
&
prevDipoles
->
getDevicePointer
(),
&
prevDipolesPolar
->
getDevicePointer
(),
&
prevErrors
->
getDevicePointer
(),
&
iteration
,
&
trueValue
};
&
prevDipolesPolar
->
getDevicePointer
(),
&
prevErrors
->
getDevicePointer
(),
&
iteration
,
&
trueValue
,
&
diisMatrix
->
getDevicePointer
()
};
cu
.
executeKernel
(
recordDIISDipolesKernel
,
recordDIISDipolesArgs
,
cu
.
getNumThreadBlocks
()
*
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
*
elementSize
*
2
);
float2
*
errors
=
(
float2
*
)
cu
.
getPinnedBuffer
();
inducedDipoleErrors
->
download
(
errors
,
false
);
// Determine the coefficients for selecting the new dipoles.
vector
<
float
>
coefficients
(
MaxPrevDIISDipoles
);
int
numPrev
=
(
iteration
+
1
<
MaxPrevDIISDipoles
?
iteration
+
1
:
MaxPrevDIISDipoles
);
void
*
buildMatrixArgs
[]
=
{
&
prevErrors
->
getDevicePointer
(),
&
iteration
,
&
diisMatrix
->
getDevicePointer
()};
int
threadBlocks
=
min
(
numPrev
,
cu
.
getNumThreadBlocks
());
cu
.
executeKernel
(
buildMatrixKernel
,
buildMatrixArgs
,
threadBlocks
*
128
,
128
,
128
*
elementSize
);
vector
<
float
>
coefficients
(
MaxPrevDIISDipoles
);
if
(
iteration
==
0
)
coefficients
[
0
]
=
1
;
else
{
void
*
buildMatrixArgs
[]
=
{
&
prevErrors
->
getDevicePointer
(),
&
iteration
,
&
diisMatrix
->
getDevicePointer
()};
cu
.
executeKernel
(
buildMatrixKernel
,
buildMatrixArgs
,
cu
.
getNumThreadBlocks
()
*
128
,
128
,
128
*
elementSize
);
vector
<
float
>
matrix
;
diisMatrix
->
download
(
matrix
);
int
rank
=
numPrev
+
1
;
Array2D
<
double
>
b
(
rank
,
rank
);
for
(
int
i
=
0
;
i
<
rank
;
i
++
)
for
(
int
j
=
0
;
j
<
rank
;
j
++
)
b
[
i
][
j
]
=
matrix
[
i
*
rank
+
j
];
b
[
0
][
0
]
=
0
;
for
(
int
i
=
1
;
i
<
rank
;
i
++
)
b
[
i
][
0
]
=
b
[
0
][
i
]
=
-
1
;
for
(
int
i
=
0
;
i
<
numPrev
;
i
++
)
for
(
int
j
=
0
;
j
<
numPrev
;
j
++
)
b
[
i
+
1
][
j
+
1
]
=
matrix
[
i
*
MaxPrevDIISDipoles
+
j
];
// Solve using SVD. Since the right hand side is (-1, 0, 0, 0, ...), this is simpler than the general case.
...
...
plugins/amoeba/platforms/cuda/src/kernels/multipoleInducedField.cu
View file @
bbbc3160
...
...
@@ -499,7 +499,7 @@ extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__
extern
"C"
__global__
void
recordInducedDipolesForDIIS
(
const
long
long
*
__restrict__
fixedField
,
const
long
long
*
__restrict__
fixedFieldPolar
,
const
long
long
*
__restrict__
fixedFieldS
,
const
long
long
*
__restrict__
inducedField
,
const
long
long
*
__restrict__
inducedFieldPolar
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
const
float
*
__restrict__
polarizability
,
float2
*
__restrict__
errors
,
real
*
__restrict__
prevDipoles
,
real
*
__restrict__
prevDipolesPolar
,
real
*
__restrict__
prevErrors
,
int
iteration
,
bool
recordPrevErrors
)
{
real
*
__restrict__
prevDipoles
,
real
*
__restrict__
prevDipolesPolar
,
real
*
__restrict__
prevErrors
,
int
iteration
,
bool
recordPrevErrors
,
real
*
__restrict__
matrix
)
{
extern
__shared__
real2
buffer
[];
#ifdef USE_EWALD
const
real
ewaldScale
=
(
4
/
(
real
)
3
)
*
(
EWALD_ALPHA
*
EWALD_ALPHA
*
EWALD_ALPHA
)
/
SQRT_PI
;
...
...
@@ -557,29 +557,27 @@ extern "C" __global__ void recordInducedDipolesForDIIS(const long long* __restri
}
if
(
threadIdx
.
x
==
0
)
errors
[
blockIdx
.
x
]
=
make_float2
((
float
)
buffer
[
0
].
x
,
(
float
)
buffer
[
0
].
y
);
if
(
iteration
>=
MAX_PREV_DIIS_DIPOLES
&&
recordPrevErrors
&&
blockIdx
.
x
==
0
)
{
// Shift over the existing matrix elements.
for
(
int
i
=
0
;
i
<
MAX_PREV_DIIS_DIPOLES
-
1
;
i
++
)
{
if
(
threadIdx
.
x
<
MAX_PREV_DIIS_DIPOLES
-
1
)
matrix
[
threadIdx
.
x
+
i
*
MAX_PREV_DIIS_DIPOLES
]
=
matrix
[(
threadIdx
.
x
+
1
)
+
(
i
+
1
)
*
MAX_PREV_DIIS_DIPOLES
];
__syncthreads
();
}
}
}
extern
"C"
__global__
void
computeDIISMatrix
(
real
*
__restrict__
prevErrors
,
int
iteration
,
real
*
__restrict__
matrix
)
{
extern
__shared__
real
sumBuffer
[];
int
rank
=
min
(
iteration
+
1
,
MAX_PREV_DIIS_DIPOLES
)
+
1
;
for
(
int
element
=
blockIdx
.
x
;
element
<
rank
*
rank
;
element
+=
gridDim
.
x
)
{
int
j
=
min
(
iteration
,
MAX_PREV_DIIS_DIPOLES
-
1
)
;
for
(
int
i
=
blockIdx
.
x
;
i
<=
j
;
i
+=
gridDim
.
x
)
{
// All the threads in this thread block work together to compute a single matrix element.
int
i
=
element
/
rank
;
int
j
=
element
-
i
*
rank
;
if
(
i
>
j
)
continue
;
real
value
;
if
(
i
==
0
&&
j
==
0
)
value
=
0
;
else
if
(
i
==
0
||
j
==
0
)
value
=
-
1
;
else
{
// Compute the inner product of the two error vectors.
real
sum
=
0
;
for
(
int
index
=
threadIdx
.
x
;
index
<
NUM_ATOMS
*
3
;
index
+=
blockDim
.
x
)
sum
+=
prevErrors
[
index
+
(
i
-
1
)
*
NUM_ATOMS
*
3
]
*
prevErrors
[
index
+
(
j
-
1
)
*
NUM_ATOMS
*
3
];
sum
+=
prevErrors
[
index
+
i
*
NUM_ATOMS
*
3
]
*
prevErrors
[
index
+
j
*
NUM_ATOMS
*
3
];
sumBuffer
[
threadIdx
.
x
]
=
sum
;
__syncthreads
();
for
(
int
offset
=
1
;
offset
<
blockDim
.
x
;
offset
*=
2
)
{
...
...
@@ -587,13 +585,10 @@ extern "C" __global__ void computeDIISMatrix(real* __restrict__ prevErrors, int
sumBuffer
[
threadIdx
.
x
]
+=
sumBuffer
[
threadIdx
.
x
+
offset
];
__syncthreads
();
}
value
=
sumBuffer
[
0
];
}
__syncthreads
();
if
(
threadIdx
.
x
==
0
)
{
matrix
[
element
]
=
value
;
matrix
[
i
+
MAX_PREV_DIIS_DIPOLES
*
j
]
=
sumBuffer
[
0
]
;
if
(
i
!=
j
)
matrix
[
j
*
rank
+
i
]
=
value
;
matrix
[
j
+
MAX_PREV_DIIS_DIPOLES
*
i
]
=
sumBuffer
[
0
]
;
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment