Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
699fce6e
Commit
699fce6e
authored
Aug 06, 2012
by
Peter Eastman
Browse files
Continuing to convert AmoebaMultipoleForce: mutual induced dipoles now work
parent
29654b80
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
338 additions
and
11 deletions
+338
-11
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.cpp
+37
-7
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.h
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.h
+4
-2
plugins/amoeba/platforms/cuda2/src/kernels/multipoleInducedField.cu
...oeba/platforms/cuda2/src/kernels/multipoleInducedField.cu
+295
-0
plugins/amoeba/platforms/cuda2/tests/TestCudaAmoebaMultipoleForce.cpp
...ba/platforms/cuda2/tests/TestCudaAmoebaMultipoleForce.cpp
+2
-2
No files found.
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.cpp
View file @
699fce6e
...
@@ -865,9 +865,9 @@ private:
...
@@ -865,9 +865,9 @@ private:
CudaCalcAmoebaMultipoleForceKernel
::
CudaCalcAmoebaMultipoleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
System
&
system
)
:
CudaCalcAmoebaMultipoleForceKernel
::
CudaCalcAmoebaMultipoleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
System
&
system
)
:
CalcAmoebaMultipoleForceKernel
(
name
,
platform
),
cu
(
cu
),
system
(
system
),
hasInitializedScaleFactors
(
false
),
CalcAmoebaMultipoleForceKernel
(
name
,
platform
),
cu
(
cu
),
system
(
system
),
hasInitializedScaleFactors
(
false
),
multipoleParticles
(
NULL
),
molecularDipoles
(
NULL
),
molecularQuadrupoles
(
NULL
),
multipoleParticles
(
NULL
),
molecularDipoles
(
NULL
),
molecularQuadrupoles
(
NULL
),
labFrameDipoles
(
NULL
),
labFrameQuadrupoles
(
NULL
),
labFrameDipoles
(
NULL
),
labFrameQuadrupoles
(
NULL
),
f
ield
(
NULL
),
f
ieldPolar
(
NULL
),
torque
(
NULL
),
dampingAndThole
(
NULL
),
field
(
NULL
),
fieldPolar
(
NULL
),
inducedF
ield
(
NULL
),
inducedF
ieldPolar
(
NULL
),
torque
(
NULL
),
dampingAndThole
(
NULL
),
inducedDipole
(
NULL
),
inducedDipolePolar
(
NULL
),
currentEpsilon
(
NULL
),
polarizability
(
NULL
),
covalentFlags
(
NULL
),
polarizationGroupFlags
(
NULL
),
inducedDipole
(
NULL
),
inducedDipolePolar
(
NULL
),
inducedDipoleErrors
(
NULL
),
polarizability
(
NULL
),
covalentFlags
(
NULL
),
polarizationGroupFlags
(
NULL
),
pmeGrid
(
NULL
)
{
pmeGrid
(
NULL
)
{
}
}
...
@@ -887,6 +887,10 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
...
@@ -887,6 +887,10 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
delete
field
;
delete
field
;
if
(
fieldPolar
!=
NULL
)
if
(
fieldPolar
!=
NULL
)
delete
fieldPolar
;
delete
fieldPolar
;
if
(
inducedField
!=
NULL
)
delete
inducedField
;
if
(
inducedFieldPolar
!=
NULL
)
delete
inducedFieldPolar
;
if
(
torque
!=
NULL
)
if
(
torque
!=
NULL
)
delete
torque
;
delete
torque
;
if
(
dampingAndThole
!=
NULL
)
if
(
dampingAndThole
!=
NULL
)
...
@@ -895,8 +899,8 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
...
@@ -895,8 +899,8 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
delete
inducedDipole
;
delete
inducedDipole
;
if
(
inducedDipolePolar
!=
NULL
)
if
(
inducedDipolePolar
!=
NULL
)
delete
inducedDipolePolar
;
delete
inducedDipolePolar
;
if
(
currentEpsilon
!=
NULL
)
if
(
inducedDipoleErrors
!=
NULL
)
delete
currentEpsilon
;
delete
inducedDipoleErrors
;
if
(
polarizability
!=
NULL
)
if
(
polarizability
!=
NULL
)
delete
polarizability
;
delete
polarizability
;
if
(
covalentFlags
!=
NULL
)
if
(
covalentFlags
!=
NULL
)
...
@@ -971,6 +975,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -971,6 +975,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
torque
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"torque"
);
torque
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"torque"
);
inducedDipole
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipole"
);
inducedDipole
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipole"
);
inducedDipolePolar
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipolePolar"
);
inducedDipolePolar
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipolePolar"
);
inducedDipoleErrors
=
new
CudaArray
(
cu
,
cu
.
getNumThreadBlocks
(),
sizeof
(
float2
),
"inducedDipoleErrors"
);
cu
.
addAutoclearBuffer
(
*
field
);
cu
.
addAutoclearBuffer
(
*
field
);
cu
.
addAutoclearBuffer
(
*
fieldPolar
);
cu
.
addAutoclearBuffer
(
*
fieldPolar
);
cu
.
addAutoclearBuffer
(
*
torque
);
cu
.
addAutoclearBuffer
(
*
torque
);
...
@@ -1009,9 +1014,11 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -1009,9 +1014,11 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
if
(
force
.
getPolarizationType
()
==
AmoebaMultipoleForce
::
Mutual
)
{
if
(
force
.
getPolarizationType
()
==
AmoebaMultipoleForce
::
Mutual
)
{
maxInducedIterations
=
force
.
getMutualInducedMaxIterations
();
maxInducedIterations
=
force
.
getMutualInducedMaxIterations
();
inducedEpsilon
=
force
.
getMutualInducedTargetEpsilon
();
inducedEpsilon
=
force
.
getMutualInducedTargetEpsilon
();
inducedField
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"inducedField"
);
inducedFieldPolar
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"inducedFieldPolar"
);
}
}
else
else
maxInducedIterations
=
=
0
;
maxInducedIterations
=
0
;
// Create the kernels.
// Create the kernels.
...
@@ -1032,6 +1039,11 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -1032,6 +1039,11 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
mapTorqueKernel
=
cu
.
getKernel
(
module
,
"mapTorqueToForce"
);
mapTorqueKernel
=
cu
.
getKernel
(
module
,
"mapTorqueToForce"
);
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipoleFixedField
,
defines
);
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipoleFixedField
,
defines
);
computeFixedFieldKernel
=
cu
.
getKernel
(
module
,
"computeFixedField"
);
computeFixedFieldKernel
=
cu
.
getKernel
(
module
,
"computeFixedField"
);
if
(
maxInducedIterations
>
0
)
{
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipoleInducedField
,
defines
);
computeInducedFieldKernel
=
cu
.
getKernel
(
module
,
"computeInducedField"
);
updateInducedFieldKernel
=
cu
.
getKernel
(
module
,
"updateInducedFieldBySOR"
);
}
stringstream
electrostaticsSource
;
stringstream
electrostaticsSource
;
electrostaticsSource
<<
CudaKernelSources
::
vectorOps
;
electrostaticsSource
<<
CudaKernelSources
::
vectorOps
;
electrostaticsSource
<<
CudaAmoebaKernelSources
::
multipoleElectrostatics
;
electrostaticsSource
<<
CudaAmoebaKernelSources
::
multipoleElectrostatics
;
...
@@ -1436,8 +1448,26 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
...
@@ -1436,8 +1448,26 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void
*
recordInducedDipolesArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
void
*
recordInducedDipolesArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
polarizability
->
getDevicePointer
()};
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
polarizability
->
getDevicePointer
()};
cu
.
executeKernel
(
recordInducedDipolesKernel
,
recordInducedDipolesArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
recordInducedDipolesKernel
,
recordInducedDipolesArgs
,
cu
.
getNumAtoms
());
vector
<
float2
>
errors
;
for
(
int
i
=
0
;
i
<
maxInducedIterations
;
i
++
)
{
for
(
int
i
=
0
;
i
<
maxInducedIterations
;
i
++
)
{
cu
.
clearBuffer
(
*
inducedField
);
cu
.
clearBuffer
(
*
inducedFieldPolar
);
void
*
computeInducedFieldArgs
[]
=
{
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
startTileIndex
,
&
numTileIndices
,
&
dampingAndThole
->
getDevicePointer
()};
cu
.
executeKernel
(
computeInducedFieldKernel
,
computeInducedFieldArgs
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
void
*
updateInducedFieldArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
polarizability
->
getDevicePointer
(),
&
inducedDipoleErrors
->
getDevicePointer
()};
cu
.
executeKernel
(
updateInducedFieldKernel
,
updateInducedFieldArgs
,
cu
.
getNumThreadBlocks
()
*
cu
.
ThreadBlockSize
);
inducedDipoleErrors
->
download
(
errors
);
double
total1
=
0.0
,
total2
=
0.0
;
for
(
int
j
=
0
;
j
<
(
int
)
errors
.
size
();
j
++
)
{
total1
+=
errors
[
j
].
x
;
total2
+=
errors
[
j
].
y
;
}
if
(
48.033324
*
sqrt
(
max
(
total1
,
total2
)
/
cu
.
getNumAtoms
())
<
inducedEpsilon
)
break
;
}
}
// Compute electrostatic force.
// Compute electrostatic force.
...
...
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.h
View file @
699fce6e
...
@@ -388,11 +388,13 @@ private:
...
@@ -388,11 +388,13 @@ private:
CudaArray
*
labFrameQuadrupoles
;
CudaArray
*
labFrameQuadrupoles
;
CudaArray
*
field
;
CudaArray
*
field
;
CudaArray
*
fieldPolar
;
CudaArray
*
fieldPolar
;
CudaArray
*
inducedField
;
CudaArray
*
inducedFieldPolar
;
CudaArray
*
torque
;
CudaArray
*
torque
;
CudaArray
*
dampingAndThole
;
CudaArray
*
dampingAndThole
;
CudaArray
*
inducedDipole
;
CudaArray
*
inducedDipole
;
CudaArray
*
inducedDipolePolar
;
CudaArray
*
inducedDipolePolar
;
CudaArray
*
currentEpsilon
;
CudaArray
*
inducedDipoleErrors
;
CudaArray
*
polarizability
;
CudaArray
*
polarizability
;
CudaArray
*
covalentFlags
;
CudaArray
*
covalentFlags
;
CudaArray
*
polarizationGroupFlags
;
CudaArray
*
polarizationGroupFlags
;
...
@@ -413,7 +415,7 @@ private:
...
@@ -413,7 +415,7 @@ private:
CudaArray
*
pmeAtomGridIndex
;
CudaArray
*
pmeAtomGridIndex
;
CudaSort
*
sort
;
CudaSort
*
sort
;
cufftHandle
fft
;
cufftHandle
fft
;
CUfunction
computeMomentsKernel
,
recordInducedDipolesKernel
,
computeFixedFieldKernel
,
electrostaticsKernel
,
mapTorqueKernel
;
CUfunction
computeMomentsKernel
,
recordInducedDipolesKernel
,
computeFixedFieldKernel
,
computeInducedFieldKernel
,
updateInducedFieldKernel
,
electrostaticsKernel
,
mapTorqueKernel
;
};
};
/**
/**
...
...
plugins/amoeba/platforms/cuda2/src/kernels/multipoleInducedField.cu
0 → 100644
View file @
699fce6e
#define TILE_SIZE 32
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
typedef
struct
{
real3
pos
;
real3
field
,
fieldPolar
,
inducedDipole
,
inducedDipolePolar
;
float
thole
,
damp
;
}
AtomData
;
inline
__device__
void
loadAtomData
(
AtomData
&
data
,
int
atom
,
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
const
float2
*
__restrict__
dampingAndThole
)
{
real4
atomPosq
=
posq
[
atom
];
data
.
pos
=
make_real3
(
atomPosq
.
x
,
atomPosq
.
y
,
atomPosq
.
z
);
data
.
inducedDipole
.
x
=
inducedDipole
[
atom
*
3
];
data
.
inducedDipole
.
y
=
inducedDipole
[
atom
*
3
+
1
];
data
.
inducedDipole
.
z
=
inducedDipole
[
atom
*
3
+
2
];
data
.
inducedDipolePolar
.
x
=
inducedDipolePolar
[
atom
*
3
];
data
.
inducedDipolePolar
.
y
=
inducedDipolePolar
[
atom
*
3
+
1
];
data
.
inducedDipolePolar
.
z
=
inducedDipolePolar
[
atom
*
3
+
2
];
float2
temp
=
dampingAndThole
[
atom
];
data
.
damp
=
temp
.
x
;
data
.
thole
=
temp
.
y
;
}
__device__
void
computeOneInteraction
(
AtomData
&
atom1
,
AtomData
&
atom2
,
real3
deltaR
,
real3
*
fields
)
{
real
rI
=
RSQRT
(
dot
(
deltaR
,
deltaR
));
real
r
=
RECIP
(
rI
);
real
r2I
=
rI
*
rI
;
real
rr3
=
-
rI
*
r2I
;
real
rr5
=
-
3
*
rr3
*
r2I
;
real
dampProd
=
atom1
.
damp
*
atom2
.
damp
;
real
ratio
=
(
dampProd
!=
0
?
r
/
dampProd
:
1
);
float
pGamma
=
(
atom2
.
thole
>
atom1
.
thole
?
atom1
.
thole
:
atom2
.
thole
);
real
damp
=
ratio
*
ratio
*
ratio
*
pGamma
;
real
dampExp
=
(
dampProd
!=
0
?
EXP
(
-
damp
)
:
0
);
rr3
*=
1
-
dampExp
;
rr5
*=
1
-
(
1
+
damp
)
*
dampExp
;
real
dDotDelta
=
rr5
*
dot
(
deltaR
,
atom2
.
inducedDipole
);
fields
[
0
]
=
rr3
*
atom2
.
inducedDipole
+
dDotDelta
*
deltaR
;
dDotDelta
=
rr5
*
dot
(
deltaR
,
atom2
.
inducedDipolePolar
);
fields
[
1
]
=
rr3
*
atom2
.
inducedDipolePolar
+
dDotDelta
*
deltaR
;
dDotDelta
=
rr5
*
dot
(
deltaR
,
atom1
.
inducedDipole
);
fields
[
2
]
=
rr3
*
atom1
.
inducedDipole
+
dDotDelta
*
deltaR
;
dDotDelta
=
rr5
*
dot
(
deltaR
,
atom1
.
inducedDipolePolar
);
fields
[
3
]
=
rr3
*
atom1
.
inducedDipolePolar
+
dDotDelta
*
deltaR
;
}
/**
* Compute the mutual induced field.
*/
extern
"C"
__global__
void
computeInducedField
(
unsigned
long
long
*
__restrict__
fieldBuffers
,
unsigned
long
long
*
__restrict__
fieldPolarBuffers
,
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
,
#ifdef USE_CUTOFF
const
ushort2
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
unsigned
int
maxTiles
,
const
unsigned
int
*
__restrict__
interactionFlags
,
#endif
const
float2
*
__restrict__
dampingAndThole
)
{
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
#ifdef USE_CUTOFF
const
unsigned
int
numTiles
=
interactionCount
[
0
];
unsigned
int
pos
=
(
numTiles
>
maxTiles
?
startTileIndex
+
warp
*
numTileIndices
/
totalWarps
:
warp
*
numTiles
/
totalWarps
);
unsigned
int
end
=
(
numTiles
>
maxTiles
?
startTileIndex
+
(
warp
+
1
)
*
numTileIndices
/
totalWarps
:
(
warp
+
1
)
*
numTiles
/
totalWarps
);
#else
const
unsigned
int
numTiles
=
numTileIndices
;
unsigned
int
pos
=
startTileIndex
+
warp
*
numTiles
/
totalWarps
;
unsigned
int
end
=
startTileIndex
+
(
warp
+
1
)
*
numTiles
/
totalWarps
;
#endif
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
#ifndef ENABLE_SHUFFLE
__shared__
real
tempBuffer
[
3
*
THREAD_BLOCK_SIZE
];
#endif
do
{
// Extract the coordinates of this tile
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
x
,
y
;
AtomData
data
;
data
.
field
=
make_real3
(
0
);
data
.
fieldPolar
=
make_real3
(
0
);
if
(
pos
<
end
)
{
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
ushort2
tileIndices
=
tiles
[
pos
];
x
=
tileIndices
.
x
;
y
=
tileIndices
.
y
;
}
else
#endif
{
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
}
}
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
loadAtomData
(
data
,
atom1
,
posq
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
if
(
pos
>=
end
)
;
// This warp is done.
else
if
(
x
==
y
)
{
// This tile is on the diagonal.
localData
[
threadIdx
.
x
].
pos
=
data
.
pos
;
localData
[
threadIdx
.
x
].
inducedDipole
=
data
.
inducedDipole
;
localData
[
threadIdx
.
x
].
inducedDipolePolar
=
data
.
inducedDipolePolar
;
localData
[
threadIdx
.
x
].
thole
=
data
.
thole
;
localData
[
threadIdx
.
x
].
damp
=
data
.
damp
;
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
tbx
+
j
;
real3
delta
=
localData
[
atom2
].
pos
-
data
.
pos
;
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
real3
fields
[
4
];
computeOneInteraction
(
data
,
localData
[
atom2
],
delta
,
fields
);
atom2
=
y
*
TILE_SIZE
+
j
;
if
(
atom1
!=
atom2
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
data
.
field
+=
fields
[
0
];
data
.
fieldPolar
+=
fields
[
1
];
}
}
}
else
{
// This is an off-diagonal tile.
loadAtomData
(
localData
[
threadIdx
.
x
],
y
*
TILE_SIZE
+
tgx
,
posq
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
localData
[
threadIdx
.
x
].
field
=
make_real3
(
0
);
localData
[
threadIdx
.
x
].
fieldPolar
=
make_real3
(
0
);
#ifdef USE_CUTOFF
unsigned
int
flags
=
(
numTiles
<=
maxTiles
?
interactionFlags
[
pos
]
:
0xFFFFFFFF
);
if
(
flags
!=
0xFFFFFFFF
)
{
if
(
flags
==
0
)
{
// No interactions in this tile.
}
else
{
// Compute only a subset of the interactions in this tile.
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
if
((
flags
&
(
1
<<
j
))
!=
0
)
{
int
atom2
=
tbx
+
j
;
int
bufferIndex
=
3
*
threadIdx
.
x
;
real3
dEdR1
=
make_real3
(
0
);
real3
dEdR2
=
make_real3
(
0
);
real3
delta
=
localData
[
atom2
].
pos
-
data
.
pos
;
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
#ifdef USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#endif
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
);
LOAD_ATOM2_PARAMETERS
atom2
=
y
*
TILE_SIZE
+
j
;
COMPUTE_INTERACTION
#ifdef USE_CUTOFF
}
#endif
#ifdef ENABLE_SHUFFLE
force
.
x
-=
dEdR1
.
x
;
force
.
y
-=
dEdR1
.
y
;
force
.
z
-=
dEdR1
.
z
;
for
(
int
i
=
16
;
i
>=
1
;
i
/=
2
)
{
dEdR2
.
x
+=
__shfl_xor
(
dEdR2
.
x
,
i
,
32
);
dEdR2
.
y
+=
__shfl_xor
(
dEdR2
.
y
,
i
,
32
);
dEdR2
.
z
+=
__shfl_xor
(
dEdR2
.
z
,
i
,
32
);
}
if
(
tgx
==
0
)
{
localData
[
tbx
+
j
].
fx
+=
dEdR2
.
x
;
localData
[
tbx
+
j
].
fy
+=
dEdR2
.
y
;
localData
[
tbx
+
j
].
fz
+=
dEdR2
.
z
;
}
#else
force
.
x
-=
dEdR1
.
x
;
force
.
y
-=
dEdR1
.
y
;
force
.
z
-=
dEdR1
.
z
;
tempBuffer
[
bufferIndex
]
=
dEdR2
.
x
;
tempBuffer
[
bufferIndex
+
1
]
=
dEdR2
.
y
;
tempBuffer
[
bufferIndex
+
2
]
=
dEdR2
.
z
;
// Sum the forces on atom2.
if
(
tgx
%
4
==
0
)
{
tempBuffer
[
bufferIndex
]
+=
tempBuffer
[
bufferIndex
+
3
]
+
tempBuffer
[
bufferIndex
+
6
]
+
tempBuffer
[
bufferIndex
+
9
];
tempBuffer
[
bufferIndex
+
1
]
+=
tempBuffer
[
bufferIndex
+
4
]
+
tempBuffer
[
bufferIndex
+
7
]
+
tempBuffer
[
bufferIndex
+
10
];
tempBuffer
[
bufferIndex
+
2
]
+=
tempBuffer
[
bufferIndex
+
5
]
+
tempBuffer
[
bufferIndex
+
8
]
+
tempBuffer
[
bufferIndex
+
11
];
}
if
(
tgx
==
0
)
{
localData
[
tbx
+
j
].
fx
+=
tempBuffer
[
bufferIndex
]
+
tempBuffer
[
bufferIndex
+
12
]
+
tempBuffer
[
bufferIndex
+
24
]
+
tempBuffer
[
bufferIndex
+
36
]
+
tempBuffer
[
bufferIndex
+
48
]
+
tempBuffer
[
bufferIndex
+
60
]
+
tempBuffer
[
bufferIndex
+
72
]
+
tempBuffer
[
bufferIndex
+
84
];
localData
[
tbx
+
j
].
fy
+=
tempBuffer
[
bufferIndex
+
1
]
+
tempBuffer
[
bufferIndex
+
13
]
+
tempBuffer
[
bufferIndex
+
25
]
+
tempBuffer
[
bufferIndex
+
37
]
+
tempBuffer
[
bufferIndex
+
49
]
+
tempBuffer
[
bufferIndex
+
61
]
+
tempBuffer
[
bufferIndex
+
73
]
+
tempBuffer
[
bufferIndex
+
85
];
localData
[
tbx
+
j
].
fz
+=
tempBuffer
[
bufferIndex
+
2
]
+
tempBuffer
[
bufferIndex
+
14
]
+
tempBuffer
[
bufferIndex
+
26
]
+
tempBuffer
[
bufferIndex
+
38
]
+
tempBuffer
[
bufferIndex
+
50
]
+
tempBuffer
[
bufferIndex
+
62
]
+
tempBuffer
[
bufferIndex
+
74
]
+
tempBuffer
[
bufferIndex
+
86
];
}
#endif
}
}
}
}
else
#endif
{
// Compute the full set of interactions in this tile.
unsigned
int
tj
=
tgx
;
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
tbx
+
tj
;
real3
delta
=
localData
[
atom2
].
pos
-
data
.
pos
;
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
real3
fields
[
4
];
computeOneInteraction
(
data
,
localData
[
atom2
],
delta
,
fields
);
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
data
.
field
+=
fields
[
0
];
data
.
fieldPolar
+=
fields
[
1
];
localData
[
atom2
].
field
+=
fields
[
2
];
localData
[
atom2
].
fieldPolar
+=
fields
[
3
];
}
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
}
}
}
// Write results.
if
(
pos
<
end
)
{
const
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
fieldBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
field
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
field
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
field
.
z
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldPolarBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
fieldPolar
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldPolarBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
fieldPolar
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldPolarBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
fieldPolar
.
z
*
0xFFFFFFFF
)));
}
if
(
pos
<
end
&&
x
!=
y
)
{
const
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
fieldBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
field
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
field
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
field
.
z
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldPolarBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fieldPolar
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldPolarBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fieldPolar
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldPolarBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fieldPolar
.
z
*
0xFFFFFFFF
)));
}
pos
++
;
}
while
(
pos
<
end
);
}
extern
"C"
__global__
void
updateInducedFieldBySOR
(
const
long
long
*
__restrict__
fixedField
,
const
long
long
*
__restrict__
fixedFieldPolar
,
const
long
long
*
__restrict__
inducedField
,
const
long
long
*
__restrict__
inducedFieldPolar
,
real
*
__restrict__
inducedDipole
,
real
*
__restrict__
inducedDipolePolar
,
const
float
*
__restrict__
polarizability
,
float2
*
__restrict__
errors
)
{
extern
__shared__
real2
buffer
[];
float
polarSOR
=
0.55
f
;
real
sumErrors
=
0
;
real
sumPolarErrors
=
0
;
for
(
int
atom
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
atom
<
NUM_ATOMS
;
atom
+=
blockDim
.
x
*
gridDim
.
x
)
{
real
scale
=
polarizability
[
atom
]
/
(
real
)
0xFFFFFFFF
;
for
(
int
component
=
0
;
component
<
3
;
component
++
)
{
int
dipoleIndex
=
3
*
atom
+
component
;
int
fieldIndex
=
atom
+
component
*
PADDED_NUM_ATOMS
;
real
previousDipole
=
inducedDipole
[
dipoleIndex
];
real
previousDipolePolar
=
inducedDipolePolar
[
dipoleIndex
];
real
newDipole
=
scale
*
(
fixedField
[
fieldIndex
]
+
inducedField
[
fieldIndex
]);
real
newDipolePolar
=
scale
*
(
fixedFieldPolar
[
fieldIndex
]
+
inducedFieldPolar
[
fieldIndex
]);
newDipole
=
previousDipole
+
polarSOR
*
(
newDipole
-
previousDipole
);
newDipolePolar
=
previousDipolePolar
+
polarSOR
*
(
newDipolePolar
-
previousDipolePolar
);
inducedDipole
[
dipoleIndex
]
=
newDipole
;
inducedDipolePolar
[
dipoleIndex
]
=
newDipolePolar
;
sumErrors
+=
(
newDipole
-
previousDipole
)
*
(
newDipole
-
previousDipole
);
sumPolarErrors
+=
(
newDipolePolar
-
previousDipolePolar
)
*
(
newDipolePolar
-
previousDipolePolar
);
}
}
// Sum the errors over threads and store the total for this block.
buffer
[
threadIdx
.
x
]
=
make_real2
(
sumErrors
,
sumPolarErrors
);
__syncthreads
();
for
(
int
offset
=
1
;
offset
<
blockDim
.
x
;
offset
*=
2
)
{
if
(
threadIdx
.
x
+
offset
<
blockDim
.
x
&&
(
threadIdx
.
x
&
(
2
*
offset
-
1
))
==
0
)
{
buffer
[
threadIdx
.
x
].
x
+=
buffer
[
threadIdx
.
x
+
offset
].
x
;
buffer
[
threadIdx
.
x
].
y
+=
buffer
[
threadIdx
.
x
+
offset
].
y
;
}
__syncthreads
();
}
if
(
threadIdx
.
x
==
0
)
errors
[
blockIdx
.
x
]
=
make_float2
((
float
)
buffer
[
0
].
x
,
(
float
)
buffer
[
0
].
y
);
}
\ No newline at end of file
plugins/amoeba/platforms/cuda2/tests/TestCudaAmoebaMultipoleForce.cpp
View file @
699fce6e
...
@@ -1384,8 +1384,8 @@ int main( int numberOfArguments, char* argv[] ) {
...
@@ -1384,8 +1384,8 @@ int main( int numberOfArguments, char* argv[] ) {
// test mutual polarization, no cutoff
// test mutual polarization, no cutoff
//
testMultipoleAmmoniaMutualPolarization( log );
testMultipoleAmmoniaMutualPolarization
(
log
);
//
// // test multipole direct & mutual polarization using PME
// // test multipole direct & mutual polarization using PME
//
//
// testMultipoleWaterPMEDirectPolarization( log );
// testMultipoleWaterPMEDirectPolarization( log );
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment