Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
699fce6e
"platforms/vscode:/vscode.git/clone" did not exist on "6c6bc628bf95bb9ec90344400a671c55a4a6d5b1"
Commit
699fce6e
authored
Aug 06, 2012
by
Peter Eastman
Browse files
Continuing to convert AmoebaMultipoleForce: mutual induced dipoles now work
parent
29654b80
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
338 additions
and
11 deletions
+338
-11
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.cpp
+37
-7
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.h
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.h
+4
-2
plugins/amoeba/platforms/cuda2/src/kernels/multipoleInducedField.cu
...oeba/platforms/cuda2/src/kernels/multipoleInducedField.cu
+295
-0
plugins/amoeba/platforms/cuda2/tests/TestCudaAmoebaMultipoleForce.cpp
...ba/platforms/cuda2/tests/TestCudaAmoebaMultipoleForce.cpp
+2
-2
No files found.
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.cpp
View file @
699fce6e
...
@@ -865,9 +865,9 @@ private:
...
@@ -865,9 +865,9 @@ private:
CudaCalcAmoebaMultipoleForceKernel
::
CudaCalcAmoebaMultipoleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
System
&
system
)
:
CudaCalcAmoebaMultipoleForceKernel
::
CudaCalcAmoebaMultipoleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
System
&
system
)
:
CalcAmoebaMultipoleForceKernel
(
name
,
platform
),
cu
(
cu
),
system
(
system
),
hasInitializedScaleFactors
(
false
),
CalcAmoebaMultipoleForceKernel
(
name
,
platform
),
cu
(
cu
),
system
(
system
),
hasInitializedScaleFactors
(
false
),
multipoleParticles
(
NULL
),
molecularDipoles
(
NULL
),
molecularQuadrupoles
(
NULL
),
multipoleParticles
(
NULL
),
molecularDipoles
(
NULL
),
molecularQuadrupoles
(
NULL
),
labFrameDipoles
(
NULL
),
labFrameQuadrupoles
(
NULL
),
labFrameDipoles
(
NULL
),
labFrameQuadrupoles
(
NULL
),
f
ield
(
NULL
),
f
ieldPolar
(
NULL
),
torque
(
NULL
),
dampingAndThole
(
NULL
),
field
(
NULL
),
fieldPolar
(
NULL
),
inducedF
ield
(
NULL
),
inducedF
ieldPolar
(
NULL
),
torque
(
NULL
),
dampingAndThole
(
NULL
),
inducedDipole
(
NULL
),
inducedDipolePolar
(
NULL
),
currentEpsilon
(
NULL
),
polarizability
(
NULL
),
covalentFlags
(
NULL
),
polarizationGroupFlags
(
NULL
),
inducedDipole
(
NULL
),
inducedDipolePolar
(
NULL
),
inducedDipoleErrors
(
NULL
),
polarizability
(
NULL
),
covalentFlags
(
NULL
),
polarizationGroupFlags
(
NULL
),
pmeGrid
(
NULL
)
{
pmeGrid
(
NULL
)
{
}
}
...
@@ -887,6 +887,10 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
...
@@ -887,6 +887,10 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
delete
field
;
delete
field
;
if
(
fieldPolar
!=
NULL
)
if
(
fieldPolar
!=
NULL
)
delete
fieldPolar
;
delete
fieldPolar
;
if
(
inducedField
!=
NULL
)
delete
inducedField
;
if
(
inducedFieldPolar
!=
NULL
)
delete
inducedFieldPolar
;
if
(
torque
!=
NULL
)
if
(
torque
!=
NULL
)
delete
torque
;
delete
torque
;
if
(
dampingAndThole
!=
NULL
)
if
(
dampingAndThole
!=
NULL
)
...
@@ -895,8 +899,8 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
...
@@ -895,8 +899,8 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
delete
inducedDipole
;
delete
inducedDipole
;
if
(
inducedDipolePolar
!=
NULL
)
if
(
inducedDipolePolar
!=
NULL
)
delete
inducedDipolePolar
;
delete
inducedDipolePolar
;
if
(
currentEpsilon
!=
NULL
)
if
(
inducedDipoleErrors
!=
NULL
)
delete
currentEpsilon
;
delete
inducedDipoleErrors
;
if
(
polarizability
!=
NULL
)
if
(
polarizability
!=
NULL
)
delete
polarizability
;
delete
polarizability
;
if
(
covalentFlags
!=
NULL
)
if
(
covalentFlags
!=
NULL
)
...
@@ -971,6 +975,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -971,6 +975,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
torque
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"torque"
);
torque
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"torque"
);
inducedDipole
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipole"
);
inducedDipole
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipole"
);
inducedDipolePolar
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipolePolar"
);
inducedDipolePolar
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipolePolar"
);
inducedDipoleErrors
=
new
CudaArray
(
cu
,
cu
.
getNumThreadBlocks
(),
sizeof
(
float2
),
"inducedDipoleErrors"
);
cu
.
addAutoclearBuffer
(
*
field
);
cu
.
addAutoclearBuffer
(
*
field
);
cu
.
addAutoclearBuffer
(
*
fieldPolar
);
cu
.
addAutoclearBuffer
(
*
fieldPolar
);
cu
.
addAutoclearBuffer
(
*
torque
);
cu
.
addAutoclearBuffer
(
*
torque
);
...
@@ -1009,9 +1014,11 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -1009,9 +1014,11 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
if
(
force
.
getPolarizationType
()
==
AmoebaMultipoleForce
::
Mutual
)
{
if
(
force
.
getPolarizationType
()
==
AmoebaMultipoleForce
::
Mutual
)
{
maxInducedIterations
=
force
.
getMutualInducedMaxIterations
();
maxInducedIterations
=
force
.
getMutualInducedMaxIterations
();
inducedEpsilon
=
force
.
getMutualInducedTargetEpsilon
();
inducedEpsilon
=
force
.
getMutualInducedTargetEpsilon
();
inducedField
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"inducedField"
);
inducedFieldPolar
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"inducedFieldPolar"
);
}
}
else
else
maxInducedIterations
=
=
0
;
maxInducedIterations
=
0
;
// Create the kernels.
// Create the kernels.
...
@@ -1032,6 +1039,11 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -1032,6 +1039,11 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
mapTorqueKernel
=
cu
.
getKernel
(
module
,
"mapTorqueToForce"
);
mapTorqueKernel
=
cu
.
getKernel
(
module
,
"mapTorqueToForce"
);
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipoleFixedField
,
defines
);
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipoleFixedField
,
defines
);
computeFixedFieldKernel
=
cu
.
getKernel
(
module
,
"computeFixedField"
);
computeFixedFieldKernel
=
cu
.
getKernel
(
module
,
"computeFixedField"
);
if
(
maxInducedIterations
>
0
)
{
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipoleInducedField
,
defines
);
computeInducedFieldKernel
=
cu
.
getKernel
(
module
,
"computeInducedField"
);
updateInducedFieldKernel
=
cu
.
getKernel
(
module
,
"updateInducedFieldBySOR"
);
}
stringstream
electrostaticsSource
;
stringstream
electrostaticsSource
;
electrostaticsSource
<<
CudaKernelSources
::
vectorOps
;
electrostaticsSource
<<
CudaKernelSources
::
vectorOps
;
electrostaticsSource
<<
CudaAmoebaKernelSources
::
multipoleElectrostatics
;
electrostaticsSource
<<
CudaAmoebaKernelSources
::
multipoleElectrostatics
;
...
@@ -1436,8 +1448,26 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
...
@@ -1436,8 +1448,26 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void
*
recordInducedDipolesArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
void
*
recordInducedDipolesArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
polarizability
->
getDevicePointer
()};
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
polarizability
->
getDevicePointer
()};
cu
.
executeKernel
(
recordInducedDipolesKernel
,
recordInducedDipolesArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
recordInducedDipolesKernel
,
recordInducedDipolesArgs
,
cu
.
getNumAtoms
());
vector
<
float2
>
errors
;
for
(
int
i
=
0
;
i
<
maxInducedIterations
;
i
++
)
{
for
(
int
i
=
0
;
i
<
maxInducedIterations
;
i
++
)
{
cu
.
clearBuffer
(
*
inducedField
);
cu
.
clearBuffer
(
*
inducedFieldPolar
);
void
*
computeInducedFieldArgs
[]
=
{
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
startTileIndex
,
&
numTileIndices
,
&
dampingAndThole
->
getDevicePointer
()};
cu
.
executeKernel
(
computeInducedFieldKernel
,
computeInducedFieldArgs
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
void
*
updateInducedFieldArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
polarizability
->
getDevicePointer
(),
&
inducedDipoleErrors
->
getDevicePointer
()};
cu
.
executeKernel
(
updateInducedFieldKernel
,
updateInducedFieldArgs
,
cu
.
getNumThreadBlocks
()
*
cu
.
ThreadBlockSize
);
inducedDipoleErrors
->
download
(
errors
);
double
total1
=
0.0
,
total2
=
0.0
;
for
(
int
j
=
0
;
j
<
(
int
)
errors
.
size
();
j
++
)
{
total1
+=
errors
[
j
].
x
;
total2
+=
errors
[
j
].
y
;
}
if
(
48.033324
*
sqrt
(
max
(
total1
,
total2
)
/
cu
.
getNumAtoms
())
<
inducedEpsilon
)
break
;
}
}
// Compute electrostatic force.
// Compute electrostatic force.
...
...
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.h
View file @
699fce6e
...
@@ -388,11 +388,13 @@ private:
...
@@ -388,11 +388,13 @@ private:
CudaArray
*
labFrameQuadrupoles
;
CudaArray
*
labFrameQuadrupoles
;
CudaArray
*
field
;
CudaArray
*
field
;
CudaArray
*
fieldPolar
;
CudaArray
*
fieldPolar
;
CudaArray
*
inducedField
;
CudaArray
*
inducedFieldPolar
;
CudaArray
*
torque
;
CudaArray
*
torque
;
CudaArray
*
dampingAndThole
;
CudaArray
*
dampingAndThole
;
CudaArray
*
inducedDipole
;
CudaArray
*
inducedDipole
;
CudaArray
*
inducedDipolePolar
;
CudaArray
*
inducedDipolePolar
;
CudaArray
*
currentEpsilon
;
CudaArray
*
inducedDipoleErrors
;
CudaArray
*
polarizability
;
CudaArray
*
polarizability
;
CudaArray
*
covalentFlags
;
CudaArray
*
covalentFlags
;
CudaArray
*
polarizationGroupFlags
;
CudaArray
*
polarizationGroupFlags
;
...
@@ -413,7 +415,7 @@ private:
...
@@ -413,7 +415,7 @@ private:
CudaArray
*
pmeAtomGridIndex
;
CudaArray
*
pmeAtomGridIndex
;
CudaSort
*
sort
;
CudaSort
*
sort
;
cufftHandle
fft
;
cufftHandle
fft
;
CUfunction
computeMomentsKernel
,
recordInducedDipolesKernel
,
computeFixedFieldKernel
,
electrostaticsKernel
,
mapTorqueKernel
;
CUfunction
computeMomentsKernel
,
recordInducedDipolesKernel
,
computeFixedFieldKernel
,
computeInducedFieldKernel
,
updateInducedFieldKernel
,
electrostaticsKernel
,
mapTorqueKernel
;
};
};
/**
/**
...
...
plugins/amoeba/platforms/cuda2/src/kernels/multipoleInducedField.cu
0 → 100644
View file @
699fce6e
#define TILE_SIZE 32
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
typedef
struct
{
real3
pos
;
real3
field
,
fieldPolar
,
inducedDipole
,
inducedDipolePolar
;
float
thole
,
damp
;
}
AtomData
;
inline
__device__
void
loadAtomData
(
AtomData
&
data
,
int
atom
,
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
const
float2
*
__restrict__
dampingAndThole
)
{
real4
atomPosq
=
posq
[
atom
];
data
.
pos
=
make_real3
(
atomPosq
.
x
,
atomPosq
.
y
,
atomPosq
.
z
);
data
.
inducedDipole
.
x
=
inducedDipole
[
atom
*
3
];
data
.
inducedDipole
.
y
=
inducedDipole
[
atom
*
3
+
1
];
data
.
inducedDipole
.
z
=
inducedDipole
[
atom
*
3
+
2
];
data
.
inducedDipolePolar
.
x
=
inducedDipolePolar
[
atom
*
3
];
data
.
inducedDipolePolar
.
y
=
inducedDipolePolar
[
atom
*
3
+
1
];
data
.
inducedDipolePolar
.
z
=
inducedDipolePolar
[
atom
*
3
+
2
];
float2
temp
=
dampingAndThole
[
atom
];
data
.
damp
=
temp
.
x
;
data
.
thole
=
temp
.
y
;
}
__device__
void
computeOneInteraction
(
AtomData
&
atom1
,
AtomData
&
atom2
,
real3
deltaR
,
real3
*
fields
)
{
real
rI
=
RSQRT
(
dot
(
deltaR
,
deltaR
));
real
r
=
RECIP
(
rI
);
real
r2I
=
rI
*
rI
;
real
rr3
=
-
rI
*
r2I
;
real
rr5
=
-
3
*
rr3
*
r2I
;
real
dampProd
=
atom1
.
damp
*
atom2
.
damp
;
real
ratio
=
(
dampProd
!=
0
?
r
/
dampProd
:
1
);
float
pGamma
=
(
atom2
.
thole
>
atom1
.
thole
?
atom1
.
thole
:
atom2
.
thole
);
real
damp
=
ratio
*
ratio
*
ratio
*
pGamma
;
real
dampExp
=
(
dampProd
!=
0
?
EXP
(
-
damp
)
:
0
);
rr3
*=
1
-
dampExp
;
rr5
*=
1
-
(
1
+
damp
)
*
dampExp
;
real
dDotDelta
=
rr5
*
dot
(
deltaR
,
atom2
.
inducedDipole
);
fields
[
0
]
=
rr3
*
atom2
.
inducedDipole
+
dDotDelta
*
deltaR
;
dDotDelta
=
rr5
*
dot
(
deltaR
,
atom2
.
inducedDipolePolar
);
fields
[
1
]
=
rr3
*
atom2
.
inducedDipolePolar
+
dDotDelta
*
deltaR
;
dDotDelta
=
rr5
*
dot
(
deltaR
,
atom1
.
inducedDipole
);
fields
[
2
]
=
rr3
*
atom1
.
inducedDipole
+
dDotDelta
*
deltaR
;
dDotDelta
=
rr5
*
dot
(
deltaR
,
atom1
.
inducedDipolePolar
);
fields
[
3
]
=
rr3
*
atom1
.
inducedDipolePolar
+
dDotDelta
*
deltaR
;
}
/**
* Compute the mutual induced field.
*/
extern
"C"
__global__
void
computeInducedField
(
unsigned
long
long
*
__restrict__
fieldBuffers
,
unsigned
long
long
*
__restrict__
fieldPolarBuffers
,
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
,
#ifdef USE_CUTOFF
const
ushort2
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
unsigned
int
maxTiles
,
const
unsigned
int
*
__restrict__
interactionFlags
,
#endif
const
float2
*
__restrict__
dampingAndThole
)
{
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
#ifdef USE_CUTOFF
const
unsigned
int
numTiles
=
interactionCount
[
0
];
unsigned
int
pos
=
(
numTiles
>
maxTiles
?
startTileIndex
+
warp
*
numTileIndices
/
totalWarps
:
warp
*
numTiles
/
totalWarps
);
unsigned
int
end
=
(
numTiles
>
maxTiles
?
startTileIndex
+
(
warp
+
1
)
*
numTileIndices
/
totalWarps
:
(
warp
+
1
)
*
numTiles
/
totalWarps
);
#else
const
unsigned
int
numTiles
=
numTileIndices
;
unsigned
int
pos
=
startTileIndex
+
warp
*
numTiles
/
totalWarps
;
unsigned
int
end
=
startTileIndex
+
(
warp
+
1
)
*
numTiles
/
totalWarps
;
#endif
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
#ifndef ENABLE_SHUFFLE
__shared__
real
tempBuffer
[
3
*
THREAD_BLOCK_SIZE
];
#endif
do
{
// Extract the coordinates of this tile
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
x
,
y
;
AtomData
data
;
data
.
field
=
make_real3
(
0
);
data
.
fieldPolar
=
make_real3
(
0
);
if
(
pos
<
end
)
{
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
ushort2
tileIndices
=
tiles
[
pos
];
x
=
tileIndices
.
x
;
y
=
tileIndices
.
y
;
}
else
#endif
{
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
}
}
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
loadAtomData
(
data
,
atom1
,
posq
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
if
(
pos
>=
end
)
;
// This warp is done.
else
if
(
x
==
y
)
{
// This tile is on the diagonal.
localData
[
threadIdx
.
x
].
pos
=
data
.
pos
;
localData
[
threadIdx
.
x
].
inducedDipole
=
data
.
inducedDipole
;
localData
[
threadIdx
.
x
].
inducedDipolePolar
=
data
.
inducedDipolePolar
;
localData
[
threadIdx
.
x
].
thole
=
data
.
thole
;
localData
[
threadIdx
.
x
].
damp
=
data
.
damp
;
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
tbx
+
j
;
real3
delta
=
localData
[
atom2
].
pos
-
data
.
pos
;
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
real3
fields
[
4
];
computeOneInteraction
(
data
,
localData
[
atom2
],
delta
,
fields
);
atom2
=
y
*
TILE_SIZE
+
j
;
if
(
atom1
!=
atom2
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
data
.
field
+=
fields
[
0
];
data
.
fieldPolar
+=
fields
[
1
];
}
}
}
else
{
// This is an off-diagonal tile.
loadAtomData
(
localData
[
threadIdx
.
x
],
y
*
TILE_SIZE
+
tgx
,
posq
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
localData
[
threadIdx
.
x
].
field
=
make_real3
(
0
);
localData
[
threadIdx
.
x
].
fieldPolar
=
make_real3
(
0
);
#ifdef USE_CUTOFF
unsigned
int
flags
=
(
numTiles
<=
maxTiles
?
interactionFlags
[
pos
]
:
0xFFFFFFFF
);
if
(
flags
!=
0xFFFFFFFF
)
{
if
(
flags
==
0
)
{
// No interactions in this tile.
}
else
{
// Compute only a subset of the interactions in this tile.
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
if
((
flags
&
(
1
<<
j
))
!=
0
)
{
int
atom2
=
tbx
+
j
;
int
bufferIndex
=
3
*
threadIdx
.
x
;
real3
dEdR1
=
make_real3
(
0
);
real3
dEdR2
=
make_real3
(
0
);
real3
delta
=
localData
[
atom2
].
pos
-
data
.
pos
;
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
#ifdef USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#endif
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
);
LOAD_ATOM2_PARAMETERS
atom2
=
y
*
TILE_SIZE
+
j
;
COMPUTE_INTERACTION
#ifdef USE_CUTOFF
}
#endif
#ifdef ENABLE_SHUFFLE
force
.
x
-=
dEdR1
.
x
;
force
.
y
-=
dEdR1
.
y
;
force
.
z
-=
dEdR1
.
z
;
for
(
int
i
=
16
;
i
>=
1
;
i
/=
2
)
{
dEdR2
.
x
+=
__shfl_xor
(
dEdR2
.
x
,
i
,
32
);
dEdR2
.
y
+=
__shfl_xor
(
dEdR2
.
y
,
i
,
32
);
dEdR2
.
z
+=
__shfl_xor
(
dEdR2
.
z
,
i
,
32
);
}
if
(
tgx
==
0
)
{
localData
[
tbx
+
j
].
fx
+=
dEdR2
.
x
;
localData
[
tbx
+
j
].
fy
+=
dEdR2
.
y
;
localData
[
tbx
+
j
].
fz
+=
dEdR2
.
z
;
}
#else
force
.
x
-=
dEdR1
.
x
;
force
.
y
-=
dEdR1
.
y
;
force
.
z
-=
dEdR1
.
z
;
tempBuffer
[
bufferIndex
]
=
dEdR2
.
x
;
tempBuffer
[
bufferIndex
+
1
]
=
dEdR2
.
y
;
tempBuffer
[
bufferIndex
+
2
]
=
dEdR2
.
z
;
// Sum the forces on atom2.
if
(
tgx
%
4
==
0
)
{
tempBuffer
[
bufferIndex
]
+=
tempBuffer
[
bufferIndex
+
3
]
+
tempBuffer
[
bufferIndex
+
6
]
+
tempBuffer
[
bufferIndex
+
9
];
tempBuffer
[
bufferIndex
+
1
]
+=
tempBuffer
[
bufferIndex
+
4
]
+
tempBuffer
[
bufferIndex
+
7
]
+
tempBuffer
[
bufferIndex
+
10
];
tempBuffer
[
bufferIndex
+
2
]
+=
tempBuffer
[
bufferIndex
+
5
]
+
tempBuffer
[
bufferIndex
+
8
]
+
tempBuffer
[
bufferIndex
+
11
];
}
if
(
tgx
==
0
)
{
localData
[
tbx
+
j
].
fx
+=
tempBuffer
[
bufferIndex
]
+
tempBuffer
[
bufferIndex
+
12
]
+
tempBuffer
[
bufferIndex
+
24
]
+
tempBuffer
[
bufferIndex
+
36
]
+
tempBuffer
[
bufferIndex
+
48
]
+
tempBuffer
[
bufferIndex
+
60
]
+
tempBuffer
[
bufferIndex
+
72
]
+
tempBuffer
[
bufferIndex
+
84
];
localData
[
tbx
+
j
].
fy
+=
tempBuffer
[
bufferIndex
+
1
]
+
tempBuffer
[
bufferIndex
+
13
]
+
tempBuffer
[
bufferIndex
+
25
]
+
tempBuffer
[
bufferIndex
+
37
]
+
tempBuffer
[
bufferIndex
+
49
]
+
tempBuffer
[
bufferIndex
+
61
]
+
tempBuffer
[
bufferIndex
+
73
]
+
tempBuffer
[
bufferIndex
+
85
];
localData
[
tbx
+
j
].
fz
+=
tempBuffer
[
bufferIndex
+
2
]
+
tempBuffer
[
bufferIndex
+
14
]
+
tempBuffer
[
bufferIndex
+
26
]
+
tempBuffer
[
bufferIndex
+
38
]
+
tempBuffer
[
bufferIndex
+
50
]
+
tempBuffer
[
bufferIndex
+
62
]
+
tempBuffer
[
bufferIndex
+
74
]
+
tempBuffer
[
bufferIndex
+
86
];
}
#endif
}
}
}
}
else
#endif
{
// Compute the full set of interactions in this tile.
unsigned
int
tj
=
tgx
;
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
tbx
+
tj
;
real3
delta
=
localData
[
atom2
].
pos
-
data
.
pos
;
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
real3
fields
[
4
];
computeOneInteraction
(
data
,
localData
[
atom2
],
delta
,
fields
);
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
data
.
field
+=
fields
[
0
];
data
.
fieldPolar
+=
fields
[
1
];
localData
[
atom2
].
field
+=
fields
[
2
];
localData
[
atom2
].
fieldPolar
+=
fields
[
3
];
}
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
}
}
}
// Write results.
if
(
pos
<
end
)
{
const
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
fieldBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
field
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
field
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
field
.
z
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldPolarBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
fieldPolar
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldPolarBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
fieldPolar
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldPolarBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
fieldPolar
.
z
*
0xFFFFFFFF
)));
}
if
(
pos
<
end
&&
x
!=
y
)
{
const
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
fieldBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
field
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
field
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
field
.
z
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldPolarBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fieldPolar
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldPolarBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fieldPolar
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldPolarBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fieldPolar
.
z
*
0xFFFFFFFF
)));
}
pos
++
;
}
while
(
pos
<
end
);
}
extern
"C"
__global__
void
updateInducedFieldBySOR
(
const
long
long
*
__restrict__
fixedField
,
const
long
long
*
__restrict__
fixedFieldPolar
,
const
long
long
*
__restrict__
inducedField
,
const
long
long
*
__restrict__
inducedFieldPolar
,
real
*
__restrict__
inducedDipole
,
real
*
__restrict__
inducedDipolePolar
,
const
float
*
__restrict__
polarizability
,
float2
*
__restrict__
errors
)
{
extern
__shared__
real2
buffer
[];
float
polarSOR
=
0.55
f
;
real
sumErrors
=
0
;
real
sumPolarErrors
=
0
;
for
(
int
atom
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
atom
<
NUM_ATOMS
;
atom
+=
blockDim
.
x
*
gridDim
.
x
)
{
real
scale
=
polarizability
[
atom
]
/
(
real
)
0xFFFFFFFF
;
for
(
int
component
=
0
;
component
<
3
;
component
++
)
{
int
dipoleIndex
=
3
*
atom
+
component
;
int
fieldIndex
=
atom
+
component
*
PADDED_NUM_ATOMS
;
real
previousDipole
=
inducedDipole
[
dipoleIndex
];
real
previousDipolePolar
=
inducedDipolePolar
[
dipoleIndex
];
real
newDipole
=
scale
*
(
fixedField
[
fieldIndex
]
+
inducedField
[
fieldIndex
]);
real
newDipolePolar
=
scale
*
(
fixedFieldPolar
[
fieldIndex
]
+
inducedFieldPolar
[
fieldIndex
]);
newDipole
=
previousDipole
+
polarSOR
*
(
newDipole
-
previousDipole
);
newDipolePolar
=
previousDipolePolar
+
polarSOR
*
(
newDipolePolar
-
previousDipolePolar
);
inducedDipole
[
dipoleIndex
]
=
newDipole
;
inducedDipolePolar
[
dipoleIndex
]
=
newDipolePolar
;
sumErrors
+=
(
newDipole
-
previousDipole
)
*
(
newDipole
-
previousDipole
);
sumPolarErrors
+=
(
newDipolePolar
-
previousDipolePolar
)
*
(
newDipolePolar
-
previousDipolePolar
);
}
}
// Sum the errors over threads and store the total for this block.
buffer
[
threadIdx
.
x
]
=
make_real2
(
sumErrors
,
sumPolarErrors
);
__syncthreads
();
for
(
int
offset
=
1
;
offset
<
blockDim
.
x
;
offset
*=
2
)
{
if
(
threadIdx
.
x
+
offset
<
blockDim
.
x
&&
(
threadIdx
.
x
&
(
2
*
offset
-
1
))
==
0
)
{
buffer
[
threadIdx
.
x
].
x
+=
buffer
[
threadIdx
.
x
+
offset
].
x
;
buffer
[
threadIdx
.
x
].
y
+=
buffer
[
threadIdx
.
x
+
offset
].
y
;
}
__syncthreads
();
}
if
(
threadIdx
.
x
==
0
)
errors
[
blockIdx
.
x
]
=
make_float2
((
float
)
buffer
[
0
].
x
,
(
float
)
buffer
[
0
].
y
);
}
\ No newline at end of file
plugins/amoeba/platforms/cuda2/tests/TestCudaAmoebaMultipoleForce.cpp
View file @
699fce6e
...
@@ -1384,8 +1384,8 @@ int main( int numberOfArguments, char* argv[] ) {
...
@@ -1384,8 +1384,8 @@ int main( int numberOfArguments, char* argv[] ) {
// test mutual polarization, no cutoff
// test mutual polarization, no cutoff
//
testMultipoleAmmoniaMutualPolarization( log );
testMultipoleAmmoniaMutualPolarization
(
log
);
//
// // test multipole direct & mutual polarization using PME
// // test multipole direct & mutual polarization using PME
//
//
// testMultipoleWaterPMEDirectPolarization( log );
// testMultipoleWaterPMEDirectPolarization( log );
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment