Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
c22f00cf
Commit
c22f00cf
authored
Sep 13, 2010
by
Mark Friedrichs
Browse files
AmoebaCudaPmeMutualInducedField
parent
3763b76b
Changes
10
Show whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
1254 additions
and
526 deletions
+1254
-526
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
+1
-0
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
...ins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
+4
-0
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
...c/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
+67
-138
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.h
...rc/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.h
+8
-48
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
...uda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
+29
-238
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.h
...cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.h
+7
-32
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedParticle.h
...a/src/kernels/kCalculateAmoebaCudaMutualInducedParticle.h
+21
-70
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+745
-0
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
...a/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
+371
-0
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
...platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
+1
-0
No files found.
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
View file @
c22f00cf
...
...
@@ -2763,6 +2763,7 @@ void amoebaGpuSetConstants(amoebaGpuContext amoebaGpu)
SetCalculateAmoebaCudaVdw14_7Sim
(
amoebaGpu
);
SetCalculateAmoebaCudaWcaDispersionSim
(
amoebaGpu
);
SetCalculateAmoebaCudaMutualInducedFieldSim
(
amoebaGpu
);
SetCalculateAmoebaCudaPmeMutualInducedFieldSim
(
amoebaGpu
);
SetCalculateAmoebaCudaPmeFixedEFieldSim
(
amoebaGpu
);
SetCalculateAmoebaElectrostaticSim
(
amoebaGpu
);
SetCalculateAmoebaRealSpaceEwaldSim
(
amoebaGpu
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
View file @
c22f00cf
...
...
@@ -88,6 +88,10 @@ extern void SetCalculateAmoebaCudaMutualInducedFieldSim(amoebaGpuContext gpu);
extern
void
GetCalculateAmoebaCudaMutualInducedFieldSim
(
amoebaGpuContext
gpu
);
extern
void
cudaComputeAmoebaMutualInducedField
(
amoebaGpuContext
gpu
);
extern
void
SetCalculateAmoebaCudaPmeMutualInducedFieldSim
(
amoebaGpuContext
gpu
);
extern
void
GetCalculateAmoebaCudaPmeMutualInducedFieldSim
(
amoebaGpuContext
gpu
);
extern
void
cudaComputeAmoebaPmeMutualInducedField
(
amoebaGpuContext
gpu
);
// mutual induced and Gk
extern
void
SetCalculateAmoebaCudaMutualInducedAndGkFieldsSim
(
amoebaGpuContext
amoebaGpu
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
View file @
c22f00cf
...
...
@@ -36,14 +36,11 @@ void GetCalculateAmoebaCudaMutualInducedAndGkFieldsSim(amoebaGpuContext amoebaGp
//#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
__device__
void
calculateMutualInducedAndGkFieldsPairIxn_kernel
(
float4
atomCoordinatesI
,
float4
atomCoordinatesJ
,
float
dampingFactorI
,
float
dampingFactorJ
,
float
tholeI
,
float
tholeJ
,
float
*
inducedDipoleI
,
float
*
inducedDipolePolarI
,
float
*
inducedDipoleJ
,
float
*
inducedDipolePolarJ
,
float
*
inducedDipoleSI
,
float
*
inducedDipolePolarSI
,
float
*
inducedDipoleSJ
,
float
*
inducedDipolePolarSJ
,
float
scalingDistanceCutoff
,
#define GK
#include "kCalculateAmoebaCudaMutualInducedParticle.h"
#undef GK
__device__
void
calculateMutualInducedAndGkFieldsPairIxn_kernel
(
MutualInducedParticle
&
atomI
,
MutualInducedParticle
&
atomJ
,
float
fields
[
8
][
3
]
#ifdef AMOEBA_DEBUG
...
...
@@ -59,9 +56,9 @@ __device__ void calculateMutualInducedAndGkFieldsPairIxn_kernel( float4 atomCoor
// get deltaR, and r between 2 atoms
deltaR
[
0
]
=
atom
CoordinatesJ
.
x
-
atomCoordinates
I
.
x
;
deltaR
[
1
]
=
atom
CoordinatesJ
.
y
-
atomCoordinates
I
.
y
;
deltaR
[
2
]
=
atom
CoordinatesJ
.
z
-
atomCoordinates
I
.
z
;
deltaR
[
0
]
=
atom
J
.
x
-
atom
I
.
x
;
deltaR
[
1
]
=
atom
J
.
y
-
atom
I
.
y
;
deltaR
[
2
]
=
atom
J
.
z
-
atom
I
.
z
;
float
r
=
sqrtf
(
deltaR
[
0
]
*
deltaR
[
0
]
+
deltaR
[
1
]
*
deltaR
[
1
]
+
deltaR
[
2
]
*
deltaR
[
2
]
);
float
rI
=
1.0
f
/
r
;
...
...
@@ -69,112 +66,59 @@ __device__ void calculateMutualInducedAndGkFieldsPairIxn_kernel( float4 atomCoor
float
rr3
=
-
rI
*
r2I
;
float
rr5
=
-
3.0
f
*
rr3
*
r2I
;
float
dampProd
=
dampingFactorI
*
dampingFactorJ
;
float
dampProd
=
atomI
.
damp
*
atomJ
.
damp
;
float
ratio
=
(
dampProd
!=
0.0
f
)
?
(
r
/
dampProd
)
:
1.0
f
;
float
pGamma
=
thole
J
>
thole
I
?
tholeI
:
thole
J
;
float
pGamma
=
atomI
.
thole
>
atomJ
.
thole
?
atomJ
.
thole
:
atomI
.
thole
;
float
damp
=
ratio
*
ratio
*
ratio
*
pGamma
;
float
dampExp
=
(
(
dampProd
!=
0.0
f
)
&&
(
r
<
scalingDistanceCutoff
)
)
?
expf
(
-
damp
)
:
0.0
f
;
float
dampExp
=
(
(
dampProd
!=
0.0
f
)
&&
(
r
<
cAmoebaSim
.
scalingDistanceCutoff
)
)
?
expf
(
-
damp
)
:
0.0
f
;
rr3
*=
(
1.0
f
-
dampExp
);
rr5
*=
(
1.0
f
-
(
1.0
f
+
damp
)
*
dampExp
);
float
*
dipole
=
inducedDipoleJ
;
float
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
fields
[
0
][
0
]
=
rr3
*
dipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
0
][
1
]
=
rr3
*
dipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
0
][
2
]
=
rr3
*
dipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dipole
=
inducedDipolePolarJ
;
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
fields
[
1
][
0
]
=
rr3
*
dipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
1
][
1
]
=
rr3
*
dipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
1
][
2
]
=
rr3
*
dipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dipole
=
inducedDipoleI
;
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
fields
[
2
][
0
]
=
rr3
*
dipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
2
][
1
]
=
rr3
*
dipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
2
][
2
]
=
rr3
*
dipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dipole
=
inducedDipolePolarI
;
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
fields
[
3
][
0
]
=
rr3
*
dipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
3
][
1
]
=
rr3
*
dipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
3
][
2
]
=
rr3
*
dipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dipole
=
inducedDipoleSJ
;
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
fields
[
4
][
0
]
=
rr3
*
dipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
4
][
1
]
=
rr3
*
dipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
4
][
2
]
=
rr3
*
dipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dipole
=
inducedDipolePolarSJ
;
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
fields
[
5
][
0
]
=
rr3
*
dipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
5
][
1
]
=
rr3
*
dipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
5
][
2
]
=
rr3
*
dipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dipole
=
inducedDipoleSI
;
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
fields
[
6
][
0
]
=
rr3
*
dipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
6
][
1
]
=
rr3
*
dipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
6
][
2
]
=
rr3
*
dipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dipole
=
inducedDipolePolarSI
;
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
fields
[
7
][
0
]
=
rr3
*
dipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
7
][
1
]
=
rr3
*
dipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
7
][
2
]
=
rr3
*
dipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
#ifdef AMOEBA_DEBUGX
//int n2 = numberOfAtoms*numberOfAtoms;
//int debugIndex = atomI*numberOfAtoms + atomJ;
if
(
atomI
==
0
){
int
debugIndex
=
atomJ
;
debugArray
[
debugIndex
].
x
=
rr3
;
debugArray
[
debugIndex
].
y
=
rr5
;
dipole
=
&
inducedDipole
[
atomJ
*
3
];
debugArray
[
debugIndex
].
z
=
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
dipole
=
&
inducedDipole
[
atomI
*
3
];
debugArray
[
debugIndex
].
w
=
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
debugIndex
+=
numberOfAtoms
;
dipole
=
&
inducedDipolePolar
[
atomJ
*
3
];
debugArray
[
debugIndex
].
x
=
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
dipole
=
&
inducedDipolePolar
[
atomI
*
3
];
debugArray
[
debugIndex
].
y
=
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
debugIndex
+=
numberOfAtoms
;
debugArray
[
debugIndex
].
x
=
fields1
[
atomI
*
numberOfAtoms
*
3
+
atomJ
*
3
+
0
];
debugArray
[
debugIndex
].
y
=
fields1
[
atomI
*
numberOfAtoms
*
3
+
atomJ
*
3
+
1
];
debugArray
[
debugIndex
].
z
=
fields1
[
atomI
*
numberOfAtoms
*
3
+
atomJ
*
3
+
2
];
debugIndex
+=
numberOfAtoms
;
debugArray
[
debugIndex
].
x
=
fields1
[
atomJ
*
numberOfAtoms
*
3
+
atomI
*
3
+
0
];
debugArray
[
debugIndex
].
y
=
fields1
[
atomJ
*
numberOfAtoms
*
3
+
atomI
*
3
+
1
];
debugArray
[
debugIndex
].
z
=
fields1
[
atomJ
*
numberOfAtoms
*
3
+
atomI
*
3
+
2
];
debugIndex
+=
numberOfAtoms
;
debugArray
[
debugIndex
].
x
=
fields2
[
atomI
*
numberOfAtoms
*
3
+
atomJ
*
3
+
0
];
debugArray
[
debugIndex
].
y
=
fields2
[
atomI
*
numberOfAtoms
*
3
+
atomJ
*
3
+
1
];
debugArray
[
debugIndex
].
z
=
fields2
[
atomI
*
numberOfAtoms
*
3
+
atomJ
*
3
+
2
];
debugIndex
+=
numberOfAtoms
;
debugArray
[
debugIndex
].
x
=
fields2
[
atomJ
*
numberOfAtoms
*
3
+
atomI
*
3
+
0
];
debugArray
[
debugIndex
].
y
=
fields2
[
atomJ
*
numberOfAtoms
*
3
+
atomI
*
3
+
1
];
debugArray
[
debugIndex
].
z
=
fields2
[
atomJ
*
numberOfAtoms
*
3
+
atomI
*
3
+
2
];
float
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
atomJ
.
inducedDipole
[
0
]
+
deltaR
[
1
]
*
atomJ
.
inducedDipole
[
1
]
+
deltaR
[
2
]
*
atomJ
.
inducedDipole
[
2
]
);
fields
[
0
][
0
]
=
rr3
*
atomJ
.
inducedDipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
0
][
1
]
=
rr3
*
atomJ
.
inducedDipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
0
][
2
]
=
rr3
*
atomJ
.
inducedDipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
atomJ
.
inducedDipolePolar
[
0
]
+
deltaR
[
1
]
*
atomJ
.
inducedDipolePolar
[
1
]
+
deltaR
[
2
]
*
atomJ
.
inducedDipolePolar
[
2
]
);
fields
[
1
][
0
]
=
rr3
*
atomJ
.
inducedDipolePolar
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
1
][
1
]
=
rr3
*
atomJ
.
inducedDipolePolar
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
1
][
2
]
=
rr3
*
atomJ
.
inducedDipolePolar
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
atomI
.
inducedDipole
[
0
]
+
deltaR
[
1
]
*
atomI
.
inducedDipole
[
1
]
+
deltaR
[
2
]
*
atomI
.
inducedDipole
[
2
]
);
fields
[
2
][
0
]
=
rr3
*
atomI
.
inducedDipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
2
][
1
]
=
rr3
*
atomI
.
inducedDipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
2
][
2
]
=
rr3
*
atomI
.
inducedDipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
atomI
.
inducedDipolePolar
[
0
]
+
deltaR
[
1
]
*
atomI
.
inducedDipolePolar
[
1
]
+
deltaR
[
2
]
*
atomI
.
inducedDipolePolar
[
2
]
);
fields
[
3
][
0
]
=
rr3
*
atomI
.
inducedDipolePolar
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
3
][
1
]
=
rr3
*
atomI
.
inducedDipolePolar
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
3
][
2
]
=
rr3
*
atomI
.
inducedDipolePolar
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
atomJ
.
inducedDipoleS
[
0
]
+
deltaR
[
1
]
*
atomJ
.
inducedDipoleS
[
1
]
+
deltaR
[
2
]
*
atomJ
.
inducedDipoleS
[
2
]
);
fields
[
4
][
0
]
=
rr3
*
atomJ
.
inducedDipoleS
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
4
][
1
]
=
rr3
*
atomJ
.
inducedDipoleS
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
4
][
2
]
=
rr3
*
atomJ
.
inducedDipoleS
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
atomJ
.
inducedDipolePolarS
[
0
]
+
deltaR
[
1
]
*
atomJ
.
inducedDipolePolarS
[
1
]
+
deltaR
[
2
]
*
atomJ
.
inducedDipolePolarS
[
2
]
);
fields
[
5
][
0
]
=
rr3
*
atomJ
.
inducedDipolePolarS
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
5
][
1
]
=
rr3
*
atomJ
.
inducedDipolePolarS
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
5
][
2
]
=
rr3
*
atomJ
.
inducedDipolePolarS
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
atomI
.
inducedDipoleS
[
0
]
+
deltaR
[
1
]
*
atomI
.
inducedDipoleS
[
1
]
+
deltaR
[
2
]
*
atomI
.
inducedDipoleS
[
2
]
);
fields
[
6
][
0
]
=
rr3
*
atomI
.
inducedDipoleS
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
6
][
1
]
=
rr3
*
atomI
.
inducedDipoleS
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
6
][
2
]
=
rr3
*
atomI
.
inducedDipoleS
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
atomI
.
inducedDipolePolarS
[
0
]
+
deltaR
[
1
]
*
atomI
.
inducedDipolePolarS
[
1
]
+
deltaR
[
2
]
*
atomI
.
inducedDipolePolarS
[
2
]
);
fields
[
7
][
0
]
=
rr3
*
atomI
.
inducedDipolePolarS
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
7
][
1
]
=
rr3
*
atomI
.
inducedDipolePolarS
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
7
][
2
]
=
rr3
*
atomI
.
inducedDipolePolarS
[
2
]
+
dDotDelta
*
deltaR
[
2
];
}
#endif
}
__device__
void
calculateMutualInducedAndGkFieldsGkPairIxn_kernel
(
float4
atomCoordinatesI
,
float4
atomCoordinatesJ
,
float
rb2
,
float
*
inducedDipoleSI
,
float
*
inducedDipolePolarSI
,
float
*
inducedDipoleSJ
,
float
*
inducedDipolePolarSJ
,
__device__
void
calculateMutualInducedAndGkFieldsGkPairIxn_kernel
(
MutualInducedParticle
&
atomI
,
MutualInducedParticle
&
atomJ
,
float
gkField
[
8
][
3
]
#ifdef AMOEBA_DEBUG
...
...
@@ -191,14 +135,16 @@ __device__ void calculateMutualInducedAndGkFieldsGkPairIxn_kernel( float4 atomCo
// ---------------------------------------------------------------------------------------
float
xr
=
atom
CoordinatesJ
.
x
-
atomCoordinates
I
.
x
;
float
yr
=
atom
CoordinatesJ
.
y
-
atomCoordinates
I
.
y
;
float
zr
=
atom
CoordinatesJ
.
z
-
atomCoordinates
I
.
z
;
float
xr
=
atom
J
.
x
-
atom
I
.
x
;
float
yr
=
atom
J
.
y
-
atom
I
.
y
;
float
zr
=
atom
J
.
z
-
atom
I
.
z
;
float
xr2
=
xr
*
xr
;
float
yr2
=
yr
*
yr
;
float
zr2
=
zr
*
zr
;
float
rb2
=
atomI
.
bornRadius
*
atomJ
.
bornRadius
;
float
r2
=
xr2
+
yr2
+
zr2
;
float
expterm
=
expf
(
-
r2
/
(
cAmoebaSim
.
gkc
*
rb2
));
float
expc
=
expterm
/
cAmoebaSim
.
gkc
;
...
...
@@ -209,21 +155,21 @@ __device__ void calculateMutualInducedAndGkFieldsGkPairIxn_kernel( float4 atomCo
float
gf3
=
gf2
*
gf
;
float
gf5
=
gf3
*
gf2
;
float
duixs
=
inducedDipoleS
I
[
0
];
float
duiys
=
inducedDipoleS
I
[
1
];
float
duizs
=
inducedDipoleS
I
[
2
];
float
duixs
=
atomI
.
inducedDipoleS
[
0
];
float
duiys
=
atomI
.
inducedDipoleS
[
1
];
float
duizs
=
atomI
.
inducedDipoleS
[
2
];
float
puixs
=
inducedDipolePolarS
I
[
0
];
float
puiys
=
inducedDipolePolarS
I
[
1
];
float
puizs
=
inducedDipolePolarS
I
[
2
];
float
puixs
=
atomI
.
inducedDipolePolarS
[
0
];
float
puiys
=
atomI
.
inducedDipolePolarS
[
1
];
float
puizs
=
atomI
.
inducedDipolePolarS
[
2
];
float
dukxs
=
inducedDipoleS
J
[
0
];
float
dukys
=
inducedDipoleS
J
[
1
];
float
dukzs
=
inducedDipoleS
J
[
2
];
float
dukxs
=
atomJ
.
inducedDipoleS
[
0
];
float
dukys
=
atomJ
.
inducedDipoleS
[
1
];
float
dukzs
=
atomJ
.
inducedDipoleS
[
2
];
float
pukxs
=
inducedDipolePolarS
J
[
0
];
float
pukys
=
inducedDipolePolarS
J
[
1
];
float
pukzs
=
inducedDipolePolarS
J
[
2
];
float
pukxs
=
atomJ
.
inducedDipolePolarS
[
0
];
float
pukys
=
atomJ
.
inducedDipolePolarS
[
1
];
float
pukzs
=
atomJ
.
inducedDipolePolarS
[
2
];
// reaction potential auxiliary terms
...
...
@@ -280,9 +226,6 @@ __device__ static int debugAccumulate( int index, float4* debugArray, float* fie
}
#endif
#define GK
#include "kCalculateAmoebaCudaMutualInducedParticle.h"
#undef GK
// Include versions of the kernels for N^2 calculations.
...
...
@@ -587,24 +530,16 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon
}
if
(
gpu
->
bOutputBufferPerWarp
){
#if 0
(void) fprintf( amoebaGpu->log, "N2 warp\n" ); (void) fflush( amoebaGpu->log );
kCalculateAmoebaMutualInducedAndGkFieldsN2ByWarp_kernel<<<amoebaGpu->nonbondBlocks,
amoebaGpu->nonbondThreadsPerBlock,
sizeof(MutualInducedParticle)*amoebaGpu->nonbondThreadsPerBlock>>>(
kCalculateAmoebaMutualInducedAndGkFieldsN2ByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
gpu->psPosq4->_pDevStream[0],
amoebaGpu->psInducedDipole->_pDevStream[0],
amoebaGpu->psInducedDipolePolar->_pDevStream[0],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_3
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_4
->
_pDevStream
[
0
],
debugArray
->
_pDevStream
[
0
],
targetAtom
);
#else
amoebaGpu->psWorkArray_3_2->_pDevStream[0] );
#endif
amoebaGpu
->
psWorkArray_3_4
->
_pDevStream
[
0
]
);
#endif
}
else
{
...
...
@@ -619,12 +554,6 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon
kCalculateAmoebaMutualInducedAndGkFieldsN2_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
gpu
->
psPosq4
->
_pDevStream
[
0
],
gpu
->
psBornRadii
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipolePolar
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipoleS
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipolePolarS
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_3
->
_pDevStream
[
0
],
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.h
View file @
c22f00cf
...
...
@@ -36,12 +36,6 @@ __launch_bounds__(64, 1)
#endif
void
METHOD_NAME
(
kCalculateAmoebaMutualInducedAndGkFields
,
_kernel
)(
unsigned
int
*
workUnit
,
float4
*
atomCoord
,
float
*
bornRadii
,
float
*
inducedDipole
,
float
*
inducedDipolePolar
,
float
*
inducedDipoleS
,
float
*
inducedDipolePolarS
,
float
*
outputField
,
float
*
outputFieldPolar
,
float
*
outputFieldS
,
...
...
@@ -60,13 +54,6 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedAndGkFields, _kernel)(
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
lasty
=
0xFFFFFFFF
;
float4
jCoord
;
float
jBornRadius
;
float
jDipole
[
3
];
float
jDipolePolar
[
3
];
float
jDipoleS
[
3
];
float
jDipolePolarS
[
3
];
while
(
pos
<
end
)
{
...
...
@@ -84,7 +71,8 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedAndGkFields, _kernel)(
MutualInducedParticle
*
psA
=
&
sA
[
tbx
];
unsigned
int
atomI
=
x
+
tgx
;
float4
iCoord
=
atomCoord
[
atomI
];
MutualInducedParticle
localParticle
;
loadMutualInducedShared
(
&
localParticle
,
atomI
);
float
fieldSum
[
3
];
float
fieldPolarSum
[
3
];
...
...
@@ -115,9 +103,7 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedAndGkFields, _kernel)(
// load shared data
loadMutualInducedShared
(
&
(
sA
[
threadIdx
.
x
]),
atomI
,
atomCoord
,
inducedDipole
,
inducedDipolePolar
,
cAmoebaSim
.
pDampingFactorAndThole
,
bornRadii
,
inducedDipoleS
,
inducedDipolePolarS
);
loadMutualInducedShared
(
&
(
sA
[
threadIdx
.
x
]),
atomI
);
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
...
...
@@ -126,16 +112,7 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedAndGkFields, _kernel)(
// load coords, charge, ...
loadMutualInducedData
(
&
(
psA
[
j
]),
&
jCoord
,
jDipole
,
jDipolePolar
,
&
jBornRadius
,
jDipoleS
,
jDipolePolarS
);
calculateMutualInducedAndGkFieldsPairIxn_kernel
(
iCoord
,
jCoord
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
,
psA
[
j
].
damp
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
,
psA
[
j
].
thole
,
&
(
inducedDipole
[
atomI
*
3
]),
&
(
inducedDipolePolar
[
atomI
*
3
]),
jDipole
,
jDipolePolar
,
&
(
inducedDipoleS
[
atomI
*
3
]),
&
(
inducedDipolePolarS
[
atomI
*
3
]),
jDipoleS
,
jDipolePolarS
,
cAmoebaSim
.
scalingDistanceCutoff
,
ijField
calculateMutualInducedAndGkFieldsPairIxn_kernel
(
localParticle
,
psA
[
j
],
ijField
#ifdef AMOEBA_DEBUG
,
debugArray
#endif
...
...
@@ -177,10 +154,7 @@ if( atomI == targetAtom ){
index = debugAccumulate( index, debugArray, ijField[5], mask, 4.0f );
}
#endif
calculateMutualInducedAndGkFieldsGkPairIxn_kernel
(
iCoord
,
jCoord
,
bornRadii
[
atomI
]
*
jBornRadius
,
&
(
inducedDipoleS
[
atomI
*
3
]),
&
(
inducedDipolePolarS
[
atomI
*
3
]),
jDipoleS
,
jDipolePolarS
,
ijField
calculateMutualInducedAndGkFieldsGkPairIxn_kernel
(
localParticle
,
psA
[
j
],
ijField
#ifdef AMOEBA_DEBUG
,
debugArray
#endif
...
...
@@ -247,9 +221,7 @@ if( atomI == targetAtom ){
{
// load coordinates, charge, ...
loadMutualInducedShared
(
&
(
sA
[
threadIdx
.
x
]),
(
y
+
tgx
),
atomCoord
,
inducedDipole
,
inducedDipolePolar
,
cAmoebaSim
.
pDampingFactorAndThole
,
bornRadii
,
inducedDipoleS
,
inducedDipolePolarS
);
loadMutualInducedShared
(
&
(
sA
[
threadIdx
.
x
]),
(
y
+
tgx
)
);
}
// zero shared fields
...
...
@@ -263,16 +235,7 @@ if( atomI == targetAtom ){
// load coords, charge, ...
loadMutualInducedData
(
&
(
psA
[
tj
]),
&
jCoord
,
jDipole
,
jDipolePolar
,
&
jBornRadius
,
jDipoleS
,
jDipolePolarS
);
calculateMutualInducedAndGkFieldsPairIxn_kernel
(
iCoord
,
jCoord
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
,
psA
[
tj
].
damp
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
,
psA
[
tj
].
thole
,
&
(
inducedDipole
[
atomI
*
3
]),
&
(
inducedDipolePolar
[
atomI
*
3
]),
jDipole
,
jDipolePolar
,
&
(
inducedDipoleS
[
atomI
*
3
]),
&
(
inducedDipolePolarS
[
atomI
*
3
]),
jDipoleS
,
jDipolePolarS
,
cAmoebaSim
.
scalingDistanceCutoff
,
ijField
calculateMutualInducedAndGkFieldsPairIxn_kernel
(
localParticle
,
psA
[
tj
],
ijField
#ifdef AMOEBA_DEBUG
,
debugArray
#endif
...
...
@@ -344,10 +307,7 @@ if( atomI == targetAtom || (y + tj) == targetAtom ){
index = debugAccumulate( index, debugArray, ijField[indexI+5], maskD, -4.0f );
}
#endif
calculateMutualInducedAndGkFieldsGkPairIxn_kernel
(
iCoord
,
jCoord
,
bornRadii
[
atomI
]
*
jBornRadius
,
&
(
inducedDipoleS
[
atomI
*
3
]),
&
(
inducedDipolePolarS
[
atomI
*
3
]),
jDipoleS
,
jDipolePolarS
,
ijField
calculateMutualInducedAndGkFieldsGkPairIxn_kernel
(
localParticle
,
psA
[
tj
],
ijField
#ifdef AMOEBA_DEBUG
,
debugArray
#endif
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
View file @
c22f00cf
...
...
@@ -36,12 +36,9 @@ void GetCalculateAmoebaCudaMutualInducedFieldSim(amoebaGpuContext amoebaGpu)
//#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
__device__
void
calculateMutualInducedFieldPairIxn_kernel
(
float4
atomCoordinatesI
,
float4
atomCoordinatesJ
,
float
dampingFactorI
,
float
dampingFactorJ
,
float
tholeI
,
float
tholeJ
,
float
*
inducedDipoleI
,
float
*
inducedDipolePolarI
,
float
*
inducedDipoleJ
,
float
*
inducedDipolePolarJ
,
float
scalingDistanceCutoff
,
#include "kCalculateAmoebaCudaMutualInducedParticle.h"
__device__
void
calculateMutualInducedFieldPairIxn_kernel
(
MutualInducedParticle
&
atomI
,
MutualInducedParticle
&
atomJ
,
float
fields
[
4
][
3
]
#ifdef AMOEBA_DEBUG
...
...
@@ -57,9 +54,9 @@ __device__ void calculateMutualInducedFieldPairIxn_kernel( float4 atomCoordinate
// get deltaR, and r between 2 atoms
deltaR
[
0
]
=
atom
CoordinatesJ
.
x
-
atomCoordinates
I
.
x
;
deltaR
[
1
]
=
atom
CoordinatesJ
.
y
-
atomCoordinates
I
.
y
;
deltaR
[
2
]
=
atom
CoordinatesJ
.
z
-
atomCoordinates
I
.
z
;
deltaR
[
0
]
=
atom
J
.
x
-
atom
I
.
x
;
deltaR
[
1
]
=
atom
J
.
y
-
atom
I
.
y
;
deltaR
[
2
]
=
atom
J
.
z
-
atom
I
.
z
;
float
r
=
sqrtf
(
deltaR
[
0
]
*
deltaR
[
0
]
+
deltaR
[
1
]
*
deltaR
[
1
]
+
deltaR
[
2
]
*
deltaR
[
2
]
);
float
rI
=
1.0
f
/
r
;
...
...
@@ -67,87 +64,36 @@ __device__ void calculateMutualInducedFieldPairIxn_kernel( float4 atomCoordinate
float
rr3
=
-
rI
*
r2I
;
float
rr5
=
-
3.0
f
*
rr3
*
r2I
;
float
dampProd
=
dampingFactorI
*
dampingFactorJ
;
float
dampProd
=
atomI
.
damp
*
atomJ
.
damp
;
float
ratio
=
(
dampProd
!=
0.0
f
)
?
(
r
/
dampProd
)
:
1.0
f
;
float
pGamma
=
thole
J
>
thole
I
?
tholeI
:
thole
J
;
float
pGamma
=
atomJ
.
thole
>
atomI
.
thole
?
atomI
.
thole
:
atomJ
.
thole
;
float
damp
=
ratio
*
ratio
*
ratio
*
pGamma
;
float
dampExp
=
(
(
dampProd
!=
0.0
f
)
&&
(
r
<
scalingDistanceCutoff
)
)
?
expf
(
-
damp
)
:
0.0
f
;
float
dampExp
=
(
(
dampProd
!=
0.0
f
)
&&
(
r
<
cAmoebaSim
.
scalingDistanceCutoff
)
)
?
expf
(
-
damp
)
:
0.0
f
;
rr3
*=
(
1.0
f
-
dampExp
);
rr5
*=
(
1.0
f
-
(
1.0
f
+
damp
)
*
dampExp
);
float
*
dipole
=
inducedDipoleJ
;
float
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
fields
[
0
][
0
]
=
rr3
*
dipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
0
][
1
]
=
rr3
*
dipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
0
][
2
]
=
rr3
*
dipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dipole
=
inducedDipolePolarJ
;
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
fields
[
1
][
0
]
=
rr3
*
dipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
1
][
1
]
=
rr3
*
dipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
1
][
2
]
=
rr3
*
dipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dipole
=
inducedDipoleI
;
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
fields
[
2
][
0
]
=
rr3
*
dipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
2
][
1
]
=
rr3
*
dipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
2
][
2
]
=
rr3
*
dipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dipole
=
inducedDipolePolarI
;
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
fields
[
3
][
0
]
=
rr3
*
dipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
3
][
1
]
=
rr3
*
dipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
3
][
2
]
=
rr3
*
dipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
#ifdef AMOEBA_DEBUGX
//int n2 = numberOfAtoms*numberOfAtoms;
//int debugIndex = atomI*numberOfAtoms + atomJ;
if
(
atomI
==
0
){
int
debugIndex
=
atomJ
;
debugArray
[
debugIndex
].
x
=
rr3
;
debugArray
[
debugIndex
].
y
=
rr5
;
dipole
=
&
inducedDipole
[
atomJ
*
3
];
debugArray
[
debugIndex
].
z
=
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
dipole
=
&
inducedDipole
[
atomI
*
3
];
debugArray
[
debugIndex
].
w
=
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
debugIndex
+=
numberOfAtoms
;
dipole
=
&
inducedDipolePolar
[
atomJ
*
3
];
debugArray
[
debugIndex
].
x
=
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
dipole
=
&
inducedDipolePolar
[
atomI
*
3
];
debugArray
[
debugIndex
].
y
=
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
debugIndex
+=
numberOfAtoms
;
debugArray
[
debugIndex
].
x
=
fields1
[
atomI
*
numberOfAtoms
*
3
+
atomJ
*
3
+
0
];
debugArray
[
debugIndex
].
y
=
fields1
[
atomI
*
numberOfAtoms
*
3
+
atomJ
*
3
+
1
];
debugArray
[
debugIndex
].
z
=
fields1
[
atomI
*
numberOfAtoms
*
3
+
atomJ
*
3
+
2
];
debugIndex
+=
numberOfAtoms
;
debugArray
[
debugIndex
].
x
=
fields1
[
atomJ
*
numberOfAtoms
*
3
+
atomI
*
3
+
0
];
debugArray
[
debugIndex
].
y
=
fields1
[
atomJ
*
numberOfAtoms
*
3
+
atomI
*
3
+
1
];
debugArray
[
debugIndex
].
z
=
fields1
[
atomJ
*
numberOfAtoms
*
3
+
atomI
*
3
+
2
];
debugIndex
+=
numberOfAtoms
;
debugArray
[
debugIndex
].
x
=
fields2
[
atomI
*
numberOfAtoms
*
3
+
atomJ
*
3
+
0
];
debugArray
[
debugIndex
].
y
=
fields2
[
atomI
*
numberOfAtoms
*
3
+
atomJ
*
3
+
1
];
debugArray
[
debugIndex
].
z
=
fields2
[
atomI
*
numberOfAtoms
*
3
+
atomJ
*
3
+
2
];
debugIndex
+=
numberOfAtoms
;
debugArray
[
debugIndex
].
x
=
fields2
[
atomJ
*
numberOfAtoms
*
3
+
atomI
*
3
+
0
];
debugArray
[
debugIndex
].
y
=
fields2
[
atomJ
*
numberOfAtoms
*
3
+
atomI
*
3
+
1
];
debugArray
[
debugIndex
].
z
=
fields2
[
atomJ
*
numberOfAtoms
*
3
+
atomI
*
3
+
2
];
}
#endif
float
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
atomJ
.
inducedDipole
[
0
]
+
deltaR
[
1
]
*
atomJ
.
inducedDipole
[
1
]
+
deltaR
[
2
]
*
atomJ
.
inducedDipole
[
2
]
);
fields
[
0
][
0
]
=
rr3
*
atomJ
.
inducedDipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
0
][
1
]
=
rr3
*
atomJ
.
inducedDipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
0
][
2
]
=
rr3
*
atomJ
.
inducedDipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
atomJ
.
inducedDipolePolar
[
0
]
+
deltaR
[
1
]
*
atomJ
.
inducedDipolePolar
[
1
]
+
deltaR
[
2
]
*
atomJ
.
inducedDipolePolar
[
2
]
);
fields
[
1
][
0
]
=
rr3
*
atomJ
.
inducedDipolePolar
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
1
][
1
]
=
rr3
*
atomJ
.
inducedDipolePolar
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
1
][
2
]
=
rr3
*
atomJ
.
inducedDipolePolar
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
atomI
.
inducedDipole
[
0
]
+
deltaR
[
1
]
*
atomI
.
inducedDipole
[
1
]
+
deltaR
[
2
]
*
atomI
.
inducedDipole
[
2
]
);
fields
[
2
][
0
]
=
rr3
*
atomI
.
inducedDipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
2
][
1
]
=
rr3
*
atomI
.
inducedDipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
2
][
2
]
=
rr3
*
atomI
.
inducedDipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
atomI
.
inducedDipolePolar
[
0
]
+
deltaR
[
1
]
*
atomI
.
inducedDipolePolar
[
1
]
+
deltaR
[
2
]
*
atomI
.
inducedDipolePolar
[
2
]
);
fields
[
3
][
0
]
=
rr3
*
atomI
.
inducedDipolePolar
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
3
][
1
]
=
rr3
*
atomI
.
inducedDipolePolar
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
3
][
2
]
=
rr3
*
atomI
.
inducedDipolePolar
[
2
]
+
dDotDelta
*
deltaR
[
2
];
}
#include "kCalculateAmoebaCudaMutualInducedParticle.h"
// Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##N2##b
...
...
@@ -230,18 +176,9 @@ void kReduceMutualInducedFieldDelta_kernel(int numberOfEntries, float* arrayOfDe
{
epsilon
[
0
]
=
delta
[
0
].
x
>
delta
[
0
].
y
?
delta
[
0
].
x
:
delta
[
0
].
y
;
epsilon
[
0
]
=
4.8033324
f
*
sqrtf
(
epsilon
[
0
]
/
(
(
float
)
(
numberOfEntries
/
3
))
);
#ifdef AMOEBA_DEBUG
epsilon
[
1
]
=
4.8033324
f
*
sqrtf
(
delta
[
0
].
x
/
(
(
float
)
(
numberOfEntries
/
3
))
);
epsilon
[
2
]
=
4.8033324
f
*
sqrtf
(
delta
[
0
].
y
/
(
(
float
)
(
numberOfEntries
/
3
))
);
#endif
}
}
/**
matrixProduct/matrixProductP contains epsilon**2 on output
*/
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
...
...
@@ -291,46 +228,6 @@ static void kReduceMutualInducedFields(amoebaGpuContext amoebaGpu, CUDAStream<fl
LAUNCHERROR
(
"kReduceMI_Fields2"
);
}
#ifdef AMOEBA_DEBUG
#if 0
static void printMiFieldBuffer( amoebaGpuContext amoebaGpu, unsigned int bufferIndex )
{
(void) fprintf( amoebaGpu->log, "MI Field Buffer %u\n", bufferIndex );
unsigned int start = bufferIndex*3*amoebaGpu->paddedNumberOfAtoms;
unsigned int stop = (bufferIndex+1)*3*amoebaGpu->paddedNumberOfAtoms;
for( unsigned int ii = start; ii < stop; ii += 3 ){
unsigned int ii3Index = ii/3;
unsigned int bufferIndex = ii3Index/(amoebaGpu->paddedNumberOfAtoms);
unsigned int particleIndex = ii3Index - bufferIndex*(amoebaGpu->paddedNumberOfAtoms);
(void) fprintf( amoebaGpu->log, " %6u %3u %6u [%14.6e %14.6e %14.6e] [%14.6e %14.6e %14.6e]\n",
ii/3, bufferIndex, particleIndex,
amoebaGpu->psWorkArray_3_1->_pSysStream[0][ii],
amoebaGpu->psWorkArray_3_1->_pSysStream[0][ii+1],
amoebaGpu->psWorkArray_3_1->_pSysStream[0][ii+2],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][ii],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][ii+1],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][ii+2] );
}
}
static void printMiFieldAtomBuffers( amoebaGpuContext amoebaGpu, unsigned int targetAtom )
{
(void) fprintf( amoebaGpu->log, "MI Field atom %u\n", targetAtom );
for( unsigned int ii = 0; ii < amoebaGpu->outputBuffers; ii++ ){
unsigned int particleIndex = 3*(targetAtom + ii*amoebaGpu->paddedNumberOfAtoms);
(void) fprintf( amoebaGpu->log, " %2u %6u [%14.6e %14.6e %14.6e] [%14.6e %14.6e %14.6e]\n",
ii, particleIndex,
amoebaGpu->psWorkArray_3_1->_pSysStream[0][particleIndex],
amoebaGpu->psWorkArray_3_1->_pSysStream[0][particleIndex+1],
amoebaGpu->psWorkArray_3_1->_pSysStream[0][particleIndex+2],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][particleIndex],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][particleIndex+1],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][particleIndex+2] );
}
}
#endif
#endif
/**---------------------------------------------------------------------------------------
Compute mutual induce field
...
...
@@ -367,9 +264,6 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext
amoebaGpu
->
nonbondThreadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
gpu
->
psPosq4
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipolePolar
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
...
...
@@ -392,9 +286,6 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext
amoebaGpu
->
nonbondThreadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
gpu
->
psPosq4
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipolePolar
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
...
...
@@ -413,15 +304,6 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext
amoebaGpu
->
psWorkArray_3_1
->
Download
();
amoebaGpu
->
psWorkArray_3_2
->
Download
();
//printMiFieldAtomBuffers( amoebaGpu, (targetAtom + 0) );
//printMiFieldAtomBuffers( amoebaGpu, (targetAtom + 1) );
//printMiFieldAtomBuffers( amoebaGpu, 100 );
//printMiFieldBuffer( amoebaGpu, 0 );
//printMiFieldBuffer( amoebaGpu, 1 );
//printMiFieldBuffer( amoebaGpu, 37 );
//printMiFieldBuffer( amoebaGpu, 38 );
if
(
amoebaGpu
->
log
&&
iteration
==
-
1
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished MI kernel execution %d
\n
"
,
iteration
);
(
void
)
fflush
(
amoebaGpu
->
log
);
outputArray
->
Download
();
...
...
@@ -523,50 +405,6 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
count
++
;
#if 0
//debugIndex += gpu->natoms;
debugIndex += paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e]\n",
debugArray->_pSysStream[0][debugIndex].x, debugArray->_pSysStream[0][debugIndex].y,
debugArray->_pSysStream[0][debugIndex].z );
int index = 0;
sums[index][0] += debugArray->_pSysStream[0][debugIndex].x;
sums[index][1] += debugArray->_pSysStream[0][debugIndex].y;
sums[index][2] += debugArray->_pSysStream[0][debugIndex].z;
debugIndex += gpu->natoms;
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e]\n",
debugArray->_pSysStream[0][debugIndex].x, debugArray->_pSysStream[0][debugIndex].y,
debugArray->_pSysStream[0][debugIndex].z );
index++;
sums[index][0] += debugArray->_pSysStream[0][debugIndex].x;
sums[index][1] += debugArray->_pSysStream[0][debugIndex].y;
sums[index][2] += debugArray->_pSysStream[0][debugIndex].z;
debugIndex += gpu->natoms;
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e]\n",
debugArray->_pSysStream[0][debugIndex].x, debugArray->_pSysStream[0][debugIndex].y,
debugArray->_pSysStream[0][debugIndex].z );
index++;
sums[index][0] += debugArray->_pSysStream[0][debugIndex].x;
sums[index][1] += debugArray->_pSysStream[0][debugIndex].y;
sums[index][2] += debugArray->_pSysStream[0][debugIndex].z;
debugIndex += gpu->natoms;
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e %16.9e]\n",
debugArray->_pSysStream[0][debugIndex].x, debugArray->_pSysStream[0][debugIndex].y,
debugArray->_pSysStream[0][debugIndex].z, debugArray->_pSysStream[0][debugIndex].w );
index++;
sums[index][0] += debugArray->_pSysStream[0][debugIndex].x;
sums[index][1] += debugArray->_pSysStream[0][debugIndex].y;
sums[index][2] += debugArray->_pSysStream[0][debugIndex].z;
#endif
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
...
...
@@ -693,43 +531,6 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
cudaComputeAmoebaMutualInducedFieldMatrixMultiply
(
amoebaGpu
,
amoebaGpu
->
psWorkVector
[
0
],
amoebaGpu
->
psWorkVector
[
1
]
);
LAUNCHERROR
(
"cudaComputeAmoebaMutualInducedFieldMatrixMultiply Loop
\n
"
);
//#ifdef AMOEBA_DEBUG
#if 0
if( 0 && amoebaGpu->log ){
amoebaGpu->psInducedDipole->Download();
amoebaGpu->psInducedDipolePolar->Download();
(void) fprintf( amoebaGpu->log, "%s Post cudaComputeAmoebaMutualInducedFieldMatrixMultiply\n", methodName );
int offset = 0;
int maxPrint = 20;
for( int ii = 0; ii < gpu->natoms; ii++ ){
(void) fprintf( amoebaGpu->log, "%4d ", ii );
(void) fprintf( amoebaGpu->log," MMi[%14.6e %14.6e %14.6e] ",
amoebaGpu->psInducedDipole->_pSysStream[0][offset], amoebaGpu->psInducedDipole->_pSysStream[0][offset+1], amoebaGpu->psInducedDipole->_pSysStream[0][offset+2] );
(void) fprintf( amoebaGpu->log,"MMip[%14.6e %14.6e %14.6e]\n",
amoebaGpu->psInducedDipolePolar->_pSysStream[0][offset], amoebaGpu->psInducedDipolePolar->_pSysStream[0][offset+1], amoebaGpu->psInducedDipolePolar->_pSysStream[0][offset+2] );
if( ii == maxPrint && (ii < (gpu->natoms - maxPrint) ) ){
ii = (gpu->natoms - maxPrint);
offset = ii*3;
} else {
offset += 3;
}
}
(void) fflush( amoebaGpu->log );
// write results to file
if( iteration == -1 ){
fileId[1] = iteration;
cudaWriteFloat1AndFloat1ArraysToFile( gpu->natoms, "CudaMI_MatrixProduct1", fileId, 3, amoebaGpu->psWorkVector[0],
3, amoebaGpu->psWorkVector[1] );
}
}
#endif
// ---------------------------------------------------------------------------------------
// post matrix multiply
kSorUpdateMutualInducedField_kernel
<<<
numBlocks
,
numThreads
>>>
(
...
...
@@ -763,18 +564,8 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
if
(
amoebaGpu
->
log
){
amoebaGpu
->
psInducedDipole
->
Download
();
amoebaGpu
->
psInducedDipolePolar
->
Download
();
#if 1
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s iteration=%3d eps %14.6e [%14.6e %14.6e] done=%d
\n
"
,
methodName
,
iteration
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
1
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
2
],
done
);
#else
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s iteration=%3d eps %14.6e %14.6e crrntEps=%14.6e %14.6e %14.6e %14.6e done=%d
\n
"
,
methodName
,
iteration
,
sum1
,
sum2
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
0
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
1
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
2
],
done
);
#endif
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s iteration=%3d eps %14.6e done=%d
\n
"
,
methodName
,
iteration
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
done
);
(
void
)
fflush
(
amoebaGpu
->
log
);
int
offset
=
0
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.h
View file @
c22f00cf
...
...
@@ -36,11 +36,7 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif
void
METHOD_NAME
(
kCalculateAmoebaMutualInducedField
,
_kernel
)(
unsigned
int
*
workUnit
,
float4
*
atomCoord
,
float
*
inducedDipole
,
float
*
inducedDipolePolar
,
float
*
outputField
,
float
*
outputFieldPolar
float
*
outputField
,
float
*
outputFieldPolar
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
,
unsigned
int
targetAtom
#endif
...
...
@@ -55,10 +51,6 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
lasty
=
0xFFFFFFFF
;
float4
jCoord
;
float
jDipole
[
3
];
float
jDipolePolar
[
3
];
while
(
pos
<
end
)
{
...
...
@@ -76,7 +68,8 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
MutualInducedParticle
*
psA
=
&
sA
[
tbx
];
unsigned
int
atomI
=
x
+
tgx
;
float4
iCoord
=
atomCoord
[
atomI
];
MutualInducedParticle
localParticle
;
loadMutualInducedShared
(
&
localParticle
,
atomI
);
float
fieldSum
[
3
];
float
fieldPolarSum
[
3
];
...
...
@@ -97,9 +90,7 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
// load shared data
loadMutualInducedShared
(
&
(
sA
[
threadIdx
.
x
]),
atomI
,
atomCoord
,
inducedDipole
,
inducedDipolePolar
,
cAmoebaSim
.
pDampingFactorAndThole
);
loadMutualInducedShared
(
&
(
sA
[
threadIdx
.
x
]),
atomI
);
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
...
...
@@ -108,14 +99,7 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
// load coords, charge, ...
loadMutualInducedData
(
&
(
psA
[
j
]),
&
jCoord
,
jDipole
,
jDipolePolar
);
calculateMutualInducedFieldPairIxn_kernel
(
iCoord
,
jCoord
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
,
psA
[
j
].
damp
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
,
psA
[
j
].
thole
,
&
(
inducedDipole
[
atomI
*
3
]),
&
(
inducedDipolePolar
[
atomI
*
3
]),
jDipole
,
jDipolePolar
,
cAmoebaSim
.
scalingDistanceCutoff
,
ijField
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
j
],
ijField
#ifdef AMOEBA_DEBUG
,
debugArray
#endif
...
...
@@ -187,9 +171,7 @@ if( atomI == targetAtom ){
// load coordinates, charge, ...
loadMutualInducedShared
(
&
(
sA
[
threadIdx
.
x
]),
atomJ
,
atomCoord
,
inducedDipole
,
inducedDipolePolar
,
cAmoebaSim
.
pDampingFactorAndThole
);
loadMutualInducedShared
(
&
(
sA
[
threadIdx
.
x
]),
atomJ
);
}
// zero shared fields
...
...
@@ -203,14 +185,7 @@ if( atomI == targetAtom ){
// load coords, charge, ...
loadMutualInducedData
(
&
(
psA
[
tj
]),
&
jCoord
,
jDipole
,
jDipolePolar
);
calculateMutualInducedFieldPairIxn_kernel
(
iCoord
,
jCoord
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
,
psA
[
tj
].
damp
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
,
psA
[
tj
].
thole
,
&
(
inducedDipole
[
atomI
*
3
]),
&
(
inducedDipolePolar
[
atomI
*
3
]),
jDipole
,
jDipolePolar
,
cAmoebaSim
.
scalingDistanceCutoff
,
ijField
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
tj
],
ijField
#ifdef AMOEBA_DEBUG
,
debugArray
#endif
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedParticle.h
View file @
c22f00cf
...
...
@@ -26,58 +26,50 @@ struct MutualInducedParticle {
#endif
};
__device__
void
loadMutualInducedShared
(
struct
MutualInducedParticle
*
sA
,
unsigned
int
atomI
,
float4
*
atomCoord
,
float
*
inducedDipole
,
float
*
inducedDipolePolar
,
float2
*
dampingFactorAndThole
#ifdef GK
,
float
*
bornRadii
,
float
*
inducedDipoleS
,
float
*
inducedDipolePolarS
#endif
)
__device__
static
void
loadMutualInducedShared
(
MutualInducedParticle
*
sA
,
unsigned
int
atomI
)
{
// coordinates & charge
sA
->
x
=
atomCoord
[
atomI
].
x
;
sA
->
y
=
atomCoord
[
atomI
].
y
;
sA
->
z
=
atomCoord
[
atomI
].
z
;
sA
->
q
=
atomCoord
[
atomI
].
w
;
sA
->
x
=
cSim
.
pPosq
[
atomI
].
x
;
sA
->
y
=
cSim
.
pPosq
[
atomI
].
y
;
sA
->
z
=
cSim
.
pPosq
[
atomI
].
z
;
sA
->
q
=
cSim
.
pPosq
[
atomI
].
w
;
// dipole
sA
->
inducedDipole
[
0
]
=
i
nducedDipole
[
atomI
*
3
];
sA
->
inducedDipole
[
1
]
=
i
nducedDipole
[
atomI
*
3
+
1
];
sA
->
inducedDipole
[
2
]
=
i
nducedDipole
[
atomI
*
3
+
2
];
sA
->
inducedDipole
[
0
]
=
cAmoebaSim
.
pI
nducedDipole
[
atomI
*
3
];
sA
->
inducedDipole
[
1
]
=
cAmoebaSim
.
pI
nducedDipole
[
atomI
*
3
+
1
];
sA
->
inducedDipole
[
2
]
=
cAmoebaSim
.
pI
nducedDipole
[
atomI
*
3
+
2
];
// dipole polar
sA
->
inducedDipolePolar
[
0
]
=
i
nducedDipolePolar
[
atomI
*
3
];
sA
->
inducedDipolePolar
[
1
]
=
i
nducedDipolePolar
[
atomI
*
3
+
1
];
sA
->
inducedDipolePolar
[
2
]
=
i
nducedDipolePolar
[
atomI
*
3
+
2
];
sA
->
inducedDipolePolar
[
0
]
=
cAmoebaSim
.
pI
nducedDipolePolar
[
atomI
*
3
];
sA
->
inducedDipolePolar
[
1
]
=
cAmoebaSim
.
pI
nducedDipolePolar
[
atomI
*
3
+
1
];
sA
->
inducedDipolePolar
[
2
]
=
cAmoebaSim
.
pI
nducedDipolePolar
[
atomI
*
3
+
2
];
sA
->
damp
=
d
ampingFactorAndThole
[
atomI
].
x
;
sA
->
thole
=
d
ampingFactorAndThole
[
atomI
].
y
;
sA
->
damp
=
cAmoebaSim
.
pD
ampingFactorAndThole
[
atomI
].
x
;
sA
->
thole
=
cAmoebaSim
.
pD
ampingFactorAndThole
[
atomI
].
y
;
#ifdef GK
sA
->
bornRadius
=
b
ornRadii
[
atomI
];
sA
->
bornRadius
=
cSim
.
pB
ornRadii
[
atomI
];
// dipoleS
sA
->
inducedDipoleS
[
0
]
=
i
nducedDipoleS
[
atomI
*
3
];
sA
->
inducedDipoleS
[
1
]
=
i
nducedDipoleS
[
atomI
*
3
+
1
];
sA
->
inducedDipoleS
[
2
]
=
i
nducedDipoleS
[
atomI
*
3
+
2
];
sA
->
inducedDipoleS
[
0
]
=
cAmoebaSim
.
pI
nducedDipoleS
[
atomI
*
3
];
sA
->
inducedDipoleS
[
1
]
=
cAmoebaSim
.
pI
nducedDipoleS
[
atomI
*
3
+
1
];
sA
->
inducedDipoleS
[
2
]
=
cAmoebaSim
.
pI
nducedDipoleS
[
atomI
*
3
+
2
];
// dipole polar S
sA
->
inducedDipolePolarS
[
0
]
=
i
nducedDipolePolarS
[
atomI
*
3
];
sA
->
inducedDipolePolarS
[
1
]
=
i
nducedDipolePolarS
[
atomI
*
3
+
1
];
sA
->
inducedDipolePolarS
[
2
]
=
i
nducedDipolePolarS
[
atomI
*
3
+
2
];
sA
->
inducedDipolePolarS
[
0
]
=
cAmoebaSim
.
pI
nducedDipolePolarS
[
atomI
*
3
];
sA
->
inducedDipolePolarS
[
1
]
=
cAmoebaSim
.
pI
nducedDipolePolarS
[
atomI
*
3
+
1
];
sA
->
inducedDipolePolarS
[
2
]
=
cAmoebaSim
.
pI
nducedDipolePolarS
[
atomI
*
3
+
2
];
#endif
}
__device__
static
void
zeroMutualInducedParticleSharedField
(
struct
MutualInducedParticle
*
sA
)
__device__
static
void
zeroMutualInducedParticleSharedField
(
MutualInducedParticle
*
sA
)
{
// zero shared fields
...
...
@@ -101,44 +93,3 @@ __device__ static void zeroMutualInducedParticleSharedField( struct MutualInduce
#endif
}
// load struct and arrays w/ shared data in sA
__device__
void
loadMutualInducedData
(
struct
MutualInducedParticle
*
sA
,
float4
*
jCoord
,
float
*
jInducedDipole
,
float
*
jInducedDipolePolar
#ifdef GK
,
float
*
jBornRadius
,
float
*
jInducedDipoleS
,
float
*
jInducedDipolePolarS
#endif
)
{
// load coords, charge, ...
jCoord
->
x
=
sA
->
x
;
jCoord
->
y
=
sA
->
y
;
jCoord
->
z
=
sA
->
z
;
jCoord
->
w
=
sA
->
q
;
jInducedDipole
[
0
]
=
sA
->
inducedDipole
[
0
];
jInducedDipole
[
1
]
=
sA
->
inducedDipole
[
1
];
jInducedDipole
[
2
]
=
sA
->
inducedDipole
[
2
];
jInducedDipolePolar
[
0
]
=
sA
->
inducedDipolePolar
[
0
];
jInducedDipolePolar
[
1
]
=
sA
->
inducedDipolePolar
[
1
];
jInducedDipolePolar
[
2
]
=
sA
->
inducedDipolePolar
[
2
];
#ifdef GK
*
jBornRadius
=
sA
->
bornRadius
;
jInducedDipoleS
[
0
]
=
sA
->
inducedDipoleS
[
0
];
jInducedDipoleS
[
1
]
=
sA
->
inducedDipoleS
[
1
];
jInducedDipoleS
[
2
]
=
sA
->
inducedDipoleS
[
2
];
jInducedDipolePolarS
[
0
]
=
sA
->
inducedDipolePolarS
[
0
];
jInducedDipolePolarS
[
1
]
=
sA
->
inducedDipolePolarS
[
1
];
jInducedDipolePolarS
[
2
]
=
sA
->
inducedDipolePolarS
[
2
];
#endif
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
0 → 100644
View file @
c22f00cf
//-----------------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------------
#include "amoebaGpuTypes.h"
#include "amoebaCudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h"
#include <stdio.h>
using
namespace
std
;
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaAmoebaGmxSimulation
cAmoebaSim
;
void
SetCalculateAmoebaCudaPmeMutualInducedFieldSim
(
amoebaGpuContext
amoebaGpu
)
{
cudaError_t
status
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
status
=
cudaMemcpyToSymbol
(
cSim
,
&
gpu
->
sim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"SetCalculateAmoebaCudaPmeMutualInducedFieldSim: cudaMemcpyToSymbol: SetSim copy to cSim failed"
);
status
=
cudaMemcpyToSymbol
(
cAmoebaSim
,
&
amoebaGpu
->
amoebaSim
,
sizeof
(
cudaAmoebaGmxSimulation
));
RTERROR
(
status
,
"SetCalculateAmoebaCudaPmeMutualInducedFieldSim: cudaMemcpyToSymbol: SetSim copy to cAmoebaSim failed"
);
}
void
GetCalculateAmoebaCudaPmeMutualInducedFieldSim
(
amoebaGpuContext
amoebaGpu
)
{
cudaError_t
status
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
status
=
cudaMemcpyFromSymbol
(
&
gpu
->
sim
,
cSim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"GetCalculateAmoebaCudaPmeMutualInducedFieldSim: cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
status
=
cudaMemcpyFromSymbol
(
&
amoebaGpu
->
amoebaSim
,
cAmoebaSim
,
sizeof
(
cudaAmoebaGmxSimulation
));
RTERROR
(
status
,
"GetCalculateAmoebaCudaPmeMutualInducedFieldSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed"
);
}
//#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
#include "kCalculateAmoebaCudaMutualInducedParticle.h"
// file includes FixedFieldParticle struct definition/load/unload struct and body kernel for fixed E-field
__device__
void
calculatePmeDirectMutualInducedFieldPairIxn_kernel
(
MutualInducedParticle
&
atomI
,
MutualInducedParticle
&
atomJ
,
float
uscale
,
float
fields
[
4
][
3
]
#ifdef AMOEBA_DEBUG
,
float4
*
pullBack
#endif
){
// compute the real space portion of the Ewald summation
float
xr
=
atomJ
.
x
-
atomI
.
x
;
float
yr
=
atomJ
.
y
-
atomI
.
y
;
float
zr
=
atomJ
.
z
-
atomI
.
z
;
// periodic boundary conditions
xr
-=
floor
(
xr
*
cSim
.
invPeriodicBoxSizeX
+
0.5
f
)
*
cSim
.
periodicBoxSizeX
;
yr
-=
floor
(
yr
*
cSim
.
invPeriodicBoxSizeY
+
0.5
f
)
*
cSim
.
periodicBoxSizeY
;
zr
-=
floor
(
zr
*
cSim
.
invPeriodicBoxSizeZ
+
0.5
f
)
*
cSim
.
periodicBoxSizeZ
;
float
r2
=
xr
*
xr
+
yr
*
yr
+
zr
*
zr
;
float
r
=
sqrtf
(
r2
);
// calculate the error function damping terms
float
ralpha
=
cSim
.
alphaEwald
*
r
;
float
bn
[
3
];
bn
[
0
]
=
erfc
(
ralpha
)
/
r
;
float
alsq2
=
2.0
f
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
;
float
alsq2n
=
1.0
f
/
(
cAmoebaSim
.
sqrtPi
*
cSim
.
alphaEwald
);
float
exp2a
=
exp
(
-
(
ralpha
*
ralpha
));
alsq2n
*=
alsq2
;
bn
[
1
]
=
(
bn
[
0
]
+
alsq2n
*
exp2a
)
/
r2
;
alsq2n
*=
alsq2
;
bn
[
2
]
=
(
3.0
f
*
bn
[
1
]
+
alsq2n
*
exp2a
)
/
r2
;
// compute the error function scaled and unscaled terms
float
scale3
=
1.0
f
;
float
scale5
=
1.0
f
;
float
damp
=
atomI
.
damp
*
atomJ
.
damp
;
if
(
damp
!=
0.0
f
){
float
ratio
=
(
r
/
damp
);
ratio
=
ratio
*
ratio
*
ratio
;
float
pgamma
=
atomI
.
thole
<
atomJ
.
thole
?
atomI
.
thole
:
atomJ
.
thole
;
damp
=
-
pgamma
*
ratio
;
if
(
damp
>
-
50.0
f
)
{
float
expdamp
=
exp
(
damp
);
scale3
=
1.0
f
-
expdamp
;
scale5
=
1.0
f
-
expdamp
*
(
1.0
f
-
damp
);
}
}
float
dsc3
=
uscale
*
scale3
;
float
dsc5
=
uscale
*
scale5
;
float
r3
=
(
r
*
r2
);
float
r5
=
(
r3
*
r2
);
float
rr3
=
(
1.0
f
-
dsc3
)
/
r3
;
float
rr5
=
3.0
f
*
(
1.0
f
-
dsc5
)
/
r5
;
float
duir
=
atomI
.
inducedDipole
[
0
]
*
xr
+
atomI
.
inducedDipole
[
1
]
*
yr
+
atomI
.
inducedDipole
[
2
]
*
zr
;
float
dukr
=
atomJ
.
inducedDipole
[
0
]
*
xr
+
atomJ
.
inducedDipole
[
1
]
*
yr
+
atomJ
.
inducedDipole
[
2
]
*
zr
;
float
puir
=
atomI
.
inducedDipolePolar
[
0
]
*
xr
+
atomI
.
inducedDipolePolar
[
1
]
*
yr
+
atomI
.
inducedDipolePolar
[
2
]
*
zr
;
float
pukr
=
atomJ
.
inducedDipolePolar
[
0
]
*
xr
+
atomJ
.
inducedDipolePolar
[
1
]
*
yr
+
atomJ
.
inducedDipolePolar
[
2
]
*
zr
;
float
fimd
[
3
],
fkmd
[
3
];
float
fimp
[
3
],
fkmp
[
3
];
float
fid
[
3
],
fkd
[
3
];
float
fip
[
3
],
fkp
[
3
];
bn
[
1
]
*=
-
1.0
f
;
fimd
[
0
]
=
bn
[
1
]
*
atomJ
.
inducedDipole
[
0
]
+
bn
[
2
]
*
dukr
*
xr
;
fimd
[
1
]
=
bn
[
1
]
*
atomJ
.
inducedDipole
[
1
]
+
bn
[
2
]
*
dukr
*
yr
;
fimd
[
2
]
=
bn
[
1
]
*
atomJ
.
inducedDipole
[
2
]
+
bn
[
2
]
*
dukr
*
zr
;
fkmd
[
0
]
=
bn
[
1
]
*
atomI
.
inducedDipole
[
0
]
+
bn
[
2
]
*
duir
*
xr
;
fkmd
[
1
]
=
bn
[
1
]
*
atomI
.
inducedDipole
[
1
]
+
bn
[
2
]
*
duir
*
yr
;
fkmd
[
2
]
=
bn
[
1
]
*
atomI
.
inducedDipole
[
2
]
+
bn
[
2
]
*
duir
*
zr
;
fimp
[
0
]
=
bn
[
1
]
*
atomJ
.
inducedDipolePolar
[
0
]
+
bn
[
2
]
*
pukr
*
xr
;
fimp
[
1
]
=
bn
[
1
]
*
atomJ
.
inducedDipolePolar
[
1
]
+
bn
[
2
]
*
pukr
*
yr
;
fimp
[
2
]
=
bn
[
1
]
*
atomJ
.
inducedDipolePolar
[
2
]
+
bn
[
2
]
*
pukr
*
zr
;
fkmp
[
0
]
=
bn
[
1
]
*
atomI
.
inducedDipolePolar
[
0
]
+
bn
[
2
]
*
puir
*
xr
;
fkmp
[
1
]
=
bn
[
1
]
*
atomI
.
inducedDipolePolar
[
1
]
+
bn
[
2
]
*
puir
*
yr
;
fkmp
[
2
]
=
bn
[
1
]
*
atomI
.
inducedDipolePolar
[
2
]
+
bn
[
2
]
*
puir
*
zr
;
rr3
*=
-
1.0
f
;;
fid
[
0
]
=
rr3
*
atomJ
.
inducedDipole
[
0
]
+
rr5
*
dukr
*
xr
;
fid
[
1
]
=
rr3
*
atomJ
.
inducedDipole
[
1
]
+
rr5
*
dukr
*
yr
;
fid
[
2
]
=
rr3
*
atomJ
.
inducedDipole
[
2
]
+
rr5
*
dukr
*
zr
;
fkd
[
0
]
=
rr3
*
atomI
.
inducedDipole
[
0
]
+
rr5
*
duir
*
xr
;
fkd
[
1
]
=
rr3
*
atomI
.
inducedDipole
[
1
]
+
rr5
*
duir
*
yr
;
fkd
[
2
]
=
rr3
*
atomI
.
inducedDipole
[
2
]
+
rr5
*
duir
*
zr
;
fip
[
0
]
=
rr3
*
atomJ
.
inducedDipolePolar
[
0
]
+
rr5
*
pukr
*
xr
;
fip
[
1
]
=
rr3
*
atomJ
.
inducedDipolePolar
[
1
]
+
rr5
*
pukr
*
yr
;
fip
[
2
]
=
rr3
*
atomJ
.
inducedDipolePolar
[
2
]
+
rr5
*
pukr
*
zr
;
fkp
[
0
]
=
rr3
*
atomI
.
inducedDipolePolar
[
0
]
+
rr5
*
puir
*
xr
;
fkp
[
1
]
=
rr3
*
atomI
.
inducedDipolePolar
[
1
]
+
rr5
*
puir
*
yr
;
fkp
[
2
]
=
rr3
*
atomI
.
inducedDipolePolar
[
2
]
+
rr5
*
puir
*
zr
;
// increment the field at each site due to this interaction
if
(
r2
<=
cAmoebaSim
.
cutoffDistance2
){
fields
[
0
][
0
]
=
fimd
[
0
]
-
fid
[
0
];
fields
[
1
][
0
]
=
fkmd
[
0
]
-
fkd
[
0
];
fields
[
2
][
0
]
=
fimp
[
0
]
-
fip
[
0
];
fields
[
3
][
0
]
=
fkmp
[
0
]
-
fkp
[
0
];
fields
[
0
][
1
]
=
fimd
[
1
]
-
fid
[
1
];
fields
[
1
][
1
]
=
fkmd
[
1
]
-
fkd
[
1
];
fields
[
2
][
1
]
=
fimp
[
1
]
-
fip
[
1
];
fields
[
3
][
1
]
=
fkmp
[
1
]
-
fkp
[
1
];
fields
[
0
][
2
]
=
fimd
[
2
]
-
fid
[
2
];
fields
[
1
][
2
]
=
fkmd
[
2
]
-
fkd
[
2
];
fields
[
2
][
2
]
=
fimp
[
2
]
-
fip
[
2
];
fields
[
3
][
2
]
=
fkmp
[
2
]
-
fkp
[
2
];
}
else
{
fields
[
0
][
0
]
=
0.0
f
;
fields
[
1
][
0
]
=
0.0
f
;
fields
[
2
][
0
]
=
0.0
f
;
fields
[
3
][
0
]
=
0.0
f
;
fields
[
0
][
1
]
=
0.0
f
;
fields
[
1
][
1
]
=
0.0
f
;
fields
[
2
][
1
]
=
0.0
f
;
fields
[
3
][
1
]
=
0.0
f
;
fields
[
0
][
2
]
=
0.0
f
;
fields
[
1
][
2
]
=
0.0
f
;
fields
[
2
][
2
]
=
0.0
f
;
fields
[
3
][
2
]
=
0.0
f
;
}
#ifdef AMOEBA_DEBUG
pullBack
[
0
].
x
=
xr
;
pullBack
[
0
].
y
=
yr
;
pullBack
[
0
].
z
=
zr
;
pullBack
[
0
].
w
=
r2
;
pullBack
[
1
].
x
=
alsq2
;
pullBack
[
1
].
y
=
bn
[
0
];
pullBack
[
1
].
z
=
bn
[
2
];
pullBack
[
1
].
w
=
exp2a
;
/*
pullBack[1].x = atomJ.x - atomI.x;
pullBack[1].y = atomJ.y - atomI.y;
pullBack[1].z = atomJ.z - atomI.z;
pullBack[1].w = (atomJ.x - atomI.x)*(atomJ.x - atomI.x) + (atomJ.y - atomI.y)*(atomJ.y - atomI.y)+ (atomJ.z - atomI.z)*(atomJ.z - atomI.z);
pullBack[1].x = scale3;
pullBack[1].y = scale5;
pullBack[1].z = scale7;
*/
#endif
}
// Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateAmoebaCudaPmeMutualInducedField.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateAmoebaCudaPmeMutualInducedField.h"
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
static
void
kInitializeMutualInducedField_kernel
(
int
numberOfAtoms
,
float
*
fixedEField
,
float
*
fixedEFieldPolar
,
float
*
polarizability
,
float
*
inducedDipole
,
float
*
inducedDipolePolar
)
{
int
threadId
=
__mul24
(
blockIdx
.
x
,
blockDim
.
x
)
+
threadIdx
.
x
;
if
(
threadId
>=
3
*
numberOfAtoms
)
return
;
fixedEField
[
threadId
]
*=
polarizability
[
threadId
];
inducedDipole
[
threadId
]
=
fixedEField
[
threadId
];
fixedEFieldPolar
[
threadId
]
*=
polarizability
[
threadId
];
inducedDipolePolar
[
threadId
]
=
fixedEFieldPolar
[
threadId
];
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
static
void
kReduceMutualInducedFieldDelta_kernel
(
int
numberOfEntries
,
float
*
arrayOfDeltas1
,
float
*
arrayOfDeltas2
,
float
*
epsilon
)
{
extern
__shared__
float2
delta
[];
delta
[
threadIdx
.
x
].
x
=
0.0
f
;
delta
[
threadIdx
.
x
].
y
=
0.0
f
;
unsigned
int
pos
=
threadIdx
.
x
;
// load deltas
while
(
pos
<
numberOfEntries
)
{
delta
[
threadIdx
.
x
].
x
+=
arrayOfDeltas1
[
pos
];
delta
[
threadIdx
.
x
].
y
+=
arrayOfDeltas2
[
pos
];
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
__syncthreads
();
// sum the deltas
for
(
int
offset
=
1
;
offset
<
blockDim
.
x
;
offset
*=
2
)
{
if
(
threadIdx
.
x
+
offset
<
blockDim
.
x
&&
(
threadIdx
.
x
&
(
2
*
offset
-
1
))
==
0
)
{
delta
[
threadIdx
.
x
].
x
+=
delta
[
threadIdx
.
x
+
offset
].
x
;
delta
[
threadIdx
.
x
].
y
+=
delta
[
threadIdx
.
x
+
offset
].
y
;
}
__syncthreads
();
}
// set epsilons
if
(
threadIdx
.
x
==
0
)
{
epsilon
[
0
]
=
delta
[
0
].
x
>
delta
[
0
].
y
?
delta
[
0
].
x
:
delta
[
0
].
y
;
epsilon
[
0
]
=
4.8033324
f
*
sqrtf
(
epsilon
[
0
]
/
(
(
float
)
(
numberOfEntries
/
3
))
);
#ifdef AMOEBA_DEBUG
epsilon
[
1
]
=
4.8033324
f
*
sqrtf
(
delta
[
0
].
x
/
(
(
float
)
(
numberOfEntries
/
3
))
);
epsilon
[
2
]
=
4.8033324
f
*
sqrtf
(
delta
[
0
].
y
/
(
(
float
)
(
numberOfEntries
/
3
))
);
#endif
}
}
/**
matrixProduct/matrixProductP contains epsilon**2 on output
*/
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
static
void
kSorUpdateMutualInducedField_kernel
(
int
numberOfEntries
,
float
*
polarizability
,
float
*
inducedDipole
,
float
*
inducedDipoleP
,
float
*
fixedEField
,
float
*
fixedEFieldP
,
float
*
matrixProduct
,
float
*
matrixProductP
)
{
int
threadId
=
__mul24
(
blockIdx
.
x
,
blockDim
.
x
)
+
threadIdx
.
x
;
if
(
threadId
>=
3
*
numberOfEntries
)
return
;
float
previousDipole
=
inducedDipole
[
threadId
];
float
previousDipoleP
=
inducedDipoleP
[
threadId
];
// add self terms to fields
const
float
term
=
(
4.0
f
/
3.0
f
)
*
(
cSim
.
alphaEwald
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
)
/
cAmoebaSim
.
sqrtPi
;
matrixProduct
[
threadId
]
+=
term
*
previousDipole
;
matrixProductP
[
threadId
]
+=
term
*
previousDipoleP
;
inducedDipole
[
threadId
]
=
fixedEField
[
threadId
]
+
polarizability
[
threadId
]
*
matrixProduct
[
threadId
];
inducedDipoleP
[
threadId
]
=
fixedEFieldP
[
threadId
]
+
polarizability
[
threadId
]
*
matrixProductP
[
threadId
];
const
float
polarSOR
=
0.70
f
;
inducedDipole
[
threadId
]
=
previousDipole
+
polarSOR
*
(
inducedDipole
[
threadId
]
-
previousDipole
);
inducedDipoleP
[
threadId
]
=
previousDipoleP
+
polarSOR
*
(
inducedDipoleP
[
threadId
]
-
previousDipoleP
);
matrixProduct
[
threadId
]
=
(
inducedDipole
[
threadId
]
-
previousDipole
)
*
(
inducedDipole
[
threadId
]
-
previousDipole
);
matrixProductP
[
threadId
]
=
(
inducedDipoleP
[
threadId
]
-
previousDipoleP
)
*
(
inducedDipoleP
[
threadId
]
-
previousDipoleP
);
}
// reduce psWorkArray_3_1
// reduce psWorkArray_3_2
static
void
kReduceMutualInducedFields
(
amoebaGpuContext
amoebaGpu
,
CUDAStream
<
float
>*
outputArray
,
CUDAStream
<
float
>*
outputPolarArray
)
{
kReduceFields_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
fieldReduceThreadsPerBlock
>>>
(
amoebaGpu
->
paddedNumberOfAtoms
*
3
,
amoebaGpu
->
outputBuffers
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
outputArray
->
_pDevStream
[
0
]
);
LAUNCHERROR
(
"kReducePmeMI_Fields1"
);
kReduceFields_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
fieldReduceThreadsPerBlock
>>>
(
amoebaGpu
->
paddedNumberOfAtoms
*
3
,
amoebaGpu
->
outputBuffers
,
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
outputPolarArray
->
_pDevStream
[
0
]
);
LAUNCHERROR
(
"kReducePmeMI_Fields2"
);
}
/**---------------------------------------------------------------------------------------
Compute mutual induce field
@param amoebaGpu amoebaGpu context
--------------------------------------------------------------------------------------- */
static
void
cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply
(
amoebaGpuContext
amoebaGpu
,
CUDAStream
<
float
>*
outputArray
,
CUDAStream
<
float
>*
outputPolarArray
)
{
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
#ifdef AMOEBA_DEBUG
int
targetAtom
=
0
;
static
const
char
*
methodName
=
"cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply"
;
static
int
iteration
=
1
;
if
(
1
&&
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s: scalingDistanceCutoff=%.5f
\n
"
,
methodName
,
amoebaGpu
->
scalingDistanceCutoff
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
*
paddedNumberOfAtoms
,
1
,
"DebugArray"
);
memset
(
debugArray
->
_pSysStream
[
0
],
0
,
sizeof
(
float
)
*
4
*
paddedNumberOfAtoms
*
paddedNumberOfAtoms
);
debugArray
->
Upload
();
#endif
kClearFields_3
(
amoebaGpu
,
2
);
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaPmeMutualInducedFieldN2ByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondThreadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
debugArray
->
_pDevStream
[
0
],
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
]
);
#endif
}
else
{
#ifdef AMOEBA_DEBUG
(
void
)
fprintf
(
amoebaGpu
->
log
,
"N2 no warp
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"AmoebaN2Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
\n
"
,
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondThreadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
MutualInducedParticle
),
sizeof
(
MutualInducedParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#endif
kCalculateAmoebaPmeMutualInducedFieldN2_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondThreadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
debugArray
->
_pDevStream
[
0
],
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
]
);
#endif
}
LAUNCHERROR
(
"kCalculateAmoebaPmeMutualInducedField"
);
kReduceMutualInducedFields
(
amoebaGpu
,
outputArray
,
outputPolarArray
);
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
&&
iteration
==
1
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished maxtrixMultiply kernel execution %d
\n
"
,
iteration
);
(
void
)
fflush
(
amoebaGpu
->
log
);
outputArray
->
Download
();
outputPolarArray
->
Download
();
debugArray
->
Download
();
int
maxPrint
=
5
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
int
indexOffset
=
ii
*
3
;
// MI
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Mult[%16.9e %16.9e %16.9e] "
,
outputArray
->
_pSysStream
[
0
][
indexOffset
],
outputArray
->
_pSysStream
[
0
][
indexOffset
+
1
],
outputArray
->
_pSysStream
[
0
][
indexOffset
+
2
]
);
// MI polar
(
void
)
fprintf
(
amoebaGpu
->
log
,
"MultP[%16.9e %16.9e %16.9e]
\n
"
,
outputPolarArray
->
_pSysStream
[
0
][
indexOffset
],
outputPolarArray
->
_pSysStream
[
0
][
indexOffset
+
1
],
outputPolarArray
->
_pSysStream
[
0
][
indexOffset
+
2
]
);
if
(
ii
==
maxPrint
&&
(
gpu
->
natoms
-
maxPrint
)
>
ii
){
ii
=
gpu
->
natoms
-
maxPrint
;
}
}
/*
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
for( int jj = 0; jj < gpu->natoms; jj++ ){
int debugIndex = jj;
(void) fprintf( amoebaGpu->log,"%5d PmeMIMult\n", jj );
for( int kk = 0; kk < 7; kk++ ){
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e %16.9e]\n",
debugArray->_pSysStream[0][debugIndex].x, debugArray->_pSysStream[0][debugIndex].y,
debugArray->_pSysStream[0][debugIndex].z, debugArray->_pSysStream[0][debugIndex].w );
debugIndex += paddedNumberOfAtoms;
}
(void) fprintf( amoebaGpu->log,"\n" );
}
*/
(
void
)
fflush
(
amoebaGpu
->
log
);
iteration
++
;
}
delete
debugArray
;
#endif
}
/**---------------------------------------------------------------------------------------
Compute mutual induce field
@param amoebaGpu amoebaGpu context
--------------------------------------------------------------------------------------- */
//#define GET_EFIELD_FROM_FILE
#ifdef GET_EFIELD_FROM_FILE
#include <stdlib.h>
#endif
static
void
cudaComputeAmoebaPmeMutualInducedFieldBySOR
(
amoebaGpuContext
amoebaGpu
)
{
// ---------------------------------------------------------------------------------------
#ifdef AMOEBA_DEBUG
static
const
char
*
methodName
=
"cudaComputeAmoebaPmeMutualInducedFieldBySOR"
;
static
int
timestep
=
0
;
std
::
vector
<
int
>
fileId
;
timestep
++
;
fileId
.
resize
(
2
);
fileId
[
0
]
=
timestep
;
fileId
[
1
]
=
1
;
#endif
// ---------------------------------------------------------------------------------------
int
done
;
int
iteration
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
int
numOfElems
=
gpu
->
natoms
*
3
;
int
numThreads
=
min
(
THREADS_PER_BLOCK
,
numOfElems
);
int
numBlocks
=
numOfElems
/
numThreads
;
if
(
(
numOfElems
%
numThreads
)
!=
0
)
numBlocks
++
;
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s %d numOfElems=%d numThreads=%d numBlocks=%d "
"maxIterations=%d targetEpsilon=%.3e
\n
"
,
methodName
,
gpu
->
natoms
,
numOfElems
,
numThreads
,
numBlocks
,
amoebaGpu
->
mutualInducedMaxIterations
,
amoebaGpu
->
mutualInducedTargetEpsilon
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
#ifdef GET_EFIELD_FROM_FILE
std
::
string
fileName
=
"waterEField.txt"
;
StringVectorVector
fileContents
;
readFile
(
fileName
,
fileContents
);
unsigned
int
offset
=
0
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Read file: %s %u
\n
"
,
fileName
.
c_str
(),
fileContents
.
size
()
);
fflush
(
amoebaGpu
->
log
);
for
(
unsigned
int
ii
=
1
;
ii
<
fileContents
.
size
()
-
1
;
ii
++
){
StringVector
lineTokens
=
fileContents
[
ii
];
unsigned
int
lineTokenIndex
=
1
;
// (void) fprintf( amoebaGpu->log, " %u %s %s\n", ii, lineTokens[0].c_str(), lineTokens[lineTokenIndex].c_str() ); fflush( amoebaGpu->log );
amoebaGpu
->
psE_Field
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psE_Field
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psE_Field
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
offset
-=
3
;
amoebaGpu
->
psE_FieldPolar
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psE_FieldPolar
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psE_FieldPolar
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
}
float
conversion
=
100.0
f
;
for
(
int
ii
=
0
;
ii
<
3
*
gpu
->
natoms
;
ii
++
){
amoebaGpu
->
psE_Field
->
_pSysStream
[
0
][
ii
]
*=
conversion
;
amoebaGpu
->
psE_FieldPolar
->
_pSysStream
[
0
][
ii
]
*=
conversion
;
}
amoebaGpu
->
psE_Field
->
Upload
();
amoebaGpu
->
psE_FieldPolar
->
Upload
();
#endif
// ---------------------------------------------------------------------------------------
// set E_Field & E_FieldPolar] to [ E_Field & E_FieldPolar]*Polarizability
// initialize [ InducedDipole & InducedDipolePolar ] to [ E_Field & E_FieldPolar]*Polarizability
kInitializeMutualInducedField_kernel
<<<
numBlocks
,
numThreads
>>>
(
gpu
->
natoms
,
amoebaGpu
->
psE_Field
->
_pDevStream
[
0
],
amoebaGpu
->
psE_FieldPolar
->
_pDevStream
[
0
],
amoebaGpu
->
psPolarizability
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipolePolar
->
_pDevStream
[
0
]
);
LAUNCHERROR
(
"AmoebaPmeMutualInducedFieldSetup"
);
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
amoebaGpu
->
psE_Field
->
Download
();
amoebaGpu
->
psE_FieldPolar
->
Download
();
amoebaGpu
->
psInducedDipole
->
Download
(),
amoebaGpu
->
psInducedDipolePolar
->
Download
();
amoebaGpu
->
psPolarizability
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s Initial setup for matrix multiply
\n
"
,
methodName
);
int
offset
=
0
;
int
maxPrint
=
20000
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%4d pol=%12.4e "
,
ii
,
amoebaGpu
->
psPolarizability
->
_pSysStream
[
0
][
offset
]
);
if
(
amoebaGpu
->
psPolarizability
->
_pSysStream
[
0
][
offset
]
!=
amoebaGpu
->
psPolarizability
->
_pSysStream
[
0
][
offset
+
1
]
||
amoebaGpu
->
psPolarizability
->
_pSysStream
[
0
][
offset
]
!=
amoebaGpu
->
psPolarizability
->
_pSysStream
[
0
][
offset
+
2
]
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"PolX!!! %12.4e %12.4e "
,
amoebaGpu
->
psPolarizability
->
_pSysStream
[
0
][
offset
+
1
],
amoebaGpu
->
psPolarizability
->
_pSysStream
[
0
][
offset
+
2
]
);
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
" E[%14.6e %14.6e %14.6e] Mi[%14.6e %14.6e %14.6e] "
,
amoebaGpu
->
psE_Field
->
_pSysStream
[
0
][
offset
],
amoebaGpu
->
psE_Field
->
_pSysStream
[
0
][
offset
+
1
],
amoebaGpu
->
psE_Field
->
_pSysStream
[
0
][
offset
+
2
],
amoebaGpu
->
psInducedDipole
->
_pSysStream
[
0
][
offset
],
amoebaGpu
->
psInducedDipole
->
_pSysStream
[
0
][
offset
+
1
],
amoebaGpu
->
psInducedDipole
->
_pSysStream
[
0
][
offset
+
2
]
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Ep[%14.6e %14.6e %14.6e] Mip[%14.6e %14.6e %14.6e]
\n
"
,
amoebaGpu
->
psE_FieldPolar
->
_pSysStream
[
0
][
offset
],
amoebaGpu
->
psE_FieldPolar
->
_pSysStream
[
0
][
offset
+
1
],
amoebaGpu
->
psE_FieldPolar
->
_pSysStream
[
0
][
offset
+
2
],
amoebaGpu
->
psInducedDipolePolar
->
_pSysStream
[
0
][
offset
],
amoebaGpu
->
psInducedDipolePolar
->
_pSysStream
[
0
][
offset
+
1
],
amoebaGpu
->
psInducedDipolePolar
->
_pSysStream
[
0
][
offset
+
2
]
);
offset
+=
3
;
if
(
ii
==
maxPrint
&&
(
ii
<
(
gpu
->
natoms
-
maxPrint
)
)
)
ii
=
(
gpu
->
natoms
-
maxPrint
);
}
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
// ---------------------------------------------------------------------------------------
done
=
0
;
iteration
=
1
;
while
(
!
done
){
// matrix multiply
cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply
(
amoebaGpu
,
amoebaGpu
->
psWorkVector
[
0
],
amoebaGpu
->
psWorkVector
[
1
]
);
LAUNCHERROR
(
"cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply Loop
\n
"
);
#ifdef GET_EFIELD_FROM_FILE
{
std
::
string
fileName
=
"waterInduceRecip.txt"
;
StringVectorVector
fileContents
;
readFile
(
fileName
,
fileContents
);
unsigned
int
offset
=
0
;
amoebaGpu
->
psWorkVector
[
0
]
->
Download
();
amoebaGpu
->
psWorkVector
[
1
]
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Read file: %s %u
\n
"
,
fileName
.
c_str
(),
fileContents
.
size
()
);
fflush
(
amoebaGpu
->
log
);
float
conversion
=
100.0
f
;
for
(
unsigned
int
ii
=
1
;
ii
<
fileContents
.
size
()
-
1
;
ii
++
){
StringVector
lineTokens
=
fileContents
[
ii
];
unsigned
int
lineTokenIndex
=
1
;
// (void) fprintf( amoebaGpu->log, " %u %s %s\n", ii, lineTokens[0].c_str(), lineTokens[lineTokenIndex].c_str() ); fflush( amoebaGpu->log );
amoebaGpu
->
psWorkVector
[
0
]
->
_pSysStream
[
0
][
offset
++
]
+=
conversion
*
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psWorkVector
[
0
]
->
_pSysStream
[
0
][
offset
++
]
+=
conversion
*
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psWorkVector
[
0
]
->
_pSysStream
[
0
][
offset
++
]
+=
conversion
*
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
offset
-=
3
;
amoebaGpu
->
psWorkVector
[
1
]
->
_pSysStream
[
0
][
offset
++
]
+=
conversion
*
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psWorkVector
[
1
]
->
_pSysStream
[
0
][
offset
++
]
+=
conversion
*
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psWorkVector
[
1
]
->
_pSysStream
[
0
][
offset
++
]
+=
conversion
*
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
}
amoebaGpu
->
psWorkVector
[
0
]
->
Upload
();
amoebaGpu
->
psWorkVector
[
1
]
->
Upload
();
}
#endif
// post matrix multiply
kSorUpdateMutualInducedField_kernel
<<<
numBlocks
,
numThreads
>>>
(
gpu
->
natoms
,
amoebaGpu
->
psPolarizability
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipolePolar
->
_pDevStream
[
0
],
amoebaGpu
->
psE_Field
->
_pDevStream
[
0
],
amoebaGpu
->
psE_FieldPolar
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkVector
[
0
]
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkVector
[
1
]
->
_pDevStream
[
0
]
);
LAUNCHERROR
(
"kSorUpdatePmeMutualInducedField"
);
// get total epsilon -- performing sums on gpu
kReduceMutualInducedFieldDelta_kernel
<<<
1
,
amoebaGpu
->
epsilonThreadsPerBlock
,
2
*
sizeof
(
float
)
*
amoebaGpu
->
epsilonThreadsPerBlock
>>>
(
3
*
gpu
->
natoms
,
amoebaGpu
->
psWorkVector
[
0
]
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkVector
[
1
]
->
_pDevStream
[
0
],
amoebaGpu
->
psCurrentEpsilon
->
_pDevStream
[
0
]
);
LAUNCHERROR
(
"kReducePmeMutualInducedFieldDelta"
);
if
(
amoebaGpu
->
log
){
trackMutualInducedIterations
(
amoebaGpu
,
iteration
);
}
// Debye=4.8033324f
amoebaGpu
->
psCurrentEpsilon
->
Download
();
float
currentEpsilon
=
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
0
];
amoebaGpu
->
mutualInducedCurrentEpsilon
=
currentEpsilon
;
if
(
iteration
>
amoebaGpu
->
mutualInducedMaxIterations
||
amoebaGpu
->
mutualInducedCurrentEpsilon
<
amoebaGpu
->
mutualInducedTargetEpsilon
){
done
=
1
;
}
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
amoebaGpu
->
psInducedDipole
->
Download
();
amoebaGpu
->
psInducedDipolePolar
->
Download
();
#if 1
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s iteration=%3d eps %14.6e [%14.6e %14.6e] done=%d
\n
"
,
methodName
,
iteration
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
1
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
2
],
done
);
#else
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s iteration=%3d eps %14.6e %14.6e crrntEps=%14.6e %14.6e %14.6e %14.6e done=%d
\n
"
,
methodName
,
iteration
,
sum1
,
sum2
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
0
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
1
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
2
],
done
);
#endif
(
void
)
fflush
(
amoebaGpu
->
log
);
int
offset
=
0
;
int
maxPrint
=
20
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%4d "
,
ii
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
" Mi[%14.6e %14.6e %14.6e] "
,
amoebaGpu
->
psInducedDipole
->
_pSysStream
[
0
][
offset
],
amoebaGpu
->
psInducedDipole
->
_pSysStream
[
0
][
offset
+
1
],
amoebaGpu
->
psInducedDipole
->
_pSysStream
[
0
][
offset
+
2
]
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Mip[%14.6e %14.6e %14.6e]
\n
"
,
amoebaGpu
->
psInducedDipolePolar
->
_pSysStream
[
0
][
offset
],
amoebaGpu
->
psInducedDipolePolar
->
_pSysStream
[
0
][
offset
+
1
],
amoebaGpu
->
psInducedDipolePolar
->
_pSysStream
[
0
][
offset
+
2
]
);
if
(
ii
==
maxPrint
&&
(
ii
<
(
gpu
->
natoms
-
maxPrint
)
)
){
ii
=
(
gpu
->
natoms
-
maxPrint
);
offset
=
3
*
(
ii
+
1
);
}
else
{
offset
+=
3
;
}
}
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
iteration
++
;
}
amoebaGpu
->
mutualInducedDone
=
done
;
amoebaGpu
->
mutualInducedConverged
=
(
!
done
||
iteration
>
amoebaGpu
->
mutualInducedMaxIterations
)
?
0
:
1
;
#ifdef AMOEBA_DEBUG
/*
if( 0 ){
std::vector<int> fileId;
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipole, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipolePolar, outputVector );
cudaWriteVectorOfDoubleVectorsToFile( "CudaPmeMI", fileId, outputVector );
}
*/
#endif
// ---------------------------------------------------------------------------------------
}
void
cudaComputeAmoebaPmeMutualInducedField
(
amoebaGpuContext
amoebaGpu
)
{
if
(
amoebaGpu
->
mutualInducedIterativeMethod
==
0
){
cudaComputeAmoebaPmeMutualInducedFieldBySOR
(
amoebaGpu
);
}
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
0 → 100644
View file @
c22f00cf
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaScaleFactors.h"
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_NONBOND_THREADS_PER_BLOCK
,
1
)
#endif
void
METHOD_NAME
(
kCalculateAmoebaPmeMutualInducedField
,
_kernel
)(
unsigned
int
*
workUnit
,
float
*
outputField
,
float
*
outputFieldPolar
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
,
unsigned
int
targetAtom
#endif
){
extern
__shared__
MutualInducedParticle
sA
[];
unsigned
int
totalWarps
=
gridDim
.
x
*
blockDim
.
x
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
lasty
=
0xFFFFFFFF
;
const
float
uscale
=
1
.
0
f
;
#ifdef AMOEBA_DEBUG
float4
pullBack
[
4
];
#endif
while
(
pos
<
end
)
{
unsigned
int
x
;
unsigned
int
y
;
bool
bExclusionFlag
;
// Extract cell coordinates
decodeCell
(
workUnit
[
pos
],
&
x
,
&
y
,
&
bExclusionFlag
);
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
MutualInducedParticle
*
psA
=
&
sA
[
tbx
];
unsigned
int
atomI
=
x
+
tgx
;
MutualInducedParticle
localParticle
;
loadMutualInducedShared
(
&
localParticle
,
atomI
);
float
fieldSum
[
3
];
float
fieldPolarSum
[
3
];
// 0: field at i due to j
// 1: field at i due to j polar
fieldSum
[
0
]
=
0
.
0
f
;
fieldSum
[
1
]
=
0
.
0
f
;
fieldSum
[
2
]
=
0
.
0
f
;
fieldPolarSum
[
0
]
=
0
.
0
f
;
fieldPolarSum
[
1
]
=
0
.
0
f
;
fieldPolarSum
[
2
]
=
0
.
0
f
;
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
// load shared data
loadMutualInducedShared
(
&
(
sA
[
threadIdx
.
x
]),
atomI
);
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
ijField
[
4
][
3
];
// load coords, charge, ...
calculatePmeDirectMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
j
],
uscale
,
ijField
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
);
unsigned
int
mask
=
(
(
atomI
==
(
y
+
j
))
||
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
((
y
+
j
)
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
0
:
1
;
// add to field at atomI the field due atomJ's dipole
fieldSum
[
0
]
+=
mask
?
ijField
[
0
][
0
]
:
0
.
0
f
;
fieldSum
[
1
]
+=
mask
?
ijField
[
0
][
1
]
:
0
.
0
f
;
fieldSum
[
2
]
+=
mask
?
ijField
[
0
][
2
]
:
0
.
0
f
;
fieldPolarSum
[
0
]
+=
mask
?
ijField
[
2
][
0
]
:
0
.
0
f
;
fieldPolarSum
[
1
]
+=
mask
?
ijField
[
2
][
1
]
:
0
.
0
f
;
fieldPolarSum
[
2
]
+=
mask
?
ijField
[
2
][
2
]
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
||
(
y
+
j
)
==
targetAtom
){
unsigned
int
index
=
atomI
==
targetAtom
?
(
y
+
j
)
:
atomI
;
unsigned
int
pullBackIndex
=
0
;
unsigned
int
indexI
=
0
;
unsigned
int
indexJ
=
indexI
?
0
:
2
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
j
);
debugArray
[
index
].
z
=
cAmoebaSim
.
cutoffDistance2
;
debugArray
[
index
].
w
=
6
.
0
f
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullBack
[
pullBackIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullBackIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullBackIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullBackIndex
].
w
;
pullBackIndex
++
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullBack
[
pullBackIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullBackIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullBackIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullBackIndex
].
w
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
float
flag
=
6
.
0
f
;
debugArray
[
index
].
x
=
ijField
[
indexI
][
0
];
debugArray
[
index
].
y
=
ijField
[
indexI
][
1
];
debugArray
[
index
].
z
=
ijField
[
indexI
][
2
];
debugArray
[
index
].
w
=
flag
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
ijField
[
indexJ
][
0
];
debugArray
[
index
].
y
=
ijField
[
indexJ
][
1
];
debugArray
[
index
].
z
=
ijField
[
indexJ
][
2
];
debugArray
[
index
].
w
=
flag
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
ijField
[
indexI
+
1
][
0
];
debugArray
[
index
].
y
=
ijField
[
indexI
+
1
][
1
];
debugArray
[
index
].
z
=
ijField
[
indexI
+
1
][
2
];
debugArray
[
index
].
w
=
flag
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
ijField
[
indexJ
+
1
][
0
];
debugArray
[
index
].
y
=
ijField
[
indexJ
+
1
][
1
];
debugArray
[
index
].
z
=
ijField
[
indexJ
+
1
][
2
];
debugArray
[
index
].
w
=
flag
;
/*
index += cAmoebaSim.paddedNumberOfAtoms;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = match ? 0.0f : ijField[indexI][0];
debugArray[index].y = match ? 0.0f : ijField[indexI][1];
debugArray[index].z = match ? 0.0f : ijField[indexI][2];
index += cAmoebaSim.paddedNumberOfAtoms;
unsigned int mask = 1 << j;
unsigned int pScaleIndex = (scaleMask.x & mask) ? 1 : 0;
pScaleIndex += (scaleMask.y & mask) ? 2 : 0;
debugArray[index].x = (float) pScaleIndex;
debugArray[index].y = scaleMask.x & mask ? 1.0f : -1.0f;
debugArray[index].z = scaleMask.y & mask ? 1.0f : -1.0f;
debugArray[index].w = + 10.0f;
*/
}
#endif
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
offset
=
3
*
(
x
+
tgx
+
warp
*
cAmoebaSim
.
paddedNumberOfAtoms
);
load3dArrayBufferPerWarp
(
offset
,
fieldSum
,
outputField
);
load3dArrayBufferPerWarp
(
offset
,
fieldPolarSum
,
outputFieldPolar
);
#else
unsigned
int
offset
=
3
*
(
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
);
load3dArray
(
offset
,
fieldSum
,
outputField
);
load3dArray
(
offset
,
fieldPolarSum
,
outputFieldPolar
);
#endif
}
else
// 100% utilization
{
// Read fixed atom data into registers and GRF
if
(
lasty
!=
y
)
{
unsigned
int
atomJ
=
y
+
tgx
;
// load coordinates, charge, ...
loadMutualInducedShared
(
&
(
sA
[
threadIdx
.
x
]),
atomJ
);
}
// zero shared fields
zeroMutualInducedParticleSharedField
(
&
(
sA
[
threadIdx
.
x
])
);
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
ijField
[
4
][
3
];
// load coords, charge, ...
calculatePmeDirectMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
tj
],
uscale
,
ijField
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
);
unsigned
int
mask
=
(
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
((
y
+
tj
)
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
0
:
1
;
// add to field at atomI the field due atomJ's dipole
fieldSum
[
0
]
+=
mask
?
ijField
[
0
][
0
]
:
0
.
0
f
;
fieldSum
[
1
]
+=
mask
?
ijField
[
0
][
1
]
:
0
.
0
f
;
fieldSum
[
2
]
+=
mask
?
ijField
[
0
][
2
]
:
0
.
0
f
;
// add to polar field at atomI the field due atomJ's dipole
fieldPolarSum
[
0
]
+=
mask
?
ijField
[
2
][
0
]
:
0
.
0
f
;
fieldPolarSum
[
1
]
+=
mask
?
ijField
[
2
][
1
]
:
0
.
0
f
;
fieldPolarSum
[
2
]
+=
mask
?
ijField
[
2
][
2
]
:
0
.
0
f
;
// add to field at atomJ the field due atomI's dipole
psA
[
tj
].
field
[
0
]
+=
mask
?
ijField
[
1
][
0
]
:
0
.
0
f
;
psA
[
tj
].
field
[
1
]
+=
mask
?
ijField
[
1
][
1
]
:
0
.
0
f
;
psA
[
tj
].
field
[
2
]
+=
mask
?
ijField
[
1
][
2
]
:
0
.
0
f
;
// add to polar field at atomJ the field due atomI's dipole
psA
[
tj
].
fieldPolar
[
0
]
+=
mask
?
ijField
[
3
][
0
]
:
0
.
0
f
;
psA
[
tj
].
fieldPolar
[
1
]
+=
mask
?
ijField
[
3
][
1
]
:
0
.
0
f
;
psA
[
tj
].
fieldPolar
[
2
]
+=
mask
?
ijField
[
3
][
2
]
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
||
(
y
+
tj
)
==
targetAtom
){
unsigned
int
index
=
atomI
==
targetAtom
?
(
y
+
tj
)
:
atomI
;
unsigned
int
pullBackIndex
=
0
;
unsigned
int
indexI
=
0
;
unsigned
int
indexJ
=
indexI
?
0
:
2
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
tj
);
debugArray
[
index
].
z
=
cAmoebaSim
.
cutoffDistance2
;
debugArray
[
index
].
w
=
7
.
0
f
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullBack
[
pullBackIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullBackIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullBackIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullBackIndex
].
w
;
pullBackIndex
++
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullBack
[
pullBackIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullBackIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullBackIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullBackIndex
].
w
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
float
flag
=
7
.
0
f
;
debugArray
[
index
].
x
=
ijField
[
indexI
][
0
];
debugArray
[
index
].
y
=
ijField
[
indexI
][
1
];
debugArray
[
index
].
z
=
ijField
[
indexI
][
2
];
debugArray
[
index
].
w
=
flag
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
ijField
[
indexJ
][
0
];
debugArray
[
index
].
y
=
ijField
[
indexJ
][
1
];
debugArray
[
index
].
z
=
ijField
[
indexJ
][
2
];
debugArray
[
index
].
w
=
flag
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
ijField
[
indexI
+
1
][
0
];
debugArray
[
index
].
y
=
ijField
[
indexI
+
1
][
1
];
debugArray
[
index
].
z
=
ijField
[
indexI
+
1
][
2
];
debugArray
[
index
].
w
=
flag
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
ijField
[
indexJ
+
1
][
0
];
debugArray
[
index
].
y
=
ijField
[
indexJ
+
1
][
1
];
debugArray
[
index
].
z
=
ijField
[
indexJ
+
1
][
2
];
debugArray
[
index
].
w
=
flag
;
/*
index += cAmoebaSim.paddedNumberOfAtoms;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = match ? 0.0f : ijField[indexI][0];
debugArray[index].y = match ? 0.0f : ijField[indexI][1];
debugArray[index].z = match ? 0.0f : ijField[indexI][2];
index += cAmoebaSim.paddedNumberOfAtoms;
unsigned int mask = 1 << j;
unsigned int pScaleIndex = (scaleMask.x & mask) ? 1 : 0;
pScaleIndex += (scaleMask.y & mask) ? 2 : 0;
debugArray[index].x = (float) pScaleIndex;
debugArray[index].y = scaleMask.x & mask ? 1.0f : -1.0f;
debugArray[index].z = scaleMask.y & mask ? 1.0f : -1.0f;
debugArray[index].w = + 10.0f;
*/
}
#endif
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
offset
=
3
*
(
x
+
tgx
+
warp
*
cAmoebaSim
.
paddedNumberOfAtoms
);
load3dArrayBufferPerWarp
(
offset
,
fieldSum
,
outputField
);
load3dArrayBufferPerWarp
(
offset
,
fieldPolarSum
,
outputFieldPolar
);
offset
=
3
*
(
y
+
tgx
+
warp
*
cAmoebaSim
.
paddedNumberOfAtoms
);
load3dArrayBufferPerWarp
(
offset
,
sA
[
threadIdx
.
x
].
field
,
outputField
);
load3dArrayBufferPerWarp
(
offset
,
sA
[
threadIdx
.
x
].
fieldPolar
,
outputFieldPolar
);
#else
unsigned
int
offset
=
3
*
(
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
);
load3dArray
(
offset
,
fieldSum
,
outputField
);
load3dArray
(
offset
,
fieldPolarSum
,
outputFieldPolar
);
offset
=
3
*
(
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
);
load3dArray
(
offset
,
sA
[
threadIdx
.
x
].
field
,
outputField
);
load3dArray
(
offset
,
sA
[
threadIdx
.
x
].
fieldPolar
,
outputFieldPolar
);
#endif
lasty
=
y
;
}
pos
++
;
}
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
View file @
c22f00cf
...
...
@@ -373,6 +373,7 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
cudaComputeAmoebaMutualInducedField
(
amoebaGpu
);
}
else
{
cudaComputeAmoebaPmeFixedEField
(
amoebaGpu
);
cudaComputeAmoebaPmeMutualInducedField
(
amoebaGpu
);
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment