Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
c22f00cf
"devtools/vscode:/vscode.git/clone" did not exist on "3be0bf11d90561aa6f64f1c97772e07516f7c51a"
Commit
c22f00cf
authored
Sep 13, 2010
by
Mark Friedrichs
Browse files
AmoebaCudaPmeMutualInducedField
parent
3763b76b
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
1254 additions
and
526 deletions
+1254
-526
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
+1
-0
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
...ins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
+4
-0
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
...c/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
+67
-138
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.h
...rc/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.h
+8
-48
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
...uda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
+29
-238
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.h
...cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.h
+7
-32
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedParticle.h
...a/src/kernels/kCalculateAmoebaCudaMutualInducedParticle.h
+21
-70
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+745
-0
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
...a/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
+371
-0
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
...platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
+1
-0
No files found.
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
View file @
c22f00cf
...
@@ -2763,6 +2763,7 @@ void amoebaGpuSetConstants(amoebaGpuContext amoebaGpu)
...
@@ -2763,6 +2763,7 @@ void amoebaGpuSetConstants(amoebaGpuContext amoebaGpu)
SetCalculateAmoebaCudaVdw14_7Sim
(
amoebaGpu
);
SetCalculateAmoebaCudaVdw14_7Sim
(
amoebaGpu
);
SetCalculateAmoebaCudaWcaDispersionSim
(
amoebaGpu
);
SetCalculateAmoebaCudaWcaDispersionSim
(
amoebaGpu
);
SetCalculateAmoebaCudaMutualInducedFieldSim
(
amoebaGpu
);
SetCalculateAmoebaCudaMutualInducedFieldSim
(
amoebaGpu
);
SetCalculateAmoebaCudaPmeMutualInducedFieldSim
(
amoebaGpu
);
SetCalculateAmoebaCudaPmeFixedEFieldSim
(
amoebaGpu
);
SetCalculateAmoebaCudaPmeFixedEFieldSim
(
amoebaGpu
);
SetCalculateAmoebaElectrostaticSim
(
amoebaGpu
);
SetCalculateAmoebaElectrostaticSim
(
amoebaGpu
);
SetCalculateAmoebaRealSpaceEwaldSim
(
amoebaGpu
);
SetCalculateAmoebaRealSpaceEwaldSim
(
amoebaGpu
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
View file @
c22f00cf
...
@@ -88,6 +88,10 @@ extern void SetCalculateAmoebaCudaMutualInducedFieldSim(amoebaGpuContext gpu);
...
@@ -88,6 +88,10 @@ extern void SetCalculateAmoebaCudaMutualInducedFieldSim(amoebaGpuContext gpu);
extern
void
GetCalculateAmoebaCudaMutualInducedFieldSim
(
amoebaGpuContext
gpu
);
extern
void
GetCalculateAmoebaCudaMutualInducedFieldSim
(
amoebaGpuContext
gpu
);
extern
void
cudaComputeAmoebaMutualInducedField
(
amoebaGpuContext
gpu
);
extern
void
cudaComputeAmoebaMutualInducedField
(
amoebaGpuContext
gpu
);
extern
void
SetCalculateAmoebaCudaPmeMutualInducedFieldSim
(
amoebaGpuContext
gpu
);
extern
void
GetCalculateAmoebaCudaPmeMutualInducedFieldSim
(
amoebaGpuContext
gpu
);
extern
void
cudaComputeAmoebaPmeMutualInducedField
(
amoebaGpuContext
gpu
);
// mutual induced and Gk
// mutual induced and Gk
extern
void
SetCalculateAmoebaCudaMutualInducedAndGkFieldsSim
(
amoebaGpuContext
amoebaGpu
);
extern
void
SetCalculateAmoebaCudaMutualInducedAndGkFieldsSim
(
amoebaGpuContext
amoebaGpu
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
View file @
c22f00cf
...
@@ -36,14 +36,11 @@ void GetCalculateAmoebaCudaMutualInducedAndGkFieldsSim(amoebaGpuContext amoebaGp
...
@@ -36,14 +36,11 @@ void GetCalculateAmoebaCudaMutualInducedAndGkFieldsSim(amoebaGpuContext amoebaGp
//#define AMOEBA_DEBUG
//#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
#undef AMOEBA_DEBUG
__device__
void
calculateMutualInducedAndGkFieldsPairIxn_kernel
(
float4
atomCoordinatesI
,
float4
atomCoordinatesJ
,
#define GK
float
dampingFactorI
,
float
dampingFactorJ
,
#include "kCalculateAmoebaCudaMutualInducedParticle.h"
float
tholeI
,
float
tholeJ
,
#undef GK
float
*
inducedDipoleI
,
float
*
inducedDipolePolarI
,
float
*
inducedDipoleJ
,
float
*
inducedDipolePolarJ
,
__device__
void
calculateMutualInducedAndGkFieldsPairIxn_kernel
(
MutualInducedParticle
&
atomI
,
MutualInducedParticle
&
atomJ
,
float
*
inducedDipoleSI
,
float
*
inducedDipolePolarSI
,
float
*
inducedDipoleSJ
,
float
*
inducedDipolePolarSJ
,
float
scalingDistanceCutoff
,
float
fields
[
8
][
3
]
float
fields
[
8
][
3
]
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
...
@@ -59,9 +56,9 @@ __device__ void calculateMutualInducedAndGkFieldsPairIxn_kernel( float4 atomCoor
...
@@ -59,9 +56,9 @@ __device__ void calculateMutualInducedAndGkFieldsPairIxn_kernel( float4 atomCoor
// get deltaR, and r between 2 atoms
// get deltaR, and r between 2 atoms
deltaR
[
0
]
=
atom
CoordinatesJ
.
x
-
atomCoordinates
I
.
x
;
deltaR
[
0
]
=
atom
J
.
x
-
atom
I
.
x
;
deltaR
[
1
]
=
atom
CoordinatesJ
.
y
-
atomCoordinates
I
.
y
;
deltaR
[
1
]
=
atom
J
.
y
-
atom
I
.
y
;
deltaR
[
2
]
=
atom
CoordinatesJ
.
z
-
atomCoordinates
I
.
z
;
deltaR
[
2
]
=
atom
J
.
z
-
atom
I
.
z
;
float
r
=
sqrtf
(
deltaR
[
0
]
*
deltaR
[
0
]
+
deltaR
[
1
]
*
deltaR
[
1
]
+
deltaR
[
2
]
*
deltaR
[
2
]
);
float
r
=
sqrtf
(
deltaR
[
0
]
*
deltaR
[
0
]
+
deltaR
[
1
]
*
deltaR
[
1
]
+
deltaR
[
2
]
*
deltaR
[
2
]
);
float
rI
=
1.0
f
/
r
;
float
rI
=
1.0
f
/
r
;
...
@@ -69,112 +66,59 @@ __device__ void calculateMutualInducedAndGkFieldsPairIxn_kernel( float4 atomCoor
...
@@ -69,112 +66,59 @@ __device__ void calculateMutualInducedAndGkFieldsPairIxn_kernel( float4 atomCoor
float
rr3
=
-
rI
*
r2I
;
float
rr3
=
-
rI
*
r2I
;
float
rr5
=
-
3.0
f
*
rr3
*
r2I
;
float
rr5
=
-
3.0
f
*
rr3
*
r2I
;
float
dampProd
=
dampingFactorI
*
dampingFactorJ
;
float
dampProd
=
atomI
.
damp
*
atomJ
.
damp
;
float
ratio
=
(
dampProd
!=
0.0
f
)
?
(
r
/
dampProd
)
:
1.0
f
;
float
ratio
=
(
dampProd
!=
0.0
f
)
?
(
r
/
dampProd
)
:
1.0
f
;
float
pGamma
=
thole
J
>
thole
I
?
tholeI
:
thole
J
;
float
pGamma
=
atomI
.
thole
>
atomJ
.
thole
?
atomJ
.
thole
:
atomI
.
thole
;
float
damp
=
ratio
*
ratio
*
ratio
*
pGamma
;
float
damp
=
ratio
*
ratio
*
ratio
*
pGamma
;
float
dampExp
=
(
(
dampProd
!=
0.0
f
)
&&
(
r
<
scalingDistanceCutoff
)
)
?
expf
(
-
damp
)
:
0.0
f
;
float
dampExp
=
(
(
dampProd
!=
0.0
f
)
&&
(
r
<
cAmoebaSim
.
scalingDistanceCutoff
)
)
?
expf
(
-
damp
)
:
0.0
f
;
rr3
*=
(
1.0
f
-
dampExp
);
rr3
*=
(
1.0
f
-
dampExp
);
rr5
*=
(
1.0
f
-
(
1.0
f
+
damp
)
*
dampExp
);
rr5
*=
(
1.0
f
-
(
1.0
f
+
damp
)
*
dampExp
);
float
*
dipole
=
inducedDipoleJ
;
float
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
atomJ
.
inducedDipole
[
0
]
+
deltaR
[
1
]
*
atomJ
.
inducedDipole
[
1
]
+
deltaR
[
2
]
*
atomJ
.
inducedDipole
[
2
]
);
float
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
fields
[
0
][
0
]
=
rr3
*
atomJ
.
inducedDipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
0
][
0
]
=
rr3
*
dipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
0
][
1
]
=
rr3
*
atomJ
.
inducedDipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
0
][
1
]
=
rr3
*
dipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
0
][
2
]
=
rr3
*
atomJ
.
inducedDipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
fields
[
0
][
2
]
=
rr3
*
dipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dipole
=
inducedDipolePolarJ
;
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
atomJ
.
inducedDipolePolar
[
0
]
+
deltaR
[
1
]
*
atomJ
.
inducedDipolePolar
[
1
]
+
deltaR
[
2
]
*
atomJ
.
inducedDipolePolar
[
2
]
);
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
fields
[
1
][
0
]
=
rr3
*
atomJ
.
inducedDipolePolar
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
1
][
0
]
=
rr3
*
dipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
1
][
1
]
=
rr3
*
atomJ
.
inducedDipolePolar
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
1
][
1
]
=
rr3
*
dipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
1
][
2
]
=
rr3
*
atomJ
.
inducedDipolePolar
[
2
]
+
dDotDelta
*
deltaR
[
2
];
fields
[
1
][
2
]
=
rr3
*
dipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dipole
=
inducedDipoleI
;
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
atomI
.
inducedDipole
[
0
]
+
deltaR
[
1
]
*
atomI
.
inducedDipole
[
1
]
+
deltaR
[
2
]
*
atomI
.
inducedDipole
[
2
]
);
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
fields
[
2
][
0
]
=
rr3
*
atomI
.
inducedDipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
2
][
0
]
=
rr3
*
dipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
2
][
1
]
=
rr3
*
atomI
.
inducedDipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
2
][
1
]
=
rr3
*
dipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
2
][
2
]
=
rr3
*
atomI
.
inducedDipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
fields
[
2
][
2
]
=
rr3
*
dipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dipole
=
inducedDipolePolarI
;
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
atomI
.
inducedDipolePolar
[
0
]
+
deltaR
[
1
]
*
atomI
.
inducedDipolePolar
[
1
]
+
deltaR
[
2
]
*
atomI
.
inducedDipolePolar
[
2
]
);
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
fields
[
3
][
0
]
=
rr3
*
atomI
.
inducedDipolePolar
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
3
][
0
]
=
rr3
*
dipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
3
][
1
]
=
rr3
*
atomI
.
inducedDipolePolar
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
3
][
1
]
=
rr3
*
dipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
3
][
2
]
=
rr3
*
atomI
.
inducedDipolePolar
[
2
]
+
dDotDelta
*
deltaR
[
2
];
fields
[
3
][
2
]
=
rr3
*
dipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
atomJ
.
inducedDipoleS
[
0
]
+
deltaR
[
1
]
*
atomJ
.
inducedDipoleS
[
1
]
+
deltaR
[
2
]
*
atomJ
.
inducedDipoleS
[
2
]
);
dipole
=
inducedDipoleSJ
;
fields
[
4
][
0
]
=
rr3
*
atomJ
.
inducedDipoleS
[
0
]
+
dDotDelta
*
deltaR
[
0
];
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
fields
[
4
][
1
]
=
rr3
*
atomJ
.
inducedDipoleS
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
4
][
0
]
=
rr3
*
dipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
4
][
2
]
=
rr3
*
atomJ
.
inducedDipoleS
[
2
]
+
dDotDelta
*
deltaR
[
2
];
fields
[
4
][
1
]
=
rr3
*
dipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
4
][
2
]
=
rr3
*
dipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dipole
=
inducedDipolePolarSJ
;
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
atomJ
.
inducedDipolePolarS
[
0
]
+
deltaR
[
1
]
*
atomJ
.
inducedDipolePolarS
[
1
]
+
deltaR
[
2
]
*
atomJ
.
inducedDipolePolarS
[
2
]
);
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
fields
[
5
][
0
]
=
rr3
*
atomJ
.
inducedDipolePolarS
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
5
][
0
]
=
rr3
*
dipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
5
][
1
]
=
rr3
*
atomJ
.
inducedDipolePolarS
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
5
][
1
]
=
rr3
*
dipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
5
][
2
]
=
rr3
*
atomJ
.
inducedDipolePolarS
[
2
]
+
dDotDelta
*
deltaR
[
2
];
fields
[
5
][
2
]
=
rr3
*
dipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dipole
=
inducedDipoleSI
;
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
atomI
.
inducedDipoleS
[
0
]
+
deltaR
[
1
]
*
atomI
.
inducedDipoleS
[
1
]
+
deltaR
[
2
]
*
atomI
.
inducedDipoleS
[
2
]
);
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
fields
[
6
][
0
]
=
rr3
*
atomI
.
inducedDipoleS
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
6
][
0
]
=
rr3
*
dipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
6
][
1
]
=
rr3
*
atomI
.
inducedDipoleS
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
6
][
1
]
=
rr3
*
dipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
6
][
2
]
=
rr3
*
atomI
.
inducedDipoleS
[
2
]
+
dDotDelta
*
deltaR
[
2
];
fields
[
6
][
2
]
=
rr3
*
dipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dipole
=
inducedDipolePolarSI
;
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
atomI
.
inducedDipolePolarS
[
0
]
+
deltaR
[
1
]
*
atomI
.
inducedDipolePolarS
[
1
]
+
deltaR
[
2
]
*
atomI
.
inducedDipolePolarS
[
2
]
);
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
fields
[
7
][
0
]
=
rr3
*
atomI
.
inducedDipolePolarS
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
7
][
0
]
=
rr3
*
dipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
7
][
1
]
=
rr3
*
atomI
.
inducedDipolePolarS
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
7
][
1
]
=
rr3
*
dipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
7
][
2
]
=
rr3
*
atomI
.
inducedDipolePolarS
[
2
]
+
dDotDelta
*
deltaR
[
2
];
fields
[
7
][
2
]
=
rr3
*
dipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
#ifdef AMOEBA_DEBUGX
//int n2 = numberOfAtoms*numberOfAtoms;
//int debugIndex = atomI*numberOfAtoms + atomJ;
if
(
atomI
==
0
){
int
debugIndex
=
atomJ
;
debugArray
[
debugIndex
].
x
=
rr3
;
debugArray
[
debugIndex
].
y
=
rr5
;
dipole
=
&
inducedDipole
[
atomJ
*
3
];
debugArray
[
debugIndex
].
z
=
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
dipole
=
&
inducedDipole
[
atomI
*
3
];
debugArray
[
debugIndex
].
w
=
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
debugIndex
+=
numberOfAtoms
;
dipole
=
&
inducedDipolePolar
[
atomJ
*
3
];
debugArray
[
debugIndex
].
x
=
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
dipole
=
&
inducedDipolePolar
[
atomI
*
3
];
debugArray
[
debugIndex
].
y
=
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
debugIndex
+=
numberOfAtoms
;
debugArray
[
debugIndex
].
x
=
fields1
[
atomI
*
numberOfAtoms
*
3
+
atomJ
*
3
+
0
];
debugArray
[
debugIndex
].
y
=
fields1
[
atomI
*
numberOfAtoms
*
3
+
atomJ
*
3
+
1
];
debugArray
[
debugIndex
].
z
=
fields1
[
atomI
*
numberOfAtoms
*
3
+
atomJ
*
3
+
2
];
debugIndex
+=
numberOfAtoms
;
debugArray
[
debugIndex
].
x
=
fields1
[
atomJ
*
numberOfAtoms
*
3
+
atomI
*
3
+
0
];
debugArray
[
debugIndex
].
y
=
fields1
[
atomJ
*
numberOfAtoms
*
3
+
atomI
*
3
+
1
];
debugArray
[
debugIndex
].
z
=
fields1
[
atomJ
*
numberOfAtoms
*
3
+
atomI
*
3
+
2
];
debugIndex
+=
numberOfAtoms
;
debugArray
[
debugIndex
].
x
=
fields2
[
atomI
*
numberOfAtoms
*
3
+
atomJ
*
3
+
0
];
debugArray
[
debugIndex
].
y
=
fields2
[
atomI
*
numberOfAtoms
*
3
+
atomJ
*
3
+
1
];
debugArray
[
debugIndex
].
z
=
fields2
[
atomI
*
numberOfAtoms
*
3
+
atomJ
*
3
+
2
];
debugIndex
+=
numberOfAtoms
;
debugArray
[
debugIndex
].
x
=
fields2
[
atomJ
*
numberOfAtoms
*
3
+
atomI
*
3
+
0
];
debugArray
[
debugIndex
].
y
=
fields2
[
atomJ
*
numberOfAtoms
*
3
+
atomI
*
3
+
1
];
debugArray
[
debugIndex
].
z
=
fields2
[
atomJ
*
numberOfAtoms
*
3
+
atomI
*
3
+
2
];
}
#endif
}
}
__device__
void
calculateMutualInducedAndGkFieldsGkPairIxn_kernel
(
float4
atomCoordinatesI
,
float4
atomCoordinatesJ
,
float
rb2
,
__device__
void
calculateMutualInducedAndGkFieldsGkPairIxn_kernel
(
MutualInducedParticle
&
atomI
,
MutualInducedParticle
&
atomJ
,
float
*
inducedDipoleSI
,
float
*
inducedDipolePolarSI
,
float
*
inducedDipoleSJ
,
float
*
inducedDipolePolarSJ
,
float
gkField
[
8
][
3
]
float
gkField
[
8
][
3
]
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
...
@@ -191,14 +135,16 @@ __device__ void calculateMutualInducedAndGkFieldsGkPairIxn_kernel( float4 atomCo
...
@@ -191,14 +135,16 @@ __device__ void calculateMutualInducedAndGkFieldsGkPairIxn_kernel( float4 atomCo
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
float
xr
=
atom
CoordinatesJ
.
x
-
atomCoordinates
I
.
x
;
float
xr
=
atom
J
.
x
-
atom
I
.
x
;
float
yr
=
atom
CoordinatesJ
.
y
-
atomCoordinates
I
.
y
;
float
yr
=
atom
J
.
y
-
atom
I
.
y
;
float
zr
=
atom
CoordinatesJ
.
z
-
atomCoordinates
I
.
z
;
float
zr
=
atom
J
.
z
-
atom
I
.
z
;
float
xr2
=
xr
*
xr
;
float
xr2
=
xr
*
xr
;
float
yr2
=
yr
*
yr
;
float
yr2
=
yr
*
yr
;
float
zr2
=
zr
*
zr
;
float
zr2
=
zr
*
zr
;
float
rb2
=
atomI
.
bornRadius
*
atomJ
.
bornRadius
;
float
r2
=
xr2
+
yr2
+
zr2
;
float
r2
=
xr2
+
yr2
+
zr2
;
float
expterm
=
expf
(
-
r2
/
(
cAmoebaSim
.
gkc
*
rb2
));
float
expterm
=
expf
(
-
r2
/
(
cAmoebaSim
.
gkc
*
rb2
));
float
expc
=
expterm
/
cAmoebaSim
.
gkc
;
float
expc
=
expterm
/
cAmoebaSim
.
gkc
;
...
@@ -209,21 +155,21 @@ __device__ void calculateMutualInducedAndGkFieldsGkPairIxn_kernel( float4 atomCo
...
@@ -209,21 +155,21 @@ __device__ void calculateMutualInducedAndGkFieldsGkPairIxn_kernel( float4 atomCo
float
gf3
=
gf2
*
gf
;
float
gf3
=
gf2
*
gf
;
float
gf5
=
gf3
*
gf2
;
float
gf5
=
gf3
*
gf2
;
float
duixs
=
inducedDipoleS
I
[
0
];
float
duixs
=
atomI
.
inducedDipoleS
[
0
];
float
duiys
=
inducedDipoleS
I
[
1
];
float
duiys
=
atomI
.
inducedDipoleS
[
1
];
float
duizs
=
inducedDipoleS
I
[
2
];
float
duizs
=
atomI
.
inducedDipoleS
[
2
];
float
puixs
=
inducedDipolePolarS
I
[
0
];
float
puixs
=
atomI
.
inducedDipolePolarS
[
0
];
float
puiys
=
inducedDipolePolarS
I
[
1
];
float
puiys
=
atomI
.
inducedDipolePolarS
[
1
];
float
puizs
=
inducedDipolePolarS
I
[
2
];
float
puizs
=
atomI
.
inducedDipolePolarS
[
2
];
float
dukxs
=
inducedDipoleS
J
[
0
];
float
dukxs
=
atomJ
.
inducedDipoleS
[
0
];
float
dukys
=
inducedDipoleS
J
[
1
];
float
dukys
=
atomJ
.
inducedDipoleS
[
1
];
float
dukzs
=
inducedDipoleS
J
[
2
];
float
dukzs
=
atomJ
.
inducedDipoleS
[
2
];
float
pukxs
=
inducedDipolePolarS
J
[
0
];
float
pukxs
=
atomJ
.
inducedDipolePolarS
[
0
];
float
pukys
=
inducedDipolePolarS
J
[
1
];
float
pukys
=
atomJ
.
inducedDipolePolarS
[
1
];
float
pukzs
=
inducedDipolePolarS
J
[
2
];
float
pukzs
=
atomJ
.
inducedDipolePolarS
[
2
];
// reaction potential auxiliary terms
// reaction potential auxiliary terms
...
@@ -280,9 +226,6 @@ __device__ static int debugAccumulate( int index, float4* debugArray, float* fie
...
@@ -280,9 +226,6 @@ __device__ static int debugAccumulate( int index, float4* debugArray, float* fie
}
}
#endif
#endif
#define GK
#include "kCalculateAmoebaCudaMutualInducedParticle.h"
#undef GK
// Include versions of the kernels for N^2 calculations.
// Include versions of the kernels for N^2 calculations.
...
@@ -587,24 +530,16 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon
...
@@ -587,24 +530,16 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon
}
}
if
(
gpu
->
bOutputBufferPerWarp
){
if
(
gpu
->
bOutputBufferPerWarp
){
#if 0
kCalculateAmoebaMutualInducedAndGkFieldsN2ByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
>>>
(
(void) fprintf( amoebaGpu->log, "N2 warp\n" ); (void) fflush( amoebaGpu->log );
kCalculateAmoebaMutualInducedAndGkFieldsN2ByWarp_kernel<<<amoebaGpu->nonbondBlocks,
amoebaGpu->nonbondThreadsPerBlock,
sizeof(MutualInducedParticle)*amoebaGpu->nonbondThreadsPerBlock>>>(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
gpu->psPosq4->_pDevStream[0],
amoebaGpu->psInducedDipole->_pDevStream[0],
amoebaGpu->psInducedDipolePolar->_pDevStream[0],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_3
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_4
->
_pDevStream
[
0
],
debugArray
->
_pDevStream
[
0
],
targetAtom
);
debugArray
->
_pDevStream
[
0
],
targetAtom
);
#else
#else
amoebaGpu->psWorkArray_3_2->_pDevStream[0] );
amoebaGpu
->
psWorkArray_3_4
->
_pDevStream
[
0
]
);
#endif
#endif
#endif
}
else
{
}
else
{
...
@@ -619,12 +554,6 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon
...
@@ -619,12 +554,6 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon
kCalculateAmoebaMutualInducedAndGkFieldsN2_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaMutualInducedAndGkFieldsN2_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
gpu
->
psPosq4
->
_pDevStream
[
0
],
gpu
->
psBornRadii
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipolePolar
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipoleS
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipolePolarS
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_3
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_3
->
_pDevStream
[
0
],
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.h
View file @
c22f00cf
...
@@ -36,12 +36,6 @@ __launch_bounds__(64, 1)
...
@@ -36,12 +36,6 @@ __launch_bounds__(64, 1)
#endif
#endif
void
METHOD_NAME
(
kCalculateAmoebaMutualInducedAndGkFields
,
_kernel
)(
void
METHOD_NAME
(
kCalculateAmoebaMutualInducedAndGkFields
,
_kernel
)(
unsigned
int
*
workUnit
,
unsigned
int
*
workUnit
,
float4
*
atomCoord
,
float
*
bornRadii
,
float
*
inducedDipole
,
float
*
inducedDipolePolar
,
float
*
inducedDipoleS
,
float
*
inducedDipolePolarS
,
float
*
outputField
,
float
*
outputField
,
float
*
outputFieldPolar
,
float
*
outputFieldPolar
,
float
*
outputFieldS
,
float
*
outputFieldS
,
...
@@ -60,13 +54,6 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedAndGkFields, _kernel)(
...
@@ -60,13 +54,6 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedAndGkFields, _kernel)(
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
lasty
=
0xFFFFFFFF
;
unsigned
int
lasty
=
0xFFFFFFFF
;
float4
jCoord
;
float
jBornRadius
;
float
jDipole
[
3
];
float
jDipolePolar
[
3
];
float
jDipoleS
[
3
];
float
jDipolePolarS
[
3
];
while
(
pos
<
end
)
while
(
pos
<
end
)
{
{
...
@@ -84,7 +71,8 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedAndGkFields, _kernel)(
...
@@ -84,7 +71,8 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedAndGkFields, _kernel)(
MutualInducedParticle
*
psA
=
&
sA
[
tbx
];
MutualInducedParticle
*
psA
=
&
sA
[
tbx
];
unsigned
int
atomI
=
x
+
tgx
;
unsigned
int
atomI
=
x
+
tgx
;
float4
iCoord
=
atomCoord
[
atomI
];
MutualInducedParticle
localParticle
;
loadMutualInducedShared
(
&
localParticle
,
atomI
);
float
fieldSum
[
3
];
float
fieldSum
[
3
];
float
fieldPolarSum
[
3
];
float
fieldPolarSum
[
3
];
...
@@ -115,9 +103,7 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedAndGkFields, _kernel)(
...
@@ -115,9 +103,7 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedAndGkFields, _kernel)(
// load shared data
// load shared data
loadMutualInducedShared
(
&
(
sA
[
threadIdx
.
x
]),
atomI
,
loadMutualInducedShared
(
&
(
sA
[
threadIdx
.
x
]),
atomI
);
atomCoord
,
inducedDipole
,
inducedDipolePolar
,
cAmoebaSim
.
pDampingFactorAndThole
,
bornRadii
,
inducedDipoleS
,
inducedDipolePolarS
);
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
{
...
@@ -126,16 +112,7 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedAndGkFields, _kernel)(
...
@@ -126,16 +112,7 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedAndGkFields, _kernel)(
// load coords, charge, ...
// load coords, charge, ...
loadMutualInducedData
(
&
(
psA
[
j
]),
&
jCoord
,
jDipole
,
jDipolePolar
,
&
jBornRadius
,
jDipoleS
,
jDipolePolarS
);
calculateMutualInducedAndGkFieldsPairIxn_kernel
(
localParticle
,
psA
[
j
],
ijField
calculateMutualInducedAndGkFieldsPairIxn_kernel
(
iCoord
,
jCoord
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
,
psA
[
j
].
damp
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
,
psA
[
j
].
thole
,
&
(
inducedDipole
[
atomI
*
3
]),
&
(
inducedDipolePolar
[
atomI
*
3
]),
jDipole
,
jDipolePolar
,
&
(
inducedDipoleS
[
atomI
*
3
]),
&
(
inducedDipolePolarS
[
atomI
*
3
]),
jDipoleS
,
jDipolePolarS
,
cAmoebaSim
.
scalingDistanceCutoff
,
ijField
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
,
debugArray
,
debugArray
#endif
#endif
...
@@ -177,10 +154,7 @@ if( atomI == targetAtom ){
...
@@ -177,10 +154,7 @@ if( atomI == targetAtom ){
index = debugAccumulate( index, debugArray, ijField[5], mask, 4.0f );
index = debugAccumulate( index, debugArray, ijField[5], mask, 4.0f );
}
}
#endif
#endif
calculateMutualInducedAndGkFieldsGkPairIxn_kernel
(
iCoord
,
jCoord
,
bornRadii
[
atomI
]
*
jBornRadius
,
calculateMutualInducedAndGkFieldsGkPairIxn_kernel
(
localParticle
,
psA
[
j
],
ijField
&
(
inducedDipoleS
[
atomI
*
3
]),
&
(
inducedDipolePolarS
[
atomI
*
3
]),
jDipoleS
,
jDipolePolarS
,
ijField
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
,
debugArray
,
debugArray
#endif
#endif
...
@@ -247,9 +221,7 @@ if( atomI == targetAtom ){
...
@@ -247,9 +221,7 @@ if( atomI == targetAtom ){
{
{
// load coordinates, charge, ...
// load coordinates, charge, ...
loadMutualInducedShared
(
&
(
sA
[
threadIdx
.
x
]),
(
y
+
tgx
),
loadMutualInducedShared
(
&
(
sA
[
threadIdx
.
x
]),
(
y
+
tgx
)
);
atomCoord
,
inducedDipole
,
inducedDipolePolar
,
cAmoebaSim
.
pDampingFactorAndThole
,
bornRadii
,
inducedDipoleS
,
inducedDipolePolarS
);
}
}
// zero shared fields
// zero shared fields
...
@@ -263,16 +235,7 @@ if( atomI == targetAtom ){
...
@@ -263,16 +235,7 @@ if( atomI == targetAtom ){
// load coords, charge, ...
// load coords, charge, ...
loadMutualInducedData
(
&
(
psA
[
tj
]),
&
jCoord
,
jDipole
,
jDipolePolar
,
&
jBornRadius
,
jDipoleS
,
jDipolePolarS
);
calculateMutualInducedAndGkFieldsPairIxn_kernel
(
localParticle
,
psA
[
tj
],
ijField
calculateMutualInducedAndGkFieldsPairIxn_kernel
(
iCoord
,
jCoord
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
,
psA
[
tj
].
damp
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
,
psA
[
tj
].
thole
,
&
(
inducedDipole
[
atomI
*
3
]),
&
(
inducedDipolePolar
[
atomI
*
3
]),
jDipole
,
jDipolePolar
,
&
(
inducedDipoleS
[
atomI
*
3
]),
&
(
inducedDipolePolarS
[
atomI
*
3
]),
jDipoleS
,
jDipolePolarS
,
cAmoebaSim
.
scalingDistanceCutoff
,
ijField
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
,
debugArray
,
debugArray
#endif
#endif
...
@@ -344,10 +307,7 @@ if( atomI == targetAtom || (y + tj) == targetAtom ){
...
@@ -344,10 +307,7 @@ if( atomI == targetAtom || (y + tj) == targetAtom ){
index = debugAccumulate( index, debugArray, ijField[indexI+5], maskD, -4.0f );
index = debugAccumulate( index, debugArray, ijField[indexI+5], maskD, -4.0f );
}
}
#endif
#endif
calculateMutualInducedAndGkFieldsGkPairIxn_kernel
(
iCoord
,
jCoord
,
bornRadii
[
atomI
]
*
jBornRadius
,
calculateMutualInducedAndGkFieldsGkPairIxn_kernel
(
localParticle
,
psA
[
tj
],
ijField
&
(
inducedDipoleS
[
atomI
*
3
]),
&
(
inducedDipolePolarS
[
atomI
*
3
]),
jDipoleS
,
jDipolePolarS
,
ijField
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
,
debugArray
,
debugArray
#endif
#endif
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
View file @
c22f00cf
...
@@ -36,12 +36,9 @@ void GetCalculateAmoebaCudaMutualInducedFieldSim(amoebaGpuContext amoebaGpu)
...
@@ -36,12 +36,9 @@ void GetCalculateAmoebaCudaMutualInducedFieldSim(amoebaGpuContext amoebaGpu)
//#define AMOEBA_DEBUG
//#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
#undef AMOEBA_DEBUG
__device__
void
calculateMutualInducedFieldPairIxn_kernel
(
float4
atomCoordinatesI
,
float4
atomCoordinatesJ
,
#include "kCalculateAmoebaCudaMutualInducedParticle.h"
float
dampingFactorI
,
float
dampingFactorJ
,
float
tholeI
,
float
tholeJ
,
__device__
void
calculateMutualInducedFieldPairIxn_kernel
(
MutualInducedParticle
&
atomI
,
MutualInducedParticle
&
atomJ
,
float
*
inducedDipoleI
,
float
*
inducedDipolePolarI
,
float
*
inducedDipoleJ
,
float
*
inducedDipolePolarJ
,
float
scalingDistanceCutoff
,
float
fields
[
4
][
3
]
float
fields
[
4
][
3
]
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
...
@@ -57,9 +54,9 @@ __device__ void calculateMutualInducedFieldPairIxn_kernel( float4 atomCoordinate
...
@@ -57,9 +54,9 @@ __device__ void calculateMutualInducedFieldPairIxn_kernel( float4 atomCoordinate
// get deltaR, and r between 2 atoms
// get deltaR, and r between 2 atoms
deltaR
[
0
]
=
atom
CoordinatesJ
.
x
-
atomCoordinates
I
.
x
;
deltaR
[
0
]
=
atom
J
.
x
-
atom
I
.
x
;
deltaR
[
1
]
=
atom
CoordinatesJ
.
y
-
atomCoordinates
I
.
y
;
deltaR
[
1
]
=
atom
J
.
y
-
atom
I
.
y
;
deltaR
[
2
]
=
atom
CoordinatesJ
.
z
-
atomCoordinates
I
.
z
;
deltaR
[
2
]
=
atom
J
.
z
-
atom
I
.
z
;
float
r
=
sqrtf
(
deltaR
[
0
]
*
deltaR
[
0
]
+
deltaR
[
1
]
*
deltaR
[
1
]
+
deltaR
[
2
]
*
deltaR
[
2
]
);
float
r
=
sqrtf
(
deltaR
[
0
]
*
deltaR
[
0
]
+
deltaR
[
1
]
*
deltaR
[
1
]
+
deltaR
[
2
]
*
deltaR
[
2
]
);
float
rI
=
1.0
f
/
r
;
float
rI
=
1.0
f
/
r
;
...
@@ -67,87 +64,36 @@ __device__ void calculateMutualInducedFieldPairIxn_kernel( float4 atomCoordinate
...
@@ -67,87 +64,36 @@ __device__ void calculateMutualInducedFieldPairIxn_kernel( float4 atomCoordinate
float
rr3
=
-
rI
*
r2I
;
float
rr3
=
-
rI
*
r2I
;
float
rr5
=
-
3.0
f
*
rr3
*
r2I
;
float
rr5
=
-
3.0
f
*
rr3
*
r2I
;
float
dampProd
=
dampingFactorI
*
dampingFactorJ
;
float
dampProd
=
atomI
.
damp
*
atomJ
.
damp
;
float
ratio
=
(
dampProd
!=
0.0
f
)
?
(
r
/
dampProd
)
:
1.0
f
;
float
ratio
=
(
dampProd
!=
0.0
f
)
?
(
r
/
dampProd
)
:
1.0
f
;
float
pGamma
=
thole
J
>
thole
I
?
tholeI
:
thole
J
;
float
pGamma
=
atomJ
.
thole
>
atomI
.
thole
?
atomI
.
thole
:
atomJ
.
thole
;
float
damp
=
ratio
*
ratio
*
ratio
*
pGamma
;
float
damp
=
ratio
*
ratio
*
ratio
*
pGamma
;
float
dampExp
=
(
(
dampProd
!=
0.0
f
)
&&
(
r
<
scalingDistanceCutoff
)
)
?
expf
(
-
damp
)
:
0.0
f
;
float
dampExp
=
(
(
dampProd
!=
0.0
f
)
&&
(
r
<
cAmoebaSim
.
scalingDistanceCutoff
)
)
?
expf
(
-
damp
)
:
0.0
f
;
rr3
*=
(
1.0
f
-
dampExp
);
rr3
*=
(
1.0
f
-
dampExp
);
rr5
*=
(
1.0
f
-
(
1.0
f
+
damp
)
*
dampExp
);
rr5
*=
(
1.0
f
-
(
1.0
f
+
damp
)
*
dampExp
);
float
*
dipole
=
inducedDipoleJ
;
float
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
atomJ
.
inducedDipole
[
0
]
+
deltaR
[
1
]
*
atomJ
.
inducedDipole
[
1
]
+
deltaR
[
2
]
*
atomJ
.
inducedDipole
[
2
]
);
float
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
fields
[
0
][
0
]
=
rr3
*
atomJ
.
inducedDipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
0
][
0
]
=
rr3
*
dipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
0
][
1
]
=
rr3
*
atomJ
.
inducedDipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
0
][
1
]
=
rr3
*
dipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
0
][
2
]
=
rr3
*
atomJ
.
inducedDipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
fields
[
0
][
2
]
=
rr3
*
dipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dipole
=
inducedDipolePolarJ
;
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
atomJ
.
inducedDipolePolar
[
0
]
+
deltaR
[
1
]
*
atomJ
.
inducedDipolePolar
[
1
]
+
deltaR
[
2
]
*
atomJ
.
inducedDipolePolar
[
2
]
);
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
fields
[
1
][
0
]
=
rr3
*
atomJ
.
inducedDipolePolar
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
1
][
0
]
=
rr3
*
dipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
1
][
1
]
=
rr3
*
atomJ
.
inducedDipolePolar
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
1
][
1
]
=
rr3
*
dipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
1
][
2
]
=
rr3
*
atomJ
.
inducedDipolePolar
[
2
]
+
dDotDelta
*
deltaR
[
2
];
fields
[
1
][
2
]
=
rr3
*
dipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dipole
=
inducedDipoleI
;
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
atomI
.
inducedDipole
[
0
]
+
deltaR
[
1
]
*
atomI
.
inducedDipole
[
1
]
+
deltaR
[
2
]
*
atomI
.
inducedDipole
[
2
]
);
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
fields
[
2
][
0
]
=
rr3
*
atomI
.
inducedDipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
2
][
0
]
=
rr3
*
dipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
2
][
1
]
=
rr3
*
atomI
.
inducedDipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
2
][
1
]
=
rr3
*
dipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
2
][
2
]
=
rr3
*
atomI
.
inducedDipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
fields
[
2
][
2
]
=
rr3
*
dipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
dipole
=
inducedDipolePolarI
;
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
atomI
.
inducedDipolePolar
[
0
]
+
deltaR
[
1
]
*
atomI
.
inducedDipolePolar
[
1
]
+
deltaR
[
2
]
*
atomI
.
inducedDipolePolar
[
2
]
);
dDotDelta
=
rr5
*
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
fields
[
3
][
0
]
=
rr3
*
atomI
.
inducedDipolePolar
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
3
][
0
]
=
rr3
*
dipole
[
0
]
+
dDotDelta
*
deltaR
[
0
];
fields
[
3
][
1
]
=
rr3
*
atomI
.
inducedDipolePolar
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
3
][
1
]
=
rr3
*
dipole
[
1
]
+
dDotDelta
*
deltaR
[
1
];
fields
[
3
][
2
]
=
rr3
*
atomI
.
inducedDipolePolar
[
2
]
+
dDotDelta
*
deltaR
[
2
];
fields
[
3
][
2
]
=
rr3
*
dipole
[
2
]
+
dDotDelta
*
deltaR
[
2
];
#ifdef AMOEBA_DEBUGX
//int n2 = numberOfAtoms*numberOfAtoms;
//int debugIndex = atomI*numberOfAtoms + atomJ;
if
(
atomI
==
0
){
int
debugIndex
=
atomJ
;
debugArray
[
debugIndex
].
x
=
rr3
;
debugArray
[
debugIndex
].
y
=
rr5
;
dipole
=
&
inducedDipole
[
atomJ
*
3
];
debugArray
[
debugIndex
].
z
=
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
dipole
=
&
inducedDipole
[
atomI
*
3
];
debugArray
[
debugIndex
].
w
=
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
debugIndex
+=
numberOfAtoms
;
dipole
=
&
inducedDipolePolar
[
atomJ
*
3
];
debugArray
[
debugIndex
].
x
=
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
dipole
=
&
inducedDipolePolar
[
atomI
*
3
];
debugArray
[
debugIndex
].
y
=
(
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
]
);
debugIndex
+=
numberOfAtoms
;
debugArray
[
debugIndex
].
x
=
fields1
[
atomI
*
numberOfAtoms
*
3
+
atomJ
*
3
+
0
];
debugArray
[
debugIndex
].
y
=
fields1
[
atomI
*
numberOfAtoms
*
3
+
atomJ
*
3
+
1
];
debugArray
[
debugIndex
].
z
=
fields1
[
atomI
*
numberOfAtoms
*
3
+
atomJ
*
3
+
2
];
debugIndex
+=
numberOfAtoms
;
debugArray
[
debugIndex
].
x
=
fields1
[
atomJ
*
numberOfAtoms
*
3
+
atomI
*
3
+
0
];
debugArray
[
debugIndex
].
y
=
fields1
[
atomJ
*
numberOfAtoms
*
3
+
atomI
*
3
+
1
];
debugArray
[
debugIndex
].
z
=
fields1
[
atomJ
*
numberOfAtoms
*
3
+
atomI
*
3
+
2
];
debugIndex
+=
numberOfAtoms
;
debugArray
[
debugIndex
].
x
=
fields2
[
atomI
*
numberOfAtoms
*
3
+
atomJ
*
3
+
0
];
debugArray
[
debugIndex
].
y
=
fields2
[
atomI
*
numberOfAtoms
*
3
+
atomJ
*
3
+
1
];
debugArray
[
debugIndex
].
z
=
fields2
[
atomI
*
numberOfAtoms
*
3
+
atomJ
*
3
+
2
];
debugIndex
+=
numberOfAtoms
;
debugArray
[
debugIndex
].
x
=
fields2
[
atomJ
*
numberOfAtoms
*
3
+
atomI
*
3
+
0
];
debugArray
[
debugIndex
].
y
=
fields2
[
atomJ
*
numberOfAtoms
*
3
+
atomI
*
3
+
1
];
debugArray
[
debugIndex
].
z
=
fields2
[
atomJ
*
numberOfAtoms
*
3
+
atomI
*
3
+
2
];
}
#endif
}
}
#include "kCalculateAmoebaCudaMutualInducedParticle.h"
// Include versions of the kernels for N^2 calculations.
// Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##N2##b
#define METHOD_NAME(a, b) a##N2##b
...
@@ -230,18 +176,9 @@ void kReduceMutualInducedFieldDelta_kernel(int numberOfEntries, float* arrayOfDe
...
@@ -230,18 +176,9 @@ void kReduceMutualInducedFieldDelta_kernel(int numberOfEntries, float* arrayOfDe
{
{
epsilon
[
0
]
=
delta
[
0
].
x
>
delta
[
0
].
y
?
delta
[
0
].
x
:
delta
[
0
].
y
;
epsilon
[
0
]
=
delta
[
0
].
x
>
delta
[
0
].
y
?
delta
[
0
].
x
:
delta
[
0
].
y
;
epsilon
[
0
]
=
4.8033324
f
*
sqrtf
(
epsilon
[
0
]
/
(
(
float
)
(
numberOfEntries
/
3
))
);
epsilon
[
0
]
=
4.8033324
f
*
sqrtf
(
epsilon
[
0
]
/
(
(
float
)
(
numberOfEntries
/
3
))
);
#ifdef AMOEBA_DEBUG
epsilon
[
1
]
=
4.8033324
f
*
sqrtf
(
delta
[
0
].
x
/
(
(
float
)
(
numberOfEntries
/
3
))
);
epsilon
[
2
]
=
4.8033324
f
*
sqrtf
(
delta
[
0
].
y
/
(
(
float
)
(
numberOfEntries
/
3
))
);
#endif
}
}
}
}
/**
matrixProduct/matrixProductP contains epsilon**2 on output
*/
__global__
__global__
#if (__CUDA_ARCH__ >= 200)
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
...
@@ -291,46 +228,6 @@ static void kReduceMutualInducedFields(amoebaGpuContext amoebaGpu, CUDAStream<fl
...
@@ -291,46 +228,6 @@ static void kReduceMutualInducedFields(amoebaGpuContext amoebaGpu, CUDAStream<fl
LAUNCHERROR
(
"kReduceMI_Fields2"
);
LAUNCHERROR
(
"kReduceMI_Fields2"
);
}
}
#ifdef AMOEBA_DEBUG
#if 0
static void printMiFieldBuffer( amoebaGpuContext amoebaGpu, unsigned int bufferIndex )
{
(void) fprintf( amoebaGpu->log, "MI Field Buffer %u\n", bufferIndex );
unsigned int start = bufferIndex*3*amoebaGpu->paddedNumberOfAtoms;
unsigned int stop = (bufferIndex+1)*3*amoebaGpu->paddedNumberOfAtoms;
for( unsigned int ii = start; ii < stop; ii += 3 ){
unsigned int ii3Index = ii/3;
unsigned int bufferIndex = ii3Index/(amoebaGpu->paddedNumberOfAtoms);
unsigned int particleIndex = ii3Index - bufferIndex*(amoebaGpu->paddedNumberOfAtoms);
(void) fprintf( amoebaGpu->log, " %6u %3u %6u [%14.6e %14.6e %14.6e] [%14.6e %14.6e %14.6e]\n",
ii/3, bufferIndex, particleIndex,
amoebaGpu->psWorkArray_3_1->_pSysStream[0][ii],
amoebaGpu->psWorkArray_3_1->_pSysStream[0][ii+1],
amoebaGpu->psWorkArray_3_1->_pSysStream[0][ii+2],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][ii],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][ii+1],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][ii+2] );
}
}
static void printMiFieldAtomBuffers( amoebaGpuContext amoebaGpu, unsigned int targetAtom )
{
(void) fprintf( amoebaGpu->log, "MI Field atom %u\n", targetAtom );
for( unsigned int ii = 0; ii < amoebaGpu->outputBuffers; ii++ ){
unsigned int particleIndex = 3*(targetAtom + ii*amoebaGpu->paddedNumberOfAtoms);
(void) fprintf( amoebaGpu->log, " %2u %6u [%14.6e %14.6e %14.6e] [%14.6e %14.6e %14.6e]\n",
ii, particleIndex,
amoebaGpu->psWorkArray_3_1->_pSysStream[0][particleIndex],
amoebaGpu->psWorkArray_3_1->_pSysStream[0][particleIndex+1],
amoebaGpu->psWorkArray_3_1->_pSysStream[0][particleIndex+2],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][particleIndex],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][particleIndex+1],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][particleIndex+2] );
}
}
#endif
#endif
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Compute mutual induce field
Compute mutual induce field
...
@@ -367,9 +264,6 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext
...
@@ -367,9 +264,6 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext
amoebaGpu
->
nonbondThreadsPerBlock
,
amoebaGpu
->
nonbondThreadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
>>>
(
sizeof
(
MutualInducedParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
gpu
->
psPosq4
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipolePolar
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
...
@@ -392,9 +286,6 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext
...
@@ -392,9 +286,6 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext
amoebaGpu
->
nonbondThreadsPerBlock
,
amoebaGpu
->
nonbondThreadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
>>>
(
sizeof
(
MutualInducedParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
gpu
->
psPosq4
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipolePolar
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
...
@@ -410,17 +301,8 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext
...
@@ -410,17 +301,8 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext
kReduceMutualInducedFields
(
amoebaGpu
,
outputArray
,
outputPolarArray
);
kReduceMutualInducedFields
(
amoebaGpu
,
outputArray
,
outputPolarArray
);
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_1
->
Download
();
amoebaGpu
->
psWorkArray_3_1
->
Download
();
amoebaGpu
->
psWorkArray_3_2
->
Download
();
amoebaGpu
->
psWorkArray_3_2
->
Download
();
//printMiFieldAtomBuffers( amoebaGpu, (targetAtom + 0) );
//printMiFieldAtomBuffers( amoebaGpu, (targetAtom + 1) );
//printMiFieldAtomBuffers( amoebaGpu, 100 );
//printMiFieldBuffer( amoebaGpu, 0 );
//printMiFieldBuffer( amoebaGpu, 1 );
//printMiFieldBuffer( amoebaGpu, 37 );
//printMiFieldBuffer( amoebaGpu, 38 );
if
(
amoebaGpu
->
log
&&
iteration
==
-
1
){
if
(
amoebaGpu
->
log
&&
iteration
==
-
1
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished MI kernel execution %d
\n
"
,
iteration
);
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished MI kernel execution %d
\n
"
,
iteration
);
(
void
)
fflush
(
amoebaGpu
->
log
);
...
@@ -523,50 +405,6 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext
...
@@ -523,50 +405,6 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext
}
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
count
++
;
count
++
;
#if 0
//debugIndex += gpu->natoms;
debugIndex += paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e]\n",
debugArray->_pSysStream[0][debugIndex].x, debugArray->_pSysStream[0][debugIndex].y,
debugArray->_pSysStream[0][debugIndex].z );
int index = 0;
sums[index][0] += debugArray->_pSysStream[0][debugIndex].x;
sums[index][1] += debugArray->_pSysStream[0][debugIndex].y;
sums[index][2] += debugArray->_pSysStream[0][debugIndex].z;
debugIndex += gpu->natoms;
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e]\n",
debugArray->_pSysStream[0][debugIndex].x, debugArray->_pSysStream[0][debugIndex].y,
debugArray->_pSysStream[0][debugIndex].z );
index++;
sums[index][0] += debugArray->_pSysStream[0][debugIndex].x;
sums[index][1] += debugArray->_pSysStream[0][debugIndex].y;
sums[index][2] += debugArray->_pSysStream[0][debugIndex].z;
debugIndex += gpu->natoms;
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e]\n",
debugArray->_pSysStream[0][debugIndex].x, debugArray->_pSysStream[0][debugIndex].y,
debugArray->_pSysStream[0][debugIndex].z );
index++;
sums[index][0] += debugArray->_pSysStream[0][debugIndex].x;
sums[index][1] += debugArray->_pSysStream[0][debugIndex].y;
sums[index][2] += debugArray->_pSysStream[0][debugIndex].z;
debugIndex += gpu->natoms;
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e %16.9e]\n",
debugArray->_pSysStream[0][debugIndex].x, debugArray->_pSysStream[0][debugIndex].y,
debugArray->_pSysStream[0][debugIndex].z, debugArray->_pSysStream[0][debugIndex].w );
index++;
sums[index][0] += debugArray->_pSysStream[0][debugIndex].x;
sums[index][1] += debugArray->_pSysStream[0][debugIndex].y;
sums[index][2] += debugArray->_pSysStream[0][debugIndex].z;
#endif
}
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
...
@@ -693,43 +531,6 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
...
@@ -693,43 +531,6 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
cudaComputeAmoebaMutualInducedFieldMatrixMultiply
(
amoebaGpu
,
amoebaGpu
->
psWorkVector
[
0
],
amoebaGpu
->
psWorkVector
[
1
]
);
cudaComputeAmoebaMutualInducedFieldMatrixMultiply
(
amoebaGpu
,
amoebaGpu
->
psWorkVector
[
0
],
amoebaGpu
->
psWorkVector
[
1
]
);
LAUNCHERROR
(
"cudaComputeAmoebaMutualInducedFieldMatrixMultiply Loop
\n
"
);
LAUNCHERROR
(
"cudaComputeAmoebaMutualInducedFieldMatrixMultiply Loop
\n
"
);
//#ifdef AMOEBA_DEBUG
#if 0
if( 0 && amoebaGpu->log ){
amoebaGpu->psInducedDipole->Download();
amoebaGpu->psInducedDipolePolar->Download();
(void) fprintf( amoebaGpu->log, "%s Post cudaComputeAmoebaMutualInducedFieldMatrixMultiply\n", methodName );
int offset = 0;
int maxPrint = 20;
for( int ii = 0; ii < gpu->natoms; ii++ ){
(void) fprintf( amoebaGpu->log, "%4d ", ii );
(void) fprintf( amoebaGpu->log," MMi[%14.6e %14.6e %14.6e] ",
amoebaGpu->psInducedDipole->_pSysStream[0][offset], amoebaGpu->psInducedDipole->_pSysStream[0][offset+1], amoebaGpu->psInducedDipole->_pSysStream[0][offset+2] );
(void) fprintf( amoebaGpu->log,"MMip[%14.6e %14.6e %14.6e]\n",
amoebaGpu->psInducedDipolePolar->_pSysStream[0][offset], amoebaGpu->psInducedDipolePolar->_pSysStream[0][offset+1], amoebaGpu->psInducedDipolePolar->_pSysStream[0][offset+2] );
if( ii == maxPrint && (ii < (gpu->natoms - maxPrint) ) ){
ii = (gpu->natoms - maxPrint);
offset = ii*3;
} else {
offset += 3;
}
}
(void) fflush( amoebaGpu->log );
// write results to file
if( iteration == -1 ){
fileId[1] = iteration;
cudaWriteFloat1AndFloat1ArraysToFile( gpu->natoms, "CudaMI_MatrixProduct1", fileId, 3, amoebaGpu->psWorkVector[0],
3, amoebaGpu->psWorkVector[1] );
}
}
#endif
// ---------------------------------------------------------------------------------------
// post matrix multiply
// post matrix multiply
kSorUpdateMutualInducedField_kernel
<<<
numBlocks
,
numThreads
>>>
(
kSorUpdateMutualInducedField_kernel
<<<
numBlocks
,
numThreads
>>>
(
...
@@ -763,18 +564,8 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
...
@@ -763,18 +564,8 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
if
(
amoebaGpu
->
log
){
if
(
amoebaGpu
->
log
){
amoebaGpu
->
psInducedDipole
->
Download
();
amoebaGpu
->
psInducedDipole
->
Download
();
amoebaGpu
->
psInducedDipolePolar
->
Download
();
amoebaGpu
->
psInducedDipolePolar
->
Download
();
#if 1
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s iteration=%3d eps %14.6e done=%d
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s iteration=%3d eps %14.6e [%14.6e %14.6e] done=%d
\n
"
,
methodName
,
iteration
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
done
);
methodName
,
iteration
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
1
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
2
],
done
);
#else
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s iteration=%3d eps %14.6e %14.6e crrntEps=%14.6e %14.6e %14.6e %14.6e done=%d
\n
"
,
methodName
,
iteration
,
sum1
,
sum2
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
0
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
1
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
2
],
done
);
#endif
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fflush
(
amoebaGpu
->
log
);
int
offset
=
0
;
int
offset
=
0
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.h
View file @
c22f00cf
...
@@ -36,11 +36,7 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
...
@@ -36,11 +36,7 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif
#endif
void
METHOD_NAME
(
kCalculateAmoebaMutualInducedField
,
_kernel
)(
void
METHOD_NAME
(
kCalculateAmoebaMutualInducedField
,
_kernel
)(
unsigned
int
*
workUnit
,
unsigned
int
*
workUnit
,
float4
*
atomCoord
,
float
*
outputField
,
float
*
outputFieldPolar
float
*
inducedDipole
,
float
*
inducedDipolePolar
,
float
*
outputField
,
float
*
outputFieldPolar
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
,
unsigned
int
targetAtom
,
float4
*
debugArray
,
unsigned
int
targetAtom
#endif
#endif
...
@@ -55,10 +51,6 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
...
@@ -55,10 +51,6 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
lasty
=
0xFFFFFFFF
;
unsigned
int
lasty
=
0xFFFFFFFF
;
float4
jCoord
;
float
jDipole
[
3
];
float
jDipolePolar
[
3
];
while
(
pos
<
end
)
while
(
pos
<
end
)
{
{
...
@@ -76,7 +68,8 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
...
@@ -76,7 +68,8 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
MutualInducedParticle
*
psA
=
&
sA
[
tbx
];
MutualInducedParticle
*
psA
=
&
sA
[
tbx
];
unsigned
int
atomI
=
x
+
tgx
;
unsigned
int
atomI
=
x
+
tgx
;
float4
iCoord
=
atomCoord
[
atomI
];
MutualInducedParticle
localParticle
;
loadMutualInducedShared
(
&
localParticle
,
atomI
);
float
fieldSum
[
3
];
float
fieldSum
[
3
];
float
fieldPolarSum
[
3
];
float
fieldPolarSum
[
3
];
...
@@ -97,9 +90,7 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
...
@@ -97,9 +90,7 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
// load shared data
// load shared data
loadMutualInducedShared
(
&
(
sA
[
threadIdx
.
x
]),
atomI
,
loadMutualInducedShared
(
&
(
sA
[
threadIdx
.
x
]),
atomI
);
atomCoord
,
inducedDipole
,
inducedDipolePolar
,
cAmoebaSim
.
pDampingFactorAndThole
);
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
{
...
@@ -108,14 +99,7 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
...
@@ -108,14 +99,7 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
// load coords, charge, ...
// load coords, charge, ...
loadMutualInducedData
(
&
(
psA
[
j
]),
&
jCoord
,
jDipole
,
jDipolePolar
);
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
j
],
ijField
calculateMutualInducedFieldPairIxn_kernel
(
iCoord
,
jCoord
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
,
psA
[
j
].
damp
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
,
psA
[
j
].
thole
,
&
(
inducedDipole
[
atomI
*
3
]),
&
(
inducedDipolePolar
[
atomI
*
3
]),
jDipole
,
jDipolePolar
,
cAmoebaSim
.
scalingDistanceCutoff
,
ijField
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
,
debugArray
,
debugArray
#endif
#endif
...
@@ -187,9 +171,7 @@ if( atomI == targetAtom ){
...
@@ -187,9 +171,7 @@ if( atomI == targetAtom ){
// load coordinates, charge, ...
// load coordinates, charge, ...
loadMutualInducedShared
(
&
(
sA
[
threadIdx
.
x
]),
atomJ
,
loadMutualInducedShared
(
&
(
sA
[
threadIdx
.
x
]),
atomJ
);
atomCoord
,
inducedDipole
,
inducedDipolePolar
,
cAmoebaSim
.
pDampingFactorAndThole
);
}
}
// zero shared fields
// zero shared fields
...
@@ -203,14 +185,7 @@ if( atomI == targetAtom ){
...
@@ -203,14 +185,7 @@ if( atomI == targetAtom ){
// load coords, charge, ...
// load coords, charge, ...
loadMutualInducedData
(
&
(
psA
[
tj
]),
&
jCoord
,
jDipole
,
jDipolePolar
);
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
tj
],
ijField
calculateMutualInducedFieldPairIxn_kernel
(
iCoord
,
jCoord
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
,
psA
[
tj
].
damp
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
,
psA
[
tj
].
thole
,
&
(
inducedDipole
[
atomI
*
3
]),
&
(
inducedDipolePolar
[
atomI
*
3
]),
jDipole
,
jDipolePolar
,
cAmoebaSim
.
scalingDistanceCutoff
,
ijField
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
,
debugArray
,
debugArray
#endif
#endif
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedParticle.h
View file @
c22f00cf
...
@@ -26,58 +26,50 @@ struct MutualInducedParticle {
...
@@ -26,58 +26,50 @@ struct MutualInducedParticle {
#endif
#endif
};
};
__device__
void
loadMutualInducedShared
(
struct
MutualInducedParticle
*
sA
,
unsigned
int
atomI
,
__device__
static
void
loadMutualInducedShared
(
MutualInducedParticle
*
sA
,
unsigned
int
atomI
)
float4
*
atomCoord
,
float
*
inducedDipole
,
float
*
inducedDipolePolar
,
float2
*
dampingFactorAndThole
#ifdef GK
,
float
*
bornRadii
,
float
*
inducedDipoleS
,
float
*
inducedDipolePolarS
#endif
)
{
{
// coordinates & charge
// coordinates & charge
sA
->
x
=
atomCoord
[
atomI
].
x
;
sA
->
x
=
cSim
.
pPosq
[
atomI
].
x
;
sA
->
y
=
atomCoord
[
atomI
].
y
;
sA
->
y
=
cSim
.
pPosq
[
atomI
].
y
;
sA
->
z
=
atomCoord
[
atomI
].
z
;
sA
->
z
=
cSim
.
pPosq
[
atomI
].
z
;
sA
->
q
=
atomCoord
[
atomI
].
w
;
sA
->
q
=
cSim
.
pPosq
[
atomI
].
w
;
// dipole
// dipole
sA
->
inducedDipole
[
0
]
=
i
nducedDipole
[
atomI
*
3
];
sA
->
inducedDipole
[
0
]
=
cAmoebaSim
.
pI
nducedDipole
[
atomI
*
3
];
sA
->
inducedDipole
[
1
]
=
i
nducedDipole
[
atomI
*
3
+
1
];
sA
->
inducedDipole
[
1
]
=
cAmoebaSim
.
pI
nducedDipole
[
atomI
*
3
+
1
];
sA
->
inducedDipole
[
2
]
=
i
nducedDipole
[
atomI
*
3
+
2
];
sA
->
inducedDipole
[
2
]
=
cAmoebaSim
.
pI
nducedDipole
[
atomI
*
3
+
2
];
// dipole polar
// dipole polar
sA
->
inducedDipolePolar
[
0
]
=
i
nducedDipolePolar
[
atomI
*
3
];
sA
->
inducedDipolePolar
[
0
]
=
cAmoebaSim
.
pI
nducedDipolePolar
[
atomI
*
3
];
sA
->
inducedDipolePolar
[
1
]
=
i
nducedDipolePolar
[
atomI
*
3
+
1
];
sA
->
inducedDipolePolar
[
1
]
=
cAmoebaSim
.
pI
nducedDipolePolar
[
atomI
*
3
+
1
];
sA
->
inducedDipolePolar
[
2
]
=
i
nducedDipolePolar
[
atomI
*
3
+
2
];
sA
->
inducedDipolePolar
[
2
]
=
cAmoebaSim
.
pI
nducedDipolePolar
[
atomI
*
3
+
2
];
sA
->
damp
=
d
ampingFactorAndThole
[
atomI
].
x
;
sA
->
damp
=
cAmoebaSim
.
pD
ampingFactorAndThole
[
atomI
].
x
;
sA
->
thole
=
d
ampingFactorAndThole
[
atomI
].
y
;
sA
->
thole
=
cAmoebaSim
.
pD
ampingFactorAndThole
[
atomI
].
y
;
#ifdef GK
#ifdef GK
sA
->
bornRadius
=
b
ornRadii
[
atomI
];
sA
->
bornRadius
=
cSim
.
pB
ornRadii
[
atomI
];
// dipoleS
// dipoleS
sA
->
inducedDipoleS
[
0
]
=
i
nducedDipoleS
[
atomI
*
3
];
sA
->
inducedDipoleS
[
0
]
=
cAmoebaSim
.
pI
nducedDipoleS
[
atomI
*
3
];
sA
->
inducedDipoleS
[
1
]
=
i
nducedDipoleS
[
atomI
*
3
+
1
];
sA
->
inducedDipoleS
[
1
]
=
cAmoebaSim
.
pI
nducedDipoleS
[
atomI
*
3
+
1
];
sA
->
inducedDipoleS
[
2
]
=
i
nducedDipoleS
[
atomI
*
3
+
2
];
sA
->
inducedDipoleS
[
2
]
=
cAmoebaSim
.
pI
nducedDipoleS
[
atomI
*
3
+
2
];
// dipole polar S
// dipole polar S
sA
->
inducedDipolePolarS
[
0
]
=
i
nducedDipolePolarS
[
atomI
*
3
];
sA
->
inducedDipolePolarS
[
0
]
=
cAmoebaSim
.
pI
nducedDipolePolarS
[
atomI
*
3
];
sA
->
inducedDipolePolarS
[
1
]
=
i
nducedDipolePolarS
[
atomI
*
3
+
1
];
sA
->
inducedDipolePolarS
[
1
]
=
cAmoebaSim
.
pI
nducedDipolePolarS
[
atomI
*
3
+
1
];
sA
->
inducedDipolePolarS
[
2
]
=
i
nducedDipolePolarS
[
atomI
*
3
+
2
];
sA
->
inducedDipolePolarS
[
2
]
=
cAmoebaSim
.
pI
nducedDipolePolarS
[
atomI
*
3
+
2
];
#endif
#endif
}
}
__device__
static
void
zeroMutualInducedParticleSharedField
(
struct
MutualInducedParticle
*
sA
)
__device__
static
void
zeroMutualInducedParticleSharedField
(
MutualInducedParticle
*
sA
)
{
{
// zero shared fields
// zero shared fields
...
@@ -101,44 +93,3 @@ __device__ static void zeroMutualInducedParticleSharedField( struct MutualInduce
...
@@ -101,44 +93,3 @@ __device__ static void zeroMutualInducedParticleSharedField( struct MutualInduce
#endif
#endif
}
}
// load struct and arrays w/ shared data in sA
__device__
void
loadMutualInducedData
(
struct
MutualInducedParticle
*
sA
,
float4
*
jCoord
,
float
*
jInducedDipole
,
float
*
jInducedDipolePolar
#ifdef GK
,
float
*
jBornRadius
,
float
*
jInducedDipoleS
,
float
*
jInducedDipolePolarS
#endif
)
{
// load coords, charge, ...
jCoord
->
x
=
sA
->
x
;
jCoord
->
y
=
sA
->
y
;
jCoord
->
z
=
sA
->
z
;
jCoord
->
w
=
sA
->
q
;
jInducedDipole
[
0
]
=
sA
->
inducedDipole
[
0
];
jInducedDipole
[
1
]
=
sA
->
inducedDipole
[
1
];
jInducedDipole
[
2
]
=
sA
->
inducedDipole
[
2
];
jInducedDipolePolar
[
0
]
=
sA
->
inducedDipolePolar
[
0
];
jInducedDipolePolar
[
1
]
=
sA
->
inducedDipolePolar
[
1
];
jInducedDipolePolar
[
2
]
=
sA
->
inducedDipolePolar
[
2
];
#ifdef GK
*
jBornRadius
=
sA
->
bornRadius
;
jInducedDipoleS
[
0
]
=
sA
->
inducedDipoleS
[
0
];
jInducedDipoleS
[
1
]
=
sA
->
inducedDipoleS
[
1
];
jInducedDipoleS
[
2
]
=
sA
->
inducedDipoleS
[
2
];
jInducedDipolePolarS
[
0
]
=
sA
->
inducedDipolePolarS
[
0
];
jInducedDipolePolarS
[
1
]
=
sA
->
inducedDipolePolarS
[
1
];
jInducedDipolePolarS
[
2
]
=
sA
->
inducedDipolePolarS
[
2
];
#endif
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
0 → 100644
View file @
c22f00cf
//-----------------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------------
#include "amoebaGpuTypes.h"
#include "amoebaCudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h"
#include <stdio.h>
using
namespace
std
;
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaAmoebaGmxSimulation
cAmoebaSim
;
void
SetCalculateAmoebaCudaPmeMutualInducedFieldSim
(
amoebaGpuContext
amoebaGpu
)
{
cudaError_t
status
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
status
=
cudaMemcpyToSymbol
(
cSim
,
&
gpu
->
sim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"SetCalculateAmoebaCudaPmeMutualInducedFieldSim: cudaMemcpyToSymbol: SetSim copy to cSim failed"
);
status
=
cudaMemcpyToSymbol
(
cAmoebaSim
,
&
amoebaGpu
->
amoebaSim
,
sizeof
(
cudaAmoebaGmxSimulation
));
RTERROR
(
status
,
"SetCalculateAmoebaCudaPmeMutualInducedFieldSim: cudaMemcpyToSymbol: SetSim copy to cAmoebaSim failed"
);
}
void
GetCalculateAmoebaCudaPmeMutualInducedFieldSim
(
amoebaGpuContext
amoebaGpu
)
{
cudaError_t
status
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
status
=
cudaMemcpyFromSymbol
(
&
gpu
->
sim
,
cSim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"GetCalculateAmoebaCudaPmeMutualInducedFieldSim: cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
status
=
cudaMemcpyFromSymbol
(
&
amoebaGpu
->
amoebaSim
,
cAmoebaSim
,
sizeof
(
cudaAmoebaGmxSimulation
));
RTERROR
(
status
,
"GetCalculateAmoebaCudaPmeMutualInducedFieldSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed"
);
}
//#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
#include "kCalculateAmoebaCudaMutualInducedParticle.h"
// file includes FixedFieldParticle struct definition/load/unload struct and body kernel for fixed E-field
__device__
void
calculatePmeDirectMutualInducedFieldPairIxn_kernel
(
MutualInducedParticle
&
atomI
,
MutualInducedParticle
&
atomJ
,
float
uscale
,
float
fields
[
4
][
3
]
#ifdef AMOEBA_DEBUG
,
float4
*
pullBack
#endif
){
// compute the real space portion of the Ewald summation
float
xr
=
atomJ
.
x
-
atomI
.
x
;
float
yr
=
atomJ
.
y
-
atomI
.
y
;
float
zr
=
atomJ
.
z
-
atomI
.
z
;
// periodic boundary conditions
xr
-=
floor
(
xr
*
cSim
.
invPeriodicBoxSizeX
+
0.5
f
)
*
cSim
.
periodicBoxSizeX
;
yr
-=
floor
(
yr
*
cSim
.
invPeriodicBoxSizeY
+
0.5
f
)
*
cSim
.
periodicBoxSizeY
;
zr
-=
floor
(
zr
*
cSim
.
invPeriodicBoxSizeZ
+
0.5
f
)
*
cSim
.
periodicBoxSizeZ
;
float
r2
=
xr
*
xr
+
yr
*
yr
+
zr
*
zr
;
float
r
=
sqrtf
(
r2
);
// calculate the error function damping terms
float
ralpha
=
cSim
.
alphaEwald
*
r
;
float
bn
[
3
];
bn
[
0
]
=
erfc
(
ralpha
)
/
r
;
float
alsq2
=
2.0
f
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
;
float
alsq2n
=
1.0
f
/
(
cAmoebaSim
.
sqrtPi
*
cSim
.
alphaEwald
);
float
exp2a
=
exp
(
-
(
ralpha
*
ralpha
));
alsq2n
*=
alsq2
;
bn
[
1
]
=
(
bn
[
0
]
+
alsq2n
*
exp2a
)
/
r2
;
alsq2n
*=
alsq2
;
bn
[
2
]
=
(
3.0
f
*
bn
[
1
]
+
alsq2n
*
exp2a
)
/
r2
;
// compute the error function scaled and unscaled terms
float
scale3
=
1.0
f
;
float
scale5
=
1.0
f
;
float
damp
=
atomI
.
damp
*
atomJ
.
damp
;
if
(
damp
!=
0.0
f
){
float
ratio
=
(
r
/
damp
);
ratio
=
ratio
*
ratio
*
ratio
;
float
pgamma
=
atomI
.
thole
<
atomJ
.
thole
?
atomI
.
thole
:
atomJ
.
thole
;
damp
=
-
pgamma
*
ratio
;
if
(
damp
>
-
50.0
f
)
{
float
expdamp
=
exp
(
damp
);
scale3
=
1.0
f
-
expdamp
;
scale5
=
1.0
f
-
expdamp
*
(
1.0
f
-
damp
);
}
}
float
dsc3
=
uscale
*
scale3
;
float
dsc5
=
uscale
*
scale5
;
float
r3
=
(
r
*
r2
);
float
r5
=
(
r3
*
r2
);
float
rr3
=
(
1.0
f
-
dsc3
)
/
r3
;
float
rr5
=
3.0
f
*
(
1.0
f
-
dsc5
)
/
r5
;
float
duir
=
atomI
.
inducedDipole
[
0
]
*
xr
+
atomI
.
inducedDipole
[
1
]
*
yr
+
atomI
.
inducedDipole
[
2
]
*
zr
;
float
dukr
=
atomJ
.
inducedDipole
[
0
]
*
xr
+
atomJ
.
inducedDipole
[
1
]
*
yr
+
atomJ
.
inducedDipole
[
2
]
*
zr
;
float
puir
=
atomI
.
inducedDipolePolar
[
0
]
*
xr
+
atomI
.
inducedDipolePolar
[
1
]
*
yr
+
atomI
.
inducedDipolePolar
[
2
]
*
zr
;
float
pukr
=
atomJ
.
inducedDipolePolar
[
0
]
*
xr
+
atomJ
.
inducedDipolePolar
[
1
]
*
yr
+
atomJ
.
inducedDipolePolar
[
2
]
*
zr
;
float
fimd
[
3
],
fkmd
[
3
];
float
fimp
[
3
],
fkmp
[
3
];
float
fid
[
3
],
fkd
[
3
];
float
fip
[
3
],
fkp
[
3
];
bn
[
1
]
*=
-
1.0
f
;
fimd
[
0
]
=
bn
[
1
]
*
atomJ
.
inducedDipole
[
0
]
+
bn
[
2
]
*
dukr
*
xr
;
fimd
[
1
]
=
bn
[
1
]
*
atomJ
.
inducedDipole
[
1
]
+
bn
[
2
]
*
dukr
*
yr
;
fimd
[
2
]
=
bn
[
1
]
*
atomJ
.
inducedDipole
[
2
]
+
bn
[
2
]
*
dukr
*
zr
;
fkmd
[
0
]
=
bn
[
1
]
*
atomI
.
inducedDipole
[
0
]
+
bn
[
2
]
*
duir
*
xr
;
fkmd
[
1
]
=
bn
[
1
]
*
atomI
.
inducedDipole
[
1
]
+
bn
[
2
]
*
duir
*
yr
;
fkmd
[
2
]
=
bn
[
1
]
*
atomI
.
inducedDipole
[
2
]
+
bn
[
2
]
*
duir
*
zr
;
fimp
[
0
]
=
bn
[
1
]
*
atomJ
.
inducedDipolePolar
[
0
]
+
bn
[
2
]
*
pukr
*
xr
;
fimp
[
1
]
=
bn
[
1
]
*
atomJ
.
inducedDipolePolar
[
1
]
+
bn
[
2
]
*
pukr
*
yr
;
fimp
[
2
]
=
bn
[
1
]
*
atomJ
.
inducedDipolePolar
[
2
]
+
bn
[
2
]
*
pukr
*
zr
;
fkmp
[
0
]
=
bn
[
1
]
*
atomI
.
inducedDipolePolar
[
0
]
+
bn
[
2
]
*
puir
*
xr
;
fkmp
[
1
]
=
bn
[
1
]
*
atomI
.
inducedDipolePolar
[
1
]
+
bn
[
2
]
*
puir
*
yr
;
fkmp
[
2
]
=
bn
[
1
]
*
atomI
.
inducedDipolePolar
[
2
]
+
bn
[
2
]
*
puir
*
zr
;
rr3
*=
-
1.0
f
;;
fid
[
0
]
=
rr3
*
atomJ
.
inducedDipole
[
0
]
+
rr5
*
dukr
*
xr
;
fid
[
1
]
=
rr3
*
atomJ
.
inducedDipole
[
1
]
+
rr5
*
dukr
*
yr
;
fid
[
2
]
=
rr3
*
atomJ
.
inducedDipole
[
2
]
+
rr5
*
dukr
*
zr
;
fkd
[
0
]
=
rr3
*
atomI
.
inducedDipole
[
0
]
+
rr5
*
duir
*
xr
;
fkd
[
1
]
=
rr3
*
atomI
.
inducedDipole
[
1
]
+
rr5
*
duir
*
yr
;
fkd
[
2
]
=
rr3
*
atomI
.
inducedDipole
[
2
]
+
rr5
*
duir
*
zr
;
fip
[
0
]
=
rr3
*
atomJ
.
inducedDipolePolar
[
0
]
+
rr5
*
pukr
*
xr
;
fip
[
1
]
=
rr3
*
atomJ
.
inducedDipolePolar
[
1
]
+
rr5
*
pukr
*
yr
;
fip
[
2
]
=
rr3
*
atomJ
.
inducedDipolePolar
[
2
]
+
rr5
*
pukr
*
zr
;
fkp
[
0
]
=
rr3
*
atomI
.
inducedDipolePolar
[
0
]
+
rr5
*
puir
*
xr
;
fkp
[
1
]
=
rr3
*
atomI
.
inducedDipolePolar
[
1
]
+
rr5
*
puir
*
yr
;
fkp
[
2
]
=
rr3
*
atomI
.
inducedDipolePolar
[
2
]
+
rr5
*
puir
*
zr
;
// increment the field at each site due to this interaction
if
(
r2
<=
cAmoebaSim
.
cutoffDistance2
){
fields
[
0
][
0
]
=
fimd
[
0
]
-
fid
[
0
];
fields
[
1
][
0
]
=
fkmd
[
0
]
-
fkd
[
0
];
fields
[
2
][
0
]
=
fimp
[
0
]
-
fip
[
0
];
fields
[
3
][
0
]
=
fkmp
[
0
]
-
fkp
[
0
];
fields
[
0
][
1
]
=
fimd
[
1
]
-
fid
[
1
];
fields
[
1
][
1
]
=
fkmd
[
1
]
-
fkd
[
1
];
fields
[
2
][
1
]
=
fimp
[
1
]
-
fip
[
1
];
fields
[
3
][
1
]
=
fkmp
[
1
]
-
fkp
[
1
];
fields
[
0
][
2
]
=
fimd
[
2
]
-
fid
[
2
];
fields
[
1
][
2
]
=
fkmd
[
2
]
-
fkd
[
2
];
fields
[
2
][
2
]
=
fimp
[
2
]
-
fip
[
2
];
fields
[
3
][
2
]
=
fkmp
[
2
]
-
fkp
[
2
];
}
else
{
fields
[
0
][
0
]
=
0.0
f
;
fields
[
1
][
0
]
=
0.0
f
;
fields
[
2
][
0
]
=
0.0
f
;
fields
[
3
][
0
]
=
0.0
f
;
fields
[
0
][
1
]
=
0.0
f
;
fields
[
1
][
1
]
=
0.0
f
;
fields
[
2
][
1
]
=
0.0
f
;
fields
[
3
][
1
]
=
0.0
f
;
fields
[
0
][
2
]
=
0.0
f
;
fields
[
1
][
2
]
=
0.0
f
;
fields
[
2
][
2
]
=
0.0
f
;
fields
[
3
][
2
]
=
0.0
f
;
}
#ifdef AMOEBA_DEBUG
pullBack
[
0
].
x
=
xr
;
pullBack
[
0
].
y
=
yr
;
pullBack
[
0
].
z
=
zr
;
pullBack
[
0
].
w
=
r2
;
pullBack
[
1
].
x
=
alsq2
;
pullBack
[
1
].
y
=
bn
[
0
];
pullBack
[
1
].
z
=
bn
[
2
];
pullBack
[
1
].
w
=
exp2a
;
/*
pullBack[1].x = atomJ.x - atomI.x;
pullBack[1].y = atomJ.y - atomI.y;
pullBack[1].z = atomJ.z - atomI.z;
pullBack[1].w = (atomJ.x - atomI.x)*(atomJ.x - atomI.x) + (atomJ.y - atomI.y)*(atomJ.y - atomI.y)+ (atomJ.z - atomI.z)*(atomJ.z - atomI.z);
pullBack[1].x = scale3;
pullBack[1].y = scale5;
pullBack[1].z = scale7;
*/
#endif
}
// Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateAmoebaCudaPmeMutualInducedField.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateAmoebaCudaPmeMutualInducedField.h"
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
static
void
kInitializeMutualInducedField_kernel
(
int
numberOfAtoms
,
float
*
fixedEField
,
float
*
fixedEFieldPolar
,
float
*
polarizability
,
float
*
inducedDipole
,
float
*
inducedDipolePolar
)
{
int
threadId
=
__mul24
(
blockIdx
.
x
,
blockDim
.
x
)
+
threadIdx
.
x
;
if
(
threadId
>=
3
*
numberOfAtoms
)
return
;
fixedEField
[
threadId
]
*=
polarizability
[
threadId
];
inducedDipole
[
threadId
]
=
fixedEField
[
threadId
];
fixedEFieldPolar
[
threadId
]
*=
polarizability
[
threadId
];
inducedDipolePolar
[
threadId
]
=
fixedEFieldPolar
[
threadId
];
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
static
void
kReduceMutualInducedFieldDelta_kernel
(
int
numberOfEntries
,
float
*
arrayOfDeltas1
,
float
*
arrayOfDeltas2
,
float
*
epsilon
)
{
extern
__shared__
float2
delta
[];
delta
[
threadIdx
.
x
].
x
=
0.0
f
;
delta
[
threadIdx
.
x
].
y
=
0.0
f
;
unsigned
int
pos
=
threadIdx
.
x
;
// load deltas
while
(
pos
<
numberOfEntries
)
{
delta
[
threadIdx
.
x
].
x
+=
arrayOfDeltas1
[
pos
];
delta
[
threadIdx
.
x
].
y
+=
arrayOfDeltas2
[
pos
];
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
__syncthreads
();
// sum the deltas
for
(
int
offset
=
1
;
offset
<
blockDim
.
x
;
offset
*=
2
)
{
if
(
threadIdx
.
x
+
offset
<
blockDim
.
x
&&
(
threadIdx
.
x
&
(
2
*
offset
-
1
))
==
0
)
{
delta
[
threadIdx
.
x
].
x
+=
delta
[
threadIdx
.
x
+
offset
].
x
;
delta
[
threadIdx
.
x
].
y
+=
delta
[
threadIdx
.
x
+
offset
].
y
;
}
__syncthreads
();
}
// set epsilons
if
(
threadIdx
.
x
==
0
)
{
epsilon
[
0
]
=
delta
[
0
].
x
>
delta
[
0
].
y
?
delta
[
0
].
x
:
delta
[
0
].
y
;
epsilon
[
0
]
=
4.8033324
f
*
sqrtf
(
epsilon
[
0
]
/
(
(
float
)
(
numberOfEntries
/
3
))
);
#ifdef AMOEBA_DEBUG
epsilon
[
1
]
=
4.8033324
f
*
sqrtf
(
delta
[
0
].
x
/
(
(
float
)
(
numberOfEntries
/
3
))
);
epsilon
[
2
]
=
4.8033324
f
*
sqrtf
(
delta
[
0
].
y
/
(
(
float
)
(
numberOfEntries
/
3
))
);
#endif
}
}
/**
matrixProduct/matrixProductP contains epsilon**2 on output
*/
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
static
void
kSorUpdateMutualInducedField_kernel
(
int
numberOfEntries
,
float
*
polarizability
,
float
*
inducedDipole
,
float
*
inducedDipoleP
,
float
*
fixedEField
,
float
*
fixedEFieldP
,
float
*
matrixProduct
,
float
*
matrixProductP
)
{
int
threadId
=
__mul24
(
blockIdx
.
x
,
blockDim
.
x
)
+
threadIdx
.
x
;
if
(
threadId
>=
3
*
numberOfEntries
)
return
;
float
previousDipole
=
inducedDipole
[
threadId
];
float
previousDipoleP
=
inducedDipoleP
[
threadId
];
// add self terms to fields
const
float
term
=
(
4.0
f
/
3.0
f
)
*
(
cSim
.
alphaEwald
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
)
/
cAmoebaSim
.
sqrtPi
;
matrixProduct
[
threadId
]
+=
term
*
previousDipole
;
matrixProductP
[
threadId
]
+=
term
*
previousDipoleP
;
inducedDipole
[
threadId
]
=
fixedEField
[
threadId
]
+
polarizability
[
threadId
]
*
matrixProduct
[
threadId
];
inducedDipoleP
[
threadId
]
=
fixedEFieldP
[
threadId
]
+
polarizability
[
threadId
]
*
matrixProductP
[
threadId
];
const
float
polarSOR
=
0.70
f
;
inducedDipole
[
threadId
]
=
previousDipole
+
polarSOR
*
(
inducedDipole
[
threadId
]
-
previousDipole
);
inducedDipoleP
[
threadId
]
=
previousDipoleP
+
polarSOR
*
(
inducedDipoleP
[
threadId
]
-
previousDipoleP
);
matrixProduct
[
threadId
]
=
(
inducedDipole
[
threadId
]
-
previousDipole
)
*
(
inducedDipole
[
threadId
]
-
previousDipole
);
matrixProductP
[
threadId
]
=
(
inducedDipoleP
[
threadId
]
-
previousDipoleP
)
*
(
inducedDipoleP
[
threadId
]
-
previousDipoleP
);
}
// reduce psWorkArray_3_1
// reduce psWorkArray_3_2
static
void
kReduceMutualInducedFields
(
amoebaGpuContext
amoebaGpu
,
CUDAStream
<
float
>*
outputArray
,
CUDAStream
<
float
>*
outputPolarArray
)
{
kReduceFields_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
fieldReduceThreadsPerBlock
>>>
(
amoebaGpu
->
paddedNumberOfAtoms
*
3
,
amoebaGpu
->
outputBuffers
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
outputArray
->
_pDevStream
[
0
]
);
LAUNCHERROR
(
"kReducePmeMI_Fields1"
);
kReduceFields_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
fieldReduceThreadsPerBlock
>>>
(
amoebaGpu
->
paddedNumberOfAtoms
*
3
,
amoebaGpu
->
outputBuffers
,
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
outputPolarArray
->
_pDevStream
[
0
]
);
LAUNCHERROR
(
"kReducePmeMI_Fields2"
);
}
/**---------------------------------------------------------------------------------------
Compute mutual induce field
@param amoebaGpu amoebaGpu context
--------------------------------------------------------------------------------------- */
static
void
cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply
(
amoebaGpuContext
amoebaGpu
,
CUDAStream
<
float
>*
outputArray
,
CUDAStream
<
float
>*
outputPolarArray
)
{
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
#ifdef AMOEBA_DEBUG
int
targetAtom
=
0
;
static
const
char
*
methodName
=
"cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply"
;
static
int
iteration
=
1
;
if
(
1
&&
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s: scalingDistanceCutoff=%.5f
\n
"
,
methodName
,
amoebaGpu
->
scalingDistanceCutoff
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
*
paddedNumberOfAtoms
,
1
,
"DebugArray"
);
memset
(
debugArray
->
_pSysStream
[
0
],
0
,
sizeof
(
float
)
*
4
*
paddedNumberOfAtoms
*
paddedNumberOfAtoms
);
debugArray
->
Upload
();
#endif
kClearFields_3
(
amoebaGpu
,
2
);
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaPmeMutualInducedFieldN2ByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondThreadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
debugArray
->
_pDevStream
[
0
],
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
]
);
#endif
}
else
{
#ifdef AMOEBA_DEBUG
(
void
)
fprintf
(
amoebaGpu
->
log
,
"N2 no warp
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"AmoebaN2Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
\n
"
,
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondThreadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
MutualInducedParticle
),
sizeof
(
MutualInducedParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#endif
kCalculateAmoebaPmeMutualInducedFieldN2_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondThreadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
debugArray
->
_pDevStream
[
0
],
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
]
);
#endif
}
LAUNCHERROR
(
"kCalculateAmoebaPmeMutualInducedField"
);
kReduceMutualInducedFields
(
amoebaGpu
,
outputArray
,
outputPolarArray
);
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
&&
iteration
==
1
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished maxtrixMultiply kernel execution %d
\n
"
,
iteration
);
(
void
)
fflush
(
amoebaGpu
->
log
);
outputArray
->
Download
();
outputPolarArray
->
Download
();
debugArray
->
Download
();
int
maxPrint
=
5
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
int
indexOffset
=
ii
*
3
;
// MI
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Mult[%16.9e %16.9e %16.9e] "
,
outputArray
->
_pSysStream
[
0
][
indexOffset
],
outputArray
->
_pSysStream
[
0
][
indexOffset
+
1
],
outputArray
->
_pSysStream
[
0
][
indexOffset
+
2
]
);
// MI polar
(
void
)
fprintf
(
amoebaGpu
->
log
,
"MultP[%16.9e %16.9e %16.9e]
\n
"
,
outputPolarArray
->
_pSysStream
[
0
][
indexOffset
],
outputPolarArray
->
_pSysStream
[
0
][
indexOffset
+
1
],
outputPolarArray
->
_pSysStream
[
0
][
indexOffset
+
2
]
);
if
(
ii
==
maxPrint
&&
(
gpu
->
natoms
-
maxPrint
)
>
ii
){
ii
=
gpu
->
natoms
-
maxPrint
;
}
}
/*
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
for( int jj = 0; jj < gpu->natoms; jj++ ){
int debugIndex = jj;
(void) fprintf( amoebaGpu->log,"%5d PmeMIMult\n", jj );
for( int kk = 0; kk < 7; kk++ ){
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e %16.9e]\n",
debugArray->_pSysStream[0][debugIndex].x, debugArray->_pSysStream[0][debugIndex].y,
debugArray->_pSysStream[0][debugIndex].z, debugArray->_pSysStream[0][debugIndex].w );
debugIndex += paddedNumberOfAtoms;
}
(void) fprintf( amoebaGpu->log,"\n" );
}
*/
(
void
)
fflush
(
amoebaGpu
->
log
);
iteration
++
;
}
delete
debugArray
;
#endif
}
/**---------------------------------------------------------------------------------------
Compute mutual induce field
@param amoebaGpu amoebaGpu context
--------------------------------------------------------------------------------------- */
//#define GET_EFIELD_FROM_FILE
#ifdef GET_EFIELD_FROM_FILE
#include <stdlib.h>
#endif
static
void
cudaComputeAmoebaPmeMutualInducedFieldBySOR
(
amoebaGpuContext
amoebaGpu
)
{
// ---------------------------------------------------------------------------------------
#ifdef AMOEBA_DEBUG
static
const
char
*
methodName
=
"cudaComputeAmoebaPmeMutualInducedFieldBySOR"
;
static
int
timestep
=
0
;
std
::
vector
<
int
>
fileId
;
timestep
++
;
fileId
.
resize
(
2
);
fileId
[
0
]
=
timestep
;
fileId
[
1
]
=
1
;
#endif
// ---------------------------------------------------------------------------------------
int
done
;
int
iteration
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
int
numOfElems
=
gpu
->
natoms
*
3
;
int
numThreads
=
min
(
THREADS_PER_BLOCK
,
numOfElems
);
int
numBlocks
=
numOfElems
/
numThreads
;
if
(
(
numOfElems
%
numThreads
)
!=
0
)
numBlocks
++
;
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s %d numOfElems=%d numThreads=%d numBlocks=%d "
"maxIterations=%d targetEpsilon=%.3e
\n
"
,
methodName
,
gpu
->
natoms
,
numOfElems
,
numThreads
,
numBlocks
,
amoebaGpu
->
mutualInducedMaxIterations
,
amoebaGpu
->
mutualInducedTargetEpsilon
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
#ifdef GET_EFIELD_FROM_FILE
std
::
string
fileName
=
"waterEField.txt"
;
StringVectorVector
fileContents
;
readFile
(
fileName
,
fileContents
);
unsigned
int
offset
=
0
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Read file: %s %u
\n
"
,
fileName
.
c_str
(),
fileContents
.
size
()
);
fflush
(
amoebaGpu
->
log
);
for
(
unsigned
int
ii
=
1
;
ii
<
fileContents
.
size
()
-
1
;
ii
++
){
StringVector
lineTokens
=
fileContents
[
ii
];
unsigned
int
lineTokenIndex
=
1
;
// (void) fprintf( amoebaGpu->log, " %u %s %s\n", ii, lineTokens[0].c_str(), lineTokens[lineTokenIndex].c_str() ); fflush( amoebaGpu->log );
amoebaGpu
->
psE_Field
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psE_Field
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psE_Field
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
offset
-=
3
;
amoebaGpu
->
psE_FieldPolar
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psE_FieldPolar
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psE_FieldPolar
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
}
float
conversion
=
100.0
f
;
for
(
int
ii
=
0
;
ii
<
3
*
gpu
->
natoms
;
ii
++
){
amoebaGpu
->
psE_Field
->
_pSysStream
[
0
][
ii
]
*=
conversion
;
amoebaGpu
->
psE_FieldPolar
->
_pSysStream
[
0
][
ii
]
*=
conversion
;
}
amoebaGpu
->
psE_Field
->
Upload
();
amoebaGpu
->
psE_FieldPolar
->
Upload
();
#endif
// ---------------------------------------------------------------------------------------
// set E_Field & E_FieldPolar] to [ E_Field & E_FieldPolar]*Polarizability
// initialize [ InducedDipole & InducedDipolePolar ] to [ E_Field & E_FieldPolar]*Polarizability
kInitializeMutualInducedField_kernel
<<<
numBlocks
,
numThreads
>>>
(
gpu
->
natoms
,
amoebaGpu
->
psE_Field
->
_pDevStream
[
0
],
amoebaGpu
->
psE_FieldPolar
->
_pDevStream
[
0
],
amoebaGpu
->
psPolarizability
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipolePolar
->
_pDevStream
[
0
]
);
LAUNCHERROR
(
"AmoebaPmeMutualInducedFieldSetup"
);
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
amoebaGpu
->
psE_Field
->
Download
();
amoebaGpu
->
psE_FieldPolar
->
Download
();
amoebaGpu
->
psInducedDipole
->
Download
(),
amoebaGpu
->
psInducedDipolePolar
->
Download
();
amoebaGpu
->
psPolarizability
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s Initial setup for matrix multiply
\n
"
,
methodName
);
int
offset
=
0
;
int
maxPrint
=
20000
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%4d pol=%12.4e "
,
ii
,
amoebaGpu
->
psPolarizability
->
_pSysStream
[
0
][
offset
]
);
if
(
amoebaGpu
->
psPolarizability
->
_pSysStream
[
0
][
offset
]
!=
amoebaGpu
->
psPolarizability
->
_pSysStream
[
0
][
offset
+
1
]
||
amoebaGpu
->
psPolarizability
->
_pSysStream
[
0
][
offset
]
!=
amoebaGpu
->
psPolarizability
->
_pSysStream
[
0
][
offset
+
2
]
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"PolX!!! %12.4e %12.4e "
,
amoebaGpu
->
psPolarizability
->
_pSysStream
[
0
][
offset
+
1
],
amoebaGpu
->
psPolarizability
->
_pSysStream
[
0
][
offset
+
2
]
);
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
" E[%14.6e %14.6e %14.6e] Mi[%14.6e %14.6e %14.6e] "
,
amoebaGpu
->
psE_Field
->
_pSysStream
[
0
][
offset
],
amoebaGpu
->
psE_Field
->
_pSysStream
[
0
][
offset
+
1
],
amoebaGpu
->
psE_Field
->
_pSysStream
[
0
][
offset
+
2
],
amoebaGpu
->
psInducedDipole
->
_pSysStream
[
0
][
offset
],
amoebaGpu
->
psInducedDipole
->
_pSysStream
[
0
][
offset
+
1
],
amoebaGpu
->
psInducedDipole
->
_pSysStream
[
0
][
offset
+
2
]
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Ep[%14.6e %14.6e %14.6e] Mip[%14.6e %14.6e %14.6e]
\n
"
,
amoebaGpu
->
psE_FieldPolar
->
_pSysStream
[
0
][
offset
],
amoebaGpu
->
psE_FieldPolar
->
_pSysStream
[
0
][
offset
+
1
],
amoebaGpu
->
psE_FieldPolar
->
_pSysStream
[
0
][
offset
+
2
],
amoebaGpu
->
psInducedDipolePolar
->
_pSysStream
[
0
][
offset
],
amoebaGpu
->
psInducedDipolePolar
->
_pSysStream
[
0
][
offset
+
1
],
amoebaGpu
->
psInducedDipolePolar
->
_pSysStream
[
0
][
offset
+
2
]
);
offset
+=
3
;
if
(
ii
==
maxPrint
&&
(
ii
<
(
gpu
->
natoms
-
maxPrint
)
)
)
ii
=
(
gpu
->
natoms
-
maxPrint
);
}
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
// ---------------------------------------------------------------------------------------
done
=
0
;
iteration
=
1
;
while
(
!
done
){
// matrix multiply
cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply
(
amoebaGpu
,
amoebaGpu
->
psWorkVector
[
0
],
amoebaGpu
->
psWorkVector
[
1
]
);
LAUNCHERROR
(
"cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply Loop
\n
"
);
#ifdef GET_EFIELD_FROM_FILE
{
std
::
string
fileName
=
"waterInduceRecip.txt"
;
StringVectorVector
fileContents
;
readFile
(
fileName
,
fileContents
);
unsigned
int
offset
=
0
;
amoebaGpu
->
psWorkVector
[
0
]
->
Download
();
amoebaGpu
->
psWorkVector
[
1
]
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Read file: %s %u
\n
"
,
fileName
.
c_str
(),
fileContents
.
size
()
);
fflush
(
amoebaGpu
->
log
);
float
conversion
=
100.0
f
;
for
(
unsigned
int
ii
=
1
;
ii
<
fileContents
.
size
()
-
1
;
ii
++
){
StringVector
lineTokens
=
fileContents
[
ii
];
unsigned
int
lineTokenIndex
=
1
;
// (void) fprintf( amoebaGpu->log, " %u %s %s\n", ii, lineTokens[0].c_str(), lineTokens[lineTokenIndex].c_str() ); fflush( amoebaGpu->log );
amoebaGpu
->
psWorkVector
[
0
]
->
_pSysStream
[
0
][
offset
++
]
+=
conversion
*
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psWorkVector
[
0
]
->
_pSysStream
[
0
][
offset
++
]
+=
conversion
*
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psWorkVector
[
0
]
->
_pSysStream
[
0
][
offset
++
]
+=
conversion
*
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
offset
-=
3
;
amoebaGpu
->
psWorkVector
[
1
]
->
_pSysStream
[
0
][
offset
++
]
+=
conversion
*
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psWorkVector
[
1
]
->
_pSysStream
[
0
][
offset
++
]
+=
conversion
*
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psWorkVector
[
1
]
->
_pSysStream
[
0
][
offset
++
]
+=
conversion
*
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
}
amoebaGpu
->
psWorkVector
[
0
]
->
Upload
();
amoebaGpu
->
psWorkVector
[
1
]
->
Upload
();
}
#endif
// post matrix multiply
kSorUpdateMutualInducedField_kernel
<<<
numBlocks
,
numThreads
>>>
(
gpu
->
natoms
,
amoebaGpu
->
psPolarizability
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipolePolar
->
_pDevStream
[
0
],
amoebaGpu
->
psE_Field
->
_pDevStream
[
0
],
amoebaGpu
->
psE_FieldPolar
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkVector
[
0
]
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkVector
[
1
]
->
_pDevStream
[
0
]
);
LAUNCHERROR
(
"kSorUpdatePmeMutualInducedField"
);
// get total epsilon -- performing sums on gpu
kReduceMutualInducedFieldDelta_kernel
<<<
1
,
amoebaGpu
->
epsilonThreadsPerBlock
,
2
*
sizeof
(
float
)
*
amoebaGpu
->
epsilonThreadsPerBlock
>>>
(
3
*
gpu
->
natoms
,
amoebaGpu
->
psWorkVector
[
0
]
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkVector
[
1
]
->
_pDevStream
[
0
],
amoebaGpu
->
psCurrentEpsilon
->
_pDevStream
[
0
]
);
LAUNCHERROR
(
"kReducePmeMutualInducedFieldDelta"
);
if
(
amoebaGpu
->
log
){
trackMutualInducedIterations
(
amoebaGpu
,
iteration
);
}
// Debye=4.8033324f
amoebaGpu
->
psCurrentEpsilon
->
Download
();
float
currentEpsilon
=
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
0
];
amoebaGpu
->
mutualInducedCurrentEpsilon
=
currentEpsilon
;
if
(
iteration
>
amoebaGpu
->
mutualInducedMaxIterations
||
amoebaGpu
->
mutualInducedCurrentEpsilon
<
amoebaGpu
->
mutualInducedTargetEpsilon
){
done
=
1
;
}
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
amoebaGpu
->
psInducedDipole
->
Download
();
amoebaGpu
->
psInducedDipolePolar
->
Download
();
#if 1
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s iteration=%3d eps %14.6e [%14.6e %14.6e] done=%d
\n
"
,
methodName
,
iteration
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
1
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
2
],
done
);
#else
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s iteration=%3d eps %14.6e %14.6e crrntEps=%14.6e %14.6e %14.6e %14.6e done=%d
\n
"
,
methodName
,
iteration
,
sum1
,
sum2
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
0
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
1
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
2
],
done
);
#endif
(
void
)
fflush
(
amoebaGpu
->
log
);
int
offset
=
0
;
int
maxPrint
=
20
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%4d "
,
ii
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
" Mi[%14.6e %14.6e %14.6e] "
,
amoebaGpu
->
psInducedDipole
->
_pSysStream
[
0
][
offset
],
amoebaGpu
->
psInducedDipole
->
_pSysStream
[
0
][
offset
+
1
],
amoebaGpu
->
psInducedDipole
->
_pSysStream
[
0
][
offset
+
2
]
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Mip[%14.6e %14.6e %14.6e]
\n
"
,
amoebaGpu
->
psInducedDipolePolar
->
_pSysStream
[
0
][
offset
],
amoebaGpu
->
psInducedDipolePolar
->
_pSysStream
[
0
][
offset
+
1
],
amoebaGpu
->
psInducedDipolePolar
->
_pSysStream
[
0
][
offset
+
2
]
);
if
(
ii
==
maxPrint
&&
(
ii
<
(
gpu
->
natoms
-
maxPrint
)
)
){
ii
=
(
gpu
->
natoms
-
maxPrint
);
offset
=
3
*
(
ii
+
1
);
}
else
{
offset
+=
3
;
}
}
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
iteration
++
;
}
amoebaGpu
->
mutualInducedDone
=
done
;
amoebaGpu
->
mutualInducedConverged
=
(
!
done
||
iteration
>
amoebaGpu
->
mutualInducedMaxIterations
)
?
0
:
1
;
#ifdef AMOEBA_DEBUG
/*
if( 0 ){
std::vector<int> fileId;
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipole, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipolePolar, outputVector );
cudaWriteVectorOfDoubleVectorsToFile( "CudaPmeMI", fileId, outputVector );
}
*/
#endif
// ---------------------------------------------------------------------------------------
}
void
cudaComputeAmoebaPmeMutualInducedField
(
amoebaGpuContext
amoebaGpu
)
{
if
(
amoebaGpu
->
mutualInducedIterativeMethod
==
0
){
cudaComputeAmoebaPmeMutualInducedFieldBySOR
(
amoebaGpu
);
}
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
0 → 100644
View file @
c22f00cf
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaScaleFactors.h"
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_NONBOND_THREADS_PER_BLOCK
,
1
)
#endif
void
METHOD_NAME
(
kCalculateAmoebaPmeMutualInducedField
,
_kernel
)(
unsigned
int
*
workUnit
,
float
*
outputField
,
float
*
outputFieldPolar
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
,
unsigned
int
targetAtom
#endif
){
extern
__shared__
MutualInducedParticle
sA
[];
unsigned
int
totalWarps
=
gridDim
.
x
*
blockDim
.
x
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
lasty
=
0xFFFFFFFF
;
const
float
uscale
=
1
.
0
f
;
#ifdef AMOEBA_DEBUG
float4
pullBack
[
4
];
#endif
while
(
pos
<
end
)
{
unsigned
int
x
;
unsigned
int
y
;
bool
bExclusionFlag
;
// Extract cell coordinates
decodeCell
(
workUnit
[
pos
],
&
x
,
&
y
,
&
bExclusionFlag
);
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
MutualInducedParticle
*
psA
=
&
sA
[
tbx
];
unsigned
int
atomI
=
x
+
tgx
;
MutualInducedParticle
localParticle
;
loadMutualInducedShared
(
&
localParticle
,
atomI
);
float
fieldSum
[
3
];
float
fieldPolarSum
[
3
];
// 0: field at i due to j
// 1: field at i due to j polar
fieldSum
[
0
]
=
0
.
0
f
;
fieldSum
[
1
]
=
0
.
0
f
;
fieldSum
[
2
]
=
0
.
0
f
;
fieldPolarSum
[
0
]
=
0
.
0
f
;
fieldPolarSum
[
1
]
=
0
.
0
f
;
fieldPolarSum
[
2
]
=
0
.
0
f
;
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
// load shared data
loadMutualInducedShared
(
&
(
sA
[
threadIdx
.
x
]),
atomI
);
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
ijField
[
4
][
3
];
// load coords, charge, ...
calculatePmeDirectMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
j
],
uscale
,
ijField
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
);
unsigned
int
mask
=
(
(
atomI
==
(
y
+
j
))
||
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
((
y
+
j
)
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
0
:
1
;
// add to field at atomI the field due atomJ's dipole
fieldSum
[
0
]
+=
mask
?
ijField
[
0
][
0
]
:
0
.
0
f
;
fieldSum
[
1
]
+=
mask
?
ijField
[
0
][
1
]
:
0
.
0
f
;
fieldSum
[
2
]
+=
mask
?
ijField
[
0
][
2
]
:
0
.
0
f
;
fieldPolarSum
[
0
]
+=
mask
?
ijField
[
2
][
0
]
:
0
.
0
f
;
fieldPolarSum
[
1
]
+=
mask
?
ijField
[
2
][
1
]
:
0
.
0
f
;
fieldPolarSum
[
2
]
+=
mask
?
ijField
[
2
][
2
]
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
||
(
y
+
j
)
==
targetAtom
){
unsigned
int
index
=
atomI
==
targetAtom
?
(
y
+
j
)
:
atomI
;
unsigned
int
pullBackIndex
=
0
;
unsigned
int
indexI
=
0
;
unsigned
int
indexJ
=
indexI
?
0
:
2
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
j
);
debugArray
[
index
].
z
=
cAmoebaSim
.
cutoffDistance2
;
debugArray
[
index
].
w
=
6
.
0
f
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullBack
[
pullBackIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullBackIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullBackIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullBackIndex
].
w
;
pullBackIndex
++
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullBack
[
pullBackIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullBackIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullBackIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullBackIndex
].
w
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
float
flag
=
6
.
0
f
;
debugArray
[
index
].
x
=
ijField
[
indexI
][
0
];
debugArray
[
index
].
y
=
ijField
[
indexI
][
1
];
debugArray
[
index
].
z
=
ijField
[
indexI
][
2
];
debugArray
[
index
].
w
=
flag
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
ijField
[
indexJ
][
0
];
debugArray
[
index
].
y
=
ijField
[
indexJ
][
1
];
debugArray
[
index
].
z
=
ijField
[
indexJ
][
2
];
debugArray
[
index
].
w
=
flag
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
ijField
[
indexI
+
1
][
0
];
debugArray
[
index
].
y
=
ijField
[
indexI
+
1
][
1
];
debugArray
[
index
].
z
=
ijField
[
indexI
+
1
][
2
];
debugArray
[
index
].
w
=
flag
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
ijField
[
indexJ
+
1
][
0
];
debugArray
[
index
].
y
=
ijField
[
indexJ
+
1
][
1
];
debugArray
[
index
].
z
=
ijField
[
indexJ
+
1
][
2
];
debugArray
[
index
].
w
=
flag
;
/*
index += cAmoebaSim.paddedNumberOfAtoms;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = match ? 0.0f : ijField[indexI][0];
debugArray[index].y = match ? 0.0f : ijField[indexI][1];
debugArray[index].z = match ? 0.0f : ijField[indexI][2];
index += cAmoebaSim.paddedNumberOfAtoms;
unsigned int mask = 1 << j;
unsigned int pScaleIndex = (scaleMask.x & mask) ? 1 : 0;
pScaleIndex += (scaleMask.y & mask) ? 2 : 0;
debugArray[index].x = (float) pScaleIndex;
debugArray[index].y = scaleMask.x & mask ? 1.0f : -1.0f;
debugArray[index].z = scaleMask.y & mask ? 1.0f : -1.0f;
debugArray[index].w = + 10.0f;
*/
}
#endif
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
offset
=
3
*
(
x
+
tgx
+
warp
*
cAmoebaSim
.
paddedNumberOfAtoms
);
load3dArrayBufferPerWarp
(
offset
,
fieldSum
,
outputField
);
load3dArrayBufferPerWarp
(
offset
,
fieldPolarSum
,
outputFieldPolar
);
#else
unsigned
int
offset
=
3
*
(
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
);
load3dArray
(
offset
,
fieldSum
,
outputField
);
load3dArray
(
offset
,
fieldPolarSum
,
outputFieldPolar
);
#endif
}
else
// 100% utilization
{
// Read fixed atom data into registers and GRF
if
(
lasty
!=
y
)
{
unsigned
int
atomJ
=
y
+
tgx
;
// load coordinates, charge, ...
loadMutualInducedShared
(
&
(
sA
[
threadIdx
.
x
]),
atomJ
);
}
// zero shared fields
zeroMutualInducedParticleSharedField
(
&
(
sA
[
threadIdx
.
x
])
);
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
ijField
[
4
][
3
];
// load coords, charge, ...
calculatePmeDirectMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
tj
],
uscale
,
ijField
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
);
unsigned
int
mask
=
(
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
((
y
+
tj
)
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
0
:
1
;
// add to field at atomI the field due atomJ's dipole
fieldSum
[
0
]
+=
mask
?
ijField
[
0
][
0
]
:
0
.
0
f
;
fieldSum
[
1
]
+=
mask
?
ijField
[
0
][
1
]
:
0
.
0
f
;
fieldSum
[
2
]
+=
mask
?
ijField
[
0
][
2
]
:
0
.
0
f
;
// add to polar field at atomI the field due atomJ's dipole
fieldPolarSum
[
0
]
+=
mask
?
ijField
[
2
][
0
]
:
0
.
0
f
;
fieldPolarSum
[
1
]
+=
mask
?
ijField
[
2
][
1
]
:
0
.
0
f
;
fieldPolarSum
[
2
]
+=
mask
?
ijField
[
2
][
2
]
:
0
.
0
f
;
// add to field at atomJ the field due atomI's dipole
psA
[
tj
].
field
[
0
]
+=
mask
?
ijField
[
1
][
0
]
:
0
.
0
f
;
psA
[
tj
].
field
[
1
]
+=
mask
?
ijField
[
1
][
1
]
:
0
.
0
f
;
psA
[
tj
].
field
[
2
]
+=
mask
?
ijField
[
1
][
2
]
:
0
.
0
f
;
// add to polar field at atomJ the field due atomI's dipole
psA
[
tj
].
fieldPolar
[
0
]
+=
mask
?
ijField
[
3
][
0
]
:
0
.
0
f
;
psA
[
tj
].
fieldPolar
[
1
]
+=
mask
?
ijField
[
3
][
1
]
:
0
.
0
f
;
psA
[
tj
].
fieldPolar
[
2
]
+=
mask
?
ijField
[
3
][
2
]
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
||
(
y
+
tj
)
==
targetAtom
){
unsigned
int
index
=
atomI
==
targetAtom
?
(
y
+
tj
)
:
atomI
;
unsigned
int
pullBackIndex
=
0
;
unsigned
int
indexI
=
0
;
unsigned
int
indexJ
=
indexI
?
0
:
2
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
tj
);
debugArray
[
index
].
z
=
cAmoebaSim
.
cutoffDistance2
;
debugArray
[
index
].
w
=
7
.
0
f
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullBack
[
pullBackIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullBackIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullBackIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullBackIndex
].
w
;
pullBackIndex
++
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullBack
[
pullBackIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullBackIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullBackIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullBackIndex
].
w
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
float
flag
=
7
.
0
f
;
debugArray
[
index
].
x
=
ijField
[
indexI
][
0
];
debugArray
[
index
].
y
=
ijField
[
indexI
][
1
];
debugArray
[
index
].
z
=
ijField
[
indexI
][
2
];
debugArray
[
index
].
w
=
flag
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
ijField
[
indexJ
][
0
];
debugArray
[
index
].
y
=
ijField
[
indexJ
][
1
];
debugArray
[
index
].
z
=
ijField
[
indexJ
][
2
];
debugArray
[
index
].
w
=
flag
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
ijField
[
indexI
+
1
][
0
];
debugArray
[
index
].
y
=
ijField
[
indexI
+
1
][
1
];
debugArray
[
index
].
z
=
ijField
[
indexI
+
1
][
2
];
debugArray
[
index
].
w
=
flag
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
ijField
[
indexJ
+
1
][
0
];
debugArray
[
index
].
y
=
ijField
[
indexJ
+
1
][
1
];
debugArray
[
index
].
z
=
ijField
[
indexJ
+
1
][
2
];
debugArray
[
index
].
w
=
flag
;
/*
index += cAmoebaSim.paddedNumberOfAtoms;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = match ? 0.0f : ijField[indexI][0];
debugArray[index].y = match ? 0.0f : ijField[indexI][1];
debugArray[index].z = match ? 0.0f : ijField[indexI][2];
index += cAmoebaSim.paddedNumberOfAtoms;
unsigned int mask = 1 << j;
unsigned int pScaleIndex = (scaleMask.x & mask) ? 1 : 0;
pScaleIndex += (scaleMask.y & mask) ? 2 : 0;
debugArray[index].x = (float) pScaleIndex;
debugArray[index].y = scaleMask.x & mask ? 1.0f : -1.0f;
debugArray[index].z = scaleMask.y & mask ? 1.0f : -1.0f;
debugArray[index].w = + 10.0f;
*/
}
#endif
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
offset
=
3
*
(
x
+
tgx
+
warp
*
cAmoebaSim
.
paddedNumberOfAtoms
);
load3dArrayBufferPerWarp
(
offset
,
fieldSum
,
outputField
);
load3dArrayBufferPerWarp
(
offset
,
fieldPolarSum
,
outputFieldPolar
);
offset
=
3
*
(
y
+
tgx
+
warp
*
cAmoebaSim
.
paddedNumberOfAtoms
);
load3dArrayBufferPerWarp
(
offset
,
sA
[
threadIdx
.
x
].
field
,
outputField
);
load3dArrayBufferPerWarp
(
offset
,
sA
[
threadIdx
.
x
].
fieldPolar
,
outputFieldPolar
);
#else
unsigned
int
offset
=
3
*
(
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
);
load3dArray
(
offset
,
fieldSum
,
outputField
);
load3dArray
(
offset
,
fieldPolarSum
,
outputFieldPolar
);
offset
=
3
*
(
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
);
load3dArray
(
offset
,
sA
[
threadIdx
.
x
].
field
,
outputField
);
load3dArray
(
offset
,
sA
[
threadIdx
.
x
].
fieldPolar
,
outputFieldPolar
);
#endif
lasty
=
y
;
}
pos
++
;
}
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
View file @
c22f00cf
...
@@ -373,6 +373,7 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
...
@@ -373,6 +373,7 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
cudaComputeAmoebaMutualInducedField
(
amoebaGpu
);
cudaComputeAmoebaMutualInducedField
(
amoebaGpu
);
}
else
{
}
else
{
cudaComputeAmoebaPmeFixedEField
(
amoebaGpu
);
cudaComputeAmoebaPmeFixedEField
(
amoebaGpu
);
cudaComputeAmoebaPmeMutualInducedField
(
amoebaGpu
);
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment