Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
354f7b67
Commit
354f7b67
authored
Jul 31, 2010
by
Peter Eastman
Browse files
Optimizations to Amoeba electrostatics kernel
parent
c2361935
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
270 additions
and
483 deletions
+270
-483
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
+178
-342
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.h
...orms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.h
+92
-141
No files found.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
View file @
354f7b67
...
...
@@ -48,70 +48,13 @@ static int const Ddsc50Index = 11;
static
int
const
Ddsc70Index
=
14
;
//static int const Ddsc71Index = 15;
//static int const Ddsc72Index = 16;
//static int const DampIndex = 17;
//static int const DampRatioIndex = 18;
//static int const DampExpIndex = 19;
static
int
const
LastScalingIndex
=
20
;
static
int
const
_qI
=
0
;
static
int
const
_qJ
=
1
;
static
int
const
_r
=
0
;
static
int
const
_dI
=
1
;
static
int
const
_dJ
=
2
;
static
int
const
_uI
=
3
;
static
int
const
_uJ
=
4
;
static
int
const
_uIp
=
5
;
static
int
const
_uJp
=
6
;
static
int
const
_qIr
=
7
;
static
int
const
_qJr
=
8
;
static
int
const
_qIqJr
=
9
;
static
int
const
_qIdJ
=
10
;
static
int
const
_qIuJ
=
11
;
static
int
const
_qIuJp
=
12
;
static
int
const
LastScalingIndex
=
17
;
/*
static int const _dIxdJ = 13;
static int const _dIxuJ = 14;
static int const _dIxuJp = 15;
static int const _dJxr = 16;
static int const _dJxuI = 17;
static int const _dJxuIp = 18;
static int const _dIxr = 19;
*/
#define DOT3_4(u,v) ((u[0])*(v[0]) + (u[1])*(v[1]) + (u[2])*(v[2]))
static
int
const
_qJqIr
=
13
;
//static int const _qIxqJ = 21;
//static int const _rxqIr = 22;
//static int const _rxqJr = 23;
//static int const _rxqIJr = 24;
//static int const _rxqJIr = 25;
//static int const _qJrxqIr = 26;
static
int
const
_qJdI
=
14
;
//static int const _qJuI = 28;
static
int
const
_qJuI
=
15
;
static
int
const
_qJuIp
=
16
;
//static int const _dIxqJr = 30;
//static int const _dJxqIr = 31;
//static int const _uIxqJr = 32;
//static int const uJxqIr = 33;
//static int const _uIxqJrp = 34;
//static int const _uJxqIrp = 35;
//static int const _rxqIdJ = 36;
//static int const _rxqJdI = 37;
//static int const _rxqIuJ = 38;
//static int const _rxqJuI = 17;
//static int const _rxqJuIp = 18;
//static int const _rxqIuJp = 40;
//static int const _rxqJuIp = 41;
static
int
const
LastVectorFieldIndex
=
17
;
#define DOT3_4(u,k,v,l) ((u[k*3+0])*(v[l*3+0]) + (u[k*3+1])*(v[l*3+1]) + (u[k*3+2])*(v[l*3+2]))
#define MATRIXDOT31(u,k,v,l) u[k*9+0]*v[l*9+0] + u[k*9+1]*v[l*9+1] + u[k*9+2]*v[l*9+2] + \
u[k*9+3]*v[l*9+3] + u[k*9+4]*v[l*9+4] + u[k*9+5]*v[l*9+5] + \
u[k*9+6]*v[l*9+6] + u[k*9+7]*v[l*9+7] + u[k*9+8]*v[l*9+8]
#define MATRIXDOT31(u,v) u[0]*v[0] + u[1]*v[1] + u[2]*v[2] + \
u[3]*v[3] + u[4]*v[4] + u[5]*v[5] + \
u[6]*v[6] + u[7]*v[7] + u[8]*v[8]
#define DOT31(u,v) ((u[0])*(v[0]) + (u[1])*(v[1]) + (u[2])*(v[2]))
...
...
@@ -149,86 +92,65 @@ __device__ void amatrixCrossProductMatrix3( float* matrixX, float* matrixY, floa
}
__device__
void
calculateElectrostaticPairIxn_kernel
(
float4
atomCoordinatesI
,
float4
atomCoordinatesJ
,
float
dampingFactorI
,
float
dampingFactorJ
,
float
tholeI
,
float
tholeJ
,
float
*
labFrameDipoleI
,
float
*
labFrameDipoleJ
,
float
*
labFrameQuadrupoleI
,
float
*
labFrameQuadrupoleJ
,
float
*
inducedDipoleI
,
float
*
inducedDipoleJ
,
float
*
inducedDipolePolarI
,
float
*
inducedDipolePolarJ
,
float
scalingDistanceCutoff
,
float
*
scalingFactors
,
float
*
outputForce
,
float
outputTorque
[
2
][
3
],
float
*
energy
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
#endif
){
struct
ElectrostaticParticle
{
float
deltaR
[
5
];
// coordinates charge
// ---------------------------------------------------------------------------------------
float
x
;
float
y
;
float
z
;
float
q
;
float
vectorFields
[
LastVectorFieldIndex
*
3
];
float
vectorFields1
[
2
*
9
];
// lab frame dipole
float
chargeI
=
atomCoordinatesI
.
w
;
vectorFields
[
_dI
*
3
]
=
labFrameDipoleI
[
0
];
vectorFields
[
_dI
*
3
+
1
]
=
labFrameDipoleI
[
1
];
vectorFields
[
_dI
*
3
+
2
]
=
labFrameDipoleI
[
2
];
float
labFrameDipole
[
3
];
vectorFields
[
_uI
*
3
]
=
inducedDipoleI
[
0
];
vectorFields
[
_uI
*
3
+
1
]
=
inducedDipoleI
[
1
];
vectorFields
[
_uI
*
3
+
2
]
=
inducedDipoleI
[
2
];
// lab frame quadrupole
vectorFields
[
_uIp
*
3
]
=
inducedDipolePolarI
[
0
];
vectorFields
[
_uIp
*
3
+
1
]
=
inducedDipolePolarI
[
1
];
vectorFields
[
_uIp
*
3
+
2
]
=
inducedDipolePolarI
[
2
];
float
labFrameQuadrupole
[
9
];
vectorFields1
[
_qI
*
9
]
=
labFrameQuadrupoleI
[
0
];
vectorFields1
[
_qI
*
9
+
1
]
=
labFrameQuadrupoleI
[
1
];
vectorFields1
[
_qI
*
9
+
2
]
=
labFrameQuadrupoleI
[
2
];
vectorFields1
[
_qI
*
9
+
3
]
=
labFrameQuadrupoleI
[
3
];
vectorFields1
[
_qI
*
9
+
4
]
=
labFrameQuadrupoleI
[
4
];
vectorFields1
[
_qI
*
9
+
5
]
=
labFrameQuadrupoleI
[
5
];
vectorFields1
[
_qI
*
9
+
6
]
=
labFrameQuadrupoleI
[
6
];
vectorFields1
[
_qI
*
9
+
7
]
=
labFrameQuadrupoleI
[
7
];
vectorFields1
[
_qI
*
9
+
8
]
=
labFrameQuadrupoleI
[
8
];
// induced dipole
// ---------------------------------------------------------------------------------------
float
inducedDipole
[
3
];
float
chargeJ
=
atomCoordinatesJ
.
w
;
// polar induced dipole
vectorFields
[
_dJ
*
3
+
0
]
=
labFrameDipoleJ
[
0
];
vectorFields
[
_dJ
*
3
+
1
]
=
labFrameDipoleJ
[
1
];
vectorFields
[
_dJ
*
3
+
2
]
=
labFrameDipoleJ
[
2
];
float
inducedDipoleP
[
3
];
vectorFields
[
_uJ
*
3
+
0
]
=
inducedDipoleJ
[
0
];
vectorFields
[
_uJ
*
3
+
1
]
=
inducedDipoleJ
[
1
];
vectorFields
[
_uJ
*
3
+
2
]
=
inducedDipoleJ
[
2
];
// scaling factors
vectorFields
[
_uJp
*
3
+
0
]
=
inducedDipolePolarJ
[
0
];
vectorFields
[
_uJp
*
3
+
1
]
=
inducedDipolePolarJ
[
1
];
vectorFields
[
_uJp
*
3
+
2
]
=
inducedDipolePolarJ
[
2
];
float
thole
;
float
damp
;
float
force
[
3
];
vectorFields1
[
_qJ
*
9
+
0
]
=
labFrameQuadrupoleJ
[
0
];
vectorFields1
[
_qJ
*
9
+
1
]
=
labFrameQuadrupoleJ
[
1
];
vectorFields1
[
_qJ
*
9
+
2
]
=
labFrameQuadrupoleJ
[
2
];
vectorFields1
[
_qJ
*
9
+
3
]
=
labFrameQuadrupoleJ
[
3
];
vectorFields1
[
_qJ
*
9
+
4
]
=
labFrameQuadrupoleJ
[
4
];
vectorFields1
[
_qJ
*
9
+
5
]
=
labFrameQuadrupoleJ
[
5
];
vectorFields1
[
_qJ
*
9
+
6
]
=
labFrameQuadrupoleJ
[
6
];
vectorFields1
[
_qJ
*
9
+
7
]
=
labFrameQuadrupoleJ
[
7
];
vectorFields1
[
_qJ
*
9
+
8
]
=
labFrameQuadrupoleJ
[
8
];
float
torque
[
3
];
float
padding
;
};
__device__
void
calculateElectrostaticPairIxn_kernel
(
ElectrostaticParticle
&
atomI
,
ElectrostaticParticle
&
atomJ
,
float
scalingDistanceCutoff
,
float
*
scalingFactors
,
float
*
outputForce
,
float
outputTorque
[
2
][
3
],
float
*
energy
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
#endif
){
float
deltaR
[
3
];
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
float
*
ddsc3
=
scalingFactors
+
Ddsc30Index
;
float
*
ddsc5
=
scalingFactors
+
Ddsc50Index
;
float
*
ddsc7
=
scalingFactors
+
Ddsc70Index
;
float
damp
=
dampingFactorI
*
dampingFactorJ
;
deltaR
[
0
]
=
atomCoordinatesJ
.
x
-
atomCoordinatesI
.
x
;
deltaR
[
1
]
=
atomCoordinatesJ
.
y
-
atomCoordinatesI
.
y
;
deltaR
[
2
]
=
atomCoordinatesJ
.
z
-
atomCoordinatesI
.
z
;
deltaR
[
0
]
=
atomJ
.
x
-
atomI
.
x
;
deltaR
[
1
]
=
atomJ
.
y
-
atomI
.
y
;
deltaR
[
2
]
=
atomJ
.
z
-
atomI
.
z
;
float
r2
=
DOT31
(
deltaR
,
deltaR
);
float
r
=
sqrtf
(
r2
);
...
...
@@ -240,20 +162,18 @@ __device__ void calculateElectrostaticPairIxn_kernel( float4 atomCoordinatesI,
float
rr9
=
7.0
f
*
rr7
*
rr2
;
float
rr11
=
9.0
f
*
rr9
*
rr2
;
memcpy
(
&
vectorFields
[
_r
*
3
+
0
],
deltaR
,
3
*
sizeof
(
float
)
);
//-------------------------------------------
if
(
damp
!=
0.0
f
&&
r
<
scalingDistanceCutoff
){
if
(
atomI
.
damp
!=
0.0
f
&&
atomJ
.
damp
!=
0.0
&&
r
<
scalingDistanceCutoff
){
float
distanceIJ
,
r2I
;
distanceIJ
=
r
;
r2I
=
rr2
;
float
ratio
=
distanceIJ
/
damp
;
float
pGamma
=
thole
J
>
thole
I
?
thole
I
:
thole
J
;
float
ratio
=
distanceIJ
/
(
atomI
.
damp
*
atomJ
.
damp
)
;
float
pGamma
=
atomJ
.
thole
>
atomI
.
thole
?
atomI
.
thole
:
atomJ
.
thole
;
damp
=
ratio
*
ratio
*
ratio
*
pGamma
;
float
damp
=
ratio
*
ratio
*
ratio
*
pGamma
;
float
dampExp
=
expf
(
-
damp
);
float
damp1
=
damp
+
one
;
float
damp2
=
damp
*
damp
;
...
...
@@ -288,46 +208,47 @@ __device__ void calculateElectrostaticPairIxn_kernel( float4 atomCoordinatesI,
float
sc
[
11
];
float
sci
[
9
];
float
scip
[
9
];
float
qIr
[
3
],
qJr
[
3
];
amatrixProductVector3
(
&
vectorFields1
[
_qJ
*
9
],
&
vectorFields
[
_r
*
3
],
&
vectorFields
[
_qJr
*
3
]
);
amatrixProductVector3
(
&
vectorFields1
[
_qI
*
9
],
&
vectorFields
[
_r
*
3
],
&
vectorFields
[
_qIr
*
3
]
);
amatrixProductVector3
(
atomJ
.
labFrameQuadrupole
,
deltaR
,
qJr
);
amatrixProductVector3
(
atomI
.
labFrameQuadrupole
,
deltaR
,
qIr
);
sc
[
2
]
=
DOT3_4
(
vectorFields
,
_dI
,
vectorFields
,
_dJ
);
sc
[
3
]
=
DOT3_4
(
vectorFields
,
_dI
,
vectorFields
,
_r
);
sc
[
4
]
=
DOT3_4
(
vectorFields
,
_dJ
,
vectorFields
,
_r
);
sc
[
2
]
=
DOT3_4
(
atomI
.
labFrameDipole
,
atomJ
.
labFrameDipole
);
sc
[
3
]
=
DOT3_4
(
atomI
.
labFrameDipole
,
deltaR
);
sc
[
4
]
=
DOT3_4
(
atomJ
.
labFrameDipole
,
deltaR
);
sc
[
5
]
=
DOT3_4
(
vectorFields
,
_qIr
,
vectorFields
,
_r
);
sc
[
6
]
=
DOT3_4
(
vectorFields
,
_qJr
,
vectorFields
,
_r
);
sc
[
5
]
=
DOT3_4
(
qIr
,
deltaR
);
sc
[
6
]
=
DOT3_4
(
qJr
,
deltaR
);
sc
[
7
]
=
DOT3_4
(
vectorFields
,
_qIr
,
vectorFields
,
_dJ
);
sc
[
8
]
=
DOT3_4
(
vectorFields
,
_qJr
,
vectorFields
,
_dI
);
sc
[
7
]
=
DOT3_4
(
qIr
,
atomJ
.
labFrameDipole
);
sc
[
8
]
=
DOT3_4
(
qJr
,
atomI
.
labFrameDipole
);
sc
[
9
]
=
DOT3_4
(
vectorFields
,
_qIr
,
vectorFields
,
_
qJr
);
sc
[
9
]
=
DOT3_4
(
qIr
,
qJr
);
sc
[
10
]
=
MATRIXDOT31
(
vectorFields1
,
_qI
,
vectorFields1
,
_qJ
);
sc
[
10
]
=
MATRIXDOT31
(
atomI
.
labFrameQuadrupole
,
atomJ
.
labFrameQuadrupole
);
sci
[
1
]
=
DOT3_4
(
vectorFields
,
_uI
,
vectorFields
,
_dJ
)
+
DOT3_4
(
vectorFields
,
_uJ
,
vectorFields
,
_dI
);
sci
[
1
]
=
DOT3_4
(
atomI
.
inducedDipole
,
atomJ
.
labFrameDipole
)
+
DOT3_4
(
atomJ
.
inducedDipole
,
atomI
.
labFrameDipole
);
sci
[
2
]
=
DOT3_4
(
vectorFields
,
_uI
,
vectorFields
,
_uJ
);
sci
[
2
]
=
DOT3_4
(
atomI
.
inducedDipole
,
atomJ
.
inducedDipole
);
sci
[
3
]
=
DOT3_4
(
vectorFields
,
_uI
,
vectorFields
,
_r
);
sci
[
4
]
=
DOT3_4
(
vectorFields
,
_uJ
,
vectorFields
,
_r
);
sci
[
3
]
=
DOT3_4
(
atomI
.
inducedDipole
,
deltaR
);
sci
[
4
]
=
DOT3_4
(
atomJ
.
inducedDipole
,
deltaR
);
sci
[
7
]
=
DOT3_4
(
vectorFields
,
_qIr
,
vectorFields
,
_uJ
);
sci
[
8
]
=
DOT3_4
(
vectorFields
,
_qJr
,
vectorFields
,
_uI
);
sci
[
7
]
=
DOT3_4
(
qIr
,
atomJ
.
inducedDipole
);
sci
[
8
]
=
DOT3_4
(
qJr
,
atomI
.
inducedDipole
);
scip
[
1
]
=
DOT3_4
(
vectorFields
,
_uIp
,
vectorFields
,
_dJ
)
+
DOT3_4
(
vectorFields
,
_uJp
,
vectorFields
,
_dI
);
scip
[
1
]
=
DOT3_4
(
atomI
.
inducedDipoleP
,
atomJ
.
labFrameDipole
)
+
DOT3_4
(
atomJ
.
inducedDipoleP
,
atomI
.
labFrameDipole
);
scip
[
2
]
=
DOT3_4
(
vectorFields
,
_uI
,
vectorFields
,
_uJp
)
+
DOT3_4
(
vectorFields
,
_uJ
,
vectorFields
,
_uIp
);
scip
[
2
]
=
DOT3_4
(
atomI
.
inducedDipole
,
atomJ
.
inducedDipoleP
)
+
DOT3_4
(
atomJ
.
inducedDipole
,
atomI
.
inducedDipoleP
);
scip
[
3
]
=
DOT3_4
(
vectorFields
,
_uIp
,
vectorFields
,
_r
);
scip
[
4
]
=
DOT3_4
(
vectorFields
,
_uJp
,
vectorFields
,
_r
);
scip
[
3
]
=
DOT3_4
(
atomI
.
inducedDipoleP
,
deltaR
);
scip
[
4
]
=
DOT3_4
(
atomJ
.
inducedDipoleP
,
deltaR
);
scip
[
7
]
=
DOT3_4
(
vectorFields
,
_qIr
,
vectorFields
,
_uJp
);
scip
[
8
]
=
DOT3_4
(
vectorFields
,
_qJr
,
vectorFields
,
_uIp
);
scip
[
7
]
=
DOT3_4
(
qIr
,
atomJ
.
inducedDipoleP
);
scip
[
8
]
=
DOT3_4
(
qJr
,
atomI
.
inducedDipoleP
);
float
findmp
[
3
];
float
scaleF
=
0.5
f
*
scalingFactors
[
UScaleIndex
];
...
...
@@ -338,14 +259,14 @@ __device__ void calculateElectrostaticPairIxn_kernel( float4 atomCoordinatesI,
findmp
[
2
]
=
inducedFactor3
*
ddsc3
[
2
]
-
inducedFactor5
*
ddsc5
[
2
];
float
gli
[
8
];
gli
[
1
]
=
chargeJ
*
sci
[
3
]
-
chargeI
*
sci
[
4
];
gli
[
1
]
=
atomJ
.
q
*
sci
[
3
]
-
atomI
.
q
*
sci
[
4
];
gli
[
2
]
=
-
sc
[
3
]
*
sci
[
4
]
-
sci
[
3
]
*
sc
[
4
];
gli
[
3
]
=
sci
[
3
]
*
sc
[
6
]
-
sci
[
4
]
*
sc
[
5
];
gli
[
6
]
=
sci
[
1
];
gli
[
7
]
=
2.0
f
*
(
sci
[
7
]
-
sci
[
8
]);
float
glip
[
8
];
glip
[
1
]
=
chargeJ
*
scip
[
3
]
-
chargeI
*
scip
[
4
];
glip
[
1
]
=
atomJ
.
q
*
scip
[
3
]
-
atomI
.
q
*
scip
[
4
];
glip
[
2
]
=
-
sc
[
3
]
*
scip
[
4
]
-
scip
[
3
]
*
sc
[
4
];
glip
[
3
]
=
scip
[
3
]
*
sc
[
6
]
-
scip
[
4
]
*
sc
[
5
];
glip
[
6
]
=
scip
[
1
];
...
...
@@ -374,9 +295,9 @@ __device__ void calculateElectrostaticPairIxn_kernel( float4 atomCoordinatesI,
float
gl
[
9
];
gl
[
0
]
=
chargeI
*
chargeJ
;
gl
[
1
]
=
chargeJ
*
sc
[
3
]
-
chargeI
*
sc
[
4
];
gl
[
2
]
=
chargeI
*
sc
[
6
]
+
chargeJ
*
sc
[
5
]
-
sc
[
3
]
*
sc
[
4
];
gl
[
0
]
=
atomI
.
q
*
atomJ
.
q
;
gl
[
1
]
=
atomJ
.
q
*
sc
[
3
]
-
atomI
.
q
*
sc
[
4
];
gl
[
2
]
=
atomI
.
q
*
sc
[
6
]
+
atomJ
.
q
*
sc
[
5
]
-
sc
[
3
]
*
sc
[
4
];
gl
[
3
]
=
sc
[
3
]
*
sc
[
6
]
-
sc
[
4
]
*
sc
[
5
];
gl
[
4
]
=
sc
[
5
]
*
sc
[
6
];
...
...
@@ -387,11 +308,11 @@ __device__ void calculateElectrostaticPairIxn_kernel( float4 atomCoordinatesI,
float
gf
[
8
];
gf
[
1
]
=
rr3
*
gl
[
0
]
+
rr5
*
(
gl
[
1
]
+
gl
[
6
])
+
rr7
*
(
gl
[
2
]
+
gl
[
7
]
+
gl
[
8
])
+
rr9
*
(
gl
[
3
]
+
gl
[
5
])
+
rr11
*
gl
[
4
];
gf
[
2
]
=
-
chargeJ
*
rr3
+
sc
[
4
]
*
rr5
-
sc
[
6
]
*
rr7
;
gf
[
3
]
=
chargeI
*
rr3
+
sc
[
3
]
*
rr5
+
sc
[
5
]
*
rr7
;
gf
[
2
]
=
-
atomJ
.
q
*
rr3
+
sc
[
4
]
*
rr5
-
sc
[
6
]
*
rr7
;
gf
[
3
]
=
atomI
.
q
*
rr3
+
sc
[
3
]
*
rr5
+
sc
[
5
]
*
rr7
;
gf
[
4
]
=
2.0
f
*
rr5
;
gf
[
5
]
=
2.0
f
*
(
-
chargeJ
*
rr5
+
sc
[
4
]
*
rr7
-
sc
[
6
]
*
rr9
);
gf
[
6
]
=
2.0
f
*
(
-
chargeI
*
rr5
-
sc
[
3
]
*
rr7
-
sc
[
5
]
*
rr9
);
gf
[
5
]
=
2.0
f
*
(
-
atomJ
.
q
*
rr5
+
sc
[
4
]
*
rr7
-
sc
[
6
]
*
rr9
);
gf
[
6
]
=
2.0
f
*
(
-
atomI
.
q
*
rr5
-
sc
[
3
]
*
rr7
-
sc
[
5
]
*
rr9
);
gf
[
7
]
=
4.0
f
*
rr7
;
// energy
...
...
@@ -434,20 +355,21 @@ if( 1 ){
float
ftm2
[
3
];
float
temp1
[
3
],
temp2
[
3
],
temp3
[
3
];
amatrixProductVector3
(
&
vectorFields1
[
_qI
*
9
],
&
vectorFields
[
_dJ
*
3
],
&
vectorFields
[
_qIdJ
*
3
]
);
//MK
amatrixProductVector3
(
&
vectorFields1
[
_qJ
*
9
],
&
vectorFields
[
_dI
*
3
],
&
vectorFields
[
_qJdI
*
3
]
);
//MK
float
qIqJr
[
3
],
qJqIr
[
3
],
qIdJ
[
3
],
qJdI
[
3
];
amatrixProductVector3
(
atomI
.
labFrameQuadrupole
,
atomJ
.
labFrameDipole
,
qIdJ
);
//MK
amatrixProductVector3
(
atomJ
.
labFrameQuadrupole
,
atomI
.
labFrameDipole
,
qJdI
);
//MK
amatrixProductVector3
(
&
vectorFields1
[
_qI
*
9
],
&
vectorFields
[
_qJr
*
3
],
&
vectorFields
[
_
qIqJr
*
3
]
);
//MK
amatrixProductVector3
(
&
vectorFields1
[
_qJ
*
9
],
&
vectorFields
[
_qIr
*
3
],
&
vectorFields
[
_
qJqIr
*
3
]
);
//MK
amatrixProductVector3
(
&
vectorFields1
[
_qJ
*
9
],
&
vectorFields
[
_qIr
*
3
]
,
temp1
);
amatrixProductVector3
(
&
vectorFields1
[
_qJ
*
9
],
&
vectorFields
[
_dI
*
3
]
,
temp2
);
amatrixProductVector3
(
atomI
.
labFrameQuadrupole
,
qJr
,
qIqJr
);
//MK
amatrixProductVector3
(
atomJ
.
labFrameQuadrupole
,
qIr
,
qJqIr
);
//MK
amatrixProductVector3
(
atomJ
.
labFrameQuadrupole
,
qIr
,
temp1
);
amatrixProductVector3
(
atomJ
.
labFrameQuadrupole
,
atomI
.
labFrameDipole
,
temp2
);
for
(
int
ii
=
0
;
ii
<
3
;
ii
++
){
ftm2
[
ii
]
=
gf
[
1
]
*
vectorFields
[
_r
*
3
+
ii
]
+
gf
[
2
]
*
vectorFields
[
_dI
*
3
+
ii
]
+
gf
[
3
]
*
vectorFields
[
_dJ
*
3
+
ii
]
+
gf
[
4
]
*
(
temp2
[
ii
]
-
vectorFields
[
_
qIdJ
*
3
+
ii
])
+
gf
[
5
]
*
vectorFields
[
_qIr
*
3
+
ii
]
+
gf
[
6
]
*
vectorFields
[
_qJr
*
3
+
ii
]
+
gf
[
7
]
*
(
vectorFields
[
_
qIqJr
*
3
+
ii
]
+
temp1
[
ii
]);
ftm2
[
ii
]
=
gf
[
1
]
*
deltaR
[
ii
]
+
gf
[
2
]
*
atomI
.
labFrameDipole
[
ii
]
+
gf
[
3
]
*
atomJ
.
labFrameDipole
[
ii
]
+
gf
[
4
]
*
(
temp2
[
ii
]
-
qIdJ
[
ii
])
+
gf
[
5
]
*
qIr
[
ii
]
+
gf
[
6
]
*
qJr
[
ii
]
+
gf
[
7
]
*
(
qIqJr
[
ii
]
+
temp1
[
ii
]);
}
...
...
@@ -458,8 +380,8 @@ if( 1 ){
float
gfi
[
7
];
gfi
[
1
]
=
rr5
*
0.5
f
*
((
gli
[
1
]
+
gli
[
6
])
*
psc
[
0
]
+
(
glip
[
1
]
+
glip
[
6
])
*
dsc
[
0
]
+
scip
[
2
]
*
scaleI
[
0
])
+
rr7
*
((
gli
[
7
]
+
gli
[
2
])
*
psc
[
1
]
+
(
glip
[
7
]
+
glip
[
2
])
*
dsc
[
1
]
-
(
sci
[
3
]
*
scip
[
4
]
+
scip
[
3
]
*
sci
[
4
])
*
scaleI
[
1
])
*
0.5
f
+
rr9
*
(
gli
[
3
]
*
psc
[
2
]
+
glip
[
3
]
*
dsc
[
2
])
*
0.5
f
;
gfi
[
2
]
=
-
rr3
*
chargeJ
+
rr5
*
sc
[
4
]
-
rr7
*
sc
[
6
];
gfi
[
3
]
=
rr3
*
chargeI
+
rr5
*
sc
[
3
]
+
rr7
*
sc
[
5
];
gfi
[
2
]
=
-
rr3
*
atomJ
.
q
+
rr5
*
sc
[
4
]
-
rr7
*
sc
[
6
];
gfi
[
3
]
=
rr3
*
atomI
.
q
+
rr5
*
sc
[
3
]
+
rr7
*
sc
[
5
];
gfi
[
4
]
=
2.0
f
*
rr5
;
gfi
[
5
]
=
rr7
*
(
sci
[
4
]
*
psc
[
2
]
+
scip
[
4
]
*
dsc
[
2
]);
gfi
[
6
]
=
-
rr7
*
(
sci
[
3
]
*
psc
[
2
]
+
scip
[
3
]
*
dsc
[
2
]);
...
...
@@ -478,33 +400,35 @@ if( 1 ){
float
temp13
[
3
];
float
temp14
[
3
];
float
temp15
[
3
];
float
qIuJp
[
3
],
qJuIp
[
3
];
float
qIuJ
[
3
],
qJuI
[
3
];
amatrixProductVector3
(
&
vectorFields1
[
_qJ
*
9
],
&
vectorFields
[
_uIp
*
3
]
,
temp4
);
amatrixProductVector3
(
atomJ
.
labFrameQuadrupole
,
atomI
.
inducedDipoleP
,
temp4
);
amatrixProductVector3
(
&
vectorFields1
[
_qI
*
9
],
&
vectorFields
[
_uJp
*
3
],
&
vectorFields
[
_
qIuJp
*
3
]
);
//MK
amatrixProductVector3
(
&
vectorFields1
[
_qJ
*
9
],
&
vectorFields
[
_uIp
*
3
],
&
vectorFields
[
_
qJuIp
*
3
]
);
//MK
amatrixProductVector3
(
&
vectorFields1
[
_qJ
*
9
],
&
vectorFields
[
_uI
*
3
],
&
vectorFields
[
_
qJuI
*
3
]
);
//MK
amatrixProductVector3
(
atomI
.
labFrameQuadrupole
,
atomJ
.
inducedDipoleP
,
qIuJp
);
//MK
amatrixProductVector3
(
atomJ
.
labFrameQuadrupole
,
atomI
.
inducedDipoleP
,
qJuIp
);
//MK
amatrixProductVector3
(
atomJ
.
labFrameQuadrupole
,
atomI
.
inducedDipole
,
qJuI
);
//MK
amatrixProductVector3
(
&
vectorFields1
[
_qJ
*
9
],
&
vectorFields
[
_uI
*
3
]
,
temp5
);
amatrixProductVector3
(
&
vectorFields1
[
_qI
*
9
],
&
vectorFields
[
_uJ
*
3
],
&
vectorFields
[
_
qIuJ
*
3
]
);
//MK
amatrixProductVector3
(
atomJ
.
labFrameQuadrupole
,
atomI
.
inducedDipole
,
temp5
);
amatrixProductVector3
(
atomI
.
labFrameQuadrupole
,
atomJ
.
inducedDipole
,
qIuJ
);
//MK
float
temp1_0
,
temp2_0
,
temp3_0
;
for
(
int
ii
=
0
;
ii
<
3
;
ii
++
){
temp1_0
=
gfi
[
1
]
*
vectorFields
[
_r
*
3
+
ii
]
+
0.5
f
*
(
-
rr3
*
chargeJ
*
(
vectorFields
[
_uI
*
3
+
ii
]
*
psc
[
0
]
+
vectorFields
[
_uIp
*
3
+
ii
]
*
dsc
[
0
])
+
rr5
*
sc
[
4
]
*
(
vectorFields
[
_uI
*
3
+
ii
]
*
psc
[
1
]
+
vectorFields
[
_uIp
*
3
+
ii
]
*
dsc
[
1
])
-
rr7
*
sc
[
6
]
*
(
vectorFields
[
_uI
*
3
+
ii
]
*
psc
[
2
]
+
vectorFields
[
_uIp
*
3
+
ii
]
*
dsc
[
2
]))
;
temp2_0
=
(
rr3
*
chargeI
*
(
vectorFields
[
_uJ
*
3
+
ii
]
*
psc
[
0
]
+
vectorFields
[
_uJp
*
3
+
ii
]
*
dsc
[
0
])
+
rr5
*
sc
[
3
]
*
(
vectorFields
[
_uJ
*
3
+
ii
]
*
psc
[
1
]
+
vectorFields
[
_uJp
*
3
+
ii
]
*
dsc
[
1
])
+
rr7
*
sc
[
5
]
*
(
vectorFields
[
_uJ
*
3
+
ii
]
*
psc
[
2
]
+
vectorFields
[
_uJp
*
3
+
ii
]
*
dsc
[
2
]))
*
0.5
f
+
rr5
*
scaleI
[
1
]
*
(
sci
[
4
]
*
vectorFields
[
_uIp
*
3
+
ii
]
+
scip
[
4
]
*
vectorFields
[
_uI
*
3
+
ii
]
+
sci
[
3
]
*
vectorFields
[
_uJp
*
3
+
ii
]
+
scip
[
3
]
*
vectorFields
[
_uJ
*
3
+
ii
])
*
0.5
f
;
temp3_0
=
0.5
f
*
(
sci
[
4
]
*
psc
[
1
]
+
scip
[
4
]
*
dsc
[
1
])
*
rr5
*
vectorFields
[
_dI
*
3
+
ii
]
+
0.5
f
*
(
sci
[
3
]
*
psc
[
1
]
+
scip
[
3
]
*
dsc
[
1
])
*
rr5
*
vectorFields
[
_dJ
*
3
+
ii
]
+
0.5
f
*
gfi
[
4
]
*
((
temp5
[
ii
]
-
vectorFields
[
_
qIuJ
*
3
+
ii
])
*
psc
[
1
]
+
(
temp4
[
ii
]
-
vectorFields
[
_
qIuJp
*
3
+
ii
])
*
dsc
[
1
])
+
gfi
[
5
]
*
vectorFields
[
_qIr
*
3
+
ii
]
+
gfi
[
6
]
*
vectorFields
[
_qJr
*
3
+
ii
];
temp1_0
=
gfi
[
1
]
*
deltaR
[
ii
]
+
0.5
f
*
(
-
rr3
*
atomJ
.
q
*
(
atomI
.
inducedDipole
[
ii
]
*
psc
[
0
]
+
atomI
.
inducedDipoleP
[
ii
]
*
dsc
[
0
])
+
rr5
*
sc
[
4
]
*
(
atomI
.
inducedDipole
[
ii
]
*
psc
[
1
]
+
atomI
.
inducedDipoleP
[
ii
]
*
dsc
[
1
])
-
rr7
*
sc
[
6
]
*
(
atomI
.
inducedDipole
[
ii
]
*
psc
[
2
]
+
atomI
.
inducedDipoleP
[
ii
]
*
dsc
[
2
]))
;
temp2_0
=
(
rr3
*
atomI
.
q
*
(
atomJ
.
inducedDipole
[
ii
]
*
psc
[
0
]
+
atomJ
.
inducedDipoleP
[
ii
]
*
dsc
[
0
])
+
rr5
*
sc
[
3
]
*
(
atomJ
.
inducedDipole
[
ii
]
*
psc
[
1
]
+
atomJ
.
inducedDipoleP
[
ii
]
*
dsc
[
1
])
+
rr7
*
sc
[
5
]
*
(
atomJ
.
inducedDipole
[
ii
]
*
psc
[
2
]
+
atomJ
.
inducedDipoleP
[
ii
]
*
dsc
[
2
]))
*
0.5
f
+
rr5
*
scaleI
[
1
]
*
(
sci
[
4
]
*
atomI
.
inducedDipoleP
[
ii
]
+
scip
[
4
]
*
atomI
.
inducedDipole
[
ii
]
+
sci
[
3
]
*
atomJ
.
inducedDipoleP
[
ii
]
+
scip
[
3
]
*
atomJ
.
inducedDipole
[
ii
])
*
0.5
f
;
temp3_0
=
0.5
f
*
(
sci
[
4
]
*
psc
[
1
]
+
scip
[
4
]
*
dsc
[
1
])
*
rr5
*
atomI
.
labFrameDipole
[
ii
]
+
0.5
f
*
(
sci
[
3
]
*
psc
[
1
]
+
scip
[
3
]
*
dsc
[
1
])
*
rr5
*
atomJ
.
labFrameDipole
[
ii
]
+
0.5
f
*
gfi
[
4
]
*
((
temp5
[
ii
]
-
qIuJ
[
ii
])
*
psc
[
1
]
+
(
temp4
[
ii
]
-
qIuJp
[
ii
])
*
dsc
[
1
])
+
gfi
[
5
]
*
qIr
[
ii
]
+
gfi
[
6
]
*
qJr
[
ii
];
ftm2i
[
ii
]
=
temp1_0
+
temp2_0
+
temp3_0
;
}
...
...
@@ -531,22 +455,22 @@ if( 1 ){
float
ttm2i
[
3
];
float
ttm3
[
3
];
float
ttm3i
[
3
];
acrossProductVector3
(
&
vectorFields
[
_dI
*
3
],
&
vectorFields
[
_dJ
*
3
]
,
temp1
);
acrossProductVector3
(
&
vectorFields
[
_dI
*
3
],
&
vectorFields
[
_uJ
*
3
]
,
temp2
);
acrossProductVector3
(
&
vectorFields
[
_dI
*
3
],
&
vectorFields
[
_uJp
*
3
]
,
temp3
);
acrossProductVector3
(
&
vectorFields
[
_dI
*
3
],
&
vectorFields
[
_r
*
3
]
,
temp4
);
acrossProductVector3
(
&
vectorFields
[
_r
*
3
],
&
vectorFields
[
_
qIuJp
*
3
]
,
temp5
);
acrossProductVector3
(
&
vectorFields
[
_r
*
3
],
&
vectorFields
[
_qIr
*
3
]
,
temp6
);
acrossProductVector3
(
&
vectorFields
[
_r
*
3
],
&
vectorFields
[
_
qIuJ
*
3
]
,
temp7
);
acrossProductVector3
(
&
vectorFields
[
_uJ
*
3
],
&
vectorFields
[
_qIr
*
3
]
,
temp8
);
acrossProductVector3
(
&
vectorFields
[
_uJp
*
3
],
&
vectorFields
[
_qIr
*
3
]
,
temp9
);
acrossProductVector3
(
&
vectorFields
[
_dI
*
3
],
&
vectorFields
[
_qJr
*
3
]
,
temp10
);
acrossProductVector3
(
&
vectorFields
[
_dJ
*
3
],
&
vectorFields
[
_qIr
*
3
]
,
temp11
);
acrossProductVector3
(
&
vectorFields
[
_r
*
3
],
&
vectorFields
[
_
qIqJr
*
3
]
,
temp12
);
acrossProductVector3
(
&
vectorFields
[
_r
*
3
],
&
vectorFields
[
_
qIdJ
*
3
]
,
temp13
);
amatrixCrossProductMatrix3
(
&
vectorFields1
[
_qI
*
9
],
&
vectorFields1
[
_qJ
*
9
]
,
temp14
);
acrossProductVector3
(
&
vectorFields
[
_qJr
*
3
],
&
vectorFields
[
_qIr
*
3
]
,
temp15
);
acrossProductVector3
(
atomI
.
labFrameDipole
,
atomJ
.
labFrameDipole
,
temp1
);
acrossProductVector3
(
atomI
.
labFrameDipole
,
atomJ
.
inducedDipole
,
temp2
);
acrossProductVector3
(
atomI
.
labFrameDipole
,
atomJ
.
inducedDipoleP
,
temp3
);
acrossProductVector3
(
atomI
.
labFrameDipole
,
deltaR
,
temp4
);
acrossProductVector3
(
deltaR
,
qIuJp
,
temp5
);
acrossProductVector3
(
deltaR
,
qIr
,
temp6
);
acrossProductVector3
(
deltaR
,
qIuJ
,
temp7
);
acrossProductVector3
(
atomJ
.
inducedDipole
,
qIr
,
temp8
);
acrossProductVector3
(
atomJ
.
inducedDipoleP
,
qIr
,
temp9
);
acrossProductVector3
(
atomI
.
labFrameDipole
,
qJr
,
temp10
);
acrossProductVector3
(
atomJ
.
labFrameDipole
,
qIr
,
temp11
);
acrossProductVector3
(
deltaR
,
qIqJr
,
temp12
);
acrossProductVector3
(
deltaR
,
qIdJ
,
temp13
);
amatrixCrossProductMatrix3
(
atomI
.
labFrameQuadrupole
,
atomJ
.
labFrameQuadrupole
,
temp14
);
acrossProductVector3
(
qJr
,
qIr
,
temp15
);
// unroll?
...
...
@@ -561,20 +485,20 @@ if( 1 ){
}
acrossProductVector3
(
&
vectorFields
[
3
*
_dJ
],
&
vectorFields
[
3
*
_r
]
,
temp2
);
acrossProductVector3
(
&
vectorFields
[
3
*
_r
],
&
vectorFields
[
3
*
_
qJr
]
,
temp3
);
acrossProductVector3
(
&
vectorFields
[
3
*
_dI
],
&
vectorFields
[
3
*
_
qJr
]
,
temp4
);
acrossProductVector3
(
&
vectorFields
[
3
*
_dJ
],
&
vectorFields
[
3
*
_
qIr
]
,
temp5
);
acrossProductVector3
(
&
vectorFields
[
3
*
_r
],
&
vectorFields
[
3
*
_
qJdI
]
,
temp6
);
acrossProductVector3
(
&
vectorFields
[
3
*
_r
],
&
vectorFields
[
3
*
_
qJqIr
]
,
temp7
);
acrossProductVector3
(
&
vectorFields
[
3
*
_
qJr
]
,
&
vectorFields
[
3
*
_
qIr
]
,
temp8
);
// _qJrxqIr
acrossProductVector3
(
&
vectorFields
[
3
*
_dJ
],
&
vectorFields
[
3
*
_uI
]
,
temp9
);
// _dJxuI
acrossProductVector3
(
&
vectorFields
[
3
*
_dJ
],
&
vectorFields
[
3
*
_uIp
]
,
temp10
);
// _dJxuIp
acrossProductVector3
(
&
vectorFields
[
3
*
_uIp
],
&
vectorFields
[
3
*
_
qJr
]
,
temp11
);
// _uIxqJrp
acrossProductVector3
(
&
vectorFields
[
3
*
_uI
],
&
vectorFields
[
3
*
_
qJr
]
,
temp12
);
// _uIxqJr
acrossProductVector3
(
&
vectorFields
[
3
*
_r
],
&
vectorFields
[
3
*
_
qJuIp
]
,
temp13
);
// _rxqJuIp
acrossProductVector3
(
&
vectorFields
[
3
*
_r
],
&
vectorFields
[
3
*
_
qJuI
]
,
temp15
);
// _rxqJuI
acrossProductVector3
(
atomJ
.
labFrameDipole
,
deltaR
,
temp2
);
acrossProductVector3
(
deltaR
,
qJr
,
temp3
);
acrossProductVector3
(
atomI
.
labFrameDipole
,
qJr
,
temp4
);
acrossProductVector3
(
atomJ
.
labFrameDipole
,
qIr
,
temp5
);
acrossProductVector3
(
deltaR
,
qJdI
,
temp6
);
acrossProductVector3
(
deltaR
,
qJqIr
,
temp7
);
acrossProductVector3
(
qJr
,
qIr
,
temp8
);
// _qJrxqIr
acrossProductVector3
(
atomJ
.
labFrameDipole
,
atomI
.
inducedDipole
,
temp9
);
// _dJxuI
acrossProductVector3
(
atomJ
.
labFrameDipole
,
atomI
.
inducedDipoleP
,
temp10
);
// _dJxuIp
acrossProductVector3
(
atomI
.
inducedDipoleP
,
qJr
,
temp11
);
// _uIxqJrp
acrossProductVector3
(
atomI
.
inducedDipole
,
qJr
,
temp12
);
// _uIxqJr
acrossProductVector3
(
deltaR
,
qJuIp
,
temp13
);
// _rxqJuIp
acrossProductVector3
(
deltaR
,
qJuI
,
temp15
);
// _rxqJuI
// unroll?
...
...
@@ -684,15 +608,15 @@ int debugIndex = 0;
debugArray[debugIndex].w = 21.0f;
debugIndex++;
debugArray[debugIndex].x =
vectorFields[3*_dJ
];
debugArray[debugIndex].y =
vectorFields[3*_dJ+
1];
debugArray[debugIndex].z =
vectorFields[3*_dJ+
2];
debugArray[debugIndex].x =
atomJ.labFrameDipole[0
];
debugArray[debugIndex].y =
atomJ.labFrameDipole[
1];
debugArray[debugIndex].z =
atomJ.labFrameDipole[
2];
debugArray[debugIndex].w = 22.0f;
debugIndex++;
debugArray[debugIndex].x =
vectorFields[3*_r
];
debugArray[debugIndex].y =
vectorFields[3*_r+
1];
debugArray[debugIndex].z =
vectorFields[3*_r+
2];
debugArray[debugIndex].x =
deltaR[0
];
debugArray[debugIndex].y =
deltaR[
1];
debugArray[debugIndex].z =
deltaR[
2];
debugArray[debugIndex].w = 23.0f;
}
...
...
@@ -715,57 +639,6 @@ int debugIndex = 0;
}
struct
ElectrostaticParticle
{
// coordinates charge
float
x
;
float
y
;
float
z
;
float
q
;
// lab frame dipole
float
labFrameDipole_X
;
float
labFrameDipole_Y
;
float
labFrameDipole_Z
;
// lab frame quadrupole
float
labFrameQuadrupole_XX
;
float
labFrameQuadrupole_XY
;
float
labFrameQuadrupole_XZ
;
float
labFrameQuadrupole_YY
;
float
labFrameQuadrupole_YZ
;
float
labFrameQuadrupole_ZZ
;
// induced dipole
float
inducedDipole_X
;
float
inducedDipole_Y
;
float
inducedDipole_Z
;
// polar induced dipole
float
inducedDipoleP_X
;
float
inducedDipoleP_Y
;
float
inducedDipoleP_Z
;
// scaling factors
float
thole
;
float
damp
;
float
force_X
;
float
force_Y
;
float
force_Z
;
float
torque_X
;
float
torque_Y
;
float
torque_Z
;
};
__device__
void
loadElectrostaticShared
(
struct
ElectrostaticParticle
*
sA
,
unsigned
int
atomI
,
float4
*
atomCoord
,
float
*
labFrameDipoleJ
,
float
*
labQuadrupole
,
float
*
inducedDipole
,
float
*
inducedDipolePolar
,
float2
*
dampingFactorAndThole
)
...
...
@@ -779,76 +652,39 @@ __device__ void loadElectrostaticShared( struct ElectrostaticParticle* sA, unsig
// lab dipole
sA
->
labFrameDipole
_X
=
labFrameDipoleJ
[
atomI
*
3
];
sA
->
labFrameDipole
_Y
=
labFrameDipoleJ
[
atomI
*
3
+
1
];
sA
->
labFrameDipole
_Z
=
labFrameDipoleJ
[
atomI
*
3
+
2
];
sA
->
labFrameDipole
[
0
]
=
labFrameDipoleJ
[
atomI
*
3
];
sA
->
labFrameDipole
[
1
]
=
labFrameDipoleJ
[
atomI
*
3
+
1
];
sA
->
labFrameDipole
[
2
]
=
labFrameDipoleJ
[
atomI
*
3
+
2
];
// lab quadrupole
sA
->
labFrameQuadrupole_XX
=
labQuadrupole
[
atomI
*
9
];
sA
->
labFrameQuadrupole_XY
=
labQuadrupole
[
atomI
*
9
+
1
];
sA
->
labFrameQuadrupole_XZ
=
labQuadrupole
[
atomI
*
9
+
2
];
sA
->
labFrameQuadrupole_YY
=
labQuadrupole
[
atomI
*
9
+
4
];
sA
->
labFrameQuadrupole_YZ
=
labQuadrupole
[
atomI
*
9
+
5
];
sA
->
labFrameQuadrupole_ZZ
=
labQuadrupole
[
atomI
*
9
+
8
];
sA
->
labFrameQuadrupole
[
0
]
=
labQuadrupole
[
atomI
*
9
];
sA
->
labFrameQuadrupole
[
1
]
=
labQuadrupole
[
atomI
*
9
+
1
];
sA
->
labFrameQuadrupole
[
2
]
=
labQuadrupole
[
atomI
*
9
+
2
];
sA
->
labFrameQuadrupole
[
3
]
=
labQuadrupole
[
atomI
*
9
+
3
];
sA
->
labFrameQuadrupole
[
4
]
=
labQuadrupole
[
atomI
*
9
+
4
];
sA
->
labFrameQuadrupole
[
5
]
=
labQuadrupole
[
atomI
*
9
+
5
];
sA
->
labFrameQuadrupole
[
6
]
=
labQuadrupole
[
atomI
*
9
+
6
];
sA
->
labFrameQuadrupole
[
7
]
=
labQuadrupole
[
atomI
*
9
+
7
];
sA
->
labFrameQuadrupole
[
8
]
=
labQuadrupole
[
atomI
*
9
+
8
];
// induced dipole
sA
->
inducedDipole
_X
=
inducedDipole
[
atomI
*
3
];
sA
->
inducedDipole
_Y
=
inducedDipole
[
atomI
*
3
+
1
];
sA
->
inducedDipole
_Z
=
inducedDipole
[
atomI
*
3
+
2
];
sA
->
inducedDipole
[
0
]
=
inducedDipole
[
atomI
*
3
];
sA
->
inducedDipole
[
1
]
=
inducedDipole
[
atomI
*
3
+
1
];
sA
->
inducedDipole
[
2
]
=
inducedDipole
[
atomI
*
3
+
2
];
// induced dipole polar
sA
->
inducedDipoleP
_X
=
inducedDipolePolar
[
atomI
*
3
];
sA
->
inducedDipoleP
_Y
=
inducedDipolePolar
[
atomI
*
3
+
1
];
sA
->
inducedDipoleP
_Z
=
inducedDipolePolar
[
atomI
*
3
+
2
];
sA
->
inducedDipoleP
[
0
]
=
inducedDipolePolar
[
atomI
*
3
];
sA
->
inducedDipoleP
[
1
]
=
inducedDipolePolar
[
atomI
*
3
+
1
];
sA
->
inducedDipoleP
[
2
]
=
inducedDipolePolar
[
atomI
*
3
+
2
];
sA
->
damp
=
dampingFactorAndThole
[
atomI
].
x
;
sA
->
thole
=
dampingFactorAndThole
[
atomI
].
y
;
}
// load struct and arrays w/ shared data in sA
__device__
void
loadElectrostaticData
(
struct
ElectrostaticParticle
*
sA
,
float4
*
jCoord
,
float
*
jDipole
,
float
*
jQuadrupole
,
float
*
jInducedDipole
,
float
*
jInducedDipolePolar
)
{
// load coords, charge, ...
jCoord
->
x
=
sA
->
x
;
jCoord
->
y
=
sA
->
y
;
jCoord
->
z
=
sA
->
z
;
jCoord
->
w
=
sA
->
q
;
jDipole
[
0
]
=
sA
->
labFrameDipole_X
;
jDipole
[
1
]
=
sA
->
labFrameDipole_Y
;
jDipole
[
2
]
=
sA
->
labFrameDipole_Z
;
jQuadrupole
[
0
]
=
sA
->
labFrameQuadrupole_XX
;
jQuadrupole
[
1
]
=
sA
->
labFrameQuadrupole_XY
;
jQuadrupole
[
2
]
=
sA
->
labFrameQuadrupole_XZ
;
jQuadrupole
[
3
]
=
sA
->
labFrameQuadrupole_XY
;
jQuadrupole
[
4
]
=
sA
->
labFrameQuadrupole_YY
;
jQuadrupole
[
5
]
=
sA
->
labFrameQuadrupole_YZ
;
jQuadrupole
[
6
]
=
sA
->
labFrameQuadrupole_XZ
;
jQuadrupole
[
7
]
=
sA
->
labFrameQuadrupole_YZ
;
jQuadrupole
[
8
]
=
sA
->
labFrameQuadrupole_ZZ
;
jInducedDipole
[
0
]
=
sA
->
inducedDipole_X
;
jInducedDipole
[
1
]
=
sA
->
inducedDipole_Y
;
jInducedDipole
[
2
]
=
sA
->
inducedDipole_Z
;
jInducedDipolePolar
[
0
]
=
sA
->
inducedDipoleP_X
;
jInducedDipolePolar
[
1
]
=
sA
->
inducedDipoleP_Y
;
jInducedDipolePolar
[
2
]
=
sA
->
inducedDipoleP_Z
;
}
// Include versions of the kernels for N^2 calculations.
#undef USE_OUTPUT_BUFFER_PER_WARP
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.h
View file @
354f7b67
...
...
@@ -63,11 +63,6 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
unsigned
int
lasty
=
0xFFFFFFFF
;
float
totalEnergy
=
0
.
0
f
;
float4
jCoord
;
float
jDipole
[
3
];
float
jQuadrupole
[
9
];
float
jInducedDipole
[
3
];
float
jInducedDipolePolar
[
3
];
float
scalingFactors
[
LastScalingIndex
];
while
(
pos
<
end
)
...
...
@@ -87,18 +82,18 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
ElectrostaticParticle
*
psA
=
&
sA
[
tbx
];
unsigned
int
atomI
=
x
+
tgx
;
float4
iCoord
=
atomCoord
[
atomI
]
;
float
forceSum
[
3
];
float
torqueSum
[
3
]
;
ElectrostaticParticle
localParticle
;
loadElectrostaticShared
(
&
localParticle
,
atomI
,
atomCoord
,
labFrameDipole
,
labFrameQuadrupole
,
inducedDipole
,
inducedDipolePolar
,
cAmoebaSim
.
pDampingFactorAndThole
)
;
force
Sum
[
0
]
=
0
.
0
f
;
force
Sum
[
1
]
=
0
.
0
f
;
force
Sum
[
2
]
=
0
.
0
f
;
localParticle
.
force
[
0
]
=
0
.
0
f
;
localParticle
.
force
[
1
]
=
0
.
0
f
;
localParticle
.
force
[
2
]
=
0
.
0
f
;
torque
Sum
[
0
]
=
0
.
0
f
;
torque
Sum
[
1
]
=
0
.
0
f
;
torque
Sum
[
2
]
=
0
.
0
f
;
localParticle
.
torque
[
0
]
=
0
.
0
f
;
localParticle
.
torque
[
1
]
=
0
.
0
f
;
localParticle
.
torque
[
2
]
=
0
.
0
f
;
scalingFactors
[
PScaleIndex
]
=
1
.
0
f
;
scalingFactors
[
DScaleIndex
]
=
1
.
0
f
;
...
...
@@ -126,19 +121,8 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
float
force
[
3
];
float
torque
[
2
][
3
];
// load coords, charge, ...
loadElectrostaticData
(
&
(
psA
[
j
]),
&
jCoord
,
jDipole
,
jQuadrupole
,
jInducedDipole
,
jInducedDipolePolar
);
float
energy
;
calculateElectrostaticPairIxn_kernel
(
iCoord
,
jCoord
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
,
psA
[
j
].
damp
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
,
psA
[
j
].
thole
,
&
(
labFrameDipole
[
3
*
atomI
]),
jDipole
,
&
(
labFrameQuadrupole
[
9
*
atomI
]),
jQuadrupole
,
&
(
inducedDipole
[
3
*
atomI
]),
jInducedDipole
,
&
(
inducedDipolePolar
[
3
*
atomI
]),
jInducedDipolePolar
,
calculateElectrostaticPairIxn_kernel
(
localParticle
,
psA
[
j
],
cAmoebaSim
.
scalingDistanceCutoff
,
scalingFactors
,
force
,
torque
,
&
energy
#ifdef AMOEBA_DEBUG
...
...
@@ -150,13 +134,13 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
force
Sum
[
0
]
+=
mask
?
force
[
0
]
:
0
.
0
f
;
force
Sum
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
force
Sum
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
localParticle
.
force
[
0
]
+=
mask
?
force
[
0
]
:
0
.
0
f
;
localParticle
.
force
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
localParticle
.
force
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
torque
Sum
[
0
]
+=
mask
?
torque
[
0
][
0
]
:
0
.
0
f
;
torque
Sum
[
1
]
+=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
torque
Sum
[
2
]
+=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
localParticle
.
torque
[
0
]
+=
mask
?
torque
[
0
][
0
]
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
totalEnergy
+=
mask
?
0
.
5
*
energy
:
0
.
0
f
;
}
...
...
@@ -178,11 +162,6 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
unsigned
int
atomJ
=
y
+
j
;
// load coords, charge, ...
loadElectrostaticData
(
&
(
psA
[
j
]),
&
jCoord
,
jDipole
,
jQuadrupole
,
jInducedDipole
,
jInducedDipolePolar
);
// set scale factors
getMaskedDScaleFactor
(
j
,
dScaleMask
,
scalingFactors
+
DScaleIndex
);
...
...
@@ -192,13 +171,7 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
// force
float
energy
;
calculateElectrostaticPairIxn_kernel
(
iCoord
,
jCoord
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
,
psA
[
j
].
damp
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
,
psA
[
j
].
thole
,
&
(
labFrameDipole
[
3
*
atomI
]),
jDipole
,
&
(
labFrameQuadrupole
[
9
*
atomI
]),
jQuadrupole
,
&
(
inducedDipole
[
3
*
atomI
]),
jInducedDipole
,
&
(
inducedDipolePolar
[
3
*
atomI
]),
jInducedDipolePolar
,
calculateElectrostaticPairIxn_kernel
(
localParticle
,
psA
[
j
],
cAmoebaSim
.
scalingDistanceCutoff
,
scalingFactors
,
force
,
torque
,
&
energy
#ifdef AMOEBA_DEBUG
...
...
@@ -213,13 +186,13 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
force
Sum
[
0
]
+=
mask
?
force
[
0
]
:
0
.
0
f
;
force
Sum
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
force
Sum
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
localParticle
.
force
[
0
]
+=
mask
?
force
[
0
]
:
0
.
0
f
;
localParticle
.
force
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
localParticle
.
force
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
torque
Sum
[
0
]
+=
mask
?
torque
[
0
][
0
]
:
0
.
0
f
;
torque
Sum
[
1
]
+=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
torque
Sum
[
2
]
+=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
localParticle
.
torque
[
0
]
+=
mask
?
torque
[
0
][
0
]
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
totalEnergy
+=
mask
?
0
.
5
*
energy
:
0
.
0
f
;
...
...
@@ -284,38 +257,38 @@ if( atomI == targetAtom ){
float
of
;
unsigned
int
offset
=
3
*
(
x
+
tgx
+
warp
*
cAmoebaSim
.
paddedNumberOfAtoms
);
of
=
outputForce
[
offset
];
of
+=
force
Sum
[
0
];
of
+=
localParticle
.
force
[
0
];
outputForce
[
offset
]
=
of
;
of
=
outputForce
[
offset
+
1
];
of
+=
force
Sum
[
1
];
of
+=
localParticle
.
force
[
1
];
outputForce
[
offset
+
1
]
=
of
;
of
=
outputForce
[
offset
+
2
];
of
+=
force
Sum
[
2
];
of
+=
localParticle
.
force
[
2
];
outputForce
[
offset
+
2
]
=
of
;
of
=
outputTorque
[
offset
];
of
+=
torque
Sum
[
0
];
of
+=
localParticle
.
torque
[
0
];
outputTorque
[
offset
]
=
of
;
of
=
outputTorque
[
offset
+
1
];
of
+=
torque
Sum
[
1
];
of
+=
localParticle
.
torque
[
1
];
outputTorque
[
offset
+
1
]
=
of
;
of
=
outputTorque
[
offset
+
2
];
of
+=
torque
Sum
[
2
];
of
+=
localParticle
.
torque
[
2
];
outputTorque
[
offset
+
2
]
=
of
;
#else
unsigned
int
offset
=
3
*
(
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
);
outputForce
[
offset
]
=
force
Sum
[
0
];
outputForce
[
offset
+
1
]
=
force
Sum
[
1
];
outputForce
[
offset
+
2
]
=
force
Sum
[
2
];
outputForce
[
offset
]
=
localParticle
.
force
[
0
];
outputForce
[
offset
+
1
]
=
localParticle
.
force
[
1
];
outputForce
[
offset
+
2
]
=
localParticle
.
force
[
2
];
outputTorque
[
offset
]
=
torque
Sum
[
0
];
outputTorque
[
offset
+
1
]
=
torque
Sum
[
1
];
outputTorque
[
offset
+
2
]
=
torque
Sum
[
2
];
outputTorque
[
offset
]
=
localParticle
.
torque
[
0
];
outputTorque
[
offset
+
1
]
=
localParticle
.
torque
[
1
];
outputTorque
[
offset
+
2
]
=
localParticle
.
torque
[
2
];
#endif
}
...
...
@@ -333,13 +306,13 @@ if( atomI == targetAtom ){
}
sA
[
threadIdx
.
x
].
force
_X
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
force
_Y
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
force
_Z
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
force
[
0
]
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
force
[
1
]
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
force
[
2
]
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
torque
_X
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
torque
_Y
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
torque
_Z
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
torque
[
0
]
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
torque
[
1
]
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
torque
[
2
]
=
0
.
0
f
;
if
(
!
bExclusionFlag
)
{
...
...
@@ -351,19 +324,8 @@ if( atomI == targetAtom ){
unsigned
int
atomJ
=
y
+
tj
;
// load coords, charge, ...
loadElectrostaticData
(
&
(
psA
[
tj
]),
&
jCoord
,
jDipole
,
jQuadrupole
,
jInducedDipole
,
jInducedDipolePolar
);
float
energy
;
calculateElectrostaticPairIxn_kernel
(
iCoord
,
jCoord
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
,
psA
[
tj
].
damp
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
,
psA
[
tj
].
thole
,
&
(
labFrameDipole
[
3
*
atomI
]),
jDipole
,
&
(
labFrameQuadrupole
[
9
*
atomI
]),
jQuadrupole
,
&
(
inducedDipole
[
3
*
atomI
]),
jInducedDipole
,
&
(
inducedDipolePolar
[
3
*
atomI
]),
jInducedDipolePolar
,
calculateElectrostaticPairIxn_kernel
(
localParticle
,
psA
[
tj
],
cAmoebaSim
.
scalingDistanceCutoff
,
scalingFactors
,
force
,
torque
,
&
energy
#ifdef AMOEBA_DEBUG
...
...
@@ -375,25 +337,25 @@ if( atomI == targetAtom ){
// add force and torque to atom I due atom J
force
Sum
[
0
]
+=
mask
?
force
[
0
]
:
0
.
0
f
;
force
Sum
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
force
Sum
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
localParticle
.
force
[
0
]
+=
mask
?
force
[
0
]
:
0
.
0
f
;
localParticle
.
force
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
localParticle
.
force
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
torque
Sum
[
0
]
+=
mask
?
torque
[
0
][
0
]
:
0
.
0
f
;
torque
Sum
[
1
]
+=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
torque
Sum
[
2
]
+=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
localParticle
.
torque
[
0
]
+=
mask
?
torque
[
0
][
0
]
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
totalEnergy
+=
mask
?
energy
:
0
.
0
f
;
// add force and torque to atom J due atom I
psA
[
tj
].
force
_X
-=
mask
?
force
[
0
]
:
0
.
0
f
;
psA
[
tj
].
force
_Y
-=
mask
?
force
[
1
]
:
0
.
0
f
;
psA
[
tj
].
force
_Z
-=
mask
?
force
[
2
]
:
0
.
0
f
;
psA
[
tj
].
force
[
0
]
-=
mask
?
force
[
0
]
:
0
.
0
f
;
psA
[
tj
].
force
[
1
]
-=
mask
?
force
[
1
]
:
0
.
0
f
;
psA
[
tj
].
force
[
2
]
-=
mask
?
force
[
2
]
:
0
.
0
f
;
psA
[
tj
].
torque
_X
+=
mask
?
torque
[
1
][
0
]
:
0
.
0
f
;
psA
[
tj
].
torque
_Y
+=
mask
?
torque
[
1
][
1
]
:
0
.
0
f
;
psA
[
tj
].
torque
_Z
+=
mask
?
torque
[
1
][
2
]
:
0
.
0
f
;
psA
[
tj
].
torque
[
0
]
+=
mask
?
torque
[
1
][
0
]
:
0
.
0
f
;
psA
[
tj
].
torque
[
1
]
+=
mask
?
torque
[
1
][
1
]
:
0
.
0
f
;
psA
[
tj
].
torque
[
2
]
+=
mask
?
torque
[
1
][
2
]
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
...
...
@@ -478,11 +440,6 @@ if( atomI == targetAtom || atomJ == targetAtom ){
unsigned
int
atomJ
=
y
+
tj
;
// load coords, charge, ...
loadElectrostaticData
(
&
(
psA
[
tj
]),
&
jCoord
,
jDipole
,
jQuadrupole
,
jInducedDipole
,
jInducedDipolePolar
);
// set scale factors
getMaskedDScaleFactor
(
tj
,
dScaleMask
,
scalingFactors
+
DScaleIndex
);
...
...
@@ -492,13 +449,7 @@ if( atomI == targetAtom || atomJ == targetAtom ){
// force
float
energy
;
calculateElectrostaticPairIxn_kernel
(
iCoord
,
jCoord
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
,
psA
[
tj
].
damp
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
,
psA
[
tj
].
thole
,
&
(
labFrameDipole
[
3
*
atomI
]),
jDipole
,
&
(
labFrameQuadrupole
[
9
*
atomI
]),
jQuadrupole
,
&
(
inducedDipole
[
3
*
atomI
]),
jInducedDipole
,
&
(
inducedDipolePolar
[
3
*
atomI
]),
jInducedDipolePolar
,
calculateElectrostaticPairIxn_kernel
(
localParticle
,
psA
[
tj
],
cAmoebaSim
.
scalingDistanceCutoff
,
scalingFactors
,
force
,
torque
,
&
energy
#ifdef AMOEBA_DEBUG
...
...
@@ -512,25 +463,25 @@ if( atomI == targetAtom || atomJ == targetAtom ){
// add force and torque to atom I due atom J
force
Sum
[
0
]
+=
mask
?
force
[
0
]
:
0
.
0
f
;
force
Sum
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
force
Sum
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
localParticle
.
force
[
0
]
+=
mask
?
force
[
0
]
:
0
.
0
f
;
localParticle
.
force
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
localParticle
.
force
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
torque
Sum
[
0
]
+=
mask
?
torque
[
0
][
0
]
:
0
.
0
f
;
torque
Sum
[
1
]
+=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
torque
Sum
[
2
]
+=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
localParticle
.
torque
[
0
]
+=
mask
?
torque
[
0
][
0
]
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
totalEnergy
+=
mask
?
energy
:
0
.
0
f
;
// add force and torque to atom J due atom I
psA
[
tj
].
force
_X
-=
mask
?
force
[
0
]
:
0
.
0
f
;
psA
[
tj
].
force
_Y
-=
mask
?
force
[
1
]
:
0
.
0
f
;
psA
[
tj
].
force
_Z
-=
mask
?
force
[
2
]
:
0
.
0
f
;
psA
[
tj
].
force
[
0
]
-=
mask
?
force
[
0
]
:
0
.
0
f
;
psA
[
tj
].
force
[
1
]
-=
mask
?
force
[
1
]
:
0
.
0
f
;
psA
[
tj
].
force
[
2
]
-=
mask
?
force
[
2
]
:
0
.
0
f
;
psA
[
tj
].
torque
_X
+=
mask
?
torque
[
1
][
0
]
:
0
.
0
f
;
psA
[
tj
].
torque
_Y
+=
mask
?
torque
[
1
][
1
]
:
0
.
0
f
;
psA
[
tj
].
torque
_Z
+=
mask
?
torque
[
1
][
2
]
:
0
.
0
f
;
psA
[
tj
].
torque
[
0
]
+=
mask
?
torque
[
1
][
0
]
:
0
.
0
f
;
psA
[
tj
].
torque
[
1
]
+=
mask
?
torque
[
1
][
1
]
:
0
.
0
f
;
psA
[
tj
].
torque
[
2
]
+=
mask
?
torque
[
1
][
2
]
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
...
...
@@ -642,75 +593,75 @@ if( atomI == targetAtom || atomJ == targetAtom ){
float
of
;
unsigned
int
offset
=
3
*
(
x
+
tgx
+
warp
*
cAmoebaSim
.
paddedNumberOfAtoms
);
of
=
outputForce
[
offset
];
of
+=
force
Sum
[
0
];
of
+=
localParticle
.
force
[
0
];
outputForce
[
offset
]
=
of
;
of
=
outputForce
[
offset
+
1
];
of
+=
force
Sum
[
1
];
of
+=
localParticle
.
force
[
1
];
outputForce
[
offset
+
1
]
=
of
;
of
=
outputForce
[
offset
+
2
];
of
+=
force
Sum
[
2
];
of
+=
localParticle
.
force
[
2
];
outputForce
[
offset
+
2
]
=
of
;
of
=
outputTorque
[
offset
];
of
+=
torque
Sum
[
0
];
of
+=
localParticle
.
torque
[
0
];
outputTorque
[
offset
]
=
of
;
of
=
outputTorque
[
offset
+
1
];
of
+=
torque
Sum
[
1
];
of
+=
localParticle
.
torque
[
1
];
outputTorque
[
offset
+
1
]
=
of
;
of
=
outputTorque
[
offset
+
2
];
of
+=
torque
Sum
[
2
];
of
+=
localParticle
.
torque
[
2
];
outputTorque
[
offset
+
2
]
=
of
;
offset
=
3
*
(
y
+
tgx
+
warp
*
cAmoebaSim
.
paddedNumberOfAtoms
);
of
=
outputForce
[
offset
];
of
+=
sA
[
threadIdx
.
x
].
force
_X
;
of
+=
sA
[
threadIdx
.
x
].
force
[
0
]
;
outputForce
[
offset
]
=
of
;
of
=
outputForce
[
offset
+
1
];
of
+=
sA
[
threadIdx
.
x
].
force
_Y
;
of
+=
sA
[
threadIdx
.
x
].
force
[
1
]
;
outputForce
[
offset
+
1
]
=
of
;
of
=
outputForce
[
offset
+
2
];
of
+=
sA
[
threadIdx
.
x
].
force
_Z
;
of
+=
sA
[
threadIdx
.
x
].
force
[
2
]
;
outputForce
[
offset
+
2
]
=
of
;
of
=
outputTorque
[
offset
];
of
+=
sA
[
threadIdx
.
x
].
torque
_X
;
of
+=
sA
[
threadIdx
.
x
].
torque
[
0
]
;
outputTorque
[
offset
]
=
of
;
of
=
outputTorque
[
offset
+
1
];
of
+=
sA
[
threadIdx
.
x
].
torque
_Y
;
of
+=
sA
[
threadIdx
.
x
].
torque
[
1
]
;
outputTorque
[
offset
+
1
]
=
of
;
of
=
outputTorque
[
offset
+
2
];
of
+=
sA
[
threadIdx
.
x
].
torque
_Z
;
of
+=
sA
[
threadIdx
.
x
].
torque
[
2
]
;
outputTorque
[
offset
+
2
]
=
of
;
#else
unsigned
int
offset
=
3
*
(
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
);
outputForce
[
offset
]
=
force
Sum
[
0
];
outputForce
[
offset
+
1
]
=
force
Sum
[
1
];
outputForce
[
offset
+
2
]
=
force
Sum
[
2
];
outputForce
[
offset
]
=
localParticle
.
force
[
0
];
outputForce
[
offset
+
1
]
=
localParticle
.
force
[
1
];
outputForce
[
offset
+
2
]
=
localParticle
.
force
[
2
];
outputTorque
[
offset
]
=
torque
Sum
[
0
];
outputTorque
[
offset
+
1
]
=
torque
Sum
[
1
];
outputTorque
[
offset
+
2
]
=
torque
Sum
[
2
];
outputTorque
[
offset
]
=
localParticle
.
torque
[
0
];
outputTorque
[
offset
+
1
]
=
localParticle
.
torque
[
1
];
outputTorque
[
offset
+
2
]
=
localParticle
.
torque
[
2
];
offset
=
3
*
(
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
);
outputForce
[
offset
]
=
sA
[
threadIdx
.
x
].
force
_X
;
outputForce
[
offset
+
1
]
=
sA
[
threadIdx
.
x
].
force
_Y
;
outputForce
[
offset
+
2
]
=
sA
[
threadIdx
.
x
].
force
_Z
;
outputForce
[
offset
]
=
sA
[
threadIdx
.
x
].
force
[
0
]
;
outputForce
[
offset
+
1
]
=
sA
[
threadIdx
.
x
].
force
[
1
]
;
outputForce
[
offset
+
2
]
=
sA
[
threadIdx
.
x
].
force
[
2
]
;
outputTorque
[
offset
]
=
sA
[
threadIdx
.
x
].
torque
_X
;
outputTorque
[
offset
+
1
]
=
sA
[
threadIdx
.
x
].
torque
_Y
;
outputTorque
[
offset
+
2
]
=
sA
[
threadIdx
.
x
].
torque
_Z
;
outputTorque
[
offset
]
=
sA
[
threadIdx
.
x
].
torque
[
0
]
;
outputTorque
[
offset
+
1
]
=
sA
[
threadIdx
.
x
].
torque
[
1
]
;
outputTorque
[
offset
+
2
]
=
sA
[
threadIdx
.
x
].
torque
[
2
]
;
#endif
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment