Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
42d5b80a
"platforms/opencl/src/OpenCLForceInfo.cpp" did not exist on "012b28e6e9d3a3848789b7cb3ca055052528fb36"
Commit
42d5b80a
authored
Sep 14, 2010
by
Mark Friedrichs
Browse files
Continue PME
parent
770f7abc
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
161 additions
and
105 deletions
+161
-105
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
...src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
...ms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
+59
-29
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
...rms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
+61
-68
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+7
-3
plugins/amoeba/platforms/cuda/tests/TestCudaAmoebaPME.cpp
plugins/amoeba/platforms/cuda/tests/TestCudaAmoebaPME.cpp
+33
-4
No files found.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
View file @
42d5b80a
...
...
@@ -1271,6 +1271,6 @@ void cudaComputeAmoebaPmeElectrostatic( amoebaGpuContext amoebaGpu )
{
cudaComputeAmoebaPmeDirectElectrostatic
(
amoebaGpu
);
kCalculateAmoebaPME
(
amoebaGpu
);
//
kCalculateAmoebaPME( amoebaGpu );
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
View file @
42d5b80a
...
...
@@ -46,6 +46,7 @@ void kReduceDirectSelfFields_kernel( unsigned int fieldComponents, unsigned int
// Reduce field
const
float
term
=
(
4.0
f
/
3.0
f
)
*
(
cSim
.
alphaEwald
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
)
/
cAmoebaSim
.
sqrtPi
;
//const float term = 0.0f;
while
(
pos
<
fieldComponents
)
{
...
...
@@ -73,6 +74,7 @@ void kReduceDirectSelfFields_kernel( unsigned int fieldComponents, unsigned int
{
totalField
+=
pFt
[
0
];
}
fieldOut
[
pos
]
=
totalField
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
...
...
@@ -256,18 +258,19 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
if
(
r2
<=
cAmoebaSim
.
cutoffDistance2
){
fields
[
0
][
0
]
=
fim
[
0
]
-
fid
[
0
];
fields
[
1
][
0
]
=
fkm
[
0
]
-
fkd
[
0
];
fields
[
2
][
0
]
=
fim
[
0
]
-
fip
[
0
];
fields
[
3
][
0
]
=
fkm
[
0
]
-
fkp
[
0
];
fields
[
0
][
1
]
=
fim
[
1
]
-
fid
[
1
];
fields
[
1
][
1
]
=
fkm
[
1
]
-
fkd
[
1
];
fields
[
2
][
1
]
=
fim
[
1
]
-
fip
[
1
];
fields
[
3
][
1
]
=
fkm
[
1
]
-
fkp
[
1
];
fields
[
0
][
2
]
=
fim
[
2
]
-
fid
[
2
];
fields
[
1
][
0
]
=
fkm
[
0
]
-
fkd
[
0
];
fields
[
1
][
1
]
=
fkm
[
1
]
-
fkd
[
1
];
fields
[
1
][
2
]
=
fkm
[
2
]
-
fkd
[
2
];
fields
[
2
][
0
]
=
fim
[
0
]
-
fip
[
0
];
fields
[
2
][
1
]
=
fim
[
1
]
-
fip
[
1
];
fields
[
2
][
2
]
=
fim
[
2
]
-
fip
[
2
];
fields
[
3
][
0
]
=
fkm
[
0
]
-
fkp
[
0
];
fields
[
3
][
1
]
=
fkm
[
1
]
-
fkp
[
1
];
fields
[
3
][
2
]
=
fkm
[
2
]
-
fkp
[
2
];
}
else
{
...
...
@@ -297,11 +300,12 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
pullBack
[
1
].
y
=
atomJ
.
y
-
atomI
.
y
;
pullBack
[
1
].
z
=
atomJ
.
z
-
atomI
.
z
;
pullBack
[
1
].
w
=
(
atomJ
.
x
-
atomI
.
x
)
*
(
atomJ
.
x
-
atomI
.
x
)
+
(
atomJ
.
y
-
atomI
.
y
)
*
(
atomJ
.
y
-
atomI
.
y
)
+
(
atomJ
.
z
-
atomI
.
z
)
*
(
atomJ
.
z
-
atomI
.
z
);
/*
pullBack[1].x = scale3;
pullBack[1].y = scale5;
pullBack[1].z = scale7;
*/
pullBack
[
2
].
x
=
scale3
;
pullBack
[
2
].
y
=
scale5
;
pullBack
[
2
].
z
=
scale7
;
pullBack
[
2
].
w
=
-
1.0
;
#endif
}
...
...
@@ -351,10 +355,17 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
unsigned
int
targetAtom
=
0
;
#endif
//#define SET_ALPHA_EWALD
//#ifdef SET_ALPHA_EWALD
// amoebaGpu->gpuContext->sim.alphaEwald = 5.4459052e+00f;
// (void) fprintf( amoebaGpu->log, "computeCudaAmoebaPmeFixedEField: forceing alphaEwald=%15.7e\n", amoebaGpu->gpuContext->sim.alphaEwald );
// SetCalculateAmoebaCudaPmeFixedEFieldSim(amoebaGpu);
//#endif
kClearFields_3
(
amoebaGpu
,
2
);
if
(
gpu
->
bOutputBufferPerWarp
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"N2 warp
\n
"
);
kCalculateAmoebaPmeDirectFixedE_FieldN2ByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondThreadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
...
...
@@ -366,14 +377,6 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
#endif
}
else
{
#ifdef AMOEBA_DEBUG
(
void
)
fprintf
(
amoebaGpu
->
log
,
"N2 no warp
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"AmoebaN2Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
\n
"
,
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondThreadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
FixedFieldParticle
),
sizeof
(
FixedFieldParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#endif
kCalculateAmoebaPmeDirectFixedE_FieldN2_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondThreadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
...
...
@@ -384,7 +387,6 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
]
);
#endif
}
LAUNCHERROR
(
"kCalculateAmoebaPmeDirectFixedE_Field_kernel"
);
#if 0
...
...
@@ -406,16 +408,44 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
gpu
->
psInteractionCount
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"AmoebaN2Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"AmoebaN2Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
warp=%d
\n
"
,
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondThreadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
FixedFieldParticle
),
sizeof
(
FixedFieldParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
sizeof
(
FixedFieldParticle
),
sizeof
(
FixedFieldParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
,
gpu
->
bOutputBufferPerWarp
);
(
void
)
fflush
(
amoebaGpu
->
log
);
amoebaGpu
->
psWorkArray_3_1
->
Download
();
amoebaGpu
->
psWorkArray_3_2
->
Download
();
amoebaGpu
->
psE_Field
->
Download
();
amoebaGpu
->
psE_FieldPolar
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Out
EFields
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Out
WorkArray_3_[1,2] paddedNumberOfAtoms=%d
\n
"
,
amoebaGpu
->
paddedNumberOfAtoms
,
amoebaGpu
->
outputBuffers
);
int
maxPrint
=
32
;
for
(
int
ii
=
0
;
ii
<
amoebaGpu
->
paddedNumberOfAtoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
int
indexOffset
=
ii
*
3
;
// buffer 1
(
void
)
fprintf
(
amoebaGpu
->
log
,
"WArry1[%16.9e %16.9e %16.9e] "
,
amoebaGpu
->
psWorkArray_3_1
->
_pSysStream
[
0
][
indexOffset
],
amoebaGpu
->
psWorkArray_3_1
->
_pSysStream
[
0
][
indexOffset
+
1
],
amoebaGpu
->
psWorkArray_3_1
->
_pSysStream
[
0
][
indexOffset
+
2
]
);
// buffer 2
(
void
)
fprintf
(
amoebaGpu
->
log
,
"WArry2[%16.9e %16.9e %16.9e] "
,
amoebaGpu
->
psWorkArray_3_2
->
_pSysStream
[
0
][
indexOffset
],
amoebaGpu
->
psWorkArray_3_2
->
_pSysStream
[
0
][
indexOffset
+
1
],
amoebaGpu
->
psWorkArray_3_2
->
_pSysStream
[
0
][
indexOffset
+
2
]
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
if
(
ii
==
maxPrint
&&
(
gpu
->
natoms
-
maxPrint
)
>
ii
){
ii
=
gpu
->
natoms
-
maxPrint
;
}
}
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"EFields End
\n
"
);
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
...
...
@@ -450,7 +480,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
int
debugIndex
=
jj
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d PmeFixedEField
\n
"
,
jj
);
for
(
int
kk
=
0
;
kk
<
7
;
kk
++
){
for
(
int
kk
=
0
;
kk
<
10
;
kk
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"[%16.9e %16.9e %16.9e %16.9e]
\n
"
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
x
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
y
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
z
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
w
);
...
...
@@ -476,7 +506,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
}
#endif
if
(
1
){
if
(
0
){
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
0
);
VectorOfDoubleVectors
outputVector
;
...
...
@@ -491,5 +521,5 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
void
cudaComputeAmoebaPmeFixedEField
(
amoebaGpuContext
amoebaGpu
)
{
cudaComputeAmoebaPmeDirectFixedEField
(
amoebaGpu
);
kCalculateAmoebaPMEFixedMultipoleField
(
amoebaGpu
);
//
kCalculateAmoebaPMEFixedMultipoleField( amoebaGpu );
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
View file @
42d5b80a
...
...
@@ -44,6 +44,7 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectFixedE_Field, _kernel)(
){
#ifdef AMOEBA_DEBUG
int
pullIndexMax
=
12
;
float4
pullBack
[
12
];
float
dScaleVal
;
float
pScaleVal
;
...
...
@@ -120,6 +121,7 @@ pScaleVal = 1.0f;
);
unsigned
int
match
=
(
atomI
==
(
y
+
j
))
?
1
:
0
;
match
=
1
;
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
...
...
@@ -130,6 +132,7 @@ pScaleVal = 1.0f;
fieldPolarSum
[
0
]
+=
match
?
0
.
0
f
:
ijField
[
2
][
0
];
fieldPolarSum
[
1
]
+=
match
?
0
.
0
f
:
ijField
[
2
][
1
];
fieldPolarSum
[
2
]
+=
match
?
0
.
0
f
:
ijField
[
2
][
2
];
}
}
...
...
@@ -166,7 +169,7 @@ pScaleVal = pScaleValue;
// nan*0.0 = nan not 0.0, so explicitly exclude (atomI == atomJ) contribution
// by setting match flag
unsigned
int
match
=
(
atomI
==
(
y
+
j
))
?
1
:
0
;
unsigned
int
match
=
(
(
atomI
==
(
y
+
j
))
||
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
((
y
+
j
)
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
1
:
0
;
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
...
...
@@ -180,54 +183,40 @@ pScaleVal = pScaleValue;
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
){
unsigned
int
index
=
atomI
==
targetAtom
?
(
y
+
j
)
:
atomI
;
unsigned
int
pullBackIndex
=
0
;
unsigned
int
indexI
=
0
;
unsigned
int
indexJ
=
indexI
?
0
:
2
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
j
);
debugArray
[
index
].
z
=
dScaleValue
;
debugArray
[
index
].
w
=
pScaleValue
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullBack
[
pullBackIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullBackIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullBackIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullBackIndex
].
w
;
pullBackIndex
++
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullBack
[
pullBackIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullBackIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullBackIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullBackIndex
].
w
;
unsigned
int
index
=
atomI
==
targetAtom
?
(
y
+
j
)
:
atomI
;
unsigned
int
indexI
=
0
;
unsigned
int
indexJ
=
indexI
?
0
:
2
;
unsigned
int
indices
[
4
]
=
{
indexI
,
indexJ
,
indexI
+
1
,
indexJ
+
1
};
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
j
);
debugArray
[
index
].
z
=
dScaleValue
;
debugArray
[
index
].
w
=
pScaleValue
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
unsigned
int
off
=
3
*
(
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
);
debugArray
[
index
].
x
=
(
float
)
x
;
debugArray
[
index
].
y
=
(
float
)
tgx
;
debugArray
[
index
].
z
=
-
2
;
debugArray
[
index
].
w
=
(
float
)
off
;
float
flag
=
7
.
0
f
;
for
(
int
ii
=
0
;
ii
<
4
;
ii
++
){
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
float
flag
=
7
.
0
f
;
debugArray
[
index
].
x
=
ijField
[
indexI
][
0
];
debugArray
[
index
].
y
=
ijField
[
indexI
][
1
];
debugArray
[
index
].
z
=
ijField
[
indexI
][
2
];
debugArray
[
index
].
x
=
match
?
0
.
0
f
:
ijField
[
indices
[
ii
]][
0
];
debugArray
[
index
].
y
=
match
?
0
.
0
f
:
ijField
[
indices
[
ii
]][
1
];
debugArray
[
index
].
z
=
match
?
0
.
0
f
:
ijField
[
indices
[
ii
]][
2
];
debugArray
[
index
].
w
=
flag
;
}
for
(
int
pullIndex
=
0
;
pullIndex
<
pullIndexMax
;
pullIndex
++
){
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
ijField
[
indexJ
][
0
];
debugArray
[
index
].
y
=
ijField
[
indexJ
][
1
];
debugArray
[
index
].
z
=
ijField
[
indexJ
][
2
];
debugArray
[
index
].
w
=
flag
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
}
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
ijField
[
indexI
+
1
][
0
];
debugArray
[
index
].
y
=
ijField
[
indexI
+
1
][
1
];
debugArray
[
index
].
z
=
ijField
[
indexI
+
1
][
2
];
debugArray
[
index
].
w
=
flag
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
ijField
[
indexJ
+
1
][
0
];
debugArray
[
index
].
y
=
ijField
[
indexJ
+
1
][
1
];
debugArray
[
index
].
z
=
ijField
[
indexJ
+
1
][
2
];
debugArray
[
index
].
w
=
flag
;
/*
index += cAmoebaSim.paddedNumberOfAtoms;
...
...
@@ -300,25 +289,27 @@ pScaleVal = 1.0f;
#endif
);
unsigned
int
outOfBounds
=
(
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
((
y
+
tj
)
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
1
:
0
;
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
fieldSum
[
0
]
+=
ijField
[
0
][
0
];
fieldSum
[
1
]
+=
ijField
[
0
][
1
];
fieldSum
[
2
]
+=
ijField
[
0
][
2
];
fieldSum
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
][
0
];
fieldSum
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
][
1
];
fieldSum
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
][
2
];
fieldPolarSum
[
0
]
+=
ijField
[
2
][
0
];
fieldPolarSum
[
1
]
+=
ijField
[
2
][
1
];
fieldPolarSum
[
2
]
+=
ijField
[
2
][
2
];
fieldPolarSum
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
][
0
];
fieldPolarSum
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
][
1
];
fieldPolarSum
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
][
2
];
// add to field at atomJ the field due atomI's charge/dipole/quadrupole
psA
[
tj
].
eField
[
0
]
+=
ijField
[
1
][
0
];
psA
[
tj
].
eField
[
1
]
+=
ijField
[
1
][
1
];
psA
[
tj
].
eField
[
2
]
+=
ijField
[
1
][
2
];
psA
[
tj
].
eField
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
0
];
psA
[
tj
].
eField
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
1
];
psA
[
tj
].
eField
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
2
];
psA
[
tj
].
eFieldP
[
0
]
+=
ijField
[
3
][
0
];
psA
[
tj
].
eFieldP
[
1
]
+=
ijField
[
3
][
1
];
psA
[
tj
].
eFieldP
[
2
]
+=
ijField
[
3
][
2
];
psA
[
tj
].
eFieldP
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
0
];
psA
[
tj
].
eFieldP
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
1
];
psA
[
tj
].
eFieldP
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
2
];
#ifdef AMOEBA_DEBUG
...
...
@@ -422,25 +413,27 @@ pScaleVal = pScaleValue;
#endif
);
unsigned
int
outOfBounds
=
(
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
((
y
+
tj
)
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
1
:
0
;
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
fieldSum
[
0
]
+=
ijField
[
0
][
0
];
fieldSum
[
1
]
+=
ijField
[
0
][
1
];
fieldSum
[
2
]
+=
ijField
[
0
][
2
];
fieldSum
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
][
0
];
fieldSum
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
][
1
];
fieldSum
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
][
2
];
fieldPolarSum
[
0
]
+=
ijField
[
2
][
0
];
fieldPolarSum
[
1
]
+=
ijField
[
2
][
1
];
fieldPolarSum
[
2
]
+=
ijField
[
2
][
2
];
fieldPolarSum
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
][
0
];
fieldPolarSum
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
][
1
];
fieldPolarSum
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
][
2
];
// add to field at atomJ the field due atomI's charge/dipole/quadrupole
psA
[
tj
].
eField
[
0
]
+=
ijField
[
1
][
0
];
psA
[
tj
].
eField
[
1
]
+=
ijField
[
1
][
1
];
psA
[
tj
].
eField
[
2
]
+=
ijField
[
1
][
2
];
psA
[
tj
].
eField
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
0
];
psA
[
tj
].
eField
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
1
];
psA
[
tj
].
eField
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
2
];
psA
[
tj
].
eFieldP
[
0
]
+=
ijField
[
3
][
0
];
psA
[
tj
].
eFieldP
[
1
]
+=
ijField
[
3
][
1
];
psA
[
tj
].
eFieldP
[
2
]
+=
ijField
[
3
][
2
];
psA
[
tj
].
eFieldP
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
0
];
psA
[
tj
].
eFieldP
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
1
];
psA
[
tj
].
eFieldP
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
2
];
#ifdef AMOEBA_DEBUG
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
View file @
42d5b80a
...
...
@@ -429,7 +429,8 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
&&
iteration
==
1
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished maxtrixMultiply kernel execution %d
\n
"
,
iteration
);
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished maxtrixMultiply kernel execution %d -- Direct only -- self added in kSorUpdateMutualInducedField_kernel
\n
"
,
iteration
);
(
void
)
fflush
(
amoebaGpu
->
log
);
outputArray
->
Download
();
outputPolarArray
->
Download
();
debugArray
->
Download
();
...
...
@@ -556,6 +557,9 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
amoebaGpu
->
psE_Field
->
_pSysStream
[
0
][
ii
]
*=
conversion
;
amoebaGpu
->
psE_FieldPolar
->
_pSysStream
[
0
][
ii
]
*=
conversion
;
}
amoebaGpu
->
gpuContext
->
sim
.
alphaEwald
=
5.4459052e+00
f
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"cudaComputeAmoebaPmeMutualInducedFieldBySOR: forcing alphaEwald=%15.7e
\n
"
,
amoebaGpu
->
gpuContext
->
sim
.
alphaEwald
);
SetCalculateAmoebaCudaPmeMutualInducedFieldSim
(
amoebaGpu
);
amoebaGpu
->
psE_Field
->
Upload
();
amoebaGpu
->
psE_FieldPolar
->
Upload
();
#endif
...
...
@@ -584,7 +588,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
amoebaGpu
->
psPolarizability
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s Initial setup for matrix multiply
\n
"
,
methodName
);
int
offset
=
0
;
int
maxPrint
=
2000
0
;
int
maxPrint
=
1
0
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%4d pol=%12.4e "
,
ii
,
amoebaGpu
->
psPolarizability
->
_pSysStream
[
0
][
offset
]
);
...
...
@@ -616,7 +620,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
// matrix multiply
cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply
(
amoebaGpu
,
amoebaGpu
->
psWorkVector
[
0
],
amoebaGpu
->
psWorkVector
[
1
]
);
kCalculateAmoebaPMEInducedDipoleField
(
amoebaGpu
);
//
kCalculateAmoebaPMEInducedDipoleField( amoebaGpu );
LAUNCHERROR
(
"cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply Loop
\n
"
);
...
...
plugins/amoeba/platforms/cuda/tests/TestCudaAmoebaPME.cpp
View file @
42d5b80a
...
...
@@ -63,7 +63,9 @@ void testPMEWater() {
quadrupole
[
0
]
=
3.540307211e-2
;
quadrupole
[
4
]
=
-
3.902570771e-2
;
quadrupole
[
8
]
=
3.622635596e-3
;
mp
->
addParticle
(
-
0.51966
,
dipole
,
quadrupole
,
1
,
1
,
2
,
0.39
,
9.707801995e-1
,
0.837
);
double
damp
=
9.707801995e-01
*
sqrt
(
0.1
);
double
polarity
=
0.837
*
0.001
;
mp
->
addParticle
(
-
0.51966
,
dipole
,
quadrupole
,
1
,
1
,
2
,
0.39
,
damp
,
polarity
);
dipole
[
0
]
=
-
2.042094848e-2
;
dipole
[
2
]
=
-
3.078753000e-2
;
quadrupole
[
0
]
=
-
3.428482490e-3
;
...
...
@@ -71,10 +73,30 @@ void testPMEWater() {
quadrupole
[
4
]
=
-
1.002408752e-2
;
quadrupole
[
6
]
=
-
1.894859639e-4
;
quadrupole
[
8
]
=
1.345257001e-2
;
mp
->
addParticle
(
0.25983
,
dipole
,
quadrupole
,
0
,
0
,
2
,
0.39
,
8.897068742e-1
,
0.496
);
mp
->
addParticle
(
0.25983
,
dipole
,
quadrupole
,
0
,
0
,
1
,
0.39
,
8.897068742e-1
,
0.496
);
damp
=
8.897068742e-01
*
sqrt
(
0.1
);
polarity
=
0.496
*
0.001
;
mp
->
addParticle
(
0.25983
,
dipole
,
quadrupole
,
0
,
0
,
2
,
0.39
,
damp
,
polarity
);
mp
->
addParticle
(
0.25983
,
dipole
,
quadrupole
,
0
,
0
,
1
,
0.39
,
damp
,
polarity
);
mp
->
setCutoffDistance
(
1.0
);
// nonbonded->setEwaldErrorTolerance(TOL);
std
::
vector
<
int
>
intVector
;
intVector
.
push_back
(
0
);
intVector
.
push_back
(
1
);
intVector
.
push_back
(
2
);
mp
->
setCovalentMap
(
0
,
AmoebaMultipoleForce
::
PolarizationCovalent11
,
intVector
);
mp
->
setCovalentMap
(
1
,
AmoebaMultipoleForce
::
PolarizationCovalent11
,
intVector
);
mp
->
setCovalentMap
(
2
,
AmoebaMultipoleForce
::
PolarizationCovalent11
,
intVector
);
intVector
.
resize
(
0
);
intVector
.
push_back
(
1
);
intVector
.
push_back
(
2
);
mp
->
setCovalentMap
(
0
,
AmoebaMultipoleForce
::
Covalent12
,
intVector
);
intVector
.
resize
(
0
);
intVector
.
push_back
(
0
);
intVector
.
push_back
(
2
);
mp
->
setCovalentMap
(
1
,
AmoebaMultipoleForce
::
Covalent12
,
intVector
);
intVector
.
resize
(
0
);
intVector
.
push_back
(
0
);
intVector
.
push_back
(
1
);
mp
->
setCovalentMap
(
2
,
AmoebaMultipoleForce
::
Covalent12
,
intVector
);
mp
->
setEwaldErrorTolerance
(
TOL
);
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
2
,
0
,
0
),
Vec3
(
0
,
2
,
0
),
Vec3
(
0
,
0
,
2
));
system
.
addForce
(
mp
);
Context
context
(
system
,
integrator
,
platform
);
...
...
@@ -87,6 +109,13 @@ void testPMEWater() {
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
(
void
)
fprintf
(
stderr
,
"PME forces
\n
"
);
for
(
unsigned
int
ii
=
0
;
ii
<
forces
.
size
();
ii
++
){
(
void
)
fprintf
(
stderr
,
"%6u [%14.7e %14.7e %14.7e]
\n
"
,
ii
,
forces
[
ii
][
0
],
forces
[
ii
][
1
],
forces
[
ii
][
2
]
);
}
(
void
)
fflush
(
stderr
);
}
int
main
()
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment