Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
42d5b80a
Commit
42d5b80a
authored
Sep 14, 2010
by
Mark Friedrichs
Browse files
Continue PME
parent
770f7abc
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
161 additions
and
105 deletions
+161
-105
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
...src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
...ms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
+59
-29
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
...rms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
+61
-68
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+7
-3
plugins/amoeba/platforms/cuda/tests/TestCudaAmoebaPME.cpp
plugins/amoeba/platforms/cuda/tests/TestCudaAmoebaPME.cpp
+33
-4
No files found.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
View file @
42d5b80a
...
@@ -1271,6 +1271,6 @@ void cudaComputeAmoebaPmeElectrostatic( amoebaGpuContext amoebaGpu )
...
@@ -1271,6 +1271,6 @@ void cudaComputeAmoebaPmeElectrostatic( amoebaGpuContext amoebaGpu )
{
{
cudaComputeAmoebaPmeDirectElectrostatic
(
amoebaGpu
);
cudaComputeAmoebaPmeDirectElectrostatic
(
amoebaGpu
);
kCalculateAmoebaPME
(
amoebaGpu
);
//
kCalculateAmoebaPME( amoebaGpu );
}
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
View file @
42d5b80a
...
@@ -46,6 +46,7 @@ void kReduceDirectSelfFields_kernel( unsigned int fieldComponents, unsigned int
...
@@ -46,6 +46,7 @@ void kReduceDirectSelfFields_kernel( unsigned int fieldComponents, unsigned int
// Reduce field
// Reduce field
const
float
term
=
(
4.0
f
/
3.0
f
)
*
(
cSim
.
alphaEwald
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
)
/
cAmoebaSim
.
sqrtPi
;
const
float
term
=
(
4.0
f
/
3.0
f
)
*
(
cSim
.
alphaEwald
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
)
/
cAmoebaSim
.
sqrtPi
;
//const float term = 0.0f;
while
(
pos
<
fieldComponents
)
while
(
pos
<
fieldComponents
)
{
{
...
@@ -73,6 +74,7 @@ void kReduceDirectSelfFields_kernel( unsigned int fieldComponents, unsigned int
...
@@ -73,6 +74,7 @@ void kReduceDirectSelfFields_kernel( unsigned int fieldComponents, unsigned int
{
{
totalField
+=
pFt
[
0
];
totalField
+=
pFt
[
0
];
}
}
fieldOut
[
pos
]
=
totalField
;
fieldOut
[
pos
]
=
totalField
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
}
...
@@ -256,18 +258,19 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
...
@@ -256,18 +258,19 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
if
(
r2
<=
cAmoebaSim
.
cutoffDistance2
){
if
(
r2
<=
cAmoebaSim
.
cutoffDistance2
){
fields
[
0
][
0
]
=
fim
[
0
]
-
fid
[
0
];
fields
[
0
][
0
]
=
fim
[
0
]
-
fid
[
0
];
fields
[
1
][
0
]
=
fkm
[
0
]
-
fkd
[
0
];
fields
[
2
][
0
]
=
fim
[
0
]
-
fip
[
0
];
fields
[
3
][
0
]
=
fkm
[
0
]
-
fkp
[
0
];
fields
[
0
][
1
]
=
fim
[
1
]
-
fid
[
1
];
fields
[
0
][
1
]
=
fim
[
1
]
-
fid
[
1
];
fields
[
1
][
1
]
=
fkm
[
1
]
-
fkd
[
1
];
fields
[
2
][
1
]
=
fim
[
1
]
-
fip
[
1
];
fields
[
3
][
1
]
=
fkm
[
1
]
-
fkp
[
1
];
fields
[
0
][
2
]
=
fim
[
2
]
-
fid
[
2
];
fields
[
0
][
2
]
=
fim
[
2
]
-
fid
[
2
];
fields
[
1
][
0
]
=
fkm
[
0
]
-
fkd
[
0
];
fields
[
1
][
1
]
=
fkm
[
1
]
-
fkd
[
1
];
fields
[
1
][
2
]
=
fkm
[
2
]
-
fkd
[
2
];
fields
[
1
][
2
]
=
fkm
[
2
]
-
fkd
[
2
];
fields
[
2
][
0
]
=
fim
[
0
]
-
fip
[
0
];
fields
[
2
][
1
]
=
fim
[
1
]
-
fip
[
1
];
fields
[
2
][
2
]
=
fim
[
2
]
-
fip
[
2
];
fields
[
2
][
2
]
=
fim
[
2
]
-
fip
[
2
];
fields
[
3
][
0
]
=
fkm
[
0
]
-
fkp
[
0
];
fields
[
3
][
1
]
=
fkm
[
1
]
-
fkp
[
1
];
fields
[
3
][
2
]
=
fkm
[
2
]
-
fkp
[
2
];
fields
[
3
][
2
]
=
fkm
[
2
]
-
fkp
[
2
];
}
else
{
}
else
{
...
@@ -297,11 +300,12 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
...
@@ -297,11 +300,12 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
pullBack
[
1
].
y
=
atomJ
.
y
-
atomI
.
y
;
pullBack
[
1
].
y
=
atomJ
.
y
-
atomI
.
y
;
pullBack
[
1
].
z
=
atomJ
.
z
-
atomI
.
z
;
pullBack
[
1
].
z
=
atomJ
.
z
-
atomI
.
z
;
pullBack
[
1
].
w
=
(
atomJ
.
x
-
atomI
.
x
)
*
(
atomJ
.
x
-
atomI
.
x
)
+
(
atomJ
.
y
-
atomI
.
y
)
*
(
atomJ
.
y
-
atomI
.
y
)
+
(
atomJ
.
z
-
atomI
.
z
)
*
(
atomJ
.
z
-
atomI
.
z
);
pullBack
[
1
].
w
=
(
atomJ
.
x
-
atomI
.
x
)
*
(
atomJ
.
x
-
atomI
.
x
)
+
(
atomJ
.
y
-
atomI
.
y
)
*
(
atomJ
.
y
-
atomI
.
y
)
+
(
atomJ
.
z
-
atomI
.
z
)
*
(
atomJ
.
z
-
atomI
.
z
);
/*
pullBack[1].x = scale3;
pullBack
[
2
].
x
=
scale3
;
pullBack[1].y = scale5;
pullBack
[
2
].
y
=
scale5
;
pullBack[1].z = scale7;
pullBack
[
2
].
z
=
scale7
;
*/
pullBack
[
2
].
w
=
-
1.0
;
#endif
#endif
}
}
...
@@ -351,10 +355,17 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
...
@@ -351,10 +355,17 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
unsigned
int
targetAtom
=
0
;
unsigned
int
targetAtom
=
0
;
#endif
#endif
//#define SET_ALPHA_EWALD
//#ifdef SET_ALPHA_EWALD
// amoebaGpu->gpuContext->sim.alphaEwald = 5.4459052e+00f;
// (void) fprintf( amoebaGpu->log, "computeCudaAmoebaPmeFixedEField: forceing alphaEwald=%15.7e\n", amoebaGpu->gpuContext->sim.alphaEwald );
// SetCalculateAmoebaCudaPmeFixedEFieldSim(amoebaGpu);
//#endif
kClearFields_3
(
amoebaGpu
,
2
);
kClearFields_3
(
amoebaGpu
,
2
);
if
(
gpu
->
bOutputBufferPerWarp
){
if
(
gpu
->
bOutputBufferPerWarp
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"N2 warp
\n
"
);
kCalculateAmoebaPmeDirectFixedE_FieldN2ByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondThreadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
>>>
(
kCalculateAmoebaPmeDirectFixedE_FieldN2ByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondThreadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
...
@@ -366,14 +377,6 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
...
@@ -366,14 +377,6 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
#endif
#endif
}
else
{
}
else
{
#ifdef AMOEBA_DEBUG
(
void
)
fprintf
(
amoebaGpu
->
log
,
"N2 no warp
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"AmoebaN2Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
\n
"
,
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondThreadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
FixedFieldParticle
),
sizeof
(
FixedFieldParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#endif
kCalculateAmoebaPmeDirectFixedE_FieldN2_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondThreadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
>>>
(
kCalculateAmoebaPmeDirectFixedE_FieldN2_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondThreadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
...
@@ -384,7 +387,6 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
...
@@ -384,7 +387,6 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
]
);
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
]
);
#endif
#endif
}
}
LAUNCHERROR
(
"kCalculateAmoebaPmeDirectFixedE_Field_kernel"
);
LAUNCHERROR
(
"kCalculateAmoebaPmeDirectFixedE_Field_kernel"
);
#if 0
#if 0
...
@@ -406,16 +408,44 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
...
@@ -406,16 +408,44 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
if
(
amoebaGpu
->
log
){
gpu
->
psInteractionCount
->
Download
();
gpu
->
psInteractionCount
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"AmoebaN2Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"AmoebaN2Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
warp=%d
\n
"
,
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondThreadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondThreadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
FixedFieldParticle
),
sizeof
(
FixedFieldParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
sizeof
(
FixedFieldParticle
),
sizeof
(
FixedFieldParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
,
gpu
->
bOutputBufferPerWarp
);
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fflush
(
amoebaGpu
->
log
);
amoebaGpu
->
psWorkArray_3_1
->
Download
();
amoebaGpu
->
psWorkArray_3_1
->
Download
();
amoebaGpu
->
psWorkArray_3_2
->
Download
();
amoebaGpu
->
psWorkArray_3_2
->
Download
();
amoebaGpu
->
psE_Field
->
Download
();
amoebaGpu
->
psE_Field
->
Download
();
amoebaGpu
->
psE_FieldPolar
->
Download
();
amoebaGpu
->
psE_FieldPolar
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Out
EFields
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Out
WorkArray_3_[1,2] paddedNumberOfAtoms=%d
\n
"
,
amoebaGpu
->
paddedNumberOfAtoms
,
amoebaGpu
->
outputBuffers
);
int
maxPrint
=
32
;
int
maxPrint
=
32
;
for
(
int
ii
=
0
;
ii
<
amoebaGpu
->
paddedNumberOfAtoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
int
indexOffset
=
ii
*
3
;
// buffer 1
(
void
)
fprintf
(
amoebaGpu
->
log
,
"WArry1[%16.9e %16.9e %16.9e] "
,
amoebaGpu
->
psWorkArray_3_1
->
_pSysStream
[
0
][
indexOffset
],
amoebaGpu
->
psWorkArray_3_1
->
_pSysStream
[
0
][
indexOffset
+
1
],
amoebaGpu
->
psWorkArray_3_1
->
_pSysStream
[
0
][
indexOffset
+
2
]
);
// buffer 2
(
void
)
fprintf
(
amoebaGpu
->
log
,
"WArry2[%16.9e %16.9e %16.9e] "
,
amoebaGpu
->
psWorkArray_3_2
->
_pSysStream
[
0
][
indexOffset
],
amoebaGpu
->
psWorkArray_3_2
->
_pSysStream
[
0
][
indexOffset
+
1
],
amoebaGpu
->
psWorkArray_3_2
->
_pSysStream
[
0
][
indexOffset
+
2
]
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
if
(
ii
==
maxPrint
&&
(
gpu
->
natoms
-
maxPrint
)
>
ii
){
ii
=
gpu
->
natoms
-
maxPrint
;
}
}
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"EFields End
\n
"
);
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
...
@@ -450,7 +480,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
...
@@ -450,7 +480,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
int
debugIndex
=
jj
;
int
debugIndex
=
jj
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d PmeFixedEField
\n
"
,
jj
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d PmeFixedEField
\n
"
,
jj
);
for
(
int
kk
=
0
;
kk
<
7
;
kk
++
){
for
(
int
kk
=
0
;
kk
<
10
;
kk
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"[%16.9e %16.9e %16.9e %16.9e]
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"[%16.9e %16.9e %16.9e %16.9e]
\n
"
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
x
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
y
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
x
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
y
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
z
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
w
);
debugArray
->
_pSysStream
[
0
][
debugIndex
].
z
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
w
);
...
@@ -476,7 +506,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
...
@@ -476,7 +506,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
}
}
#endif
#endif
if
(
1
){
if
(
0
){
std
::
vector
<
int
>
fileId
;
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
0
);
fileId
.
push_back
(
0
);
VectorOfDoubleVectors
outputVector
;
VectorOfDoubleVectors
outputVector
;
...
@@ -491,5 +521,5 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
...
@@ -491,5 +521,5 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
void
cudaComputeAmoebaPmeFixedEField
(
amoebaGpuContext
amoebaGpu
)
void
cudaComputeAmoebaPmeFixedEField
(
amoebaGpuContext
amoebaGpu
)
{
{
cudaComputeAmoebaPmeDirectFixedEField
(
amoebaGpu
);
cudaComputeAmoebaPmeDirectFixedEField
(
amoebaGpu
);
kCalculateAmoebaPMEFixedMultipoleField
(
amoebaGpu
);
//
kCalculateAmoebaPMEFixedMultipoleField( amoebaGpu );
}
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
View file @
42d5b80a
...
@@ -44,6 +44,7 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectFixedE_Field, _kernel)(
...
@@ -44,6 +44,7 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectFixedE_Field, _kernel)(
){
){
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
int
pullIndexMax
=
12
;
float4
pullBack
[
12
];
float4
pullBack
[
12
];
float
dScaleVal
;
float
dScaleVal
;
float
pScaleVal
;
float
pScaleVal
;
...
@@ -120,6 +121,7 @@ pScaleVal = 1.0f;
...
@@ -120,6 +121,7 @@ pScaleVal = 1.0f;
);
);
unsigned
int
match
=
(
atomI
==
(
y
+
j
))
?
1
:
0
;
unsigned
int
match
=
(
atomI
==
(
y
+
j
))
?
1
:
0
;
match
=
1
;
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
...
@@ -130,6 +132,7 @@ pScaleVal = 1.0f;
...
@@ -130,6 +132,7 @@ pScaleVal = 1.0f;
fieldPolarSum
[
0
]
+=
match
?
0
.
0
f
:
ijField
[
2
][
0
];
fieldPolarSum
[
0
]
+=
match
?
0
.
0
f
:
ijField
[
2
][
0
];
fieldPolarSum
[
1
]
+=
match
?
0
.
0
f
:
ijField
[
2
][
1
];
fieldPolarSum
[
1
]
+=
match
?
0
.
0
f
:
ijField
[
2
][
1
];
fieldPolarSum
[
2
]
+=
match
?
0
.
0
f
:
ijField
[
2
][
2
];
fieldPolarSum
[
2
]
+=
match
?
0
.
0
f
:
ijField
[
2
][
2
];
}
}
}
}
...
@@ -166,7 +169,7 @@ pScaleVal = pScaleValue;
...
@@ -166,7 +169,7 @@ pScaleVal = pScaleValue;
// nan*0.0 = nan not 0.0, so explicitly exclude (atomI == atomJ) contribution
// nan*0.0 = nan not 0.0, so explicitly exclude (atomI == atomJ) contribution
// by setting match flag
// by setting match flag
unsigned
int
match
=
(
atomI
==
(
y
+
j
))
?
1
:
0
;
unsigned
int
match
=
(
(
atomI
==
(
y
+
j
))
||
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
((
y
+
j
)
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
1
:
0
;
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
...
@@ -180,54 +183,40 @@ pScaleVal = pScaleValue;
...
@@ -180,54 +183,40 @@ pScaleVal = pScaleValue;
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
){
if
(
atomI
==
targetAtom
){
unsigned
int
index
=
atomI
==
targetAtom
?
(
y
+
j
)
:
atomI
;
unsigned
int
index
=
atomI
==
targetAtom
?
(
y
+
j
)
:
atomI
;
unsigned
int
pullBackIndex
=
0
;
unsigned
int
indexI
=
0
;
unsigned
int
indexI
=
0
;
unsigned
int
indexJ
=
indexI
?
0
:
2
;
unsigned
int
indexJ
=
indexI
?
0
:
2
;
unsigned
int
indices
[
4
]
=
{
indexI
,
indexJ
,
indexI
+
1
,
indexJ
+
1
};
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
j
);
debugArray
[
index
].
y
=
(
float
)
(
y
+
j
);
debugArray
[
index
].
z
=
dScaleValue
;
debugArray
[
index
].
z
=
dScaleValue
;
debugArray
[
index
].
w
=
pScaleValue
;
debugArray
[
index
].
w
=
pScaleValue
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
unsigned
int
off
=
3
*
(
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
);
debugArray
[
index
].
x
=
pullBack
[
pullBackIndex
].
x
;
debugArray
[
index
].
x
=
(
float
)
x
;
debugArray
[
index
].
y
=
pullBack
[
pullBackIndex
].
y
;
debugArray
[
index
].
y
=
(
float
)
tgx
;
debugArray
[
index
].
z
=
pullBack
[
pullBackIndex
].
z
;
debugArray
[
index
].
z
=
-
2
;
debugArray
[
index
].
w
=
pullBack
[
pullBackIndex
].
w
;
debugArray
[
index
].
w
=
(
float
)
off
;
pullBackIndex
++
;
float
flag
=
7
.
0
f
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
for
(
int
ii
=
0
;
ii
<
4
;
ii
++
){
debugArray
[
index
].
x
=
pullBack
[
pullBackIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullBackIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullBackIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullBackIndex
].
w
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
float
flag
=
7
.
0
f
;
debugArray
[
index
].
x
=
match
?
0
.
0
f
:
ijField
[
indices
[
ii
]][
0
];
debugArray
[
index
].
x
=
ijField
[
indexI
][
0
];
debugArray
[
index
].
y
=
match
?
0
.
0
f
:
ijField
[
indices
[
ii
]][
1
];
debugArray
[
index
].
y
=
ijField
[
indexI
][
1
];
debugArray
[
index
].
z
=
match
?
0
.
0
f
:
ijField
[
indices
[
ii
]][
2
];
debugArray
[
index
].
z
=
ijField
[
indexI
][
2
];
debugArray
[
index
].
w
=
flag
;
debugArray
[
index
].
w
=
flag
;
}
for
(
int
pullIndex
=
0
;
pullIndex
<
pullIndexMax
;
pullIndex
++
){
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
ijField
[
indexJ
][
0
];
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
ijField
[
indexJ
][
1
];
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
ijField
[
indexJ
][
2
];
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
flag
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
}
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
ijField
[
indexI
+
1
][
0
];
debugArray
[
index
].
y
=
ijField
[
indexI
+
1
][
1
];
debugArray
[
index
].
z
=
ijField
[
indexI
+
1
][
2
];
debugArray
[
index
].
w
=
flag
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
ijField
[
indexJ
+
1
][
0
];
debugArray
[
index
].
y
=
ijField
[
indexJ
+
1
][
1
];
debugArray
[
index
].
z
=
ijField
[
indexJ
+
1
][
2
];
debugArray
[
index
].
w
=
flag
;
/*
/*
index += cAmoebaSim.paddedNumberOfAtoms;
index += cAmoebaSim.paddedNumberOfAtoms;
...
@@ -300,25 +289,27 @@ pScaleVal = 1.0f;
...
@@ -300,25 +289,27 @@ pScaleVal = 1.0f;
#endif
#endif
);
);
unsigned
int
outOfBounds
=
(
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
((
y
+
tj
)
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
1
:
0
;
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
fieldSum
[
0
]
+=
ijField
[
0
][
0
];
fieldSum
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
][
0
];
fieldSum
[
1
]
+=
ijField
[
0
][
1
];
fieldSum
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
][
1
];
fieldSum
[
2
]
+=
ijField
[
0
][
2
];
fieldSum
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
][
2
];
fieldPolarSum
[
0
]
+=
ijField
[
2
][
0
];
fieldPolarSum
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
][
0
];
fieldPolarSum
[
1
]
+=
ijField
[
2
][
1
];
fieldPolarSum
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
][
1
];
fieldPolarSum
[
2
]
+=
ijField
[
2
][
2
];
fieldPolarSum
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
][
2
];
// add to field at atomJ the field due atomI's charge/dipole/quadrupole
// add to field at atomJ the field due atomI's charge/dipole/quadrupole
psA
[
tj
].
eField
[
0
]
+=
ijField
[
1
][
0
];
psA
[
tj
].
eField
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
0
];
psA
[
tj
].
eField
[
1
]
+=
ijField
[
1
][
1
];
psA
[
tj
].
eField
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
1
];
psA
[
tj
].
eField
[
2
]
+=
ijField
[
1
][
2
];
psA
[
tj
].
eField
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
2
];
psA
[
tj
].
eFieldP
[
0
]
+=
ijField
[
3
][
0
];
psA
[
tj
].
eFieldP
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
0
];
psA
[
tj
].
eFieldP
[
1
]
+=
ijField
[
3
][
1
];
psA
[
tj
].
eFieldP
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
1
];
psA
[
tj
].
eFieldP
[
2
]
+=
ijField
[
3
][
2
];
psA
[
tj
].
eFieldP
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
2
];
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
...
@@ -422,25 +413,27 @@ pScaleVal = pScaleValue;
...
@@ -422,25 +413,27 @@ pScaleVal = pScaleValue;
#endif
#endif
);
);
unsigned
int
outOfBounds
=
(
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
((
y
+
tj
)
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
1
:
0
;
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
fieldSum
[
0
]
+=
ijField
[
0
][
0
];
fieldSum
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
][
0
];
fieldSum
[
1
]
+=
ijField
[
0
][
1
];
fieldSum
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
][
1
];
fieldSum
[
2
]
+=
ijField
[
0
][
2
];
fieldSum
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
][
2
];
fieldPolarSum
[
0
]
+=
ijField
[
2
][
0
];
fieldPolarSum
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
][
0
];
fieldPolarSum
[
1
]
+=
ijField
[
2
][
1
];
fieldPolarSum
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
][
1
];
fieldPolarSum
[
2
]
+=
ijField
[
2
][
2
];
fieldPolarSum
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
][
2
];
// add to field at atomJ the field due atomI's charge/dipole/quadrupole
// add to field at atomJ the field due atomI's charge/dipole/quadrupole
psA
[
tj
].
eField
[
0
]
+=
ijField
[
1
][
0
];
psA
[
tj
].
eField
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
0
];
psA
[
tj
].
eField
[
1
]
+=
ijField
[
1
][
1
];
psA
[
tj
].
eField
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
1
];
psA
[
tj
].
eField
[
2
]
+=
ijField
[
1
][
2
];
psA
[
tj
].
eField
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
2
];
psA
[
tj
].
eFieldP
[
0
]
+=
ijField
[
3
][
0
];
psA
[
tj
].
eFieldP
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
0
];
psA
[
tj
].
eFieldP
[
1
]
+=
ijField
[
3
][
1
];
psA
[
tj
].
eFieldP
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
1
];
psA
[
tj
].
eFieldP
[
2
]
+=
ijField
[
3
][
2
];
psA
[
tj
].
eFieldP
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
2
];
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
View file @
42d5b80a
...
@@ -429,7 +429,8 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
...
@@ -429,7 +429,8 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
&&
iteration
==
1
){
if
(
amoebaGpu
->
log
&&
iteration
==
1
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished maxtrixMultiply kernel execution %d
\n
"
,
iteration
);
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished maxtrixMultiply kernel execution %d -- Direct only -- self added in kSorUpdateMutualInducedField_kernel
\n
"
,
iteration
);
(
void
)
fflush
(
amoebaGpu
->
log
);
outputArray
->
Download
();
outputArray
->
Download
();
outputPolarArray
->
Download
();
outputPolarArray
->
Download
();
debugArray
->
Download
();
debugArray
->
Download
();
...
@@ -556,6 +557,9 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
...
@@ -556,6 +557,9 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
amoebaGpu
->
psE_Field
->
_pSysStream
[
0
][
ii
]
*=
conversion
;
amoebaGpu
->
psE_Field
->
_pSysStream
[
0
][
ii
]
*=
conversion
;
amoebaGpu
->
psE_FieldPolar
->
_pSysStream
[
0
][
ii
]
*=
conversion
;
amoebaGpu
->
psE_FieldPolar
->
_pSysStream
[
0
][
ii
]
*=
conversion
;
}
}
amoebaGpu
->
gpuContext
->
sim
.
alphaEwald
=
5.4459052e+00
f
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"cudaComputeAmoebaPmeMutualInducedFieldBySOR: forcing alphaEwald=%15.7e
\n
"
,
amoebaGpu
->
gpuContext
->
sim
.
alphaEwald
);
SetCalculateAmoebaCudaPmeMutualInducedFieldSim
(
amoebaGpu
);
amoebaGpu
->
psE_Field
->
Upload
();
amoebaGpu
->
psE_Field
->
Upload
();
amoebaGpu
->
psE_FieldPolar
->
Upload
();
amoebaGpu
->
psE_FieldPolar
->
Upload
();
#endif
#endif
...
@@ -584,7 +588,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
...
@@ -584,7 +588,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
amoebaGpu
->
psPolarizability
->
Download
();
amoebaGpu
->
psPolarizability
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s Initial setup for matrix multiply
\n
"
,
methodName
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s Initial setup for matrix multiply
\n
"
,
methodName
);
int
offset
=
0
;
int
offset
=
0
;
int
maxPrint
=
2000
0
;
int
maxPrint
=
1
0
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%4d pol=%12.4e "
,
ii
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%4d pol=%12.4e "
,
ii
,
amoebaGpu
->
psPolarizability
->
_pSysStream
[
0
][
offset
]
);
amoebaGpu
->
psPolarizability
->
_pSysStream
[
0
][
offset
]
);
...
@@ -616,7 +620,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
...
@@ -616,7 +620,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
// matrix multiply
// matrix multiply
cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply
(
amoebaGpu
,
amoebaGpu
->
psWorkVector
[
0
],
amoebaGpu
->
psWorkVector
[
1
]
);
cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply
(
amoebaGpu
,
amoebaGpu
->
psWorkVector
[
0
],
amoebaGpu
->
psWorkVector
[
1
]
);
kCalculateAmoebaPMEInducedDipoleField
(
amoebaGpu
);
//
kCalculateAmoebaPMEInducedDipoleField( amoebaGpu );
LAUNCHERROR
(
"cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply Loop
\n
"
);
LAUNCHERROR
(
"cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply Loop
\n
"
);
...
...
plugins/amoeba/platforms/cuda/tests/TestCudaAmoebaPME.cpp
View file @
42d5b80a
...
@@ -63,7 +63,9 @@ void testPMEWater() {
...
@@ -63,7 +63,9 @@ void testPMEWater() {
quadrupole
[
0
]
=
3.540307211e-2
;
quadrupole
[
0
]
=
3.540307211e-2
;
quadrupole
[
4
]
=
-
3.902570771e-2
;
quadrupole
[
4
]
=
-
3.902570771e-2
;
quadrupole
[
8
]
=
3.622635596e-3
;
quadrupole
[
8
]
=
3.622635596e-3
;
mp
->
addParticle
(
-
0.51966
,
dipole
,
quadrupole
,
1
,
1
,
2
,
0.39
,
9.707801995e-1
,
0.837
);
double
damp
=
9.707801995e-01
*
sqrt
(
0.1
);
double
polarity
=
0.837
*
0.001
;
mp
->
addParticle
(
-
0.51966
,
dipole
,
quadrupole
,
1
,
1
,
2
,
0.39
,
damp
,
polarity
);
dipole
[
0
]
=
-
2.042094848e-2
;
dipole
[
0
]
=
-
2.042094848e-2
;
dipole
[
2
]
=
-
3.078753000e-2
;
dipole
[
2
]
=
-
3.078753000e-2
;
quadrupole
[
0
]
=
-
3.428482490e-3
;
quadrupole
[
0
]
=
-
3.428482490e-3
;
...
@@ -71,10 +73,30 @@ void testPMEWater() {
...
@@ -71,10 +73,30 @@ void testPMEWater() {
quadrupole
[
4
]
=
-
1.002408752e-2
;
quadrupole
[
4
]
=
-
1.002408752e-2
;
quadrupole
[
6
]
=
-
1.894859639e-4
;
quadrupole
[
6
]
=
-
1.894859639e-4
;
quadrupole
[
8
]
=
1.345257001e-2
;
quadrupole
[
8
]
=
1.345257001e-2
;
mp
->
addParticle
(
0.25983
,
dipole
,
quadrupole
,
0
,
0
,
2
,
0.39
,
8.897068742e-1
,
0.496
);
damp
=
8.897068742e-01
*
sqrt
(
0.1
);
mp
->
addParticle
(
0.25983
,
dipole
,
quadrupole
,
0
,
0
,
1
,
0.39
,
8.897068742e-1
,
0.496
);
polarity
=
0.496
*
0.001
;
mp
->
addParticle
(
0.25983
,
dipole
,
quadrupole
,
0
,
0
,
2
,
0.39
,
damp
,
polarity
);
mp
->
addParticle
(
0.25983
,
dipole
,
quadrupole
,
0
,
0
,
1
,
0.39
,
damp
,
polarity
);
mp
->
setCutoffDistance
(
1.0
);
mp
->
setCutoffDistance
(
1.0
);
// nonbonded->setEwaldErrorTolerance(TOL);
std
::
vector
<
int
>
intVector
;
intVector
.
push_back
(
0
);
intVector
.
push_back
(
1
);
intVector
.
push_back
(
2
);
mp
->
setCovalentMap
(
0
,
AmoebaMultipoleForce
::
PolarizationCovalent11
,
intVector
);
mp
->
setCovalentMap
(
1
,
AmoebaMultipoleForce
::
PolarizationCovalent11
,
intVector
);
mp
->
setCovalentMap
(
2
,
AmoebaMultipoleForce
::
PolarizationCovalent11
,
intVector
);
intVector
.
resize
(
0
);
intVector
.
push_back
(
1
);
intVector
.
push_back
(
2
);
mp
->
setCovalentMap
(
0
,
AmoebaMultipoleForce
::
Covalent12
,
intVector
);
intVector
.
resize
(
0
);
intVector
.
push_back
(
0
);
intVector
.
push_back
(
2
);
mp
->
setCovalentMap
(
1
,
AmoebaMultipoleForce
::
Covalent12
,
intVector
);
intVector
.
resize
(
0
);
intVector
.
push_back
(
0
);
intVector
.
push_back
(
1
);
mp
->
setCovalentMap
(
2
,
AmoebaMultipoleForce
::
Covalent12
,
intVector
);
mp
->
setEwaldErrorTolerance
(
TOL
);
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
2
,
0
,
0
),
Vec3
(
0
,
2
,
0
),
Vec3
(
0
,
0
,
2
));
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
2
,
0
,
0
),
Vec3
(
0
,
2
,
0
),
Vec3
(
0
,
0
,
2
));
system
.
addForce
(
mp
);
system
.
addForce
(
mp
);
Context
context
(
system
,
integrator
,
platform
);
Context
context
(
system
,
integrator
,
platform
);
...
@@ -87,6 +109,13 @@ void testPMEWater() {
...
@@ -87,6 +109,13 @@ void testPMEWater() {
context
.
setPositions
(
positions
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
(
void
)
fprintf
(
stderr
,
"PME forces
\n
"
);
for
(
unsigned
int
ii
=
0
;
ii
<
forces
.
size
();
ii
++
){
(
void
)
fprintf
(
stderr
,
"%6u [%14.7e %14.7e %14.7e]
\n
"
,
ii
,
forces
[
ii
][
0
],
forces
[
ii
][
1
],
forces
[
ii
][
2
]
);
}
(
void
)
fflush
(
stderr
);
}
}
int
main
()
{
int
main
()
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment