Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
f01dc8d9
"vscode:/vscode.git/clone" did not exist on "3c100e69f095ad86f55b27768fce0df9b4563f5a"
Commit
f01dc8d9
authored
Jun 10, 2011
by
Mark Friedrichs
Browse files
Bug fix for MI calculation
parent
35a4b156
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
72 additions
and
75 deletions
+72
-75
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+29
-20
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
...a/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
+43
-55
No files found.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
View file @
f01dc8d9
...
...
@@ -445,9 +445,9 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
#ifdef AMOEBA_DEBUG
static
int
iteration
=
1
;
int
targetAtom
=
546
;
static
const
char
*
methodName
=
"cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply"
;
static
int
iteration
=
1
;
if
(
1
&&
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s
\n
"
,
methodName
);
(
void
)
fflush
(
amoebaGpu
->
log
);
...
...
@@ -485,30 +485,17 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
#endif
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaPmeMutualInducedFieldCutoffByWarp_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
);
#endif
}
else
{
kCalculateAmoebaPmeMutualInducedFieldCutoff_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
);
#endif
}
LAUNCHERROR
(
"kCalculateAmoebaPmeMutualInducedField"
);
...
...
@@ -520,8 +507,8 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
iteration
);
(
void
)
fflush
(
amoebaGpu
->
log
);
outputArray
->
Download
();
outputPolarArray
->
Download
();
debugArray
->
Download
();
int
maxPrint
=
5
;
//
debugArray->Download();
int
maxPrint
=
5
000000
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
...
...
@@ -564,7 +551,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
iteration
++
;
}
delete
debugArray
;
//
delete debugArray;
#endif
}
...
...
@@ -652,6 +639,18 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
// post matrix multiply
if
(
0
){
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
iteration
);
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psWorkVector
[
0
],
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psWorkVector
[
1
],
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeDirectMIPre"
,
fileId
,
outputVector
);
}
kSorUpdateMutualInducedField_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
natoms
,
amoebaGpu
->
psPolarizability
->
_pDevData
,
amoebaGpu
->
psInducedDipole
->
_pDevData
,
amoebaGpu
->
psInducedDipolePolar
->
_pDevData
,
...
...
@@ -664,11 +663,13 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
iteration
);
VectorOfDoubleVectors
outputVector
;
// cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_Field, outputVector, gpu->psAtomIndex->_pSysData );
// cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_FieldPolar, outputVector, gpu->psAtomIndex->_pSysData );
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psPolarizability
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeDirectMI"
,
fileId
,
outputVector
);
//exit(0);
}
// get total epsilon -- performing sums on gpu
...
...
@@ -762,6 +763,14 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
2
],
done
);
(
void
)
fflush
(
amoebaGpu
->
log
);
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"MI iteration=%3d eps %14.6e [%14.6e %14.6e] done=%d
\n
"
,
iteration
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
1
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
2
],
done
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
// exit if nan
...
...
@@ -781,7 +790,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
//
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, 1.0f );
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeMI"
,
fileId
,
outputVector
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
View file @
f01dc8d9
...
...
@@ -104,13 +104,12 @@ void METHOD_NAME(kCalculateAmoebaPmeMutualInducedField, _kernel)(
float4
delta
;
float
prefactor2
;
if
(
(
(
atomI
==
(
y
+
j
))
||
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
j
)
>=
cSim
.
atoms
)
)
){
delta
.
w
=
prefactor2
=
0
.
0
f
;
}
else
{
if
(
(
(
atomI
!=
(
y
+
j
))
&&
(
atomI
<
cSim
.
atoms
)
&&
((
y
+
j
)
<
cSim
.
atoms
)
)
){
setupMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
j
],
uscale
,
&
delta
,
&
prefactor2
);
//delta.w = prefactor2 = 0.0f;
calculateMutualInducedFieldPairIxn_kernel
(
psA
[
j
].
inducedDipole
,
delta
,
prefactor2
,
fieldSum
);
calculateMutualInducedFieldPairIxn_kernel
(
psA
[
j
].
inducedDipolePolar
,
delta
,
prefactor2
,
fieldPolarSum
);
}
calculateMutualInducedFieldPairIxn_kernel
(
psA
[
j
].
inducedDipole
,
delta
,
prefactor2
,
fieldSum
);
calculateMutualInducedFieldPairIxn_kernel
(
psA
[
j
].
inducedDipolePolar
,
delta
,
prefactor2
,
fieldPolarSum
);
}
...
...
@@ -118,24 +117,18 @@ void METHOD_NAME(kCalculateAmoebaPmeMutualInducedField, _kernel)(
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
offset
=
3
*
(
x
+
tgx
+
warp
*
cSim
.
paddedNumberOfAtoms
);
load3dArrayBufferPerWarp
(
offset
,
fieldSum
,
outputField
);
load3dArrayBufferPerWarp
(
offset
,
fieldPolarSum
,
outputFieldPolar
);
#else
unsigned
int
offset
=
3
*
(
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
);
#endif
load3dArray
(
offset
,
fieldSum
,
outputField
);
load3dArray
(
offset
,
fieldPolarSum
,
outputFieldPolar
);
#endif
}
else
{
if
(
lasty
!=
y
)
{
unsigned
int
atomJ
=
y
+
tgx
;
// load coordinates, charge, ...
loadMutualInducedShared
(
&
(
sA
[
threadIdx
.
x
]),
atomJ
);
}
...
...
@@ -152,58 +145,53 @@ void METHOD_NAME(kCalculateAmoebaPmeMutualInducedField, _kernel)(
zeroMutualInducedParticleSharedField
(
&
(
sA
[
threadIdx
.
x
])
);
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
if
((
flags
&
(
1
<<
j
))
!=
0
)
{
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
){
if
((
flags
&
(
1
<<
j
))
!=
0
)
{
unsigned
int
jIdx
=
(
flags
==
0xFFFFFFFF
)
?
tj
:
j
;
float4
delta
;
float
prefactor2
;
if
(
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
jIdx
)
>=
cSim
.
atoms
)
){
delta
.
w
=
prefactor2
=
0
.
0
f
;
}
else
{
if
(
(
atomI
<
cSim
.
atoms
)
&&
((
y
+
jIdx
)
<
cSim
.
atoms
)
){
setupMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
jIdx
],
uscale
,
&
delta
,
&
prefactor2
);
}
calculateMutualInducedFieldPairIxn_kernel
(
psA
[
jIdx
].
inducedDipole
,
delta
,
prefactor2
,
fieldSum
);
calculateMutualInducedFieldPairIxn_kernel
(
psA
[
jIdx
].
inducedDipolePolar
,
delta
,
prefactor2
,
fieldPolarSum
);
calculateMutualInducedFieldPairIxn_kernel
(
psA
[
jIdx
].
inducedDipole
,
delta
,
prefactor2
,
fieldSum
);
calculateMutualInducedFieldPairIxn_kernel
(
psA
[
jIdx
].
inducedDipolePolar
,
delta
,
prefactor2
,
fieldPolarSum
);
#ifndef INCLUDE_MI_FIELD_BUFFERS
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
.
inducedDipole
,
delta
,
prefactor2
,
psA
[
jIdx
].
field
);
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
.
inducedDipolePolar
,
delta
,
prefactor2
,
psA
[
jIdx
].
fieldPolar
);
#else
if
(
flags
==
0xFFFFFFFF
){
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
.
inducedDipole
,
delta
,
prefactor2
,
psA
[
jIdx
].
field
);
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
.
inducedDipolePolar
,
delta
,
prefactor2
,
psA
[
jIdx
].
fieldPolar
);
}
else
{
calculateMutualInducedFieldPairIxnNoAdd_kernel
(
localParticle
.
inducedDipole
,
delta
,
prefactor2
,
sA
[
threadIdx
.
x
].
tempBuffer
);
calculateMutualInducedFieldPairIxnNoAdd_kernel
(
localParticle
.
inducedDipolePolar
,
delta
,
prefactor2
,
sA
[
threadIdx
.
x
].
tempBufferP
);
if
(
tgx
%
2
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
}
if
(
tgx
%
4
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
2
]
);
}
if
(
tgx
%
8
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
4
]
);
}
if
(
tgx
%
16
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
8
]
);
}
if
(
tgx
==
0
)
{
psA
[
jIdx
].
field
[
0
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
0
];
psA
[
jIdx
].
field
[
1
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
1
];
psA
[
jIdx
].
field
[
2
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
2
];
psA
[
jIdx
].
fieldPolar
[
0
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
0
];
psA
[
jIdx
].
fieldPolar
[
1
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
1
];
psA
[
jIdx
].
fieldPolar
[
2
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
2
];
#else
if
(
flags
==
0xFFFFFFFF
){
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
.
inducedDipole
,
delta
,
prefactor2
,
psA
[
jIdx
].
field
);
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
.
inducedDipolePolar
,
delta
,
prefactor2
,
psA
[
jIdx
].
fieldPolar
);
}
else
{
calculateMutualInducedFieldPairIxnNoAdd_kernel
(
localParticle
.
inducedDipole
,
delta
,
prefactor2
,
sA
[
threadIdx
.
x
].
tempBuffer
);
calculateMutualInducedFieldPairIxnNoAdd_kernel
(
localParticle
.
inducedDipolePolar
,
delta
,
prefactor2
,
sA
[
threadIdx
.
x
].
tempBufferP
);
if
(
tgx
%
2
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
}
if
(
tgx
%
4
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
2
]
);
}
if
(
tgx
%
8
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
4
]
);
}
if
(
tgx
%
16
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
8
]
);
}
if
(
tgx
==
0
)
{
psA
[
jIdx
].
field
[
0
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
0
];
psA
[
jIdx
].
field
[
1
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
1
];
psA
[
jIdx
].
field
[
2
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
2
];
psA
[
jIdx
].
fieldPolar
[
0
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
0
];
psA
[
jIdx
].
fieldPolar
[
1
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
1
];
psA
[
jIdx
].
fieldPolar
[
2
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
2
];
}
}
}
#endif
}
}
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment