Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
2b508482
Commit
2b508482
authored
Nov 23, 2011
by
Mark Friedrichs
Browse files
Added copyright
Removed debugging code
parent
36762962
Changes
33
Show whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
222 additions
and
1310 deletions
+222
-1310
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.h
...cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.h
+7
-76
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
...eba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
+25
-3
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
...src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
+26
-190
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
...ms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
+26
-248
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
...rms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
+4
-140
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+29
-196
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
...a/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
+0
-7
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
...forms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
+26
-39
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaUtilities.cu
...atforms/cuda/src/kernels/kCalculateAmoebaCudaUtilities.cu
+25
-5
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
...platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
+26
-224
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.h
.../platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.h
+2
-85
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
+26
-47
plugins/amoeba/platforms/cuda/src/kernels/kFindInteractingBlocksVdw.h
...ba/platforms/cuda/src/kernels/kFindInteractingBlocksVdw.h
+0
-50
No files found.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.h
View file @
2b508482
...
...
@@ -36,11 +36,7 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif
void
METHOD_NAME
(
kCalculateAmoebaMutualInducedField
,
_kernel
)(
unsigned
int
*
workUnit
,
float
*
outputField
,
float
*
outputFieldPolar
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
,
unsigned
int
targetAtom
#endif
){
float
*
outputField
,
float
*
outputFieldPolar
){
extern
__shared__
MutualInducedParticle
sA
[];
...
...
@@ -99,11 +95,7 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
// load coords, charge, ...
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
j
],
ijField
#ifdef AMOEBA_DEBUG
,
debugArray
#endif
);
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
j
],
ijField
);
unsigned
int
mask
=
(
(
atomI
==
(
y
+
j
))
||
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
j
)
>=
cSim
.
atoms
)
)
?
0
:
1
;
...
...
@@ -117,34 +109,6 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
fieldPolarSum
[
1
]
+=
mask
?
ijField
[
1
][
1
]
:
0
.
0
f
;
fieldPolarSum
[
2
]
+=
mask
?
ijField
[
1
][
2
]
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
){
unsigned
int
index
=
y
+
j
;
unsigned
int
indexI
=
0
;
//unsigned int indexJ = 2;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
j
);
//debugArray[index].z = cAmoebaSim.pDampingFactorAndThole[atomI].x;
debugArray
[
index
].
z
=
(
float
)
cSim
.
atoms
;
debugArray
[
index
].
w
=
(
float
)
(
mask
+
1
);
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
ijField
[
indexI
][
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
ijField
[
indexI
][
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
ijField
[
indexI
][
2
]
:
0
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
ijField
[
indexI
+
1
][
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
ijField
[
indexI
+
1
][
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
ijField
[
indexI
+
1
][
2
]
:
0
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
(
float
)
x
;
debugArray
[
index
].
y
=
(
float
)
y
;
debugArray
[
index
].
z
=
(
float
)
1
.
0
f
;
}
#endif
}
// Write results
...
...
@@ -161,9 +125,8 @@ if( atomI == targetAtom ){
#endif
}
else
// 100% utilization
{
}
else
{
// Read fixed atom data into registers and GRF
if
(
lasty
!=
y
)
{
...
...
@@ -185,11 +148,7 @@ if( atomI == targetAtom ){
// load coords, charge, ...
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
tj
],
ijField
#ifdef AMOEBA_DEBUG
,
debugArray
#endif
);
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
tj
],
ijField
);
unsigned
int
mask
=
(
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
tj
)
>=
cSim
.
atoms
)
)
?
0
:
1
;
...
...
@@ -217,34 +176,6 @@ if( atomI == targetAtom ){
psA
[
tj
].
fieldPolar
[
1
]
+=
mask
?
ijField
[
3
][
1
]
:
0
.
0
f
;
psA
[
tj
].
fieldPolar
[
2
]
+=
mask
?
ijField
[
3
][
2
]
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
//#if 0
if
(
atomI
==
targetAtom
||
(
y
+
tj
)
==
targetAtom
){
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
(
y
+
tj
)
:
atomI
;
unsigned
int
indexI
=
(
atomI
==
targetAtom
)
?
0
:
2
;
//unsigned int indexJ = (atomI == targetAtom) ? 2 : 0;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
tj
);
debugArray
[
index
].
z
=
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
;
debugArray
[
index
].
w
=
(
float
)
(
mask
+
1
);
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
ijField
[
indexI
][
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
ijField
[
indexI
][
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
ijField
[
indexI
][
2
]
:
0
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
ijField
[
indexI
+
1
][
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
ijField
[
indexI
+
1
][
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
ijField
[
indexI
+
1
][
2
]
:
0
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
(
float
)
x
;
debugArray
[
index
].
y
=
(
float
)
y
;
debugArray
[
index
].
z
=
(
float
)
-
1
.
0
f
;
}
#endif
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
View file @
2b508482
//-----------------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------------
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaGpuTypes.h"
#include "cudaKernels.h"
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
View file @
2b508482
///-----------------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------------
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaGpuTypes.h"
#include "cudaKernels.h"
#include "amoebaCudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h"
//#define AMOEBA_DEBUG
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaAmoebaGmxSimulation
cAmoebaSim
;
...
...
@@ -213,11 +233,7 @@ __device__ void calculateBn_kernel( float r, float4* bn, float* bn0, float *bn5
#undef SUB_METHOD_NAME
__device__
void
calculatePmeDirectElectrostaticPairIxnOrig_kernel
(
const
PmeDirectElectrostaticParticle
&
atomI
,
const
PmeDirectElectrostaticParticle
&
atomJ
,
const
float
*
scalingFactors
,
float4
forceTorqueEnergy
[
3
]
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
#endif
){
const
float
*
scalingFactors
,
float4
forceTorqueEnergy
[
3
]){
float
xr
=
atomJ
.
x
-
atomI
.
x
;
float
yr
=
atomJ
.
y
-
atomI
.
y
;
...
...
@@ -909,132 +925,6 @@ __device__ void calculatePmeDirectElectrostaticPairIxnOrig_kernel( const PmeDire
forceTorqueEnergy
[
2
].
y
=
(
ttm32
+
ttm3i2
);
forceTorqueEnergy
[
2
].
z
=
(
ttm33
+
ttm3i3
);
#ifdef AMOEBA_DEBUG
int
debugIndex
=
0
;
float
idTracker
=
1.0
f
;
/*
debugArray[debugIndex].x = atomI.labFrameDipole[0];
debugArray[debugIndex].y = atomI.labFrameDipole[1];
debugArray[debugIndex].z = atomI.labFrameDipole[2];
debugArray[debugIndex].w = r2;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomJ.labFrameDipole[0];
debugArray[debugIndex].y = atomJ.labFrameDipole[1];
debugArray[debugIndex].z = atomJ.labFrameDipole[2];
debugArray[debugIndex].w = cSim.alphaEwald;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomI.inducedDipole[0];
debugArray[debugIndex].y = atomI.inducedDipole[1];
debugArray[debugIndex].z = atomI.inducedDipole[2];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomJ.inducedDipole[0];
debugArray[debugIndex].y = atomJ.inducedDipole[1];
debugArray[debugIndex].z = atomJ.inducedDipole[2];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomI.inducedDipoleP[0];
debugArray[debugIndex].y = atomI.inducedDipoleP[1];
debugArray[debugIndex].z = atomI.inducedDipoleP[2];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomJ.inducedDipoleP[0];
debugArray[debugIndex].y = atomJ.inducedDipoleP[1];
debugArray[debugIndex].z = atomJ.inducedDipoleP[2];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = conversionFactor*ftm21;
debugArray[debugIndex].y = conversionFactor*ftm22;
debugArray[debugIndex].z = conversionFactor*ftm23;
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = e;
debugArray[debugIndex].y = ei;
debugArray[debugIndex].z = erl;
debugArray[debugIndex].w = erli;
debugIndex++;
*/
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
r2
;
debugArray
[
debugIndex
].
y
=
cSim
.
alphaEwald
;
debugArray
[
debugIndex
].
z
=
conversionFactor
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ftm21
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ftm22
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ftm23
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ftm2i1
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ftm2i2
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ftm2i3
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
/*
idTracker += 100.0;
debugArray[debugIndex].x = fridmp1;
debugArray[debugIndex].y = fridmp2;
debugArray[debugIndex].z = fridmp3;
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 100.0;
debugArray[debugIndex].x = findmp1;
debugArray[debugIndex].y = findmp2;
debugArray[debugIndex].z = findmp3;
debugArray[debugIndex].w = idTracker;
debugIndex++;
*/
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ttm21
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ttm22
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ttm23
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ttm2i1
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ttm2i2
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ttm2i3
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ttm31
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ttm32
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ttm33
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ttm3i1
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ttm3i2
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ttm3i3
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
#endif
}
else
{
forceTorqueEnergy
[
0
].
x
=
0.0
f
;
...
...
@@ -1051,15 +941,6 @@ __device__ void calculatePmeDirectElectrostaticPairIxnOrig_kernel( const PmeDire
forceTorqueEnergy
[
0
].
w
=
0.0
f
;
#ifdef AMOEBA_DEBUG
for
(
int
ii
=
0
;
ii
<
12
;
ii
++
){
debugArray
[
ii
].
x
=
0.0
f
;
debugArray
[
ii
].
y
=
0.0
f
;
debugArray
[
ii
].
z
=
0.0
f
;
debugArray
[
ii
].
w
=
(
float
)
(
-
ii
);
}
#endif
}
return
;
...
...
@@ -1351,35 +1232,11 @@ static void kReduceTorque(amoebaGpuContext amoebaGpu )
void
cudaComputeAmoebaPmeDirectElectrostatic
(
amoebaGpuContext
amoebaGpu
)
{
#ifdef AMOEBA_DEBUG
static
const
char
*
methodName
=
"cudaComputeAmoebaPmeDirectElectrostatic"
;
static
int
timestep
=
0
;
std
::
vector
<
int
>
fileId
;
timestep
++
;
fileId
.
resize
(
2
);
fileId
[
0
]
=
timestep
;
fileId
[
1
]
=
1
;
#endif
// ---------------------------------------------------------------------------------------
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
// apparently debug array can take up nontrivial no. registers
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s %d maxCovalentDegreeSz=%d ZZZ
\n
"
,
methodName
,
gpu
->
natoms
,
amoebaGpu
->
maxCovalentDegreeSz
);
}
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
int
maxOffset
=
20
;
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
maxOffset
*
paddedNumberOfAtoms
,
1
,
"DebugArray"
);
memset
(
debugArray
->
_pSysData
,
0
,
sizeof
(
float
)
*
4
*
maxOffset
*
paddedNumberOfAtoms
);
debugArray
->
Upload
();
unsigned
int
targetAtom
=
49
;
#endif
// on first pass, set threads/block
static
unsigned
int
threadsPerBlock
=
0
;
...
...
@@ -1403,37 +1260,16 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
kClearFields_3
(
amoebaGpu
,
1
);
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoebaPmeDirectElectrostaticCutoffForces: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%lu shrd=%lu ixnCt=%lu workUnits=%u maxL1=%d
\n
"
,
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
gpu
->
bOutputBufferPerWarp
,
sizeof
(
PmeDirectElectrostaticParticle
),
(
sizeof
(
PmeDirectElectrostaticParticle
))
*
threadsPerBlock
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
,
maxL1
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaPmeDirectElectrostaticCutoffByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
PmeDirectElectrostaticParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
#endif
}
else
{
kCalculateAmoebaPmeDirectElectrostaticCutoffForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
PmeDirectElectrostaticParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
#endif
}
LAUNCHERROR
(
"kCalculateAmoebaPmeDirectElectrostaticCutoffForces"
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
View file @
2b508482
//-----------------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------------
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "cudaKernels.h"
#include "amoebaCudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h"
//#define AMOEBA_DEBUG
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaAmoebaGmxSimulation
cAmoebaSim
;
...
...
@@ -171,12 +191,7 @@ __device__ void sumTempBuffer( FixedFieldParticle& atomI, FixedFieldParticle& at
}
__device__
void
calculateFixedFieldRealSpacePairIxn_kernel
(
FixedFieldParticle
&
atomI
,
FixedFieldParticle
&
atomJ
,
float
dscale
,
float
pscale
,
float4
fields
[
3
]
#ifdef AMOEBA_DEBUG
,
float4
*
pullBack
#endif
){
float
dscale
,
float
pscale
,
float4
fields
[
3
]){
// compute the real space portion of the Ewald summation
...
...
@@ -329,18 +344,6 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
fields
[
2
].
w
=
0.0
f
;
}
#ifdef AMOEBA_DEBUG
pullBack
[
0
].
x
=
xr
;
pullBack
[
0
].
y
=
yr
;
pullBack
[
0
].
z
=
zr
;
pullBack
[
0
].
w
=
r2
;
pullBack
[
1
].
x
=
atomJ
.
x
-
atomI
.
x
;
pullBack
[
1
].
y
=
atomJ
.
y
-
atomI
.
y
;
pullBack
[
1
].
z
=
atomJ
.
z
-
atomI
.
z
;
pullBack
[
1
].
w
=
(
atomJ
.
x
-
atomI
.
x
)
*
(
atomJ
.
x
-
atomI
.
x
)
+
(
atomJ
.
y
-
atomI
.
y
)
*
(
atomJ
.
y
-
atomI
.
y
)
+
(
atomJ
.
z
-
atomI
.
z
)
*
(
atomJ
.
z
-
atomI
.
z
);
#endif
}
// Include versions of the kernels for N^2 calculations.
...
...
@@ -361,40 +364,6 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
--------------------------------------------------------------------------------------- */
#ifdef AMOEBA_DEBUG
/**
 * Classify a double as finite or not.
 *
 * @param number  value to inspect
 * @return 1 if number is NaN, +infinity or -infinity; 0 otherwise
 */
static int isNanOrInfinity( double number ){

    // NaN is the only value that compares unequal to itself
    if( number != number ){
        return 1;
    }

    const double positiveInfinity = std::numeric_limits<double>::infinity();
    if( number == positiveInfinity || number == -positiveInfinity ){
        return 1;
    }

    return 0;
}
/**
 * Bubble-sort the first `length` entries of `array` into ascending order,
 * applying every swap to `track` as well so the two vectors stay paired.
 *
 * Sorting stops as soon as a full pass performs no swap. Elements that
 * compare equal are never exchanged.
 *
 * @param array   keys to sort in place (first `length` entries are accessed)
 * @param track   companion values permuted in lock-step with `array`
 * @param length  number of leading entries to sort; values <= 1 are a no-op
 */
static void bubbleSort( std::vector<int>& array, std::vector<int>& track, int length )
{
    bool swappedInPass = true;

    // 'last' is the highest index still unsorted; stop once a pass makes no swap
    for( int last = length - 1; last > 0 && swappedInPass; --last ){

        swappedInPass = false;

        for( int k = 0; k < last; ++k ){

            // neighbours already in order
            if( array[k] <= array[k+1] ){
                continue;
            }

            // swap array[k] and array[k+1]
            const int heldKey   = array[k];
            array[k]            = array[k+1];
            array[k+1]          = heldKey;

            // swap track[k] and track[k+1] so track follows array
            const int heldTrack = track[k];
            track[k]            = track[k+1];
            track[k+1]          = heldTrack;

            swappedInPass       = true;
        }
    }
}
#endif
/**---------------------------------------------------------------------------------------
Compute fixed electric field using PME
...
...
@@ -409,22 +378,6 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
static
unsigned
int
threadsPerBlock
=
0
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
#ifdef AMOEBA_DEBUG
static
const
char
*
methodName
=
"computeCudaAmoebaPmeFixedEField"
;
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
%s
\n
"
,
methodName
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
int
slots
=
15
;
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
*
slots
,
1
,
"DebugArray"
);
memset
(
debugArray
->
_pSysData
,
0
,
sizeof
(
float
)
*
4
*
paddedNumberOfAtoms
*
slots
);
debugArray
->
Upload
();
// print intermediate results for the targetAtom
unsigned
int
targetAtom
=
0
;
#endif
kClearFields_3
(
amoebaGpu
,
2
);
// on first pass, set threads/block
...
...
@@ -444,175 +397,17 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
kCalculateAmoebaPmeDirectFixedE_FieldCutoffByWarp_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
);
#endif
}
else
{
kCalculateAmoebaPmeDirectFixedE_FieldCutoff_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
);
#endif
}
LAUNCHERROR
(
"kCalculateAmoebaPmeDirectFixedE_Field_kernel"
);
kReducePmeDirectE_Fields
(
amoebaGpu
);
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
gpu
->
psInteractionCount
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"cudaComputeAmoebaPmeDirectFixedEField: threadsPerBlock=%u getThreadsPerBlock=%d sizeof=%u shrd=%u
\n
"
,
threadsPerBlock
,
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
),
gpu
->
sharedMemoryPerBlock
),
(
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
)),
(
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
))
*
threadsPerBlock
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"AmoebaCutoffForces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u ixnCt=%u workUnits=%u warp=%d
\n
"
,
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
gpu
->
bOutputBufferPerWarp
,
sizeof
(
FixedFieldParticle
),
sizeof
(
FixedFieldParticle
)
*
threadsPerBlock
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
,
gpu
->
bOutputBufferPerWarp
);
(
void
)
fflush
(
amoebaGpu
->
log
);
/*
(void) fprintf( amoebaGpu->log, "Out WorkArray_3_[1,2] paddedNumberOfAtoms=%d\n", gpu->sim.paddedNumberOfAtoms, gpu->sim.outputBuffers );
amoebaGpu->psWorkArray_3_1->Download();
amoebaGpu->psWorkArray_3_2->Download();
for( int ii = 0; ii < gpu->sim.paddedNumberOfAtoms; ii++ ){
(void) fprintf( amoebaGpu->log, "%5d ", ii);
int indexOffset = ii*3;
// buffer 1
(void) fprintf( amoebaGpu->log,"WArry1[%16.9e %16.9e %16.9e] ",
amoebaGpu->psWorkArray_3_1->_pSysData[indexOffset],
amoebaGpu->psWorkArray_3_1->_pSysData[indexOffset+1],
amoebaGpu->psWorkArray_3_1->_pSysData[indexOffset+2] );
// buffer 2
(void) fprintf( amoebaGpu->log,"WArry2[%16.9e %16.9e %16.9e] ",
amoebaGpu->psWorkArray_3_2->_pSysData[indexOffset],
amoebaGpu->psWorkArray_3_2->_pSysData[indexOffset+1],
amoebaGpu->psWorkArray_3_2->_pSysData[indexOffset+2] );
(void) fprintf( amoebaGpu->log,"\n" );
if( ii == maxPrint && (gpu->natoms - maxPrint) > ii ){
ii = gpu->natoms - maxPrint;
}
}
(void) fflush( amoebaGpu->log );
*/
amoebaGpu
->
psE_Field
->
Download
();
amoebaGpu
->
psE_FieldPolar
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"E-field (includes self term)"
);
int
maxPrint
=
3002
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
int
indexOffset
=
ii
*
3
;
// E_Field
(
void
)
fprintf
(
amoebaGpu
->
log
,
"E[%16.9e %16.9e %16.9e] "
,
amoebaGpu
->
psE_Field
->
_pSysData
[
indexOffset
],
amoebaGpu
->
psE_Field
->
_pSysData
[
indexOffset
+
1
],
amoebaGpu
->
psE_Field
->
_pSysData
[
indexOffset
+
2
]
);
// E_Field polar
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Epol[%16.9e %16.9e %16.9e] "
,
amoebaGpu
->
psE_FieldPolar
->
_pSysData
[
indexOffset
],
amoebaGpu
->
psE_FieldPolar
->
_pSysData
[
indexOffset
+
1
],
amoebaGpu
->
psE_FieldPolar
->
_pSysData
[
indexOffset
+
2
]
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
if
(
ii
==
maxPrint
&&
(
gpu
->
natoms
-
maxPrint
)
>
ii
){
ii
=
gpu
->
natoms
-
maxPrint
;
}
}
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"EFields End
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"DebugQ
\n
"
);
debugArray
->
Download
();
std
::
vector
<
int
>
indices
;
std
::
vector
<
int
>
track
;
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
int
debugIndex
=
jj
;
if
(
fabs
(
debugArray
->
_pSysData
[
jj
+
3
*
paddedNumberOfAtoms
].
x
)
>
0.0
){
int
orderIndex
=
gpu
->
psAtomIndex
->
_pSysData
[
jj
];
indices
.
push_back
(
orderIndex
);
track
.
push_back
(
jj
);
}
}
bubbleSort
(
indices
,
track
,
static_cast
<
int
>
(
track
.
size
())
);
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
amoebaGpu
->
gpuContext
->
psPosq4
->
Download
();
unsigned
int
count
=
0
;
float
sum0
[
3
]
=
{
0.0
f
,
0.0
f
,
0.0
f
};
float
sum1
[
3
]
=
{
0.0
f
,
0.0
f
,
0.0
f
};
int
offset0
=
1
;
int
offset1
=
2
;
/*
for( int jj = 0; jj < gpu->natoms; jj++ ){
int debugIndex = jj;
if( fabs(debugArray->_pSysData[jj+3*paddedNumberOfAtoms].x) > 0.0 ){
int orderIndex = gpu->psAtomIndex->_pSysData[jj];
count++;
*/
for
(
unsigned
int
ii
=
0
;
ii
<
track
.
size
();
ii
++
){
int
jj
=
track
[
ii
];
int
debugIndex
=
jj
;
int
orderIndex
=
indices
[
ii
];
if
(
orderIndex
>
31
&&
offset0
==
1
){
offset0
=
2
;
offset1
=
2
;
}
count
++
;
sum0
[
0
]
+=
debugArray
->
_pSysData
[
jj
+
offset0
*
paddedNumberOfAtoms
].
x
;
sum0
[
1
]
+=
debugArray
->
_pSysData
[
jj
+
offset0
*
paddedNumberOfAtoms
].
y
;
sum0
[
2
]
+=
debugArray
->
_pSysData
[
jj
+
offset0
*
paddedNumberOfAtoms
].
z
;
sum1
[
0
]
+=
debugArray
->
_pSysData
[
jj
+
offset1
*
paddedNumberOfAtoms
].
x
;
sum1
[
1
]
+=
debugArray
->
_pSysData
[
jj
+
offset1
*
paddedNumberOfAtoms
].
y
;
sum1
[
2
]
+=
debugArray
->
_pSysData
[
jj
+
offset1
*
paddedNumberOfAtoms
].
z
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d %5d %u PmeFixedEField
\n
"
,
orderIndex
,
jj
,
count
);
for
(
int
kk
=
0
;
kk
<
7
;
kk
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"[%16.9e %16.9e %16.9e %16.9e]
\n
"
,
debugArray
->
_pSysData
[
debugIndex
].
x
,
debugArray
->
_pSysData
[
debugIndex
].
y
,
debugArray
->
_pSysData
[
debugIndex
].
z
,
debugArray
->
_pSysData
[
debugIndex
].
w
);
debugIndex
+=
paddedNumberOfAtoms
;
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%6d %16.9e %16.9e %16.9e %16.9e %16.9e %16.9e %6d %6d cum sumsOp
\n
"
,
orderIndex
,
sum0
[
0
],
sum0
[
1
],
sum0
[
2
],
sum1
[
0
],
sum1
[
1
],
sum1
[
2
],
jj
,
count
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
}
// write results to file
if
(
1
){
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaEField"
,
fileId
,
outputVector
);
}
delete
debugArray
;
}
#endif
}
void
cudaComputeAmoebaPmeFixedEField
(
amoebaGpuContext
amoebaGpu
)
...
...
@@ -621,21 +416,4 @@ void cudaComputeAmoebaPmeFixedEField( amoebaGpuContext amoebaGpu )
kCalculateAmoebaPMEFixedMultipoles
(
amoebaGpu
);
cudaComputeAmoebaPmeDirectFixedEField
(
amoebaGpu
);
#ifdef AMOEBA_DEBUG
if
(
0
){
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
0
);
VectorOfDoubleVectors
outputVector
;
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
kReduceForces
(
gpu
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psForce4
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaRecipForceTorqueFixed"
,
fileId
,
outputVector
);
//cudaWriteVectorOfDoubleVectorsToFile( "CudaRecipEField", fileId, outputVector );
exit
(
0
);
}
#endif
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
View file @
2b508482
...
...
@@ -37,16 +37,7 @@ __launch_bounds__(64, 1)
void
METHOD_NAME
(
kCalculateAmoebaPmeDirectFixedE_Field
,
_kernel
)(
unsigned
int
*
workUnit
,
float
*
outputEField
,
float
*
outputEFieldPolar
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
,
unsigned
int
targetAtom
#endif
){
#ifdef AMOEBA_DEBUG
int
maxPullIndex
=
1
;
float4
pullBack
[
12
];
#endif
float
*
outputEFieldPolar
){
extern
__shared__
FixedFieldParticle
sA
[];
...
...
@@ -118,11 +109,7 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectFixedE_Field, _kernel)(
}
float4
ijField
[
3
];
calculateFixedFieldRealSpacePairIxn_kernel
(
localParticle
,
psA
[
j
],
dScaleValue
,
pScaleValue
,
ijField
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
);
calculateFixedFieldRealSpacePairIxn_kernel
(
localParticle
,
psA
[
j
],
dScaleValue
,
pScaleValue
,
ijField
);
// nan*0.0 = nan not 0.0, so explicitly exclude (atomI == atomJ) contribution
// by setting match flag
...
...
@@ -139,66 +126,6 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectFixedE_Field, _kernel)(
fieldPolarSum
[
1
]
+=
match
?
0
.
0
f
:
ijField
[
1
].
z
;
fieldPolarSum
[
2
]
+=
match
?
0
.
0
f
:
ijField
[
2
].
z
;
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
||
targetAtom
==
(
y
+
j
)
){
unsigned
int
index
=
atomI
==
targetAtom
?
(
y
+
j
)
:
atomI
;
unsigned
int
indexI
=
0
;
unsigned
int
indexJ
=
indexI
?
0
:
2
;
float
flag
=
7
.
0
f
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
j
);
debugArray
[
index
].
z
=
dScaleValue
;
debugArray
[
index
].
w
=
pScaleValue
;
/*
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = (float) bExclusionFlag;
debugArray[index].y = (float) (tgx);
debugArray[index].z = (float) j;
debugArray[index].w = flag;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = (float) dScaleMask;
debugArray[index].y = (float) pScaleMask.x;
debugArray[index].z = (float) pScaleMask.y;
debugArray[index].w = flag;
*/
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
match
?
0
.
0
f
:
ijField
[
0
].
x
;
debugArray
[
index
].
y
=
match
?
0
.
0
f
:
ijField
[
1
].
x
;
debugArray
[
index
].
z
=
match
?
0
.
0
f
:
ijField
[
2
].
x
;
debugArray
[
index
].
w
=
flag
+
1
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
match
?
0
.
0
f
:
ijField
[
0
].
z
;
debugArray
[
index
].
y
=
match
?
0
.
0
f
:
ijField
[
1
].
z
;
debugArray
[
index
].
z
=
match
?
0
.
0
f
:
ijField
[
2
].
z
;
debugArray
[
index
].
w
=
flag
+
2
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
match
?
0
.
0
f
:
ijField
[
0
].
y
;
debugArray
[
index
].
y
=
match
?
0
.
0
f
:
ijField
[
1
].
y
;
debugArray
[
index
].
z
=
match
?
0
.
0
f
:
ijField
[
2
].
y
;
debugArray
[
index
].
w
=
flag
+
3
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
match
?
0
.
0
f
:
ijField
[
0
].
w
;
debugArray
[
index
].
y
=
match
?
0
.
0
f
:
ijField
[
1
].
w
;
debugArray
[
index
].
z
=
match
?
0
.
0
f
:
ijField
[
2
].
w
;
debugArray
[
index
].
w
=
flag
+
4
.
0
f
;
for
(
int
pullIndex
=
0
;
pullIndex
<
maxPullIndex
;
pullIndex
++
){
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
}
}
#endif
}
// Write results
...
...
@@ -252,11 +179,7 @@ if( atomI == targetAtom || targetAtom == (y+j) ){
}
float4
ijField
[
3
];
calculateFixedFieldRealSpacePairIxn_kernel
(
localParticle
,
psA
[
jIdx
],
dScaleValue
,
pScaleValue
,
ijField
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
);
calculateFixedFieldRealSpacePairIxn_kernel
(
localParticle
,
psA
[
jIdx
],
dScaleValue
,
pScaleValue
,
ijField
);
unsigned
int
outOfBounds
=
(
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
jIdx
)
>=
cSim
.
atoms
)
)
?
1
:
0
;
...
...
@@ -317,65 +240,6 @@ if( atomI == targetAtom || targetAtom == (y+j) ){
}
}
#ifdef AMOEBA_DEBUG
if
(
(
atomI
==
targetAtom
||
(
y
+
jIdx
)
==
targetAtom
)
){
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
(
y
+
jIdx
)
:
atomI
;
unsigned
int
indexI
=
(
atomI
==
targetAtom
)
?
0
:
2
;
unsigned
int
indexJ
=
(
atomI
==
targetAtom
)
?
2
:
0
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
jIdx
);
debugArray
[
index
].
z
=
dScaleValue
;
debugArray
[
index
].
w
=
pScaleValue
;
float
flag
=
9
.
0
f
;
/*
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = (float) bExclusionFlag;
debugArray[index].y = (float) (tgx);
debugArray[index].z = (float) j;
debugArray[index].w = jIdx;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = (float) dScaleMask;
debugArray[index].y = (float) pScaleMask.x;
debugArray[index].z = (float) pScaleMask.y;
debugArray[index].w = (float) flags;
*/
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
outOfBounds
?
0
.
0
f
:
ijField
[
0
].
x
;
debugArray
[
index
].
y
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
].
x
;
debugArray
[
index
].
z
=
outOfBounds
?
0
.
0
f
:
ijField
[
2
].
x
;
debugArray
[
index
].
w
=
flag
+
1
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
outOfBounds
?
0
.
0
f
:
ijField
[
0
].
y
;
debugArray
[
index
].
y
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
].
y
;
debugArray
[
index
].
z
=
outOfBounds
?
0
.
0
f
:
ijField
[
2
].
y
;
debugArray
[
index
].
w
=
flag
+
2
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
outOfBounds
?
0
.
0
f
:
ijField
[
0
].
z
;
debugArray
[
index
].
y
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
].
z
;
debugArray
[
index
].
z
=
outOfBounds
?
0
.
0
f
:
ijField
[
2
].
z
;
debugArray
[
index
].
w
=
flag
+
3
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
outOfBounds
?
0
.
0
f
:
ijField
[
0
].
w
;
debugArray
[
index
].
y
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
].
w
;
debugArray
[
index
].
z
=
outOfBounds
?
0
.
0
f
:
ijField
[
2
].
w
;
debugArray
[
index
].
w
=
flag
+
4
.
0
f
;
for
(
int
pullIndex
=
0
;
pullIndex
<
maxPullIndex
;
pullIndex
++
){
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
}
}
#endif
}
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
View file @
2b508482
//-----------------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------------
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaGpuTypes.h"
#include "amoebaCudaKernels.h"
...
...
@@ -33,9 +55,6 @@ void GetCalculateAmoebaCudaPmeMutualInducedFieldSim(amoebaGpuContext amoebaGpu)
RTERROR
(
status
,
"GetCalculateAmoebaCudaPmeMutualInducedFieldSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed"
);
}
//#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
#undef INCLUDE_MI_FIELD_BUFFERS
#define INCLUDE_MI_FIELD_BUFFERS
#include "kCalculateAmoebaCudaMutualInducedParticle.h"
...
...
@@ -331,10 +350,6 @@ static void kReduceMutualInducedFieldDelta_kernel(int numberOfEntries, float* ar
{
epsilon
[
0
]
=
delta
[
0
].
x
>
delta
[
0
].
y
?
delta
[
0
].
x
:
delta
[
0
].
y
;
epsilon
[
0
]
=
48.033324
f
*
sqrtf
(
epsilon
[
0
]
/
(
(
float
)
(
numberOfEntries
/
3
))
);
#ifdef AMOEBA_DEBUG
epsilon
[
1
]
=
48.033324
f
*
sqrtf
(
delta
[
0
].
x
/
(
(
float
)
(
numberOfEntries
/
3
))
);
epsilon
[
2
]
=
48.033324
f
*
sqrtf
(
delta
[
0
].
y
/
(
(
float
)
(
numberOfEntries
/
3
))
);
#endif
}
}
...
...
@@ -419,16 +434,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
static
unsigned
int
threadsPerBlock
=
0
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
#ifdef AMOEBA_DEBUG
int
targetAtom
=
546
;
static
const
char
*
methodName
=
"cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply"
;
static
int
iteration
=
1
;
if
(
1
&&
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s
\n
"
,
methodName
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
kClearFields_3
(
amoebaGpu
,
2
);
// on first pass, set threads/block
...
...
@@ -444,17 +449,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
MutualInducedParticle
),
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
gpu
->
psInteractionCount
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%lu shrd=%lu ixnCt=%lu workUnits=%u
\n
"
,
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
gpu
->
bOutputBufferPerWarp
,
sizeof
(
MutualInducedParticle
),
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaPmeMutualInducedFieldCutoffByWarp_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
>>>
(
...
...
@@ -474,43 +468,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
kReduceMutualInducedFields
(
amoebaGpu
,
outputArray
,
outputPolarArray
);
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
&&
iteration
==
1
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished maxtrixMultiply kernel execution %d -- Direct only -- self added in kSorUpdateMutualInducedField_kernel
\n
"
,
iteration
);
(
void
)
fflush
(
amoebaGpu
->
log
);
outputArray
->
Download
();
outputPolarArray
->
Download
();
//debugArray->Download();
int
maxPrint
=
5
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
int
indexOffset
=
ii
*
3
;
// MI
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Mult[%16.9e %16.9e %16.9e] "
,
outputArray
->
_pSysData
[
indexOffset
],
outputArray
->
_pSysData
[
indexOffset
+
1
],
outputArray
->
_pSysData
[
indexOffset
+
2
]
);
// MI polar
(
void
)
fprintf
(
amoebaGpu
->
log
,
"MultP[%16.9e %16.9e %16.9e]
\n
"
,
outputPolarArray
->
_pSysData
[
indexOffset
],
outputPolarArray
->
_pSysData
[
indexOffset
+
1
],
outputPolarArray
->
_pSysData
[
indexOffset
+
2
]
);
if
(
ii
==
maxPrint
&&
(
gpu
->
natoms
-
maxPrint
)
>
ii
){
ii
=
gpu
->
natoms
-
maxPrint
;
}
}
(
void
)
fflush
(
amoebaGpu
->
log
);
iteration
++
;
}
#endif
}
/**---------------------------------------------------------------------------------------
...
...
@@ -526,19 +483,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
// ---------------------------------------------------------------------------------------
//#define AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
static
const
char
*
methodName
=
"cudaComputeAmoebaPmeMutualInducedFieldBySOR"
;
static
int
timestep
=
0
;
std
::
vector
<
int
>
fileId
;
timestep
++
;
fileId
.
resize
(
2
);
fileId
[
0
]
=
timestep
;
fileId
[
1
]
=
1
;
#endif
// ---------------------------------------------------------------------------------------
int
done
;
int
iteration
;
...
...
@@ -559,19 +503,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
cudaMemcpy
(
amoebaGpu
->
psInducedDipole
->
_pDevData
,
amoebaGpu
->
psE_Field
->
_pDevData
,
3
*
gpu
->
sim
.
paddedNumberOfAtoms
*
sizeof
(
float
),
cudaMemcpyDeviceToDevice
);
cudaMemcpy
(
amoebaGpu
->
psInducedDipolePolar
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
,
3
*
gpu
->
sim
.
paddedNumberOfAtoms
*
sizeof
(
float
),
cudaMemcpyDeviceToDevice
);
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
std
::
vector
<
int
>
fileId
;
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeEFieldPolarity"
,
fileId
,
outputVector
);
}
#endif
// if polarization type is direct, set flags signalling done and return
if
(
amoebaGpu
->
amoebaSim
.
polarizationType
)
...
...
@@ -609,12 +540,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
amoebaGpu
->
psCurrentEpsilon
->
_pDevData
);
LAUNCHERROR
(
"kReducePmeMutualInducedFieldDelta"
);
#ifdef AMOEBA_DEBUG
if
(
0
&&
amoebaGpu
->
log
){
// trackMutualInducedIterations
trackMutualInducedIterations
(
amoebaGpu
,
iteration
);
}
#endif
// Debye=48.033324f
amoebaGpu
->
psCurrentEpsilon
->
Download
();
float
currentEpsilon
=
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
0
];
...
...
@@ -624,79 +549,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
done
=
1
;
}
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
amoebaGpu
->
psInducedDipole
->
Download
();
amoebaGpu
->
psInducedDipolePolar
->
Download
();
#if 1
(
void
)
fprintf
(
amoebaGpu
->
log
,
"cudaComputeAmoebaPmeMutualInducedFieldBySOR iteration=%3d eps %14.6e [%14.6e %14.6e] done=%d
\n
"
,
iteration
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
1
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
2
],
done
);
#else
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s iteration=%3d eps %14.6e %14.6e crrntEps=%14.6e %14.6e %14.6e %14.6e done=%d
\n
"
,
methodName
,
iteration
,
sum1
,
sum2
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
0
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
1
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
2
],
done
);
#endif
(
void
)
fflush
(
amoebaGpu
->
log
);
if
(
0
){
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
iteration
);
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeMI"
,
fileId
,
outputVector
);
}
/*
int offset = 0;
int maxPrint = 10;
for( int ii = 0; ii < gpu->natoms; ii++ ){
(void) fprintf( amoebaGpu->log, "%4d ", ii );
(void) fprintf( amoebaGpu->log," Mi[%14.6e %14.6e %14.6e] ",
amoebaGpu->psInducedDipole->_pSysData[offset],
amoebaGpu->psInducedDipole->_pSysData[offset+1],
amoebaGpu->psInducedDipole->_pSysData[offset+2] );
(void) fprintf( amoebaGpu->log,"Mip[%14.6e %14.6e %14.6e]\n",
amoebaGpu->psInducedDipolePolar->_pSysData[offset],
amoebaGpu->psInducedDipolePolar->_pSysData[offset+1],
amoebaGpu->psInducedDipolePolar->_pSysData[offset+2] );
if( ii == maxPrint && (ii < (gpu->natoms - maxPrint) ) ){
ii = (gpu->natoms - maxPrint);
offset = 3*(ii+1);
} else {
offset += 3;
}
}
(void) fflush( amoebaGpu->log );
*/
if
(
0
){
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
iteration
);
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeMI"
,
fileId
,
outputVector
);
}
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"MI iteration=%3d eps %14.6e [%14.6e %14.6e] done=%d
\n
"
,
iteration
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
1
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
2
],
done
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#endif
// exit if nan
if
(
amoebaGpu
->
mutualInducedCurrentEpsilon
!=
amoebaGpu
->
mutualInducedCurrentEpsilon
){
...
...
@@ -710,25 +562,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
amoebaGpu
->
mutualInducedDone
=
done
;
amoebaGpu
->
mutualInducedConverged
=
(
!
done
||
iteration
>
amoebaGpu
->
mutualInducedMaxIterations
)
?
0
:
1
;
#ifdef AMOEBA_DEBUG
if
(
0
){
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeMI"
,
fileId
,
outputVector
);
}
if
(
0
){
static
int
iteration
=
0
;
checkForNans
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"CudaPmeMI"
,
stderr
);
checkForNans
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
gpu
->
psAtomIndex
->
_pSysData
,
iteration
,
"CudaPmeMIPolar"
,
stderr
);
}
#endif
// ---------------------------------------------------------------------------------------
}
void
cudaComputeAmoebaPmeMutualInducedField
(
amoebaGpuContext
amoebaGpu
)
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
View file @
2b508482
...
...
@@ -37,9 +37,6 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
void
METHOD_NAME
(
kCalculateAmoebaPmeMutualInducedField
,
_kernel
)(
unsigned
int
*
workUnit
,
float
*
outputField
,
float
*
outputFieldPolar
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
,
unsigned
int
targetAtom
#endif
){
extern
__shared__
MutualInducedParticle
sA
[];
...
...
@@ -52,10 +49,6 @@ void METHOD_NAME(kCalculateAmoebaPmeMutualInducedField, _kernel)(
unsigned
int
lasty
=
0xFFFFFFFF
;
const
float
uscale
=
1
.
0
f
;
#ifdef AMOEBA_DEBUG
float4
pullBack
[
4
];
#endif
while
(
pos
<
end
)
{
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
View file @
2b508482
//-----------------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------------
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "cudaKernels.h"
#include "amoebaCudaKernels.h"
...
...
@@ -51,8 +73,6 @@ __device__ static float normVector3( float* vector )
return
returnNorm
;
}
#undef AMOEBA_DEBUG
// ZThenX == 0
// Bisector == 1
// ZBisect == 2
...
...
@@ -379,10 +399,6 @@ void kCudaComputeLabFrameMoments_kernel( void )
void
cudaComputeAmoebaLabFrameMoments
(
amoebaGpuContext
amoebaGpu
)
{
// ---------------------------------------------------------------------------------------
static
const
char
*
methodName
=
"computeCudaAmoebaLabFrameMoments"
;
// ---------------------------------------------------------------------------------------
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
...
...
@@ -390,33 +406,6 @@ void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu )
int
numBlocks
=
gpu
->
sim
.
blocks
;
int
numThreads
=
gpu
->
sim
.
threads_per_block
;
//#define AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s: numBlocks/atoms=%d
\n
"
,
methodName
,
numBlocks
);
(
void
)
fflush
(
amoebaGpu
->
log
);
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
Download
();
amoebaGpu
->
psMolecularDipole
->
Download
();
amoebaGpu
->
psMultipoleParticlesTorqueBufferIndices
->
Download
();
gpu
->
psPosq4
->
Download
();
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
int
mIndex
=
3
*
ii
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%6d [%6d %6d %6d %6d] x[%16.9e %16.9e %16.9e] %s [%6d %6d %6d %6d]
\n
"
,
ii
,
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
x
,
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
y
,
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
z
,
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
w
,
gpu
->
psPosq4
->
_pSysData
[
ii
].
x
,
gpu
->
psPosq4
->
_pSysData
[
ii
].
y
,
gpu
->
psPosq4
->
_pSysData
[
ii
].
z
,
(
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
w
>
1
?
" XXX"
:
""
),
amoebaGpu
->
psMultipoleParticlesTorqueBufferIndices
->
_pSysData
[
ii
].
x
,
amoebaGpu
->
psMultipoleParticlesTorqueBufferIndices
->
_pSysData
[
ii
].
y
,
amoebaGpu
->
psMultipoleParticlesTorqueBufferIndices
->
_pSysData
[
ii
].
z
,
amoebaGpu
->
psMultipoleParticlesTorqueBufferIndices
->
_pSysData
[
ii
].
w
);
//if( ii == 30 )ii = gpu->natoms - 30;
}
}
#endif
// copy molecular moments to lab frame moment arrays
// check if chiral center requires moments to have sign flipped
// compute lab frame moments
...
...
@@ -428,7 +417,7 @@ void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu )
LAUNCHERROR
(
"kCudaComputeCheckChiral"
);
kCudaComputeLabFrameMoments_kernel
<<<
numBlocks
,
numThreads
>>>
(
);
LAUNCHERROR
(
methodName
);
LAUNCHERROR
(
"kCudaComputeLabFrameMoments"
);
}
...
...
@@ -505,5 +494,3 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
}
}
#undef AMOEBA_DEBUG
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaUtilities.cu
View file @
2b508482
//-----------------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------------
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaCudaKernels.h"
//#define AMOEBA_DEBUG
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaAmoebaGmxSimulation
cAmoebaSim
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
View file @
2b508482
//-----------------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------------
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaGpuTypes.h"
#include "amoebaCudaKernels.h"
...
...
@@ -38,12 +60,6 @@ void GetCalculateAmoebaCudaVdw14_7Sim(amoebaGpuContext amoebaGpu)
RTERROR
(
status
,
"GetCalculateAmoebaCudaVdw14_7Sim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed"
);
}
//#define AMOEBA_DEBUG_PRINT
#undef AMOEBA_DEBUG_PRINT
//#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
__device__
void
zeroVdw14_7SharedForce
(
struct
Vdw14_7Particle
*
sA
)
{
// zero shared fields
...
...
@@ -101,11 +117,7 @@ __device__ void getVdw14_7CombindedSigmaEpsilon_kernel( int sigmaCombiningRule,
}
__device__
void
calculateVdw14_7PairIxn_kernel
(
float
combindedSigma
,
float
combindedEpsilon
,
float
force
[
3
],
float
*
energy
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
#endif
)
float
force
[
3
],
float
*
energy
)
{
const
float
deltaHalM1
=
0.07
f
;
...
...
@@ -120,14 +132,6 @@ __device__ void calculateVdw14_7PairIxn_kernel( float combindedSigma, float c
float
r2
=
force
[
0
]
*
force
[
0
]
+
force
[
1
]
*
force
[
1
]
+
force
[
2
]
*
force
[
2
];
if
(
r2
>
cAmoebaSim
.
vdwCutoff2
){
*
energy
=
force
[
0
]
=
force
[
1
]
=
force
[
2
]
=
0.0
f
;
#ifdef AMOEBA_DEBUG
float
rI
=
rsqrtf
(
r2
);
float
r
=
1.0
f
/
rI
;
debugArray
[
0
].
x
=
r
;
debugArray
[
0
].
y
=
debugArray
[
0
].
z
=
debugArray
[
0
].
w
=
0.0
f
;
debugArray
[
1
].
x
=
debugArray
[
1
].
y
=
debugArray
[
1
].
z
=
0.0
f
;
debugArray
[
1
].
w
=
r
;
#endif
return
;
}
float
rI
=
rsqrtf
(
r2
);
...
...
@@ -156,17 +160,6 @@ __device__ void calculateVdw14_7PairIxn_kernel( float combindedSigma, float c
force
[
1
]
*=
deltaE
;
force
[
2
]
*=
deltaE
;
#ifdef AMOEBA_DEBUG
debugArray
[
0
].
x
=
r
;
debugArray
[
0
].
y
=
deltaE
;
debugArray
[
0
].
z
=
combindedSigma
;
debugArray
[
0
].
w
=
combindedEpsilon
;
debugArray
[
1
].
x
=
tau
;
debugArray
[
1
].
y
=
rho
;
debugArray
[
1
].
z
=
gTau
;
debugArray
[
1
].
w
=
r
;
#endif
}
// perform reduction of force on H's and add to heavy atom partner
...
...
@@ -504,22 +497,6 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
#ifdef AMOEBA_DEBUG_PRINT
static
const
char
*
methodName
=
"kCalculateAmoebaVdw14_7Forces"
;
if
(
1
&&
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s:
\n
"
,
methodName
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#ifdef AMOEBA_DEBUG
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
int
maxSlots
=
10
;
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
maxSlots
*
paddedNumberOfAtoms
,
1
,
"DebugArray"
);
memset
(
debugArray
->
_pSysData
,
0
,
sizeof
(
float
)
*
4
*
maxSlots
*
paddedNumberOfAtoms
);
debugArray
->
Upload
();
int
targetAtom
=
1
;
#endif
#endif
// set threads/block first time through
// on first pass, set threads/block
...
...
@@ -535,44 +512,9 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
Vdw14_7Particle
),
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
#ifdef AMOEBA_DEBUG_PRINT
if
(
0
){
static
int
iteration
=
0
;
checkForNansFloat4
(
gpu
->
natoms
,
gpu
->
psPosq4
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"
\n\n
zCoordPreCopyVdw"
,
stderr
);
}
#endif
kCalculateAmoebaVdw14_7CopyCoordinates
(
amoebaGpu
,
gpu
->
psPosq4
,
amoebaGpu
->
psAmoebaVdwCoordinates
);
kCalculateAmoebaVdw14_7CoordinateReduction
(
amoebaGpu
,
amoebaGpu
->
psAmoebaVdwCoordinates
,
amoebaGpu
->
psAmoebaVdwCoordinates
);
#ifdef AMOEBA_DEBUG_PRINT
if
(
1
&&
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Apply cutoff=%d warp=%d
\n
"
,
applyCutoff
,
gpu
->
bOutputBufferPerWarp
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u ixnCt=%u workUnits=%u
\n
"
,
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
Vdw14_7Particle
),
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
if
(
0
){
gpu
->
psInteractionCount
->
Download
();
amoebaGpu
->
psVdwWorkUnit
->
Download
();
unsigned
int
totalWarps
=
(
gpu
->
sim
.
nonbond_blocks
*
threadsPerBlock
)
/
GRID
;
float
ratiof
=
(
float
)
totalWarps
/
(
float
)
amoebaGpu
->
psVdwWorkUnit
->
_length
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Ixn warps=%u count=%u
\n
"
,
totalWarps
,
gpu
->
psInteractionCount
->
_pSysData
[
0
]
);
for
(
unsigned
int
ii
=
0
;
ii
<
amoebaGpu
->
psVdwWorkUnit
->
_length
;
ii
++
){
unsigned
int
x
=
amoebaGpu
->
psVdwWorkUnit
->
_pSysData
[
ii
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
unsigned
int
exclusions
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
float
warp
=
(
float
)(
ii
)
*
ratiof
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"GpuCell %8u [%5u %5u %1u] %10u warp=%15.6f
\n
"
,
ii
,
x
,
y
,
exclusions
,
warp
);
}
}
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
// clear output arrays
kClearFields_3
(
amoebaGpu
,
1
);
...
...
@@ -588,33 +530,6 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kFindInteractionsWithinBlocksVdwPeriodic"
);
#ifdef AMOEBA_DEBUG
if
(
0
&&
amoebaGpu
->
log
){
gpu
->
psInteractionCount
->
Download
();
gpu
->
psInteractingWorkUnit
->
Download
();
gpu
->
psInteractionFlag
->
Download
();
amoebaGpu
->
psVdwWorkUnit
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Vdw Ixn count=%u
\n
"
,
gpu
->
psInteractionCount
->
_pSysData
[
0
]
);
for
(
unsigned
int
ii
=
0
;
ii
<
gpu
->
psInteractingWorkUnit
->
_length
;
ii
++
){
unsigned
int
x
=
gpu
->
psInteractingWorkUnit
->
_pSysData
[
ii
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
unsigned
int
exclusions
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"GpuCell %8u %8u [%5u %5u %1u] %10u "
,
ii
,
gpu
->
psInteractingWorkUnit
->
_pSysData
[
ii
],
x
,
y
,
exclusions
,
gpu
->
psInteractionFlag
->
_pSysData
[
ii
]
);
x
=
amoebaGpu
->
psVdwWorkUnit
->
_pSysData
[
ii
];
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
exclusions
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
" AmGpu %8u [%5u %5u %1u]
\n
"
,
amoebaGpu
->
psWorkUnit
->
_pSysData
[
ii
],
x
,
y
,
exclusions
);
}
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaVdw14_7CutoffByWarp_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
...
...
@@ -622,12 +537,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
amoebaGpu
->
psVdwSigmaEpsilon
->
_pDevData
,
amoebaGpu
->
vdwSigmaCombiningRule
,
amoebaGpu
->
vdwEpsilonCombiningRule
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
#endif
}
else
{
kCalculateAmoebaVdw14_7Cutoff_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
...
...
@@ -636,13 +546,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
amoebaGpu
->
psVdwSigmaEpsilon
->
_pDevData
,
amoebaGpu
->
vdwSigmaCombiningRule
,
amoebaGpu
->
vdwEpsilonCombiningRule
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
#endif
}
LAUNCHERROR
(
"kCalculateAmoebaVdw14_7Cutoff"
);
...
...
@@ -656,12 +560,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
amoebaGpu
->
psVdwSigmaEpsilon
->
_pDevData
,
amoebaGpu
->
vdwSigmaCombiningRule
,
amoebaGpu
->
vdwEpsilonCombiningRule
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
#endif
}
else
{
kCalculateAmoebaVdw14_7N2_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
...
...
@@ -670,112 +569,15 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
amoebaGpu
->
psVdwSigmaEpsilon
->
_pDevData
,
amoebaGpu
->
vdwSigmaCombiningRule
,
amoebaGpu
->
vdwEpsilonCombiningRule
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
#endif
}
LAUNCHERROR
(
"kCalculateAmoebaVdw14_7N2"
);
}
#ifdef AMOEBA_DEBUG_PRINT
if
(
amoebaGpu
->
log
){
static
int
iteration
=
0
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished 14-7 kernel execution step=%d
\n
"
,
++
iteration
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#ifdef AMOEBA_DEBUG
debugArray
->
Download
();
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
double
cutOff
=
1.0e+03
;
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
int
debugIndex
=
jj
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d %5d DebugVdw
\n
"
,
targetAtom
,
jj
);
for
(
int
kk
=
0
;
kk
<
5
;
kk
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"[%16.9e %16.9e %16.9e %16.9e]
\n
"
,
debugArray
->
_pSysData
[
debugIndex
].
x
,
debugArray
->
_pSysData
[
debugIndex
].
y
,
debugArray
->
_pSysData
[
debugIndex
].
z
,
debugArray
->
_pSysData
[
debugIndex
].
w
);
if
(
kk
==
4
&&
(
fabs
(
debugArray
->
_pSysData
[
debugIndex
].
x
)
>
cutOff
||
fabs
(
debugArray
->
_pSysData
[
debugIndex
].
y
)
>
cutOff
||
fabs
(
debugArray
->
_pSysData
[
debugIndex
].
z
)
>
cutOff
)
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
" XXXX
\n
"
);
}
debugIndex
+=
paddedNumberOfAtoms
;
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
}
#endif
/*
amoebaGpu->psWorkArray_3_2->Download();
amoebaGpu->psWorkArray_3_1->Download();
//for( int jj = 0; jj < 3*gpu->natoms; jj += 3 )
for( int jj = 0; jj < 3*gpu->natoms; jj += 3 ){
for( int kk = 0; kk < gpu->sim.outputBuffers; kk++ ){
float delta = fabs(amoebaGpu->psWorkArray_3_1->_pSysStream[kk][jj+2] + 1.0f);
if( delta < 5.0e-06 || isNanOrInfinity( (double) amoebaGpu->psWorkArray_3_1->_pSysStream[kk][jj] ) || isNanOrInfinity( (double) amoebaGpu->psWorkArray_3_1->_pSysStream[kk][jj+2] ) )
(void) fprintf( amoebaGpu->log,"%6d %6d [%16.9e %16.9e %16.9e] [%16.9e %16.9e %16.9e]\n", jj, kk,
amoebaGpu->psWorkArray_3_1->_pSysStream[kk][jj],
amoebaGpu->psWorkArray_3_1->_pSysStream[kk][jj+1],
amoebaGpu->psWorkArray_3_1->_pSysStream[kk][jj+2],
amoebaGpu->psWorkArray_3_2->_pSysStream[kk][jj],
amoebaGpu->psWorkArray_3_2->_pSysStream[kk][jj+1],
amoebaGpu->psWorkArray_3_2->_pSysStream[kk][jj+2] );
}
}
*/
}
#endif
#ifdef AMOEBA_DEBUG
if
(
0
){
static
int
iteration
=
0
;
checkForNansFloat4
(
gpu
->
natoms
,
amoebaGpu
->
gpuContext
->
psForce4
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"PreVdw"
,
stderr
);
checkForNansFloat4
(
gpu
->
natoms
,
gpu
->
psPosq4
,
gpu
->
psAtomIndex
->
_pSysData
,
iteration
,
"zCoordPreVdw"
,
stderr
);
}
#endif
kReduceVdw14_7
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
);
#ifdef AMOEBA_DEBUG
if
(
0
){
static
int
iteration
=
0
;
checkForNans
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psWorkArray_3_2
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"Vdw32"
,
stderr
);
}
#endif
kCalculateAmoebaVdw14_7Reduction
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
,
amoebaGpu
->
gpuContext
->
psForce4
);
kCalculateAmoebaVdw14_7NonReduction
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
,
amoebaGpu
->
gpuContext
->
psForce4
);
#ifdef AMOEBA_DEBUG
if
(
0
){
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
CUDAStream
<
float4
>*
psTempForce
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
,
1
,
"psTempForce"
);
kClearFloat4
(
amoebaGpu
,
paddedNumberOfAtoms
,
psTempForce
);
//kCalculateAmoebaVdw14_7Reduction( amoebaGpu, amoebaGpu->psWorkArray_3_2, psTempForce );
kCalculateAmoebaVdw14_7NonReduction
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
,
psTempForce
);
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
psTempForce
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaVdw"
,
fileId
,
outputVector
);
delete
psTempForce
;
//exit(0);
}
if
(
0
){
static
int
iteration
=
0
;
checkForNansFloat4
(
gpu
->
natoms
,
amoebaGpu
->
gpuContext
->
psForce4
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"VdwForce"
,
stderr
);
}
#endif
#ifdef AMOEBA_DEBUG
delete
debugArray
;
#endif
// ---------------------------------------------------------------------------------------
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.h
View file @
2b508482
...
...
@@ -39,9 +39,6 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)(
int
sigmaCombiningRule
,
int
epsilonCombiningRule
,
float
*
outputForce
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
,
unsigned
int
targetAtom
#endif
){
extern
__shared__
Vdw14_7Particle
sA
[];
...
...
@@ -57,9 +54,6 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)(
int
exclusionMask
;
float
totalEnergy
=
0
.
0
f
;
#ifdef AMOEBA_DEBUG
float4
pullDebug
[
5
];
#endif
while
(
pos
<
end
)
{
...
...
@@ -129,11 +123,7 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)(
}
float
energy
;
calculateVdw14_7PairIxn_kernel
(
combindedSigma
,
combindedEpsilon
,
ijForce
,
&
energy
#ifdef AMOEBA_DEBUG
,
pullDebug
#endif
);
calculateVdw14_7PairIxn_kernel
(
combindedSigma
,
combindedEpsilon
,
ijForce
,
&
energy
);
// mask out excluded ixns
unsigned
int
mask
=
(
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
j
)
>=
cSim
.
atoms
)
)
?
0
:
1
;
...
...
@@ -148,41 +138,6 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)(
forceSum
[
1
]
+=
mask
?
ijForce
[
1
]
:
0
.
0
f
;
forceSum
[
2
]
+=
mask
?
ijForce
[
2
]
:
0
.
0
f
;
totalEnergy
+=
mask
?
0
.
5
f
*
energy
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
||
(
y
+
j
)
==
targetAtom
){
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
(
y
+
j
)
:
atomI
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
j
);
debugArray
[
index
].
z
=
-
1
.
0
f
;
debugArray
[
index
].
w
=
(
float
)
(
mask
+
1
);
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
(
float
)
x
;
debugArray
[
index
].
y
=
(
float
)
y
;
debugArray
[
index
].
z
=
(
float
)
tgx
;
debugArray
[
index
].
w
=
energy
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullDebug
[
0
].
x
;
debugArray
[
index
].
y
=
pullDebug
[
0
].
y
;
debugArray
[
index
].
z
=
pullDebug
[
0
].
z
;
debugArray
[
index
].
w
=
pullDebug
[
0
].
w
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullDebug
[
1
].
x
;
debugArray
[
index
].
y
=
pullDebug
[
1
].
y
;
debugArray
[
index
].
z
=
pullDebug
[
1
].
z
;
debugArray
[
index
].
w
=
pullDebug
[
1
].
w
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
ijForce
[
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
ijForce
[
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
ijForce
[
2
]
:
0
.
0
f
;
}
#endif
}
// Write results
...
...
@@ -260,11 +215,7 @@ flags = 0xFFFFFFFF;
ijForce
[
1
]
-=
floor
(
ijForce
[
1
]
*
cSim
.
invPeriodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
ijForce
[
2
]
-=
floor
(
ijForce
[
2
]
*
cSim
.
invPeriodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
}
calculateVdw14_7PairIxn_kernel
(
combindedSigma
,
combindedEpsilon
,
ijForce
,
&
energy
#ifdef AMOEBA_DEBUG
,
pullDebug
#endif
);
calculateVdw14_7PairIxn_kernel
(
combindedSigma
,
combindedEpsilon
,
ijForce
,
&
energy
);
// mask out excluded ixns
...
...
@@ -328,40 +279,6 @@ flags = 0xFFFFFFFF;
#endif
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
||
(
y
+
jIdx
)
==
targetAtom
){
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
(
y
+
jIdx
)
:
atomI
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
jIdx
);
debugArray
[
index
].
z
=
-
3
.
0
;
debugArray
[
index
].
w
=
(
float
)
(
mask
+
1
);
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
(
float
)
x
;
debugArray
[
index
].
y
=
(
float
)
y
;
debugArray
[
index
].
z
=
(
float
)
tgx
;
debugArray
[
index
].
w
=
energy
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullDebug
[
0
].
x
;
debugArray
[
index
].
y
=
pullDebug
[
0
].
y
;
debugArray
[
index
].
z
=
pullDebug
[
0
].
z
;
debugArray
[
index
].
w
=
pullDebug
[
0
].
w
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullDebug
[
1
].
x
;
debugArray
[
index
].
y
=
pullDebug
[
1
].
y
;
debugArray
[
index
].
z
=
pullDebug
[
1
].
z
;
debugArray
[
index
].
w
=
pullDebug
[
1
].
w
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
ijForce
[
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
ijForce
[
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
ijForce
[
2
]
:
0
.
0
f
;
}
#endif
#ifdef USE_CUTOFF
}
#endif
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
View file @
2b508482
//-----------------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------------
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaGpuTypes.h"
#include "amoebaCudaKernels.h"
...
...
@@ -35,9 +57,6 @@ void GetCalculateAmoebaCudaWcaDispersionSim(amoebaGpuContext amoebaGpu)
RTERROR
(
status
,
"GetCalculateAmoebaCudaWcaDispersionSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed"
);
}
//#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
__device__
void
zeroWcaDispersionSharedForce
(
struct
WcaDispersionParticle
*
sA
)
{
// zero shared fields
...
...
@@ -105,14 +124,7 @@ __device__ void calculateWcaDispersionPairIxn_kernel( float4 atomCoordinatesI, f
float
radiusI
,
float
radiusJ
,
float
rmixo
,
float
rmixh
,
float
emixo
,
float
emixh
,
float
force
[
3
],
float
*
energy
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
#endif
)
{
float
force
[
3
],
float
*
energy
)
{
const
float
pi
=
3.1415926535897
f
;
const
float
shctd
=
cAmoebaSim
.
shctd
;
...
...
@@ -318,29 +330,6 @@ __device__ void calculateWcaDispersionPairIxn_kernel( float4 atomCoordinatesI, f
force
[
1
]
*=
de
;
force
[
2
]
*=
de
;
#ifdef AMOEBA_DEBUG
debugArray
[
0
].
x
=
sum
;
debugArray
[
0
].
y
=
sum
;
debugArray
[
0
].
z
=
sum
;
debugArray
[
0
].
w
=
sum
;
#if 0
debugArray[0].x = r;
debugArray[0].y = -r*de/awater;
debugArray[0].z = emixo;
debugArray[0].w = mask2;
debugArray[1].x = dl;
debugArray[1].y = du;
debugArray[1].z = lik;
debugArray[1].w = uik;
debugArray[2].x = du1;
debugArray[2].y = du2;
debugArray[2].z = term;
debugArray[2].w = sk;
#endif
#endif
}
// Include versions of the kernels for N^2 calculations.
...
...
@@ -385,16 +374,6 @@ void kCalculateAmoebaWcaDispersionForces( amoebaGpuContext amoebaGpu )
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
WcaDispersionParticle
),
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u ixnCt=%u workUnits=%u
\n
"
,
methodName
,
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
gpu
->
bOutputBufferPerWarp
,
sizeof
(
WcaDispersionParticle
),
sizeof
(
WcaDispersionParticle
)
*
threadsPerBlock
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaWcaDispersionN2ByWarp_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
WcaDispersionParticle
)
*
threadsPerBlock
>>>
(
...
...
plugins/amoeba/platforms/cuda/src/kernels/kFindInteractingBlocksVdw.h
View file @
2b508482
...
...
@@ -24,56 +24,6 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernels for identifying interacting blocks. It is included
* several times in kCalculateCDLJForces.cu with different #defines to generate
* different versions of the kernels.
*/
/**
* Find a bounding box for the atoms in each block.
*/
/*
__global__ void METHOD_NAME(kFindBlockBounds, _kernel)()
{
unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int base = pos << GRIDBITS;
if (base < cSim.atoms)
{
float4 apos = cSim.pPosq[base];
#ifdef USE_PERIODIC
apos.x -= floor(apos.x*cSim.invPeriodicBoxSizeX)*cSim.periodicBoxSizeX;
apos.y -= floor(apos.y*cSim.invPeriodicBoxSizeY)*cSim.periodicBoxSizeY;
apos.z -= floor(apos.z*cSim.invPeriodicBoxSizeZ)*cSim.periodicBoxSizeZ;
float4 firstPoint = apos;
#endif
float minx = apos.x;
float maxx = apos.x;
float miny = apos.y;
float maxy = apos.y;
float minz = apos.z;
float maxz = apos.z;
for (unsigned int i = 1; i < GRID; i++)
{
apos = cSim.pPosq[base+i];
#ifdef USE_PERIODIC
apos.x -= floor((apos.x-firstPoint.x)*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
apos.y -= floor((apos.y-firstPoint.y)*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
apos.z -= floor((apos.z-firstPoint.z)*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
minx = min(minx, apos.x);
maxx = max(maxx, apos.x);
miny = min(miny, apos.y);
maxy = max(maxy, apos.y);
minz = min(minz, apos.z);
maxz = max(maxz, apos.z);
}
cSim.pGridBoundingBox[pos] = make_float4(0.5f*(maxx-minx), 0.5f*(maxy-miny), 0.5f*(maxz-minz), 0);
cSim.pGridCenter[pos] = make_float4(0.5f*(maxx+minx), 0.5f*(maxy+miny), 0.5f*(maxz+minz), 0);
}
}
*/
/**
* Compare the bounding boxes for each pair of blocks. If they are sufficiently far apart,
* mark them as non-interacting.
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment