Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
2b508482
"docs-source/vscode:/vscode.git/clone" did not exist on "3a0e748cdb5f73e6ba2e0fbf66dd230f41692bf0"
Commit
2b508482
authored
Nov 23, 2011
by
Mark Friedrichs
Browse files
Added copyright
Removed debugging code
parent
36762962
Changes
33
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
222 additions
and
1310 deletions
+222
-1310
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.h
...cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.h
+7
-76
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
...eba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
+25
-3
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
...src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
+26
-190
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
...ms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
+26
-248
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
...rms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
+4
-140
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+29
-196
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
...a/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
+0
-7
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
...forms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
+26
-39
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaUtilities.cu
...atforms/cuda/src/kernels/kCalculateAmoebaCudaUtilities.cu
+25
-5
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
...platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
+26
-224
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.h
.../platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.h
+2
-85
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
+26
-47
plugins/amoeba/platforms/cuda/src/kernels/kFindInteractingBlocksVdw.h
...ba/platforms/cuda/src/kernels/kFindInteractingBlocksVdw.h
+0
-50
No files found.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.h
View file @
2b508482
...
...
@@ -36,11 +36,7 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif
void
METHOD_NAME
(
kCalculateAmoebaMutualInducedField
,
_kernel
)(
unsigned
int
*
workUnit
,
float
*
outputField
,
float
*
outputFieldPolar
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
,
unsigned
int
targetAtom
#endif
){
float
*
outputField
,
float
*
outputFieldPolar
){
extern
__shared__
MutualInducedParticle
sA
[];
...
...
@@ -99,11 +95,7 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
// load coords, charge, ...
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
j
],
ijField
#ifdef AMOEBA_DEBUG
,
debugArray
#endif
);
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
j
],
ijField
);
unsigned
int
mask
=
(
(
atomI
==
(
y
+
j
))
||
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
j
)
>=
cSim
.
atoms
)
)
?
0
:
1
;
...
...
@@ -117,34 +109,6 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
fieldPolarSum
[
1
]
+=
mask
?
ijField
[
1
][
1
]
:
0
.
0
f
;
fieldPolarSum
[
2
]
+=
mask
?
ijField
[
1
][
2
]
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
){
unsigned
int
index
=
y
+
j
;
unsigned
int
indexI
=
0
;
//unsigned int indexJ = 2;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
j
);
//debugArray[index].z = cAmoebaSim.pDampingFactorAndThole[atomI].x;
debugArray
[
index
].
z
=
(
float
)
cSim
.
atoms
;
debugArray
[
index
].
w
=
(
float
)
(
mask
+
1
);
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
ijField
[
indexI
][
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
ijField
[
indexI
][
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
ijField
[
indexI
][
2
]
:
0
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
ijField
[
indexI
+
1
][
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
ijField
[
indexI
+
1
][
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
ijField
[
indexI
+
1
][
2
]
:
0
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
(
float
)
x
;
debugArray
[
index
].
y
=
(
float
)
y
;
debugArray
[
index
].
z
=
(
float
)
1
.
0
f
;
}
#endif
}
// Write results
...
...
@@ -161,9 +125,8 @@ if( atomI == targetAtom ){
#endif
}
else
// 100% utilization
{
}
else
{
// Read fixed atom data into registers and GRF
if
(
lasty
!=
y
)
{
...
...
@@ -185,13 +148,9 @@ if( atomI == targetAtom ){
// load coords, charge, ...
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
tj
],
ijField
#ifdef AMOEBA_DEBUG
,
debugArray
#endif
);
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
tj
],
ijField
);
unsigned
int
mask
=
(
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
tj
)
>=
cSim
.
atoms
)
)
?
0
:
1
;
unsigned
int
mask
=
(
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
tj
)
>=
cSim
.
atoms
)
)
?
0
:
1
;
// add to field at atomI the field due atomJ's dipole
...
...
@@ -217,36 +176,8 @@ if( atomI == targetAtom ){
psA
[
tj
].
fieldPolar
[
1
]
+=
mask
?
ijField
[
3
][
1
]
:
0
.
0
f
;
psA
[
tj
].
fieldPolar
[
2
]
+=
mask
?
ijField
[
3
][
2
]
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
//#if 0
if
(
atomI
==
targetAtom
||
(
y
+
tj
)
==
targetAtom
){
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
(
y
+
tj
)
:
atomI
;
unsigned
int
indexI
=
(
atomI
==
targetAtom
)
?
0
:
2
;
//unsigned int indexJ = (atomI == targetAtom) ? 2 : 0;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
tj
);
debugArray
[
index
].
z
=
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
;
debugArray
[
index
].
w
=
(
float
)
(
mask
+
1
);
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
ijField
[
indexI
][
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
ijField
[
indexI
][
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
ijField
[
indexI
][
2
]
:
0
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
ijField
[
indexI
+
1
][
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
ijField
[
indexI
+
1
][
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
ijField
[
indexI
+
1
][
2
]
:
0
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
(
float
)
x
;
debugArray
[
index
].
y
=
(
float
)
y
;
debugArray
[
index
].
z
=
(
float
)
-
1
.
0
f
;
}
#endif
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
View file @
2b508482
//-----------------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------------
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaGpuTypes.h"
#include "cudaKernels.h"
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
View file @
2b508482
///-----------------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------------
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaGpuTypes.h"
#include "cudaKernels.h"
#include "amoebaCudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h"
//#define AMOEBA_DEBUG
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaAmoebaGmxSimulation
cAmoebaSim
;
...
...
@@ -213,11 +233,7 @@ __device__ void calculateBn_kernel( float r, float4* bn, float* bn0, float *bn5
#undef SUB_METHOD_NAME
__device__
void
calculatePmeDirectElectrostaticPairIxnOrig_kernel
(
const
PmeDirectElectrostaticParticle
&
atomI
,
const
PmeDirectElectrostaticParticle
&
atomJ
,
const
float
*
scalingFactors
,
float4
forceTorqueEnergy
[
3
]
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
#endif
){
const
float
*
scalingFactors
,
float4
forceTorqueEnergy
[
3
]){
float
xr
=
atomJ
.
x
-
atomI
.
x
;
float
yr
=
atomJ
.
y
-
atomI
.
y
;
...
...
@@ -909,132 +925,6 @@ __device__ void calculatePmeDirectElectrostaticPairIxnOrig_kernel( const PmeDire
forceTorqueEnergy
[
2
].
y
=
(
ttm32
+
ttm3i2
);
forceTorqueEnergy
[
2
].
z
=
(
ttm33
+
ttm3i3
);
#ifdef AMOEBA_DEBUG
int
debugIndex
=
0
;
float
idTracker
=
1.0
f
;
/*
debugArray[debugIndex].x = atomI.labFrameDipole[0];
debugArray[debugIndex].y = atomI.labFrameDipole[1];
debugArray[debugIndex].z = atomI.labFrameDipole[2];
debugArray[debugIndex].w = r2;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomJ.labFrameDipole[0];
debugArray[debugIndex].y = atomJ.labFrameDipole[1];
debugArray[debugIndex].z = atomJ.labFrameDipole[2];
debugArray[debugIndex].w = cSim.alphaEwald;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomI.inducedDipole[0];
debugArray[debugIndex].y = atomI.inducedDipole[1];
debugArray[debugIndex].z = atomI.inducedDipole[2];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomJ.inducedDipole[0];
debugArray[debugIndex].y = atomJ.inducedDipole[1];
debugArray[debugIndex].z = atomJ.inducedDipole[2];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomI.inducedDipoleP[0];
debugArray[debugIndex].y = atomI.inducedDipoleP[1];
debugArray[debugIndex].z = atomI.inducedDipoleP[2];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomJ.inducedDipoleP[0];
debugArray[debugIndex].y = atomJ.inducedDipoleP[1];
debugArray[debugIndex].z = atomJ.inducedDipoleP[2];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = conversionFactor*ftm21;
debugArray[debugIndex].y = conversionFactor*ftm22;
debugArray[debugIndex].z = conversionFactor*ftm23;
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = e;
debugArray[debugIndex].y = ei;
debugArray[debugIndex].z = erl;
debugArray[debugIndex].w = erli;
debugIndex++;
*/
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
r2
;
debugArray
[
debugIndex
].
y
=
cSim
.
alphaEwald
;
debugArray
[
debugIndex
].
z
=
conversionFactor
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ftm21
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ftm22
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ftm23
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ftm2i1
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ftm2i2
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ftm2i3
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
/*
idTracker += 100.0;
debugArray[debugIndex].x = fridmp1;
debugArray[debugIndex].y = fridmp2;
debugArray[debugIndex].z = fridmp3;
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 100.0;
debugArray[debugIndex].x = findmp1;
debugArray[debugIndex].y = findmp2;
debugArray[debugIndex].z = findmp3;
debugArray[debugIndex].w = idTracker;
debugIndex++;
*/
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ttm21
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ttm22
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ttm23
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ttm2i1
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ttm2i2
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ttm2i3
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ttm31
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ttm32
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ttm33
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ttm3i1
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ttm3i2
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ttm3i3
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
#endif
}
else
{
forceTorqueEnergy
[
0
].
x
=
0.0
f
;
...
...
@@ -1051,15 +941,6 @@ __device__ void calculatePmeDirectElectrostaticPairIxnOrig_kernel( const PmeDire
forceTorqueEnergy
[
0
].
w
=
0.0
f
;
#ifdef AMOEBA_DEBUG
for
(
int
ii
=
0
;
ii
<
12
;
ii
++
){
debugArray
[
ii
].
x
=
0.0
f
;
debugArray
[
ii
].
y
=
0.0
f
;
debugArray
[
ii
].
z
=
0.0
f
;
debugArray
[
ii
].
w
=
(
float
)
(
-
ii
);
}
#endif
}
return
;
...
...
@@ -1351,35 +1232,11 @@ static void kReduceTorque(amoebaGpuContext amoebaGpu )
void
cudaComputeAmoebaPmeDirectElectrostatic
(
amoebaGpuContext
amoebaGpu
)
{
#ifdef AMOEBA_DEBUG
static
const
char
*
methodName
=
"cudaComputeAmoebaPmeDirectElectrostatic"
;
static
int
timestep
=
0
;
std
::
vector
<
int
>
fileId
;
timestep
++
;
fileId
.
resize
(
2
);
fileId
[
0
]
=
timestep
;
fileId
[
1
]
=
1
;
#endif
// ---------------------------------------------------------------------------------------
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
// apparently debug array can take up nontrivial no. registers
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s %d maxCovalentDegreeSz=%d ZZZ
\n
"
,
methodName
,
gpu
->
natoms
,
amoebaGpu
->
maxCovalentDegreeSz
);
}
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
int
maxOffset
=
20
;
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
maxOffset
*
paddedNumberOfAtoms
,
1
,
"DebugArray"
);
memset
(
debugArray
->
_pSysData
,
0
,
sizeof
(
float
)
*
4
*
maxOffset
*
paddedNumberOfAtoms
);
debugArray
->
Upload
();
unsigned
int
targetAtom
=
49
;
#endif
// on first pass, set threads/block
static
unsigned
int
threadsPerBlock
=
0
;
...
...
@@ -1403,37 +1260,16 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
kClearFields_3
(
amoebaGpu
,
1
);
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoebaPmeDirectElectrostaticCutoffForces: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%lu shrd=%lu ixnCt=%lu workUnits=%u maxL1=%d
\n
"
,
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
gpu
->
bOutputBufferPerWarp
,
sizeof
(
PmeDirectElectrostaticParticle
),
(
sizeof
(
PmeDirectElectrostaticParticle
))
*
threadsPerBlock
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
,
maxL1
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaPmeDirectElectrostaticCutoffByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
PmeDirectElectrostaticParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
#endif
}
else
{
kCalculateAmoebaPmeDirectElectrostaticCutoffForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
PmeDirectElectrostaticParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
#endif
}
LAUNCHERROR
(
"kCalculateAmoebaPmeDirectElectrostaticCutoffForces"
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
View file @
2b508482
//-----------------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------------
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "cudaKernels.h"
#include "amoebaCudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h"
//#define AMOEBA_DEBUG
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaAmoebaGmxSimulation
cAmoebaSim
;
...
...
@@ -171,12 +191,7 @@ __device__ void sumTempBuffer( FixedFieldParticle& atomI, FixedFieldParticle& at
}
__device__
void
calculateFixedFieldRealSpacePairIxn_kernel
(
FixedFieldParticle
&
atomI
,
FixedFieldParticle
&
atomJ
,
float
dscale
,
float
pscale
,
float4
fields
[
3
]
#ifdef AMOEBA_DEBUG
,
float4
*
pullBack
#endif
){
float
dscale
,
float
pscale
,
float4
fields
[
3
]){
// compute the real space portion of the Ewald summation
...
...
@@ -329,18 +344,6 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
fields
[
2
].
w
=
0.0
f
;
}
#ifdef AMOEBA_DEBUG
pullBack
[
0
].
x
=
xr
;
pullBack
[
0
].
y
=
yr
;
pullBack
[
0
].
z
=
zr
;
pullBack
[
0
].
w
=
r2
;
pullBack
[
1
].
x
=
atomJ
.
x
-
atomI
.
x
;
pullBack
[
1
].
y
=
atomJ
.
y
-
atomI
.
y
;
pullBack
[
1
].
z
=
atomJ
.
z
-
atomI
.
z
;
pullBack
[
1
].
w
=
(
atomJ
.
x
-
atomI
.
x
)
*
(
atomJ
.
x
-
atomI
.
x
)
+
(
atomJ
.
y
-
atomI
.
y
)
*
(
atomJ
.
y
-
atomI
.
y
)
+
(
atomJ
.
z
-
atomI
.
z
)
*
(
atomJ
.
z
-
atomI
.
z
);
#endif
}
// Include versions of the kernels for N^2 calculations.
...
...
@@ -361,40 +364,6 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
--------------------------------------------------------------------------------------- */
#ifdef AMOEBA_DEBUG
/**
 * Classify a floating-point value as non-finite.
 *
 * @param number  value to inspect
 * @return 1 if number is NaN, +infinity or -infinity; 0 otherwise
 */
static int isNanOrInfinity( double number ){

    // NaN is the only value that compares unequal to itself
    if( number != number ){
        return 1;
    }

    const double positiveInfinity = std::numeric_limits<double>::infinity();
    if( number == positiveInfinity || number == -positiveInfinity ){
        return 1;
    }

    return 0;
}
/**
 * Sort the first `length` entries of `array` into ascending order using
 * bubble sort, applying every swap to `track` as well so that the two
 * vectors stay paired element-for-element.
 *
 * @param array   keys to sort (modified in place)
 * @param track   companion values permuted in lock-step with array
 * @param length  number of leading entries to sort
 */
static void bubbleSort( std::vector<int>& array, std::vector<int>& track, int length )
{
    // after each pass the largest remaining key has settled at index `last`
    for( int last = length - 1; last > 0; --last ){

        bool swapped = false;

        for( int k = 0; k < last; ++k ){

            // neighbours already in order: nothing to do
            if( array[k] <= array[k+1] ){
                continue;
            }

            // swap array[k] and array[k+1]
            const int keyHold   = array[k];
            array[k]            = array[k+1];
            array[k+1]          = keyHold;

            // swap track[k] and track[k+1] to mirror the key swap
            const int trackHold = track[k];
            track[k]            = track[k+1];
            track[k+1]          = trackHold;

            swapped = true;
        }

        // a pass without swaps means the range is already sorted
        if( !swapped ){
            return;
        }
    }
}
#endif
/**---------------------------------------------------------------------------------------
Compute fixed electric field using PME
...
...
@@ -409,22 +378,6 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
static
unsigned
int
threadsPerBlock
=
0
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
#ifdef AMOEBA_DEBUG
static
const
char
*
methodName
=
"computeCudaAmoebaPmeFixedEField"
;
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
%s
\n
"
,
methodName
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
int
slots
=
15
;
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
*
slots
,
1
,
"DebugArray"
);
memset
(
debugArray
->
_pSysData
,
0
,
sizeof
(
float
)
*
4
*
paddedNumberOfAtoms
*
slots
);
debugArray
->
Upload
();
// print intermediate results for the targetAtom
unsigned
int
targetAtom
=
0
;
#endif
kClearFields_3
(
amoebaGpu
,
2
);
// on first pass, set threads/block
...
...
@@ -444,175 +397,17 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
kCalculateAmoebaPmeDirectFixedE_FieldCutoffByWarp_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
);
#endif
}
else
{
kCalculateAmoebaPmeDirectFixedE_FieldCutoff_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
);
#endif
}
LAUNCHERROR
(
"kCalculateAmoebaPmeDirectFixedE_Field_kernel"
);
kReducePmeDirectE_Fields
(
amoebaGpu
);
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
gpu
->
psInteractionCount
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"cudaComputeAmoebaPmeDirectFixedEField: threadsPerBlock=%u getThreadsPerBlock=%d sizeof=%u shrd=%u
\n
"
,
threadsPerBlock
,
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
),
gpu
->
sharedMemoryPerBlock
),
(
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
)),
(
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
))
*
threadsPerBlock
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"AmoebaCutoffForces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u ixnCt=%u workUnits=%u warp=%d
\n
"
,
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
gpu
->
bOutputBufferPerWarp
,
sizeof
(
FixedFieldParticle
),
sizeof
(
FixedFieldParticle
)
*
threadsPerBlock
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
,
gpu
->
bOutputBufferPerWarp
);
(
void
)
fflush
(
amoebaGpu
->
log
);
/*
(void) fprintf( amoebaGpu->log, "Out WorkArray_3_[1,2] paddedNumberOfAtoms=%d\n", gpu->sim.paddedNumberOfAtoms, gpu->sim.outputBuffers );
amoebaGpu->psWorkArray_3_1->Download();
amoebaGpu->psWorkArray_3_2->Download();
for( int ii = 0; ii < gpu->sim.paddedNumberOfAtoms; ii++ ){
(void) fprintf( amoebaGpu->log, "%5d ", ii);
int indexOffset = ii*3;
// buffer 1
(void) fprintf( amoebaGpu->log,"WArry1[%16.9e %16.9e %16.9e] ",
amoebaGpu->psWorkArray_3_1->_pSysData[indexOffset],
amoebaGpu->psWorkArray_3_1->_pSysData[indexOffset+1],
amoebaGpu->psWorkArray_3_1->_pSysData[indexOffset+2] );
// buffer 2
(void) fprintf( amoebaGpu->log,"WArry2[%16.9e %16.9e %16.9e] ",
amoebaGpu->psWorkArray_3_2->_pSysData[indexOffset],
amoebaGpu->psWorkArray_3_2->_pSysData[indexOffset+1],
amoebaGpu->psWorkArray_3_2->_pSysData[indexOffset+2] );
(void) fprintf( amoebaGpu->log,"\n" );
if( ii == maxPrint && (gpu->natoms - maxPrint) > ii ){
ii = gpu->natoms - maxPrint;
}
}
(void) fflush( amoebaGpu->log );
*/
amoebaGpu
->
psE_Field
->
Download
();
amoebaGpu
->
psE_FieldPolar
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"E-field (includes self term)"
);
int
maxPrint
=
3002
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
int
indexOffset
=
ii
*
3
;
// E_Field
(
void
)
fprintf
(
amoebaGpu
->
log
,
"E[%16.9e %16.9e %16.9e] "
,
amoebaGpu
->
psE_Field
->
_pSysData
[
indexOffset
],
amoebaGpu
->
psE_Field
->
_pSysData
[
indexOffset
+
1
],
amoebaGpu
->
psE_Field
->
_pSysData
[
indexOffset
+
2
]
);
// E_Field polar
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Epol[%16.9e %16.9e %16.9e] "
,
amoebaGpu
->
psE_FieldPolar
->
_pSysData
[
indexOffset
],
amoebaGpu
->
psE_FieldPolar
->
_pSysData
[
indexOffset
+
1
],
amoebaGpu
->
psE_FieldPolar
->
_pSysData
[
indexOffset
+
2
]
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
if
(
ii
==
maxPrint
&&
(
gpu
->
natoms
-
maxPrint
)
>
ii
){
ii
=
gpu
->
natoms
-
maxPrint
;
}
}
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"EFields End
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"DebugQ
\n
"
);
debugArray
->
Download
();
std
::
vector
<
int
>
indices
;
std
::
vector
<
int
>
track
;
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
int
debugIndex
=
jj
;
if
(
fabs
(
debugArray
->
_pSysData
[
jj
+
3
*
paddedNumberOfAtoms
].
x
)
>
0.0
){
int
orderIndex
=
gpu
->
psAtomIndex
->
_pSysData
[
jj
];
indices
.
push_back
(
orderIndex
);
track
.
push_back
(
jj
);
}
}
bubbleSort
(
indices
,
track
,
static_cast
<
int
>
(
track
.
size
())
);
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
amoebaGpu
->
gpuContext
->
psPosq4
->
Download
();
unsigned
int
count
=
0
;
float
sum0
[
3
]
=
{
0.0
f
,
0.0
f
,
0.0
f
};
float
sum1
[
3
]
=
{
0.0
f
,
0.0
f
,
0.0
f
};
int
offset0
=
1
;
int
offset1
=
2
;
/*
for( int jj = 0; jj < gpu->natoms; jj++ ){
int debugIndex = jj;
if( fabs(debugArray->_pSysData[jj+3*paddedNumberOfAtoms].x) > 0.0 ){
int orderIndex = gpu->psAtomIndex->_pSysData[jj];
count++;
*/
for
(
unsigned
int
ii
=
0
;
ii
<
track
.
size
();
ii
++
){
int
jj
=
track
[
ii
];
int
debugIndex
=
jj
;
int
orderIndex
=
indices
[
ii
];
if
(
orderIndex
>
31
&&
offset0
==
1
){
offset0
=
2
;
offset1
=
2
;
}
count
++
;
sum0
[
0
]
+=
debugArray
->
_pSysData
[
jj
+
offset0
*
paddedNumberOfAtoms
].
x
;
sum0
[
1
]
+=
debugArray
->
_pSysData
[
jj
+
offset0
*
paddedNumberOfAtoms
].
y
;
sum0
[
2
]
+=
debugArray
->
_pSysData
[
jj
+
offset0
*
paddedNumberOfAtoms
].
z
;
sum1
[
0
]
+=
debugArray
->
_pSysData
[
jj
+
offset1
*
paddedNumberOfAtoms
].
x
;
sum1
[
1
]
+=
debugArray
->
_pSysData
[
jj
+
offset1
*
paddedNumberOfAtoms
].
y
;
sum1
[
2
]
+=
debugArray
->
_pSysData
[
jj
+
offset1
*
paddedNumberOfAtoms
].
z
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d %5d %u PmeFixedEField
\n
"
,
orderIndex
,
jj
,
count
);
for
(
int
kk
=
0
;
kk
<
7
;
kk
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"[%16.9e %16.9e %16.9e %16.9e]
\n
"
,
debugArray
->
_pSysData
[
debugIndex
].
x
,
debugArray
->
_pSysData
[
debugIndex
].
y
,
debugArray
->
_pSysData
[
debugIndex
].
z
,
debugArray
->
_pSysData
[
debugIndex
].
w
);
debugIndex
+=
paddedNumberOfAtoms
;
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%6d %16.9e %16.9e %16.9e %16.9e %16.9e %16.9e %6d %6d cum sumsOp
\n
"
,
orderIndex
,
sum0
[
0
],
sum0
[
1
],
sum0
[
2
],
sum1
[
0
],
sum1
[
1
],
sum1
[
2
],
jj
,
count
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
}
// write results to file
if
(
1
){
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaEField"
,
fileId
,
outputVector
);
}
delete
debugArray
;
}
#endif
}
void
cudaComputeAmoebaPmeFixedEField
(
amoebaGpuContext
amoebaGpu
)
...
...
@@ -621,21 +416,4 @@ void cudaComputeAmoebaPmeFixedEField( amoebaGpuContext amoebaGpu )
kCalculateAmoebaPMEFixedMultipoles
(
amoebaGpu
);
cudaComputeAmoebaPmeDirectFixedEField
(
amoebaGpu
);
#ifdef AMOEBA_DEBUG
if
(
0
){
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
0
);
VectorOfDoubleVectors
outputVector
;
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
kReduceForces
(
gpu
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psForce4
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaRecipForceTorqueFixed"
,
fileId
,
outputVector
);
//cudaWriteVectorOfDoubleVectorsToFile( "CudaRecipEField", fileId, outputVector );
exit
(
0
);
}
#endif
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
View file @
2b508482
...
...
@@ -37,16 +37,7 @@ __launch_bounds__(64, 1)
void
METHOD_NAME
(
kCalculateAmoebaPmeDirectFixedE_Field
,
_kernel
)(
unsigned
int
*
workUnit
,
float
*
outputEField
,
float
*
outputEFieldPolar
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
,
unsigned
int
targetAtom
#endif
){
#ifdef AMOEBA_DEBUG
int
maxPullIndex
=
1
;
float4
pullBack
[
12
];
#endif
float
*
outputEFieldPolar
){
extern
__shared__
FixedFieldParticle
sA
[];
...
...
@@ -118,11 +109,7 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectFixedE_Field, _kernel)(
}
float4
ijField
[
3
];
calculateFixedFieldRealSpacePairIxn_kernel
(
localParticle
,
psA
[
j
],
dScaleValue
,
pScaleValue
,
ijField
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
);
calculateFixedFieldRealSpacePairIxn_kernel
(
localParticle
,
psA
[
j
],
dScaleValue
,
pScaleValue
,
ijField
);
// nan*0.0 = nan not 0.0, so explicitly exclude (atomI == atomJ) contribution
// by setting match flag
...
...
@@ -139,66 +126,6 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectFixedE_Field, _kernel)(
fieldPolarSum
[
1
]
+=
match
?
0
.
0
f
:
ijField
[
1
].
z
;
fieldPolarSum
[
2
]
+=
match
?
0
.
0
f
:
ijField
[
2
].
z
;
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
||
targetAtom
==
(
y
+
j
)
){
unsigned
int
index
=
atomI
==
targetAtom
?
(
y
+
j
)
:
atomI
;
unsigned
int
indexI
=
0
;
unsigned
int
indexJ
=
indexI
?
0
:
2
;
float
flag
=
7
.
0
f
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
j
);
debugArray
[
index
].
z
=
dScaleValue
;
debugArray
[
index
].
w
=
pScaleValue
;
/*
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = (float) bExclusionFlag;
debugArray[index].y = (float) (tgx);
debugArray[index].z = (float) j;
debugArray[index].w = flag;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = (float) dScaleMask;
debugArray[index].y = (float) pScaleMask.x;
debugArray[index].z = (float) pScaleMask.y;
debugArray[index].w = flag;
*/
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
match
?
0
.
0
f
:
ijField
[
0
].
x
;
debugArray
[
index
].
y
=
match
?
0
.
0
f
:
ijField
[
1
].
x
;
debugArray
[
index
].
z
=
match
?
0
.
0
f
:
ijField
[
2
].
x
;
debugArray
[
index
].
w
=
flag
+
1
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
match
?
0
.
0
f
:
ijField
[
0
].
z
;
debugArray
[
index
].
y
=
match
?
0
.
0
f
:
ijField
[
1
].
z
;
debugArray
[
index
].
z
=
match
?
0
.
0
f
:
ijField
[
2
].
z
;
debugArray
[
index
].
w
=
flag
+
2
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
match
?
0
.
0
f
:
ijField
[
0
].
y
;
debugArray
[
index
].
y
=
match
?
0
.
0
f
:
ijField
[
1
].
y
;
debugArray
[
index
].
z
=
match
?
0
.
0
f
:
ijField
[
2
].
y
;
debugArray
[
index
].
w
=
flag
+
3
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
match
?
0
.
0
f
:
ijField
[
0
].
w
;
debugArray
[
index
].
y
=
match
?
0
.
0
f
:
ijField
[
1
].
w
;
debugArray
[
index
].
z
=
match
?
0
.
0
f
:
ijField
[
2
].
w
;
debugArray
[
index
].
w
=
flag
+
4
.
0
f
;
for
(
int
pullIndex
=
0
;
pullIndex
<
maxPullIndex
;
pullIndex
++
){
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
}
}
#endif
}
// Write results
...
...
@@ -252,11 +179,7 @@ if( atomI == targetAtom || targetAtom == (y+j) ){
}
float4
ijField
[
3
];
calculateFixedFieldRealSpacePairIxn_kernel
(
localParticle
,
psA
[
jIdx
],
dScaleValue
,
pScaleValue
,
ijField
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
);
calculateFixedFieldRealSpacePairIxn_kernel
(
localParticle
,
psA
[
jIdx
],
dScaleValue
,
pScaleValue
,
ijField
);
unsigned
int
outOfBounds
=
(
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
jIdx
)
>=
cSim
.
atoms
)
)
?
1
:
0
;
...
...
@@ -317,67 +240,8 @@ if( atomI == targetAtom || targetAtom == (y+j) ){
}
}
#ifdef AMOEBA_DEBUG
if
(
(
atomI
==
targetAtom
||
(
y
+
jIdx
)
==
targetAtom
)
){
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
(
y
+
jIdx
)
:
atomI
;
unsigned
int
indexI
=
(
atomI
==
targetAtom
)
?
0
:
2
;
unsigned
int
indexJ
=
(
atomI
==
targetAtom
)
?
2
:
0
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
jIdx
);
debugArray
[
index
].
z
=
dScaleValue
;
debugArray
[
index
].
w
=
pScaleValue
;
float
flag
=
9
.
0
f
;
/*
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = (float) bExclusionFlag;
debugArray[index].y = (float) (tgx);
debugArray[index].z = (float) j;
debugArray[index].w = jIdx;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = (float) dScaleMask;
debugArray[index].y = (float) pScaleMask.x;
debugArray[index].z = (float) pScaleMask.y;
debugArray[index].w = (float) flags;
*/
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
outOfBounds
?
0
.
0
f
:
ijField
[
0
].
x
;
debugArray
[
index
].
y
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
].
x
;
debugArray
[
index
].
z
=
outOfBounds
?
0
.
0
f
:
ijField
[
2
].
x
;
debugArray
[
index
].
w
=
flag
+
1
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
outOfBounds
?
0
.
0
f
:
ijField
[
0
].
y
;
debugArray
[
index
].
y
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
].
y
;
debugArray
[
index
].
z
=
outOfBounds
?
0
.
0
f
:
ijField
[
2
].
y
;
debugArray
[
index
].
w
=
flag
+
2
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
outOfBounds
?
0
.
0
f
:
ijField
[
0
].
z
;
debugArray
[
index
].
y
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
].
z
;
debugArray
[
index
].
z
=
outOfBounds
?
0
.
0
f
:
ijField
[
2
].
z
;
debugArray
[
index
].
w
=
flag
+
3
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
outOfBounds
?
0
.
0
f
:
ijField
[
0
].
w
;
debugArray
[
index
].
y
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
].
w
;
debugArray
[
index
].
z
=
outOfBounds
?
0
.
0
f
:
ijField
[
2
].
w
;
debugArray
[
index
].
w
=
flag
+
4
.
0
f
;
for
(
int
pullIndex
=
0
;
pullIndex
<
maxPullIndex
;
pullIndex
++
){
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
}
}
#endif
}
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
// j-loop block
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
View file @
2b508482
//-----------------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------------
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaGpuTypes.h"
#include "amoebaCudaKernels.h"
...
...
@@ -33,9 +55,6 @@ void GetCalculateAmoebaCudaPmeMutualInducedFieldSim(amoebaGpuContext amoebaGpu)
RTERROR
(
status
,
"GetCalculateAmoebaCudaPmeMutualInducedFieldSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed"
);
}
//#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
#undef INCLUDE_MI_FIELD_BUFFERS
#define INCLUDE_MI_FIELD_BUFFERS
#include "kCalculateAmoebaCudaMutualInducedParticle.h"
...
...
@@ -331,10 +350,6 @@ static void kReduceMutualInducedFieldDelta_kernel(int numberOfEntries, float* ar
{
epsilon
[
0
]
=
delta
[
0
].
x
>
delta
[
0
].
y
?
delta
[
0
].
x
:
delta
[
0
].
y
;
epsilon
[
0
]
=
48.033324
f
*
sqrtf
(
epsilon
[
0
]
/
(
(
float
)
(
numberOfEntries
/
3
))
);
#ifdef AMOEBA_DEBUG
epsilon
[
1
]
=
48.033324
f
*
sqrtf
(
delta
[
0
].
x
/
(
(
float
)
(
numberOfEntries
/
3
))
);
epsilon
[
2
]
=
48.033324
f
*
sqrtf
(
delta
[
0
].
y
/
(
(
float
)
(
numberOfEntries
/
3
))
);
#endif
}
}
...
...
@@ -416,18 +431,8 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
CUDAStream
<
float
>*
outputArray
,
CUDAStream
<
float
>*
outputPolarArray
)
{
static
unsigned
int
threadsPerBlock
=
0
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
#ifdef AMOEBA_DEBUG
int
targetAtom
=
546
;
static
const
char
*
methodName
=
"cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply"
;
static
int
iteration
=
1
;
if
(
1
&&
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s
\n
"
,
methodName
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
static
unsigned
int
threadsPerBlock
=
0
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
kClearFields_3
(
amoebaGpu
,
2
);
...
...
@@ -444,17 +449,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
MutualInducedParticle
),
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
gpu
->
psInteractionCount
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%lu shrd=%lu ixnCt=%lu workUnits=%u
\n
"
,
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
gpu
->
bOutputBufferPerWarp
,
sizeof
(
MutualInducedParticle
),
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaPmeMutualInducedFieldCutoffByWarp_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
>>>
(
...
...
@@ -474,43 +468,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
kReduceMutualInducedFields
(
amoebaGpu
,
outputArray
,
outputPolarArray
);
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
&&
iteration
==
1
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished maxtrixMultiply kernel execution %d -- Direct only -- self added in kSorUpdateMutualInducedField_kernel
\n
"
,
iteration
);
(
void
)
fflush
(
amoebaGpu
->
log
);
outputArray
->
Download
();
outputPolarArray
->
Download
();
//debugArray->Download();
int
maxPrint
=
5
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
int
indexOffset
=
ii
*
3
;
// MI
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Mult[%16.9e %16.9e %16.9e] "
,
outputArray
->
_pSysData
[
indexOffset
],
outputArray
->
_pSysData
[
indexOffset
+
1
],
outputArray
->
_pSysData
[
indexOffset
+
2
]
);
// MI polar
(
void
)
fprintf
(
amoebaGpu
->
log
,
"MultP[%16.9e %16.9e %16.9e]
\n
"
,
outputPolarArray
->
_pSysData
[
indexOffset
],
outputPolarArray
->
_pSysData
[
indexOffset
+
1
],
outputPolarArray
->
_pSysData
[
indexOffset
+
2
]
);
if
(
ii
==
maxPrint
&&
(
gpu
->
natoms
-
maxPrint
)
>
ii
){
ii
=
gpu
->
natoms
-
maxPrint
;
}
}
(
void
)
fflush
(
amoebaGpu
->
log
);
iteration
++
;
}
#endif
}
/**---------------------------------------------------------------------------------------
...
...
@@ -526,25 +483,12 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
// ---------------------------------------------------------------------------------------
//#define AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
static
const
char
*
methodName
=
"cudaComputeAmoebaPmeMutualInducedFieldBySOR"
;
static
int
timestep
=
0
;
std
::
vector
<
int
>
fileId
;
timestep
++
;
fileId
.
resize
(
2
);
fileId
[
0
]
=
timestep
;
fileId
[
1
]
=
1
;
#endif
// ---------------------------------------------------------------------------------------
int
done
;
int
iteration
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
// set [ E_Field & E_FieldPolar ] to [ E_Field & E_FieldPolar ]*Polarizability
// initialize [ InducedDipole & InducedDipolePolar ] to [ E_Field & E_FieldPolar]*Polarizability
...
...
@@ -559,19 +503,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
cudaMemcpy
(
amoebaGpu
->
psInducedDipole
->
_pDevData
,
amoebaGpu
->
psE_Field
->
_pDevData
,
3
*
gpu
->
sim
.
paddedNumberOfAtoms
*
sizeof
(
float
),
cudaMemcpyDeviceToDevice
);
cudaMemcpy
(
amoebaGpu
->
psInducedDipolePolar
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
,
3
*
gpu
->
sim
.
paddedNumberOfAtoms
*
sizeof
(
float
),
cudaMemcpyDeviceToDevice
);
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
std
::
vector
<
int
>
fileId
;
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeEFieldPolarity"
,
fileId
,
outputVector
);
}
#endif
// if polarization type is direct, set flags signalling done and return
if
(
amoebaGpu
->
amoebaSim
.
polarizationType
)
...
...
@@ -609,12 +540,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
amoebaGpu
->
psCurrentEpsilon
->
_pDevData
);
LAUNCHERROR
(
"kReducePmeMutualInducedFieldDelta"
);
#ifdef AMOEBA_DEBUG
if
(
0
&&
amoebaGpu
->
log
){
// trackMutualInducedIterations
trackMutualInducedIterations
(
amoebaGpu
,
iteration
);
}
#endif
// Debye=48.033324f
amoebaGpu
->
psCurrentEpsilon
->
Download
();
float
currentEpsilon
=
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
0
];
...
...
@@ -624,79 +549,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
done
=
1
;
}
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
amoebaGpu
->
psInducedDipole
->
Download
();
amoebaGpu
->
psInducedDipolePolar
->
Download
();
#if 1
(
void
)
fprintf
(
amoebaGpu
->
log
,
"cudaComputeAmoebaPmeMutualInducedFieldBySOR iteration=%3d eps %14.6e [%14.6e %14.6e] done=%d
\n
"
,
iteration
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
1
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
2
],
done
);
#else
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s iteration=%3d eps %14.6e %14.6e crrntEps=%14.6e %14.6e %14.6e %14.6e done=%d
\n
"
,
methodName
,
iteration
,
sum1
,
sum2
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
0
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
1
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
2
],
done
);
#endif
(
void
)
fflush
(
amoebaGpu
->
log
);
if
(
0
){
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
iteration
);
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeMI"
,
fileId
,
outputVector
);
}
/*
int offset = 0;
int maxPrint = 10;
for( int ii = 0; ii < gpu->natoms; ii++ ){
(void) fprintf( amoebaGpu->log, "%4d ", ii );
(void) fprintf( amoebaGpu->log," Mi[%14.6e %14.6e %14.6e] ",
amoebaGpu->psInducedDipole->_pSysData[offset],
amoebaGpu->psInducedDipole->_pSysData[offset+1],
amoebaGpu->psInducedDipole->_pSysData[offset+2] );
(void) fprintf( amoebaGpu->log,"Mip[%14.6e %14.6e %14.6e]\n",
amoebaGpu->psInducedDipolePolar->_pSysData[offset],
amoebaGpu->psInducedDipolePolar->_pSysData[offset+1],
amoebaGpu->psInducedDipolePolar->_pSysData[offset+2] );
if( ii == maxPrint && (ii < (gpu->natoms - maxPrint) ) ){
ii = (gpu->natoms - maxPrint);
offset = 3*(ii+1);
} else {
offset += 3;
}
}
(void) fflush( amoebaGpu->log );
*/
if
(
0
){
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
iteration
);
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeMI"
,
fileId
,
outputVector
);
}
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"MI iteration=%3d eps %14.6e [%14.6e %14.6e] done=%d
\n
"
,
iteration
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
1
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
2
],
done
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#endif
// exit if nan
if
(
amoebaGpu
->
mutualInducedCurrentEpsilon
!=
amoebaGpu
->
mutualInducedCurrentEpsilon
){
...
...
@@ -710,25 +562,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
amoebaGpu
->
mutualInducedDone
=
done
;
amoebaGpu
->
mutualInducedConverged
=
(
!
done
||
iteration
>
amoebaGpu
->
mutualInducedMaxIterations
)
?
0
:
1
;
#ifdef AMOEBA_DEBUG
if
(
0
){
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeMI"
,
fileId
,
outputVector
);
}
if
(
0
){
static
int
iteration
=
0
;
checkForNans
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"CudaPmeMI"
,
stderr
);
checkForNans
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
gpu
->
psAtomIndex
->
_pSysData
,
iteration
,
"CudaPmeMIPolar"
,
stderr
);
}
#endif
// ---------------------------------------------------------------------------------------
}
void
cudaComputeAmoebaPmeMutualInducedField
(
amoebaGpuContext
amoebaGpu
)
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
View file @
2b508482
...
...
@@ -37,9 +37,6 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
void
METHOD_NAME
(
kCalculateAmoebaPmeMutualInducedField
,
_kernel
)(
unsigned
int
*
workUnit
,
float
*
outputField
,
float
*
outputFieldPolar
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
,
unsigned
int
targetAtom
#endif
){
extern
__shared__
MutualInducedParticle
sA
[];
...
...
@@ -52,10 +49,6 @@ void METHOD_NAME(kCalculateAmoebaPmeMutualInducedField, _kernel)(
unsigned
int
lasty
=
0xFFFFFFFF
;
const
float
uscale
=
1
.
0
f
;
#ifdef AMOEBA_DEBUG
float4
pullBack
[
4
];
#endif
while
(
pos
<
end
)
{
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
View file @
2b508482
//-----------------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------------
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "cudaKernels.h"
#include "amoebaCudaKernels.h"
...
...
@@ -51,8 +73,6 @@ __device__ static float normVector3( float* vector )
return
returnNorm
;
}
#undef AMOEBA_DEBUG
// ZThenX == 0
// Bisector == 1
// ZBisect == 2
...
...
@@ -379,10 +399,6 @@ void kCudaComputeLabFrameMoments_kernel( void )
void
cudaComputeAmoebaLabFrameMoments
(
amoebaGpuContext
amoebaGpu
)
{
// ---------------------------------------------------------------------------------------
static
const
char
*
methodName
=
"computeCudaAmoebaLabFrameMoments"
;
// ---------------------------------------------------------------------------------------
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
...
...
@@ -390,33 +406,6 @@ void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu )
int
numBlocks
=
gpu
->
sim
.
blocks
;
int
numThreads
=
gpu
->
sim
.
threads_per_block
;
//#define AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s: numBlocks/atoms=%d
\n
"
,
methodName
,
numBlocks
);
(
void
)
fflush
(
amoebaGpu
->
log
);
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
Download
();
amoebaGpu
->
psMolecularDipole
->
Download
();
amoebaGpu
->
psMultipoleParticlesTorqueBufferIndices
->
Download
();
gpu
->
psPosq4
->
Download
();
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
int
mIndex
=
3
*
ii
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%6d [%6d %6d %6d %6d] x[%16.9e %16.9e %16.9e] %s [%6d %6d %6d %6d]
\n
"
,
ii
,
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
x
,
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
y
,
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
z
,
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
w
,
gpu
->
psPosq4
->
_pSysData
[
ii
].
x
,
gpu
->
psPosq4
->
_pSysData
[
ii
].
y
,
gpu
->
psPosq4
->
_pSysData
[
ii
].
z
,
(
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
w
>
1
?
" XXX"
:
""
),
amoebaGpu
->
psMultipoleParticlesTorqueBufferIndices
->
_pSysData
[
ii
].
x
,
amoebaGpu
->
psMultipoleParticlesTorqueBufferIndices
->
_pSysData
[
ii
].
y
,
amoebaGpu
->
psMultipoleParticlesTorqueBufferIndices
->
_pSysData
[
ii
].
z
,
amoebaGpu
->
psMultipoleParticlesTorqueBufferIndices
->
_pSysData
[
ii
].
w
);
//if( ii == 30 )ii = gpu->natoms - 30;
}
}
#endif
// copy molecular moments to lab frame moment arrays
// check if chiral center requires moments to have sign flipped
// compute lab frame moments
...
...
@@ -428,7 +417,7 @@ void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu )
LAUNCHERROR
(
"kCudaComputeCheckChiral"
);
kCudaComputeLabFrameMoments_kernel
<<<
numBlocks
,
numThreads
>>>
(
);
LAUNCHERROR
(
methodName
);
LAUNCHERROR
(
"kCudaComputeLabFrameMoments"
);
}
...
...
@@ -505,5 +494,3 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
}
}
#undef AMOEBA_DEBUG
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaUtilities.cu
View file @
2b508482
//-----------------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------------
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaCudaKernels.h"
//#define AMOEBA_DEBUG
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaAmoebaGmxSimulation
cAmoebaSim
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
View file @
2b508482
//-----------------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------------
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaGpuTypes.h"
#include "amoebaCudaKernels.h"
...
...
@@ -38,12 +60,6 @@ void GetCalculateAmoebaCudaVdw14_7Sim(amoebaGpuContext amoebaGpu)
RTERROR
(
status
,
"GetCalculateAmoebaCudaVdw14_7Sim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed"
);
}
//#define AMOEBA_DEBUG_PRINT
#undef AMOEBA_DEBUG_PRINT
//#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
__device__
void
zeroVdw14_7SharedForce
(
struct
Vdw14_7Particle
*
sA
)
{
// zero shared fields
...
...
@@ -101,11 +117,7 @@ __device__ void getVdw14_7CombindedSigmaEpsilon_kernel( int sigmaCombiningRule,
}
__device__
void
calculateVdw14_7PairIxn_kernel
(
float
combindedSigma
,
float
combindedEpsilon
,
float
force
[
3
],
float
*
energy
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
#endif
)
float
force
[
3
],
float
*
energy
)
{
const
float
deltaHalM1
=
0.07
f
;
...
...
@@ -120,14 +132,6 @@ __device__ void calculateVdw14_7PairIxn_kernel( float combindedSigma, float c
float
r2
=
force
[
0
]
*
force
[
0
]
+
force
[
1
]
*
force
[
1
]
+
force
[
2
]
*
force
[
2
];
if
(
r2
>
cAmoebaSim
.
vdwCutoff2
){
*
energy
=
force
[
0
]
=
force
[
1
]
=
force
[
2
]
=
0.0
f
;
#ifdef AMOEBA_DEBUG
float
rI
=
rsqrtf
(
r2
);
float
r
=
1.0
f
/
rI
;
debugArray
[
0
].
x
=
r
;
debugArray
[
0
].
y
=
debugArray
[
0
].
z
=
debugArray
[
0
].
w
=
0.0
f
;
debugArray
[
1
].
x
=
debugArray
[
1
].
y
=
debugArray
[
1
].
z
=
0.0
f
;
debugArray
[
1
].
w
=
r
;
#endif
return
;
}
float
rI
=
rsqrtf
(
r2
);
...
...
@@ -156,17 +160,6 @@ __device__ void calculateVdw14_7PairIxn_kernel( float combindedSigma, float c
force
[
1
]
*=
deltaE
;
force
[
2
]
*=
deltaE
;
#ifdef AMOEBA_DEBUG
debugArray
[
0
].
x
=
r
;
debugArray
[
0
].
y
=
deltaE
;
debugArray
[
0
].
z
=
combindedSigma
;
debugArray
[
0
].
w
=
combindedEpsilon
;
debugArray
[
1
].
x
=
tau
;
debugArray
[
1
].
y
=
rho
;
debugArray
[
1
].
z
=
gTau
;
debugArray
[
1
].
w
=
r
;
#endif
}
// perform reduction of force on H's and add to heavy atom partner
...
...
@@ -504,22 +497,6 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
#ifdef AMOEBA_DEBUG_PRINT
static
const
char
*
methodName
=
"kCalculateAmoebaVdw14_7Forces"
;
if
(
1
&&
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s:
\n
"
,
methodName
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#ifdef AMOEBA_DEBUG
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
int
maxSlots
=
10
;
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
maxSlots
*
paddedNumberOfAtoms
,
1
,
"DebugArray"
);
memset
(
debugArray
->
_pSysData
,
0
,
sizeof
(
float
)
*
4
*
maxSlots
*
paddedNumberOfAtoms
);
debugArray
->
Upload
();
int
targetAtom
=
1
;
#endif
#endif
// set threads/block first time through
// on first pass, set threads/block
...
...
@@ -535,44 +512,9 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
Vdw14_7Particle
),
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
#ifdef AMOEBA_DEBUG_PRINT
if
(
0
){
static
int
iteration
=
0
;
checkForNansFloat4
(
gpu
->
natoms
,
gpu
->
psPosq4
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"
\n\n
zCoordPreCopyVdw"
,
stderr
);
}
#endif
kCalculateAmoebaVdw14_7CopyCoordinates
(
amoebaGpu
,
gpu
->
psPosq4
,
amoebaGpu
->
psAmoebaVdwCoordinates
);
kCalculateAmoebaVdw14_7CoordinateReduction
(
amoebaGpu
,
amoebaGpu
->
psAmoebaVdwCoordinates
,
amoebaGpu
->
psAmoebaVdwCoordinates
);
#ifdef AMOEBA_DEBUG_PRINT
if
(
1
&&
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Apply cutoff=%d warp=%d
\n
"
,
applyCutoff
,
gpu
->
bOutputBufferPerWarp
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u ixnCt=%u workUnits=%u
\n
"
,
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
Vdw14_7Particle
),
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
if
(
0
){
gpu
->
psInteractionCount
->
Download
();
amoebaGpu
->
psVdwWorkUnit
->
Download
();
unsigned
int
totalWarps
=
(
gpu
->
sim
.
nonbond_blocks
*
threadsPerBlock
)
/
GRID
;
float
ratiof
=
(
float
)
totalWarps
/
(
float
)
amoebaGpu
->
psVdwWorkUnit
->
_length
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Ixn warps=%u count=%u
\n
"
,
totalWarps
,
gpu
->
psInteractionCount
->
_pSysData
[
0
]
);
for
(
unsigned
int
ii
=
0
;
ii
<
amoebaGpu
->
psVdwWorkUnit
->
_length
;
ii
++
){
unsigned
int
x
=
amoebaGpu
->
psVdwWorkUnit
->
_pSysData
[
ii
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
unsigned
int
exclusions
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
float
warp
=
(
float
)(
ii
)
*
ratiof
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"GpuCell %8u [%5u %5u %1u] %10u warp=%15.6f
\n
"
,
ii
,
x
,
y
,
exclusions
,
warp
);
}
}
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
// clear output arrays
kClearFields_3
(
amoebaGpu
,
1
);
...
...
@@ -588,33 +530,6 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kFindInteractionsWithinBlocksVdwPeriodic"
);
#ifdef AMOEBA_DEBUG
if
(
0
&&
amoebaGpu
->
log
){
gpu
->
psInteractionCount
->
Download
();
gpu
->
psInteractingWorkUnit
->
Download
();
gpu
->
psInteractionFlag
->
Download
();
amoebaGpu
->
psVdwWorkUnit
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Vdw Ixn count=%u
\n
"
,
gpu
->
psInteractionCount
->
_pSysData
[
0
]
);
for
(
unsigned
int
ii
=
0
;
ii
<
gpu
->
psInteractingWorkUnit
->
_length
;
ii
++
){
unsigned
int
x
=
gpu
->
psInteractingWorkUnit
->
_pSysData
[
ii
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
unsigned
int
exclusions
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"GpuCell %8u %8u [%5u %5u %1u] %10u "
,
ii
,
gpu
->
psInteractingWorkUnit
->
_pSysData
[
ii
],
x
,
y
,
exclusions
,
gpu
->
psInteractionFlag
->
_pSysData
[
ii
]
);
x
=
amoebaGpu
->
psVdwWorkUnit
->
_pSysData
[
ii
];
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
exclusions
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
" AmGpu %8u [%5u %5u %1u]
\n
"
,
amoebaGpu
->
psWorkUnit
->
_pSysData
[
ii
],
x
,
y
,
exclusions
);
}
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaVdw14_7CutoffByWarp_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
...
...
@@ -622,12 +537,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
amoebaGpu
->
psVdwSigmaEpsilon
->
_pDevData
,
amoebaGpu
->
vdwSigmaCombiningRule
,
amoebaGpu
->
vdwEpsilonCombiningRule
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
#endif
}
else
{
kCalculateAmoebaVdw14_7Cutoff_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
...
...
@@ -636,13 +546,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
amoebaGpu
->
psVdwSigmaEpsilon
->
_pDevData
,
amoebaGpu
->
vdwSigmaCombiningRule
,
amoebaGpu
->
vdwEpsilonCombiningRule
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
#endif
}
LAUNCHERROR
(
"kCalculateAmoebaVdw14_7Cutoff"
);
...
...
@@ -656,12 +560,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
amoebaGpu
->
psVdwSigmaEpsilon
->
_pDevData
,
amoebaGpu
->
vdwSigmaCombiningRule
,
amoebaGpu
->
vdwEpsilonCombiningRule
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
#endif
}
else
{
kCalculateAmoebaVdw14_7N2_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
...
...
@@ -670,112 +569,15 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
amoebaGpu
->
psVdwSigmaEpsilon
->
_pDevData
,
amoebaGpu
->
vdwSigmaCombiningRule
,
amoebaGpu
->
vdwEpsilonCombiningRule
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
#endif
}
LAUNCHERROR
(
"kCalculateAmoebaVdw14_7N2"
);
}
#ifdef AMOEBA_DEBUG_PRINT
if
(
amoebaGpu
->
log
){
static
int
iteration
=
0
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished 14-7 kernel execution step=%d
\n
"
,
++
iteration
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#ifdef AMOEBA_DEBUG
debugArray
->
Download
();
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
double
cutOff
=
1.0e+03
;
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
int
debugIndex
=
jj
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d %5d DebugVdw
\n
"
,
targetAtom
,
jj
);
for
(
int
kk
=
0
;
kk
<
5
;
kk
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"[%16.9e %16.9e %16.9e %16.9e]
\n
"
,
debugArray
->
_pSysData
[
debugIndex
].
x
,
debugArray
->
_pSysData
[
debugIndex
].
y
,
debugArray
->
_pSysData
[
debugIndex
].
z
,
debugArray
->
_pSysData
[
debugIndex
].
w
);
if
(
kk
==
4
&&
(
fabs
(
debugArray
->
_pSysData
[
debugIndex
].
x
)
>
cutOff
||
fabs
(
debugArray
->
_pSysData
[
debugIndex
].
y
)
>
cutOff
||
fabs
(
debugArray
->
_pSysData
[
debugIndex
].
z
)
>
cutOff
)
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
" XXXX
\n
"
);
}
debugIndex
+=
paddedNumberOfAtoms
;
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
}
#endif
/*
amoebaGpu->psWorkArray_3_2->Download();
amoebaGpu->psWorkArray_3_1->Download();
//for( int jj = 0; jj < 3*gpu->natoms; jj += 3 )
for( int jj = 0; jj < 3*gpu->natoms; jj += 3 ){
for( int kk = 0; kk < gpu->sim.outputBuffers; kk++ ){
float delta = fabs(amoebaGpu->psWorkArray_3_1->_pSysStream[kk][jj+2] + 1.0f);
if( delta < 5.0e-06 || isNanOrInfinity( (double) amoebaGpu->psWorkArray_3_1->_pSysStream[kk][jj] ) || isNanOrInfinity( (double) amoebaGpu->psWorkArray_3_1->_pSysStream[kk][jj+2] ) )
(void) fprintf( amoebaGpu->log,"%6d %6d [%16.9e %16.9e %16.9e] [%16.9e %16.9e %16.9e]\n", jj, kk,
amoebaGpu->psWorkArray_3_1->_pSysStream[kk][jj],
amoebaGpu->psWorkArray_3_1->_pSysStream[kk][jj+1],
amoebaGpu->psWorkArray_3_1->_pSysStream[kk][jj+2],
amoebaGpu->psWorkArray_3_2->_pSysStream[kk][jj],
amoebaGpu->psWorkArray_3_2->_pSysStream[kk][jj+1],
amoebaGpu->psWorkArray_3_2->_pSysStream[kk][jj+2] );
}
}
*/
}
#endif
#ifdef AMOEBA_DEBUG
if
(
0
){
static
int
iteration
=
0
;
checkForNansFloat4
(
gpu
->
natoms
,
amoebaGpu
->
gpuContext
->
psForce4
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"PreVdw"
,
stderr
);
checkForNansFloat4
(
gpu
->
natoms
,
gpu
->
psPosq4
,
gpu
->
psAtomIndex
->
_pSysData
,
iteration
,
"zCoordPreVdw"
,
stderr
);
}
#endif
kReduceVdw14_7
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
);
#ifdef AMOEBA_DEBUG
if
(
0
){
static
int
iteration
=
0
;
checkForNans
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psWorkArray_3_2
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"Vdw32"
,
stderr
);
}
#endif
kCalculateAmoebaVdw14_7Reduction
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
,
amoebaGpu
->
gpuContext
->
psForce4
);
kCalculateAmoebaVdw14_7NonReduction
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
,
amoebaGpu
->
gpuContext
->
psForce4
);
#ifdef AMOEBA_DEBUG
if
(
0
){
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
CUDAStream
<
float4
>*
psTempForce
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
,
1
,
"psTempForce"
);
kClearFloat4
(
amoebaGpu
,
paddedNumberOfAtoms
,
psTempForce
);
//kCalculateAmoebaVdw14_7Reduction( amoebaGpu, amoebaGpu->psWorkArray_3_2, psTempForce );
kCalculateAmoebaVdw14_7NonReduction
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
,
psTempForce
);
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
psTempForce
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaVdw"
,
fileId
,
outputVector
);
delete
psTempForce
;
//exit(0);
}
if
(
0
){
static
int
iteration
=
0
;
checkForNansFloat4
(
gpu
->
natoms
,
amoebaGpu
->
gpuContext
->
psForce4
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"VdwForce"
,
stderr
);
}
#endif
#ifdef AMOEBA_DEBUG
delete
debugArray
;
#endif
// ---------------------------------------------------------------------------------------
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.h
View file @
2b508482
...
...
@@ -39,9 +39,6 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)(
int
sigmaCombiningRule
,
int
epsilonCombiningRule
,
float
*
outputForce
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
,
unsigned
int
targetAtom
#endif
){
extern
__shared__
Vdw14_7Particle
sA
[];
...
...
@@ -57,9 +54,6 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)(
int
exclusionMask
;
float
totalEnergy
=
0
.
0
f
;
#ifdef AMOEBA_DEBUG
float4
pullDebug
[
5
];
#endif
while
(
pos
<
end
)
{
...
...
@@ -129,11 +123,7 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)(
}
float
energy
;
calculateVdw14_7PairIxn_kernel
(
combindedSigma
,
combindedEpsilon
,
ijForce
,
&
energy
#ifdef AMOEBA_DEBUG
,
pullDebug
#endif
);
calculateVdw14_7PairIxn_kernel
(
combindedSigma
,
combindedEpsilon
,
ijForce
,
&
energy
);
// mask out excluded ixns
unsigned
int
mask
=
(
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
j
)
>=
cSim
.
atoms
)
)
?
0
:
1
;
...
...
@@ -148,41 +138,6 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)(
forceSum
[
1
]
+=
mask
?
ijForce
[
1
]
:
0
.
0
f
;
forceSum
[
2
]
+=
mask
?
ijForce
[
2
]
:
0
.
0
f
;
totalEnergy
+=
mask
?
0
.
5
f
*
energy
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
||
(
y
+
j
)
==
targetAtom
){
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
(
y
+
j
)
:
atomI
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
j
);
debugArray
[
index
].
z
=
-
1
.
0
f
;
debugArray
[
index
].
w
=
(
float
)
(
mask
+
1
);
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
(
float
)
x
;
debugArray
[
index
].
y
=
(
float
)
y
;
debugArray
[
index
].
z
=
(
float
)
tgx
;
debugArray
[
index
].
w
=
energy
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullDebug
[
0
].
x
;
debugArray
[
index
].
y
=
pullDebug
[
0
].
y
;
debugArray
[
index
].
z
=
pullDebug
[
0
].
z
;
debugArray
[
index
].
w
=
pullDebug
[
0
].
w
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullDebug
[
1
].
x
;
debugArray
[
index
].
y
=
pullDebug
[
1
].
y
;
debugArray
[
index
].
z
=
pullDebug
[
1
].
z
;
debugArray
[
index
].
w
=
pullDebug
[
1
].
w
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
ijForce
[
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
ijForce
[
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
ijForce
[
2
]
:
0
.
0
f
;
}
#endif
}
// Write results
...
...
@@ -260,11 +215,7 @@ flags = 0xFFFFFFFF;
ijForce
[
1
]
-=
floor
(
ijForce
[
1
]
*
cSim
.
invPeriodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
ijForce
[
2
]
-=
floor
(
ijForce
[
2
]
*
cSim
.
invPeriodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
}
calculateVdw14_7PairIxn_kernel
(
combindedSigma
,
combindedEpsilon
,
ijForce
,
&
energy
#ifdef AMOEBA_DEBUG
,
pullDebug
#endif
);
calculateVdw14_7PairIxn_kernel
(
combindedSigma
,
combindedEpsilon
,
ijForce
,
&
energy
);
// mask out excluded ixns
...
...
@@ -328,40 +279,6 @@ flags = 0xFFFFFFFF;
#endif
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
||
(
y
+
jIdx
)
==
targetAtom
){
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
(
y
+
jIdx
)
:
atomI
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
jIdx
);
debugArray
[
index
].
z
=
-
3
.
0
;
debugArray
[
index
].
w
=
(
float
)
(
mask
+
1
);
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
(
float
)
x
;
debugArray
[
index
].
y
=
(
float
)
y
;
debugArray
[
index
].
z
=
(
float
)
tgx
;
debugArray
[
index
].
w
=
energy
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullDebug
[
0
].
x
;
debugArray
[
index
].
y
=
pullDebug
[
0
].
y
;
debugArray
[
index
].
z
=
pullDebug
[
0
].
z
;
debugArray
[
index
].
w
=
pullDebug
[
0
].
w
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullDebug
[
1
].
x
;
debugArray
[
index
].
y
=
pullDebug
[
1
].
y
;
debugArray
[
index
].
z
=
pullDebug
[
1
].
z
;
debugArray
[
index
].
w
=
pullDebug
[
1
].
w
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
ijForce
[
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
ijForce
[
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
ijForce
[
2
]
:
0
.
0
f
;
}
#endif
#ifdef USE_CUTOFF
}
#endif
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
View file @
2b508482
//-----------------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------------
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaGpuTypes.h"
#include "amoebaCudaKernels.h"
...
...
@@ -35,9 +57,6 @@ void GetCalculateAmoebaCudaWcaDispersionSim(amoebaGpuContext amoebaGpu)
RTERROR
(
status
,
"GetCalculateAmoebaCudaWcaDispersionSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed"
);
}
//#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
__device__
void
zeroWcaDispersionSharedForce
(
struct
WcaDispersionParticle
*
sA
)
{
// zero shared fields
...
...
@@ -105,14 +124,7 @@ __device__ void calculateWcaDispersionPairIxn_kernel( float4 atomCoordinatesI, f
float
radiusI
,
float
radiusJ
,
float
rmixo
,
float
rmixh
,
float
emixo
,
float
emixh
,
float
force
[
3
],
float
*
energy
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
#endif
)
{
float
force
[
3
],
float
*
energy
)
{
const
float
pi
=
3.1415926535897
f
;
const
float
shctd
=
cAmoebaSim
.
shctd
;
...
...
@@ -318,29 +330,6 @@ __device__ void calculateWcaDispersionPairIxn_kernel( float4 atomCoordinatesI, f
force
[
1
]
*=
de
;
force
[
2
]
*=
de
;
#ifdef AMOEBA_DEBUG
debugArray
[
0
].
x
=
sum
;
debugArray
[
0
].
y
=
sum
;
debugArray
[
0
].
z
=
sum
;
debugArray
[
0
].
w
=
sum
;
#if 0
debugArray[0].x = r;
debugArray[0].y = -r*de/awater;
debugArray[0].z = emixo;
debugArray[0].w = mask2;
debugArray[1].x = dl;
debugArray[1].y = du;
debugArray[1].z = lik;
debugArray[1].w = uik;
debugArray[2].x = du1;
debugArray[2].y = du2;
debugArray[2].z = term;
debugArray[2].w = sk;
#endif
#endif
}
// Include versions of the kernels for N^2 calculations.
...
...
@@ -385,16 +374,6 @@ void kCalculateAmoebaWcaDispersionForces( amoebaGpuContext amoebaGpu )
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
WcaDispersionParticle
),
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u ixnCt=%u workUnits=%u
\n
"
,
methodName
,
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
gpu
->
bOutputBufferPerWarp
,
sizeof
(
WcaDispersionParticle
),
sizeof
(
WcaDispersionParticle
)
*
threadsPerBlock
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaWcaDispersionN2ByWarp_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
WcaDispersionParticle
)
*
threadsPerBlock
>>>
(
...
...
plugins/amoeba/platforms/cuda/src/kernels/kFindInteractingBlocksVdw.h
View file @
2b508482
...
...
@@ -24,56 +24,6 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernels for identifying interacting blocks. It is included
* several times in kCalculateCDLJForces.cu with different #defines to generate
* different versions of the kernels.
*/
/**
* Find a bounding box for the atoms in each block.
*/
/*
__global__ void METHOD_NAME(kFindBlockBounds, _kernel)()
{
unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int base = pos << GRIDBITS;
if (base < cSim.atoms)
{
float4 apos = cSim.pPosq[base];
#ifdef USE_PERIODIC
apos.x -= floor(apos.x*cSim.invPeriodicBoxSizeX)*cSim.periodicBoxSizeX;
apos.y -= floor(apos.y*cSim.invPeriodicBoxSizeY)*cSim.periodicBoxSizeY;
apos.z -= floor(apos.z*cSim.invPeriodicBoxSizeZ)*cSim.periodicBoxSizeZ;
float4 firstPoint = apos;
#endif
float minx = apos.x;
float maxx = apos.x;
float miny = apos.y;
float maxy = apos.y;
float minz = apos.z;
float maxz = apos.z;
for (unsigned int i = 1; i < GRID; i++)
{
apos = cSim.pPosq[base+i];
#ifdef USE_PERIODIC
apos.x -= floor((apos.x-firstPoint.x)*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
apos.y -= floor((apos.y-firstPoint.y)*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
apos.z -= floor((apos.z-firstPoint.z)*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
minx = min(minx, apos.x);
maxx = max(maxx, apos.x);
miny = min(miny, apos.y);
maxy = max(maxy, apos.y);
minz = min(minz, apos.z);
maxz = max(maxz, apos.z);
}
cSim.pGridBoundingBox[pos] = make_float4(0.5f*(maxx-minx), 0.5f*(maxy-miny), 0.5f*(maxz-minz), 0);
cSim.pGridCenter[pos] = make_float4(0.5f*(maxx+minx), 0.5f*(maxy+miny), 0.5f*(maxz+minz), 0);
}
}
*/
/**
* Compare the bounding boxes for each pair of blocks. If they are sufficiently far apart,
* mark them as non-interacting.
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment