Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
2b508482
Commit
2b508482
authored
Nov 23, 2011
by
Mark Friedrichs
Browse files
Added copyright
Removed debugging code
parent
36762962
Changes
33
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
222 additions
and
1310 deletions
+222
-1310
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.h
...cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.h
+7
-76
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
...eba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
+25
-3
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
...src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
+26
-190
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
...ms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
+26
-248
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
...rms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
+4
-140
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+29
-196
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
...a/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
+0
-7
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
...forms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
+26
-39
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaUtilities.cu
...atforms/cuda/src/kernels/kCalculateAmoebaCudaUtilities.cu
+25
-5
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
...platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
+26
-224
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.h
.../platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.h
+2
-85
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
+26
-47
plugins/amoeba/platforms/cuda/src/kernels/kFindInteractingBlocksVdw.h
...ba/platforms/cuda/src/kernels/kFindInteractingBlocksVdw.h
+0
-50
No files found.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.h
View file @
2b508482
...
@@ -36,11 +36,7 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
...
@@ -36,11 +36,7 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif
#endif
void
METHOD_NAME
(
kCalculateAmoebaMutualInducedField
,
_kernel
)(
void
METHOD_NAME
(
kCalculateAmoebaMutualInducedField
,
_kernel
)(
unsigned
int
*
workUnit
,
unsigned
int
*
workUnit
,
float
*
outputField
,
float
*
outputFieldPolar
float
*
outputField
,
float
*
outputFieldPolar
){
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
,
unsigned
int
targetAtom
#endif
){
extern
__shared__
MutualInducedParticle
sA
[];
extern
__shared__
MutualInducedParticle
sA
[];
...
@@ -99,11 +95,7 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
...
@@ -99,11 +95,7 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
// load coords, charge, ...
// load coords, charge, ...
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
j
],
ijField
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
j
],
ijField
);
#ifdef AMOEBA_DEBUG
,
debugArray
#endif
);
unsigned
int
mask
=
(
(
atomI
==
(
y
+
j
))
||
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
j
)
>=
cSim
.
atoms
)
)
?
0
:
1
;
unsigned
int
mask
=
(
(
atomI
==
(
y
+
j
))
||
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
j
)
>=
cSim
.
atoms
)
)
?
0
:
1
;
...
@@ -117,34 +109,6 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
...
@@ -117,34 +109,6 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
fieldPolarSum
[
1
]
+=
mask
?
ijField
[
1
][
1
]
:
0
.
0
f
;
fieldPolarSum
[
1
]
+=
mask
?
ijField
[
1
][
1
]
:
0
.
0
f
;
fieldPolarSum
[
2
]
+=
mask
?
ijField
[
1
][
2
]
:
0
.
0
f
;
fieldPolarSum
[
2
]
+=
mask
?
ijField
[
1
][
2
]
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
){
unsigned
int
index
=
y
+
j
;
unsigned
int
indexI
=
0
;
//unsigned int indexJ = 2;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
j
);
//debugArray[index].z = cAmoebaSim.pDampingFactorAndThole[atomI].x;
debugArray
[
index
].
z
=
(
float
)
cSim
.
atoms
;
debugArray
[
index
].
w
=
(
float
)
(
mask
+
1
);
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
ijField
[
indexI
][
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
ijField
[
indexI
][
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
ijField
[
indexI
][
2
]
:
0
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
ijField
[
indexI
+
1
][
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
ijField
[
indexI
+
1
][
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
ijField
[
indexI
+
1
][
2
]
:
0
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
(
float
)
x
;
debugArray
[
index
].
y
=
(
float
)
y
;
debugArray
[
index
].
z
=
(
float
)
1
.
0
f
;
}
#endif
}
}
// Write results
// Write results
...
@@ -161,9 +125,8 @@ if( atomI == targetAtom ){
...
@@ -161,9 +125,8 @@ if( atomI == targetAtom ){
#endif
#endif
}
}
else
{
else
// 100% utilization
{
// Read fixed atom data into registers and GRF
// Read fixed atom data into registers and GRF
if
(
lasty
!=
y
)
if
(
lasty
!=
y
)
{
{
...
@@ -185,11 +148,7 @@ if( atomI == targetAtom ){
...
@@ -185,11 +148,7 @@ if( atomI == targetAtom ){
// load coords, charge, ...
// load coords, charge, ...
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
tj
],
ijField
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
tj
],
ijField
);
#ifdef AMOEBA_DEBUG
,
debugArray
#endif
);
unsigned
int
mask
=
(
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
tj
)
>=
cSim
.
atoms
)
)
?
0
:
1
;
unsigned
int
mask
=
(
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
tj
)
>=
cSim
.
atoms
)
)
?
0
:
1
;
...
@@ -217,34 +176,6 @@ if( atomI == targetAtom ){
...
@@ -217,34 +176,6 @@ if( atomI == targetAtom ){
psA
[
tj
].
fieldPolar
[
1
]
+=
mask
?
ijField
[
3
][
1
]
:
0
.
0
f
;
psA
[
tj
].
fieldPolar
[
1
]
+=
mask
?
ijField
[
3
][
1
]
:
0
.
0
f
;
psA
[
tj
].
fieldPolar
[
2
]
+=
mask
?
ijField
[
3
][
2
]
:
0
.
0
f
;
psA
[
tj
].
fieldPolar
[
2
]
+=
mask
?
ijField
[
3
][
2
]
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
//#if 0
if
(
atomI
==
targetAtom
||
(
y
+
tj
)
==
targetAtom
){
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
(
y
+
tj
)
:
atomI
;
unsigned
int
indexI
=
(
atomI
==
targetAtom
)
?
0
:
2
;
//unsigned int indexJ = (atomI == targetAtom) ? 2 : 0;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
tj
);
debugArray
[
index
].
z
=
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
;
debugArray
[
index
].
w
=
(
float
)
(
mask
+
1
);
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
ijField
[
indexI
][
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
ijField
[
indexI
][
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
ijField
[
indexI
][
2
]
:
0
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
ijField
[
indexI
+
1
][
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
ijField
[
indexI
+
1
][
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
ijField
[
indexI
+
1
][
2
]
:
0
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
(
float
)
x
;
debugArray
[
index
].
y
=
(
float
)
y
;
debugArray
[
index
].
z
=
(
float
)
-
1
.
0
f
;
}
#endif
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
View file @
2b508482
//-----------------------------------------------------------------------------------------
/* -------------------------------------------------------------------------- *
* OpenMM *
//-----------------------------------------------------------------------------------------
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaGpuTypes.h"
#include "amoebaGpuTypes.h"
#include "cudaKernels.h"
#include "cudaKernels.h"
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
View file @
2b508482
///-----------------------------------------------------------------------------------------
/* -------------------------------------------------------------------------- *
* OpenMM *
//-----------------------------------------------------------------------------------------
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaGpuTypes.h"
#include "amoebaGpuTypes.h"
#include "cudaKernels.h"
#include "cudaKernels.h"
#include "amoebaCudaKernels.h"
#include "amoebaCudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h"
#include "kCalculateAmoebaCudaUtilities.h"
//#define AMOEBA_DEBUG
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaAmoebaGmxSimulation
cAmoebaSim
;
static
__constant__
cudaAmoebaGmxSimulation
cAmoebaSim
;
...
@@ -213,11 +233,7 @@ __device__ void calculateBn_kernel( float r, float4* bn, float* bn0, float *bn5
...
@@ -213,11 +233,7 @@ __device__ void calculateBn_kernel( float r, float4* bn, float* bn0, float *bn5
#undef SUB_METHOD_NAME
#undef SUB_METHOD_NAME
__device__
void
calculatePmeDirectElectrostaticPairIxnOrig_kernel
(
const
PmeDirectElectrostaticParticle
&
atomI
,
const
PmeDirectElectrostaticParticle
&
atomJ
,
__device__
void
calculatePmeDirectElectrostaticPairIxnOrig_kernel
(
const
PmeDirectElectrostaticParticle
&
atomI
,
const
PmeDirectElectrostaticParticle
&
atomJ
,
const
float
*
scalingFactors
,
float4
forceTorqueEnergy
[
3
]
const
float
*
scalingFactors
,
float4
forceTorqueEnergy
[
3
]){
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
#endif
){
float
xr
=
atomJ
.
x
-
atomI
.
x
;
float
xr
=
atomJ
.
x
-
atomI
.
x
;
float
yr
=
atomJ
.
y
-
atomI
.
y
;
float
yr
=
atomJ
.
y
-
atomI
.
y
;
...
@@ -909,132 +925,6 @@ __device__ void calculatePmeDirectElectrostaticPairIxnOrig_kernel( const PmeDire
...
@@ -909,132 +925,6 @@ __device__ void calculatePmeDirectElectrostaticPairIxnOrig_kernel( const PmeDire
forceTorqueEnergy
[
2
].
y
=
(
ttm32
+
ttm3i2
);
forceTorqueEnergy
[
2
].
y
=
(
ttm32
+
ttm3i2
);
forceTorqueEnergy
[
2
].
z
=
(
ttm33
+
ttm3i3
);
forceTorqueEnergy
[
2
].
z
=
(
ttm33
+
ttm3i3
);
#ifdef AMOEBA_DEBUG
int
debugIndex
=
0
;
float
idTracker
=
1.0
f
;
/*
debugArray[debugIndex].x = atomI.labFrameDipole[0];
debugArray[debugIndex].y = atomI.labFrameDipole[1];
debugArray[debugIndex].z = atomI.labFrameDipole[2];
debugArray[debugIndex].w = r2;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomJ.labFrameDipole[0];
debugArray[debugIndex].y = atomJ.labFrameDipole[1];
debugArray[debugIndex].z = atomJ.labFrameDipole[2];
debugArray[debugIndex].w = cSim.alphaEwald;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomI.inducedDipole[0];
debugArray[debugIndex].y = atomI.inducedDipole[1];
debugArray[debugIndex].z = atomI.inducedDipole[2];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomJ.inducedDipole[0];
debugArray[debugIndex].y = atomJ.inducedDipole[1];
debugArray[debugIndex].z = atomJ.inducedDipole[2];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomI.inducedDipoleP[0];
debugArray[debugIndex].y = atomI.inducedDipoleP[1];
debugArray[debugIndex].z = atomI.inducedDipoleP[2];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomJ.inducedDipoleP[0];
debugArray[debugIndex].y = atomJ.inducedDipoleP[1];
debugArray[debugIndex].z = atomJ.inducedDipoleP[2];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = conversionFactor*ftm21;
debugArray[debugIndex].y = conversionFactor*ftm22;
debugArray[debugIndex].z = conversionFactor*ftm23;
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = e;
debugArray[debugIndex].y = ei;
debugArray[debugIndex].z = erl;
debugArray[debugIndex].w = erli;
debugIndex++;
*/
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
r2
;
debugArray
[
debugIndex
].
y
=
cSim
.
alphaEwald
;
debugArray
[
debugIndex
].
z
=
conversionFactor
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ftm21
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ftm22
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ftm23
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ftm2i1
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ftm2i2
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ftm2i3
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
/*
idTracker += 100.0;
debugArray[debugIndex].x = fridmp1;
debugArray[debugIndex].y = fridmp2;
debugArray[debugIndex].z = fridmp3;
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 100.0;
debugArray[debugIndex].x = findmp1;
debugArray[debugIndex].y = findmp2;
debugArray[debugIndex].z = findmp3;
debugArray[debugIndex].w = idTracker;
debugIndex++;
*/
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ttm21
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ttm22
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ttm23
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ttm2i1
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ttm2i2
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ttm2i3
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ttm31
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ttm32
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ttm33
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ttm3i1
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ttm3i2
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ttm3i3
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
#endif
}
else
{
}
else
{
forceTorqueEnergy
[
0
].
x
=
0.0
f
;
forceTorqueEnergy
[
0
].
x
=
0.0
f
;
...
@@ -1051,15 +941,6 @@ __device__ void calculatePmeDirectElectrostaticPairIxnOrig_kernel( const PmeDire
...
@@ -1051,15 +941,6 @@ __device__ void calculatePmeDirectElectrostaticPairIxnOrig_kernel( const PmeDire
forceTorqueEnergy
[
0
].
w
=
0.0
f
;
forceTorqueEnergy
[
0
].
w
=
0.0
f
;
#ifdef AMOEBA_DEBUG
for
(
int
ii
=
0
;
ii
<
12
;
ii
++
){
debugArray
[
ii
].
x
=
0.0
f
;
debugArray
[
ii
].
y
=
0.0
f
;
debugArray
[
ii
].
z
=
0.0
f
;
debugArray
[
ii
].
w
=
(
float
)
(
-
ii
);
}
#endif
}
}
return
;
return
;
...
@@ -1351,35 +1232,11 @@ static void kReduceTorque(amoebaGpuContext amoebaGpu )
...
@@ -1351,35 +1232,11 @@ static void kReduceTorque(amoebaGpuContext amoebaGpu )
void
cudaComputeAmoebaPmeDirectElectrostatic
(
amoebaGpuContext
amoebaGpu
)
void
cudaComputeAmoebaPmeDirectElectrostatic
(
amoebaGpuContext
amoebaGpu
)
{
{
#ifdef AMOEBA_DEBUG
static
const
char
*
methodName
=
"cudaComputeAmoebaPmeDirectElectrostatic"
;
static
int
timestep
=
0
;
std
::
vector
<
int
>
fileId
;
timestep
++
;
fileId
.
resize
(
2
);
fileId
[
0
]
=
timestep
;
fileId
[
1
]
=
1
;
#endif
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
// apparently debug array can take up nontrivial no. registers
// apparently debug array can take up nontrivial no. registers
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s %d maxCovalentDegreeSz=%d ZZZ
\n
"
,
methodName
,
gpu
->
natoms
,
amoebaGpu
->
maxCovalentDegreeSz
);
}
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
int
maxOffset
=
20
;
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
maxOffset
*
paddedNumberOfAtoms
,
1
,
"DebugArray"
);
memset
(
debugArray
->
_pSysData
,
0
,
sizeof
(
float
)
*
4
*
maxOffset
*
paddedNumberOfAtoms
);
debugArray
->
Upload
();
unsigned
int
targetAtom
=
49
;
#endif
// on first pass, set threads/block
// on first pass, set threads/block
static
unsigned
int
threadsPerBlock
=
0
;
static
unsigned
int
threadsPerBlock
=
0
;
...
@@ -1403,37 +1260,16 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
...
@@ -1403,37 +1260,16 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
kClearFields_3
(
amoebaGpu
,
1
);
kClearFields_3
(
amoebaGpu
,
1
);
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoebaPmeDirectElectrostaticCutoffForces: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%lu shrd=%lu ixnCt=%lu workUnits=%u maxL1=%d
\n
"
,
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
gpu
->
bOutputBufferPerWarp
,
sizeof
(
PmeDirectElectrostaticParticle
),
(
sizeof
(
PmeDirectElectrostaticParticle
))
*
threadsPerBlock
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
,
maxL1
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
if
(
gpu
->
bOutputBufferPerWarp
){
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaPmeDirectElectrostaticCutoffByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
PmeDirectElectrostaticParticle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaPmeDirectElectrostaticCutoffByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
PmeDirectElectrostaticParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pInteractingWorkUnit
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
#endif
}
else
{
}
else
{
kCalculateAmoebaPmeDirectElectrostaticCutoffForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
PmeDirectElectrostaticParticle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaPmeDirectElectrostaticCutoffForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
PmeDirectElectrostaticParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pInteractingWorkUnit
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
#endif
}
}
LAUNCHERROR
(
"kCalculateAmoebaPmeDirectElectrostaticCutoffForces"
);
LAUNCHERROR
(
"kCalculateAmoebaPmeDirectElectrostaticCutoffForces"
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
View file @
2b508482
//-----------------------------------------------------------------------------------------
/* -------------------------------------------------------------------------- *
* OpenMM *
//-----------------------------------------------------------------------------------------
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "cudaKernels.h"
#include "cudaKernels.h"
#include "amoebaCudaKernels.h"
#include "amoebaCudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h"
#include "kCalculateAmoebaCudaUtilities.h"
//#define AMOEBA_DEBUG
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaAmoebaGmxSimulation
cAmoebaSim
;
static
__constant__
cudaAmoebaGmxSimulation
cAmoebaSim
;
...
@@ -171,12 +191,7 @@ __device__ void sumTempBuffer( FixedFieldParticle& atomI, FixedFieldParticle& at
...
@@ -171,12 +191,7 @@ __device__ void sumTempBuffer( FixedFieldParticle& atomI, FixedFieldParticle& at
}
}
__device__
void
calculateFixedFieldRealSpacePairIxn_kernel
(
FixedFieldParticle
&
atomI
,
FixedFieldParticle
&
atomJ
,
__device__
void
calculateFixedFieldRealSpacePairIxn_kernel
(
FixedFieldParticle
&
atomI
,
FixedFieldParticle
&
atomJ
,
float
dscale
,
float
pscale
,
float4
fields
[
3
]
float
dscale
,
float
pscale
,
float4
fields
[
3
]){
#ifdef AMOEBA_DEBUG
,
float4
*
pullBack
#endif
){
// compute the real space portion of the Ewald summation
// compute the real space portion of the Ewald summation
...
@@ -329,18 +344,6 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
...
@@ -329,18 +344,6 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
fields
[
2
].
w
=
0.0
f
;
fields
[
2
].
w
=
0.0
f
;
}
}
#ifdef AMOEBA_DEBUG
pullBack
[
0
].
x
=
xr
;
pullBack
[
0
].
y
=
yr
;
pullBack
[
0
].
z
=
zr
;
pullBack
[
0
].
w
=
r2
;
pullBack
[
1
].
x
=
atomJ
.
x
-
atomI
.
x
;
pullBack
[
1
].
y
=
atomJ
.
y
-
atomI
.
y
;
pullBack
[
1
].
z
=
atomJ
.
z
-
atomI
.
z
;
pullBack
[
1
].
w
=
(
atomJ
.
x
-
atomI
.
x
)
*
(
atomJ
.
x
-
atomI
.
x
)
+
(
atomJ
.
y
-
atomI
.
y
)
*
(
atomJ
.
y
-
atomI
.
y
)
+
(
atomJ
.
z
-
atomI
.
z
)
*
(
atomJ
.
z
-
atomI
.
z
);
#endif
}
}
// Include versions of the kernels for N^2 calculations.
// Include versions of the kernels for N^2 calculations.
...
@@ -361,40 +364,6 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
...
@@ -361,40 +364,6 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
--------------------------------------------------------------------------------------- */
--------------------------------------------------------------------------------------- */
#ifdef AMOEBA_DEBUG
/**
 * Report whether a double is non-finite.
 *
 * @param number  value to classify
 * @return 1 if number is NaN, +infinity or -infinity; 0 otherwise
 */
static int isNanOrInfinity( double number ){

    // NaN is the only value that compares unequal to itself
    if( number != number ){
        return 1;
    }

    const double positiveInfinity = std::numeric_limits<double>::infinity();
    if( number == positiveInfinity || number == -positiveInfinity ){
        return 1;
    }

    return 0;
}
/**
 * Bubble-sort the leading entries of `array` into ascending order and apply
 * the identical sequence of swaps to `track`, so that track[k] keeps
 * following the element that ends up at array[k].
 *
 * @param array   keys to sort (modified in place)
 * @param track   companion values permuted in lock-step with array (modified in place)
 * @param length  number of leading entries to sort; values larger than either
 *                vector's size are clamped to the smaller size, and values
 *                <= 1 leave both vectors untouched
 */
static void bubbleSort( std::vector<int>& array, std::vector<int>& track, int length )
{
    // operator[] performs no bounds checking, so never sort past the end of
    // either vector even if the caller passes an oversized length
    int limit = length;
    if( limit > static_cast<int>( array.size() ) ){
        limit = static_cast<int>( array.size() );
    }
    if( limit > static_cast<int>( track.size() ) ){
        limit = static_cast<int>( track.size() );
    }

    for( int last = limit - 1; last > 0; last-- ){

        bool swapped = false;
        for( int j = 0; j < last; j++ ){

            // compare neighboring elements
            if( array[j] > array[j+1] ){

                // swap array[j] and array[j+1]
                int temp   = array[j];
                array[j]   = array[j+1];
                array[j+1] = temp;

                // swap track[j] and track[j+1] so the companion values stay aligned
                temp       = track[j];
                track[j]   = track[j+1];
                track[j+1] = temp;

                swapped    = true;
            }
        }

        // a pass without swaps means the range is already sorted
        if( !swapped ){
            break;
        }
    }
}
#endif
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Compute fixed electric field using PME
Compute fixed electric field using PME
...
@@ -409,22 +378,6 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
...
@@ -409,22 +378,6 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
static
unsigned
int
threadsPerBlock
=
0
;
static
unsigned
int
threadsPerBlock
=
0
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
#ifdef AMOEBA_DEBUG
static
const
char
*
methodName
=
"computeCudaAmoebaPmeFixedEField"
;
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
%s
\n
"
,
methodName
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
int
slots
=
15
;
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
*
slots
,
1
,
"DebugArray"
);
memset
(
debugArray
->
_pSysData
,
0
,
sizeof
(
float
)
*
4
*
paddedNumberOfAtoms
*
slots
);
debugArray
->
Upload
();
// print intermediate results for the targetAtom
unsigned
int
targetAtom
=
0
;
#endif
kClearFields_3
(
amoebaGpu
,
2
);
kClearFields_3
(
amoebaGpu
,
2
);
// on first pass, set threads/block
// on first pass, set threads/block
...
@@ -444,175 +397,17 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
...
@@ -444,175 +397,17 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
kCalculateAmoebaPmeDirectFixedE_FieldCutoffByWarp_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaPmeDirectFixedE_FieldCutoffByWarp_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
);
#endif
}
else
{
}
else
{
kCalculateAmoebaPmeDirectFixedE_FieldCutoff_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaPmeDirectFixedE_FieldCutoff_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
);
#endif
}
}
LAUNCHERROR
(
"kCalculateAmoebaPmeDirectFixedE_Field_kernel"
);
LAUNCHERROR
(
"kCalculateAmoebaPmeDirectFixedE_Field_kernel"
);
kReducePmeDirectE_Fields
(
amoebaGpu
);
kReducePmeDirectE_Fields
(
amoebaGpu
);
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
gpu
->
psInteractionCount
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"cudaComputeAmoebaPmeDirectFixedEField: threadsPerBlock=%u getThreadsPerBlock=%d sizeof=%u shrd=%u
\n
"
,
threadsPerBlock
,
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
),
gpu
->
sharedMemoryPerBlock
),
(
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
)),
(
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
))
*
threadsPerBlock
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"AmoebaCutoffForces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u ixnCt=%u workUnits=%u warp=%d
\n
"
,
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
gpu
->
bOutputBufferPerWarp
,
sizeof
(
FixedFieldParticle
),
sizeof
(
FixedFieldParticle
)
*
threadsPerBlock
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
,
gpu
->
bOutputBufferPerWarp
);
(
void
)
fflush
(
amoebaGpu
->
log
);
/*
(void) fprintf( amoebaGpu->log, "Out WorkArray_3_[1,2] paddedNumberOfAtoms=%d\n", gpu->sim.paddedNumberOfAtoms, gpu->sim.outputBuffers );
amoebaGpu->psWorkArray_3_1->Download();
amoebaGpu->psWorkArray_3_2->Download();
for( int ii = 0; ii < gpu->sim.paddedNumberOfAtoms; ii++ ){
(void) fprintf( amoebaGpu->log, "%5d ", ii);
int indexOffset = ii*3;
// buffer 1
(void) fprintf( amoebaGpu->log,"WArry1[%16.9e %16.9e %16.9e] ",
amoebaGpu->psWorkArray_3_1->_pSysData[indexOffset],
amoebaGpu->psWorkArray_3_1->_pSysData[indexOffset+1],
amoebaGpu->psWorkArray_3_1->_pSysData[indexOffset+2] );
// buffer 2
(void) fprintf( amoebaGpu->log,"WArry2[%16.9e %16.9e %16.9e] ",
amoebaGpu->psWorkArray_3_2->_pSysData[indexOffset],
amoebaGpu->psWorkArray_3_2->_pSysData[indexOffset+1],
amoebaGpu->psWorkArray_3_2->_pSysData[indexOffset+2] );
(void) fprintf( amoebaGpu->log,"\n" );
if( ii == maxPrint && (gpu->natoms - maxPrint) > ii ){
ii = gpu->natoms - maxPrint;
}
}
(void) fflush( amoebaGpu->log );
*/
amoebaGpu
->
psE_Field
->
Download
();
amoebaGpu
->
psE_FieldPolar
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"E-field (includes self term)"
);
int
maxPrint
=
3002
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
int
indexOffset
=
ii
*
3
;
// E_Field
(
void
)
fprintf
(
amoebaGpu
->
log
,
"E[%16.9e %16.9e %16.9e] "
,
amoebaGpu
->
psE_Field
->
_pSysData
[
indexOffset
],
amoebaGpu
->
psE_Field
->
_pSysData
[
indexOffset
+
1
],
amoebaGpu
->
psE_Field
->
_pSysData
[
indexOffset
+
2
]
);
// E_Field polar
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Epol[%16.9e %16.9e %16.9e] "
,
amoebaGpu
->
psE_FieldPolar
->
_pSysData
[
indexOffset
],
amoebaGpu
->
psE_FieldPolar
->
_pSysData
[
indexOffset
+
1
],
amoebaGpu
->
psE_FieldPolar
->
_pSysData
[
indexOffset
+
2
]
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
if
(
ii
==
maxPrint
&&
(
gpu
->
natoms
-
maxPrint
)
>
ii
){
ii
=
gpu
->
natoms
-
maxPrint
;
}
}
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"EFields End
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"DebugQ
\n
"
);
debugArray
->
Download
();
std
::
vector
<
int
>
indices
;
std
::
vector
<
int
>
track
;
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
int
debugIndex
=
jj
;
if
(
fabs
(
debugArray
->
_pSysData
[
jj
+
3
*
paddedNumberOfAtoms
].
x
)
>
0.0
){
int
orderIndex
=
gpu
->
psAtomIndex
->
_pSysData
[
jj
];
indices
.
push_back
(
orderIndex
);
track
.
push_back
(
jj
);
}
}
bubbleSort
(
indices
,
track
,
static_cast
<
int
>
(
track
.
size
())
);
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
amoebaGpu
->
gpuContext
->
psPosq4
->
Download
();
unsigned
int
count
=
0
;
float
sum0
[
3
]
=
{
0.0
f
,
0.0
f
,
0.0
f
};
float
sum1
[
3
]
=
{
0.0
f
,
0.0
f
,
0.0
f
};
int
offset0
=
1
;
int
offset1
=
2
;
/*
for( int jj = 0; jj < gpu->natoms; jj++ ){
int debugIndex = jj;
if( fabs(debugArray->_pSysData[jj+3*paddedNumberOfAtoms].x) > 0.0 ){
int orderIndex = gpu->psAtomIndex->_pSysData[jj];
count++;
*/
for
(
unsigned
int
ii
=
0
;
ii
<
track
.
size
();
ii
++
){
int
jj
=
track
[
ii
];
int
debugIndex
=
jj
;
int
orderIndex
=
indices
[
ii
];
if
(
orderIndex
>
31
&&
offset0
==
1
){
offset0
=
2
;
offset1
=
2
;
}
count
++
;
sum0
[
0
]
+=
debugArray
->
_pSysData
[
jj
+
offset0
*
paddedNumberOfAtoms
].
x
;
sum0
[
1
]
+=
debugArray
->
_pSysData
[
jj
+
offset0
*
paddedNumberOfAtoms
].
y
;
sum0
[
2
]
+=
debugArray
->
_pSysData
[
jj
+
offset0
*
paddedNumberOfAtoms
].
z
;
sum1
[
0
]
+=
debugArray
->
_pSysData
[
jj
+
offset1
*
paddedNumberOfAtoms
].
x
;
sum1
[
1
]
+=
debugArray
->
_pSysData
[
jj
+
offset1
*
paddedNumberOfAtoms
].
y
;
sum1
[
2
]
+=
debugArray
->
_pSysData
[
jj
+
offset1
*
paddedNumberOfAtoms
].
z
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d %5d %u PmeFixedEField
\n
"
,
orderIndex
,
jj
,
count
);
for
(
int
kk
=
0
;
kk
<
7
;
kk
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"[%16.9e %16.9e %16.9e %16.9e]
\n
"
,
debugArray
->
_pSysData
[
debugIndex
].
x
,
debugArray
->
_pSysData
[
debugIndex
].
y
,
debugArray
->
_pSysData
[
debugIndex
].
z
,
debugArray
->
_pSysData
[
debugIndex
].
w
);
debugIndex
+=
paddedNumberOfAtoms
;
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%6d %16.9e %16.9e %16.9e %16.9e %16.9e %16.9e %6d %6d cum sumsOp
\n
"
,
orderIndex
,
sum0
[
0
],
sum0
[
1
],
sum0
[
2
],
sum1
[
0
],
sum1
[
1
],
sum1
[
2
],
jj
,
count
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
}
// write results to file
if
(
1
){
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaEField"
,
fileId
,
outputVector
);
}
delete
debugArray
;
}
#endif
}
}
void
cudaComputeAmoebaPmeFixedEField
(
amoebaGpuContext
amoebaGpu
)
void
cudaComputeAmoebaPmeFixedEField
(
amoebaGpuContext
amoebaGpu
)
...
@@ -621,21 +416,4 @@ void cudaComputeAmoebaPmeFixedEField( amoebaGpuContext amoebaGpu )
...
@@ -621,21 +416,4 @@ void cudaComputeAmoebaPmeFixedEField( amoebaGpuContext amoebaGpu )
kCalculateAmoebaPMEFixedMultipoles
(
amoebaGpu
);
kCalculateAmoebaPMEFixedMultipoles
(
amoebaGpu
);
cudaComputeAmoebaPmeDirectFixedEField
(
amoebaGpu
);
cudaComputeAmoebaPmeDirectFixedEField
(
amoebaGpu
);
#ifdef AMOEBA_DEBUG
if
(
0
){
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
0
);
VectorOfDoubleVectors
outputVector
;
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
kReduceForces
(
gpu
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psForce4
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaRecipForceTorqueFixed"
,
fileId
,
outputVector
);
//cudaWriteVectorOfDoubleVectorsToFile( "CudaRecipEField", fileId, outputVector );
exit
(
0
);
}
#endif
}
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
View file @
2b508482
...
@@ -37,16 +37,7 @@ __launch_bounds__(64, 1)
...
@@ -37,16 +37,7 @@ __launch_bounds__(64, 1)
void
METHOD_NAME
(
kCalculateAmoebaPmeDirectFixedE_Field
,
_kernel
)(
void
METHOD_NAME
(
kCalculateAmoebaPmeDirectFixedE_Field
,
_kernel
)(
unsigned
int
*
workUnit
,
unsigned
int
*
workUnit
,
float
*
outputEField
,
float
*
outputEField
,
float
*
outputEFieldPolar
float
*
outputEFieldPolar
){
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
,
unsigned
int
targetAtom
#endif
){
#ifdef AMOEBA_DEBUG
int
maxPullIndex
=
1
;
float4
pullBack
[
12
];
#endif
extern
__shared__
FixedFieldParticle
sA
[];
extern
__shared__
FixedFieldParticle
sA
[];
...
@@ -118,11 +109,7 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectFixedE_Field, _kernel)(
...
@@ -118,11 +109,7 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectFixedE_Field, _kernel)(
}
}
float4
ijField
[
3
];
float4
ijField
[
3
];
calculateFixedFieldRealSpacePairIxn_kernel
(
localParticle
,
psA
[
j
],
dScaleValue
,
pScaleValue
,
ijField
calculateFixedFieldRealSpacePairIxn_kernel
(
localParticle
,
psA
[
j
],
dScaleValue
,
pScaleValue
,
ijField
);
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
);
// nan*0.0 = nan not 0.0, so explicitly exclude (atomI == atomJ) contribution
// nan*0.0 = nan not 0.0, so explicitly exclude (atomI == atomJ) contribution
// by setting match flag
// by setting match flag
...
@@ -139,66 +126,6 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectFixedE_Field, _kernel)(
...
@@ -139,66 +126,6 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectFixedE_Field, _kernel)(
fieldPolarSum
[
1
]
+=
match
?
0
.
0
f
:
ijField
[
1
].
z
;
fieldPolarSum
[
1
]
+=
match
?
0
.
0
f
:
ijField
[
1
].
z
;
fieldPolarSum
[
2
]
+=
match
?
0
.
0
f
:
ijField
[
2
].
z
;
fieldPolarSum
[
2
]
+=
match
?
0
.
0
f
:
ijField
[
2
].
z
;
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
||
targetAtom
==
(
y
+
j
)
){
unsigned
int
index
=
atomI
==
targetAtom
?
(
y
+
j
)
:
atomI
;
unsigned
int
indexI
=
0
;
unsigned
int
indexJ
=
indexI
?
0
:
2
;
float
flag
=
7
.
0
f
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
j
);
debugArray
[
index
].
z
=
dScaleValue
;
debugArray
[
index
].
w
=
pScaleValue
;
/*
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = (float) bExclusionFlag;
debugArray[index].y = (float) (tgx);
debugArray[index].z = (float) j;
debugArray[index].w = flag;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = (float) dScaleMask;
debugArray[index].y = (float) pScaleMask.x;
debugArray[index].z = (float) pScaleMask.y;
debugArray[index].w = flag;
*/
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
match
?
0
.
0
f
:
ijField
[
0
].
x
;
debugArray
[
index
].
y
=
match
?
0
.
0
f
:
ijField
[
1
].
x
;
debugArray
[
index
].
z
=
match
?
0
.
0
f
:
ijField
[
2
].
x
;
debugArray
[
index
].
w
=
flag
+
1
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
match
?
0
.
0
f
:
ijField
[
0
].
z
;
debugArray
[
index
].
y
=
match
?
0
.
0
f
:
ijField
[
1
].
z
;
debugArray
[
index
].
z
=
match
?
0
.
0
f
:
ijField
[
2
].
z
;
debugArray
[
index
].
w
=
flag
+
2
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
match
?
0
.
0
f
:
ijField
[
0
].
y
;
debugArray
[
index
].
y
=
match
?
0
.
0
f
:
ijField
[
1
].
y
;
debugArray
[
index
].
z
=
match
?
0
.
0
f
:
ijField
[
2
].
y
;
debugArray
[
index
].
w
=
flag
+
3
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
match
?
0
.
0
f
:
ijField
[
0
].
w
;
debugArray
[
index
].
y
=
match
?
0
.
0
f
:
ijField
[
1
].
w
;
debugArray
[
index
].
z
=
match
?
0
.
0
f
:
ijField
[
2
].
w
;
debugArray
[
index
].
w
=
flag
+
4
.
0
f
;
for
(
int
pullIndex
=
0
;
pullIndex
<
maxPullIndex
;
pullIndex
++
){
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
}
}
#endif
}
}
// Write results
// Write results
...
@@ -252,11 +179,7 @@ if( atomI == targetAtom || targetAtom == (y+j) ){
...
@@ -252,11 +179,7 @@ if( atomI == targetAtom || targetAtom == (y+j) ){
}
}
float4
ijField
[
3
];
float4
ijField
[
3
];
calculateFixedFieldRealSpacePairIxn_kernel
(
localParticle
,
psA
[
jIdx
],
dScaleValue
,
pScaleValue
,
ijField
calculateFixedFieldRealSpacePairIxn_kernel
(
localParticle
,
psA
[
jIdx
],
dScaleValue
,
pScaleValue
,
ijField
);
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
);
unsigned
int
outOfBounds
=
(
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
jIdx
)
>=
cSim
.
atoms
)
)
?
1
:
0
;
unsigned
int
outOfBounds
=
(
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
jIdx
)
>=
cSim
.
atoms
)
)
?
1
:
0
;
...
@@ -317,65 +240,6 @@ if( atomI == targetAtom || targetAtom == (y+j) ){
...
@@ -317,65 +240,6 @@ if( atomI == targetAtom || targetAtom == (y+j) ){
}
}
}
}
#ifdef AMOEBA_DEBUG
if
(
(
atomI
==
targetAtom
||
(
y
+
jIdx
)
==
targetAtom
)
){
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
(
y
+
jIdx
)
:
atomI
;
unsigned
int
indexI
=
(
atomI
==
targetAtom
)
?
0
:
2
;
unsigned
int
indexJ
=
(
atomI
==
targetAtom
)
?
2
:
0
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
jIdx
);
debugArray
[
index
].
z
=
dScaleValue
;
debugArray
[
index
].
w
=
pScaleValue
;
float
flag
=
9
.
0
f
;
/*
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = (float) bExclusionFlag;
debugArray[index].y = (float) (tgx);
debugArray[index].z = (float) j;
debugArray[index].w = jIdx;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = (float) dScaleMask;
debugArray[index].y = (float) pScaleMask.x;
debugArray[index].z = (float) pScaleMask.y;
debugArray[index].w = (float) flags;
*/
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
outOfBounds
?
0
.
0
f
:
ijField
[
0
].
x
;
debugArray
[
index
].
y
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
].
x
;
debugArray
[
index
].
z
=
outOfBounds
?
0
.
0
f
:
ijField
[
2
].
x
;
debugArray
[
index
].
w
=
flag
+
1
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
outOfBounds
?
0
.
0
f
:
ijField
[
0
].
y
;
debugArray
[
index
].
y
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
].
y
;
debugArray
[
index
].
z
=
outOfBounds
?
0
.
0
f
:
ijField
[
2
].
y
;
debugArray
[
index
].
w
=
flag
+
2
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
outOfBounds
?
0
.
0
f
:
ijField
[
0
].
z
;
debugArray
[
index
].
y
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
].
z
;
debugArray
[
index
].
z
=
outOfBounds
?
0
.
0
f
:
ijField
[
2
].
z
;
debugArray
[
index
].
w
=
flag
+
3
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
outOfBounds
?
0
.
0
f
:
ijField
[
0
].
w
;
debugArray
[
index
].
y
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
].
w
;
debugArray
[
index
].
z
=
outOfBounds
?
0
.
0
f
:
ijField
[
2
].
w
;
debugArray
[
index
].
w
=
flag
+
4
.
0
f
;
for
(
int
pullIndex
=
0
;
pullIndex
<
maxPullIndex
;
pullIndex
++
){
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
}
}
#endif
}
}
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
View file @
2b508482
//-----------------------------------------------------------------------------------------
/* -------------------------------------------------------------------------- *
* OpenMM *
//-----------------------------------------------------------------------------------------
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaGpuTypes.h"
#include "amoebaGpuTypes.h"
#include "amoebaCudaKernels.h"
#include "amoebaCudaKernels.h"
...
@@ -33,9 +55,6 @@ void GetCalculateAmoebaCudaPmeMutualInducedFieldSim(amoebaGpuContext amoebaGpu)
...
@@ -33,9 +55,6 @@ void GetCalculateAmoebaCudaPmeMutualInducedFieldSim(amoebaGpuContext amoebaGpu)
RTERROR
(
status
,
"GetCalculateAmoebaCudaPmeMutualInducedFieldSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed"
);
RTERROR
(
status
,
"GetCalculateAmoebaCudaPmeMutualInducedFieldSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed"
);
}
}
//#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
#undef INCLUDE_MI_FIELD_BUFFERS
#undef INCLUDE_MI_FIELD_BUFFERS
#define INCLUDE_MI_FIELD_BUFFERS
#define INCLUDE_MI_FIELD_BUFFERS
#include "kCalculateAmoebaCudaMutualInducedParticle.h"
#include "kCalculateAmoebaCudaMutualInducedParticle.h"
...
@@ -331,10 +350,6 @@ static void kReduceMutualInducedFieldDelta_kernel(int numberOfEntries, float* ar
...
@@ -331,10 +350,6 @@ static void kReduceMutualInducedFieldDelta_kernel(int numberOfEntries, float* ar
{
{
epsilon
[
0
]
=
delta
[
0
].
x
>
delta
[
0
].
y
?
delta
[
0
].
x
:
delta
[
0
].
y
;
epsilon
[
0
]
=
delta
[
0
].
x
>
delta
[
0
].
y
?
delta
[
0
].
x
:
delta
[
0
].
y
;
epsilon
[
0
]
=
48.033324
f
*
sqrtf
(
epsilon
[
0
]
/
(
(
float
)
(
numberOfEntries
/
3
))
);
epsilon
[
0
]
=
48.033324
f
*
sqrtf
(
epsilon
[
0
]
/
(
(
float
)
(
numberOfEntries
/
3
))
);
#ifdef AMOEBA_DEBUG
epsilon
[
1
]
=
48.033324
f
*
sqrtf
(
delta
[
0
].
x
/
(
(
float
)
(
numberOfEntries
/
3
))
);
epsilon
[
2
]
=
48.033324
f
*
sqrtf
(
delta
[
0
].
y
/
(
(
float
)
(
numberOfEntries
/
3
))
);
#endif
}
}
}
}
...
@@ -419,16 +434,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
...
@@ -419,16 +434,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
static
unsigned
int
threadsPerBlock
=
0
;
static
unsigned
int
threadsPerBlock
=
0
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
#ifdef AMOEBA_DEBUG
int
targetAtom
=
546
;
static
const
char
*
methodName
=
"cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply"
;
static
int
iteration
=
1
;
if
(
1
&&
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s
\n
"
,
methodName
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
kClearFields_3
(
amoebaGpu
,
2
);
kClearFields_3
(
amoebaGpu
,
2
);
// on first pass, set threads/block
// on first pass, set threads/block
...
@@ -444,17 +449,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
...
@@ -444,17 +449,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
MutualInducedParticle
),
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
MutualInducedParticle
),
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
}
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
gpu
->
psInteractionCount
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%lu shrd=%lu ixnCt=%lu workUnits=%u
\n
"
,
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
gpu
->
bOutputBufferPerWarp
,
sizeof
(
MutualInducedParticle
),
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
if
(
gpu
->
bOutputBufferPerWarp
){
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaPmeMutualInducedFieldCutoffByWarp_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaPmeMutualInducedFieldCutoffByWarp_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
>>>
(
...
@@ -474,43 +468,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
...
@@ -474,43 +468,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
kReduceMutualInducedFields
(
amoebaGpu
,
outputArray
,
outputPolarArray
);
kReduceMutualInducedFields
(
amoebaGpu
,
outputArray
,
outputPolarArray
);
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
&&
iteration
==
1
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished maxtrixMultiply kernel execution %d -- Direct only -- self added in kSorUpdateMutualInducedField_kernel
\n
"
,
iteration
);
(
void
)
fflush
(
amoebaGpu
->
log
);
outputArray
->
Download
();
outputPolarArray
->
Download
();
//debugArray->Download();
int
maxPrint
=
5
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
int
indexOffset
=
ii
*
3
;
// MI
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Mult[%16.9e %16.9e %16.9e] "
,
outputArray
->
_pSysData
[
indexOffset
],
outputArray
->
_pSysData
[
indexOffset
+
1
],
outputArray
->
_pSysData
[
indexOffset
+
2
]
);
// MI polar
(
void
)
fprintf
(
amoebaGpu
->
log
,
"MultP[%16.9e %16.9e %16.9e]
\n
"
,
outputPolarArray
->
_pSysData
[
indexOffset
],
outputPolarArray
->
_pSysData
[
indexOffset
+
1
],
outputPolarArray
->
_pSysData
[
indexOffset
+
2
]
);
if
(
ii
==
maxPrint
&&
(
gpu
->
natoms
-
maxPrint
)
>
ii
){
ii
=
gpu
->
natoms
-
maxPrint
;
}
}
(
void
)
fflush
(
amoebaGpu
->
log
);
iteration
++
;
}
#endif
}
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
...
@@ -526,19 +483,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
...
@@ -526,19 +483,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
//#define AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
static
const
char
*
methodName
=
"cudaComputeAmoebaPmeMutualInducedFieldBySOR"
;
static
int
timestep
=
0
;
std
::
vector
<
int
>
fileId
;
timestep
++
;
fileId
.
resize
(
2
);
fileId
[
0
]
=
timestep
;
fileId
[
1
]
=
1
;
#endif
// ---------------------------------------------------------------------------------------
int
done
;
int
done
;
int
iteration
;
int
iteration
;
...
@@ -559,19 +503,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
...
@@ -559,19 +503,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
cudaMemcpy
(
amoebaGpu
->
psInducedDipole
->
_pDevData
,
amoebaGpu
->
psE_Field
->
_pDevData
,
3
*
gpu
->
sim
.
paddedNumberOfAtoms
*
sizeof
(
float
),
cudaMemcpyDeviceToDevice
);
cudaMemcpy
(
amoebaGpu
->
psInducedDipole
->
_pDevData
,
amoebaGpu
->
psE_Field
->
_pDevData
,
3
*
gpu
->
sim
.
paddedNumberOfAtoms
*
sizeof
(
float
),
cudaMemcpyDeviceToDevice
);
cudaMemcpy
(
amoebaGpu
->
psInducedDipolePolar
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
,
3
*
gpu
->
sim
.
paddedNumberOfAtoms
*
sizeof
(
float
),
cudaMemcpyDeviceToDevice
);
cudaMemcpy
(
amoebaGpu
->
psInducedDipolePolar
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
,
3
*
gpu
->
sim
.
paddedNumberOfAtoms
*
sizeof
(
float
),
cudaMemcpyDeviceToDevice
);
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
std
::
vector
<
int
>
fileId
;
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeEFieldPolarity"
,
fileId
,
outputVector
);
}
#endif
// if polarization type is direct, set flags signalling done and return
// if polarization type is direct, set flags signalling done and return
if
(
amoebaGpu
->
amoebaSim
.
polarizationType
)
if
(
amoebaGpu
->
amoebaSim
.
polarizationType
)
...
@@ -609,12 +540,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
...
@@ -609,12 +540,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
amoebaGpu
->
psCurrentEpsilon
->
_pDevData
);
amoebaGpu
->
psCurrentEpsilon
->
_pDevData
);
LAUNCHERROR
(
"kReducePmeMutualInducedFieldDelta"
);
LAUNCHERROR
(
"kReducePmeMutualInducedFieldDelta"
);
#ifdef AMOEBA_DEBUG
if
(
0
&&
amoebaGpu
->
log
){
// trackMutualInducedIterations
trackMutualInducedIterations
(
amoebaGpu
,
iteration
);
}
#endif
// Debye=48.033324f
// Debye=48.033324f
amoebaGpu
->
psCurrentEpsilon
->
Download
();
amoebaGpu
->
psCurrentEpsilon
->
Download
();
float
currentEpsilon
=
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
0
];
float
currentEpsilon
=
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
0
];
...
@@ -624,79 +549,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
...
@@ -624,79 +549,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
done
=
1
;
done
=
1
;
}
}
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
amoebaGpu
->
psInducedDipole
->
Download
();
amoebaGpu
->
psInducedDipolePolar
->
Download
();
#if 1
(
void
)
fprintf
(
amoebaGpu
->
log
,
"cudaComputeAmoebaPmeMutualInducedFieldBySOR iteration=%3d eps %14.6e [%14.6e %14.6e] done=%d
\n
"
,
iteration
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
1
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
2
],
done
);
#else
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s iteration=%3d eps %14.6e %14.6e crrntEps=%14.6e %14.6e %14.6e %14.6e done=%d
\n
"
,
methodName
,
iteration
,
sum1
,
sum2
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
0
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
1
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
2
],
done
);
#endif
(
void
)
fflush
(
amoebaGpu
->
log
);
if
(
0
){
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
iteration
);
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeMI"
,
fileId
,
outputVector
);
}
/*
int offset = 0;
int maxPrint = 10;
for( int ii = 0; ii < gpu->natoms; ii++ ){
(void) fprintf( amoebaGpu->log, "%4d ", ii );
(void) fprintf( amoebaGpu->log," Mi[%14.6e %14.6e %14.6e] ",
amoebaGpu->psInducedDipole->_pSysData[offset],
amoebaGpu->psInducedDipole->_pSysData[offset+1],
amoebaGpu->psInducedDipole->_pSysData[offset+2] );
(void) fprintf( amoebaGpu->log,"Mip[%14.6e %14.6e %14.6e]\n",
amoebaGpu->psInducedDipolePolar->_pSysData[offset],
amoebaGpu->psInducedDipolePolar->_pSysData[offset+1],
amoebaGpu->psInducedDipolePolar->_pSysData[offset+2] );
if( ii == maxPrint && (ii < (gpu->natoms - maxPrint) ) ){
ii = (gpu->natoms - maxPrint);
offset = 3*(ii+1);
} else {
offset += 3;
}
}
(void) fflush( amoebaGpu->log );
*/
if
(
0
){
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
iteration
);
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeMI"
,
fileId
,
outputVector
);
}
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"MI iteration=%3d eps %14.6e [%14.6e %14.6e] done=%d
\n
"
,
iteration
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
1
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
2
],
done
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#endif
// exit if nan
// exit if nan
if
(
amoebaGpu
->
mutualInducedCurrentEpsilon
!=
amoebaGpu
->
mutualInducedCurrentEpsilon
){
if
(
amoebaGpu
->
mutualInducedCurrentEpsilon
!=
amoebaGpu
->
mutualInducedCurrentEpsilon
){
...
@@ -710,25 +562,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
...
@@ -710,25 +562,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
amoebaGpu
->
mutualInducedDone
=
done
;
amoebaGpu
->
mutualInducedDone
=
done
;
amoebaGpu
->
mutualInducedConverged
=
(
!
done
||
iteration
>
amoebaGpu
->
mutualInducedMaxIterations
)
?
0
:
1
;
amoebaGpu
->
mutualInducedConverged
=
(
!
done
||
iteration
>
amoebaGpu
->
mutualInducedMaxIterations
)
?
0
:
1
;
#ifdef AMOEBA_DEBUG
if
(
0
){
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeMI"
,
fileId
,
outputVector
);
}
if
(
0
){
static
int
iteration
=
0
;
checkForNans
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"CudaPmeMI"
,
stderr
);
checkForNans
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
gpu
->
psAtomIndex
->
_pSysData
,
iteration
,
"CudaPmeMIPolar"
,
stderr
);
}
#endif
// ---------------------------------------------------------------------------------------
}
}
void
cudaComputeAmoebaPmeMutualInducedField
(
amoebaGpuContext
amoebaGpu
)
void
cudaComputeAmoebaPmeMutualInducedField
(
amoebaGpuContext
amoebaGpu
)
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
View file @
2b508482
...
@@ -37,9 +37,6 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
...
@@ -37,9 +37,6 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
void
METHOD_NAME
(
kCalculateAmoebaPmeMutualInducedField
,
_kernel
)(
void
METHOD_NAME
(
kCalculateAmoebaPmeMutualInducedField
,
_kernel
)(
unsigned
int
*
workUnit
,
unsigned
int
*
workUnit
,
float
*
outputField
,
float
*
outputFieldPolar
float
*
outputField
,
float
*
outputFieldPolar
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
,
unsigned
int
targetAtom
#endif
){
){
extern
__shared__
MutualInducedParticle
sA
[];
extern
__shared__
MutualInducedParticle
sA
[];
...
@@ -52,10 +49,6 @@ void METHOD_NAME(kCalculateAmoebaPmeMutualInducedField, _kernel)(
...
@@ -52,10 +49,6 @@ void METHOD_NAME(kCalculateAmoebaPmeMutualInducedField, _kernel)(
unsigned
int
lasty
=
0xFFFFFFFF
;
unsigned
int
lasty
=
0xFFFFFFFF
;
const
float
uscale
=
1
.
0
f
;
const
float
uscale
=
1
.
0
f
;
#ifdef AMOEBA_DEBUG
float4
pullBack
[
4
];
#endif
while
(
pos
<
end
)
while
(
pos
<
end
)
{
{
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
View file @
2b508482
//-----------------------------------------------------------------------------------------
/* -------------------------------------------------------------------------- *
* OpenMM *
//-----------------------------------------------------------------------------------------
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "cudaKernels.h"
#include "cudaKernels.h"
#include "amoebaCudaKernels.h"
#include "amoebaCudaKernels.h"
...
@@ -51,8 +73,6 @@ __device__ static float normVector3( float* vector )
...
@@ -51,8 +73,6 @@ __device__ static float normVector3( float* vector )
return
returnNorm
;
return
returnNorm
;
}
}
#undef AMOEBA_DEBUG
// ZThenX == 0
// ZThenX == 0
// Bisector == 1
// Bisector == 1
// ZBisect == 2
// ZBisect == 2
...
@@ -379,10 +399,6 @@ void kCudaComputeLabFrameMoments_kernel( void )
...
@@ -379,10 +399,6 @@ void kCudaComputeLabFrameMoments_kernel( void )
void
cudaComputeAmoebaLabFrameMoments
(
amoebaGpuContext
amoebaGpu
)
void
cudaComputeAmoebaLabFrameMoments
(
amoebaGpuContext
amoebaGpu
)
{
{
// ---------------------------------------------------------------------------------------
static
const
char
*
methodName
=
"computeCudaAmoebaLabFrameMoments"
;
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
...
@@ -390,33 +406,6 @@ void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu )
...
@@ -390,33 +406,6 @@ void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu )
int
numBlocks
=
gpu
->
sim
.
blocks
;
int
numBlocks
=
gpu
->
sim
.
blocks
;
int
numThreads
=
gpu
->
sim
.
threads_per_block
;
int
numThreads
=
gpu
->
sim
.
threads_per_block
;
//#define AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s: numBlocks/atoms=%d
\n
"
,
methodName
,
numBlocks
);
(
void
)
fflush
(
amoebaGpu
->
log
);
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
Download
();
amoebaGpu
->
psMolecularDipole
->
Download
();
amoebaGpu
->
psMultipoleParticlesTorqueBufferIndices
->
Download
();
gpu
->
psPosq4
->
Download
();
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
int
mIndex
=
3
*
ii
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%6d [%6d %6d %6d %6d] x[%16.9e %16.9e %16.9e] %s [%6d %6d %6d %6d]
\n
"
,
ii
,
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
x
,
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
y
,
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
z
,
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
w
,
gpu
->
psPosq4
->
_pSysData
[
ii
].
x
,
gpu
->
psPosq4
->
_pSysData
[
ii
].
y
,
gpu
->
psPosq4
->
_pSysData
[
ii
].
z
,
(
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
w
>
1
?
" XXX"
:
""
),
amoebaGpu
->
psMultipoleParticlesTorqueBufferIndices
->
_pSysData
[
ii
].
x
,
amoebaGpu
->
psMultipoleParticlesTorqueBufferIndices
->
_pSysData
[
ii
].
y
,
amoebaGpu
->
psMultipoleParticlesTorqueBufferIndices
->
_pSysData
[
ii
].
z
,
amoebaGpu
->
psMultipoleParticlesTorqueBufferIndices
->
_pSysData
[
ii
].
w
);
//if( ii == 30 )ii = gpu->natoms - 30;
}
}
#endif
// copy molecular moments to lab frame moment arrays
// copy molecular moments to lab frame moment arrays
// check if chiral center requires moments to have sign flipped
// check if chiral center requires moments to have sign flipped
// compute lab frame moments
// compute lab frame moments
...
@@ -428,7 +417,7 @@ void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu )
...
@@ -428,7 +417,7 @@ void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu )
LAUNCHERROR
(
"kCudaComputeCheckChiral"
);
LAUNCHERROR
(
"kCudaComputeCheckChiral"
);
kCudaComputeLabFrameMoments_kernel
<<<
numBlocks
,
numThreads
>>>
(
);
kCudaComputeLabFrameMoments_kernel
<<<
numBlocks
,
numThreads
>>>
(
);
LAUNCHERROR
(
methodName
);
LAUNCHERROR
(
"kCudaComputeLabFrameMoments"
);
}
}
...
@@ -505,5 +494,3 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
...
@@ -505,5 +494,3 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
}
}
}
}
#undef AMOEBA_DEBUG
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaUtilities.cu
View file @
2b508482
/* -------------------------------------------------------------------------- *
//-----------------------------------------------------------------------------------------
* OpenMM *
* -------------------------------------------------------------------------- *
//-----------------------------------------------------------------------------------------
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaCudaKernels.h"
#include "amoebaCudaKernels.h"
//#define AMOEBA_DEBUG
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaAmoebaGmxSimulation
cAmoebaSim
;
static
__constant__
cudaAmoebaGmxSimulation
cAmoebaSim
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
View file @
2b508482
//-----------------------------------------------------------------------------------------
/* -------------------------------------------------------------------------- *
* OpenMM *
//-----------------------------------------------------------------------------------------
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaGpuTypes.h"
#include "amoebaGpuTypes.h"
#include "amoebaCudaKernels.h"
#include "amoebaCudaKernels.h"
...
@@ -38,12 +60,6 @@ void GetCalculateAmoebaCudaVdw14_7Sim(amoebaGpuContext amoebaGpu)
...
@@ -38,12 +60,6 @@ void GetCalculateAmoebaCudaVdw14_7Sim(amoebaGpuContext amoebaGpu)
RTERROR
(
status
,
"GetCalculateAmoebaCudaVdw14_7Sim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed"
);
RTERROR
(
status
,
"GetCalculateAmoebaCudaVdw14_7Sim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed"
);
}
}
//#define AMOEBA_DEBUG_PRINT
#undef AMOEBA_DEBUG_PRINT
//#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
__device__
void
zeroVdw14_7SharedForce
(
struct
Vdw14_7Particle
*
sA
)
__device__
void
zeroVdw14_7SharedForce
(
struct
Vdw14_7Particle
*
sA
)
{
{
// zero shared fields
// zero shared fields
...
@@ -101,11 +117,7 @@ __device__ void getVdw14_7CombindedSigmaEpsilon_kernel( int sigmaCombiningRule,
...
@@ -101,11 +117,7 @@ __device__ void getVdw14_7CombindedSigmaEpsilon_kernel( int sigmaCombiningRule,
}
}
__device__
void
calculateVdw14_7PairIxn_kernel
(
float
combindedSigma
,
float
combindedEpsilon
,
__device__
void
calculateVdw14_7PairIxn_kernel
(
float
combindedSigma
,
float
combindedEpsilon
,
float
force
[
3
],
float
*
energy
float
force
[
3
],
float
*
energy
)
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
#endif
)
{
{
const
float
deltaHalM1
=
0.07
f
;
const
float
deltaHalM1
=
0.07
f
;
...
@@ -120,14 +132,6 @@ __device__ void calculateVdw14_7PairIxn_kernel( float combindedSigma, float c
...
@@ -120,14 +132,6 @@ __device__ void calculateVdw14_7PairIxn_kernel( float combindedSigma, float c
float
r2
=
force
[
0
]
*
force
[
0
]
+
force
[
1
]
*
force
[
1
]
+
force
[
2
]
*
force
[
2
];
float
r2
=
force
[
0
]
*
force
[
0
]
+
force
[
1
]
*
force
[
1
]
+
force
[
2
]
*
force
[
2
];
if
(
r2
>
cAmoebaSim
.
vdwCutoff2
){
if
(
r2
>
cAmoebaSim
.
vdwCutoff2
){
*
energy
=
force
[
0
]
=
force
[
1
]
=
force
[
2
]
=
0.0
f
;
*
energy
=
force
[
0
]
=
force
[
1
]
=
force
[
2
]
=
0.0
f
;
#ifdef AMOEBA_DEBUG
float
rI
=
rsqrtf
(
r2
);
float
r
=
1.0
f
/
rI
;
debugArray
[
0
].
x
=
r
;
debugArray
[
0
].
y
=
debugArray
[
0
].
z
=
debugArray
[
0
].
w
=
0.0
f
;
debugArray
[
1
].
x
=
debugArray
[
1
].
y
=
debugArray
[
1
].
z
=
0.0
f
;
debugArray
[
1
].
w
=
r
;
#endif
return
;
return
;
}
}
float
rI
=
rsqrtf
(
r2
);
float
rI
=
rsqrtf
(
r2
);
...
@@ -156,17 +160,6 @@ __device__ void calculateVdw14_7PairIxn_kernel( float combindedSigma, float c
...
@@ -156,17 +160,6 @@ __device__ void calculateVdw14_7PairIxn_kernel( float combindedSigma, float c
force
[
1
]
*=
deltaE
;
force
[
1
]
*=
deltaE
;
force
[
2
]
*=
deltaE
;
force
[
2
]
*=
deltaE
;
#ifdef AMOEBA_DEBUG
debugArray
[
0
].
x
=
r
;
debugArray
[
0
].
y
=
deltaE
;
debugArray
[
0
].
z
=
combindedSigma
;
debugArray
[
0
].
w
=
combindedEpsilon
;
debugArray
[
1
].
x
=
tau
;
debugArray
[
1
].
y
=
rho
;
debugArray
[
1
].
z
=
gTau
;
debugArray
[
1
].
w
=
r
;
#endif
}
}
// perform reduction of force on H's and add to heavy atom partner
// perform reduction of force on H's and add to heavy atom partner
...
@@ -504,22 +497,6 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
...
@@ -504,22 +497,6 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
#ifdef AMOEBA_DEBUG_PRINT
static
const
char
*
methodName
=
"kCalculateAmoebaVdw14_7Forces"
;
if
(
1
&&
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s:
\n
"
,
methodName
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#ifdef AMOEBA_DEBUG
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
int
maxSlots
=
10
;
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
maxSlots
*
paddedNumberOfAtoms
,
1
,
"DebugArray"
);
memset
(
debugArray
->
_pSysData
,
0
,
sizeof
(
float
)
*
4
*
maxSlots
*
paddedNumberOfAtoms
);
debugArray
->
Upload
();
int
targetAtom
=
1
;
#endif
#endif
// set threads/block first time through
// set threads/block first time through
// on first pass, set threads/block
// on first pass, set threads/block
...
@@ -535,44 +512,9 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
...
@@ -535,44 +512,9 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
Vdw14_7Particle
),
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
Vdw14_7Particle
),
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
}
#ifdef AMOEBA_DEBUG_PRINT
if
(
0
){
static
int
iteration
=
0
;
checkForNansFloat4
(
gpu
->
natoms
,
gpu
->
psPosq4
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"
\n\n
zCoordPreCopyVdw"
,
stderr
);
}
#endif
kCalculateAmoebaVdw14_7CopyCoordinates
(
amoebaGpu
,
gpu
->
psPosq4
,
amoebaGpu
->
psAmoebaVdwCoordinates
);
kCalculateAmoebaVdw14_7CopyCoordinates
(
amoebaGpu
,
gpu
->
psPosq4
,
amoebaGpu
->
psAmoebaVdwCoordinates
);
kCalculateAmoebaVdw14_7CoordinateReduction
(
amoebaGpu
,
amoebaGpu
->
psAmoebaVdwCoordinates
,
amoebaGpu
->
psAmoebaVdwCoordinates
);
kCalculateAmoebaVdw14_7CoordinateReduction
(
amoebaGpu
,
amoebaGpu
->
psAmoebaVdwCoordinates
,
amoebaGpu
->
psAmoebaVdwCoordinates
);
#ifdef AMOEBA_DEBUG_PRINT
if
(
1
&&
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Apply cutoff=%d warp=%d
\n
"
,
applyCutoff
,
gpu
->
bOutputBufferPerWarp
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u ixnCt=%u workUnits=%u
\n
"
,
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
Vdw14_7Particle
),
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
if
(
0
){
gpu
->
psInteractionCount
->
Download
();
amoebaGpu
->
psVdwWorkUnit
->
Download
();
unsigned
int
totalWarps
=
(
gpu
->
sim
.
nonbond_blocks
*
threadsPerBlock
)
/
GRID
;
float
ratiof
=
(
float
)
totalWarps
/
(
float
)
amoebaGpu
->
psVdwWorkUnit
->
_length
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Ixn warps=%u count=%u
\n
"
,
totalWarps
,
gpu
->
psInteractionCount
->
_pSysData
[
0
]
);
for
(
unsigned
int
ii
=
0
;
ii
<
amoebaGpu
->
psVdwWorkUnit
->
_length
;
ii
++
){
unsigned
int
x
=
amoebaGpu
->
psVdwWorkUnit
->
_pSysData
[
ii
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
unsigned
int
exclusions
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
float
warp
=
(
float
)(
ii
)
*
ratiof
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"GpuCell %8u [%5u %5u %1u] %10u warp=%15.6f
\n
"
,
ii
,
x
,
y
,
exclusions
,
warp
);
}
}
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
// clear output arrays
// clear output arrays
kClearFields_3
(
amoebaGpu
,
1
);
kClearFields_3
(
amoebaGpu
,
1
);
...
@@ -588,33 +530,6 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
...
@@ -588,33 +530,6 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kFindInteractionsWithinBlocksVdwPeriodic"
);
LAUNCHERROR
(
"kFindInteractionsWithinBlocksVdwPeriodic"
);
#ifdef AMOEBA_DEBUG
if
(
0
&&
amoebaGpu
->
log
){
gpu
->
psInteractionCount
->
Download
();
gpu
->
psInteractingWorkUnit
->
Download
();
gpu
->
psInteractionFlag
->
Download
();
amoebaGpu
->
psVdwWorkUnit
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Vdw Ixn count=%u
\n
"
,
gpu
->
psInteractionCount
->
_pSysData
[
0
]
);
for
(
unsigned
int
ii
=
0
;
ii
<
gpu
->
psInteractingWorkUnit
->
_length
;
ii
++
){
unsigned
int
x
=
gpu
->
psInteractingWorkUnit
->
_pSysData
[
ii
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
unsigned
int
exclusions
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"GpuCell %8u %8u [%5u %5u %1u] %10u "
,
ii
,
gpu
->
psInteractingWorkUnit
->
_pSysData
[
ii
],
x
,
y
,
exclusions
,
gpu
->
psInteractionFlag
->
_pSysData
[
ii
]
);
x
=
amoebaGpu
->
psVdwWorkUnit
->
_pSysData
[
ii
];
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
exclusions
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
" AmGpu %8u [%5u %5u %1u]
\n
"
,
amoebaGpu
->
psWorkUnit
->
_pSysData
[
ii
],
x
,
y
,
exclusions
);
}
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
if
(
gpu
->
bOutputBufferPerWarp
){
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaVdw14_7CutoffByWarp_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaVdw14_7CutoffByWarp_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pInteractingWorkUnit
,
...
@@ -622,12 +537,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
...
@@ -622,12 +537,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
amoebaGpu
->
psVdwSigmaEpsilon
->
_pDevData
,
amoebaGpu
->
psVdwSigmaEpsilon
->
_pDevData
,
amoebaGpu
->
vdwSigmaCombiningRule
,
amoebaGpu
->
vdwSigmaCombiningRule
,
amoebaGpu
->
vdwEpsilonCombiningRule
,
amoebaGpu
->
vdwEpsilonCombiningRule
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
#endif
}
else
{
}
else
{
kCalculateAmoebaVdw14_7Cutoff_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaVdw14_7Cutoff_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
...
@@ -636,13 +546,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
...
@@ -636,13 +546,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
amoebaGpu
->
psVdwSigmaEpsilon
->
_pDevData
,
amoebaGpu
->
psVdwSigmaEpsilon
->
_pDevData
,
amoebaGpu
->
vdwSigmaCombiningRule
,
amoebaGpu
->
vdwSigmaCombiningRule
,
amoebaGpu
->
vdwEpsilonCombiningRule
,
amoebaGpu
->
vdwEpsilonCombiningRule
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
#endif
}
}
LAUNCHERROR
(
"kCalculateAmoebaVdw14_7Cutoff"
);
LAUNCHERROR
(
"kCalculateAmoebaVdw14_7Cutoff"
);
...
@@ -656,12 +560,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
...
@@ -656,12 +560,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
amoebaGpu
->
psVdwSigmaEpsilon
->
_pDevData
,
amoebaGpu
->
psVdwSigmaEpsilon
->
_pDevData
,
amoebaGpu
->
vdwSigmaCombiningRule
,
amoebaGpu
->
vdwSigmaCombiningRule
,
amoebaGpu
->
vdwEpsilonCombiningRule
,
amoebaGpu
->
vdwEpsilonCombiningRule
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
#endif
}
else
{
}
else
{
kCalculateAmoebaVdw14_7N2_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaVdw14_7N2_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
...
@@ -670,112 +569,15 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
...
@@ -670,112 +569,15 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
amoebaGpu
->
psVdwSigmaEpsilon
->
_pDevData
,
amoebaGpu
->
psVdwSigmaEpsilon
->
_pDevData
,
amoebaGpu
->
vdwSigmaCombiningRule
,
amoebaGpu
->
vdwSigmaCombiningRule
,
amoebaGpu
->
vdwEpsilonCombiningRule
,
amoebaGpu
->
vdwEpsilonCombiningRule
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
#endif
}
}
LAUNCHERROR
(
"kCalculateAmoebaVdw14_7N2"
);
LAUNCHERROR
(
"kCalculateAmoebaVdw14_7N2"
);
}
}
#ifdef AMOEBA_DEBUG_PRINT
if
(
amoebaGpu
->
log
){
static
int
iteration
=
0
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished 14-7 kernel execution step=%d
\n
"
,
++
iteration
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#ifdef AMOEBA_DEBUG
debugArray
->
Download
();
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
double
cutOff
=
1.0e+03
;
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
int
debugIndex
=
jj
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d %5d DebugVdw
\n
"
,
targetAtom
,
jj
);
for
(
int
kk
=
0
;
kk
<
5
;
kk
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"[%16.9e %16.9e %16.9e %16.9e]
\n
"
,
debugArray
->
_pSysData
[
debugIndex
].
x
,
debugArray
->
_pSysData
[
debugIndex
].
y
,
debugArray
->
_pSysData
[
debugIndex
].
z
,
debugArray
->
_pSysData
[
debugIndex
].
w
);
if
(
kk
==
4
&&
(
fabs
(
debugArray
->
_pSysData
[
debugIndex
].
x
)
>
cutOff
||
fabs
(
debugArray
->
_pSysData
[
debugIndex
].
y
)
>
cutOff
||
fabs
(
debugArray
->
_pSysData
[
debugIndex
].
z
)
>
cutOff
)
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
" XXXX
\n
"
);
}
debugIndex
+=
paddedNumberOfAtoms
;
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
}
#endif
/*
amoebaGpu->psWorkArray_3_2->Download();
amoebaGpu->psWorkArray_3_1->Download();
//for( int jj = 0; jj < 3*gpu->natoms; jj += 3 )
for( int jj = 0; jj < 3*gpu->natoms; jj += 3 ){
for( int kk = 0; kk < gpu->sim.outputBuffers; kk++ ){
float delta = fabs(amoebaGpu->psWorkArray_3_1->_pSysStream[kk][jj+2] + 1.0f);
if( delta < 5.0e-06 || isNanOrInfinity( (double) amoebaGpu->psWorkArray_3_1->_pSysStream[kk][jj] ) || isNanOrInfinity( (double) amoebaGpu->psWorkArray_3_1->_pSysStream[kk][jj+2] ) )
(void) fprintf( amoebaGpu->log,"%6d %6d [%16.9e %16.9e %16.9e] [%16.9e %16.9e %16.9e]\n", jj, kk,
amoebaGpu->psWorkArray_3_1->_pSysStream[kk][jj],
amoebaGpu->psWorkArray_3_1->_pSysStream[kk][jj+1],
amoebaGpu->psWorkArray_3_1->_pSysStream[kk][jj+2],
amoebaGpu->psWorkArray_3_2->_pSysStream[kk][jj],
amoebaGpu->psWorkArray_3_2->_pSysStream[kk][jj+1],
amoebaGpu->psWorkArray_3_2->_pSysStream[kk][jj+2] );
}
}
*/
}
#endif
#ifdef AMOEBA_DEBUG
if
(
0
){
static
int
iteration
=
0
;
checkForNansFloat4
(
gpu
->
natoms
,
amoebaGpu
->
gpuContext
->
psForce4
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"PreVdw"
,
stderr
);
checkForNansFloat4
(
gpu
->
natoms
,
gpu
->
psPosq4
,
gpu
->
psAtomIndex
->
_pSysData
,
iteration
,
"zCoordPreVdw"
,
stderr
);
}
#endif
kReduceVdw14_7
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
);
kReduceVdw14_7
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
);
#ifdef AMOEBA_DEBUG
if
(
0
){
static
int
iteration
=
0
;
checkForNans
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psWorkArray_3_2
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"Vdw32"
,
stderr
);
}
#endif
kCalculateAmoebaVdw14_7Reduction
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
,
amoebaGpu
->
gpuContext
->
psForce4
);
kCalculateAmoebaVdw14_7Reduction
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
,
amoebaGpu
->
gpuContext
->
psForce4
);
kCalculateAmoebaVdw14_7NonReduction
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
,
amoebaGpu
->
gpuContext
->
psForce4
);
kCalculateAmoebaVdw14_7NonReduction
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
,
amoebaGpu
->
gpuContext
->
psForce4
);
#ifdef AMOEBA_DEBUG
if
(
0
){
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
CUDAStream
<
float4
>*
psTempForce
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
,
1
,
"psTempForce"
);
kClearFloat4
(
amoebaGpu
,
paddedNumberOfAtoms
,
psTempForce
);
//kCalculateAmoebaVdw14_7Reduction( amoebaGpu, amoebaGpu->psWorkArray_3_2, psTempForce );
kCalculateAmoebaVdw14_7NonReduction
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
,
psTempForce
);
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
psTempForce
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaVdw"
,
fileId
,
outputVector
);
delete
psTempForce
;
//exit(0);
}
if
(
0
){
static
int
iteration
=
0
;
checkForNansFloat4
(
gpu
->
natoms
,
amoebaGpu
->
gpuContext
->
psForce4
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"VdwForce"
,
stderr
);
}
#endif
#ifdef AMOEBA_DEBUG
delete
debugArray
;
#endif
// ---------------------------------------------------------------------------------------
}
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.h
View file @
2b508482
This diff is collapsed.
Click to expand it.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
View file @
2b508482
This diff is collapsed.
Click to expand it.
plugins/amoeba/platforms/cuda/src/kernels/kFindInteractingBlocksVdw.h
View file @
2b508482
This diff is collapsed.
Click to expand it.
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment