Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
b8e12104
Commit
b8e12104
authored
Oct 09, 2010
by
Mark Friedrichs
Browse files
Cutoffs/PBC for Vdw force
parent
408469c3
Changes
18
Show whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
373 additions
and
375 deletions
+373
-375
platforms/cuda/src/kernels/cudaKernels.h
platforms/cuda/src/kernels/cudaKernels.h
+5
-0
plugins/amoeba/openmmapi/include/AmoebaVdwForce.h
plugins/amoeba/openmmapi/include/AmoebaVdwForce.h
+31
-1
plugins/amoeba/openmmapi/src/AmoebaVdwForce.cpp
plugins/amoeba/openmmapi/src/AmoebaVdwForce.cpp
+17
-1
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+2
-2
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
+35
-97
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
...ins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaTypes.h
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaTypes.h
+3
-0
plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
+1
-7
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
+2
-5
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
...latforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
+2
-4
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
+2
-5
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
...src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
+14
-17
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
...ms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
+5
-5
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+6
-6
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
...platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
+137
-69
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.h
.../platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.h
+87
-144
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
...platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
+3
-8
plugins/amoeba/platforms/cuda/tests/AmoebaTinkerParameterFile.cpp
...amoeba/platforms/cuda/tests/AmoebaTinkerParameterFile.cpp
+20
-3
No files found.
platforms/cuda/src/kernels/cudaKernels.h
View file @
b8e12104
...
...
@@ -136,3 +136,8 @@ extern void SetCustomExternalGlobalParams(const std::vector<float>& paramValues)
extern
void
SetCustomNonbondedForceExpression
(
const
Expression
<
256
>&
expression
);
extern
void
SetCustomNonbondedEnergyExpression
(
const
Expression
<
256
>&
expression
);
extern
void
SetCustomNonbondedGlobalParams
(
const
std
::
vector
<
float
>&
paramValues
);
extern
__global__
void
OPENMMCUDA_EXPORT
kFindBlockBoundsPeriodic_kernel
(
void
);
extern
__global__
void
OPENMMCUDA_EXPORT
kFindBlocksWithInteractionsPeriodic_kernel
(
void
);
extern
__global__
void
OPENMMCUDA_EXPORT
kFindInteractionsWithinBlocksPeriodic_kernel
(
unsigned
int
*
workUnit
);
plugins/amoeba/openmmapi/include/AmoebaVdwForce.h
View file @
b8e12104
...
...
@@ -143,11 +143,41 @@ public:
*/
void
getParticleExclusions
(
int
particleIndex
,
std
::
vector
<
int
>&
exclusions
)
const
;
/**
* Set cutoff
*
* @param cutoff cutoff
*/
void
setCutoff
(
double
cutoff
);
/**
* Get cutoff
*
* @return cutoff
*/
double
getCutoff
(
void
)
const
;
/**
* Set flag for employing periodic boundary conditions
*
* @param pbcFlag if nonzero, use periodic boundary conditions
*/
void
setPBC
(
int
pbcFlag
);
/**
* Get periodic boundary conditions flag
*
* @return periodic boundary conditions flag (nonzero -> use PBC)
*/
int
getPBC
(
void
)
const
;
protected:
ForceImpl
*
createImpl
();
private:
class
VdwInfo
;
int
usePBC
;
double
cutoff
;
std
::
string
sigmaCombiningRule
;
std
::
string
epsilonCombiningRule
;
std
::
vector
<
std
::
vector
<
int
>
>
exclusions
;
...
...
@@ -170,7 +200,7 @@ private:
class
AmoebaVdwForce
::
VdwInfo
{
public:
int
ivIndex
,
classIndex
;
double
reductionFactor
,
sigma
,
epsilon
;
double
reductionFactor
,
sigma
,
epsilon
,
cutoff
;
VdwInfo
()
{
ivIndex
=
classIndex
=
-
1
;
reductionFactor
=
0.0
;
...
...
plugins/amoeba/openmmapi/src/AmoebaVdwForce.cpp
View file @
b8e12104
...
...
@@ -36,7 +36,7 @@
using
namespace
OpenMM
;
AmoebaVdwForce
::
AmoebaVdwForce
()
{
AmoebaVdwForce
::
AmoebaVdwForce
()
:
usePBC
(
0
),
cutoff
(
1.0e+10
)
{
}
int
AmoebaVdwForce
::
addParticle
(
int
ivIndex
,
int
classIndex
,
double
sigma
,
double
epsilon
,
double
reductionFactor
)
{
...
...
@@ -102,6 +102,22 @@ void AmoebaVdwForce::getParticleExclusions( int particleIndex, std::vector< int
}
void
AmoebaVdwForce
::
setCutoff
(
double
inputCutoff
){
cutoff
=
inputCutoff
;
}
double
AmoebaVdwForce
::
getCutoff
(
void
)
const
{
return
cutoff
;
}
void
AmoebaVdwForce
::
setPBC
(
int
pbcFlag
){
usePBC
=
pbcFlag
;
}
int
AmoebaVdwForce
::
getPBC
(
void
)
const
{
return
usePBC
;
}
ForceImpl
*
AmoebaVdwForce
::
createImpl
()
{
return
new
AmoebaVdwForceImpl
(
*
this
);
}
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
b8e12104
...
...
@@ -954,7 +954,7 @@ static void computeAmoebaVdwForce( AmoebaCudaData& data ) {
// Vdw14_7F
kCalculateAmoebaVdw14_7Forces
(
gpu
);
kCalculateAmoebaVdw14_7Forces
(
gpu
,
data
.
getApplyCutoff
()
);
}
class
CudaCalcAmoebaVdwForceKernel
::
ForceInfo
:
public
CudaForceInfo
{
...
...
@@ -1014,7 +1014,7 @@ void CudaCalcAmoebaVdwForceKernel::initialize(const System& system, const Amoeba
gpuSetAmoebaVdwParameters
(
data
.
getAmoebaGpu
(),
indexIVs
,
indexClasses
,
sigmas
,
epsilons
,
reductions
,
force
.
getSigmaCombiningRule
(),
force
.
getEpsilonCombiningRule
(),
allExclusions
);
allExclusions
,
force
.
getPBC
(),
static_cast
<
float
>
(
force
.
getCutoff
())
);
data
.
getAmoebaGpu
()
->
gpuContext
->
forces
.
push_back
(
new
ForceInfo
(
force
));
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
View file @
b8e12104
...
...
@@ -339,8 +339,6 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(
void
)
fprintf
(
log
,
" maxCovalentDegreeSz %d
\n
"
,
amoebaGpu
->
maxCovalentDegreeSz
);
(
void
)
fprintf
(
log
,
" solventDielectric %10.3f
\n
"
,
amoebaGpu
->
solventDielectric
);
(
void
)
fprintf
(
log
,
" pGamma %10.3f
\n
"
,
amoebaGpu
->
pGamma
);
(
void
)
fprintf
(
log
,
" scalingDistanceCutoff %10.3f
\n
"
,
amoebaGpu
->
scalingDistanceCutoff
);
(
void
)
fprintf
(
log
,
" scalingDistanceCutoff %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
scalingDistanceCutoff
);
(
void
)
fprintf
(
log
,
" pDampingFactorAndThole %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pDampingFactorAndThole
);
(
void
)
fprintf
(
log
,
" pScaleIndicesIndex %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pScaleIndicesIndex
);
...
...
@@ -384,7 +382,6 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psWorkVector
[
0
],
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psForce
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psTorque
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psEnergy
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
torqueMapForce
,
log
);
(
void
)
fprintf
(
log
,
" maxMapTorqueDifference %d
\n
"
,
amoebaGpu
->
maxMapTorqueDifference
);
...
...
@@ -404,12 +401,11 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(
void
)
fprintf
(
log
,
"
\n
"
);
(
void
)
fprintf
(
log
,
" useVdwTable %u
\n
"
,
amoebaGpu
->
useVdwTable
);
(
void
)
fprintf
(
log
,
" vdwTableSize %u
\n
"
,
amoebaGpu
->
vdwTableSize
);
(
void
)
fprintf
(
log
,
" vdwSigmaCombiningRule %d
\n
"
,
amoebaGpu
->
vdwSigmaCombiningRule
);
(
void
)
fprintf
(
log
,
" vdwEpsilonCombiningRule %d
\n
"
,
amoebaGpu
->
vdwEpsilonCombiningRule
);
(
void
)
fprintf
(
log
,
" vdwUsePBC %d
\n
"
,
amoebaGpu
->
amoebaSim
.
vdwUsePBC
);
(
void
)
fprintf
(
log
,
" vdwCutoff2 %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
vdwCutoff2
);
gpuPrintCudaStreamFloat2
(
amoebaGpu
->
psVdwSigmaEpsilon
,
log
);
gpuPrintCudaStreamFloat2
(
amoebaGpu
->
psVdwTable
,
log
);
gpuPrintCudaStreamInt
(
amoebaGpu
->
psAmoebaVdwNonReductionID
,
log
);
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaVdwReductionID
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psAmoebaVdwReduction
,
log
);
...
...
@@ -1469,7 +1465,6 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
amoebaGpu
->
maxCovalentDegreeSz
=
maxCovalentRange
;
amoebaGpu
->
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
amoebaGpu
->
scalingDistanceCutoff
=
static_cast
<
float
>
(
scalingDistanceCutoff
);
gpuRotationToLabFrameAllocate
(
amoebaGpu
);
gpuFixedEFieldAllocate
(
amoebaGpu
);
...
...
@@ -1708,11 +1703,6 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
// covalent/polarization degree
if
(
ii
<
1
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Gamma=%.5f scaledDistCutoff=%.5f
\n
"
,
amoebaGpu
->
pGamma
,
amoebaGpu
->
scalingDistanceCutoff
);
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%3d covalent/polarization degree: minIdx[%6d %6d] Thole=%12.5f dampingFactor=%12.5f
\n
"
,
ii
,
amoebaGpu
->
psCovalentDegree
->
_pSysStream
[
0
][
particlesOffset
],
amoebaGpu
->
psPolarizationDegree
->
_pSysStream
[
0
][
particlesOffset
],
amoebaGpu
->
psDampingFactorAndThole
->
_pSysStream
[
0
][
ii
].
y
,
amoebaGpu
->
psDampingFactorAndThole
->
_pSysStream
[
0
][
ii
].
x
);
...
...
@@ -1884,7 +1874,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
// upload
amoebaGpu
->
amoebaSim
.
scalingDistanceCutoff
=
amoebaGpu
->
scalingDistanceCutoff
;
amoebaGpu
->
amoebaSim
.
scalingDistanceCutoff
=
static_cast
<
float
>
(
scalingDistanceCutoff
)
;
amoebaGpu
->
amoebaSim
.
numberOfAtoms
=
amoebaGpu
->
gpuContext
->
natoms
;
amoebaGpu
->
amoebaSim
.
paddedNumberOfAtoms
=
amoebaGpu
->
paddedNumberOfAtoms
;
...
...
@@ -1992,7 +1982,8 @@ void gpuSetAmoebaVdwParameters( amoebaGpuContext amoebaGpu,
const
std
::
vector
<
float
>&
reductions
,
const
std
::
string
&
vdwSigmaCombiningRule
,
const
std
::
string
&
vdwEpsilonCombiningRule
,
const
std
::
vector
<
std
::
vector
<
int
>
>&
allExclusions
)
const
std
::
vector
<
std
::
vector
<
int
>
>&
allExclusions
,
int
usePBC
,
float
cutoff
)
{
// ---------------------------------------------------------------------------------------
...
...
@@ -2000,10 +1991,14 @@ void gpuSetAmoebaVdwParameters( amoebaGpuContext amoebaGpu,
// ---------------------------------------------------------------------------------------
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
amoebaGpu
->
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
unsigned
int
particles
=
sigmas
.
size
();
amoebaGpu
->
amoebaSim
.
vdwUsePBC
=
usePBC
;
amoebaGpu
->
amoebaSim
.
vdwCutoff2
=
cutoff
*
cutoff
;
// set sigma combining rule flag
if
(
vdwSigmaCombiningRule
.
compare
(
"ARITHMETIC"
)
==
0
){
...
...
@@ -2157,13 +2152,12 @@ void gpuSetAmoebaVdwParameters( amoebaGpuContext amoebaGpu,
psVdwReductionID
->
Upload
();
psAmoebaVdwReduction
->
Upload
();
amoebaGpuBuildOutputBuffers
(
amoebaGpu
);
amoebaGpuBuildVdwExclusionList
(
amoebaGpu
,
allExclusions
);
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
unsigned
int
maxPrint
=
32
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s useVdwTable=%d size=%d
\n
"
,
methodName
,
amoebaGpu
->
useVdwTable
,
(
amoebaGpu
->
useVdwTable
?
amoebaGpu
->
vdwTableSize
:
0
)
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s sigma/epsilon combining rules=%d %d
\n
"
,
methodName
,
amoebaGpu
->
vdwSigmaCombiningRule
,
amoebaGpu
->
vdwEpsilonCombiningRule
);
for
(
unsigned
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
)
...
...
@@ -2659,7 +2653,6 @@ void amoebaGpuShutDown(amoebaGpuContext gpu)
delete
gpu
->
psWorkVector
[
3
];
delete
gpu
->
psForce
;
delete
gpu
->
psTorque
;
delete
gpu
->
psEnergy
;
delete
gpu
->
torqueMapForce
;
delete
gpu
->
psGk_Field
;
...
...
@@ -2671,7 +2664,6 @@ void amoebaGpuShutDown(amoebaGpuContext gpu)
delete
gpu
->
psKirkwoodEDiffForce
;
delete
gpu
->
psVdwSigmaEpsilon
;
delete
gpu
->
psVdwTable
;
delete
gpu
->
psAmoebaVdwNonReductionID
;
delete
gpu
->
psAmoebaVdwReductionID
;
delete
gpu
->
psAmoebaVdwReduction
;
...
...
@@ -2768,7 +2760,12 @@ extern "C"
void
amoebaGpuBuildOutputBuffers
(
amoebaGpuContext
amoebaGpu
)
{
if
(
amoebaGpu
->
nonbondBlocks
==
amoebaGpu
->
gpuContext
->
sim
.
blocks
){
return
;
}
unsigned
int
paddedNumberOfAtoms
=
amoebaGpu
->
paddedNumberOfAtoms
;
amoebaGpu
->
nonbondBlocks
=
amoebaGpu
->
gpuContext
->
sim
.
blocks
;
amoebaGpu
->
threadsPerBlock
=
amoebaGpu
->
gpuContext
->
sim
.
threads_per_block
;
...
...
@@ -2810,6 +2807,15 @@ void amoebaGpuBuildOutputBuffers( amoebaGpuContext amoebaGpu )
amoebaGpu
->
fieldReduceThreadsPerBlock
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
if
(
amoebaGpu
->
psWorkArray_3_1
){
delete
amoebaGpu
->
psWorkArray_3_1
;
delete
amoebaGpu
->
psWorkArray_3_2
;
delete
amoebaGpu
->
psWorkArray_3_3
;
delete
amoebaGpu
->
psWorkArray_3_4
;
delete
amoebaGpu
->
psWorkArray_1_1
;
delete
amoebaGpu
->
psWorkArray_1_2
;
}
amoebaGpu
->
psWorkArray_3_1
=
new
CUDAStream
<
float
>
(
3
*
paddedNumberOfAtoms
,
(
amoebaGpu
->
outputBuffers
),
"AmoebaField_3_1"
);
amoebaGpu
->
amoebaSim
.
pWorkArray_3_1
=
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
];
...
...
@@ -2826,8 +2832,6 @@ void amoebaGpuBuildOutputBuffers( amoebaGpuContext amoebaGpu )
amoebaGpu
->
psWorkArray_1_2
=
new
CUDAStream
<
float
>
(
paddedNumberOfAtoms
,
(
amoebaGpu
->
outputBuffers
),
"AmoebaField_1_2"
);
amoebaGpu
->
amoebaSim
.
pWorkArray_1_2
=
amoebaGpu
->
psWorkArray_1_2
->
_pDevStream
[
0
];
amoebaGpu
->
psEnergy
=
new
CUDAStream
<
float
>
(
amoebaGpu
->
energyOutputBuffers
,
1
,
"AmoebaEnergy"
);
return
;
}
...
...
@@ -2928,87 +2932,23 @@ int amoebaGpuBuildThreadBlockWorkList( amoebaGpuContext amoebaGpu )
CUDAStream
<
unsigned
int
>*
psWorkUnit
=
new
CUDAStream
<
unsigned
int
>
(
cells
,
1u
,
"WorkUnit"
);
unsigned
int
*
pWorkList
=
psWorkUnit
->
_pSysData
;
amoebaGpu
->
psWorkUnit
=
psWorkUnit
;
memset
(
amoebaGpu
->
psWorkUnit
->
_pSysStream
[
0
],
0
,
cells
*
sizeof
(
unsigned
int
)
);
CUDAStream
<
unsigned
int
>*
psVdwWorkUnit
=
new
CUDAStream
<
unsigned
int
>
(
cells
,
1u
,
"VdwWorkUnit"
);
unsigned
int
*
pVdwWorkList
=
psVdwWorkUnit
->
_pSysData
;
amoebaGpu
->
amoebaSim
.
pVdwWorkUnit
=
psVdwWorkUnit
->
_pDevStream
[
0
];
amoebaGpu
->
psVdwWorkUnit
=
psVdwWorkUnit
;
/*
CUDAStream<unsigned int>* psInteractingWorkUnit = new CUDAStream<unsigned int>(cells, 1u, "InteractingWorkUnit");
amoebaGpu->psInteractingWorkUnit = psInteractingWorkUnit;
amoebaGpu->workUnits = cells;
CUDAStream<unsigned int>* psInteractionFlag = new CUDAStream<unsigned int>(cells, 1u, "InteractionFlag");
amoebaGpu->psInteractionFlag = psInteractionFlag;
amoebaGpu->sim.pInteractionFlag = psInteractionFlag->_pDevStream[0];
CUDAStream<size_t>* psInteractionCount = new CUDAStream<size_t>(1, 1u, "InteractionCount");
amoebaGpu->psInteractionCount = psInteractionCount;
amoebaGpu->sim.pInteractionCount = psInteractionCount->_pDevStream[0];
CUDAStream<float4>* psGridBoundingBox = new CUDAStream<float4>(dim, 1u, "GridBoundingBox");
amoebaGpu->psGridBoundingBox = psGridBoundingBox;
amoebaGpu->sim.pGridBoundingBox = psGridBoundingBox->_pDevStream[0];
CUDAStream<float4>* psGridCenter = new CUDAStream<float4>(dim, 1u, "GridCenter");
amoebaGpu->psGridCenter = psGridCenter;
amoebaGpu->sim.pGridCenter = psGridCenter->_pDevStream[0];
amoebaGpu->sim.nonbond_workBlock = amoebaGpu->sim.nonbondThreadsPerBlock / GRID;
amoebaGpu->sim.bornForce2_workBlock = amoebaGpu->sim.bornForce2_threads_per_block / GRID;
amoebaGpu->sim.workUnits = cells;
*/
// Initialize the plan for doing stream compaction.
// planCompaction(amoebaGpu->compactPlan);
// Increase block count if necessary for extra large molecules that would
// otherwise overflow the SM workunit buffers
// int minimumBlocks = (cells + amoebaGpu->sim.workUnitsPerSM - 1) / amoebaGpu->sim.workUnitsPerSM;
// if ((int) amoebaGpu->sim.nonbond_blocks < minimumBlocks)
// {
// amoebaGpu->sim.nonbond_blocks = amoebaGpu->sim.nonbond_blocks * ((minimumBlocks + amoebaGpu->sim.nonbond_blocks - 1) / amoebaGpu->sim.nonbond_blocks);
// }
// if ((int) amoebaGpu->sim.bornForce2_blocks < minimumBlocks)
// {
// amoebaGpu->sim.bornForce2_blocks = amoebaGpu->sim.bornForce2_blocks * ((minimumBlocks + amoebaGpu->sim.bornForce2_blocks - 1) / amoebaGpu->sim.bornForce2_blocks);
// }
/*
amoebaGpu->sim.nbWorkUnitsPerBlock = cells / amoebaGpu->sim.nonbond_blocks;
amoebaGpu->sim.nbWorkUnitsPerBlockRemainder = cells - amoebaGpu->sim.nonbond_blocks * amoebaGpu->sim.nbWorkUnitsPerBlock;
amoebaGpu->sim.interaction_threads_per_block = 64;
amoebaGpu->sim.interaction_blocks = (amoebaGpu->workUnits + amoebaGpu->sim.interaction_threads_per_block - 1) / amoebaGpu->sim.interaction_threads_per_block;
if (amoebaGpu->sim.interaction_blocks > 8*amoebaGpu->sim.blocks)
amoebaGpu->sim.interaction_blocks = 8*amoebaGpu->sim.blocks;
if (activeWorkUnits > (int) cells)
{
int balancedWorkBlock = (cells + amoebaGpu->sim.nonbond_blocks - 1) / amoebaGpu->sim.nonbond_blocks;
amoebaGpu->sim.nonbondThreadsPerBlock = balancedWorkBlock * GRID;
amoebaGpu->sim.nonbond_workBlock = balancedWorkBlock;
}
activeWorkUnits = amoebaGpu->sim.bornForce2_blocks * amoebaGpu->sim.bornForce2_workBlock;
if (activeWorkUnits > (int) cells)
{
int balancedWorkBlock = (cells + amoebaGpu->sim.bornForce2_blocks - 1) / amoebaGpu->sim.bornForce2_blocks;
amoebaGpu->sim.bornForce2_threads_per_block = balancedWorkBlock * GRID;
amoebaGpu->sim.bornForce2_workBlock = balancedWorkBlock;
}
*/
memset
(
amoebaGpu
->
psVdwWorkUnit
->
_pSysStream
[
0
],
0
,
cells
*
sizeof
(
unsigned
int
)
);
unsigned
int
count
=
0
;
for
(
unsigned
int
y
=
0
;
y
<
dim
;
y
++
)
{
for
(
unsigned
int
x
=
y
;
x
<
dim
;
x
++
)
for
(
unsigned
int
x
=
y
;
x
<
dim
;
x
++
,
count
++
)
{
pWorkList
[
count
]
=
encodeCell
(
x
,
y
);
pVdwWorkList
[
count
]
=
encodeCell
(
x
,
y
);
count
++
;
pVdwWorkList
[
count
]
=
pWorkList
[
count
];
}
}
//(*amoebaGpu->psInteractionCount)[0] = amoebaGpu->workUnits;
//amoebaGpu->psInteractionCount->Upload();
psWorkUnit
->
Upload
();
psVdwWorkUnit
->
Upload
();
...
...
@@ -3100,8 +3040,8 @@ void amoebaGpuBuildScalingList( amoebaGpuContext amoebaGpu )
CUDAStream
<
int
>*
psScalingIndicesIndex
=
new
CUDAStream
<
int
>
(
cells
,
1u
,
"ScalingIndicesIndex"
);
amoebaGpu
->
psScalingIndicesIndex
=
psScalingIndicesIndex
;
amoebaGpu
->
amoebaSim
.
pScaleIndicesIndex
=
psScalingIndicesIndex
->
_pDevStream
[
0
];
memset
(
amoebaGpu
->
psScalingIndicesIndex
->
_pSysStream
[
0
],
0
,
cells
*
sizeof
(
unsigned
int
)
);
memset
(
amoebaGpu
->
psScalingIndicesIndex
->
_pSysStream
[
0
],
0
,
sizeof
(
cells
)
*
sizeof
(
unsigned
int
)
);
int
numWithScalingIndices
=
0
;
int
gridOffset
=
grid
-
1
;
int
lastBlock
=
(
static_cast
<
int
>
(
paddedAtoms
)
>
amoebaGpu
->
gpuContext
->
natoms
)
?
(
amoebaGpu
->
gpuContext
->
natoms
)
/
grid
:
-
1
;
...
...
@@ -4308,19 +4248,17 @@ void trackMutualInducedIterations( amoebaGpuContext amoebaGpu, int iteration){
void
gpuCopyWorkUnit
(
amoebaGpuContext
amoebaGpu
){
// ---------------------------------------------------------------------------------------
/*
gpuContext gpu = amoebaGpu->gpuContext;
gpu->psInteractingWorkUnit->Download();
gpu->psWorkUnit->Download();
amoebaGpu->psWorkUnit->Download();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"gpuCopyInteractingWorkUnit called -- to be removed.
\n
"
);
for( unsigned int ii = 0; ii < gpu->psInteractingWorkUnit->_length; ii++ ){
//gpu->psInteractingWorkUnit->_pSysStream[0][ii] = amoebaGpu->psWorkUnit->_pSysStream[0][ii];
gpu->psWorkUnit->_pSysStream[0][ii] = amoebaGpu->psWorkUnit->_pSysStream[0][ii];
}
gpu->psInteractingWorkUnit->Upload();
gpu->psWorkUnit->Upload();
*/
// ---------------------------------------------------------------------------------------
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
View file @
b8e12104
...
...
@@ -57,7 +57,7 @@ extern void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool per
extern
void
SetCalculateAmoebaCudaVdw14_7Sim
(
amoebaGpuContext
gpu
);
extern
void
GetCalculateAmoebaCudaVdw14_7Sim
(
amoebaGpuContext
gpu
);
extern
void
kCalculateAmoebaVdw14_7Forces
(
amoebaGpuContext
amoebaGpu
);
extern
void
kCalculateAmoebaVdw14_7Forces
(
amoebaGpuContext
amoebaGpu
,
int
applyCutoff
);
// wca dispersion
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaTypes.h
View file @
b8e12104
...
...
@@ -146,8 +146,11 @@ struct cudaAmoebaGmxSimulation {
float
*
pWorkArray_1_1
;
float
*
pWorkArray_1_2
;
int
vdwUsePBC
;
float
vdwCutoff2
;
unsigned
int
amoebaVdwNonReductions
;
int
*
pAmoebaVdwNonReductionID
;
unsigned
int
*
pVdwWorkUnit
;
unsigned
int
amoebaVdwReductions
;
int4
*
pAmoebaVdwReductionID
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
View file @
b8e12104
...
...
@@ -136,8 +136,6 @@ struct _amoebaGpuContext {
// scaling-related parameters
float
pGamma
;
float
scalingDistanceCutoff
;
CUDAStream
<
float2
>*
psDampingFactorAndThole
;
// slated for removal -- no longer used
...
...
@@ -177,7 +175,6 @@ struct _amoebaGpuContext {
CUDAStream
<
float
>*
psForce
;
CUDAStream
<
float
>*
psTorque
;
CUDAStream
<
float
>*
psEnergy
;
CUDAStream
<
float
>*
torqueMapForce
;
int
maxMapTorqueDifference
;
int
maxMapTorqueDifferencePow2
;
...
...
@@ -198,9 +195,6 @@ struct _amoebaGpuContext {
CUDAStream
<
float2
>*
psVdwSigmaEpsilon
;
unsigned
int
useVdwTable
;
unsigned
int
vdwTableSize
;
CUDAStream
<
float2
>*
psVdwTable
;
CUDAStream
<
int
>*
psAmoebaVdwNonReductionID
;
CUDAStream
<
int4
>*
psAmoebaVdwReductionID
;
CUDAStream
<
float
>*
psAmoebaVdwReduction
;
...
...
@@ -322,7 +316,7 @@ void gpuSetAmoebaVdwParameters( amoebaGpuContext amoebaGpu,
const
std
::
vector
<
float
>&
reductions
,
const
std
::
string
&
sigmaCombiningRule
,
const
std
::
string
&
epsilonCombiningRule
,
const
std
::
vector
<
std
::
vector
<
int
>
>&
allExclusions
);
const
std
::
vector
<
std
::
vector
<
int
>
>&
allExclusions
,
int
usePBC
,
float
cutoff
);
extern
"C"
void
gpuSetAmoebaPMEParameters
(
amoebaGpuContext
amoebaGpu
,
float
alpha
,
int
gridSizeX
,
int
gridSizeY
,
int
gridSizeZ
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
View file @
b8e12104
...
...
@@ -766,11 +766,8 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu )
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s %d maxCovalentDegreeSz=%d"
" gamma=%.3e scalingDistanceCutoff=%.3f ZZZ
\n
"
,
methodName
,
gpu
->
natoms
,
amoebaGpu
->
maxCovalentDegreeSz
,
amoebaGpu
->
pGamma
,
amoebaGpu
->
scalingDistanceCutoff
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s %d maxCovalentDegreeSz=%d ZZZ
\n
"
,
methodName
,
gpu
->
natoms
,
amoebaGpu
->
maxCovalentDegreeSz
);
}
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
*
paddedNumberOfAtoms
,
1
,
"DebugArray"
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
View file @
b8e12104
...
...
@@ -1861,10 +1861,8 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s %d maxCovalentDegreeSz=%d"
" gamma=%.3e scalingDistanceCutoff=%.3f ZZZ
\n
"
,
methodName
,
gpu
->
natoms
,
amoebaGpu
->
maxCovalentDegreeSz
,
amoebaGpu
->
pGamma
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s %d maxCovalentDegreeSz=%d ZZZ
\n
"
,
methodName
,
gpu
->
natoms
,
amoebaGpu
->
maxCovalentDegreeSz
);
amoebaGpu
->
scalingDistanceCutoff
);
}
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
View file @
b8e12104
...
...
@@ -1035,11 +1035,8 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s %d maxCovalentDegreeSz=%d"
" gamma=%.3e scalingDistanceCutoff=%.3f ZZZ
\n
"
,
methodName
,
gpu
->
natoms
,
amoebaGpu
->
maxCovalentDegreeSz
,
amoebaGpu
->
pGamma
,
amoebaGpu
->
scalingDistanceCutoff
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s %d maxCovalentDegreeSz=%d ZZZ
\n
"
,
methodName
,
gpu
->
natoms
,
amoebaGpu
->
maxCovalentDegreeSz
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
View file @
b8e12104
...
...
@@ -1028,11 +1028,11 @@ __device__ void loadPmeDirectElectrostaticShared( struct PmeDirectElectrostaticP
// Include versions of the kernels for N^2 calculations.
#undef USE_OUTPUT_BUFFER_PER_WARP
#define METHOD_NAME(a, b) a##
N2
##b
#define METHOD_NAME(a, b) a##
Cutoff
##b
#include "kCalculateAmoebaCudaPmeDirectElectrostatic.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##
N2
ByWarp##b
#define METHOD_NAME(a, b) a##
Cutoff
ByWarp##b
#include "kCalculateAmoebaCudaPmeDirectElectrostatic.h"
// reduce psWorkArray_3_1 -> force
...
...
@@ -1088,11 +1088,8 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s %d maxCovalentDegreeSz=%d"
" gamma=%.3e scalingDistanceCutoff=%.3f ZZZ
\n
"
,
methodName
,
gpu
->
natoms
,
amoebaGpu
->
maxCovalentDegreeSz
,
amoebaGpu
->
pGamma
,
amoebaGpu
->
scalingDistanceCutoff
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s %d maxCovalentDegreeSz=%d ZZZ
\n
"
,
methodName
,
gpu
->
natoms
,
amoebaGpu
->
maxCovalentDegreeSz
);
}
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
*
paddedNumberOfAtoms
,
1
,
"DebugArray"
);
...
...
@@ -1142,26 +1139,26 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
maxThreads
=
128
;
else
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
PmeDirectElectrostaticParticle
)
+
sizeof
(
float3
)
),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
PmeDirectElectrostaticParticle
)),
maxThreads
);
}
kClearFields_3
(
amoebaGpu
,
2
);
#ifdef AMOEBA_DEBUG
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoebaPmeDirectElectrostatic
N2
Forces: threadsPerBlock=%u getThreadsPerBlock=%d sizeof=%u
\n
"
,
threadsPerBlock
,
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
PmeDirectElectrostaticParticle
)
+
sizeof
(
float3
)
),
(
sizeof
(
PmeDirectElectrostaticParticle
)
+
sizeof
(
float3
))
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoebaPmeDirectElectrostatic
Cutoff
Forces: threadsPerBlock=%u getThreadsPerBlock=%d sizeof=%u
\n
"
,
threadsPerBlock
,
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
PmeDirectElectrostaticParticle
)),
sizeof
(
PmeDirectElectrostaticParticle
)
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoebaPmeDirectElectrostatic
N2
Forces no warp: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Obuf=%u ixnCt=%u workUnits=%u gpu->nonbond_threads_per_block=%u
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoebaPmeDirectElectrostatic
Cutoff
Forces no warp: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Obuf=%u ixnCt=%u workUnits=%u gpu->nonbond_threads_per_block=%u
\n
"
,
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
PmeDirectElectrostaticParticle
)
+
sizeof
(
float3
)
,
(
sizeof
(
PmeDirectElectrostaticParticle
)
+
sizeof
(
float3
)
)
*
threadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
nonbond_threads_per_block
);
sizeof
(
PmeDirectElectrostaticParticle
),
(
sizeof
(
PmeDirectElectrostaticParticle
))
*
threadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
nonbond_threads_per_block
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#endif
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaPmeDirectElectrostatic
N2
ByWarpForces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
(
sizeof
(
PmeDirectElectrostaticParticle
)
+
sizeof
(
float3
))
*
threadsPerBlock
>>>
(
kCalculateAmoebaPmeDirectElectrostatic
Cutoff
ByWarpForces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
PmeDirectElectrostaticParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
...
...
@@ -1176,7 +1173,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
// gpu->sim.pInteractingWorkUnit,
// amoebaGpu->psWorkUnit->_pDevStream[0],
kCalculateAmoebaPmeDirectElectrostatic
N2
Forces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
(
sizeof
(
PmeDirectElectrostaticParticle
)
+
sizeof
(
float3
))
*
threadsPerBlock
>>>
(
kCalculateAmoebaPmeDirectElectrostatic
Cutoff
Forces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
PmeDirectElectrostaticParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
...
...
@@ -1186,7 +1183,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
]
);
#endif
}
LAUNCHERROR
(
"kCalculateAmoebaPmeDirectElectrostatic
N2
Forces"
);
LAUNCHERROR
(
"kCalculateAmoebaPmeDirectElectrostatic
Cutoff
Forces"
);
kReduceForceTorque
(
amoebaGpu
);
#ifdef AMOEBA_DEBUG
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
View file @
b8e12104
...
...
@@ -373,11 +373,11 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
// Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##
N2
##b
#define METHOD_NAME(a, b) a##
Cutoff
##b
#include "kCalculateAmoebaCudaPmeFixedEField.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##
N2
ByWarp##b
#define METHOD_NAME(a, b) a##
Cutoff
ByWarp##b
#include "kCalculateAmoebaCudaPmeFixedEField.h"
/**---------------------------------------------------------------------------------------
...
...
@@ -443,7 +443,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
}
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaPmeDirectFixedE_Field
N2
ByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaPmeDirectFixedE_Field
Cutoff
ByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
...
...
@@ -453,7 +453,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
]
);
#endif
}
else
{
kCalculateAmoebaPmeDirectFixedE_Field
N2
_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaPmeDirectFixedE_Field
Cutoff
_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
...
...
@@ -473,7 +473,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
(
void
)
fprintf
(
amoebaGpu
->
log
,
"cudaComputeAmoebaPmeDirectFixedEField: threadsPerBlock=%u getThreadsPerBlock=%d sizeof=%u shrd=%u
\n
"
,
threadsPerBlock
,
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
)),
(
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
)),
(
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
))
*
threadsPerBlock
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Amoeba
N2
Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u warp=%d
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Amoeba
Cutoff
Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u warp=%d
\n
"
,
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
FixedFieldParticle
),
sizeof
(
FixedFieldParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
,
gpu
->
bOutputBufferPerWarp
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
View file @
b8e12104
...
...
@@ -218,11 +218,11 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce
// Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##
N2
##b
#define METHOD_NAME(a, b) a##
Cutoff
##b
#include "kCalculateAmoebaCudaPmeMutualInducedField.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##
N2
ByWarp##b
#define METHOD_NAME(a, b) a##
Cutoff
ByWarp##b
#include "kCalculateAmoebaCudaPmeMutualInducedField.h"
__global__
...
...
@@ -413,7 +413,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
if
(
gpu
->
bOutputBufferPerWarp
){
//gpu->sim.pInteractingWorkUnit,
//amoebaGpu->psWorkUnit->_pDevStream[0],
kCalculateAmoebaPmeMutualInducedField
N2
ByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaPmeMutualInducedField
Cutoff
ByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
...
...
@@ -426,14 +426,14 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
}
else
{
#ifdef AMOEBA_DEBUG
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
N2
no warp
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Amoeba
N2
Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
Cutoff
no warp
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Amoeba
Cutoff
Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
\n
"
,
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
MutualInducedParticle
),
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#endif
kCalculateAmoebaPmeMutualInducedField
N2
_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaPmeMutualInducedField
Cutoff
_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
View file @
b8e12104
...
...
@@ -4,6 +4,7 @@
#include "amoebaGpuTypes.h"
#include "amoebaCudaKernels.h"
#include "cudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h"
#include "kCalculateAmoebaCudaVdwParticle.h"
#include "amoebaScaleFactors.h"
...
...
@@ -62,22 +63,6 @@ __device__ void loadVdw14_7Shared( struct Vdw14_7Particle* sA, unsigned int atom
}
// load struct and arrays w/ shared data in sA
__device__
void
loadVdw14_7Data
(
struct
Vdw14_7Particle
*
sA
,
float4
*
jCoord
,
float
*
jSigma
,
float
*
jEpsilon
)
{
// load coordinates, sigma, epsilon
jCoord
->
x
=
sA
->
x
;
jCoord
->
y
=
sA
->
y
;
jCoord
->
z
=
sA
->
z
;
*
jSigma
=
sA
->
sigma
;
*
jEpsilon
=
sA
->
epsilon
;
}
__device__
void
getVdw14_7CombindedSigmaEpsilon_kernel
(
int
sigmaCombiningRule
,
float
iSigma
,
float
jSigma
,
float
*
combindedSigma
,
int
epsilonCombiningRule
,
float
iEpsilon
,
float
jEpsilon
,
float
*
combindedEpsilon
)
{
...
...
@@ -106,15 +91,12 @@ __device__ void getVdw14_7CombindedSigmaEpsilon_kernel( int sigmaCombiningRule,
}
__device__
void
calculateVdw14_7PairIxn_kernel
(
float4
atomCoordinatesI
,
float4
atomCoordinatesJ
,
float
combindedSigma
,
float
combindedEpsilon
,
__device__
void
calculateVdw14_7PairIxn_kernel
(
float
combindedSigma
,
float
combindedEpsilon
,
float
force
[
3
],
float
*
energy
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
#endif
)
)
{
const
float
deltaHalM1
=
0.07
f
;
...
...
@@ -124,15 +106,15 @@ __device__ void calculateVdw14_7PairIxn_kernel( float4 atomCoordinatesI, float4
// ---------------------------------------------------------------------------------------
// get deltaR, and r between 2 atoms
force
[
0
]
=
atomCoordinatesJ
.
x
-
atomCoordinatesI
.
x
;
force
[
1
]
=
atomCoordinatesJ
.
y
-
atomCoordinatesI
.
y
;
force
[
2
]
=
atomCoordinatesJ
.
z
-
atomCoordinatesI
.
z
;
// on input force[i] is assummed to contain delta[i] for coordinates of atom I and J
float
rI
=
rsqrtf
(
force
[
0
]
*
force
[
0
]
+
force
[
1
]
*
force
[
1
]
+
force
[
2
]
*
force
[
2
]
);
float
r2
=
force
[
0
]
*
force
[
0
]
+
force
[
1
]
*
force
[
1
]
+
force
[
2
]
*
force
[
2
];
if
(
r2
>
cAmoebaSim
.
vdwCutoff2
){
*
energy
=
force
[
0
]
=
force
[
1
]
=
force
[
2
]
=
0.0
f
;
return
;
}
float
rI
=
rsqrtf
(
r2
);
float
r
=
1.0
f
/
rI
;
float
r2
=
r
*
r
;
float
r6
=
r2
*
r2
*
r2
;
float
r7
=
r6
*
r
;
...
...
@@ -166,6 +148,7 @@ __device__ void calculateVdw14_7PairIxn_kernel( float4 atomCoordinatesI, float4
debugArray
[
1
].
x
=
tau
;
debugArray
[
1
].
y
=
rho
;
debugArray
[
1
].
z
=
gTau
;
debugArray
[
1
].
w
=
r
;
#endif
}
...
...
@@ -424,6 +407,21 @@ static void kCalculateAmoebaVdw14_7NonReduction(amoebaGpuContext amoebaGpu, CUDA
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateAmoebaCudaVdw14_7.h"
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_CUTOFF
#define METHOD_NAME(a, b) a##Cutoff##b
#include "kCalculateAmoebaCudaVdw14_7.h"
#undef METHOD_NAME
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##CutoffByWarp##b
#include "kCalculateAmoebaCudaVdw14_7.h"
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#undef USE_CUTOFF
// reduce psWorkArray_3_1 -> outputArray
...
...
@@ -471,7 +469,7 @@ void kCalculateAmoebaVdw14_7CopyCoordinates( amoebaGpuContext amoebaGpu, CUDAStr
--------------------------------------------------------------------------------------- */
void
kCalculateAmoebaVdw14_7Forces
(
amoebaGpuContext
amoebaGpu
)
void
kCalculateAmoebaVdw14_7Forces
(
amoebaGpuContext
amoebaGpu
,
int
applyCutoff
)
{
// ---------------------------------------------------------------------------------------
...
...
@@ -492,52 +490,122 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu )
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
*
paddedNumberOfAtoms
,
1
,
"DebugArray"
);
memset
(
debugArray
->
_pSysStream
[
0
],
0
,
sizeof
(
float
)
*
4
*
paddedNumberOfAtoms
*
paddedNumberOfAtoms
);
debugArray
->
Upload
();
int
targetAtom
=
2
1
;
int
targetAtom
=
34
2
;
#endif
// clear output arrays
kClearFields_3
(
amoebaGpu
,
1
);
// set threads/block first time through
// on first pass, set threads/block
if
(
threadsPerBlock
==
0
){
threadsPerBlock
=
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
Vdw14_7Particle
));
threadsPerBlock
=
192
;
unsigned
int
maxThreads
;
if
(
gpu
->
sm_version
>=
SM_20
)
maxThreads
=
384
;
else
if
(
gpu
->
sm_version
>=
SM_12
)
maxThreads
=
192
;
else
maxThreads
=
128
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
Vdw14_7Particle
)),
maxThreads
);
}
kCalculateAmoebaVdw14_7CopyCoordinates
(
amoebaGpu
,
gpu
->
psPosq4
,
amoebaGpu
->
psAmoebaVdwCoordinates
);
kCalculateAmoebaVdw14_7CoordinateReduction
(
amoebaGpu
,
amoebaGpu
->
psAmoebaVdwCoordinates
,
amoebaGpu
->
psAmoebaVdwCoordinates
);
if
(
gpu
->
bOutputBufferPerWarp
){
#if 0
(void) fprintf( amoebaGpu->log, "N2 warp\n" ); (void) fflush( amoebaGpu->log );
#ifdef AMOEBA_DEBUG
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Apply cutoff=%d warp=%d
\n
"
,
applyCutoff
,
gpu
->
bOutputBufferPerWarp
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
\n
"
,
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
Vdw14_7Particle
),
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#endif
// clear output arrays
kClearFields_3
(
amoebaGpu
,
1
);
kCalculateAmoebaVdw14_7N2ByWarp_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(Vdw14_7Particle)*amoebaGpu->nonbondThreadsPerBlock>>>(
if
(
applyCutoff
){
kFindBlockBoundsPeriodic_kernel
<<<
(
gpu
->
psGridBoundingBox
->
_length
+
63
)
/
64
,
64
>>>
();
LAUNCHERROR
(
"kFindBlockBoundsPeriodic"
);
kFindBlocksWithInteractionsPeriodic_kernel
<<<
gpu
->
sim
.
interaction_blocks
,
gpu
->
sim
.
interaction_threads_per_block
>>>
();
LAUNCHERROR
(
"kFindBlocksWithInteractionsPeriodic"
);
compactStream
(
gpu
->
compactPlan
,
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
amoebaSim
.
pVdwWorkUnit
,
gpu
->
sim
.
pInteractionFlag
,
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
pInteractionCount
);
kFindInteractionsWithinBlocksPeriodic_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kFindInteractionsWithinBlocksPeriodic"
);
if
(
0
){
gpu
->
psInteractionCount
->
Download
();
gpu
->
psInteractingWorkUnit
->
Download
();
gpu
->
psInteractionFlag
->
Download
();
amoebaGpu
->
psVdwWorkUnit
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Vdw Ixn count=%u
\n
"
,
gpu
->
psInteractionCount
->
_pSysStream
[
0
][
0
]
);
for
(
unsigned
int
ii
=
0
;
ii
<
gpu
->
psInteractingWorkUnit
->
_length
;
ii
++
){
unsigned
int
x
=
gpu
->
psInteractingWorkUnit
->
_pSysStream
[
0
][
ii
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
unsigned
int
exclusions
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"GpuCell %8u %8u [%5u %5u %1u] %10u "
,
ii
,
gpu
->
psInteractingWorkUnit
->
_pSysStream
[
0
][
ii
],
x
,
y
,
exclusions
,
gpu
->
psInteractionFlag
->
_pSysStream
[
0
][
ii
]
);
x
=
amoebaGpu
->
psVdwWorkUnit
->
_pSysStream
[
0
][
ii
];
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
exclusions
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
" AmGpu %8u [%5u %5u %1u]
\n
"
,
amoebaGpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
],
x
,
y
,
exclusions
);
}
(
void
)
fflush
(
amoebaGpu
->
log
);
}
amoebaGpu->psWorkUnit->_pDevStream[0],
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaVdw14_7CutoffByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psAmoebaVdwCoordinates
->
_pDevStream
[
0
],
amoebaGpu->ps
InducedDipole
->_pDevStream[0],
amoebaGpu->
psInducedDipolePolar->_pDevStream[0]
,
amoebaGpu->
psWorkArray_3_1->_pDevStream[0]
,
amoebaGpu
->
ps
VdwSigmaEpsilon
->
_pDevStream
[
0
],
amoebaGpu
->
vdwSigmaCombiningRule
,
amoebaGpu
->
vdwEpsilonCombiningRule
,
#ifdef AMOEBA_DEBUG
amoebaGpu->psWorkArray_3_
2
->_pDevStream[0],
amoebaGpu
->
psWorkArray_3_
1
->
_pDevStream
[
0
],
debugArray
->
_pDevStream
[
0
],
targetAtom
);
#else
amoebaGpu->psWorkArray_3_
2
->_pDevStream[0] );
amoebaGpu
->
psWorkArray_3_
1
->
_pDevStream
[
0
]
);
#endif
}
else
{
kCalculateAmoebaVdw14_7Cutoff_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psAmoebaVdwCoordinates
->
_pDevStream
[
0
],
amoebaGpu
->
psVdwSigmaEpsilon
->
_pDevStream
[
0
],
amoebaGpu
->
vdwSigmaCombiningRule
,
amoebaGpu
->
vdwEpsilonCombiningRule
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
debugArray
->
_pDevStream
[
0
],
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
]
);
#endif
}
LAUNCHERROR
(
"kCalculateAmoebaVdw14_7Cutoff"
);
}
else
{
if
(
gpu
->
bOutputBufferPerWarp
){
//amoebaGpu->psVdwWorkUnit->_pDevStream[0],
kCalculateAmoebaVdw14_7N2ByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psAmoebaVdwCoordinates
->
_pDevStream
[
0
],
amoebaGpu
->
psVdwSigmaEpsilon
->
_pDevStream
[
0
],
amoebaGpu
->
vdwSigmaCombiningRule
,
amoebaGpu
->
vdwEpsilonCombiningRule
,
#ifdef AMOEBA_DEBUG
(
void
)
fprintf
(
amoebaGpu
->
log
,
"N2 no warp
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
\n
"
,
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
Vdw14_7Particle
),
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
(
void
)
fflush
(
amoebaGpu
->
log
);
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
debugArray
->
_pDevStream
[
0
],
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
]
);
#endif
}
else
{
kCalculateAmoebaVdw14_7N2_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
amoebaGpu
->
psVdwWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psAmoebaVdwCoordinates
->
_pDevStream
[
0
],
...
...
@@ -552,8 +620,8 @@ threadsPerBlock = 192;
#endif
}
LAUNCHERROR
(
"kCalculateAmoebaVdw14_7"
);
LAUNCHERROR
(
"kCalculateAmoebaVdw14_7N2"
);
}
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.h
View file @
b8e12104
...
...
@@ -53,9 +53,8 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)(
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
lasty
=
0xFFFFFFFF
;
float4
jCoord
;
float
jSigma
;
float
jEpsilon
;
int
exclusionIndex
;
int
exclusionMask
;
float
totalEnergy
=
0
.
0
f
;
#ifdef AMOEBA_DEBUG
...
...
@@ -77,10 +76,10 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)(
unsigned
int
tj
=
tgx
;
Vdw14_7Particle
*
psA
=
&
sA
[
tbx
];
Vdw14_7Particle
localParticle
;
unsigned
int
atomI
=
x
+
tgx
;
float4
iCoord
=
atomCoord
[
atomI
];
float
iSigma
=
vdwParameters
[
atomI
].
x
;
float
iEpsilon
=
vdwParameters
[
atomI
].
y
;
loadVdw14_7Shared
(
&
localParticle
,
atomI
,
atomCoord
,
vdwParameters
);
float
forceSum
[
3
];
...
...
@@ -94,10 +93,12 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)(
if
(
x
==
y
)
{
if
(
bExclusionFlag
){
unsigned
int
xi
=
x
>>
GRIDBITS
;
unsigned
int
cell
=
xi
+
xi
*
cAmoebaSim
.
paddedNumberOfAtoms
/
GRID
-
xi
*
(
xi
+
1
)
/
2
;
int
exclusionIndex
=
cAmoebaSim
.
pVdwExclusionIndicesIndex
[
cell
]
+
tgx
;
int
exclusionMask
=
cAmoebaSim
.
pVdwExclusionIndices
[
exclusionIndex
];
exclusionIndex
=
cAmoebaSim
.
pVdwExclusionIndicesIndex
[
cell
]
+
tgx
;
exclusionMask
=
cAmoebaSim
.
pVdwExclusionIndices
[
exclusionIndex
];
}
// load shared data
...
...
@@ -108,30 +109,38 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)(
float
ijForce
[
3
];
// load coords, charge, ...
loadVdw14_7Data
(
&
(
psA
[
j
]),
&
jCoord
,
&
jSigma
,
&
jEpsilon
);
// get combined sigma and epsilon
float
combindedSigma
;
float
combindedEpsilon
;
getVdw14_7CombindedSigmaEpsilon_kernel
(
sigmaCombiningRule
,
iSigma
,
jS
igma
,
&
combindedSigma
,
epsilonCombiningRule
,
iEpsilon
,
jE
psilon
,
&
combindedEpsilon
);
getVdw14_7CombindedSigmaEpsilon_kernel
(
sigmaCombiningRule
,
localParticle
.
sigma
,
psA
[
j
].
s
igma
,
&
combindedSigma
,
epsilonCombiningRule
,
localParticle
.
epsilon
,
psA
[
j
].
e
psilon
,
&
combindedEpsilon
);
// calculate force
ijForce
[
0
]
=
psA
[
j
].
x
-
localParticle
.
x
;
ijForce
[
1
]
=
psA
[
j
].
y
-
localParticle
.
y
;
ijForce
[
2
]
=
psA
[
j
].
z
-
localParticle
.
z
;
if
(
cAmoebaSim
.
vdwUsePBC
)
{
ijForce
[
0
]
-=
floor
(
ijForce
[
0
]
*
cSim
.
invPeriodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
ijForce
[
1
]
-=
floor
(
ijForce
[
1
]
*
cSim
.
invPeriodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
ijForce
[
2
]
-=
floor
(
ijForce
[
2
]
*
cSim
.
invPeriodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
}
float
energy
;
calculateVdw14_7PairIxn_kernel
(
iCoord
,
jCoord
,
combindedSigma
,
combindedEpsilon
,
ijForce
,
&
energy
calculateVdw14_7PairIxn_kernel
(
combindedSigma
,
combindedEpsilon
,
ijForce
,
&
energy
#ifdef AMOEBA_DEBUG
,
pullDebug
#endif
);
// mask out excluded ixns
unsigned
int
mask
=
(
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
((
y
+
j
)
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
0
:
1
;
if
(
mask
&&
bExclusionFlag
){
unsigned
int
maskIndex
=
1
<<
j
;
unsigned
int
mask
=
(
(
exclusionMask
&
maskIndex
)
||
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
((
y
+
j
)
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
0
:
1
;
mask
=
(
exclusionMask
&
maskIndex
)
?
0
:
1
;
}
// add to field at atomI the field due atomJ's dipole
...
...
@@ -152,7 +161,7 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
(
float
)
x
;
debugArray
[
index
].
y
=
(
float
)
y
;
debugArray
[
index
].
z
=
(
float
)
cell
+
tgx
;
debugArray
[
index
].
z
=
(
float
)
tgx
;
debugArray
[
index
].
w
=
energy
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
...
...
@@ -188,10 +197,8 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
load3dArray
(
offset
,
forceSum
,
outputForce
);
#endif
}
else
{
// Read fixed atom data into registers and GRF
}
else
{
if
(
lasty
!=
y
)
{
// load coordinates, charge, ...
...
...
@@ -200,120 +207,51 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
}
// zero shared fields
zeroVdw14_7SharedForce
(
&
(
sA
[
threadIdx
.
x
])
);
if
(
!
bExclusionFlag
)
{
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
ijForce
[
3
];
// load coords, charge, ...
loadVdw14_7Data
(
&
(
psA
[
tj
]),
&
jCoord
,
&
jSigma
,
&
jEpsilon
);
// get combined sigma and epsilon
float
combindedSigma
;
float
combindedEpsilon
;
getVdw14_7CombindedSigmaEpsilon_kernel
(
sigmaCombiningRule
,
iSigma
,
jSigma
,
&
combindedSigma
,
epsilonCombiningRule
,
iEpsilon
,
jEpsilon
,
&
combindedEpsilon
);
// calculate force
float
energy
;
calculateVdw14_7PairIxn_kernel
(
iCoord
,
jCoord
,
combindedSigma
,
combindedEpsilon
,
ijForce
,
&
energy
#ifdef AMOEBA_DEBUG
,
pullDebug
#ifdef USE_CUTOFF
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
];
if
(
flags
==
0
)
{
}
else
{
#endif
);
if
(
(
atomI
<
cAmoebaSim
.
numberOfAtoms
)
&&
((
y
+
tj
)
<
cAmoebaSim
.
numberOfAtoms
)
){
// add to field at atomI the field due atomJ's dipole
forceSum
[
0
]
+=
ijForce
[
0
];
forceSum
[
1
]
+=
ijForce
[
1
];
forceSum
[
2
]
+=
ijForce
[
2
];
// add to field at atomJ the field due atomI's dipole
psA
[
tj
].
force
[
0
]
-=
ijForce
[
0
];
psA
[
tj
].
force
[
1
]
-=
ijForce
[
1
];
psA
[
tj
].
force
[
2
]
-=
ijForce
[
2
];
totalEnergy
+=
energy
;
}
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
||
(
y
+
tj
)
==
targetAtom
){
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
(
y
+
tj
)
:
atomI
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
tj
);
debugArray
[
index
].
z
=
-
2
.
0
f
;
debugArray
[
index
].
w
=
-
1
.
0
f
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
(
float
)
x
;
debugArray
[
index
].
y
=
(
float
)
y
;
debugArray
[
index
].
z
=
-
1
.
0
f
;
debugArray
[
index
].
w
=
energy
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullDebug
[
0
].
x
;
debugArray
[
index
].
y
=
pullDebug
[
0
].
y
;
debugArray
[
index
].
z
=
pullDebug
[
0
].
z
;
debugArray
[
index
].
w
=
pullDebug
[
0
].
w
;
// zero shared fields
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullDebug
[
1
].
x
;
debugArray
[
index
].
y
=
pullDebug
[
1
].
y
;
debugArray
[
index
].
z
=
pullDebug
[
1
].
z
;
debugArray
[
index
].
w
=
pullDebug
[
1
].
w
;
zeroVdw14_7SharedForce
(
&
(
sA
[
threadIdx
.
x
])
);
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
ijForce
[
0
];
debugArray
[
index
].
y
=
ijForce
[
1
];
debugArray
[
index
].
z
=
ijForce
[
2
];
}
#endif
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
if
(
bExclusionFlag
){
}
}
else
{
unsigned
int
xi
=
x
>>
GRIDBITS
;
unsigned
int
yi
=
y
>>
GRIDBITS
;
unsigned
int
cell
=
xi
+
yi
*
cSim
.
paddedNumberOfAtoms
/
GRID
-
yi
*
(
yi
+
1
)
/
2
;
int
exclusionIndex
=
cAmoebaSim
.
pVdwExclusionIndicesIndex
[
cell
]
+
tgx
;
int
exclusionMask
=
cAmoebaSim
.
pVdwExclusionIndices
[
exclusionIndex
];
exclusionIndex
=
cAmoebaSim
.
pVdwExclusionIndicesIndex
[
cell
]
+
tgx
;
exclusionMask
=
cAmoebaSim
.
pVdwExclusionIndices
[
exclusionIndex
];
}
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
ijForce
[
3
];
// load coords, charge, ...
loadVdw14_7Data
(
&
(
psA
[
tj
]),
&
jCoord
,
&
jSigma
,
&
jEpsilon
);
// get combined sigma and epsilon
float
combindedSigma
;
float
combindedEpsilon
;
getVdw14_7CombindedSigmaEpsilon_kernel
(
sigmaCombiningRule
,
iSigma
,
jS
igma
,
&
combindedSigma
,
epsilonCombiningRule
,
iEpsilon
,
jE
psilon
,
&
combindedEpsilon
);
getVdw14_7CombindedSigmaEpsilon_kernel
(
sigmaCombiningRule
,
localParticle
.
sigma
,
psA
[
tj
].
s
igma
,
&
combindedSigma
,
epsilonCombiningRule
,
localParticle
.
epsilon
,
psA
[
tj
].
e
psilon
,
&
combindedEpsilon
);
// calculate force
float
energy
;
calculateVdw14_7PairIxn_kernel
(
iCoord
,
jCoord
,
combindedSigma
,
combindedEpsilon
,
ijForce
,
&
energy
ijForce
[
0
]
=
psA
[
tj
].
x
-
localParticle
.
x
;
ijForce
[
1
]
=
psA
[
tj
].
y
-
localParticle
.
y
;
ijForce
[
2
]
=
psA
[
tj
].
z
-
localParticle
.
z
;
if
(
cAmoebaSim
.
vdwUsePBC
)
{
ijForce
[
0
]
-=
floor
(
ijForce
[
0
]
*
cSim
.
invPeriodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
ijForce
[
1
]
-=
floor
(
ijForce
[
1
]
*
cSim
.
invPeriodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
ijForce
[
2
]
-=
floor
(
ijForce
[
2
]
*
cSim
.
invPeriodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
}
calculateVdw14_7PairIxn_kernel
(
combindedSigma
,
combindedEpsilon
,
ijForce
,
&
energy
#ifdef AMOEBA_DEBUG
,
pullDebug
#endif
...
...
@@ -321,8 +259,11 @@ if( atomI == targetAtom || (y+tj) == targetAtom ){
// mask out excluded ixns
unsigned
int
mask
=
(
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
((
y
+
tj
)
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
0
:
1
;
if
(
mask
&&
bExclusionFlag
){
unsigned
int
maskIndex
=
1
<<
tj
;
unsigned
int
mask
=
(
(
exclusionMask
&
maskIndex
)
||
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
((
y
+
tj
)
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
0
:
1
;
mask
=
(
exclusionMask
&
maskIndex
)
?
0
:
1
;
}
// accumulate force for atomI
...
...
@@ -350,7 +291,7 @@ if( atomI == targetAtom || (y+tj) == targetAtom ){
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
(
float
)
x
;
debugArray
[
index
].
y
=
(
float
)
y
;
debugArray
[
index
].
z
=
(
float
)
cell
+
tgx
;
debugArray
[
index
].
z
=
(
float
)
tgx
;
debugArray
[
index
].
w
=
energy
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
...
...
@@ -373,8 +314,10 @@ if( atomI == targetAtom || (y+tj) == targetAtom ){
#endif
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
}
}
// end of j-loop
#ifdef USE_CUTOFF
}
#endif
// Write results
...
...
@@ -385,7 +328,6 @@ if( atomI == targetAtom || (y+tj) == targetAtom ){
offset
=
3
*
(
y
+
tgx
+
warp
*
cAmoebaSim
.
paddedNumberOfAtoms
);
load3dArrayBufferPerWarp
(
offset
,
sA
[
threadIdx
.
x
].
force
,
outputForce
);
#else
unsigned
int
offset
=
3
*
(
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
);
load3dArray
(
offset
,
forceSum
,
outputForce
);
...
...
@@ -395,7 +337,8 @@ if( atomI == targetAtom || (y+tj) == targetAtom ){
#endif
lasty
=
y
;
}
}
// x == y block
pos
++
;
}
cSim
.
pEnergy
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
totalEnergy
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
View file @
b8e12104
...
...
@@ -2,6 +2,7 @@
//-----------------------------------------------------------------------------------------
#include "cudaKernels.h"
#include "amoebaCudaKernels.h"
#include <stdio.h>
...
...
@@ -353,13 +354,6 @@ void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu )
}
#undef USE_PERIODIC
#define USE_PERIODIC
#define METHOD_NAME(a, b) a##Periodic##b
#include "kFindInteractingBlocks.h"
#undef USE_PERIODIC
#undef METHOD_NAME
void
kCalculateAmoebaMultipoleForces
(
amoebaGpuContext
amoebaGpu
,
bool
hasAmoebaGeneralizedKirkwood
)
{
std
::
string
methodName
=
"kCalculateAmoebaMultipoleForces"
;
...
...
@@ -384,7 +378,8 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
LAUNCHERROR
(
"kFindBlockBoundsPeriodic"
);
kFindBlocksWithInteractionsPeriodic_kernel
<<<
gpu
->
sim
.
interaction_blocks
,
gpu
->
sim
.
interaction_threads_per_block
>>>
();
LAUNCHERROR
(
"kFindBlocksWithInteractionsPeriodic"
);
compactStream
(
gpu
->
compactPlan
,
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
pInteractionFlag
,
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
pInteractionCount
);
//compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
compactStream
(
gpu
->
compactPlan
,
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
gpu
->
sim
.
pInteractionFlag
,
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
pInteractionCount
);
kFindInteractionsWithinBlocksPeriodic_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kFindInteractionsWithinBlocksPeriodic"
);
...
...
plugins/amoeba/platforms/cuda/tests/AmoebaTinkerParameterFile.cpp
View file @
b8e12104
...
...
@@ -2460,7 +2460,7 @@ static int readAmoebaGeneralizedKirkwoodParameters( FILE* filePtr, MapStringInt&
--------------------------------------------------------------------------------------- */
static
int
readAmoebaVdwParameters
(
FILE
*
filePtr
,
MapStringInt
&
forceMap
,
const
StringVector
&
tokens
,
static
int
readAmoebaVdwParameters
(
FILE
*
filePtr
,
int
version
,
MapStringInt
&
forceMap
,
const
StringVector
&
tokens
,
System
&
system
,
int
useOpenMMUnits
,
MapStringVectorOfVectors
&
supplementary
,
MapStringString
&
inputArgumentMap
,
int
*
lineCount
,
FILE
*
log
){
...
...
@@ -2542,6 +2542,21 @@ static int readAmoebaVdwParameters( FILE* filePtr, MapStringInt& forceMap, const
}
}
if
(
version
>
1
){
lineTokensT
.
resize
(
0
);
isNotEof
=
readLine
(
filePtr
,
lineTokensT
,
lineCount
,
log
);
if
(
lineTokensT
[
0
]
==
"AmoebaVdw14_7Periodic"
){
int
usePBC
=
atoi
(
lineTokensT
[
1
].
c_str
()
);
vdwForce
->
setPBC
(
usePBC
);
}
lineTokensT
.
resize
(
0
);
isNotEof
=
readLine
(
filePtr
,
lineTokensT
,
lineCount
,
log
);
if
(
lineTokensT
[
0
]
==
"AmoebaVdw14_7CutOff"
){
double
cutoff
=
atof
(
lineTokensT
[
1
].
c_str
()
);
vdwForce
->
setCutoff
(
cutoff
);
}
}
lineTokensT
.
resize
(
0
);
isNotEof
=
readLine
(
filePtr
,
lineTokensT
,
lineCount
,
log
);
if
(
lineTokensT
[
0
]
==
"AmoebaVdw14_7Exclusion"
){
...
...
@@ -2621,6 +2636,8 @@ static int readAmoebaVdwParameters( FILE* filePtr, MapStringInt& forceMap, const
methodName
.
c_str
(),
arraySize
,
(
useOpenMMUnits
?
"OpenMM"
:
"Amoeba"
),
vdwForce
->
getSigmaCombiningRule
().
c_str
(),
vdwForce
->
getEpsilonCombiningRule
().
c_str
()
);
(
void
)
fprintf
(
log
,
"use periodic boundary conditions=%d cutoff=%15.7e
\n
"
,
vdwForce
->
getPBC
(),
vdwForce
->
getCutoff
()
);
for
(
int
ii
=
0
;
ii
<
vdwForce
->
getNumParticles
();
ii
++
){
int
indexIV
,
indexClass
;
double
sigma
,
epsilon
,
reduction
;
...
...
@@ -3669,7 +3686,7 @@ Integrator* readAmoebaParameterFile( const std::string& inputParameterFile, MapS
// Amoeba Vdw
}
else
if
(
field
==
"AmoebaVdw14_7SigEpsTable"
||
field
==
"AmoebaVdw14_7Reduction"
){
readAmoebaVdwParameters
(
filePtr
,
forceMap
,
tokens
,
system
,
useOpenMMUnits
,
supplementary
,
inputArgumentMap
,
&
lineCount
,
log
);
readAmoebaVdwParameters
(
filePtr
,
version
,
forceMap
,
tokens
,
system
,
useOpenMMUnits
,
supplementary
,
inputArgumentMap
,
&
lineCount
,
log
);
}
else
if
(
field
==
"AmoebaVdwForce"
){
readVec3
(
filePtr
,
tokens
,
forces
[
AMOEBA_VDW_FORCE
],
&
lineCount
,
field
,
log
);
}
else
if
(
field
==
"AmoebaVdwEnergy"
){
...
...
@@ -4560,7 +4577,7 @@ void testUsingAmoebaTinkerParameterFile( const std::string& amoebaTinkerParamete
StringVector
forceList
;
std
::
string
activeForceNames
;
for
(
MapStringInt
::
const_iterator
ii
=
forceMap
.
begin
();
ii
!=
forceMap
.
end
();
ii
++
){
if
(
ii
->
second
){
if
(
ii
->
second
&&
tinkerForces
.
find
(
ii
->
first
)
!=
tinkerForces
.
end
()
){
if
(
includeCavityTerm
&&
ii
->
first
==
AMOEBA_GK_FORCE
){
forceList
.
push_back
(
AMOEBA_GK_CAVITY_FORCE
);
activeForceNames
+=
AMOEBA_GK_CAVITY_FORCE
+
":"
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment