Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
b1dc92cc
Commit
b1dc92cc
authored
Apr 08, 2011
by
Mark Friedrichs
Browse files
Added support for polarization type='direct' option
WCA force now written directly to cuda.force4 array
parent
959f2fab
Changes
16
Show whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
178 additions
and
536 deletions
+178
-536
plugins/amoeba/openmmapi/include/openmm/AmoebaMultipoleForce.h
...ns/amoeba/openmmapi/include/openmm/AmoebaMultipoleForce.h
+25
-1
plugins/amoeba/openmmapi/src/AmoebaMultipoleForce.cpp
plugins/amoeba/openmmapi/src/AmoebaMultipoleForce.cpp
+9
-1
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+7
-2
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
+12
-9
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaTypes.h
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaTypes.h
+2
-1
plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
+13
-18
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
...latforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
+2
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
+20
-36
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
...c/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
+9
-0
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
...uda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
+9
-0
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
...eba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
+10
-3
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
...src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
+21
-34
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+11
-59
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
+3
-239
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.h
...orms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.h
+24
-130
No files found.
plugins/amoeba/openmmapi/include/openmm/AmoebaMultipoleForce.h
View file @
b1dc92cc
...
@@ -66,6 +66,19 @@ public:
...
@@ -66,6 +66,19 @@ public:
PME
=
1
PME
=
1
};
};
enum
AmoebaPolarizationType
{
/**
* Mutual polarization
*/
Mutual
=
0
,
/**
* Direct polarization
*/
Direct
=
1
};
enum
MultipoleAxisTypes
{
ZThenX
=
0
,
Bisector
=
1
,
ZBisect
=
2
,
ThreeFold
=
3
,
ZOnly
=
4
,
NoAxisType
=
5
,
LastAxisTypeIndex
=
6
};
enum
MultipoleAxisTypes
{
ZThenX
=
0
,
Bisector
=
1
,
ZBisect
=
2
,
ThreeFold
=
3
,
ZOnly
=
4
,
NoAxisType
=
5
,
LastAxisTypeIndex
=
6
};
// Algorithm used to converge mutual induced dipoles:
// Algorithm used to converge mutual induced dipoles:
...
@@ -100,6 +113,16 @@ public:
...
@@ -100,6 +113,16 @@ public:
*/
*/
void
setNonbondedMethod
(
AmoebaNonbondedMethod
method
);
void
setNonbondedMethod
(
AmoebaNonbondedMethod
method
);
/**
* Get polarization type
*/
AmoebaPolarizationType
getPolarizationType
(
void
)
const
;
/**
* Set the polarization type
*/
void
setPolarizationType
(
AmoebaPolarizationType
type
);
/**
/**
* Get the cutoff distance (in nm) being used for nonbonded interactions. If the NonbondedMethod in use
* Get the cutoff distance (in nm) being used for nonbonded interactions. If the NonbondedMethod in use
* is NoCutoff, this value will have no effect.
* is NoCutoff, this value will have no effect.
...
@@ -326,6 +349,7 @@ protected:
...
@@ -326,6 +349,7 @@ protected:
private:
private:
AmoebaNonbondedMethod
nonbondedMethod
;
AmoebaNonbondedMethod
nonbondedMethod
;
AmoebaPolarizationType
polarizationType
;
double
cutoffDistance
;
double
cutoffDistance
;
double
aewald
;
double
aewald
;
int
pmeBSplineOrder
;
int
pmeBSplineOrder
;
...
...
plugins/amoeba/openmmapi/src/AmoebaMultipoleForce.cpp
View file @
b1dc92cc
...
@@ -38,7 +38,7 @@ using namespace OpenMM;
...
@@ -38,7 +38,7 @@ using namespace OpenMM;
using
std
::
string
;
using
std
::
string
;
using
std
::
vector
;
using
std
::
vector
;
AmoebaMultipoleForce
::
AmoebaMultipoleForce
()
:
nonbondedMethod
(
NoCutoff
),
pmeBSplineOrder
(
5
),
cutoffDistance
(
1.0
),
ewaldErrorTol
(
1e-4
),
mutualInducedIterationMethod
(
SOR
),
mutualInducedMaxIterations
(
60
),
AmoebaMultipoleForce
::
AmoebaMultipoleForce
()
:
nonbondedMethod
(
NoCutoff
),
polarizationType
(
Mutual
),
pmeBSplineOrder
(
5
),
cutoffDistance
(
1.0
),
ewaldErrorTol
(
1e-4
),
mutualInducedIterationMethod
(
SOR
),
mutualInducedMaxIterations
(
60
),
mutualInducedTargetEpsilon
(
1.0e-05
),
scalingDistanceCutoff
(
100.0
),
electricConstant
(
138.9354558456
)
{
mutualInducedTargetEpsilon
(
1.0e-05
),
scalingDistanceCutoff
(
100.0
),
electricConstant
(
138.9354558456
)
{
}
}
...
@@ -50,6 +50,14 @@ void AmoebaMultipoleForce::setNonbondedMethod( AmoebaMultipoleForce::AmoebaNonbo
...
@@ -50,6 +50,14 @@ void AmoebaMultipoleForce::setNonbondedMethod( AmoebaMultipoleForce::AmoebaNonbo
nonbondedMethod
=
method
;
nonbondedMethod
=
method
;
}
}
AmoebaMultipoleForce
::
AmoebaPolarizationType
AmoebaMultipoleForce
::
getPolarizationType
(
void
)
const
{
return
polarizationType
;
}
void
AmoebaMultipoleForce
::
setPolarizationType
(
AmoebaMultipoleForce
::
AmoebaPolarizationType
type
)
{
polarizationType
=
type
;
}
double
AmoebaMultipoleForce
::
getCutoffDistance
(
void
)
const
{
double
AmoebaMultipoleForce
::
getCutoffDistance
(
void
)
const
{
return
cutoffDistance
;
return
cutoffDistance
;
}
}
...
...
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
b1dc92cc
...
@@ -940,8 +940,9 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -940,8 +940,9 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
}
}
}
}
int
polarizationType
=
static_cast
<
int
>
(
force
.
getPolarizationType
());
int
iterativeMethod
=
static_cast
<
int
>
(
force
.
getMutualInducedIterationMethod
());
int
iterativeMethod
=
static_cast
<
int
>
(
force
.
getMutualInducedIterationMethod
());
if
(
iterativeMethod
!=
0
){
if
(
iterativeMethod
!=
0
&&
polarizationType
==
0
){
throw
OpenMMException
(
"Iterative method for mutual induced dipoles not recognized.
\n
"
);
throw
OpenMMException
(
"Iterative method for mutual induced dipoles not recognized.
\n
"
);
}
}
...
@@ -950,13 +951,17 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -950,13 +951,17 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
throw
OpenMMException
(
"AmoebaMultipoleForce nonbonded method not recognized.
\n
"
);
throw
OpenMMException
(
"AmoebaMultipoleForce nonbonded method not recognized.
\n
"
);
}
}
if
(
polarizationType
!=
0
&&
polarizationType
!=
1
){
throw
OpenMMException
(
"AmoebaMultipoleForce polarization type not recognized.
\n
"
);
}
gpuSetAmoebaMultipoleParameters
(
data
.
getAmoebaGpu
(),
charges
,
dipoles
,
quadrupoles
,
axisTypes
,
multipoleAtomZs
,
multipoleAtomXs
,
multipoleAtomYs
,
gpuSetAmoebaMultipoleParameters
(
data
.
getAmoebaGpu
(),
charges
,
dipoles
,
quadrupoles
,
axisTypes
,
multipoleAtomZs
,
multipoleAtomXs
,
multipoleAtomYs
,
tholes
,
scalingDistanceCutoff
,
dampingFactors
,
polarity
,
tholes
,
scalingDistanceCutoff
,
dampingFactors
,
polarity
,
multipoleAtomCovalentInfo
,
covalentDegree
,
minCovalentIndices
,
minCovalentPolarizationIndices
,
(
maxCovalentRange
+
2
),
multipoleAtomCovalentInfo
,
covalentDegree
,
minCovalentIndices
,
minCovalentPolarizationIndices
,
(
maxCovalentRange
+
2
),
static_cast
<
int
>
(
force
.
getMutualInducedIterationMethod
()),
static_cast
<
int
>
(
force
.
getMutualInducedIterationMethod
()),
force
.
getMutualInducedMaxIterations
(),
force
.
getMutualInducedMaxIterations
(),
static_cast
<
float
>
(
force
.
getMutualInducedTargetEpsilon
()),
static_cast
<
float
>
(
force
.
getMutualInducedTargetEpsilon
()),
nonbondedMethod
,
nonbondedMethod
,
polarizationType
,
static_cast
<
float
>
(
force
.
getCutoffDistance
()),
static_cast
<
float
>
(
force
.
getCutoffDistance
()),
static_cast
<
float
>
(
force
.
getAEwald
()),
static_cast
<
float
>
(
force
.
getAEwald
()),
static_cast
<
float
>
(
force
.
getElectricConstant
())
);
static_cast
<
float
>
(
force
.
getElectricConstant
())
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
View file @
b1dc92cc
...
@@ -358,6 +358,7 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
...
@@ -358,6 +358,7 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psLabFrameDipole
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psLabFrameDipole
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psLabFrameQuadrupole
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psLabFrameQuadrupole
,
log
);
(
void
)
fprintf
(
log
,
" polarizationType %d
\n
"
,
amoebaGpu
->
amoebaSim
.
polarizationType
);
(
void
)
fprintf
(
log
,
" maxCovalentDegreeSz %d
\n
"
,
amoebaGpu
->
maxCovalentDegreeSz
);
(
void
)
fprintf
(
log
,
" maxCovalentDegreeSz %d
\n
"
,
amoebaGpu
->
maxCovalentDegreeSz
);
(
void
)
fprintf
(
log
,
" solventDielectric %10.3f
\n
"
,
amoebaGpu
->
solventDielectric
);
(
void
)
fprintf
(
log
,
" solventDielectric %10.3f
\n
"
,
amoebaGpu
->
solventDielectric
);
(
void
)
fprintf
(
log
,
" scalingDistanceCutoff %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
scalingDistanceCutoff
);
(
void
)
fprintf
(
log
,
" scalingDistanceCutoff %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
scalingDistanceCutoff
);
...
@@ -1519,7 +1520,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
...
@@ -1519,7 +1520,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
const
std
::
vector
<
std
::
vector
<
std
::
vector
<
int
>
>
>&
multipoleParticleCovalentInfo
,
const
std
::
vector
<
int
>&
covalentDegree
,
const
std
::
vector
<
std
::
vector
<
std
::
vector
<
int
>
>
>&
multipoleParticleCovalentInfo
,
const
std
::
vector
<
int
>&
covalentDegree
,
const
std
::
vector
<
int
>&
minCovalentIndices
,
const
std
::
vector
<
int
>&
minCovalentPolarizationIndices
,
int
maxCovalentRange
,
const
std
::
vector
<
int
>&
minCovalentIndices
,
const
std
::
vector
<
int
>&
minCovalentPolarizationIndices
,
int
maxCovalentRange
,
int
mutualInducedIterativeMethod
,
int
mutualInducedMaxIterations
,
float
mutualInducedTargetEpsilon
,
int
mutualInducedIterativeMethod
,
int
mutualInducedMaxIterations
,
float
mutualInducedTargetEpsilon
,
int
nonbondedMethod
,
float
cutoffDistance
,
float
alphaEwald
,
float
electricConstant
)
{
int
nonbondedMethod
,
int
polarizationType
,
float
cutoffDistance
,
float
alphaEwald
,
float
electricConstant
)
{
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
...
@@ -1545,6 +1546,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
...
@@ -1545,6 +1546,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
// allocate memory
// allocate memory
amoebaGpu
->
mutualInducedIterativeMethod
=
mutualInducedIterativeMethod
;
amoebaGpu
->
mutualInducedIterativeMethod
=
mutualInducedIterativeMethod
;
amoebaGpu
->
amoebaSim
.
polarizationType
=
polarizationType
;
gpuMutualInducedFieldAllocate
(
amoebaGpu
);
gpuMutualInducedFieldAllocate
(
amoebaGpu
);
amoebaGpu
->
mutualInducedMaxIterations
=
mutualInducedMaxIterations
;
amoebaGpu
->
mutualInducedMaxIterations
=
mutualInducedMaxIterations
;
amoebaGpu
->
mutualInducedTargetEpsilon
=
mutualInducedTargetEpsilon
;
amoebaGpu
->
mutualInducedTargetEpsilon
=
mutualInducedTargetEpsilon
;
...
@@ -1562,9 +1564,9 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
...
@@ -1562,9 +1564,9 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
}
}
if
(
amoebaGpu
->
log
){
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s Nonbonded method=%d %d [NoCutoff=%d PME=%d]
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s Nonbonded method=%d %d [NoCutoff=%d PME=%d]
polarizationType=%d (0=mutual/1=direct)
\n
"
,
methodName
.
c_str
(),
nonbondedMethod
,
amoebaGpu
->
multipoleNonbondedMethod
,
methodName
.
c_str
(),
nonbondedMethod
,
amoebaGpu
->
multipoleNonbondedMethod
,
AMOEBA_NO_CUTOFF
,
AMOEBA_PARTICLE_MESH_EWALD
);
AMOEBA_NO_CUTOFF
,
AMOEBA_PARTICLE_MESH_EWALD
,
polarizationType
);
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
}
amoebaGpu
->
amoebaSim
.
sqrtPi
=
std
::
sqrt
(
3.14159265358
f
);
amoebaGpu
->
amoebaSim
.
sqrtPi
=
std
::
sqrt
(
3.14159265358
f
);
...
@@ -2554,6 +2556,7 @@ void gpuSetAmoebaWcaDispersionParameters( amoebaGpuContext amoebaGpu,
...
@@ -2554,6 +2556,7 @@ void gpuSetAmoebaWcaDispersionParameters( amoebaGpuContext amoebaGpu,
unsigned
int
particles
=
radii
.
size
();
unsigned
int
particles
=
radii
.
size
();
amoebaGpu
->
psWcaDispersionRadiusEpsilon
=
new
CUDAStream
<
float2
>
(
paddedNumberOfAtoms
,
1
,
"WcaDispersionRadiusEpsilon"
);
amoebaGpu
->
psWcaDispersionRadiusEpsilon
=
new
CUDAStream
<
float2
>
(
paddedNumberOfAtoms
,
1
,
"WcaDispersionRadiusEpsilon"
);
amoebaGpu
->
amoebaSim
.
pWcaDispersionRadiusEpsilon
=
amoebaGpu
->
psWcaDispersionRadiusEpsilon
->
_pDevData
;
for
(
unsigned
int
ii
=
0
;
ii
<
particles
;
ii
++
)
for
(
unsigned
int
ii
=
0
;
ii
<
particles
;
ii
++
)
{
{
amoebaGpu
->
psWcaDispersionRadiusEpsilon
->
_pSysData
[
ii
].
x
=
radii
[
ii
];
amoebaGpu
->
psWcaDispersionRadiusEpsilon
->
_pSysData
[
ii
].
x
=
radii
[
ii
];
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaTypes.h
View file @
b1dc92cc
...
@@ -136,6 +136,7 @@ struct cudaAmoebaGmxSimulation {
...
@@ -136,6 +136,7 @@ struct cudaAmoebaGmxSimulation {
float
sqrtPi
;
// sqrt(PI)
float
sqrtPi
;
// sqrt(PI)
float
scalingDistanceCutoff
;
// scaling cutoff
float
scalingDistanceCutoff
;
// scaling cutoff
float2
*
pDampingFactorAndThole
;
// Thole & damping factors
float2
*
pDampingFactorAndThole
;
// Thole & damping factors
int
polarizationType
;
// polarization type (0=Mutual, 1=Direct)
int4
*
pMultipoleParticlesIdsAndAxisType
;
int4
*
pMultipoleParticlesIdsAndAxisType
;
int4
*
pMultipoleParticlesTorqueBufferIndices
;
int4
*
pMultipoleParticlesTorqueBufferIndices
;
...
@@ -181,8 +182,8 @@ struct cudaAmoebaGmxSimulation {
...
@@ -181,8 +182,8 @@ struct cudaAmoebaGmxSimulation {
float
awater
;
float
awater
;
float
shctd
;
float
shctd
;
float
dispoff
;
float
dispoff
;
float
totalMaxWcaDispersionEnergy
;
float
totalMaxWcaDispersionEnergy
;
float2
*
pWcaDispersionRadiusEpsilon
;
// scaling indices
// scaling indices
int
*
pScaleIndicesIndex
;
int
*
pScaleIndicesIndex
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
View file @
b1dc92cc
...
@@ -293,7 +293,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
...
@@ -293,7 +293,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
const
std
::
vector
<
std
::
vector
<
std
::
vector
<
int
>
>
>&
multipoleAtomCovalentInfo
,
const
std
::
vector
<
int
>&
covalentDegree
,
const
std
::
vector
<
std
::
vector
<
std
::
vector
<
int
>
>
>&
multipoleAtomCovalentInfo
,
const
std
::
vector
<
int
>&
covalentDegree
,
const
std
::
vector
<
int
>&
minCovalentIndices
,
const
std
::
vector
<
int
>&
minCovalentPolarizationIndices
,
int
maxCovalentRange
,
const
std
::
vector
<
int
>&
minCovalentIndices
,
const
std
::
vector
<
int
>&
minCovalentPolarizationIndices
,
int
maxCovalentRange
,
int
mutualInducedIterationMethod
,
int
mutualInducedMaxIterations
,
float
mutualInducedTargetEpsilon
,
int
mutualInducedIterationMethod
,
int
mutualInducedMaxIterations
,
float
mutualInducedTargetEpsilon
,
int
nonbondedMethod
,
float
cutoffDistance
,
float
alphaEwald
,
float
electricConstant
);
int
nonbondedMethod
,
int
polarizationType
,
float
cutoffDistance
,
float
alphaEwald
,
float
electricConstant
);
extern
"C"
extern
"C"
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
View file @
b1dc92cc
...
@@ -388,23 +388,6 @@ if( 1 ){
...
@@ -388,23 +388,6 @@ if( 1 ){
amatrixProductVector3
(
atomJ
.
labFrameQuadrupole
,
atomI
.
inducedDipole
,
temp5
);
amatrixProductVector3
(
atomJ
.
labFrameQuadrupole
,
atomI
.
inducedDipole
,
temp5
);
amatrixProductVector3
(
atomI
.
labFrameQuadrupole
,
atomJ
.
inducedDipole
,
qIuJ
);
//MK
amatrixProductVector3
(
atomI
.
labFrameQuadrupole
,
atomJ
.
inducedDipole
,
qIuJ
);
//MK
/*
ftm2i(1) = gfi(1)*xr + 0.5d0*
& (- rr3*ck*(uind(1,i)*psc3+uinp(1,i)*dsc3)
& + rr5*sc(4)*(uind(1,i)*psc5+uinp(1,i)*dsc5)
& - rr7*sc(6)*(uind(1,i)*psc7+uinp(1,i)*dsc7))
& + (rr3*ci*(uind(1,k)*psc3+uinp(1,k)*dsc3)
& + rr5*sc(3)*(uind(1,k)*psc5+uinp(1,k)*dsc5)
& + rr7*sc(5)*(uind(1,k)*psc7+uinp(1,k)*dsc7))*0.5d0
& + rr5*scale5i*(sci(4)*uinp(1,i)+scip(4)*uind(1,i)
& + sci(3)*uinp(1,k)+scip(3)*uind(1,k))*0.5d0
& + 0.5d0*(sci(4)*psc5+scip(4)*dsc5)*rr5*di(1)
& + 0.5d0*(sci(3)*psc5+scip(3)*dsc5)*rr5*dk(1)
& + 0.5d0*gfi(4)*((qkui(1)-qiuk(1))*psc5
& + (qkuip(1)-qiukp(1))*dsc5)
& + gfi(5)*qir(1) + gfi(6)*qkr(1)
*/
float
ftm2i_0
=
gfi1
*
deltaR
[
0
]
+
float
ftm2i_0
=
gfi1
*
deltaR
[
0
]
+
0.5
f
*
(
-
rr3
*
atomJ
.
q
*
(
atomI
.
inducedDipole
[
0
]
*
psc0
+
atomI
.
inducedDipoleP
[
0
]
*
dsc0
)
+
0.5
f
*
(
-
rr3
*
atomJ
.
q
*
(
atomI
.
inducedDipole
[
0
]
*
psc0
+
atomI
.
inducedDipoleP
[
0
]
*
dsc0
)
+
rr5
*
sc4
*
(
atomI
.
inducedDipole
[
0
]
*
psc1
+
atomI
.
inducedDipoleP
[
0
]
*
dsc1
)
-
rr5
*
sc4
*
(
atomI
.
inducedDipole
[
0
]
*
psc1
+
atomI
.
inducedDipoleP
[
0
]
*
dsc1
)
-
...
@@ -460,6 +443,18 @@ if( 1 ){
...
@@ -460,6 +443,18 @@ if( 1 ){
ftm2i_1
-=
(
fridmp_1
+
findmp_1
);
ftm2i_1
-=
(
fridmp_1
+
findmp_1
);
ftm2i_2
-=
(
fridmp_2
+
findmp_2
);
ftm2i_2
-=
(
fridmp_2
+
findmp_2
);
if
(
cAmoebaSim
.
polarizationType
)
{
float
gfd
=
0.5
*
(
rr5
*
scip2
*
scaleI0
-
rr7
*
(
scip3
*
sci4
+
sci3
*
scip4
)
*
scaleI1
);
float
temp5
=
0.5
*
rr5
*
scaleI1
;
float
fdir_0
=
gfd
*
deltaR
[
0
]
+
temp5
*
(
sci4
*
atomI
.
inducedDipoleP
[
0
]
+
scip4
*
atomI
.
inducedDipole
[
0
]
+
sci3
*
atomJ
.
inducedDipoleP
[
0
]
+
scip3
*
atomJ
.
inducedDipole
[
0
]);
float
fdir_1
=
gfd
*
deltaR
[
1
]
+
temp5
*
(
sci4
*
atomI
.
inducedDipoleP
[
1
]
+
scip4
*
atomI
.
inducedDipole
[
1
]
+
sci3
*
atomJ
.
inducedDipoleP
[
1
]
+
scip3
*
atomJ
.
inducedDipole
[
1
]);
float
fdir_2
=
gfd
*
deltaR
[
2
]
+
temp5
*
(
sci4
*
atomI
.
inducedDipoleP
[
2
]
+
scip4
*
atomI
.
inducedDipole
[
2
]
+
sci3
*
atomJ
.
inducedDipoleP
[
2
]
+
scip3
*
atomJ
.
inducedDipole
[
2
]);
ftm2i_0
-=
fdir_0
-
findmp_0
;
ftm2i_1
-=
fdir_1
-
findmp_1
;
ftm2i_2
-=
fdir_2
-
findmp_2
;
}
// now perform the torque calculation;
// now perform the torque calculation;
// intermediate terms for torque between multipoles i and j;
// intermediate terms for torque between multipoles i and j;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
View file @
b1dc92cc
...
@@ -1311,7 +1311,7 @@ __device__ void calculateKirkwoodPairIxn_kernel( KirkwoodParticle& atomI,
...
@@ -1311,7 +1311,7 @@ __device__ void calculateKirkwoodPairIxn_kernel( KirkwoodParticle& atomI,
// mutual polarization electrostatic solvation free energy gradient
// mutual polarization electrostatic solvation free energy gradient
//
if
(poltyp .eq. 'MUTUAL'
){
if
(
cAmoebaSim
.
polarizationType
==
0
){
dpdx
=
dpdx
-
0.5
f
*
dpdx
=
dpdx
-
0.5
f
*
(
atomI
.
inducedDipole
[
0
]
*
(
atomJ
.
inducedDipoleP
[
0
]
*
gux5
+
atomJ
.
inducedDipoleP
[
1
]
*
gux6
+
atomJ
.
inducedDipoleP
[
2
]
*
gux7
)
(
atomI
.
inducedDipole
[
0
]
*
(
atomJ
.
inducedDipoleP
[
0
]
*
gux5
+
atomJ
.
inducedDipoleP
[
1
]
*
gux6
+
atomJ
.
inducedDipoleP
[
2
]
*
gux7
)
...
@@ -1345,7 +1345,7 @@ __device__ void calculateKirkwoodPairIxn_kernel( KirkwoodParticle& atomI,
...
@@ -1345,7 +1345,7 @@ __device__ void calculateKirkwoodPairIxn_kernel( KirkwoodParticle& atomI,
+
atomJ
.
inducedDipole
[
2
]
*
(
atomI
.
inducedDipoleP
[
0
]
*
guz22
+
atomI
.
inducedDipoleP
[
1
]
*
guz23
+
atomI
.
inducedDipoleP
[
2
]
*
guz24
);
+
atomJ
.
inducedDipole
[
2
]
*
(
atomI
.
inducedDipoleP
[
0
]
*
guz22
+
atomI
.
inducedDipoleP
[
1
]
*
guz23
+
atomI
.
inducedDipoleP
[
2
]
*
guz24
);
dpbi
=
dpbi
-
0.5
f
*
atomJ
.
bornRadius
*
duvdr
;
dpbi
=
dpbi
-
0.5
f
*
atomJ
.
bornRadius
*
duvdr
;
dpbk
=
dpbk
-
0.5
f
*
atomI
.
bornRadius
*
duvdr
;
dpbk
=
dpbk
-
0.5
f
*
atomI
.
bornRadius
*
duvdr
;
//
}
}
// torque due to induced reaction field on permanent dipoles
// torque due to induced reaction field on permanent dipoles
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
View file @
b1dc92cc
...
@@ -515,24 +515,16 @@ __device__ void calculateKirkwoodEDiffPairIxn_kernel( KirkwoodEDiffParticle& ato
...
@@ -515,24 +515,16 @@ __device__ void calculateKirkwoodEDiffPairIxn_kernel( KirkwoodEDiffParticle& ato
// correction to convert mutual to direct polarization force
// correction to convert mutual to direct polarization force
#if 0
if
(
cAmoebaSim
.
polarizationType
){
if (poltyp .eq. 'DIRECT') then;
float
gfd
=
0.5
f
*
(
rr5
*
scip2
*
scale3i
-
rr7
*
(
scip3
*
sci4
+
sci3
*
scip4
)
*
scale5i
);
gfd = 0.5f * (rr5*scip2*scale3i;
float
fdir1
=
gfd
*
xr
+
0.5
f
*
rr5
*
scale5i
*
(
sci4
*
atomI
.
inducedDipolePS
[
0
]
+
scip4
*
atomI
.
inducedDipoleS
[
0
]
+
sci3
*
atomJ
.
inducedDipolePS
[
0
]
+
scip3
*
atomJ
.
inducedDipoleS
[
0
]);
& - rr7*(scip3*sci4+sci3*scip4)*scale5i);
float
fdir2
=
gfd
*
yr
+
0.5
f
*
rr5
*
scale5i
*
(
sci4
*
atomI
.
inducedDipolePS
[
1
]
+
scip4
*
atomI
.
inducedDipoleS
[
1
]
+
sci3
*
atomJ
.
inducedDipolePS
[
1
]
+
scip3
*
atomJ
.
inducedDipoleS
[
1
]);
fdir1 = gfd*xr + 0.5f*rr5*scale5i;
float
fdir3
=
gfd
*
zr
+
0.5
f
*
rr5
*
scale5i
*
(
sci4
*
atomI
.
inducedDipolePS
[
2
]
+
scip4
*
atomI
.
inducedDipoleS
[
2
]
+
sci3
*
atomJ
.
inducedDipolePS
[
2
]
+
scip3
*
atomJ
.
inducedDipoleS
[
2
]);
& * (sci4*atomI.inducedDipolePS[0]+scip4*atomI.inducedDipoleS[0];
ftm2i1
-=
fdir1
-
findmp1
;
& +sci3*atomJ.inducedDipolePS[0]+scip3*atomJ.inducedDipoleS[0]);
ftm2i2
-=
fdir2
-
findmp2
;
fdir2 = gfd*yr + 0.5f*rr5*scale5i;
ftm2i3
-=
fdir3
-
findmp3
;
& * (sci4*atomI.inducedDipolePS[1]+scip4*atomI.inducedDipoleS[1];
& +sci3*atomJ.inducedDipolePS[1]+scip3*atomJ.inducedDipoleS[1]);
}
fdir3 = gfd*zr + 0.5f*rr5*scale5i;
& * (sci4*atomI.inducedDipolePS[2]+scip4*atomI.inducedDipoleS[2];
& +sci3*atomJ.inducedDipolePS[2]+scip3*atomJ.inducedDipoleS[2]);
ftm2i1 = ftm2i1 - fdir1 + findmp1;
ftm2i2 = ftm2i2 - fdir2 + findmp2;
ftm2i3 = ftm2i3 - fdir3 + findmp3;
end if;
#endif
// now perform the torque calculation
// now perform the torque calculation
// intermediate terms for torque between multipoles i and k
// intermediate terms for torque between multipoles i and k
...
@@ -803,24 +795,16 @@ __device__ void calculateKirkwoodEDiffPairIxn_kernel( KirkwoodEDiffParticle& ato
...
@@ -803,24 +795,16 @@ __device__ void calculateKirkwoodEDiffPairIxn_kernel( KirkwoodEDiffParticle& ato
// correction to convert mutual to direct polarization force;
// correction to convert mutual to direct polarization force;
#if 0
if
(
cAmoebaSim
.
polarizationType
){
if (poltyp .eq. 'DIRECT') then;
gfd = 0.5f * (rr5*scip2*scale3i;
float
gfd
=
0.5
f
*
(
rr5
*
scip2
*
scale3i
-
rr7
*
(
scip3
*
sci4
+
sci3
*
scip4
)
*
scale5i
);
& - rr7*(scip3*sci4+sci3*scip4)*scale5i);
float
fdir1
=
gfd
*
xr
+
0.5
f
*
rr5
*
scale5i
*
(
sci4
*
atomI
.
inducedDipoleP
[
0
]
+
scip4
*
atomI
.
inducedDipole
[
0
]
+
sci3
*
atomJ
.
inducedDipoleP
[
0
]
+
scip3
*
atomJ
.
inducedDipole
[
0
]);
fdir1 = gfd*xr + 0.5f*rr5*scale5i;
float
fdir2
=
gfd
*
yr
+
0.5
f
*
rr5
*
scale5i
*
(
sci4
*
atomI
.
inducedDipoleP
[
1
]
+
scip4
*
atomI
.
inducedDipole
[
1
]
+
sci3
*
atomJ
.
inducedDipoleP
[
1
]
+
scip3
*
atomJ
.
inducedDipole
[
1
]);
& * (sci4*atomI.inducedDipoleP[0]+scip4*atomI.inducedDipole[0];
float
fdir3
=
gfd
*
zr
+
0.5
f
*
rr5
*
scale5i
*
(
sci4
*
atomI
.
inducedDipoleP
[
2
]
+
scip4
*
atomI
.
inducedDipole
[
2
]
+
sci3
*
atomJ
.
inducedDipoleP
[
2
]
+
scip3
*
atomJ
.
inducedDipole
[
2
]);
& +sci3*atomJ.inducedDipoleP[0]+scip3*atomJ.inducedDipole[0]);
ftm2i1
-=
fdir1
-
findmp1
;
fdir2 = gfd*yr + 0.5f*rr5*scale5i;
ftm2i2
-=
fdir2
-
findmp2
;
& * (sci4*atomI.inducedDipoleP[1]+scip4*atomI.inducedDipole[1];
ftm2i3
-=
fdir3
-
findmp3
;
& +sci3*atomJ.inducedDipoleP[1]+scip3*atomJ.inducedDipole[1]);
}
fdir3 = gfd*zr + 0.5f*rr5*scale5i;
& * (sci4*atomI.inducedDipoleP[2]+scip4*atomI.inducedDipole[2];
& +sci3*atomJ.inducedDipoleP[2]+scip3*atomJ.inducedDipole[2]);
ftm2i1 = ftm2i1 - fdir1 + findmp1;
ftm2i2 = ftm2i2 - fdir2 + findmp2;
ftm2i3 = ftm2i3 - fdir3 + findmp3;
end if;
#endif
// now perform the torque calculation
// now perform the torque calculation
// intermediate terms for torque between multipoles i and k
// intermediate terms for torque between multipoles i and k
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
View file @
b1dc92cc
...
@@ -784,6 +784,15 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
...
@@ -784,6 +784,15 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
}
}
#endif
#endif
// if polarization type is direct, set flags signalling done and return
if
(
amoebaGpu
->
amoebaSim
.
polarizationType
)
{
amoebaGpu
->
mutualInducedDone
=
1
;
amoebaGpu
->
mutualInducedConverged
=
1
;
return
;
}
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
done
=
0
;
done
=
0
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
View file @
b1dc92cc
...
@@ -532,6 +532,15 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
...
@@ -532,6 +532,15 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
}
}
#endif
#endif
// if polarization type is direct, set flags signalling done and return
if
(
amoebaGpu
->
amoebaSim
.
polarizationType
)
{
amoebaGpu
->
mutualInducedDone
=
1
;
amoebaGpu
->
mutualInducedConverged
=
1
;
return
;
}
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
done
=
0
;
done
=
0
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
View file @
b1dc92cc
...
@@ -911,9 +911,16 @@ void kComputeInducedDipoleForceAndEnergy_kernel()
...
@@ -911,9 +911,16 @@ void kComputeInducedDipoleForceAndEnergy_kernel()
int
j2
=
deriv2
[
k
+
1
];
int
j2
=
deriv2
[
k
+
1
];
int
j3
=
deriv3
[
k
+
1
];
int
j3
=
deriv3
[
k
+
1
];
f
.
x
+=
(
inducedDipole
[
k
]
+
inducedDipolePolar
[
k
])
*
phi
[
j1
]
+
inducedDipole
[
k
]
*
phip
[
j1
]
+
inducedDipolePolar
[
k
]
*
phid
[
j1
];
f
.
x
+=
(
inducedDipole
[
k
]
+
inducedDipolePolar
[
k
])
*
phi
[
j1
];
f
.
y
+=
(
inducedDipole
[
k
]
+
inducedDipolePolar
[
k
])
*
phi
[
j2
]
+
inducedDipole
[
k
]
*
phip
[
j2
]
+
inducedDipolePolar
[
k
]
*
phid
[
j2
];
f
.
y
+=
(
inducedDipole
[
k
]
+
inducedDipolePolar
[
k
])
*
phi
[
j2
];
f
.
z
+=
(
inducedDipole
[
k
]
+
inducedDipolePolar
[
k
])
*
phi
[
j3
]
+
inducedDipole
[
k
]
*
phip
[
j3
]
+
inducedDipolePolar
[
k
]
*
phid
[
j3
];
f
.
z
+=
(
inducedDipole
[
k
]
+
inducedDipolePolar
[
k
])
*
phi
[
j3
];
if
(
cAmoebaSim
.
polarizationType
==
0
)
{
f
.
x
+=
inducedDipole
[
k
]
*
phip
[
j1
]
+
inducedDipolePolar
[
k
]
*
phid
[
j1
];
f
.
y
+=
inducedDipole
[
k
]
*
phip
[
j2
]
+
inducedDipolePolar
[
k
]
*
phid
[
j2
];
f
.
z
+=
inducedDipole
[
k
]
*
phip
[
j3
]
+
inducedDipolePolar
[
k
]
*
phid
[
j3
];
}
}
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
View file @
b1dc92cc
...
@@ -704,36 +704,22 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
...
@@ -704,36 +704,22 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
// correction to convert mutual to direct polarization force
// correction to convert mutual to direct polarization force
/*
if
(
cAmoebaSim
.
polarizationType
){
if (poltyp .eq. 'DIRECT') {
gfd = 0.5f * (bn2*scip2;
float
gfd
=
0.5
f
*
(
bn2
*
scip2
-
bn3
*
(
scip3
*
sci4
+
sci3
*
scip4
));
& - bn3*(scip3*sci4+sci3*scip4));
ftm2i1
-=
gfd
*
xr
+
0.5
f
*
bn2
*
(
sci4
*
atomI
.
inducedDipoleP
[
0
]
+
scip4
*
atomI
.
inducedDipole
[
0
]
+
sci3
*
atomJ
.
inducedDipoleP
[
0
]
+
scip3
*
atomJ
.
inducedDipole
[
0
]);
gfdr = 0.5f * (rr5*scip2*usc3;
ftm2i2
-=
gfd
*
yr
+
0.5
f
*
bn2
*
(
sci4
*
atomI
.
inducedDipoleP
[
1
]
+
scip4
*
atomI
.
inducedDipole
[
1
]
+
sci3
*
atomJ
.
inducedDipoleP
[
1
]
+
scip3
*
atomJ
.
inducedDipole
[
1
]);
& - rr7*(scip3*sci4;
ftm2i3
-=
gfd
*
zr
+
0.5
f
*
bn2
*
(
sci4
*
atomI
.
inducedDipoleP
[
2
]
+
scip4
*
atomI
.
inducedDipole
[
2
]
+
sci3
*
atomJ
.
inducedDipoleP
[
2
]
+
scip3
*
atomJ
.
inducedDipole
[
2
]);
& +sci3*scip4)*usc5);
ftm2i1 = ftm2i1 - gfd*xr - 0.5f*bn2*;
float
gfdr
=
0.5
f
*
(
rr5
*
scip2
*
usc3
-
rr7
*
(
scip3
*
sci4
+
sci3
*
scip4
)
*
usc5
);
& (sci4*atomI.inducedDipoleP[0]+scip4*atomI.inducedDipole[0];
float
fdir1
=
gfdr
*
xr
+
0.5
f
*
usc5
*
rr5
*
(
sci4
*
atomI
.
inducedDipoleP
[
0
]
+
scip4
*
atomI
.
inducedDipole
[
0
]
+
sci3
*
atomJ
.
inducedDipoleP
[
0
]
+
scip3
*
atomJ
.
inducedDipole
[
0
]);
& +sci3*atomJ.inducedDipoleP[0]+scip3*atomJ.inducedDipole[0]);
float
fdir2
=
gfdr
*
yr
+
0.5
f
*
usc5
*
rr5
*
(
sci4
*
atomI
.
inducedDipoleP
[
1
]
+
scip4
*
atomI
.
inducedDipole
[
1
]
+
sci3
*
atomJ
.
inducedDipoleP
[
1
]
+
scip3
*
atomJ
.
inducedDipole
[
1
]);
ftm2i2 = ftm2i2 - gfd*yr - 0.5f*bn2*;
float
fdir3
=
gfdr
*
zr
+
0.5
f
*
usc5
*
rr5
*
(
sci4
*
atomI
.
inducedDipoleP
[
2
]
+
scip4
*
atomI
.
inducedDipole
[
2
]
+
sci3
*
atomJ
.
inducedDipoleP
[
2
]
+
scip3
*
atomJ
.
inducedDipole
[
2
]);
& (sci4*atomI.inducedDipoleP[1]+scip4*atomI.inducedDipole[1];
& +sci3*atomJ.inducedDipoleP[1]+scip3*atomJ.inducedDipole[1]);
ftm2i1
+=
fdir1
+
findmp1
;
ftm2i3 = ftm2i3 - gfd*zr - 0.5f*bn2*;
ftm2i2
+=
fdir2
+
findmp2
;
& (sci4*atomI.inducedDipoleP[2]+scip4*atomI.inducedDipole[2];
ftm2i3
+=
fdir3
+
findmp3
;
& +sci3*atomJ.inducedDipoleP[2]+scip3*atomJ.inducedDipole[2]);
fdir1 = gfdr*xr + 0.5f*usc5*rr5*;
& (sci4*atomI.inducedDipoleP[0]+scip4*atomI.inducedDipole[0];
& + sci3*atomJ.inducedDipoleP[0]+scip3*atomJ.inducedDipole[0]);
fdir2 = gfdr*yr + 0.5f*usc5*rr5*;
& (sci4*atomI.inducedDipoleP[1]+scip4*atomI.inducedDipole[1];
& + sci3*atomJ.inducedDipoleP[1]+scip3*atomJ.inducedDipole[1]);
fdir3 = gfdr*zr + 0.5f*usc5*rr5*;
& (sci4*atomI.inducedDipoleP[2]+scip4*atomI.inducedDipole[2];
& + sci3*atomJ.inducedDipoleP[2]+scip3*atomJ.inducedDipole[2]);
ftm2i1 = ftm2i1 + fdir1 + findmp1;
ftm2i2 = ftm2i2 + fdir2 + findmp2;
ftm2i3 = ftm2i3 + fdir3 + findmp3;
}
}
*/
// intermediate variables for induced torque terms
// intermediate variables for induced torque terms
...
@@ -1198,6 +1184,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
...
@@ -1198,6 +1184,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
void
cudaComputeAmoebaPmeElectrostatic
(
amoebaGpuContext
amoebaGpu
)
void
cudaComputeAmoebaPmeElectrostatic
(
amoebaGpuContext
amoebaGpu
)
{
{
cudaComputeAmoebaPmeDirectElectrostatic
(
amoebaGpu
);
cudaComputeAmoebaPmeDirectElectrostatic
(
amoebaGpu
);
kCalculateAmoebaPMEInducedDipoleForces
(
amoebaGpu
);
kCalculateAmoebaPMEInducedDipoleForces
(
amoebaGpu
);
cudaComputeAmoebaMapTorqueAndAddToForce
(
amoebaGpu
,
amoebaGpu
->
psTorque
);
cudaComputeAmoebaMapTorqueAndAddToForce
(
amoebaGpu
,
amoebaGpu
->
psTorque
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
View file @
b1dc92cc
...
@@ -572,44 +572,25 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
...
@@ -572,44 +572,25 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
if
(
amoebaGpu
->
log
){
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
std
::
vector
<
int
>
fileId
;
std
::
vector
<
int
>
fileId
;
VectorOfDoubleVectors
outputVector
;
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaEFieldPolarity"
,
fileId
,
outputVector
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeEFieldPolarity"
,
fileId
,
outputVector
);
/*
amoebaGpu->psE_FieldPolar->Download();
amoebaGpu->psInducedDipole->Download(),
amoebaGpu->psInducedDipolePolar->Download();
amoebaGpu->psPolarizability->Download();
(void) fprintf( amoebaGpu->log, "%s Initial setup for matrix multiply\n", methodName );
int offset = 0;
int maxPrint = 10;
for( int ii = 0; ii < gpu->natoms; ii++ ){
(void) fprintf( amoebaGpu->log, "%4d pol=%12.4e ", ii,
amoebaGpu->psPolarizability->_pSysData[offset] );
if( amoebaGpu->psPolarizability->_pSysData[offset] != amoebaGpu->psPolarizability->_pSysData[offset+1] ||
amoebaGpu->psPolarizability->_pSysData[offset] != amoebaGpu->psPolarizability->_pSysData[offset+2] ){
(void) fprintf( amoebaGpu->log, "PolX!!! %12.4e %12.4e ", amoebaGpu->psPolarizability->_pSysData[offset+1], amoebaGpu->psPolarizability->_pSysData[offset+2] );
}
(void) fprintf( amoebaGpu->log," E[%14.6e %14.6e %14.6e] Mi[%14.6e %14.6e %14.6e] ",
amoebaGpu->psE_Field->_pSysData[offset], amoebaGpu->psE_Field->_pSysData[offset+1], amoebaGpu->psE_Field->_pSysData[offset+2],
amoebaGpu->psInducedDipole->_pSysData[offset], amoebaGpu->psInducedDipole->_pSysData[offset+1], amoebaGpu->psInducedDipole->_pSysData[offset+2] );
(void) fprintf( amoebaGpu->log,"Ep[%14.6e %14.6e %14.6e] Mip[%14.6e %14.6e %14.6e]\n",
amoebaGpu->psE_FieldPolar->_pSysData[offset], amoebaGpu->psE_FieldPolar->_pSysData[offset+1], amoebaGpu->psE_FieldPolar->_pSysData[offset+2],
amoebaGpu->psInducedDipolePolar->_pSysData[offset], amoebaGpu->psInducedDipolePolar->_pSysData[offset+1], amoebaGpu->psInducedDipolePolar->_pSysData[offset+2] );
offset += 3;
if( ii == maxPrint && (ii < (gpu->natoms - maxPrint) ) )ii = (gpu->natoms - maxPrint);
}
}
#endif
void) fflush( amoebaGpu->log );
// if polarization type is direct, set flags signalling done and return
*/
if
(
amoebaGpu
->
amoebaSim
.
polarizationType
)
{
amoebaGpu
->
mutualInducedDone
=
1
;
amoebaGpu
->
mutualInducedConverged
=
1
;
kCalculateAmoebaPMEInducedDipoleField
(
amoebaGpu
);
return
;
}
}
#endif
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
...
@@ -624,35 +605,6 @@ void) fflush( amoebaGpu->log );
...
@@ -624,35 +605,6 @@ void) fflush( amoebaGpu->log );
kCalculateAmoebaPMEInducedDipoleField
(
amoebaGpu
);
kCalculateAmoebaPMEInducedDipoleField
(
amoebaGpu
);
LAUNCHERROR
(
"cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply Loop
\n
"
);
LAUNCHERROR
(
"cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply Loop
\n
"
);
#ifdef GET_EFIELD_FROM_FILE
{
std
::
string
fileName
=
"waterInduceRecip.txt"
;
StringVectorVector
fileContents
;
readFile
(
fileName
,
fileContents
);
unsigned
int
offset
=
0
;
amoebaGpu
->
psWorkVector
[
0
]
->
Download
();
amoebaGpu
->
psWorkVector
[
1
]
->
Download
();
if
(
amoebaGpu
->
log
)(
void
)
fprintf
(
amoebaGpu
->
log
,
"Read file: %s %u
\n
"
,
fileName
.
c_str
(),
fileContents
.
size
()
);
fflush
(
amoebaGpu
->
log
);
float
conversion
=
100.0
f
;
for
(
unsigned
int
ii
=
1
;
ii
<
fileContents
.
size
()
-
1
;
ii
++
){
StringVector
lineTokens
=
fileContents
[
ii
];
unsigned
int
lineTokenIndex
=
1
;
// (void) fprintf( amoebaGpu->log, " %u %s %s\n", ii, lineTokens[0].c_str(), lineTokens[lineTokenIndex].c_str() ); fflush( amoebaGpu->log );
amoebaGpu
->
psWorkVector
[
0
]
->
_pSysData
[
offset
++
]
+=
conversion
*
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psWorkVector
[
0
]
->
_pSysData
[
offset
++
]
+=
conversion
*
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psWorkVector
[
0
]
->
_pSysData
[
offset
++
]
+=
conversion
*
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
offset
-=
3
;
amoebaGpu
->
psWorkVector
[
1
]
->
_pSysData
[
offset
++
]
+=
conversion
*
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psWorkVector
[
1
]
->
_pSysData
[
offset
++
]
+=
conversion
*
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psWorkVector
[
1
]
->
_pSysData
[
offset
++
]
+=
conversion
*
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
}
amoebaGpu
->
psWorkVector
[
0
]
->
Upload
();
amoebaGpu
->
psWorkVector
[
1
]
->
Upload
();
}
#endif
// post matrix multiply
// post matrix multiply
kSorUpdateMutualInducedField_kernel
<<<
numBlocks
,
numThreads
>>>
(
kSorUpdateMutualInducedField_kernel
<<<
numBlocks
,
numThreads
>>>
(
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
View file @
b1dc92cc
...
@@ -353,28 +353,6 @@ __device__ void calculateWcaDispersionPairIxn_kernel( float4 atomCoordinatesI, f
...
@@ -353,28 +353,6 @@ __device__ void calculateWcaDispersionPairIxn_kernel( float4 atomCoordinatesI, f
#define METHOD_NAME(a, b) a##N2ByWarp##b
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateAmoebaCudaWcaDispersion.h"
#include "kCalculateAmoebaCudaWcaDispersion.h"
// reduce psWorkArray_3_1 -> outputArray
static
void
kReduceWcaDispersion
(
amoebaGpuContext
amoebaGpu
,
CUDAStream
<
float
>*
outputArray
)
{
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
outputArray
->
_pDevData
,
0
);
LAUNCHERROR
(
"kReduceWcaDispersion"
);
}
// reduce psWorkArray_3_1 -> outputArray
static
void
kReduceWcaDispersionToFloat4
(
amoebaGpuContext
amoebaGpu
,
CUDAStream
<
float4
>*
outputArray
)
{
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
kReduceFieldsToFloat4_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
outputArray
->
_pDevData
);
LAUNCHERROR
(
"kReduceWcaDispersion"
);
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Compute WCA dispersion
Compute WCA dispersion
...
@@ -394,15 +372,6 @@ void kCalculateAmoebaWcaDispersionForces( amoebaGpuContext amoebaGpu )
...
@@ -394,15 +372,6 @@ void kCalculateAmoebaWcaDispersionForces( amoebaGpuContext amoebaGpu )
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
#ifdef AMOEBA_DEBUG
static
const
char
*
methodName
=
"kCalculateAmoebaWcaDispersionForces"
;
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
*
paddedNumberOfAtoms
,
1
,
"DebugArray"
);
memset
(
debugArray
->
_pSysData
,
0
,
sizeof
(
float
)
*
4
*
paddedNumberOfAtoms
*
paddedNumberOfAtoms
);
debugArray
->
Upload
();
int
targetAtom
=
3
;
#endif
// set threads/block first time through
// set threads/block first time through
if
(
threadsPerBlock
==
0
){
if
(
threadsPerBlock
==
0
){
...
@@ -427,222 +396,17 @@ void kCalculateAmoebaWcaDispersionForces( amoebaGpuContext amoebaGpu )
...
@@ -427,222 +396,17 @@ void kCalculateAmoebaWcaDispersionForces( amoebaGpuContext amoebaGpu )
#endif
#endif
if
(
gpu
->
bOutputBufferPerWarp
){
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaWcaDispersionN2ByWarp_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
WcaDispersionParticle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaWcaDispersionN2ByWarp_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
WcaDispersionParticle
)
*
threadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevData
,
amoebaGpu
->
psWorkUnit
->
_pDevData
);
gpu
->
psPosq4
->
_pDevData
,
amoebaGpu
->
psWcaDispersionRadiusEpsilon
->
_pDevData
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
#endif
}
else
{
}
else
{
kCalculateAmoebaWcaDispersionN2_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
WcaDispersionParticle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaWcaDispersionN2_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
WcaDispersionParticle
)
*
threadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevData
,
amoebaGpu
->
psWorkUnit
->
_pDevData
);
gpu
->
psPosq4
->
_pDevData
,
amoebaGpu
->
psWcaDispersionRadiusEpsilon
->
_pDevData
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
#endif
}
}
LAUNCHERROR
(
"kCalculateAmoebaWcaDispersion"
);
LAUNCHERROR
(
"kCalculateAmoebaWcaDispersion"
);
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
amoebaGpu
->
psWorkArray_3_1
->
Download
();
debugArray
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
double
sum
=
0.0
;
double
sums
[
8
]
=
{
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
};
std
::
map
<
int
,
double
>
buffers
;
std
::
vector
<
std
::
vector
<
double
>
>
trackD
;
std
::
vector
<
std
::
vector
<
double
>
>
trackT
;
std
::
vector
<
double
>
maxD
;
std
::
vector
<
std
::
vector
<
int
>
>
trackI
;
trackD
.
resize
(
gpu
->
natoms
);
trackT
.
resize
(
gpu
->
natoms
);
maxD
.
resize
(
gpu
->
natoms
);
trackI
.
resize
(
gpu
->
natoms
);
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
unsigned
int
debugIndex
=
jj
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d %5d DebugWca
\n
"
,
targetAtom
,
jj
);
int
block
=
-
1
;
for
(
int
kk
=
0
;
kk
<
-
3
;
kk
++
){
if
(
kk
==
1
){
block
=
static_cast
<
int
>
(
debugArray
->
_pSysData
[
debugIndex
].
w
+
1.0e-04
);
if
(
buffers
.
find
(
block
)
==
buffers
.
end
()
){
buffers
[
block
]
=
0.0
;
}
}
if
(
kk
==
1
&&
jj
!=
targetAtom
){
sums
[
0
]
+=
debugArray
->
_pSysData
[
debugIndex
].
y
;
sums
[
1
]
+=
debugArray
->
_pSysData
[
debugIndex
].
z
;
sums
[
2
]
+=
debugArray
->
_pSysData
[
debugIndex
].
w
;
double
x4
=
debugArray
->
_pSysData
[
debugIndex
].
x
-
(
debugArray
->
_pSysData
[
debugIndex
].
y
+
debugArray
->
_pSysData
[
debugIndex
].
z
+
debugArray
->
_pSysData
[
debugIndex
].
w
);
sums
[
3
]
+=
x4
;
//sum += debugArray->_pSysData[debugIndex].x;
sum
+=
debugArray
->
_pSysData
[
debugIndex
].
z
;
buffers
[
block
]
+=
debugArray
->
_pSysData
[
debugIndex
].
z
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
" %16.9e [%16.9e %16.9e %16.9e %16.9e]
\n
"
,
debugArray
->
_pSysData
[
debugIndex
].
x
,
debugArray
->
_pSysData
[
debugIndex
].
y
,
debugArray
->
_pSysData
[
debugIndex
].
z
,
debugArray
->
_pSysData
[
debugIndex
].
w
,
x4
);
}
else
if
(
kk
==
2
&&
jj
!=
targetAtom
){
//sum += debugArray->_pSysData[debugIndex].x;
sum
+=
debugArray
->
_pSysData
[
debugIndex
].
z
;
sums
[
4
]
+=
debugArray
->
_pSysData
[
debugIndex
].
y
;
sums
[
5
]
+=
debugArray
->
_pSysData
[
debugIndex
].
z
;
sums
[
6
]
+=
debugArray
->
_pSysData
[
debugIndex
].
w
;
double
x4
=
debugArray
->
_pSysData
[
debugIndex
].
x
-
(
debugArray
->
_pSysData
[
debugIndex
].
y
+
debugArray
->
_pSysData
[
debugIndex
].
z
+
debugArray
->
_pSysData
[
debugIndex
].
w
);
sums
[
7
]
+=
x4
;
buffers
[
block
]
+=
debugArray
->
_pSysData
[
debugIndex
].
z
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
" %16.9e [%16.9e %16.9e %16.9e %16.9e]
\n
"
,
debugArray
->
_pSysData
[
debugIndex
].
x
,
debugArray
->
_pSysData
[
debugIndex
].
y
,
debugArray
->
_pSysData
[
debugIndex
].
z
,
debugArray
->
_pSysData
[
debugIndex
].
w
,
x4
);
}
else
{
(
void
)
fprintf
(
amoebaGpu
->
log
,
"[%16.9e %16.9e %16.9e %16.9e] %7u
\n
"
,
debugArray
->
_pSysData
[
debugIndex
].
x
,
debugArray
->
_pSysData
[
debugIndex
].
y
,
debugArray
->
_pSysData
[
debugIndex
].
z
,
debugArray
->
_pSysData
[
debugIndex
].
w
,
debugIndex
);
}
if
(
kk
==
5
)(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
debugIndex
+=
paddedNumberOfAtoms
;
}
block
=
static_cast
<
int
>
(
debugArray
->
_pSysData
[
debugIndex
+
paddedNumberOfAtoms
].
w
+
1.0e-04
);
if
(
buffers
.
find
(
block
)
==
buffers
.
end
()
){
buffers
[
block
]
=
0.0
;
maxD
[
block
]
=
0.0
;
}
for
(
int
kk
=
0
;
kk
<
3
;
kk
++
){
if
(
kk
==
0
&&
jj
!=
targetAtom
){
sum
+=
debugArray
->
_pSysData
[
debugIndex
].
z
;
buffers
[
block
]
+=
debugArray
->
_pSysData
[
debugIndex
].
z
;
trackI
[
block
].
push_back
(
jj
);
trackD
[
block
].
push_back
(
debugArray
->
_pSysData
[
debugIndex
].
z
);
trackT
[
block
].
push_back
(
debugArray
->
_pSysData
[
debugIndex
].
w
);
if
(
fabs
(
debugArray
->
_pSysData
[
debugIndex
].
w
)
>
maxD
[
block
]
){
maxD
[
block
]
=
fabs
(
debugArray
->
_pSysData
[
debugIndex
].
w
);
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"[%16.9e %16.9e %16.9e %16.9e]
\n
"
,
debugArray
->
_pSysData
[
debugIndex
].
x
,
debugArray
->
_pSysData
[
debugIndex
].
y
,
debugArray
->
_pSysData
[
debugIndex
].
z
,
debugArray
->
_pSysData
[
debugIndex
].
w
);
}
else
if
(
kk
==
2
&&
jj
!=
targetAtom
){
sum
+=
debugArray
->
_pSysData
[
debugIndex
].
z
;
buffers
[
block
]
+=
debugArray
->
_pSysData
[
debugIndex
].
z
;
trackI
[
block
].
push_back
(
jj
);
trackD
[
block
].
push_back
(
debugArray
->
_pSysData
[
debugIndex
].
z
);
trackT
[
block
].
push_back
(
debugArray
->
_pSysData
[
debugIndex
].
w
);
if
(
fabs
(
debugArray
->
_pSysData
[
debugIndex
].
w
)
>
maxD
[
block
]
){
maxD
[
block
]
=
fabs
(
debugArray
->
_pSysData
[
debugIndex
].
w
);
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"[%16.9e %16.9e %16.9e %16.9e]
\n
"
,
debugArray
->
_pSysData
[
debugIndex
].
x
,
debugArray
->
_pSysData
[
debugIndex
].
y
,
debugArray
->
_pSysData
[
debugIndex
].
z
,
debugArray
->
_pSysData
[
debugIndex
].
w
);
}
else
{
(
void
)
fprintf
(
amoebaGpu
->
log
,
"[%16.9e %16.9e %16.9e %16.9e] %7u
\n
"
,
debugArray
->
_pSysData
[
debugIndex
].
x
,
debugArray
->
_pSysData
[
debugIndex
].
y
,
debugArray
->
_pSysData
[
debugIndex
].
z
,
debugArray
->
_pSysData
[
debugIndex
].
w
,
debugIndex
);
}
if
(
kk
==
5
)(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
debugIndex
+=
paddedNumberOfAtoms
;
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Total sum=%14.7e
\n
"
,
sum
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"DeWW
\n
"
);
sum
=
0.0
;
for
(
int
jj
=
0
;
jj
<
4
;
jj
++
){
sum
+=
sums
[
jj
]
+
sums
[
jj
+
4
];
(
void
)
fprintf
(
amoebaGpu
->
log
,
"[%14.7e %14.7e] %14.7e
\n
"
,
sums
[
jj
],
sums
[
jj
+
4
],
sums
[
jj
]
+
sums
[
jj
+
4
]
);
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Total sum8=%14.7e
\n
"
,
sum
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Buffers
\n
"
,
sum
);
sum
=
0.0
;
for
(
std
::
map
<
int
,
double
>::
const_iterator
jj
=
buffers
.
begin
();
jj
!=
buffers
.
end
();
jj
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d %14.7e"
,
jj
->
first
,
jj
->
second
);
sum
+=
jj
->
second
;
int
block
=
jj
->
first
;
double
sumBlock
=
0.0
;
for
(
unsigned
int
kk
=
0
;
kk
<
trackI
[
block
].
size
();
kk
++
){
sumBlock
+=
trackD
[
block
][
kk
];
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
" %5u Total=%14.7e MaxD=%14.7e
\n
"
,
trackI
[
block
].
size
(),
sumBlock
,
maxD
[
block
]);
for
(
unsigned
int
kk
=
0
;
kk
<
trackI
[
block
].
size
();
kk
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"[%5d %14.7e %14.7e] "
,
trackI
[
block
][
kk
],
trackD
[
block
][
kk
],
trackT
[
block
][
kk
]);
if
(
((
kk
+
1
)
%
3
)
==
0
)(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n\n\n
"
);
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Total buffer sum=%14.7e
\n
"
,
sum
);
/*
for( int jj = 0; jj < gpu->natoms; jj++ ){
int debugIndex = jj;
int debugIndex4 = debugIndex + 4*paddedNumberOfAtoms;
int debugIndex5 = debugIndex + 5*paddedNumberOfAtoms;
int debugIndex9 = debugIndex + 9*paddedNumberOfAtoms;
int debugIndex10 = debugIndex + 10*paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log,"%6d %14.7e %14.7e %14.7e %14.7e %14.7e %14.7e %14.7e %14.7e %14.7e \n", jj,
debugArray->_pSysData[debugIndex4].x*debugArray->_pSysData[debugIndex4].x, // r2
debugArray->_pSysData[debugIndex4].y, debugArray->_pSysData[debugIndex9].y, // de
debugArray->_pSysData[debugIndex5].x, debugArray->_pSysData[debugIndex10].x, // dll
debugArray->_pSysData[debugIndex5].y, debugArray->_pSysData[debugIndex10].y, // duu
debugArray->_pSysData[debugIndex4].z, debugArray->_pSysData[debugIndex9].z ); // emxio
}
*/
}
#endif
kReduceWcaDispersionToFloat4
(
amoebaGpu
,
gpu
->
psForce4
);
#ifdef AMOEBA_DEBUG
if
(
0
&&
amoebaGpu
->
log
){
gpu
->
psEnergy
->
Download
();
double
sum
=
0.0
;
for
(
int
i
=
0
;
i
<
gpu
->
sim
.
energyOutputBuffers
;
i
++
){
sum
+=
gpu
->
psEnergy
->
_pSysData
[
i
];
if
(
fabsf
(
(
*
gpu
->
psEnergy
)[
i
])
>
0.0
)
(
void
)
fprintf
(
amoebaGpu
->
log
,
"SumQQ %6d %14.7e QQ SUM
\n
"
,
i
,
(
*
gpu
->
psEnergy
)[
i
]
);
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%14.7e QQ SUM
\n
"
,
sum
);
}
#endif
if
(
0
){
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
CUDAStream
<
float
>*
psTempForce
=
new
CUDAStream
<
float
>
(
4
*
paddedNumberOfAtoms
,
1
,
"psTempForce"
);
kClearFloat
(
amoebaGpu
,
4
*
paddedNumberOfAtoms
,
psTempForce
);
kReduceWcaDispersion
(
amoebaGpu
,
psTempForce
);
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
NULL
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
psTempForce
,
outputVector
,
NULL
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaWca"
,
fileId
,
outputVector
);
delete
psTempForce
;
//exit(0);
}
#ifdef AMOEBA_DEBUG
delete
debugArray
;
//exit(0);
#endif
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
}
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.h
View file @
b1dc92cc
...
@@ -32,15 +32,8 @@ __launch_bounds__(192, 1)
...
@@ -32,15 +32,8 @@ __launch_bounds__(192, 1)
#else
#else
__launch_bounds__
(
64
,
1
)
__launch_bounds__
(
64
,
1
)
#endif
#endif
void
METHOD_NAME
(
kCalculateAmoebaWcaDispersion
,
_kernel
)(
unsigned
int
*
workUnit
,
void
METHOD_NAME
(
kCalculateAmoebaWcaDispersion
,
_kernel
)(
unsigned
int
*
workUnit
){
float4
*
atomCoord
,
float2
*
wcaDispersionParameters
,
float
*
outputForce
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
,
unsigned
int
targetAtom
#endif
){
extern
__shared__
WcaDispersionParticle
sA
[];
extern
__shared__
WcaDispersionParticle
sA
[];
...
@@ -56,9 +49,6 @@ void METHOD_NAME(kCalculateAmoebaWcaDispersion, _kernel)(
...
@@ -56,9 +49,6 @@ void METHOD_NAME(kCalculateAmoebaWcaDispersion, _kernel)(
float
jEpsilon
;
float
jEpsilon
;
float
totalEnergy
=
0
.
0
f
;
float
totalEnergy
=
0
.
0
f
;
#ifdef AMOEBA_DEBUG
float4
pullDebug
[
3
];
#endif
while
(
pos
<
end
)
while
(
pos
<
end
)
{
{
...
@@ -76,9 +66,9 @@ void METHOD_NAME(kCalculateAmoebaWcaDispersion, _kernel)(
...
@@ -76,9 +66,9 @@ void METHOD_NAME(kCalculateAmoebaWcaDispersion, _kernel)(
WcaDispersionParticle
*
psA
=
&
sA
[
tbx
];
WcaDispersionParticle
*
psA
=
&
sA
[
tbx
];
unsigned
int
atomI
=
x
+
tgx
;
unsigned
int
atomI
=
x
+
tgx
;
float4
iCoord
=
atomCoord
[
atomI
];
float4
iCoord
=
cSim
.
pPosq
[
atomI
];
float
iRadius
=
wcaDispersionParameters
[
atomI
].
x
;
float
iRadius
=
cAmoebaSim
.
pWcaDispersionRadiusEpsilon
[
atomI
].
x
;
float
iEpsilon
=
wcaDispersionParameters
[
atomI
].
y
;
float
iEpsilon
=
cAmoebaSim
.
pWcaDispersionRadiusEpsilon
[
atomI
].
y
;
float
forceSum
[
3
];
float
forceSum
[
3
];
...
@@ -100,7 +90,7 @@ void METHOD_NAME(kCalculateAmoebaWcaDispersion, _kernel)(
...
@@ -100,7 +90,7 @@ void METHOD_NAME(kCalculateAmoebaWcaDispersion, _kernel)(
if
(
lasty
!=
y
)
if
(
lasty
!=
y
)
{
{
loadWcaDispersionShared
(
&
(
sA
[
threadIdx
.
x
]),
(
y
+
tgx
),
atomCoord
,
wcaDispersionParameters
);
loadWcaDispersionShared
(
&
(
sA
[
threadIdx
.
x
]),
(
y
+
tgx
),
cSim
.
pPosq
,
cAmoebaSim
.
pWcaDispersionRadiusEpsilon
);
}
}
...
@@ -124,66 +114,8 @@ void METHOD_NAME(kCalculateAmoebaWcaDispersion, _kernel)(
...
@@ -124,66 +114,8 @@ void METHOD_NAME(kCalculateAmoebaWcaDispersion, _kernel)(
iRadius
,
jRadius
,
iRadius
,
jRadius
,
rmixo
,
rmixh
,
rmixo
,
rmixh
,
emixo
,
emixh
,
emixo
,
emixh
,
ijForce
,
&
energy
ijForce
,
&
energy
);
#ifdef AMOEBA_DEBUG
,
pullDebug
#endif
);
#ifdef AMOEBA_DEBUG
if
(
(
atomI
==
targetAtom
)
||
(
(
y
+
tj
)
==
targetAtom
)
){
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
(
y
+
tj
)
:
atomI
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
tj
);
//debugArray[index].z = (float) cSim.atoms;
debugArray
[
index
].
z
=
atomI
==
(
y
+
tj
)
?
0
.
0
f
:
energy
;
energy
=
(
(
atomI
!=
(
y
+
tj
))
&&
(
atomI
<
cSim
.
atoms
)
&&
((
y
+
tj
)
<
cSim
.
atoms
)
)
?
(
energy
)
:
0
.
0
f
;
debugArray
[
index
].
w
=
energy
+
totalEnergy
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
iCoord
.
x
;
debugArray
[
index
].
y
=
iCoord
.
y
;
debugArray
[
index
].
z
=
iCoord
.
z
;
debugArray
[
index
].
w
=
(
float
)
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
jCoord
.
x
;
debugArray
[
index
].
y
=
jCoord
.
y
;
debugArray
[
index
].
z
=
jCoord
.
z
;
debugArray
[
index
].
w
=
-
4
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
emixo
;
debugArray
[
index
].
y
=
emixh
;
debugArray
[
index
].
z
=
rmixo
;
debugArray
[
index
].
w
=
rmixh
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullDebug
[
0
].
x
;
debugArray
[
index
].
y
=
pullDebug
[
0
].
y
;
debugArray
[
index
].
z
=
pullDebug
[
0
].
z
;
debugArray
[
index
].
w
=
pullDebug
[
0
].
w
;
#if 0
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = pullDebug[1].x;
debugArray[index].y = pullDebug[1].y;
debugArray[index].z = pullDebug[1].z;
debugArray[index].w = pullDebug[1].w;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = pullDebug[2].x;
debugArray[index].y = pullDebug[2].y;
debugArray[index].z = pullDebug[2].z;
debugArray[index].w = pullDebug[2].w;
#endif
}
else
{
// energy = 0.0f;
}
#endif
if
(
(
atomI
!=
(
y
+
tj
))
&&
(
atomI
<
cSim
.
atoms
)
&&
((
y
+
tj
)
<
cSim
.
atoms
)
){
if
(
(
atomI
!=
(
y
+
tj
))
&&
(
atomI
<
cSim
.
atoms
)
&&
((
y
+
tj
)
<
cSim
.
atoms
)
){
// add to field at atomI the field due atomJ's dipole
// add to field at atomI the field due atomJ's dipole
...
@@ -209,54 +141,8 @@ debugArray[index].w = pullDebug[2].w;
...
@@ -209,54 +141,8 @@ debugArray[index].w = pullDebug[2].w;
jRadius
,
iRadius
,
jRadius
,
iRadius
,
rmjxo
,
rmjxh
,
rmjxo
,
rmjxh
,
emjxo
,
emjxh
,
emjxo
,
emjxh
,
ijForce
,
&
energy
ijForce
,
&
energy
);
#ifdef AMOEBA_DEBUG
,
pullDebug
#endif
);
#ifdef AMOEBA_DEBUG
if
(
(
atomI
==
targetAtom
)
||
(
(
y
+
tj
)
==
targetAtom
)
){
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
(
y
+
tj
)
:
atomI
;
index
+=
2
*
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
tj
);
debugArray
[
index
].
z
=
atomI
==
(
y
+
tj
)
?
0
.
0
f
:
energy
;
energy
=
(
(
atomI
!=
(
y
+
tj
))
&&
(
atomI
<
cSim
.
atoms
)
&&
((
y
+
tj
)
<
cSim
.
atoms
)
)
?
(
energy
)
:
0
.
0
f
;
debugArray
[
index
].
w
=
energy
+
totalEnergy
;
//debugArray[index].w = -2.0f;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
emjxo
;
debugArray
[
index
].
y
=
emjxh
;
debugArray
[
index
].
z
=
rmjxo
;
debugArray
[
index
].
w
=
rmjxh
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullDebug
[
0
].
x
;
debugArray
[
index
].
y
=
pullDebug
[
0
].
y
;
debugArray
[
index
].
z
=
pullDebug
[
0
].
z
;
debugArray
[
index
].
w
=
pullDebug
[
0
].
w
;
#if 0
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = pullDebug[1].x;
debugArray[index].y = pullDebug[1].y;
debugArray[index].z = pullDebug[1].z;
debugArray[index].w = pullDebug[1].w;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = pullDebug[2].x;
debugArray[index].y = pullDebug[2].y;
debugArray[index].z = pullDebug[2].z;
debugArray[index].w = pullDebug[2].w;
#endif
}
else
{
//energy = 0.0f;
}
#endif
if
(
(
atomI
!=
(
y
+
tj
))
&&
(
atomI
<
cSim
.
atoms
)
&&
((
y
+
tj
)
<
cSim
.
atoms
)
){
if
(
(
atomI
!=
(
y
+
tj
))
&&
(
atomI
<
cSim
.
atoms
)
&&
((
y
+
tj
)
<
cSim
.
atoms
)
){
// add to field at atomI the field due atomJ's dipole
// add to field at atomI the field due atomJ's dipole
...
@@ -281,24 +167,32 @@ debugArray[index].w = pullDebug[2].w;
...
@@ -281,24 +167,32 @@ debugArray[index].w = pullDebug[2].w;
// Write results
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
offset
=
3
*
(
x
+
tgx
+
warp
*
cSim
.
paddedNumberOfAtoms
);
unsigned
int
offset
=
(
x
+
tgx
+
warp
*
cSim
.
paddedNumberOfAtoms
);
lo
ad3dArray
BufferPerWarp
(
offset
,
forceSum
,
output
Force
);
a
d
d3dArray
ToFloat4
(
offset
,
forceSum
,
cSim
.
p
Force
4
);
offset
=
3
*
(
y
+
tgx
+
warp
*
cSim
.
paddedNumberOfAtoms
);
// include diagonal only once
load3dArrayBufferPerWarp
(
offset
,
sA
[
threadIdx
.
x
].
force
,
outputForce
);
if
(
x
!=
y
){
offset
=
(
y
+
tgx
+
warp
*
cSim
.
paddedNumberOfAtoms
);
add3dArrayToFloat4
(
offset
,
sA
[
threadIdx
.
x
].
force
,
cSim
.
pForce4
);
}
#else
#else
unsigned
int
offset
=
3
*
(
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
);
unsigned
int
offset
=
(
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
);
lo
ad3dArray
(
offset
,
forceSum
,
output
Force
);
a
d
d3dArray
ToFloat4
(
offset
,
forceSum
,
cSim
.
p
Force
4
);
offset
=
3
*
(
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
);
// include diagonal only once
load3dArray
(
offset
,
sA
[
threadIdx
.
x
].
force
,
outputForce
);
if
(
x
!=
y
){
offset
=
(
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
);
add3dArrayToFloat4
(
offset
,
sA
[
threadIdx
.
x
].
force
,
cSim
.
pForce4
);
}
#endif
#endif
lasty
=
y
;
lasty
=
y
;
pos
++
;
pos
++
;
}
}
cSim
.
pEnergy
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
-=
cAmoebaSim
.
awater
*
totalEnergy
;
cSim
.
pEnergy
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
-=
cAmoebaSim
.
awater
*
totalEnergy
;
if
(
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
==
0
){
if
(
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
==
0
){
cSim
.
pEnergy
[
0
]
+=
cAmoebaSim
.
totalMaxWcaDispersionEnergy
;
cSim
.
pEnergy
[
0
]
+=
cAmoebaSim
.
totalMaxWcaDispersionEnergy
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment