Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
2ef05285
Commit
2ef05285
authored
Sep 13, 2010
by
Mark Friedrichs
Browse files
Direct space PME
parent
c22f00cf
Changes
13
Show whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
546 additions
and
452 deletions
+546
-452
plugins/amoeba/CMakeLists.txt
plugins/amoeba/CMakeLists.txt
+6
-5
plugins/amoeba/openmmapi/include/AmoebaMultipoleForce.h
plugins/amoeba/openmmapi/include/AmoebaMultipoleForce.h
+15
-0
plugins/amoeba/openmmapi/src/AmoebaMultipoleForce.cpp
plugins/amoeba/openmmapi/src/AmoebaMultipoleForce.cpp
+8
-0
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+1
-0
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
+4
-4
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
...ins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
+3
-3
plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
...src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
+354
-235
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
.../src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
+140
-199
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
...ms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
+7
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+2
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
...platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
+1
-1
plugins/amoeba/platforms/cuda/tests/AmoebaTinkerParameterFile.cpp
...amoeba/platforms/cuda/tests/AmoebaTinkerParameterFile.cpp
+4
-2
No files found.
plugins/amoeba/CMakeLists.txt
View file @
2ef05285
...
...
@@ -38,8 +38,7 @@ ENDIF(LOG)
# The source is organized into subdirectories, but we handle them all from
# this CMakeLists file rather than letting CMake visit them as SUBDIRS.
#SET(OPENMM_AMOEBA_PLUGIN_SOURCE_SUBDIRS . openmmapi platforms/reference)
SET
(
OPENMM_AMOEBA_PLUGIN_SOURCE_SUBDIRS . openmmapi
)
SET
(
OPENMM_AMOEBA_PLUGIN_SOURCE_SUBDIRS . openmmapi platforms/reference
)
# Collect up information about the version of the OpenMM library we're building
# and make it available to the code so it can be built into the binaries.
...
...
@@ -103,8 +102,10 @@ FOREACH(subdir ${OPENMM_AMOEBA_PLUGIN_SOURCE_SUBDIRS})
INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
subdir
}
/include
)
ENDFOREACH
(
subdir
)
#INCLUDE_DIRECTORIES(BEFORE ${OPENMM_DIR}/platforms/reference/src)
#INCLUDE_DIRECTORIES(BEFORE ${OPENMM_DIR}/platforms/reference/src/SimTKReference)
INCLUDE_DIRECTORIES
(
BEFORE
${
OPENMM_DIR
}
/platforms/reference/src
)
INCLUDE_DIRECTORIES
(
BEFORE
${
OPENMM_DIR
}
/platforms/reference/src/SimTKReference
)
INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_CURRENT_SOURCE_DIR
}
/platforms/reference/src
)
INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_CURRENT_SOURCE_DIR
}
/platforms/reference/src/SimTKReference
)
# ----------------------------------------------------------------------------
IF
(
LOG
)
...
...
@@ -158,7 +159,7 @@ IF(OPENMM_BUILD_STATIC_LIB)
TARGET_LINK_LIBRARIES
(
${
STATIC_AMOEBA_TARGET
}
${
STATIC_TARGET
}
)
ENDIF
(
OPENMM_BUILD_STATIC_LIB
)
#
ADD_SUBDIRECTORY(platforms/reference/tests)
ADD_SUBDIRECTORY
(
platforms/reference/tests
)
# Which hardware platforms to build
IF
(
CUDA_FOUND
)
...
...
plugins/amoeba/openmmapi/include/AmoebaMultipoleForce.h
View file @
2ef05285
...
...
@@ -113,6 +113,20 @@ public:
*/
void
setCutoffDistance
(
double
distance
);
/**
* Get the aEwald parameter
*
* @return the Ewald parameter
*/
double
getAEwald
()
const
;
/**
* Set the aEwald parameter
*
* @param Ewald parameter
*/
void
setAEwald
(
double
aewald
);
/**
* Add multipole-related info for a particle
*
...
...
@@ -279,6 +293,7 @@ private:
AmoebaNonbondedMethod
nonbondedMethod
;
double
cutoffDistance
;
double
aewald
;
MutualInducedIterationMethod
mutualInducedIterationMethod
;
int
mutualInducedMaxIterations
;
double
mutualInducedTargetEpsilon
;
...
...
plugins/amoeba/openmmapi/src/AmoebaMultipoleForce.cpp
View file @
2ef05285
...
...
@@ -56,6 +56,14 @@ void AmoebaMultipoleForce::setCutoffDistance(double distance) {
cutoffDistance
=
distance
;
}
double
AmoebaMultipoleForce
::
getAEwald
()
const
{
return
aewald
;
}
void
AmoebaMultipoleForce
::
setAEwald
(
double
inputAewald
)
{
aewald
=
inputAewald
;
}
AmoebaMultipoleForce
::
MutualInducedIterationMethod
AmoebaMultipoleForce
::
getMutualInducedIterationMethod
(
void
)
const
{
return
mutualInducedIterationMethod
;
}
...
...
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
2ef05285
...
...
@@ -589,6 +589,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
static_cast
<
float
>
(
force
.
getMutualInducedTargetEpsilon
()),
nonbondedMethod
,
static_cast
<
float
>
(
force
.
getCutoffDistance
()),
static_cast
<
float
>
(
force
.
getAEwald
()),
static_cast
<
float
>
(
force
.
getElectricConstant
())
);
if
(
nonbondedMethod
==
AmoebaMultipoleForce
::
PME
)
{
double
alpha
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
View file @
2ef05285
...
...
@@ -350,6 +350,7 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(
void
)
fprintf
(
log
,
" pP_ScaleIndices %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pP_ScaleIndices
);
(
void
)
fprintf
(
log
,
" pM_ScaleIndices %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pM_ScaleIndices
);
(
void
)
fprintf
(
log
,
" sqrtPi %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
sqrtPi
);
(
void
)
fprintf
(
log
,
" alpha Ewald %15.7e
\n
"
,
gpu
->
sim
.
alphaEwald
);
(
void
)
fprintf
(
log
,
" cutoffDistance2 %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
cutoffDistance2
);
(
void
)
fprintf
(
log
,
" electric %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
electric
);
(
void
)
fprintf
(
log
,
" box %15.7e %15.7e %15.7e
\n
"
,
gpu
->
sim
.
periodicBoxSizeX
,
gpu
->
sim
.
periodicBoxSizeY
,
gpu
->
sim
.
periodicBoxSizeZ
);
...
...
@@ -1463,7 +1464,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
const
std
::
vector
<
std
::
vector
<
std
::
vector
<
int
>
>
>&
multipoleParticleCovalentInfo
,
const
std
::
vector
<
int
>&
covalentDegree
,
const
std
::
vector
<
int
>&
minCovalentIndices
,
const
std
::
vector
<
int
>&
minCovalentPolarizationIndices
,
int
maxCovalentRange
,
int
mutualInducedIterativeMethod
,
int
mutualInducedMaxIterations
,
float
mutualInducedTargetEpsilon
,
int
nonbondedMethod
,
float
cutoffDistance
,
float
electricConstant
){
int
nonbondedMethod
,
float
cutoffDistance
,
float
alphaEwald
,
float
electricConstant
){
// ---------------------------------------------------------------------------------------
...
...
@@ -1565,9 +1566,8 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
amoebaGpu
->
amoebaSim
.
cutoffDistance2
=
cutoffDistance
*
cutoffDistance
;
amoebaGpu
->
amoebaSim
.
sqrtPi
=
sqrt
(
3.1415926535897932384626433832795
);
amoebaGpu
->
amoebaSim
.
electric
=
electricConstant
;
amoebaGpu
->
gpuContext
->
sim
.
alphaEwald
=
alphaEwald
;
amoebaGpu
->
gpuContext
->
sim
.
nonbondedCutoff
=
cutoffDistance
;
tabulateErfc
(
amoebaGpu
->
gpuContext
);
if
(
amoebaGpu
->
amoebaSim
.
dielec
<
1.0e-05
){
amoebaGpu
->
amoebaSim
.
dielec
=
1.0
f
;
...
...
@@ -2766,7 +2766,7 @@ void amoebaGpuSetConstants(amoebaGpuContext amoebaGpu)
SetCalculateAmoebaCudaPmeMutualInducedFieldSim
(
amoebaGpu
);
SetCalculateAmoebaCudaPmeFixedEFieldSim
(
amoebaGpu
);
SetCalculateAmoebaElectrostaticSim
(
amoebaGpu
);
SetCalculateAmoeba
RealSpaceEwald
Sim
(
amoebaGpu
);
SetCalculateAmoeba
PmeDirectElectrostatic
Sim
(
amoebaGpu
);
SetCalculateAmoebaCudaMapTorquesSim
(
amoebaGpu
);
SetCalculateAmoebaKirkwoodSim
(
amoebaGpu
);
SetCalculateAmoebaKirkwoodEDiffSim
(
amoebaGpu
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
View file @
2ef05285
...
...
@@ -106,9 +106,9 @@ extern void SetCalculateAmoebaElectrostaticSim( amoebaGpuContext amoebaGpu );
extern
void
GetCalculateAmoebaElectrostaticSim
(
amoebaGpuContext
amoebaGpu
);
extern
void
cudaComputeAmoebaElectrostatic
(
amoebaGpuContext
amoebaGpu
);
extern
void
SetCalculateAmoeba
RealSpaceEwald
Sim
(
amoebaGpuContext
amoebaGpu
);
extern
void
GetCalculateAmoeba
RealSpaceEwald
Sim
(
amoebaGpuContext
amoebaGpu
);
extern
void
cudaComputeAmoeba
RealSpaceEwald
(
amoebaGpuContext
amoebaGpu
);
extern
void
SetCalculateAmoeba
PmeDirectElectrostatic
Sim
(
amoebaGpuContext
amoebaGpu
);
extern
void
GetCalculateAmoeba
PmeDirectElectrostatic
Sim
(
amoebaGpuContext
amoebaGpu
);
extern
void
cudaComputeAmoeba
PmeElectrostatic
(
amoebaGpuContext
amoebaGpu
);
extern
void
SetCalculateAmoebaCudaMapTorquesSim
(
amoebaGpuContext
gpu
);
extern
void
GetCalculateAmoebaCudaMapTorquesSim
(
amoebaGpuContext
gpu
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
View file @
2ef05285
...
...
@@ -307,7 +307,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
const
std
::
vector
<
std
::
vector
<
std
::
vector
<
int
>
>
>&
multipoleAtomCovalentInfo
,
const
std
::
vector
<
int
>&
covalentDegree
,
const
std
::
vector
<
int
>&
minCovalentIndices
,
const
std
::
vector
<
int
>&
minCovalentPolarizationIndices
,
int
maxCovalentRange
,
int
mutualInducedIterationMethod
,
int
mutualInducedMaxIterations
,
float
mutualInducedTargetEpsilon
,
int
nonbondedMethod
,
float
cutoffDistance
,
float
electricConstant
);
int
nonbondedMethod
,
float
cutoffDistance
,
float
alphaEwald
,
float
electricConstant
);
extern
"C"
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCuda
RealSpaceEwald
.cu
→
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCuda
PmeDirectElectrostatic
.cu
View file @
2ef05285
...
...
@@ -11,34 +11,24 @@
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaAmoebaGmxSimulation
cAmoebaSim
;
void
SetCalculateAmoeba
RealSpaceEwald
Sim
(
amoebaGpuContext
amoebaGpu
)
void
SetCalculateAmoeba
PmeDirectElectrostatic
Sim
(
amoebaGpuContext
amoebaGpu
)
{
cudaError_t
status
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
status
=
cudaMemcpyToSymbol
(
cSim
,
&
gpu
->
sim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"SetCalculateAmoeba
RealSpaceEwald
Sim: cudaMemcpyToSymbol: SetSim copy to cSim failed"
);
RTERROR
(
status
,
"SetCalculateAmoeba
PmeDirectElectrostatic
Sim: cudaMemcpyToSymbol: SetSim copy to cSim failed"
);
status
=
cudaMemcpyToSymbol
(
cAmoebaSim
,
&
amoebaGpu
->
amoebaSim
,
sizeof
(
cudaAmoebaGmxSimulation
));
RTERROR
(
status
,
"SetCalculateAmoeba
RealSpaceEwald
Sim: cudaMemcpyToSymbol: SetSim copy to cAmoebaSim failed"
);
RTERROR
(
status
,
"SetCalculateAmoeba
PmeDirectElectrostatic
Sim: cudaMemcpyToSymbol: SetSim copy to cAmoebaSim failed"
);
}
void
GetCalculateAmoeba
RealSpaceEwald
Sim
(
amoebaGpuContext
amoebaGpu
)
void
GetCalculateAmoeba
PmeDirectElectrostatic
Sim
(
amoebaGpuContext
amoebaGpu
)
{
cudaError_t
status
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
status
=
cudaMemcpyFromSymbol
(
&
gpu
->
sim
,
cSim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"GetCalculateAmoeba
RealSpaceEwald
Sim: cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
RTERROR
(
status
,
"GetCalculateAmoeba
PmeDirectElectrostatic
Sim: cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
status
=
cudaMemcpyFromSymbol
(
&
amoebaGpu
->
amoebaSim
,
cAmoebaSim
,
sizeof
(
cudaAmoebaGmxSimulation
));
RTERROR
(
status
,
"GetCalculateAmoebaRealSpaceEwaldSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed"
);
}
texture
<
float
,
1
,
cudaReadModeElementType
>
tabulatedErfcRef
;
static
__device__
float
fastErfc
(
float
r
)
{
float
normalized
=
cSim
.
tabulatedErfcScale
*
r
;
int
index
=
(
int
)
normalized
;
float
fract2
=
normalized
-
index
;
float
fract1
=
1.0
f
-
fract2
;
return
fract1
*
tex1Dfetch
(
tabulatedErfcRef
,
index
)
+
fract2
*
tex1Dfetch
(
tabulatedErfcRef
,
index
+
1
);
RTERROR
(
status
,
"GetCalculateAmoebaPmeDirectElectrostaticSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed"
);
}
static
int
const
PScaleIndex
=
0
;
...
...
@@ -60,7 +50,7 @@ static int const MScaleIndex = 3;
//static int const Ddsc72Index = 16;
static
int
const
LastScalingIndex
=
4
;
struct
RealSpaceEwald
Particle
{
struct
PmeDirectElectrostatic
Particle
{
// coordinates charge
...
...
@@ -97,9 +87,50 @@ struct RealSpaceEwaldParticle {
};
__device__
void
calculateRealSpaceEwaldPairIxn_kernel
(
RealSpaceEwaldParticle
&
atomI
,
RealSpaceEwaldParticle
&
atomJ
,
float
scalingDistanceCutoff
,
float
*
scalingFactors
,
float
*
outputForce
,
float
outputTorque
[
2
][
3
],
// self-energy for PME
__device__
static
void
calculatePmeSelfEnergyElectrostaticPairIxn_kernel
(
PmeDirectElectrostaticParticle
&
atomI
,
float
*
energy
)
{
float
term
=
2.0
f
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
;
float
fterm
=
-
(
cAmoebaSim
.
electric
/
cAmoebaSim
.
dielec
)
*
cSim
.
alphaEwald
/
cAmoebaSim
.
sqrtPi
;
float
cii
=
atomI
.
q
*
atomI
.
q
;
float
dii
=
atomI
.
labFrameDipole
[
0
]
*
atomI
.
labFrameDipole
[
0
]
+
atomI
.
labFrameDipole
[
1
]
*
atomI
.
labFrameDipole
[
1
]
+
atomI
.
labFrameDipole
[
2
]
*
atomI
.
labFrameDipole
[
2
];
float
qii
=
atomI
.
labFrameQuadrupole
[
0
]
*
atomI
.
labFrameQuadrupole
[
0
]
+
atomI
.
labFrameQuadrupole
[
4
]
*
atomI
.
labFrameQuadrupole
[
4
]
+
atomI
.
labFrameQuadrupole
[
8
]
*
atomI
.
labFrameQuadrupole
[
8
]
+
2.0
f
*
(
atomI
.
labFrameQuadrupole
[
1
]
*
atomI
.
labFrameQuadrupole
[
1
]
+
atomI
.
labFrameQuadrupole
[
2
]
*
atomI
.
labFrameQuadrupole
[
2
]
+
atomI
.
labFrameQuadrupole
[
5
]
*
atomI
.
labFrameQuadrupole
[
5
]);
float
uii
=
atomI
.
labFrameDipole
[
0
]
*
atomI
.
inducedDipole
[
0
]
+
atomI
.
labFrameDipole
[
1
]
*
atomI
.
inducedDipole
[
1
]
+
atomI
.
labFrameDipole
[
2
]
*
atomI
.
inducedDipole
[
2
];
*
energy
=
(
cii
+
term
*
(
dii
/
3.0
f
+
2.0
f
*
term
*
qii
/
5.0
f
));
*
energy
+=
term
*
uii
/
3.0
f
;
*
energy
*=
fterm
;
}
// self-torque for PME
__device__
static
void
calculatePmeSelfTorqueElectrostaticPairIxn_kernel
(
PmeDirectElectrostaticParticle
&
atomI
)
{
float
term
=
(
4.0
f
/
3.0
f
)
*
(
cAmoebaSim
.
electric
/
cAmoebaSim
.
dielec
)
*
(
cSim
.
alphaEwald
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
)
/
cAmoebaSim
.
sqrtPi
;
float
uix
=
0.5
f
*
(
atomI
.
inducedDipole
[
0
]
+
atomI
.
inducedDipoleP
[
0
]);
float
uiy
=
0.5
f
*
(
atomI
.
inducedDipole
[
1
]
+
atomI
.
inducedDipoleP
[
1
]);
float
uiz
=
0.5
f
*
(
atomI
.
inducedDipole
[
2
]
+
atomI
.
inducedDipoleP
[
2
]);
atomI
.
torque
[
0
]
+=
term
*
(
atomI
.
labFrameDipole
[
1
]
*
uiz
-
atomI
.
labFrameDipole
[
2
]
*
uiy
);
atomI
.
torque
[
1
]
+=
term
*
(
atomI
.
labFrameDipole
[
2
]
*
uix
-
atomI
.
labFrameDipole
[
0
]
*
uiz
);
atomI
.
torque
[
2
]
+=
term
*
(
atomI
.
labFrameDipole
[
0
]
*
uiy
-
atomI
.
labFrameDipole
[
1
]
*
uix
);
}
__device__
void
calculatePmeDirectElectrostaticPairIxn_kernel
(
PmeDirectElectrostaticParticle
&
atomI
,
PmeDirectElectrostaticParticle
&
atomJ
,
float
*
scalingFactors
,
float
*
outputForce
,
float
outputTorque
[
2
][
3
],
float
*
energy
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
...
...
@@ -107,20 +138,8 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
){
float
e
,
ei
,
f
;
//float gfd,gfdr;
//float xix,yix,zix;
//float xiy,yiy,ziy;
//float xiz,yiz,ziz;
//float xkx,ykx,zkx;
//float xky,yky,zky;
//float xkz,ykz,zkz;
//float rr1,rr3;
//float rr5,rr7,rr9,rr11;
float
e
,
ei
;
float
erl
,
erli
;
//float frcxi[4],frcxk[4];
//float frcyi[4],frcyk[4];
//float frczi[4],frczk[4];
float
di
[
4
],
qi
[
10
];
float
dk
[
4
],
qk
[
10
];
float
fridmp
[
4
],
findmp
[
4
];
...
...
@@ -130,7 +149,6 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
float
ttm2i
[
4
],
ttm3i
[
4
];
float
ttm2r
[
4
],
ttm3r
[
4
];
float
ttm2ri
[
4
],
ttm3ri
[
4
];
//float fdir[4]
float
dixdk
[
4
];
float
dkxui
[
4
],
dixuk
[
4
];
float
dixukp
[
4
],
dkxuip
[
4
];
...
...
@@ -159,7 +177,7 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
float
gfr
[
8
],
gfri
[
7
];
float
gti
[
7
],
gtri
[
7
];
f
=
(
cAmoebaSim
.
electric
/
cAmoebaSim
.
dielec
);
f
loat
conversionFactor
=
(
cAmoebaSim
.
electric
/
cAmoebaSim
.
dielec
);
// set the permanent multipole and induced dipole values;
...
...
@@ -193,6 +211,7 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
float
r2
=
xr
*
xr
+
yr
*
yr
+
zr
*
zr
;
if
(
r2
<=
cAmoebaSim
.
cutoffDistance2
){
float
r
=
sqrt
(
r2
);
float
ck
=
atomJ
.
q
;
...
...
@@ -214,7 +233,7 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
float
ralpha
=
cSim
.
alphaEwald
*
r
;
bn
[
0
]
=
fastE
rfc
(
ralpha
)
/
r
;
bn
[
0
]
=
e
rfc
(
ralpha
)
/
r
;
float
alsq2
=
2.0
f
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
;
float
alsq2n
=
0.0
f
;
...
...
@@ -250,17 +269,17 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
float
scale5
=
1.0
f
;
float
scale7
=
1.0
f
;
ddsc3
[
0
]
=
0.0
f
;
ddsc3
[
1
]
=
0.0
f
;
ddsc3
[
2
]
=
0.0
f
;
ddsc3
[
3
]
=
0.0
f
;
ddsc5
[
0
]
=
0.0
f
;
ddsc5
[
1
]
=
0.0
f
;
ddsc5
[
2
]
=
0.0
f
;
ddsc5
[
3
]
=
0.0
f
;
ddsc7
[
0
]
=
0.0
f
;
ddsc7
[
1
]
=
0.0
f
;
ddsc7
[
2
]
=
0.0
f
;
ddsc7
[
3
]
=
0.0
f
;
float
pdk
=
atomJ
.
damp
;
float
ptk
=
atomJ
.
thole
;
...
...
@@ -280,9 +299,11 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
ddsc3
[
1
]
=
temp3
*
xr
;
ddsc3
[
2
]
=
temp3
*
yr
;
ddsc3
[
3
]
=
temp3
*
zr
;
ddsc5
[
1
]
=
temp5
*
ddsc3
[
1
];
ddsc5
[
2
]
=
temp5
*
ddsc3
[
2
];
ddsc5
[
3
]
=
temp5
*
ddsc3
[
3
];
ddsc7
[
1
]
=
temp7
*
ddsc5
[
1
];
ddsc7
[
2
]
=
temp7
*
ddsc5
[
2
];
ddsc7
[
3
]
=
temp7
*
ddsc5
[
3
];
...
...
@@ -508,8 +529,8 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
+
rr7
*
gli
[
3
]
*
psc7
);
e
=
e
-
(
1.0
f
-
scalingFactors
[
MScaleIndex
])
*
erl
;
ei
=
ei
-
erli
;
e
=
f
*
e
;
ei
=
f
*
ei
;
e
=
conversionFactor
*
e
;
ei
=
conversionFactor
*
ei
;
// em = em + e;
// ep = ep + ei;
...
...
@@ -679,12 +700,9 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
+
(
glip
[
2
]
+
glip
[
7
])
*
scalingFactors
[
DScaleIndex
]);
float
temp7
=
0.5
f
*
rr7
*
(
gli
[
3
]
*
scalingFactors
[
PScaleIndex
]
+
glip
[
3
]
*
scalingFactors
[
DScaleIndex
]);
fridmp
[
1
]
=
temp3
*
ddsc3
[
1
]
+
temp5
*
ddsc5
[
1
]
+
temp7
*
ddsc7
[
1
];
fridmp
[
2
]
=
temp3
*
ddsc3
[
2
]
+
temp5
*
ddsc5
[
2
]
+
temp7
*
ddsc7
[
2
];
fridmp
[
3
]
=
temp3
*
ddsc3
[
3
]
+
temp5
*
ddsc5
[
3
]
+
temp7
*
ddsc7
[
3
];
fridmp
[
1
]
=
temp3
*
ddsc3
[
1
]
+
temp5
*
ddsc5
[
1
]
+
temp7
*
ddsc7
[
1
];
fridmp
[
2
]
=
temp3
*
ddsc3
[
2
]
+
temp5
*
ddsc5
[
2
]
+
temp7
*
ddsc7
[
2
];
fridmp
[
3
]
=
temp3
*
ddsc3
[
3
]
+
temp5
*
ddsc5
[
3
]
+
temp7
*
ddsc7
[
3
];
// find some scaling terms for induced-induced force
...
...
@@ -833,34 +851,138 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
// handle the case where scaling is used
for
(
int
j
=
1
;
j
<=
3
;
j
++
){
ftm2
[
j
]
=
f
*
(
ftm2
[
j
]
-
(
1.0
f
-
scalingFactors
[
MScaleIndex
])
*
ftm2r
[
j
]);
ftm2i
[
j
]
=
f
*
(
ftm2i
[
j
]
-
ftm2ri
[
j
]);
ttm2
[
j
]
=
f
*
(
ttm2
[
j
]
-
(
1.0
f
-
scalingFactors
[
MScaleIndex
])
*
ttm2r
[
j
]);
ttm2i
[
j
]
=
f
*
(
ttm2i
[
j
]
-
ttm2ri
[
j
]);
ttm3
[
j
]
=
f
*
(
ttm3
[
j
]
-
(
1.0
f
-
scalingFactors
[
MScaleIndex
])
*
ttm3r
[
j
]);
ttm3i
[
j
]
=
f
*
(
ttm3i
[
j
]
-
ttm3ri
[
j
]);
ftm2
[
j
]
=
(
ftm2
[
j
]
-
(
1.0
f
-
scalingFactors
[
MScaleIndex
])
*
ftm2r
[
j
]);
ftm2i
[
j
]
=
(
ftm2i
[
j
]
-
ftm2ri
[
j
]);
ttm2
[
j
]
=
(
ttm2
[
j
]
-
(
1.0
f
-
scalingFactors
[
MScaleIndex
])
*
ttm2r
[
j
]);
ttm2i
[
j
]
=
(
ttm2i
[
j
]
-
ttm2ri
[
j
]);
ttm3
[
j
]
=
(
ttm3
[
j
]
-
(
1.0
f
-
scalingFactors
[
MScaleIndex
])
*
ttm3r
[
j
]);
ttm3i
[
j
]
=
(
ttm3i
[
j
]
-
ttm3ri
[
j
]);
}
// increment gradient due to force and torque on first site;
outputForce
[
0
]
=
-
conversionFactor
*
(
ftm2
[
1
]
+
ftm2i
[
1
]);
outputForce
[
1
]
=
-
conversionFactor
*
(
ftm2
[
2
]
+
ftm2i
[
2
]);
outputForce
[
2
]
=
-
conversionFactor
*
(
ftm2
[
3
]
+
ftm2i
[
3
]);
outputTorque
[
0
][
0
]
=
conversionFactor
*
(
ttm2
[
1
]
+
ttm2i
[
1
]);
outputTorque
[
0
][
1
]
=
conversionFactor
*
(
ttm2
[
2
]
+
ttm2i
[
2
]);
outputTorque
[
0
][
2
]
=
conversionFactor
*
(
ttm2
[
3
]
+
ttm2i
[
3
]);
outputTorque
[
1
][
0
]
=
conversionFactor
*
(
ttm3
[
1
]
+
ttm3i
[
1
]);
outputTorque
[
1
][
1
]
=
conversionFactor
*
(
ttm3
[
2
]
+
ttm3i
[
2
]);
outputTorque
[
1
][
2
]
=
conversionFactor
*
(
ttm3
[
3
]
+
ttm3i
[
3
]);
//outputTorque[1][2] = conversionFactor*(ttm3_2 + ttm3i_2);
#ifdef AMOEBA_DEBUG
int
debugIndex
=
0
;
float
idTracker
=
1.0
f
;
/*
dem(1,ii) = dem(1,ii) + ftm2[1];
dem(2,ii) = dem(2,ii) + ftm2[2];
dem(3,ii) = dem(3,ii) + ftm2[3];
dep(1,ii) = dep(1,ii) + ftm2i[1];
dep(2,ii) = dep(2,ii) + ftm2i[2];
dep(3,ii) = dep(3,ii) + ftm2i[3];
call torque (i,ttm2,ttm2i,frcxi,frcyi,frczi);
// increment gradient due to force and torque on second site;
dem(1,kk) = dem(1,kk) - ftm2[1];
dem(2,kk) = dem(2,kk) - ftm2[2];
dem(3,kk) = dem(3,kk) - ftm2[3];
dep(1,kk) = dep(1,kk) - ftm2i[1];
dep(2,kk) = dep(2,kk) - ftm2i[2];
dep(3,kk) = dep(3,kk) - ftm2i[3];
call torque (k,ttm3,ttm3i,frcxk,frcyk,frczk);
debugArray[debugIndex].x = atomI.labFrameDipole[0];
debugArray[debugIndex].y = atomI.labFrameDipole[1];
debugArray[debugIndex].z = atomI.labFrameDipole[2];
debugArray[debugIndex].w = r2;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomJ.labFrameDipole[0];
debugArray[debugIndex].y = atomJ.labFrameDipole[1];
debugArray[debugIndex].z = atomJ.labFrameDipole[2];
debugArray[debugIndex].w = cSim.alphaEwald;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomI.inducedDipole[0];
debugArray[debugIndex].y = atomI.inducedDipole[1];
debugArray[debugIndex].z = atomI.inducedDipole[2];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomJ.inducedDipole[0];
debugArray[debugIndex].y = atomJ.inducedDipole[1];
debugArray[debugIndex].z = atomJ.inducedDipole[2];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomI.inducedDipoleP[0];
debugArray[debugIndex].y = atomI.inducedDipoleP[1];
debugArray[debugIndex].z = atomI.inducedDipoleP[2];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomJ.inducedDipoleP[0];
debugArray[debugIndex].y = atomJ.inducedDipoleP[1];
debugArray[debugIndex].z = atomJ.inducedDipoleP[2];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = conversionFactor*ftm2[1];
debugArray[debugIndex].y = conversionFactor*ftm2[2];
debugArray[debugIndex].z = conversionFactor*ftm2[3];
debugArray[debugIndex].w = idTracker;
debugIndex++;
*/
idTracker
+=
1.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ftm2i
[
1
];
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ftm2i
[
2
];
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ftm2i
[
3
];
debugArray
[
debugIndex
].
w
=
r2
;
debugIndex
++
;
idTracker
+=
1.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
fridmp
[
1
];
debugArray
[
debugIndex
].
y
=
conversionFactor
*
fridmp
[
2
];
debugArray
[
debugIndex
].
z
=
conversionFactor
*
fridmp
[
3
];
debugArray
[
debugIndex
].
w
=
cSim
.
alphaEwald
;
debugIndex
++
;
idTracker
+=
1.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
findmp
[
1
];
debugArray
[
debugIndex
].
y
=
conversionFactor
*
findmp
[
2
];
debugArray
[
debugIndex
].
z
=
conversionFactor
*
findmp
[
3
];
debugArray
[
debugIndex
].
w
=
cSim
.
alphaEwald
+
1.0
f
;
debugIndex
++
;
idTracker
+=
1.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ttm2
[
1
];
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ttm2
[
2
];
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ttm2
[
3
];
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
idTracker
+=
1.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ttm2i
[
1
];
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ttm2i
[
2
];
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ttm2i
[
3
];
debugArray
[
debugIndex
].
w
=
idTracker
;
#endif
}
else
{
outputForce
[
0
]
=
0.0
f
;
outputForce
[
1
]
=
0.0
f
;
outputForce
[
2
]
=
0.0
f
;
outputTorque
[
0
][
0
]
=
0.0
f
;
outputTorque
[
0
][
1
]
=
0.0
f
;
outputTorque
[
0
][
2
]
=
0.0
f
;
outputTorque
[
1
][
0
]
=
0.0
f
;
outputTorque
[
1
][
1
]
=
0.0
f
;
outputTorque
[
1
][
2
]
=
0.0
f
;
#ifdef AMOEBA_DEBUG
for
(
int
ii
=
0
;
ii
<
20
;
ii
++
){
debugArray
[
ii
].
x
=
0.0
f
;
debugArray
[
ii
].
y
=
0.0
f
;
debugArray
[
ii
].
z
=
0.0
f
;
debugArray
[
ii
].
w
=
(
float
)
ii
;
}
#endif
}
...
...
@@ -868,49 +990,47 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
}
__device__
void
loadRealSpaceEwaldShared
(
struct
RealSpaceEwaldParticle
*
sA
,
unsigned
int
atomI
,
float4
*
atomCoord
,
float
*
labFrameDipoleJ
,
float
*
labQuadrupole
,
float
*
inducedDipole
,
float
*
inducedDipolePolar
,
float2
*
dampingFactorAndThole
)
__device__
void
loadPmeDirectElectrostaticShared
(
struct
PmeDirectElectrostaticParticle
*
sA
,
unsigned
int
atomI
)
{
// coordinates & charge
sA
->
x
=
atomCoord
[
atomI
].
x
;
sA
->
y
=
atomCoord
[
atomI
].
y
;
sA
->
z
=
atomCoord
[
atomI
].
z
;
sA
->
q
=
atomCoord
[
atomI
].
w
;
sA
->
x
=
cSim
.
pPosq
[
atomI
].
x
;
sA
->
y
=
cSim
.
pPosq
[
atomI
].
y
;
sA
->
z
=
cSim
.
pPosq
[
atomI
].
z
;
sA
->
q
=
cSim
.
pPosq
[
atomI
].
w
;
// lab dipole
sA
->
labFrameDipole
[
0
]
=
labFrameDipoleJ
[
atomI
*
3
];
sA
->
labFrameDipole
[
1
]
=
labFrameDipoleJ
[
atomI
*
3
+
1
];
sA
->
labFrameDipole
[
2
]
=
labFrameDipoleJ
[
atomI
*
3
+
2
];
sA
->
labFrameDipole
[
0
]
=
cAmoebaSim
.
pLabFrameDipole
[
atomI
*
3
];
sA
->
labFrameDipole
[
1
]
=
cAmoebaSim
.
pLabFrameDipole
[
atomI
*
3
+
1
];
sA
->
labFrameDipole
[
2
]
=
cAmoebaSim
.
pLabFrameDipole
[
atomI
*
3
+
2
];
// lab quadrupole
sA
->
labFrameQuadrupole
[
0
]
=
lab
Quadrupole
[
atomI
*
9
];
sA
->
labFrameQuadrupole
[
1
]
=
lab
Quadrupole
[
atomI
*
9
+
1
];
sA
->
labFrameQuadrupole
[
2
]
=
lab
Quadrupole
[
atomI
*
9
+
2
];
sA
->
labFrameQuadrupole
[
3
]
=
lab
Quadrupole
[
atomI
*
9
+
3
];
sA
->
labFrameQuadrupole
[
4
]
=
lab
Quadrupole
[
atomI
*
9
+
4
];
sA
->
labFrameQuadrupole
[
5
]
=
lab
Quadrupole
[
atomI
*
9
+
5
];
sA
->
labFrameQuadrupole
[
6
]
=
lab
Quadrupole
[
atomI
*
9
+
6
];
sA
->
labFrameQuadrupole
[
7
]
=
lab
Quadrupole
[
atomI
*
9
+
7
];
sA
->
labFrameQuadrupole
[
8
]
=
lab
Quadrupole
[
atomI
*
9
+
8
];
sA
->
labFrameQuadrupole
[
0
]
=
cAmoebaSim
.
pLabFrame
Quadrupole
[
atomI
*
9
];
sA
->
labFrameQuadrupole
[
1
]
=
cAmoebaSim
.
pLabFrame
Quadrupole
[
atomI
*
9
+
1
];
sA
->
labFrameQuadrupole
[
2
]
=
cAmoebaSim
.
pLabFrame
Quadrupole
[
atomI
*
9
+
2
];
sA
->
labFrameQuadrupole
[
3
]
=
cAmoebaSim
.
pLabFrame
Quadrupole
[
atomI
*
9
+
3
];
sA
->
labFrameQuadrupole
[
4
]
=
cAmoebaSim
.
pLabFrame
Quadrupole
[
atomI
*
9
+
4
];
sA
->
labFrameQuadrupole
[
5
]
=
cAmoebaSim
.
pLabFrame
Quadrupole
[
atomI
*
9
+
5
];
sA
->
labFrameQuadrupole
[
6
]
=
cAmoebaSim
.
pLabFrame
Quadrupole
[
atomI
*
9
+
6
];
sA
->
labFrameQuadrupole
[
7
]
=
cAmoebaSim
.
pLabFrame
Quadrupole
[
atomI
*
9
+
7
];
sA
->
labFrameQuadrupole
[
8
]
=
cAmoebaSim
.
pLabFrame
Quadrupole
[
atomI
*
9
+
8
];
// induced dipole
sA
->
inducedDipole
[
0
]
=
i
nducedDipole
[
atomI
*
3
];
sA
->
inducedDipole
[
1
]
=
i
nducedDipole
[
atomI
*
3
+
1
];
sA
->
inducedDipole
[
2
]
=
i
nducedDipole
[
atomI
*
3
+
2
];
sA
->
inducedDipole
[
0
]
=
cAmoebaSim
.
pI
nducedDipole
[
atomI
*
3
];
sA
->
inducedDipole
[
1
]
=
cAmoebaSim
.
pI
nducedDipole
[
atomI
*
3
+
1
];
sA
->
inducedDipole
[
2
]
=
cAmoebaSim
.
pI
nducedDipole
[
atomI
*
3
+
2
];
// induced dipole polar
sA
->
inducedDipoleP
[
0
]
=
i
nducedDipolePolar
[
atomI
*
3
];
sA
->
inducedDipoleP
[
1
]
=
i
nducedDipolePolar
[
atomI
*
3
+
1
];
sA
->
inducedDipoleP
[
2
]
=
i
nducedDipolePolar
[
atomI
*
3
+
2
];
sA
->
inducedDipoleP
[
0
]
=
cAmoebaSim
.
pI
nducedDipolePolar
[
atomI
*
3
];
sA
->
inducedDipoleP
[
1
]
=
cAmoebaSim
.
pI
nducedDipolePolar
[
atomI
*
3
+
1
];
sA
->
inducedDipoleP
[
2
]
=
cAmoebaSim
.
pI
nducedDipolePolar
[
atomI
*
3
+
2
];
sA
->
damp
=
d
ampingFactorAndThole
[
atomI
].
x
;
sA
->
thole
=
d
ampingFactorAndThole
[
atomI
].
y
;
sA
->
damp
=
cAmoebaSim
.
pD
ampingFactorAndThole
[
atomI
].
x
;
sA
->
thole
=
cAmoebaSim
.
pD
ampingFactorAndThole
[
atomI
].
y
;
}
...
...
@@ -918,11 +1038,11 @@ __device__ void loadRealSpaceEwaldShared( struct RealSpaceEwaldParticle* sA, uns
#undef USE_OUTPUT_BUFFER_PER_WARP
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateAmoebaCuda
RealSpaceEwald
.h"
#include "kCalculateAmoebaCuda
PmeDirectElectrostatic
.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateAmoebaCuda
RealSpaceEwald
.h"
#include "kCalculateAmoebaCuda
PmeDirectElectrostatic
.h"
// reduce psWorkArray_3_1 -> force
// reduce psWorkArray_3_2 -> torque
...
...
@@ -932,23 +1052,27 @@ static void kReduceForceTorque(amoebaGpuContext amoebaGpu )
kReduceFields_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
fieldReduceThreadsPerBlock
>>>
(
amoebaGpu
->
paddedNumberOfAtoms
*
3
,
amoebaGpu
->
outputBuffers
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
amoebaGpu
->
psForce
->
_pDevStream
[
0
]
);
LAUNCHERROR
(
"kReduce
RealSpaceEwald
Force"
);
LAUNCHERROR
(
"kReduce
PmeDirectElectrostatic
Force"
);
kReduceFields_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
fieldReduceThreadsPerBlock
>>>
(
amoebaGpu
->
paddedNumberOfAtoms
*
3
,
amoebaGpu
->
outputBuffers
,
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
amoebaGpu
->
psTorque
->
_pDevStream
[
0
]
);
LAUNCHERROR
(
"kReduce
RealSpaceEwald
Torque"
);
LAUNCHERROR
(
"kReduce
PmeDirectElectrostatic
Torque"
);
}
//#define GET_INDUCED_DIPOLE_FROM_FILE
#ifdef GET_INDUCED_DIPOLE_FROM_FILE
#include <stdlib.h>
#endif
/**---------------------------------------------------------------------------------------
Compute Amoeba electrostatic force & torque
Compute Amoeba
dirrect space portion of
electrostatic force & torque
@param amoebaGpu amoebaGpu context
@param gpu OpenMM gpu Cuda context
--------------------------------------------------------------------------------------- */
void
cudaComputeAmoeba
RealSpaceEwald
(
amoebaGpuContext
amoebaGpu
)
void
cudaComputeAmoeba
PmeDirectElectrostatic
(
amoebaGpuContext
amoebaGpu
)
{
// ---------------------------------------------------------------------------------------
...
...
@@ -956,7 +1080,7 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
static
unsigned
int
threadsPerBlock
=
0
;
#ifdef AMOEBA_DEBUG
static
const
char
*
methodName
=
"cudaComputeAmoeba
RealSpaceEwald
"
;
static
const
char
*
methodName
=
"cudaComputeAmoeba
PmeDirectElectrostatic
"
;
static
int
timestep
=
0
;
std
::
vector
<
int
>
fileId
;
timestep
++
;
...
...
@@ -983,7 +1107,38 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
*
paddedNumberOfAtoms
,
1
,
"DebugArray"
);
memset
(
debugArray
->
_pSysStream
[
0
],
0
,
sizeof
(
float
)
*
4
*
paddedNumberOfAtoms
*
paddedNumberOfAtoms
);
debugArray
->
Upload
();
unsigned
int
targetAtom
=
0
;
unsigned
int
targetAtom
=
10
;
#endif
#ifdef GET_INDUCED_DIPOLE_FROM_FILE
std
::
string
fileName
=
"waterInducedDipole.txt"
;
StringVectorVector
fileContents
;
readFile
(
fileName
,
fileContents
);
unsigned
int
offset
=
0
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Read file: %s %u
\n
"
,
fileName
.
c_str
(),
fileContents
.
size
()
);
fflush
(
amoebaGpu
->
log
);
for
(
unsigned
int
ii
=
1
;
ii
<
fileContents
.
size
()
-
1
;
ii
++
){
StringVector
lineTokens
=
fileContents
[
ii
];
unsigned
int
lineTokenIndex
=
1
;
// (void) fprintf( amoebaGpu->log, " %u %s %s\n", ii, lineTokens[0].c_str(), lineTokens[lineTokenIndex].c_str() ); fflush( amoebaGpu->log );
amoebaGpu
->
psInducedDipole
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psInducedDipole
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psInducedDipole
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
offset
-=
3
;
amoebaGpu
->
psInducedDipolePolar
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psInducedDipolePolar
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psInducedDipolePolar
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
}
float
conversion
=
0.1
f
;
for
(
int
ii
=
0
;
ii
<
3
*
gpu
->
natoms
;
ii
++
){
amoebaGpu
->
psInducedDipole
->
_pSysStream
[
0
][
ii
]
*=
conversion
;
amoebaGpu
->
psInducedDipolePolar
->
_pSysStream
[
0
][
ii
]
*=
conversion
;
}
amoebaGpu
->
gpuContext
->
sim
.
alphaEwald
=
5.4459052e+00
f
;
SetCalculateAmoebaPmeDirectElectrostaticSim
(
amoebaGpu
);
amoebaGpu
->
psInducedDipole
->
Upload
();
amoebaGpu
->
psInducedDipolePolar
->
Upload
();
#endif
// on first pass, set threads/block
...
...
@@ -996,20 +1151,15 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
maxThreads
=
128
;
else
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
RealSpaceEwald
Particle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
PmeDirectElectrostatic
Particle
)),
maxThreads
);
}
kClearFields_3
(
amoebaGpu
,
2
);
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoeba
CudaRealSpaceEwald
N2ByWarpForces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
RealSpaceEwald
Particle
)
*
threadsPerBlock
>>>
(
kCalculateAmoeba
PmeDirectElectrostatic
N2ByWarpForces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
PmeDirectElectrostatic
Particle
)
*
threadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
gpu
->
psPosq4
->
_pDevStream
[
0
],
amoebaGpu
->
psLabFrameDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psLabFrameQuadrupole
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipolePolar
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
...
...
@@ -1021,22 +1171,14 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
}
else
{
#ifdef AMOEBA_DEBUG
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoeba
CudaRealSpaceEwald
N2Forces no warp: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoeba
PmeDirectElectrostatic
N2Forces no warp: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
\n
"
,
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
RealSpaceEwaldParticle
),
sizeof
(
RealSpaceEwald
Particle
)
*
threadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
sizeof
(
PmeDirectElectrostaticParticle
),
sizeof
(
PmeDirectElectrostatic
Particle
)
*
threadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#endif
cudaChannelFormatDesc
channelDesc
=
cudaCreateChannelDesc
<
float
>
();
cudaBindTexture
(
NULL
,
&
tabulatedErfcRef
,
gpu
->
psTabulatedErfc
->
_pDevData
,
&
channelDesc
,
gpu
->
psTabulatedErfc
->
_length
*
sizeof
(
float
));
kCalculateAmoebaCudaRealSpaceEwaldN2Forces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
RealSpaceEwaldParticle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaPmeDirectElectrostaticN2Forces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
PmeDirectElectrostaticParticle
)
*
threadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
gpu
->
psPosq4
->
_pDevStream
[
0
],
amoebaGpu
->
psLabFrameDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psLabFrameQuadrupole
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipolePolar
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
...
...
@@ -1045,7 +1187,7 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
]
);
#endif
}
LAUNCHERROR
(
"kCalculateAmoeba
CudaRealSpaceEwald
N2Forces"
);
LAUNCHERROR
(
"kCalculateAmoeba
PmeDirectElectrostatic
N2Forces"
);
kReduceForceTorque
(
amoebaGpu
);
#ifdef AMOEBA_DEBUG
...
...
@@ -1055,9 +1197,10 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
amoebaGpu
->
psTorque
->
Download
();
debugArray
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished
RealSpaceEwald
kernel execution
\n
"
);
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished
PmeDirectElectrostatic
kernel execution
\n
"
);
(
void
)
fflush
(
amoebaGpu
->
log
);
int
maxPrint
=
1400
;
float
conversion
=
1.0
f
/
41.84
f
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
...
...
@@ -1065,38 +1208,18 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
// force
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
RealSpaceEwald
F [%16.9e %16.9e %16.9e] "
,
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
indexOffset
],
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
indexOffset
+
1
],
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
indexOffset
+
2
]
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
PmeDirectElectrostatic
F [%16.9e %16.9e %16.9e] "
,
conversion
*
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
indexOffset
],
conversion
*
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
indexOffset
+
1
],
conversion
*
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
indexOffset
+
2
]
);
// torque
(
void
)
fprintf
(
amoebaGpu
->
log
,
"RealSpaceEwaldT [%16.9e %16.9e %16.9e] "
,
amoebaGpu
->
psTorque
->
_pSysStream
[
0
][
indexOffset
],
amoebaGpu
->
psTorque
->
_pSysStream
[
0
][
indexOffset
+
1
],
amoebaGpu
->
psTorque
->
_pSysStream
[
0
][
indexOffset
+
2
]
);
// coords
#if 0
(void) fprintf( amoebaGpu->log,"x[%16.9e %16.9e %16.9e] ",
gpu->psPosq4->_pSysStream[0][ii].x,
gpu->psPosq4->_pSysStream[0][ii].y,
gpu->psPosq4->_pSysStream[0][ii].z);
for( int jj = 0; jj < gpu->natoms && jj < 5; jj++ ){
int debugIndex = jj*gpu->natoms + ii;
float xx = gpu->psPosq4->_pSysStream[0][jj].x - gpu->psPosq4->_pSysStream[0][ii].x;
float yy = gpu->psPosq4->_pSysStream[0][jj].y - gpu->psPosq4->_pSysStream[0][ii].y;
float zz = gpu->psPosq4->_pSysStream[0][jj].z - gpu->psPosq4->_pSysStream[0][ii].z;
(void) fprintf( amoebaGpu->log,"\n%4d %4d delta [%16.9e %16.9e %16.9e] [%16.9e %16.9e %16.9e] ",
ii, jj, xx, yy, zz,
debugArray->_pSysStream[0][debugIndex].x, debugArray->_pSysStream[0][debugIndex].y, debugArray->_pSysStream[0][debugIndex].z );
(
void
)
fprintf
(
amoebaGpu
->
log
,
"PmeDirectElectrostaticT [%16.9e %16.9e %16.9e] "
,
conversion
*
amoebaGpu
->
psTorque
->
_pSysStream
[
0
][
indexOffset
],
conversion
*
amoebaGpu
->
psTorque
->
_pSysStream
[
0
][
indexOffset
+
1
],
conversion
*
amoebaGpu
->
psTorque
->
_pSysStream
[
0
][
indexOffset
+
2
]
);
}
#endif
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
if
(
ii
==
maxPrint
&&
(
gpu
->
natoms
-
maxPrint
)
>
ii
){
ii
=
gpu
->
natoms
-
maxPrint
;
...
...
@@ -1107,7 +1230,7 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
int
debugIndex
=
jj
;
for
(
int
kk
=
0
;
kk
<
5
;
kk
++
){
for
(
int
kk
=
0
;
kk
<
1
5
;
kk
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d %5d [%16.9e %16.9e %16.9e %16.9e] E11
\n
"
,
targetAtom
,
jj
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
x
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
y
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
z
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
w
);
...
...
@@ -1118,26 +1241,6 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
}
(
void
)
fflush
(
amoebaGpu
->
log
);
if
(
0
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s Tiled F & T
\n
"
,
methodName
);
fflush
(
amoebaGpu
->
log
);
int
maxPrint
=
12
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
// print cpu & gpu reductions
int
offset
=
3
*
ii
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%6d F[%16.7e %16.7e %16.7e] T[%16.7e %16.7e %16.7e]
\n
"
,
ii
,
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
offset
],
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
offset
+
1
],
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
offset
+
2
],
amoebaGpu
->
psTorque
->
_pSysStream
[
0
][
offset
],
amoebaGpu
->
psTorque
->
_pSysStream
[
0
][
offset
+
1
],
amoebaGpu
->
psTorque
->
_pSysStream
[
0
][
offset
+
2
]
);
if
(
(
ii
==
maxPrint
)
&&
(
ii
<
(
gpu
->
natoms
-
maxPrint
))
)
ii
=
gpu
->
natoms
-
maxPrint
;
}
}
if
(
1
){
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
...
...
@@ -1145,7 +1248,7 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psForce
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psTorque
,
outputVector
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaForceTorque"
,
fileId
,
outputVector
);
cudaWriteVectorOfDoubleVectorsToFile
(
"Cuda
PmeDirect
ForceTorque"
,
fileId
,
outputVector
);
}
}
...
...
@@ -1155,3 +1258,19 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
// ---------------------------------------------------------------------------------------
}
/**---------------------------------------------------------------------------------------
Compute Amoeba electrostatic force & torque using PME
@param amoebaGpu amoebaGpu context
--------------------------------------------------------------------------------------- */
void
cudaComputeAmoebaPmeElectrostatic
(
amoebaGpuContext
amoebaGpu
)
{
cudaComputeAmoebaPmeDirectElectrostatic
(
amoebaGpu
);
kCalculateAmoebaPME
(
amoebaGpu
);
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCuda
RealSpaceEwald
.h
→
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCuda
PmeDirectElectrostatic
.h
View file @
2ef05285
...
...
@@ -34,15 +34,8 @@ __launch_bounds__(128, 1)
#else
__launch_bounds__
(
64
,
1
)
#endif
void
METHOD_NAME
(
kCalculateAmoebaCudaRealSpaceEwald
,
Forces_kernel
)(
unsigned
int
*
workUnit
,
float4
*
atomCoord
,
float
*
labFrameDipole
,
float
*
labFrameQuadrupole
,
float
*
inducedDipole
,
float
*
inducedDipolePolar
,
float
*
outputForce
,
float
*
outputTorque
void
METHOD_NAME
(
kCalculateAmoebaPmeDirectElectrostatic
,
Forces_kernel
)(
unsigned
int
*
workUnit
,
float
*
outputForce
,
float
*
outputTorque
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
,
unsigned
int
targetAtom
...
...
@@ -50,10 +43,11 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
){
#ifdef AMOEBA_DEBUG
int
pullIndexMax
=
12
;
float4
pullBack
[
20
];
#endif
extern
__shared__
RealSpaceEwald
Particle
sA
[];
extern
__shared__
PmeDirectElectrostatic
Particle
sA
[];
unsigned
int
totalWarps
=
gridDim
.
x
*
blockDim
.
x
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
...
...
@@ -80,12 +74,10 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
RealSpaceEwald
Particle
*
psA
=
&
sA
[
tbx
];
PmeDirectElectrostatic
Particle
*
psA
=
&
sA
[
tbx
];
unsigned
int
atomI
=
x
+
tgx
;
RealSpaceEwaldParticle
localParticle
;
loadRealSpaceEwaldShared
(
&
localParticle
,
atomI
,
atomCoord
,
labFrameDipole
,
labFrameQuadrupole
,
inducedDipole
,
inducedDipolePolar
,
cAmoebaSim
.
pDampingFactorAndThole
);
PmeDirectElectrostaticParticle
localParticle
;
loadPmeDirectElectrostaticShared
(
&
localParticle
,
atomI
);
localParticle
.
force
[
0
]
=
0
.
0
f
;
localParticle
.
force
[
1
]
=
0
.
0
f
;
...
...
@@ -105,9 +97,7 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
// load shared data
loadRealSpaceEwaldShared
(
&
(
sA
[
threadIdx
.
x
]),
atomI
,
atomCoord
,
labFrameDipole
,
labFrameQuadrupole
,
inducedDipole
,
inducedDipolePolar
,
cAmoebaSim
.
pDampingFactorAndThole
);
loadPmeDirectElectrostaticShared
(
&
(
sA
[
threadIdx
.
x
]),
atomI
);
if
(
!
bExclusionFlag
)
{
...
...
@@ -122,9 +112,8 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
float
torque
[
2
][
3
];
float
energy
;
calculateRealSpaceEwaldPairIxn_kernel
(
localParticle
,
psA
[
j
],
cAmoebaSim
.
scalingDistanceCutoff
,
scalingFactors
,
force
,
torque
,
&
energy
calculatePmeDirectElectrostaticPairIxn_kernel
(
localParticle
,
psA
[
j
],
scalingFactors
,
force
,
torque
,
&
energy
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
...
...
@@ -171,9 +160,8 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
// force
float
energy
;
calculateRealSpaceEwaldPairIxn_kernel
(
localParticle
,
psA
[
j
],
cAmoebaSim
.
scalingDistanceCutoff
,
scalingFactors
,
force
,
torque
,
&
energy
calculatePmeDirectElectrostaticPairIxn_kernel
(
localParticle
,
psA
[
j
],
scalingFactors
,
force
,
torque
,
&
energy
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
...
...
@@ -199,58 +187,54 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
){
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
atomJ
:
atomI
;
float
blockId
=
1
.
0
f
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
atomJ
;
debugArray
[
index
].
z
=
1
.
0
f
;
debugArray
[
index
].
w
=
(
float
)
y
;
/*
debugArray
[
index
].
z
=
(
float
)
y
;
debugArray
[
index
].
w
=
blockId
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
force
[
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
force
[
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
force
[
2
]
:
0
.
0
f
;
debugArray
[
index
].
w
=
blockId
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
torque
[
0
][
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
*/
debugArray
[
index
].
w
=
blockId
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
int
pullIndex
=
0
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
pullIndex
++
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
pullIndex
++
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
debugArray
[
index
].
x
=
mask
?
torque
[
0
][
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
debugArray
[
index
].
w
=
blockId
;
for
(
int
pullIndex
=
0
;
pullIndex
<
pullIndexMax
;
pullIndex
++
){
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
pullIndex
++
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
}
}
#endif
}
}
// include self energy and self torque
if
(
atomI
<
cAmoebaSim
.
numberOfAtoms
){
calculatePmeSelfTorqueElectrostaticPairIxn_kernel
(
localParticle
);
//float energy;
//calculatePmeSelfEnergyElectrostaticPairIxn_kernel( localParticle, &energy );
//totalEnergy += energy;
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
...
...
@@ -300,9 +284,7 @@ if( atomI == targetAtom ){
{
// load shared data
loadRealSpaceEwaldShared
(
&
(
sA
[
threadIdx
.
x
]),
(
y
+
tgx
),
atomCoord
,
labFrameDipole
,
labFrameQuadrupole
,
inducedDipole
,
inducedDipolePolar
,
cAmoebaSim
.
pDampingFactorAndThole
);
loadPmeDirectElectrostaticShared
(
&
(
sA
[
threadIdx
.
x
]),
(
y
+
tgx
)
);
}
...
...
@@ -325,9 +307,8 @@ if( atomI == targetAtom ){
unsigned
int
atomJ
=
y
+
tj
;
float
energy
;
calculateRealSpaceEwaldPairIxn_kernel
(
localParticle
,
psA
[
tj
],
cAmoebaSim
.
scalingDistanceCutoff
,
scalingFactors
,
force
,
torque
,
&
energy
calculatePmeDirectElectrostaticPairIxn_kernel
(
localParticle
,
psA
[
tj
],
scalingFactors
,
force
,
torque
,
&
energy
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
...
...
@@ -360,60 +341,44 @@ if( atomI == targetAtom ){
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
||
atomJ
==
targetAtom
){
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
atomJ
:
atomI
;
unsigned
int
indexI
=
(
atomI
==
targetAtom
)
?
0
:
1
;
unsigned
int
indexJ
=
(
atomI
==
targetAtom
)
?
1
:
0
;
float
forceSign
=
(
atomI
==
targetAtom
)
?
1
.
0
f
:
-
1
.
0
f
;
float
blockId
=
2
.
0
f
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
atomJ
;
debugArray
[
index
].
z
=
2
.
0
f
;
debugArray
[
index
].
w
=
(
float
)
y
;
/*
debugArray
[
index
].
z
=
(
float
)
y
;
debugArray
[
index
].
w
=
blockId
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
forceSign
*
force
[
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
forceSign
*
force
[
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
forceSign
*
force
[
2
]
:
0
.
0
f
;
debugArray
[
index
].
w
=
blockId
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
torque
[
indexI
][
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
torque
[
indexI
][
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
torque
[
indexI
][
2
]
:
0
.
0
f
;
debugArray
[
index
].
w
=
blockId
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
torque
[
indexJ
][
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
torque
[
indexJ
][
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
torque
[
indexJ
][
2
]
:
0
.
0
f
;
*/
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
int
pullIndex
=
0
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
debugArray
[
index
].
w
=
blockId
;
for
(
int
pullIndex
=
0
;
pullIndex
<
pullIndexMax
;
pullIndex
++
){
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
pullIndex
++
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
pullIndex
++
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
pullIndex
++
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
}
}
#endif
...
...
@@ -449,9 +414,8 @@ if( atomI == targetAtom || atomJ == targetAtom ){
// force
float
energy
;
calculateRealSpaceEwaldPairIxn_kernel
(
localParticle
,
psA
[
tj
],
cAmoebaSim
.
scalingDistanceCutoff
,
scalingFactors
,
force
,
torque
,
&
energy
calculatePmeDirectElectrostaticPairIxn_kernel
(
localParticle
,
psA
[
tj
],
scalingFactors
,
force
,
torque
,
&
energy
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
...
...
@@ -490,55 +454,39 @@ if( atomI == targetAtom || atomJ == targetAtom ){
unsigned
int
indexI
=
(
atomI
==
targetAtom
)
?
0
:
1
;
unsigned
int
indexJ
=
(
atomI
==
targetAtom
)
?
1
:
0
;
float
forceSign
=
(
atomI
==
targetAtom
)
?
1
.
0
f
:
-
1
.
0
f
;
float
blockId
=
3
.
0
f
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
atomJ
;
debugArray
[
index
].
z
=
3
.
0
f
;
debugArray
[
index
].
w
=
(
float
)
y
;
/*
debugArray
[
index
].
z
=
(
float
)
y
;
debugArray
[
index
].
w
=
blockId
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
forceSign
*
force
[
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
forceSign
*
force
[
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
forceSign
*
force
[
2
]
:
0
.
0
f
;
debugArray
[
index
].
w
=
blockId
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
torque
[
indexI
][
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
torque
[
indexI
][
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
torque
[
indexI
][
2
]
:
0
.
0
f
;
debugArray
[
index
].
w
=
blockId
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
torque
[
indexJ
][
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
torque
[
indexJ
][
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
torque
[
indexJ
][
2
]
:
0
.
0
f
;
*/
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
int
pullIndex
=
0
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
pullIndex
++
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
pullIndex
++
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
debugArray
[
index
].
w
=
blockId
;
for
(
int
pullIndex
=
0
;
pullIndex
<
pullIndexMax
;
pullIndex
++
){
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
pullIndex
++
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
}
#if 0
index += cAmoebaSim.paddedNumberOfAtoms;
...
...
@@ -550,13 +498,6 @@ if( atomI == targetAtom || atomJ == targetAtom ){
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = scalingFactors[MScaleIndex];
debugArray[index].y = mScaleVal;
for( int pIndex = 0; pIndex < 14; pIndex++ ){
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pIndex].x;
debugArray[index].y = pullBack[pIndex].y;
debugArray[index].z = pullBack[pIndex].z;
debugArray[index].w = pullBack[pIndex].w;
}
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = labFrameDipole[3*atomI];
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
View file @
2ef05285
...
...
@@ -322,7 +322,7 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
--------------------------------------------------------------------------------------- */
void
cudaComputeAmoebaPmeFixedEField
(
amoebaGpuContext
amoebaGpu
)
static
void
cudaComputeAmoebaPme
Direct
FixedEField
(
amoebaGpuContext
amoebaGpu
)
{
// ---------------------------------------------------------------------------------------
...
...
@@ -487,3 +487,9 @@ void cudaComputeAmoebaPmeFixedEField( amoebaGpuContext amoebaGpu )
}
}
void
cudaComputeAmoebaPmeFixedEField
(
amoebaGpuContext
amoebaGpu
)
{
cudaComputeAmoebaPmeDirectFixedEField
(
amoebaGpu
);
kCalculateAmoebaPMEFixedMultipoleField
(
amoebaGpu
);
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
View file @
2ef05285
...
...
@@ -616,6 +616,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
// matrix multiply
cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply
(
amoebaGpu
,
amoebaGpu
->
psWorkVector
[
0
],
amoebaGpu
->
psWorkVector
[
1
]
);
kCalculateAmoebaPMEInducedDipoleField
(
amoebaGpu
);
LAUNCHERROR
(
"cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply Loop
\n
"
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
View file @
2ef05285
...
...
@@ -390,7 +390,7 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
if
(
amoebaGpu
->
multipoleNonbondedMethod
==
AMOEBA_NO_CUTOFF
){
cudaComputeAmoebaElectrostatic
(
amoebaGpu
);
}
else
{
cudaComputeAmoeba
RealSpaceEwald
(
amoebaGpu
);
cudaComputeAmoeba
PmeElectrostatic
(
amoebaGpu
);
}
// map torques to forces
...
...
plugins/amoeba/platforms/cuda/tests/AmoebaTinkerParameterFile.cpp
View file @
2ef05285
...
...
@@ -2008,6 +2008,7 @@ static int readAmoebaMultipoleParameters( FILE* filePtr, int version, MapStringI
multipoleForce
->
setNonbondedMethod
(
AmoebaMultipoleForce
::
NoCutoff
);
}
multipoleForce
->
setCutoffDistance
(
cutoffDistance
);
multipoleForce
->
setAEwald
(
aewald
);
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
box
[
0
],
0.0
,
0.0
),
Vec3
(
0.0
,
box
[
1
],
0.0
),
Vec3
(
0.0
,
0.0
,
box
[
2
])
);
if
(
log
){
(
void
)
fprintf
(
log
,
"%s number of MultipoleParameter terms=%d usePme=%d aewald=%15.7e cutoffDistance=%12.4f
\n
"
,
...
...
@@ -2111,6 +2112,7 @@ static int readAmoebaMultipoleParameters( FILE* filePtr, int version, MapStringI
double
polarityConversion
=
AngstromToNm
*
AngstromToNm
*
AngstromToNm
;
double
dampingFactorConversion
=
sqrt
(
AngstromToNm
);
multipoleForce
->
setAEwald
(
multipoleForce
->
getAEwald
()
/
AngstromToNm
);
multipoleForce
->
setCutoffDistance
(
multipoleForce
->
getCutoffDistance
()
*
AngstromToNm
);
multipoleForce
->
setScalingDistanceCutoff
(
multipoleForce
->
getScalingDistanceCutoff
()
*
AngstromToNm
);
...
...
@@ -2165,8 +2167,8 @@ static int readAmoebaMultipoleParameters( FILE* filePtr, int version, MapStringI
(
useOpenMMUnits
?
"OpenMM"
:
"Amoeba"
)
);
std
::
string
nonbondedMethod
=
multipoleForce
->
getNonbondedMethod
(
)
==
AmoebaMultipoleForce
::
PME
?
"PME"
:
"NoCutoff"
;
(
void
)
fprintf
(
log
,
"NonbondedMethod=%s cutoff=%15.7e.
\n
"
,
nonbondedMethod
.
c_str
(),
multipoleForce
->
getCutoffDistance
()
);
(
void
)
fprintf
(
log
,
"NonbondedMethod=%s
aEwald=%15.7e
cutoff=%15.7e.
\n
"
,
nonbondedMethod
.
c_str
(),
multipoleForce
->
getAEwald
(),
multipoleForce
->
getCutoffDistance
()
);
Vec3
a
,
b
,
c
;
system
.
getDefaultPeriodicBoxVectors
(
a
,
b
,
c
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment