Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
2ef05285
Commit
2ef05285
authored
Sep 13, 2010
by
Mark Friedrichs
Browse files
Direct space PME
parent
c22f00cf
Changes
13
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
546 additions
and
452 deletions
+546
-452
plugins/amoeba/CMakeLists.txt
plugins/amoeba/CMakeLists.txt
+6
-5
plugins/amoeba/openmmapi/include/AmoebaMultipoleForce.h
plugins/amoeba/openmmapi/include/AmoebaMultipoleForce.h
+15
-0
plugins/amoeba/openmmapi/src/AmoebaMultipoleForce.cpp
plugins/amoeba/openmmapi/src/AmoebaMultipoleForce.cpp
+8
-0
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+1
-0
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
+4
-4
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
...ins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
+3
-3
plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
...src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
+354
-235
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
.../src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
+140
-199
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
...ms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
+7
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+2
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
...platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
+1
-1
plugins/amoeba/platforms/cuda/tests/AmoebaTinkerParameterFile.cpp
...amoeba/platforms/cuda/tests/AmoebaTinkerParameterFile.cpp
+4
-2
No files found.
plugins/amoeba/CMakeLists.txt
View file @
2ef05285
...
@@ -38,8 +38,7 @@ ENDIF(LOG)
...
@@ -38,8 +38,7 @@ ENDIF(LOG)
# The source is organized into subdirectories, but we handle them all from
# The source is organized into subdirectories, but we handle them all from
# this CMakeLists file rather than letting CMake visit them as SUBDIRS.
# this CMakeLists file rather than letting CMake visit them as SUBDIRS.
#SET(OPENMM_AMOEBA_PLUGIN_SOURCE_SUBDIRS . openmmapi platforms/reference)
SET
(
OPENMM_AMOEBA_PLUGIN_SOURCE_SUBDIRS . openmmapi platforms/reference
)
SET
(
OPENMM_AMOEBA_PLUGIN_SOURCE_SUBDIRS . openmmapi
)
# Collect up information about the version of the OpenMM library we're building
# Collect up information about the version of the OpenMM library we're building
# and make it available to the code so it can be built into the binaries.
# and make it available to the code so it can be built into the binaries.
...
@@ -103,8 +102,10 @@ FOREACH(subdir ${OPENMM_AMOEBA_PLUGIN_SOURCE_SUBDIRS})
...
@@ -103,8 +102,10 @@ FOREACH(subdir ${OPENMM_AMOEBA_PLUGIN_SOURCE_SUBDIRS})
INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
subdir
}
/include
)
INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
subdir
}
/include
)
ENDFOREACH
(
subdir
)
ENDFOREACH
(
subdir
)
#INCLUDE_DIRECTORIES(BEFORE ${OPENMM_DIR}/platforms/reference/src)
INCLUDE_DIRECTORIES
(
BEFORE
${
OPENMM_DIR
}
/platforms/reference/src
)
#INCLUDE_DIRECTORIES(BEFORE ${OPENMM_DIR}/platforms/reference/src/SimTKReference)
INCLUDE_DIRECTORIES
(
BEFORE
${
OPENMM_DIR
}
/platforms/reference/src/SimTKReference
)
INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_CURRENT_SOURCE_DIR
}
/platforms/reference/src
)
INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_CURRENT_SOURCE_DIR
}
/platforms/reference/src/SimTKReference
)
# ----------------------------------------------------------------------------
# ----------------------------------------------------------------------------
IF
(
LOG
)
IF
(
LOG
)
...
@@ -158,7 +159,7 @@ IF(OPENMM_BUILD_STATIC_LIB)
...
@@ -158,7 +159,7 @@ IF(OPENMM_BUILD_STATIC_LIB)
TARGET_LINK_LIBRARIES
(
${
STATIC_AMOEBA_TARGET
}
${
STATIC_TARGET
}
)
TARGET_LINK_LIBRARIES
(
${
STATIC_AMOEBA_TARGET
}
${
STATIC_TARGET
}
)
ENDIF
(
OPENMM_BUILD_STATIC_LIB
)
ENDIF
(
OPENMM_BUILD_STATIC_LIB
)
#
ADD_SUBDIRECTORY(platforms/reference/tests)
ADD_SUBDIRECTORY
(
platforms/reference/tests
)
# Which hardware platforms to build
# Which hardware platforms to build
IF
(
CUDA_FOUND
)
IF
(
CUDA_FOUND
)
...
...
plugins/amoeba/openmmapi/include/AmoebaMultipoleForce.h
View file @
2ef05285
...
@@ -113,6 +113,20 @@ public:
...
@@ -113,6 +113,20 @@ public:
*/
*/
void
setCutoffDistance
(
double
distance
);
void
setCutoffDistance
(
double
distance
);
/**
* Get the aEwald parameter
*
* @return the Ewald parameter
*/
double
getAEwald
()
const
;
/**
* Set the aEwald parameter
*
* @param Ewald parameter
*/
void
setAEwald
(
double
aewald
);
/**
/**
* Add multipole-related info for a particle
* Add multipole-related info for a particle
*
*
...
@@ -279,6 +293,7 @@ private:
...
@@ -279,6 +293,7 @@ private:
AmoebaNonbondedMethod
nonbondedMethod
;
AmoebaNonbondedMethod
nonbondedMethod
;
double
cutoffDistance
;
double
cutoffDistance
;
double
aewald
;
MutualInducedIterationMethod
mutualInducedIterationMethod
;
MutualInducedIterationMethod
mutualInducedIterationMethod
;
int
mutualInducedMaxIterations
;
int
mutualInducedMaxIterations
;
double
mutualInducedTargetEpsilon
;
double
mutualInducedTargetEpsilon
;
...
...
plugins/amoeba/openmmapi/src/AmoebaMultipoleForce.cpp
View file @
2ef05285
...
@@ -56,6 +56,14 @@ void AmoebaMultipoleForce::setCutoffDistance(double distance) {
...
@@ -56,6 +56,14 @@ void AmoebaMultipoleForce::setCutoffDistance(double distance) {
cutoffDistance
=
distance
;
cutoffDistance
=
distance
;
}
}
double
AmoebaMultipoleForce
::
getAEwald
()
const
{
return
aewald
;
}
void
AmoebaMultipoleForce
::
setAEwald
(
double
inputAewald
)
{
aewald
=
inputAewald
;
}
AmoebaMultipoleForce
::
MutualInducedIterationMethod
AmoebaMultipoleForce
::
getMutualInducedIterationMethod
(
void
)
const
{
AmoebaMultipoleForce
::
MutualInducedIterationMethod
AmoebaMultipoleForce
::
getMutualInducedIterationMethod
(
void
)
const
{
return
mutualInducedIterationMethod
;
return
mutualInducedIterationMethod
;
}
}
...
...
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
2ef05285
...
@@ -589,6 +589,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -589,6 +589,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
static_cast
<
float
>
(
force
.
getMutualInducedTargetEpsilon
()),
static_cast
<
float
>
(
force
.
getMutualInducedTargetEpsilon
()),
nonbondedMethod
,
nonbondedMethod
,
static_cast
<
float
>
(
force
.
getCutoffDistance
()),
static_cast
<
float
>
(
force
.
getCutoffDistance
()),
static_cast
<
float
>
(
force
.
getAEwald
()),
static_cast
<
float
>
(
force
.
getElectricConstant
())
);
static_cast
<
float
>
(
force
.
getElectricConstant
())
);
if
(
nonbondedMethod
==
AmoebaMultipoleForce
::
PME
)
{
if
(
nonbondedMethod
==
AmoebaMultipoleForce
::
PME
)
{
double
alpha
;
double
alpha
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
View file @
2ef05285
...
@@ -350,6 +350,7 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
...
@@ -350,6 +350,7 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(
void
)
fprintf
(
log
,
" pP_ScaleIndices %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pP_ScaleIndices
);
(
void
)
fprintf
(
log
,
" pP_ScaleIndices %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pP_ScaleIndices
);
(
void
)
fprintf
(
log
,
" pM_ScaleIndices %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pM_ScaleIndices
);
(
void
)
fprintf
(
log
,
" pM_ScaleIndices %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pM_ScaleIndices
);
(
void
)
fprintf
(
log
,
" sqrtPi %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
sqrtPi
);
(
void
)
fprintf
(
log
,
" sqrtPi %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
sqrtPi
);
(
void
)
fprintf
(
log
,
" alpha Ewald %15.7e
\n
"
,
gpu
->
sim
.
alphaEwald
);
(
void
)
fprintf
(
log
,
" cutoffDistance2 %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
cutoffDistance2
);
(
void
)
fprintf
(
log
,
" cutoffDistance2 %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
cutoffDistance2
);
(
void
)
fprintf
(
log
,
" electric %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
electric
);
(
void
)
fprintf
(
log
,
" electric %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
electric
);
(
void
)
fprintf
(
log
,
" box %15.7e %15.7e %15.7e
\n
"
,
gpu
->
sim
.
periodicBoxSizeX
,
gpu
->
sim
.
periodicBoxSizeY
,
gpu
->
sim
.
periodicBoxSizeZ
);
(
void
)
fprintf
(
log
,
" box %15.7e %15.7e %15.7e
\n
"
,
gpu
->
sim
.
periodicBoxSizeX
,
gpu
->
sim
.
periodicBoxSizeY
,
gpu
->
sim
.
periodicBoxSizeZ
);
...
@@ -1463,7 +1464,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
...
@@ -1463,7 +1464,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
const
std
::
vector
<
std
::
vector
<
std
::
vector
<
int
>
>
>&
multipoleParticleCovalentInfo
,
const
std
::
vector
<
int
>&
covalentDegree
,
const
std
::
vector
<
std
::
vector
<
std
::
vector
<
int
>
>
>&
multipoleParticleCovalentInfo
,
const
std
::
vector
<
int
>&
covalentDegree
,
const
std
::
vector
<
int
>&
minCovalentIndices
,
const
std
::
vector
<
int
>&
minCovalentPolarizationIndices
,
int
maxCovalentRange
,
const
std
::
vector
<
int
>&
minCovalentIndices
,
const
std
::
vector
<
int
>&
minCovalentPolarizationIndices
,
int
maxCovalentRange
,
int
mutualInducedIterativeMethod
,
int
mutualInducedMaxIterations
,
float
mutualInducedTargetEpsilon
,
int
mutualInducedIterativeMethod
,
int
mutualInducedMaxIterations
,
float
mutualInducedTargetEpsilon
,
int
nonbondedMethod
,
float
cutoffDistance
,
float
electricConstant
){
int
nonbondedMethod
,
float
cutoffDistance
,
float
alphaEwald
,
float
electricConstant
){
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
...
@@ -1565,9 +1566,8 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
...
@@ -1565,9 +1566,8 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
amoebaGpu
->
amoebaSim
.
cutoffDistance2
=
cutoffDistance
*
cutoffDistance
;
amoebaGpu
->
amoebaSim
.
cutoffDistance2
=
cutoffDistance
*
cutoffDistance
;
amoebaGpu
->
amoebaSim
.
sqrtPi
=
sqrt
(
3.1415926535897932384626433832795
);
amoebaGpu
->
amoebaSim
.
sqrtPi
=
sqrt
(
3.1415926535897932384626433832795
);
amoebaGpu
->
amoebaSim
.
electric
=
electricConstant
;
amoebaGpu
->
amoebaSim
.
electric
=
electricConstant
;
amoebaGpu
->
gpuContext
->
sim
.
alphaEwald
=
alphaEwald
;
amoebaGpu
->
gpuContext
->
sim
.
nonbondedCutoff
=
cutoffDistance
;
amoebaGpu
->
gpuContext
->
sim
.
nonbondedCutoff
=
cutoffDistance
;
tabulateErfc
(
amoebaGpu
->
gpuContext
);
if
(
amoebaGpu
->
amoebaSim
.
dielec
<
1.0e-05
){
if
(
amoebaGpu
->
amoebaSim
.
dielec
<
1.0e-05
){
amoebaGpu
->
amoebaSim
.
dielec
=
1.0
f
;
amoebaGpu
->
amoebaSim
.
dielec
=
1.0
f
;
...
@@ -2766,7 +2766,7 @@ void amoebaGpuSetConstants(amoebaGpuContext amoebaGpu)
...
@@ -2766,7 +2766,7 @@ void amoebaGpuSetConstants(amoebaGpuContext amoebaGpu)
SetCalculateAmoebaCudaPmeMutualInducedFieldSim
(
amoebaGpu
);
SetCalculateAmoebaCudaPmeMutualInducedFieldSim
(
amoebaGpu
);
SetCalculateAmoebaCudaPmeFixedEFieldSim
(
amoebaGpu
);
SetCalculateAmoebaCudaPmeFixedEFieldSim
(
amoebaGpu
);
SetCalculateAmoebaElectrostaticSim
(
amoebaGpu
);
SetCalculateAmoebaElectrostaticSim
(
amoebaGpu
);
SetCalculateAmoeba
RealSpaceEwald
Sim
(
amoebaGpu
);
SetCalculateAmoeba
PmeDirectElectrostatic
Sim
(
amoebaGpu
);
SetCalculateAmoebaCudaMapTorquesSim
(
amoebaGpu
);
SetCalculateAmoebaCudaMapTorquesSim
(
amoebaGpu
);
SetCalculateAmoebaKirkwoodSim
(
amoebaGpu
);
SetCalculateAmoebaKirkwoodSim
(
amoebaGpu
);
SetCalculateAmoebaKirkwoodEDiffSim
(
amoebaGpu
);
SetCalculateAmoebaKirkwoodEDiffSim
(
amoebaGpu
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
View file @
2ef05285
...
@@ -106,9 +106,9 @@ extern void SetCalculateAmoebaElectrostaticSim( amoebaGpuContext amoebaGpu );
...
@@ -106,9 +106,9 @@ extern void SetCalculateAmoebaElectrostaticSim( amoebaGpuContext amoebaGpu );
extern
void
GetCalculateAmoebaElectrostaticSim
(
amoebaGpuContext
amoebaGpu
);
extern
void
GetCalculateAmoebaElectrostaticSim
(
amoebaGpuContext
amoebaGpu
);
extern
void
cudaComputeAmoebaElectrostatic
(
amoebaGpuContext
amoebaGpu
);
extern
void
cudaComputeAmoebaElectrostatic
(
amoebaGpuContext
amoebaGpu
);
extern
void
SetCalculateAmoeba
RealSpaceEwald
Sim
(
amoebaGpuContext
amoebaGpu
);
extern
void
SetCalculateAmoeba
PmeDirectElectrostatic
Sim
(
amoebaGpuContext
amoebaGpu
);
extern
void
GetCalculateAmoeba
RealSpaceEwald
Sim
(
amoebaGpuContext
amoebaGpu
);
extern
void
GetCalculateAmoeba
PmeDirectElectrostatic
Sim
(
amoebaGpuContext
amoebaGpu
);
extern
void
cudaComputeAmoeba
RealSpaceEwald
(
amoebaGpuContext
amoebaGpu
);
extern
void
cudaComputeAmoeba
PmeElectrostatic
(
amoebaGpuContext
amoebaGpu
);
extern
void
SetCalculateAmoebaCudaMapTorquesSim
(
amoebaGpuContext
gpu
);
extern
void
SetCalculateAmoebaCudaMapTorquesSim
(
amoebaGpuContext
gpu
);
extern
void
GetCalculateAmoebaCudaMapTorquesSim
(
amoebaGpuContext
gpu
);
extern
void
GetCalculateAmoebaCudaMapTorquesSim
(
amoebaGpuContext
gpu
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
View file @
2ef05285
...
@@ -307,7 +307,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
...
@@ -307,7 +307,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
const
std
::
vector
<
std
::
vector
<
std
::
vector
<
int
>
>
>&
multipoleAtomCovalentInfo
,
const
std
::
vector
<
int
>&
covalentDegree
,
const
std
::
vector
<
std
::
vector
<
std
::
vector
<
int
>
>
>&
multipoleAtomCovalentInfo
,
const
std
::
vector
<
int
>&
covalentDegree
,
const
std
::
vector
<
int
>&
minCovalentIndices
,
const
std
::
vector
<
int
>&
minCovalentPolarizationIndices
,
int
maxCovalentRange
,
const
std
::
vector
<
int
>&
minCovalentIndices
,
const
std
::
vector
<
int
>&
minCovalentPolarizationIndices
,
int
maxCovalentRange
,
int
mutualInducedIterationMethod
,
int
mutualInducedMaxIterations
,
float
mutualInducedTargetEpsilon
,
int
mutualInducedIterationMethod
,
int
mutualInducedMaxIterations
,
float
mutualInducedTargetEpsilon
,
int
nonbondedMethod
,
float
cutoffDistance
,
float
electricConstant
);
int
nonbondedMethod
,
float
cutoffDistance
,
float
alphaEwald
,
float
electricConstant
);
extern
"C"
extern
"C"
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCuda
RealSpaceEwald
.cu
→
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCuda
PmeDirectElectrostatic
.cu
View file @
2ef05285
...
@@ -11,34 +11,24 @@
...
@@ -11,34 +11,24 @@
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaAmoebaGmxSimulation
cAmoebaSim
;
static
__constant__
cudaAmoebaGmxSimulation
cAmoebaSim
;
void
SetCalculateAmoeba
RealSpaceEwald
Sim
(
amoebaGpuContext
amoebaGpu
)
void
SetCalculateAmoeba
PmeDirectElectrostatic
Sim
(
amoebaGpuContext
amoebaGpu
)
{
{
cudaError_t
status
;
cudaError_t
status
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
status
=
cudaMemcpyToSymbol
(
cSim
,
&
gpu
->
sim
,
sizeof
(
cudaGmxSimulation
));
status
=
cudaMemcpyToSymbol
(
cSim
,
&
gpu
->
sim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"SetCalculateAmoeba
RealSpaceEwald
Sim: cudaMemcpyToSymbol: SetSim copy to cSim failed"
);
RTERROR
(
status
,
"SetCalculateAmoeba
PmeDirectElectrostatic
Sim: cudaMemcpyToSymbol: SetSim copy to cSim failed"
);
status
=
cudaMemcpyToSymbol
(
cAmoebaSim
,
&
amoebaGpu
->
amoebaSim
,
sizeof
(
cudaAmoebaGmxSimulation
));
status
=
cudaMemcpyToSymbol
(
cAmoebaSim
,
&
amoebaGpu
->
amoebaSim
,
sizeof
(
cudaAmoebaGmxSimulation
));
RTERROR
(
status
,
"SetCalculateAmoeba
RealSpaceEwald
Sim: cudaMemcpyToSymbol: SetSim copy to cAmoebaSim failed"
);
RTERROR
(
status
,
"SetCalculateAmoeba
PmeDirectElectrostatic
Sim: cudaMemcpyToSymbol: SetSim copy to cAmoebaSim failed"
);
}
}
void
GetCalculateAmoeba
RealSpaceEwald
Sim
(
amoebaGpuContext
amoebaGpu
)
void
GetCalculateAmoeba
PmeDirectElectrostatic
Sim
(
amoebaGpuContext
amoebaGpu
)
{
{
cudaError_t
status
;
cudaError_t
status
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
status
=
cudaMemcpyFromSymbol
(
&
gpu
->
sim
,
cSim
,
sizeof
(
cudaGmxSimulation
));
status
=
cudaMemcpyFromSymbol
(
&
gpu
->
sim
,
cSim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"GetCalculateAmoeba
RealSpaceEwald
Sim: cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
RTERROR
(
status
,
"GetCalculateAmoeba
PmeDirectElectrostatic
Sim: cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
status
=
cudaMemcpyFromSymbol
(
&
amoebaGpu
->
amoebaSim
,
cAmoebaSim
,
sizeof
(
cudaAmoebaGmxSimulation
));
status
=
cudaMemcpyFromSymbol
(
&
amoebaGpu
->
amoebaSim
,
cAmoebaSim
,
sizeof
(
cudaAmoebaGmxSimulation
));
RTERROR
(
status
,
"GetCalculateAmoebaRealSpaceEwaldSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed"
);
RTERROR
(
status
,
"GetCalculateAmoebaPmeDirectElectrostaticSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed"
);
}
texture
<
float
,
1
,
cudaReadModeElementType
>
tabulatedErfcRef
;
static
__device__
float
fastErfc
(
float
r
)
{
float
normalized
=
cSim
.
tabulatedErfcScale
*
r
;
int
index
=
(
int
)
normalized
;
float
fract2
=
normalized
-
index
;
float
fract1
=
1.0
f
-
fract2
;
return
fract1
*
tex1Dfetch
(
tabulatedErfcRef
,
index
)
+
fract2
*
tex1Dfetch
(
tabulatedErfcRef
,
index
+
1
);
}
}
static
int
const
PScaleIndex
=
0
;
static
int
const
PScaleIndex
=
0
;
...
@@ -60,7 +50,7 @@ static int const MScaleIndex = 3;
...
@@ -60,7 +50,7 @@ static int const MScaleIndex = 3;
//static int const Ddsc72Index = 16;
//static int const Ddsc72Index = 16;
static
int
const
LastScalingIndex
=
4
;
static
int
const
LastScalingIndex
=
4
;
struct
RealSpaceEwald
Particle
{
struct
PmeDirectElectrostatic
Particle
{
// coordinates charge
// coordinates charge
...
@@ -97,30 +87,59 @@ struct RealSpaceEwaldParticle {
...
@@ -97,30 +87,59 @@ struct RealSpaceEwaldParticle {
};
};
__device__
void
calculateRealSpaceEwaldPairIxn_kernel
(
RealSpaceEwaldParticle
&
atomI
,
RealSpaceEwaldParticle
&
atomJ
,
// self-energy for PME
float
scalingDistanceCutoff
,
float
*
scalingFactors
,
float
*
outputForce
,
float
outputTorque
[
2
][
3
],
__device__
static
void
calculatePmeSelfEnergyElectrostaticPairIxn_kernel
(
PmeDirectElectrostaticParticle
&
atomI
,
float
*
energy
)
float
*
energy
{
float
term
=
2.0
f
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
;
float
fterm
=
-
(
cAmoebaSim
.
electric
/
cAmoebaSim
.
dielec
)
*
cSim
.
alphaEwald
/
cAmoebaSim
.
sqrtPi
;
float
cii
=
atomI
.
q
*
atomI
.
q
;
float
dii
=
atomI
.
labFrameDipole
[
0
]
*
atomI
.
labFrameDipole
[
0
]
+
atomI
.
labFrameDipole
[
1
]
*
atomI
.
labFrameDipole
[
1
]
+
atomI
.
labFrameDipole
[
2
]
*
atomI
.
labFrameDipole
[
2
];
float
qii
=
atomI
.
labFrameQuadrupole
[
0
]
*
atomI
.
labFrameQuadrupole
[
0
]
+
atomI
.
labFrameQuadrupole
[
4
]
*
atomI
.
labFrameQuadrupole
[
4
]
+
atomI
.
labFrameQuadrupole
[
8
]
*
atomI
.
labFrameQuadrupole
[
8
]
+
2.0
f
*
(
atomI
.
labFrameQuadrupole
[
1
]
*
atomI
.
labFrameQuadrupole
[
1
]
+
atomI
.
labFrameQuadrupole
[
2
]
*
atomI
.
labFrameQuadrupole
[
2
]
+
atomI
.
labFrameQuadrupole
[
5
]
*
atomI
.
labFrameQuadrupole
[
5
]);
float
uii
=
atomI
.
labFrameDipole
[
0
]
*
atomI
.
inducedDipole
[
0
]
+
atomI
.
labFrameDipole
[
1
]
*
atomI
.
inducedDipole
[
1
]
+
atomI
.
labFrameDipole
[
2
]
*
atomI
.
inducedDipole
[
2
];
*
energy
=
(
cii
+
term
*
(
dii
/
3.0
f
+
2.0
f
*
term
*
qii
/
5.0
f
));
*
energy
+=
term
*
uii
/
3.0
f
;
*
energy
*=
fterm
;
}
// self-torque for PME
__device__
static
void
calculatePmeSelfTorqueElectrostaticPairIxn_kernel
(
PmeDirectElectrostaticParticle
&
atomI
)
{
float
term
=
(
4.0
f
/
3.0
f
)
*
(
cAmoebaSim
.
electric
/
cAmoebaSim
.
dielec
)
*
(
cSim
.
alphaEwald
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
)
/
cAmoebaSim
.
sqrtPi
;
float
uix
=
0.5
f
*
(
atomI
.
inducedDipole
[
0
]
+
atomI
.
inducedDipoleP
[
0
]);
float
uiy
=
0.5
f
*
(
atomI
.
inducedDipole
[
1
]
+
atomI
.
inducedDipoleP
[
1
]);
float
uiz
=
0.5
f
*
(
atomI
.
inducedDipole
[
2
]
+
atomI
.
inducedDipoleP
[
2
]);
atomI
.
torque
[
0
]
+=
term
*
(
atomI
.
labFrameDipole
[
1
]
*
uiz
-
atomI
.
labFrameDipole
[
2
]
*
uiy
);
atomI
.
torque
[
1
]
+=
term
*
(
atomI
.
labFrameDipole
[
2
]
*
uix
-
atomI
.
labFrameDipole
[
0
]
*
uiz
);
atomI
.
torque
[
2
]
+=
term
*
(
atomI
.
labFrameDipole
[
0
]
*
uiy
-
atomI
.
labFrameDipole
[
1
]
*
uix
);
}
__device__
void
calculatePmeDirectElectrostaticPairIxn_kernel
(
PmeDirectElectrostaticParticle
&
atomI
,
PmeDirectElectrostaticParticle
&
atomJ
,
float
*
scalingFactors
,
float
*
outputForce
,
float
outputTorque
[
2
][
3
],
float
*
energy
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
,
float4
*
debugArray
#endif
#endif
){
){
float
e
,
ei
,
f
;
float
e
,
ei
;
//float gfd,gfdr;
//float xix,yix,zix;
//float xiy,yiy,ziy;
//float xiz,yiz,ziz;
//float xkx,ykx,zkx;
//float xky,yky,zky;
//float xkz,ykz,zkz;
//float rr1,rr3;
//float rr5,rr7,rr9,rr11;
float
erl
,
erli
;
float
erl
,
erli
;
//float frcxi[4],frcxk[4];
//float frcyi[4],frcyk[4];
//float frczi[4],frczk[4];
float
di
[
4
],
qi
[
10
];
float
di
[
4
],
qi
[
10
];
float
dk
[
4
],
qk
[
10
];
float
dk
[
4
],
qk
[
10
];
float
fridmp
[
4
],
findmp
[
4
];
float
fridmp
[
4
],
findmp
[
4
];
...
@@ -130,7 +149,6 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
...
@@ -130,7 +149,6 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
float
ttm2i
[
4
],
ttm3i
[
4
];
float
ttm2i
[
4
],
ttm3i
[
4
];
float
ttm2r
[
4
],
ttm3r
[
4
];
float
ttm2r
[
4
],
ttm3r
[
4
];
float
ttm2ri
[
4
],
ttm3ri
[
4
];
float
ttm2ri
[
4
],
ttm3ri
[
4
];
//float fdir[4]
float
dixdk
[
4
];
float
dixdk
[
4
];
float
dkxui
[
4
],
dixuk
[
4
];
float
dkxui
[
4
],
dixuk
[
4
];
float
dixukp
[
4
],
dkxuip
[
4
];
float
dixukp
[
4
],
dkxuip
[
4
];
...
@@ -159,7 +177,7 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
...
@@ -159,7 +177,7 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
float
gfr
[
8
],
gfri
[
7
];
float
gfr
[
8
],
gfri
[
7
];
float
gti
[
7
],
gtri
[
7
];
float
gti
[
7
],
gtri
[
7
];
f
=
(
cAmoebaSim
.
electric
/
cAmoebaSim
.
dielec
);
f
loat
conversionFactor
=
(
cAmoebaSim
.
electric
/
cAmoebaSim
.
dielec
);
// set the permanent multipole and induced dipole values;
// set the permanent multipole and induced dipole values;
...
@@ -193,28 +211,29 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
...
@@ -193,28 +211,29 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
float
r2
=
xr
*
xr
+
yr
*
yr
+
zr
*
zr
;
float
r2
=
xr
*
xr
+
yr
*
yr
+
zr
*
zr
;
if
(
r2
<=
cAmoebaSim
.
cutoffDistance2
){
if
(
r2
<=
cAmoebaSim
.
cutoffDistance2
){
float
r
=
sqrt
(
r2
);
float
r
=
sqrt
(
r2
);
float
ck
=
atomJ
.
q
;
float
ck
=
atomJ
.
q
;
dk
[
1
]
=
atomJ
.
labFrameDipole
[
0
];
dk
[
1
]
=
atomJ
.
labFrameDipole
[
0
];
dk
[
2
]
=
atomJ
.
labFrameDipole
[
1
];
dk
[
2
]
=
atomJ
.
labFrameDipole
[
1
];
dk
[
3
]
=
atomJ
.
labFrameDipole
[
2
];
dk
[
3
]
=
atomJ
.
labFrameDipole
[
2
];
qk
[
1
]
=
atomJ
.
labFrameQuadrupole
[
0
];
qk
[
1
]
=
atomJ
.
labFrameQuadrupole
[
0
];
qk
[
2
]
=
atomJ
.
labFrameQuadrupole
[
1
];
qk
[
2
]
=
atomJ
.
labFrameQuadrupole
[
1
];
qk
[
3
]
=
atomJ
.
labFrameQuadrupole
[
2
];
qk
[
3
]
=
atomJ
.
labFrameQuadrupole
[
2
];
qk
[
4
]
=
atomJ
.
labFrameQuadrupole
[
3
];
qk
[
4
]
=
atomJ
.
labFrameQuadrupole
[
3
];
qk
[
5
]
=
atomJ
.
labFrameQuadrupole
[
4
];
qk
[
5
]
=
atomJ
.
labFrameQuadrupole
[
4
];
qk
[
6
]
=
atomJ
.
labFrameQuadrupole
[
5
];
qk
[
6
]
=
atomJ
.
labFrameQuadrupole
[
5
];
qk
[
7
]
=
atomJ
.
labFrameQuadrupole
[
6
];
qk
[
7
]
=
atomJ
.
labFrameQuadrupole
[
6
];
qk
[
8
]
=
atomJ
.
labFrameQuadrupole
[
7
];
qk
[
8
]
=
atomJ
.
labFrameQuadrupole
[
7
];
qk
[
9
]
=
atomJ
.
labFrameQuadrupole
[
8
];
qk
[
9
]
=
atomJ
.
labFrameQuadrupole
[
8
];
// calculate the real space error function terms;
// calculate the real space error function terms;
float
ralpha
=
cSim
.
alphaEwald
*
r
;
float
ralpha
=
cSim
.
alphaEwald
*
r
;
bn
[
0
]
=
fastE
rfc
(
ralpha
)
/
r
;
bn
[
0
]
=
e
rfc
(
ralpha
)
/
r
;
float
alsq2
=
2.0
f
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
;
float
alsq2
=
2.0
f
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
;
float
alsq2n
=
0.0
f
;
float
alsq2n
=
0.0
f
;
...
@@ -250,17 +269,17 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
...
@@ -250,17 +269,17 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
float
scale5
=
1.0
f
;
float
scale5
=
1.0
f
;
float
scale7
=
1.0
f
;
float
scale7
=
1.0
f
;
ddsc3
[
0
]
=
0.0
f
;
ddsc3
[
1
]
=
0.0
f
;
ddsc3
[
1
]
=
0.0
f
;
ddsc3
[
2
]
=
0.0
f
;
ddsc3
[
2
]
=
0.0
f
;
ddsc3
[
3
]
=
0.0
f
;
ddsc5
[
0
]
=
0.0
f
;
ddsc5
[
1
]
=
0.0
f
;
ddsc5
[
1
]
=
0.0
f
;
ddsc5
[
2
]
=
0.0
f
;
ddsc5
[
2
]
=
0.0
f
;
ddsc5
[
3
]
=
0.0
f
;
ddsc7
[
0
]
=
0.0
f
;
ddsc7
[
1
]
=
0.0
f
;
ddsc7
[
1
]
=
0.0
f
;
ddsc7
[
2
]
=
0.0
f
;
ddsc7
[
2
]
=
0.0
f
;
ddsc7
[
3
]
=
0.0
f
;
float
pdk
=
atomJ
.
damp
;
float
pdk
=
atomJ
.
damp
;
float
ptk
=
atomJ
.
thole
;
float
ptk
=
atomJ
.
thole
;
...
@@ -280,9 +299,11 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
...
@@ -280,9 +299,11 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
ddsc3
[
1
]
=
temp3
*
xr
;
ddsc3
[
1
]
=
temp3
*
xr
;
ddsc3
[
2
]
=
temp3
*
yr
;
ddsc3
[
2
]
=
temp3
*
yr
;
ddsc3
[
3
]
=
temp3
*
zr
;
ddsc3
[
3
]
=
temp3
*
zr
;
ddsc5
[
1
]
=
temp5
*
ddsc3
[
1
];
ddsc5
[
1
]
=
temp5
*
ddsc3
[
1
];
ddsc5
[
2
]
=
temp5
*
ddsc3
[
2
];
ddsc5
[
2
]
=
temp5
*
ddsc3
[
2
];
ddsc5
[
3
]
=
temp5
*
ddsc3
[
3
];
ddsc5
[
3
]
=
temp5
*
ddsc3
[
3
];
ddsc7
[
1
]
=
temp7
*
ddsc5
[
1
];
ddsc7
[
1
]
=
temp7
*
ddsc5
[
1
];
ddsc7
[
2
]
=
temp7
*
ddsc5
[
2
];
ddsc7
[
2
]
=
temp7
*
ddsc5
[
2
];
ddsc7
[
3
]
=
temp7
*
ddsc5
[
3
];
ddsc7
[
3
]
=
temp7
*
ddsc5
[
3
];
...
@@ -508,8 +529,8 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
...
@@ -508,8 +529,8 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
+
rr7
*
gli
[
3
]
*
psc7
);
+
rr7
*
gli
[
3
]
*
psc7
);
e
=
e
-
(
1.0
f
-
scalingFactors
[
MScaleIndex
])
*
erl
;
e
=
e
-
(
1.0
f
-
scalingFactors
[
MScaleIndex
])
*
erl
;
ei
=
ei
-
erli
;
ei
=
ei
-
erli
;
e
=
f
*
e
;
e
=
conversionFactor
*
e
;
ei
=
f
*
ei
;
ei
=
conversionFactor
*
ei
;
// em = em + e;
// em = em + e;
// ep = ep + ei;
// ep = ep + ei;
...
@@ -679,12 +700,9 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
...
@@ -679,12 +700,9 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
+
(
glip
[
2
]
+
glip
[
7
])
*
scalingFactors
[
DScaleIndex
]);
+
(
glip
[
2
]
+
glip
[
7
])
*
scalingFactors
[
DScaleIndex
]);
float
temp7
=
0.5
f
*
rr7
*
(
gli
[
3
]
*
scalingFactors
[
PScaleIndex
]
float
temp7
=
0.5
f
*
rr7
*
(
gli
[
3
]
*
scalingFactors
[
PScaleIndex
]
+
glip
[
3
]
*
scalingFactors
[
DScaleIndex
]);
+
glip
[
3
]
*
scalingFactors
[
DScaleIndex
]);
fridmp
[
1
]
=
temp3
*
ddsc3
[
1
]
+
temp5
*
ddsc5
[
1
]
fridmp
[
1
]
=
temp3
*
ddsc3
[
1
]
+
temp5
*
ddsc5
[
1
]
+
temp7
*
ddsc7
[
1
];
+
temp7
*
ddsc7
[
1
];
fridmp
[
2
]
=
temp3
*
ddsc3
[
2
]
+
temp5
*
ddsc5
[
2
]
+
temp7
*
ddsc7
[
2
];
fridmp
[
2
]
=
temp3
*
ddsc3
[
2
]
+
temp5
*
ddsc5
[
2
]
fridmp
[
3
]
=
temp3
*
ddsc3
[
3
]
+
temp5
*
ddsc5
[
3
]
+
temp7
*
ddsc7
[
3
];
+
temp7
*
ddsc7
[
2
];
fridmp
[
3
]
=
temp3
*
ddsc3
[
3
]
+
temp5
*
ddsc5
[
3
]
+
temp7
*
ddsc7
[
3
];
// find some scaling terms for induced-induced force
// find some scaling terms for induced-induced force
...
@@ -833,84 +851,186 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
...
@@ -833,84 +851,186 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
// handle the case where scaling is used
// handle the case where scaling is used
for
(
int
j
=
1
;
j
<=
3
;
j
++
){
for
(
int
j
=
1
;
j
<=
3
;
j
++
){
ftm2
[
j
]
=
f
*
(
ftm2
[
j
]
-
(
1.0
f
-
scalingFactors
[
MScaleIndex
])
*
ftm2r
[
j
]);
ftm2
[
j
]
=
(
ftm2
[
j
]
-
(
1.0
f
-
scalingFactors
[
MScaleIndex
])
*
ftm2r
[
j
]);
ftm2i
[
j
]
=
f
*
(
ftm2i
[
j
]
-
ftm2ri
[
j
]);
ftm2i
[
j
]
=
(
ftm2i
[
j
]
-
ftm2ri
[
j
]);
ttm2
[
j
]
=
f
*
(
ttm2
[
j
]
-
(
1.0
f
-
scalingFactors
[
MScaleIndex
])
*
ttm2r
[
j
]);
ttm2
[
j
]
=
(
ttm2
[
j
]
-
(
1.0
f
-
scalingFactors
[
MScaleIndex
])
*
ttm2r
[
j
]);
ttm2i
[
j
]
=
f
*
(
ttm2i
[
j
]
-
ttm2ri
[
j
]);
ttm2i
[
j
]
=
(
ttm2i
[
j
]
-
ttm2ri
[
j
]);
ttm3
[
j
]
=
f
*
(
ttm3
[
j
]
-
(
1.0
f
-
scalingFactors
[
MScaleIndex
])
*
ttm3r
[
j
]);
ttm3
[
j
]
=
(
ttm3
[
j
]
-
(
1.0
f
-
scalingFactors
[
MScaleIndex
])
*
ttm3r
[
j
]);
ttm3i
[
j
]
=
f
*
(
ttm3i
[
j
]
-
ttm3ri
[
j
]);
ttm3i
[
j
]
=
(
ttm3i
[
j
]
-
ttm3ri
[
j
]);
}
}
// increment gradient due to force and torque on first site;
// increment gradient due to force and torque on first site;
outputForce
[
0
]
=
-
conversionFactor
*
(
ftm2
[
1
]
+
ftm2i
[
1
]);
outputForce
[
1
]
=
-
conversionFactor
*
(
ftm2
[
2
]
+
ftm2i
[
2
]);
outputForce
[
2
]
=
-
conversionFactor
*
(
ftm2
[
3
]
+
ftm2i
[
3
]);
outputTorque
[
0
][
0
]
=
conversionFactor
*
(
ttm2
[
1
]
+
ttm2i
[
1
]);
outputTorque
[
0
][
1
]
=
conversionFactor
*
(
ttm2
[
2
]
+
ttm2i
[
2
]);
outputTorque
[
0
][
2
]
=
conversionFactor
*
(
ttm2
[
3
]
+
ttm2i
[
3
]);
outputTorque
[
1
][
0
]
=
conversionFactor
*
(
ttm3
[
1
]
+
ttm3i
[
1
]);
outputTorque
[
1
][
1
]
=
conversionFactor
*
(
ttm3
[
2
]
+
ttm3i
[
2
]);
outputTorque
[
1
][
2
]
=
conversionFactor
*
(
ttm3
[
3
]
+
ttm3i
[
3
]);
//outputTorque[1][2] = conversionFactor*(ttm3_2 + ttm3i_2);
#ifdef AMOEBA_DEBUG
int
debugIndex
=
0
;
float
idTracker
=
1.0
f
;
/*
/*
dem(1,ii) = dem(1,ii) + ftm2[1];
debugArray[debugIndex].x = atomI.labFrameDipole[0];
dem(2,ii) = dem(2,ii) + ftm2[2];
debugArray[debugIndex].y = atomI.labFrameDipole[1];
dem(3,ii) = dem(3,ii) + ftm2[3];
debugArray[debugIndex].z = atomI.labFrameDipole[2];
dep(1,ii) = dep(1,ii) + ftm2i[1];
debugArray[debugIndex].w = r2;
dep(2,ii) = dep(2,ii) + ftm2i[2];
dep(3,ii) = dep(3,ii) + ftm2i[3];
debugIndex++;
call torque (i,ttm2,ttm2i,frcxi,frcyi,frczi);
idTracker += 1.0;
debugArray[debugIndex].x = atomJ.labFrameDipole[0];
// increment gradient due to force and torque on second site;
debugArray[debugIndex].y = atomJ.labFrameDipole[1];
debugArray[debugIndex].z = atomJ.labFrameDipole[2];
dem(1,kk) = dem(1,kk) - ftm2[1];
debugArray[debugIndex].w = cSim.alphaEwald;
dem(2,kk) = dem(2,kk) - ftm2[2];
dem(3,kk) = dem(3,kk) - ftm2[3];
debugIndex++;
dep(1,kk) = dep(1,kk) - ftm2i[1];
idTracker += 1.0;
dep(2,kk) = dep(2,kk) - ftm2i[2];
debugArray[debugIndex].x = atomI.inducedDipole[0];
dep(3,kk) = dep(3,kk) - ftm2i[3];
debugArray[debugIndex].y = atomI.inducedDipole[1];
call torque (k,ttm3,ttm3i,frcxk,frcyk,frczk);
debugArray[debugIndex].z = atomI.inducedDipole[2];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomJ.inducedDipole[0];
debugArray[debugIndex].y = atomJ.inducedDipole[1];
debugArray[debugIndex].z = atomJ.inducedDipole[2];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomI.inducedDipoleP[0];
debugArray[debugIndex].y = atomI.inducedDipoleP[1];
debugArray[debugIndex].z = atomI.inducedDipoleP[2];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomJ.inducedDipoleP[0];
debugArray[debugIndex].y = atomJ.inducedDipoleP[1];
debugArray[debugIndex].z = atomJ.inducedDipoleP[2];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = conversionFactor*ftm2[1];
debugArray[debugIndex].y = conversionFactor*ftm2[2];
debugArray[debugIndex].z = conversionFactor*ftm2[3];
debugArray[debugIndex].w = idTracker;
debugIndex++;
*/
*/
idTracker
+=
1.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ftm2i
[
1
];
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ftm2i
[
2
];
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ftm2i
[
3
];
debugArray
[
debugIndex
].
w
=
r2
;
debugIndex
++
;
idTracker
+=
1.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
fridmp
[
1
];
debugArray
[
debugIndex
].
y
=
conversionFactor
*
fridmp
[
2
];
debugArray
[
debugIndex
].
z
=
conversionFactor
*
fridmp
[
3
];
debugArray
[
debugIndex
].
w
=
cSim
.
alphaEwald
;
debugIndex
++
;
idTracker
+=
1.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
findmp
[
1
];
debugArray
[
debugIndex
].
y
=
conversionFactor
*
findmp
[
2
];
debugArray
[
debugIndex
].
z
=
conversionFactor
*
findmp
[
3
];
debugArray
[
debugIndex
].
w
=
cSim
.
alphaEwald
+
1.0
f
;
debugIndex
++
;
idTracker
+=
1.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ttm2
[
1
];
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ttm2
[
2
];
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ttm2
[
3
];
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
idTracker
+=
1.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ttm2i
[
1
];
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ttm2i
[
2
];
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ttm2i
[
3
];
debugArray
[
debugIndex
].
w
=
idTracker
;
#endif
}
else
{
outputForce
[
0
]
=
0.0
f
;
outputForce
[
1
]
=
0.0
f
;
outputForce
[
2
]
=
0.0
f
;
outputTorque
[
0
][
0
]
=
0.0
f
;
outputTorque
[
0
][
1
]
=
0.0
f
;
outputTorque
[
0
][
2
]
=
0.0
f
;
outputTorque
[
1
][
0
]
=
0.0
f
;
outputTorque
[
1
][
1
]
=
0.0
f
;
outputTorque
[
1
][
2
]
=
0.0
f
;
#ifdef AMOEBA_DEBUG
for
(
int
ii
=
0
;
ii
<
20
;
ii
++
){
debugArray
[
ii
].
x
=
0.0
f
;
debugArray
[
ii
].
y
=
0.0
f
;
debugArray
[
ii
].
z
=
0.0
f
;
debugArray
[
ii
].
w
=
(
float
)
ii
;
}
#endif
}
}
return
;
return
;
}
}
__device__
void
loadRealSpaceEwaldShared
(
struct
RealSpaceEwaldParticle
*
sA
,
unsigned
int
atomI
,
__device__
void
loadPmeDirectElectrostaticShared
(
struct
PmeDirectElectrostaticParticle
*
sA
,
unsigned
int
atomI
)
float4
*
atomCoord
,
float
*
labFrameDipoleJ
,
float
*
labQuadrupole
,
float
*
inducedDipole
,
float
*
inducedDipolePolar
,
float2
*
dampingFactorAndThole
)
{
{
// coordinates & charge
// coordinates & charge
sA
->
x
=
cSim
.
pPosq
[
atomI
].
x
;
sA
->
x
=
atomCoord
[
atomI
].
x
;
sA
->
y
=
cSim
.
pPosq
[
atomI
].
y
;
sA
->
y
=
atomCoord
[
atomI
].
y
;
sA
->
z
=
cSim
.
pPosq
[
atomI
].
z
;
sA
->
z
=
atomCoord
[
atomI
].
z
;
sA
->
q
=
cSim
.
pPosq
[
atomI
].
w
;
sA
->
q
=
atomCoord
[
atomI
].
w
;
// lab dipole
// lab dipole
sA
->
labFrameDipole
[
0
]
=
labFrameDipoleJ
[
atomI
*
3
];
sA
->
labFrameDipole
[
0
]
=
cAmoebaSim
.
pLabFrameDipole
[
atomI
*
3
];
sA
->
labFrameDipole
[
1
]
=
labFrameDipoleJ
[
atomI
*
3
+
1
];
sA
->
labFrameDipole
[
1
]
=
cAmoebaSim
.
pLabFrameDipole
[
atomI
*
3
+
1
];
sA
->
labFrameDipole
[
2
]
=
labFrameDipoleJ
[
atomI
*
3
+
2
];
sA
->
labFrameDipole
[
2
]
=
cAmoebaSim
.
pLabFrameDipole
[
atomI
*
3
+
2
];
// lab quadrupole
// lab quadrupole
sA
->
labFrameQuadrupole
[
0
]
=
lab
Quadrupole
[
atomI
*
9
];
sA
->
labFrameQuadrupole
[
0
]
=
cAmoebaSim
.
pLabFrame
Quadrupole
[
atomI
*
9
];
sA
->
labFrameQuadrupole
[
1
]
=
lab
Quadrupole
[
atomI
*
9
+
1
];
sA
->
labFrameQuadrupole
[
1
]
=
cAmoebaSim
.
pLabFrame
Quadrupole
[
atomI
*
9
+
1
];
sA
->
labFrameQuadrupole
[
2
]
=
lab
Quadrupole
[
atomI
*
9
+
2
];
sA
->
labFrameQuadrupole
[
2
]
=
cAmoebaSim
.
pLabFrame
Quadrupole
[
atomI
*
9
+
2
];
sA
->
labFrameQuadrupole
[
3
]
=
lab
Quadrupole
[
atomI
*
9
+
3
];
sA
->
labFrameQuadrupole
[
3
]
=
cAmoebaSim
.
pLabFrame
Quadrupole
[
atomI
*
9
+
3
];
sA
->
labFrameQuadrupole
[
4
]
=
lab
Quadrupole
[
atomI
*
9
+
4
];
sA
->
labFrameQuadrupole
[
4
]
=
cAmoebaSim
.
pLabFrame
Quadrupole
[
atomI
*
9
+
4
];
sA
->
labFrameQuadrupole
[
5
]
=
lab
Quadrupole
[
atomI
*
9
+
5
];
sA
->
labFrameQuadrupole
[
5
]
=
cAmoebaSim
.
pLabFrame
Quadrupole
[
atomI
*
9
+
5
];
sA
->
labFrameQuadrupole
[
6
]
=
lab
Quadrupole
[
atomI
*
9
+
6
];
sA
->
labFrameQuadrupole
[
6
]
=
cAmoebaSim
.
pLabFrame
Quadrupole
[
atomI
*
9
+
6
];
sA
->
labFrameQuadrupole
[
7
]
=
lab
Quadrupole
[
atomI
*
9
+
7
];
sA
->
labFrameQuadrupole
[
7
]
=
cAmoebaSim
.
pLabFrame
Quadrupole
[
atomI
*
9
+
7
];
sA
->
labFrameQuadrupole
[
8
]
=
lab
Quadrupole
[
atomI
*
9
+
8
];
sA
->
labFrameQuadrupole
[
8
]
=
cAmoebaSim
.
pLabFrame
Quadrupole
[
atomI
*
9
+
8
];
// induced dipole
// induced dipole
sA
->
inducedDipole
[
0
]
=
i
nducedDipole
[
atomI
*
3
];
sA
->
inducedDipole
[
0
]
=
cAmoebaSim
.
pI
nducedDipole
[
atomI
*
3
];
sA
->
inducedDipole
[
1
]
=
i
nducedDipole
[
atomI
*
3
+
1
];
sA
->
inducedDipole
[
1
]
=
cAmoebaSim
.
pI
nducedDipole
[
atomI
*
3
+
1
];
sA
->
inducedDipole
[
2
]
=
i
nducedDipole
[
atomI
*
3
+
2
];
sA
->
inducedDipole
[
2
]
=
cAmoebaSim
.
pI
nducedDipole
[
atomI
*
3
+
2
];
// induced dipole polar
// induced dipole polar
sA
->
inducedDipoleP
[
0
]
=
i
nducedDipolePolar
[
atomI
*
3
];
sA
->
inducedDipoleP
[
0
]
=
cAmoebaSim
.
pI
nducedDipolePolar
[
atomI
*
3
];
sA
->
inducedDipoleP
[
1
]
=
i
nducedDipolePolar
[
atomI
*
3
+
1
];
sA
->
inducedDipoleP
[
1
]
=
cAmoebaSim
.
pI
nducedDipolePolar
[
atomI
*
3
+
1
];
sA
->
inducedDipoleP
[
2
]
=
i
nducedDipolePolar
[
atomI
*
3
+
2
];
sA
->
inducedDipoleP
[
2
]
=
cAmoebaSim
.
pI
nducedDipolePolar
[
atomI
*
3
+
2
];
sA
->
damp
=
d
ampingFactorAndThole
[
atomI
].
x
;
sA
->
damp
=
cAmoebaSim
.
pD
ampingFactorAndThole
[
atomI
].
x
;
sA
->
thole
=
d
ampingFactorAndThole
[
atomI
].
y
;
sA
->
thole
=
cAmoebaSim
.
pD
ampingFactorAndThole
[
atomI
].
y
;
}
}
...
@@ -918,11 +1038,11 @@ __device__ void loadRealSpaceEwaldShared( struct RealSpaceEwaldParticle* sA, uns
...
@@ -918,11 +1038,11 @@ __device__ void loadRealSpaceEwaldShared( struct RealSpaceEwaldParticle* sA, uns
#undef USE_OUTPUT_BUFFER_PER_WARP
#undef USE_OUTPUT_BUFFER_PER_WARP
#define METHOD_NAME(a, b) a##N2##b
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateAmoebaCuda
RealSpaceEwald
.h"
#include "kCalculateAmoebaCuda
PmeDirectElectrostatic
.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateAmoebaCuda
RealSpaceEwald
.h"
#include "kCalculateAmoebaCuda
PmeDirectElectrostatic
.h"
// reduce psWorkArray_3_1 -> force
// reduce psWorkArray_3_1 -> force
// reduce psWorkArray_3_2 -> torque
// reduce psWorkArray_3_2 -> torque
...
@@ -932,23 +1052,27 @@ static void kReduceForceTorque(amoebaGpuContext amoebaGpu )
...
@@ -932,23 +1052,27 @@ static void kReduceForceTorque(amoebaGpuContext amoebaGpu )
kReduceFields_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
fieldReduceThreadsPerBlock
>>>
(
kReduceFields_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
fieldReduceThreadsPerBlock
>>>
(
amoebaGpu
->
paddedNumberOfAtoms
*
3
,
amoebaGpu
->
outputBuffers
,
amoebaGpu
->
paddedNumberOfAtoms
*
3
,
amoebaGpu
->
outputBuffers
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
amoebaGpu
->
psForce
->
_pDevStream
[
0
]
);
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
amoebaGpu
->
psForce
->
_pDevStream
[
0
]
);
LAUNCHERROR
(
"kReduce
RealSpaceEwald
Force"
);
LAUNCHERROR
(
"kReduce
PmeDirectElectrostatic
Force"
);
kReduceFields_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
fieldReduceThreadsPerBlock
>>>
(
kReduceFields_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
fieldReduceThreadsPerBlock
>>>
(
amoebaGpu
->
paddedNumberOfAtoms
*
3
,
amoebaGpu
->
outputBuffers
,
amoebaGpu
->
paddedNumberOfAtoms
*
3
,
amoebaGpu
->
outputBuffers
,
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
amoebaGpu
->
psTorque
->
_pDevStream
[
0
]
);
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
amoebaGpu
->
psTorque
->
_pDevStream
[
0
]
);
LAUNCHERROR
(
"kReduce
RealSpaceEwald
Torque"
);
LAUNCHERROR
(
"kReduce
PmeDirectElectrostatic
Torque"
);
}
}
//#define GET_INDUCED_DIPOLE_FROM_FILE
#ifdef GET_INDUCED_DIPOLE_FROM_FILE
#include <stdlib.h>
#endif
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Compute Amoeba electrostatic force & torque
Compute Amoeba
dirrect space portion of
electrostatic force & torque
@param amoebaGpu amoebaGpu context
@param amoebaGpu amoebaGpu context
@param gpu OpenMM gpu Cuda context
--------------------------------------------------------------------------------------- */
--------------------------------------------------------------------------------------- */
void
cudaComputeAmoeba
RealSpaceEwald
(
amoebaGpuContext
amoebaGpu
)
void
cudaComputeAmoeba
PmeDirectElectrostatic
(
amoebaGpuContext
amoebaGpu
)
{
{
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
...
@@ -956,7 +1080,7 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
...
@@ -956,7 +1080,7 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
static
unsigned
int
threadsPerBlock
=
0
;
static
unsigned
int
threadsPerBlock
=
0
;
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
static
const
char
*
methodName
=
"cudaComputeAmoeba
RealSpaceEwald
"
;
static
const
char
*
methodName
=
"cudaComputeAmoeba
PmeDirectElectrostatic
"
;
static
int
timestep
=
0
;
static
int
timestep
=
0
;
std
::
vector
<
int
>
fileId
;
std
::
vector
<
int
>
fileId
;
timestep
++
;
timestep
++
;
...
@@ -983,7 +1107,38 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
...
@@ -983,7 +1107,38 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
*
paddedNumberOfAtoms
,
1
,
"DebugArray"
);
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
*
paddedNumberOfAtoms
,
1
,
"DebugArray"
);
memset
(
debugArray
->
_pSysStream
[
0
],
0
,
sizeof
(
float
)
*
4
*
paddedNumberOfAtoms
*
paddedNumberOfAtoms
);
memset
(
debugArray
->
_pSysStream
[
0
],
0
,
sizeof
(
float
)
*
4
*
paddedNumberOfAtoms
*
paddedNumberOfAtoms
);
debugArray
->
Upload
();
debugArray
->
Upload
();
unsigned
int
targetAtom
=
0
;
unsigned
int
targetAtom
=
10
;
#endif
#ifdef GET_INDUCED_DIPOLE_FROM_FILE
std
::
string
fileName
=
"waterInducedDipole.txt"
;
StringVectorVector
fileContents
;
readFile
(
fileName
,
fileContents
);
unsigned
int
offset
=
0
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Read file: %s %u
\n
"
,
fileName
.
c_str
(),
fileContents
.
size
()
);
fflush
(
amoebaGpu
->
log
);
for
(
unsigned
int
ii
=
1
;
ii
<
fileContents
.
size
()
-
1
;
ii
++
){
StringVector
lineTokens
=
fileContents
[
ii
];
unsigned
int
lineTokenIndex
=
1
;
// (void) fprintf( amoebaGpu->log, " %u %s %s\n", ii, lineTokens[0].c_str(), lineTokens[lineTokenIndex].c_str() ); fflush( amoebaGpu->log );
amoebaGpu
->
psInducedDipole
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psInducedDipole
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psInducedDipole
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
offset
-=
3
;
amoebaGpu
->
psInducedDipolePolar
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psInducedDipolePolar
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psInducedDipolePolar
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
}
float
conversion
=
0.1
f
;
for
(
int
ii
=
0
;
ii
<
3
*
gpu
->
natoms
;
ii
++
){
amoebaGpu
->
psInducedDipole
->
_pSysStream
[
0
][
ii
]
*=
conversion
;
amoebaGpu
->
psInducedDipolePolar
->
_pSysStream
[
0
][
ii
]
*=
conversion
;
}
amoebaGpu
->
gpuContext
->
sim
.
alphaEwald
=
5.4459052e+00
f
;
SetCalculateAmoebaPmeDirectElectrostaticSim
(
amoebaGpu
);
amoebaGpu
->
psInducedDipole
->
Upload
();
amoebaGpu
->
psInducedDipolePolar
->
Upload
();
#endif
#endif
// on first pass, set threads/block
// on first pass, set threads/block
...
@@ -996,20 +1151,15 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
...
@@ -996,20 +1151,15 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
maxThreads
=
128
;
maxThreads
=
128
;
else
else
maxThreads
=
64
;
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
RealSpaceEwald
Particle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
PmeDirectElectrostatic
Particle
)),
maxThreads
);
}
}
kClearFields_3
(
amoebaGpu
,
2
);
kClearFields_3
(
amoebaGpu
,
2
);
if
(
gpu
->
bOutputBufferPerWarp
){
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoeba
CudaRealSpaceEwald
N2ByWarpForces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
RealSpaceEwald
Particle
)
*
threadsPerBlock
>>>
(
kCalculateAmoeba
PmeDirectElectrostatic
N2ByWarpForces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
PmeDirectElectrostatic
Particle
)
*
threadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
gpu
->
psPosq4
->
_pDevStream
[
0
],
amoebaGpu
->
psLabFrameDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psLabFrameQuadrupole
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipolePolar
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
...
@@ -1021,22 +1171,14 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
...
@@ -1021,22 +1171,14 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
}
else
{
}
else
{
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoeba
CudaRealSpaceEwald
N2Forces no warp: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoeba
PmeDirectElectrostatic
N2Forces no warp: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
\n
"
,
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
RealSpaceEwaldParticle
),
sizeof
(
RealSpaceEwald
Particle
)
*
threadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
sizeof
(
PmeDirectElectrostaticParticle
),
sizeof
(
PmeDirectElectrostatic
Particle
)
*
threadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#endif
#endif
cudaChannelFormatDesc
channelDesc
=
cudaCreateChannelDesc
<
float
>
();
kCalculateAmoebaPmeDirectElectrostaticN2Forces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
PmeDirectElectrostaticParticle
)
*
threadsPerBlock
>>>
(
cudaBindTexture
(
NULL
,
&
tabulatedErfcRef
,
gpu
->
psTabulatedErfc
->
_pDevData
,
&
channelDesc
,
gpu
->
psTabulatedErfc
->
_length
*
sizeof
(
float
));
kCalculateAmoebaCudaRealSpaceEwaldN2Forces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
RealSpaceEwaldParticle
)
*
threadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
gpu
->
psPosq4
->
_pDevStream
[
0
],
amoebaGpu
->
psLabFrameDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psLabFrameQuadrupole
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipolePolar
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
...
@@ -1045,108 +1187,69 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
...
@@ -1045,108 +1187,69 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
]
);
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
]
);
#endif
#endif
}
}
LAUNCHERROR
(
"kCalculateAmoeba
CudaRealSpaceEwald
N2Forces"
);
LAUNCHERROR
(
"kCalculateAmoeba
PmeDirectElectrostatic
N2Forces"
);
kReduceForceTorque
(
amoebaGpu
);
kReduceForceTorque
(
amoebaGpu
);
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
if
(
amoebaGpu
->
log
){
amoebaGpu
->
psForce
->
Download
();
amoebaGpu
->
psForce
->
Download
();
amoebaGpu
->
psTorque
->
Download
();
amoebaGpu
->
psTorque
->
Download
();
debugArray
->
Download
();
debugArray
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished RealSpaceEwald kernel execution
\n
"
);
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished PmeDirectElectrostatic kernel execution
\n
"
);
(
void
)
fflush
(
amoebaGpu
->
log
);
int
maxPrint
=
1400
;
int
maxPrint
=
1400
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
float
conversion
=
1.0
f
/
41.84
f
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
int
indexOffset
=
ii
*
3
;
int
indexOffset
=
ii
*
3
;
// force
// force
(
void
)
fprintf
(
amoebaGpu
->
log
,
"RealSpaceEwaldF [%16.9e %16.9e %16.9e] "
,
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
indexOffset
],
(
void
)
fprintf
(
amoebaGpu
->
log
,
"PmeDirectElectrostaticF [%16.9e %16.9e %16.9e] "
,
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
indexOffset
+
1
],
conversion
*
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
indexOffset
],
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
indexOffset
+
2
]
);
conversion
*
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
indexOffset
+
1
],
conversion
*
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
indexOffset
+
2
]
);
// torque
// torque
(
void
)
fprintf
(
amoebaGpu
->
log
,
"RealSpaceEwaldT [%16.9e %16.9e %16.9e] "
,
amoebaGpu
->
psTorque
->
_pSysStream
[
0
][
indexOffset
],
(
void
)
fprintf
(
amoebaGpu
->
log
,
"PmeDirectElectrostaticT [%16.9e %16.9e %16.9e] "
,
amoebaGpu
->
psTorque
->
_pSysStream
[
0
][
indexOffset
+
1
],
conversion
*
amoebaGpu
->
psTorque
->
_pSysStream
[
0
][
indexOffset
],
amoebaGpu
->
psTorque
->
_pSysStream
[
0
][
indexOffset
+
2
]
);
conversion
*
amoebaGpu
->
psTorque
->
_pSysStream
[
0
][
indexOffset
+
1
],
conversion
*
amoebaGpu
->
psTorque
->
_pSysStream
[
0
][
indexOffset
+
2
]
);
// coords
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
#if 0
if
(
ii
==
maxPrint
&&
(
gpu
->
natoms
-
maxPrint
)
>
ii
){
(void) fprintf( amoebaGpu->log,"x[%16.9e %16.9e %16.9e] ",
ii
=
gpu
->
natoms
-
maxPrint
;
gpu->psPosq4->_pSysStream[0][ii].x,
}
gpu->psPosq4->_pSysStream[0][ii].y,
}
gpu->psPosq4->_pSysStream[0][ii].z);
if
(
1
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"DebugElec
\n
"
);
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
for( int jj = 0; jj < gpu->natoms && jj < 5; jj++ ){
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
int debugIndex = jj*gpu->natoms + ii;
int
debugIndex
=
jj
;
float xx = gpu->psPosq4->_pSysStream[0][jj].x - gpu->psPosq4->_pSysStream[0][ii].x;
for
(
int
kk
=
0
;
kk
<
15
;
kk
++
){
float yy = gpu->psPosq4->_pSysStream[0][jj].y - gpu->psPosq4->_pSysStream[0][ii].y;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d %5d [%16.9e %16.9e %16.9e %16.9e] E11
\n
"
,
targetAtom
,
jj
,
float zz = gpu->psPosq4->_pSysStream[0][jj].z - gpu->psPosq4->_pSysStream[0][ii].z;
debugArray
->
_pSysStream
[
0
][
debugIndex
].
x
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
y
,
(void) fprintf( amoebaGpu->log,"\n%4d %4d delta [%16.9e %16.9e %16.9e] [%16.9e %16.9e %16.9e] ",
debugArray
->
_pSysStream
[
0
][
debugIndex
].
z
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
w
);
ii, jj, xx, yy, zz,
debugIndex
+=
paddedNumberOfAtoms
;
debugArray->_pSysStream[0][debugIndex].x, debugArray->_pSysStream[0][debugIndex].y, debugArray->_pSysStream[0][debugIndex].z );
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
}
}
#endif
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
(
void
)
fflush
(
amoebaGpu
->
log
);
if
(
ii
==
maxPrint
&&
(
gpu
->
natoms
-
maxPrint
)
>
ii
){
ii
=
gpu
->
natoms
-
maxPrint
;
if
(
1
){
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psForce
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psTorque
,
outputVector
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeDirectForceTorque"
,
fileId
,
outputVector
);
}
}
}
if
(
1
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"DebugElec
\n
"
);
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
int
debugIndex
=
jj
;
for
(
int
kk
=
0
;
kk
<
5
;
kk
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d %5d [%16.9e %16.9e %16.9e %16.9e] E11
\n
"
,
targetAtom
,
jj
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
x
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
y
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
z
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
w
);
debugIndex
+=
paddedNumberOfAtoms
;
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
}
}
(
void
)
fflush
(
amoebaGpu
->
log
);
if
(
0
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s Tiled F & T
\n
"
,
methodName
);
fflush
(
amoebaGpu
->
log
);
int
maxPrint
=
12
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
// print cpu & gpu reductions
int
offset
=
3
*
ii
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%6d F[%16.7e %16.7e %16.7e] T[%16.7e %16.7e %16.7e]
\n
"
,
ii
,
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
offset
],
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
offset
+
1
],
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
offset
+
2
],
amoebaGpu
->
psTorque
->
_pSysStream
[
0
][
offset
],
amoebaGpu
->
psTorque
->
_pSysStream
[
0
][
offset
+
1
],
amoebaGpu
->
psTorque
->
_pSysStream
[
0
][
offset
+
2
]
);
if
(
(
ii
==
maxPrint
)
&&
(
ii
<
(
gpu
->
natoms
-
maxPrint
))
)
ii
=
gpu
->
natoms
-
maxPrint
;
}
}
if
(
1
){
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psForce
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psTorque
,
outputVector
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaForceTorque"
,
fileId
,
outputVector
);
}
}
}
delete
debugArray
;
delete
debugArray
;
...
@@ -1155,3 +1258,19 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
...
@@ -1155,3 +1258,19 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
}
}
/**---------------------------------------------------------------------------------------
Compute Amoeba electrostatic force & torque using PME
@param amoebaGpu amoebaGpu context
--------------------------------------------------------------------------------------- */
void
cudaComputeAmoebaPmeElectrostatic
(
amoebaGpuContext
amoebaGpu
)
{
cudaComputeAmoebaPmeDirectElectrostatic
(
amoebaGpu
);
kCalculateAmoebaPME
(
amoebaGpu
);
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCuda
RealSpaceEwald
.h
→
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCuda
PmeDirectElectrostatic
.h
View file @
2ef05285
...
@@ -34,15 +34,8 @@ __launch_bounds__(128, 1)
...
@@ -34,15 +34,8 @@ __launch_bounds__(128, 1)
#else
#else
__launch_bounds__
(
64
,
1
)
__launch_bounds__
(
64
,
1
)
#endif
#endif
void
METHOD_NAME
(
kCalculateAmoebaCudaRealSpaceEwald
,
Forces_kernel
)(
void
METHOD_NAME
(
kCalculateAmoebaPmeDirectElectrostatic
,
Forces_kernel
)(
unsigned
int
*
workUnit
,
unsigned
int
*
workUnit
,
float
*
outputForce
,
float
*
outputTorque
float4
*
atomCoord
,
float
*
labFrameDipole
,
float
*
labFrameQuadrupole
,
float
*
inducedDipole
,
float
*
inducedDipolePolar
,
float
*
outputForce
,
float
*
outputTorque
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
,
unsigned
int
targetAtom
,
float4
*
debugArray
,
unsigned
int
targetAtom
...
@@ -50,10 +43,11 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
...
@@ -50,10 +43,11 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
){
){
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
int
pullIndexMax
=
12
;
float4
pullBack
[
20
];
float4
pullBack
[
20
];
#endif
#endif
extern
__shared__
RealSpaceEwald
Particle
sA
[];
extern
__shared__
PmeDirectElectrostatic
Particle
sA
[];
unsigned
int
totalWarps
=
gridDim
.
x
*
blockDim
.
x
/
GRID
;
unsigned
int
totalWarps
=
gridDim
.
x
*
blockDim
.
x
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
...
@@ -80,12 +74,10 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
...
@@ -80,12 +74,10 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
unsigned
int
tj
=
tgx
;
RealSpaceEwald
Particle
*
psA
=
&
sA
[
tbx
];
PmeDirectElectrostatic
Particle
*
psA
=
&
sA
[
tbx
];
unsigned
int
atomI
=
x
+
tgx
;
unsigned
int
atomI
=
x
+
tgx
;
RealSpaceEwaldParticle
localParticle
;
PmeDirectElectrostaticParticle
localParticle
;
loadRealSpaceEwaldShared
(
&
localParticle
,
atomI
,
loadPmeDirectElectrostaticShared
(
&
localParticle
,
atomI
);
atomCoord
,
labFrameDipole
,
labFrameQuadrupole
,
inducedDipole
,
inducedDipolePolar
,
cAmoebaSim
.
pDampingFactorAndThole
);
localParticle
.
force
[
0
]
=
0
.
0
f
;
localParticle
.
force
[
0
]
=
0
.
0
f
;
localParticle
.
force
[
1
]
=
0
.
0
f
;
localParticle
.
force
[
1
]
=
0
.
0
f
;
...
@@ -105,9 +97,7 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
...
@@ -105,9 +97,7 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
// load shared data
// load shared data
loadRealSpaceEwaldShared
(
&
(
sA
[
threadIdx
.
x
]),
atomI
,
loadPmeDirectElectrostaticShared
(
&
(
sA
[
threadIdx
.
x
]),
atomI
);
atomCoord
,
labFrameDipole
,
labFrameQuadrupole
,
inducedDipole
,
inducedDipolePolar
,
cAmoebaSim
.
pDampingFactorAndThole
);
if
(
!
bExclusionFlag
)
if
(
!
bExclusionFlag
)
{
{
...
@@ -122,9 +112,8 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
...
@@ -122,9 +112,8 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
float
torque
[
2
][
3
];
float
torque
[
2
][
3
];
float
energy
;
float
energy
;
calculateRealSpaceEwaldPairIxn_kernel
(
localParticle
,
psA
[
j
],
calculatePmeDirectElectrostaticPairIxn_kernel
(
localParticle
,
psA
[
j
],
cAmoebaSim
.
scalingDistanceCutoff
,
scalingFactors
,
scalingFactors
,
force
,
torque
,
&
energy
force
,
torque
,
&
energy
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
,
pullBack
,
pullBack
#endif
#endif
...
@@ -142,7 +131,7 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
...
@@ -142,7 +131,7 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
totalEnergy
+=
mask
?
0
.
5
*
energy
:
0
.
0
f
;
totalEnergy
+=
mask
?
0
.
5
*
energy
:
0
.
0
f
;
}
}
}
}
...
@@ -171,9 +160,8 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
...
@@ -171,9 +160,8 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
// force
// force
float
energy
;
float
energy
;
calculateRealSpaceEwaldPairIxn_kernel
(
localParticle
,
psA
[
j
],
calculatePmeDirectElectrostaticPairIxn_kernel
(
localParticle
,
psA
[
j
],
cAmoebaSim
.
scalingDistanceCutoff
,
scalingFactors
,
scalingFactors
,
force
,
torque
,
&
energy
force
,
torque
,
&
energy
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
,
pullBack
,
pullBack
#endif
#endif
...
@@ -194,63 +182,59 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
...
@@ -194,63 +182,59 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
totalEnergy
+=
mask
?
0
.
5
*
energy
:
0
.
0
f
;
totalEnergy
+=
mask
?
0
.
5
*
energy
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
){
if
(
atomI
==
targetAtom
){
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
atomJ
:
atomI
;
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
atomJ
:
atomI
;
float
blockId
=
1
.
0
f
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
atomJ
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
z
=
1
.
0
f
;
debugArray
[
index
].
y
=
(
float
)
atomJ
;
debugArray
[
index
].
w
=
(
float
)
y
;
debugArray
[
index
].
z
=
(
float
)
y
;
/*
debugArray
[
index
].
w
=
blockId
;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = mask ? force[0] : 0.0f;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray[index].y = mask ? force[1] : 0.0f;
debugArray
[
index
].
x
=
mask
?
force
[
0
]
:
0
.
0
f
;
debugArray[index].z = mask ? force[2] : 0.0f;
debugArray
[
index
].
y
=
mask
?
force
[
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
force
[
2
]
:
0
.
0
f
;
debugArray
[
index
].
w
=
blockId
;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = mask ? torque[0][0] : 0.0f;
debugArray[index].y = mask ? torque[0][1] : 0.0f;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray[index].z = mask ? torque[0][2] : 0.0f;
debugArray
[
index
].
x
=
mask
?
torque
[
0
][
0
]
:
0
.
0
f
;
*/
debugArray
[
index
].
y
=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
w
=
blockId
;
int
pullIndex
=
0
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
x
=
mask
?
torque
[
0
][
0
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
y
=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
debugArray
[
index
].
z
=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
debugArray
[
index
].
w
=
blockId
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
pullIndex
++
;
for
(
int
pullIndex
=
0
;
pullIndex
<
pullIndexMax
;
pullIndex
++
){
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
}
pullIndex
++
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
pullIndex
++
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
}
}
#endif
#endif
}
}
}
}
// include self energy and self torque
if
(
atomI
<
cAmoebaSim
.
numberOfAtoms
){
calculatePmeSelfTorqueElectrostaticPairIxn_kernel
(
localParticle
);
//float energy;
//calculatePmeSelfEnergyElectrostaticPairIxn_kernel( localParticle, &energy );
//totalEnergy += energy;
}
// Write results
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
#ifdef USE_OUTPUT_BUFFER_PER_WARP
...
@@ -300,9 +284,7 @@ if( atomI == targetAtom ){
...
@@ -300,9 +284,7 @@ if( atomI == targetAtom ){
{
{
// load shared data
// load shared data
loadRealSpaceEwaldShared
(
&
(
sA
[
threadIdx
.
x
]),
(
y
+
tgx
),
loadPmeDirectElectrostaticShared
(
&
(
sA
[
threadIdx
.
x
]),
(
y
+
tgx
)
);
atomCoord
,
labFrameDipole
,
labFrameQuadrupole
,
inducedDipole
,
inducedDipolePolar
,
cAmoebaSim
.
pDampingFactorAndThole
);
}
}
...
@@ -325,9 +307,8 @@ if( atomI == targetAtom ){
...
@@ -325,9 +307,8 @@ if( atomI == targetAtom ){
unsigned
int
atomJ
=
y
+
tj
;
unsigned
int
atomJ
=
y
+
tj
;
float
energy
;
float
energy
;
calculateRealSpaceEwaldPairIxn_kernel
(
localParticle
,
psA
[
tj
],
calculatePmeDirectElectrostaticPairIxn_kernel
(
localParticle
,
psA
[
tj
],
cAmoebaSim
.
scalingDistanceCutoff
,
scalingFactors
,
scalingFactors
,
force
,
torque
,
&
energy
force
,
torque
,
&
energy
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
,
pullBack
,
pullBack
#endif
#endif
...
@@ -345,75 +326,59 @@ if( atomI == targetAtom ){
...
@@ -345,75 +326,59 @@ if( atomI == targetAtom ){
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
totalEnergy
+=
mask
?
energy
:
0
.
0
f
;
totalEnergy
+=
mask
?
energy
:
0
.
0
f
;
// add force and torque to atom J due atom I
// add force and torque to atom J due atom I
psA
[
tj
].
force
[
0
]
-=
mask
?
force
[
0
]
:
0
.
0
f
;
psA
[
tj
].
force
[
0
]
-=
mask
?
force
[
0
]
:
0
.
0
f
;
psA
[
tj
].
force
[
1
]
-=
mask
?
force
[
1
]
:
0
.
0
f
;
psA
[
tj
].
force
[
1
]
-=
mask
?
force
[
1
]
:
0
.
0
f
;
psA
[
tj
].
force
[
2
]
-=
mask
?
force
[
2
]
:
0
.
0
f
;
psA
[
tj
].
force
[
2
]
-=
mask
?
force
[
2
]
:
0
.
0
f
;
psA
[
tj
].
torque
[
0
]
+=
mask
?
torque
[
1
][
0
]
:
0
.
0
f
;
psA
[
tj
].
torque
[
0
]
+=
mask
?
torque
[
1
][
0
]
:
0
.
0
f
;
psA
[
tj
].
torque
[
1
]
+=
mask
?
torque
[
1
][
1
]
:
0
.
0
f
;
psA
[
tj
].
torque
[
1
]
+=
mask
?
torque
[
1
][
1
]
:
0
.
0
f
;
psA
[
tj
].
torque
[
2
]
+=
mask
?
torque
[
1
][
2
]
:
0
.
0
f
;
psA
[
tj
].
torque
[
2
]
+=
mask
?
torque
[
1
][
2
]
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
||
atomJ
==
targetAtom
){
if
(
atomI
==
targetAtom
||
atomJ
==
targetAtom
){
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
atomJ
:
atomI
;
unsigned
int
indexI
=
(
atomI
==
targetAtom
)
?
0
:
1
;
unsigned
int
indexJ
=
(
atomI
==
targetAtom
)
?
1
:
0
;
float
forceSign
=
(
atomI
==
targetAtom
)
?
1
.
0
f
:
-
1
.
0
f
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
atomJ
;
debugArray
[
index
].
z
=
2
.
0
f
;
debugArray
[
index
].
w
=
(
float
)
y
;
/*
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = mask ? forceSign*force[0] : 0.0f;
debugArray[index].y = mask ? forceSign*force[1] : 0.0f;
debugArray[index].z = mask ? forceSign*force[2] : 0.0f;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = mask ? torque[indexI][0] : 0.0f;
debugArray[index].y = mask ? torque[indexI][1] : 0.0f;
debugArray[index].z = mask ? torque[indexI][2] : 0.0f;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = mask ? torque[indexJ][0] : 0.0f;
debugArray[index].y = mask ? torque[indexJ][1] : 0.0f;
debugArray[index].z = mask ? torque[indexJ][2] : 0.0f;
*/
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
int
pullIndex
=
0
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
pullIndex
++
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
atomJ
:
atomI
;
unsigned
int
indexI
=
(
atomI
==
targetAtom
)
?
0
:
1
;
unsigned
int
indexJ
=
(
atomI
==
targetAtom
)
?
1
:
0
;
float
forceSign
=
(
atomI
==
targetAtom
)
?
1
.
0
f
:
-
1
.
0
f
;
float
blockId
=
2
.
0
f
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
atomJ
;
debugArray
[
index
].
z
=
(
float
)
y
;
debugArray
[
index
].
w
=
blockId
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
forceSign
*
force
[
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
forceSign
*
force
[
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
forceSign
*
force
[
2
]
:
0
.
0
f
;
debugArray
[
index
].
w
=
blockId
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
torque
[
indexI
][
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
torque
[
indexI
][
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
torque
[
indexI
][
2
]
:
0
.
0
f
;
debugArray
[
index
].
w
=
blockId
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
torque
[
indexJ
][
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
torque
[
indexJ
][
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
torque
[
indexJ
][
2
]
:
0
.
0
f
;
debugArray
[
index
].
w
=
blockId
;
for
(
int
pullIndex
=
0
;
pullIndex
<
pullIndexMax
;
pullIndex
++
){
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
pullIndex
++
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
pullIndex
++
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
}
}
}
#endif
#endif
...
@@ -449,9 +414,8 @@ if( atomI == targetAtom || atomJ == targetAtom ){
...
@@ -449,9 +414,8 @@ if( atomI == targetAtom || atomJ == targetAtom ){
// force
// force
float
energy
;
float
energy
;
calculateRealSpaceEwaldPairIxn_kernel
(
localParticle
,
psA
[
tj
],
calculatePmeDirectElectrostaticPairIxn_kernel
(
localParticle
,
psA
[
tj
],
cAmoebaSim
.
scalingDistanceCutoff
,
scalingFactors
,
scalingFactors
,
force
,
torque
,
&
energy
force
,
torque
,
&
energy
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
,
pullBack
,
pullBack
#endif
#endif
...
@@ -471,74 +435,58 @@ if( atomI == targetAtom || atomJ == targetAtom ){
...
@@ -471,74 +435,58 @@ if( atomI == targetAtom || atomJ == targetAtom ){
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
totalEnergy
+=
mask
?
energy
:
0
.
0
f
;
totalEnergy
+=
mask
?
energy
:
0
.
0
f
;
// add force and torque to atom J due atom I
// add force and torque to atom J due atom I
psA
[
tj
].
force
[
0
]
-=
mask
?
force
[
0
]
:
0
.
0
f
;
psA
[
tj
].
force
[
0
]
-=
mask
?
force
[
0
]
:
0
.
0
f
;
psA
[
tj
].
force
[
1
]
-=
mask
?
force
[
1
]
:
0
.
0
f
;
psA
[
tj
].
force
[
1
]
-=
mask
?
force
[
1
]
:
0
.
0
f
;
psA
[
tj
].
force
[
2
]
-=
mask
?
force
[
2
]
:
0
.
0
f
;
psA
[
tj
].
force
[
2
]
-=
mask
?
force
[
2
]
:
0
.
0
f
;
psA
[
tj
].
torque
[
0
]
+=
mask
?
torque
[
1
][
0
]
:
0
.
0
f
;
psA
[
tj
].
torque
[
0
]
+=
mask
?
torque
[
1
][
0
]
:
0
.
0
f
;
psA
[
tj
].
torque
[
1
]
+=
mask
?
torque
[
1
][
1
]
:
0
.
0
f
;
psA
[
tj
].
torque
[
1
]
+=
mask
?
torque
[
1
][
1
]
:
0
.
0
f
;
psA
[
tj
].
torque
[
2
]
+=
mask
?
torque
[
1
][
2
]
:
0
.
0
f
;
psA
[
tj
].
torque
[
2
]
+=
mask
?
torque
[
1
][
2
]
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
||
atomJ
==
targetAtom
){
if
(
atomI
==
targetAtom
||
atomJ
==
targetAtom
){
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
atomJ
:
atomI
;
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
atomJ
:
atomI
;
unsigned
int
indexI
=
(
atomI
==
targetAtom
)
?
0
:
1
;
unsigned
int
indexI
=
(
atomI
==
targetAtom
)
?
0
:
1
;
unsigned
int
indexJ
=
(
atomI
==
targetAtom
)
?
1
:
0
;
unsigned
int
indexJ
=
(
atomI
==
targetAtom
)
?
1
:
0
;
float
forceSign
=
(
atomI
==
targetAtom
)
?
1
.
0
f
:
-
1
.
0
f
;
float
forceSign
=
(
atomI
==
targetAtom
)
?
1
.
0
f
:
-
1
.
0
f
;
float
blockId
=
3
.
0
f
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
atomJ
;
debugArray
[
index
].
z
=
3
.
0
f
;
debugArray
[
index
].
w
=
(
float
)
y
;
/*
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = mask ? forceSign*force[0] : 0.0f;
debugArray[index].y = mask ? forceSign*force[1] : 0.0f;
debugArray[index].z = mask ? forceSign*force[2] : 0.0f;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = mask ? torque[indexI][0] : 0.0f;
debugArray[index].y = mask ? torque[indexI][1] : 0.0f;
debugArray[index].z = mask ? torque[indexI][2] : 0.0f;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = mask ? torque[indexJ][0] : 0.0f;
debugArray[index].y = mask ? torque[indexJ][1] : 0.0f;
debugArray[index].z = mask ? torque[indexJ][2] : 0.0f;
*/
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
int
pullIndex
=
0
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
atomJ
;
debugArray
[
index
].
z
=
(
float
)
y
;
debugArray
[
index
].
w
=
blockId
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
pullIndex
++
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
x
=
mask
?
forceSign
*
force
[
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
y
=
mask
?
forceSign
*
force
[
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
z
=
mask
?
forceSign
*
force
[
2
]
:
0
.
0
f
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
debugArray
[
index
].
w
=
blockId
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
pullIndex
++
;
debugArray
[
index
].
x
=
mask
?
torque
[
indexI
][
0
]
:
0
.
0
f
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
mask
?
torque
[
indexI
][
1
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
mask
?
torque
[
indexI
][
2
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
blockId
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
pullIndex
++
;
debugArray
[
index
].
x
=
mask
?
torque
[
indexJ
][
0
]
:
0
.
0
f
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
mask
?
torque
[
indexJ
][
1
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
mask
?
torque
[
indexJ
][
2
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
blockId
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
for
(
int
pullIndex
=
0
;
pullIndex
<
pullIndexMax
;
pullIndex
++
){
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
}
#if 0
#if 0
index += cAmoebaSim.paddedNumberOfAtoms;
index += cAmoebaSim.paddedNumberOfAtoms;
...
@@ -550,13 +498,6 @@ if( atomI == targetAtom || atomJ == targetAtom ){
...
@@ -550,13 +498,6 @@ if( atomI == targetAtom || atomJ == targetAtom ){
index += cAmoebaSim.paddedNumberOfAtoms;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = scalingFactors[MScaleIndex];
debugArray[index].x = scalingFactors[MScaleIndex];
debugArray[index].y = mScaleVal;
debugArray[index].y = mScaleVal;
for( int pIndex = 0; pIndex < 14; pIndex++ ){
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pIndex].x;
debugArray[index].y = pullBack[pIndex].y;
debugArray[index].z = pullBack[pIndex].z;
debugArray[index].w = pullBack[pIndex].w;
}
index += cAmoebaSim.paddedNumberOfAtoms;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = labFrameDipole[3*atomI];
debugArray[index].x = labFrameDipole[3*atomI];
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
View file @
2ef05285
...
@@ -322,7 +322,7 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
...
@@ -322,7 +322,7 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
--------------------------------------------------------------------------------------- */
--------------------------------------------------------------------------------------- */
void
cudaComputeAmoebaPmeFixedEField
(
amoebaGpuContext
amoebaGpu
)
static
void
cudaComputeAmoebaPme
Direct
FixedEField
(
amoebaGpuContext
amoebaGpu
)
{
{
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
...
@@ -487,3 +487,9 @@ void cudaComputeAmoebaPmeFixedEField( amoebaGpuContext amoebaGpu )
...
@@ -487,3 +487,9 @@ void cudaComputeAmoebaPmeFixedEField( amoebaGpuContext amoebaGpu )
}
}
}
}
void
cudaComputeAmoebaPmeFixedEField
(
amoebaGpuContext
amoebaGpu
)
{
cudaComputeAmoebaPmeDirectFixedEField
(
amoebaGpu
);
kCalculateAmoebaPMEFixedMultipoleField
(
amoebaGpu
);
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
View file @
2ef05285
...
@@ -616,6 +616,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
...
@@ -616,6 +616,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
// matrix multiply
// matrix multiply
cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply
(
amoebaGpu
,
amoebaGpu
->
psWorkVector
[
0
],
amoebaGpu
->
psWorkVector
[
1
]
);
cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply
(
amoebaGpu
,
amoebaGpu
->
psWorkVector
[
0
],
amoebaGpu
->
psWorkVector
[
1
]
);
kCalculateAmoebaPMEInducedDipoleField
(
amoebaGpu
);
LAUNCHERROR
(
"cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply Loop
\n
"
);
LAUNCHERROR
(
"cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply Loop
\n
"
);
...
@@ -654,7 +655,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
...
@@ -654,7 +655,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
gpu
->
natoms
,
amoebaGpu
->
psPolarizability
->
_pDevStream
[
0
],
gpu
->
natoms
,
amoebaGpu
->
psPolarizability
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipolePolar
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psInducedDipolePolar
->
_pDevStream
[
0
],
amoebaGpu
->
psE_Field
->
_pDevStream
[
0
],
amoebaGpu
->
psE_FieldPolar
->
_pDevStream
[
0
],
amoebaGpu
->
psE_Field
->
_pDevStream
[
0
],
amoebaGpu
->
psE_FieldPolar
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkVector
[
0
]
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkVector
[
1
]
->
_pDevStream
[
0
]
);
amoebaGpu
->
psWorkVector
[
0
]
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkVector
[
1
]
->
_pDevStream
[
0
]
);
LAUNCHERROR
(
"kSorUpdatePmeMutualInducedField"
);
LAUNCHERROR
(
"kSorUpdatePmeMutualInducedField"
);
// get total epsilon -- performing sums on gpu
// get total epsilon -- performing sums on gpu
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
View file @
2ef05285
...
@@ -390,7 +390,7 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
...
@@ -390,7 +390,7 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
if
(
amoebaGpu
->
multipoleNonbondedMethod
==
AMOEBA_NO_CUTOFF
){
if
(
amoebaGpu
->
multipoleNonbondedMethod
==
AMOEBA_NO_CUTOFF
){
cudaComputeAmoebaElectrostatic
(
amoebaGpu
);
cudaComputeAmoebaElectrostatic
(
amoebaGpu
);
}
else
{
}
else
{
cudaComputeAmoeba
RealSpaceEwald
(
amoebaGpu
);
cudaComputeAmoeba
PmeElectrostatic
(
amoebaGpu
);
}
}
// map torques to forces
// map torques to forces
...
...
plugins/amoeba/platforms/cuda/tests/AmoebaTinkerParameterFile.cpp
View file @
2ef05285
...
@@ -2008,6 +2008,7 @@ static int readAmoebaMultipoleParameters( FILE* filePtr, int version, MapStringI
...
@@ -2008,6 +2008,7 @@ static int readAmoebaMultipoleParameters( FILE* filePtr, int version, MapStringI
multipoleForce
->
setNonbondedMethod
(
AmoebaMultipoleForce
::
NoCutoff
);
multipoleForce
->
setNonbondedMethod
(
AmoebaMultipoleForce
::
NoCutoff
);
}
}
multipoleForce
->
setCutoffDistance
(
cutoffDistance
);
multipoleForce
->
setCutoffDistance
(
cutoffDistance
);
multipoleForce
->
setAEwald
(
aewald
);
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
box
[
0
],
0.0
,
0.0
),
Vec3
(
0.0
,
box
[
1
],
0.0
),
Vec3
(
0.0
,
0.0
,
box
[
2
])
);
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
box
[
0
],
0.0
,
0.0
),
Vec3
(
0.0
,
box
[
1
],
0.0
),
Vec3
(
0.0
,
0.0
,
box
[
2
])
);
if
(
log
){
if
(
log
){
(
void
)
fprintf
(
log
,
"%s number of MultipoleParameter terms=%d usePme=%d aewald=%15.7e cutoffDistance=%12.4f
\n
"
,
(
void
)
fprintf
(
log
,
"%s number of MultipoleParameter terms=%d usePme=%d aewald=%15.7e cutoffDistance=%12.4f
\n
"
,
...
@@ -2111,6 +2112,7 @@ static int readAmoebaMultipoleParameters( FILE* filePtr, int version, MapStringI
...
@@ -2111,6 +2112,7 @@ static int readAmoebaMultipoleParameters( FILE* filePtr, int version, MapStringI
double
polarityConversion
=
AngstromToNm
*
AngstromToNm
*
AngstromToNm
;
double
polarityConversion
=
AngstromToNm
*
AngstromToNm
*
AngstromToNm
;
double
dampingFactorConversion
=
sqrt
(
AngstromToNm
);
double
dampingFactorConversion
=
sqrt
(
AngstromToNm
);
multipoleForce
->
setAEwald
(
multipoleForce
->
getAEwald
()
/
AngstromToNm
);
multipoleForce
->
setCutoffDistance
(
multipoleForce
->
getCutoffDistance
()
*
AngstromToNm
);
multipoleForce
->
setCutoffDistance
(
multipoleForce
->
getCutoffDistance
()
*
AngstromToNm
);
multipoleForce
->
setScalingDistanceCutoff
(
multipoleForce
->
getScalingDistanceCutoff
()
*
AngstromToNm
);
multipoleForce
->
setScalingDistanceCutoff
(
multipoleForce
->
getScalingDistanceCutoff
()
*
AngstromToNm
);
...
@@ -2165,8 +2167,8 @@ static int readAmoebaMultipoleParameters( FILE* filePtr, int version, MapStringI
...
@@ -2165,8 +2167,8 @@ static int readAmoebaMultipoleParameters( FILE* filePtr, int version, MapStringI
(
useOpenMMUnits
?
"OpenMM"
:
"Amoeba"
)
);
(
useOpenMMUnits
?
"OpenMM"
:
"Amoeba"
)
);
std
::
string
nonbondedMethod
=
multipoleForce
->
getNonbondedMethod
(
)
==
AmoebaMultipoleForce
::
PME
?
"PME"
:
"NoCutoff"
;
std
::
string
nonbondedMethod
=
multipoleForce
->
getNonbondedMethod
(
)
==
AmoebaMultipoleForce
::
PME
?
"PME"
:
"NoCutoff"
;
(
void
)
fprintf
(
log
,
"NonbondedMethod=%s cutoff=%15.7e.
\n
"
,
nonbondedMethod
.
c_str
(),
(
void
)
fprintf
(
log
,
"NonbondedMethod=%s
aEwald=%15.7e
cutoff=%15.7e.
\n
"
,
nonbondedMethod
.
c_str
(),
multipoleForce
->
getCutoffDistance
()
);
multipoleForce
->
getAEwald
(),
multipoleForce
->
getCutoffDistance
()
);
Vec3
a
,
b
,
c
;
Vec3
a
,
b
,
c
;
system
.
getDefaultPeriodicBoxVectors
(
a
,
b
,
c
);
system
.
getDefaultPeriodicBoxVectors
(
a
,
b
,
c
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment