Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
4d763c4d
Commit
4d763c4d
authored
Jan 05, 2011
by
Mark Friedrichs
Browse files
Fixed bug in direct PME
Added diagnostics for handling reorder of atoms
parent
8b8defe8
Changes
20
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
546 additions
and
313 deletions
+546
-313
plugins/amoeba/openmmapi/src/AmoebaMultipoleForce.cpp
plugins/amoeba/openmmapi/src/AmoebaMultipoleForce.cpp
+1
-1
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+15
-13
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
+56
-13
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
...ins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
+2
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
+3
-3
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEAndGkFields.cu
...cuda/src/kernels/kCalculateAmoebaCudaFixedEAndGkFields.cu
+3
-3
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.cu
...forms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.cu
+2
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
...latforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
+8
-8
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
+5
-5
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMapTorques.cu
...tforms/cuda/src/kernels/kCalculateAmoebaCudaMapTorques.cu
+8
-8
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
...c/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
+3
-3
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
...uda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
+2
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
...src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
+245
-99
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
.../src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
+150
-120
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
...ms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
+21
-10
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+9
-10
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
...a/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
+4
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
...forms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
+5
-5
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
...platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
+2
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
+2
-2
No files found.
plugins/amoeba/openmmapi/src/AmoebaMultipoleForce.cpp
View file @
4d763c4d
...
...
@@ -38,7 +38,7 @@ using namespace OpenMM;
using
std
::
string
;
using
std
::
vector
;
AmoebaMultipoleForce
::
AmoebaMultipoleForce
()
:
nonbondedMethod
(
NoCutoff
),
pmeBSplineOrder
(
5
),
cutoffDistance
(
1.0
),
ewaldErrorTol
(
5
e-4
),
mutualInducedIterationMethod
(
SOR
),
mutualInducedMaxIterations
(
60
),
AmoebaMultipoleForce
::
AmoebaMultipoleForce
()
:
nonbondedMethod
(
NoCutoff
),
pmeBSplineOrder
(
5
),
cutoffDistance
(
1.0
),
ewaldErrorTol
(
1
e-4
),
mutualInducedIterationMethod
(
SOR
),
mutualInducedMaxIterations
(
60
),
mutualInducedTargetEpsilon
(
1.0e-05
),
scalingDistanceCutoff
(
100.0
),
electricConstant
(
138.9354558456
)
{
}
...
...
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
4d763c4d
...
...
@@ -779,17 +779,15 @@ static void computeAmoebaMultipoleForce( AmoebaCudaData& data ) {
if
(
data
.
getMultipoleForceCount
()
==
0
){
gpuCopyWorkUnit
(
gpu
);
}
//if( data.getApplyCutoff() && (data.getMultipoleForceCount() % 100) == 0 ){
//gpuReorderAtoms(gpu->gpuContext);
//}
data
.
incrementMultipoleForceCount
();
data
.
initializeGpu
();
if
(
0
&&
data
.
getLog
()
){
(
void
)
fprintf
(
data
.
getLog
(),
"computeAmoebaMultipoleForce
\n
"
);
(
void
)
fprintf
(
data
.
getLog
(),
"
In
computeAmoebaMultipoleForce
\n
"
);
(
void
)
fflush
(
data
.
getLog
());
}
data
.
initializeGpu
();
// calculate Born radii
if
(
data
.
getHasAmoebaGeneralizedKirkwood
()
){
...
...
@@ -974,23 +972,27 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
nb
.
setCutoffDistance
(
force
.
getCutoffDistance
());
std
::
vector
<
int
>
pmeGridDimension
;
force
.
getPmeGridDimensions
(
pmeGridDimension
);
if
(
1
||
pmeGridDimension
[
0
]
==
0
){
int
pmeParametersSetBasedOnEwaldErrorTolerance
;
if
(
pmeGridDimension
[
0
]
==
0
){
NonbondedForceImpl
::
calcPMEParameters
(
system
,
nb
,
alpha
,
xsize
,
ysize
,
zsize
);
/*
alpha = 5.446;
xsize = 60;
ysize = 48;
zsize = 48;
*/
pmeParametersSetBasedOnEwaldErrorTolerance
=
1
;
}
else
{
alpha
=
force
.
getAEwald
();
xsize
=
pmeGridDimension
[
0
];
ysize
=
pmeGridDimension
[
1
];
zsize
=
pmeGridDimension
[
2
];
pmeParametersSetBasedOnEwaldErrorTolerance
=
0
;
}
if
(
data
.
getLog
()
){
(
void
)
fprintf
(
data
.
getLog
(),
"AmoebaMultipoleForce: PME parameters tol=%12.3e cutoff=%12.3f alpha=%12.3f [%d %d %d]
\n
"
,
(
void
)
fprintf
(
data
.
getLog
(),
"AmoebaMultipoleForce: PME parameters tol=%12.3e cutoff=%12.3f alpha=%12.3f [%d %d %d]
-
"
,
force
.
getEwaldErrorTolerance
(),
force
.
getCutoffDistance
(),
alpha
,
xsize
,
ysize
,
zsize
);
if
(
pmeParametersSetBasedOnEwaldErrorTolerance
){
(
void
)
fprintf
(
data
.
getLog
(),
" parameters set based on error tolerance and OpenMM algorithm.
\n
"
);
}
else
{
double
impliedTolerance
=
alpha
*
force
.
getCutoffDistance
();
impliedTolerance
=
0.5
*
exp
(
-
(
impliedTolerance
*
impliedTolerance
)
);
(
void
)
fprintf
(
data
.
getLog
(),
" using input parameters implied tolerance=%12.3e
\n
"
,
impliedTolerance
);
}
(
void
)
fflush
(
data
.
getLog
()
);
}
gpuSetAmoebaPMEParameters
(
data
.
getAmoebaGpu
(),
(
float
)
alpha
,
xsize
,
ysize
,
zsize
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
View file @
4d763c4d
...
...
@@ -3909,6 +3909,33 @@ void cudaWriteVectorOfDoubleVectorsToFile( char* fname, std::vector<int>& fileId
(
void
)
fclose
(
filePtr
);
}
CUDAStream
<
float
>*
reorderFloat
(
amoebaGpuContext
amoebaGpu
,
CUDAStream
<
float
>*
arrayToReorder
){
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
CUDAStream
<
float
>*
reorderdArray
=
new
CUDAStream
<
float
>
(
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
,
1
,
"TempReorder"
);
int
*
order
=
gpu
->
psAtomIndex
->
_pSysData
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
reorderdArray
->
_pSysStream
[
0
][
order
[
ii
]]
=
arrayToReorder
->
_pSysStream
[
0
][
ii
];
}
return
reorderdArray
;
}
CUDAStream
<
float4
>*
reorderFloat4
(
amoebaGpuContext
amoebaGpu
,
CUDAStream
<
float4
>*
arrayToReorder
){
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
CUDAStream
<
float4
>*
reorderdArray
=
new
CUDAStream
<
float4
>
(
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
,
1
,
"TempReorder4"
);
int
*
order
=
gpu
->
psAtomIndex
->
_pSysData
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
reorderdArray
->
_pSysStream
[
0
][
order
[
ii
]].
x
=
arrayToReorder
->
_pSysStream
[
0
][
ii
].
x
;
reorderdArray
->
_pSysStream
[
0
][
order
[
ii
]].
y
=
arrayToReorder
->
_pSysStream
[
0
][
ii
].
y
;
reorderdArray
->
_pSysStream
[
0
][
order
[
ii
]].
z
=
arrayToReorder
->
_pSysStream
[
0
][
ii
].
z
;
reorderdArray
->
_pSysStream
[
0
][
order
[
ii
]].
w
=
arrayToReorder
->
_pSysStream
[
0
][
ii
].
w
;
}
return
reorderdArray
;
}
/**---------------------------------------------------------------------------------------
Load contents of arrays into vector
...
...
@@ -3920,7 +3947,9 @@ void cudaWriteVectorOfDoubleVectorsToFile( char* fname, std::vector<int>& fileId
--------------------------------------------------------------------------------------- */
void
cudaLoadCudaFloatArray
(
int
numberOfParticles
,
int
entriesPerParticle
,
CUDAStream
<
float
>*
array
,
VectorOfDoubleVectors
&
outputVector
)
void
cudaLoadCudaFloatArray
(
int
numberOfParticles
,
int
entriesPerParticle
,
CUDAStream
<
float
>*
array
,
VectorOfDoubleVectors
&
outputVector
,
int
*
order
)
{
// ---------------------------------------------------------------------------------------
...
...
@@ -3929,14 +3958,19 @@ void cudaLoadCudaFloatArray( int numberOfParticles, int entriesPerParticle, CUDA
// ---------------------------------------------------------------------------------------
array
->
Download
();
int
running
Index
=
0
;
int
order
Index
=
0
;
outputVector
.
resize
(
numberOfParticles
);
for
(
int
ii
=
0
;
ii
<
numberOfParticles
;
ii
++
){
if
(
order
){
orderIndex
=
order
[
ii
];
}
else
{
orderIndex
=
ii
;
}
for
(
int
jj
=
0
;
jj
<
entriesPerParticle
;
jj
++
)
{
outputVector
[
ii
].
push_back
(
array
->
_pSysStream
[
0
][
runningIndex
++
]
);
outputVector
[
orderIndex
].
push_back
(
array
->
_pSysStream
[
0
][
entriesPerParticle
*
ii
+
jj
]
);
}
}
}
...
...
@@ -3975,6 +4009,7 @@ void cudaLoadCudaFloat2Array( int numberOfParticles, int entriesPerParticle, CUD
}
}
/**---------------------------------------------------------------------------------------
Load contents of arrays into vector
...
...
@@ -3983,10 +4018,12 @@ void cudaLoadCudaFloat2Array( int numberOfParticles, int entriesPerParticle, CUD
@param entriesPerParticle entries/particles array
@param array cuda array
@param outputVector output vector
@param order if set, reorder entries
--------------------------------------------------------------------------------------- */
void
cudaLoadCudaFloat4Array
(
int
numberOfParticles
,
int
entriesPerParticle
,
CUDAStream
<
float4
>*
array
,
VectorOfDoubleVectors
&
outputVector
)
void
cudaLoadCudaFloat4Array
(
int
numberOfParticles
,
int
entriesPerParticle
,
CUDAStream
<
float4
>*
array
,
VectorOfDoubleVectors
&
outputVector
,
int
*
order
)
{
// ---------------------------------------------------------------------------------------
...
...
@@ -3996,21 +4033,27 @@ void cudaLoadCudaFloat4Array( int numberOfParticles, int entriesPerParticle, CUD
array
->
Download
();
int
runningIndex
=
0
;
int
orderIndex
;
outputVector
.
resize
(
numberOfParticles
);
for
(
int
ii
=
0
;
ii
<
numberOfParticles
;
ii
++
){
if
(
order
){
orderIndex
=
order
[
runningIndex
];
}
else
{
orderIndex
=
runningIndex
;
}
if
(
entriesPerParticle
>
0
){
outputVector
[
ii
].
push_back
(
array
->
_pSysStream
[
0
][
runningIndex
].
x
);
outputVector
[
orderIndex
].
push_back
(
array
->
_pSysStream
[
0
][
ii
].
x
);
}
if
(
entriesPerParticle
>
1
){
outputVector
[
ii
].
push_back
(
array
->
_pSysStream
[
0
][
runningIndex
].
y
);
outputVector
[
orderIndex
].
push_back
(
array
->
_pSysStream
[
0
][
ii
].
y
);
}
if
(
entriesPerParticle
>
2
){
outputVector
[
ii
].
push_back
(
array
->
_pSysStream
[
0
][
runningIndex
].
z
);
outputVector
[
orderIndex
].
push_back
(
array
->
_pSysStream
[
0
][
ii
].
z
);
}
if
(
entriesPerParticle
>
3
){
outputVector
[
ii
].
push_back
(
array
->
_pSysStream
[
0
][
runningIndex
].
w
);
outputVector
[
orderIndex
].
push_back
(
array
->
_pSysStream
[
0
][
ii
].
w
);
}
runningIndex
++
;
}
...
...
@@ -4353,8 +4396,8 @@ void trackMutualInducedIterations( amoebaGpuContext amoebaGpu, int iteration){
}
(
void
)
fflush
(
amoebaGpu
->
log
);
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psVelm4
,
outputVector
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
NULL
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psVelm4
,
outputVector
,
NULL
);
/*
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipole, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipolePolar, outputVector );
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
View file @
4d763c4d
...
...
@@ -147,9 +147,9 @@ extern void cudaWriteFloat1AndFloat1ArraysToFile( int numberOfAtoms, char* fname
int
entriesPerAtom2
,
CUDAStream
<
float
>*
array2
);
extern
void
readFile
(
std
::
string
fileName
,
StringVectorVector
&
fileContents
);
extern
void
cudaLoadCudaFloatArray
(
int
numberOfParticles
,
int
entriesPerParticle
,
CUDAStream
<
float
>*
array
,
VectorOfDoubleVectors
&
outputVector
);
extern
void
cudaLoadCudaFloatArray
(
int
numberOfParticles
,
int
entriesPerParticle
,
CUDAStream
<
float
>*
array
,
VectorOfDoubleVectors
&
outputVector
,
int
*
order
);
extern
void
cudaLoadCudaFloat2Array
(
int
numberOfParticles
,
int
entriesPerParticle
,
CUDAStream
<
float2
>*
array
,
VectorOfDoubleVectors
&
outputVector
);
extern
void
cudaLoadCudaFloat4Array
(
int
numberOfParticles
,
int
entriesPerParticle
,
CUDAStream
<
float4
>*
array
,
VectorOfDoubleVectors
&
outputVector
);
extern
void
cudaLoadCudaFloat4Array
(
int
numberOfParticles
,
int
entriesPerParticle
,
CUDAStream
<
float4
>*
array
,
VectorOfDoubleVectors
&
outputVector
,
int
*
order
);
extern
void
cudaWriteVectorOfDoubleVectorsToFile
(
char
*
fname
,
std
::
vector
<
int
>&
fileId
,
VectorOfDoubleVectors
&
outputVector
);
extern
void
kClearFloat
(
amoebaGpuContext
amoebaGpu
,
unsigned
int
entries
,
CUDAStream
<
float
>*
fieldToClear
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
View file @
4d763c4d
...
...
@@ -947,9 +947,9 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu )
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psForce
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psTorque
,
outputVector
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
NULL
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psForce
,
outputVector
,
NULL
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psTorque
,
outputVector
,
NULL
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaForceTorque"
,
fileId
,
outputVector
);
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEAndGkFields.cu
View file @
4d763c4d
...
...
@@ -562,9 +562,9 @@ void cudaComputeAmoebaFixedEAndGkFields( amoebaGpuContext amoebaGpu )
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psGk_Field
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
,
NULL
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
,
NULL
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psGk_Field
,
outputVector
,
NULL
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaEAndGkField"
,
fileId
,
outputVector
);
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.cu
View file @
4d763c4d
...
...
@@ -307,8 +307,8 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu )
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
,
NULL
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
,
NULL
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaEField"
,
fileId
,
outputVector
);
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
View file @
4d763c4d
...
...
@@ -1748,10 +1748,10 @@ static void kReduceToBornForcePrefactor( amoebaGpuContext amoebaGpu )
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
amoebaGpu
->
gpuContext
->
natoms
,
3
,
amoebaGpu
->
gpuContext
->
psPosq4
,
outputVector
);
cudaLoadCudaFloatArray
(
amoebaGpu
->
gpuContext
->
natoms
,
1
,
amoebaGpu
->
gpuContext
->
psBornRadii
,
outputVector
);
cudaLoadCudaFloat2Array
(
amoebaGpu
->
gpuContext
->
natoms
,
2
,
amoebaGpu
->
gpuContext
->
psObcData
,
outputVector
);
cudaLoadCudaFloatArray
(
amoebaGpu
->
gpuContext
->
natoms
,
1
,
amoebaGpu
->
gpuContext
->
psBornForce
,
outputVector
);
cudaLoadCudaFloat4Array
(
amoebaGpu
->
gpuContext
->
natoms
,
3
,
amoebaGpu
->
gpuContext
->
psPosq4
,
outputVector
,
NULL
);
cudaLoadCudaFloatArray
(
amoebaGpu
->
gpuContext
->
natoms
,
1
,
amoebaGpu
->
gpuContext
->
psBornRadii
,
outputVector
,
NULL
);
cudaLoadCudaFloat2Array
(
amoebaGpu
->
gpuContext
->
natoms
,
2
,
amoebaGpu
->
gpuContext
->
psObcData
,
outputVector
,
NULL
);
cudaLoadCudaFloatArray
(
amoebaGpu
->
gpuContext
->
natoms
,
1
,
amoebaGpu
->
gpuContext
->
psBornForce
,
outputVector
,
NULL
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaBornForce"
,
fileId
,
outputVector
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kReduceToBornForcePrefactor: exiting.
\n
"
);
(
void
)
fprintf
(
stderr
,
"kReduceToBornForcePrefactor: exiting.
\n
"
);
(
void
)
fflush
(
stderr
);
...
...
@@ -2069,8 +2069,8 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psKirkwoodForce
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psTorque
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psKirkwoodForce
,
outputVector
,
NULL
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psTorque
,
outputVector
,
NULL
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaForceTorque"
,
fileId
,
outputVector
);
}
...
...
@@ -2113,8 +2113,8 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psKirkwoodForce
,
outputVector
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
NULL
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psKirkwoodForce
,
outputVector
,
NULL
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaKirkwoodForce"
,
fileId
,
outputVector
);
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
View file @
4d763c4d
...
...
@@ -1215,9 +1215,9 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psKirkwoodEDiffForce
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psTorque
,
outputVector
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
NULL
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psKirkwoodEDiffForce
,
outputVector
,
NULL
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psTorque
,
outputVector
,
NULL
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaForceTorque"
,
fileId
,
outputVector
);
}
...
...
@@ -1260,8 +1260,8 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psKirkwoodEDiffForce
,
outputVector
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
NULL
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psKirkwoodEDiffForce
,
outputVector
,
NULL
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaKirkwoodEDiffForce"
,
fileId
,
outputVector
);
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMapTorques.cu
View file @
4d763c4d
...
...
@@ -829,8 +829,8 @@ void cudaComputeAmoebaMapTorques( amoebaGpuContext amoebaGpu, CUDAStream<float>*
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psForce
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psTorque
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psForce
,
outputVector
,
NULL
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psTorque
,
outputVector
,
NULL
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaVacuumElecForce"
,
fileId
,
outputVector
);
}
#endif
...
...
@@ -1031,9 +1031,9 @@ void cudaComputeAmoebaMapTorquesAndAddTotalForce( amoebaGpuContext amoebaGpu,
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
4
,
gpu
->
psForce4
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
psForce
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
psTorque
,
outputVector
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
4
,
gpu
->
psForce4
,
outputVector
,
NULL
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
psForce
,
outputVector
,
NULL
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
psTorque
,
outputVector
,
NULL
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaVacuumElecForce"
,
fileId
,
outputVector
);
}
#endif
...
...
@@ -1123,9 +1123,9 @@ void cudaComputeAmoebaMapTorquesAndAddTotalForce2( amoebaGpuContext amoebaGpu,
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
4
,
gpu
->
psForce4
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psForce
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psTorque
,
outputVector
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
4
,
gpu
->
psForce4
,
outputVector
,
NULL
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psForce
,
outputVector
,
NULL
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psTorque
,
outputVector
,
NULL
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaVacuumElecForce"
,
fileId
,
outputVector
);
}
#endif
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
View file @
4d763c4d
...
...
@@ -925,9 +925,9 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
NULL
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
NULL
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
NULL
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaMI_GK"
,
fileId
,
outputVector
);
}
#endif
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
View file @
4d763c4d
...
...
@@ -594,8 +594,8 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
// cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
NULL
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
NULL
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaMI"
,
fileId
,
outputVector
);
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
View file @
4d763c4d
...
...
@@ -152,11 +152,12 @@ __device__ static void calculatePmeSelfTorqueElectrostaticPairIxn_kernel( PmeDir
}
__device__
void
calculatePmeDirectElectrostaticPairIxn_kernel
(
PmeDirectElectrostaticParticle
&
atomI
,
PmeDirectElectrostaticParticle
&
atomJ
,
float
*
scalingFactors
,
float
*
outputForce
,
float3
outputTorque
[
3
],
float
*
e
nergy
float
*
scalingFactors
,
float
4
forceTorqueE
nergy
[
3
]
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
#endif
){
float
xr
=
atomJ
.
x
-
atomI
.
x
;
float
yr
=
atomJ
.
y
-
atomI
.
y
;
float
zr
=
atomJ
.
z
-
atomI
.
z
;
...
...
@@ -210,12 +211,11 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
// calculate the real space error function terms;
float
ralpha
=
cSim
.
alphaEwald
*
r
;
float
bn0
=
erfc
(
ralpha
)
/
r
;
float
alsq2
=
2.0
f
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
;
float
alsq2n
=
0.0
f
;
if
(
cSim
.
alphaEwald
>
0.0
f
){
if
(
cSim
.
alphaEwald
>
0.0
f
){
alsq2n
=
1.0
f
/
(
cAmoebaSim
.
sqrtPi
*
cSim
.
alphaEwald
);
}
float
exp2a
=
exp
(
-
(
ralpha
*
ralpha
));
...
...
@@ -263,7 +263,7 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
if
(
damp
!=
0.0
f
){
float
pgamma
=
atomI
.
thole
<
atomJ
.
thole
?
atomI
.
thole
:
atomJ
.
thole
;
float
ratio
=
r
/
damp
;
damp
=
-
pgamma
*
ratio
*
ratio
*
ratio
;
damp
=
-
pgamma
*
ratio
*
ratio
*
ratio
;
if
(
damp
>
-
50.0
f
){
float
expdamp
=
exp
(
damp
);
scale3
=
1.0
f
-
expdamp
;
...
...
@@ -510,7 +510,7 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
e
=
e
-
(
1.0
f
-
scalingFactors
[
MScaleIndex
])
*
erl
;
ei
=
ei
-
erli
;
*
energy
=
-
conversionFactor
*
(
e
+
ei
);
forceTorqueEnergy
[
0
].
w
=
-
conversionFactor
*
(
e
+
ei
);
// increment the total intramolecular energy; assumes;
// intramolecular distances are less than half of cell;
...
...
@@ -604,6 +604,7 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
+
(
sci3
+
scip3
)
*
bn2
*
dk1
+
gfi4
*
(
qkui1
+
qkuip1
-
qiuk1
-
qiukp1
))
+
gfi5
*
qir1
+
gfi6
*
qkr1
;
float
ftm2i2
=
gfi1
*
yr
+
0.5
f
*
(
gfi2
*
(
atomI
.
inducedDipole
[
1
]
+
atomI
.
inducedDipoleP
[
1
])
+
bn2
*
(
sci4
*
atomI
.
inducedDipoleP
[
1
]
+
scip4
*
atomI
.
inducedDipole
[
1
])
...
...
@@ -613,6 +614,7 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
+
(
sci3
+
scip3
)
*
bn2
*
dk2
+
gfi4
*
(
qkui2
+
qkuip2
-
qiuk2
-
qiukp2
))
+
gfi5
*
qir2
+
gfi6
*
qkr2
;
float
ftm2i3
=
gfi1
*
zr
+
0.5
f
*
(
gfi2
*
(
atomI
.
inducedDipole
[
2
]
+
atomI
.
inducedDipoleP
[
2
])
+
bn2
*
(
sci4
*
atomI
.
inducedDipoleP
[
2
]
+
scip4
*
atomI
.
inducedDipole
[
2
])
...
...
@@ -674,10 +676,13 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
float
temp3
=
0.5
f
*
rr3
*
((
gli1
+
gli6
)
*
scalingFactors
[
PScaleIndex
]
+
(
glip1
+
glip6
)
*
scalingFactors
[
DScaleIndex
]);
float
temp5
=
0.5
f
*
rr5
*
((
gli2
+
gli7
)
*
scalingFactors
[
PScaleIndex
]
+
(
glip2
+
glip7
)
*
scalingFactors
[
DScaleIndex
]);
float
temp7
=
0.5
f
*
rr7
*
(
gli3
*
scalingFactors
[
PScaleIndex
]
+
glip3
*
scalingFactors
[
DScaleIndex
]);
float
fridmp1
=
temp3
*
ddsc31
+
temp5
*
ddsc51
+
temp7
*
ddsc71
;
float
fridmp2
=
temp3
*
ddsc32
+
temp5
*
ddsc52
+
temp7
*
ddsc72
;
float
fridmp3
=
temp3
*
ddsc33
+
temp5
*
ddsc53
+
temp7
*
ddsc73
;
...
...
@@ -692,9 +697,9 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
// modify the forces for partially excluded interactions
ftm2i1
-=
fridmp1
-
findmp1
;
ftm2i2
-=
fridmp2
-
findmp2
;
ftm2i3
-=
fridmp3
-
findmp3
;
ftm2i1
-=
(
fridmp1
+
findmp1
)
;
ftm2i2
-=
(
fridmp2
+
findmp2
)
;
ftm2i3
-=
(
fridmp3
+
findmp3
)
;
// correction to convert mutual to direct polarization force
...
...
@@ -851,18 +856,18 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
// increment gradient due to force and torque on first site;
outputForce
[
0
]
=
conversionFactor
*
(
ftm21
+
ftm2i1
);
outputForce
[
1
]
=
conversionFactor
*
(
ftm22
+
ftm2i2
);
outputForce
[
2
]
=
conversionFactor
*
(
ftm23
+
ftm2i3
);
forceTorqueEnergy
[
0
].
x
=
conversionFactor
*
(
ftm21
+
ftm2i1
);
forceTorqueEnergy
[
0
].
y
=
conversionFactor
*
(
ftm22
+
ftm2i2
);
forceTorqueEnergy
[
0
].
z
=
conversionFactor
*
(
ftm23
+
ftm2i3
);
conversionFactor
*=
-
1.0
;
outputTorque
[
0
].
x
=
conversionFactor
*
(
ttm21
+
ttm2i1
);
outputTorque
[
1
].
x
=
conversionFactor
*
(
ttm22
+
ttm2i2
);
outputTorque
[
2
].
x
=
conversionFactor
*
(
ttm23
+
ttm2i3
);
forceTorqueEnergy
[
1
].
x
=
conversionFactor
*
(
ttm21
+
ttm2i1
);
forceTorqueEnergy
[
1
].
y
=
conversionFactor
*
(
ttm22
+
ttm2i2
);
forceTorqueEnergy
[
1
].
z
=
conversionFactor
*
(
ttm23
+
ttm2i3
);
outputTorque
[
1
].
x
=
conversionFactor
*
(
ttm31
+
ttm3i1
);
outputTorque
[
1
].
y
=
conversionFactor
*
(
ttm32
+
ttm3i2
);
outputTorque
[
1
].
z
=
conversionFactor
*
(
ttm33
+
ttm3i3
);
forceTorqueEnergy
[
2
].
x
=
conversionFactor
*
(
ttm31
+
ttm3i1
);
forceTorqueEnergy
[
2
].
y
=
conversionFactor
*
(
ttm32
+
ttm3i2
);
forceTorqueEnergy
[
2
].
z
=
conversionFactor
*
(
ttm33
+
ttm3i3
);
#ifdef AMOEBA_DEBUG
int
debugIndex
=
0
;
...
...
@@ -915,64 +920,103 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
debugArray[debugIndex].z = conversionFactor*ftm23;
debugArray[debugIndex].w = idTracker;
debugIndex++;
*/
idTracker += 1.0;
debugArray[debugIndex].x = e;
debugArray[debugIndex].y = ei;
debugArray[debugIndex].z = erl;
debugArray[debugIndex].w = erli;
debugIndex++;
idTracker
+=
1.0
;
*/
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
r2
;
debugArray
[
debugIndex
].
y
=
cSim
.
alphaEwald
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
fridmp3
;
debugArray
[
debugIndex
].
w
=
115.0
;
debugArray
[
debugIndex
].
z
=
conversionFactor
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ftm21
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ftm22
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ftm23
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
idTracker
+=
1.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
findmp1
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
findmp2
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
findmp3
;
debugArray
[
debugIndex
].
w
=
cSim
.
alphaEwald
+
1.0
f
;
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ftm2i1
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ftm2i2
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ftm2i3
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
idTracker
+=
1.0
;
/*
idTracker += 100.0;
debugArray[debugIndex].x = fridmp1;
debugArray[debugIndex].y = fridmp2;
debugArray[debugIndex].z = fridmp3;
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 100.0;
debugArray[debugIndex].x = findmp1;
debugArray[debugIndex].y = findmp2;
debugArray[debugIndex].z = findmp3;
debugArray[debugIndex].w = idTracker;
debugIndex++;
*/
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ttm21
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ttm22
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ttm23
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
idTracker
+=
1.0
;
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ttm2i1
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ttm2i2
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ttm2i3
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ttm31
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ttm32
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ttm33
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
idTracker
+=
100.0
;
debugArray
[
debugIndex
].
x
=
conversionFactor
*
ttm3i1
;
debugArray
[
debugIndex
].
y
=
conversionFactor
*
ttm3i2
;
debugArray
[
debugIndex
].
z
=
conversionFactor
*
ttm3i3
;
debugArray
[
debugIndex
].
w
=
idTracker
;
debugIndex
++
;
#endif
}
else
{
outputForce
[
0
]
=
0.0
f
;
outputForce
[
1
]
=
0.0
f
;
outputForce
[
2
]
=
0.0
f
;
forceTorqueEnergy
[
0
].
x
=
0.0
f
;
forceTorqueEnergy
[
0
].
y
=
0.0
f
;
forceTorqueEnergy
[
0
].
z
=
0.0
f
;
outputTorque
[
0
].
x
=
0.0
f
;
outputTorque
[
0
].
y
=
0.0
f
;
outputTorque
[
0
].
z
=
0.0
f
;
forceTorqueEnergy
[
1
].
x
=
0.0
f
;
forceTorqueEnergy
[
1
].
y
=
0.0
f
;
forceTorqueEnergy
[
1
].
z
=
0.0
f
;
outputTorque
[
1
].
x
=
0.0
f
;
outputTorque
[
1
].
y
=
0.0
f
;
outputTorque
[
1
].
z
=
0.0
f
;
forceTorqueEnergy
[
2
].
x
=
0.0
f
;
forceTorqueEnergy
[
2
].
y
=
0.0
f
;
forceTorqueEnergy
[
2
].
z
=
0.0
f
;
*
energy
=
0.0
f
;
forceTorqueEnergy
[
0
].
w
=
0.0
f
;
#ifdef AMOEBA_DEBUG
for
(
int
ii
=
0
;
ii
<
5
;
ii
++
){
for
(
int
ii
=
0
;
ii
<
12
;
ii
++
){
debugArray
[
ii
].
x
=
0.0
f
;
debugArray
[
ii
].
y
=
0.0
f
;
debugArray
[
ii
].
z
=
0.0
f
;
debugArray
[
ii
].
w
=
(
float
)
(
11
*
ii
);
debugArray
[
ii
].
w
=
(
float
)
(
-
ii
);
}
#endif
...
...
@@ -1047,12 +1091,52 @@ static void kReduceForceTorque(amoebaGpuContext amoebaGpu )
amoebaGpu
->
paddedNumberOfAtoms
*
3
,
amoebaGpu
->
outputBuffers
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
amoebaGpu
->
psForce
->
_pDevStream
[
0
]
);
LAUNCHERROR
(
"kReducePmeDirectElectrostaticForce"
);
kReduceFields_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
fieldReduceThreadsPerBlock
>>>
(
amoebaGpu
->
paddedNumberOfAtoms
*
3
,
amoebaGpu
->
outputBuffers
,
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
amoebaGpu
->
psTorque
->
_pDevStream
[
0
]
);
LAUNCHERROR
(
"kReducePmeDirectElectrostaticTorque"
);
}
/**---------------------------------------------------------------------------------------
Zero gpu->psForce4
@param amoebaGpu amoebaGpu context
--------------------------------------------------------------------------------------- */
static
void
zeroForce
(
amoebaGpuContext
amoebaGpu
)
{
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
for
(
int
ii
=
0
;
ii
<
amoebaGpu
->
gpuContext
->
natoms
;
ii
++
){
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
x
=
0.0
f
;
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
y
=
0.0
f
;
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
z
=
0.0
f
;
}
gpu
->
psForce4
->
Upload
();
}
/**---------------------------------------------------------------------------------------
Copy gpu->psForce4 to amoebaGpu->psForce
@param amoebaGpu amoebaGpu context
--------------------------------------------------------------------------------------- */
static
void
copyForce
(
amoebaGpuContext
amoebaGpu
,
float
conversion
)
{
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpu
->
psForce4
->
Download
();
int
indexOffset
=
0
;
for
(
int
ii
=
0
;
ii
<
amoebaGpu
->
gpuContext
->
natoms
;
ii
++
){
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
indexOffset
]
=
conversion
*
(
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
x
);
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
indexOffset
+
1
]
=
conversion
*
(
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
y
);
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
indexOffset
+
2
]
=
conversion
*
(
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
z
);
indexOffset
+=
3
;
}
amoebaGpu
->
psForce
->
Upload
();
}
//#define GET_INDUCED_DIPOLE_FROM_FILE
#ifdef GET_INDUCED_DIPOLE_FROM_FILE
#include <stdlib.h>
...
...
@@ -1098,11 +1182,12 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
*
paddedNumberOfAtoms
,
1
,
"DebugArray"
);
memset
(
debugArray
->
_pSysStream
[
0
],
0
,
sizeof
(
float
)
*
4
*
paddedNumberOfAtoms
*
paddedNumberOfAtoms
);
debugArray
->
Upload
();
unsigned
int
targetAtom
=
10
;
unsigned
int
targetAtom
=
49
;
#endif
#ifdef GET_INDUCED_DIPOLE_FROM_FILE
std
::
string
fileName
=
"waterInducedDipole.txt"
;
//std::string fileName = "waterInducedDipole.txt";
std
::
string
fileName
=
"water_3_MI.txt"
;
StringVectorVector
fileContents
;
readFile
(
fileName
,
fileContents
);
unsigned
int
offset
=
0
;
...
...
@@ -1152,7 +1237,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
threadsPerBlock
,
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
PmeDirectElectrostaticParticle
)),
sizeof
(
PmeDirectElectrostaticParticle
)
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoebaPmeDirectElectrostaticCutoffForces
no warp
: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Obuf=%u ixnCt=%u workUnits=%u gpu->nonbond_threads_per_block=%u
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoebaPmeDirectElectrostaticCutoffForces: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Obuf=%u ixnCt=%u workUnits=%u gpu->nonbond_threads_per_block=%u
\n
"
,
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
PmeDirectElectrostaticParticle
),
(
sizeof
(
PmeDirectElectrostaticParticle
))
*
threadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
nonbond_threads_per_block
);
...
...
@@ -1173,9 +1258,6 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
}
else
{
// gpu->sim.pInteractingWorkUnit,
// amoebaGpu->psWorkUnit->_pDevStream[0],
kCalculateAmoebaPmeDirectElectrostaticCutoffForces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
PmeDirectElectrostaticParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
...
...
@@ -1196,11 +1278,12 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
amoebaGpu
->
psTorque
->
Download
();
debugArray
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished PmeDirectElectrostatic kernel execution
\n
"
);
(
void
)
fflush
(
amoebaGpu
->
log
);
int
maxPrint
=
5
;
float
conversion
=
1.0
f
/
41.84
f
;
float
forceSum
[
3
]
=
{
0.0
f
,
0.0
f
,
0.0
f
};
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished PmeDirectElectrostatic kernel execution conversion=%.5f
\n
"
,
conversion
);
(
void
)
fflush
(
amoebaGpu
->
log
);
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
...
...
@@ -1230,6 +1313,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
}
}
(
void
)
fflush
(
amoebaGpu
->
log
);
/*
gpu->psEnergy->Download();
double energy = 0.0;
for( unsigned int ii = 0; ii < gpu->sim.energyOutputBuffers; ii++ ){
...
...
@@ -1240,36 +1324,41 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
}
}
(void) fprintf( amoebaGpu->log,"Force sums: [%16.9e %16.9e %16.9e] Energy=%16.9e\n", forceSum[0], forceSum[1], forceSum[2], energy );
if
(
0
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"DebugElecAll
\n
"
);
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
*
gpu
->
natoms
;
jj
++
){
if
(
fabs
(
debugArray
->
_pSysStream
[
0
][
jj
].
w
-
111.0
)
<
1.0e-04
){
int
debugIndex
=
jj
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%8d [%16.9e %16.9e %16.9e %16.9e] Enr11
\n
"
,
jj
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
x
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
y
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
z
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
w
);
debugIndex
+=
paddedNumberOfAtoms
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%8d [%16.9e %16.9e %16.9e %16.9e] Enr12
\n
"
,
jj
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
x
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
y
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
z
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
w
);
}
}
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
*/
if
(
0
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"DebugElec
\n
"
);
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
float
torqueSum0
[
3
]
=
{
0.0
f
,
0.0
f
,
0.0
f
};
float
torqueSum1
[
3
]
=
{
0.0
f
,
0.0
f
,
0.0
f
};
int
offset0
=
7
*
paddedNumberOfAtoms
;
int
offset1
=
8
*
paddedNumberOfAtoms
;
int
offset2
=
7
*
paddedNumberOfAtoms
;
int
offset3
=
8
*
paddedNumberOfAtoms
;
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
int
debugIndex
=
jj
;
for
(
int
kk
=
0
;
kk
<
6
;
kk
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d %5d [%16.9e %16.9e %16.9e %16.9e] E11
\n
"
,
targetAtom
,
jj
,
if
(
fabs
(
debugArray
->
_pSysStream
[
0
][
debugIndex
+
5
*
paddedNumberOfAtoms
].
y
)
<
1.0e-10
)
continue
;
if
(
jj
!=
targetAtom
){
torqueSum0
[
0
]
+=
debugArray
->
_pSysStream
[
0
][
debugIndex
+
offset0
].
x
+
debugArray
->
_pSysStream
[
0
][
debugIndex
+
offset1
].
x
;
torqueSum0
[
1
]
+=
debugArray
->
_pSysStream
[
0
][
debugIndex
+
offset0
].
y
+
debugArray
->
_pSysStream
[
0
][
debugIndex
+
offset1
].
y
;
torqueSum0
[
2
]
+=
debugArray
->
_pSysStream
[
0
][
debugIndex
+
offset0
].
z
+
debugArray
->
_pSysStream
[
0
][
debugIndex
+
offset1
].
z
;
torqueSum1
[
0
]
+=
debugArray
->
_pSysStream
[
0
][
debugIndex
+
offset2
].
x
+
debugArray
->
_pSysStream
[
0
][
debugIndex
+
offset3
].
x
;
torqueSum1
[
1
]
+=
debugArray
->
_pSysStream
[
0
][
debugIndex
+
offset2
].
y
+
debugArray
->
_pSysStream
[
0
][
debugIndex
+
offset3
].
y
;
torqueSum1
[
2
]
+=
debugArray
->
_pSysStream
[
0
][
debugIndex
+
offset2
].
z
+
debugArray
->
_pSysStream
[
0
][
debugIndex
+
offset3
].
z
;
}
if
(
jj
==
2
){
offset0
+=
2
*
paddedNumberOfAtoms
;
offset1
+=
2
*
paddedNumberOfAtoms
;
}
for
(
int
kk
=
0
;
kk
<
12
;
kk
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d %5d %5d [%16.9e %16.9e %16.9e %16.9e] E11
\n
"
,
targetAtom
,
jj
,
kk
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
x
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
y
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
z
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
w
);
debugIndex
+=
paddedNumberOfAtoms
;
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d %5d [%16.9e %16.9e %16.9e] [%16.9e %16.9e %16.9e] Sum
\n
"
,
targetAtom
,
jj
,
torqueSum0
[
0
],
torqueSum0
[
1
],
torqueSum0
[
2
],
torqueSum1
[
0
],
torqueSum1
[
1
],
torqueSum1
[
2
]);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
}
}
...
...
@@ -1279,9 +1368,9 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psForce
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psTorque
,
outputVector
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psForce
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psTorque
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeDirectForceTorque"
,
fileId
,
outputVector
);
}
...
...
@@ -1291,8 +1380,16 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
cudaComputeAmoebaMapTorquesAndAddTotalForce
(
amoebaGpu
,
amoebaGpu
->
psTorque
,
amoebaGpu
->
psForce
,
gpu
->
psForce4
);
}
if
(
0
){
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
copyForce
(
amoebaGpu
,
-
1.0
f
/
41.84
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psForce
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeDirectForce"
,
fileId
,
outputVector
);
}
}
/**---------------------------------------------------------------------------------------
Compute Amoeba electrostatic force & torque using PME
...
...
@@ -1303,6 +1400,55 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
void
cudaComputeAmoebaPmeElectrostatic
(
amoebaGpuContext
amoebaGpu
)
{
if
(
0
){
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
std
::
vector
<
int
>
fileId
;
zeroForce
(
amoebaGpu
);
kCalculateAmoebaPMEInducedDipoleForces
(
amoebaGpu
);
copyForce
(
amoebaGpu
,
-
1.0
f
/
41.84
);
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psForce
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeRecipForce"
,
fileId
,
outputVector
);
zeroForce
(
amoebaGpu
);
}
if
(
0
){
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
std
::
vector
<
int
>
fileId
;
zeroForce
(
amoebaGpu
);
cudaComputeAmoebaPmeDirectElectrostatic
(
amoebaGpu
);
copyForce
(
amoebaGpu
,
-
1.0
f
/
41.84
f
);
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psForce
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaWriteVectorOfDoubleVectorsToFile
(
"yCudaPmeDirectForce"
,
fileId
,
outputVector
);
zeroForce
(
amoebaGpu
);
}
if
(
0
){
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
std
::
vector
<
int
>
fileId
;
zeroForce
(
amoebaGpu
);
cudaComputeAmoebaPmeDirectElectrostatic
(
amoebaGpu
);
kCalculateAmoebaPMEInducedDipoleForces
(
amoebaGpu
);
copyForce
(
amoebaGpu
,
-
1.0
f
/
41.84
f
);
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psForce
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeForce"
,
fileId
,
outputVector
);
}
if
(
0
){
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
std
::
vector
<
int
>
fileId
;
copyForce
(
amoebaGpu
,
-
1.0
f
/
41.84
f
);
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psForce
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPrePmeForce"
,
fileId
,
outputVector
);
}
//zeroForce( amoebaGpu );
cudaComputeAmoebaPmeDirectElectrostatic
(
amoebaGpu
);
kCalculateAmoebaPMEInducedDipoleForces
(
amoebaGpu
);
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
View file @
4d763c4d
...
...
@@ -43,7 +43,7 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)(
){
#ifdef AMOEBA_DEBUG
int
maxPullIndex
=
2
;
int
maxPullIndex
=
7
;
float4
pullBack
[
12
];
#endif
...
...
@@ -56,6 +56,7 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)(
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
lasty
=
0xFFFFFFFF
;
float
totalEnergy
=
0
.
0
f
;
float4
forceTorqueEnergy
[
3
];
float
scalingFactors
[
LastScalingIndex
];
...
...
@@ -90,10 +91,7 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)(
localParticle
.
torque
[
1
]
=
0
.
0
f
;
localParticle
.
torque
[
2
]
=
0
.
0
f
;
scalingFactors
[
PScaleIndex
]
=
1
.
0
f
;
scalingFactors
[
DScaleIndex
]
=
1
.
0
f
;
scalingFactors
[
UScaleIndex
]
=
1
.
0
f
;
scalingFactors
[
MScaleIndex
]
=
1
.
0
f
;
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
...
...
@@ -116,9 +114,6 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)(
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
force
[
3
];
float3
torque
[
2
];
unsigned
int
atomJ
=
y
+
j
;
// set scale factors
...
...
@@ -132,9 +127,7 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)(
// force
float
energy
;
calculatePmeDirectElectrostaticPairIxn_kernel
(
localParticle
,
psA
[
j
],
scalingFactors
,
force
,
torque
,
&
energy
calculatePmeDirectElectrostaticPairIxn_kernel
(
localParticle
,
psA
[
j
],
scalingFactors
,
forceTorqueEnergy
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
...
...
@@ -143,28 +136,25 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)(
// nan*0.0 = nan not 0.0, so explicitly exclude (atomI == atomJ) contribution
// by setting match flag
unsigned
int
mask
=
(
(
atomI
==
atomJ
)
||
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
(
atomJ
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
0
:
1
;
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
if
(
(
atomI
!=
atomJ
)
&&
(
atomI
<
cAmoebaSim
.
numberOfAtoms
)
&&
(
atomJ
<
cAmoebaSim
.
numberOfAtoms
)
)
{
localParticle
.
force
[
0
]
+=
forceTorqueEnergy
[
0
].
x
;
localParticle
.
force
[
1
]
+=
forceTorqueEnergy
[
0
].
y
;
localParticle
.
force
[
2
]
+=
forceTorqueEnergy
[
0
].
z
;
localParticle
.
f
or
c
e
[
0
]
+=
mask
?
force
[
0
]
:
0
.
0
f
;
localParticle
.
f
or
c
e
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
localParticle
.
f
or
c
e
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
localParticle
.
t
or
qu
e
[
0
]
+=
forceTorqueEnergy
[
1
].
x
;
localParticle
.
t
or
qu
e
[
1
]
+=
forceTorqueEnergy
[
1
].
y
;
localParticle
.
t
or
qu
e
[
2
]
+=
forceTorqueEnergy
[
1
].
z
;
localParticle
.
torque
[
0
]
+=
mask
?
torque
[
0
].
x
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
].
y
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
].
z
:
0
.
0
f
;
// energy for each diagonal-block ixn included twice, hence factor of 0.5
totalEnergy
+=
mask
?
0
.
5
*
energy
:
0
.
0
f
;
totalEnergy
+=
0
.
5
*
forceTorqueEnergy
[
0
].
w
;
}
#ifdef AMOEBA_DEBUG
/*
energy = mask ? 0.5*energy : 0.0f;
if( atomI < 200 && (fabs( energy ) > 1.0e+8 || energy != energy) ){
debugSetup( atomI, atomJ, debugArray, pullBack );
} */
if
(
atomI
==
targetAtom
||
atomJ
==
targetAtom
){
if
(
atomI
==
targetAtom
){
unsigned
int
mask
=
(
(
atomI
==
atomJ
)
||
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
(
atomJ
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
0
:
1
;
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
atomJ
:
atomI
;
float
blockId
=
1
.
0
f
;
...
...
@@ -174,23 +164,24 @@ if( atomI == targetAtom ){
debugArray
[
index
].
w
=
blockId
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
force
[
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
force
[
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
force
[
2
]
:
0
.
0
f
;
debugArray
[
index
].
w
=
blockId
;
debugArray
[
index
].
x
=
mask
?
force
TorqueEnergy
[
0
].
x
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
force
TorqueEnergy
[
0
].
y
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
force
TorqueEnergy
[
0
].
z
:
0
.
0
f
;
debugArray
[
index
].
w
=
mask
?
forceTorqueEnergy
[
0
].
w
:
0
.
0
f
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
torque
[
0
].
x
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
torque
[
0
].
y
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
torque
[
0
].
z
:
0
.
0
f
;
debugArray
[
index
].
w
=
mask
?
energy
:
0
.
0
f
;
debugArray
[
index
].
x
=
mask
?
forceTorqueEnergy
[
1
].
x
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
forceTorqueEnergy
[
1
].
y
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
forceTorqueEnergy
[
1
].
z
:
0
.
0
f
;
float
offsetF
=
(
float
)(
3
*
(
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
));
debugArray
[
index
].
w
=
offsetF
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
torque
[
0
].
x
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
torque
[
0
].
y
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
torque
[
0
].
z
:
0
.
0
f
;
debugArray
[
index
].
w
=
(
float
)
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
;
debugArray
[
index
].
x
=
mask
?
forceTorqueEnergy
[
2
].
x
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
forceTorqueEnergy
[
2
].
y
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
forceTorqueEnergy
[
2
].
z
:
0
.
0
f
;
debugArray
[
index
].
w
=
offsetF
;
for
(
int
pullIndex
=
0
;
pullIndex
<
maxPullIndex
;
pullIndex
++
){
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
...
...
@@ -290,14 +281,11 @@ if( atomI == targetAtom ){
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
if
((
flags
&
(
1
<<
j
))
!=
0
)
if
(
(
flags
&
(
1
<<
j
)
)
!=
0
)
{
unsigned
int
jIdx
=
(
flags
==
0xFFFFFFFF
)
?
tj
:
j
;
unsigned
int
atomJ
=
y
+
jIdx
;
float
force
[
3
];
float3
torque
[
2
];
// set scale factors
if
(
bExclusionFlag
)
...
...
@@ -309,9 +297,8 @@ if( atomI == targetAtom ){
// force
float
energy
;
calculatePmeDirectElectrostaticPairIxn_kernel
(
localParticle
,
psA
[
jIdx
],
scalingFactors
,
force
,
t
orque
,
&
e
nergy
scalingFactors
,
force
T
orque
E
nergy
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
...
...
@@ -319,41 +306,41 @@ if( atomI == targetAtom ){
// check if atoms out-of-bounds
unsigned
int
mask
=
(
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
(
atomJ
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
0
:
1
;
if
(
(
atomI
<
cAmoebaSim
.
numberOfAtoms
)
&&
(
atomJ
<
cAmoebaSim
.
numberOfAtoms
)
)
{
// add force and torque to atom I due atom J
localParticle
.
force
[
0
]
+=
mask
?
force
[
0
]
:
0
.
0
f
;
localParticle
.
force
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
localParticle
.
force
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
localParticle
.
force
[
0
]
+=
forceTorqueEnergy
[
0
].
x
;
localParticle
.
force
[
1
]
+=
forceTorqueEnergy
[
0
].
y
;
localParticle
.
force
[
2
]
+=
forceTorqueEnergy
[
0
].
z
;
localParticle
.
torque
[
0
]
+=
mask
?
torque
[
0
].
x
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
].
y
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
].
z
:
0
.
0
f
;
totalEnergy
+=
forceTorqueEnergy
[
0
].
w
;
totalEnergy
+=
mask
?
energy
:
0
.
0
f
;
localParticle
.
torque
[
0
]
+=
forceTorqueEnergy
[
1
].
x
;
localParticle
.
torque
[
1
]
+=
forceTorqueEnergy
[
1
].
y
;
localParticle
.
torque
[
2
]
+=
forceTorqueEnergy
[
1
].
z
;
// add force and torque to atom J due atom I
if
(
flags
==
0xFFFFFFFF
){
psA
[
jIdx
].
force
[
0
]
-=
mask
?
force
[
0
]
:
0
.
0
f
;
psA
[
jIdx
].
force
[
1
]
-=
mask
?
force
[
1
]
:
0
.
0
f
;
psA
[
jIdx
].
force
[
2
]
-=
mask
?
force
[
2
]
:
0
.
0
f
;
psA
[
jIdx
].
force
[
0
]
-=
forceTorqueEnergy
[
0
].
x
;
psA
[
jIdx
].
force
[
1
]
-=
forceTorqueEnergy
[
0
].
y
;
psA
[
jIdx
].
force
[
2
]
-=
forceTorqueEnergy
[
0
].
z
;
psA
[
jIdx
].
torque
[
0
]
+=
mask
?
torque
[
1
].
x
:
0
.
0
f
;
psA
[
jIdx
].
torque
[
1
]
+=
mask
?
torque
[
1
].
y
:
0
.
0
f
;
psA
[
jIdx
].
torque
[
2
]
+=
mask
?
torque
[
1
].
z
:
0
.
0
f
;
psA
[
jIdx
].
torque
[
0
]
+=
forceTorqueEnergy
[
2
].
x
;
psA
[
jIdx
].
torque
[
1
]
+=
forceTorqueEnergy
[
2
].
y
;
psA
[
jIdx
].
torque
[
2
]
+=
forceTorqueEnergy
[
2
].
z
;
}
else
{
sA
[
threadIdx
.
x
].
tempForce
[
0
]
=
mask
?
0
.
0
f
:
force
[
0
];
sA
[
threadIdx
.
x
].
tempForce
[
1
]
=
mask
?
0
.
0
f
:
force
[
1
];
sA
[
threadIdx
.
x
].
tempForce
[
2
]
=
mask
?
0
.
0
f
:
force
[
2
];
sA
[
threadIdx
.
x
].
tempForce
[
0
]
=
forceTorqueEnergy
[
0
]
.
x
;
sA
[
threadIdx
.
x
].
tempForce
[
1
]
=
forceTorqueEnergy
[
1
]
.
y
;
sA
[
threadIdx
.
x
].
tempForce
[
2
]
=
forceTorqueEnergy
[
2
]
.
z
;
sA
[
threadIdx
.
x
].
tempTorque
[
0
]
=
mask
?
0
.
0
f
:
torque
[
1
].
x
;
sA
[
threadIdx
.
x
].
tempTorque
[
1
]
=
mask
?
0
.
0
f
:
torque
[
1
].
y
;
sA
[
threadIdx
.
x
].
tempTorque
[
2
]
=
mask
?
0
.
0
f
:
torque
[
1
].
z
;
sA
[
threadIdx
.
x
].
tempTorque
[
0
]
=
forceTorqueEnergy
[
2
].
x
;
sA
[
threadIdx
.
x
].
tempTorque
[
1
]
=
forceTorqueEnergy
[
2
].
y
;
sA
[
threadIdx
.
x
].
tempTorque
[
2
]
=
forceTorqueEnergy
[
2
].
z
;
if
(
tgx
%
2
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
...
...
@@ -379,15 +366,58 @@ if( atomI == targetAtom ){
psA
[
jIdx
].
torque
[
2
]
+=
sA
[
threadIdx
.
x
].
tempTorque
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
2
];
}
}
}
}
// end of atoms out-of-bounds
}
// end of flags&(1<<j block
#ifdef AMOEBA_DEBUG
unsigned
int
jIdx
=
(
flags
==
0xFFFFFFFF
)
?
tj
:
j
;
unsigned
int
atomJ
=
y
+
jIdx
;
unsigned
int
mask
=
(
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
(
atomJ
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
0
:
1
;
if
(
atomI
==
targetAtom
||
atomJ
==
targetAtom
){
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
atomJ
:
atomI
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
atomJ
;
debugArray
[
index
].
z
=
(
float
)
y
;
debugArray
[
index
].
w
=
(
flags
==
0xFFFFFFFF
)
?
(
float
)
-
141
.
0
f
:
-
151
.
0
f
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
forceTorqueEnergy
[
0
].
x
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
forceTorqueEnergy
[
0
].
y
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
forceTorqueEnergy
[
0
].
z
:
0
.
0
f
;
debugArray
[
index
].
w
=
mask
?
forceTorqueEnergy
[
0
].
w
:
0
.
0
f
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
forceTorqueEnergy
[
1
].
x
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
forceTorqueEnergy
[
1
].
y
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
forceTorqueEnergy
[
1
].
z
:
0
.
0
f
;
float
offsetF
=
(
float
)(
3
*
(
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
));
debugArray
[
index
].
w
=
offsetF
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
forceTorqueEnergy
[
2
].
x
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
forceTorqueEnergy
[
2
].
y
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
forceTorqueEnergy
[
2
].
z
:
0
.
0
f
;
offsetF
=
(
float
)
(
3
*
(
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
));
debugArray
[
index
].
w
=
offsetF
;
for
(
int
pullIndex
=
0
;
pullIndex
<
maxPullIndex
;
pullIndex
++
){
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
}
}
#endif
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
// end of j-loop
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
#ifdef USE_OUTPUT_BUFFER_PER_WARP
float
of
;
unsigned
int
offset
=
3
*
(
x
+
tgx
+
warp
*
cAmoebaSim
.
paddedNumberOfAtoms
);
...
...
@@ -441,7 +471,7 @@ if( atomI == targetAtom ){
of
+=
sA
[
threadIdx
.
x
].
torque
[
2
];
outputTorque
[
offset
+
2
]
=
of
;
#else
#else
unsigned
int
offset
=
3
*
(
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
);
outputForce
[
offset
]
=
localParticle
.
force
[
0
];
...
...
@@ -462,7 +492,7 @@ if( atomI == targetAtom ){
outputTorque
[
offset
+
1
]
=
sA
[
threadIdx
.
x
].
torque
[
1
];
outputTorque
[
offset
+
2
]
=
sA
[
threadIdx
.
x
].
torque
[
2
];
#endif
#endif
lasty
=
y
;
}
// end of pInteractionFlag block
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
View file @
4d763c4d
...
...
@@ -565,9 +565,9 @@ if( fabs(debugArray->_pSysStream[0][jj+3*paddedNumberOfAtoms].x) > 0.0 ){
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaEField"
,
fileId
,
outputVector
);
}
delete
debugArray
;
...
...
@@ -578,9 +578,9 @@ if( fabs(debugArray->_pSysStream[0][jj+3*paddedNumberOfAtoms].x) > 0.0 ){
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
0
);
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaEField"
,
fileId
,
outputVector
);
}
...
...
@@ -590,4 +590,15 @@ void cudaComputeAmoebaPmeFixedEField( amoebaGpuContext amoebaGpu )
{
kCalculateAmoebaPMEFixedMultipoles
(
amoebaGpu
);
cudaComputeAmoebaPmeDirectFixedEField
(
amoebaGpu
);
if
(
0
){
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
0
);
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaEField"
,
fileId
,
outputVector
);
}
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
View file @
4d763c4d
...
...
@@ -193,6 +193,7 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce
fields
[
2
].
z
=
0.0
f
;
fields
[
2
].
w
=
0.0
f
;
}
/*
#ifdef AMOEBA_DEBUG
pullBack[0].x = xr;
pullBack[0].y = yr;
...
...
@@ -204,7 +205,6 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce
pullBack[1].z = bn2;
pullBack[1].w = exp2a;
/*
pullBack[1].x = atomJ.x - atomI.x;
pullBack[1].y = atomJ.y - atomI.y;
pullBack[1].z = atomJ.z - atomI.z;
...
...
@@ -212,8 +212,8 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce
pullBack[1].x = scale3;
pullBack[1].y = scale5;
pullBack[1].z = scale7;
*/
#endif
*/
}
// Include versions of the kernels for N^2 calculations.
...
...
@@ -385,8 +385,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
static
const
char
*
methodName
=
"cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply"
;
static
int
iteration
=
1
;
if
(
1
&&
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s: scalingDistanceCutoff=%.5f
\n
"
,
methodName
,
amoebaGpu
->
scalingDistanceCutoff
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s
\n
"
,
methodName
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
...
...
@@ -748,9 +747,9 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
iteration
);
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeMI"
,
fileId
,
outputVector
);
}
...
...
@@ -780,9 +779,9 @@ fflush( amoebaGpu->log );
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
);
//
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeMI"
,
fileId
,
outputVector
);
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
View file @
4d763c4d
...
...
@@ -122,6 +122,7 @@ void METHOD_NAME(kCalculateAmoebaPmeMutualInducedField, _kernel)(
fieldPolarSum
[
1
]
+=
mask
?
ijField
[
1
].
z
:
0
.
0
f
;
fieldPolarSum
[
2
]
+=
mask
?
ijField
[
2
].
z
:
0
.
0
f
;
/*
#ifdef AMOEBA_DEBUG
if( atomI == targetAtom || (y+j) == targetAtom ){
unsigned int index = atomI == targetAtom ? (y+j) : atomI;
...
...
@@ -173,7 +174,6 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
debugArray[index].z = ijField[indexJ+1][2];
debugArray[index].w = flag;
/*
index += cAmoebaSim.paddedNumberOfAtoms;
index += cAmoebaSim.paddedNumberOfAtoms;
...
...
@@ -189,10 +189,10 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
debugArray[index].y = scaleMask.x & mask ? 1.0f : -1.0f;
debugArray[index].z = scaleMask.y & mask ? 1.0f : -1.0f;
debugArray[index].w = + 10.0f;
*/
}
#endif
*/
}
// Write results
...
...
@@ -308,6 +308,7 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
}
/*
#ifdef AMOEBA_DEBUG
if( atomI == targetAtom || (y+jIdx) == targetAtom ){
unsigned int index = atomI == targetAtom ? (y+jIdx) : atomI;
...
...
@@ -360,6 +361,7 @@ if( atomI == targetAtom || (y+jIdx) == targetAtom ){
debugArray[index].w = flag;
}
#endif
*/
}
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
View file @
4d763c4d
...
...
@@ -528,8 +528,8 @@ void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu )
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
particles
,
3
,
gpu
->
psPosq4
,
outputVector
);
cudaLoadCudaFloatArray
(
particles
,
9
,
amoebaGpu
->
psRotationMatrix
,
outputVector
);
cudaLoadCudaFloat4Array
(
particles
,
3
,
gpu
->
psPosq4
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaLoadCudaFloatArray
(
particles
,
9
,
amoebaGpu
->
psRotationMatrix
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaRotationMatrices"
,
fileId
,
outputVector
);
}
if
(
0
){
...
...
@@ -539,9 +539,9 @@ void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu )
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
particles
,
3
,
gpu
->
psPosq4
,
outputVector
);
cudaLoadCudaFloatArray
(
particles
,
3
,
amoebaGpu
->
psLabFrameDipole
,
outputVector
);
cudaLoadCudaFloatArray
(
particles
,
9
,
amoebaGpu
->
psLabFrameQuadrupole
,
outputVector
);
cudaLoadCudaFloat4Array
(
particles
,
3
,
gpu
->
psPosq4
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaLoadCudaFloatArray
(
particles
,
3
,
amoebaGpu
->
psLabFrameDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaLoadCudaFloatArray
(
particles
,
9
,
amoebaGpu
->
psLabFrameQuadrupole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaRotatedMoments"
,
fileId
,
outputVector
);
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
View file @
4d763c4d
...
...
@@ -715,8 +715,8 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
psTempForce
,
outputVector
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
psTempForce
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaVdw"
,
fileId
,
outputVector
);
delete
psTempForce
;
//exit(0);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
View file @
4d763c4d
...
...
@@ -631,8 +631,8 @@ void kCalculateAmoebaWcaDispersionForces( amoebaGpuContext amoebaGpu )
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
psTempForce
,
outputVector
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
NULL
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
psTempForce
,
outputVector
,
NULL
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaWca"
,
fileId
,
outputVector
);
delete
psTempForce
;
//exit(0);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment