Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
1aff5bcb
"...serialization/tests/TestSerializeAmoebaAngleForce.cpp" did not exist on "5d9b6481e9a8736859f6d9892e55d554db210079"
Commit
1aff5bcb
authored
Apr 07, 2011
by
Mark Friedrichs
Browse files
Torques now mapped to forces only once per force calculation
parent
101f206d
Changes
19
Show whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
50 additions
and
278 deletions
+50
-278
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+0
-5
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
...ins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
+6
-114
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEAndGkFields.cu
...cuda/src/kernels/kCalculateAmoebaCudaFixedEAndGkFields.cu
+3
-3
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.cu
...forms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.cu
+2
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
...latforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
+2
-94
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.h
...platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.h
+7
-8
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
+3
-9
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.h
...orms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.h
+7
-8
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
...c/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
+4
-4
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
...uda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
+2
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
...eba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
+3
-18
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
...src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
+2
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+2
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
...forms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaUtilities.cu
...atforms/cuda/src/kernels/kCalculateAmoebaCudaUtilities.cu
+2
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaUtilities.h
...latforms/cuda/src/kernels/kCalculateAmoebaCudaUtilities.h
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
...platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
+1
-1
No files found.
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
1aff5bcb
...
@@ -793,17 +793,12 @@ static void computeAmoebaMultipoleForce( AmoebaCudaData& data ) {
...
@@ -793,17 +793,12 @@ static void computeAmoebaMultipoleForce( AmoebaCudaData& data ) {
if
(
data
.
getHasAmoebaGeneralizedKirkwood
()
){
if
(
data
.
getHasAmoebaGeneralizedKirkwood
()
){
kCalculateObcGbsaBornSum
(
gpu
->
gpuContext
);
kCalculateObcGbsaBornSum
(
gpu
->
gpuContext
);
kReduceObcGbsaBornSum
(
gpu
->
gpuContext
);
kReduceObcGbsaBornSum
(
gpu
->
gpuContext
);
//initializeCudaFloatArray( gpu->gpuContext->natoms, 1, gpu->gpuContext->psBornRadii, 0.1 );
//initializeCudaFloatArray( gpu->gpuContext->natoms, 1, gpu->gpuContext->psObcChain, 0.0 );
}
}
// multipoles
// multipoles
kCalculateAmoebaMultipoleForces
(
gpu
,
data
.
getHasAmoebaGeneralizedKirkwood
()
);
kCalculateAmoebaMultipoleForces
(
gpu
,
data
.
getHasAmoebaGeneralizedKirkwood
()
);
//kClearForces(gpu->gpuContext);
//kClearEnergy(gpu->gpuContext);
// GK
// GK
if
(
data
.
getHasAmoebaGeneralizedKirkwood
()
){
if
(
data
.
getHasAmoebaGeneralizedKirkwood
()
){
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
View file @
1aff5bcb
...
@@ -104,7 +104,7 @@ extern void cudaWriteFloat4AndFloat1ArraysToFile( int numberOfAtoms, const std::
...
@@ -104,7 +104,7 @@ extern void cudaWriteFloat4AndFloat1ArraysToFile( int numberOfAtoms, const std::
extern
void
SetCalculateAmoebaElectrostaticSim
(
amoebaGpuContext
amoebaGpu
);
extern
void
SetCalculateAmoebaElectrostaticSim
(
amoebaGpuContext
amoebaGpu
);
extern
void
GetCalculateAmoebaElectrostaticSim
(
amoebaGpuContext
amoebaGpu
);
extern
void
GetCalculateAmoebaElectrostaticSim
(
amoebaGpuContext
amoebaGpu
);
extern
void
cudaComputeAmoebaElectrostatic
(
amoebaGpuContext
amoebaGpu
);
extern
void
cudaComputeAmoebaElectrostatic
(
amoebaGpuContext
amoebaGpu
,
int
addTorqueToForce
);
extern
void
SetCalculateAmoebaPmeDirectElectrostaticSim
(
amoebaGpuContext
amoebaGpu
);
extern
void
SetCalculateAmoebaPmeDirectElectrostaticSim
(
amoebaGpuContext
amoebaGpu
);
extern
void
GetCalculateAmoebaPmeDirectElectrostaticSim
(
amoebaGpuContext
amoebaGpu
);
extern
void
GetCalculateAmoebaPmeDirectElectrostaticSim
(
amoebaGpuContext
amoebaGpu
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
View file @
1aff5bcb
...
@@ -705,7 +705,7 @@ static void kReduceTorque(amoebaGpuContext amoebaGpu )
...
@@ -705,7 +705,7 @@ static void kReduceTorque(amoebaGpuContext amoebaGpu )
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
amoebaGpu
->
psTorque
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
amoebaGpu
->
psTorque
->
_pDevData
,
0
);
LAUNCHERROR
(
"kReduceElectrostaticTorque"
);
LAUNCHERROR
(
"kReduceElectrostaticTorque"
);
}
}
...
@@ -718,7 +718,7 @@ static void kReduceTorque(amoebaGpuContext amoebaGpu )
...
@@ -718,7 +718,7 @@ static void kReduceTorque(amoebaGpuContext amoebaGpu )
--------------------------------------------------------------------------------------- */
--------------------------------------------------------------------------------------- */
void
cudaComputeAmoebaElectrostatic
(
amoebaGpuContext
amoebaGpu
)
void
cudaComputeAmoebaElectrostatic
(
amoebaGpuContext
amoebaGpu
,
int
addTorqueToForce
)
{
{
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
...
@@ -768,7 +768,6 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu )
...
@@ -768,7 +768,6 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu )
}
}
kClearFields_3
(
amoebaGpu
,
1
);
kClearFields_3
(
amoebaGpu
,
1
);
LAUNCHERROR
(
"kClearFields_3 kCalculateAmoebaCudaElectrostatic"
);
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
if
(
amoebaGpu
->
log
){
...
@@ -812,117 +811,10 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu )
...
@@ -812,117 +811,10 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu )
}
}
LAUNCHERROR
(
"kCalculateAmoebaCudaElectrostaticN2Forces"
);
LAUNCHERROR
(
"kCalculateAmoebaCudaElectrostaticN2Forces"
);
if
(
addTorqueToForce
){
kReduceTorque
(
amoebaGpu
);
kReduceTorque
(
amoebaGpu
);
LAUNCHERROR
(
"kReduceForceTorque"
);
cudaComputeAmoebaMapTorqueAndAddToForce
(
amoebaGpu
,
amoebaGpu
->
psTorque
);
cudaComputeAmoebaMapTorqueAndAddToForce
(
amoebaGpu
,
amoebaGpu
->
psTorque
);
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
amoebaGpu
->
psTorque
->
Download
();
debugArray
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished Electrostatic kernel execution
\n
"
);
(
void
)
fflush
(
amoebaGpu
->
log
);
int
maxPrint
=
1400
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
int
indexOffset
=
ii
*
3
;
// torque
(
void
)
fprintf
(
amoebaGpu
->
log
,
"ElectrostaticT [%16.9e %16.9e %16.9e] "
,
amoebaGpu
->
psTorque
->
_pSysData
[
indexOffset
],
amoebaGpu
->
psTorque
->
_pSysData
[
indexOffset
+
1
],
amoebaGpu
->
psTorque
->
_pSysData
[
indexOffset
+
2
]
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
if
(
ii
==
maxPrint
&&
(
gpu
->
natoms
-
maxPrint
)
>
ii
){
ii
=
gpu
->
natoms
-
maxPrint
;
}
}
if
(
1
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"DebugElec
\n
"
);
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
int
debugIndex
=
jj
;
for
(
int
kk
=
0
;
kk
<
8
;
kk
++
){
float
conversion
=
kk
>=
1
&&
kk
<=
8
?
1.0
f
/
4.184
f
:
1.0
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d %5d [%16.9e %16.9e %16.9e %16.9e] E11
\n
"
,
targetAtom
,
jj
,
conversion
*
debugArray
->
_pSysData
[
debugIndex
].
x
,
conversion
*
debugArray
->
_pSysData
[
debugIndex
].
y
,
conversion
*
debugArray
->
_pSysData
[
debugIndex
].
z
,
debugArray
->
_pSysData
[
debugIndex
].
w
);
debugIndex
+=
paddedNumberOfAtoms
;
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
}
}
if
(
1
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"DebugElec
\n
"
);
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
int
debugIndex1
=
jj
+
paddedNumberOfAtoms
;
int
debugIndex2
=
jj
+
5
*
paddedNumberOfAtoms
;
int
debugIndex3
=
jj
+
6
*
paddedNumberOfAtoms
;
int
debugIndex4
=
jj
+
4
*
paddedNumberOfAtoms
;
int
debugIndex5
=
jj
+
7
*
paddedNumberOfAtoms
;
float
conversion
=
1.0
f
/
4.184
f
;
int
i1
,
i2
;
if
(
jj
<
targetAtom
){
i1
=
jj
;
i2
=
targetAtom
;
}
else
{
i1
=
targetAtom
;
i2
=
jj
;
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d %5d %16.9e %16.9e %16.9e %16.9e %16.9e %16.9e %16.9e %16.9e %16.9e %16.9e %16.9e %16.9e F11
\n
"
,
i1
,
i2
,
conversion
*
debugArray
->
_pSysData
[
debugIndex1
].
x
,
conversion
*
debugArray
->
_pSysData
[
debugIndex1
].
y
,
conversion
*
debugArray
->
_pSysData
[
debugIndex1
].
z
,
conversion
*
debugArray
->
_pSysData
[
debugIndex2
].
x
,
conversion
*
debugArray
->
_pSysData
[
debugIndex2
].
y
,
conversion
*
debugArray
->
_pSysData
[
debugIndex2
].
z
,
conversion
*
debugArray
->
_pSysData
[
debugIndex3
].
x
,
conversion
*
debugArray
->
_pSysData
[
debugIndex3
].
y
,
conversion
*
debugArray
->
_pSysData
[
debugIndex3
].
z
,
conversion
*
debugArray
->
_pSysData
[
debugIndex5
].
x
,
conversion
*
debugArray
->
_pSysData
[
debugIndex5
].
y
,
conversion
*
debugArray
->
_pSysData
[
debugIndex5
].
z
);
}
}
}
(
void
)
fflush
(
amoebaGpu
->
log
);
if
(
0
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s Tiled F & T
\n
"
,
methodName
);
fflush
(
amoebaGpu
->
log
);
int
maxPrint
=
12
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
// print cpu & gpu reductions
int
offset
=
3
*
ii
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%6d T[%16.7e %16.7e %16.7e]
\n
"
,
ii
,
amoebaGpu
->
psTorque
->
_pSysData
[
offset
],
amoebaGpu
->
psTorque
->
_pSysData
[
offset
+
1
],
amoebaGpu
->
psTorque
->
_pSysData
[
offset
+
2
]
);
if
(
(
ii
==
maxPrint
)
&&
(
ii
<
(
gpu
->
natoms
-
maxPrint
))
)
ii
=
gpu
->
natoms
-
maxPrint
;
}
}
if
(
1
){
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
NULL
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psTorque
,
outputVector
,
NULL
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaTorque"
,
fileId
,
outputVector
);
}
}
delete
debugArray
;
#endif
if
(
0
){
if
(
0
){
std
::
vector
<
int
>
fileId
;
std
::
vector
<
int
>
fileId
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEAndGkFields.cu
View file @
1aff5bcb
...
@@ -41,17 +41,17 @@ static void kReduceEAndGkFields(amoebaGpuContext amoebaGpu )
...
@@ -41,17 +41,17 @@ static void kReduceEAndGkFields(amoebaGpuContext amoebaGpu )
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
amoebaGpu
->
psE_Field
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
amoebaGpu
->
psE_Field
->
_pDevData
,
0
);
LAUNCHERROR
(
"kReduceEAndGK_Fields1"
);
LAUNCHERROR
(
"kReduceEAndGK_Fields1"
);
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
,
0
);
LAUNCHERROR
(
"kReduceEAndGK_Fields2"
);
LAUNCHERROR
(
"kReduceEAndGK_Fields2"
);
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
amoebaGpu
->
psWorkArray_3_3
->
_pDevData
,
amoebaGpu
->
psGk_Field
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_3
->
_pDevData
,
amoebaGpu
->
psGk_Field
->
_pDevData
,
0
);
LAUNCHERROR
(
"kReduceEAndGK_Fields3"
);
LAUNCHERROR
(
"kReduceEAndGK_Fields3"
);
}
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.cu
View file @
1aff5bcb
...
@@ -39,12 +39,12 @@ static void kReduceE_Fields_kernel(amoebaGpuContext amoebaGpu )
...
@@ -39,12 +39,12 @@ static void kReduceE_Fields_kernel(amoebaGpuContext amoebaGpu )
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
amoebaGpu
->
psE_Field
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
amoebaGpu
->
psE_Field
->
_pDevData
,
0
);
LAUNCHERROR
(
"kReduceE_Fields1"
);
LAUNCHERROR
(
"kReduceE_Fields1"
);
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
,
0
);
LAUNCHERROR
(
"kReduceE_Fields2"
);
LAUNCHERROR
(
"kReduceE_Fields2"
);
}
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
View file @
1aff5bcb
...
@@ -1531,19 +1531,6 @@ __device__ void zeroKirkwoodParticleSharedField( struct KirkwoodParticle* sA )
...
@@ -1531,19 +1531,6 @@ __device__ void zeroKirkwoodParticleSharedField( struct KirkwoodParticle* sA )
#define METHOD_NAME(a, b) a##N2ByWarp##b
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateAmoebaCudaKirkwood.h"
#include "kCalculateAmoebaCudaKirkwood.h"
// reduce psWorkArray_3_1 -> force
// reduce psWorkArray_3_2 -> torque
static
void
kReduceTorque
(
amoebaGpuContext
amoebaGpu
)
{
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
amoebaGpu
->
psTorque
->
_pDevData
);
LAUNCHERROR
(
"kReduceTorque"
);
}
// reduce psWorkArray_1_1 -> dBorn
// reduce psWorkArray_1_1 -> dBorn
// reduce psWorkArray_1_2 -> dBornPolar
// reduce psWorkArray_1_2 -> dBornPolar
...
@@ -1555,13 +1542,13 @@ static void kReduce_dBorn(amoebaGpuContext amoebaGpu )
...
@@ -1555,13 +1542,13 @@ static void kReduce_dBorn(amoebaGpuContext amoebaGpu )
/*
/*
kReduceFields_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.bsf_reduce_threads_per_block>>>(
kReduceFields_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.bsf_reduce_threads_per_block>>>(
gpu->sim.paddedNumberOfAtoms, gpu->sim.outputBuffers,
gpu->sim.paddedNumberOfAtoms, gpu->sim.outputBuffers,
amoebaGpu->psWorkArray_1_1->_pDevData, amoebaGpu->psBorn->_pDevData );
amoebaGpu->psWorkArray_1_1->_pDevData, amoebaGpu->psBorn->_pDevData
, 0
);
LAUNCHERROR("kReduce_dBorn1");
LAUNCHERROR("kReduce_dBorn1");
*/
*/
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
gpu
->
sim
.
outputBuffers
,
gpu
->
sim
.
paddedNumberOfAtoms
,
gpu
->
sim
.
outputBuffers
,
amoebaGpu
->
psWorkArray_1_2
->
_pDevData
,
amoebaGpu
->
psBornPolar
->
_pDevData
);
amoebaGpu
->
psWorkArray_1_2
->
_pDevData
,
amoebaGpu
->
psBornPolar
->
_pDevData
,
0
);
LAUNCHERROR
(
"kReduce_dBorn2"
);
LAUNCHERROR
(
"kReduce_dBorn2"
);
}
}
...
@@ -1841,7 +1828,6 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
...
@@ -1841,7 +1828,6 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
}
}
kClearFields_1
(
amoebaGpu
);
kClearFields_1
(
amoebaGpu
);
kClearFields_3
(
amoebaGpu
,
6
);
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
if
(
amoebaGpu
->
log
){
...
@@ -1871,78 +1857,10 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
...
@@ -1871,78 +1857,10 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
);
);
#endif
#endif
}
}
LAUNCHERROR
(
"kCalculateAmoebaCudaKirkwoodN2Forces"
);
LAUNCHERROR
(
"kCalculateAmoebaCudaKirkwoodN2Forces"
);
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
amoebaGpu
->
psWorkArray_3_1
->
Download
();
amoebaGpu
->
psWorkArray_1_1
->
Download
();
amoebaGpu
->
psWorkArray_1_2
->
Download
();
/*
amoebaGpu->psLabFrameDipole->Download();
amoebaGpu->psLabFrameQuadrupole->Download();
amoebaGpu->psInducedDipoleS->Download();
amoebaGpu->psInducedDipolePolarS->Download();
for( int ii = 0; ii < gpu->natoms; ii++ ){
int indexOffset3 = ii*3;
int indexOffset9 = ii*9;
(void) fprintf( amoebaGpu->log, "%5d [%14.7e %14.7e %14.7e] q[%14.7e %14.7e %14.7e]\n", ii,
amoebaGpu->psLabFrameDipole->_pSysData[indexOffset3],
amoebaGpu->psLabFrameDipole->_pSysData[indexOffset3+1],
amoebaGpu->psLabFrameDipole->_pSysData[indexOffset3+2],
amoebaGpu->psLabFrameQuadrupole->_pSysData[indexOffset9],
amoebaGpu->psLabFrameQuadrupole->_pSysData[indexOffset9+1],
amoebaGpu->psLabFrameQuadrupole->_pSysData[indexOffset9+2] );
(void) fprintf( amoebaGpu->log, "%5d [%14.7e %14.7e %14.7e] q[%14.7e %14.7e %14.7e]\n", ii,
amoebaGpu->psInducedDipoleS->_pSysData[indexOffset3],
amoebaGpu->psInducedDipoleS->_pSysData[indexOffset3+1],
amoebaGpu->psInducedDipoleS->_pSysData[indexOffset3+2],
amoebaGpu->psInducedDipolePolarS->_pSysData[indexOffset3],
amoebaGpu->psInducedDipolePolarS->_pSysData[indexOffset3+1],
amoebaGpu->psInducedDipolePolarS->_pSysData[indexOffset3+2] );
}
*/
debugArray
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Target Info
\n
"
);
(
void
)
fflush
(
amoebaGpu
->
log
);
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
int
debugIndex
=
jj
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d %5d DebugGk
\n
"
,
targetAtom
,
jj
);
for
(
int
kk
=
0
;
kk
<
8
;
kk
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"[%16.9e %16.9e %16.9e %16.9e]
\n
"
,
debugArray
->
_pSysData
[
debugIndex
].
x
,
debugArray
->
_pSysData
[
debugIndex
].
y
,
debugArray
->
_pSysData
[
debugIndex
].
z
,
debugArray
->
_pSysData
[
debugIndex
].
w
);
debugIndex
+=
paddedNumberOfAtoms
;
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
}
}
#endif
kReduceTorque
(
amoebaGpu
);
if
(
0
){
std
::
vector
<
int
>
fileId
;
VectorOfDoubleVectors
outputVector
;
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, NULL, 1.0f );
reduceAndCopyCUDAStreamFloat4
(
gpu
->
psForce4
,
amoebaGpu
->
psWorkArray_3_1
,
1.0
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psWorkArray_3_1
,
outputVector
,
NULL
,
1.0
f
/
4.184
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psTorque
,
outputVector
,
NULL
,
1.0
f
/
4.184
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaKirkwoodForceTorque"
,
fileId
,
outputVector
);
}
kReduceToBornForcePrefactor
(
amoebaGpu
);
kReduceToBornForcePrefactor
(
amoebaGpu
);
// map torques to forces
cudaComputeAmoebaMapTorqueAndAddToForce
(
amoebaGpu
,
amoebaGpu
->
psTorque
);
if
(
0
){
if
(
0
){
std
::
vector
<
int
>
fileId
;
std
::
vector
<
int
>
fileId
;
VectorOfDoubleVectors
outputVector
;
VectorOfDoubleVectors
outputVector
;
...
@@ -1969,15 +1887,5 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
...
@@ -1969,15 +1887,5 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
kCalculateAmoebaKirkwoodEDiff
(
amoebaGpu
);
kCalculateAmoebaKirkwoodEDiff
(
amoebaGpu
);
if
(
0
){
std
::
vector
<
int
>
fileId
;
VectorOfDoubleVectors
outputVector
;
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, NULL, 1.0f );
reduceAndCopyCUDAStreamFloat4
(
gpu
->
psForce4
,
amoebaGpu
->
psWorkArray_3_1
,
1.0
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psWorkArray_3_1
,
outputVector
,
NULL
,
1.0
f
/
4.184
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psTorque
,
outputVector
,
NULL
,
1.0
f
/
4.184
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaKirkwoodForceTorquePostEDiff"
,
fileId
,
outputVector
);
}
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
}
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.h
View file @
1aff5bcb
...
@@ -54,8 +54,7 @@ void METHOD_NAME(kCalculateAmoebaCudaKirkwood, Forces_kernel)(
...
@@ -54,8 +54,7 @@ void METHOD_NAME(kCalculateAmoebaCudaKirkwood, Forces_kernel)(
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
lasty
=
0xFFFFFFFF
;
unsigned
int
lasty
=
0xFFFFFFFF
;
// pWorkArray_3_1 == force
// pWorkArray_3_1 == torque
// pWorkArray_3_2 == torque
// pWorkArray_1_1 == dBorn
// pWorkArray_1_1 == dBorn
// pWorkArray_1_2 == dBornPolar
// pWorkArray_1_2 == dBornPolar
...
@@ -220,7 +219,7 @@ if( atomI == targetAtom || atomJ == targetAtom ){
...
@@ -220,7 +219,7 @@ if( atomI == targetAtom || atomJ == targetAtom ){
cAmoebaSim
.
pWorkArray_1_2
[
offset
]
=
of
;
cAmoebaSim
.
pWorkArray_1_2
[
offset
]
=
of
;
add3dArrayToFloat4
(
offset
,
localParticle
.
force
,
cSim
.
pForce4
);
add3dArrayToFloat4
(
offset
,
localParticle
.
force
,
cSim
.
pForce4
);
lo
ad3dArray
BufferPerWarp
(
3
*
offset
,
localParticle
.
torque
,
cAmoebaSim
.
pWorkArray_3_1
);
a
d
d3dArray
(
3
*
offset
,
localParticle
.
torque
,
cAmoebaSim
.
pWorkArray_3_1
);
#else
#else
unsigned
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
;
unsigned
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
;
...
@@ -229,7 +228,7 @@ if( atomI == targetAtom || atomJ == targetAtom ){
...
@@ -229,7 +228,7 @@ if( atomI == targetAtom || atomJ == targetAtom ){
cAmoebaSim
.
pWorkArray_1_2
[
offset
]
=
dBornPolarSum
;
cAmoebaSim
.
pWorkArray_1_2
[
offset
]
=
dBornPolarSum
;
add3dArrayToFloat4
(
offset
,
localParticle
.
force
,
cSim
.
pForce4
);
add3dArrayToFloat4
(
offset
,
localParticle
.
force
,
cSim
.
pForce4
);
lo
ad3dArray
(
3
*
offset
,
localParticle
.
torque
,
cAmoebaSim
.
pWorkArray_3_1
);
a
d
d3dArray
(
3
*
offset
,
localParticle
.
torque
,
cAmoebaSim
.
pWorkArray_3_1
);
#endif
#endif
}
}
...
@@ -377,7 +376,7 @@ if( mask || !mask ){
...
@@ -377,7 +376,7 @@ if( mask || !mask ){
cAmoebaSim
.
pWorkArray_1_2
[
offset
]
=
of
;
cAmoebaSim
.
pWorkArray_1_2
[
offset
]
=
of
;
add3dArrayToFloat4
(
offset
,
localParticle
.
force
,
cSim
.
pForce4
);
add3dArrayToFloat4
(
offset
,
localParticle
.
force
,
cSim
.
pForce4
);
lo
ad3dArray
BufferPerWarp
(
3
*
offset
,
localParticle
.
torque
,
cAmoebaSim
.
pWorkArray_3_1
);
a
d
d3dArray
(
3
*
offset
,
localParticle
.
torque
,
cAmoebaSim
.
pWorkArray_3_1
);
offset
=
y
+
tgx
+
warp
*
cSim
.
paddedNumberOfAtoms
;
offset
=
y
+
tgx
+
warp
*
cSim
.
paddedNumberOfAtoms
;
...
@@ -390,7 +389,7 @@ if( mask || !mask ){
...
@@ -390,7 +389,7 @@ if( mask || !mask ){
cAmoebaSim
.
pWorkArray_1_2
[
offset
]
=
of
;
cAmoebaSim
.
pWorkArray_1_2
[
offset
]
=
of
;
add3dArrayToFloat4
(
offset
,
sA
[
threadIdx
.
x
].
force
,
cSim
.
pForce4
);
add3dArrayToFloat4
(
offset
,
sA
[
threadIdx
.
x
].
force
,
cSim
.
pForce4
);
lo
ad3dArray
BufferPerWarp
(
3
*
offset
,
sA
[
threadIdx
.
x
].
torque
,
cAmoebaSim
.
pWorkArray_3_1
);
a
d
d3dArray
(
3
*
offset
,
sA
[
threadIdx
.
x
].
torque
,
cAmoebaSim
.
pWorkArray_3_1
);
#else
#else
unsigned
int
offset
=
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
;
unsigned
int
offset
=
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
;
...
@@ -398,7 +397,7 @@ if( mask || !mask ){
...
@@ -398,7 +397,7 @@ if( mask || !mask ){
cAmoebaSim
.
pWorkArray_1_2
[
offset
]
=
dBornPolarSum
;
cAmoebaSim
.
pWorkArray_1_2
[
offset
]
=
dBornPolarSum
;
add3dArrayToFloat4
(
offset
,
localParticle
.
force
,
cSim
.
pForce4
);
add3dArrayToFloat4
(
offset
,
localParticle
.
force
,
cSim
.
pForce4
);
lo
ad3dArray
(
3
*
offset
,
localParticle
.
torque
,
cAmoebaSim
.
pWorkArray_3_1
);
a
d
d3dArray
(
3
*
offset
,
localParticle
.
torque
,
cAmoebaSim
.
pWorkArray_3_1
);
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
;
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
;
...
@@ -406,7 +405,7 @@ if( mask || !mask ){
...
@@ -406,7 +405,7 @@ if( mask || !mask ){
cAmoebaSim
.
pWorkArray_1_2
[
offset
]
=
sA
[
threadIdx
.
x
].
dBornRadiusPolar
;
cAmoebaSim
.
pWorkArray_1_2
[
offset
]
=
sA
[
threadIdx
.
x
].
dBornRadiusPolar
;
add3dArrayToFloat4
(
offset
,
sA
[
threadIdx
.
x
].
force
,
cSim
.
pForce4
);
add3dArrayToFloat4
(
offset
,
sA
[
threadIdx
.
x
].
force
,
cSim
.
pForce4
);
lo
ad3dArray
(
3
*
offset
,
sA
[
threadIdx
.
x
].
torque
,
cAmoebaSim
.
pWorkArray_3_1
);
a
d
d3dArray
(
3
*
offset
,
sA
[
threadIdx
.
x
].
torque
,
cAmoebaSim
.
pWorkArray_3_1
);
#endif
#endif
lasty
=
y
;
lasty
=
y
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
View file @
1aff5bcb
...
@@ -936,7 +936,7 @@ static void kReduceTorque( amoebaGpuContext amoebaGpu )
...
@@ -936,7 +936,7 @@ static void kReduceTorque( amoebaGpuContext amoebaGpu )
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
amoebaGpu
->
psTorque
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
amoebaGpu
->
psTorque
->
_pDevData
,
0
);
LAUNCHERROR
(
"kReduceForceTorqueKirkwoodEDiff"
);
LAUNCHERROR
(
"kReduceForceTorqueKirkwoodEDiff"
);
}
}
...
@@ -985,9 +985,6 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
...
@@ -985,9 +985,6 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
unsigned
int
targetAtom
=
0
;
unsigned
int
targetAtom
=
0
;
#endif
#endif
kClearFields_3
(
amoebaGpu
,
6
);
LAUNCHERROR
(
"kClearFields_3_kCalculateAmoebaCudaKirkwoodEDiff"
);
static
unsigned
int
threadsPerBlock
=
0
;
static
unsigned
int
threadsPerBlock
=
0
;
if
(
threadsPerBlock
==
0
){
if
(
threadsPerBlock
==
0
){
unsigned
int
maxThreads
;
unsigned
int
maxThreads
;
...
@@ -1047,14 +1044,11 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
...
@@ -1047,14 +1044,11 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
#endif
#endif
}
}
LAUNCHERROR
(
"kCalculateAmoebaCudaKirkwoodEDiffN2Forces"
);
LAUNCHERROR
(
"kCalculateAmoebaCudaKirkwoodEDiffN2Forces"
);
kReduceTorque
(
amoebaGpu
);
// reduce and map torques to forces
LAUNCHERROR
(
"kReduceForceTorque_kCalculateAmoebaCudaKirkwoodEDiff"
);
// map torques to forces
kReduceTorque
(
amoebaGpu
);
cudaComputeAmoebaMapTorqueAndAddToForce
(
amoebaGpu
,
amoebaGpu
->
psTorque
);
cudaComputeAmoebaMapTorqueAndAddToForce
(
amoebaGpu
,
amoebaGpu
->
psTorque
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.h
View file @
1aff5bcb
...
@@ -251,12 +251,12 @@ if( atomI == targetAtom || atomJ == targetAtom ){
...
@@ -251,12 +251,12 @@ if( atomI == targetAtom || atomJ == targetAtom ){
unsigned
int
offset
=
x
+
tgx
+
warp
*
cSim
.
paddedNumberOfAtoms
;
unsigned
int
offset
=
x
+
tgx
+
warp
*
cSim
.
paddedNumberOfAtoms
;
add3dArrayToFloat4
(
offset
,
localParticle
.
force
,
cSim
.
pForce4
);
add3dArrayToFloat4
(
offset
,
localParticle
.
force
,
cSim
.
pForce4
);
lo
ad3dArray
BufferPerWarp
(
3
*
offset
,
localParticle
.
torque
,
outputTorque
);
a
d
d3dArray
(
3
*
offset
,
localParticle
.
torque
,
outputTorque
);
#else
#else
unsigned
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
;
unsigned
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
;
add3dArrayToFloat4
(
offset
,
localParticle
.
force
,
cSim
.
pForce4
);
add3dArrayToFloat4
(
offset
,
localParticle
.
force
,
cSim
.
pForce4
);
lo
ad3dArray
(
3
*
offset
,
localParticle
.
torque
,
outputTorque
);
a
d
d3dArray
(
3
*
offset
,
localParticle
.
torque
,
outputTorque
);
#endif
#endif
...
@@ -437,23 +437,22 @@ if( atomI == targetAtom || atomJ == targetAtom ){
...
@@ -437,23 +437,22 @@ if( atomI == targetAtom || atomJ == targetAtom ){
unsigned
int
offset
=
x
+
tgx
+
warp
*
cSim
.
paddedNumberOfAtoms
;
unsigned
int
offset
=
x
+
tgx
+
warp
*
cSim
.
paddedNumberOfAtoms
;
add3dArrayToFloat4
(
offset
,
localParticle
.
force
,
cSim
.
pForce4
);
add3dArrayToFloat4
(
offset
,
localParticle
.
force
,
cSim
.
pForce4
);
lo
ad3dArray
BufferPerWarp
(
3
*
offset
,
localParticle
.
torque
,
outputTorque
);
a
d
d3dArray
(
3
*
offset
,
localParticle
.
torque
,
outputTorque
);
offset
=
y
+
tgx
+
warp
*
cSim
.
paddedNumberOfAtoms
;
offset
=
y
+
tgx
+
warp
*
cSim
.
paddedNumberOfAtoms
;
add3dArrayToFloat4
(
offset
,
sA
[
threadIdx
.
x
].
force
,
cSim
.
pForce4
);
add3dArrayToFloat4
(
offset
,
sA
[
threadIdx
.
x
].
force
,
cSim
.
pForce4
);
lo
ad3dArray
BufferPerWarp
(
3
*
offset
,
sA
[
threadIdx
.
x
].
torque
,
outputTorque
);
a
d
d3dArray
(
3
*
offset
,
sA
[
threadIdx
.
x
].
torque
,
outputTorque
);
#else
#else
unsigned
int
offset
=
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
;
unsigned
int
offset
=
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
;
add3dArrayToFloat4
(
offset
,
localParticle
.
force
,
cSim
.
pForce4
);
add3dArrayToFloat4
(
offset
,
localParticle
.
force
,
cSim
.
pForce4
);
load3dArray
(
3
*
offset
,
localParticle
.
torque
,
outputTorque
);
add3dArray
(
3
*
offset
,
localParticle
.
torque
,
outputTorque
);
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
;
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
;
add3dArrayToFloat4
(
offset
,
sA
[
threadIdx
.
x
].
force
,
cSim
.
pForce4
);
add3dArrayToFloat4
(
offset
,
sA
[
threadIdx
.
x
].
force
,
cSim
.
pForce4
);
lo
ad3dArray
(
3
*
offset
,
sA
[
threadIdx
.
x
].
torque
,
outputTorque
);
a
d
d3dArray
(
3
*
offset
,
sA
[
threadIdx
.
x
].
torque
,
outputTorque
);
#endif
#endif
lasty
=
y
;
lasty
=
y
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
View file @
1aff5bcb
...
@@ -418,22 +418,22 @@ static void kReduceMutualInducedAndGkFields(amoebaGpuContext amoebaGpu,
...
@@ -418,22 +418,22 @@ static void kReduceMutualInducedAndGkFields(amoebaGpuContext amoebaGpu,
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
outputArray
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
outputArray
->
_pDevData
,
0
);
LAUNCHERROR
(
"kReduceMutualInducedAndGkFields1"
);
LAUNCHERROR
(
"kReduceMutualInducedAndGkFields1"
);
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
,
outputPolarArray
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
,
outputPolarArray
->
_pDevData
,
0
);
LAUNCHERROR
(
"kReduceMutualInducedAndGkFields2"
);
LAUNCHERROR
(
"kReduceMutualInducedAndGkFields2"
);
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
amoebaGpu
->
psWorkArray_3_3
->
_pDevData
,
outputArrayS
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_3
->
_pDevData
,
outputArrayS
->
_pDevData
,
0
);
LAUNCHERROR
(
"kReduceMutualInducedAndGkFields3"
);
LAUNCHERROR
(
"kReduceMutualInducedAndGkFields3"
);
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
amoebaGpu
->
psWorkArray_3_4
->
_pDevData
,
outputPolarArrayS
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_4
->
_pDevData
,
outputPolarArrayS
->
_pDevData
,
0
);
LAUNCHERROR
(
"kReduceMutualInducedAndGkFields4"
);
LAUNCHERROR
(
"kReduceMutualInducedAndGkFields4"
);
}
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
View file @
1aff5bcb
...
@@ -220,12 +220,12 @@ static void kReduceMutualInducedFields(amoebaGpuContext amoebaGpu, CUDAStream<fl
...
@@ -220,12 +220,12 @@ static void kReduceMutualInducedFields(amoebaGpuContext amoebaGpu, CUDAStream<fl
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
outputArray
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
outputArray
->
_pDevData
,
0
);
LAUNCHERROR
(
"kReduceMI_Fields1"
);
LAUNCHERROR
(
"kReduceMI_Fields1"
);
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
,
outputPolarArray
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
,
outputPolarArray
->
_pDevData
,
0
);
LAUNCHERROR
(
"kReduceMI_Fields2"
);
LAUNCHERROR
(
"kReduceMI_Fields2"
);
}
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
View file @
1aff5bcb
...
@@ -867,15 +867,15 @@ void kComputeInducedDipoleForceAndEnergy_kernel()
...
@@ -867,15 +867,15 @@ void kComputeInducedDipoleForceAndEnergy_kernel()
multipole
[
8
]
=
2
*
cAmoebaSim
.
pLabFrameQuadrupole
[
i
*
9
+
2
];
multipole
[
8
]
=
2
*
cAmoebaSim
.
pLabFrameQuadrupole
[
i
*
9
+
2
];
multipole
[
9
]
=
2
*
cAmoebaSim
.
pLabFrameQuadrupole
[
i
*
9
+
5
];
multipole
[
9
]
=
2
*
cAmoebaSim
.
pLabFrameQuadrupole
[
i
*
9
+
5
];
float
*
phidp
=
&
cAmoebaSim
.
pPhidp
[
20
*
i
];
float
*
phidp
=
&
cAmoebaSim
.
pPhidp
[
20
*
i
];
cAmoebaSim
.
pTorque
[
3
*
i
]
=
0.5
f
*
cAmoebaSim
.
electric
*
(
multipole
[
3
]
*
yscale
*
phidp
[
2
]
-
multipole
[
2
]
*
zscale
*
phidp
[
3
]
cAmoebaSim
.
pTorque
[
3
*
i
]
+
=
0.5
f
*
cAmoebaSim
.
electric
*
(
multipole
[
3
]
*
yscale
*
phidp
[
2
]
-
multipole
[
2
]
*
zscale
*
phidp
[
3
]
+
2.0
f
*
(
multipole
[
6
]
-
multipole
[
5
])
*
zscale
*
zscale
*
phidp
[
9
]
+
2.0
f
*
(
multipole
[
6
]
-
multipole
[
5
])
*
zscale
*
zscale
*
phidp
[
9
]
+
multipole
[
8
]
*
yscale
*
yscale
*
phidp
[
7
]
+
multipole
[
9
]
*
xscale
*
yscale
*
phidp
[
5
]
+
multipole
[
8
]
*
yscale
*
yscale
*
phidp
[
7
]
+
multipole
[
9
]
*
xscale
*
yscale
*
phidp
[
5
]
-
multipole
[
7
]
*
yscale
*
zscale
*
phidp
[
8
]
-
multipole
[
9
]
*
xscale
*
zscale
*
phidp
[
6
]);
-
multipole
[
7
]
*
yscale
*
zscale
*
phidp
[
8
]
-
multipole
[
9
]
*
xscale
*
zscale
*
phidp
[
6
]);
cAmoebaSim
.
pTorque
[
3
*
i
+
1
]
=
0.5
f
*
cAmoebaSim
.
electric
*
(
multipole
[
1
]
*
zscale
*
phidp
[
3
]
-
multipole
[
3
]
*
xscale
*
phidp
[
1
]
cAmoebaSim
.
pTorque
[
3
*
i
+
1
]
+
=
0.5
f
*
cAmoebaSim
.
electric
*
(
multipole
[
1
]
*
zscale
*
phidp
[
3
]
-
multipole
[
3
]
*
xscale
*
phidp
[
1
]
+
2.0
f
*
(
multipole
[
4
]
-
multipole
[
6
])
*
zscale
*
zscale
*
phidp
[
8
]
+
2.0
f
*
(
multipole
[
4
]
-
multipole
[
6
])
*
zscale
*
zscale
*
phidp
[
8
]
+
multipole
[
7
]
*
zscale
*
zscale
*
phidp
[
9
]
+
multipole
[
8
]
*
xscale
*
zscale
*
phidp
[
6
]
+
multipole
[
7
]
*
zscale
*
zscale
*
phidp
[
9
]
+
multipole
[
8
]
*
xscale
*
zscale
*
phidp
[
6
]
-
multipole
[
8
]
*
xscale
*
xscale
*
phidp
[
4
]
-
multipole
[
9
]
*
yscale
*
yscale
*
phidp
[
7
]);
-
multipole
[
8
]
*
xscale
*
xscale
*
phidp
[
4
]
-
multipole
[
9
]
*
yscale
*
yscale
*
phidp
[
7
]);
cAmoebaSim
.
pTorque
[
3
*
i
+
2
]
=
0.5
f
*
cAmoebaSim
.
electric
*
(
multipole
[
2
]
*
xscale
*
phidp
[
1
]
-
multipole
[
1
]
*
yscale
*
phidp
[
2
]
cAmoebaSim
.
pTorque
[
3
*
i
+
2
]
+
=
0.5
f
*
cAmoebaSim
.
electric
*
(
multipole
[
2
]
*
xscale
*
phidp
[
1
]
-
multipole
[
1
]
*
yscale
*
phidp
[
2
]
+
2.0
f
*
(
multipole
[
5
]
-
multipole
[
4
])
*
yscale
*
yscale
*
phidp
[
7
]
+
2.0
f
*
(
multipole
[
5
]
-
multipole
[
4
])
*
yscale
*
yscale
*
phidp
[
7
]
+
multipole
[
7
]
*
xscale
*
xscale
*
phidp
[
4
]
+
multipole
[
9
]
*
yscale
*
zscale
*
phidp
[
8
]
+
multipole
[
7
]
*
xscale
*
xscale
*
phidp
[
4
]
+
multipole
[
9
]
*
yscale
*
zscale
*
phidp
[
8
]
-
multipole
[
7
]
*
xscale
*
yscale
*
phidp
[
5
]
-
multipole
[
8
]
*
zscale
*
zscale
*
phidp
[
9
]);
-
multipole
[
7
]
*
xscale
*
yscale
*
phidp
[
5
]
-
multipole
[
8
]
*
zscale
*
zscale
*
phidp
[
9
]);
...
@@ -1016,20 +1016,6 @@ void kCalculateAmoebaPMEFixedMultipoles(amoebaGpuContext amoebaGpu)
...
@@ -1016,20 +1016,6 @@ void kCalculateAmoebaPMEFixedMultipoles(amoebaGpuContext amoebaGpu)
kComputeFixedMultipoleForceAndEnergy_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
kComputeFixedMultipoleForceAndEnergy_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kComputeFixedMultipoleForceAndEnergy"
);
LAUNCHERROR
(
"kComputeFixedMultipoleForceAndEnergy"
);
if
(
0
){
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
0
);
VectorOfDoubleVectors
outputVector
;
kReduceForces
(
gpu
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psForce4
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaRecipForceOnlyFixed"
,
fileId
,
outputVector
);
kClearForces
(
gpu
);
}
cudaComputeAmoebaMapTorqueAndAddToForce
(
amoebaGpu
,
amoebaGpu
->
psTorque
);
}
}
/**
/**
...
@@ -1062,5 +1048,4 @@ void kCalculateAmoebaPMEInducedDipoleForces(amoebaGpuContext amoebaGpu)
...
@@ -1062,5 +1048,4 @@ void kCalculateAmoebaPMEInducedDipoleForces(amoebaGpuContext amoebaGpu)
kComputeInducedDipoleForceAndEnergy_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
kComputeInducedDipoleForceAndEnergy_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kComputeInducedDipoleForceAndEnergy"
);
LAUNCHERROR
(
"kComputeInducedDipoleForceAndEnergy"
);
cudaComputeAmoebaMapTorqueAndAddToForce
(
amoebaGpu
,
amoebaGpu
->
psTorque
);
}
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
View file @
1aff5bcb
...
@@ -1090,7 +1090,7 @@ static void kReduceTorque(amoebaGpuContext amoebaGpu )
...
@@ -1090,7 +1090,7 @@ static void kReduceTorque(amoebaGpuContext amoebaGpu )
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
amoebaGpu
->
psTorque
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
amoebaGpu
->
psTorque
->
_pDevData
,
1
);
LAUNCHERROR
(
"kReducePmeDirectElectrostaticTorque"
);
LAUNCHERROR
(
"kReducePmeDirectElectrostaticTorque"
);
}
}
...
@@ -1185,7 +1185,6 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
...
@@ -1185,7 +1185,6 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
LAUNCHERROR
(
"kCalculateAmoebaPmeDirectElectrostaticCutoffForces"
);
LAUNCHERROR
(
"kCalculateAmoebaPmeDirectElectrostaticCutoffForces"
);
kReduceTorque
(
amoebaGpu
);
kReduceTorque
(
amoebaGpu
);
cudaComputeAmoebaMapTorqueAndAddToForce
(
amoebaGpu
,
amoebaGpu
->
psTorque
);
}
}
...
@@ -1201,5 +1200,6 @@ void cudaComputeAmoebaPmeElectrostatic( amoebaGpuContext amoebaGpu )
...
@@ -1201,5 +1200,6 @@ void cudaComputeAmoebaPmeElectrostatic( amoebaGpuContext amoebaGpu )
{
{
cudaComputeAmoebaPmeDirectElectrostatic
(
amoebaGpu
);
cudaComputeAmoebaPmeDirectElectrostatic
(
amoebaGpu
);
kCalculateAmoebaPMEInducedDipoleForces
(
amoebaGpu
);
kCalculateAmoebaPMEInducedDipoleForces
(
amoebaGpu
);
cudaComputeAmoebaMapTorqueAndAddToForce
(
amoebaGpu
,
amoebaGpu
->
psTorque
);
}
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
View file @
1aff5bcb
...
@@ -357,12 +357,12 @@ static void kReduceMutualInducedFields(amoebaGpuContext amoebaGpu, CUDAStream<fl
...
@@ -357,12 +357,12 @@ static void kReduceMutualInducedFields(amoebaGpuContext amoebaGpu, CUDAStream<fl
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
outputArray
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
outputArray
->
_pDevData
,
0
);
LAUNCHERROR
(
"kReducePmeMI_Fields1"
);
LAUNCHERROR
(
"kReducePmeMI_Fields1"
);
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
,
outputPolarArray
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
,
outputPolarArray
->
_pDevData
,
0
);
LAUNCHERROR
(
"kReducePmeMI_Fields2"
);
LAUNCHERROR
(
"kReducePmeMI_Fields2"
);
}
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
View file @
1aff5bcb
...
@@ -487,7 +487,7 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
...
@@ -487,7 +487,7 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
// calculate electrostatic forces
// calculate electrostatic forces
if
(
amoebaGpu
->
multipoleNonbondedMethod
==
AMOEBA_NO_CUTOFF
){
if
(
amoebaGpu
->
multipoleNonbondedMethod
==
AMOEBA_NO_CUTOFF
){
cudaComputeAmoebaElectrostatic
(
amoebaGpu
);
cudaComputeAmoebaElectrostatic
(
amoebaGpu
,
(
hasAmoebaGeneralizedKirkwood
?
0
:
1
)
);
}
else
{
}
else
{
cudaComputeAmoebaPmeElectrostatic
(
amoebaGpu
);
cudaComputeAmoebaPmeElectrostatic
(
amoebaGpu
);
}
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaUtilities.cu
View file @
1aff5bcb
...
@@ -166,7 +166,7 @@ __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
...
@@ -166,7 +166,7 @@ __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
#endif
void
kReduceFields_kernel
(
unsigned
int
fieldComponents
,
unsigned
int
outputBuffers
,
float
*
fieldIn
,
float
*
fieldOut
)
void
kReduceFields_kernel
(
unsigned
int
fieldComponents
,
unsigned
int
outputBuffers
,
float
*
fieldIn
,
float
*
fieldOut
,
int
addTo
)
{
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
...
@@ -174,7 +174,7 @@ void kReduceFields_kernel( unsigned int fieldComponents, unsigned int outputBuff
...
@@ -174,7 +174,7 @@ void kReduceFields_kernel( unsigned int fieldComponents, unsigned int outputBuff
while
(
pos
<
fieldComponents
)
while
(
pos
<
fieldComponents
)
{
{
float
totalField
=
0.0
f
;
float
totalField
=
addTo
?
fieldOut
[
pos
]
:
0.0
f
;
float
*
pFt
=
fieldIn
+
pos
;
float
*
pFt
=
fieldIn
+
pos
;
unsigned
int
i
=
outputBuffers
;
unsigned
int
i
=
outputBuffers
;
while
(
i
>=
4
)
while
(
i
>=
4
)
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaUtilities.h
View file @
1aff5bcb
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
#include "amoebaCudaKernels.h"
#include "amoebaCudaKernels.h"
__global__
void
kReduceFields_kernel
(
unsigned
int
fieldComponents
,
unsigned
int
outputBuffers
,
float
*
fieldIn
,
float
*
fieldOut
);
__global__
void
kReduceFields_kernel
(
unsigned
int
fieldComponents
,
unsigned
int
outputBuffers
,
float
*
fieldIn
,
float
*
fieldOut
,
int
addTo
);
__global__
void
kReduceAndCombineFields_kernel
(
unsigned
int
fieldComponents
,
unsigned
int
outputBuffers
,
float
*
fieldIn1
,
float
*
fieldIn2
,
float
*
fieldOut
);
__global__
void
kReduceAndCombineFields_kernel
(
unsigned
int
fieldComponents
,
unsigned
int
outputBuffers
,
float
*
fieldIn1
,
float
*
fieldIn2
,
float
*
fieldOut
);
__global__
void
kReduceFieldsToFloat4_kernel
(
unsigned
int
fieldComponents
,
unsigned
int
outputBuffers
,
float
*
fieldIn
,
float4
*
fieldOut
);
__global__
void
kReduceFieldsToFloat4_kernel
(
unsigned
int
fieldComponents
,
unsigned
int
outputBuffers
,
float
*
fieldIn
,
float4
*
fieldOut
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
View file @
1aff5bcb
...
@@ -452,7 +452,7 @@ static void kReduceVdw14_7(amoebaGpuContext amoebaGpu, CUDAStream<float>* output
...
@@ -452,7 +452,7 @@ static void kReduceVdw14_7(amoebaGpuContext amoebaGpu, CUDAStream<float>* output
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
outputArray
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
outputArray
->
_pDevData
,
0
);
LAUNCHERROR
(
"kReduceVdw14_7"
);
LAUNCHERROR
(
"kReduceVdw14_7"
);
}
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
View file @
1aff5bcb
...
@@ -360,7 +360,7 @@ static void kReduceWcaDispersion(amoebaGpuContext amoebaGpu, CUDAStream<float>*
...
@@ -360,7 +360,7 @@ static void kReduceWcaDispersion(amoebaGpuContext amoebaGpu, CUDAStream<float>*
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
kReduceFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
gpu
->
sim
.
paddedNumberOfAtoms
*
3
,
gpu
->
sim
.
outputBuffers
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
outputArray
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
outputArray
->
_pDevData
,
0
);
LAUNCHERROR
(
"kReduceWcaDispersion"
);
LAUNCHERROR
(
"kReduceWcaDispersion"
);
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment