Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
1b5ee8f9
Commit
1b5ee8f9
authored
Dec 10, 2010
by
Mark Friedrichs
Browse files
Fixed bug in Vdw w/o cutoffs if bOutputBufferPerWarp is set
parent
db8a55b3
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
93 additions
and
47 deletions
+93
-47
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+4
-0
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.cu
...forms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
...uda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
+3
-5
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
...forms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
+0
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
...platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
+80
-35
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.h
.../platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.h
+5
-5
No files found.
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
1b5ee8f9
...
@@ -1129,6 +1129,10 @@ void CudaCalcAmoebaVdwForceKernel::initialize(const System& system, const Amoeba
...
@@ -1129,6 +1129,10 @@ void CudaCalcAmoebaVdwForceKernel::initialize(const System& system, const Amoeba
force
.
getSigmaCombiningRule
(),
force
.
getEpsilonCombiningRule
(),
force
.
getSigmaCombiningRule
(),
force
.
getEpsilonCombiningRule
(),
allExclusions
,
force
.
getPBC
(),
static_cast
<
float
>
(
force
.
getCutoff
())
);
allExclusions
,
force
.
getPBC
(),
static_cast
<
float
>
(
force
.
getCutoff
())
);
data
.
getAmoebaGpu
()
->
gpuContext
->
forces
.
push_back
(
new
ForceInfo
(
force
));
data
.
getAmoebaGpu
()
->
gpuContext
->
forces
.
push_back
(
new
ForceInfo
(
force
));
if
(
data
.
getLog
()
){
(
void
)
fprintf
(
data
.
getLog
(),
"CudaCalcAmoebaVdwForceKernel PBC=%d getUseNeighborList=%d
\n
"
,
force
.
getPBC
(),
force
.
getUseNeighborList
()
);
}
data
.
setUseVdwNeighborList
(
force
.
getUseNeighborList
()
);
data
.
setUseVdwNeighborList
(
force
.
getUseNeighborList
()
);
}
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.cu
View file @
1b5ee8f9
...
@@ -306,7 +306,7 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu )
...
@@ -306,7 +306,7 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu )
std
::
vector
<
int
>
fileId
;
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
);
//
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaEField"
,
fileId
,
outputVector
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaEField"
,
fileId
,
outputVector
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
View file @
1b5ee8f9
...
@@ -247,8 +247,7 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext
...
@@ -247,8 +247,7 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext
static
const
char
*
methodName
=
"cudaComputeAmoebaMutualInducedFieldMatrixMultiply"
;
static
const
char
*
methodName
=
"cudaComputeAmoebaMutualInducedFieldMatrixMultiply"
;
static
int
iteration
=
1
;
static
int
iteration
=
1
;
if
(
1
&&
amoebaGpu
->
log
){
if
(
1
&&
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s: scalingDistanceCutoff=%.5f
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s
\n
"
,
methodName
);
methodName
,
amoebaGpu
->
scalingDistanceCutoff
);
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
}
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
...
@@ -594,17 +593,16 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
...
@@ -594,17 +593,16 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
amoebaGpu
->
mutualInducedConverged
=
(
!
done
||
iteration
>
amoebaGpu
->
mutualInducedMaxIterations
)
?
0
:
1
;
amoebaGpu
->
mutualInducedConverged
=
(
!
done
||
iteration
>
amoebaGpu
->
mutualInducedMaxIterations
)
?
0
:
1
;
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
/*
if
(
0
){
if
(
0
){
std
::
vector
<
int
>
fileId
;
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
//
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaMI"
,
fileId
,
outputVector
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaMI"
,
fileId
,
outputVector
);
}
}
*/
#endif
#endif
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
View file @
1b5ee8f9
...
@@ -599,7 +599,6 @@ if( 0 ){
...
@@ -599,7 +599,6 @@ if( 0 ){
x
=
(
x
>>
17
)
<<
GRIDBITS
;
x
=
(
x
>>
17
)
<<
GRIDBITS
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
" AmGpu %8u [%5u %5u %1u]
\n
"
,
amoebaGpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
],
x
,
y
,
exclusions
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
" AmGpu %8u [%5u %5u %1u]
\n
"
,
amoebaGpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
],
x
,
y
,
exclusions
);
}
}
}
else
{
}
}
cudaComputeAmoebaPmeFixedEField
(
amoebaGpu
);
cudaComputeAmoebaPmeFixedEField
(
amoebaGpu
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
View file @
1b5ee8f9
...
@@ -10,6 +10,7 @@
...
@@ -10,6 +10,7 @@
#include "amoebaScaleFactors.h"
#include "amoebaScaleFactors.h"
#include <stdio.h>
#include <stdio.h>
extern
int
isNanOrInfinity
(
double
number
);
using
namespace
std
;
using
namespace
std
;
...
@@ -36,8 +37,11 @@ void GetCalculateAmoebaCudaVdw14_7Sim(amoebaGpuContext amoebaGpu)
...
@@ -36,8 +37,11 @@ void GetCalculateAmoebaCudaVdw14_7Sim(amoebaGpuContext amoebaGpu)
RTERROR
(
status
,
"GetCalculateAmoebaCudaVdw14_7Sim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed"
);
RTERROR
(
status
,
"GetCalculateAmoebaCudaVdw14_7Sim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed"
);
}
}
#define AMOEBA_DEBUG
//#define AMOEBA_DEBUG_PRINT
//#undef AMOEBA_DEBUG
#undef AMOEBA_DEBUG_PRINT
//#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
__device__
void
zeroVdw14_7SharedForce
(
struct
Vdw14_7Particle
*
sA
)
__device__
void
zeroVdw14_7SharedForce
(
struct
Vdw14_7Particle
*
sA
)
{
{
...
@@ -434,8 +438,8 @@ static void kCalculateAmoebaVdw14_7NonReduction(amoebaGpuContext amoebaGpu, CUDA
...
@@ -434,8 +438,8 @@ static void kCalculateAmoebaVdw14_7NonReduction(amoebaGpuContext amoebaGpu, CUDA
static
void
kReduceVdw14_7
(
amoebaGpuContext
amoebaGpu
,
CUDAStream
<
float
>*
outputArray
)
static
void
kReduceVdw14_7
(
amoebaGpuContext
amoebaGpu
,
CUDAStream
<
float
>*
outputArray
)
{
{
kReduceFields_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
fieldReduceThreadsPerBlock
>>>
(
kReduceFields_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
fieldReduceThreadsPerBlock
>>>
(
amoebaGpu
->
paddedNumberOfAtoms
*
3
,
amoebaGpu
->
outputBuffers
,
amoebaGpu
->
paddedNumberOfAtoms
*
3
,
amoebaGpu
->
outputBuffers
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
outputArray
->
_pDevStream
[
0
]
);
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
outputArray
->
_pDevStream
[
0
]
);
LAUNCHERROR
(
"kReduceVdw14_7"
);
LAUNCHERROR
(
"kReduceVdw14_7"
);
}
}
...
@@ -486,17 +490,19 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
...
@@ -486,17 +490,19 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
_PRINT
static
const
char
*
methodName
=
"kCalculateAmoebaVdw14_7Forces"
;
static
const
char
*
methodName
=
"kCalculateAmoebaVdw14_7Forces"
;
if
(
1
&&
amoebaGpu
->
log
){
if
(
1
&&
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s:
\n
"
,
methodName
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s:
\n
"
,
methodName
);
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
}
#ifdef AMOEBA_DEBUG
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
*
paddedNumberOfAtoms
,
1
,
"DebugArray"
);
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
*
paddedNumberOfAtoms
,
1
,
"DebugArray"
);
memset
(
debugArray
->
_pSysStream
[
0
],
0
,
sizeof
(
float
)
*
4
*
paddedNumberOfAtoms
*
paddedNumberOfAtoms
);
memset
(
debugArray
->
_pSysStream
[
0
],
0
,
sizeof
(
float
)
*
4
*
paddedNumberOfAtoms
*
paddedNumberOfAtoms
);
debugArray
->
Upload
();
debugArray
->
Upload
();
int
targetAtom
=
342
;
int
targetAtom
=
342
;
#endif
#endif
#endif
// set threads/block first time through
// set threads/block first time through
...
@@ -517,12 +523,30 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
...
@@ -517,12 +523,30 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
kCalculateAmoebaVdw14_7CopyCoordinates
(
amoebaGpu
,
gpu
->
psPosq4
,
amoebaGpu
->
psAmoebaVdwCoordinates
);
kCalculateAmoebaVdw14_7CopyCoordinates
(
amoebaGpu
,
gpu
->
psPosq4
,
amoebaGpu
->
psAmoebaVdwCoordinates
);
kCalculateAmoebaVdw14_7CoordinateReduction
(
amoebaGpu
,
amoebaGpu
->
psAmoebaVdwCoordinates
,
amoebaGpu
->
psAmoebaVdwCoordinates
);
kCalculateAmoebaVdw14_7CoordinateReduction
(
amoebaGpu
,
amoebaGpu
->
psAmoebaVdwCoordinates
,
amoebaGpu
->
psAmoebaVdwCoordinates
);
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
_PRINT
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Apply cutoff=%d warp=%d
\n
"
,
applyCutoff
,
gpu
->
bOutputBufferPerWarp
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Apply cutoff=%d warp=%d
\n
"
,
applyCutoff
,
gpu
->
bOutputBufferPerWarp
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
\n
"
,
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
Vdw14_7Particle
),
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
),
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
if
(
0
){
gpu
->
psInteractionCount
->
Download
();
amoebaGpu
->
psVdwWorkUnit
->
Download
();
unsigned
int
totalWarps
=
(
amoebaGpu
->
nonbondBlocks
*
threadsPerBlock
)
/
GRID
;
float
ratiof
=
(
float
)
totalWarps
/
(
float
)
amoebaGpu
->
psVdwWorkUnit
->
_length
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Ixn warps=%u count=%u
\n
"
,
totalWarps
,
gpu
->
psInteractionCount
->
_pSysStream
[
0
][
0
]
);
for
(
unsigned
int
ii
=
0
;
ii
<
amoebaGpu
->
psVdwWorkUnit
->
_length
;
ii
++
){
unsigned
int
x
=
amoebaGpu
->
psVdwWorkUnit
->
_pSysStream
[
0
][
ii
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
unsigned
int
exclusions
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
float
warp
=
(
float
)(
ii
)
*
ratiof
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"GpuCell %8u [%5u %5u %1u] %10u warp=%15.6f
\n
"
,
ii
,
x
,
y
,
exclusions
,
warp
);
}
}
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#endif
#endif
...
@@ -541,28 +565,28 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
...
@@ -541,28 +565,28 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kFindInteractionsWithinBlocksVdwPeriodic"
);
LAUNCHERROR
(
"kFindInteractionsWithinBlocksVdwPeriodic"
);
if
(
1
){
if
(
0
){
gpu
->
psInteractionCount
->
Download
();
gpu
->
psInteractionCount
->
Download
();
gpu
->
psInteractingWorkUnit
->
Download
();
gpu
->
psInteractingWorkUnit
->
Download
();
gpu
->
psInteractionFlag
->
Download
();
gpu
->
psInteractionFlag
->
Download
();
amoebaGpu
->
psVdwWorkUnit
->
Download
();
amoebaGpu
->
psVdwWorkUnit
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Vdw Ixn count=%u
\n
"
,
gpu
->
psInteractionCount
->
_pSysStream
[
0
][
0
]
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Vdw Ixn count=%u
\n
"
,
gpu
->
psInteractionCount
->
_pSysStream
[
0
][
0
]
);
for
(
unsigned
int
ii
=
0
;
ii
<
gpu
->
psInteractingWorkUnit
->
_length
;
ii
++
){
for
(
unsigned
int
ii
=
0
;
ii
<
gpu
->
psInteractingWorkUnit
->
_length
;
ii
++
){
unsigned
int
x
=
gpu
->
psInteractingWorkUnit
->
_pSysStream
[
0
][
ii
];
unsigned
int
x
=
gpu
->
psInteractingWorkUnit
->
_pSysStream
[
0
][
ii
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
unsigned
int
exclusions
=
(
x
&
0x1
);
unsigned
int
exclusions
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
x
=
(
x
>>
17
)
<<
GRIDBITS
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"GpuCell %8u %8u [%5u %5u %1u] %10u "
,
ii
,
gpu
->
psInteractingWorkUnit
->
_pSysStream
[
0
][
ii
],
x
,
y
,
exclusions
,
gpu
->
psInteractionFlag
->
_pSysStream
[
0
][
ii
]
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"GpuCell %8u %8u [%5u %5u %1u] %10u "
,
ii
,
gpu
->
psInteractingWorkUnit
->
_pSysStream
[
0
][
ii
],
x
,
y
,
exclusions
,
gpu
->
psInteractionFlag
->
_pSysStream
[
0
][
ii
]
);
x
=
amoebaGpu
->
psVdwWorkUnit
->
_pSysStream
[
0
][
ii
];
x
=
amoebaGpu
->
psVdwWorkUnit
->
_pSysStream
[
0
][
ii
];
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
exclusions
=
(
x
&
0x1
);
exclusions
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
x
=
(
x
>>
17
)
<<
GRIDBITS
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
" AmGpu %8u [%5u %5u %1u]
\n
"
,
amoebaGpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
],
x
,
y
,
exclusions
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
" AmGpu %8u [%5u %5u %1u]
\n
"
,
amoebaGpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
],
x
,
y
,
exclusions
);
}
}
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
}
if
(
gpu
->
bOutputBufferPerWarp
){
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaVdw14_7CutoffByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaVdw14_7CutoffByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
...
@@ -578,6 +602,7 @@ if( 1 ){
...
@@ -578,6 +602,7 @@ if( 1 ){
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
]
);
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
]
);
#endif
#endif
}
else
{
}
else
{
kCalculateAmoebaVdw14_7Cutoff_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaVdw14_7Cutoff_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psAmoebaVdwCoordinates
->
_pDevStream
[
0
],
amoebaGpu
->
psAmoebaVdwCoordinates
->
_pDevStream
[
0
],
...
@@ -598,9 +623,8 @@ if( 1 ){
...
@@ -598,9 +623,8 @@ if( 1 ){
if
(
gpu
->
bOutputBufferPerWarp
){
if
(
gpu
->
bOutputBufferPerWarp
){
//amoebaGpu->psVdwWorkUnit->_pDevStream[0],
kCalculateAmoebaVdw14_7N2ByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaVdw14_7N2ByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psVdwWorkUnit
->
_pDevStream
[
0
]
,
amoebaGpu
->
psAmoebaVdwCoordinates
->
_pDevStream
[
0
],
amoebaGpu
->
psAmoebaVdwCoordinates
->
_pDevStream
[
0
],
amoebaGpu
->
psVdwSigmaEpsilon
->
_pDevStream
[
0
],
amoebaGpu
->
psVdwSigmaEpsilon
->
_pDevStream
[
0
],
amoebaGpu
->
vdwSigmaCombiningRule
,
amoebaGpu
->
vdwSigmaCombiningRule
,
...
@@ -612,6 +636,7 @@ if( 1 ){
...
@@ -612,6 +636,7 @@ if( 1 ){
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
]
);
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
]
);
#endif
#endif
}
else
{
}
else
{
kCalculateAmoebaVdw14_7N2_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaVdw14_7N2_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
amoebaGpu
->
psVdwWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psVdwWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psAmoebaVdwCoordinates
->
_pDevStream
[
0
],
amoebaGpu
->
psAmoebaVdwCoordinates
->
_pDevStream
[
0
],
...
@@ -629,14 +654,15 @@ if( 1 ){
...
@@ -629,14 +654,15 @@ if( 1 ){
LAUNCHERROR
(
"kCalculateAmoebaVdw14_7N2"
);
LAUNCHERROR
(
"kCalculateAmoebaVdw14_7N2"
);
}
}
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
debugArray
->
Download
();
#ifdef AMOEBA_DEBUG_PRINT
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished 14-7 kernel execution
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished 14-7 kernel execution
\n
"
);
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#ifdef AMOEBA_DEBUG
debugArray
->
Download
();
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
double
cutOff
=
1.0e+03
;
double
cutOff
=
1.0e+03
;
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
...
@@ -654,7 +680,26 @@ if( 1 ){
...
@@ -654,7 +680,26 @@ if( 1 ){
debugIndex
+=
paddedNumberOfAtoms
;
debugIndex
+=
paddedNumberOfAtoms
;
}
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
}
}
#endif
amoebaGpu
->
psWorkArray_3_2
->
Download
();
amoebaGpu
->
psWorkArray_3_1
->
Download
();
//for( int jj = 0; jj < 3*gpu->natoms; jj += 3 )
for
(
int
jj
=
0
;
jj
<
3
*
gpu
->
natoms
;
jj
+=
3
){
for
(
int
kk
=
0
;
kk
<
amoebaGpu
->
outputBuffers
;
kk
++
){
float
delta
=
fabs
(
amoebaGpu
->
psWorkArray_3_1
->
_pSysStream
[
kk
][
jj
+
2
]
+
1.0
f
);
if
(
delta
<
5.0e-06
||
isNanOrInfinity
(
(
double
)
amoebaGpu
->
psWorkArray_3_1
->
_pSysStream
[
kk
][
jj
]
)
||
isNanOrInfinity
(
(
double
)
amoebaGpu
->
psWorkArray_3_1
->
_pSysStream
[
kk
][
jj
+
2
]
)
)
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%6d %6d [%16.9e %16.9e %16.9e] [%16.9e %16.9e %16.9e]
\n
"
,
jj
,
kk
,
amoebaGpu
->
psWorkArray_3_1
->
_pSysStream
[
kk
][
jj
],
amoebaGpu
->
psWorkArray_3_1
->
_pSysStream
[
kk
][
jj
+
1
],
amoebaGpu
->
psWorkArray_3_1
->
_pSysStream
[
kk
][
jj
+
2
],
amoebaGpu
->
psWorkArray_3_2
->
_pSysStream
[
kk
][
jj
],
amoebaGpu
->
psWorkArray_3_2
->
_pSysStream
[
kk
][
jj
+
1
],
amoebaGpu
->
psWorkArray_3_2
->
_pSysStream
[
kk
][
jj
+
2
]
);
}
}
}
}
#endif
#endif
...
@@ -667,7 +712,7 @@ if( 1 ){
...
@@ -667,7 +712,7 @@ if( 1 ){
CUDAStream
<
float4
>*
psTempForce
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
,
1
,
"psTempForce"
);
CUDAStream
<
float4
>*
psTempForce
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
,
1
,
"psTempForce"
);
kClearFloat4
(
amoebaGpu
,
paddedNumberOfAtoms
,
psTempForce
);
kClearFloat4
(
amoebaGpu
,
paddedNumberOfAtoms
,
psTempForce
);
kCalculateAmoebaVdw14_7Reduction
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
,
psTempForce
);
kCalculateAmoebaVdw14_7Reduction
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
,
psTempForce
);
kCalculateAmoebaVdw14_7NonReduction
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
,
psTempForce
);
//
kCalculateAmoebaVdw14_7NonReduction( amoebaGpu, amoebaGpu->psWorkArray_3_2, psTempForce );
std
::
vector
<
int
>
fileId
;
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
VectorOfDoubleVectors
outputVector
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.h
View file @
1b5ee8f9
...
@@ -144,10 +144,10 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)(
...
@@ -144,10 +144,10 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)(
// add to field at atomI the field due atomJ's dipole
// add to field at atomI the field due atomJ's dipole
forceSum
[
0
]
+=
mask
?
ijForce
[
0
]
:
0
.
0
f
;
forceSum
[
0
]
+=
mask
?
ijForce
[
0
]
:
0
.
0
f
;
forceSum
[
1
]
+=
mask
?
ijForce
[
1
]
:
0
.
0
f
;
forceSum
[
1
]
+=
mask
?
ijForce
[
1
]
:
0
.
0
f
;
forceSum
[
2
]
+=
mask
?
ijForce
[
2
]
:
0
.
0
f
;
forceSum
[
2
]
+=
mask
?
ijForce
[
2
]
:
0
.
0
f
;
totalEnergy
+=
mask
?
0
.
5
*
energy
:
0
.
0
f
;
totalEnergy
+=
mask
?
0
.
5
f
*
energy
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
||
(
y
+
j
)
==
targetAtom
){
if
(
atomI
==
targetAtom
||
(
y
+
j
)
==
targetAtom
){
...
@@ -193,7 +193,7 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
...
@@ -193,7 +193,7 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
load3dArrayBufferPerWarp
(
offset
,
forceSum
,
outputForce
);
load3dArrayBufferPerWarp
(
offset
,
forceSum
,
outputForce
);
#else
#else
unsigned
int
offset
=
3
*
(
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
);
unsigned
int
offset
=
3
*
(
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
);
load3dArray
(
offset
,
forceSum
,
outputForce
);
load3dArray
(
offset
,
forceSum
,
outputForce
);
#endif
#endif
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment