Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
1b5ee8f9
"serialization/src/XmlSerializer.cpp" did not exist on "6ddebdb28b4a19e9496e86b8e509617407df8e3d"
Commit
1b5ee8f9
authored
Dec 10, 2010
by
Mark Friedrichs
Browse files
Fixed bug in Vdw w/o cutoffs if bOutputBufferPerWarp is set
parent
db8a55b3
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
93 additions
and
47 deletions
+93
-47
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+4
-0
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.cu
...forms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
...uda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
+3
-5
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
...forms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
+0
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
...platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
+80
-35
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.h
.../platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.h
+5
-5
No files found.
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
1b5ee8f9
...
@@ -1129,6 +1129,10 @@ void CudaCalcAmoebaVdwForceKernel::initialize(const System& system, const Amoeba
...
@@ -1129,6 +1129,10 @@ void CudaCalcAmoebaVdwForceKernel::initialize(const System& system, const Amoeba
force
.
getSigmaCombiningRule
(),
force
.
getEpsilonCombiningRule
(),
force
.
getSigmaCombiningRule
(),
force
.
getEpsilonCombiningRule
(),
allExclusions
,
force
.
getPBC
(),
static_cast
<
float
>
(
force
.
getCutoff
())
);
allExclusions
,
force
.
getPBC
(),
static_cast
<
float
>
(
force
.
getCutoff
())
);
data
.
getAmoebaGpu
()
->
gpuContext
->
forces
.
push_back
(
new
ForceInfo
(
force
));
data
.
getAmoebaGpu
()
->
gpuContext
->
forces
.
push_back
(
new
ForceInfo
(
force
));
if
(
data
.
getLog
()
){
(
void
)
fprintf
(
data
.
getLog
(),
"CudaCalcAmoebaVdwForceKernel PBC=%d getUseNeighborList=%d
\n
"
,
force
.
getPBC
(),
force
.
getUseNeighborList
()
);
}
data
.
setUseVdwNeighborList
(
force
.
getUseNeighborList
()
);
data
.
setUseVdwNeighborList
(
force
.
getUseNeighborList
()
);
}
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.cu
View file @
1b5ee8f9
...
@@ -306,7 +306,7 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu )
...
@@ -306,7 +306,7 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu )
std
::
vector
<
int
>
fileId
;
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array
(
gpu
->
natoms
,
3
,
gpu
->
psPosq4
,
outputVector
);
//
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaEField"
,
fileId
,
outputVector
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaEField"
,
fileId
,
outputVector
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
View file @
1b5ee8f9
...
@@ -247,8 +247,7 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext
...
@@ -247,8 +247,7 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext
static
const
char
*
methodName
=
"cudaComputeAmoebaMutualInducedFieldMatrixMultiply"
;
static
const
char
*
methodName
=
"cudaComputeAmoebaMutualInducedFieldMatrixMultiply"
;
static
int
iteration
=
1
;
static
int
iteration
=
1
;
if
(
1
&&
amoebaGpu
->
log
){
if
(
1
&&
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s: scalingDistanceCutoff=%.5f
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s
\n
"
,
methodName
);
methodName
,
amoebaGpu
->
scalingDistanceCutoff
);
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
}
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
...
@@ -594,17 +593,16 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
...
@@ -594,17 +593,16 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
amoebaGpu
->
mutualInducedConverged
=
(
!
done
||
iteration
>
amoebaGpu
->
mutualInducedMaxIterations
)
?
0
:
1
;
amoebaGpu
->
mutualInducedConverged
=
(
!
done
||
iteration
>
amoebaGpu
->
mutualInducedMaxIterations
)
?
0
:
1
;
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
/*
if
(
0
){
if
(
0
){
std
::
vector
<
int
>
fileId
;
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
//
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaMI"
,
fileId
,
outputVector
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaMI"
,
fileId
,
outputVector
);
}
}
*/
#endif
#endif
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
View file @
1b5ee8f9
...
@@ -599,7 +599,6 @@ if( 0 ){
...
@@ -599,7 +599,6 @@ if( 0 ){
x
=
(
x
>>
17
)
<<
GRIDBITS
;
x
=
(
x
>>
17
)
<<
GRIDBITS
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
" AmGpu %8u [%5u %5u %1u]
\n
"
,
amoebaGpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
],
x
,
y
,
exclusions
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
" AmGpu %8u [%5u %5u %1u]
\n
"
,
amoebaGpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
],
x
,
y
,
exclusions
);
}
}
}
else
{
}
}
cudaComputeAmoebaPmeFixedEField
(
amoebaGpu
);
cudaComputeAmoebaPmeFixedEField
(
amoebaGpu
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
View file @
1b5ee8f9
...
@@ -10,6 +10,7 @@
...
@@ -10,6 +10,7 @@
#include "amoebaScaleFactors.h"
#include "amoebaScaleFactors.h"
#include <stdio.h>
#include <stdio.h>
extern
int
isNanOrInfinity
(
double
number
);
using
namespace
std
;
using
namespace
std
;
...
@@ -36,8 +37,11 @@ void GetCalculateAmoebaCudaVdw14_7Sim(amoebaGpuContext amoebaGpu)
...
@@ -36,8 +37,11 @@ void GetCalculateAmoebaCudaVdw14_7Sim(amoebaGpuContext amoebaGpu)
RTERROR
(
status
,
"GetCalculateAmoebaCudaVdw14_7Sim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed"
);
RTERROR
(
status
,
"GetCalculateAmoebaCudaVdw14_7Sim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed"
);
}
}
#define AMOEBA_DEBUG
//#define AMOEBA_DEBUG_PRINT
//#undef AMOEBA_DEBUG
#undef AMOEBA_DEBUG_PRINT
//#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
__device__
void
zeroVdw14_7SharedForce
(
struct
Vdw14_7Particle
*
sA
)
__device__
void
zeroVdw14_7SharedForce
(
struct
Vdw14_7Particle
*
sA
)
{
{
...
@@ -486,17 +490,19 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
...
@@ -486,17 +490,19 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
_PRINT
static
const
char
*
methodName
=
"kCalculateAmoebaVdw14_7Forces"
;
static
const
char
*
methodName
=
"kCalculateAmoebaVdw14_7Forces"
;
if
(
1
&&
amoebaGpu
->
log
){
if
(
1
&&
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s:
\n
"
,
methodName
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s:
\n
"
,
methodName
);
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
}
#ifdef AMOEBA_DEBUG
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
*
paddedNumberOfAtoms
,
1
,
"DebugArray"
);
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
*
paddedNumberOfAtoms
,
1
,
"DebugArray"
);
memset
(
debugArray
->
_pSysStream
[
0
],
0
,
sizeof
(
float
)
*
4
*
paddedNumberOfAtoms
*
paddedNumberOfAtoms
);
memset
(
debugArray
->
_pSysStream
[
0
],
0
,
sizeof
(
float
)
*
4
*
paddedNumberOfAtoms
*
paddedNumberOfAtoms
);
debugArray
->
Upload
();
debugArray
->
Upload
();
int
targetAtom
=
342
;
int
targetAtom
=
342
;
#endif
#endif
#endif
// set threads/block first time through
// set threads/block first time through
...
@@ -517,12 +523,30 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
...
@@ -517,12 +523,30 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
kCalculateAmoebaVdw14_7CopyCoordinates
(
amoebaGpu
,
gpu
->
psPosq4
,
amoebaGpu
->
psAmoebaVdwCoordinates
);
kCalculateAmoebaVdw14_7CopyCoordinates
(
amoebaGpu
,
gpu
->
psPosq4
,
amoebaGpu
->
psAmoebaVdwCoordinates
);
kCalculateAmoebaVdw14_7CoordinateReduction
(
amoebaGpu
,
amoebaGpu
->
psAmoebaVdwCoordinates
,
amoebaGpu
->
psAmoebaVdwCoordinates
);
kCalculateAmoebaVdw14_7CoordinateReduction
(
amoebaGpu
,
amoebaGpu
->
psAmoebaVdwCoordinates
,
amoebaGpu
->
psAmoebaVdwCoordinates
);
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
_PRINT
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Apply cutoff=%d warp=%d
\n
"
,
applyCutoff
,
gpu
->
bOutputBufferPerWarp
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Apply cutoff=%d warp=%d
\n
"
,
applyCutoff
,
gpu
->
bOutputBufferPerWarp
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
\n
"
,
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
Vdw14_7Particle
),
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
),
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
if
(
0
){
gpu
->
psInteractionCount
->
Download
();
amoebaGpu
->
psVdwWorkUnit
->
Download
();
unsigned
int
totalWarps
=
(
amoebaGpu
->
nonbondBlocks
*
threadsPerBlock
)
/
GRID
;
float
ratiof
=
(
float
)
totalWarps
/
(
float
)
amoebaGpu
->
psVdwWorkUnit
->
_length
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Ixn warps=%u count=%u
\n
"
,
totalWarps
,
gpu
->
psInteractionCount
->
_pSysStream
[
0
][
0
]
);
for
(
unsigned
int
ii
=
0
;
ii
<
amoebaGpu
->
psVdwWorkUnit
->
_length
;
ii
++
){
unsigned
int
x
=
amoebaGpu
->
psVdwWorkUnit
->
_pSysStream
[
0
][
ii
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
unsigned
int
exclusions
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
float
warp
=
(
float
)(
ii
)
*
ratiof
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"GpuCell %8u [%5u %5u %1u] %10u warp=%15.6f
\n
"
,
ii
,
x
,
y
,
exclusions
,
warp
);
}
}
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#endif
#endif
...
@@ -541,7 +565,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
...
@@ -541,7 +565,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kFindInteractionsWithinBlocksVdwPeriodic"
);
LAUNCHERROR
(
"kFindInteractionsWithinBlocksVdwPeriodic"
);
if
(
1
){
if
(
0
){
gpu
->
psInteractionCount
->
Download
();
gpu
->
psInteractionCount
->
Download
();
gpu
->
psInteractingWorkUnit
->
Download
();
gpu
->
psInteractingWorkUnit
->
Download
();
gpu
->
psInteractionFlag
->
Download
();
gpu
->
psInteractionFlag
->
Download
();
...
@@ -562,7 +586,7 @@ if( 1 ){
...
@@ -562,7 +586,7 @@ if( 1 ){
(
void
)
fprintf
(
amoebaGpu
->
log
,
" AmGpu %8u [%5u %5u %1u]
\n
"
,
amoebaGpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
],
x
,
y
,
exclusions
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
" AmGpu %8u [%5u %5u %1u]
\n
"
,
amoebaGpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
],
x
,
y
,
exclusions
);
}
}
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
}
if
(
gpu
->
bOutputBufferPerWarp
){
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaVdw14_7CutoffByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaVdw14_7CutoffByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
...
@@ -578,6 +602,7 @@ if( 1 ){
...
@@ -578,6 +602,7 @@ if( 1 ){
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
]
);
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
]
);
#endif
#endif
}
else
{
}
else
{
kCalculateAmoebaVdw14_7Cutoff_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaVdw14_7Cutoff_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psAmoebaVdwCoordinates
->
_pDevStream
[
0
],
amoebaGpu
->
psAmoebaVdwCoordinates
->
_pDevStream
[
0
],
...
@@ -598,9 +623,8 @@ if( 1 ){
...
@@ -598,9 +623,8 @@ if( 1 ){
if
(
gpu
->
bOutputBufferPerWarp
){
if
(
gpu
->
bOutputBufferPerWarp
){
//amoebaGpu->psVdwWorkUnit->_pDevStream[0],
kCalculateAmoebaVdw14_7N2ByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaVdw14_7N2ByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psVdwWorkUnit
->
_pDevStream
[
0
]
,
amoebaGpu
->
psAmoebaVdwCoordinates
->
_pDevStream
[
0
],
amoebaGpu
->
psAmoebaVdwCoordinates
->
_pDevStream
[
0
],
amoebaGpu
->
psVdwSigmaEpsilon
->
_pDevStream
[
0
],
amoebaGpu
->
psVdwSigmaEpsilon
->
_pDevStream
[
0
],
amoebaGpu
->
vdwSigmaCombiningRule
,
amoebaGpu
->
vdwSigmaCombiningRule
,
...
@@ -612,6 +636,7 @@ if( 1 ){
...
@@ -612,6 +636,7 @@ if( 1 ){
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
]
);
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
]
);
#endif
#endif
}
else
{
}
else
{
kCalculateAmoebaVdw14_7N2_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaVdw14_7N2_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
Vdw14_7Particle
)
*
threadsPerBlock
>>>
(
amoebaGpu
->
psVdwWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psVdwWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psAmoebaVdwCoordinates
->
_pDevStream
[
0
],
amoebaGpu
->
psAmoebaVdwCoordinates
->
_pDevStream
[
0
],
...
@@ -629,14 +654,15 @@ if( 1 ){
...
@@ -629,14 +654,15 @@ if( 1 ){
LAUNCHERROR
(
"kCalculateAmoebaVdw14_7N2"
);
LAUNCHERROR
(
"kCalculateAmoebaVdw14_7N2"
);
}
}
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
debugArray
->
Download
();
#ifdef AMOEBA_DEBUG_PRINT
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished 14-7 kernel execution
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished 14-7 kernel execution
\n
"
);
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#ifdef AMOEBA_DEBUG
debugArray
->
Download
();
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
double
cutOff
=
1.0e+03
;
double
cutOff
=
1.0e+03
;
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
...
@@ -655,6 +681,25 @@ if( 1 ){
...
@@ -655,6 +681,25 @@ if( 1 ){
}
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
}
}
#endif
amoebaGpu
->
psWorkArray_3_2
->
Download
();
amoebaGpu
->
psWorkArray_3_1
->
Download
();
//for( int jj = 0; jj < 3*gpu->natoms; jj += 3 )
for
(
int
jj
=
0
;
jj
<
3
*
gpu
->
natoms
;
jj
+=
3
){
for
(
int
kk
=
0
;
kk
<
amoebaGpu
->
outputBuffers
;
kk
++
){
float
delta
=
fabs
(
amoebaGpu
->
psWorkArray_3_1
->
_pSysStream
[
kk
][
jj
+
2
]
+
1.0
f
);
if
(
delta
<
5.0e-06
||
isNanOrInfinity
(
(
double
)
amoebaGpu
->
psWorkArray_3_1
->
_pSysStream
[
kk
][
jj
]
)
||
isNanOrInfinity
(
(
double
)
amoebaGpu
->
psWorkArray_3_1
->
_pSysStream
[
kk
][
jj
+
2
]
)
)
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%6d %6d [%16.9e %16.9e %16.9e] [%16.9e %16.9e %16.9e]
\n
"
,
jj
,
kk
,
amoebaGpu
->
psWorkArray_3_1
->
_pSysStream
[
kk
][
jj
],
amoebaGpu
->
psWorkArray_3_1
->
_pSysStream
[
kk
][
jj
+
1
],
amoebaGpu
->
psWorkArray_3_1
->
_pSysStream
[
kk
][
jj
+
2
],
amoebaGpu
->
psWorkArray_3_2
->
_pSysStream
[
kk
][
jj
],
amoebaGpu
->
psWorkArray_3_2
->
_pSysStream
[
kk
][
jj
+
1
],
amoebaGpu
->
psWorkArray_3_2
->
_pSysStream
[
kk
][
jj
+
2
]
);
}
}
}
}
#endif
#endif
...
@@ -667,7 +712,7 @@ if( 1 ){
...
@@ -667,7 +712,7 @@ if( 1 ){
CUDAStream
<
float4
>*
psTempForce
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
,
1
,
"psTempForce"
);
CUDAStream
<
float4
>*
psTempForce
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
,
1
,
"psTempForce"
);
kClearFloat4
(
amoebaGpu
,
paddedNumberOfAtoms
,
psTempForce
);
kClearFloat4
(
amoebaGpu
,
paddedNumberOfAtoms
,
psTempForce
);
kCalculateAmoebaVdw14_7Reduction
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
,
psTempForce
);
kCalculateAmoebaVdw14_7Reduction
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
,
psTempForce
);
kCalculateAmoebaVdw14_7NonReduction
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
,
psTempForce
);
//
kCalculateAmoebaVdw14_7NonReduction( amoebaGpu, amoebaGpu->psWorkArray_3_2, psTempForce );
std
::
vector
<
int
>
fileId
;
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
VectorOfDoubleVectors
outputVector
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.h
View file @
1b5ee8f9
...
@@ -147,7 +147,7 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)(
...
@@ -147,7 +147,7 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)(
forceSum
[
0
]
+=
mask
?
ijForce
[
0
]
:
0
.
0
f
;
forceSum
[
0
]
+=
mask
?
ijForce
[
0
]
:
0
.
0
f
;
forceSum
[
1
]
+=
mask
?
ijForce
[
1
]
:
0
.
0
f
;
forceSum
[
1
]
+=
mask
?
ijForce
[
1
]
:
0
.
0
f
;
forceSum
[
2
]
+=
mask
?
ijForce
[
2
]
:
0
.
0
f
;
forceSum
[
2
]
+=
mask
?
ijForce
[
2
]
:
0
.
0
f
;
totalEnergy
+=
mask
?
0
.
5
*
energy
:
0
.
0
f
;
totalEnergy
+=
mask
?
0
.
5
f
*
energy
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
||
(
y
+
j
)
==
targetAtom
){
if
(
atomI
==
targetAtom
||
(
y
+
j
)
==
targetAtom
){
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment