Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
b20978e1
Commit
b20978e1
authored
Oct 06, 2010
by
Mark Friedrichs
Browse files
Fix for direct PME
parent
a9054686
Changes
11
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
139 additions
and
105 deletions
+139
-105
plugins/amoeba/platforms/cuda/src/AmoebaCudaData.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaData.cpp
+0
-2
plugins/amoeba/platforms/cuda/src/AmoebaCudaData.h
plugins/amoeba/platforms/cuda/src/AmoebaCudaData.h
+2
-1
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+9
-8
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
+2
-2
plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
.../src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
+25
-17
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
...ms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
+4
-16
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
...rms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
+51
-26
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+10
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
...a/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
+25
-25
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
...platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
+10
-6
No files found.
plugins/amoeba/platforms/cuda/src/AmoebaCudaData.cpp
View file @
b20978e1
...
...
@@ -47,9 +47,7 @@ AmoebaCudaData::AmoebaCudaData( CudaPlatform::PlatformData& data ) : cudaPlatfor
}
AmoebaCudaData
::~
AmoebaCudaData
()
{
(
void
)
fprintf
(
stderr
,
"In AmoebaCudaData destructor
\n
"
);
fflush
(
stderr
);
amoebaGpuShutDown
(
amoebaGpu
);
(
void
)
fprintf
(
stderr
,
"Done AmoebaCudaData destructor
\n
"
);
fflush
(
stderr
);
}
void
AmoebaCudaData
::
decrementKernelCount
(
void
)
{
...
...
plugins/amoeba/platforms/cuda/src/AmoebaCudaData.h
View file @
b20978e1
...
...
@@ -167,9 +167,10 @@ public:
*/
void
setApplyCutoff
(
int
applyCutoff
);
CudaPlatform
::
PlatformData
&
cudaPlatformData
;
private:
CudaPlatform
::
PlatformData
&
cudaPlatformData
;
amoebaGpuContext
amoebaGpu
;
bool
hasAmoebaBonds
,
hasAmoebaGeneralizedKirkwood
,
hasAmoebaMultipole
;
int
multipoleForceCount
;
...
...
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
b20978e1
...
...
@@ -670,11 +670,11 @@ static void computeAmoebaMultipoleForce( AmoebaCudaData& data ) {
amoebaGpuContext
gpu
=
data
.
getAmoebaGpu
();
if
(
data
.
getMultipoleForceCount
()
==
0
){
gpuCopyInteractingWorkUnit
(
gpu
);
}
if
(
data
.
getApplyCutoff
()
&&
(
data
.
getMultipoleForceCount
()
%
100
)
==
0
){
gpuReorderAtoms
(
gpu
->
gpuContext
);
gpuCopyWorkUnit
(
gpu
);
}
//if( data.getApplyCutoff() && (data.getMultipoleForceCount() % 100) == 0 ){
//gpuReorderAtoms(gpu->gpuContext);
//}
data
.
incrementMultipoleForceCount
();
data
.
initializeGpu
();
...
...
@@ -875,6 +875,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
}
gpuSetAmoebaPMEParameters
(
data
.
getAmoebaGpu
(),
(
float
)
alpha
,
xsize
,
ysize
,
zsize
);
data
.
setApplyCutoff
(
1
);
data
.
cudaPlatformData
.
nonbondedMethod
=
PARTICLE_MESH_EWALD
;
amoebaGpuContext
amoebaGpu
=
data
.
getAmoebaGpu
();
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpu
->
sim
.
nonbondedCutoffSqr
=
force
.
getCutoffDistance
()
*
force
.
getCutoffDistance
();
...
...
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
View file @
b20978e1
...
...
@@ -4305,7 +4305,7 @@ void trackMutualInducedIterations( amoebaGpuContext amoebaGpu, int iteration){
--------------------------------------------------------------------------------------- */
void
gpuCopy
Interacting
WorkUnit
(
amoebaGpuContext
amoebaGpu
){
void
gpuCopyWorkUnit
(
amoebaGpuContext
amoebaGpu
){
// ---------------------------------------------------------------------------------------
...
...
@@ -4315,7 +4315,7 @@ void gpuCopyInteractingWorkUnit( amoebaGpuContext amoebaGpu ){
amoebaGpu
->
psWorkUnit
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"gpuCopyInteractingWorkUnit called -- to be removed.
\n
"
);
for
(
unsigned
int
ii
=
0
;
ii
<
gpu
->
psInteractingWorkUnit
->
_length
;
ii
++
){
gpu
->
psInteractingWorkUnit
->
_pSysStream
[
0
][
ii
]
=
amoebaGpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
];
//
gpu->psInteractingWorkUnit->_pSysStream[0][ii] = amoebaGpu->psWorkUnit->_pSysStream[0][ii];
gpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
]
=
amoebaGpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
];
}
gpu
->
psInteractingWorkUnit
->
Upload
();
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
View file @
b20978e1
...
...
@@ -344,7 +344,7 @@ extern "C"
void
gpuSetAmoebaBondOffsets
(
amoebaGpuContext
gpu
);
extern
"C"
void
gpuCopy
Interacting
WorkUnit
(
amoebaGpuContext
gpu
);
void
gpuCopyWorkUnit
(
amoebaGpuContext
gpu
);
/*
extern "C"
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
View file @
b20978e1
...
...
@@ -255,6 +255,14 @@ if( atomI == targetAtom ){
}
else
{
if
(
lasty
!=
y
)
{
// load shared data
loadPmeDirectElectrostaticShared
(
&
(
sA
[
threadIdx
.
x
]),
(
y
+
tgx
)
);
}
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
];
if
(
flags
==
0
)
{
// No interactions in this block.
...
...
@@ -346,36 +354,36 @@ if( atomI == targetAtom ){
}
else
{
p
sA
[
threadIdx
.
x
].
tempForce
[
0
]
=
mask
?
0
.
0
f
:
force
[
0
];
p
sA
[
threadIdx
.
x
].
tempForce
[
1
]
=
mask
?
0
.
0
f
:
force
[
1
];
p
sA
[
threadIdx
.
x
].
tempForce
[
2
]
=
mask
?
0
.
0
f
:
force
[
2
];
sA
[
threadIdx
.
x
].
tempForce
[
0
]
=
mask
?
0
.
0
f
:
force
[
0
];
sA
[
threadIdx
.
x
].
tempForce
[
1
]
=
mask
?
0
.
0
f
:
force
[
1
];
sA
[
threadIdx
.
x
].
tempForce
[
2
]
=
mask
?
0
.
0
f
:
force
[
2
];
p
sA
[
threadIdx
.
x
].
tempTorque
[
0
]
=
mask
?
0
.
0
f
:
torque
[
1
][
0
];
p
sA
[
threadIdx
.
x
].
tempTorque
[
1
]
=
mask
?
0
.
0
f
:
torque
[
1
][
1
];
p
sA
[
threadIdx
.
x
].
tempTorque
[
2
]
=
mask
?
0
.
0
f
:
torque
[
1
][
2
];
sA
[
threadIdx
.
x
].
tempTorque
[
0
]
=
mask
?
0
.
0
f
:
torque
[
1
][
0
];
sA
[
threadIdx
.
x
].
tempTorque
[
1
]
=
mask
?
0
.
0
f
:
torque
[
1
][
1
];
sA
[
threadIdx
.
x
].
tempTorque
[
2
]
=
mask
?
0
.
0
f
:
torque
[
1
][
2
];
if
(
tgx
%
2
==
0
){
sumTempBuffer
(
p
sA
[
threadIdx
.
x
],
p
sA
[
threadIdx
.
x
+
1
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
}
if
(
tgx
%
4
==
0
){
sumTempBuffer
(
p
sA
[
threadIdx
.
x
],
p
sA
[
threadIdx
.
x
+
2
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
2
]
);
}
if
(
tgx
%
8
==
0
){
sumTempBuffer
(
p
sA
[
threadIdx
.
x
],
p
sA
[
threadIdx
.
x
+
4
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
4
]
);
}
if
(
tgx
%
16
==
0
){
sumTempBuffer
(
p
sA
[
threadIdx
.
x
],
p
sA
[
threadIdx
.
x
+
8
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
8
]
);
}
if
(
tgx
==
0
)
{
psA
[
jIdx
].
force
[
0
]
-=
p
sA
[
threadIdx
.
x
].
tempForce
[
0
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempForce
[
0
];
psA
[
jIdx
].
force
[
1
]
-=
p
sA
[
threadIdx
.
x
].
tempForce
[
1
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempForce
[
1
];
psA
[
jIdx
].
force
[
2
]
-=
p
sA
[
threadIdx
.
x
].
tempForce
[
2
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempForce
[
2
];
psA
[
jIdx
].
force
[
0
]
-=
sA
[
threadIdx
.
x
].
tempForce
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempForce
[
0
];
psA
[
jIdx
].
force
[
1
]
-=
sA
[
threadIdx
.
x
].
tempForce
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempForce
[
1
];
psA
[
jIdx
].
force
[
2
]
-=
sA
[
threadIdx
.
x
].
tempForce
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempForce
[
2
];
psA
[
jIdx
].
torque
[
0
]
+=
p
sA
[
threadIdx
.
x
].
tempTorque
[
0
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
0
];
psA
[
jIdx
].
torque
[
1
]
+=
p
sA
[
threadIdx
.
x
].
tempTorque
[
1
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
1
];
psA
[
jIdx
].
torque
[
2
]
+=
p
sA
[
threadIdx
.
x
].
tempTorque
[
2
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
2
];
psA
[
jIdx
].
torque
[
0
]
+=
sA
[
threadIdx
.
x
].
tempTorque
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
0
];
psA
[
jIdx
].
torque
[
1
]
+=
sA
[
threadIdx
.
x
].
tempTorque
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
1
];
psA
[
jIdx
].
torque
[
2
]
+=
sA
[
threadIdx
.
x
].
tempTorque
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
2
];
}
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
View file @
b20978e1
...
...
@@ -429,7 +429,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
// print intermediate results for the targetAtom
unsigned
int
targetAtom
=
354
;
unsigned
int
targetAtom
=
1280
;
#endif
kClearFields_3
(
amoebaGpu
,
2
);
...
...
@@ -458,8 +458,6 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
]
);
#endif
}
else
{
//amoebaGpu->psWorkUnit->_pDevStream[0],
kCalculateAmoebaPmeDirectFixedE_FieldN2_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
...
...
@@ -553,24 +551,14 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
amoebaGpu
->
gpuContext
->
psPosq4
->
Download
();
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
int
debugIndex
=
jj
;
if
(
fabs
(
debugArray
->
_pSysStream
[
0
][
jj
+
paddedNumberOfAtoms
].
x
)
>
0.0
){
if
(
fabs
(
debugArray
->
_pSysStream
[
0
][
jj
+
3
*
paddedNumberOfAtoms
].
x
)
>
0.0
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d PmeFixedEField
\n
"
,
jj
);
for
(
int
kk
=
0
;
kk
<
6
;
kk
++
){
for
(
int
kk
=
0
;
kk
<
7
;
kk
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"[%16.9e %16.9e %16.9e %16.9e]
\n
"
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
x
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
y
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
z
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
w
);
debugIndex
+=
paddedNumberOfAtoms
;
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"[%16.9e %16.9e %16.9e ] [%16.9e %16.9e %16.9e] [%16.9e %16.9e %16.9e] p
\n
"
,
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
x
,
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
y
,
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
z
,
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
x
-
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
0
].
x
,
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
y
-
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
0
].
y
,
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
z
-
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
0
].
z
,
(
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
x
-
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
0
].
x
)
/
5.50
f
,
(
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
y
-
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
0
].
y
)
/
5.50
f
,
(
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
z
-
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
0
].
z
)
/
5.50
f
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
}
...
...
@@ -591,7 +579,7 @@ if( fabs(debugArray->_pSysStream[0][jj+paddedNumberOfAtoms].x) > 0.0 ){
}
#endif
if
(
1
){
if
(
0
){
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
0
);
VectorOfDoubleVectors
outputVector
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
View file @
b20978e1
...
...
@@ -140,18 +140,30 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectFixedE_Field, _kernel)(
fieldPolarSum
[
2
]
+=
match
?
0
.
0
f
:
ijField
[
2
][
2
];
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
){
if
(
atomI
==
targetAtom
||
targetAtom
==
(
y
+
j
)
){
unsigned
int
index
=
atomI
==
targetAtom
?
(
y
+
j
)
:
atomI
;
unsigned
int
indexI
=
0
;
unsigned
int
indexJ
=
indexI
?
0
:
2
;
unsigned
int
indices
[
4
]
=
{
indexI
,
indexJ
,
indexI
+
1
,
indexJ
+
1
};
float
flag
=
7
.
0
f
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
j
);
debugArray
[
index
].
z
=
dScaleValue
;
debugArray
[
index
].
w
=
pScaleValue
;
float
flag
=
7
.
0
f
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
(
float
)
bExclusionFlag
;
debugArray
[
index
].
y
=
(
float
)
(
tgx
);
debugArray
[
index
].
z
=
(
float
)
j
;
debugArray
[
index
].
w
=
flag
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
(
float
)
dScaleMask
;
debugArray
[
index
].
y
=
(
float
)
pScaleMask
.
x
;
debugArray
[
index
].
z
=
(
float
)
pScaleMask
.
y
;
debugArray
[
index
].
w
=
flag
;
for
(
int
ii
=
0
;
ii
<
4
;
ii
++
){
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
match
?
0
.
0
f
:
ijField
[
indices
[
ii
]][
0
];
...
...
@@ -159,6 +171,7 @@ if( atomI == targetAtom ){
debugArray
[
index
].
z
=
match
?
0
.
0
f
:
ijField
[
indices
[
ii
]][
2
];
debugArray
[
index
].
w
=
flag
;
}
for
(
int
pullIndex
=
0
;
pullIndex
<
maxPullIndex
;
pullIndex
++
){
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
...
...
@@ -186,11 +199,6 @@ if( atomI == targetAtom ){
}
else
{
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
];
// flags = 0xFFFFFFFF;
if
(
flags
==
0
)
{
// No interactions in this block.
}
else
{
if
(
lasty
!=
y
)
{
// load coordinates, charge, ...
...
...
@@ -199,6 +207,11 @@ if( atomI == targetAtom ){
}
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
];
if
(
flags
==
0
)
{
// No interactions in this block.
}
else
{
// zero shared fields
zeroFixedFieldParticleSharedField
(
&
(
sA
[
threadIdx
.
x
])
);
...
...
@@ -254,36 +267,36 @@ if( atomI == targetAtom ){
}
else
{
p
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
0
];
p
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
1
];
p
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
2
];
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
0
];
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
1
];
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
2
];
p
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
0
];
p
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
1
];
p
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
2
];
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
0
];
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
1
];
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
2
];
if
(
tgx
%
2
==
0
){
sumTempBuffer
(
p
sA
[
threadIdx
.
x
],
p
sA
[
threadIdx
.
x
+
1
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
}
if
(
tgx
%
4
==
0
){
sumTempBuffer
(
p
sA
[
threadIdx
.
x
],
p
sA
[
threadIdx
.
x
+
2
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
2
]
);
}
if
(
tgx
%
8
==
0
){
sumTempBuffer
(
p
sA
[
threadIdx
.
x
],
p
sA
[
threadIdx
.
x
+
4
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
4
]
);
}
if
(
tgx
%
16
==
0
){
sumTempBuffer
(
p
sA
[
threadIdx
.
x
],
p
sA
[
threadIdx
.
x
+
8
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
8
]
);
}
if
(
tgx
==
0
)
{
psA
[
jIdx
].
eField
[
0
]
+=
p
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
0
];
psA
[
jIdx
].
eField
[
1
]
+=
p
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
1
];
psA
[
jIdx
].
eField
[
2
]
+=
p
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
2
];
psA
[
jIdx
].
eField
[
0
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
0
];
psA
[
jIdx
].
eField
[
1
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
1
];
psA
[
jIdx
].
eField
[
2
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
2
];
psA
[
jIdx
].
eFieldP
[
0
]
+=
p
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
0
];
psA
[
jIdx
].
eFieldP
[
1
]
+=
p
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
1
];
psA
[
jIdx
].
eFieldP
[
2
]
+=
p
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
2
];
psA
[
jIdx
].
eFieldP
[
0
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
0
];
psA
[
jIdx
].
eFieldP
[
1
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
1
];
psA
[
jIdx
].
eFieldP
[
2
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
2
];
}
}
...
...
@@ -300,6 +313,18 @@ if( (atomI == targetAtom || (y + jIdx) == targetAtom) ){
debugArray
[
index
].
w
=
pScaleValue
;
float
flag
=
9
.
0
f
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
(
float
)
bExclusionFlag
;
debugArray
[
index
].
y
=
(
float
)
(
tgx
);
debugArray
[
index
].
z
=
(
float
)
j
;
debugArray
[
index
].
w
=
jIdx
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
(
float
)
dScaleMask
;
debugArray
[
index
].
y
=
(
float
)
pScaleMask
.
x
;
debugArray
[
index
].
z
=
(
float
)
pScaleMask
.
y
;
debugArray
[
index
].
w
=
(
float
)
flags
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
outOfBounds
?
0
.
0
f
:
ijField
[
indexI
][
0
];
debugArray
[
index
].
y
=
outOfBounds
?
0
.
0
f
:
ijField
[
indexI
][
1
];
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
View file @
b20978e1
...
...
@@ -759,13 +759,22 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
}
#endif
(
void
)
fprintf
(
amoebaGpu
->
log
,
"MI iteration=%3d eps %14.6e [%14.6e %14.6e] done=%d
\n
"
,
iteration
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
1
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
2
],
done
);
fflush
(
amoebaGpu
->
log
);
if
(
amoebaGpu
->
mutualInducedCurrentEpsilon
!=
amoebaGpu
->
mutualInducedCurrentEpsilon
)
exit
(
0
);
iteration
++
;
}
amoebaGpu
->
mutualInducedDone
=
done
;
amoebaGpu
->
mutualInducedConverged
=
(
!
done
||
iteration
>
amoebaGpu
->
mutualInducedMaxIterations
)
?
0
:
1
;
if
(
1
){
if
(
0
){
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
View file @
b20978e1
...
...
@@ -209,11 +209,6 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
#endif
}
else
{
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
];
if
(
flags
==
0
)
{
// No interactions in this block.
}
else
{
if
(
lasty
!=
y
)
...
...
@@ -225,6 +220,11 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
loadMutualInducedShared
(
&
(
sA
[
threadIdx
.
x
]),
atomJ
);
}
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
];
if
(
flags
==
0
)
{
// No interactions in this block.
}
else
{
// zero shared fields
zeroMutualInducedParticleSharedField
(
&
(
sA
[
threadIdx
.
x
])
);
...
...
@@ -273,36 +273,36 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
}
else
{
p
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
=
mask
?
0
.
0
f
:
ijField
[
1
][
0
];
p
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
=
mask
?
0
.
0
f
:
ijField
[
1
][
1
];
p
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
=
mask
?
0
.
0
f
:
ijField
[
1
][
2
];
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
=
mask
?
0
.
0
f
:
ijField
[
1
][
0
];
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
=
mask
?
0
.
0
f
:
ijField
[
1
][
1
];
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
=
mask
?
0
.
0
f
:
ijField
[
1
][
2
];
p
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
=
mask
?
0
.
0
f
:
ijField
[
3
][
0
];
p
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
=
mask
?
0
.
0
f
:
ijField
[
3
][
1
];
p
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
=
mask
?
0
.
0
f
:
ijField
[
3
][
2
];
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
=
mask
?
0
.
0
f
:
ijField
[
3
][
0
];
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
=
mask
?
0
.
0
f
:
ijField
[
3
][
1
];
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
=
mask
?
0
.
0
f
:
ijField
[
3
][
2
];
if
(
tgx
%
2
==
0
){
sumTempBuffer
(
p
sA
[
threadIdx
.
x
],
p
sA
[
threadIdx
.
x
+
1
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
}
if
(
tgx
%
4
==
0
){
sumTempBuffer
(
p
sA
[
threadIdx
.
x
],
p
sA
[
threadIdx
.
x
+
2
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
2
]
);
}
if
(
tgx
%
8
==
0
){
sumTempBuffer
(
p
sA
[
threadIdx
.
x
],
p
sA
[
threadIdx
.
x
+
4
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
4
]
);
}
if
(
tgx
%
16
==
0
){
sumTempBuffer
(
p
sA
[
threadIdx
.
x
],
p
sA
[
threadIdx
.
x
+
8
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
8
]
);
}
if
(
tgx
==
0
)
{
psA
[
jIdx
].
field
[
0
]
+=
p
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
0
];
psA
[
jIdx
].
field
[
1
]
+=
p
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
1
];
psA
[
jIdx
].
field
[
2
]
+=
p
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
2
];
psA
[
jIdx
].
field
[
0
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
0
];
psA
[
jIdx
].
field
[
1
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
1
];
psA
[
jIdx
].
field
[
2
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
2
];
psA
[
jIdx
].
fieldPolar
[
0
]
+=
p
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
0
];
psA
[
jIdx
].
fieldPolar
[
1
]
+=
p
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
1
];
psA
[
jIdx
].
fieldPolar
[
2
]
+=
p
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
2
];
psA
[
jIdx
].
fieldPolar
[
0
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
0
];
psA
[
jIdx
].
fieldPolar
[
1
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
1
];
psA
[
jIdx
].
fieldPolar
[
2
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
2
];
}
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
View file @
b20978e1
...
...
@@ -396,16 +396,20 @@ if( 0 ){
amoebaGpu
->
psWorkUnit
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Ixn count=%u
\n
"
,
gpu
->
psInteractionCount
->
_pSysStream
[
0
][
0
]
);
for
(
unsigned
int
ii
=
0
;
ii
<
gpu
->
psInteractingWorkUnit
->
_length
;
ii
++
){
unsigned
int
x
=
gpu
->
psInteractingWorkUnit
->
_pSysStream
[
0
][
ii
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
//unsigned int y = ((x >> 2) & 0x7fff);
unsigned
int
exclusions
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Cell %8u %8u [%5u %5u %1u] "
,
ii
,
gpu
->
psInteractingWorkUnit
->
_pSysStream
[
0
][
ii
],
x
,
y
,
exclusions
);
// x = (x >> 17);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"GpuCell %8u %8u [%5u %5u %1u] %10u "
,
ii
,
gpu
->
psInteractingWorkUnit
->
_pSysStream
[
0
][
ii
],
x
,
y
,
exclusions
,
gpu
->
psInteractionFlag
->
_pSysStream
[
0
][
ii
]
);
x
=
amoebaGpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
];
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
exclusions
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
" %8u [%5u %5u %1u]
%10u
\n
"
,
amoebaGpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
],
x
,
y
,
exclusions
,
gpu
->
psInteractionFlag
->
_pSysStream
[
0
][
ii
]
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
AmGpu
%8u [%5u %5u %1u]
\n
"
,
amoebaGpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
],
x
,
y
,
exclusions
);
}
}
else
{
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment