Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
b20978e1
"olla/vscode:/vscode.git/clone" did not exist on "b2c89feb483b9120b8b11f61e26ad6e7a5cc4166"
Commit
b20978e1
authored
Oct 06, 2010
by
Mark Friedrichs
Browse files
Fix for direct PME
parent
a9054686
Changes
11
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
139 additions
and
105 deletions
+139
-105
plugins/amoeba/platforms/cuda/src/AmoebaCudaData.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaData.cpp
+0
-2
plugins/amoeba/platforms/cuda/src/AmoebaCudaData.h
plugins/amoeba/platforms/cuda/src/AmoebaCudaData.h
+2
-1
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+9
-8
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
+2
-2
plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
.../src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
+25
-17
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
...ms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
+4
-16
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
...rms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
+51
-26
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+10
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
...a/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
+25
-25
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
...platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
+10
-6
No files found.
plugins/amoeba/platforms/cuda/src/AmoebaCudaData.cpp
View file @
b20978e1
...
@@ -47,9 +47,7 @@ AmoebaCudaData::AmoebaCudaData( CudaPlatform::PlatformData& data ) : cudaPlatfor
...
@@ -47,9 +47,7 @@ AmoebaCudaData::AmoebaCudaData( CudaPlatform::PlatformData& data ) : cudaPlatfor
}
}
AmoebaCudaData
::~
AmoebaCudaData
()
{
AmoebaCudaData
::~
AmoebaCudaData
()
{
(
void
)
fprintf
(
stderr
,
"In AmoebaCudaData destructor
\n
"
);
fflush
(
stderr
);
amoebaGpuShutDown
(
amoebaGpu
);
amoebaGpuShutDown
(
amoebaGpu
);
(
void
)
fprintf
(
stderr
,
"Done AmoebaCudaData destructor
\n
"
);
fflush
(
stderr
);
}
}
void
AmoebaCudaData
::
decrementKernelCount
(
void
)
{
void
AmoebaCudaData
::
decrementKernelCount
(
void
)
{
...
...
plugins/amoeba/platforms/cuda/src/AmoebaCudaData.h
View file @
b20978e1
...
@@ -167,9 +167,10 @@ public:
...
@@ -167,9 +167,10 @@ public:
*/
*/
void
setApplyCutoff
(
int
applyCutoff
);
void
setApplyCutoff
(
int
applyCutoff
);
CudaPlatform
::
PlatformData
&
cudaPlatformData
;
private:
private:
CudaPlatform
::
PlatformData
&
cudaPlatformData
;
amoebaGpuContext
amoebaGpu
;
amoebaGpuContext
amoebaGpu
;
bool
hasAmoebaBonds
,
hasAmoebaGeneralizedKirkwood
,
hasAmoebaMultipole
;
bool
hasAmoebaBonds
,
hasAmoebaGeneralizedKirkwood
,
hasAmoebaMultipole
;
int
multipoleForceCount
;
int
multipoleForceCount
;
...
...
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
b20978e1
...
@@ -670,11 +670,11 @@ static void computeAmoebaMultipoleForce( AmoebaCudaData& data ) {
...
@@ -670,11 +670,11 @@ static void computeAmoebaMultipoleForce( AmoebaCudaData& data ) {
amoebaGpuContext
gpu
=
data
.
getAmoebaGpu
();
amoebaGpuContext
gpu
=
data
.
getAmoebaGpu
();
if
(
data
.
getMultipoleForceCount
()
==
0
){
if
(
data
.
getMultipoleForceCount
()
==
0
){
gpuCopyInteractingWorkUnit
(
gpu
);
gpuCopyWorkUnit
(
gpu
);
}
if
(
data
.
getApplyCutoff
()
&&
(
data
.
getMultipoleForceCount
()
%
100
)
==
0
){
gpuReorderAtoms
(
gpu
->
gpuContext
);
}
}
//if( data.getApplyCutoff() && (data.getMultipoleForceCount() % 100) == 0 ){
//gpuReorderAtoms(gpu->gpuContext);
//}
data
.
incrementMultipoleForceCount
();
data
.
incrementMultipoleForceCount
();
data
.
initializeGpu
();
data
.
initializeGpu
();
...
@@ -875,6 +875,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -875,6 +875,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
}
}
gpuSetAmoebaPMEParameters
(
data
.
getAmoebaGpu
(),
(
float
)
alpha
,
xsize
,
ysize
,
zsize
);
gpuSetAmoebaPMEParameters
(
data
.
getAmoebaGpu
(),
(
float
)
alpha
,
xsize
,
ysize
,
zsize
);
data
.
setApplyCutoff
(
1
);
data
.
setApplyCutoff
(
1
);
data
.
cudaPlatformData
.
nonbondedMethod
=
PARTICLE_MESH_EWALD
;
amoebaGpuContext
amoebaGpu
=
data
.
getAmoebaGpu
();
amoebaGpuContext
amoebaGpu
=
data
.
getAmoebaGpu
();
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpu
->
sim
.
nonbondedCutoffSqr
=
force
.
getCutoffDistance
()
*
force
.
getCutoffDistance
();
gpu
->
sim
.
nonbondedCutoffSqr
=
force
.
getCutoffDistance
()
*
force
.
getCutoffDistance
();
...
...
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
View file @
b20978e1
...
@@ -4305,7 +4305,7 @@ void trackMutualInducedIterations( amoebaGpuContext amoebaGpu, int iteration){
...
@@ -4305,7 +4305,7 @@ void trackMutualInducedIterations( amoebaGpuContext amoebaGpu, int iteration){
--------------------------------------------------------------------------------------- */
--------------------------------------------------------------------------------------- */
void
gpuCopy
Interacting
WorkUnit
(
amoebaGpuContext
amoebaGpu
){
void
gpuCopyWorkUnit
(
amoebaGpuContext
amoebaGpu
){
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
...
@@ -4315,7 +4315,7 @@ void gpuCopyInteractingWorkUnit( amoebaGpuContext amoebaGpu ){
...
@@ -4315,7 +4315,7 @@ void gpuCopyInteractingWorkUnit( amoebaGpuContext amoebaGpu ){
amoebaGpu
->
psWorkUnit
->
Download
();
amoebaGpu
->
psWorkUnit
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"gpuCopyInteractingWorkUnit called -- to be removed.
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"gpuCopyInteractingWorkUnit called -- to be removed.
\n
"
);
for
(
unsigned
int
ii
=
0
;
ii
<
gpu
->
psInteractingWorkUnit
->
_length
;
ii
++
){
for
(
unsigned
int
ii
=
0
;
ii
<
gpu
->
psInteractingWorkUnit
->
_length
;
ii
++
){
gpu
->
psInteractingWorkUnit
->
_pSysStream
[
0
][
ii
]
=
amoebaGpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
];
//
gpu->psInteractingWorkUnit->_pSysStream[0][ii] = amoebaGpu->psWorkUnit->_pSysStream[0][ii];
gpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
]
=
amoebaGpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
];
gpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
]
=
amoebaGpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
];
}
}
gpu
->
psInteractingWorkUnit
->
Upload
();
gpu
->
psInteractingWorkUnit
->
Upload
();
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
View file @
b20978e1
...
@@ -344,7 +344,7 @@ extern "C"
...
@@ -344,7 +344,7 @@ extern "C"
void
gpuSetAmoebaBondOffsets
(
amoebaGpuContext
gpu
);
void
gpuSetAmoebaBondOffsets
(
amoebaGpuContext
gpu
);
extern
"C"
extern
"C"
void
gpuCopy
Interacting
WorkUnit
(
amoebaGpuContext
gpu
);
void
gpuCopyWorkUnit
(
amoebaGpuContext
gpu
);
/*
/*
extern "C"
extern "C"
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
View file @
b20978e1
...
@@ -255,6 +255,14 @@ if( atomI == targetAtom ){
...
@@ -255,6 +255,14 @@ if( atomI == targetAtom ){
}
else
{
}
else
{
if
(
lasty
!=
y
)
{
// load shared data
loadPmeDirectElectrostaticShared
(
&
(
sA
[
threadIdx
.
x
]),
(
y
+
tgx
)
);
}
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
];
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
];
if
(
flags
==
0
)
{
if
(
flags
==
0
)
{
// No interactions in this block.
// No interactions in this block.
...
@@ -346,36 +354,36 @@ if( atomI == targetAtom ){
...
@@ -346,36 +354,36 @@ if( atomI == targetAtom ){
}
else
{
}
else
{
p
sA
[
threadIdx
.
x
].
tempForce
[
0
]
=
mask
?
0
.
0
f
:
force
[
0
];
sA
[
threadIdx
.
x
].
tempForce
[
0
]
=
mask
?
0
.
0
f
:
force
[
0
];
p
sA
[
threadIdx
.
x
].
tempForce
[
1
]
=
mask
?
0
.
0
f
:
force
[
1
];
sA
[
threadIdx
.
x
].
tempForce
[
1
]
=
mask
?
0
.
0
f
:
force
[
1
];
p
sA
[
threadIdx
.
x
].
tempForce
[
2
]
=
mask
?
0
.
0
f
:
force
[
2
];
sA
[
threadIdx
.
x
].
tempForce
[
2
]
=
mask
?
0
.
0
f
:
force
[
2
];
p
sA
[
threadIdx
.
x
].
tempTorque
[
0
]
=
mask
?
0
.
0
f
:
torque
[
1
][
0
];
sA
[
threadIdx
.
x
].
tempTorque
[
0
]
=
mask
?
0
.
0
f
:
torque
[
1
][
0
];
p
sA
[
threadIdx
.
x
].
tempTorque
[
1
]
=
mask
?
0
.
0
f
:
torque
[
1
][
1
];
sA
[
threadIdx
.
x
].
tempTorque
[
1
]
=
mask
?
0
.
0
f
:
torque
[
1
][
1
];
p
sA
[
threadIdx
.
x
].
tempTorque
[
2
]
=
mask
?
0
.
0
f
:
torque
[
1
][
2
];
sA
[
threadIdx
.
x
].
tempTorque
[
2
]
=
mask
?
0
.
0
f
:
torque
[
1
][
2
];
if
(
tgx
%
2
==
0
){
if
(
tgx
%
2
==
0
){
sumTempBuffer
(
p
sA
[
threadIdx
.
x
],
p
sA
[
threadIdx
.
x
+
1
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
}
}
if
(
tgx
%
4
==
0
){
if
(
tgx
%
4
==
0
){
sumTempBuffer
(
p
sA
[
threadIdx
.
x
],
p
sA
[
threadIdx
.
x
+
2
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
2
]
);
}
}
if
(
tgx
%
8
==
0
){
if
(
tgx
%
8
==
0
){
sumTempBuffer
(
p
sA
[
threadIdx
.
x
],
p
sA
[
threadIdx
.
x
+
4
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
4
]
);
}
}
if
(
tgx
%
16
==
0
){
if
(
tgx
%
16
==
0
){
sumTempBuffer
(
p
sA
[
threadIdx
.
x
],
p
sA
[
threadIdx
.
x
+
8
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
8
]
);
}
}
if
(
tgx
==
0
)
if
(
tgx
==
0
)
{
{
psA
[
jIdx
].
force
[
0
]
-=
p
sA
[
threadIdx
.
x
].
tempForce
[
0
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempForce
[
0
];
psA
[
jIdx
].
force
[
0
]
-=
sA
[
threadIdx
.
x
].
tempForce
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempForce
[
0
];
psA
[
jIdx
].
force
[
1
]
-=
p
sA
[
threadIdx
.
x
].
tempForce
[
1
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempForce
[
1
];
psA
[
jIdx
].
force
[
1
]
-=
sA
[
threadIdx
.
x
].
tempForce
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempForce
[
1
];
psA
[
jIdx
].
force
[
2
]
-=
p
sA
[
threadIdx
.
x
].
tempForce
[
2
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempForce
[
2
];
psA
[
jIdx
].
force
[
2
]
-=
sA
[
threadIdx
.
x
].
tempForce
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempForce
[
2
];
psA
[
jIdx
].
torque
[
0
]
+=
p
sA
[
threadIdx
.
x
].
tempTorque
[
0
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
0
];
psA
[
jIdx
].
torque
[
0
]
+=
sA
[
threadIdx
.
x
].
tempTorque
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
0
];
psA
[
jIdx
].
torque
[
1
]
+=
p
sA
[
threadIdx
.
x
].
tempTorque
[
1
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
1
];
psA
[
jIdx
].
torque
[
1
]
+=
sA
[
threadIdx
.
x
].
tempTorque
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
1
];
psA
[
jIdx
].
torque
[
2
]
+=
p
sA
[
threadIdx
.
x
].
tempTorque
[
2
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
2
];
psA
[
jIdx
].
torque
[
2
]
+=
sA
[
threadIdx
.
x
].
tempTorque
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
2
];
}
}
}
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
View file @
b20978e1
...
@@ -429,7 +429,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
...
@@ -429,7 +429,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
// print intermediate results for the targetAtom
// print intermediate results for the targetAtom
unsigned
int
targetAtom
=
354
;
unsigned
int
targetAtom
=
1280
;
#endif
#endif
kClearFields_3
(
amoebaGpu
,
2
);
kClearFields_3
(
amoebaGpu
,
2
);
...
@@ -458,8 +458,6 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
...
@@ -458,8 +458,6 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
]
);
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
]
);
#endif
#endif
}
else
{
}
else
{
//amoebaGpu->psWorkUnit->_pDevStream[0],
kCalculateAmoebaPmeDirectFixedE_FieldN2_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaPmeDirectFixedE_FieldN2_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
...
@@ -553,24 +551,14 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
...
@@ -553,24 +551,14 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
amoebaGpu
->
gpuContext
->
psPosq4
->
Download
();
amoebaGpu
->
gpuContext
->
psPosq4
->
Download
();
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
for
(
int
jj
=
0
;
jj
<
gpu
->
natoms
;
jj
++
){
int
debugIndex
=
jj
;
int
debugIndex
=
jj
;
if
(
fabs
(
debugArray
->
_pSysStream
[
0
][
jj
+
paddedNumberOfAtoms
].
x
)
>
0.0
){
if
(
fabs
(
debugArray
->
_pSysStream
[
0
][
jj
+
3
*
paddedNumberOfAtoms
].
x
)
>
0.0
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d PmeFixedEField
\n
"
,
jj
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d PmeFixedEField
\n
"
,
jj
);
for
(
int
kk
=
0
;
kk
<
6
;
kk
++
){
for
(
int
kk
=
0
;
kk
<
7
;
kk
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"[%16.9e %16.9e %16.9e %16.9e]
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"[%16.9e %16.9e %16.9e %16.9e]
\n
"
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
x
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
y
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
x
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
y
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
z
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
w
);
debugArray
->
_pSysStream
[
0
][
debugIndex
].
z
,
debugArray
->
_pSysStream
[
0
][
debugIndex
].
w
);
debugIndex
+=
paddedNumberOfAtoms
;
debugIndex
+=
paddedNumberOfAtoms
;
}
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"[%16.9e %16.9e %16.9e ] [%16.9e %16.9e %16.9e] [%16.9e %16.9e %16.9e] p
\n
"
,
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
x
,
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
y
,
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
z
,
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
x
-
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
0
].
x
,
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
y
-
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
0
].
y
,
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
z
-
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
0
].
z
,
(
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
x
-
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
0
].
x
)
/
5.50
f
,
(
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
y
-
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
0
].
y
)
/
5.50
f
,
(
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
jj
].
z
-
amoebaGpu
->
gpuContext
->
psPosq4
->
_pSysStream
[
0
][
0
].
z
)
/
5.50
f
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
}
}
...
@@ -591,7 +579,7 @@ if( fabs(debugArray->_pSysStream[0][jj+paddedNumberOfAtoms].x) > 0.0 ){
...
@@ -591,7 +579,7 @@ if( fabs(debugArray->_pSysStream[0][jj+paddedNumberOfAtoms].x) > 0.0 ){
}
}
#endif
#endif
if
(
1
){
if
(
0
){
std
::
vector
<
int
>
fileId
;
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
0
);
fileId
.
push_back
(
0
);
VectorOfDoubleVectors
outputVector
;
VectorOfDoubleVectors
outputVector
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
View file @
b20978e1
...
@@ -140,18 +140,30 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectFixedE_Field, _kernel)(
...
@@ -140,18 +140,30 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectFixedE_Field, _kernel)(
fieldPolarSum
[
2
]
+=
match
?
0
.
0
f
:
ijField
[
2
][
2
];
fieldPolarSum
[
2
]
+=
match
?
0
.
0
f
:
ijField
[
2
][
2
];
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
){
if
(
atomI
==
targetAtom
||
targetAtom
==
(
y
+
j
)
){
unsigned
int
index
=
atomI
==
targetAtom
?
(
y
+
j
)
:
atomI
;
unsigned
int
index
=
atomI
==
targetAtom
?
(
y
+
j
)
:
atomI
;
unsigned
int
indexI
=
0
;
unsigned
int
indexI
=
0
;
unsigned
int
indexJ
=
indexI
?
0
:
2
;
unsigned
int
indexJ
=
indexI
?
0
:
2
;
unsigned
int
indices
[
4
]
=
{
indexI
,
indexJ
,
indexI
+
1
,
indexJ
+
1
};
unsigned
int
indices
[
4
]
=
{
indexI
,
indexJ
,
indexI
+
1
,
indexJ
+
1
};
float
flag
=
7
.
0
f
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
j
);
debugArray
[
index
].
y
=
(
float
)
(
y
+
j
);
debugArray
[
index
].
z
=
dScaleValue
;
debugArray
[
index
].
z
=
dScaleValue
;
debugArray
[
index
].
w
=
pScaleValue
;
debugArray
[
index
].
w
=
pScaleValue
;
float
flag
=
7
.
0
f
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
(
float
)
bExclusionFlag
;
debugArray
[
index
].
y
=
(
float
)
(
tgx
);
debugArray
[
index
].
z
=
(
float
)
j
;
debugArray
[
index
].
w
=
flag
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
(
float
)
dScaleMask
;
debugArray
[
index
].
y
=
(
float
)
pScaleMask
.
x
;
debugArray
[
index
].
z
=
(
float
)
pScaleMask
.
y
;
debugArray
[
index
].
w
=
flag
;
for
(
int
ii
=
0
;
ii
<
4
;
ii
++
){
for
(
int
ii
=
0
;
ii
<
4
;
ii
++
){
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
match
?
0
.
0
f
:
ijField
[
indices
[
ii
]][
0
];
debugArray
[
index
].
x
=
match
?
0
.
0
f
:
ijField
[
indices
[
ii
]][
0
];
...
@@ -159,6 +171,7 @@ if( atomI == targetAtom ){
...
@@ -159,6 +171,7 @@ if( atomI == targetAtom ){
debugArray
[
index
].
z
=
match
?
0
.
0
f
:
ijField
[
indices
[
ii
]][
2
];
debugArray
[
index
].
z
=
match
?
0
.
0
f
:
ijField
[
indices
[
ii
]][
2
];
debugArray
[
index
].
w
=
flag
;
debugArray
[
index
].
w
=
flag
;
}
}
for
(
int
pullIndex
=
0
;
pullIndex
<
maxPullIndex
;
pullIndex
++
){
for
(
int
pullIndex
=
0
;
pullIndex
<
maxPullIndex
;
pullIndex
++
){
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
...
@@ -186,11 +199,6 @@ if( atomI == targetAtom ){
...
@@ -186,11 +199,6 @@ if( atomI == targetAtom ){
}
else
{
}
else
{
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
];
// flags = 0xFFFFFFFF;
if
(
flags
==
0
)
{
// No interactions in this block.
}
else
{
if
(
lasty
!=
y
)
{
if
(
lasty
!=
y
)
{
// load coordinates, charge, ...
// load coordinates, charge, ...
...
@@ -199,6 +207,11 @@ if( atomI == targetAtom ){
...
@@ -199,6 +207,11 @@ if( atomI == targetAtom ){
}
}
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
];
if
(
flags
==
0
)
{
// No interactions in this block.
}
else
{
// zero shared fields
// zero shared fields
zeroFixedFieldParticleSharedField
(
&
(
sA
[
threadIdx
.
x
])
);
zeroFixedFieldParticleSharedField
(
&
(
sA
[
threadIdx
.
x
])
);
...
@@ -254,36 +267,36 @@ if( atomI == targetAtom ){
...
@@ -254,36 +267,36 @@ if( atomI == targetAtom ){
}
else
{
}
else
{
p
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
0
];
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
0
];
p
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
1
];
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
1
];
p
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
2
];
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
2
];
p
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
0
];
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
0
];
p
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
1
];
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
1
];
p
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
2
];
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
2
];
if
(
tgx
%
2
==
0
){
if
(
tgx
%
2
==
0
){
sumTempBuffer
(
p
sA
[
threadIdx
.
x
],
p
sA
[
threadIdx
.
x
+
1
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
}
}
if
(
tgx
%
4
==
0
){
if
(
tgx
%
4
==
0
){
sumTempBuffer
(
p
sA
[
threadIdx
.
x
],
p
sA
[
threadIdx
.
x
+
2
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
2
]
);
}
}
if
(
tgx
%
8
==
0
){
if
(
tgx
%
8
==
0
){
sumTempBuffer
(
p
sA
[
threadIdx
.
x
],
p
sA
[
threadIdx
.
x
+
4
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
4
]
);
}
}
if
(
tgx
%
16
==
0
){
if
(
tgx
%
16
==
0
){
sumTempBuffer
(
p
sA
[
threadIdx
.
x
],
p
sA
[
threadIdx
.
x
+
8
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
8
]
);
}
}
if
(
tgx
==
0
)
if
(
tgx
==
0
)
{
{
psA
[
jIdx
].
eField
[
0
]
+=
p
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
0
];
psA
[
jIdx
].
eField
[
0
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
0
];
psA
[
jIdx
].
eField
[
1
]
+=
p
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
1
];
psA
[
jIdx
].
eField
[
1
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
1
];
psA
[
jIdx
].
eField
[
2
]
+=
p
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
2
];
psA
[
jIdx
].
eField
[
2
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
2
];
psA
[
jIdx
].
eFieldP
[
0
]
+=
p
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
0
];
psA
[
jIdx
].
eFieldP
[
0
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
0
];
psA
[
jIdx
].
eFieldP
[
1
]
+=
p
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
1
];
psA
[
jIdx
].
eFieldP
[
1
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
1
];
psA
[
jIdx
].
eFieldP
[
2
]
+=
p
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
2
];
psA
[
jIdx
].
eFieldP
[
2
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
2
];
}
}
}
}
...
@@ -300,6 +313,18 @@ if( (atomI == targetAtom || (y + jIdx) == targetAtom) ){
...
@@ -300,6 +313,18 @@ if( (atomI == targetAtom || (y + jIdx) == targetAtom) ){
debugArray
[
index
].
w
=
pScaleValue
;
debugArray
[
index
].
w
=
pScaleValue
;
float
flag
=
9
.
0
f
;
float
flag
=
9
.
0
f
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
(
float
)
bExclusionFlag
;
debugArray
[
index
].
y
=
(
float
)
(
tgx
);
debugArray
[
index
].
z
=
(
float
)
j
;
debugArray
[
index
].
w
=
jIdx
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
(
float
)
dScaleMask
;
debugArray
[
index
].
y
=
(
float
)
pScaleMask
.
x
;
debugArray
[
index
].
z
=
(
float
)
pScaleMask
.
y
;
debugArray
[
index
].
w
=
(
float
)
flags
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
outOfBounds
?
0
.
0
f
:
ijField
[
indexI
][
0
];
debugArray
[
index
].
x
=
outOfBounds
?
0
.
0
f
:
ijField
[
indexI
][
0
];
debugArray
[
index
].
y
=
outOfBounds
?
0
.
0
f
:
ijField
[
indexI
][
1
];
debugArray
[
index
].
y
=
outOfBounds
?
0
.
0
f
:
ijField
[
indexI
][
1
];
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
View file @
b20978e1
...
@@ -759,13 +759,22 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
...
@@ -759,13 +759,22 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
}
}
#endif
#endif
(
void
)
fprintf
(
amoebaGpu
->
log
,
"MI iteration=%3d eps %14.6e [%14.6e %14.6e] done=%d
\n
"
,
iteration
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
1
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysStream
[
0
][
2
],
done
);
fflush
(
amoebaGpu
->
log
);
if
(
amoebaGpu
->
mutualInducedCurrentEpsilon
!=
amoebaGpu
->
mutualInducedCurrentEpsilon
)
exit
(
0
);
iteration
++
;
iteration
++
;
}
}
amoebaGpu
->
mutualInducedDone
=
done
;
amoebaGpu
->
mutualInducedDone
=
done
;
amoebaGpu
->
mutualInducedConverged
=
(
!
done
||
iteration
>
amoebaGpu
->
mutualInducedMaxIterations
)
?
0
:
1
;
amoebaGpu
->
mutualInducedConverged
=
(
!
done
||
iteration
>
amoebaGpu
->
mutualInducedMaxIterations
)
?
0
:
1
;
if
(
1
){
if
(
0
){
std
::
vector
<
int
>
fileId
;
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
//fileId.push_back( 0 );
VectorOfDoubleVectors
outputVector
;
VectorOfDoubleVectors
outputVector
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
View file @
b20978e1
...
@@ -209,11 +209,6 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
...
@@ -209,11 +209,6 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
#endif
#endif
}
else
{
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
];
if
(
flags
==
0
)
{
// No interactions in this block.
}
else
{
}
else
{
if
(
lasty
!=
y
)
if
(
lasty
!=
y
)
...
@@ -225,6 +220,11 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
...
@@ -225,6 +220,11 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
loadMutualInducedShared
(
&
(
sA
[
threadIdx
.
x
]),
atomJ
);
loadMutualInducedShared
(
&
(
sA
[
threadIdx
.
x
]),
atomJ
);
}
}
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
];
if
(
flags
==
0
)
{
// No interactions in this block.
}
else
{
// zero shared fields
// zero shared fields
zeroMutualInducedParticleSharedField
(
&
(
sA
[
threadIdx
.
x
])
);
zeroMutualInducedParticleSharedField
(
&
(
sA
[
threadIdx
.
x
])
);
...
@@ -273,36 +273,36 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
...
@@ -273,36 +273,36 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
}
else
{
}
else
{
p
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
=
mask
?
0
.
0
f
:
ijField
[
1
][
0
];
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
=
mask
?
0
.
0
f
:
ijField
[
1
][
0
];
p
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
=
mask
?
0
.
0
f
:
ijField
[
1
][
1
];
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
=
mask
?
0
.
0
f
:
ijField
[
1
][
1
];
p
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
=
mask
?
0
.
0
f
:
ijField
[
1
][
2
];
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
=
mask
?
0
.
0
f
:
ijField
[
1
][
2
];
p
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
=
mask
?
0
.
0
f
:
ijField
[
3
][
0
];
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
=
mask
?
0
.
0
f
:
ijField
[
3
][
0
];
p
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
=
mask
?
0
.
0
f
:
ijField
[
3
][
1
];
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
=
mask
?
0
.
0
f
:
ijField
[
3
][
1
];
p
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
=
mask
?
0
.
0
f
:
ijField
[
3
][
2
];
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
=
mask
?
0
.
0
f
:
ijField
[
3
][
2
];
if
(
tgx
%
2
==
0
){
if
(
tgx
%
2
==
0
){
sumTempBuffer
(
p
sA
[
threadIdx
.
x
],
p
sA
[
threadIdx
.
x
+
1
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
}
}
if
(
tgx
%
4
==
0
){
if
(
tgx
%
4
==
0
){
sumTempBuffer
(
p
sA
[
threadIdx
.
x
],
p
sA
[
threadIdx
.
x
+
2
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
2
]
);
}
}
if
(
tgx
%
8
==
0
){
if
(
tgx
%
8
==
0
){
sumTempBuffer
(
p
sA
[
threadIdx
.
x
],
p
sA
[
threadIdx
.
x
+
4
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
4
]
);
}
}
if
(
tgx
%
16
==
0
){
if
(
tgx
%
16
==
0
){
sumTempBuffer
(
p
sA
[
threadIdx
.
x
],
p
sA
[
threadIdx
.
x
+
8
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
8
]
);
}
}
if
(
tgx
==
0
)
if
(
tgx
==
0
)
{
{
psA
[
jIdx
].
field
[
0
]
+=
p
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
0
];
psA
[
jIdx
].
field
[
0
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
0
];
psA
[
jIdx
].
field
[
1
]
+=
p
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
1
];
psA
[
jIdx
].
field
[
1
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
1
];
psA
[
jIdx
].
field
[
2
]
+=
p
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
2
];
psA
[
jIdx
].
field
[
2
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
2
];
psA
[
jIdx
].
fieldPolar
[
0
]
+=
p
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
0
];
psA
[
jIdx
].
fieldPolar
[
0
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
0
];
psA
[
jIdx
].
fieldPolar
[
1
]
+=
p
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
1
];
psA
[
jIdx
].
fieldPolar
[
1
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
1
];
psA
[
jIdx
].
fieldPolar
[
2
]
+=
p
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
+
p
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
2
];
psA
[
jIdx
].
fieldPolar
[
2
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
2
];
}
}
}
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
View file @
b20978e1
...
@@ -396,16 +396,20 @@ if( 0 ){
...
@@ -396,16 +396,20 @@ if( 0 ){
amoebaGpu
->
psWorkUnit
->
Download
();
amoebaGpu
->
psWorkUnit
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Ixn count=%u
\n
"
,
gpu
->
psInteractionCount
->
_pSysStream
[
0
][
0
]
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Ixn count=%u
\n
"
,
gpu
->
psInteractionCount
->
_pSysStream
[
0
][
0
]
);
for
(
unsigned
int
ii
=
0
;
ii
<
gpu
->
psInteractingWorkUnit
->
_length
;
ii
++
){
for
(
unsigned
int
ii
=
0
;
ii
<
gpu
->
psInteractingWorkUnit
->
_length
;
ii
++
){
unsigned
int
x
=
gpu
->
psInteractingWorkUnit
->
_pSysStream
[
0
][
ii
];
unsigned
int
x
=
gpu
->
psInteractingWorkUnit
->
_pSysStream
[
0
][
ii
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
//unsigned int y = ((x >> 2) & 0x7fff);
unsigned
int
exclusions
=
(
x
&
0x1
);
unsigned
int
exclusions
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
x
=
(
x
>>
17
)
<<
GRIDBITS
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Cell %8u %8u [%5u %5u %1u] "
,
ii
,
gpu
->
psInteractingWorkUnit
->
_pSysStream
[
0
][
ii
],
x
,
y
,
exclusions
);
// x = (x >> 17);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"GpuCell %8u %8u [%5u %5u %1u] %10u "
,
ii
,
gpu
->
psInteractingWorkUnit
->
_pSysStream
[
0
][
ii
],
x
,
y
,
exclusions
,
gpu
->
psInteractionFlag
->
_pSysStream
[
0
][
ii
]
);
x
=
amoebaGpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
];
x
=
amoebaGpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
];
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
exclusions
=
(
x
&
0x1
);
exclusions
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
x
=
(
x
>>
17
)
<<
GRIDBITS
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
" %8u [%5u %5u %1u]
%10u
\n
"
,
amoebaGpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
],
x
,
y
,
exclusions
,
gpu
->
psInteractionFlag
->
_pSysStream
[
0
][
ii
]
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
AmGpu
%8u [%5u %5u %1u]
\n
"
,
amoebaGpu
->
psWorkUnit
->
_pSysStream
[
0
][
ii
],
x
,
y
,
exclusions
);
}
}
}
else
{
}
else
{
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment