Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
8a331fb9
Commit
8a331fb9
authored
Apr 20, 2011
by
Mark Friedrichs
Browse files
Direct space optimizations
parent
af4d503a
Changes
17
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
1046 additions
and
290 deletions
+1046
-290
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
+6
-5
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
...ins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEAndGkFields.cu
...cuda/src/kernels/kCalculateAmoebaCudaFixedEAndGkFields.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.cu
...forms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
...latforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
...c/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
...uda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedParticle.h
...a/src/kernels/kCalculateAmoebaCudaMutualInducedParticle.h
+0
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
...src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
+890
-65
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
.../src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
+6
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
...ms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
+2
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+101
-15
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
...a/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
+31
-190
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
...platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
+1
-1
No files found.
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
View file @
8a331fb9
...
...
@@ -3449,19 +3449,20 @@ tgx = 0;
Get threads/block
@param amoebaGpu amoebaGpuContext
@param sharedMemoryPerThread shared memory/thread
@param amoebaGpu amoebaGpuContext
@param sharedMemoryPerThread shared memory/thread
@param sharedMemoryPerBlock shared memory/block
@return threadsPerBlock
--------------------------------------------------------------------------------------- */
unsigned
int
getThreadsPerBlock
(
amoebaGpuContext
amoebaGpu
,
unsigned
int
sharedMemoryPerThread
)
unsigned
int
getThreadsPerBlock
(
amoebaGpuContext
amoebaGpu
,
unsigned
int
sharedMemoryPerThread
,
unsigned
int
sharedMemoryPerBlock
)
{
unsigned
int
grid
=
amoebaGpu
->
gpuContext
->
grid
;
unsigned
int
threadsPerBlock
=
(
amoebaGpu
->
gpuContext
->
sharedMemoryPerBlock
+
grid
-
1
)
/
(
grid
*
sharedMemoryPerThread
);
unsigned
int
threadsPerBlock
=
(
sharedMemoryPerBlock
+
grid
-
1
)
/
(
grid
*
sharedMemoryPerThread
);
threadsPerBlock
=
threadsPerBlock
<
1
?
1
:
threadsPerBlock
;
threadsPerBlock
*=
grid
;
threadsPerBlock
*=
grid
;
return
threadsPerBlock
;
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
View file @
8a331fb9
...
...
@@ -160,7 +160,7 @@ extern void kClearFloat( amoebaGpuContext amoebaGpu, unsigned int entries, CUDAS
extern
void
kClearFloat4
(
amoebaGpuContext
amoebaGpu
,
unsigned
int
entries
,
CUDAStream
<
float4
>*
fieldToClear
);
extern
void
kClearFields_1
(
amoebaGpuContext
amoebaGpu
);
extern
void
kClearFields_3
(
amoebaGpuContext
amoebaGpu
,
unsigned
int
numberToClear
);
extern
unsigned
int
getThreadsPerBlock
(
amoebaGpuContext
amoebaGpu
,
unsigned
int
sharedMemoryPerThread
);
extern
unsigned
int
getThreadsPerBlock
(
amoebaGpuContext
amoebaGpu
,
unsigned
int
sharedMemoryPerThread
,
unsigned
int
sharedMemoryPerBlock
);
//extern int isNanOrInfinity( double number );
extern
void
trackMutualInducedIterations
(
amoebaGpuContext
amoebaGpu
,
int
iteration
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
View file @
8a331fb9
...
...
@@ -759,7 +759,7 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueTo
maxThreads
=
128
;
else
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
ElectrostaticParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
ElectrostaticParticle
)
,
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
kClearFields_3
(
amoebaGpu
,
1
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEAndGkFields.cu
View file @
8a331fb9
...
...
@@ -362,7 +362,7 @@ void cudaComputeAmoebaFixedEAndGkFields( amoebaGpuContext amoebaGpu )
maxThreads
=
128
;
else
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)
,
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
kClearFields_3
(
amoebaGpu
,
3
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.cu
View file @
8a331fb9
...
...
@@ -108,7 +108,7 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu )
maxThreads
=
128
;
else
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)
,
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
#ifdef AMOEBA_DEBUG
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
View file @
8a331fb9
...
...
@@ -1813,7 +1813,7 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
maxThreads
=
128
;
else
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
KirkwoodParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
KirkwoodParticle
)
,
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
View file @
8a331fb9
...
...
@@ -978,7 +978,7 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
maxThreads
=
96
;
else
maxThreads
=
32
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
KirkwoodEDiffParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
KirkwoodEDiffParticle
)
,
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
#ifdef AMOEBA_DEBUG
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
View file @
8a331fb9
...
...
@@ -490,7 +490,7 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon
maxThreads
=
128
;
else
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
MutualInducedParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
MutualInducedParticle
)
,
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
#ifdef AMOEBA_DEBUG
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
View file @
8a331fb9
...
...
@@ -276,7 +276,7 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext
maxThreads
=
128
;
else
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
MutualInducedParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
MutualInducedParticle
)
,
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
#ifdef AMOEBA_DEBUG
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedParticle.h
View file @
8a331fb9
...
...
@@ -4,7 +4,6 @@ struct MutualInducedParticle {
float
x
;
float
y
;
float
z
;
float
q
;
float
inducedDipole
[
3
];
float
inducedDipolePolar
[
3
];
...
...
@@ -41,7 +40,6 @@ __device__ static void loadMutualInducedShared( MutualInducedParticle* sA, unsig
sA
->
x
=
posq
.
x
;
sA
->
y
=
posq
.
y
;
sA
->
z
=
posq
.
z
;
sA
->
q
=
posq
.
w
;
// dipole
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
View file @
8a331fb9
This diff is collapsed.
Click to expand it.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
View file @
8a331fb9
...
...
@@ -239,6 +239,9 @@ if( atomI == targetAtom || atomJ == targetAtom ){
// No interactions in this block.
}
else
{
#ifdef CALCULATE_FULL_TILE
flags
=
0xFFFFFFFF
;
#endif
sA
[
threadIdx
.
x
].
force
[
0
]
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
force
[
1
]
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
force
[
2
]
=
0
.
0
f
;
...
...
@@ -311,7 +314,8 @@ if( atomI == targetAtom || atomJ == targetAtom ){
psA
[
jIdx
].
torque
[
0
]
+=
forceTorqueEnergy
[
2
].
x
;
psA
[
jIdx
].
torque
[
1
]
+=
forceTorqueEnergy
[
2
].
y
;
psA
[
jIdx
].
torque
[
2
]
+=
forceTorqueEnergy
[
2
].
z
;
#ifndef CALCULATE_FULL_TILE
}
else
{
sA
[
threadIdx
.
x
].
tempForce
[
0
]
=
forceTorqueEnergy
[
0
].
x
;
...
...
@@ -345,6 +349,7 @@ if( atomI == targetAtom || atomJ == targetAtom ){
psA
[
jIdx
].
torque
[
1
]
+=
sA
[
threadIdx
.
x
].
tempTorque
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
1
];
psA
[
jIdx
].
torque
[
2
]
+=
sA
[
threadIdx
.
x
].
tempTorque
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
2
];
}
#endif
}
}
// end of atoms out-of-bounds
}
// end of flags&(1<<j block
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
View file @
8a331fb9
...
...
@@ -437,7 +437,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
maxThreads
=
192
;
else
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)
,
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
if
(
gpu
->
bOutputBufferPerWarp
){
...
...
@@ -469,7 +469,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
if
(
amoebaGpu
->
log
){
gpu
->
psInteractionCount
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"cudaComputeAmoebaPmeDirectFixedEField: threadsPerBlock=%u getThreadsPerBlock=%d sizeof=%u shrd=%u
\n
"
,
threadsPerBlock
,
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
)),
threadsPerBlock
,
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
)
,
gpu
->
sharedMemoryPerBlock
),
(
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
)),
(
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
))
*
threadsPerBlock
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"AmoebaCutoffForces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u ixnCt=%u workUnits=%u warp=%d
\n
"
,
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
gpu
->
bOutputBufferPerWarp
,
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
View file @
8a331fb9
...
...
@@ -37,10 +37,9 @@ void GetCalculateAmoebaCudaPmeMutualInducedFieldSim(amoebaGpuContext amoebaGpu)
#undef AMOEBA_DEBUG
#undef INCLUDE_MI_FIELD_BUFFERS
#define INCLUDE_MI_FIELD_BUFFERS
//
#define INCLUDE_MI_FIELD_BUFFERS
#include "kCalculateAmoebaCudaMutualInducedParticle.h"
#undef INCLUDE_MI_FIELD_BUFFERS
#ifdef INCLUDE_MI_FIELD_BUFFERS
__device__
void
sumTempBuffer
(
MutualInducedParticle
&
atomI
,
MutualInducedParticle
&
atomJ
){
atomI
.
tempBuffer
[
0
]
+=
atomJ
.
tempBuffer
[
0
];
...
...
@@ -51,6 +50,93 @@ __device__ void sumTempBuffer( MutualInducedParticle& atomI, MutualInducedPartic
atomI
.
tempBufferP
[
1
]
+=
atomJ
.
tempBufferP
[
1
];
atomI
.
tempBufferP
[
2
]
+=
atomJ
.
tempBufferP
[
2
];
}
#endif
// file includes FixedFieldParticle struct definition/load/unload struct and body kernel for fixed E-field
/**---------------------------------------------------------------------------------------

   Set up the geometry-dependent factors shared by all mutual induced field
   evaluations for one atomI/atomJ pair.

   The displacement atomJ - atomI is wrapped into the periodic box and stored in
   delta->x, delta->y, delta->z (these three are always written, even when the pair
   is outside the cutoff).  If the squared distance is within cSim.nonbondedCutoffSqr,
   delta->w and *preFactor2 receive the two scalar factors consumed by
   calculateMutualInducedFieldPairIxn_kernel(); otherwise both are set to zero.

   @param atomI        first particle  (x, y, z, damp, thole are read)
   @param atomJ        second particle (x, y, z, damp, thole are read)
   @param uscale       scale factor applied to the damping terms scale3/scale5
   @param delta        output: wrapped displacement in x,y,z; (rr3 - bn1) in w
   @param preFactor2   output: (bn2 - rr5)

   NOTE(review): no guard exists for r2 == 0 (coincident particles); callers are
   presumably expected to exclude that case -- confirm.

   --------------------------------------------------------------------------------------- */

__device__ void setupMutualInducedFieldPairIxn_kernel( const MutualInducedParticle& atomI,
                                                       const MutualInducedParticle& atomJ,
                                                       const float uscale,
                                                       float4* delta, float* preFactor2 )
{

    // compute the real space portion of the Ewald summation

    float dx = atomJ.x - atomI.x;
    float dy = atomJ.y - atomI.y;
    float dz = atomJ.z - atomI.z;

    // periodic boundary conditions: shift each component by a whole number of box lengths

    dx -= floorf( dx*cSim.invPeriodicBoxSizeX + 0.5f )*cSim.periodicBoxSizeX;
    dy -= floorf( dy*cSim.invPeriodicBoxSizeY + 0.5f )*cSim.periodicBoxSizeY;
    dz -= floorf( dz*cSim.invPeriodicBoxSizeZ + 0.5f )*cSim.periodicBoxSizeZ;

    delta->x = dx;
    delta->y = dy;
    delta->z = dz;

    const float r2 = (dx*dx) + (dy*dy) + (dz*dz);

    // pair is beyond the cutoff: zero both scalar factors so it contributes nothing

    if( r2 > cSim.nonbondedCutoffSqr ){
        delta->w    = 0.0f;
        *preFactor2 = 0.0f;
        return;
    }

    const float r = sqrtf( r2 );

    // calculate the error function damping terms

    const float ralpha = cSim.alphaEwald*r;
    const float bn0    = erfcf( ralpha )/r;
    const float alsq2  = 2.0f*cSim.alphaEwald*cSim.alphaEwald;
    float alsq2n       = 1.0f/(cAmoebaSim.sqrtPi*cSim.alphaEwald);
    const float exp2a  = expf( -(ralpha*ralpha) );

    alsq2n            *= alsq2;
    const float bn1    = (bn0 + alsq2n*exp2a)/r2;

    alsq2n            *= alsq2;
    const float bn2    = (3.0f*bn1 + alsq2n*exp2a)/r2;

    // compute the error function scaled and unscaled terms

    float scale3 = 1.0f;
    float scale5 = 1.0f;
    float damp   = atomI.damp*atomJ.damp;
    if( damp != 0.0f ){

        float ratio        = (r/damp);
        ratio              = ratio*ratio*ratio;
        const float pgamma = atomI.thole < atomJ.thole ? atomI.thole : atomJ.thole;
        damp               = -pgamma*ratio;

        // for damp <= -50 the exponential is skipped and both scale factors stay at 1

        if( damp > -50.0f ){
            const float expdamp = expf( damp );
            scale3              = 1.0f - expdamp;
            scale5              = 1.0f - expdamp*(1.0f - damp);
        }
    }

    const float dsc3 = uscale*scale3;
    const float dsc5 = uscale*scale5;

    const float r3   = (r*r2);
    const float r5   = (r3*r2);
    const float rr3  = (1.0f - dsc3)/r3;
    const float rr5  = 3.0f*(1.0f - dsc5)/r5;

    delta->w    = rr3 - bn1;
    *preFactor2 = bn2 - rr5;
}
/**---------------------------------------------------------------------------------------

   Add to fieldSum the contribution of one dipole for a pair whose geometry
   factors have already been computed.

   For each component k:
       fieldSum[k] += preFactor2*(inducedDipole . delta.xyz)*delta[k] + delta.w*inducedDipole[k]

   @param inducedDipole  dipole components (read only)
   @param delta          displacement in x,y,z and scalar factor in w
   @param preFactor2     scalar factor multiplying the dipole/displacement projection
   @param fieldSum       accumulator updated in place

   NOTE(review): delta.x/y/z are read unconditionally, so they should hold finite
   values even when preFactor2 and delta.w are zero (0 * NaN is NaN) -- confirm
   that every call site initializes them.

   --------------------------------------------------------------------------------------- */

__device__ void calculateMutualInducedFieldPairIxn_kernel( const float inducedDipole[3],
                                                           const float4 delta,
                                                           const float preFactor2,
                                                           float fieldSum[3] )
{

    // projection of the dipole onto the displacement, then scaled by preFactor2

    const float dipoleDotDelta = inducedDipole[0]*delta.x + inducedDipole[1]*delta.y + inducedDipole[2]*delta.z;
    const float radialScale    = preFactor2*dipoleDotDelta;

    // accumulate component by component

    fieldSum[0] += radialScale*delta.x + delta.w*inducedDipole[0];
    fieldSum[1] += radialScale*delta.y + delta.w*inducedDipole[1];
    fieldSum[2] += radialScale*delta.z + delta.w*inducedDipole[2];
}
/**---------------------------------------------------------------------------------------

   Same per-component expression as calculateMutualInducedFieldPairIxn_kernel(),
   but the result overwrites fieldSum instead of being added to it.

   For each component k:
       fieldSum[k] = preFactor2*(inducedDipole . delta.xyz)*delta[k] + delta.w*inducedDipole[k]

   @param inducedDipole  dipole components (read only)
   @param delta          displacement in x,y,z and scalar factor in w
   @param preFactor2     scalar factor multiplying the dipole/displacement projection
   @param fieldSum       output; previous contents are discarded

   --------------------------------------------------------------------------------------- */

__device__ void calculateMutualInducedFieldPairIxnNoAdd_kernel( const float inducedDipole[3],
                                                                const float4 delta,
                                                                const float preFactor2,
                                                                float fieldSum[3] )
{

    // projection of the dipole onto the displacement, then scaled by preFactor2

    const float dipoleDotDelta = inducedDipole[0]*delta.x + inducedDipole[1]*delta.y + inducedDipole[2]*delta.z;
    const float radialScale    = preFactor2*dipoleDotDelta;

    // overwrite (no accumulation) component by component

    fieldSum[0] = radialScale*delta.x + delta.w*inducedDipole[0];
    fieldSum[1] = radialScale*delta.y + delta.w*inducedDipole[1];
    fieldSum[2] = radialScale*delta.z + delta.w*inducedDipole[2];
}
// file includes FixedFieldParticle struct definition/load/unload struct and body kernel for fixed E-field
...
...
@@ -385,7 +471,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
maxThreads
=
128
;
else
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
MutualInducedParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
MutualInducedParticle
)
,
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
#ifdef AMOEBA_DEBUG
...
...
@@ -573,17 +659,17 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
amoebaGpu
->
psWorkVector
[
0
]
->
_pDevData
,
amoebaGpu
->
psWorkVector
[
1
]
->
_pDevData
);
LAUNCHERROR
(
"kSorUpdatePmeMutualInducedField"
);
if
(
0
){
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
iteration
);
VectorOfDoubleVectors
outputVector
;
//
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_Field, outputVector, gpu->psAtomIndex->_pSysData );
//
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_FieldPolar, outputVector, gpu->psAtomIndex->_pSysData );
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeDirectMI"
,
fileId
,
outputVector
);
}
if
(
0
){
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
iteration
);
VectorOfDoubleVectors
outputVector
;
// cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_Field, outputVector, gpu->psAtomIndex->_pSysData );
// cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_FieldPolar, outputVector, gpu->psAtomIndex->_pSysData );
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeDirectMI"
,
fileId
,
outputVector
);
}
// get total epsilon -- performing sums on gpu
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
View file @
8a331fb9
...
...
@@ -100,99 +100,17 @@ void METHOD_NAME(kCalculateAmoebaPmeMutualInducedField, _kernel)(
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float4
ijField
[
3
];
// load coords, charge, ...
calculatePmeDirectMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
j
],
uscale
,
ijField
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
);
unsigned
int
mask
=
(
(
atomI
==
(
y
+
j
))
||
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
j
)
>=
cSim
.
atoms
)
)
?
0
:
1
;
// add to field at atomI the field due atomJ's dipole
fieldSum
[
0
]
+=
mask
?
ijField
[
0
].
x
:
0
.
0
f
;
fieldSum
[
1
]
+=
mask
?
ijField
[
1
].
x
:
0
.
0
f
;
fieldSum
[
2
]
+=
mask
?
ijField
[
2
].
x
:
0
.
0
f
;
fieldPolarSum
[
0
]
+=
mask
?
ijField
[
0
].
z
:
0
.
0
f
;
fieldPolarSum
[
1
]
+=
mask
?
ijField
[
1
].
z
:
0
.
0
f
;
fieldPolarSum
[
2
]
+=
mask
?
ijField
[
2
].
z
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
/*
if( atomI == targetAtom || (y+j) == targetAtom ){
unsigned int index = atomI == targetAtom ? (y+j) : atomI;
unsigned int pullBackIndex = 0;
unsigned int indexI = 0;
unsigned int indexJ = indexI ? 0 : 2;
debugArray[index].x = (float) atomI;
debugArray[index].y = (float) (y + j);
debugArray[index].z = cSim.nonbondedCutoffSqr;
debugArray[index].w = 6.0f;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pullBackIndex].x;
debugArray[index].y = pullBack[pullBackIndex].y;
debugArray[index].z = pullBack[pullBackIndex].z;
debugArray[index].w = pullBack[pullBackIndex].w;
pullBackIndex++;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pullBackIndex].x;
debugArray[index].y = pullBack[pullBackIndex].y;
debugArray[index].z = pullBack[pullBackIndex].z;
debugArray[index].w = pullBack[pullBackIndex].w;
index += cSim.paddedNumberOfAtoms;
float flag = 6.0f;
debugArray[index].x = ijField[0].x;
debugArray[index].y = ijField[1].x;
debugArray[index].z = ijField[2].x;
debugArray[index].w = flag;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[0].x;
debugArray[index].y = ijField[1].x;
debugArray[index].z = ijField[2].x;
debugArray[index].w = flag;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[0].z;
debugArray[index].y = ijField[1].z;
debugArray[index].z = ijField[2].z;
debugArray[index].w = flag;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[0].z;
debugArray[index].y = ijField[1].z;
debugArray[index].z = ijField[2].z;
debugArray[index].w = flag;
index += cSim.paddedNumberOfAtoms;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = match ? 0.0f : ijField[0].x;
debugArray[index].y = match ? 0.0f : ijField[1].x;
debugArray[index].z = match ? 0.0f : ijField[2].x;
index += cSim.paddedNumberOfAtoms;
unsigned int mask = 1 << j;
unsigned int pScaleIndex = (scaleMask.x & mask) ? 1 : 0;
pScaleIndex += (scaleMask.y & mask) ? 2 : 0;
debugArray[index].x = (float) pScaleIndex;
debugArray[index].y = scaleMask.x & mask ? 1.0f : -1.0f;
debugArray[index].z = scaleMask.y & mask ? 1.0f : -1.0f;
debugArray[index].w = + 10.0f;
}
*/
#endif
float4
delta
;
float
prefactor2
;
if
(
(
(
atomI
==
(
y
+
j
))
||
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
j
)
>=
cSim
.
atoms
)
)
){
delta
.
w
=
prefactor2
=
0
.
0
f
;
}
else
{
setupMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
j
],
uscale
,
&
delta
,
&
prefactor2
);
}
calculateMutualInducedFieldPairIxn_kernel
(
psA
[
j
].
inducedDipole
,
delta
,
prefactor2
,
fieldSum
);
calculateMutualInducedFieldPairIxn_kernel
(
psA
[
j
].
inducedDipolePolar
,
delta
,
prefactor2
,
fieldPolarSum
);
}
...
...
@@ -226,6 +144,10 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
// No interactions in this block.
}
else
{
#ifndef INCLUDE_MI_FIELD_BUFFERS
flags
=
0xFFFFFFFF
;
#endif
// zero shared fields
zeroMutualInducedParticleSharedField
(
&
(
sA
[
threadIdx
.
x
])
);
...
...
@@ -235,53 +157,25 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
if
((
flags
&
(
1
<<
j
))
!=
0
)
{
unsigned
int
jIdx
=
(
flags
==
0xFFFFFFFF
)
?
tj
:
j
;
float4
ijField
[
3
];
// load coords, charge, ...
calculatePmeDirectMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
jIdx
],
uscale
,
ijField
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
);
unsigned
int
mask
=
(
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
jIdx
)
>=
cSim
.
atoms
)
)
?
0
:
1
;
// add to field at atomI the field due atomJ's dipole
fieldSum
[
0
]
+=
mask
?
ijField
[
0
].
x
:
0
.
0
f
;
fieldSum
[
1
]
+=
mask
?
ijField
[
1
].
x
:
0
.
0
f
;
fieldSum
[
2
]
+=
mask
?
ijField
[
2
].
x
:
0
.
0
f
;
// add to polar field at atomI the field due atomJ's dipole
fieldPolarSum
[
0
]
+=
mask
?
ijField
[
0
].
z
:
0
.
0
f
;
fieldPolarSum
[
1
]
+=
mask
?
ijField
[
1
].
z
:
0
.
0
f
;
fieldPolarSum
[
2
]
+=
mask
?
ijField
[
2
].
z
:
0
.
0
f
;
// add to field at atomJ the field due atomI's dipole
float4
delta
;
float
prefactor2
;
if
(
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
jIdx
)
>=
cSim
.
atoms
)
){
delta
.
w
=
prefactor2
=
0
.
0
f
;
}
else
{
setupMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
jIdx
],
uscale
,
&
delta
,
&
prefactor2
);
}
calculateMutualInducedFieldPairIxn_kernel
(
psA
[
jIdx
].
inducedDipole
,
delta
,
prefactor2
,
fieldSum
);
calculateMutualInducedFieldPairIxn_kernel
(
psA
[
jIdx
].
inducedDipolePolar
,
delta
,
prefactor2
,
fieldPolarSum
);
#ifndef INCLUDE_MI_FIELD_BUFFERS
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
.
inducedDipole
,
delta
,
prefactor2
,
psA
[
jIdx
].
field
);
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
.
inducedDipolePolar
,
delta
,
prefactor2
,
psA
[
jIdx
].
fieldPolar
);
#else
if
(
flags
==
0xFFFFFFFF
){
psA
[
jIdx
].
field
[
0
]
+=
mask
?
ijField
[
0
].
y
:
0
.
0
f
;
psA
[
jIdx
].
field
[
1
]
+=
mask
?
ijField
[
1
].
y
:
0
.
0
f
;
psA
[
jIdx
].
field
[
2
]
+=
mask
?
ijField
[
2
].
y
:
0
.
0
f
;
// add to polar field at atomJ the field due atomI's dipole
psA
[
jIdx
].
fieldPolar
[
0
]
+=
mask
?
ijField
[
0
].
w
:
0
.
0
f
;
psA
[
jIdx
].
fieldPolar
[
1
]
+=
mask
?
ijField
[
1
].
w
:
0
.
0
f
;
psA
[
jIdx
].
fieldPolar
[
2
]
+=
mask
?
ijField
[
2
].
w
:
0
.
0
f
;
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
.
inducedDipole
,
delta
,
prefactor2
,
psA
[
jIdx
].
field
);
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
.
inducedDipolePolar
,
delta
,
prefactor2
,
psA
[
jIdx
].
fieldPolar
);
}
else
{
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
=
mask
?
ijField
[
0
].
y
:
0
.
0
;
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
=
mask
?
ijField
[
1
].
y
:
0
.
0
;
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
=
mask
?
ijField
[
2
].
y
:
0
.
0
;
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
=
mask
?
ijField
[
0
].
w
:
0
.
0
;
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
=
mask
?
ijField
[
1
].
w
:
0
.
0
;
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
=
mask
?
ijField
[
2
].
w
:
0
.
0
;
calculateMutualInducedFieldPairIxnNoAdd_kernel
(
localParticle
.
inducedDipole
,
delta
,
prefactor2
,
sA
[
threadIdx
.
x
].
tempBuffer
);
calculateMutualInducedFieldPairIxnNoAdd_kernel
(
localParticle
.
inducedDipolePolar
,
delta
,
prefactor2
,
sA
[
threadIdx
.
x
].
tempBufferP
);
if
(
tgx
%
2
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
...
...
@@ -308,61 +202,8 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
}
}
/*
#ifdef AMOEBA_DEBUG
if( atomI == targetAtom || (y+jIdx) == targetAtom ){
unsigned int index = atomI == targetAtom ? (y+jIdx) : atomI;
unsigned int pullBackIndex = 0;
unsigned int indexI = 0;
unsigned int indexJ = indexI ? 0 : 2;
debugArray[index].x = (float) atomI;
debugArray[index].y = (float) (y + jIdx);
debugArray[index].z = cSim.nonbondedCutoffSqr;
debugArray[index].w = 7.0f;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pullBackIndex].x;
debugArray[index].y = pullBack[pullBackIndex].y;
debugArray[index].z = pullBack[pullBackIndex].z;
debugArray[index].w = pullBack[pullBackIndex].w;
pullBackIndex++;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pullBackIndex].x;
debugArray[index].y = pullBack[pullBackIndex].y;
debugArray[index].z = pullBack[pullBackIndex].z;
debugArray[index].w = pullBack[pullBackIndex].w;
index += cSim.paddedNumberOfAtoms;
float flag = 7.0f;
debugArray[index].x = ijField[indexI][0];
debugArray[index].y = ijField[indexI][1];
debugArray[index].z = ijField[indexI][2];
debugArray[index].w = flag;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexJ][0];
debugArray[index].y = ijField[indexJ][1];
debugArray[index].z = ijField[indexJ][2];
debugArray[index].w = flag;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexI+1][0];
debugArray[index].y = ijField[indexI+1][1];
debugArray[index].z = ijField[indexI+1][2];
debugArray[index].w = flag;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexJ+1][0];
debugArray[index].y = ijField[indexJ+1][1];
debugArray[index].z = ijField[indexJ+1][2];
debugArray[index].w = flag;
}
#endif
*/
}
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
View file @
8a331fb9
...
...
@@ -531,7 +531,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
maxThreads
=
192
;
else
maxThreads
=
128
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
Vdw14_7Particle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
Vdw14_7Particle
)
,
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
if
(
0
){
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
View file @
8a331fb9
...
...
@@ -382,7 +382,7 @@ void kCalculateAmoebaWcaDispersionForces( amoebaGpuContext amoebaGpu )
maxThreads
=
192
;
else
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
WcaDispersionParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
WcaDispersionParticle
)
,
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
#ifdef AMOEBA_DEBUG
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment