Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
8a331fb9
Commit
8a331fb9
authored
Apr 20, 2011
by
Mark Friedrichs
Browse files
Direct space optimizations
parent
af4d503a
Changes
17
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
1046 additions
and
290 deletions
+1046
-290
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
+6
-5
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
...ins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEAndGkFields.cu
...cuda/src/kernels/kCalculateAmoebaCudaFixedEAndGkFields.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.cu
...forms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
...latforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
...c/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
...uda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedParticle.h
...a/src/kernels/kCalculateAmoebaCudaMutualInducedParticle.h
+0
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
...src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
+890
-65
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
.../src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
+6
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
...ms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
+2
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+101
-15
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
...a/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
+31
-190
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
...platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
+1
-1
No files found.
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
View file @
8a331fb9
...
@@ -3449,19 +3449,20 @@ tgx = 0;
...
@@ -3449,19 +3449,20 @@ tgx = 0;
Get threads/block
Get threads/block
@param amoebaGpu amoebaGpuContext
@param amoebaGpu amoebaGpuContext
@param sharedMemoryPerThread shared memory/thread
@param sharedMemoryPerThread shared memory/thread
@param sharedMemoryPerBlock shared memory/block
@return threadsPerBlock
@return threadsPerBlock
--------------------------------------------------------------------------------------- */
--------------------------------------------------------------------------------------- */
unsigned
int
getThreadsPerBlock
(
amoebaGpuContext
amoebaGpu
,
unsigned
int
sharedMemoryPerThread
)
unsigned
int
getThreadsPerBlock
(
amoebaGpuContext
amoebaGpu
,
unsigned
int
sharedMemoryPerThread
,
unsigned
int
sharedMemoryPerBlock
)
{
{
unsigned
int
grid
=
amoebaGpu
->
gpuContext
->
grid
;
unsigned
int
grid
=
amoebaGpu
->
gpuContext
->
grid
;
unsigned
int
threadsPerBlock
=
(
amoebaGpu
->
gpuContext
->
sharedMemoryPerBlock
+
grid
-
1
)
/
(
grid
*
sharedMemoryPerThread
);
unsigned
int
threadsPerBlock
=
(
sharedMemoryPerBlock
+
grid
-
1
)
/
(
grid
*
sharedMemoryPerThread
);
threadsPerBlock
=
threadsPerBlock
<
1
?
1
:
threadsPerBlock
;
threadsPerBlock
=
threadsPerBlock
<
1
?
1
:
threadsPerBlock
;
threadsPerBlock
*=
grid
;
threadsPerBlock
*=
grid
;
return
threadsPerBlock
;
return
threadsPerBlock
;
}
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
View file @
8a331fb9
...
@@ -160,7 +160,7 @@ extern void kClearFloat( amoebaGpuContext amoebaGpu, unsigned int entries, CUDAS
...
@@ -160,7 +160,7 @@ extern void kClearFloat( amoebaGpuContext amoebaGpu, unsigned int entries, CUDAS
extern
void
kClearFloat4
(
amoebaGpuContext
amoebaGpu
,
unsigned
int
entries
,
CUDAStream
<
float4
>*
fieldToClear
);
extern
void
kClearFloat4
(
amoebaGpuContext
amoebaGpu
,
unsigned
int
entries
,
CUDAStream
<
float4
>*
fieldToClear
);
extern
void
kClearFields_1
(
amoebaGpuContext
amoebaGpu
);
extern
void
kClearFields_1
(
amoebaGpuContext
amoebaGpu
);
extern
void
kClearFields_3
(
amoebaGpuContext
amoebaGpu
,
unsigned
int
numberToClear
);
extern
void
kClearFields_3
(
amoebaGpuContext
amoebaGpu
,
unsigned
int
numberToClear
);
extern
unsigned
int
getThreadsPerBlock
(
amoebaGpuContext
amoebaGpu
,
unsigned
int
sharedMemoryPerThread
);
extern
unsigned
int
getThreadsPerBlock
(
amoebaGpuContext
amoebaGpu
,
unsigned
int
sharedMemoryPerThread
,
unsigned
int
sharedMemoryPerBlock
);
//extern int isNanOrInfinity( double number );
//extern int isNanOrInfinity( double number );
extern
void
trackMutualInducedIterations
(
amoebaGpuContext
amoebaGpu
,
int
iteration
);
extern
void
trackMutualInducedIterations
(
amoebaGpuContext
amoebaGpu
,
int
iteration
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
View file @
8a331fb9
...
@@ -759,7 +759,7 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueTo
...
@@ -759,7 +759,7 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueTo
maxThreads
=
128
;
maxThreads
=
128
;
else
else
maxThreads
=
64
;
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
ElectrostaticParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
ElectrostaticParticle
)
,
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
}
kClearFields_3
(
amoebaGpu
,
1
);
kClearFields_3
(
amoebaGpu
,
1
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEAndGkFields.cu
View file @
8a331fb9
...
@@ -362,7 +362,7 @@ void cudaComputeAmoebaFixedEAndGkFields( amoebaGpuContext amoebaGpu )
...
@@ -362,7 +362,7 @@ void cudaComputeAmoebaFixedEAndGkFields( amoebaGpuContext amoebaGpu )
maxThreads
=
128
;
maxThreads
=
128
;
else
else
maxThreads
=
64
;
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)
,
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
}
kClearFields_3
(
amoebaGpu
,
3
);
kClearFields_3
(
amoebaGpu
,
3
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.cu
View file @
8a331fb9
...
@@ -108,7 +108,7 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu )
...
@@ -108,7 +108,7 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu )
maxThreads
=
128
;
maxThreads
=
128
;
else
else
maxThreads
=
64
;
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)
,
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
}
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
View file @
8a331fb9
...
@@ -1813,7 +1813,7 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
...
@@ -1813,7 +1813,7 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
maxThreads
=
128
;
maxThreads
=
128
;
else
else
maxThreads
=
64
;
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
KirkwoodParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
KirkwoodParticle
)
,
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
if
(
amoebaGpu
->
log
){
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
View file @
8a331fb9
...
@@ -978,7 +978,7 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
...
@@ -978,7 +978,7 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
maxThreads
=
96
;
maxThreads
=
96
;
else
else
maxThreads
=
32
;
maxThreads
=
32
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
KirkwoodEDiffParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
KirkwoodEDiffParticle
)
,
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
}
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
View file @
8a331fb9
...
@@ -490,7 +490,7 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon
...
@@ -490,7 +490,7 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon
maxThreads
=
128
;
maxThreads
=
128
;
else
else
maxThreads
=
64
;
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
MutualInducedParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
MutualInducedParticle
)
,
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
}
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
View file @
8a331fb9
...
@@ -276,7 +276,7 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext
...
@@ -276,7 +276,7 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext
maxThreads
=
128
;
maxThreads
=
128
;
else
else
maxThreads
=
64
;
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
MutualInducedParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
MutualInducedParticle
)
,
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
}
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedParticle.h
View file @
8a331fb9
...
@@ -4,7 +4,6 @@ struct MutualInducedParticle {
...
@@ -4,7 +4,6 @@ struct MutualInducedParticle {
float
x
;
float
x
;
float
y
;
float
y
;
float
z
;
float
z
;
float
q
;
float
inducedDipole
[
3
];
float
inducedDipole
[
3
];
float
inducedDipolePolar
[
3
];
float
inducedDipolePolar
[
3
];
...
@@ -41,7 +40,6 @@ __device__ static void loadMutualInducedShared( MutualInducedParticle* sA, unsig
...
@@ -41,7 +40,6 @@ __device__ static void loadMutualInducedShared( MutualInducedParticle* sA, unsig
sA
->
x
=
posq
.
x
;
sA
->
x
=
posq
.
x
;
sA
->
y
=
posq
.
y
;
sA
->
y
=
posq
.
y
;
sA
->
z
=
posq
.
z
;
sA
->
z
=
posq
.
z
;
sA
->
q
=
posq
.
w
;
// dipole
// dipole
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
View file @
8a331fb9
This diff is collapsed.
Click to expand it.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
View file @
8a331fb9
...
@@ -239,6 +239,9 @@ if( atomI == targetAtom || atomJ == targetAtom ){
...
@@ -239,6 +239,9 @@ if( atomI == targetAtom || atomJ == targetAtom ){
// No interactions in this block.
// No interactions in this block.
}
else
{
}
else
{
#ifdef CALCULATE_FULL_TILE
flags
=
0xFFFFFFFF
;
#endif
sA
[
threadIdx
.
x
].
force
[
0
]
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
force
[
0
]
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
force
[
1
]
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
force
[
1
]
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
force
[
2
]
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
force
[
2
]
=
0
.
0
f
;
...
@@ -311,7 +314,8 @@ if( atomI == targetAtom || atomJ == targetAtom ){
...
@@ -311,7 +314,8 @@ if( atomI == targetAtom || atomJ == targetAtom ){
psA
[
jIdx
].
torque
[
0
]
+=
forceTorqueEnergy
[
2
].
x
;
psA
[
jIdx
].
torque
[
0
]
+=
forceTorqueEnergy
[
2
].
x
;
psA
[
jIdx
].
torque
[
1
]
+=
forceTorqueEnergy
[
2
].
y
;
psA
[
jIdx
].
torque
[
1
]
+=
forceTorqueEnergy
[
2
].
y
;
psA
[
jIdx
].
torque
[
2
]
+=
forceTorqueEnergy
[
2
].
z
;
psA
[
jIdx
].
torque
[
2
]
+=
forceTorqueEnergy
[
2
].
z
;
#ifndef CALCULATE_FULL_TILE
}
else
{
}
else
{
sA
[
threadIdx
.
x
].
tempForce
[
0
]
=
forceTorqueEnergy
[
0
].
x
;
sA
[
threadIdx
.
x
].
tempForce
[
0
]
=
forceTorqueEnergy
[
0
].
x
;
...
@@ -345,6 +349,7 @@ if( atomI == targetAtom || atomJ == targetAtom ){
...
@@ -345,6 +349,7 @@ if( atomI == targetAtom || atomJ == targetAtom ){
psA
[
jIdx
].
torque
[
1
]
+=
sA
[
threadIdx
.
x
].
tempTorque
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
1
];
psA
[
jIdx
].
torque
[
1
]
+=
sA
[
threadIdx
.
x
].
tempTorque
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
1
];
psA
[
jIdx
].
torque
[
2
]
+=
sA
[
threadIdx
.
x
].
tempTorque
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
2
];
psA
[
jIdx
].
torque
[
2
]
+=
sA
[
threadIdx
.
x
].
tempTorque
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
2
];
}
}
#endif
}
}
}
// end of atoms out-of-bounds
}
// end of atoms out-of-bounds
}
// end of flags&(1<<j block
}
// end of flags&(1<<j block
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
View file @
8a331fb9
...
@@ -437,7 +437,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
...
@@ -437,7 +437,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
maxThreads
=
192
;
maxThreads
=
192
;
else
else
maxThreads
=
64
;
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)
,
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
}
if
(
gpu
->
bOutputBufferPerWarp
){
if
(
gpu
->
bOutputBufferPerWarp
){
...
@@ -469,7 +469,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
...
@@ -469,7 +469,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
if
(
amoebaGpu
->
log
){
if
(
amoebaGpu
->
log
){
gpu
->
psInteractionCount
->
Download
();
gpu
->
psInteractionCount
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"cudaComputeAmoebaPmeDirectFixedEField: threadsPerBlock=%u getThreadsPerBlock=%d sizeof=%u shrd=%u
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"cudaComputeAmoebaPmeDirectFixedEField: threadsPerBlock=%u getThreadsPerBlock=%d sizeof=%u shrd=%u
\n
"
,
threadsPerBlock
,
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
)),
threadsPerBlock
,
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
)
,
gpu
->
sharedMemoryPerBlock
),
(
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
)),
(
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
))
*
threadsPerBlock
);
(
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
)),
(
sizeof
(
FixedFieldParticle
)
+
sizeof
(
float3
))
*
threadsPerBlock
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"AmoebaCutoffForces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u ixnCt=%u workUnits=%u warp=%d
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"AmoebaCutoffForces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u ixnCt=%u workUnits=%u warp=%d
\n
"
,
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
gpu
->
bOutputBufferPerWarp
,
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
gpu
->
bOutputBufferPerWarp
,
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
View file @
8a331fb9
...
@@ -37,10 +37,9 @@ void GetCalculateAmoebaCudaPmeMutualInducedFieldSim(amoebaGpuContext amoebaGpu)
...
@@ -37,10 +37,9 @@ void GetCalculateAmoebaCudaPmeMutualInducedFieldSim(amoebaGpuContext amoebaGpu)
#undef AMOEBA_DEBUG
#undef AMOEBA_DEBUG
#undef INCLUDE_MI_FIELD_BUFFERS
#undef INCLUDE_MI_FIELD_BUFFERS
#define INCLUDE_MI_FIELD_BUFFERS
//
#define INCLUDE_MI_FIELD_BUFFERS
#include "kCalculateAmoebaCudaMutualInducedParticle.h"
#include "kCalculateAmoebaCudaMutualInducedParticle.h"
#undef INCLUDE_MI_FIELD_BUFFERS
#ifdef INCLUDE_MI_FIELD_BUFFERS
__device__
void
sumTempBuffer
(
MutualInducedParticle
&
atomI
,
MutualInducedParticle
&
atomJ
){
__device__
void
sumTempBuffer
(
MutualInducedParticle
&
atomI
,
MutualInducedParticle
&
atomJ
){
atomI
.
tempBuffer
[
0
]
+=
atomJ
.
tempBuffer
[
0
];
atomI
.
tempBuffer
[
0
]
+=
atomJ
.
tempBuffer
[
0
];
...
@@ -51,6 +50,93 @@ __device__ void sumTempBuffer( MutualInducedParticle& atomI, MutualInducedPartic
...
@@ -51,6 +50,93 @@ __device__ void sumTempBuffer( MutualInducedParticle& atomI, MutualInducedPartic
atomI
.
tempBufferP
[
1
]
+=
atomJ
.
tempBufferP
[
1
];
atomI
.
tempBufferP
[
1
]
+=
atomJ
.
tempBufferP
[
1
];
atomI
.
tempBufferP
[
2
]
+=
atomJ
.
tempBufferP
[
2
];
atomI
.
tempBufferP
[
2
]
+=
atomJ
.
tempBufferP
[
2
];
}
}
#endif
// file includes FixedFieldParticle struct definition/load/unload struct and body kernel for fixed E-field
// Prepare the pair-dependent quantities used by the direct-space (real-space
// part of the Ewald sum) mutual induced field evaluation for atoms I and J.
//
// @param atomI       first particle of the pair (reads x, y, z, damp, thole)
// @param atomJ       second particle of the pair (reads x, y, z, damp, thole)
// @param uscale      scale factor applied to the scale3/scale5 damping terms
// @param delta       output: x/y/z receive the periodically wrapped separation
//                    atomJ - atomI; w receives (rr3 - bn1)
// @param preFactor2  output: receives (bn2 - rr5)
//
// When the squared separation is not within cSim.nonbondedCutoffSqr both
// delta->w and *preFactor2 are set to zero (delta->x/y/z are still written).
__device__ void setupMutualInducedFieldPairIxn_kernel( const MutualInducedParticle& atomI,
                                                       const MutualInducedParticle& atomJ,
                                                       const float uscale,
                                                       float4* delta, float* preFactor2 )
{
    // separation vector from atomI to atomJ

    delta->x = atomJ.x - atomI.x;
    delta->y = atomJ.y - atomI.y;
    delta->z = atomJ.z - atomI.z;

    // periodic boundary conditions: shift each component by a whole number of box lengths

    delta->x -= floor( delta->x*cSim.invPeriodicBoxSizeX + 0.5f )*cSim.periodicBoxSizeX;
    delta->y -= floor( delta->y*cSim.invPeriodicBoxSizeY + 0.5f )*cSim.periodicBoxSizeY;
    delta->z -= floor( delta->z*cSim.invPeriodicBoxSizeZ + 0.5f )*cSim.periodicBoxSizeZ;

    const float distSq = (delta->x*delta->x) + (delta->y*delta->y) + (delta->z*delta->z);

    // pair is not inside the cutoff: no contribution
    // (written as a negated <= so the comparison behaves exactly like the else branch it replaces)

    if( !(distSq <= cSim.nonbondedCutoffSqr) ){
        *preFactor2 = 0.0f;
        delta->w    = *preFactor2;
        return;
    }

    const float dist = sqrtf( distSq );

    // error function damping terms

    const float alphaR     = cSim.alphaEwald*dist;
    const float bn0        = erfc( alphaR )/dist;
    const float twoAlphaSq = 2.0f*cSim.alphaEwald*cSim.alphaEwald;
    const float gaussian   = exp( -(alphaR*alphaR) );

    // alphaTerm picks up one more factor of twoAlphaSq before each successive bn term

    float alphaTerm        = 1.0f/(cAmoebaSim.sqrtPi*cSim.alphaEwald);

    alphaTerm             *= twoAlphaSq;
    const float bn1        = (bn0 + alphaTerm*gaussian)/distSq;

    alphaTerm             *= twoAlphaSq;
    const float bn2        = (3.0f*bn1 + alphaTerm*gaussian)/distSq;

    // scale3/scale5 damping built from the particles' damp and thole fields;
    // both stay at 1 when the damp product is zero or the exponent is <= -50

    float scale3           = 1.0f;
    float scale5           = 1.0f;
    float damp             = atomI.damp*atomJ.damp;
    if( damp != 0.0f ){

        float ratio        = (dist/damp);
        ratio              = ratio*ratio*ratio;

        // use the smaller of the two thole values

        float pgamma;
        if( atomI.thole < atomJ.thole ){
            pgamma         = atomI.thole;
        } else {
            pgamma         = atomJ.thole;
        }

        damp               = -pgamma*ratio;
        if( damp > -50.0f ){
            const float expDamp = exp( damp );
            scale3              = 1.0f - expDamp;
            scale5              = 1.0f - expDamp*(1.0f - damp);
        }
    }

    const float dsc3       = uscale*scale3;
    const float dsc5       = uscale*scale5;

    const float dist3      = (dist*distSq);
    const float dist5      = (dist3*distSq);

    const float rr3        = (1.0f - dsc3)/dist3;
    const float rr5        = 3.0f*(1.0f - dsc5)/dist5;

    delta->w               = rr3 - bn1;
    *preFactor2            = bn2 - rr5;
}
// Accumulate into fieldSum the field contribution produced by one induced dipole.
//
// @param inducedDipole  3-component dipole generating the field
// @param delta          x/y/z: pair separation vector; w: coefficient multiplying the dipole itself
// @param preFactor2     coefficient multiplying (inducedDipole . delta)
// @param fieldSum       3-component accumulator; each component is incremented
//                       by (preFactor2*(dipole . delta))*delta[i] + delta.w*dipole[i]
__device__ void calculateMutualInducedFieldPairIxn_kernel( const float inducedDipole[3],
                                                           const float4 delta,
                                                           const float preFactor2,
                                                           float fieldSum[3] )
{
    // coefficient of the separation vector: preFactor2 times the dipole projected on delta

    const float alongDelta = preFactor2*( inducedDipole[0]*delta.x + inducedDipole[1]*delta.y + inducedDipole[2]*delta.z );

    fieldSum[0] += alongDelta*delta.x + delta.w*inducedDipole[0];
    fieldSum[1] += alongDelta*delta.y + delta.w*inducedDipole[1];
    fieldSum[2] += alongDelta*delta.z + delta.w*inducedDipole[2];
}
// Store (rather than accumulate) in fieldSum the field contribution produced by
// one induced dipole; any previous contents of fieldSum are overwritten.
//
// @param inducedDipole  3-component dipole generating the field
// @param delta          x/y/z: pair separation vector; w: coefficient multiplying the dipole itself
// @param preFactor2     coefficient multiplying (inducedDipole . delta)
// @param fieldSum       3-component output; each component is set to
//                       (preFactor2*(dipole . delta))*delta[i] + delta.w*dipole[i]
__device__ void calculateMutualInducedFieldPairIxnNoAdd_kernel( const float inducedDipole[3],
                                                                const float4 delta,
                                                                const float preFactor2,
                                                                float fieldSum[3] )
{
    // coefficient of the separation vector: preFactor2 times the dipole projected on delta

    const float alongDelta = preFactor2*( inducedDipole[0]*delta.x + inducedDipole[1]*delta.y + inducedDipole[2]*delta.z );

    fieldSum[0] = alongDelta*delta.x + delta.w*inducedDipole[0];
    fieldSum[1] = alongDelta*delta.y + delta.w*inducedDipole[1];
    fieldSum[2] = alongDelta*delta.z + delta.w*inducedDipole[2];
}
// file includes FixedFieldParticle struct definition/load/unload struct and body kernel for fixed E-field
// file includes FixedFieldParticle struct definition/load/unload struct and body kernel for fixed E-field
...
@@ -385,7 +471,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
...
@@ -385,7 +471,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
maxThreads
=
128
;
maxThreads
=
128
;
else
else
maxThreads
=
64
;
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
MutualInducedParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
MutualInducedParticle
)
,
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
}
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
...
@@ -573,17 +659,17 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
...
@@ -573,17 +659,17 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
amoebaGpu
->
psWorkVector
[
0
]
->
_pDevData
,
amoebaGpu
->
psWorkVector
[
1
]
->
_pDevData
);
amoebaGpu
->
psWorkVector
[
0
]
->
_pDevData
,
amoebaGpu
->
psWorkVector
[
1
]
->
_pDevData
);
LAUNCHERROR
(
"kSorUpdatePmeMutualInducedField"
);
LAUNCHERROR
(
"kSorUpdatePmeMutualInducedField"
);
if
(
0
){
if
(
0
){
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
std
::
vector
<
int
>
fileId
;
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
iteration
);
fileId
.
push_back
(
iteration
);
VectorOfDoubleVectors
outputVector
;
VectorOfDoubleVectors
outputVector
;
//
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_Field, outputVector, gpu->psAtomIndex->_pSysData );
// cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_Field, outputVector, gpu->psAtomIndex->_pSysData );
//
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_FieldPolar, outputVector, gpu->psAtomIndex->_pSysData );
// cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_FieldPolar, outputVector, gpu->psAtomIndex->_pSysData );
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeDirectMI"
,
fileId
,
outputVector
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeDirectMI"
,
fileId
,
outputVector
);
}
}
// get total epsilon -- performing sums on gpu
// get total epsilon -- performing sums on gpu
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
View file @
8a331fb9
...
@@ -100,99 +100,17 @@ void METHOD_NAME(kCalculateAmoebaPmeMutualInducedField, _kernel)(
...
@@ -100,99 +100,17 @@ void METHOD_NAME(kCalculateAmoebaPmeMutualInducedField, _kernel)(
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
{
float4
ijField
[
3
];
// load coords, charge, ...
// load coords, charge, ...
calculatePmeDirectMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
j
],
uscale
,
ijField
float4
delta
;
#ifdef AMOEBA_DEBUG
float
prefactor2
;
,
pullBack
if
(
(
(
atomI
==
(
y
+
j
))
||
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
j
)
>=
cSim
.
atoms
)
)
){
#endif
delta
.
w
=
prefactor2
=
0
.
0
f
;
);
}
else
{
setupMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
j
],
uscale
,
&
delta
,
&
prefactor2
);
unsigned
int
mask
=
(
(
atomI
==
(
y
+
j
))
||
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
j
)
>=
cSim
.
atoms
)
)
?
0
:
1
;
}
calculateMutualInducedFieldPairIxn_kernel
(
psA
[
j
].
inducedDipole
,
delta
,
prefactor2
,
fieldSum
);
// add to field at atomI the field due atomJ's dipole
calculateMutualInducedFieldPairIxn_kernel
(
psA
[
j
].
inducedDipolePolar
,
delta
,
prefactor2
,
fieldPolarSum
);
fieldSum
[
0
]
+=
mask
?
ijField
[
0
].
x
:
0
.
0
f
;
fieldSum
[
1
]
+=
mask
?
ijField
[
1
].
x
:
0
.
0
f
;
fieldSum
[
2
]
+=
mask
?
ijField
[
2
].
x
:
0
.
0
f
;
fieldPolarSum
[
0
]
+=
mask
?
ijField
[
0
].
z
:
0
.
0
f
;
fieldPolarSum
[
1
]
+=
mask
?
ijField
[
1
].
z
:
0
.
0
f
;
fieldPolarSum
[
2
]
+=
mask
?
ijField
[
2
].
z
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
/*
if( atomI == targetAtom || (y+j) == targetAtom ){
unsigned int index = atomI == targetAtom ? (y+j) : atomI;
unsigned int pullBackIndex = 0;
unsigned int indexI = 0;
unsigned int indexJ = indexI ? 0 : 2;
debugArray[index].x = (float) atomI;
debugArray[index].y = (float) (y + j);
debugArray[index].z = cSim.nonbondedCutoffSqr;
debugArray[index].w = 6.0f;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pullBackIndex].x;
debugArray[index].y = pullBack[pullBackIndex].y;
debugArray[index].z = pullBack[pullBackIndex].z;
debugArray[index].w = pullBack[pullBackIndex].w;
pullBackIndex++;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pullBackIndex].x;
debugArray[index].y = pullBack[pullBackIndex].y;
debugArray[index].z = pullBack[pullBackIndex].z;
debugArray[index].w = pullBack[pullBackIndex].w;
index += cSim.paddedNumberOfAtoms;
float flag = 6.0f;
debugArray[index].x = ijField[0].x;
debugArray[index].y = ijField[1].x;
debugArray[index].z = ijField[2].x;
debugArray[index].w = flag;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[0].x;
debugArray[index].y = ijField[1].x;
debugArray[index].z = ijField[2].x;
debugArray[index].w = flag;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[0].z;
debugArray[index].y = ijField[1].z;
debugArray[index].z = ijField[2].z;
debugArray[index].w = flag;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[0].z;
debugArray[index].y = ijField[1].z;
debugArray[index].z = ijField[2].z;
debugArray[index].w = flag;
index += cSim.paddedNumberOfAtoms;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = match ? 0.0f : ijField[0].x;
debugArray[index].y = match ? 0.0f : ijField[1].x;
debugArray[index].z = match ? 0.0f : ijField[2].x;
index += cSim.paddedNumberOfAtoms;
unsigned int mask = 1 << j;
unsigned int pScaleIndex = (scaleMask.x & mask) ? 1 : 0;
pScaleIndex += (scaleMask.y & mask) ? 2 : 0;
debugArray[index].x = (float) pScaleIndex;
debugArray[index].y = scaleMask.x & mask ? 1.0f : -1.0f;
debugArray[index].z = scaleMask.y & mask ? 1.0f : -1.0f;
debugArray[index].w = + 10.0f;
}
*/
#endif
}
}
...
@@ -226,6 +144,10 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
...
@@ -226,6 +144,10 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
// No interactions in this block.
// No interactions in this block.
}
else
{
}
else
{
#ifndef INCLUDE_MI_FIELD_BUFFERS
flags
=
0xFFFFFFFF
;
#endif
// zero shared fields
// zero shared fields
zeroMutualInducedParticleSharedField
(
&
(
sA
[
threadIdx
.
x
])
);
zeroMutualInducedParticleSharedField
(
&
(
sA
[
threadIdx
.
x
])
);
...
@@ -235,53 +157,25 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
...
@@ -235,53 +157,25 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
if
((
flags
&
(
1
<<
j
))
!=
0
)
if
((
flags
&
(
1
<<
j
))
!=
0
)
{
{
unsigned
int
jIdx
=
(
flags
==
0xFFFFFFFF
)
?
tj
:
j
;
unsigned
int
jIdx
=
(
flags
==
0xFFFFFFFF
)
?
tj
:
j
;
float4
ijField
[
3
];
float4
delta
;
float
prefactor2
;
// load coords, charge, ...
if
(
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
jIdx
)
>=
cSim
.
atoms
)
){
delta
.
w
=
prefactor2
=
0
.
0
f
;
calculatePmeDirectMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
jIdx
],
uscale
,
ijField
}
else
{
#ifdef AMOEBA_DEBUG
setupMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
jIdx
],
uscale
,
&
delta
,
&
prefactor2
);
,
pullBack
}
#endif
calculateMutualInducedFieldPairIxn_kernel
(
psA
[
jIdx
].
inducedDipole
,
delta
,
prefactor2
,
fieldSum
);
);
calculateMutualInducedFieldPairIxn_kernel
(
psA
[
jIdx
].
inducedDipolePolar
,
delta
,
prefactor2
,
fieldPolarSum
);
#ifndef INCLUDE_MI_FIELD_BUFFERS
unsigned
int
mask
=
(
(
atomI
>=
cSim
.
atoms
)
||
((
y
+
jIdx
)
>=
cSim
.
atoms
)
)
?
0
:
1
;
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
.
inducedDipole
,
delta
,
prefactor2
,
psA
[
jIdx
].
field
);
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
.
inducedDipolePolar
,
delta
,
prefactor2
,
psA
[
jIdx
].
fieldPolar
);
// add to field at atomI the field due atomJ's dipole
#else
fieldSum
[
0
]
+=
mask
?
ijField
[
0
].
x
:
0
.
0
f
;
fieldSum
[
1
]
+=
mask
?
ijField
[
1
].
x
:
0
.
0
f
;
fieldSum
[
2
]
+=
mask
?
ijField
[
2
].
x
:
0
.
0
f
;
// add to polar field at atomI the field due atomJ's dipole
fieldPolarSum
[
0
]
+=
mask
?
ijField
[
0
].
z
:
0
.
0
f
;
fieldPolarSum
[
1
]
+=
mask
?
ijField
[
1
].
z
:
0
.
0
f
;
fieldPolarSum
[
2
]
+=
mask
?
ijField
[
2
].
z
:
0
.
0
f
;
// add to field at atomJ the field due atomI's dipole
if
(
flags
==
0xFFFFFFFF
){
if
(
flags
==
0xFFFFFFFF
){
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
.
inducedDipole
,
delta
,
prefactor2
,
psA
[
jIdx
].
field
);
psA
[
jIdx
].
field
[
0
]
+=
mask
?
ijField
[
0
].
y
:
0
.
0
f
;
calculateMutualInducedFieldPairIxn_kernel
(
localParticle
.
inducedDipolePolar
,
delta
,
prefactor2
,
psA
[
jIdx
].
fieldPolar
);
psA
[
jIdx
].
field
[
1
]
+=
mask
?
ijField
[
1
].
y
:
0
.
0
f
;
psA
[
jIdx
].
field
[
2
]
+=
mask
?
ijField
[
2
].
y
:
0
.
0
f
;
// add to polar field at atomJ the field due atomI's dipole
psA
[
jIdx
].
fieldPolar
[
0
]
+=
mask
?
ijField
[
0
].
w
:
0
.
0
f
;
psA
[
jIdx
].
fieldPolar
[
1
]
+=
mask
?
ijField
[
1
].
w
:
0
.
0
f
;
psA
[
jIdx
].
fieldPolar
[
2
]
+=
mask
?
ijField
[
2
].
w
:
0
.
0
f
;
}
else
{
}
else
{
calculateMutualInducedFieldPairIxnNoAdd_kernel
(
localParticle
.
inducedDipole
,
delta
,
prefactor2
,
sA
[
threadIdx
.
x
].
tempBuffer
);
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
=
mask
?
ijField
[
0
].
y
:
0
.
0
;
calculateMutualInducedFieldPairIxnNoAdd_kernel
(
localParticle
.
inducedDipolePolar
,
delta
,
prefactor2
,
sA
[
threadIdx
.
x
].
tempBufferP
);
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
=
mask
?
ijField
[
1
].
y
:
0
.
0
;
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
=
mask
?
ijField
[
2
].
y
:
0
.
0
;
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
=
mask
?
ijField
[
0
].
w
:
0
.
0
;
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
=
mask
?
ijField
[
1
].
w
:
0
.
0
;
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
=
mask
?
ijField
[
2
].
w
:
0
.
0
;
if
(
tgx
%
2
==
0
){
if
(
tgx
%
2
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
...
@@ -308,61 +202,8 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
...
@@ -308,61 +202,8 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
}
}
}
}
/*
#ifdef AMOEBA_DEBUG
if( atomI == targetAtom || (y+jIdx) == targetAtom ){
unsigned int index = atomI == targetAtom ? (y+jIdx) : atomI;
unsigned int pullBackIndex = 0;
unsigned int indexI = 0;
unsigned int indexJ = indexI ? 0 : 2;
debugArray[index].x = (float) atomI;
debugArray[index].y = (float) (y + jIdx);
debugArray[index].z = cSim.nonbondedCutoffSqr;
debugArray[index].w = 7.0f;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pullBackIndex].x;
debugArray[index].y = pullBack[pullBackIndex].y;
debugArray[index].z = pullBack[pullBackIndex].z;
debugArray[index].w = pullBack[pullBackIndex].w;
pullBackIndex++;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pullBackIndex].x;
debugArray[index].y = pullBack[pullBackIndex].y;
debugArray[index].z = pullBack[pullBackIndex].z;
debugArray[index].w = pullBack[pullBackIndex].w;
index += cSim.paddedNumberOfAtoms;
float flag = 7.0f;
debugArray[index].x = ijField[indexI][0];
debugArray[index].y = ijField[indexI][1];
debugArray[index].z = ijField[indexI][2];
debugArray[index].w = flag;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexJ][0];
debugArray[index].y = ijField[indexJ][1];
debugArray[index].z = ijField[indexJ][2];
debugArray[index].w = flag;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexI+1][0];
debugArray[index].y = ijField[indexI+1][1];
debugArray[index].z = ijField[indexI+1][2];
debugArray[index].w = flag;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexJ+1][0];
debugArray[index].y = ijField[indexJ+1][1];
debugArray[index].z = ijField[indexJ+1][2];
debugArray[index].w = flag;
}
#endif
#endif
*/
}
}
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
View file @
8a331fb9
...
@@ -531,7 +531,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
...
@@ -531,7 +531,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
maxThreads
=
192
;
maxThreads
=
192
;
else
else
maxThreads
=
128
;
maxThreads
=
128
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
Vdw14_7Particle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
Vdw14_7Particle
)
,
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
}
if
(
0
){
if
(
0
){
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
View file @
8a331fb9
...
@@ -382,7 +382,7 @@ void kCalculateAmoebaWcaDispersionForces( amoebaGpuContext amoebaGpu )
...
@@ -382,7 +382,7 @@ void kCalculateAmoebaWcaDispersionForces( amoebaGpuContext amoebaGpu )
maxThreads
=
192
;
maxThreads
=
192
;
else
else
maxThreads
=
64
;
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
WcaDispersionParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
WcaDispersionParticle
)
,
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
}
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment