Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
0aca702a
Commit
0aca702a
authored
Aug 04, 2010
by
Peter Eastman
Browse files
Fixed errors running on compute 1.1 devices
parent
c9b1338b
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
44 additions
and
10 deletions
+44
-10
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
+8
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
...c/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
+8
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
...platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
+19
-5
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
+8
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.h
...orms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.h
+1
-1
No files found.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
View file @
0aca702a
...
...
@@ -1190,7 +1190,14 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
kClearFields_3
(
amoebaGpu
,
6
);
if
(
threadsPerBlock
==
0
){
threadsPerBlock
=
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
KirkwoodEDiffParticle
));
unsigned
int
maxThreads
;
if
(
gpu
->
sm_version
>=
SM_20
)
maxThreads
=
192
;
else
if
(
gpu
->
sm_version
>=
SM_12
)
maxThreads
=
96
;
else
maxThreads
=
32
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
KirkwoodEDiffParticle
)),
maxThreads
);
}
if
(
amoebaGpu
->
log
&&
timestep
==
1
){
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
View file @
0aca702a
...
...
@@ -574,7 +574,14 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon
// set threads/block first time through
if
(
threadsPerBlock
==
0
){
threadsPerBlock
=
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
MutualInducedParticle
));
unsigned
int
maxThreads
;
if
(
gpu
->
sm_version
>=
SM_20
)
maxThreads
=
256
;
else
if
(
gpu
->
sm_version
>=
SM_12
)
maxThreads
=
128
;
else
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
MutualInducedParticle
)),
maxThreads
);
}
if
(
gpu
->
bOutputBufferPerWarp
){
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
View file @
0aca702a
...
...
@@ -176,7 +176,7 @@ __device__ void calculateVdw14_7PairIxn_kernel( float4 atomCoordinatesI, float4
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_NONBOND_THREADS_PER_BLOCK
,
1
)
...
...
@@ -270,7 +270,14 @@ void kCalculateAmoebaVdw14_7Reduction_kernel( float* inputForce, float4* outputF
static
void
kCalculateAmoebaVdw14_7Reduction
(
amoebaGpuContext
amoebaGpu
,
CUDAStream
<
float
>*
vdwOutputArray
,
CUDAStream
<
float4
>*
forceOutputArray
)
{
kCalculateAmoebaVdw14_7Reduction_kernel
<<<
amoebaGpu
->
gpuContext
->
sim
.
blocks
,
384
>>>
(
unsigned
int
threadsPerBlock
;
if
(
amoebaGpu
->
gpuContext
->
sm_version
>=
SM_20
)
threadsPerBlock
=
GF1XX_NONBOND_THREADS_PER_BLOCK
;
else
if
(
amoebaGpu
->
gpuContext
->
sm_version
>=
SM_12
)
threadsPerBlock
=
GT2XX_NONBOND_THREADS_PER_BLOCK
;
else
threadsPerBlock
=
G8X_NONBOND_THREADS_PER_BLOCK
;
kCalculateAmoebaVdw14_7Reduction_kernel
<<<
amoebaGpu
->
gpuContext
->
sim
.
blocks
,
threadsPerBlock
>>>
(
vdwOutputArray
->
_pDevStream
[
0
],
forceOutputArray
->
_pDevStream
[
0
]
);
LAUNCHERROR
(
"kCalculateAmoebaVdw14_7Reduction"
);
}
...
...
@@ -282,7 +289,7 @@ static void kCalculateAmoebaVdw14_7Reduction(amoebaGpuContext amoebaGpu, CUDAStr
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
...
...
@@ -361,9 +368,16 @@ static void kCalculateAmoebaVdw14_7CoordinateReduction(amoebaGpuContext amoebaGp
CUDAStream
<
float4
>*
coordinateArray
,
CUDAStream
<
float4
>*
reducedCoordinateArray
)
{
kCalculateAmoebaVdw14_7CoordinateReduction_kernel
<<<
amoebaGpu
->
gpuContext
->
sim
.
blocks
,
384
>>>
(
unsigned
int
threadsPerBlock
;
if
(
amoebaGpu
->
gpuContext
->
sm_version
>=
SM_20
)
threadsPerBlock
=
GF1XX_THREADS_PER_BLOCK
;
else
if
(
amoebaGpu
->
gpuContext
->
sm_version
>=
SM_12
)
threadsPerBlock
=
GT2XX_THREADS_PER_BLOCK
;
else
threadsPerBlock
=
G8X_THREADS_PER_BLOCK
;
kCalculateAmoebaVdw14_7CoordinateReduction_kernel
<<<
amoebaGpu
->
gpuContext
->
sim
.
blocks
,
threadsPerBlock
>>>
(
coordinateArray
->
_pDevStream
[
0
],
reducedCoordinateArray
->
_pDevStream
[
0
]
);
LAUNCHERROR
(
"kCalculateAmoebaVdw14_7Reduction"
);
LAUNCHERROR
(
"kCalculateAmoebaVdw14_7CoordinateReduction"
);
}
// perform reduction of force on H's and add to heavy atom partner
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
View file @
0aca702a
...
...
@@ -408,8 +408,14 @@ void kCalculateAmoebaWcaDispersionForces( amoebaGpuContext amoebaGpu )
// set threads/block first time through
if
(
threadsPerBlock
==
0
){
threadsPerBlock
=
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
WcaDispersionParticle
));
threadsPerBlock
=
128
;
unsigned
int
maxThreads
;
if
(
gpu
->
sm_version
>=
SM_20
)
maxThreads
=
384
;
else
if
(
gpu
->
sm_version
>=
SM_12
)
maxThreads
=
192
;
else
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
WcaDispersionParticle
)),
maxThreads
);
}
if
(
gpu
->
bOutputBufferPerWarp
){
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.h
View file @
0aca702a
...
...
@@ -27,7 +27,7 @@
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
384
,
1
)
#elif (__CUDA_ARCH__ >= 130)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
192
,
1
)
#else
__launch_bounds__
(
64
,
1
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment