Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
0aca702a
"...ssh:/git@developer.sourcefind.cn:2222/tsoc/openmm.git" did not exist on "0843c5f34651bf49f418f5336a9b7bafc2b8f89a"
Commit
0aca702a
authored
Aug 04, 2010
by
Peter Eastman
Browse files
Fixed errors running on compute 1.1 devices
parent
c9b1338b
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
44 additions
and
10 deletions
+44
-10
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
+8
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
...c/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
+8
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
...platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
+19
-5
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
+8
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.h
...orms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.h
+1
-1
No files found.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
View file @
0aca702a
...
@@ -1190,7 +1190,14 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
...
@@ -1190,7 +1190,14 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
kClearFields_3
(
amoebaGpu
,
6
);
kClearFields_3
(
amoebaGpu
,
6
);
if
(
threadsPerBlock
==
0
){
if
(
threadsPerBlock
==
0
){
threadsPerBlock
=
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
KirkwoodEDiffParticle
));
unsigned
int
maxThreads
;
if
(
gpu
->
sm_version
>=
SM_20
)
maxThreads
=
192
;
else
if
(
gpu
->
sm_version
>=
SM_12
)
maxThreads
=
96
;
else
maxThreads
=
32
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
KirkwoodEDiffParticle
)),
maxThreads
);
}
}
if
(
amoebaGpu
->
log
&&
timestep
==
1
){
if
(
amoebaGpu
->
log
&&
timestep
==
1
){
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
View file @
0aca702a
...
@@ -574,7 +574,14 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon
...
@@ -574,7 +574,14 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon
// set threads/block first time through
// set threads/block first time through
if
(
threadsPerBlock
==
0
){
if
(
threadsPerBlock
==
0
){
threadsPerBlock
=
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
MutualInducedParticle
));
unsigned
int
maxThreads
;
if
(
gpu
->
sm_version
>=
SM_20
)
maxThreads
=
256
;
else
if
(
gpu
->
sm_version
>=
SM_12
)
maxThreads
=
128
;
else
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
MutualInducedParticle
)),
maxThreads
);
}
}
if
(
gpu
->
bOutputBufferPerWarp
){
if
(
gpu
->
bOutputBufferPerWarp
){
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
View file @
0aca702a
...
@@ -176,7 +176,7 @@ __device__ void calculateVdw14_7PairIxn_kernel( float4 atomCoordinatesI, float4
...
@@ -176,7 +176,7 @@ __device__ void calculateVdw14_7PairIxn_kernel( float4 atomCoordinatesI, float4
__global__
__global__
#if (__CUDA_ARCH__ >= 200)
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_NONBOND_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
GF1XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_NONBOND_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
GT2XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#else
#else
__launch_bounds__
(
G8X_NONBOND_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
G8X_NONBOND_THREADS_PER_BLOCK
,
1
)
...
@@ -270,7 +270,14 @@ void kCalculateAmoebaVdw14_7Reduction_kernel( float* inputForce, float4* outputF
...
@@ -270,7 +270,14 @@ void kCalculateAmoebaVdw14_7Reduction_kernel( float* inputForce, float4* outputF
static
void
kCalculateAmoebaVdw14_7Reduction
(
amoebaGpuContext
amoebaGpu
,
CUDAStream
<
float
>*
vdwOutputArray
,
CUDAStream
<
float4
>*
forceOutputArray
)
static
void
kCalculateAmoebaVdw14_7Reduction
(
amoebaGpuContext
amoebaGpu
,
CUDAStream
<
float
>*
vdwOutputArray
,
CUDAStream
<
float4
>*
forceOutputArray
)
{
{
kCalculateAmoebaVdw14_7Reduction_kernel
<<<
amoebaGpu
->
gpuContext
->
sim
.
blocks
,
384
>>>
(
unsigned
int
threadsPerBlock
;
if
(
amoebaGpu
->
gpuContext
->
sm_version
>=
SM_20
)
threadsPerBlock
=
GF1XX_NONBOND_THREADS_PER_BLOCK
;
else
if
(
amoebaGpu
->
gpuContext
->
sm_version
>=
SM_12
)
threadsPerBlock
=
GT2XX_NONBOND_THREADS_PER_BLOCK
;
else
threadsPerBlock
=
G8X_NONBOND_THREADS_PER_BLOCK
;
kCalculateAmoebaVdw14_7Reduction_kernel
<<<
amoebaGpu
->
gpuContext
->
sim
.
blocks
,
threadsPerBlock
>>>
(
vdwOutputArray
->
_pDevStream
[
0
],
forceOutputArray
->
_pDevStream
[
0
]
);
vdwOutputArray
->
_pDevStream
[
0
],
forceOutputArray
->
_pDevStream
[
0
]
);
LAUNCHERROR
(
"kCalculateAmoebaVdw14_7Reduction"
);
LAUNCHERROR
(
"kCalculateAmoebaVdw14_7Reduction"
);
}
}
...
@@ -282,7 +289,7 @@ static void kCalculateAmoebaVdw14_7Reduction(amoebaGpuContext amoebaGpu, CUDAStr
...
@@ -282,7 +289,7 @@ static void kCalculateAmoebaVdw14_7Reduction(amoebaGpuContext amoebaGpu, CUDAStr
__global__
__global__
#if (__CUDA_ARCH__ >= 200)
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
...
@@ -361,9 +368,16 @@ static void kCalculateAmoebaVdw14_7CoordinateReduction(amoebaGpuContext amoebaGp
...
@@ -361,9 +368,16 @@ static void kCalculateAmoebaVdw14_7CoordinateReduction(amoebaGpuContext amoebaGp
CUDAStream
<
float4
>*
coordinateArray
,
CUDAStream
<
float4
>*
coordinateArray
,
CUDAStream
<
float4
>*
reducedCoordinateArray
)
CUDAStream
<
float4
>*
reducedCoordinateArray
)
{
{
kCalculateAmoebaVdw14_7CoordinateReduction_kernel
<<<
amoebaGpu
->
gpuContext
->
sim
.
blocks
,
384
>>>
(
unsigned
int
threadsPerBlock
;
if
(
amoebaGpu
->
gpuContext
->
sm_version
>=
SM_20
)
threadsPerBlock
=
GF1XX_THREADS_PER_BLOCK
;
else
if
(
amoebaGpu
->
gpuContext
->
sm_version
>=
SM_12
)
threadsPerBlock
=
GT2XX_THREADS_PER_BLOCK
;
else
threadsPerBlock
=
G8X_THREADS_PER_BLOCK
;
kCalculateAmoebaVdw14_7CoordinateReduction_kernel
<<<
amoebaGpu
->
gpuContext
->
sim
.
blocks
,
threadsPerBlock
>>>
(
coordinateArray
->
_pDevStream
[
0
],
reducedCoordinateArray
->
_pDevStream
[
0
]
);
coordinateArray
->
_pDevStream
[
0
],
reducedCoordinateArray
->
_pDevStream
[
0
]
);
LAUNCHERROR
(
"kCalculateAmoebaVdw14_7Reduction"
);
LAUNCHERROR
(
"kCalculateAmoebaVdw14_7CoordinateReduction"
);
}
}
// perform reduction of force on H's and add to heavy atom partner
// perform reduction of force on H's and add to heavy atom partner
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.cu
View file @
0aca702a
...
@@ -408,8 +408,14 @@ void kCalculateAmoebaWcaDispersionForces( amoebaGpuContext amoebaGpu )
...
@@ -408,8 +408,14 @@ void kCalculateAmoebaWcaDispersionForces( amoebaGpuContext amoebaGpu )
// set threads/block first time through
// set threads/block first time through
if
(
threadsPerBlock
==
0
){
if
(
threadsPerBlock
==
0
){
threadsPerBlock
=
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
WcaDispersionParticle
));
unsigned
int
maxThreads
;
threadsPerBlock
=
128
;
if
(
gpu
->
sm_version
>=
SM_20
)
maxThreads
=
384
;
else
if
(
gpu
->
sm_version
>=
SM_12
)
maxThreads
=
192
;
else
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
WcaDispersionParticle
)),
maxThreads
);
}
}
if
(
gpu
->
bOutputBufferPerWarp
){
if
(
gpu
->
bOutputBufferPerWarp
){
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.h
View file @
0aca702a
...
@@ -27,7 +27,7 @@
...
@@ -27,7 +27,7 @@
__global__
__global__
#if (__CUDA_ARCH__ >= 200)
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
384
,
1
)
__launch_bounds__
(
384
,
1
)
#elif (__CUDA_ARCH__ >= 130)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
192
,
1
)
__launch_bounds__
(
192
,
1
)
#else
#else
__launch_bounds__
(
64
,
1
)
__launch_bounds__
(
64
,
1
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment