Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
306d99e8
Commit
306d99e8
authored
Apr 08, 2009
by
Peter Eastman
Browse files
Enhancements to CUDAStream to reduce the risk of bugs and make debugging easier
parent
968cb132
Changes
3
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
292 additions
and
284 deletions
+292
-284
platforms/cuda/src/kernels/cudatypes.h
platforms/cuda/src/kernels/cudatypes.h
+20
-12
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+257
-257
platforms/cuda/tests/TestCudaNonbondedForce.cpp
platforms/cuda/tests/TestCudaNonbondedForce.cpp
+15
-15
No files found.
platforms/cuda/src/kernels/cudatypes.h
View file @
306d99e8
...
@@ -76,40 +76,42 @@ struct CUDAStream : public SoADeviceObject
...
@@ -76,40 +76,42 @@ struct CUDAStream : public SoADeviceObject
T
**
_pDevStream
;
T
**
_pDevStream
;
T
*
_pSysData
;
T
*
_pSysData
;
T
*
_pDevData
;
T
*
_pDevData
;
CUDAStream
(
int
length
,
int
subStreams
=
1
);
std
::
string
_name
;
CUDAStream
(
unsigned
int
length
,
unsigned
int
subStreams
=
1
);
CUDAStream
(
int
length
,
int
subStreams
=
1
,
std
::
string
name
=
""
);
CUDAStream
(
unsigned
int
length
,
int
subStreams
=
1
);
CUDAStream
(
unsigned
int
length
,
unsigned
int
subStreams
=
1
,
std
::
string
name
=
""
);
CUDAStream
(
int
length
,
unsigned
int
subStreams
=
1
);
CUDAStream
(
unsigned
int
length
,
int
subStreams
=
1
,
std
::
string
name
=
""
);
CUDAStream
(
int
length
,
unsigned
int
subStreams
=
1
,
std
::
string
name
=
""
);
virtual
~
CUDAStream
();
virtual
~
CUDAStream
();
void
Allocate
();
void
Allocate
();
void
Deallocate
();
void
Deallocate
();
void
Upload
();
void
Upload
();
void
Download
();
void
Download
();
void
Collapse
(
unsigned
int
newstreams
=
1
,
unsigned
int
interleave
=
1
);
void
Collapse
(
unsigned
int
newstreams
=
1
,
unsigned
int
interleave
=
1
);
T
&
operator
[](
int
index
);
};
};
float
CompareStreams
(
CUDAStream
<
float
>&
s1
,
CUDAStream
<
float
>&
s2
,
float
tolerance
,
unsigned
int
maxindex
=
0
);
float
CompareStreams
(
CUDAStream
<
float
>&
s1
,
CUDAStream
<
float
>&
s2
,
float
tolerance
,
unsigned
int
maxindex
=
0
);
template
<
typename
T
>
template
<
typename
T
>
CUDAStream
<
T
>::
CUDAStream
(
int
length
,
unsigned
int
subStreams
)
:
_length
(
length
),
_subStreams
(
subStreams
),
_stride
((
length
+
0xf
)
&
0xfffffff0
)
CUDAStream
<
T
>::
CUDAStream
(
int
length
,
unsigned
int
subStreams
,
std
::
string
name
)
:
_length
(
length
),
_subStreams
(
subStreams
),
_stride
((
length
+
0xf
)
&
0xfffffff0
)
,
_name
(
name
)
{
{
Allocate
();
Allocate
();
}
}
template
<
typename
T
>
template
<
typename
T
>
CUDAStream
<
T
>::
CUDAStream
(
unsigned
int
length
,
int
subStreams
)
:
_length
(
length
),
_subStreams
(
subStreams
),
_stride
((
length
+
0xf
)
&
0xfffffff0
)
CUDAStream
<
T
>::
CUDAStream
(
unsigned
int
length
,
int
subStreams
,
std
::
string
name
)
:
_length
(
length
),
_subStreams
(
subStreams
),
_stride
((
length
+
0xf
)
&
0xfffffff0
)
,
_name
(
name
)
{
{
Allocate
();
Allocate
();
}
}
template
<
typename
T
>
template
<
typename
T
>
CUDAStream
<
T
>::
CUDAStream
(
unsigned
int
length
,
unsigned
int
subStreams
)
:
_length
(
length
),
_subStreams
(
subStreams
),
_stride
((
length
+
0xf
)
&
0xfffffff0
)
CUDAStream
<
T
>::
CUDAStream
(
unsigned
int
length
,
unsigned
int
subStreams
,
std
::
string
name
)
:
_length
(
length
),
_subStreams
(
subStreams
),
_stride
((
length
+
0xf
)
&
0xfffffff0
)
,
_name
(
name
)
{
{
Allocate
();
Allocate
();
}
}
template
<
typename
T
>
template
<
typename
T
>
CUDAStream
<
T
>::
CUDAStream
(
int
length
,
int
subStreams
)
:
_length
(
length
),
_subStreams
(
subStreams
),
_stride
((
length
+
0xf
)
&
0xfffffff0
)
CUDAStream
<
T
>::
CUDAStream
(
int
length
,
int
subStreams
,
std
::
string
name
)
:
_length
(
length
),
_subStreams
(
subStreams
),
_stride
((
length
+
0xf
)
&
0xfffffff0
)
,
_name
(
name
)
{
{
Allocate
();
Allocate
();
}
}
...
@@ -129,7 +131,7 @@ void CUDAStream<T>::Allocate()
...
@@ -129,7 +131,7 @@ void CUDAStream<T>::Allocate()
_pSysData
=
new
T
[
_subStreams
*
_stride
];
_pSysData
=
new
T
[
_subStreams
*
_stride
];
status
=
cudaMalloc
((
void
**
)
&
_pDevData
,
_stride
*
_subStreams
*
sizeof
(
T
));
status
=
cudaMalloc
((
void
**
)
&
_pDevData
,
_stride
*
_subStreams
*
sizeof
(
T
));
RTERROR
(
status
,
"
cudaMalloc CUDAStream::Allocate failed"
);
RTERROR
(
status
,
(
_name
+
":
cudaMalloc
in
CUDAStream::Allocate failed"
)
.
c_str
())
;
for
(
unsigned
int
i
=
0
;
i
<
_subStreams
;
i
++
)
for
(
unsigned
int
i
=
0
;
i
<
_subStreams
;
i
++
)
{
{
...
@@ -149,7 +151,7 @@ void CUDAStream<T>::Deallocate()
...
@@ -149,7 +151,7 @@ void CUDAStream<T>::Deallocate()
delete
[]
_pSysData
;
delete
[]
_pSysData
;
_pSysData
=
NULL
;
_pSysData
=
NULL
;
status
=
cudaFree
(
_pDevData
);
status
=
cudaFree
(
_pDevData
);
RTERROR
(
status
,
"
cudaFree CUDAStream::Deallocate failed"
)
;
RTERROR
(
status
,
(
_name
+
":
cudaFree
in
CUDAStream::Deallocate failed"
)
.
c_str
());
}
}
template
<
typename
T
>
template
<
typename
T
>
...
@@ -157,7 +159,7 @@ void CUDAStream<T>::Upload()
...
@@ -157,7 +159,7 @@ void CUDAStream<T>::Upload()
{
{
cudaError_t
status
;
cudaError_t
status
;
status
=
cudaMemcpy
(
_pDevData
,
_pSysData
,
_stride
*
_subStreams
*
sizeof
(
T
),
cudaMemcpyHostToDevice
);
status
=
cudaMemcpy
(
_pDevData
,
_pSysData
,
_stride
*
_subStreams
*
sizeof
(
T
),
cudaMemcpyHostToDevice
);
RTERROR
(
status
,
"
cudaMemcpy CUDAStream::Upload failed"
);
RTERROR
(
status
,
(
_name
+
":
cudaMemcpy
in
CUDAStream::Upload failed"
)
.
c_str
())
;
}
}
template
<
typename
T
>
template
<
typename
T
>
...
@@ -165,7 +167,7 @@ void CUDAStream<T>::Download()
...
@@ -165,7 +167,7 @@ void CUDAStream<T>::Download()
{
{
cudaError_t
status
;
cudaError_t
status
;
status
=
cudaMemcpy
(
_pSysData
,
_pDevData
,
_stride
*
_subStreams
*
sizeof
(
T
),
cudaMemcpyDeviceToHost
);
status
=
cudaMemcpy
(
_pSysData
,
_pDevData
,
_stride
*
_subStreams
*
sizeof
(
T
),
cudaMemcpyDeviceToHost
);
RTERROR
(
status
,
"
cudaMemcpy CUDAStream::Download failed"
);
RTERROR
(
status
,
(
_name
+
":
cudaMemcpy
in
CUDAStream::Download failed"
)
.
c_str
())
;
}
}
template
<
typename
T
>
template
<
typename
T
>
...
@@ -210,6 +212,12 @@ void CUDAStream<T>::Collapse(unsigned int newstreams, unsigned int interleave)
...
@@ -210,6 +212,12 @@ void CUDAStream<T>::Collapse(unsigned int newstreams, unsigned int interleave)
delete
[]
pTemp
;
delete
[]
pTemp
;
}
}
template
<
typename
T
>
T
&
CUDAStream
<
T
>::
operator
[](
int
index
)
{
return
_pSysData
[
index
];
}
static
const
unsigned
int
GRID
=
32
;
static
const
unsigned
int
GRID
=
32
;
static
const
unsigned
int
GRIDBITS
=
5
;
static
const
unsigned
int
GRIDBITS
=
5
;
static
const
int
G8X_NONBOND_THREADS_PER_BLOCK
=
256
;
static
const
int
G8X_NONBOND_THREADS_PER_BLOCK
=
256
;
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
306d99e8
This diff is collapsed.
Click to expand it.
platforms/cuda/tests/TestCudaNonbondedForce.cpp
View file @
306d99e8
...
@@ -428,8 +428,8 @@ void testBlockInteractions(bool periodic) {
...
@@ -428,8 +428,8 @@ void testBlockInteractions(bool periodic) {
data
.
gpu
->
psGridBoundingBox
->
Download
();
data
.
gpu
->
psGridBoundingBox
->
Download
();
data
.
gpu
->
psGridCenter
->
Download
();
data
.
gpu
->
psGridCenter
->
Download
();
for
(
int
i
=
0
;
i
<
numBlocks
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numBlocks
;
i
++
)
{
float4
gridSize
=
data
.
gpu
->
psGridBoundingBox
->
_pSysData
[
i
];
float4
gridSize
=
(
*
data
.
gpu
->
psGridBoundingBox
)
[
i
];
float4
center
=
data
.
gpu
->
psGridCenter
->
_pSysData
[
i
];
float4
center
=
(
*
data
.
gpu
->
psGridCenter
)
[
i
];
if
(
periodic
)
{
if
(
periodic
)
{
ASSERT
(
gridSize
.
x
<
0.5
*
boxSize
);
ASSERT
(
gridSize
.
x
<
0.5
*
boxSize
);
ASSERT
(
gridSize
.
y
<
0.5
*
boxSize
);
ASSERT
(
gridSize
.
y
<
0.5
*
boxSize
);
...
@@ -437,7 +437,7 @@ void testBlockInteractions(bool periodic) {
...
@@ -437,7 +437,7 @@ void testBlockInteractions(bool periodic) {
}
}
float
minx
=
0.0
,
maxx
=
0.0
,
miny
=
0.0
,
maxy
=
0.0
,
minz
=
0.0
,
maxz
=
0.0
,
radius
=
0.0
;
float
minx
=
0.0
,
maxx
=
0.0
,
miny
=
0.0
,
maxy
=
0.0
,
minz
=
0.0
,
maxz
=
0.0
,
radius
=
0.0
;
for
(
int
j
=
0
;
j
<
blockSize
;
j
++
)
{
for
(
int
j
=
0
;
j
<
blockSize
;
j
++
)
{
float4
pos
=
data
.
gpu
->
psPosq4
->
_pSysData
[
i
*
blockSize
+
j
];
float4
pos
=
(
*
data
.
gpu
->
psPosq4
)
[
i
*
blockSize
+
j
];
float
dx
=
pos
.
x
-
center
.
x
;
float
dx
=
pos
.
x
-
center
.
x
;
float
dy
=
pos
.
y
-
center
.
y
;
float
dy
=
pos
.
y
-
center
.
y
;
float
dz
=
pos
.
z
-
center
.
z
;
float
dz
=
pos
.
z
-
center
.
z
;
...
@@ -467,7 +467,7 @@ void testBlockInteractions(bool periodic) {
...
@@ -467,7 +467,7 @@ void testBlockInteractions(bool periodic) {
// Verify that interactions were identified correctly.
// Verify that interactions were identified correctly.
data
.
gpu
->
psInteractionCount
->
Download
();
data
.
gpu
->
psInteractionCount
->
Download
();
int
numWithInteractions
=
data
.
gpu
->
psInteractionCount
->
_pSysData
[
0
];
int
numWithInteractions
=
(
*
data
.
gpu
->
psInteractionCount
)
[
0
];
vector
<
bool
>
hasInteractions
(
data
.
gpu
->
sim
.
workUnits
,
false
);
vector
<
bool
>
hasInteractions
(
data
.
gpu
->
sim
.
workUnits
,
false
);
data
.
gpu
->
psInteractingWorkUnit
->
Download
();
data
.
gpu
->
psInteractingWorkUnit
->
Download
();
data
.
gpu
->
psInteractionFlag
->
Download
();
data
.
gpu
->
psInteractionFlag
->
Download
();
...
@@ -475,7 +475,7 @@ void testBlockInteractions(bool periodic) {
...
@@ -475,7 +475,7 @@ void testBlockInteractions(bool periodic) {
const
unsigned
int
grid
=
data
.
gpu
->
grid
;
const
unsigned
int
grid
=
data
.
gpu
->
grid
;
const
unsigned
int
dim
=
(
atoms
+
(
grid
-
1
))
/
grid
;
const
unsigned
int
dim
=
(
atoms
+
(
grid
-
1
))
/
grid
;
for
(
int
i
=
0
;
i
<
numWithInteractions
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numWithInteractions
;
i
++
)
{
unsigned
int
workUnit
=
data
.
gpu
->
psInteractingWorkUnit
->
_pSysData
[
i
];
unsigned
int
workUnit
=
(
*
data
.
gpu
->
psInteractingWorkUnit
)
[
i
];
unsigned
int
x
=
(
workUnit
>>
17
);
unsigned
int
x
=
(
workUnit
>>
17
);
unsigned
int
y
=
((
workUnit
>>
2
)
&
0x7fff
);
unsigned
int
y
=
((
workUnit
>>
2
)
&
0x7fff
);
int
tile
=
(
x
>
y
?
x
+
y
*
dim
-
y
*
(
y
+
1
)
/
2
:
y
+
x
*
dim
-
x
*
(
x
+
1
)
/
2
);
int
tile
=
(
x
>
y
?
x
+
y
*
dim
-
y
*
(
y
+
1
)
/
2
:
y
+
x
*
dim
-
x
*
(
x
+
1
)
/
2
);
...
@@ -483,10 +483,10 @@ void testBlockInteractions(bool periodic) {
...
@@ -483,10 +483,10 @@ void testBlockInteractions(bool periodic) {
// Make sure this tile really should have been flagged based on bounding volumes.
// Make sure this tile really should have been flagged based on bounding volumes.
float4
gridSize1
=
data
.
gpu
->
psGridBoundingBox
->
_pSysData
[
x
];
float4
gridSize1
=
(
*
data
.
gpu
->
psGridBoundingBox
)
[
x
];
float4
gridSize2
=
data
.
gpu
->
psGridBoundingBox
->
_pSysData
[
y
];
float4
gridSize2
=
(
*
data
.
gpu
->
psGridBoundingBox
)
[
y
];
float4
center1
=
data
.
gpu
->
psGridCenter
->
_pSysData
[
x
];
float4
center1
=
(
*
data
.
gpu
->
psGridCenter
)
[
x
];
float4
center2
=
data
.
gpu
->
psGridCenter
->
_pSysData
[
y
];
float4
center2
=
(
*
data
.
gpu
->
psGridCenter
)
[
y
];
float
dx
=
center1
.
x
-
center2
.
x
;
float
dx
=
center1
.
x
-
center2
.
x
;
float
dy
=
center1
.
y
-
center2
.
y
;
float
dy
=
center1
.
y
-
center2
.
y
;
float
dz
=
center1
.
z
-
center2
.
z
;
float
dz
=
center1
.
z
-
center2
.
z
;
...
@@ -502,12 +502,12 @@ void testBlockInteractions(bool periodic) {
...
@@ -502,12 +502,12 @@ void testBlockInteractions(bool periodic) {
// Check the interaction flags.
// Check the interaction flags.
unsigned
int
flags
=
data
.
gpu
->
psInteractionFlag
->
_pSysData
[
i
];
unsigned
int
flags
=
(
*
data
.
gpu
->
psInteractionFlag
)
[
i
];
for
(
int
atom2
=
0
;
atom2
<
32
;
atom2
++
)
{
for
(
int
atom2
=
0
;
atom2
<
32
;
atom2
++
)
{
if
((
flags
&
1
)
==
0
)
{
if
((
flags
&
1
)
==
0
)
{
float4
pos2
=
data
.
gpu
->
psPosq4
->
_pSysData
[
y
*
blockSize
+
atom2
];
float4
pos2
=
(
*
data
.
gpu
->
psPosq4
)
[
y
*
blockSize
+
atom2
];
for
(
int
atom1
=
0
;
atom1
<
blockSize
;
++
atom1
)
{
for
(
int
atom1
=
0
;
atom1
<
blockSize
;
++
atom1
)
{
float4
pos1
=
data
.
gpu
->
psPosq4
->
_pSysData
[
x
*
blockSize
+
atom1
];
float4
pos1
=
(
*
data
.
gpu
->
psPosq4
)
[
x
*
blockSize
+
atom1
];
float
dx
=
pos2
.
x
-
pos1
.
x
;
float
dx
=
pos2
.
x
-
pos1
.
x
;
float
dy
=
pos2
.
y
-
pos1
.
y
;
float
dy
=
pos2
.
y
-
pos1
.
y
;
float
dz
=
pos2
.
z
-
pos1
.
z
;
float
dz
=
pos2
.
z
-
pos1
.
z
;
...
@@ -536,13 +536,13 @@ void testBlockInteractions(bool periodic) {
...
@@ -536,13 +536,13 @@ void testBlockInteractions(bool periodic) {
data
.
gpu
->
psWorkUnit
->
Download
();
data
.
gpu
->
psWorkUnit
->
Download
();
for
(
int
i
=
0
;
i
<
hasInteractions
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
hasInteractions
.
size
();
i
++
)
if
(
!
hasInteractions
[
i
])
{
if
(
!
hasInteractions
[
i
])
{
unsigned
int
workUnit
=
data
.
gpu
->
psWorkUnit
->
_pSysData
[
i
];
unsigned
int
workUnit
=
(
*
data
.
gpu
->
psWorkUnit
)
[
i
];
unsigned
int
x
=
(
workUnit
>>
17
);
unsigned
int
x
=
(
workUnit
>>
17
);
unsigned
int
y
=
((
workUnit
>>
2
)
&
0x7fff
);
unsigned
int
y
=
((
workUnit
>>
2
)
&
0x7fff
);
for
(
int
atom1
=
0
;
atom1
<
blockSize
;
++
atom1
)
{
for
(
int
atom1
=
0
;
atom1
<
blockSize
;
++
atom1
)
{
float4
pos1
=
data
.
gpu
->
psPosq4
->
_pSysData
[
x
*
blockSize
+
atom1
];
float4
pos1
=
(
*
data
.
gpu
->
psPosq4
)
[
x
*
blockSize
+
atom1
];
for
(
int
atom2
=
0
;
atom2
<
blockSize
;
++
atom2
)
{
for
(
int
atom2
=
0
;
atom2
<
blockSize
;
++
atom2
)
{
float4
pos2
=
data
.
gpu
->
psPosq4
->
_pSysData
[
y
*
blockSize
+
atom2
];
float4
pos2
=
(
*
data
.
gpu
->
psPosq4
)
[
y
*
blockSize
+
atom2
];
float
dx
=
pos1
.
x
-
pos2
.
x
;
float
dx
=
pos1
.
x
-
pos2
.
x
;
float
dy
=
pos1
.
y
-
pos2
.
y
;
float
dy
=
pos1
.
y
-
pos2
.
y
;
float
dz
=
pos1
.
z
-
pos2
.
z
;
float
dz
=
pos1
.
z
-
pos2
.
z
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment