Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
41bcc20f
Commit
41bcc20f
authored
Apr 08, 2013
by
Peter Eastman
Browse files
Use peer to peer copying between GPUs when available
parent
c0a43bfc
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
49 additions
and
15 deletions
+49
-15
platforms/cuda/include/CudaPlatform.h
platforms/cuda/include/CudaPlatform.h
+1
-1
platforms/cuda/src/CudaParallelKernels.cpp
platforms/cuda/src/CudaParallelKernels.cpp
+28
-14
platforms/cuda/src/CudaPlatform.cpp
platforms/cuda/src/CudaPlatform.cpp
+20
-0
No files found.
platforms/cuda/include/CudaPlatform.h
View file @
41bcc20f
...
...
@@ -106,7 +106,7 @@ public:
void
syncContexts
();
std
::
vector
<
CudaContext
*>
contexts
;
std
::
vector
<
double
>
contextEnergy
;
bool
removeCM
;
bool
removeCM
,
peerAccessSupported
;
int
cmMotionFrequency
;
int
stepCount
,
computeForceCount
;
double
time
;
...
...
platforms/cuda/src/CudaParallelKernels.cpp
View file @
41bcc20f
...
...
@@ -31,10 +31,10 @@ using namespace OpenMM;
using
namespace
std
;
#define CHECK_RESULT(result) \
#define CHECK_RESULT(result
, prefix
) \
if (result != CUDA_SUCCESS) { \
std::stringstream m; \
m<<
errorMessage
<<": "<<cu.getErrorString(result)<<" ("<<result<<")"<<" at "<<__FILE__<<":"<<__LINE__; \
m<<
prefix
<<": "<<cu.getErrorString(result)<<" ("<<result<<")"<<" at "<<__FILE__<<":"<<__LINE__; \
throw OpenMMException(m.str());\
}
...
...
@@ -70,8 +70,15 @@ public:
// Copy coordinates over to this device and execute the kernel.
cu
.
setAsCurrent
();
if
(
cu
.
getContextIndex
()
>
0
)
cu
.
getPosq
().
upload
(
pinnedMemory
,
false
);
if
(
cu
.
getContextIndex
()
>
0
)
{
if
(
cu
.
getPlatformData
().
peerAccessSupported
&&
cu
.
getPlatformData
().
contexts
.
size
()
<
3
)
{
CudaContext
&
context0
=
*
cu
.
getPlatformData
().
contexts
[
0
];
int
numBytes
=
cu
.
getPosq
().
getSize
()
*
cu
.
getPosq
().
getElementSize
();
CHECK_RESULT
(
cuMemcpyPeerAsync
(
cu
.
getPosq
().
getDevicePointer
(),
cu
.
getContext
(),
context0
.
getPosq
().
getDevicePointer
(),
context0
.
getContext
(),
numBytes
,
0
),
"Error copying positions"
);
}
else
cu
.
getPosq
().
upload
(
pinnedMemory
,
false
);
}
kernel
.
beginComputation
(
context
,
includeForce
,
includeEnergy
,
groups
);
}
private:
...
...
@@ -86,9 +93,9 @@ private:
class
CudaParallelCalcForcesAndEnergyKernel
::
FinishComputationTask
:
public
CudaContext
::
WorkTask
{
public:
FinishComputationTask
(
ContextImpl
&
context
,
CudaContext
&
cu
,
CudaCalcForcesAndEnergyKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
double
&
energy
,
long
long
&
completionTime
,
long
long
*
pinnedMemory
)
:
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
double
&
energy
,
long
long
&
completionTime
,
long
long
*
pinnedMemory
,
CudaArray
&
contextForces
)
:
context
(
context
),
cu
(
cu
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
groups
(
groups
),
energy
(
energy
),
completionTime
(
completionTime
),
pinnedMemory
(
pinnedMemory
)
{
completionTime
(
completionTime
),
pinnedMemory
(
pinnedMemory
)
,
contextForces
(
contextForces
)
{
}
void
execute
()
{
// Execute the kernel, then download forces.
...
...
@@ -97,11 +104,17 @@ public:
if
(
includeForce
)
{
if
(
cu
.
getContextIndex
()
>
0
)
{
int
numAtoms
=
cu
.
getPaddedNumAtoms
();
cu
.
getForce
().
download
(
&
pinnedMemory
[(
cu
.
getContextIndex
()
-
1
)
*
numAtoms
*
3
]);
if
(
cu
.
getPlatformData
().
peerAccessSupported
)
{
int
numBytes
=
numAtoms
*
3
*
sizeof
(
long
long
);
int
offset
=
(
cu
.
getContextIndex
()
-
1
)
*
numBytes
;
CudaContext
&
context0
=
*
cu
.
getPlatformData
().
contexts
[
0
];
CHECK_RESULT
(
cuMemcpyPeer
(
contextForces
.
getDevicePointer
()
+
offset
,
context0
.
getContext
(),
cu
.
getForce
().
getDevicePointer
(),
cu
.
getContext
(),
numBytes
),
"Error copying forces"
);
}
else
cu
.
getForce
().
download
(
&
pinnedMemory
[(
cu
.
getContextIndex
()
-
1
)
*
numAtoms
*
3
]);
}
else
{
string
errorMessage
=
"Error synchronizing CUDA context"
;
CHECK_RESULT
(
cuCtxSynchronize
());
CHECK_RESULT
(
cuCtxSynchronize
(),
"Error synchronizing CUDA context"
);
}
}
completionTime
=
getTime
();
...
...
@@ -115,6 +128,7 @@ private:
double
&
energy
;
long
long
&
completionTime
;
long
long
*
pinnedMemory
;
CudaArray
&
contextForces
;
};
CudaParallelCalcForcesAndEnergyKernel
::
CudaParallelCalcForcesAndEnergyKernel
(
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
)
:
...
...
@@ -150,14 +164,14 @@ void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& contex
cu
.
setAsCurrent
();
if
(
contextForces
==
NULL
)
{
contextForces
=
CudaArray
::
create
<
long
long
>
(
cu
,
3
*
(
data
.
contexts
.
size
()
-
1
)
*
cu
.
getPaddedNumAtoms
(),
"contextForces"
);
string
errorMessage
=
"Error allocating pinned memory"
;
CHECK_RESULT
(
cuMemHostAlloc
((
void
**
)
&
pinnedForceBuffer
,
3
*
(
data
.
contexts
.
size
()
-
1
)
*
cu
.
getPaddedNumAtoms
()
*
sizeof
(
long
long
),
CU_MEMHOSTALLOC_PORTABLE
));
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedPositionBuffer
,
cu
.
getPaddedNumAtoms
()
*
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double4
)
:
sizeof
(
float4
)),
CU_MEMHOSTALLOC_PORTABLE
));
CHECK_RESULT
(
cuMemHostAlloc
((
void
**
)
&
pinnedForceBuffer
,
3
*
(
data
.
contexts
.
size
()
-
1
)
*
cu
.
getPaddedNumAtoms
()
*
sizeof
(
long
long
),
CU_MEMHOSTALLOC_PORTABLE
),
"Error allocating pinned memory"
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedPositionBuffer
,
cu
.
getPaddedNumAtoms
()
*
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double4
)
:
sizeof
(
float4
)),
CU_MEMHOSTALLOC_PORTABLE
),
"Error allocating pinned memory"
);
}
// Copy coordinates over to each device and execute the kernel.
cu
.
getPosq
().
download
(
pinnedPositionBuffer
);
if
(
!
(
cu
.
getPlatformData
().
peerAccessSupported
&&
cu
.
getPlatformData
().
contexts
.
size
()
<
3
))
cu
.
getPosq
().
download
(
pinnedPositionBuffer
);
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
data
.
contextEnergy
[
i
]
=
0.0
;
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
...
...
@@ -170,7 +184,7 @@ double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& con
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
CudaContext
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
thread
.
addTask
(
new
FinishComputationTask
(
context
,
cu
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
groups
,
data
.
contextEnergy
[
i
],
completionTimes
[
i
],
pinnedForceBuffer
));
thread
.
addTask
(
new
FinishComputationTask
(
context
,
cu
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
groups
,
data
.
contextEnergy
[
i
],
completionTimes
[
i
],
pinnedForceBuffer
,
*
contextForces
));
}
data
.
syncContexts
();
double
energy
=
0.0
;
...
...
platforms/cuda/src/CudaPlatform.cpp
View file @
41bcc20f
...
...
@@ -192,6 +192,26 @@ CudaPlatform::PlatformData::PlatformData(const System& system, const string& dev
propertyValues
[
CudaPlatform
::
CudaCompiler
()]
=
compilerProperty
;
propertyValues
[
CudaPlatform
::
CudaTempDirectory
()]
=
tempProperty
;
contextEnergy
.
resize
(
contexts
.
size
());
// Determine whether peer-to-peer copying is supported, and enable it if so.
peerAccessSupported
=
true
;
for
(
int
i
=
1
;
i
<
contexts
.
size
();
i
++
)
{
int
canAccess
;
cuDeviceCanAccessPeer
(
&
canAccess
,
contexts
[
i
]
->
getDevice
(),
contexts
[
0
]
->
getDevice
());
if
(
!
canAccess
)
{
peerAccessSupported
=
false
;
break
;
}
}
if
(
peerAccessSupported
)
{
for
(
int
i
=
1
;
i
<
contexts
.
size
();
i
++
)
{
contexts
[
0
]
->
setAsCurrent
();
CHECK_RESULT
(
cuCtxEnablePeerAccess
(
contexts
[
i
]
->
getContext
(),
0
),
"Error enabling peer access"
);
contexts
[
i
]
->
setAsCurrent
();
CHECK_RESULT
(
cuCtxEnablePeerAccess
(
contexts
[
0
]
->
getContext
(),
0
),
"Error enabling peer access"
);
}
}
}
CudaPlatform
::
PlatformData
::~
PlatformData
()
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment