Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
55bae85f
Commit
55bae85f
authored
Oct 23, 2014
by
peastman
Browse files
Merge pull request #668 from peastman/multi
Optimizations to multi-GPU calculations
parents
e19cefde
390e0a6b
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
34 additions
and
25 deletions
+34
-25
platforms/cuda/include/CudaParallelKernels.h
platforms/cuda/include/CudaParallelKernels.h
+1
-0
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+10
-0
platforms/cuda/src/CudaParallelKernels.cpp
platforms/cuda/src/CudaParallelKernels.cpp
+16
-9
platforms/cuda/src/CudaPlatform.cpp
platforms/cuda/src/CudaPlatform.cpp
+7
-16
No files found.
platforms/cuda/include/CudaParallelKernels.h
View file @
55bae85f
...
...
@@ -85,6 +85,7 @@ private:
void
*
pinnedPositionBuffer
;
long
long
*
pinnedForceBuffer
;
CUfunction
sumKernel
;
CUevent
event
;
};
/**
...
...
platforms/cuda/src/CudaContext.cpp
View file @
55bae85f
...
...
@@ -154,6 +154,16 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
CHECK_RESULT
(
cuCtxCreate
(
&
context
,
flags
,
device
));
contextIsValid
=
true
;
CHECK_RESULT
(
cuCtxSetCacheConfig
(
CU_FUNC_CACHE_PREFER_SHARED
));
if
(
contextIndex
>
0
)
{
int
canAccess
;
cuDeviceCanAccessPeer
(
&
canAccess
,
getDevice
(),
platformData
.
contexts
[
0
]
->
getDevice
());
if
(
canAccess
)
{
platformData
.
contexts
[
0
]
->
setAsCurrent
();
CHECK_RESULT
(
cuCtxEnablePeerAccess
(
getContext
(),
0
));
setAsCurrent
();
CHECK_RESULT
(
cuCtxEnablePeerAccess
(
platformData
.
contexts
[
0
]
->
getContext
(),
0
));
}
}
numAtoms
=
system
.
getNumParticles
();
paddedNumAtoms
=
TileSize
*
((
numAtoms
+
TileSize
-
1
)
/
TileSize
);
numAtomBlocks
=
(
paddedNumAtoms
+
(
TileSize
-
1
))
/
TileSize
;
...
...
platforms/cuda/src/CudaParallelKernels.cpp
View file @
55bae85f
...
...
@@ -63,22 +63,24 @@ if (result != CUDA_SUCCESS) { \
class
CudaParallelCalcForcesAndEnergyKernel
::
BeginComputationTask
:
public
CudaContext
::
WorkTask
{
public:
BeginComputationTask
(
ContextImpl
&
context
,
CudaContext
&
cu
,
CudaCalcForcesAndEnergyKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
void
*
pinnedMemory
)
:
context
(
context
),
cu
(
cu
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
groups
(
groups
),
pinnedMemory
(
pinnedMemory
)
{
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
void
*
pinnedMemory
,
CUevent
event
)
:
context
(
context
),
cu
(
cu
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
groups
(
groups
),
pinnedMemory
(
pinnedMemory
)
,
event
(
event
)
{
}
void
execute
()
{
// Copy coordinates over to this device and execute the kernel.
cu
.
setAsCurrent
();
if
(
cu
.
getContextIndex
()
>
0
)
{
if
(
cu
.
getPlatformData
().
peerAccessSupported
&&
cu
.
getPlatformData
().
contexts
.
size
()
<
3
)
{
if
(
cu
.
getPlatformData
().
peerAccessSupported
&&
false
)
{
// Why is the peer-to-peer copy slower???
CudaContext
&
context0
=
*
cu
.
getPlatformData
().
contexts
[
0
];
int
numBytes
=
cu
.
getPosq
().
getSize
()
*
cu
.
getPosq
().
getElementSize
();
CHECK_RESULT
(
cuMemcpy
Peer
Async
(
cu
.
getPosq
().
getDevicePointer
(),
cu
.
getContext
(),
context0
.
getPosq
().
getDevicePointer
(),
context0
.
getContext
(),
numBytes
,
0
),
"Error copying positions"
);
CHECK_RESULT
(
cuMemcpyAsync
(
cu
.
getPosq
().
getDevicePointer
(),
context0
.
getPosq
().
getDevicePointer
(),
numBytes
,
0
),
"Error copying positions"
);
}
else
else
{
cuStreamWaitEvent
(
cu
.
getCurrentStream
(),
event
,
0
);
cu
.
getPosq
().
upload
(
pinnedMemory
,
false
);
}
}
kernel
.
beginComputation
(
context
,
includeForce
,
includeEnergy
,
groups
);
}
private:
...
...
@@ -88,6 +90,7 @@ private:
bool
includeForce
,
includeEnergy
;
int
groups
;
void
*
pinnedMemory
;
CUevent
event
;
};
class
CudaParallelCalcForcesAndEnergyKernel
::
FinishComputationTask
:
public
CudaContext
::
WorkTask
{
...
...
@@ -108,7 +111,7 @@ public:
int
numBytes
=
numAtoms
*
3
*
sizeof
(
long
long
);
int
offset
=
(
cu
.
getContextIndex
()
-
1
)
*
numBytes
;
CudaContext
&
context0
=
*
cu
.
getPlatformData
().
contexts
[
0
];
CHECK_RESULT
(
cuMemcpy
Peer
(
contextForces
.
getDevicePointer
()
+
offset
,
context0
.
getContext
(),
cu
.
getForce
().
getDevicePointer
(),
cu
.
getContext
(),
numBytes
),
"Error copying forces"
);
CHECK_RESULT
(
cuMemcpy
(
contextForces
.
getDevicePointer
()
+
offset
,
cu
.
getForce
().
getDevicePointer
(),
numBytes
),
"Error copying forces"
);
}
else
cu
.
getForce
().
download
(
&
pinnedMemory
[(
cu
.
getContextIndex
()
-
1
)
*
numAtoms
*
3
]);
...
...
@@ -146,6 +149,7 @@ CudaParallelCalcForcesAndEnergyKernel::~CudaParallelCalcForcesAndEnergyKernel()
cuMemFreeHost
(
pinnedPositionBuffer
);
if
(
pinnedForceBuffer
!=
NULL
)
cuMemFreeHost
(
pinnedForceBuffer
);
cuEventDestroy
(
event
);
}
void
CudaParallelCalcForcesAndEnergyKernel
::
initialize
(
const
System
&
system
)
{
...
...
@@ -157,6 +161,7 @@ void CudaParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
getKernel
(
i
).
initialize
(
system
);
for
(
int
i
=
0
;
i
<
(
int
)
contextNonbondedFractions
.
size
();
i
++
)
contextNonbondedFractions
[
i
]
=
1
/
(
double
)
contextNonbondedFractions
.
size
();
CHECK_RESULT
(
cuEventCreate
(
&
event
,
0
),
"Error creating event"
);
}
void
CudaParallelCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
)
{
...
...
@@ -170,13 +175,15 @@ void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& contex
// Copy coordinates over to each device and execute the kernel.
if
(
!
(
cu
.
getPlatformData
().
peerAccessSupported
&&
cu
.
getPlatformData
().
contexts
.
size
()
<
3
))
cu
.
getPosq
().
download
(
pinnedPositionBuffer
);
if
(
!
(
cu
.
getPlatformData
().
peerAccessSupported
&&
false
))
{
// Why is this faster than a peer-to-peer copy???
cu
.
getPosq
().
download
(
pinnedPositionBuffer
,
false
);
cuEventRecord
(
event
,
cu
.
getCurrentStream
());
}
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
data
.
contextEnergy
[
i
]
=
0.0
;
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
CudaContext
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
thread
.
addTask
(
new
BeginComputationTask
(
context
,
cu
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
groups
,
pinnedPositionBuffer
));
thread
.
addTask
(
new
BeginComputationTask
(
context
,
cu
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
groups
,
pinnedPositionBuffer
,
event
));
}
}
...
...
platforms/cuda/src/CudaPlatform.cpp
View file @
55bae85f
...
...
@@ -229,22 +229,13 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
// Determine whether peer-to-peer copying is supported, and enable it if so.
peerAccessSupported
=
false
;
// Disable until I figure out why it usually makes things slower
// peerAccessSupported = true;
// for (int i = 1; i < contexts.size(); i++) {
// int canAccess;
// cuDeviceCanAccessPeer(&canAccess, contexts[i]->getDevice(), contexts[0]->getDevice());
// if (!canAccess) {
// peerAccessSupported = false;
// break;
// }
// }
if
(
peerAccessSupported
)
{
peerAccessSupported
=
true
;
for
(
int
i
=
1
;
i
<
contexts
.
size
();
i
++
)
{
contexts
[
0
]
->
setAsCurrent
();
CHECK_RESULT
(
cuCtxEnablePeerAccess
(
contexts
[
i
]
->
getContext
(),
0
),
"Error enabling peer access"
);
contexts
[
i
]
->
setAsCurrent
();
CHECK_RESULT
(
cuCtxEnablePeerAccess
(
contexts
[
0
]
->
getContext
(),
0
),
"Error enabling peer access"
);
int
canAccess
;
cuDeviceCanAccessPeer
(
&
canAccess
,
contexts
[
i
]
->
getDevice
(),
contexts
[
0
]
->
getDevice
());
if
(
!
canAccess
)
{
peerAccessSupported
=
false
;
break
;
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment