Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
67a3b4c0
Commit
67a3b4c0
authored
Nov 05, 2014
by
peastman
Browse files
Further improvements to multi-GPU performance
parent
f32c804b
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
10 additions
and
8 deletions
+10
-8
platforms/cuda/include/CudaParallelKernels.h
platforms/cuda/include/CudaParallelKernels.h
+1
-0
platforms/cuda/src/CudaParallelKernels.cpp
platforms/cuda/src/CudaParallelKernels.cpp
+9
-8
No files found.
platforms/cuda/include/CudaParallelKernels.h
View file @
67a3b4c0
...
@@ -86,6 +86,7 @@ private:
...
@@ -86,6 +86,7 @@ private:
long
long
*
pinnedForceBuffer
;
long
long
*
pinnedForceBuffer
;
CUfunction
sumKernel
;
CUfunction
sumKernel
;
CUevent
event
;
CUevent
event
;
CUstream
peerCopyStream
;
};
};
/**
/**
...
...
platforms/cuda/src/CudaParallelKernels.cpp
View file @
67a3b4c0
...
@@ -71,10 +71,9 @@ public:
...
@@ -71,10 +71,9 @@ public:
cu
.
setAsCurrent
();
cu
.
setAsCurrent
();
if
(
cu
.
getContextIndex
()
>
0
)
{
if
(
cu
.
getContextIndex
()
>
0
)
{
if
(
!
cu
.
getPlatformData
().
peerAccessSupported
)
{
cuStreamWaitEvent
(
cu
.
getCurrentStream
(),
event
,
0
);
cuStreamWaitEvent
(
cu
.
getCurrentStream
(),
event
,
0
);
if
(
!
cu
.
getPlatformData
().
peerAccessSupported
)
cu
.
getPosq
().
upload
(
pinnedMemory
,
false
);
cu
.
getPosq
().
upload
(
pinnedMemory
,
false
);
}
}
}
kernel
.
beginComputation
(
context
,
includeForce
,
includeEnergy
,
groups
);
kernel
.
beginComputation
(
context
,
includeForce
,
includeEnergy
,
groups
);
}
}
...
@@ -146,6 +145,7 @@ CudaParallelCalcForcesAndEnergyKernel::~CudaParallelCalcForcesAndEnergyKernel()
...
@@ -146,6 +145,7 @@ CudaParallelCalcForcesAndEnergyKernel::~CudaParallelCalcForcesAndEnergyKernel()
if
(
pinnedForceBuffer
!=
NULL
)
if
(
pinnedForceBuffer
!=
NULL
)
cuMemFreeHost
(
pinnedForceBuffer
);
cuMemFreeHost
(
pinnedForceBuffer
);
cuEventDestroy
(
event
);
cuEventDestroy
(
event
);
cuStreamDestroy
(
peerCopyStream
);
}
}
void
CudaParallelCalcForcesAndEnergyKernel
::
initialize
(
const
System
&
system
)
{
void
CudaParallelCalcForcesAndEnergyKernel
::
initialize
(
const
System
&
system
)
{
...
@@ -158,6 +158,7 @@ void CudaParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
...
@@ -158,6 +158,7 @@ void CudaParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
for
(
int
i
=
0
;
i
<
(
int
)
contextNonbondedFractions
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
contextNonbondedFractions
.
size
();
i
++
)
contextNonbondedFractions
[
i
]
=
1
/
(
double
)
contextNonbondedFractions
.
size
();
contextNonbondedFractions
[
i
]
=
1
/
(
double
)
contextNonbondedFractions
.
size
();
CHECK_RESULT
(
cuEventCreate
(
&
event
,
0
),
"Error creating event"
);
CHECK_RESULT
(
cuEventCreate
(
&
event
,
0
),
"Error creating event"
);
CHECK_RESULT
(
cuStreamCreate
(
&
peerCopyStream
,
CU_STREAM_NON_BLOCKING
),
"Error creating stream"
);
}
}
void
CudaParallelCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
)
{
void
CudaParallelCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
)
{
...
@@ -177,11 +178,11 @@ void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& contex
...
@@ -177,11 +178,11 @@ void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& contex
}
}
else
{
else
{
int
numBytes
=
cu
.
getPosq
().
getSize
()
*
cu
.
getPosq
().
getElementSize
();
int
numBytes
=
cu
.
getPosq
().
getSize
()
*
cu
.
getPosq
().
getElementSize
();
for
(
int
i
=
1
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
cuEventRecord
(
event
,
cu
.
getCurrentStream
());
data
.
contexts
[
i
]
->
setAsCurrent
(
);
cuStreamWaitEvent
(
peerCopyStream
,
event
,
0
);
CHECK_RESULT
(
cuMemcpyAsync
(
data
.
contexts
[
i
]
->
getPosq
().
getDevicePointer
(),
cu
.
getPosq
().
getDevicePointer
(),
numBytes
,
0
),
"Error copying positions"
);
for
(
int
i
=
1
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
}
CHECK_RESULT
(
cuMemcpyAsync
(
data
.
contexts
[
i
]
->
getPosq
().
getDevicePointer
(),
cu
.
getPosq
().
getDevicePointer
(),
numBytes
,
peerCopyStream
),
"Error copying positions"
);
cu
.
setAsCurrent
(
);
cu
EventRecord
(
event
,
peerCopyStream
);
}
}
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
data
.
contextEnergy
[
i
]
=
0.0
;
data
.
contextEnergy
[
i
]
=
0.0
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment