Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
f7ef2dd0
Commit
f7ef2dd0
authored
Mar 10, 2015
by
peastman
Browse files
More bug fixes to multi-GPU
parent
bd666c27
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
20 additions
and
55 deletions
+20
-55
platforms/cuda/src/CudaNonbondedUtilities.cpp
platforms/cuda/src/CudaNonbondedUtilities.cpp
+1
-16
platforms/cuda/src/CudaParallelKernels.cpp
platforms/cuda/src/CudaParallelKernels.cpp
+9
-11
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+1
-17
platforms/opencl/src/OpenCLParallelKernels.cpp
platforms/opencl/src/OpenCLParallelKernels.cpp
+9
-11
No files found.
platforms/cuda/src/CudaNonbondedUtilities.cpp
View file @
f7ef2dd0
...
...
@@ -264,14 +264,6 @@ void CudaNonbondedUtilities::initialize(const System& system) {
sortedBlockCenter
=
new
CudaArray
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockCenter"
);
sortedBlockBoundingBox
=
new
CudaArray
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockBoundingBox"
);
oldPositions
=
new
CudaArray
(
context
,
numAtoms
,
4
*
elementSize
,
"oldPositions"
);
if
(
context
.
getUseDoublePrecision
())
{
vector
<
double4
>
oldPositionsVec
(
numAtoms
,
make_double4
(
1e30
,
1e30
,
1e30
,
0
));
oldPositions
->
upload
(
oldPositionsVec
);
}
else
{
vector
<
float4
>
oldPositionsVec
(
numAtoms
,
make_float4
(
1e30
f
,
1e30
f
,
1e30
f
,
0
));
oldPositions
->
upload
(
oldPositionsVec
);
}
rebuildNeighborList
=
CudaArray
::
create
<
int
>
(
context
,
1
,
"rebuildNeighborList"
);
blockSorter
=
new
CudaSort
(
context
,
new
BlockSortTrait
(
context
.
getUseDoublePrecision
()),
numAtomBlocks
);
vector
<
unsigned
int
>
count
(
1
,
0
);
...
...
@@ -402,14 +394,7 @@ void CudaNonbondedUtilities::updateNeighborListSize() {
if
(
forceArgs
.
size
()
>
0
)
forceArgs
[
17
]
=
&
interactingAtoms
->
getDevicePointer
();
findInteractingBlocksArgs
[
7
]
=
&
interactingAtoms
->
getDevicePointer
();
if
(
context
.
getUseDoublePrecision
())
{
vector
<
double4
>
oldPositionsVec
(
numAtoms
,
make_double4
(
1e30
,
1e30
,
1e30
,
0
));
oldPositions
->
upload
(
oldPositionsVec
);
}
else
{
vector
<
float4
>
oldPositionsVec
(
numAtoms
,
make_float4
(
1e30
f
,
1e30
f
,
1e30
f
,
0
));
oldPositions
->
upload
(
oldPositionsVec
);
}
forceRebuildNeighborList
=
true
;
}
void
CudaNonbondedUtilities
::
setUsePadding
(
bool
padding
)
{
...
...
platforms/cuda/src/CudaParallelKernels.cpp
View file @
f7ef2dd0
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2013 Stanford University and the Authors. *
* Portions copyright (c) 2011-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -93,9 +93,9 @@ private:
class
CudaParallelCalcForcesAndEnergyKernel
::
FinishComputationTask
:
public
CudaContext
::
WorkTask
{
public:
FinishComputationTask
(
ContextImpl
&
context
,
CudaContext
&
cu
,
CudaCalcForcesAndEnergyKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
double
&
energy
,
long
long
&
completionTime
,
long
long
*
pinnedMemory
,
CudaArray
&
contextForces
,
bool
&
valid
)
:
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
double
&
energy
,
long
long
&
completionTime
,
long
long
*
pinnedMemory
,
CudaArray
&
contextForces
,
bool
&
valid
,
int
&
numTiles
)
:
context
(
context
),
cu
(
cu
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
groups
(
groups
),
energy
(
energy
),
completionTime
(
completionTime
),
pinnedMemory
(
pinnedMemory
),
contextForces
(
contextForces
),
valid
(
valid
)
{
completionTime
(
completionTime
),
pinnedMemory
(
pinnedMemory
),
contextForces
(
contextForces
),
valid
(
valid
)
,
numTiles
(
numTiles
)
{
}
void
execute
()
{
// Execute the kernel, then download forces.
...
...
@@ -120,6 +120,10 @@ public:
cu
.
getForce
().
download
(
&
pinnedMemory
[(
cu
.
getContextIndex
()
-
1
)
*
numAtoms
*
3
]);
}
}
if
(
cu
.
getNonbondedUtilities
().
getUsePeriodic
()
&&
numTiles
>
cu
.
getNonbondedUtilities
().
getInteractingTiles
().
getSize
())
{
valid
=
false
;
cu
.
getNonbondedUtilities
().
updateNeighborListSize
();
}
}
private:
ContextImpl
&
context
;
...
...
@@ -132,6 +136,7 @@ private:
long
long
*
pinnedMemory
;
CudaArray
&
contextForces
;
bool
&
valid
;
int
&
numTiles
;
};
CudaParallelCalcForcesAndEnergyKernel
::
CudaParallelCalcForcesAndEnergyKernel
(
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
)
:
...
...
@@ -201,16 +206,9 @@ double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& con
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
CudaContext
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
thread
.
addTask
(
new
FinishComputationTask
(
context
,
cu
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
groups
,
data
.
contextEnergy
[
i
],
completionTimes
[
i
],
pinnedForceBuffer
,
*
contextForces
,
valid
));
thread
.
addTask
(
new
FinishComputationTask
(
context
,
cu
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
groups
,
data
.
contextEnergy
[
i
],
completionTimes
[
i
],
pinnedForceBuffer
,
*
contextForces
,
valid
,
tileCounts
[
i
]
));
}
data
.
syncContexts
();
if
(
data
.
contexts
[
0
]
->
getNonbondedUtilities
().
getUsePeriodic
())
{
for
(
int
i
=
0
;
i
<
(
int
)
tileCounts
.
size
();
i
++
)
if
(
tileCounts
[
i
]
>
data
.
contexts
[
i
]
->
getNonbondedUtilities
().
getInteractingTiles
().
getSize
())
{
valid
=
false
;
data
.
contexts
[
i
]
->
getNonbondedUtilities
().
updateNeighborListSize
();
}
}
double
energy
=
0.0
;
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contextEnergy
.
size
();
i
++
)
energy
+=
data
.
contextEnergy
[
i
];
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
f7ef2dd0
...
...
@@ -282,14 +282,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
sortedBlockCenter
=
new
OpenCLArray
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockCenter"
);
sortedBlockBoundingBox
=
new
OpenCLArray
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockBoundingBox"
);
oldPositions
=
new
OpenCLArray
(
context
,
numAtoms
,
4
*
elementSize
,
"oldPositions"
);
if
(
context
.
getUseDoublePrecision
())
{
vector
<
mm_double4
>
oldPositionsVec
(
numAtoms
,
mm_double4
(
1e30
,
1e30
,
1e30
,
0
));
oldPositions
->
upload
(
oldPositionsVec
);
}
else
{
vector
<
mm_float4
>
oldPositionsVec
(
numAtoms
,
mm_float4
(
1e30
f
,
1e30
f
,
1e30
f
,
0
));
oldPositions
->
upload
(
oldPositionsVec
);
}
rebuildNeighborList
=
OpenCLArray
::
create
<
int
>
(
context
,
1
,
"rebuildNeighborList"
);
blockSorter
=
new
OpenCLSort
(
context
,
new
BlockSortTrait
(
context
.
getUseDoublePrecision
()),
numAtomBlocks
);
vector
<
cl_uint
>
count
(
1
,
0
);
...
...
@@ -447,15 +439,7 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() {
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingAtoms
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
maxTiles
);
int
numAtoms
=
context
.
getNumAtoms
();
if
(
context
.
getUseDoublePrecision
())
{
vector
<
mm_double4
>
oldPositionsVec
(
numAtoms
,
mm_double4
(
1e30
,
1e30
,
1e30
,
0
));
oldPositions
->
upload
(
oldPositionsVec
);
}
else
{
vector
<
mm_float4
>
oldPositionsVec
(
numAtoms
,
mm_float4
(
1e30
f
,
1e30
f
,
1e30
f
,
0
));
oldPositions
->
upload
(
oldPositionsVec
);
}
sortBoxDataKernel
.
setArg
<
cl_int
>
(
9
,
true
);
}
void
OpenCLNonbondedUtilities
::
setUsePadding
(
bool
padding
)
{
...
...
platforms/opencl/src/OpenCLParallelKernels.cpp
View file @
f7ef2dd0
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2013 Stanford University and the Authors. *
* Portions copyright (c) 2011-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -79,9 +79,9 @@ private:
class
OpenCLParallelCalcForcesAndEnergyKernel
::
FinishComputationTask
:
public
OpenCLContext
::
WorkTask
{
public:
FinishComputationTask
(
ContextImpl
&
context
,
OpenCLContext
&
cl
,
OpenCLCalcForcesAndEnergyKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
double
&
energy
,
long
long
&
completionTime
,
void
*
pinnedMemory
,
bool
&
valid
)
:
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
double
&
energy
,
long
long
&
completionTime
,
void
*
pinnedMemory
,
bool
&
valid
,
int
&
numTiles
)
:
context
(
context
),
cl
(
cl
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
groups
(
groups
),
energy
(
energy
),
completionTime
(
completionTime
),
pinnedMemory
(
pinnedMemory
),
valid
(
valid
)
{
completionTime
(
completionTime
),
pinnedMemory
(
pinnedMemory
),
valid
(
valid
)
,
numTiles
(
numTiles
)
{
}
void
execute
()
{
// Execute the kernel, then download forces.
...
...
@@ -98,6 +98,10 @@ public:
cl
.
getQueue
().
finish
();
}
completionTime
=
getTime
();
if
(
cl
.
getNonbondedUtilities
().
getUsePeriodic
()
&&
numTiles
>
cl
.
getNonbondedUtilities
().
getInteractingTiles
().
getSize
())
{
valid
=
false
;
cl
.
getNonbondedUtilities
().
updateNeighborListSize
();
}
}
private:
ContextImpl
&
context
;
...
...
@@ -109,6 +113,7 @@ private:
long
long
&
completionTime
;
void
*
pinnedMemory
;
bool
&
valid
;
int
&
numTiles
;
};
OpenCLParallelCalcForcesAndEnergyKernel
::
OpenCLParallelCalcForcesAndEnergyKernel
(
string
name
,
const
Platform
&
platform
,
OpenCLPlatform
::
PlatformData
&
data
)
:
...
...
@@ -162,16 +167,9 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
OpenCLContext
&
cl
=
*
data
.
contexts
[
i
];
OpenCLContext
::
WorkThread
&
thread
=
cl
.
getWorkThread
();
thread
.
addTask
(
new
FinishComputationTask
(
context
,
cl
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
groups
,
data
.
contextEnergy
[
i
],
completionTimes
[
i
],
pinnedForceMemory
,
valid
));
thread
.
addTask
(
new
FinishComputationTask
(
context
,
cl
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
groups
,
data
.
contextEnergy
[
i
],
completionTimes
[
i
],
pinnedForceMemory
,
valid
,
tileCounts
[
i
]
));
}
data
.
syncContexts
();
if
(
data
.
contexts
[
0
]
->
getNonbondedUtilities
().
getUsePeriodic
())
{
for
(
int
i
=
0
;
i
<
(
int
)
tileCounts
.
size
();
i
++
)
if
(
tileCounts
[
i
]
>
data
.
contexts
[
i
]
->
getNonbondedUtilities
().
getInteractingTiles
().
getSize
())
{
valid
=
false
;
data
.
contexts
[
i
]
->
getNonbondedUtilities
().
updateNeighborListSize
();
}
}
double
energy
=
0.0
;
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contextEnergy
.
size
();
i
++
)
energy
+=
data
.
contextEnergy
[
i
];
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment