Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
f7ef2dd0
"openmmapi/vscode:/vscode.git/clone" did not exist on "7398e5a310b128e26084b8279874a87bca140ab8"
Commit
f7ef2dd0
authored
Mar 10, 2015
by
peastman
Browse files
More bug fixes to multi-GPU
parent
bd666c27
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
20 additions
and
55 deletions
+20
-55
platforms/cuda/src/CudaNonbondedUtilities.cpp
platforms/cuda/src/CudaNonbondedUtilities.cpp
+1
-16
platforms/cuda/src/CudaParallelKernels.cpp
platforms/cuda/src/CudaParallelKernels.cpp
+9
-11
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+1
-17
platforms/opencl/src/OpenCLParallelKernels.cpp
platforms/opencl/src/OpenCLParallelKernels.cpp
+9
-11
No files found.
platforms/cuda/src/CudaNonbondedUtilities.cpp
View file @
f7ef2dd0
...
@@ -264,14 +264,6 @@ void CudaNonbondedUtilities::initialize(const System& system) {
...
@@ -264,14 +264,6 @@ void CudaNonbondedUtilities::initialize(const System& system) {
sortedBlockCenter
=
new
CudaArray
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockCenter"
);
sortedBlockCenter
=
new
CudaArray
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockCenter"
);
sortedBlockBoundingBox
=
new
CudaArray
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockBoundingBox"
);
sortedBlockBoundingBox
=
new
CudaArray
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockBoundingBox"
);
oldPositions
=
new
CudaArray
(
context
,
numAtoms
,
4
*
elementSize
,
"oldPositions"
);
oldPositions
=
new
CudaArray
(
context
,
numAtoms
,
4
*
elementSize
,
"oldPositions"
);
if
(
context
.
getUseDoublePrecision
())
{
vector
<
double4
>
oldPositionsVec
(
numAtoms
,
make_double4
(
1e30
,
1e30
,
1e30
,
0
));
oldPositions
->
upload
(
oldPositionsVec
);
}
else
{
vector
<
float4
>
oldPositionsVec
(
numAtoms
,
make_float4
(
1e30
f
,
1e30
f
,
1e30
f
,
0
));
oldPositions
->
upload
(
oldPositionsVec
);
}
rebuildNeighborList
=
CudaArray
::
create
<
int
>
(
context
,
1
,
"rebuildNeighborList"
);
rebuildNeighborList
=
CudaArray
::
create
<
int
>
(
context
,
1
,
"rebuildNeighborList"
);
blockSorter
=
new
CudaSort
(
context
,
new
BlockSortTrait
(
context
.
getUseDoublePrecision
()),
numAtomBlocks
);
blockSorter
=
new
CudaSort
(
context
,
new
BlockSortTrait
(
context
.
getUseDoublePrecision
()),
numAtomBlocks
);
vector
<
unsigned
int
>
count
(
1
,
0
);
vector
<
unsigned
int
>
count
(
1
,
0
);
...
@@ -402,14 +394,7 @@ void CudaNonbondedUtilities::updateNeighborListSize() {
...
@@ -402,14 +394,7 @@ void CudaNonbondedUtilities::updateNeighborListSize() {
if
(
forceArgs
.
size
()
>
0
)
if
(
forceArgs
.
size
()
>
0
)
forceArgs
[
17
]
=
&
interactingAtoms
->
getDevicePointer
();
forceArgs
[
17
]
=
&
interactingAtoms
->
getDevicePointer
();
findInteractingBlocksArgs
[
7
]
=
&
interactingAtoms
->
getDevicePointer
();
findInteractingBlocksArgs
[
7
]
=
&
interactingAtoms
->
getDevicePointer
();
if
(
context
.
getUseDoublePrecision
())
{
forceRebuildNeighborList
=
true
;
vector
<
double4
>
oldPositionsVec
(
numAtoms
,
make_double4
(
1e30
,
1e30
,
1e30
,
0
));
oldPositions
->
upload
(
oldPositionsVec
);
}
else
{
vector
<
float4
>
oldPositionsVec
(
numAtoms
,
make_float4
(
1e30
f
,
1e30
f
,
1e30
f
,
0
));
oldPositions
->
upload
(
oldPositionsVec
);
}
}
}
void
CudaNonbondedUtilities
::
setUsePadding
(
bool
padding
)
{
void
CudaNonbondedUtilities
::
setUsePadding
(
bool
padding
)
{
...
...
platforms/cuda/src/CudaParallelKernels.cpp
View file @
f7ef2dd0
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2011-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -93,9 +93,9 @@ private:
...
@@ -93,9 +93,9 @@ private:
class
CudaParallelCalcForcesAndEnergyKernel
::
FinishComputationTask
:
public
CudaContext
::
WorkTask
{
class
CudaParallelCalcForcesAndEnergyKernel
::
FinishComputationTask
:
public
CudaContext
::
WorkTask
{
public:
public:
FinishComputationTask
(
ContextImpl
&
context
,
CudaContext
&
cu
,
CudaCalcForcesAndEnergyKernel
&
kernel
,
FinishComputationTask
(
ContextImpl
&
context
,
CudaContext
&
cu
,
CudaCalcForcesAndEnergyKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
double
&
energy
,
long
long
&
completionTime
,
long
long
*
pinnedMemory
,
CudaArray
&
contextForces
,
bool
&
valid
)
:
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
double
&
energy
,
long
long
&
completionTime
,
long
long
*
pinnedMemory
,
CudaArray
&
contextForces
,
bool
&
valid
,
int
&
numTiles
)
:
context
(
context
),
cu
(
cu
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
groups
(
groups
),
energy
(
energy
),
context
(
context
),
cu
(
cu
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
groups
(
groups
),
energy
(
energy
),
completionTime
(
completionTime
),
pinnedMemory
(
pinnedMemory
),
contextForces
(
contextForces
),
valid
(
valid
)
{
completionTime
(
completionTime
),
pinnedMemory
(
pinnedMemory
),
contextForces
(
contextForces
),
valid
(
valid
)
,
numTiles
(
numTiles
)
{
}
}
void
execute
()
{
void
execute
()
{
// Execute the kernel, then download forces.
// Execute the kernel, then download forces.
...
@@ -120,6 +120,10 @@ public:
...
@@ -120,6 +120,10 @@ public:
cu
.
getForce
().
download
(
&
pinnedMemory
[(
cu
.
getContextIndex
()
-
1
)
*
numAtoms
*
3
]);
cu
.
getForce
().
download
(
&
pinnedMemory
[(
cu
.
getContextIndex
()
-
1
)
*
numAtoms
*
3
]);
}
}
}
}
if
(
cu
.
getNonbondedUtilities
().
getUsePeriodic
()
&&
numTiles
>
cu
.
getNonbondedUtilities
().
getInteractingTiles
().
getSize
())
{
valid
=
false
;
cu
.
getNonbondedUtilities
().
updateNeighborListSize
();
}
}
}
private:
private:
ContextImpl
&
context
;
ContextImpl
&
context
;
...
@@ -132,6 +136,7 @@ private:
...
@@ -132,6 +136,7 @@ private:
long
long
*
pinnedMemory
;
long
long
*
pinnedMemory
;
CudaArray
&
contextForces
;
CudaArray
&
contextForces
;
bool
&
valid
;
bool
&
valid
;
int
&
numTiles
;
};
};
CudaParallelCalcForcesAndEnergyKernel
::
CudaParallelCalcForcesAndEnergyKernel
(
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
)
:
CudaParallelCalcForcesAndEnergyKernel
::
CudaParallelCalcForcesAndEnergyKernel
(
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
)
:
...
@@ -201,16 +206,9 @@ double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& con
...
@@ -201,16 +206,9 @@ double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& con
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
CudaContext
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
CudaContext
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
thread
.
addTask
(
new
FinishComputationTask
(
context
,
cu
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
groups
,
data
.
contextEnergy
[
i
],
completionTimes
[
i
],
pinnedForceBuffer
,
*
contextForces
,
valid
));
thread
.
addTask
(
new
FinishComputationTask
(
context
,
cu
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
groups
,
data
.
contextEnergy
[
i
],
completionTimes
[
i
],
pinnedForceBuffer
,
*
contextForces
,
valid
,
tileCounts
[
i
]
));
}
}
data
.
syncContexts
();
data
.
syncContexts
();
if
(
data
.
contexts
[
0
]
->
getNonbondedUtilities
().
getUsePeriodic
())
{
for
(
int
i
=
0
;
i
<
(
int
)
tileCounts
.
size
();
i
++
)
if
(
tileCounts
[
i
]
>
data
.
contexts
[
i
]
->
getNonbondedUtilities
().
getInteractingTiles
().
getSize
())
{
valid
=
false
;
data
.
contexts
[
i
]
->
getNonbondedUtilities
().
updateNeighborListSize
();
}
}
double
energy
=
0.0
;
double
energy
=
0.0
;
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contextEnergy
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contextEnergy
.
size
();
i
++
)
energy
+=
data
.
contextEnergy
[
i
];
energy
+=
data
.
contextEnergy
[
i
];
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
f7ef2dd0
...
@@ -282,14 +282,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -282,14 +282,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
sortedBlockCenter
=
new
OpenCLArray
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockCenter"
);
sortedBlockCenter
=
new
OpenCLArray
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockCenter"
);
sortedBlockBoundingBox
=
new
OpenCLArray
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockBoundingBox"
);
sortedBlockBoundingBox
=
new
OpenCLArray
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockBoundingBox"
);
oldPositions
=
new
OpenCLArray
(
context
,
numAtoms
,
4
*
elementSize
,
"oldPositions"
);
oldPositions
=
new
OpenCLArray
(
context
,
numAtoms
,
4
*
elementSize
,
"oldPositions"
);
if
(
context
.
getUseDoublePrecision
())
{
vector
<
mm_double4
>
oldPositionsVec
(
numAtoms
,
mm_double4
(
1e30
,
1e30
,
1e30
,
0
));
oldPositions
->
upload
(
oldPositionsVec
);
}
else
{
vector
<
mm_float4
>
oldPositionsVec
(
numAtoms
,
mm_float4
(
1e30
f
,
1e30
f
,
1e30
f
,
0
));
oldPositions
->
upload
(
oldPositionsVec
);
}
rebuildNeighborList
=
OpenCLArray
::
create
<
int
>
(
context
,
1
,
"rebuildNeighborList"
);
rebuildNeighborList
=
OpenCLArray
::
create
<
int
>
(
context
,
1
,
"rebuildNeighborList"
);
blockSorter
=
new
OpenCLSort
(
context
,
new
BlockSortTrait
(
context
.
getUseDoublePrecision
()),
numAtomBlocks
);
blockSorter
=
new
OpenCLSort
(
context
,
new
BlockSortTrait
(
context
.
getUseDoublePrecision
()),
numAtomBlocks
);
vector
<
cl_uint
>
count
(
1
,
0
);
vector
<
cl_uint
>
count
(
1
,
0
);
...
@@ -447,15 +439,7 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() {
...
@@ -447,15 +439,7 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() {
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingAtoms
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingAtoms
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
maxTiles
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
maxTiles
);
int
numAtoms
=
context
.
getNumAtoms
();
sortBoxDataKernel
.
setArg
<
cl_int
>
(
9
,
true
);
if
(
context
.
getUseDoublePrecision
())
{
vector
<
mm_double4
>
oldPositionsVec
(
numAtoms
,
mm_double4
(
1e30
,
1e30
,
1e30
,
0
));
oldPositions
->
upload
(
oldPositionsVec
);
}
else
{
vector
<
mm_float4
>
oldPositionsVec
(
numAtoms
,
mm_float4
(
1e30
f
,
1e30
f
,
1e30
f
,
0
));
oldPositions
->
upload
(
oldPositionsVec
);
}
}
}
void
OpenCLNonbondedUtilities
::
setUsePadding
(
bool
padding
)
{
void
OpenCLNonbondedUtilities
::
setUsePadding
(
bool
padding
)
{
...
...
platforms/opencl/src/OpenCLParallelKernels.cpp
View file @
f7ef2dd0
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2011-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -79,9 +79,9 @@ private:
...
@@ -79,9 +79,9 @@ private:
class
OpenCLParallelCalcForcesAndEnergyKernel
::
FinishComputationTask
:
public
OpenCLContext
::
WorkTask
{
class
OpenCLParallelCalcForcesAndEnergyKernel
::
FinishComputationTask
:
public
OpenCLContext
::
WorkTask
{
public:
public:
FinishComputationTask
(
ContextImpl
&
context
,
OpenCLContext
&
cl
,
OpenCLCalcForcesAndEnergyKernel
&
kernel
,
FinishComputationTask
(
ContextImpl
&
context
,
OpenCLContext
&
cl
,
OpenCLCalcForcesAndEnergyKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
double
&
energy
,
long
long
&
completionTime
,
void
*
pinnedMemory
,
bool
&
valid
)
:
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
double
&
energy
,
long
long
&
completionTime
,
void
*
pinnedMemory
,
bool
&
valid
,
int
&
numTiles
)
:
context
(
context
),
cl
(
cl
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
groups
(
groups
),
energy
(
energy
),
context
(
context
),
cl
(
cl
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
groups
(
groups
),
energy
(
energy
),
completionTime
(
completionTime
),
pinnedMemory
(
pinnedMemory
),
valid
(
valid
)
{
completionTime
(
completionTime
),
pinnedMemory
(
pinnedMemory
),
valid
(
valid
)
,
numTiles
(
numTiles
)
{
}
}
void
execute
()
{
void
execute
()
{
// Execute the kernel, then download forces.
// Execute the kernel, then download forces.
...
@@ -98,6 +98,10 @@ public:
...
@@ -98,6 +98,10 @@ public:
cl
.
getQueue
().
finish
();
cl
.
getQueue
().
finish
();
}
}
completionTime
=
getTime
();
completionTime
=
getTime
();
if
(
cl
.
getNonbondedUtilities
().
getUsePeriodic
()
&&
numTiles
>
cl
.
getNonbondedUtilities
().
getInteractingTiles
().
getSize
())
{
valid
=
false
;
cl
.
getNonbondedUtilities
().
updateNeighborListSize
();
}
}
}
private:
private:
ContextImpl
&
context
;
ContextImpl
&
context
;
...
@@ -109,6 +113,7 @@ private:
...
@@ -109,6 +113,7 @@ private:
long
long
&
completionTime
;
long
long
&
completionTime
;
void
*
pinnedMemory
;
void
*
pinnedMemory
;
bool
&
valid
;
bool
&
valid
;
int
&
numTiles
;
};
};
OpenCLParallelCalcForcesAndEnergyKernel
::
OpenCLParallelCalcForcesAndEnergyKernel
(
string
name
,
const
Platform
&
platform
,
OpenCLPlatform
::
PlatformData
&
data
)
:
OpenCLParallelCalcForcesAndEnergyKernel
::
OpenCLParallelCalcForcesAndEnergyKernel
(
string
name
,
const
Platform
&
platform
,
OpenCLPlatform
::
PlatformData
&
data
)
:
...
@@ -162,16 +167,9 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c
...
@@ -162,16 +167,9 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
OpenCLContext
&
cl
=
*
data
.
contexts
[
i
];
OpenCLContext
&
cl
=
*
data
.
contexts
[
i
];
OpenCLContext
::
WorkThread
&
thread
=
cl
.
getWorkThread
();
OpenCLContext
::
WorkThread
&
thread
=
cl
.
getWorkThread
();
thread
.
addTask
(
new
FinishComputationTask
(
context
,
cl
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
groups
,
data
.
contextEnergy
[
i
],
completionTimes
[
i
],
pinnedForceMemory
,
valid
));
thread
.
addTask
(
new
FinishComputationTask
(
context
,
cl
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
groups
,
data
.
contextEnergy
[
i
],
completionTimes
[
i
],
pinnedForceMemory
,
valid
,
tileCounts
[
i
]
));
}
}
data
.
syncContexts
();
data
.
syncContexts
();
if
(
data
.
contexts
[
0
]
->
getNonbondedUtilities
().
getUsePeriodic
())
{
for
(
int
i
=
0
;
i
<
(
int
)
tileCounts
.
size
();
i
++
)
if
(
tileCounts
[
i
]
>
data
.
contexts
[
i
]
->
getNonbondedUtilities
().
getInteractingTiles
().
getSize
())
{
valid
=
false
;
data
.
contexts
[
i
]
->
getNonbondedUtilities
().
updateNeighborListSize
();
}
}
double
energy
=
0.0
;
double
energy
=
0.0
;
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contextEnergy
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contextEnergy
.
size
();
i
++
)
energy
+=
data
.
contextEnergy
[
i
];
energy
+=
data
.
contextEnergy
[
i
];
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment