Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
1085085d
Commit
1085085d
authored
Feb 04, 2016
by
peastman
Browse files
Merge pull request #1381 from peastman/parallelpositions
Parallelized the code for downloading positions
parents
73a59c33
8d293880
Changes
15
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
201 additions
and
58 deletions
+201
-58
openmmapi/include/openmm/internal/timer.h
openmmapi/include/openmm/internal/timer.h
+59
-0
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+2
-1
platforms/cuda/include/CudaPlatform.h
platforms/cuda/include/CudaPlatform.h
+3
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+54
-21
platforms/cuda/src/CudaPlatform.cpp
platforms/cuda/src/CudaPlatform.cpp
+9
-3
platforms/cuda/tests/TestCudaFFT3D.cpp
platforms/cuda/tests/TestCudaFFT3D.cpp
+1
-1
platforms/cuda/tests/TestCudaRandom.cpp
platforms/cuda/tests/TestCudaRandom.cpp
+1
-1
platforms/cuda/tests/TestCudaSort.cpp
platforms/cuda/tests/TestCudaSort.cpp
+1
-1
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+2
-1
platforms/opencl/include/OpenCLPlatform.h
platforms/opencl/include/OpenCLPlatform.h
+3
-1
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+54
-21
platforms/opencl/src/OpenCLPlatform.cpp
platforms/opencl/src/OpenCLPlatform.cpp
+9
-3
platforms/opencl/tests/TestOpenCLFFT.cpp
platforms/opencl/tests/TestOpenCLFFT.cpp
+1
-1
platforms/opencl/tests/TestOpenCLRandom.cpp
platforms/opencl/tests/TestOpenCLRandom.cpp
+1
-1
platforms/opencl/tests/TestOpenCLSort.cpp
platforms/opencl/tests/TestOpenCLSort.cpp
+1
-1
No files found.
openmmapi/include/openmm/internal/timer.h
0 → 100644
View file @
1085085d
#ifndef OPENMM_TIMER_H_
#define OPENMM_TIMER_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This header provides a static function for querying the current system time in seconds.
* It is useful when profiling.
*/
#ifdef _MSC_VER
#include <Windows.h>
static
double
getCurrentTime
()
{
FILETIME
ft
;
GetSystemTimeAsFileTime
(
&
ft
);
// 100-nanoseconds since 1-1-1601
ULARGE_INTEGER
result
;
result
.
LowPart
=
ft
.
dwLowDateTime
;
result
.
HighPart
=
ft
.
dwHighDateTime
;
return
1e-7
*
result
.
QuadPart
;
}
#else
#include <sys/time.h>
/**
 * Return the current system time in seconds (POSIX implementation).
 *
 * The time is read with gettimeofday(); the result is the whole seconds plus
 * the microsecond remainder converted to seconds.
 */
static double getCurrentTime() {
    struct timeval now;
    gettimeofday(&now, 0);
    const double wholeSeconds = now.tv_sec;
    const double fractionalSeconds = 1e-6*now.tv_usec;
    return wholeSeconds+fractionalSeconds;
}
#endif
#endif
/*OPENMM_TIMER_H_*/
platforms/cuda/include/CudaKernels.h
View file @
1085085d
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
6
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -192,6 +192,7 @@ public:
*/
void
loadCheckpoint
(
ContextImpl
&
context
,
std
::
istream
&
stream
);
private:
class
GetPositionsTask
;
CudaContext
&
cu
;
};
...
...
platforms/cuda/include/CudaPlatform.h
View file @
1085085d
...
...
@@ -29,6 +29,7 @@
#include "openmm/Platform.h"
#include "openmm/System.h"
#include "openmm/internal/ThreadPool.h"
#include "windowsExportCuda.h"
namespace
OpenMM
{
...
...
@@ -122,7 +123,7 @@ class OPENMM_EXPORT_CUDA CudaPlatform::PlatformData {
public:
PlatformData
(
ContextImpl
*
context
,
const
System
&
system
,
const
std
::
string
&
deviceIndexProperty
,
const
std
::
string
&
blockingProperty
,
const
std
::
string
&
precisionProperty
,
const
std
::
string
&
cpuPmeProperty
,
const
std
::
string
&
compilerProperty
,
const
std
::
string
&
tempProperty
,
const
std
::
string
&
hostCompilerProperty
,
const
std
::
string
&
pmeStreamProperty
);
const
std
::
string
&
pmeStreamProperty
,
int
numThreads
);
~
PlatformData
();
void
initializeContexts
(
const
System
&
system
);
void
syncContexts
();
...
...
@@ -134,6 +135,7 @@ public:
int
stepCount
,
computeForceCount
;
double
time
;
std
::
map
<
std
::
string
,
std
::
string
>
propertyValues
;
ThreadPool
threads
;
};
}
// namespace OpenMM
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
1085085d
...
...
@@ -141,43 +141,76 @@ void CudaUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
contexts
[
i
]
->
setTime
(
time
);
}
/**
 * ThreadPool task that converts the raw coordinates held in the context's
 * pinned buffer into the positions array returned to the user.  The buffer
 * entries are divided into one contiguous range per thread, and each thread
 * processes only its own range.
 *
 * NOTE(review): the pinned buffer (and, in mixed precision, posCorrection) is
 * presumably filled by the caller before the task is executed -- confirm.
 */
class CudaUpdateStateDataKernel::GetPositionsTask : public ThreadPool::Task {
public:
    GetPositionsTask(CudaContext& cu, vector<Vec3>& positions, vector<float4>& posCorrection) :
            cu(cu), positions(positions), posCorrection(posCorrection) {
    }
    /**
     * Compute the positions for the range of entries assigned to one thread.
     *
     * @param threads      the pool running this task; its thread count determines the range size
     * @param threadIndex  the index of the calling thread, which selects the range
     */
    void execute(ThreadPool& threads, int threadIndex) {
        const vector<int>& atomOrder = cu.getAtomIndex();
        int particleCount = cu.getNumAtoms();
        Vec3 box[3];
        cu.getPeriodicBoxVectors(box[0], box[1], box[2]);

        // Work out which contiguous range of entries belongs to this thread.
        int threadCount = threads.getNumThreads();
        int first = threadIndex*particleCount/threadCount;
        int last = (threadIndex+1)*particleCount/threadCount;

        if (cu.getUseDoublePrecision()) {
            // Double precision: the buffer holds double4 values.
            double4* coords = (double4*) cu.getPinnedBuffer();
            for (int index = first; index < last; index++) {
                double4 p = coords[index];
                storePosition(index, Vec3(p.x, p.y, p.z), atomOrder, box);
            }
        }
        else if (cu.getUseMixedPrecision()) {
            // Mixed precision: each coordinate is the sum of a float value and a float correction.
            float4* coords = (float4*) cu.getPinnedBuffer();
            for (int index = first; index < last; index++) {
                float4 base = coords[index];
                float4 correction = posCorrection[index];
                double x = (double) base.x+(double) correction.x;
                double y = (double) base.y+(double) correction.y;
                double z = (double) base.z+(double) correction.z;
                storePosition(index, Vec3(x, y, z), atomOrder, box);
            }
        }
        else {
            // Single precision: the buffer holds float4 values.
            float4* coords = (float4*) cu.getPinnedBuffer();
            for (int index = first; index < last; index++) {
                float4 p = coords[index];
                storePosition(index, Vec3(p.x, p.y, p.z), atomOrder, box);
            }
        }
    }
    CudaContext& cu;
    vector<Vec3>& positions;
    vector<float4>& posCorrection;
private:
    /**
     * Subtract the periodic box vectors, scaled by the cell offsets recorded for
     * buffer entry index, from pos and store the result in the output slot given
     * by atomOrder[index].
     */
    void storePosition(int index, Vec3 pos, const vector<int>& atomOrder, Vec3* box) {
        int4 offset = cu.getPosCellOffsets()[index];
        positions[atomOrder[index]] = pos-box[0]*offset.x-box[1]*offset.y-box[2]*offset.z;
    }
};
void
CudaUpdateStateDataKernel
::
getPositions
(
ContextImpl
&
context
,
vector
<
Vec3
>&
positions
)
{
cu
.
setAsCurrent
();
const
vector
<
int
>&
order
=
cu
.
getAtomIndex
();
int
numParticles
=
context
.
getSystem
().
getNumParticles
();
positions
.
resize
(
numParticles
);
Vec3
boxVectors
[
3
];
cu
.
getPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
vector
<
float4
>
posCorrection
;
if
(
cu
.
getUseDoublePrecision
())
{
double4
*
posq
=
(
double4
*
)
cu
.
getPinnedBuffer
();
cu
.
getPosq
().
download
(
posq
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
double4
pos
=
posq
[
i
];
int4
offset
=
cu
.
getPosCellOffsets
()[
i
];
positions
[
order
[
i
]]
=
Vec3
(
pos
.
x
,
pos
.
y
,
pos
.
z
)
-
boxVectors
[
0
]
*
offset
.
x
-
boxVectors
[
1
]
*
offset
.
y
-
boxVectors
[
2
]
*
offset
.
z
;
}
}
else
if
(
cu
.
getUseMixedPrecision
())
{
float4
*
posq
=
(
float4
*
)
cu
.
getPinnedBuffer
();
vector
<
float4
>
posCorrection
;
cu
.
getPosq
().
download
(
posq
);
cu
.
getPosq
().
download
(
posq
,
false
)
;
posCorrection
.
resize
(
numParticles
);
cu
.
getPosqCorrection
().
download
(
posCorrection
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
float4
pos1
=
posq
[
i
];
float4
pos2
=
posCorrection
[
i
];
int4
offset
=
cu
.
getPosCellOffsets
()[
i
];
positions
[
order
[
i
]]
=
Vec3
((
double
)
pos1
.
x
+
(
double
)
pos2
.
x
,
(
double
)
pos1
.
y
+
(
double
)
pos2
.
y
,
(
double
)
pos1
.
z
+
(
double
)
pos2
.
z
)
-
boxVectors
[
0
]
*
offset
.
x
-
boxVectors
[
1
]
*
offset
.
y
-
boxVectors
[
2
]
*
offset
.
z
;
}
}
else
{
float4
*
posq
=
(
float4
*
)
cu
.
getPinnedBuffer
();
cu
.
getPosq
().
download
(
posq
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
float4
pos
=
posq
[
i
];
int4
offset
=
cu
.
getPosCellOffsets
()[
i
];
positions
[
order
[
i
]]
=
Vec3
(
pos
.
x
,
pos
.
y
,
pos
.
z
)
-
boxVectors
[
0
]
*
offset
.
x
-
boxVectors
[
1
]
*
offset
.
y
-
boxVectors
[
2
]
*
offset
.
z
;
}
}
// Filling in the output array is done in parallel for speed.
GetPositionsTask
task
(
cu
,
positions
,
posCorrection
);
cu
.
getPlatformData
().
threads
.
execute
(
task
);
cu
.
getPlatformData
().
threads
.
waitForThreads
();
}
void
CudaUpdateStateDataKernel
::
setPositions
(
ContextImpl
&
context
,
const
vector
<
Vec3
>&
positions
)
{
...
...
platforms/cuda/src/CudaPlatform.cpp
View file @
1085085d
...
...
@@ -29,9 +29,10 @@
#include "CudaPlatform.h"
#include "CudaKernelFactory.h"
#include "CudaKernels.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/Context.h"
#include "openmm/System.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/hardware.h"
#include <algorithm>
#include <cctype>
#include <sstream>
...
...
@@ -175,7 +176,11 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
pmeKernelName
.
push_back
(
CalcPmeReciprocalForceKernel
::
Name
());
if
(
!
supportsKernels
(
pmeKernelName
))
cpuPmePropValue
=
"false"
;
context
.
setPlatformData
(
new
PlatformData
(
&
context
,
context
.
getSystem
(),
devicePropValue
,
blockingPropValue
,
precisionPropValue
,
cpuPmePropValue
,
compilerPropValue
,
tempPropValue
,
hostCompilerPropValue
,
pmeStreamPropValue
));
int
threads
=
getNumProcessors
();
char
*
threadsEnv
=
getenv
(
"OPENMM_CPU_THREADS"
);
if
(
threadsEnv
!=
NULL
)
stringstream
(
threadsEnv
)
>>
threads
;
context
.
setPlatformData
(
new
PlatformData
(
&
context
,
context
.
getSystem
(),
devicePropValue
,
blockingPropValue
,
precisionPropValue
,
cpuPmePropValue
,
compilerPropValue
,
tempPropValue
,
hostCompilerPropValue
,
pmeStreamPropValue
,
threads
));
}
void
CudaPlatform
::
contextDestroyed
(
ContextImpl
&
context
)
const
{
...
...
@@ -184,7 +189,8 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
}
CudaPlatform
::
PlatformData
::
PlatformData
(
ContextImpl
*
context
,
const
System
&
system
,
const
string
&
deviceIndexProperty
,
const
string
&
blockingProperty
,
const
string
&
precisionProperty
,
const
string
&
cpuPmeProperty
,
const
string
&
compilerProperty
,
const
string
&
tempProperty
,
const
string
&
hostCompilerProperty
,
const
string
&
pmeStreamProperty
)
:
context
(
context
),
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
),
hasInitializedContexts
(
false
)
{
const
string
&
cpuPmeProperty
,
const
string
&
compilerProperty
,
const
string
&
tempProperty
,
const
string
&
hostCompilerProperty
,
const
string
&
pmeStreamProperty
,
int
numThreads
)
:
context
(
context
),
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
),
hasInitializedContexts
(
false
),
threads
(
numThreads
)
{
bool
blocking
=
(
blockingProperty
==
"true"
);
vector
<
string
>
devices
;
size_t
searchPos
=
0
,
nextPos
;
...
...
platforms/cuda/tests/TestCudaFFT3D.cpp
View file @
1085085d
...
...
@@ -56,7 +56,7 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
system
.
addParticle
(
0.0
);
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()));
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
())
,
1
);
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
OpenMM_SFMT
::
SFMT
sfmt
;
...
...
platforms/cuda/tests/TestCudaRandom.cpp
View file @
1085085d
...
...
@@ -56,7 +56,7 @@ void testGaussian() {
system
.
addParticle
(
1.0
);
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()));
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
())
,
1
);
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
context
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
0
);
...
...
platforms/cuda/tests/TestCudaSort.cpp
View file @
1085085d
...
...
@@ -66,7 +66,7 @@ void verifySorting(vector<float> array) {
system
.
addParticle
(
0.0
);
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()));
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
())
,
1
);
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
CudaArray
data
(
context
,
array
.
size
(),
4
,
"sortData"
);
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
1085085d
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
6
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -170,6 +170,7 @@ public:
*/
void
loadCheckpoint
(
ContextImpl
&
context
,
std
::
istream
&
stream
);
private:
class
GetPositionsTask
;
OpenCLContext
&
cl
;
};
...
...
platforms/opencl/include/OpenCLPlatform.h
View file @
1085085d
...
...
@@ -29,6 +29,7 @@
#include "openmm/Platform.h"
#include "openmm/System.h"
#include "openmm/internal/ThreadPool.h"
#include "windowsExportOpenCL.h"
namespace
OpenMM
{
...
...
@@ -107,7 +108,7 @@ public:
class
OPENMM_EXPORT_OPENCL
OpenCLPlatform
::
PlatformData
{
public:
PlatformData
(
const
System
&
system
,
const
std
::
string
&
platformPropValue
,
const
std
::
string
&
deviceIndexProperty
,
const
std
::
string
&
precisionProperty
,
const
std
::
string
&
cpuPmeProperty
,
const
std
::
string
&
pmeStreamProperty
);
const
std
::
string
&
cpuPmeProperty
,
const
std
::
string
&
pmeStreamProperty
,
int
numThreads
);
~
PlatformData
();
void
initializeContexts
(
const
System
&
system
);
void
syncContexts
();
...
...
@@ -119,6 +120,7 @@ public:
int
stepCount
,
computeForceCount
;
double
time
;
std
::
map
<
std
::
string
,
std
::
string
>
propertyValues
;
ThreadPool
threads
;
};
}
// namespace OpenMM
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
1085085d
...
...
@@ -165,42 +165,75 @@ void OpenCLUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
contexts
[
i
]
->
setTime
(
time
);
}
/**
 * ThreadPool task that converts the raw coordinates held in the context's
 * pinned buffer into the positions array returned to the user.  The buffer
 * entries are divided into one contiguous range per thread, and each thread
 * processes only its own range.
 *
 * NOTE(review): the pinned buffer (and, in mixed precision, posCorrection) is
 * presumably filled by the caller before the task is executed -- confirm.
 */
class OpenCLUpdateStateDataKernel::GetPositionsTask : public ThreadPool::Task {
public:
    GetPositionsTask(OpenCLContext& cl, vector<Vec3>& positions, vector<mm_float4>& posCorrection) :
            cl(cl), positions(positions), posCorrection(posCorrection) {
    }
    /**
     * Compute the positions for the range of entries assigned to one thread.
     *
     * @param threads      the pool running this task; its thread count determines the range size
     * @param threadIndex  the index of the calling thread, which selects the range
     */
    void execute(ThreadPool& threads, int threadIndex) {
        const vector<int>& atomOrder = cl.getAtomIndex();
        int particleCount = cl.getNumAtoms();
        Vec3 box[3];
        cl.getPeriodicBoxVectors(box[0], box[1], box[2]);

        // Work out which contiguous range of entries belongs to this thread.
        int threadCount = threads.getNumThreads();
        int first = threadIndex*particleCount/threadCount;
        int last = (threadIndex+1)*particleCount/threadCount;

        if (cl.getUseDoublePrecision()) {
            // Double precision: the buffer holds mm_double4 values.
            mm_double4* coords = (mm_double4*) cl.getPinnedBuffer();
            for (int index = first; index < last; index++) {
                mm_double4 p = coords[index];
                storePosition(index, Vec3(p.x, p.y, p.z), atomOrder, box);
            }
        }
        else if (cl.getUseMixedPrecision()) {
            // Mixed precision: each coordinate is the sum of a float value and a float correction.
            mm_float4* coords = (mm_float4*) cl.getPinnedBuffer();
            for (int index = first; index < last; index++) {
                mm_float4 base = coords[index];
                mm_float4 correction = posCorrection[index];
                double x = (double) base.x+(double) correction.x;
                double y = (double) base.y+(double) correction.y;
                double z = (double) base.z+(double) correction.z;
                storePosition(index, Vec3(x, y, z), atomOrder, box);
            }
        }
        else {
            // Single precision: the buffer holds mm_float4 values.
            mm_float4* coords = (mm_float4*) cl.getPinnedBuffer();
            for (int index = first; index < last; index++) {
                mm_float4 p = coords[index];
                storePosition(index, Vec3(p.x, p.y, p.z), atomOrder, box);
            }
        }
    }
    OpenCLContext& cl;
    vector<Vec3>& positions;
    vector<mm_float4>& posCorrection;
private:
    /**
     * Subtract the periodic box vectors, scaled by the cell offsets recorded for
     * buffer entry index, from pos and store the result in the output slot given
     * by atomOrder[index].
     */
    void storePosition(int index, Vec3 pos, const vector<int>& atomOrder, Vec3* box) {
        mm_int4 offset = cl.getPosCellOffsets()[index];
        positions[atomOrder[index]] = pos-box[0]*offset.x-box[1]*offset.y-box[2]*offset.z;
    }
};
void
OpenCLUpdateStateDataKernel
::
getPositions
(
ContextImpl
&
context
,
vector
<
Vec3
>&
positions
)
{
const
vector
<
cl_int
>&
order
=
cl
.
getAtomIndex
();
int
numParticles
=
context
.
getSystem
().
getNumParticles
();
positions
.
resize
(
numParticles
);
Vec3
boxVectors
[
3
];
cl
.
getPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
vector
<
mm_float4
>
posCorrection
;
if
(
cl
.
getUseDoublePrecision
())
{
mm_double4
*
posq
=
(
mm_double4
*
)
cl
.
getPinnedBuffer
();
cl
.
getPosq
().
download
(
posq
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
mm_double4
pos
=
posq
[
i
];
mm_int4
offset
=
cl
.
getPosCellOffsets
()[
i
];
positions
[
order
[
i
]]
=
Vec3
(
pos
.
x
,
pos
.
y
,
pos
.
z
)
-
boxVectors
[
0
]
*
offset
.
x
-
boxVectors
[
1
]
*
offset
.
y
-
boxVectors
[
2
]
*
offset
.
z
;
}
}
else
if
(
cl
.
getUseMixedPrecision
())
{
mm_float4
*
posq
=
(
mm_float4
*
)
cl
.
getPinnedBuffer
();
vector
<
mm_float4
>
posCorrection
;
cl
.
getPosq
().
download
(
posq
);
cl
.
getPosq
().
download
(
posq
,
false
)
;
posCorrection
.
resize
(
numParticles
);
cl
.
getPosqCorrection
().
download
(
posCorrection
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
mm_float4
pos1
=
posq
[
i
];
mm_float4
pos2
=
posCorrection
[
i
];
mm_int4
offset
=
cl
.
getPosCellOffsets
()[
i
];
positions
[
order
[
i
]]
=
Vec3
((
double
)
pos1
.
x
+
(
double
)
pos2
.
x
,
(
double
)
pos1
.
y
+
(
double
)
pos2
.
y
,
(
double
)
pos1
.
z
+
(
double
)
pos2
.
z
)
-
boxVectors
[
0
]
*
offset
.
x
-
boxVectors
[
1
]
*
offset
.
y
-
boxVectors
[
2
]
*
offset
.
z
;
}
}
else
{
mm_float4
*
posq
=
(
mm_float4
*
)
cl
.
getPinnedBuffer
();
cl
.
getPosq
().
download
(
posq
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
mm_float4
pos
=
posq
[
i
];
mm_int4
offset
=
cl
.
getPosCellOffsets
()[
i
];
positions
[
order
[
i
]]
=
Vec3
(
pos
.
x
,
pos
.
y
,
pos
.
z
)
-
boxVectors
[
0
]
*
offset
.
x
-
boxVectors
[
1
]
*
offset
.
y
-
boxVectors
[
2
]
*
offset
.
z
;
}
}
// Filling in the output array is done in parallel for speed.
GetPositionsTask
task
(
cl
,
positions
,
posCorrection
);
cl
.
getPlatformData
().
threads
.
execute
(
task
);
cl
.
getPlatformData
().
threads
.
waitForThreads
();
}
void
OpenCLUpdateStateDataKernel
::
setPositions
(
ContextImpl
&
context
,
const
vector
<
Vec3
>&
positions
)
{
...
...
platforms/opencl/src/OpenCLPlatform.cpp
View file @
1085085d
...
...
@@ -28,9 +28,10 @@
#include "OpenCLPlatform.h"
#include "OpenCLKernelFactory.h"
#include "OpenCLKernels.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/Context.h"
#include "openmm/System.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/hardware.h"
#include <algorithm>
#include <cctype>
#include <sstream>
...
...
@@ -165,7 +166,11 @@ void OpenCLPlatform::contextCreated(ContextImpl& context, const map<string, stri
pmeKernelName
.
push_back
(
CalcPmeReciprocalForceKernel
::
Name
());
if
(
!
supportsKernels
(
pmeKernelName
))
cpuPmePropValue
=
"false"
;
context
.
setPlatformData
(
new
PlatformData
(
context
.
getSystem
(),
platformPropValue
,
devicePropValue
,
precisionPropValue
,
cpuPmePropValue
,
pmeStreamPropValue
));
int
threads
=
getNumProcessors
();
char
*
threadsEnv
=
getenv
(
"OPENMM_CPU_THREADS"
);
if
(
threadsEnv
!=
NULL
)
stringstream
(
threadsEnv
)
>>
threads
;
context
.
setPlatformData
(
new
PlatformData
(
context
.
getSystem
(),
platformPropValue
,
devicePropValue
,
precisionPropValue
,
cpuPmePropValue
,
pmeStreamPropValue
,
threads
));
}
void
OpenCLPlatform
::
contextDestroyed
(
ContextImpl
&
context
)
const
{
...
...
@@ -174,7 +179,8 @@ void OpenCLPlatform::contextDestroyed(ContextImpl& context) const {
}
OpenCLPlatform
::
PlatformData
::
PlatformData
(
const
System
&
system
,
const
string
&
platformPropValue
,
const
string
&
deviceIndexProperty
,
const
string
&
precisionProperty
,
const
string
&
cpuPmeProperty
,
const
string
&
pmeStreamProperty
)
:
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
),
hasInitializedContexts
(
false
)
{
const
string
&
precisionProperty
,
const
string
&
cpuPmeProperty
,
const
string
&
pmeStreamProperty
,
int
numThreads
)
:
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
),
hasInitializedContexts
(
false
),
threads
(
numThreads
)
{
int
platformIndex
=
-
1
;
if
(
platformPropValue
.
length
()
>
0
)
stringstream
(
platformPropValue
)
>>
platformIndex
;
...
...
platforms/opencl/tests/TestOpenCLFFT.cpp
View file @
1085085d
...
...
@@ -54,7 +54,7 @@ template <class Real2>
void
testTransform
(
bool
realToComplex
,
int
xsize
,
int
ysize
,
int
zsize
)
{
System
system
;
system
.
addParticle
(
0.0
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
,
"false"
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
,
"false"
,
1
);
OpenCLContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
OpenMM_SFMT
::
SFMT
sfmt
;
...
...
platforms/opencl/tests/TestOpenCLRandom.cpp
View file @
1085085d
...
...
@@ -54,7 +54,7 @@ void testGaussian() {
System
system
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
system
.
addParticle
(
1.0
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
,
"false"
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
,
"false"
,
1
);
OpenCLContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
context
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
0
);
...
...
platforms/opencl/tests/TestOpenCLSort.cpp
View file @
1085085d
...
...
@@ -64,7 +64,7 @@ void verifySorting(vector<float> array) {
System
system
;
system
.
addParticle
(
0.0
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
,
"false"
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
,
"false"
,
1
);
OpenCLContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
OpenCLArray
data
(
context
,
array
.
size
(),
sizeof
(
float
),
"sortData"
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment