Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
8d293880
Commit
8d293880
authored
Feb 04, 2016
by
Peter Eastman
Browse files
Parallelized the code for downloading positions
parent
73a59c33
Changes
15
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
201 additions
and
58 deletions
+201
-58
openmmapi/include/openmm/internal/timer.h
openmmapi/include/openmm/internal/timer.h
+59
-0
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+2
-1
platforms/cuda/include/CudaPlatform.h
platforms/cuda/include/CudaPlatform.h
+3
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+54
-21
platforms/cuda/src/CudaPlatform.cpp
platforms/cuda/src/CudaPlatform.cpp
+9
-3
platforms/cuda/tests/TestCudaFFT3D.cpp
platforms/cuda/tests/TestCudaFFT3D.cpp
+1
-1
platforms/cuda/tests/TestCudaRandom.cpp
platforms/cuda/tests/TestCudaRandom.cpp
+1
-1
platforms/cuda/tests/TestCudaSort.cpp
platforms/cuda/tests/TestCudaSort.cpp
+1
-1
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+2
-1
platforms/opencl/include/OpenCLPlatform.h
platforms/opencl/include/OpenCLPlatform.h
+3
-1
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+54
-21
platforms/opencl/src/OpenCLPlatform.cpp
platforms/opencl/src/OpenCLPlatform.cpp
+9
-3
platforms/opencl/tests/TestOpenCLFFT.cpp
platforms/opencl/tests/TestOpenCLFFT.cpp
+1
-1
platforms/opencl/tests/TestOpenCLRandom.cpp
platforms/opencl/tests/TestOpenCLRandom.cpp
+1
-1
platforms/opencl/tests/TestOpenCLSort.cpp
platforms/opencl/tests/TestOpenCLSort.cpp
+1
-1
No files found.
openmmapi/include/openmm/internal/timer.h
0 → 100644
View file @
8d293880
#ifndef OPENMM_TIMER_H_
#define OPENMM_TIMER_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This header provides a static function for querying the current system time in seconds.
* It is useful when profiling.
*/
#ifdef _MSC_VER
#include <Windows.h>
static
double
getCurrentTime
()
{
FILETIME
ft
;
GetSystemTimeAsFileTime
(
&
ft
);
// 100-nanoseconds since 1-1-1601
ULARGE_INTEGER
result
;
result
.
LowPart
=
ft
.
dwLowDateTime
;
result
.
HighPart
=
ft
.
dwHighDateTime
;
return
1e-7
*
result
.
QuadPart
;
}
#else
#include <sys/time.h>
static
double
getCurrentTime
()
{
struct
timeval
tod
;
gettimeofday
(
&
tod
,
0
);
return
tod
.
tv_sec
+
1e-6
*
tod
.
tv_usec
;
}
#endif
#endif
/*OPENMM_TIMER_H_*/
platforms/cuda/include/CudaKernels.h
View file @
8d293880
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
6
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -192,6 +192,7 @@ public:
*/
void
loadCheckpoint
(
ContextImpl
&
context
,
std
::
istream
&
stream
);
private:
class
GetPositionsTask
;
CudaContext
&
cu
;
};
...
...
platforms/cuda/include/CudaPlatform.h
View file @
8d293880
...
...
@@ -29,6 +29,7 @@
#include "openmm/Platform.h"
#include "openmm/System.h"
#include "openmm/internal/ThreadPool.h"
#include "windowsExportCuda.h"
namespace
OpenMM
{
...
...
@@ -122,7 +123,7 @@ class OPENMM_EXPORT_CUDA CudaPlatform::PlatformData {
public:
PlatformData
(
ContextImpl
*
context
,
const
System
&
system
,
const
std
::
string
&
deviceIndexProperty
,
const
std
::
string
&
blockingProperty
,
const
std
::
string
&
precisionProperty
,
const
std
::
string
&
cpuPmeProperty
,
const
std
::
string
&
compilerProperty
,
const
std
::
string
&
tempProperty
,
const
std
::
string
&
hostCompilerProperty
,
const
std
::
string
&
pmeStreamProperty
);
const
std
::
string
&
pmeStreamProperty
,
int
numThreads
);
~
PlatformData
();
void
initializeContexts
(
const
System
&
system
);
void
syncContexts
();
...
...
@@ -134,6 +135,7 @@ public:
int
stepCount
,
computeForceCount
;
double
time
;
std
::
map
<
std
::
string
,
std
::
string
>
propertyValues
;
ThreadPool
threads
;
};
}
// namespace OpenMM
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
8d293880
...
...
@@ -141,43 +141,76 @@ void CudaUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
contexts
[
i
]
->
setTime
(
time
);
}
class
CudaUpdateStateDataKernel
::
GetPositionsTask
:
public
ThreadPool
::
Task
{
public:
GetPositionsTask
(
CudaContext
&
cu
,
vector
<
Vec3
>&
positions
,
vector
<
float4
>&
posCorrection
)
:
cu
(
cu
),
positions
(
positions
),
posCorrection
(
posCorrection
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
// Compute the position of each particle to return to the user. This is done in parallel for speed.
const
vector
<
int
>&
order
=
cu
.
getAtomIndex
();
int
numParticles
=
cu
.
getNumAtoms
();
Vec3
boxVectors
[
3
];
cu
.
getPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
int
numThreads
=
threads
.
getNumThreads
();
int
start
=
threadIndex
*
numParticles
/
numThreads
;
int
end
=
(
threadIndex
+
1
)
*
numParticles
/
numThreads
;
if
(
cu
.
getUseDoublePrecision
())
{
double4
*
posq
=
(
double4
*
)
cu
.
getPinnedBuffer
();
for
(
int
i
=
start
;
i
<
end
;
++
i
)
{
double4
pos
=
posq
[
i
];
int4
offset
=
cu
.
getPosCellOffsets
()[
i
];
positions
[
order
[
i
]]
=
Vec3
(
pos
.
x
,
pos
.
y
,
pos
.
z
)
-
boxVectors
[
0
]
*
offset
.
x
-
boxVectors
[
1
]
*
offset
.
y
-
boxVectors
[
2
]
*
offset
.
z
;
}
}
else
if
(
cu
.
getUseMixedPrecision
())
{
float4
*
posq
=
(
float4
*
)
cu
.
getPinnedBuffer
();
for
(
int
i
=
start
;
i
<
end
;
++
i
)
{
float4
pos1
=
posq
[
i
];
float4
pos2
=
posCorrection
[
i
];
int4
offset
=
cu
.
getPosCellOffsets
()[
i
];
positions
[
order
[
i
]]
=
Vec3
((
double
)
pos1
.
x
+
(
double
)
pos2
.
x
,
(
double
)
pos1
.
y
+
(
double
)
pos2
.
y
,
(
double
)
pos1
.
z
+
(
double
)
pos2
.
z
)
-
boxVectors
[
0
]
*
offset
.
x
-
boxVectors
[
1
]
*
offset
.
y
-
boxVectors
[
2
]
*
offset
.
z
;
}
}
else
{
float4
*
posq
=
(
float4
*
)
cu
.
getPinnedBuffer
();
for
(
int
i
=
start
;
i
<
end
;
++
i
)
{
float4
pos
=
posq
[
i
];
int4
offset
=
cu
.
getPosCellOffsets
()[
i
];
positions
[
order
[
i
]]
=
Vec3
(
pos
.
x
,
pos
.
y
,
pos
.
z
)
-
boxVectors
[
0
]
*
offset
.
x
-
boxVectors
[
1
]
*
offset
.
y
-
boxVectors
[
2
]
*
offset
.
z
;
}
}
}
CudaContext
&
cu
;
vector
<
Vec3
>&
positions
;
vector
<
float4
>&
posCorrection
;
};
void
CudaUpdateStateDataKernel
::
getPositions
(
ContextImpl
&
context
,
vector
<
Vec3
>&
positions
)
{
cu
.
setAsCurrent
();
const
vector
<
int
>&
order
=
cu
.
getAtomIndex
();
int
numParticles
=
context
.
getSystem
().
getNumParticles
();
positions
.
resize
(
numParticles
);
Vec3
boxVectors
[
3
];
cu
.
getPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
vector
<
float4
>
posCorrection
;
if
(
cu
.
getUseDoublePrecision
())
{
double4
*
posq
=
(
double4
*
)
cu
.
getPinnedBuffer
();
cu
.
getPosq
().
download
(
posq
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
double4
pos
=
posq
[
i
];
int4
offset
=
cu
.
getPosCellOffsets
()[
i
];
positions
[
order
[
i
]]
=
Vec3
(
pos
.
x
,
pos
.
y
,
pos
.
z
)
-
boxVectors
[
0
]
*
offset
.
x
-
boxVectors
[
1
]
*
offset
.
y
-
boxVectors
[
2
]
*
offset
.
z
;
}
}
else
if
(
cu
.
getUseMixedPrecision
())
{
float4
*
posq
=
(
float4
*
)
cu
.
getPinnedBuffer
();
vector
<
float4
>
posCorrection
;
cu
.
getPosq
().
download
(
posq
);
cu
.
getPosq
().
download
(
posq
,
false
)
;
posCorrection
.
resize
(
numParticles
);
cu
.
getPosqCorrection
().
download
(
posCorrection
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
float4
pos1
=
posq
[
i
];
float4
pos2
=
posCorrection
[
i
];
int4
offset
=
cu
.
getPosCellOffsets
()[
i
];
positions
[
order
[
i
]]
=
Vec3
((
double
)
pos1
.
x
+
(
double
)
pos2
.
x
,
(
double
)
pos1
.
y
+
(
double
)
pos2
.
y
,
(
double
)
pos1
.
z
+
(
double
)
pos2
.
z
)
-
boxVectors
[
0
]
*
offset
.
x
-
boxVectors
[
1
]
*
offset
.
y
-
boxVectors
[
2
]
*
offset
.
z
;
}
}
else
{
float4
*
posq
=
(
float4
*
)
cu
.
getPinnedBuffer
();
cu
.
getPosq
().
download
(
posq
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
float4
pos
=
posq
[
i
];
int4
offset
=
cu
.
getPosCellOffsets
()[
i
];
positions
[
order
[
i
]]
=
Vec3
(
pos
.
x
,
pos
.
y
,
pos
.
z
)
-
boxVectors
[
0
]
*
offset
.
x
-
boxVectors
[
1
]
*
offset
.
y
-
boxVectors
[
2
]
*
offset
.
z
;
}
}
// Filling in the output array is done in parallel for speed.
GetPositionsTask
task
(
cu
,
positions
,
posCorrection
);
cu
.
getPlatformData
().
threads
.
execute
(
task
);
cu
.
getPlatformData
().
threads
.
waitForThreads
();
}
void
CudaUpdateStateDataKernel
::
setPositions
(
ContextImpl
&
context
,
const
vector
<
Vec3
>&
positions
)
{
...
...
platforms/cuda/src/CudaPlatform.cpp
View file @
8d293880
...
...
@@ -29,9 +29,10 @@
#include "CudaPlatform.h"
#include "CudaKernelFactory.h"
#include "CudaKernels.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/Context.h"
#include "openmm/System.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/hardware.h"
#include <algorithm>
#include <cctype>
#include <sstream>
...
...
@@ -175,7 +176,11 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
pmeKernelName
.
push_back
(
CalcPmeReciprocalForceKernel
::
Name
());
if
(
!
supportsKernels
(
pmeKernelName
))
cpuPmePropValue
=
"false"
;
context
.
setPlatformData
(
new
PlatformData
(
&
context
,
context
.
getSystem
(),
devicePropValue
,
blockingPropValue
,
precisionPropValue
,
cpuPmePropValue
,
compilerPropValue
,
tempPropValue
,
hostCompilerPropValue
,
pmeStreamPropValue
));
int
threads
=
getNumProcessors
();
char
*
threadsEnv
=
getenv
(
"OPENMM_CPU_THREADS"
);
if
(
threadsEnv
!=
NULL
)
stringstream
(
threadsEnv
)
>>
threads
;
context
.
setPlatformData
(
new
PlatformData
(
&
context
,
context
.
getSystem
(),
devicePropValue
,
blockingPropValue
,
precisionPropValue
,
cpuPmePropValue
,
compilerPropValue
,
tempPropValue
,
hostCompilerPropValue
,
pmeStreamPropValue
,
threads
));
}
void
CudaPlatform
::
contextDestroyed
(
ContextImpl
&
context
)
const
{
...
...
@@ -184,7 +189,8 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
}
CudaPlatform
::
PlatformData
::
PlatformData
(
ContextImpl
*
context
,
const
System
&
system
,
const
string
&
deviceIndexProperty
,
const
string
&
blockingProperty
,
const
string
&
precisionProperty
,
const
string
&
cpuPmeProperty
,
const
string
&
compilerProperty
,
const
string
&
tempProperty
,
const
string
&
hostCompilerProperty
,
const
string
&
pmeStreamProperty
)
:
context
(
context
),
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
),
hasInitializedContexts
(
false
)
{
const
string
&
cpuPmeProperty
,
const
string
&
compilerProperty
,
const
string
&
tempProperty
,
const
string
&
hostCompilerProperty
,
const
string
&
pmeStreamProperty
,
int
numThreads
)
:
context
(
context
),
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
),
hasInitializedContexts
(
false
),
threads
(
numThreads
)
{
bool
blocking
=
(
blockingProperty
==
"true"
);
vector
<
string
>
devices
;
size_t
searchPos
=
0
,
nextPos
;
...
...
platforms/cuda/tests/TestCudaFFT3D.cpp
View file @
8d293880
...
...
@@ -56,7 +56,7 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
system
.
addParticle
(
0.0
);
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()));
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
())
,
1
);
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
OpenMM_SFMT
::
SFMT
sfmt
;
...
...
platforms/cuda/tests/TestCudaRandom.cpp
View file @
8d293880
...
...
@@ -56,7 +56,7 @@ void testGaussian() {
system
.
addParticle
(
1.0
);
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()));
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
())
,
1
);
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
context
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
0
);
...
...
platforms/cuda/tests/TestCudaSort.cpp
View file @
8d293880
...
...
@@ -66,7 +66,7 @@ void verifySorting(vector<float> array) {
system
.
addParticle
(
0.0
);
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()));
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
())
,
1
);
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
CudaArray
data
(
context
,
array
.
size
(),
4
,
"sortData"
);
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
8d293880
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
6
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -170,6 +170,7 @@ public:
*/
void
loadCheckpoint
(
ContextImpl
&
context
,
std
::
istream
&
stream
);
private:
class
GetPositionsTask
;
OpenCLContext
&
cl
;
};
...
...
platforms/opencl/include/OpenCLPlatform.h
View file @
8d293880
...
...
@@ -29,6 +29,7 @@
#include "openmm/Platform.h"
#include "openmm/System.h"
#include "openmm/internal/ThreadPool.h"
#include "windowsExportOpenCL.h"
namespace
OpenMM
{
...
...
@@ -107,7 +108,7 @@ public:
class
OPENMM_EXPORT_OPENCL
OpenCLPlatform
::
PlatformData
{
public:
PlatformData
(
const
System
&
system
,
const
std
::
string
&
platformPropValue
,
const
std
::
string
&
deviceIndexProperty
,
const
std
::
string
&
precisionProperty
,
const
std
::
string
&
cpuPmeProperty
,
const
std
::
string
&
pmeStreamProperty
);
const
std
::
string
&
cpuPmeProperty
,
const
std
::
string
&
pmeStreamProperty
,
int
numThreads
);
~
PlatformData
();
void
initializeContexts
(
const
System
&
system
);
void
syncContexts
();
...
...
@@ -119,6 +120,7 @@ public:
int
stepCount
,
computeForceCount
;
double
time
;
std
::
map
<
std
::
string
,
std
::
string
>
propertyValues
;
ThreadPool
threads
;
};
}
// namespace OpenMM
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
8d293880
...
...
@@ -165,42 +165,75 @@ void OpenCLUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
contexts
[
i
]
->
setTime
(
time
);
}
class
OpenCLUpdateStateDataKernel
::
GetPositionsTask
:
public
ThreadPool
::
Task
{
public:
GetPositionsTask
(
OpenCLContext
&
cl
,
vector
<
Vec3
>&
positions
,
vector
<
mm_float4
>&
posCorrection
)
:
cl
(
cl
),
positions
(
positions
),
posCorrection
(
posCorrection
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
// Compute the position of each particle to return to the user. This is done in parallel for speed.
const
vector
<
int
>&
order
=
cl
.
getAtomIndex
();
int
numParticles
=
cl
.
getNumAtoms
();
Vec3
boxVectors
[
3
];
cl
.
getPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
int
numThreads
=
threads
.
getNumThreads
();
int
start
=
threadIndex
*
numParticles
/
numThreads
;
int
end
=
(
threadIndex
+
1
)
*
numParticles
/
numThreads
;
if
(
cl
.
getUseDoublePrecision
())
{
mm_double4
*
posq
=
(
mm_double4
*
)
cl
.
getPinnedBuffer
();
for
(
int
i
=
start
;
i
<
end
;
++
i
)
{
mm_double4
pos
=
posq
[
i
];
mm_int4
offset
=
cl
.
getPosCellOffsets
()[
i
];
positions
[
order
[
i
]]
=
Vec3
(
pos
.
x
,
pos
.
y
,
pos
.
z
)
-
boxVectors
[
0
]
*
offset
.
x
-
boxVectors
[
1
]
*
offset
.
y
-
boxVectors
[
2
]
*
offset
.
z
;
}
}
else
if
(
cl
.
getUseMixedPrecision
())
{
mm_float4
*
posq
=
(
mm_float4
*
)
cl
.
getPinnedBuffer
();
for
(
int
i
=
start
;
i
<
end
;
++
i
)
{
mm_float4
pos1
=
posq
[
i
];
mm_float4
pos2
=
posCorrection
[
i
];
mm_int4
offset
=
cl
.
getPosCellOffsets
()[
i
];
positions
[
order
[
i
]]
=
Vec3
((
double
)
pos1
.
x
+
(
double
)
pos2
.
x
,
(
double
)
pos1
.
y
+
(
double
)
pos2
.
y
,
(
double
)
pos1
.
z
+
(
double
)
pos2
.
z
)
-
boxVectors
[
0
]
*
offset
.
x
-
boxVectors
[
1
]
*
offset
.
y
-
boxVectors
[
2
]
*
offset
.
z
;
}
}
else
{
mm_float4
*
posq
=
(
mm_float4
*
)
cl
.
getPinnedBuffer
();
for
(
int
i
=
start
;
i
<
end
;
++
i
)
{
mm_float4
pos
=
posq
[
i
];
mm_int4
offset
=
cl
.
getPosCellOffsets
()[
i
];
positions
[
order
[
i
]]
=
Vec3
(
pos
.
x
,
pos
.
y
,
pos
.
z
)
-
boxVectors
[
0
]
*
offset
.
x
-
boxVectors
[
1
]
*
offset
.
y
-
boxVectors
[
2
]
*
offset
.
z
;
}
}
}
OpenCLContext
&
cl
;
vector
<
Vec3
>&
positions
;
vector
<
mm_float4
>&
posCorrection
;
};
void
OpenCLUpdateStateDataKernel
::
getPositions
(
ContextImpl
&
context
,
vector
<
Vec3
>&
positions
)
{
const
vector
<
cl_int
>&
order
=
cl
.
getAtomIndex
();
int
numParticles
=
context
.
getSystem
().
getNumParticles
();
positions
.
resize
(
numParticles
);
Vec3
boxVectors
[
3
];
cl
.
getPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
vector
<
mm_float4
>
posCorrection
;
if
(
cl
.
getUseDoublePrecision
())
{
mm_double4
*
posq
=
(
mm_double4
*
)
cl
.
getPinnedBuffer
();
cl
.
getPosq
().
download
(
posq
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
mm_double4
pos
=
posq
[
i
];
mm_int4
offset
=
cl
.
getPosCellOffsets
()[
i
];
positions
[
order
[
i
]]
=
Vec3
(
pos
.
x
,
pos
.
y
,
pos
.
z
)
-
boxVectors
[
0
]
*
offset
.
x
-
boxVectors
[
1
]
*
offset
.
y
-
boxVectors
[
2
]
*
offset
.
z
;
}
}
else
if
(
cl
.
getUseMixedPrecision
())
{
mm_float4
*
posq
=
(
mm_float4
*
)
cl
.
getPinnedBuffer
();
vector
<
mm_float4
>
posCorrection
;
cl
.
getPosq
().
download
(
posq
);
cl
.
getPosq
().
download
(
posq
,
false
)
;
posCorrection
.
resize
(
numParticles
);
cl
.
getPosqCorrection
().
download
(
posCorrection
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
mm_float4
pos1
=
posq
[
i
];
mm_float4
pos2
=
posCorrection
[
i
];
mm_int4
offset
=
cl
.
getPosCellOffsets
()[
i
];
positions
[
order
[
i
]]
=
Vec3
((
double
)
pos1
.
x
+
(
double
)
pos2
.
x
,
(
double
)
pos1
.
y
+
(
double
)
pos2
.
y
,
(
double
)
pos1
.
z
+
(
double
)
pos2
.
z
)
-
boxVectors
[
0
]
*
offset
.
x
-
boxVectors
[
1
]
*
offset
.
y
-
boxVectors
[
2
]
*
offset
.
z
;
}
}
else
{
mm_float4
*
posq
=
(
mm_float4
*
)
cl
.
getPinnedBuffer
();
cl
.
getPosq
().
download
(
posq
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
mm_float4
pos
=
posq
[
i
];
mm_int4
offset
=
cl
.
getPosCellOffsets
()[
i
];
positions
[
order
[
i
]]
=
Vec3
(
pos
.
x
,
pos
.
y
,
pos
.
z
)
-
boxVectors
[
0
]
*
offset
.
x
-
boxVectors
[
1
]
*
offset
.
y
-
boxVectors
[
2
]
*
offset
.
z
;
}
}
// Filling in the output array is done in parallel for speed.
GetPositionsTask
task
(
cl
,
positions
,
posCorrection
);
cl
.
getPlatformData
().
threads
.
execute
(
task
);
cl
.
getPlatformData
().
threads
.
waitForThreads
();
}
void
OpenCLUpdateStateDataKernel
::
setPositions
(
ContextImpl
&
context
,
const
vector
<
Vec3
>&
positions
)
{
...
...
platforms/opencl/src/OpenCLPlatform.cpp
View file @
8d293880
...
...
@@ -28,9 +28,10 @@
#include "OpenCLPlatform.h"
#include "OpenCLKernelFactory.h"
#include "OpenCLKernels.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/Context.h"
#include "openmm/System.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/hardware.h"
#include <algorithm>
#include <cctype>
#include <sstream>
...
...
@@ -165,7 +166,11 @@ void OpenCLPlatform::contextCreated(ContextImpl& context, const map<string, stri
pmeKernelName
.
push_back
(
CalcPmeReciprocalForceKernel
::
Name
());
if
(
!
supportsKernels
(
pmeKernelName
))
cpuPmePropValue
=
"false"
;
context
.
setPlatformData
(
new
PlatformData
(
context
.
getSystem
(),
platformPropValue
,
devicePropValue
,
precisionPropValue
,
cpuPmePropValue
,
pmeStreamPropValue
));
int
threads
=
getNumProcessors
();
char
*
threadsEnv
=
getenv
(
"OPENMM_CPU_THREADS"
);
if
(
threadsEnv
!=
NULL
)
stringstream
(
threadsEnv
)
>>
threads
;
context
.
setPlatformData
(
new
PlatformData
(
context
.
getSystem
(),
platformPropValue
,
devicePropValue
,
precisionPropValue
,
cpuPmePropValue
,
pmeStreamPropValue
,
threads
));
}
void
OpenCLPlatform
::
contextDestroyed
(
ContextImpl
&
context
)
const
{
...
...
@@ -174,7 +179,8 @@ void OpenCLPlatform::contextDestroyed(ContextImpl& context) const {
}
OpenCLPlatform
::
PlatformData
::
PlatformData
(
const
System
&
system
,
const
string
&
platformPropValue
,
const
string
&
deviceIndexProperty
,
const
string
&
precisionProperty
,
const
string
&
cpuPmeProperty
,
const
string
&
pmeStreamProperty
)
:
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
),
hasInitializedContexts
(
false
)
{
const
string
&
precisionProperty
,
const
string
&
cpuPmeProperty
,
const
string
&
pmeStreamProperty
,
int
numThreads
)
:
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
),
hasInitializedContexts
(
false
),
threads
(
numThreads
)
{
int
platformIndex
=
-
1
;
if
(
platformPropValue
.
length
()
>
0
)
stringstream
(
platformPropValue
)
>>
platformIndex
;
...
...
platforms/opencl/tests/TestOpenCLFFT.cpp
View file @
8d293880
...
...
@@ -54,7 +54,7 @@ template <class Real2>
void
testTransform
(
bool
realToComplex
,
int
xsize
,
int
ysize
,
int
zsize
)
{
System
system
;
system
.
addParticle
(
0.0
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
,
"false"
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
,
"false"
,
1
);
OpenCLContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
OpenMM_SFMT
::
SFMT
sfmt
;
...
...
platforms/opencl/tests/TestOpenCLRandom.cpp
View file @
8d293880
...
...
@@ -54,7 +54,7 @@ void testGaussian() {
System
system
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
system
.
addParticle
(
1.0
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
,
"false"
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
,
"false"
,
1
);
OpenCLContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
context
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
0
);
...
...
platforms/opencl/tests/TestOpenCLSort.cpp
View file @
8d293880
...
...
@@ -64,7 +64,7 @@ void verifySorting(vector<float> array) {
System
system
;
system
.
addParticle
(
0.0
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
,
"false"
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
,
"false"
,
1
);
OpenCLContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
OpenCLArray
data
(
context
,
array
.
size
(),
sizeof
(
float
),
"sortData"
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment