Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
8d6a2a01
Commit
8d6a2a01
authored
Oct 16, 2012
by
Peter Eastman
Browse files
Beginnings of mixed/double precision support in OpenCL
parent
a3d5f834
Changes
26
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1299 additions
and
616 deletions
+1299
-616
platforms/opencl/include/OpenCLPlatform.h
platforms/opencl/include/OpenCLPlatform.h
+8
-1
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+199
-73
platforms/opencl/src/OpenCLContext.h
platforms/opencl/src/OpenCLContext.h
+57
-5
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
+133
-58
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+474
-184
platforms/opencl/src/OpenCLKernels.h
platforms/opencl/src/OpenCLKernels.h
+4
-1
platforms/opencl/src/OpenCLParameterSet.cpp
platforms/opencl/src/OpenCLParameterSet.cpp
+55
-37
platforms/opencl/src/OpenCLParameterSet.h
platforms/opencl/src/OpenCLParameterSet.h
+8
-6
platforms/opencl/src/OpenCLPlatform.cpp
platforms/opencl/src/OpenCLPlatform.cpp
+10
-4
platforms/opencl/src/kernels/andersenThermostat.cl
platforms/opencl/src/kernels/andersenThermostat.cl
+7
-5
platforms/opencl/src/kernels/brownian.cl
platforms/opencl/src/kernels/brownian.cl
+30
-10
platforms/opencl/src/kernels/ccma.cl
platforms/opencl/src/kernels/ccma.cl
+35
-26
platforms/opencl/src/kernels/constraints.cl
platforms/opencl/src/kernels/constraints.cl
+15
-4
platforms/opencl/src/kernels/customIntegrator.cl
platforms/opencl/src/kernels/customIntegrator.cl
+36
-7
platforms/opencl/src/kernels/customIntegratorGlobal.cl
platforms/opencl/src/kernels/customIntegratorGlobal.cl
+2
-2
platforms/opencl/src/kernels/customIntegratorPerDof.cl
platforms/opencl/src/kernels/customIntegratorPerDof.cl
+33
-23
platforms/opencl/src/kernels/ewald.cl
platforms/opencl/src/kernels/ewald.cl
+0
-1
platforms/opencl/src/kernels/langevin.cl
platforms/opencl/src/kernels/langevin.cl
+38
-29
platforms/opencl/src/kernels/removeCM.cl
platforms/opencl/src/kernels/removeCM.cl
+11
-6
platforms/opencl/src/kernels/settle.cl
platforms/opencl/src/kernels/settle.cl
+144
-134
No files found.
platforms/opencl/include/OpenCLPlatform.h
View file @
8d6a2a01
...
@@ -68,11 +68,18 @@ public:
...
@@ -68,11 +68,18 @@ public:
static
const
std
::
string
key
=
"OpenCLPlatformIndex"
;
static
const
std
::
string
key
=
"OpenCLPlatformIndex"
;
return
key
;
return
key
;
}
}
/**
* This is the name of the parameter for selecting what numerical precision to use.
*/
static
const
std
::
string
&
OpenCLPrecision
()
{
static
const
std
::
string
key
=
"OpenCLPrecision"
;
return
key
;
}
};
};
class
OPENMM_EXPORT
OpenCLPlatform
::
PlatformData
{
class
OPENMM_EXPORT
OpenCLPlatform
::
PlatformData
{
public:
public:
PlatformData
(
const
System
&
system
,
const
std
::
string
&
platformPropValue
,
const
std
::
string
&
deviceIndexProperty
);
PlatformData
(
const
System
&
system
,
const
std
::
string
&
platformPropValue
,
const
std
::
string
&
deviceIndexProperty
,
const
std
::
string
&
precisionProperty
);
~
PlatformData
();
~
PlatformData
();
void
initializeContexts
(
const
System
&
system
);
void
initializeContexts
(
const
System
&
system
);
void
syncContexts
();
void
syncContexts
();
...
...
platforms/opencl/src/OpenCLContext.cpp
View file @
8d6a2a01
...
@@ -65,10 +65,24 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
...
@@ -65,10 +65,24 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
std
::
cerr
<<
"OpenCL internal error: "
<<
errinfo
<<
std
::
endl
;
std
::
cerr
<<
"OpenCL internal error: "
<<
errinfo
<<
std
::
endl
;
}
}
OpenCLContext
::
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
OpenCLPlatform
::
PlatformData
&
platformData
)
:
OpenCLContext
::
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
const
string
&
precision
,
OpenCLPlatform
::
PlatformData
&
platformData
)
:
system
(
system
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
atomsWereReordered
(
false
),
posq
(
NULL
),
system
(
system
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
atomsWereReordered
(
false
),
posq
(
NULL
),
velm
(
NULL
),
forceBuffers
(
NULL
),
longForceBuffer
(
NULL
),
energyBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
integration
(
NULL
),
velm
(
NULL
),
forceBuffers
(
NULL
),
longForceBuffer
(
NULL
),
energyBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
integration
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
if
(
precision
==
"single"
)
{
useDoublePrecision
=
false
;
useMixedPrecision
=
false
;
}
else
if
(
precision
==
"mixed"
)
{
useDoublePrecision
=
false
;
useMixedPrecision
=
true
;
}
else
if
(
precision
==
"double"
)
{
useDoublePrecision
=
true
;
useMixedPrecision
=
false
;
}
else
throw
OpenMMException
(
"Illegal value for OpenCLPrecision: "
+
precision
);
try
{
try
{
contextIndex
=
platformData
.
contexts
.
size
();
contextIndex
=
platformData
.
contexts
.
size
();
std
::
vector
<
cl
::
Platform
>
platforms
;
std
::
vector
<
cl
::
Platform
>
platforms
;
...
@@ -217,8 +231,27 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -217,8 +231,27 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
numThreadBlocks
=
numThreadBlocksPerComputeUnit
*
device
.
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
();
numThreadBlocks
=
numThreadBlocksPerComputeUnit
*
device
.
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
();
bonded
=
new
OpenCLBondedUtilities
(
*
this
);
bonded
=
new
OpenCLBondedUtilities
(
*
this
);
nonbonded
=
new
OpenCLNonbondedUtilities
(
*
this
);
nonbonded
=
new
OpenCLNonbondedUtilities
(
*
this
);
if
(
useDoublePrecision
)
{
posq
=
OpenCLArray
::
create
<
mm_double4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
velm
=
OpenCLArray
::
create
<
mm_double4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
compilationDefines
[
"USE_DOUBLE_PRECISION"
]
=
"1"
;
compilationDefines
[
"convert_real4"
]
=
"convert_double4"
;
compilationDefines
[
"convert_mixed4"
]
=
"convert_double4"
;
}
else
if
(
useMixedPrecision
)
{
posq
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
posqCorrection
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
velm
=
OpenCLArray
::
create
<
mm_double4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
compilationDefines
[
"USE_MIXED_PRECISION"
]
=
"1"
;
compilationDefines
[
"convert_real4"
]
=
"convert_float4"
;
compilationDefines
[
"convert_mixed4"
]
=
"convert_double4"
;
}
else
{
posq
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
posq
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
velm
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
velm
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
compilationDefines
[
"convert_real4"
]
=
"convert_float4"
;
compilationDefines
[
"convert_mixed4"
]
=
"convert_float4"
;
}
posCellOffsets
.
resize
(
paddedNumAtoms
,
mm_int4
(
0
,
0
,
0
,
0
));
posCellOffsets
.
resize
(
paddedNumAtoms
,
mm_int4
(
0
,
0
,
0
,
0
));
}
}
catch
(
cl
::
Error
err
)
{
catch
(
cl
::
Error
err
)
{
...
@@ -241,6 +274,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -241,6 +274,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
// Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
// Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
if
(
!
useDoublePrecision
)
{
cl
::
Kernel
accuracyKernel
(
utilities
,
"determineNativeAccuracy"
);
cl
::
Kernel
accuracyKernel
(
utilities
,
"determineNativeAccuracy"
);
OpenCLArray
valuesArray
(
*
this
,
20
,
sizeof
(
mm_float8
),
"values"
);
OpenCLArray
valuesArray
(
*
this
,
20
,
sizeof
(
mm_float8
),
"values"
);
vector
<
mm_float8
>
values
(
valuesArray
.
getSize
());
vector
<
mm_float8
>
values
(
valuesArray
.
getSize
());
...
@@ -269,6 +303,14 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -269,6 +303,14 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
compilationDefines
[
"RECIP"
]
=
(
maxRecipError
<
1e-6
)
?
"native_recip"
:
"1.0f/"
;
compilationDefines
[
"RECIP"
]
=
(
maxRecipError
<
1e-6
)
?
"native_recip"
:
"1.0f/"
;
compilationDefines
[
"EXP"
]
=
(
maxExpError
<
1e-6
)
?
"native_exp"
:
"exp"
;
compilationDefines
[
"EXP"
]
=
(
maxExpError
<
1e-6
)
?
"native_exp"
:
"exp"
;
compilationDefines
[
"LOG"
]
=
(
maxLogError
<
1e-6
)
?
"native_log"
:
"log"
;
compilationDefines
[
"LOG"
]
=
(
maxLogError
<
1e-6
)
?
"native_log"
:
"log"
;
}
else
{
compilationDefines
[
"SQRT"
]
=
"sqrt"
;
compilationDefines
[
"RSQRT"
]
=
"rsqrt"
;
compilationDefines
[
"RECIP"
]
=
"1.0/"
;
compilationDefines
[
"EXP"
]
=
"exp"
;
compilationDefines
[
"LOG"
]
=
"log"
;
}
// Create the work thread used for parallelization when running on multiple devices.
// Create the work thread used for parallelization when running on multiple devices.
...
@@ -311,18 +353,21 @@ OpenCLContext::~OpenCLContext() {
...
@@ -311,18 +353,21 @@ OpenCLContext::~OpenCLContext() {
}
}
void
OpenCLContext
::
initialize
()
{
void
OpenCLContext
::
initialize
()
{
vector
<
mm_float4
>
v
(
paddedNumAtoms
,
mm_float4
(
0
,
0
,
0
,
0
));
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
double
mass
=
system
.
getParticleMass
(
i
);
v
[
i
].
w
=
(
float
)
(
mass
==
0.0
?
0.0
:
1.0
/
mass
);
}
velm
->
upload
(
v
);
bonded
->
initialize
(
system
);
bonded
->
initialize
(
system
);
numForceBuffers
=
platformData
.
contexts
.
size
();
numForceBuffers
=
platformData
.
contexts
.
size
();
numForceBuffers
=
std
::
max
(
numForceBuffers
,
bonded
->
getNumForceBuffers
());
numForceBuffers
=
std
::
max
(
numForceBuffers
,
bonded
->
getNumForceBuffers
());
for
(
int
i
=
0
;
i
<
(
int
)
forces
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
forces
.
size
();
i
++
)
numForceBuffers
=
std
::
max
(
numForceBuffers
,
forces
[
i
]
->
getRequiredForceBuffers
());
numForceBuffers
=
std
::
max
(
numForceBuffers
,
forces
[
i
]
->
getRequiredForceBuffers
());
if
(
useDoublePrecision
)
{
forceBuffers
=
OpenCLArray
::
create
<
mm_double4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
=
OpenCLArray
::
create
<
mm_double4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
=
OpenCLArray
::
create
<
cl_double
>
(
*
this
,
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumEnergyBuffers
()),
"energyBuffer"
);
}
else
{
forceBuffers
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
forceBuffers
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
=
OpenCLArray
::
create
<
cl_float
>
(
*
this
,
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumEnergyBuffers
()),
"energyBuffer"
);
}
if
(
supports64BitGlobalAtomics
)
{
if
(
supports64BitGlobalAtomics
)
{
longForceBuffer
=
OpenCLArray
::
create
<
cl_long
>
(
*
this
,
3
*
paddedNumAtoms
,
"longForceBuffer"
);
longForceBuffer
=
OpenCLArray
::
create
<
cl_long
>
(
*
this
,
3
*
paddedNumAtoms
,
"longForceBuffer"
);
reduceForcesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
longForceBuffer
->
getDeviceBuffer
());
reduceForcesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
longForceBuffer
->
getDeviceBuffer
());
...
@@ -332,12 +377,18 @@ void OpenCLContext::initialize() {
...
@@ -332,12 +377,18 @@ void OpenCLContext::initialize() {
addAutoclearBuffer
(
longForceBuffer
->
getDeviceBuffer
(),
longForceBuffer
->
getSize
()
*
2
);
addAutoclearBuffer
(
longForceBuffer
->
getDeviceBuffer
(),
longForceBuffer
->
getSize
()
*
2
);
}
}
addAutoclearBuffer
(
forceBuffers
->
getDeviceBuffer
(),
forceBuffers
->
getSize
()
*
4
);
addAutoclearBuffer
(
forceBuffers
->
getDeviceBuffer
(),
forceBuffers
->
getSize
()
*
4
);
force
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
=
OpenCLArray
::
create
<
cl_float
>
(
*
this
,
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumEnergyBuffers
()),
"energyBuffer"
);
addAutoclearBuffer
(
energyBuffer
->
getDeviceBuffer
(),
energyBuffer
->
getSize
());
addAutoclearBuffer
(
energyBuffer
->
getDeviceBuffer
(),
energyBuffer
->
getSize
());
int
bufferBytes
=
max
(
posq
->
getSize
()
*
sizeof
(
mm_float4
),
energyBuffer
->
getSize
()
*
sizeof
(
cl_float
));
int
bufferBytes
=
max
(
posq
->
getSize
()
*
posq
->
getElementSize
(
),
energyBuffer
->
getSize
()
*
energyBuffer
->
getElementSize
(
));
pinnedBuffer
=
new
cl
::
Buffer
(
context
,
CL_MEM_ALLOC_HOST_PTR
,
bufferBytes
);
pinnedBuffer
=
new
cl
::
Buffer
(
context
,
CL_MEM_ALLOC_HOST_PTR
,
bufferBytes
);
pinnedMemory
=
queue
.
enqueueMapBuffer
(
*
pinnedBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
bufferBytes
);
pinnedMemory
=
queue
.
enqueueMapBuffer
(
*
pinnedBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
bufferBytes
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
double
mass
=
system
.
getParticleMass
(
i
);
if
(
useDoublePrecision
||
useMixedPrecision
)
((
mm_double4
*
)
pinnedMemory
)[
i
]
=
mm_double4
(
0.0
,
0.0
,
0.0
,
mass
==
0.0
?
0.0
:
1.0
/
mass
);
else
((
mm_float4
*
)
pinnedMemory
)[
i
]
=
mm_float4
(
0.0
f
,
0.0
f
,
0.0
f
,
mass
==
0.0
?
0.0
f
:
(
cl_float
)
(
1.0
/
mass
));
}
velm
->
upload
(
pinnedMemory
);
atomIndexDevice
=
OpenCLArray
::
create
<
cl_int
>
(
*
this
,
paddedNumAtoms
,
"atomIndexDevice"
);
atomIndexDevice
=
OpenCLArray
::
create
<
cl_int
>
(
*
this
,
paddedNumAtoms
,
"atomIndexDevice"
);
atomIndex
.
resize
(
paddedNumAtoms
);
atomIndex
.
resize
(
paddedNumAtoms
);
for
(
int
i
=
0
;
i
<
paddedNumAtoms
;
++
i
)
for
(
int
i
=
0
;
i
<
paddedNumAtoms
;
++
i
)
...
@@ -382,6 +433,28 @@ cl::Program OpenCLContext::createProgram(const string source, const map<string,
...
@@ -382,6 +433,28 @@ cl::Program OpenCLContext::createProgram(const string source, const map<string,
}
}
if
(
!
compilationDefines
.
empty
())
if
(
!
compilationDefines
.
empty
())
src
<<
endl
;
src
<<
endl
;
if
(
supportsDoublePrecision
)
src
<<
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable
\n
"
;
if
(
useDoublePrecision
)
{
src
<<
"typedef double real;
\n
"
;
src
<<
"typedef double2 real2;
\n
"
;
src
<<
"typedef double4 real4;
\n
"
;
}
else
{
src
<<
"typedef float real;
\n
"
;
src
<<
"typedef float2 real2;
\n
"
;
src
<<
"typedef float4 real4;
\n
"
;
}
if
(
useDoublePrecision
||
useMixedPrecision
)
{
src
<<
"typedef double mixed;
\n
"
;
src
<<
"typedef double2 mixed2;
\n
"
;
src
<<
"typedef double4 mixed4;
\n
"
;
}
else
{
src
<<
"typedef float mixed;
\n
"
;
src
<<
"typedef float2 mixed2;
\n
"
;
src
<<
"typedef float4 mixed4;
\n
"
;
}
for
(
map
<
string
,
string
>::
const_iterator
iter
=
defines
.
begin
();
iter
!=
defines
.
end
();
++
iter
)
{
for
(
map
<
string
,
string
>::
const_iterator
iter
=
defines
.
begin
();
iter
!=
defines
.
end
();
++
iter
)
{
src
<<
"#define "
<<
iter
->
first
;
src
<<
"#define "
<<
iter
->
first
;
if
(
!
iter
->
second
.
empty
())
if
(
!
iter
->
second
.
empty
())
...
@@ -764,11 +837,47 @@ void OpenCLContext::validateMolecules() {
...
@@ -764,11 +837,47 @@ void OpenCLContext::validateMolecules() {
// atoms to their original order, rebuild the list of identical molecules, and sort them
// atoms to their original order, rebuild the list of identical molecules, and sort them
// again.
// again.
vector
<
mm_int4
>
newCellOffsets
(
numAtoms
);
if
(
useDoublePrecision
)
{
vector
<
mm_double4
>
oldPosq
(
paddedNumAtoms
);
vector
<
mm_double4
>
newPosq
(
paddedNumAtoms
);
vector
<
mm_double4
>
oldVelm
(
paddedNumAtoms
);
vector
<
mm_double4
>
newVelm
(
paddedNumAtoms
);
posq
->
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
index
=
atomIndex
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
newVelm
[
index
]
=
oldVelm
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
}
posq
->
upload
(
newPosq
);
velm
->
upload
(
newVelm
);
}
else
if
(
useMixedPrecision
)
{
vector
<
mm_float4
>
oldPosq
(
paddedNumAtoms
);
vector
<
mm_float4
>
newPosq
(
paddedNumAtoms
);
vector
<
mm_float4
>
oldPosqCorrection
(
paddedNumAtoms
);
vector
<
mm_float4
>
newPosqCorrection
(
paddedNumAtoms
);
vector
<
mm_double4
>
oldVelm
(
paddedNumAtoms
);
vector
<
mm_double4
>
newVelm
(
paddedNumAtoms
);
posq
->
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
index
=
atomIndex
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
newPosqCorrection
[
index
]
=
oldPosqCorrection
[
i
];
newVelm
[
index
]
=
oldVelm
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
}
posq
->
upload
(
newPosq
);
velm
->
upload
(
newVelm
);
}
else
{
vector
<
mm_float4
>
oldPosq
(
paddedNumAtoms
);
vector
<
mm_float4
>
oldPosq
(
paddedNumAtoms
);
vector
<
mm_float4
>
newPosq
(
paddedNumAtoms
);
vector
<
mm_float4
>
newPosq
(
paddedNumAtoms
);
vector
<
mm_float4
>
oldVelm
(
paddedNumAtoms
);
vector
<
mm_float4
>
oldVelm
(
paddedNumAtoms
);
vector
<
mm_float4
>
newVelm
(
paddedNumAtoms
);
vector
<
mm_float4
>
newVelm
(
paddedNumAtoms
);
vector
<
mm_int4
>
newCellOffsets
(
numAtoms
);
posq
->
download
(
oldPosq
);
posq
->
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
velm
->
download
(
oldVelm
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
...
@@ -779,12 +888,11 @@ void OpenCLContext::validateMolecules() {
...
@@ -779,12 +888,11 @@ void OpenCLContext::validateMolecules() {
}
}
posq
->
upload
(
newPosq
);
posq
->
upload
(
newPosq
);
velm
->
upload
(
newVelm
);
velm
->
upload
(
newVelm
);
}
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
atomIndex
[
i
]
=
i
;
atomIndex
[
i
]
=
i
;
posCellOffsets
[
i
]
=
newCellOffsets
[
i
];
posCellOffsets
[
i
]
=
newCellOffsets
[
i
];
}
}
posq
->
upload
(
newPosq
);
velm
->
upload
(
newVelm
);
atomIndexDevice
->
upload
(
atomIndex
);
atomIndexDevice
->
upload
(
atomIndex
);
findMoleculeGroups
();
findMoleculeGroups
();
for
(
int
i
=
0
;
i
<
(
int
)
reorderListeners
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
reorderListeners
.
size
();
i
++
)
...
@@ -797,16 +905,29 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
...
@@ -797,16 +905,29 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
if
(
moleculesInvalid
)
if
(
moleculesInvalid
)
validateMolecules
();
validateMolecules
();
atomsWereReordered
=
true
;
atomsWereReordered
=
true
;
if
(
useDoublePrecision
)
reorderAtomsImpl
<
cl_double
,
mm_double4
,
cl_double
,
mm_double4
>
(
enforcePeriodic
);
else
if
(
useMixedPrecision
)
reorderAtomsImpl
<
cl_float
,
mm_float4
,
cl_double
,
mm_double4
>
(
enforcePeriodic
);
else
reorderAtomsImpl
<
cl_float
,
mm_float4
,
cl_float
,
mm_float4
>
(
enforcePeriodic
);
}
template
<
class
Real
,
class
Real4
,
class
Mixed
,
class
Mixed4
>
void
OpenCLContext
::
reorderAtomsImpl
(
bool
enforcePeriodic
)
{
// Find the range of positions and the number of bins along each axis.
// Find the range of positions and the number of bins along each axis.
vector
<
mm_float4
>
oldPosq
(
paddedNumAtoms
);
vector
<
Real4
>
oldPosq
(
paddedNumAtoms
);
vector
<
mm_float4
>
oldVelm
(
paddedNumAtoms
);
vector
<
Real4
>
oldPosqCorrection
(
paddedNumAtoms
);
vector
<
Mixed4
>
oldVelm
(
paddedNumAtoms
);
posq
->
download
(
oldPosq
);
posq
->
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
velm
->
download
(
oldVelm
);
float
minx
=
oldPosq
[
0
].
x
,
maxx
=
oldPosq
[
0
].
x
;
if
(
useMixedPrecision
)
float
miny
=
oldPosq
[
0
].
y
,
maxy
=
oldPosq
[
0
].
y
;
posqCorrection
->
download
(
oldPosqCorrection
);
float
minz
=
oldPosq
[
0
].
z
,
maxz
=
oldPosq
[
0
].
z
;
Real
minx
=
oldPosq
[
0
].
x
,
maxx
=
oldPosq
[
0
].
x
;
Real
miny
=
oldPosq
[
0
].
y
,
maxy
=
oldPosq
[
0
].
y
;
Real
minz
=
oldPosq
[
0
].
z
,
maxz
=
oldPosq
[
0
].
z
;
if
(
nonbonded
->
getUsePeriodic
())
{
if
(
nonbonded
->
getUsePeriodic
())
{
minx
=
miny
=
minz
=
0.0
;
minx
=
miny
=
minz
=
0.0
;
maxx
=
periodicBoxSize
.
x
;
maxx
=
periodicBoxSize
.
x
;
...
@@ -815,7 +936,7 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
...
@@ -815,7 +936,7 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
}
}
else
{
else
{
for
(
int
i
=
1
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
1
;
i
<
numAtoms
;
i
++
)
{
const
mm_float
4
&
pos
=
oldPosq
[
i
];
const
Real
4
&
pos
=
oldPosq
[
i
];
minx
=
min
(
minx
,
pos
.
x
);
minx
=
min
(
minx
,
pos
.
x
);
maxx
=
max
(
maxx
,
pos
.
x
);
maxx
=
max
(
maxx
,
pos
.
x
);
miny
=
min
(
miny
,
pos
.
y
);
miny
=
min
(
miny
,
pos
.
y
);
...
@@ -828,8 +949,9 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
...
@@ -828,8 +949,9 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
// Loop over each group of identical molecules and reorder them.
// Loop over each group of identical molecules and reorder them.
vector
<
int
>
originalIndex
(
numAtoms
);
vector
<
int
>
originalIndex
(
numAtoms
);
vector
<
mm_float4
>
newPosq
(
paddedNumAtoms
);
vector
<
Real4
>
newPosq
(
paddedNumAtoms
);
vector
<
mm_float4
>
newVelm
(
paddedNumAtoms
);
vector
<
Real4
>
newPosqCorrection
(
paddedNumAtoms
);
vector
<
Mixed4
>
newVelm
(
paddedNumAtoms
);
vector
<
mm_int4
>
newCellOffsets
(
numAtoms
);
vector
<
mm_int4
>
newCellOffsets
(
numAtoms
);
for
(
int
group
=
0
;
group
<
(
int
)
moleculeGroups
.
size
();
group
++
)
{
for
(
int
group
=
0
;
group
<
(
int
)
moleculeGroups
.
size
();
group
++
)
{
// Find the center of each molecule.
// Find the center of each molecule.
...
@@ -837,15 +959,15 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
...
@@ -837,15 +959,15 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
MoleculeGroup
&
mol
=
moleculeGroups
[
group
];
MoleculeGroup
&
mol
=
moleculeGroups
[
group
];
int
numMolecules
=
mol
.
offsets
.
size
();
int
numMolecules
=
mol
.
offsets
.
size
();
vector
<
int
>&
atoms
=
mol
.
atoms
;
vector
<
int
>&
atoms
=
mol
.
atoms
;
vector
<
mm_float
4
>
molPos
(
numMolecules
);
vector
<
Real
4
>
molPos
(
numMolecules
);
float
invNumAtoms
=
1.0
f
/
atoms
.
size
();
Real
invNumAtoms
=
(
Real
)
(
1.0
/
atoms
.
size
()
)
;
for
(
int
i
=
0
;
i
<
numMolecules
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numMolecules
;
i
++
)
{
molPos
[
i
].
x
=
0.0
f
;
molPos
[
i
].
x
=
0.0
f
;
molPos
[
i
].
y
=
0.0
f
;
molPos
[
i
].
y
=
0.0
f
;
molPos
[
i
].
z
=
0.0
f
;
molPos
[
i
].
z
=
0.0
f
;
for
(
int
j
=
0
;
j
<
(
int
)
atoms
.
size
();
j
++
)
{
for
(
int
j
=
0
;
j
<
(
int
)
atoms
.
size
();
j
++
)
{
int
atom
=
atoms
[
j
]
+
mol
.
offsets
[
i
];
int
atom
=
atoms
[
j
]
+
mol
.
offsets
[
i
];
const
mm_float
4
&
pos
=
oldPosq
[
atom
];
const
Real
4
&
pos
=
oldPosq
[
atom
];
molPos
[
i
].
x
+=
pos
.
x
;
molPos
[
i
].
x
+=
pos
.
x
;
molPos
[
i
].
y
+=
pos
.
y
;
molPos
[
i
].
y
+=
pos
.
y
;
molPos
[
i
].
z
+=
pos
.
z
;
molPos
[
i
].
z
+=
pos
.
z
;
...
@@ -861,9 +983,9 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
...
@@ -861,9 +983,9 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
int
xcell
=
(
int
)
floor
(
molPos
[
i
].
x
*
invPeriodicBoxSize
.
x
);
int
xcell
=
(
int
)
floor
(
molPos
[
i
].
x
*
invPeriodicBoxSize
.
x
);
int
ycell
=
(
int
)
floor
(
molPos
[
i
].
y
*
invPeriodicBoxSize
.
y
);
int
ycell
=
(
int
)
floor
(
molPos
[
i
].
y
*
invPeriodicBoxSize
.
y
);
int
zcell
=
(
int
)
floor
(
molPos
[
i
].
z
*
invPeriodicBoxSize
.
z
);
int
zcell
=
(
int
)
floor
(
molPos
[
i
].
z
*
invPeriodicBoxSize
.
z
);
float
dx
=
xcell
*
periodicBoxSize
.
x
;
Real
dx
=
xcell
*
periodicBoxSize
.
x
;
float
dy
=
ycell
*
periodicBoxSize
.
y
;
Real
dy
=
ycell
*
periodicBoxSize
.
y
;
float
dz
=
zcell
*
periodicBoxSize
.
z
;
Real
dz
=
zcell
*
periodicBoxSize
.
z
;
if
(
dx
!=
0.0
f
||
dy
!=
0.0
f
||
dz
!=
0.0
f
)
{
if
(
dx
!=
0.0
f
||
dy
!=
0.0
f
||
dz
!=
0.0
f
)
{
molPos
[
i
].
x
-=
dx
;
molPos
[
i
].
x
-=
dx
;
molPos
[
i
].
y
-=
dy
;
molPos
[
i
].
y
-=
dy
;
...
@@ -871,7 +993,7 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
...
@@ -871,7 +993,7 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
if
(
enforcePeriodic
)
{
if
(
enforcePeriodic
)
{
for
(
int
j
=
0
;
j
<
(
int
)
atoms
.
size
();
j
++
)
{
for
(
int
j
=
0
;
j
<
(
int
)
atoms
.
size
();
j
++
)
{
int
atom
=
atoms
[
j
]
+
mol
.
offsets
[
i
];
int
atom
=
atoms
[
j
]
+
mol
.
offsets
[
i
];
mm_float
4
p
=
oldPosq
[
atom
];
Real
4
p
=
oldPosq
[
atom
];
p
.
x
-=
dx
;
p
.
x
-=
dx
;
p
.
y
-=
dy
;
p
.
y
-=
dy
;
p
.
z
-=
dz
;
p
.
z
-=
dz
;
...
@@ -888,12 +1010,12 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
...
@@ -888,12 +1010,12 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
// Select a bin for each molecule, then sort them by bin.
// Select a bin for each molecule, then sort them by bin.
bool
useHilbert
=
(
numMolecules
>
5000
||
atoms
.
size
()
>
8
);
// For small systems, a simple zigzag curve works better than a Hilbert curve.
bool
useHilbert
=
(
numMolecules
>
5000
||
atoms
.
size
()
>
8
);
// For small systems, a simple zigzag curve works better than a Hilbert curve.
float
binWidth
;
Real
binWidth
;
if
(
useHilbert
)
if
(
useHilbert
)
binWidth
=
(
float
)
(
max
(
max
(
maxx
-
minx
,
maxy
-
miny
),
maxz
-
minz
)
/
255.0
);
binWidth
=
(
Real
)
(
max
(
max
(
maxx
-
minx
,
maxy
-
miny
),
maxz
-
minz
)
/
255.0
);
else
else
binWidth
=
(
float
)
(
0.2
*
nonbonded
->
getCutoffDistance
());
binWidth
=
(
Real
)
(
0.2
*
nonbonded
->
getCutoffDistance
());
float
invBinWidth
=
1.0
f
/
binWidth
;
Real
invBinWidth
=
(
Real
)
(
1.0
/
binWidth
)
;
int
xbins
=
1
+
(
int
)
((
maxx
-
minx
)
*
invBinWidth
);
int
xbins
=
1
+
(
int
)
((
maxx
-
minx
)
*
invBinWidth
);
int
ybins
=
1
+
(
int
)
((
maxy
-
miny
)
*
invBinWidth
);
int
ybins
=
1
+
(
int
)
((
maxy
-
miny
)
*
invBinWidth
);
vector
<
pair
<
int
,
int
>
>
molBins
(
numMolecules
);
vector
<
pair
<
int
,
int
>
>
molBins
(
numMolecules
);
...
@@ -928,6 +1050,8 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
...
@@ -928,6 +1050,8 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
int
newIndex
=
mol
.
offsets
[
i
]
+
atoms
[
j
];
int
newIndex
=
mol
.
offsets
[
i
]
+
atoms
[
j
];
originalIndex
[
newIndex
]
=
atomIndex
[
oldIndex
];
originalIndex
[
newIndex
]
=
atomIndex
[
oldIndex
];
newPosq
[
newIndex
]
=
oldPosq
[
oldIndex
];
newPosq
[
newIndex
]
=
oldPosq
[
oldIndex
];
if
(
useMixedPrecision
)
newPosqCorrection
[
newIndex
]
=
oldPosqCorrection
[
oldIndex
];
newVelm
[
newIndex
]
=
oldVelm
[
oldIndex
];
newVelm
[
newIndex
]
=
oldVelm
[
oldIndex
];
newCellOffsets
[
newIndex
]
=
posCellOffsets
[
oldIndex
];
newCellOffsets
[
newIndex
]
=
posCellOffsets
[
oldIndex
];
}
}
...
@@ -941,6 +1065,8 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
...
@@ -941,6 +1065,8 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
posCellOffsets
[
i
]
=
newCellOffsets
[
i
];
posCellOffsets
[
i
]
=
newCellOffsets
[
i
];
}
}
posq
->
upload
(
newPosq
);
posq
->
upload
(
newPosq
);
if
(
useMixedPrecision
)
posqCorrection
->
upload
(
newPosqCorrection
);
velm
->
upload
(
newVelm
);
velm
->
upload
(
newVelm
);
atomIndexDevice
->
upload
(
atomIndex
);
atomIndexDevice
->
upload
(
atomIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
reorderListeners
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
reorderListeners
.
size
();
i
++
)
...
...
platforms/opencl/src/OpenCLContext.h
View file @
8d6a2a01
...
@@ -62,7 +62,7 @@ struct mm_float2 {
...
@@ -62,7 +62,7 @@ struct mm_float2 {
mm_float2
(
cl_float
x
,
cl_float
y
)
:
x
(
x
),
y
(
y
)
{
mm_float2
(
cl_float
x
,
cl_float
y
)
:
x
(
x
),
y
(
y
)
{
}
}
};
};
struct
mm_float4
{
struct
mm_float4
{
cl_float
x
,
y
,
z
,
w
;
cl_float
x
,
y
,
z
,
w
;
mm_float4
()
{
mm_float4
()
{
}
}
...
@@ -87,6 +87,20 @@ struct mm_float16 {
...
@@ -87,6 +87,20 @@ struct mm_float16 {
s8
(
s8
),
s9
(
s9
),
s10
(
s10
),
s11
(
s11
),
s12
(
s12
),
s13
(
s13
),
s14
(
s14
),
s15
(
15
)
{
s8
(
s8
),
s9
(
s9
),
s10
(
s10
),
s11
(
s11
),
s12
(
s12
),
s13
(
s13
),
s14
(
s14
),
s15
(
15
)
{
}
}
};
};
struct
mm_double2
{
cl_double
x
,
y
;
mm_double2
()
{
}
mm_double2
(
cl_double
x
,
cl_double
y
)
:
x
(
x
),
y
(
y
)
{
}
};
struct
mm_double4
{
cl_double
x
,
y
,
z
,
w
;
mm_double4
()
{
}
mm_double4
(
cl_double
x
,
cl_double
y
,
cl_double
z
,
cl_double
w
)
:
x
(
x
),
y
(
y
),
z
(
z
),
w
(
w
)
{
}
};
struct
mm_ushort2
{
struct
mm_ushort2
{
cl_ushort
x
,
y
;
cl_ushort
x
,
y
;
mm_ushort2
()
{
mm_ushort2
()
{
...
@@ -145,7 +159,7 @@ public:
...
@@ -145,7 +159,7 @@ public:
class
ReorderListener
;
class
ReorderListener
;
static
const
int
ThreadBlockSize
;
static
const
int
ThreadBlockSize
;
static
const
int
TileSize
;
static
const
int
TileSize
;
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
OpenCLPlatform
::
PlatformData
&
platformData
);
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
const
std
::
string
&
precision
,
OpenCLPlatform
::
PlatformData
&
platformData
);
~
OpenCLContext
();
~
OpenCLContext
();
/**
/**
* This is called to initialize internal data structures after all Forces in the system
* This is called to initialize internal data structures after all Forces in the system
...
@@ -198,6 +212,12 @@ public:
...
@@ -198,6 +212,12 @@ public:
OpenCLArray
&
getPosq
()
{
OpenCLArray
&
getPosq
()
{
return
*
posq
;
return
*
posq
;
}
}
/**
* Get the array which contains a correction to the position of each atom. This only exists if getUseMixedPrecision() returns true.
*/
OpenCLArray
&
getPosqCorrection
()
{
return
*
posqCorrection
;
}
/**
/**
* Get the array which contains the velocity (the xyz components) and inverse mass (the w component) of each atom.
* Get the array which contains the velocity (the xyz components) and inverse mass (the w component) of each atom.
*/
*/
...
@@ -405,18 +425,38 @@ public:
...
@@ -405,18 +425,38 @@ public:
bool
getSupportsDoublePrecision
()
{
bool
getSupportsDoublePrecision
()
{
return
supportsDoublePrecision
;
return
supportsDoublePrecision
;
}
}
/**
* Get whether double precision is being used.
*/
bool
getUseDoublePrecision
()
{
return
useDoublePrecision
;
}
/**
* Get whether mixed precision is being used.
*/
bool
getUseMixedPrecision
()
{
return
useMixedPrecision
;
}
/**
/**
* Get the size of the periodic box.
* Get the size of the periodic box.
*/
*/
mm_float4
getPeriodicBoxSize
()
const
{
mm_float4
getPeriodicBoxSize
()
const
{
return
periodicBoxSize
;
return
periodicBoxSize
;
}
}
/**
* Get the size of the periodic box.
*/
mm_double4
getPeriodicBoxSizeDouble
()
const
{
return
periodicBoxSizeDouble
;
}
/**
/**
* Set the size of the periodic box.
* Set the size of the periodic box.
*/
*/
void
setPeriodicBoxSize
(
double
xsize
,
double
ysize
,
double
zsize
)
{
void
setPeriodicBoxSize
(
double
xsize
,
double
ysize
,
double
zsize
)
{
periodicBoxSize
=
mm_float4
((
float
)
xsize
,
(
float
)
ysize
,
(
float
)
zsize
,
0
);
periodicBoxSize
=
mm_float4
((
float
)
xsize
,
(
float
)
ysize
,
(
float
)
zsize
,
0
);
invPeriodicBoxSize
=
mm_float4
((
float
)
(
1.0
/
xsize
),
(
float
)
(
1.0
/
ysize
),
(
float
)
(
1.0
/
zsize
),
0
);
invPeriodicBoxSize
=
mm_float4
((
float
)
(
1.0
/
xsize
),
(
float
)
(
1.0
/
ysize
),
(
float
)
(
1.0
/
zsize
),
0
);
periodicBoxSizeDouble
=
mm_double4
(
xsize
,
ysize
,
zsize
,
0
);
invPeriodicBoxSizeDouble
=
mm_double4
(
1.0
/
xsize
,
1.0
/
ysize
,
1.0
/
zsize
,
0
);
}
}
/**
/**
* Get the inverse of the size of the periodic box.
* Get the inverse of the size of the periodic box.
...
@@ -424,6 +464,12 @@ public:
...
@@ -424,6 +464,12 @@ public:
mm_float4
getInvPeriodicBoxSize
()
const
{
mm_float4
getInvPeriodicBoxSize
()
const
{
return
invPeriodicBoxSize
;
return
invPeriodicBoxSize
;
}
}
/**
* Get the inverse of the size of the periodic box.
*/
mm_double4
getInvPeriodicBoxSizeDouble
()
const
{
return
invPeriodicBoxSizeDouble
;
}
/**
/**
* Get the OpenCLIntegrationUtilities for this context.
* Get the OpenCLIntegrationUtilities for this context.
*/
*/
...
@@ -502,6 +548,11 @@ private:
...
@@ -502,6 +548,11 @@ private:
* of molecules and resort the atoms.
* of molecules and resort the atoms.
*/
*/
void
validateMolecules
();
void
validateMolecules
();
/**
* This is the internal implementation of reorderAtoms(), templatized by the numerical precision in use.
*/
template
<
class
Real
,
class
Real4
,
class
Mixed
,
class
Mixed4
>
void
reorderAtomsImpl
(
bool
enforcePeriodic
);
const
System
&
system
;
const
System
&
system
;
double
time
;
double
time
;
OpenCLPlatform
::
PlatformData
&
platformData
;
OpenCLPlatform
::
PlatformData
&
platformData
;
...
@@ -515,9 +566,9 @@ private:
...
@@ -515,9 +566,9 @@ private:
int
numThreadBlocks
;
int
numThreadBlocks
;
int
numForceBuffers
;
int
numForceBuffers
;
int
simdWidth
;
int
simdWidth
;
bool
supports64BitGlobalAtomics
,
supportsDoublePrecision
,
atomsWereReordered
,
moleculesInvalid
;
bool
supports64BitGlobalAtomics
,
supportsDoublePrecision
,
useDoublePrecision
,
useMixedPrecision
,
atomsWereReordered
,
moleculesInvalid
;
mm_float4
periodicBoxSize
;
mm_float4
periodicBoxSize
,
invPeriodicBoxSize
;
mm_
float4
invPeriodicBoxSize
;
mm_
double4
periodicBoxSizeDouble
,
invPeriodicBoxSize
Double
;
std
::
string
defaultOptimizationOptions
;
std
::
string
defaultOptimizationOptions
;
std
::
map
<
std
::
string
,
std
::
string
>
compilationDefines
;
std
::
map
<
std
::
string
,
std
::
string
>
compilationDefines
;
cl
::
Context
context
;
cl
::
Context
context
;
...
@@ -538,6 +589,7 @@ private:
...
@@ -538,6 +589,7 @@ private:
cl
::
Buffer
*
pinnedBuffer
;
cl
::
Buffer
*
pinnedBuffer
;
void
*
pinnedMemory
;
void
*
pinnedMemory
;
OpenCLArray
*
posq
;
OpenCLArray
*
posq
;
OpenCLArray
*
posqCorrection
;
OpenCLArray
*
velm
;
OpenCLArray
*
velm
;
OpenCLArray
*
force
;
OpenCLArray
*
force
;
OpenCLArray
*
forceBuffers
;
OpenCLArray
*
forceBuffers
;
...
...
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
View file @
8d6a2a01
...
@@ -87,6 +87,13 @@ struct OpenCLIntegrationUtilities::ConstraintOrderer : public binary_function<in
...
@@ -87,6 +87,13 @@ struct OpenCLIntegrationUtilities::ConstraintOrderer : public binary_function<in
}
}
};
};
static
void
setPosqCorrectionArg
(
OpenCLContext
&
cl
,
cl
::
Kernel
&
kernel
,
int
index
)
{
if
(
cl
.
getUseMixedPrecision
())
kernel
.
setArg
<
cl
::
Buffer
>
(
index
,
cl
.
getPosqCorrection
().
getDeviceBuffer
());
else
kernel
.
setArg
<
void
*>
(
index
,
NULL
);
}
OpenCLIntegrationUtilities
::
OpenCLIntegrationUtilities
(
OpenCLContext
&
context
,
const
System
&
system
)
:
context
(
context
),
OpenCLIntegrationUtilities
::
OpenCLIntegrationUtilities
(
OpenCLContext
&
context
,
const
System
&
system
)
:
context
(
context
),
posDelta
(
NULL
),
settleAtoms
(
NULL
),
settleParams
(
NULL
),
shakeAtoms
(
NULL
),
shakeParams
(
NULL
),
posDelta
(
NULL
),
settleAtoms
(
NULL
),
settleParams
(
NULL
),
shakeAtoms
(
NULL
),
shakeParams
(
NULL
),
random
(
NULL
),
randomSeed
(
NULL
),
randomPos
(
0
),
stepSize
(
NULL
),
ccmaAtoms
(
NULL
),
ccmaDistance
(
NULL
),
random
(
NULL
),
randomSeed
(
NULL
),
randomPos
(
0
),
stepSize
(
NULL
),
ccmaAtoms
(
NULL
),
ccmaDistance
(
NULL
),
...
@@ -96,12 +103,22 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
...
@@ -96,12 +103,22 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vsiteOutOfPlaneAtoms
(
NULL
),
vsiteOutOfPlaneWeights
(
NULL
),
hasInitializedPosConstraintKernels
(
false
),
hasInitializedVelConstraintKernels
(
false
)
{
vsiteOutOfPlaneAtoms
(
NULL
),
vsiteOutOfPlaneWeights
(
NULL
),
hasInitializedPosConstraintKernels
(
false
),
hasInitializedVelConstraintKernels
(
false
)
{
// Create workspace arrays.
// Create workspace arrays.
if
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
())
{
posDelta
=
OpenCLArray
::
create
<
mm_double4
>
(
context
,
context
.
getPaddedNumAtoms
(),
"posDelta"
);
vector
<
mm_double4
>
deltas
(
posDelta
->
getSize
(),
mm_double4
(
0.0
,
0.0
,
0.0
,
0.0
));
posDelta
->
upload
(
deltas
);
stepSize
=
OpenCLArray
::
create
<
mm_double2
>
(
context
,
1
,
"stepSize"
);
vector
<
mm_double2
>
step
(
1
,
mm_double2
(
0.0
,
0.0
));
stepSize
->
upload
(
step
);
}
else
{
posDelta
=
OpenCLArray
::
create
<
mm_float4
>
(
context
,
context
.
getPaddedNumAtoms
(),
"posDelta"
);
posDelta
=
OpenCLArray
::
create
<
mm_float4
>
(
context
,
context
.
getPaddedNumAtoms
(),
"posDelta"
);
vector
<
mm_float4
>
deltas
(
posDelta
->
getSize
(),
mm_float4
(
0.0
,
0.0
,
0.0
,
0.0
));
vector
<
mm_float4
>
deltas
(
posDelta
->
getSize
(),
mm_float4
(
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
));
posDelta
->
upload
(
deltas
);
posDelta
->
upload
(
deltas
);
stepSize
=
OpenCLArray
::
create
<
mm_float2
>
(
context
,
1
,
"stepSize"
);
stepSize
=
OpenCLArray
::
create
<
mm_float2
>
(
context
,
1
,
"stepSize"
);
vector
<
mm_float2
>
step
(
1
,
mm_float2
(
0.0
f
,
0.0
f
));
vector
<
mm_float2
>
step
(
1
,
mm_float2
(
0.0
f
,
0.0
f
));
stepSize
->
upload
(
step
);
stepSize
->
upload
(
step
);
}
// Create kernels for enforcing constraints.
// Create kernels for enforcing constraints.
...
@@ -458,23 +475,57 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
...
@@ -458,23 +475,57 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
// Record the CCMA data structures.
// Record the CCMA data structures.
ccmaAtoms
=
OpenCLArray
::
create
<
mm_int2
>
(
context
,
numCCMA
,
"CcmaAtoms"
);
ccmaAtoms
=
OpenCLArray
::
create
<
mm_int2
>
(
context
,
numCCMA
,
"CcmaAtoms"
);
ccmaDistance
=
OpenCLArray
::
create
<
mm_float4
>
(
context
,
numCCMA
,
"CcmaDistance"
);
ccmaAtomConstraints
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
numAtoms
*
maxAtomConstraints
,
"CcmaAtomConstraints"
);
ccmaAtomConstraints
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
numAtoms
*
maxAtomConstraints
,
"CcmaAtomConstraints"
);
ccmaNumAtomConstraints
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
numAtoms
,
"CcmaAtomConstraintsIndex"
);
ccmaNumAtomConstraints
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
numAtoms
,
"CcmaAtomConstraintsIndex"
);
ccmaDelta1
=
OpenCLArray
::
create
<
cl_float
>
(
context
,
numCCMA
,
"CcmaDelta1"
);
ccmaConstraintMatrixColumn
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixColumn"
);
ccmaDelta2
=
OpenCLArray
::
create
<
cl_float
>
(
context
,
numCCMA
,
"CcmaDelta2"
);
ccmaConverged
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
2
,
"CcmaConverged"
);
ccmaConverged
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
2
,
"CcmaConverged"
);
ccmaConvergedBuffer
=
new
cl
::
Buffer
(
context
.
getContext
(),
CL_MEM_ALLOC_HOST_PTR
,
2
*
sizeof
(
cl_int
));
ccmaConvergedBuffer
=
new
cl
::
Buffer
(
context
.
getContext
(),
CL_MEM_ALLOC_HOST_PTR
,
2
*
sizeof
(
cl_int
));
ccmaConvergedMemory
=
(
cl_int
*
)
context
.
getQueue
().
enqueueMapBuffer
(
*
ccmaConvergedBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
2
*
sizeof
(
cl_int
));
ccmaConvergedMemory
=
(
cl_int
*
)
context
.
getQueue
().
enqueueMapBuffer
(
*
ccmaConvergedBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
2
*
sizeof
(
cl_int
));
ccmaReducedMass
=
OpenCLArray
::
create
<
cl_float
>
(
context
,
numCCMA
,
"CcmaReducedMass"
);
ccmaConstraintMatrixColumn
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixColumn"
);
ccmaConstraintMatrixValue
=
OpenCLArray
::
create
<
cl_float
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixValue"
);
vector
<
mm_int2
>
atomsVec
(
ccmaAtoms
->
getSize
());
vector
<
mm_int2
>
atomsVec
(
ccmaAtoms
->
getSize
());
vector
<
mm_float4
>
distanceVec
(
ccmaDistance
->
getSize
());
vector
<
cl_int
>
atomConstraintsVec
(
ccmaAtomConstraints
->
getSize
());
vector
<
cl_int
>
atomConstraintsVec
(
ccmaAtomConstraints
->
getSize
());
vector
<
cl_int
>
numAtomConstraintsVec
(
ccmaNumAtomConstraints
->
getSize
());
vector
<
cl_int
>
numAtomConstraintsVec
(
ccmaNumAtomConstraints
->
getSize
());
vector
<
cl_float
>
reducedMassVec
(
ccmaReducedMass
->
getSize
());
vector
<
cl_int
>
constraintMatrixColumnVec
(
ccmaConstraintMatrixColumn
->
getSize
());
vector
<
cl_int
>
constraintMatrixColumnVec
(
ccmaConstraintMatrixColumn
->
getSize
());
if
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
())
{
ccmaDistance
=
OpenCLArray
::
create
<
mm_double4
>
(
context
,
numCCMA
,
"CcmaDistance"
);
ccmaDelta1
=
OpenCLArray
::
create
<
cl_double
>
(
context
,
numCCMA
,
"CcmaDelta1"
);
ccmaDelta2
=
OpenCLArray
::
create
<
cl_double
>
(
context
,
numCCMA
,
"CcmaDelta2"
);
ccmaReducedMass
=
OpenCLArray
::
create
<
cl_double
>
(
context
,
numCCMA
,
"CcmaReducedMass"
);
ccmaConstraintMatrixValue
=
OpenCLArray
::
create
<
cl_double
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixValue"
);
vector
<
mm_double4
>
distanceVec
(
ccmaDistance
->
getSize
());
vector
<
cl_double
>
reducedMassVec
(
ccmaReducedMass
->
getSize
());
vector
<
cl_double
>
constraintMatrixValueVec
(
ccmaConstraintMatrixValue
->
getSize
());
for
(
int
i
=
0
;
i
<
numCCMA
;
i
++
)
{
int
index
=
constraintOrder
[
i
];
int
c
=
ccmaConstraints
[
index
];
atomsVec
[
i
].
x
=
atom1
[
c
];
atomsVec
[
i
].
y
=
atom2
[
c
];
distanceVec
[
i
].
w
=
distance
[
c
];
reducedMassVec
[
i
]
=
(
0.5
/
(
1.0
/
system
.
getParticleMass
(
atom1
[
c
])
+
1.0
/
system
.
getParticleMass
(
atom2
[
c
])));
for
(
unsigned
int
j
=
0
;
j
<
matrix
[
index
].
size
();
j
++
)
{
constraintMatrixColumnVec
[
i
+
j
*
numCCMA
]
=
matrix
[
index
][
j
].
first
;
constraintMatrixValueVec
[
i
+
j
*
numCCMA
]
=
matrix
[
index
][
j
].
second
;
}
constraintMatrixColumnVec
[
i
+
matrix
[
index
].
size
()
*
numCCMA
]
=
numCCMA
;
}
for
(
unsigned
int
i
=
0
;
i
<
atomConstraints
.
size
();
i
++
)
{
numAtomConstraintsVec
[
i
]
=
atomConstraints
[
i
].
size
();
for
(
unsigned
int
j
=
0
;
j
<
atomConstraints
[
i
].
size
();
j
++
)
{
bool
forward
=
(
atom1
[
ccmaConstraints
[
atomConstraints
[
i
][
j
]]]
==
i
);
atomConstraintsVec
[
i
+
j
*
numAtoms
]
=
(
forward
?
inverseOrder
[
atomConstraints
[
i
][
j
]]
+
1
:
-
inverseOrder
[
atomConstraints
[
i
][
j
]]
-
1
);
}
}
ccmaDistance
->
upload
(
distanceVec
);
ccmaReducedMass
->
upload
(
reducedMassVec
);
ccmaConstraintMatrixValue
->
upload
(
constraintMatrixValueVec
);
}
else
{
ccmaDistance
=
OpenCLArray
::
create
<
mm_float4
>
(
context
,
numCCMA
,
"CcmaDistance"
);
ccmaDelta1
=
OpenCLArray
::
create
<
cl_float
>
(
context
,
numCCMA
,
"CcmaDelta1"
);
ccmaDelta2
=
OpenCLArray
::
create
<
cl_float
>
(
context
,
numCCMA
,
"CcmaDelta2"
);
ccmaReducedMass
=
OpenCLArray
::
create
<
cl_float
>
(
context
,
numCCMA
,
"CcmaReducedMass"
);
ccmaConstraintMatrixValue
=
OpenCLArray
::
create
<
cl_float
>
(
context
,
numCCMA
*
maxRowElements
,
"ConstraintMatrixValue"
);
vector
<
mm_float4
>
distanceVec
(
ccmaDistance
->
getSize
());
vector
<
cl_float
>
reducedMassVec
(
ccmaReducedMass
->
getSize
());
vector
<
cl_float
>
constraintMatrixValueVec
(
ccmaConstraintMatrixValue
->
getSize
());
vector
<
cl_float
>
constraintMatrixValueVec
(
ccmaConstraintMatrixValue
->
getSize
());
for
(
int
i
=
0
;
i
<
numCCMA
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numCCMA
;
i
++
)
{
int
index
=
constraintOrder
[
i
];
int
index
=
constraintOrder
[
i
];
...
@@ -496,13 +547,14 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
...
@@ -496,13 +547,14 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
atomConstraintsVec
[
i
+
j
*
numAtoms
]
=
(
forward
?
inverseOrder
[
atomConstraints
[
i
][
j
]]
+
1
:
-
inverseOrder
[
atomConstraints
[
i
][
j
]]
-
1
);
atomConstraintsVec
[
i
+
j
*
numAtoms
]
=
(
forward
?
inverseOrder
[
atomConstraints
[
i
][
j
]]
+
1
:
-
inverseOrder
[
atomConstraints
[
i
][
j
]]
-
1
);
}
}
}
}
ccmaAtoms
->
upload
(
atomsVec
);
ccmaDistance
->
upload
(
distanceVec
);
ccmaDistance
->
upload
(
distanceVec
);
ccmaReducedMass
->
upload
(
reducedMassVec
);
ccmaConstraintMatrixValue
->
upload
(
constraintMatrixValueVec
);
}
ccmaAtoms
->
upload
(
atomsVec
);
ccmaAtomConstraints
->
upload
(
atomConstraintsVec
);
ccmaAtomConstraints
->
upload
(
atomConstraintsVec
);
ccmaNumAtomConstraints
->
upload
(
numAtomConstraintsVec
);
ccmaNumAtomConstraints
->
upload
(
numAtomConstraintsVec
);
ccmaReducedMass
->
upload
(
reducedMassVec
);
ccmaConstraintMatrixColumn
->
upload
(
constraintMatrixColumnVec
);
ccmaConstraintMatrixColumn
->
upload
(
constraintMatrixColumnVec
);
ccmaConstraintMatrixValue
->
upload
(
constraintMatrixValueVec
);
// Create the CCMA kernels.
// Create the CCMA kernels.
...
@@ -584,21 +636,23 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
...
@@ -584,21 +636,23 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
cl
::
Program
vsiteProgram
=
context
.
createProgram
(
OpenCLKernelSources
::
virtualSites
,
defines
);
cl
::
Program
vsiteProgram
=
context
.
createProgram
(
OpenCLKernelSources
::
virtualSites
,
defines
);
vsitePositionKernel
=
cl
::
Kernel
(
vsiteProgram
,
"computeVirtualSites"
);
vsitePositionKernel
=
cl
::
Kernel
(
vsiteProgram
,
"computeVirtualSites"
);
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
context
.
getPosq
().
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
context
.
getPosq
().
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
vsite2AvgAtoms
->
getDeviceBuffer
());
setPosqCorrectionArg
(
context
,
vsitePositionKernel
,
1
);
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
vsite2AvgWeights
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
vsite2AvgAtoms
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
vsite3AvgAtoms
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
vsite2AvgWeights
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
vsite3AvgWeights
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
vsite3AvgAtoms
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
vsiteOutOfPlaneAtoms
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
vsite3AvgWeights
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
vsiteOutOfPlaneWeights
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
vsiteOutOfPlaneAtoms
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
vsiteOutOfPlaneWeights
->
getDeviceBuffer
());
vsiteForceKernel
=
cl
::
Kernel
(
vsiteProgram
,
"distributeForces"
);
vsiteForceKernel
=
cl
::
Kernel
(
vsiteProgram
,
"distributeForces"
);
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
context
.
getPosq
().
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
context
.
getPosq
().
getDeviceBuffer
());
// Skip argument 1: the force array hasn't been created yet.
setPosqCorrectionArg
(
context
,
vsiteForceKernel
,
1
);
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
vsite2AvgAtoms
->
getDeviceBuffer
());
// Skip argument 2: the force array hasn't been created yet.
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
vsite2AvgWeights
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
vsite2AvgAtoms
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
vsite3AvgAtoms
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
vsite2AvgWeights
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
vsite3AvgWeights
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
vsite3AvgAtoms
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
vsiteOutOfPlaneAtoms
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
vsite3AvgWeights
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
vsiteOutOfPlaneWeights
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
vsiteOutOfPlaneAtoms
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
vsiteOutOfPlaneWeights
->
getDeviceBuffer
());
numVsites
=
num2Avg
+
num3Avg
+
numOutOfPlane
;
numVsites
=
num2Avg
+
num3Avg
+
numOutOfPlane
;
}
}
...
@@ -686,11 +740,18 @@ void OpenCLIntegrationUtilities::applyConstraints(bool constrainVelocities, doub
...
@@ -686,11 +740,18 @@ void OpenCLIntegrationUtilities::applyConstraints(bool constrainVelocities, doub
if
(
!
hasInitialized
)
{
if
(
!
hasInitialized
)
{
settleKernel
.
setArg
<
cl_int
>
(
0
,
settleAtoms
->
getSize
());
settleKernel
.
setArg
<
cl_int
>
(
0
,
settleAtoms
->
getSize
());
settleKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
context
.
getPosq
().
getDeviceBuffer
());
settleKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
context
.
getPosq
().
getDeviceBuffer
());
settleKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
posDelta
->
getDeviceBuffer
());
if
(
context
.
getUseMixedPrecision
())
settleKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
context
.
getVelm
().
getDeviceBuffer
());
settleKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
context
.
getPosqCorrection
().
getDeviceBuffer
());
settleKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
settleAtoms
->
getDeviceBuffer
());
else
settleKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
settleParams
->
getDeviceBuffer
());
settleKernel
.
setArg
<
void
*>
(
3
,
NULL
);
}
settleKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
posDelta
->
getDeviceBuffer
());
settleKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
context
.
getVelm
().
getDeviceBuffer
());
settleKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
settleAtoms
->
getDeviceBuffer
());
settleKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
settleParams
->
getDeviceBuffer
());
}
if
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
())
settleKernel
.
setArg
<
cl_double
>
(
1
,
(
cl_double
)
tol
);
else
settleKernel
.
setArg
<
cl_float
>
(
1
,
(
cl_float
)
tol
);
settleKernel
.
setArg
<
cl_float
>
(
1
,
(
cl_float
)
tol
);
context
.
executeKernel
(
settleKernel
,
settleAtoms
->
getSize
());
context
.
executeKernel
(
settleKernel
,
settleAtoms
->
getSize
());
}
}
...
@@ -698,10 +759,17 @@ void OpenCLIntegrationUtilities::applyConstraints(bool constrainVelocities, doub
...
@@ -698,10 +759,17 @@ void OpenCLIntegrationUtilities::applyConstraints(bool constrainVelocities, doub
if
(
!
hasInitialized
)
{
if
(
!
hasInitialized
)
{
shakeKernel
.
setArg
<
cl_int
>
(
0
,
shakeAtoms
->
getSize
());
shakeKernel
.
setArg
<
cl_int
>
(
0
,
shakeAtoms
->
getSize
());
shakeKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
context
.
getPosq
().
getDeviceBuffer
());
shakeKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
context
.
getPosq
().
getDeviceBuffer
());
shakeKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
constrainVelocities
?
context
.
getVelm
().
getDeviceBuffer
()
:
posDelta
->
getDeviceBuffer
());
if
(
context
.
getUseMixedPrecision
())
shakeKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
shakeAtoms
->
getDeviceBuffer
());
shakeKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
context
.
getPosqCorrection
().
getDeviceBuffer
());
shakeKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
shakeParams
->
getDeviceBuffer
());
else
shakeKernel
.
setArg
<
void
*>
(
3
,
NULL
);
shakeKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
constrainVelocities
?
context
.
getVelm
().
getDeviceBuffer
()
:
posDelta
->
getDeviceBuffer
());
shakeKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
shakeAtoms
->
getDeviceBuffer
());
shakeKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
shakeParams
->
getDeviceBuffer
());
}
}
if
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
())
shakeKernel
.
setArg
<
cl_double
>
(
1
,
(
cl_double
)
tol
);
else
shakeKernel
.
setArg
<
cl_float
>
(
1
,
(
cl_float
)
tol
);
shakeKernel
.
setArg
<
cl_float
>
(
1
,
(
cl_float
)
tol
);
context
.
executeKernel
(
shakeKernel
,
shakeAtoms
->
getSize
());
context
.
executeKernel
(
shakeKernel
,
shakeAtoms
->
getSize
());
}
}
...
@@ -710,6 +778,10 @@ void OpenCLIntegrationUtilities::applyConstraints(bool constrainVelocities, doub
...
@@ -710,6 +778,10 @@ void OpenCLIntegrationUtilities::applyConstraints(bool constrainVelocities, doub
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaAtoms
->
getDeviceBuffer
());
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaAtoms
->
getDeviceBuffer
());
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaDistance
->
getDeviceBuffer
());
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaDistance
->
getDeviceBuffer
());
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
context
.
getPosq
().
getDeviceBuffer
());
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
context
.
getPosq
().
getDeviceBuffer
());
if
(
context
.
getUseMixedPrecision
())
ccmaDirectionsKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
context
.
getPosqCorrection
().
getDeviceBuffer
());
else
ccmaDirectionsKernel
.
setArg
<
void
*>
(
3
,
NULL
);
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaAtoms
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
ccmaAtoms
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaDistance
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
ccmaDistance
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
constrainVelocities
?
context
.
getVelm
().
getDeviceBuffer
()
:
posDelta
->
getDeviceBuffer
());
ccmaForceKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
constrainVelocities
?
context
.
getVelm
().
getDeviceBuffer
()
:
posDelta
->
getDeviceBuffer
());
...
@@ -730,6 +802,9 @@ void OpenCLIntegrationUtilities::applyConstraints(bool constrainVelocities, doub
...
@@ -730,6 +802,9 @@ void OpenCLIntegrationUtilities::applyConstraints(bool constrainVelocities, doub
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
ccmaDelta2
->
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
ccmaDelta2
->
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
ccmaConverged
->
getDeviceBuffer
());
ccmaUpdateKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
ccmaConverged
->
getDeviceBuffer
());
}
}
if
(
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
())
ccmaForceKernel
.
setArg
<
cl_double
>
(
6
,
(
cl_double
)
tol
);
else
ccmaForceKernel
.
setArg
<
cl_float
>
(
6
,
(
cl_float
)
tol
);
ccmaForceKernel
.
setArg
<
cl_float
>
(
6
,
(
cl_float
)
tol
);
context
.
executeKernel
(
ccmaDirectionsKernel
,
ccmaAtoms
->
getSize
());
context
.
executeKernel
(
ccmaDirectionsKernel
,
ccmaAtoms
->
getSize
());
const
int
checkInterval
=
4
;
const
int
checkInterval
=
4
;
...
@@ -764,7 +839,7 @@ void OpenCLIntegrationUtilities::computeVirtualSites() {
...
@@ -764,7 +839,7 @@ void OpenCLIntegrationUtilities::computeVirtualSites() {
void
OpenCLIntegrationUtilities
::
distributeForcesFromVirtualSites
()
{
void
OpenCLIntegrationUtilities
::
distributeForcesFromVirtualSites
()
{
if
(
numVsites
>
0
)
{
if
(
numVsites
>
0
)
{
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
context
.
getForce
().
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
context
.
getForce
().
getDeviceBuffer
());
context
.
executeKernel
(
vsiteForceKernel
,
numVsites
);
context
.
executeKernel
(
vsiteForceKernel
,
numVsites
);
}
}
}
}
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
8d6a2a01
This diff is collapsed.
Click to expand it.
platforms/opencl/src/OpenCLKernels.h
View file @
8d6a2a01
...
@@ -1145,7 +1145,10 @@ private:
...
@@ -1145,7 +1145,10 @@ private:
OpenCLArray
*
uniformRandoms
;
OpenCLArray
*
uniformRandoms
;
OpenCLArray
*
randomSeed
;
OpenCLArray
*
randomSeed
;
OpenCLParameterSet
*
perDofValues
;
OpenCLParameterSet
*
perDofValues
;
mutable
std
::
vector
<
std
::
vector
<
cl_float
>
>
localPerDofValues
;
mutable
std
::
vector
<
std
::
vector
<
cl_float
>
>
localPerDofValuesFloat
;
mutable
std
::
vector
<
std
::
vector
<
cl_double
>
>
localPerDofValuesDouble
;
std
::
vector
<
float
>
contextValuesFloat
;
std
::
vector
<
double
>
contextValuesDouble
;
std
::
vector
<
float
>
contextValues
;
std
::
vector
<
float
>
contextValues
;
std
::
vector
<
std
::
vector
<
cl
::
Kernel
>
>
kernels
;
std
::
vector
<
std
::
vector
<
cl
::
Kernel
>
>
kernels
;
cl
::
Kernel
sumEnergyKernel
,
randomKernel
;
cl
::
Kernel
sumEnergyKernel
,
randomKernel
;
...
...
platforms/opencl/src/OpenCLParameterSet.cpp
View file @
8d6a2a01
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009 Stanford University and the Authors.
*
* Portions copyright (c) 2009
-2012
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -32,32 +32,34 @@
...
@@ -32,32 +32,34 @@
using
namespace
OpenMM
;
using
namespace
OpenMM
;
using
namespace
std
;
using
namespace
std
;
OpenCLParameterSet
::
OpenCLParameterSet
(
OpenCLContext
&
context
,
int
numParameters
,
int
numObjects
,
const
string
&
name
,
bool
bufferPerParameter
)
:
OpenCLParameterSet
::
OpenCLParameterSet
(
OpenCLContext
&
context
,
int
numParameters
,
int
numObjects
,
const
string
&
name
,
bool
bufferPerParameter
,
bool
useDoublePrecision
)
:
context
(
context
),
numParameters
(
numParameters
),
numObjects
(
numObjects
),
name
(
name
)
{
context
(
context
),
numParameters
(
numParameters
),
numObjects
(
numObjects
),
name
(
name
)
{
int
params
=
numParameters
;
int
params
=
numParameters
;
int
bufferCount
=
0
;
int
bufferCount
=
0
;
elementSize
=
(
useDoublePrecision
?
sizeof
(
double
)
:
sizeof
(
float
));
string
elementType
=
(
useDoublePrecision
?
"double"
:
"float"
);
try
{
try
{
if
(
!
bufferPerParameter
)
{
if
(
!
bufferPerParameter
)
{
while
(
params
>
2
)
{
while
(
params
>
2
)
{
cl
::
Buffer
*
buf
=
new
cl
::
Buffer
(
context
.
getContext
(),
CL_MEM_READ_WRITE
,
numObjects
*
sizeof
(
mm_float4
)
);
cl
::
Buffer
*
buf
=
new
cl
::
Buffer
(
context
.
getContext
(),
CL_MEM_READ_WRITE
,
numObjects
*
elementSize
*
4
);
std
::
stringstream
name
;
std
::
stringstream
name
;
name
<<
"param"
<<
(
++
bufferCount
);
name
<<
"param"
<<
(
++
bufferCount
);
buffers
.
push_back
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
"float"
,
4
,
sizeof
(
mm_float4
)
,
*
buf
));
buffers
.
push_back
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
elementType
,
4
,
elementSize
*
4
,
*
buf
));
params
-=
4
;
params
-=
4
;
}
}
if
(
params
>
1
)
{
if
(
params
>
1
)
{
cl
::
Buffer
*
buf
=
new
cl
::
Buffer
(
context
.
getContext
(),
CL_MEM_READ_WRITE
,
numObjects
*
sizeof
(
mm_float2
)
);
cl
::
Buffer
*
buf
=
new
cl
::
Buffer
(
context
.
getContext
(),
CL_MEM_READ_WRITE
,
numObjects
*
elementSize
*
2
);
std
::
stringstream
name
;
std
::
stringstream
name
;
name
<<
"param"
<<
(
++
bufferCount
);
name
<<
"param"
<<
(
++
bufferCount
);
buffers
.
push_back
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
"float"
,
2
,
sizeof
(
mm_float2
)
,
*
buf
));
buffers
.
push_back
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
elementType
,
2
,
elementSize
*
2
,
*
buf
));
params
-=
2
;
params
-=
2
;
}
}
}
}
while
(
params
>
0
)
{
while
(
params
>
0
)
{
cl
::
Buffer
*
buf
=
new
cl
::
Buffer
(
context
.
getContext
(),
CL_MEM_READ_WRITE
,
numObjects
*
sizeof
(
cl_float
)
);
cl
::
Buffer
*
buf
=
new
cl
::
Buffer
(
context
.
getContext
(),
CL_MEM_READ_WRITE
,
numObjects
*
elementSize
);
std
::
stringstream
name
;
std
::
stringstream
name
;
name
<<
"param"
<<
(
++
bufferCount
);
name
<<
"param"
<<
(
++
bufferCount
);
buffers
.
push_back
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
"float"
,
1
,
sizeof
(
cl_float
)
,
*
buf
));
buffers
.
push_back
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
elementType
,
1
,
elementSize
,
*
buf
));
params
--
;
params
--
;
}
}
}
}
...
@@ -73,39 +75,42 @@ OpenCLParameterSet::~OpenCLParameterSet() {
...
@@ -73,39 +75,42 @@ OpenCLParameterSet::~OpenCLParameterSet() {
delete
&
buffers
[
i
].
getMemory
();
delete
&
buffers
[
i
].
getMemory
();
}
}
void
OpenCLParameterSet
::
getParameterValues
(
vector
<
vector
<
cl_float
>
>&
values
)
const
{
template
<
class
T
>
void
OpenCLParameterSet
::
getParameterValues
(
vector
<
vector
<
T
>
>&
values
)
const
{
if
(
sizeof
(
T
)
!=
elementSize
)
throw
OpenMMException
(
"Called getParameterValues() with vector of wrong type"
);
values
.
resize
(
numObjects
);
values
.
resize
(
numObjects
);
for
(
int
i
=
0
;
i
<
numObjects
;
i
++
)
for
(
int
i
=
0
;
i
<
numObjects
;
i
++
)
values
[
i
].
resize
(
numParameters
);
values
[
i
].
resize
(
numParameters
);
try
{
try
{
int
base
=
0
;
int
base
=
0
;
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
{
if
(
buffers
[
i
].
get
Typ
e
()
==
"float4"
)
{
if
(
buffers
[
i
].
get
Siz
e
()
==
4
*
elementSize
)
{
vector
<
mm_float4
>
data
(
numObjects
);
vector
<
T
>
data
(
4
*
numObjects
);
context
.
getQueue
().
enqueueReadBuffer
(
reinterpret_cast
<
cl
::
Buffer
&>
(
buffers
[
i
].
getMemory
()),
CL_TRUE
,
0
,
numObjects
*
buffers
[
i
].
getSize
(),
&
data
[
0
]);
context
.
getQueue
().
enqueueReadBuffer
(
reinterpret_cast
<
cl
::
Buffer
&>
(
buffers
[
i
].
getMemory
()),
CL_TRUE
,
0
,
numObjects
*
buffers
[
i
].
getSize
(),
&
data
[
0
]);
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
values
[
j
][
base
]
=
data
[
j
]
.
x
;
values
[
j
][
base
]
=
data
[
4
*
j
];
if
(
base
+
1
<
numParameters
)
if
(
base
+
1
<
numParameters
)
values
[
j
][
base
+
1
]
=
data
[
j
].
y
;
values
[
j
][
base
+
1
]
=
data
[
4
*
j
+
1
]
;
if
(
base
+
2
<
numParameters
)
if
(
base
+
2
<
numParameters
)
values
[
j
][
base
+
2
]
=
data
[
j
].
z
;
values
[
j
][
base
+
2
]
=
data
[
4
*
j
+
2
]
;
if
(
base
+
3
<
numParameters
)
if
(
base
+
3
<
numParameters
)
values
[
j
][
base
+
3
]
=
data
[
j
].
w
;
values
[
j
][
base
+
3
]
=
data
[
4
*
j
+
3
]
;
}
}
base
+=
4
;
base
+=
4
;
}
}
else
if
(
buffers
[
i
].
get
Typ
e
()
==
"float2"
)
{
else
if
(
buffers
[
i
].
get
Siz
e
()
==
2
*
elementSize
)
{
vector
<
mm_float2
>
data
(
numObjects
);
vector
<
T
>
data
(
2
*
numObjects
);
context
.
getQueue
().
enqueueReadBuffer
(
reinterpret_cast
<
cl
::
Buffer
&>
(
buffers
[
i
].
getMemory
()),
CL_TRUE
,
0
,
numObjects
*
buffers
[
i
].
getSize
(),
&
data
[
0
]);
context
.
getQueue
().
enqueueReadBuffer
(
reinterpret_cast
<
cl
::
Buffer
&>
(
buffers
[
i
].
getMemory
()),
CL_TRUE
,
0
,
numObjects
*
buffers
[
i
].
getSize
(),
&
data
[
0
]);
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
values
[
j
][
base
]
=
data
[
j
]
.
x
;
values
[
j
][
base
]
=
data
[
2
*
j
];
if
(
base
+
1
<
numParameters
)
if
(
base
+
1
<
numParameters
)
values
[
j
][
base
+
1
]
=
data
[
j
].
y
;
values
[
j
][
base
+
1
]
=
data
[
2
*
j
+
1
]
;
}
}
base
+=
2
;
base
+=
2
;
}
}
else
if
(
buffers
[
i
].
get
Typ
e
()
==
"float"
)
{
else
if
(
buffers
[
i
].
get
Siz
e
()
==
elementSize
)
{
vector
<
cl_float
>
data
(
numObjects
);
vector
<
T
>
data
(
numObjects
);
context
.
getQueue
().
enqueueReadBuffer
(
reinterpret_cast
<
cl
::
Buffer
&>
(
buffers
[
i
].
getMemory
()),
CL_TRUE
,
0
,
numObjects
*
buffers
[
i
].
getSize
(),
&
data
[
0
]);
context
.
getQueue
().
enqueueReadBuffer
(
reinterpret_cast
<
cl
::
Buffer
&>
(
buffers
[
i
].
getMemory
()),
CL_TRUE
,
0
,
numObjects
*
buffers
[
i
].
getSize
(),
&
data
[
0
]);
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
values
[
j
][
base
]
=
data
[
j
];
values
[
j
][
base
]
=
data
[
j
];
...
@@ -122,36 +127,39 @@ void OpenCLParameterSet::getParameterValues(vector<vector<cl_float> >& values) c
...
@@ -122,36 +127,39 @@ void OpenCLParameterSet::getParameterValues(vector<vector<cl_float> >& values) c
}
}
}
}
void
OpenCLParameterSet
::
setParameterValues
(
const
vector
<
vector
<
cl_float
>
>&
values
)
{
template
<
class
T
>
void
OpenCLParameterSet
::
setParameterValues
(
const
vector
<
vector
<
T
>
>&
values
)
{
if
(
sizeof
(
T
)
!=
elementSize
)
throw
OpenMMException
(
"Called setParameterValues() with vector of wrong type"
);
try
{
try
{
int
base
=
0
;
int
base
=
0
;
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
{
if
(
buffers
[
i
].
get
Typ
e
()
==
"float4"
)
{
if
(
buffers
[
i
].
get
Siz
e
()
==
4
*
elementSize
)
{
vector
<
mm_float4
>
data
(
numObjects
);
vector
<
T
>
data
(
4
*
numObjects
);
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
data
[
j
]
.
x
=
values
[
j
][
base
];
data
[
4
*
j
]
=
values
[
j
][
base
];
if
(
base
+
1
<
numParameters
)
if
(
base
+
1
<
numParameters
)
data
[
j
].
y
=
values
[
j
][
base
+
1
];
data
[
4
*
j
+
1
]
=
values
[
j
][
base
+
1
];
if
(
base
+
2
<
numParameters
)
if
(
base
+
2
<
numParameters
)
data
[
j
].
z
=
values
[
j
][
base
+
2
];
data
[
4
*
j
+
2
]
=
values
[
j
][
base
+
2
];
if
(
base
+
3
<
numParameters
)
if
(
base
+
3
<
numParameters
)
data
[
j
].
w
=
values
[
j
][
base
+
3
];
data
[
4
*
j
+
3
]
=
values
[
j
][
base
+
3
];
}
}
context
.
getQueue
().
enqueueWriteBuffer
(
reinterpret_cast
<
cl
::
Buffer
&>
(
buffers
[
i
].
getMemory
()),
CL_TRUE
,
0
,
numObjects
*
buffers
[
i
].
getSize
(),
&
data
[
0
]);
context
.
getQueue
().
enqueueWriteBuffer
(
reinterpret_cast
<
cl
::
Buffer
&>
(
buffers
[
i
].
getMemory
()),
CL_TRUE
,
0
,
numObjects
*
buffers
[
i
].
getSize
(),
&
data
[
0
]);
base
+=
4
;
base
+=
4
;
}
}
else
if
(
buffers
[
i
].
get
Typ
e
()
==
"float2"
)
{
else
if
(
buffers
[
i
].
get
Siz
e
()
==
2
*
elementSize
)
{
vector
<
mm_float2
>
data
(
numObjects
);
vector
<
T
>
data
(
2
*
numObjects
);
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
data
[
j
]
.
x
=
values
[
j
][
base
];
data
[
2
*
j
]
=
values
[
j
][
base
];
if
(
base
+
1
<
numParameters
)
if
(
base
+
1
<
numParameters
)
data
[
j
].
y
=
values
[
j
][
base
+
1
];
data
[
2
*
j
+
1
]
=
values
[
j
][
base
+
1
];
}
}
context
.
getQueue
().
enqueueWriteBuffer
(
reinterpret_cast
<
cl
::
Buffer
&>
(
buffers
[
i
].
getMemory
()),
CL_TRUE
,
0
,
numObjects
*
buffers
[
i
].
getSize
(),
&
data
[
0
]);
context
.
getQueue
().
enqueueWriteBuffer
(
reinterpret_cast
<
cl
::
Buffer
&>
(
buffers
[
i
].
getMemory
()),
CL_TRUE
,
0
,
numObjects
*
buffers
[
i
].
getSize
(),
&
data
[
0
]);
base
+=
2
;
base
+=
2
;
}
}
else
if
(
buffers
[
i
].
get
Typ
e
()
==
"float"
)
{
else
if
(
buffers
[
i
].
get
Siz
e
()
==
elementSize
)
{
vector
<
cl_float
>
data
(
numObjects
);
vector
<
T
>
data
(
numObjects
);
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
data
[
j
]
=
values
[
j
][
base
];
data
[
j
]
=
values
[
j
][
base
];
context
.
getQueue
().
enqueueWriteBuffer
(
reinterpret_cast
<
cl
::
Buffer
&>
(
buffers
[
i
].
getMemory
()),
CL_TRUE
,
0
,
numObjects
*
buffers
[
i
].
getSize
(),
&
data
[
0
]);
context
.
getQueue
().
enqueueWriteBuffer
(
reinterpret_cast
<
cl
::
Buffer
&>
(
buffers
[
i
].
getMemory
()),
CL_TRUE
,
0
,
numObjects
*
buffers
[
i
].
getSize
(),
&
data
[
0
]);
...
@@ -172,16 +180,26 @@ string OpenCLParameterSet::getParameterSuffix(int index, const std::string& extr
...
@@ -172,16 +180,26 @@ string OpenCLParameterSet::getParameterSuffix(int index, const std::string& extr
const
string
suffixes
[]
=
{
".x"
,
".y"
,
".z"
,
".w"
};
const
string
suffixes
[]
=
{
".x"
,
".y"
,
".z"
,
".w"
};
int
buffer
=
-
1
;
int
buffer
=
-
1
;
for
(
int
i
=
0
;
buffer
==
-
1
&&
i
<
(
int
)
buffers
.
size
();
i
++
)
{
for
(
int
i
=
0
;
buffer
==
-
1
&&
i
<
(
int
)
buffers
.
size
();
i
++
)
{
if
(
index
*
sizeof
(
cl_float
)
<
buffers
[
i
].
getSize
())
if
(
index
*
elementSize
<
buffers
[
i
].
getSize
())
buffer
=
i
;
buffer
=
i
;
else
else
index
-=
buffers
[
i
].
getSize
()
/
sizeof
(
cl_float
)
;
index
-=
buffers
[
i
].
getSize
()
/
elementSize
;
}
}
if
(
buffer
==
-
1
)
if
(
buffer
==
-
1
)
throw
OpenMMException
(
"Internal error: Illegal argument to OpenCLParameterSet::getParameterSuffix() ("
+
name
+
")"
);
throw
OpenMMException
(
"Internal error: Illegal argument to OpenCLParameterSet::getParameterSuffix() ("
+
name
+
")"
);
stringstream
suffix
;
stringstream
suffix
;
suffix
<<
(
buffer
+
1
)
<<
extraSuffix
;
suffix
<<
(
buffer
+
1
)
<<
extraSuffix
;
if
(
buffers
[
buffer
].
get
Typ
e
()
!=
"float"
)
if
(
buffers
[
buffer
].
get
Siz
e
()
!=
elementSize
)
suffix
<<
suffixes
[
index
];
suffix
<<
suffixes
[
index
];
return
suffix
.
str
();
return
suffix
.
str
();
}
}
/**
* Define template instantiations for float and double versions of getParameterValues() and setParameterValues().
*/
namespace
OpenMM
{
template
void
OpenCLParameterSet
::
getParameterValues
<
float
>(
vector
<
vector
<
float
>
>&
values
)
const
;
template
void
OpenCLParameterSet
::
setParameterValues
<
float
>(
const
vector
<
vector
<
float
>
>&
values
);
template
void
OpenCLParameterSet
::
getParameterValues
<
double
>(
vector
<
vector
<
double
>
>&
values
)
const
;
template
void
OpenCLParameterSet
::
setParameterValues
<
double
>(
const
vector
<
vector
<
double
>
>&
values
);
}
\ No newline at end of file
platforms/opencl/src/OpenCLParameterSet.h
View file @
8d6a2a01
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009 Stanford University and the Authors.
*
* Portions copyright (c) 2009
-2012
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -51,8 +51,9 @@ public:
...
@@ -51,8 +51,9 @@ public:
* @param name the name of the parameter set
* @param name the name of the parameter set
* @param bufferPerParameter if true, a separate cl::Buffer is created for each parameter. If false,
* @param bufferPerParameter if true, a separate cl::Buffer is created for each parameter. If false,
* multiple parameters may be combined into a single buffer.
* multiple parameters may be combined into a single buffer.
* @param useDoublePrecision whether values should be stored as single or double precision
*/
*/
OpenCLParameterSet
(
OpenCLContext
&
context
,
int
numParameters
,
int
numObjects
,
const
std
::
string
&
name
,
bool
bufferPerParameter
=
false
);
OpenCLParameterSet
(
OpenCLContext
&
context
,
int
numParameters
,
int
numObjects
,
const
std
::
string
&
name
,
bool
bufferPerParameter
=
false
,
bool
useDoublePrecision
=
false
);
~
OpenCLParameterSet
();
~
OpenCLParameterSet
();
/**
/**
* Get the number of parameters.
* Get the number of parameters.
...
@@ -71,13 +72,15 @@ public:
...
@@ -71,13 +72,15 @@ public:
*
*
* @param values on exit, values[i][j] contains the value of parameter j for object i
* @param values on exit, values[i][j] contains the value of parameter j for object i
*/
*/
void
getParameterValues
(
std
::
vector
<
std
::
vector
<
cl_float
>
>&
values
)
const
;
template
<
class
T
>
void
getParameterValues
(
std
::
vector
<
std
::
vector
<
T
>
>&
values
)
const
;
/**
/**
* Set the values of all parameters.
* Set the values of all parameters.
*
*
* @param values values[i][j] contains the value of parameter j for object i
* @param values values[i][j] contains the value of parameter j for object i
*/
*/
void
setParameterValues
(
const
std
::
vector
<
std
::
vector
<
cl_float
>
>&
values
);
template
<
class
T
>
void
setParameterValues
(
const
std
::
vector
<
std
::
vector
<
T
>
>&
values
);
/**
/**
* Get a set of OpenCLNonbondedUtilities::ParameterInfo objects which describe the Buffers
* Get a set of OpenCLNonbondedUtilities::ParameterInfo objects which describe the Buffers
* containing the data.
* containing the data.
...
@@ -95,8 +98,7 @@ public:
...
@@ -95,8 +98,7 @@ public:
std
::
string
getParameterSuffix
(
int
index
,
const
std
::
string
&
extraSuffix
=
""
)
const
;
std
::
string
getParameterSuffix
(
int
index
,
const
std
::
string
&
extraSuffix
=
""
)
const
;
private:
private:
OpenCLContext
&
context
;
OpenCLContext
&
context
;
int
numParameters
;
int
numParameters
,
numObjects
,
elementSize
;
int
numObjects
;
std
::
string
name
;
std
::
string
name
;
std
::
vector
<
OpenCLNonbondedUtilities
::
ParameterInfo
>
buffers
;
std
::
vector
<
OpenCLNonbondedUtilities
::
ParameterInfo
>
buffers
;
};
};
...
...
platforms/opencl/src/OpenCLPlatform.cpp
View file @
8d6a2a01
...
@@ -76,8 +76,10 @@ OpenCLPlatform::OpenCLPlatform() {
...
@@ -76,8 +76,10 @@ OpenCLPlatform::OpenCLPlatform() {
registerKernelFactory
(
RemoveCMMotionKernel
::
Name
(),
factory
);
registerKernelFactory
(
RemoveCMMotionKernel
::
Name
(),
factory
);
platformProperties
.
push_back
(
OpenCLDeviceIndex
());
platformProperties
.
push_back
(
OpenCLDeviceIndex
());
platformProperties
.
push_back
(
OpenCLPlatformIndex
());
platformProperties
.
push_back
(
OpenCLPlatformIndex
());
platformProperties
.
push_back
(
OpenCLPrecision
());
setPropertyDefaultValue
(
OpenCLDeviceIndex
(),
""
);
setPropertyDefaultValue
(
OpenCLDeviceIndex
(),
""
);
setPropertyDefaultValue
(
OpenCLPlatformIndex
(),
""
);
setPropertyDefaultValue
(
OpenCLPlatformIndex
(),
""
);
setPropertyDefaultValue
(
OpenCLPrecision
(),
"single"
);
}
}
bool
OpenCLPlatform
::
supportsDoublePrecision
()
const
{
bool
OpenCLPlatform
::
supportsDoublePrecision
()
const
{
...
@@ -101,7 +103,9 @@ void OpenCLPlatform::contextCreated(ContextImpl& context, const map<string, stri
...
@@ -101,7 +103,9 @@ void OpenCLPlatform::contextCreated(ContextImpl& context, const map<string, stri
getPropertyDefaultValue
(
OpenCLPlatformIndex
())
:
properties
.
find
(
OpenCLPlatformIndex
())
->
second
);
getPropertyDefaultValue
(
OpenCLPlatformIndex
())
:
properties
.
find
(
OpenCLPlatformIndex
())
->
second
);
const
string
&
devicePropValue
=
(
properties
.
find
(
OpenCLDeviceIndex
())
==
properties
.
end
()
?
const
string
&
devicePropValue
=
(
properties
.
find
(
OpenCLDeviceIndex
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
OpenCLDeviceIndex
())
:
properties
.
find
(
OpenCLDeviceIndex
())
->
second
);
getPropertyDefaultValue
(
OpenCLDeviceIndex
())
:
properties
.
find
(
OpenCLDeviceIndex
())
->
second
);
context
.
setPlatformData
(
new
PlatformData
(
context
.
getSystem
(),
platformPropValue
,
devicePropValue
));
string
precisionPropValue
=
(
properties
.
find
(
OpenCLPrecision
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
OpenCLPrecision
())
:
properties
.
find
(
OpenCLPrecision
())
->
second
);
context
.
setPlatformData
(
new
PlatformData
(
context
.
getSystem
(),
platformPropValue
,
devicePropValue
,
precisionPropValue
));
}
}
void
OpenCLPlatform
::
contextDestroyed
(
ContextImpl
&
context
)
const
{
void
OpenCLPlatform
::
contextDestroyed
(
ContextImpl
&
context
)
const
{
...
@@ -109,7 +113,8 @@ void OpenCLPlatform::contextDestroyed(ContextImpl& context) const {
...
@@ -109,7 +113,8 @@ void OpenCLPlatform::contextDestroyed(ContextImpl& context) const {
delete
data
;
delete
data
;
}
}
OpenCLPlatform
::
PlatformData
::
PlatformData
(
const
System
&
system
,
const
string
&
platformPropValue
,
const
string
&
deviceIndexProperty
)
:
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
)
{
OpenCLPlatform
::
PlatformData
::
PlatformData
(
const
System
&
system
,
const
string
&
platformPropValue
,
const
string
&
deviceIndexProperty
,
const
string
&
precisionProperty
)
:
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
)
{
int
platformIndex
=
0
;
int
platformIndex
=
0
;
if
(
platformPropValue
.
length
()
>
0
)
if
(
platformPropValue
.
length
()
>
0
)
stringstream
(
platformPropValue
)
>>
platformIndex
;
stringstream
(
platformPropValue
)
>>
platformIndex
;
...
@@ -124,11 +129,11 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p
...
@@ -124,11 +129,11 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p
if
(
devices
[
i
].
length
()
>
0
)
{
if
(
devices
[
i
].
length
()
>
0
)
{
unsigned
int
deviceIndex
;
unsigned
int
deviceIndex
;
stringstream
(
devices
[
i
])
>>
deviceIndex
;
stringstream
(
devices
[
i
])
>>
deviceIndex
;
contexts
.
push_back
(
new
OpenCLContext
(
system
,
platformIndex
,
deviceIndex
,
*
this
));
contexts
.
push_back
(
new
OpenCLContext
(
system
,
platformIndex
,
deviceIndex
,
precisionProperty
,
*
this
));
}
}
}
}
if
(
contexts
.
size
()
==
0
)
if
(
contexts
.
size
()
==
0
)
contexts
.
push_back
(
new
OpenCLContext
(
system
,
platformIndex
,
-
1
,
*
this
));
contexts
.
push_back
(
new
OpenCLContext
(
system
,
platformIndex
,
-
1
,
precisionProperty
,
*
this
));
stringstream
device
;
stringstream
device
;
for
(
int
i
=
0
;
i
<
(
int
)
contexts
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
contexts
.
size
();
i
++
)
{
if
(
i
>
0
)
if
(
i
>
0
)
...
@@ -137,6 +142,7 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p
...
@@ -137,6 +142,7 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p
}
}
propertyValues
[
OpenCLPlatform
::
OpenCLDeviceIndex
()]
=
device
.
str
();
propertyValues
[
OpenCLPlatform
::
OpenCLDeviceIndex
()]
=
device
.
str
();
propertyValues
[
OpenCLPlatform
::
OpenCLPlatformIndex
()]
=
OpenCLExpressionUtilities
::
intToString
(
platformIndex
);
propertyValues
[
OpenCLPlatform
::
OpenCLPlatformIndex
()]
=
OpenCLExpressionUtilities
::
intToString
(
platformIndex
);
propertyValues
[
OpenCLPlatform
::
OpenCLPrecision
()]
=
precisionProperty
;
contextEnergy
.
resize
(
contexts
.
size
());
contextEnergy
.
resize
(
contexts
.
size
());
}
}
...
...
platforms/opencl/src/kernels/andersenThermostat.cl
View file @
8d6a2a01
...
@@ -2,17 +2,19 @@
...
@@ -2,17 +2,19 @@
*
Apply
the
Andersen
thermostat
to
adjust
particle
velocities.
*
Apply
the
Andersen
thermostat
to
adjust
particle
velocities.
*/
*/
__kernel
void
applyAndersenThermostat
(
float
collisionFrequency,
float
kT,
__global
float
4*
velm,
__global
const
float
2*
restrict
stepSize,
__global
const
float4*
restrict
random,
__kernel
void
applyAndersenThermostat
(
float
collisionFrequency,
float
kT,
__global
mixed
4*
velm,
__global
const
mixed
2*
restrict
stepSize,
__global
const
float4*
restrict
random,
unsigned
int
randomIndex,
__global
const
int*
restrict
atomGroups
)
{
unsigned
int
randomIndex,
__global
const
int*
restrict
atomGroups
)
{
float
collisionProbability
=
1.0f-exp
(
-collisionFrequency*stepSize[0].y
)
;
float
collisionProbability
=
1.0f-exp
(
-collisionFrequency*stepSize[0].y
)
;
float
randomRange
=
erf
(
collisionProbability/sqrt
(
2.0f
))
;
float
randomRange
=
erf
(
collisionProbability/sqrt
(
2.0f
))
;
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_ATOMS; index += get_global_size(0)) {
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_ATOMS; index += get_global_size(0)) {
float
4
velocity
=
velm[index]
;
mixed
4
velocity
=
velm[index]
;
float4
selectRand
=
random[randomIndex+atomGroups[index]]
;
float4
selectRand
=
random[randomIndex+atomGroups[index]]
;
float4
velRand
=
random[randomIndex+index]
;
float4
velRand
=
random[randomIndex+index]
;
float
scale
=
(
selectRand.w
>
-randomRange
&&
selectRand.w
<
randomRange
?
0.0f
:
1.0f
)
;
real
scale
=
(
selectRand.w
>
-randomRange
&&
selectRand.w
<
randomRange
?
0
:
1
)
;
float
add
=
(
1.0f-scale
)
*sqrt
(
kT*velocity.w
)
;
real
add
=
(
1-scale
)
*sqrt
(
kT*velocity.w
)
;
velocity.xyz
=
scale*velocity.xyz
+
add*velRand.xyz
;
velocity.x
=
scale*velocity.x
+
add*velRand.x
;
velocity.y
=
scale*velocity.y
+
add*velRand.y
;
velocity.z
=
scale*velocity.z
+
add*velRand.z
;
velm[index]
=
velocity
;
velm[index]
=
velocity
;
}
}
}
}
platforms/opencl/src/kernels/brownian.cl
View file @
8d6a2a01
...
@@ -2,13 +2,16 @@
...
@@ -2,13 +2,16 @@
*
Perform
the
first
step
of
Brownian
integration.
*
Perform
the
first
step
of
Brownian
integration.
*/
*/
__kernel
void
integrateBrownianPart1
(
float
tauDeltaT,
float
noiseAmplitude,
__global
const
float
4*
restrict
force,
__kernel
void
integrateBrownianPart1
(
mixed
tauDeltaT,
mixed
noiseAmplitude,
__global
const
real
4*
restrict
force,
__global
float
4*
restrict
posDelta,
__global
const
float
4*
restrict
velm,
__global
const
float4*
restrict
random,
unsigned
int
randomIndex
)
{
__global
mixed
4*
restrict
posDelta,
__global
const
mixed
4*
restrict
velm,
__global
const
float4*
restrict
random,
unsigned
int
randomIndex
)
{
randomIndex
+=
get_global_id
(
0
)
;
randomIndex
+=
get_global_id
(
0
)
;
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_ATOMS; index += get_global_size(0)) {
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_ATOMS; index += get_global_size(0)) {
float
invMass
=
velm[index].w
;
mixed
invMass
=
velm[index].w
;
if
(
invMass
!=
0.0
)
if
(
invMass
!=
0
)
{
posDelta[index]
=
(
float4
)
(
tauDeltaT*invMass*force[index].xyz
+
noiseAmplitude*sqrt
(
invMass
)
*random[randomIndex].xyz,
0.0f
)
;
posDelta[index]
=
(
mixed4
)
(
tauDeltaT*invMass*force[index].x
+
noiseAmplitude*sqrt
(
invMass
)
*random[randomIndex].x,
tauDeltaT*invMass*force[index].y
+
noiseAmplitude*sqrt
(
invMass
)
*random[randomIndex].y,
tauDeltaT*invMass*force[index].z
+
noiseAmplitude*sqrt
(
invMass
)
*random[randomIndex].z,
0
)
;
}
randomIndex
+=
get_global_size
(
0
)
;
randomIndex
+=
get_global_size
(
0
)
;
}
}
}
}
...
@@ -17,12 +20,29 @@ __kernel void integrateBrownianPart1(float tauDeltaT, float noiseAmplitude, __gl
...
@@ -17,12 +20,29 @@ __kernel void integrateBrownianPart1(float tauDeltaT, float noiseAmplitude, __gl
*
Perform
the
second
step
of
Brownian
integration.
*
Perform
the
second
step
of
Brownian
integration.
*/
*/
__kernel
void
integrateBrownianPart2
(
float
oneOverDeltaT,
__global
float
4*
posq,
__global
float
4*
velm,
__global
const
float
4*
restrict
posDelta
)
{
__kernel
void
integrateBrownianPart2
(
mixed
oneOverDeltaT,
__global
real
4*
posq,
__global
real4*
posqCorrection,
__global
mixed
4*
velm,
__global
const
mixed
4*
restrict
posDelta
)
{
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_ATOMS; index += get_global_size(0)) {
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_ATOMS; index += get_global_size(0)) {
if
(
velm[index].w
!=
0.0
)
{
if
(
velm[index].w
!=
0
)
{
float4
delta
=
posDelta[index]
;
mixed4
delta
=
posDelta[index]
;
velm[index].xyz
=
oneOverDeltaT*delta.xyz
;
velm[index].x
=
oneOverDeltaT*delta.x
;
posq[index].xyz
=
posq[index].xyz
+
delta.xyz
;
velm[index].y
=
oneOverDeltaT*delta.y
;
velm[index].z
=
oneOverDeltaT*delta.z
;
#
ifdef
USE_MIXED_PRECISION
real4
pos1
=
posq[index]
;
real4
pos2
=
posqCorrection[index]
;
mixed4
pos
=
(
mixed4
)
(
pos1.x+
(
mixed
)
pos2.x,
pos1.y+
(
mixed
)
pos2.y,
pos1.z+
(
mixed
)
pos2.z,
pos1.w
)
;
#
else
real4
pos
=
posq[index]
;
#
endif
pos.x
+=
delta.x
;
pos.y
+=
delta.y
;
pos.z
+=
delta.z
;
#
ifdef
USE_MIXED_PRECISION
posq[index]
=
(
real4
)
((
real
)
pos.x,
(
real
)
pos.y,
(
real
)
pos.z,
(
real
)
pos.w
)
;
posqCorrection[index]
=
(
real4
)
(
pos.x-
(
real
)
pos.x,
pos.y-
(
real
)
pos.y,
pos.z-
(
real
)
pos.z,
0
)
;
#
else
posq[index]
=
pos
;
#
endif
}
}
}
}
}
}
platforms/opencl/src/kernels/ccma.cl
View file @
8d6a2a01
mixed4
loadPos
(
__global
const
real4*
restrict
posq,
__global
const
real4*
restrict
posqCorrection,
int
index
)
{
#
ifdef
USE_MIXED_PRECISION
real4
pos1
=
posq[index]
;
real4
pos2
=
posqCorrection[index]
;
return
(
mixed4
)
(
pos1.x+
(
mixed
)
pos2.x,
pos1.y+
(
mixed
)
pos2.y,
pos1.z+
(
mixed
)
pos2.z,
pos1.w
)
;
#
else
return
posq[index]
;
#
endif
}
/**
/**
*
Compute
the
direction
each
constraint
is
pointing
in.
This
is
called
once
at
the
beginning
of
constraint
evaluation.
*
Compute
the
direction
each
constraint
is
pointing
in.
This
is
called
once
at
the
beginning
of
constraint
evaluation.
*/
*/
__kernel
void
computeConstraintDirections
(
__global
const
int2*
restrict
constraintAtoms,
__global
float
4*
restrict
constraintDistance,
__global
const
float
4*
restrict
atomPositions
)
{
__kernel
void
computeConstraintDirections
(
__global
const
int2*
restrict
constraintAtoms,
__global
mixed
4*
restrict
constraintDistance,
__global
const
real
4*
restrict
atomPositions
,
__global
const
real4*
restrict
posCorrection
)
{
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_CONSTRAINTS; index += get_global_size(0)) {
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_CONSTRAINTS; index += get_global_size(0)) {
//
Compute
the
direction
for
this
constraint.
//
Compute
the
direction
for
this
constraint.
int2
atoms
=
constraintAtoms[index]
;
int2
atoms
=
constraintAtoms[index]
;
float
4
dir
=
constraintDistance[index]
;
mixed
4
dir
=
constraintDistance[index]
;
float
4
oldPos1
=
atomPositions
[
atoms.x
]
;
mixed
4
oldPos1
=
loadPos
(
atomPositions
,
posCorrection,
atoms.x
)
;
float
4
oldPos2
=
atomPositions
[
atoms.y
]
;
mixed
4
oldPos2
=
loadPos
(
atomPositions
,
posCorrection,
atoms.y
)
;
dir.x
=
oldPos1.x-oldPos2.x
;
dir.x
=
oldPos1.x-oldPos2.x
;
dir.y
=
oldPos1.y-oldPos2.y
;
dir.y
=
oldPos1.y-oldPos2.y
;
dir.z
=
oldPos1.z-oldPos2.z
;
dir.z
=
oldPos1.z-oldPos2.z
;
...
@@ -19,8 +28,8 @@ __kernel void computeConstraintDirections(__global const int2* restrict constrai
...
@@ -19,8 +28,8 @@ __kernel void computeConstraintDirections(__global const int2* restrict constrai
/**
/**
*
Compute
the
force
applied
by
each
constraint.
*
Compute
the
force
applied
by
each
constraint.
*/
*/
__kernel
void
computeConstraintForce
(
__global
const
int2*
restrict
constraintAtoms,
__global
const
float
4*
restrict
constraintDistance,
__global
const
float
4*
restrict
atomPositions,
__kernel
void
computeConstraintForce
(
__global
const
int2*
restrict
constraintAtoms,
__global
const
mixed
4*
restrict
constraintDistance,
__global
const
mixed
4*
restrict
atomPositions,
__global
const
float
*
restrict
reducedMass,
__global
float
*
restrict
delta1,
__global
int*
restrict
converged,
float
tol,
int
iteration
)
{
__global
const
mixed
*
restrict
reducedMass,
__global
mixed
*
restrict
delta1,
__global
int*
restrict
converged,
mixed
tol,
int
iteration
)
{
__local
int
groupConverged
;
__local
int
groupConverged
;
if
(
converged[1-iteration%2]
)
{
if
(
converged[1-iteration%2]
)
{
if
(
get_global_id
(
0
)
==
0
)
if
(
get_global_id
(
0
)
==
0
)
...
@@ -30,21 +39,21 @@ __kernel void computeConstraintForce(__global const int2* restrict constraintAto
...
@@ -30,21 +39,21 @@ __kernel void computeConstraintForce(__global const int2* restrict constraintAto
if
(
get_local_id
(
0
)
==
0
)
if
(
get_local_id
(
0
)
==
0
)
groupConverged
=
1
;
groupConverged
=
1
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
float
lowerTol
=
1
.0f-2.0f
*tol+tol*tol
;
mixed
lowerTol
=
1
-2
*tol+tol*tol
;
float
upperTol
=
1
.0f+2.0f
*tol+tol*tol
;
mixed
upperTol
=
1
+2
*tol+tol*tol
;
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_CONSTRAINTS; index += get_global_size(0)) {
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_CONSTRAINTS; index += get_global_size(0)) {
//
Compute
the
force
due
to
this
constraint.
//
Compute
the
force
due
to
this
constraint.
int2
atoms
=
constraintAtoms[index]
;
int2
atoms
=
constraintAtoms[index]
;
float
4
dir
=
constraintDistance[index]
;
mixed
4
dir
=
constraintDistance[index]
;
float
4
rp_ij
=
atomPositions[atoms.x]-atomPositions[atoms.y]
;
mixed
4
rp_ij
=
atomPositions[atoms.x]-atomPositions[atoms.y]
;
#
ifndef
CONSTRAIN_VELOCITIES
#
ifndef
CONSTRAIN_VELOCITIES
rp_ij.xyz
+=
dir.xyz
;
rp_ij.xyz
+=
dir.xyz
;
#
endif
#
endif
float
rrpr
=
rp_ij.x*dir.x
+
rp_ij.y*dir.y
+
rp_ij.z*dir.z
;
mixed
rrpr
=
rp_ij.x*dir.x
+
rp_ij.y*dir.y
+
rp_ij.z*dir.z
;
float
d_ij2
=
dir.x*dir.x
+
dir.y*dir.y
+
dir.z*dir.z
;
mixed
d_ij2
=
dir.x*dir.x
+
dir.y*dir.y
+
dir.z*dir.z
;
#
ifdef
CONSTRAIN_VELOCITIES
#
ifdef
CONSTRAIN_VELOCITIES
delta1[index]
=
-2
.0f
*reducedMass[index]*rrpr/d_ij2
;
delta1[index]
=
-2*reducedMass[index]*rrpr/d_ij2
;
//
See
whether
it
has
converged.
//
See
whether
it
has
converged.
...
@@ -53,9 +62,9 @@ __kernel void computeConstraintForce(__global const int2* restrict constraintAto
...
@@ -53,9 +62,9 @@ __kernel void computeConstraintForce(__global const int2* restrict constraintAto
converged[iteration%2]
=
0
;
converged[iteration%2]
=
0
;
}
}
#
else
#
else
float
rp2
=
rp_ij.x*rp_ij.x
+
rp_ij.y*rp_ij.y
+
rp_ij.z*rp_ij.z
;
mixed
rp2
=
rp_ij.x*rp_ij.x
+
rp_ij.y*rp_ij.y
+
rp_ij.z*rp_ij.z
;
float
dist2
=
dir.w*dir.w
;
mixed
dist2
=
dir.w*dir.w
;
float
diff
=
dist2
-
rp2
;
mixed
diff
=
dist2
-
rp2
;
delta1[index]
=
(
rrpr
>
d_ij2*1e-6f
?
reducedMass[index]*diff/rrpr
:
0.0f
)
;
delta1[index]
=
(
rrpr
>
d_ij2*1e-6f
?
reducedMass[index]*diff/rrpr
:
0.0f
)
;
//
See
whether
it
has
converged.
//
See
whether
it
has
converged.
...
@@ -71,15 +80,15 @@ __kernel void computeConstraintForce(__global const int2* restrict constraintAto
...
@@ -71,15 +80,15 @@ __kernel void computeConstraintForce(__global const int2* restrict constraintAto
/**
/**
*
Multiply
the
vector
of
constraint
forces
by
the
constraint
matrix.
*
Multiply
the
vector
of
constraint
forces
by
the
constraint
matrix.
*/
*/
__kernel
void
multiplyByConstraintMatrix
(
__global
const
float
*
restrict
delta1,
__global
float
*
restrict
delta2,
__global
const
int*
restrict
constraintMatrixColumn,
__kernel
void
multiplyByConstraintMatrix
(
__global
const
mixed
*
restrict
delta1,
__global
mixed
*
restrict
delta2,
__global
const
int*
restrict
constraintMatrixColumn,
__global
const
float
*
restrict
constraintMatrixValue,
__global
const
int*
restrict
converged,
int
iteration
)
{
__global
const
mixed
*
restrict
constraintMatrixValue,
__global
const
int*
restrict
converged,
int
iteration
)
{
if
(
converged[iteration%2]
)
if
(
converged[iteration%2]
)
return
; // The constraint iteration has already converged.
return
; // The constraint iteration has already converged.
//
Multiply
by
the
inverse
constraint
matrix.
//
Multiply
by
the
inverse
constraint
matrix.
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_CONSTRAINTS; index += get_global_size(0)) {
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_CONSTRAINTS; index += get_global_size(0)) {
float
sum
=
0
.0f
;
mixed
sum
=
0
;
for
(
int
i
=
0
; ; i++) {
for
(
int
i
=
0
; ; i++) {
int
element
=
index+i*NUM_CONSTRAINTS
;
int
element
=
index+i*NUM_CONSTRAINTS
;
int
column
=
constraintMatrixColumn[element]
;
int
column
=
constraintMatrixColumn[element]
;
...
@@ -94,26 +103,26 @@ __kernel void multiplyByConstraintMatrix(__global const float* restrict delta1,
...
@@ -94,26 +103,26 @@ __kernel void multiplyByConstraintMatrix(__global const float* restrict delta1,
/**
/**
*
Update
the
atom
positions
based
on
constraint
forces.
*
Update
the
atom
positions
based
on
constraint
forces.
*/
*/
__kernel
void
updateAtomPositions
(
__global
const
int*
restrict
numAtomConstraints,
__global
const
int*
restrict
atomConstraints,
__global
const
float
4*
restrict
constraintDistance,
__kernel
void
updateAtomPositions
(
__global
const
int*
restrict
numAtomConstraints,
__global
const
int*
restrict
atomConstraints,
__global
const
mixed
4*
restrict
constraintDistance,
__global
float
4*
restrict
atomPositions,
__global
const
float
4*
restrict
velm,
__global
const
float
*
restrict
delta1,
__global
const
float
*
restrict
delta2,
__global
int*
restrict
converged,
int
iteration
)
{
__global
mixed
4*
restrict
atomPositions,
__global
const
mixed
4*
restrict
velm,
__global
const
mixed
*
restrict
delta1,
__global
const
mixed
*
restrict
delta2,
__global
int*
restrict
converged,
int
iteration
)
{
if
(
get_global_id
(
0
)
==
0
)
if
(
get_global_id
(
0
)
==
0
)
converged[1-iteration%2]
=
1
;
converged[1-iteration%2]
=
1
;
if
(
converged[iteration%2]
)
if
(
converged[iteration%2]
)
return
; // The constraint iteration has already converged.
return
; // The constraint iteration has already converged.
float
damping
=
(
iteration
<
2
?
0.5f
:
1.0f
)
;
mixed
damping
=
(
iteration
<
2
?
0.5f
:
1.0f
)
;
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_ATOMS; index += get_global_size(0)) {
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_ATOMS; index += get_global_size(0)) {
//
Compute
the
new
position
of
this
atom.
//
Compute
the
new
position
of
this
atom.
float
4
atomPos
=
atomPositions[index]
;
mixed
4
atomPos
=
atomPositions[index]
;
float
invMass
=
velm[index].w
;
mixed
invMass
=
velm[index].w
;
int
num
=
numAtomConstraints[index]
;
int
num
=
numAtomConstraints[index]
;
for
(
int
i
=
0
; i < num; i++) {
for
(
int
i
=
0
; i < num; i++) {
int
constraint
=
atomConstraints[index+i*NUM_ATOMS]
;
int
constraint
=
atomConstraints[index+i*NUM_ATOMS]
;
bool
forward
=
(
constraint
>
0
)
;
bool
forward
=
(
constraint
>
0
)
;
constraint
=
(
forward
?
constraint-1
:
-constraint-1
)
;
constraint
=
(
forward
?
constraint-1
:
-constraint-1
)
;
float
constraintForce
=
damping*invMass*delta2[constraint]
;
mixed
constraintForce
=
damping*invMass*delta2[constraint]
;
constraintForce
=
(
forward
?
constraintForce
:
-constraintForce
)
;
constraintForce
=
(
forward
?
constraintForce
:
-constraintForce
)
;
float
4
dir
=
constraintDistance[constraint]
;
mixed
4
dir
=
constraintDistance[constraint]
;
atomPos.x
+=
constraintForce*dir.x
;
atomPos.x
+=
constraintForce*dir.x
;
atomPos.y
+=
constraintForce*dir.y
;
atomPos.y
+=
constraintForce*dir.y
;
atomPos.z
+=
constraintForce*dir.z
;
atomPos.z
+=
constraintForce*dir.z
;
...
...
platforms/opencl/src/kernels/constraints.cl
View file @
8d6a2a01
__kernel
void
applyPositionDeltas
(
__global
float
4*
restrict
posq,
__global
float
4*
restrict
posDelta
)
{
__kernel
void
applyPositionDeltas
(
__global
real
4*
restrict
posq,
__global
real4*
restrict
posqCorrection,
__global
mixed
4*
restrict
posDelta
)
{
for
(
unsigned
int
index
=
get_global_id
(
0
)
; index < NUM_ATOMS; index += get_global_size(0)) {
for
(
unsigned
int
index
=
get_global_id
(
0
)
; index < NUM_ATOMS; index += get_global_size(0)) {
float4
position
=
posq[index]
;
#
ifdef
USE_MIXED_PRECISION
position.xyz
+=
posDelta[index].xyz
;
real4
pos1
=
posq[index]
;
posq[index]
=
position
;
real4
pos2
=
posqCorrection[index]
;
mixed4
pos
=
(
mixed4
)
(
pos1.x+
(
mixed
)
pos2.x,
pos1.y+
(
mixed
)
pos2.y,
pos1.z+
(
mixed
)
pos2.z,
pos1.w
)
;
#
else
mixed4
pos
=
posq[index]
;
#
endif
pos.xyz
+=
posDelta[index].xyz
;
#
ifdef
USE_MIXED_PRECISION
posq[index]
=
(
real4
)
((
real
)
pos.x,
(
real
)
pos.y,
(
real
)
pos.z,
(
real
)
pos.w
)
;
posqCorrection[index]
=
(
real4
)
(
pos.x-
(
real
)
pos.x,
pos.y-
(
real
)
pos.y,
pos.z-
(
real
)
pos.z,
0
)
;
#
else
posq[index]
=
pos
;
#
endif
}
}
}
}
platforms/opencl/src/kernels/customIntegrator.cl
View file @
8d6a2a01
__kernel
void
computeSum
(
__global
const
float*
restrict
sumBuffer,
__global
float*
result,
unsigned
int
outputIndex,
int
bufferSize
)
{
__kernel
void
compute
Float
Sum
(
__global
const
float*
restrict
sumBuffer,
__global
float*
result,
unsigned
int
outputIndex,
int
bufferSize
)
{
__local
float
tempBuffer[WORK_GROUP_SIZE]
;
__local
float
tempBuffer[WORK_GROUP_SIZE]
;
const
unsigned
int
thread
=
get_local_id
(
0
)
;
const
unsigned
int
thread
=
get_local_id
(
0
)
;
float
sum
=
0
.0f
;
float
sum
=
0
;
for
(
unsigned
int
index
=
thread
; index < bufferSize; index += get_local_size(0))
for
(
unsigned
int
index
=
thread
; index < bufferSize; index += get_local_size(0))
sum
+=
sumBuffer[index]
;
sum
+=
sumBuffer[index]
;
tempBuffer[thread]
=
sum
;
tempBuffer[thread]
=
sum
;
...
@@ -14,12 +14,41 @@ __kernel void computeSum(__global const float* restrict sumBuffer, __global floa
...
@@ -14,12 +14,41 @@ __kernel void computeSum(__global const float* restrict sumBuffer, __global floa
result[outputIndex]
=
tempBuffer[0]
;
result[outputIndex]
=
tempBuffer[0]
;
}
}
__kernel
void
applyPositionDeltas
(
__global
float4*
restrict
posq,
__global
float4*
restrict
posDelta
)
{
#
ifdef
SUPPORTS_DOUBLE_PRECISION
__kernel
void
computeDoubleSum
(
__global
const
double*
restrict
sumBuffer,
__global
double*
result,
unsigned
int
outputIndex,
int
bufferSize
)
{
__local
double
tempBuffer[WORK_GROUP_SIZE]
;
const
unsigned
int
thread
=
get_local_id
(
0
)
;
double
sum
=
0
;
for
(
unsigned
int
index
=
thread
; index < bufferSize; index += get_local_size(0))
sum
+=
sumBuffer[index]
;
tempBuffer[thread]
=
sum
;
for
(
int
i
=
1
; i < WORK_GROUP_SIZE; i *= 2) {
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
thread%
(
i*2
)
==
0
&&
thread+i
<
WORK_GROUP_SIZE
)
tempBuffer[thread]
+=
tempBuffer[thread+i]
;
}
if
(
thread
==
0
)
result[outputIndex]
=
tempBuffer[0]
;
}
#
endif
__kernel
void
applyPositionDeltas
(
__global
real4*
restrict
posq,
__global
real4*
restrict
posqCorrection,
__global
mixed4*
restrict
posDelta
)
{
for
(
unsigned
int
index
=
get_global_id
(
0
)
; index < NUM_ATOMS; index += get_global_size(0)) {
for
(
unsigned
int
index
=
get_global_id
(
0
)
; index < NUM_ATOMS; index += get_global_size(0)) {
float4
position
=
posq[index]
;
#
ifdef
USE_MIXED_PRECISION
position.xyz
+=
posDelta[index].xyz
;
real4
pos1
=
posq[index]
;
posq[index]
=
position
;
real4
pos2
=
posqCorrection[index]
;
posDelta[index]
=
(
float4
)
0.0f
;
mixed4
pos
=
(
mixed4
)
(
pos1.x+
(
mixed
)
pos2.x,
pos1.y+
(
mixed
)
pos2.y,
pos1.z+
(
mixed
)
pos2.z,
pos1.w
)
;
#
else
real4
pos
=
posq[index]
;
#
endif
pos.xyz
+=
posDelta[index].xyz
;
#
ifdef
USE_MIXED_PRECISION
posq[index]
=
(
real4
)
((
real
)
pos.x,
(
real
)
pos.y,
(
real
)
pos.z,
(
real
)
pos.w
)
;
posqCorrection[index]
=
(
real4
)
(
pos.x-
(
real
)
pos.x,
pos.y-
(
real
)
pos.y,
pos.z-
(
real
)
pos.z,
0
)
;
#
else
posq[index]
=
pos
;
#
endif
posDelta[index]
=
(
mixed4
)
0
;
}
}
}
}
...
...
platforms/opencl/src/kernels/customIntegratorGlobal.cl
View file @
8d6a2a01
__kernel
void
computeGlobal
(
__global
float
2*
restrict
dt,
__global
float
*
restrict
globals,
__global
float
*
restrict
params,
__kernel
void
computeGlobal
(
__global
mixed
2*
restrict
dt,
__global
mixed
*
restrict
globals,
__global
mixed
*
restrict
params,
float
uniform,
float
gaussian,
__global
const
float
*
restrict
energy
)
{
float
uniform,
float
gaussian,
__global
const
real
*
restrict
energy
)
{
COMPUTE_STEP
COMPUTE_STEP
}
}
platforms/opencl/src/kernels/customIntegratorPerDof.cl
View file @
8d6a2a01
#
ifdef
SUPPORTS_DOUBLE_PRECISION
/**
#
pragma
OPENCL
EXTENSION
cl_khr_fp64
:
enable
*
Load
the
position
of
a
particle.
*/
mixed4
loadPos
(
__global
const
real4*
restrict
posq,
__global
const
real4*
restrict
posqCorrection,
int
index
)
{
#
ifdef
USE_MIXED_PRECISION
real4
pos1
=
posq[index]
;
real4
pos2
=
posqCorrection[index]
;
return
(
mixed4
)
(
pos1.x+
(
mixed
)
pos2.x,
pos1.y+
(
mixed
)
pos2.y,
pos1.z+
(
mixed
)
pos2.z,
pos1.w
)
;
#
else
return
posq[index]
;
#
endif
}
/**
*
Store
the
position
of
a
particle.
*/
void
storePos
(
__global
real4*
restrict
posq,
__global
real4*
restrict
posqCorrection,
int
index,
mixed4
pos
)
{
#
ifdef
USE_MIXED_PRECISION
posq[index]
=
(
real4
)
((
real
)
pos.x,
(
real
)
pos.y,
(
real
)
pos.z,
(
real
)
pos.w
)
;
posqCorrection[index]
=
(
real4
)
(
pos.x-
(
real
)
pos.x,
pos.y-
(
real
)
pos.y,
pos.z-
(
real
)
pos.z,
0
)
;
#
else
posq[index]
=
pos
;
#
endif
#
endif
}
__kernel
void
computePerDof
(
__global
float
4*
restrict
posq,
__global
float
4*
restrict
pos
Delta
,
__global
float
4*
restrict
velm
,
__kernel
void
computePerDof
(
__global
real
4*
restrict
posq,
__global
real
4*
restrict
pos
qCorrection
,
__global
mixed
4*
restrict
posDelta
,
__global
const
float
4*
restrict
force,
__global
const
float
2*
restrict
dt,
__global
const
float
*
restrict
globals,
__global
mixed4*
restrict
velm,
__global
const
real
4*
restrict
force,
__global
const
mixed
2*
restrict
dt,
__global
const
mixed
*
restrict
globals,
__global
const
float
*
restrict
params,
__global
float
*
restrict
sum,
__global
const
float4*
restrict
gaussianValues,
__global
const
mixed
*
restrict
params,
__global
mixed
*
restrict
sum,
__global
const
float4*
restrict
gaussianValues,
unsigned
int
randomIndex,
__global
const
float4*
restrict
uniformValues,
__global
const
float
*
restrict
energy
unsigned
int
randomIndex,
__global
const
float4*
restrict
uniformValues,
__global
const
real
*
restrict
energy
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
float
stepSize
=
dt[0].y
;
mixed
stepSize
=
dt[0].y
;
int
index
=
get_global_id
(
0
)
;
int
index
=
get_global_id
(
0
)
;
randomIndex
+=
index
;
randomIndex
+=
index
;
while
(
index
<
NUM_ATOMS
)
{
while
(
index
<
NUM_ATOMS
)
{
#
ifdef
SUPPORTS_DOUBLE_PRECISION
#
ifdef
LOAD_POS_AS_DELTA
#
ifdef
LOAD_POS_AS_DELTA
double
4
position
=
convert_double4
(
posq[
index
]
+posDelta[index]
)
;
mixed
4
position
=
loadPos
(
posq,
posqCorrection,
index
)
+posDelta[index]
;
#
else
#
else
double4
position
=
convert_double4
(
posq[index]
)
;
mixed4
position
=
loadPos
(
posq,
posqCorrection,
index
)
;
#
endif
double4
velocity
=
convert_double4
(
velm[index]
)
;
double4
f
=
convert_double4
(
force[index]
)
;
double
mass
=
1.0/velocity.w
;
#
else
#
ifdef
LOAD_POS_AS_DELTA
float4
position
=
posq[index]+posDelta[index]
;
#
else
float4
position
=
posq[index]
;
#
endif
float4
velocity
=
velm[index]
;
float4
f
=
force[index]
;
float
mass
=
1.0f/velocity.w
;
#
endif
#
endif
mixed4
velocity
=
velm[index]
;
real4
f
=
force[index]
;
mixed
mass
=
1/velocity.w
;
if
(
velocity.w
!=
0.0
)
{
if
(
velocity.w
!=
0.0
)
{
float4
gaussian
=
gaussianValues[randomIndex]
;
float4
gaussian
=
gaussianValues[randomIndex]
;
float4
uniform
=
uniformValues[index]
;
float4
uniform
=
uniformValues[index]
;
...
...
platforms/opencl/src/kernels/ewald.cl
View file @
8d6a2a01
float2
multofFloat2
(
float2
a,
float2
b
)
{
float2
multofFloat2
(
float2
a,
float2
b
)
{
return
(
float2
)
(
a.x*b.x
-
a.y*b.y,
a.x*b.y
+
a.y*b.x
)
;
return
(
float2
)
(
a.x*b.x
-
a.y*b.y,
a.x*b.y
+
a.y*b.x
)
;
}
}
...
...
platforms/opencl/src/kernels/langevin.cl
View file @
8d6a2a01
#
ifdef
SUPPORTS_DOUBLE_PRECISION
#
pragma
OPENCL
EXTENSION
cl_khr_fp64
:
enable
#
endif
enum
{VelScale,
ForceScale,
NoiseScale,
MaxParams}
;
enum
{VelScale,
ForceScale,
NoiseScale,
MaxParams}
;
/**
/**
*
Perform
the
first
step
of
Langevin
integration.
*
Perform
the
first
step
of
Langevin
integration.
*/
*/
__kernel
void
integrateLangevinPart1
(
__global
float
4*
restrict
velm,
__global
const
float
4*
restrict
force,
__global
float
4*
restrict
posDelta,
__kernel
void
integrateLangevinPart1
(
__global
mixed
4*
restrict
velm,
__global
const
real
4*
restrict
force,
__global
mixed
4*
restrict
posDelta,
__global
const
float
*
restrict
paramBuffer,
__global
const
float
2*
restrict
dt,
__global
const
float4*
restrict
random,
unsigned
int
randomIndex
)
{
__global
const
mixed
*
restrict
paramBuffer,
__global
const
mixed
2*
restrict
dt,
__global
const
float4*
restrict
random,
unsigned
int
randomIndex
)
{
float
vscale
=
paramBuffer[VelScale]
;
mixed
vscale
=
paramBuffer[VelScale]
;
float
fscale
=
paramBuffer[ForceScale]
;
mixed
fscale
=
paramBuffer[ForceScale]
;
float
noisescale
=
paramBuffer[NoiseScale]
;
mixed
noisescale
=
paramBuffer[NoiseScale]
;
float
stepSize
=
dt[0].y
;
mixed
stepSize
=
dt[0].y
;
int
index
=
get_global_id
(
0
)
;
int
index
=
get_global_id
(
0
)
;
randomIndex
+=
index
;
randomIndex
+=
index
;
while
(
index
<
NUM_ATOMS
)
{
while
(
index
<
NUM_ATOMS
)
{
float
4
velocity
=
velm[index]
;
mixed
4
velocity
=
velm[index]
;
if
(
velocity.w
!=
0.0
)
{
if
(
velocity.w
!=
0.0
)
{
float
sqrtInvMass
=
sqrt
(
velocity.w
)
;
mixed
sqrtInvMass
=
sqrt
(
velocity.w
)
;
velocity.xyz
=
vscale*velocity.xyz
+
fscale*velocity.w*force[index].xyz
+
noisescale*sqrtInvMass*random[randomIndex].xyz
;
velocity.x
=
vscale*velocity.x
+
fscale*velocity.w*force[index].x
+
noisescale*sqrtInvMass*random[randomIndex].x
;
velocity.y
=
vscale*velocity.y
+
fscale*velocity.w*force[index].y
+
noisescale*sqrtInvMass*random[randomIndex].y
;
velocity.z
=
vscale*velocity.z
+
fscale*velocity.w*force[index].z
+
noisescale*sqrtInvMass*random[randomIndex].z
;
velm[index]
=
velocity
;
velm[index]
=
velocity
;
posDelta[index]
=
stepSize*velocity
;
posDelta[index]
=
stepSize*velocity
;
}
}
...
@@ -33,7 +31,7 @@ __kernel void integrateLangevinPart1(__global float4* restrict velm, __global co
...
@@ -33,7 +31,7 @@ __kernel void integrateLangevinPart1(__global float4* restrict velm, __global co
*
Perform
the
second
step
of
Langevin
integration.
*
Perform
the
second
step
of
Langevin
integration.
*/
*/
__kernel
void
integrateLangevinPart2
(
__global
float
4*
restrict
posq,
__global
const
float
4*
restrict
posDelta,
__global
float
4*
restrict
velm,
__global
const
float
2*
restrict
dt
)
{
__kernel
void
integrateLangevinPart2
(
__global
real
4*
restrict
posq,
__global
real4*
restrict
posqCorrection,
__global
const
mixed
4*
restrict
posDelta,
__global
mixed
4*
restrict
velm,
__global
const
mixed
2*
restrict
dt
)
{
#
ifdef
SUPPORTS_DOUBLE_PRECISION
#
ifdef
SUPPORTS_DOUBLE_PRECISION
double
invStepSize
=
1.0/dt[0].y
;
double
invStepSize
=
1.0/dt[0].y
;
#
else
#
else
...
@@ -41,17 +39,28 @@ __kernel void integrateLangevinPart2(__global float4* restrict posq, __global co
...
@@ -41,17 +39,28 @@ __kernel void integrateLangevinPart2(__global float4* restrict posq, __global co
#
endif
#
endif
int
index
=
get_global_id
(
0
)
;
int
index
=
get_global_id
(
0
)
;
while
(
index
<
NUM_ATOMS
)
{
while
(
index
<
NUM_ATOMS
)
{
float
4
vel
=
velm[index]
;
mixed
4
vel
=
velm[index]
;
if
(
vel.w
!=
0.0
)
{
if
(
vel.w
!=
0.0
)
{
float4
pos
=
posq[index]
;
#
ifdef
USE_MIXED_PRECISION
float4
delta
=
posDelta[index]
;
real4
pos1
=
posq[index]
;
real4
pos2
=
posqCorrection[index]
;
mixed4
pos
=
(
mixed4
)
(
pos1.x+
(
mixed
)
pos2.x,
pos1.y+
(
mixed
)
pos2.y,
pos1.z+
(
mixed
)
pos2.z,
pos1.w
)
;
#
else
real4
pos
=
posq[index]
;
#
endif
mixed4
delta
=
posDelta[index]
;
pos.xyz
+=
delta.xyz
;
pos.xyz
+=
delta.xyz
;
#
ifdef
SUPPORTS_DOUBLE_PRECISION
#
ifdef
SUPPORTS_DOUBLE_PRECISION
vel.xyz
=
convert_
float
4
(
invStepSize*convert_double4
(
delta
))
.
xyz
;
vel.xyz
=
convert_
mixed
4
(
invStepSize*convert_double4
(
delta
))
.
xyz
;
#
else
#
else
vel.xyz
=
invStepSize*delta.xyz
;
vel.xyz
=
invStepSize*delta.xyz
;
#
endif
#
endif
#
ifdef
USE_MIXED_PRECISION
posq[index]
=
convert_real4
(
pos
)
;
posqCorrection[index]
=
(
real4
)
(
pos.x-
(
real
)
pos.x,
pos.y-
(
real
)
pos.y,
pos.z-
(
real
)
pos.z,
0
)
;
#
else
posq[index]
=
pos
;
posq[index]
=
pos
;
#
endif
velm[index]
=
vel
;
velm[index]
=
vel
;
}
}
index
+=
get_global_size
(
0
)
;
index
+=
get_global_size
(
0
)
;
...
@@ -62,15 +71,15 @@ __kernel void integrateLangevinPart2(__global float4* restrict posq, __global co
...
@@ -62,15 +71,15 @@ __kernel void integrateLangevinPart2(__global float4* restrict posq, __global co
*
Select
the
step
size
to
use
for
the
next
step.
*
Select
the
step
size
to
use
for
the
next
step.
*/
*/
__kernel
void
selectLangevinStepSize
(
float
maxStepSize,
float
errorTol,
float
tau,
float
kT,
__global
float
2*
restrict
dt,
__kernel
void
selectLangevinStepSize
(
mixed
maxStepSize,
mixed
errorTol,
mixed
tau,
mixed
kT,
__global
mixed
2*
restrict
dt,
__global
const
float
4*
restrict
velm,
__global
const
float
4*
restrict
force,
__global
float
*
restrict
paramBuffer,
__local
float
*
restrict
params,
__local
float
*
restrict
error
)
{
__global
const
mixed
4*
restrict
velm,
__global
const
real
4*
restrict
force,
__global
mixed
*
restrict
paramBuffer,
__local
mixed
*
restrict
params,
__local
mixed
*
restrict
error
)
{
//
Calculate
the
error.
//
Calculate
the
error.
float
err
=
0.0f
;
mixed
err
=
0.0f
;
unsigned
int
index
=
get_local_id
(
0
)
;
unsigned
int
index
=
get_local_id
(
0
)
;
while
(
index
<
NUM_ATOMS
)
{
while
(
index
<
NUM_ATOMS
)
{
float
4
f
=
force[index]
;
real
4
f
=
force[index]
;
float
invMass
=
velm[index].w
;
mixed
invMass
=
velm[index].w
;
err
+=
(
f.x*f.x
+
f.y*f.y
+
f.z*f.z
)
*invMass
;
err
+=
(
f.x*f.x
+
f.y*f.y
+
f.z*f.z
)
*invMass
;
index
+=
get_global_size
(
0
)
;
index
+=
get_global_size
(
0
)
;
}
}
...
@@ -87,9 +96,9 @@ __kernel void selectLangevinStepSize(float maxStepSize, float errorTol, float ta
...
@@ -87,9 +96,9 @@ __kernel void selectLangevinStepSize(float maxStepSize, float errorTol, float ta
if
(
get_global_id
(
0
)
==
0
)
{
if
(
get_global_id
(
0
)
==
0
)
{
//
Select
the
new
step
size.
//
Select
the
new
step
size.
float
totalError
=
sqrt
(
error[0]/
(
NUM_ATOMS*3
))
;
mixed
totalError
=
sqrt
(
error[0]/
(
NUM_ATOMS*3
))
;
float
newStepSize
=
sqrt
(
errorTol/totalError
)
;
mixed
newStepSize
=
sqrt
(
errorTol/totalError
)
;
float
oldStepSize
=
dt[0].y
;
mixed
oldStepSize
=
dt[0].y
;
if
(
oldStepSize
>
0.0f
)
if
(
oldStepSize
>
0.0f
)
newStepSize
=
min
(
newStepSize,
oldStepSize*2.0f
)
; // For safety, limit how quickly dt can increase.
newStepSize
=
min
(
newStepSize,
oldStepSize*2.0f
)
; // For safety, limit how quickly dt can increase.
if
(
newStepSize
>
oldStepSize
&&
newStepSize
<
1.1f*oldStepSize
)
if
(
newStepSize
>
oldStepSize
&&
newStepSize
<
1.1f*oldStepSize
)
...
@@ -100,9 +109,9 @@ __kernel void selectLangevinStepSize(float maxStepSize, float errorTol, float ta
...
@@ -100,9 +109,9 @@ __kernel void selectLangevinStepSize(float maxStepSize, float errorTol, float ta
//
Recalculate
the
integration
parameters.
//
Recalculate
the
integration
parameters.
float
vscale
=
exp
(
-newStepSize/tau
)
;
mixed
vscale
=
exp
(
-newStepSize/tau
)
;
float
fscale
=
(
1-vscale
)
*tau
;
mixed
fscale
=
(
1-vscale
)
*tau
;
float
noisescale
=
sqrt
(
2*kT/tau
)
*sqrt
(
0.5f*
(
1-vscale*vscale
)
*tau
)
;
mixed
noisescale
=
sqrt
(
2*kT/tau
)
*sqrt
(
0.5f*
(
1-vscale*vscale
)
*tau
)
;
params[VelScale]
=
vscale
;
params[VelScale]
=
vscale
;
params[ForceScale]
=
fscale
;
params[ForceScale]
=
fscale
;
params[NoiseScale]
=
noisescale
;
params[NoiseScale]
=
noisescale
;
...
...
platforms/opencl/src/kernels/removeCM.cl
View file @
8d6a2a01
...
@@ -2,13 +2,16 @@
...
@@ -2,13 +2,16 @@
*
Calculate
the
center
of
mass
momentum.
*
Calculate
the
center
of
mass
momentum.
*/
*/
__kernel
void
calcCenterOfMassMomentum
(
int
numAtoms,
__global
const
float
4*
restrict
velm,
__global
float4*
restrict
cmMomentum,
__local
volatile
float4*
restrict
temp
)
{
__kernel
void
calcCenterOfMassMomentum
(
int
numAtoms,
__global
const
mixed
4*
restrict
velm,
__global
float4*
restrict
cmMomentum,
__local
volatile
float4*
restrict
temp
)
{
int
index
=
get_global_id
(
0
)
;
int
index
=
get_global_id
(
0
)
;
float4
cm
=
0.0f
;
float4
cm
=
0.0f
;
while
(
index
<
numAtoms
)
{
while
(
index
<
numAtoms
)
{
float4
velocity
=
velm[index]
;
mixed4
velocity
=
velm[index]
;
if
(
velocity.w
!=
0.0
)
if
(
velocity.w
!=
0
)
{
cm.xyz
+=
velocity.xyz/velocity.w
;
cm.x
+=
velocity.x/velocity.w
;
cm.y
+=
velocity.y/velocity.w
;
cm.z
+=
velocity.z/velocity.w
;
}
index
+=
get_global_size
(
0
)
;
index
+=
get_global_size
(
0
)
;
}
}
...
@@ -54,7 +57,7 @@ __kernel void calcCenterOfMassMomentum(int numAtoms, __global const float4* rest
...
@@ -54,7 +57,7 @@ __kernel void calcCenterOfMassMomentum(int numAtoms, __global const float4* rest
*
Remove
center
of
mass
motion.
*
Remove
center
of
mass
motion.
*/
*/
__kernel
void
removeCenterOfMassMomentum
(
unsigned
int
numAtoms,
__global
float
4*
restrict
velm,
__global
const
float4*
restrict
cmMomentum,
__local
volatile
float4*
restrict
temp
)
{
__kernel
void
removeCenterOfMassMomentum
(
unsigned
int
numAtoms,
__global
mixed
4*
restrict
velm,
__global
const
float4*
restrict
cmMomentum,
__local
volatile
float4*
restrict
temp
)
{
//
First
sum
all
of
the
momenta
that
were
calculated
by
individual
groups.
//
First
sum
all
of
the
momenta
that
were
calculated
by
individual
groups.
unsigned
int
index
=
get_local_id
(
0
)
;
unsigned
int
index
=
get_local_id
(
0
)
;
...
@@ -101,7 +104,9 @@ __kernel void removeCenterOfMassMomentum(unsigned int numAtoms, __global float4*
...
@@ -101,7 +104,9 @@ __kernel void removeCenterOfMassMomentum(unsigned int numAtoms, __global float4*
index
=
get_global_id
(
0
)
;
index
=
get_global_id
(
0
)
;
while
(
index
<
numAtoms
)
{
while
(
index
<
numAtoms
)
{
velm[index].xyz
-=
cm.xyz
;
velm[index].x
-=
cm.x
;
velm[index].y
-=
cm.y
;
velm[index].z
-=
cm.z
;
index
+=
get_global_size
(
0
)
;
index
+=
get_global_size
(
0
)
;
}
}
}
}
platforms/opencl/src/kernels/settle.cl
View file @
8d6a2a01
mixed4
loadPos
(
__global
const
real4*
restrict
posq,
__global
const
real4*
restrict
posqCorrection,
int
index
)
{
#
ifdef
USE_MIXED_PRECISION
real4
pos1
=
posq[index]
;
real4
pos2
=
posqCorrection[index]
;
return
(
mixed4
)
(
pos1.x+
(
mixed
)
pos2.x,
pos1.y+
(
mixed
)
pos2.y,
pos1.z+
(
mixed
)
pos2.z,
pos1.w
)
;
#
else
return
posq[index]
;
#
endif
}
/**
/**
*
Enforce
constraints
on
SETTLE
clusters
*
Enforce
constraints
on
SETTLE
clusters
*/
*/
__kernel
void
applySettle
(
int
numClusters,
float
tol,
__global
const
float
4*
restrict
oldPos,
__global
float
4*
restrict
posDelta,
__global
const
float
4*
restrict
velm,
__global
const
int4*
restrict
clusterAtoms,
__global
const
float2*
restrict
clusterParams
)
{
__kernel
void
applySettle
(
int
numClusters,
mixed
tol,
__global
const
real
4*
restrict
oldPos,
__global
const
real4*
restrict
posCorrection,
__global
mixed
4*
restrict
posDelta,
__global
const
mixed
4*
restrict
velm,
__global
const
int4*
restrict
clusterAtoms,
__global
const
float2*
restrict
clusterParams
)
{
int
index
=
get_global_id
(
0
)
;
int
index
=
get_global_id
(
0
)
;
while
(
index
<
numClusters
)
{
while
(
index
<
numClusters
)
{
//
Load
the
data
for
this
cluster.
//
Load
the
data
for
this
cluster.
int4
atoms
=
clusterAtoms[index]
;
int4
atoms
=
clusterAtoms[index]
;
float2
params
=
clusterParams[index]
;
float2
params
=
clusterParams[index]
;
float
4
apos0
=
oldPos[
atoms.x
]
;
mixed
4
apos0
=
loadPos
(
oldPos,
posCorrection,
atoms.x
)
;
float
4
xp0
=
posDelta[atoms.x]
;
mixed
4
xp0
=
posDelta[atoms.x]
;
float
4
apos1
=
oldPos[
atoms.y
]
;
mixed
4
apos1
=
loadPos
(
oldPos,
posCorrection,
atoms.y
)
;
float
4
xp1
=
posDelta[atoms.y]
;
mixed
4
xp1
=
posDelta[atoms.y]
;
float
4
apos2
=
oldPos[
atoms.z
]
;
mixed
4
apos2
=
loadPos
(
oldPos,
posCorrection,
atoms.z
)
;
float
4
xp2
=
posDelta[atoms.z]
;
mixed
4
xp2
=
posDelta[atoms.z]
;
float
m0
=
RECIP
(
velm[atoms.x].w
)
;
mixed
m0
=
1/
velm[atoms.x].w
;
float
m1
=
RECIP
(
velm[atoms.y].w
)
;
mixed
m1
=
1/
velm[atoms.y].w
;
float
m2
=
RECIP
(
velm[atoms.z].w
)
;
mixed
m2
=
1/
velm[atoms.z].w
;
//
Apply
the
SETTLE
algorithm.
//
Apply
the
SETTLE
algorithm.
float
xb0
=
apos1.x-apos0.x
;
mixed
xb0
=
apos1.x-apos0.x
;
float
yb0
=
apos1.y-apos0.y
;
mixed
yb0
=
apos1.y-apos0.y
;
float
zb0
=
apos1.z-apos0.z
;
mixed
zb0
=
apos1.z-apos0.z
;
float
xc0
=
apos2.x-apos0.x
;
mixed
xc0
=
apos2.x-apos0.x
;
float
yc0
=
apos2.y-apos0.y
;
mixed
yc0
=
apos2.y-apos0.y
;
float
zc0
=
apos2.z-apos0.z
;
mixed
zc0
=
apos2.z-apos0.z
;
float
invTotalMass
=
1.0f/
(
m0+m1+m2
)
;
mixed
invTotalMass
=
1.0f/
(
m0+m1+m2
)
;
float
xcom
=
(
xp0.x*m0
+
(
xb0+xp1.x
)
*m1
+
(
xc0+xp2.x
)
*m2
)
*
invTotalMass
;
mixed
xcom
=
(
xp0.x*m0
+
(
xb0+xp1.x
)
*m1
+
(
xc0+xp2.x
)
*m2
)
*
invTotalMass
;
float
ycom
=
(
xp0.y*m0
+
(
yb0+xp1.y
)
*m1
+
(
yc0+xp2.y
)
*m2
)
*
invTotalMass
;
mixed
ycom
=
(
xp0.y*m0
+
(
yb0+xp1.y
)
*m1
+
(
yc0+xp2.y
)
*m2
)
*
invTotalMass
;
float
zcom
=
(
xp0.z*m0
+
(
zb0+xp1.z
)
*m1
+
(
zc0+xp2.z
)
*m2
)
*
invTotalMass
;
mixed
zcom
=
(
xp0.z*m0
+
(
zb0+xp1.z
)
*m1
+
(
zc0+xp2.z
)
*m2
)
*
invTotalMass
;
float
xa1
=
xp0.x
-
xcom
;
mixed
xa1
=
xp0.x
-
xcom
;
float
ya1
=
xp0.y
-
ycom
;
mixed
ya1
=
xp0.y
-
ycom
;
float
za1
=
xp0.z
-
zcom
;
mixed
za1
=
xp0.z
-
zcom
;
float
xb1
=
xb0
+
xp1.x
-
xcom
;
mixed
xb1
=
xb0
+
xp1.x
-
xcom
;
float
yb1
=
yb0
+
xp1.y
-
ycom
;
mixed
yb1
=
yb0
+
xp1.y
-
ycom
;
float
zb1
=
zb0
+
xp1.z
-
zcom
;
mixed
zb1
=
zb0
+
xp1.z
-
zcom
;
float
xc1
=
xc0
+
xp2.x
-
xcom
;
mixed
xc1
=
xc0
+
xp2.x
-
xcom
;
float
yc1
=
yc0
+
xp2.y
-
ycom
;
mixed
yc1
=
yc0
+
xp2.y
-
ycom
;
float
zc1
=
zc0
+
xp2.z
-
zcom
;
mixed
zc1
=
zc0
+
xp2.z
-
zcom
;
float
xaksZd
=
yb0*zc0
-
zb0*yc0
;
mixed
xaksZd
=
yb0*zc0
-
zb0*yc0
;
float
yaksZd
=
zb0*xc0
-
xb0*zc0
;
mixed
yaksZd
=
zb0*xc0
-
xb0*zc0
;
float
zaksZd
=
xb0*yc0
-
yb0*xc0
;
mixed
zaksZd
=
xb0*yc0
-
yb0*xc0
;
float
xaksXd
=
ya1*zaksZd
-
za1*yaksZd
;
mixed
xaksXd
=
ya1*zaksZd
-
za1*yaksZd
;
float
yaksXd
=
za1*xaksZd
-
xa1*zaksZd
;
mixed
yaksXd
=
za1*xaksZd
-
xa1*zaksZd
;
float
zaksXd
=
xa1*yaksZd
-
ya1*xaksZd
;
mixed
zaksXd
=
xa1*yaksZd
-
ya1*xaksZd
;
float
xaksYd
=
yaksZd*zaksXd
-
zaksZd*yaksXd
;
mixed
xaksYd
=
yaksZd*zaksXd
-
zaksZd*yaksXd
;
float
yaksYd
=
zaksZd*xaksXd
-
xaksZd*zaksXd
;
mixed
yaksYd
=
zaksZd*xaksXd
-
xaksZd*zaksXd
;
float
zaksYd
=
xaksZd*yaksXd
-
yaksZd*xaksXd
;
mixed
zaksYd
=
xaksZd*yaksXd
-
yaksZd*xaksXd
;
float
axlng
=
sqrt
(
xaksXd*xaksXd
+
yaksXd*yaksXd
+
zaksXd*zaksXd
)
;
mixed
axlng
=
sqrt
(
xaksXd*xaksXd
+
yaksXd*yaksXd
+
zaksXd*zaksXd
)
;
float
aylng
=
sqrt
(
xaksYd*xaksYd
+
yaksYd*yaksYd
+
zaksYd*zaksYd
)
;
mixed
aylng
=
sqrt
(
xaksYd*xaksYd
+
yaksYd*yaksYd
+
zaksYd*zaksYd
)
;
float
azlng
=
sqrt
(
xaksZd*xaksZd
+
yaksZd*yaksZd
+
zaksZd*zaksZd
)
;
mixed
azlng
=
sqrt
(
xaksZd*xaksZd
+
yaksZd*yaksZd
+
zaksZd*zaksZd
)
;
float
trns11
=
xaksXd
/
axlng
;
mixed
trns11
=
xaksXd
/
axlng
;
float
trns21
=
yaksXd
/
axlng
;
mixed
trns21
=
yaksXd
/
axlng
;
float
trns31
=
zaksXd
/
axlng
;
mixed
trns31
=
zaksXd
/
axlng
;
float
trns12
=
xaksYd
/
aylng
;
mixed
trns12
=
xaksYd
/
aylng
;
float
trns22
=
yaksYd
/
aylng
;
mixed
trns22
=
yaksYd
/
aylng
;
float
trns32
=
zaksYd
/
aylng
;
mixed
trns32
=
zaksYd
/
aylng
;
float
trns13
=
xaksZd
/
azlng
;
mixed
trns13
=
xaksZd
/
azlng
;
float
trns23
=
yaksZd
/
azlng
;
mixed
trns23
=
yaksZd
/
azlng
;
float
trns33
=
zaksZd
/
azlng
;
mixed
trns33
=
zaksZd
/
azlng
;
float
xb0d
=
trns11*xb0
+
trns21*yb0
+
trns31*zb0
;
mixed
xb0d
=
trns11*xb0
+
trns21*yb0
+
trns31*zb0
;
float
yb0d
=
trns12*xb0
+
trns22*yb0
+
trns32*zb0
;
mixed
yb0d
=
trns12*xb0
+
trns22*yb0
+
trns32*zb0
;
float
xc0d
=
trns11*xc0
+
trns21*yc0
+
trns31*zc0
;
mixed
xc0d
=
trns11*xc0
+
trns21*yc0
+
trns31*zc0
;
float
yc0d
=
trns12*xc0
+
trns22*yc0
+
trns32*zc0
;
mixed
yc0d
=
trns12*xc0
+
trns22*yc0
+
trns32*zc0
;
float
za1d
=
trns13*xa1
+
trns23*ya1
+
trns33*za1
;
mixed
za1d
=
trns13*xa1
+
trns23*ya1
+
trns33*za1
;
float
xb1d
=
trns11*xb1
+
trns21*yb1
+
trns31*zb1
;
mixed
xb1d
=
trns11*xb1
+
trns21*yb1
+
trns31*zb1
;
float
yb1d
=
trns12*xb1
+
trns22*yb1
+
trns32*zb1
;
mixed
yb1d
=
trns12*xb1
+
trns22*yb1
+
trns32*zb1
;
float
zb1d
=
trns13*xb1
+
trns23*yb1
+
trns33*zb1
;
mixed
zb1d
=
trns13*xb1
+
trns23*yb1
+
trns33*zb1
;
float
xc1d
=
trns11*xc1
+
trns21*yc1
+
trns31*zc1
;
mixed
xc1d
=
trns11*xc1
+
trns21*yc1
+
trns31*zc1
;
float
yc1d
=
trns12*xc1
+
trns22*yc1
+
trns32*zc1
;
mixed
yc1d
=
trns12*xc1
+
trns22*yc1
+
trns32*zc1
;
float
zc1d
=
trns13*xc1
+
trns23*yc1
+
trns33*zc1
;
mixed
zc1d
=
trns13*xc1
+
trns23*yc1
+
trns33*zc1
;
//
---
Step2
A2
'
---
//
---
Step2
A2
'
---
float
rc
=
0.5*params.y
;
float
rc
=
0.5*params.y
;
float
rb
=
sqrt
(
params.x*params.x-rc*rc
)
;
mixed
rb
=
sqrt
(
params.x*params.x-rc*rc
)
;
float
ra
=
rb*
(
m1+m2
)
*invTotalMass
;
mixed
ra
=
rb*
(
m1+m2
)
*invTotalMass
;
rb
-=
ra
;
rb
-=
ra
;
float
sinphi
=
za1d
/
ra
;
mixed
sinphi
=
za1d
/
ra
;
float
cosphi
=
sqrt
(
1.0f
-
sinphi*sinphi
)
;
mixed
cosphi
=
sqrt
(
1.0f
-
sinphi*sinphi
)
;
float
sinpsi
=
(
zb1d
-
zc1d
)
/
(
2*rc*cosphi
)
;
mixed
sinpsi
=
(
zb1d
-
zc1d
)
/
(
2*rc*cosphi
)
;
float
cospsi
=
sqrt
(
1.0f
-
sinpsi*sinpsi
)
;
mixed
cospsi
=
sqrt
(
1.0f
-
sinpsi*sinpsi
)
;
float
ya2d
=
ra*cosphi
;
mixed
ya2d
=
ra*cosphi
;
float
xb2d
=
-
rc*cospsi
;
mixed
xb2d
=
-
rc*cospsi
;
float
yb2d
=
-
rb*cosphi
-
rc*sinpsi*sinphi
;
mixed
yb2d
=
-
rb*cosphi
-
rc*sinpsi*sinphi
;
float
yc2d
=
-
rb*cosphi
+
rc*sinpsi*sinphi
;
mixed
yc2d
=
-
rb*cosphi
+
rc*sinpsi*sinphi
;
float
xb2d2
=
xb2d*xb2d
;
mixed
xb2d2
=
xb2d*xb2d
;
float
hh2
=
4.0f*xb2d2
+
(
yb2d-yc2d
)
*
(
yb2d-yc2d
)
+
(
zb1d-zc1d
)
*
(
zb1d-zc1d
)
;
mixed
hh2
=
4.0f*xb2d2
+
(
yb2d-yc2d
)
*
(
yb2d-yc2d
)
+
(
zb1d-zc1d
)
*
(
zb1d-zc1d
)
;
float
deltx
=
2.0f*xb2d
+
sqrt
(
4.0f*xb2d2
-
hh2
+
params.y*params.y
)
;
mixed
deltx
=
2.0f*xb2d
+
sqrt
(
4.0f*xb2d2
-
hh2
+
params.y*params.y
)
;
xb2d
-=
deltx*0.5
;
xb2d
-=
deltx*0.5
;
//
---
Step3
al,be,ga
---
//
---
Step3
al,be,ga
---
float
alpha
=
(
xb2d*
(
xb0d-xc0d
)
+
yb0d*yb2d
+
yc0d*yc2d
)
;
mixed
alpha
=
(
xb2d*
(
xb0d-xc0d
)
+
yb0d*yb2d
+
yc0d*yc2d
)
;
float
beta
=
(
xb2d*
(
yc0d-yb0d
)
+
xb0d*yb2d
+
xc0d*yc2d
)
;
mixed
beta
=
(
xb2d*
(
yc0d-yb0d
)
+
xb0d*yb2d
+
xc0d*yc2d
)
;
float
gamma
=
xb0d*yb1d
-
xb1d*yb0d
+
xc0d*yc1d
-
xc1d*yc0d
;
mixed
gamma
=
xb0d*yb1d
-
xb1d*yb0d
+
xc0d*yc1d
-
xc1d*yc0d
;
float
al2be2
=
alpha*alpha
+
beta*beta
;
mixed
al2be2
=
alpha*alpha
+
beta*beta
;
float
sintheta
=
(
alpha*gamma
-
beta*sqrt
(
al2be2
-
gamma*gamma
))
/
al2be2
;
mixed
sintheta
=
(
alpha*gamma
-
beta*sqrt
(
al2be2
-
gamma*gamma
))
/
al2be2
;
//
---
Step4
A3
'
---
//
---
Step4
A3
'
---
float
costheta
=
sqrt
(
1.0f
-
sintheta*sintheta
)
;
mixed
costheta
=
sqrt
(
1.0f
-
sintheta*sintheta
)
;
float
xa3d
=
-
ya2d*sintheta
;
mixed
xa3d
=
-
ya2d*sintheta
;
float
ya3d
=
ya2d*costheta
;
mixed
ya3d
=
ya2d*costheta
;
float
za3d
=
za1d
;
mixed
za3d
=
za1d
;
float
xb3d
=
xb2d*costheta
-
yb2d*sintheta
;
mixed
xb3d
=
xb2d*costheta
-
yb2d*sintheta
;
float
yb3d
=
xb2d*sintheta
+
yb2d*costheta
;
mixed
yb3d
=
xb2d*sintheta
+
yb2d*costheta
;
float
zb3d
=
zb1d
;
mixed
zb3d
=
zb1d
;
float
xc3d
=
-
xb2d*costheta
-
yc2d*sintheta
;
mixed
xc3d
=
-
xb2d*costheta
-
yc2d*sintheta
;
float
yc3d
=
-
xb2d*sintheta
+
yc2d*costheta
;
mixed
yc3d
=
-
xb2d*sintheta
+
yc2d*costheta
;
float
zc3d
=
zc1d
;
mixed
zc3d
=
zc1d
;
//
---
Step5
A3
---
//
---
Step5
A3
---
float
xa3
=
trns11*xa3d
+
trns12*ya3d
+
trns13*za3d
;
mixed
xa3
=
trns11*xa3d
+
trns12*ya3d
+
trns13*za3d
;
float
ya3
=
trns21*xa3d
+
trns22*ya3d
+
trns23*za3d
;
mixed
ya3
=
trns21*xa3d
+
trns22*ya3d
+
trns23*za3d
;
float
za3
=
trns31*xa3d
+
trns32*ya3d
+
trns33*za3d
;
mixed
za3
=
trns31*xa3d
+
trns32*ya3d
+
trns33*za3d
;
float
xb3
=
trns11*xb3d
+
trns12*yb3d
+
trns13*zb3d
;
mixed
xb3
=
trns11*xb3d
+
trns12*yb3d
+
trns13*zb3d
;
float
yb3
=
trns21*xb3d
+
trns22*yb3d
+
trns23*zb3d
;
mixed
yb3
=
trns21*xb3d
+
trns22*yb3d
+
trns23*zb3d
;
float
zb3
=
trns31*xb3d
+
trns32*yb3d
+
trns33*zb3d
;
mixed
zb3
=
trns31*xb3d
+
trns32*yb3d
+
trns33*zb3d
;
float
xc3
=
trns11*xc3d
+
trns12*yc3d
+
trns13*zc3d
;
mixed
xc3
=
trns11*xc3d
+
trns12*yc3d
+
trns13*zc3d
;
float
yc3
=
trns21*xc3d
+
trns22*yc3d
+
trns23*zc3d
;
mixed
yc3
=
trns21*xc3d
+
trns22*yc3d
+
trns23*zc3d
;
float
zc3
=
trns31*xc3d
+
trns32*yc3d
+
trns33*zc3d
;
mixed
zc3
=
trns31*xc3d
+
trns32*yc3d
+
trns33*zc3d
;
xp0.x
=
xcom
+
xa3
;
xp0.x
=
xcom
+
xa3
;
xp0.y
=
ycom
+
ya3
;
xp0.y
=
ycom
+
ya3
;
...
@@ -155,49 +165,49 @@ __kernel void applySettle(int numClusters, float tol, __global const float4* res
...
@@ -155,49 +165,49 @@ __kernel void applySettle(int numClusters, float tol, __global const float4* res
*
Enforce
velocity
constraints
on
SETTLE
clusters
*
Enforce
velocity
constraints
on
SETTLE
clusters
*/
*/
__kernel
void
constrainVelocities
(
int
numClusters,
float
tol,
__global
const
float
4*
restrict
oldPos,
__global
float
4*
restrict
posDelta,
__global
float
4*
restrict
velm,
__global
const
int4*
restrict
clusterAtoms,
__global
const
float2*
restrict
clusterParams
)
{
__kernel
void
constrainVelocities
(
int
numClusters,
mixed
tol,
__global
const
real
4*
restrict
oldPos,
__global
const
real4*
restrict
posCorrection,
__global
mixed
4*
restrict
posDelta,
__global
mixed
4*
restrict
velm,
__global
const
int4*
restrict
clusterAtoms,
__global
const
float2*
restrict
clusterParams
)
{
for
(
int
index
=
get_global_id
(
0
)
; index < numClusters; index += get_global_size(0)) {
for
(
int
index
=
get_global_id
(
0
)
; index < numClusters; index += get_global_size(0)) {
//
Load
the
data
for
this
cluster.
//
Load
the
data
for
this
cluster.
int4
atoms
=
clusterAtoms[index]
;
int4
atoms
=
clusterAtoms[index]
;
float
4
apos0
=
oldPos[
atoms.x
]
;
mixed
4
apos0
=
loadPos
(
oldPos,
posCorrection,
atoms.x
)
;
float
4
apos1
=
oldPos[
atoms.y
]
;
mixed
4
apos1
=
loadPos
(
oldPos,
posCorrection,
atoms.y
)
;
float
4
apos2
=
oldPos[
atoms.z
]
;
mixed
4
apos2
=
loadPos
(
oldPos,
posCorrection,
atoms.z
)
;
float
4
v0
=
velm[atoms.x]
;
mixed
4
v0
=
velm[atoms.x]
;
float
4
v1
=
velm[atoms.y]
;
mixed
4
v1
=
velm[atoms.y]
;
float
4
v2
=
velm[atoms.z]
;
mixed
4
v2
=
velm[atoms.z]
;
//
Compute
intermediate
quantities:
the
atom
masses,
the
bond
directions,
the
relative
velocities,
//
Compute
intermediate
quantities:
the
atom
masses,
the
bond
directions,
the
relative
velocities,
//
and
the
angle
cosines
and
sines.
//
and
the
angle
cosines
and
sines.
float
mA
=
RECIP
(
v0.w
)
;
mixed
mA
=
1/
v0.w
;
float
mB
=
RECIP
(
v1.w
)
;
mixed
mB
=
1/
v1.w
;
float
mC
=
RECIP
(
v2.w
)
;
mixed
mC
=
1/
v2.w
;
float
4
eAB
=
apos1-apos0
;
mixed
4
eAB
=
apos1-apos0
;
float
4
eBC
=
apos2-apos1
;
mixed
4
eBC
=
apos2-apos1
;
float
4
eCA
=
apos0-apos2
;
mixed
4
eCA
=
apos0-apos2
;
eAB.xyz
/=
SQRT
(
eAB.x*eAB.x
+
eAB.y*eAB.y
+
eAB.z*eAB.z
)
;
eAB.xyz
/=
sqrt
(
eAB.x*eAB.x
+
eAB.y*eAB.y
+
eAB.z*eAB.z
)
;
eBC.xyz
/=
SQRT
(
eBC.x*eBC.x
+
eBC.y*eBC.y
+
eBC.z*eBC.z
)
;
eBC.xyz
/=
sqrt
(
eBC.x*eBC.x
+
eBC.y*eBC.y
+
eBC.z*eBC.z
)
;
eCA.xyz
/=
SQRT
(
eCA.x*eCA.x
+
eCA.y*eCA.y
+
eCA.z*eCA.z
)
;
eCA.xyz
/=
sqrt
(
eCA.x*eCA.x
+
eCA.y*eCA.y
+
eCA.z*eCA.z
)
;
float
vAB
=
(
v1.x-v0.x
)
*eAB.x
+
(
v1.y-v0.y
)
*eAB.y
+
(
v1.z-v0.z
)
*eAB.z
;
mixed
vAB
=
(
v1.x-v0.x
)
*eAB.x
+
(
v1.y-v0.y
)
*eAB.y
+
(
v1.z-v0.z
)
*eAB.z
;
float
vBC
=
(
v2.x-v1.x
)
*eBC.x
+
(
v2.y-v1.y
)
*eBC.y
+
(
v2.z-v1.z
)
*eBC.z
;
mixed
vBC
=
(
v2.x-v1.x
)
*eBC.x
+
(
v2.y-v1.y
)
*eBC.y
+
(
v2.z-v1.z
)
*eBC.z
;
float
vCA
=
(
v0.x-v2.x
)
*eCA.x
+
(
v0.y-v2.y
)
*eCA.y
+
(
v0.z-v2.z
)
*eCA.z
;
mixed
vCA
=
(
v0.x-v2.x
)
*eCA.x
+
(
v0.y-v2.y
)
*eCA.y
+
(
v0.z-v2.z
)
*eCA.z
;
float
cA
=
-
(
eAB.x*eCA.x
+
eAB.y*eCA.y
+
eAB.z*eCA.z
)
;
mixed
cA
=
-
(
eAB.x*eCA.x
+
eAB.y*eCA.y
+
eAB.z*eCA.z
)
;
float
cB
=
-
(
eAB.x*eBC.x
+
eAB.y*eBC.y
+
eAB.z*eBC.z
)
;
mixed
cB
=
-
(
eAB.x*eBC.x
+
eAB.y*eBC.y
+
eAB.z*eBC.z
)
;
float
cC
=
-
(
eBC.x*eCA.x
+
eBC.y*eCA.y
+
eBC.z*eCA.z
)
;
mixed
cC
=
-
(
eBC.x*eCA.x
+
eBC.y*eCA.y
+
eBC.z*eCA.z
)
;
float
s2A
=
1-cA*cA
;
mixed
s2A
=
1-cA*cA
;
float
s2B
=
1-cB*cB
;
mixed
s2B
=
1-cB*cB
;
float
s2C
=
1-cC*cC
;
mixed
s2C
=
1-cC*cC
;
//
Solve
the
equations.
These
are
different
from
those
in
the
SETTLE
paper
(
JCC
13
(
8
)
,
pp.
952-962,
1992
)
,
because
//
Solve
the
equations.
These
are
different
from
those
in
the
SETTLE
paper
(
JCC
13
(
8
)
,
pp.
952-962,
1992
)
,
because
//
in
going
from
equations
B1
to
B2,
they
make
the
assumption
that
mB=mC
(
but
don
't
bother
to
mention
they
're
//
in
going
from
equations
B1
to
B2,
they
make
the
assumption
that
mB=mC
(
but
don
't
bother
to
mention
they
're
//
making
that
assumption
)
.
We
allow
all
three
atoms
to
have
different
masses.
//
making
that
assumption
)
.
We
allow
all
three
atoms
to
have
different
masses.
float
mABCinv
=
RECIP
(
mA*mB*mC
)
;
mixed
mABCinv
=
1/
(
mA*mB*mC
)
;
float
denom
=
(((
s2A*mB+s2B*mA
)
*mC+
(
s2A*mB*mB+2*
(
cA*cB*cC+1
)
*mA*mB+s2B*mA*
mA
))
*mC+s2C*mA*mB*
(
mA+mB
))
*mABCinv
;
mixed
denom
=
(((
s2A*mB+s2B*mA
)
*mC+
(
s2A*mB*mB+2*
(
cA*cB*cC+1
)
*mA*mB+s2B*mA*
mA
))
*mC+s2C*mA*mB*
(
mA+mB
))
*mABCinv
;
float
tab
=
((
cB*cC*mA-cA*mB-cA*mC
)
*vCA
+
(
cA*cC*mB-cB*mC-cB*mA
)
*vBC
+
(
s2C*mA*mA*mB*mB*mABCinv+
(
mA+mB+mC
))
*vAB
)
/denom
;
mixed
tab
=
((
cB*cC*mA-cA*mB-cA*mC
)
*vCA
+
(
cA*cC*mB-cB*mC-cB*mA
)
*vBC
+
(
s2C*mA*mA*mB*mB*mABCinv+
(
mA+mB+mC
))
*vAB
)
/denom
;
float
tbc
=
((
cA*cB*mC-cC*mB-cC*mA
)
*vCA
+
(
s2A*mB*mB*mC*mC*mABCinv+
(
mA+mB+mC
))
*vBC
+
(
cA*cC*mB-cB*mA-cB*mC
)
*vAB
)
/denom
;
mixed
tbc
=
((
cA*cB*mC-cC*mB-cC*mA
)
*vCA
+
(
s2A*mB*mB*mC*mC*mABCinv+
(
mA+mB+mC
))
*vBC
+
(
cA*cC*mB-cB*mA-cB*mC
)
*vAB
)
/denom
;
float
tca
=
((
s2B*mA*mA*mC*mC*mABCinv+
(
mA+mB+mC
))
*vCA
+
(
cA*cB*mC-cC*mB-cC*mA
)
*vBC
+
(
cB*cC*mA-cA*mB-cA*mC
)
*vAB
)
/denom
;
mixed
tca
=
((
s2B*mA*mA*mC*mC*mABCinv+
(
mA+mB+mC
))
*vCA
+
(
cA*cB*mC-cC*mB-cC*mA
)
*vBC
+
(
cB*cC*mA-cA*mB-cA*mC
)
*vAB
)
/denom
;
v0.xyz
+=
(
tab*eAB.xyz
-
tca*eCA.xyz
)
*v0.w
;
v0.xyz
+=
(
tab*eAB.xyz
-
tca*eCA.xyz
)
*v0.w
;
v1.xyz
+=
(
tbc*eBC.xyz
-
tab*eAB.xyz
)
*v1.w
;
v1.xyz
+=
(
tbc*eBC.xyz
-
tab*eAB.xyz
)
*v1.w
;
v2.xyz
+=
(
tca*eCA.xyz
-
tbc*eBC.xyz
)
*v2.w
;
v2.xyz
+=
(
tca*eCA.xyz
-
tbc*eBC.xyz
)
*v2.w
;
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment